reg.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119
  1. #include "gc.h"
  2. Reg*
  3. rega(void)
  4. {
  5. Reg *r;
  6. r = freer;
  7. if(r == R) {
  8. r = alloc(sizeof(*r));
  9. } else
  10. freer = r->link;
  11. *r = zreg;
  12. return r;
  13. }
  14. int
  15. rcmp(const void *a1, const void *a2)
  16. {
  17. Rgn *p1, *p2;
  18. int c1, c2;
  19. p1 = (Rgn*)a1;
  20. p2 = (Rgn*)a2;
  21. c1 = p2->cost;
  22. c2 = p1->cost;
  23. if(c1 -= c2)
  24. return c1;
  25. return p2->varno - p1->varno;
  26. }
  27. void
  28. regopt(Prog *p)
  29. {
  30. Reg *r, *r1, *r2;
  31. Prog *p1;
  32. int i, z;
  33. long initpc, val, npc;
  34. ulong vreg;
  35. Bits bit;
  36. struct
  37. {
  38. long m;
  39. long c;
  40. Reg* p;
  41. } log5[6], *lp;
  42. firstr = R;
  43. lastr = R;
  44. nvar = 0;
  45. regbits = 0;
  46. for(z=0; z<BITS; z++) {
  47. externs.b[z] = 0;
  48. params.b[z] = 0;
  49. consts.b[z] = 0;
  50. addrs.b[z] = 0;
  51. }
  52. /*
  53. * pass 1
  54. * build aux data structure
  55. * allocate pcs
  56. * find use and set of variables
  57. */
  58. val = 5L * 5L * 5L * 5L * 5L;
  59. lp = log5;
  60. for(i=0; i<5; i++) {
  61. lp->m = val;
  62. lp->c = 0;
  63. lp->p = R;
  64. val /= 5L;
  65. lp++;
  66. }
  67. val = 0;
  68. for(; p != P; p = p->link) {
  69. switch(p->as) {
  70. case ADATA:
  71. case AGLOBL:
  72. case ANAME:
  73. continue;
  74. }
  75. r = rega();
  76. if(firstr == R) {
  77. firstr = r;
  78. lastr = r;
  79. } else {
  80. lastr->link = r;
  81. r->p1 = lastr;
  82. lastr->s1 = r;
  83. lastr = r;
  84. }
  85. r->prog = p;
  86. r->pc = val;
  87. val++;
  88. lp = log5;
  89. for(i=0; i<5; i++) {
  90. lp->c--;
  91. if(lp->c <= 0) {
  92. lp->c = lp->m;
  93. if(lp->p != R)
  94. lp->p->log5 = r;
  95. lp->p = r;
  96. (lp+1)->c = 0;
  97. break;
  98. }
  99. lp++;
  100. }
  101. r1 = r->p1;
  102. if(r1 != R)
  103. switch(r1->prog->as) {
  104. case ARETURN:
  105. case AJMP:
  106. case ARETT:
  107. r->p1 = R;
  108. r1->s1 = R;
  109. }
  110. /*
  111. * left side always read
  112. */
  113. bit = mkvar(&p->from, p->as==AMOVW);
  114. for(z=0; z<BITS; z++)
  115. r->use1.b[z] |= bit.b[z];
  116. /*
  117. * right side depends on opcode
  118. */
  119. bit = mkvar(&p->to, 0);
  120. if(bany(&bit))
  121. switch(p->as) {
  122. default:
  123. diag(Z, "reg: unknown asop: %A", p->as);
  124. break;
  125. /*
  126. * right side write
  127. */
  128. case ANOP:
  129. case AMOVB:
  130. case AMOVBU:
  131. case AMOVH:
  132. case AMOVHU:
  133. case AMOVW:
  134. case AFMOVF:
  135. case AFMOVD:
  136. for(z=0; z<BITS; z++)
  137. r->set.b[z] |= bit.b[z];
  138. break;
  139. /*
  140. * funny
  141. */
  142. case AJMPL:
  143. for(z=0; z<BITS; z++)
  144. addrs.b[z] |= bit.b[z];
  145. break;
  146. }
  147. }
  148. if(firstr == R)
  149. return;
  150. initpc = pc - val;
  151. npc = val;
  152. /*
  153. * pass 2
  154. * turn branch references to pointers
  155. * build back pointers
  156. */
  157. for(r = firstr; r != R; r = r->link) {
  158. p = r->prog;
  159. if(p->to.type == D_BRANCH) {
  160. val = p->to.offset - initpc;
  161. r1 = firstr;
  162. while(r1 != R) {
  163. r2 = r1->log5;
  164. if(r2 != R && val >= r2->pc) {
  165. r1 = r2;
  166. continue;
  167. }
  168. if(r1->pc == val)
  169. break;
  170. r1 = r1->link;
  171. }
  172. if(r1 == R) {
  173. nearln = p->lineno;
  174. diag(Z, "ref not found\n%P", p);
  175. continue;
  176. }
  177. if(r1 == r) {
  178. nearln = p->lineno;
  179. diag(Z, "ref to self\n%P", p);
  180. continue;
  181. }
  182. r->s2 = r1;
  183. r->p2link = r1->p2;
  184. r1->p2 = r;
  185. }
  186. }
  187. if(debug['R']) {
  188. p = firstr->prog;
  189. print("\n%L %D\n", p->lineno, &p->from);
  190. }
  191. /*
  192. * pass 2.5
  193. * find looping structure
  194. */
  195. for(r = firstr; r != R; r = r->link)
  196. r->active = 0;
  197. change = 0;
  198. loopit(firstr, npc);
  199. if(debug['R'] && debug['v']) {
  200. print("\nlooping structure:\n");
  201. for(r = firstr; r != R; r = r->link) {
  202. print("%ld:%P", r->loop, r->prog);
  203. for(z=0; z<BITS; z++)
  204. bit.b[z] = r->use1.b[z] |
  205. r->use2.b[z] | r->set.b[z];
  206. if(bany(&bit)) {
  207. print("\t");
  208. if(bany(&r->use1))
  209. print(" u1=%B", r->use1);
  210. if(bany(&r->use2))
  211. print(" u2=%B", r->use2);
  212. if(bany(&r->set))
  213. print(" st=%B", r->set);
  214. }
  215. print("\n");
  216. }
  217. }
  218. /*
  219. * pass 3
  220. * iterate propagating usage
  221. * back until flow graph is complete
  222. */
  223. loop1:
  224. change = 0;
  225. for(r = firstr; r != R; r = r->link)
  226. r->active = 0;
  227. for(r = firstr; r != R; r = r->link)
  228. if(r->prog->as == ARETURN)
  229. prop(r, zbits, zbits);
  230. loop11:
  231. /* pick up unreachable code */
  232. i = 0;
  233. for(r = firstr; r != R; r = r1) {
  234. r1 = r->link;
  235. if(r1 && r1->active && !r->active) {
  236. prop(r, zbits, zbits);
  237. i = 1;
  238. }
  239. }
  240. if(i)
  241. goto loop11;
  242. if(change)
  243. goto loop1;
  244. /*
  245. * pass 4
  246. * iterate propagating register/variable synchrony
  247. * forward until graph is complete
  248. */
  249. loop2:
  250. change = 0;
  251. for(r = firstr; r != R; r = r->link)
  252. r->active = 0;
  253. synch(firstr, zbits);
  254. if(change)
  255. goto loop2;
  256. /*
  257. * pass 5
  258. * isolate regions
  259. * calculate costs (paint1)
  260. */
  261. r = firstr;
  262. if(r) {
  263. for(z=0; z<BITS; z++)
  264. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  265. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  266. if(bany(&bit)) {
  267. nearln = r->prog->lineno;
  268. warn(Z, "used and not set: %B", bit);
  269. if(debug['R'] && !debug['w'])
  270. print("used and not set: %B\n", bit);
  271. }
  272. }
  273. if(debug['R'] && debug['v'])
  274. print("\nprop structure:\n");
  275. for(r = firstr; r != R; r = r->link)
  276. r->act = zbits;
  277. rgp = region;
  278. nregion = 0;
  279. for(r = firstr; r != R; r = r->link) {
  280. if(debug['R'] && debug['v'])
  281. print("%P\n set = %B; rah = %B; cal = %B\n",
  282. r->prog, r->set, r->refahead, r->calahead);
  283. for(z=0; z<BITS; z++)
  284. bit.b[z] = r->set.b[z] &
  285. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  286. if(bany(&bit)) {
  287. nearln = r->prog->lineno;
  288. warn(Z, "set and not used: %B", bit);
  289. if(debug['R'])
  290. print("set an not used: %B\n", bit);
  291. excise(r);
  292. }
  293. for(z=0; z<BITS; z++)
  294. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  295. while(bany(&bit)) {
  296. i = bnum(bit);
  297. rgp->enter = r;
  298. rgp->varno = i;
  299. change = 0;
  300. if(debug['R'] && debug['v'])
  301. print("\n");
  302. paint1(r, i);
  303. bit.b[i/32] &= ~(1L<<(i%32));
  304. if(change <= 0) {
  305. if(debug['R'])
  306. print("%L$%d: %B\n",
  307. r->prog->lineno, change, blsh(i));
  308. continue;
  309. }
  310. rgp->cost = change;
  311. nregion++;
  312. if(nregion >= NRGN) {
  313. warn(Z, "too many regions");
  314. goto brk;
  315. }
  316. rgp++;
  317. }
  318. }
  319. brk:
  320. qsort(region, nregion, sizeof(region[0]), rcmp);
  321. /*
  322. * pass 6
  323. * determine used registers (paint2)
  324. * replace code (paint3)
  325. */
  326. rgp = region;
  327. for(i=0; i<nregion; i++) {
  328. bit = blsh(rgp->varno);
  329. vreg = paint2(rgp->enter, rgp->varno);
  330. vreg = allreg(vreg, rgp);
  331. if(debug['R']) {
  332. if(rgp->regno >= NREG)
  333. print("%L$%d F%d: %B\n",
  334. rgp->enter->prog->lineno,
  335. rgp->cost,
  336. rgp->regno-NREG,
  337. bit);
  338. else
  339. print("%L$%d R%d: %B\n",
  340. rgp->enter->prog->lineno,
  341. rgp->cost,
  342. rgp->regno,
  343. bit);
  344. }
  345. if(rgp->regno != 0)
  346. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  347. rgp++;
  348. }
  349. /*
  350. * pass 7
  351. * peep-hole on basic block
  352. */
  353. if(!debug['R'] || debug['P'])
  354. peep();
  355. /*
  356. * pass 8
  357. * recalculate pc
  358. */
  359. val = initpc;
  360. for(r = firstr; r != R; r = r1) {
  361. r->pc = val;
  362. p = r->prog;
  363. p1 = P;
  364. r1 = r->link;
  365. if(r1 != R)
  366. p1 = r1->prog;
  367. for(; p != p1; p = p->link) {
  368. switch(p->as) {
  369. default:
  370. val++;
  371. break;
  372. case ANOP:
  373. case ADATA:
  374. case AGLOBL:
  375. case ANAME:
  376. break;
  377. }
  378. }
  379. }
  380. pc = val;
  381. /*
  382. * fix up branches
  383. */
  384. if(debug['R'])
  385. if(bany(&addrs))
  386. print("addrs: %B\n", addrs);
  387. r1 = 0; /* set */
  388. for(r = firstr; r != R; r = r->link) {
  389. p = r->prog;
  390. if(p->to.type == D_BRANCH)
  391. p->to.offset = r->s2->pc;
  392. r1 = r;
  393. }
  394. /*
  395. * last pass
  396. * eliminate nops
  397. * free aux structures
  398. */
  399. for(p = firstr->prog; p != P; p = p->link){
  400. while(p->link && p->link->as == ANOP)
  401. p->link = p->link->link;
  402. }
  403. if(r1 != R) {
  404. r1->link = freer;
  405. freer = firstr;
  406. }
  407. }
  408. /*
  409. * add mov b,rn
  410. * just after r
  411. */
  412. void
  413. addmove(Reg *r, int bn, int rn, int f)
  414. {
  415. Prog *p, *p1;
  416. Adr *a;
  417. Var *v;
  418. p1 = alloc(sizeof(*p1));
  419. *p1 = zprog;
  420. p = r->prog;
  421. p1->link = p->link;
  422. p->link = p1;
  423. p1->lineno = p->lineno;
  424. v = var + bn;
  425. a = &p1->to;
  426. a->sym = v->sym;
  427. a->name = v->name;
  428. a->offset = v->offset;
  429. a->etype = v->etype;
  430. a->type = D_OREG;
  431. if(a->etype == TARRAY || a->sym == S)
  432. a->type = D_CONST;
  433. p1->as = AMOVW;
  434. if(v->etype == TCHAR || v->etype == TUCHAR)
  435. p1->as = AMOVB;
  436. if(v->etype == TSHORT || v->etype == TUSHORT)
  437. p1->as = AMOVH;
  438. if(v->etype == TFLOAT)
  439. p1->as = AFMOVF;
  440. if(v->etype == TDOUBLE)
  441. p1->as = AFMOVD;
  442. p1->from.type = D_REG;
  443. p1->from.reg = rn;
  444. if(rn >= NREG) {
  445. p1->from.type = D_FREG;
  446. p1->from.reg = rn-NREG;
  447. }
  448. if(!f) {
  449. p1->from = *a;
  450. *a = zprog.from;
  451. a->type = D_REG;
  452. a->reg = rn;
  453. if(rn >= NREG) {
  454. a->type = D_FREG;
  455. a->reg = rn-NREG;
  456. }
  457. if(v->etype == TUCHAR)
  458. p1->as = AMOVBU;
  459. if(v->etype == TUSHORT)
  460. p1->as = AMOVHU;
  461. }
  462. if(debug['R'])
  463. print("%P\t.a%P\n", p, p1);
  464. }
  465. Bits
  466. mkvar(Adr *a, int docon)
  467. {
  468. Var *v;
  469. int i, t, n, et, z;
  470. long o;
  471. Bits bit;
  472. Sym *s;
  473. t = a->type;
  474. if(t == D_REG && a->reg != NREG)
  475. regbits |= RtoB(a->reg);
  476. if(t == D_FREG && a->reg != NREG)
  477. regbits |= FtoB(a->reg);
  478. s = a->sym;
  479. o = a->offset;
  480. et = a->etype;
  481. if(s == S) {
  482. if(t != D_CONST || !docon || a->reg != NREG)
  483. goto none;
  484. et = TLONG;
  485. }
  486. if(t == D_CONST) {
  487. if(s == S && sval(o))
  488. goto none;
  489. }
  490. n = a->name;
  491. v = var;
  492. for(i=0; i<nvar; i++) {
  493. if(s == v->sym)
  494. if(n == v->name)
  495. if(o == v->offset)
  496. goto out;
  497. v++;
  498. }
  499. if(s)
  500. if(s->name[0] == '.')
  501. goto none;
  502. if(nvar >= NVAR) {
  503. if(debug['w'] > 1 && s)
  504. warn(Z, "variable not optimized: %s", s->name);
  505. goto none;
  506. }
  507. i = nvar;
  508. nvar++;
  509. v = &var[i];
  510. v->sym = s;
  511. v->offset = o;
  512. v->etype = et;
  513. v->name = n;
  514. if(debug['R'])
  515. print("bit=%2d et=%2d %D\n", i, et, a);
  516. out:
  517. bit = blsh(i);
  518. if(n == D_EXTERN || n == D_STATIC)
  519. for(z=0; z<BITS; z++)
  520. externs.b[z] |= bit.b[z];
  521. if(n == D_PARAM)
  522. for(z=0; z<BITS; z++)
  523. params.b[z] |= bit.b[z];
  524. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  525. for(z=0; z<BITS; z++)
  526. addrs.b[z] |= bit.b[z];
  527. if(t == D_CONST) {
  528. if(s == S) {
  529. for(z=0; z<BITS; z++)
  530. consts.b[z] |= bit.b[z];
  531. return bit;
  532. }
  533. if(et != TARRAY)
  534. for(z=0; z<BITS; z++)
  535. addrs.b[z] |= bit.b[z];
  536. for(z=0; z<BITS; z++)
  537. params.b[z] |= bit.b[z];
  538. return bit;
  539. }
  540. if(t == D_OREG)
  541. return bit;
  542. none:
  543. return zbits;
  544. }
  545. void
  546. prop(Reg *r, Bits ref, Bits cal)
  547. {
  548. Reg *r1, *r2;
  549. int z;
  550. for(r1 = r; r1 != R; r1 = r1->p1) {
  551. for(z=0; z<BITS; z++) {
  552. ref.b[z] |= r1->refahead.b[z];
  553. if(ref.b[z] != r1->refahead.b[z]) {
  554. r1->refahead.b[z] = ref.b[z];
  555. change++;
  556. }
  557. cal.b[z] |= r1->calahead.b[z];
  558. if(cal.b[z] != r1->calahead.b[z]) {
  559. r1->calahead.b[z] = cal.b[z];
  560. change++;
  561. }
  562. }
  563. switch(r1->prog->as) {
  564. case AJMPL:
  565. for(z=0; z<BITS; z++) {
  566. cal.b[z] |= ref.b[z] | externs.b[z];
  567. ref.b[z] = 0;
  568. }
  569. break;
  570. case ATEXT:
  571. for(z=0; z<BITS; z++) {
  572. cal.b[z] = 0;
  573. ref.b[z] = 0;
  574. }
  575. break;
  576. case ARETURN:
  577. for(z=0; z<BITS; z++) {
  578. cal.b[z] = externs.b[z];
  579. ref.b[z] = 0;
  580. }
  581. }
  582. for(z=0; z<BITS; z++) {
  583. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  584. r1->use1.b[z] | r1->use2.b[z];
  585. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  586. r1->refbehind.b[z] = ref.b[z];
  587. r1->calbehind.b[z] = cal.b[z];
  588. }
  589. if(r1->active)
  590. break;
  591. r1->active = 1;
  592. }
  593. for(; r != r1; r = r->p1)
  594. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  595. prop(r2, r->refbehind, r->calbehind);
  596. }
  597. /*
  598. * find looping structure
  599. *
  600. * 1) find reverse postordering
  601. * 2) find approximate dominators,
  602. * the actual dominators if the flow graph is reducible
  603. * otherwise, dominators plus some other non-dominators.
  604. * See Matthew S. Hecht and Jeffrey D. Ullman,
  605. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  606. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  607. * Oct. 1-3, 1973, pp. 207-217.
  608. * 3) find all nodes with a predecessor dominated by the current node.
  609. * such a node is a loop head.
  610. * recursively, all preds with a greater rpo number are in the loop
  611. */
  612. long
  613. postorder(Reg *r, Reg **rpo2r, long n)
  614. {
  615. Reg *r1;
  616. r->rpo = 1;
  617. r1 = r->s1;
  618. if(r1 && !r1->rpo)
  619. n = postorder(r1, rpo2r, n);
  620. r1 = r->s2;
  621. if(r1 && !r1->rpo)
  622. n = postorder(r1, rpo2r, n);
  623. rpo2r[n] = r;
  624. n++;
  625. return n;
  626. }
  627. long
  628. rpolca(long *idom, long rpo1, long rpo2)
  629. {
  630. long t;
  631. if(rpo1 == -1)
  632. return rpo2;
  633. while(rpo1 != rpo2){
  634. if(rpo1 > rpo2){
  635. t = rpo2;
  636. rpo2 = rpo1;
  637. rpo1 = t;
  638. }
  639. while(rpo1 < rpo2){
  640. t = idom[rpo2];
  641. if(t >= rpo2)
  642. sysfatal("bad idom");
  643. rpo2 = t;
  644. }
  645. }
  646. return rpo1;
  647. }
  648. int
  649. doms(long *idom, long r, long s)
  650. {
  651. while(s > r)
  652. s = idom[s];
  653. return s == r;
  654. }
  655. int
  656. loophead(long *idom, Reg *r)
  657. {
  658. long src;
  659. src = r->rpo;
  660. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  661. return 1;
  662. for(r = r->p2; r != R; r = r->p2link)
  663. if(doms(idom, src, r->rpo))
  664. return 1;
  665. return 0;
  666. }
  667. void
  668. loopmark(Reg **rpo2r, long head, Reg *r)
  669. {
  670. if(r->rpo < head || r->active == head)
  671. return;
  672. r->active = head;
  673. r->loop += LOOP;
  674. if(r->p1 != R)
  675. loopmark(rpo2r, head, r->p1);
  676. for(r = r->p2; r != R; r = r->p2link)
  677. loopmark(rpo2r, head, r);
  678. }
  679. void
  680. loopit(Reg *r, long nr)
  681. {
  682. Reg *r1;
  683. long i, d, me;
  684. if(nr > maxnr) {
  685. rpo2r = alloc(nr * sizeof(Reg*));
  686. idom = alloc(nr * sizeof(long));
  687. maxnr = nr;
  688. }
  689. d = postorder(r, rpo2r, 0);
  690. if(d > nr)
  691. sysfatal("too many reg nodes");
  692. nr = d;
  693. for(i = 0; i < nr / 2; i++){
  694. r1 = rpo2r[i];
  695. rpo2r[i] = rpo2r[nr - 1 - i];
  696. rpo2r[nr - 1 - i] = r1;
  697. }
  698. for(i = 0; i < nr; i++)
  699. rpo2r[i]->rpo = i;
  700. idom[0] = 0;
  701. for(i = 0; i < nr; i++){
  702. r1 = rpo2r[i];
  703. me = r1->rpo;
  704. d = -1;
  705. if(r1->p1 != R && r1->p1->rpo < me)
  706. d = r1->p1->rpo;
  707. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  708. if(r1->rpo < me)
  709. d = rpolca(idom, d, r1->rpo);
  710. idom[i] = d;
  711. }
  712. for(i = 0; i < nr; i++){
  713. r1 = rpo2r[i];
  714. r1->loop++;
  715. if(r1->p2 != R && loophead(idom, r1))
  716. loopmark(rpo2r, i, r1);
  717. }
  718. }
  719. void
  720. synch(Reg *r, Bits dif)
  721. {
  722. Reg *r1;
  723. int z;
  724. for(r1 = r; r1 != R; r1 = r1->s1) {
  725. for(z=0; z<BITS; z++) {
  726. dif.b[z] = (dif.b[z] &
  727. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  728. r1->set.b[z] | r1->regdiff.b[z];
  729. if(dif.b[z] != r1->regdiff.b[z]) {
  730. r1->regdiff.b[z] = dif.b[z];
  731. change++;
  732. }
  733. }
  734. if(r1->active)
  735. break;
  736. r1->active = 1;
  737. for(z=0; z<BITS; z++)
  738. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  739. if(r1->s2 != R)
  740. synch(r1->s2, dif);
  741. }
  742. }
  743. ulong
  744. allreg(ulong b, Rgn *r)
  745. {
  746. Var *v;
  747. int i;
  748. v = var + r->varno;
  749. r->regno = 0;
  750. switch(v->etype) {
  751. default:
  752. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  753. break;
  754. case TCHAR:
  755. case TUCHAR:
  756. case TSHORT:
  757. case TUSHORT:
  758. case TINT:
  759. case TUINT:
  760. case TLONG:
  761. case TULONG:
  762. case TIND:
  763. case TARRAY:
  764. i = BtoR(~b);
  765. if(i && r->cost > 0) {
  766. r->regno = i;
  767. return RtoB(i);
  768. }
  769. break;
  770. case TDOUBLE:
  771. case TFLOAT:
  772. i = BtoF(~b);
  773. if(i && r->cost > 0) {
  774. r->regno = i+NREG;
  775. return FtoB(i);
  776. }
  777. break;
  778. }
  779. return 0;
  780. }
  781. void
  782. paint1(Reg *r, int bn)
  783. {
  784. Reg *r1;
  785. Prog *p;
  786. int z;
  787. ulong bb;
  788. z = bn/32;
  789. bb = 1L<<(bn%32);
  790. if(r->act.b[z] & bb)
  791. return;
  792. for(;;) {
  793. if(!(r->refbehind.b[z] & bb))
  794. break;
  795. r1 = r->p1;
  796. if(r1 == R)
  797. break;
  798. if(!(r1->refahead.b[z] & bb))
  799. break;
  800. if(r1->act.b[z] & bb)
  801. break;
  802. r = r1;
  803. }
  804. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  805. change -= CLOAD * r->loop;
  806. if(debug['R'] && debug['v'])
  807. print("%ld%P\tld %B $%d\n", r->loop,
  808. r->prog, blsh(bn), change);
  809. }
  810. for(;;) {
  811. r->act.b[z] |= bb;
  812. p = r->prog;
  813. if(r->use1.b[z] & bb) {
  814. change += CREF * r->loop;
  815. if(p->to.type == D_FREG && p->as == AMOVW)
  816. change = -CINF; /* cant go Rreg to Freg */
  817. if(debug['R'] && debug['v'])
  818. print("%ld%P\tu1 %B $%d\n", r->loop,
  819. p, blsh(bn), change);
  820. }
  821. if((r->use2.b[z]|r->set.b[z]) & bb) {
  822. change += CREF * r->loop;
  823. if(p->from.type == D_FREG && p->as == AMOVW)
  824. change = -CINF; /* cant go Rreg to Freg */
  825. if(debug['R'] && debug['v'])
  826. print("%ld%P\tu2 %B $%d\n", r->loop,
  827. p, blsh(bn), change);
  828. }
  829. if(STORE(r) & r->regdiff.b[z] & bb) {
  830. change -= CLOAD * r->loop;
  831. if(debug['R'] && debug['v'])
  832. print("%ld%P\tst %B $%d\n", r->loop,
  833. p, blsh(bn), change);
  834. }
  835. if(r->refbehind.b[z] & bb)
  836. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  837. if(r1->refahead.b[z] & bb)
  838. paint1(r1, bn);
  839. if(!(r->refahead.b[z] & bb))
  840. break;
  841. r1 = r->s2;
  842. if(r1 != R)
  843. if(r1->refbehind.b[z] & bb)
  844. paint1(r1, bn);
  845. r = r->s1;
  846. if(r == R)
  847. break;
  848. if(r->act.b[z] & bb)
  849. break;
  850. if(!(r->refbehind.b[z] & bb))
  851. break;
  852. }
  853. }
  854. ulong
  855. paint2(Reg *r, int bn)
  856. {
  857. Reg *r1;
  858. int z;
  859. ulong bb, vreg;
  860. z = bn/32;
  861. bb = 1L << (bn%32);
  862. vreg = regbits;
  863. if(!(r->act.b[z] & bb))
  864. return vreg;
  865. for(;;) {
  866. if(!(r->refbehind.b[z] & bb))
  867. break;
  868. r1 = r->p1;
  869. if(r1 == R)
  870. break;
  871. if(!(r1->refahead.b[z] & bb))
  872. break;
  873. if(!(r1->act.b[z] & bb))
  874. break;
  875. r = r1;
  876. }
  877. for(;;) {
  878. r->act.b[z] &= ~bb;
  879. vreg |= r->regu;
  880. if(r->refbehind.b[z] & bb)
  881. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  882. if(r1->refahead.b[z] & bb)
  883. vreg |= paint2(r1, bn);
  884. if(!(r->refahead.b[z] & bb))
  885. break;
  886. r1 = r->s2;
  887. if(r1 != R)
  888. if(r1->refbehind.b[z] & bb)
  889. vreg |= paint2(r1, bn);
  890. r = r->s1;
  891. if(r == R)
  892. break;
  893. if(!(r->act.b[z] & bb))
  894. break;
  895. if(!(r->refbehind.b[z] & bb))
  896. break;
  897. }
  898. return vreg;
  899. }
  900. void
  901. paint3(Reg *r, int bn, long rb, int rn)
  902. {
  903. Reg *r1;
  904. Prog *p;
  905. int z;
  906. ulong bb;
  907. z = bn/32;
  908. bb = 1L << (bn%32);
  909. if(r->act.b[z] & bb)
  910. return;
  911. for(;;) {
  912. if(!(r->refbehind.b[z] & bb))
  913. break;
  914. r1 = r->p1;
  915. if(r1 == R)
  916. break;
  917. if(!(r1->refahead.b[z] & bb))
  918. break;
  919. if(r1->act.b[z] & bb)
  920. break;
  921. r = r1;
  922. }
  923. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  924. addmove(r, bn, rn, 0);
  925. for(;;) {
  926. r->act.b[z] |= bb;
  927. p = r->prog;
  928. if(r->use1.b[z] & bb) {
  929. if(debug['R'])
  930. print("%P", p);
  931. addreg(&p->from, rn);
  932. if(debug['R'])
  933. print("\t.c%P\n", p);
  934. }
  935. if((r->use2.b[z]|r->set.b[z]) & bb) {
  936. if(debug['R'])
  937. print("%P", p);
  938. addreg(&p->to, rn);
  939. if(debug['R'])
  940. print("\t.c%P\n", p);
  941. }
  942. if(STORE(r) & r->regdiff.b[z] & bb)
  943. addmove(r, bn, rn, 1);
  944. r->regu |= rb;
  945. if(r->refbehind.b[z] & bb)
  946. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  947. if(r1->refahead.b[z] & bb)
  948. paint3(r1, bn, rb, rn);
  949. if(!(r->refahead.b[z] & bb))
  950. break;
  951. r1 = r->s2;
  952. if(r1 != R)
  953. if(r1->refbehind.b[z] & bb)
  954. paint3(r1, bn, rb, rn);
  955. r = r->s1;
  956. if(r == R)
  957. break;
  958. if(r->act.b[z] & bb)
  959. break;
  960. if(!(r->refbehind.b[z] & bb))
  961. break;
  962. }
  963. }
  964. void
  965. addreg(Adr *a, int rn)
  966. {
  967. a->sym = 0;
  968. a->name = D_NONE;
  969. a->type = D_REG;
  970. a->reg = rn;
  971. if(rn >= NREG) {
  972. a->type = D_FREG;
  973. a->reg = rn-NREG;
  974. }
  975. }
  976. /*
  977. * bit reg
  978. * 0 R9
  979. * 1 R10
  980. * ... ...
  981. * 4 R13
  982. * 5 R16
  983. * ... ...
  984. * 20 R31
  985. */
  986. long
  987. RtoB(int r)
  988. {
  989. if(r >= 9 && r <= 13)
  990. return 1L << (r-9);
  991. if(r >= 16 && r <= 31)
  992. return 1L << (r-11);
  993. return 0;
  994. }
  995. int
  996. BtoR(long b)
  997. {
  998. int r;
  999. b &= 0x001fffffL;
  1000. if(b == 0)
  1001. return 0;
  1002. r = bitno(b) + 9;
  1003. if(r >= 14)
  1004. r += 2;
  1005. return r;
  1006. }
  1007. /*
  1008. * bit reg
  1009. * 22 F4
  1010. * 23 F6
  1011. * ... ...
  1012. * 31 F22
  1013. */
  1014. long
  1015. FtoB(int f)
  1016. {
  1017. if(f < 4 || f > 22 || (f&1))
  1018. return 0;
  1019. return 1L << (f/2 + 20);
  1020. }
  1021. BtoF(long b)
  1022. {
  1023. b &= 0xffc00000L;
  1024. if(b == 0)
  1025. return 0;
  1026. return bitno(b)*2 - 40;
  1027. }