reg.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158
  1. #include "gc.h"
  2. void addsplits(void);
  3. Reg*
  4. rega(void)
  5. {
  6. Reg *r;
  7. r = freer;
  8. if(r == R) {
  9. r = alloc(sizeof(*r));
  10. } else
  11. freer = r->link;
  12. *r = zreg;
  13. return r;
  14. }
  15. int
  16. rcmp(const void *a1, const void *a2)
  17. {
  18. Rgn *p1, *p2;
  19. int c1, c2;
  20. p1 = (Rgn*)a1;
  21. p2 = (Rgn*)a2;
  22. c1 = p2->cost;
  23. c2 = p1->cost;
  24. if(c1 -= c2)
  25. return c1;
  26. return p2->varno - p1->varno;
  27. }
  28. void
  29. regopt(Prog *p)
  30. {
  31. Reg *r, *r1, *r2;
  32. Prog *p1;
  33. int i, z;
  34. long initpc, val, npc;
  35. ulong vreg;
  36. Bits bit;
  37. struct
  38. {
  39. long m;
  40. long c;
  41. Reg* p;
  42. } log5[6], *lp;
  43. firstr = R;
  44. lastr = R;
  45. nvar = 0;
  46. regbits = 0;
  47. for(z=0; z<BITS; z++) {
  48. externs.b[z] = 0;
  49. params.b[z] = 0;
  50. consts.b[z] = 0;
  51. addrs.b[z] = 0;
  52. }
  53. /*
  54. * pass 1
  55. * build aux data structure
  56. * allocate pcs
  57. * find use and set of variables
  58. */
  59. val = 5L * 5L * 5L * 5L * 5L;
  60. lp = log5;
  61. for(i=0; i<5; i++) {
  62. lp->m = val;
  63. lp->c = 0;
  64. lp->p = R;
  65. val /= 5L;
  66. lp++;
  67. }
  68. val = 0;
  69. for(; p != P; p = p->link) {
  70. switch(p->as) {
  71. case ADATA:
  72. case AGLOBL:
  73. case ANAME:
  74. case ASIGNAME:
  75. continue;
  76. }
  77. r = rega();
  78. if(firstr == R) {
  79. firstr = r;
  80. lastr = r;
  81. } else {
  82. lastr->link = r;
  83. r->p1 = lastr;
  84. lastr->s1 = r;
  85. lastr = r;
  86. }
  87. r->prog = p;
  88. r->pc = val;
  89. val++;
  90. lp = log5;
  91. for(i=0; i<5; i++) {
  92. lp->c--;
  93. if(lp->c <= 0) {
  94. lp->c = lp->m;
  95. if(lp->p != R)
  96. lp->p->log5 = r;
  97. lp->p = r;
  98. (lp+1)->c = 0;
  99. break;
  100. }
  101. lp++;
  102. }
  103. r1 = r->p1;
  104. if(r1 != R)
  105. switch(r1->prog->as) {
  106. case ARET:
  107. case AB:
  108. case ARFE:
  109. r->p1 = R;
  110. r1->s1 = R;
  111. }
  112. /*
  113. * left side always read
  114. */
  115. bit = mkvar(&p->from, p->as==AMOVW);
  116. for(z=0; z<BITS; z++)
  117. r->use1.b[z] |= bit.b[z];
  118. /*
  119. * right side depends on opcode
  120. */
  121. bit = mkvar(&p->to, 0);
  122. if(bany(&bit))
  123. switch(p->as) {
  124. default:
  125. diag(Z, "reg: unknown asop: %A", p->as);
  126. break;
  127. /*
  128. * right side write
  129. */
  130. case ANOP:
  131. case AMOVB:
  132. case AMOVBU:
  133. case AMOVH:
  134. case AMOVHU:
  135. case AMOVW:
  136. case AMOVF:
  137. case AMOVD:
  138. for(z=0; z<BITS; z++)
  139. r->set.b[z] |= bit.b[z];
  140. break;
  141. /*
  142. * funny
  143. */
  144. case ABL:
  145. for(z=0; z<BITS; z++)
  146. addrs.b[z] |= bit.b[z];
  147. break;
  148. }
  149. if(p->as == AMOVM) {
  150. if(p->from.type == D_CONST)
  151. z = p->from.offset;
  152. else
  153. z = p->to.offset;
  154. for(i=0; z; i++) {
  155. if(z&1)
  156. regbits |= RtoB(i);
  157. z >>= 1;
  158. }
  159. }
  160. }
  161. if(firstr == R)
  162. return;
  163. initpc = pc - val;
  164. npc = val;
  165. /*
  166. * pass 2
  167. * turn branch references to pointers
  168. * build back pointers
  169. */
  170. for(r = firstr; r != R; r = r->link) {
  171. p = r->prog;
  172. if(p->to.type == D_BRANCH) {
  173. val = p->to.offset - initpc;
  174. r1 = firstr;
  175. while(r1 != R) {
  176. r2 = r1->log5;
  177. if(r2 != R && val >= r2->pc) {
  178. r1 = r2;
  179. continue;
  180. }
  181. if(r1->pc == val)
  182. break;
  183. r1 = r1->link;
  184. }
  185. if(r1 == R) {
  186. nearln = p->lineno;
  187. diag(Z, "ref not found\n%P", p);
  188. continue;
  189. }
  190. if(r1 == r) {
  191. nearln = p->lineno;
  192. diag(Z, "ref to self\n%P", p);
  193. continue;
  194. }
  195. r->s2 = r1;
  196. r->p2link = r1->p2;
  197. r1->p2 = r;
  198. }
  199. }
  200. if(debug['R']) {
  201. p = firstr->prog;
  202. print("\n%L %D\n", p->lineno, &p->from);
  203. }
  204. /*
  205. * pass 2.5
  206. * find looping structure
  207. */
  208. for(r = firstr; r != R; r = r->link)
  209. r->active = 0;
  210. change = 0;
  211. loopit(firstr, npc);
  212. /*
  213. * pass 3
  214. * iterate propagating usage
  215. * back until flow graph is complete
  216. */
  217. loop1:
  218. change = 0;
  219. for(r = firstr; r != R; r = r->link)
  220. r->active = 0;
  221. for(r = firstr; r != R; r = r->link)
  222. if(r->prog->as == ARET)
  223. prop(r, zbits, zbits);
  224. loop11:
  225. /* pick up unreachable code */
  226. i = 0;
  227. for(r = firstr; r != R; r = r1) {
  228. r1 = r->link;
  229. if(r1 && r1->active && !r->active) {
  230. prop(r, zbits, zbits);
  231. i = 1;
  232. }
  233. }
  234. if(i)
  235. goto loop11;
  236. if(change)
  237. goto loop1;
  238. /*
  239. * pass 4
  240. * iterate propagating register/variable synchrony
  241. * forward until graph is complete
  242. */
  243. loop2:
  244. change = 0;
  245. for(r = firstr; r != R; r = r->link)
  246. r->active = 0;
  247. synch(firstr, zbits);
  248. if(change)
  249. goto loop2;
  250. addsplits();
  251. if(debug['R'] && debug['v']) {
  252. print("\nprop structure:\n");
  253. for(r = firstr; r != R; r = r->link) {
  254. print("%ld:%P", r->loop, r->prog);
  255. for(z=0; z<BITS; z++)
  256. bit.b[z] = r->set.b[z] |
  257. r->refahead.b[z] | r->calahead.b[z] |
  258. r->refbehind.b[z] | r->calbehind.b[z] |
  259. r->use1.b[z] | r->use2.b[z];
  260. if(bany(&bit)) {
  261. print("\t");
  262. if(bany(&r->use1))
  263. print(" u1=%B", r->use1);
  264. if(bany(&r->use2))
  265. print(" u2=%B", r->use2);
  266. if(bany(&r->set))
  267. print(" st=%B", r->set);
  268. if(bany(&r->refahead))
  269. print(" ra=%B", r->refahead);
  270. if(bany(&r->calahead))
  271. print(" ca=%B", r->calahead);
  272. if(bany(&r->refbehind))
  273. print(" rb=%B", r->refbehind);
  274. if(bany(&r->calbehind))
  275. print(" cb=%B", r->calbehind);
  276. }
  277. print("\n");
  278. }
  279. }
  280. /*
  281. * pass 5
  282. * isolate regions
  283. * calculate costs (paint1)
  284. */
  285. r = firstr;
  286. if(r) {
  287. for(z=0; z<BITS; z++)
  288. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  289. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  290. if(bany(&bit)) {
  291. nearln = r->prog->lineno;
  292. warn(Z, "used and not set: %B", bit);
  293. if(debug['R'] && !debug['w'])
  294. print("used and not set: %B\n", bit);
  295. }
  296. }
  297. for(r = firstr; r != R; r = r->link)
  298. r->act = zbits;
  299. rgp = region;
  300. nregion = 0;
  301. for(r = firstr; r != R; r = r->link) {
  302. for(z=0; z<BITS; z++)
  303. bit.b[z] = r->set.b[z] &
  304. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  305. if(bany(&bit)) {
  306. nearln = r->prog->lineno;
  307. warn(Z, "set and not used: %B", bit);
  308. if(debug['R'])
  309. print("set and not used: %B\n", bit);
  310. excise(r);
  311. }
  312. for(z=0; z<BITS; z++)
  313. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  314. while(bany(&bit)) {
  315. i = bnum(bit);
  316. rgp->enter = r;
  317. rgp->varno = i;
  318. change = 0;
  319. if(debug['R'] && debug['v'])
  320. print("\n");
  321. paint1(r, i);
  322. bit.b[i/32] &= ~(1L<<(i%32));
  323. if(change <= 0) {
  324. if(debug['R'])
  325. print("%L $%d: %B\n",
  326. r->prog->lineno, change, blsh(i));
  327. continue;
  328. }
  329. rgp->cost = change;
  330. nregion++;
  331. if(nregion >= NRGN) {
  332. warn(Z, "too many regions");
  333. goto brk;
  334. }
  335. rgp++;
  336. }
  337. }
  338. brk:
  339. qsort(region, nregion, sizeof(region[0]), rcmp);
  340. /*
  341. * pass 6
  342. * determine used registers (paint2)
  343. * replace code (paint3)
  344. */
  345. rgp = region;
  346. for(i=0; i<nregion; i++) {
  347. bit = blsh(rgp->varno);
  348. vreg = paint2(rgp->enter, rgp->varno);
  349. vreg = allreg(vreg, rgp);
  350. if(debug['R']) {
  351. if(rgp->regno >= NREG)
  352. print("%L $%d F%d: %B\n",
  353. rgp->enter->prog->lineno,
  354. rgp->cost,
  355. rgp->regno-NREG,
  356. bit);
  357. else
  358. print("%L $%d R%d: %B\n",
  359. rgp->enter->prog->lineno,
  360. rgp->cost,
  361. rgp->regno,
  362. bit);
  363. }
  364. if(rgp->regno != 0)
  365. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  366. rgp++;
  367. }
  368. /*
  369. * pass 7
  370. * peep-hole on basic block
  371. */
  372. if(!debug['R'] || debug['P'])
  373. peep();
  374. /*
  375. * pass 8
  376. * recalculate pc
  377. */
  378. val = initpc;
  379. for(r = firstr; r != R; r = r1) {
  380. r->pc = val;
  381. p = r->prog;
  382. p1 = P;
  383. r1 = r->link;
  384. if(r1 != R)
  385. p1 = r1->prog;
  386. for(; p != p1; p = p->link) {
  387. switch(p->as) {
  388. default:
  389. val++;
  390. break;
  391. case ANOP:
  392. case ADATA:
  393. case AGLOBL:
  394. case ANAME:
  395. case ASIGNAME:
  396. break;
  397. }
  398. }
  399. }
  400. pc = val;
  401. /*
  402. * fix up branches
  403. */
  404. if(debug['R'])
  405. if(bany(&addrs))
  406. print("addrs: %B\n", addrs);
  407. r1 = 0; /* set */
  408. for(r = firstr; r != R; r = r->link) {
  409. p = r->prog;
  410. if(p->to.type == D_BRANCH)
  411. p->to.offset = r->s2->pc;
  412. r1 = r;
  413. }
  414. /*
  415. * last pass
  416. * eliminate nops
  417. * free aux structures
  418. */
  419. for(p = firstr->prog; p != P; p = p->link){
  420. while(p->link && p->link->as == ANOP)
  421. p->link = p->link->link;
  422. }
  423. if(r1 != R) {
  424. r1->link = freer;
  425. freer = firstr;
  426. }
  427. }
  428. void
  429. addsplits(void)
  430. {
  431. Reg *r, *r1;
  432. int z, i;
  433. Bits bit;
  434. for(r = firstr; r != R; r = r->link) {
  435. if(r->loop > 1)
  436. continue;
  437. if(r->prog->as == ABL)
  438. continue;
  439. for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
  440. if(r1->loop <= 1)
  441. continue;
  442. for(z=0; z<BITS; z++)
  443. bit.b[z] = r1->calbehind.b[z] &
  444. (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
  445. ~(r->calahead.b[z] & addrs.b[z]);
  446. while(bany(&bit)) {
  447. i = bnum(bit);
  448. bit.b[i/32] &= ~(1L << (i%32));
  449. }
  450. }
  451. }
  452. }
  453. /*
  454. * add mov b,rn
  455. * just after r
  456. */
  457. void
  458. addmove(Reg *r, int bn, int rn, int f)
  459. {
  460. Prog *p, *p1;
  461. Adr *a;
  462. Var *v;
  463. p1 = alloc(sizeof(*p1));
  464. *p1 = zprog;
  465. p = r->prog;
  466. p1->link = p->link;
  467. p->link = p1;
  468. p1->lineno = p->lineno;
  469. v = var + bn;
  470. a = &p1->to;
  471. a->sym = v->sym;
  472. a->name = v->name;
  473. a->offset = v->offset;
  474. a->etype = v->etype;
  475. a->type = D_OREG;
  476. if(a->etype == TARRAY || a->sym == S)
  477. a->type = D_CONST;
  478. p1->as = AMOVW;
  479. if(v->etype == TCHAR || v->etype == TUCHAR)
  480. p1->as = AMOVB;
  481. if(v->etype == TSHORT || v->etype == TUSHORT)
  482. p1->as = AMOVH;
  483. if(v->etype == TFLOAT)
  484. p1->as = AMOVF;
  485. if(v->etype == TDOUBLE)
  486. p1->as = AMOVD;
  487. p1->from.type = D_REG;
  488. p1->from.reg = rn;
  489. if(rn >= NREG) {
  490. p1->from.type = D_FREG;
  491. p1->from.reg = rn-NREG;
  492. }
  493. if(!f) {
  494. p1->from = *a;
  495. *a = zprog.from;
  496. a->type = D_REG;
  497. a->reg = rn;
  498. if(rn >= NREG) {
  499. a->type = D_FREG;
  500. a->reg = rn-NREG;
  501. }
  502. if(v->etype == TUCHAR)
  503. p1->as = AMOVBU;
  504. if(v->etype == TUSHORT)
  505. p1->as = AMOVHU;
  506. }
  507. if(debug['R'])
  508. print("%P\t.a%P\n", p, p1);
  509. }
  510. Bits
  511. mkvar(Adr *a, int docon)
  512. {
  513. Var *v;
  514. int i, t, n, et, z;
  515. long o;
  516. Bits bit;
  517. Sym *s;
  518. t = a->type;
  519. if(t == D_REG && a->reg != NREG)
  520. regbits |= RtoB(a->reg);
  521. if(t == D_FREG && a->reg != NREG)
  522. regbits |= FtoB(a->reg);
  523. s = a->sym;
  524. o = a->offset;
  525. et = a->etype;
  526. if(s == S) {
  527. if(t != D_CONST || !docon || a->reg != NREG)
  528. goto none;
  529. et = TLONG;
  530. }
  531. if(t == D_CONST) {
  532. if(s == S && sval(o))
  533. goto none;
  534. }
  535. n = a->name;
  536. v = var;
  537. for(i=0; i<nvar; i++) {
  538. if(s == v->sym)
  539. if(n == v->name)
  540. if(o == v->offset)
  541. goto out;
  542. v++;
  543. }
  544. if(s)
  545. if(s->name[0] == '.')
  546. goto none;
  547. if(nvar >= NVAR) {
  548. if(debug['w'] > 1 && s)
  549. warn(Z, "variable not optimized: %s", s->name);
  550. goto none;
  551. }
  552. i = nvar;
  553. nvar++;
  554. v = &var[i];
  555. v->sym = s;
  556. v->offset = o;
  557. v->etype = et;
  558. v->name = n;
  559. if(debug['R'])
  560. print("bit=%2d et=%2d %D\n", i, et, a);
  561. out:
  562. bit = blsh(i);
  563. if(n == D_EXTERN || n == D_STATIC)
  564. for(z=0; z<BITS; z++)
  565. externs.b[z] |= bit.b[z];
  566. if(n == D_PARAM)
  567. for(z=0; z<BITS; z++)
  568. params.b[z] |= bit.b[z];
  569. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  570. for(z=0; z<BITS; z++)
  571. addrs.b[z] |= bit.b[z];
  572. if(t == D_CONST) {
  573. if(s == S) {
  574. for(z=0; z<BITS; z++)
  575. consts.b[z] |= bit.b[z];
  576. return bit;
  577. }
  578. if(et != TARRAY)
  579. for(z=0; z<BITS; z++)
  580. addrs.b[z] |= bit.b[z];
  581. for(z=0; z<BITS; z++)
  582. params.b[z] |= bit.b[z];
  583. return bit;
  584. }
  585. if(t == D_OREG)
  586. return bit;
  587. none:
  588. return zbits;
  589. }
  590. void
  591. prop(Reg *r, Bits ref, Bits cal)
  592. {
  593. Reg *r1, *r2;
  594. int z;
  595. for(r1 = r; r1 != R; r1 = r1->p1) {
  596. for(z=0; z<BITS; z++) {
  597. ref.b[z] |= r1->refahead.b[z];
  598. if(ref.b[z] != r1->refahead.b[z]) {
  599. r1->refahead.b[z] = ref.b[z];
  600. change++;
  601. }
  602. cal.b[z] |= r1->calahead.b[z];
  603. if(cal.b[z] != r1->calahead.b[z]) {
  604. r1->calahead.b[z] = cal.b[z];
  605. change++;
  606. }
  607. }
  608. switch(r1->prog->as) {
  609. case ABL:
  610. for(z=0; z<BITS; z++) {
  611. cal.b[z] |= ref.b[z] | externs.b[z];
  612. ref.b[z] = 0;
  613. }
  614. break;
  615. case ATEXT:
  616. for(z=0; z<BITS; z++) {
  617. cal.b[z] = 0;
  618. ref.b[z] = 0;
  619. }
  620. break;
  621. case ARET:
  622. for(z=0; z<BITS; z++) {
  623. cal.b[z] = externs.b[z];
  624. ref.b[z] = 0;
  625. }
  626. }
  627. for(z=0; z<BITS; z++) {
  628. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  629. r1->use1.b[z] | r1->use2.b[z];
  630. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  631. r1->refbehind.b[z] = ref.b[z];
  632. r1->calbehind.b[z] = cal.b[z];
  633. }
  634. if(r1->active)
  635. break;
  636. r1->active = 1;
  637. }
  638. for(; r != r1; r = r->p1)
  639. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  640. prop(r2, r->refbehind, r->calbehind);
  641. }
  642. /*
  643. * find looping structure
  644. *
  645. * 1) find reverse postordering
  646. * 2) find approximate dominators,
  647. * the actual dominators if the flow graph is reducible
  648. * otherwise, dominators plus some other non-dominators.
  649. * See Matthew S. Hecht and Jeffrey D. Ullman,
  650. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  651. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  652. * Oct. 1-3, 1973, pp. 207-217.
  653. * 3) find all nodes with a predecessor dominated by the current node.
  654. * such a node is a loop head.
  655. * recursively, all preds with a greater rpo number are in the loop
  656. */
  657. long
  658. postorder(Reg *r, Reg **rpo2r, long n)
  659. {
  660. Reg *r1;
  661. r->rpo = 1;
  662. r1 = r->s1;
  663. if(r1 && !r1->rpo)
  664. n = postorder(r1, rpo2r, n);
  665. r1 = r->s2;
  666. if(r1 && !r1->rpo)
  667. n = postorder(r1, rpo2r, n);
  668. rpo2r[n] = r;
  669. n++;
  670. return n;
  671. }
  672. long
  673. rpolca(long *idom, long rpo1, long rpo2)
  674. {
  675. long t;
  676. if(rpo1 == -1)
  677. return rpo2;
  678. while(rpo1 != rpo2){
  679. if(rpo1 > rpo2){
  680. t = rpo2;
  681. rpo2 = rpo1;
  682. rpo1 = t;
  683. }
  684. while(rpo1 < rpo2){
  685. t = idom[rpo2];
  686. if(t >= rpo2)
  687. fatal(Z, "bad idom");
  688. rpo2 = t;
  689. }
  690. }
  691. return rpo1;
  692. }
  693. int
  694. doms(long *idom, long r, long s)
  695. {
  696. while(s > r)
  697. s = idom[s];
  698. return s == r;
  699. }
  700. int
  701. loophead(long *idom, Reg *r)
  702. {
  703. long src;
  704. src = r->rpo;
  705. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  706. return 1;
  707. for(r = r->p2; r != R; r = r->p2link)
  708. if(doms(idom, src, r->rpo))
  709. return 1;
  710. return 0;
  711. }
  712. void
  713. loopmark(Reg **rpo2r, long head, Reg *r)
  714. {
  715. if(r->rpo < head || r->active == head)
  716. return;
  717. r->active = head;
  718. r->loop += LOOP;
  719. if(r->p1 != R)
  720. loopmark(rpo2r, head, r->p1);
  721. for(r = r->p2; r != R; r = r->p2link)
  722. loopmark(rpo2r, head, r);
  723. }
  724. void
  725. loopit(Reg *r, long nr)
  726. {
  727. Reg *r1;
  728. long i, d, me;
  729. if(nr > maxnr) {
  730. rpo2r = alloc(nr * sizeof(Reg*));
  731. idom = alloc(nr * sizeof(long));
  732. maxnr = nr;
  733. }
  734. d = postorder(r, rpo2r, 0);
  735. if(d > nr)
  736. fatal(Z, "too many reg nodes");
  737. nr = d;
  738. for(i = 0; i < nr / 2; i++){
  739. r1 = rpo2r[i];
  740. rpo2r[i] = rpo2r[nr - 1 - i];
  741. rpo2r[nr - 1 - i] = r1;
  742. }
  743. for(i = 0; i < nr; i++)
  744. rpo2r[i]->rpo = i;
  745. idom[0] = 0;
  746. for(i = 0; i < nr; i++){
  747. r1 = rpo2r[i];
  748. me = r1->rpo;
  749. d = -1;
  750. if(r1->p1 != R && r1->p1->rpo < me)
  751. d = r1->p1->rpo;
  752. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  753. if(r1->rpo < me)
  754. d = rpolca(idom, d, r1->rpo);
  755. idom[i] = d;
  756. }
  757. for(i = 0; i < nr; i++){
  758. r1 = rpo2r[i];
  759. r1->loop++;
  760. if(r1->p2 != R && loophead(idom, r1))
  761. loopmark(rpo2r, i, r1);
  762. }
  763. }
  764. void
  765. synch(Reg *r, Bits dif)
  766. {
  767. Reg *r1;
  768. int z;
  769. for(r1 = r; r1 != R; r1 = r1->s1) {
  770. for(z=0; z<BITS; z++) {
  771. dif.b[z] = (dif.b[z] &
  772. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  773. r1->set.b[z] | r1->regdiff.b[z];
  774. if(dif.b[z] != r1->regdiff.b[z]) {
  775. r1->regdiff.b[z] = dif.b[z];
  776. change++;
  777. }
  778. }
  779. if(r1->active)
  780. break;
  781. r1->active = 1;
  782. for(z=0; z<BITS; z++)
  783. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  784. if(r1->s2 != R)
  785. synch(r1->s2, dif);
  786. }
  787. }
  788. ulong
  789. allreg(ulong b, Rgn *r)
  790. {
  791. Var *v;
  792. int i;
  793. v = var + r->varno;
  794. r->regno = 0;
  795. switch(v->etype) {
  796. default:
  797. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  798. break;
  799. case TCHAR:
  800. case TUCHAR:
  801. case TSHORT:
  802. case TUSHORT:
  803. case TINT:
  804. case TUINT:
  805. case TLONG:
  806. case TULONG:
  807. case TIND:
  808. case TARRAY:
  809. i = BtoR(~b);
  810. if(i && r->cost >= 0) {
  811. r->regno = i;
  812. return RtoB(i);
  813. }
  814. break;
  815. case TVLONG:
  816. case TDOUBLE:
  817. case TFLOAT:
  818. i = BtoF(~b);
  819. if(i && r->cost >= 0) {
  820. r->regno = i+NREG;
  821. return FtoB(i);
  822. }
  823. break;
  824. }
  825. return 0;
  826. }
  827. void
  828. paint1(Reg *r, int bn)
  829. {
  830. Reg *r1;
  831. Prog *p;
  832. int z;
  833. ulong bb;
  834. z = bn/32;
  835. bb = 1L<<(bn%32);
  836. if(r->act.b[z] & bb)
  837. return;
  838. for(;;) {
  839. if(!(r->refbehind.b[z] & bb))
  840. break;
  841. r1 = r->p1;
  842. if(r1 == R)
  843. break;
  844. if(!(r1->refahead.b[z] & bb))
  845. break;
  846. if(r1->act.b[z] & bb)
  847. break;
  848. r = r1;
  849. }
  850. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  851. change -= CLOAD * r->loop;
  852. if(debug['R'] && debug['v'])
  853. print("%ld%P\tld %B $%d\n", r->loop,
  854. r->prog, blsh(bn), change);
  855. }
  856. for(;;) {
  857. r->act.b[z] |= bb;
  858. p = r->prog;
  859. if(r->use1.b[z] & bb) {
  860. change += CREF * r->loop;
  861. if(debug['R'] && debug['v'])
  862. print("%ld%P\tu1 %B $%d\n", r->loop,
  863. p, blsh(bn), change);
  864. }
  865. if((r->use2.b[z]|r->set.b[z]) & bb) {
  866. change += CREF * r->loop;
  867. if(debug['R'] && debug['v'])
  868. print("%ld%P\tu2 %B $%d\n", r->loop,
  869. p, blsh(bn), change);
  870. }
  871. if(STORE(r) & r->regdiff.b[z] & bb) {
  872. change -= CLOAD * r->loop;
  873. if(debug['R'] && debug['v'])
  874. print("%ld%P\tst %B $%d\n", r->loop,
  875. p, blsh(bn), change);
  876. }
  877. if(r->refbehind.b[z] & bb)
  878. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  879. if(r1->refahead.b[z] & bb)
  880. paint1(r1, bn);
  881. if(!(r->refahead.b[z] & bb))
  882. break;
  883. r1 = r->s2;
  884. if(r1 != R)
  885. if(r1->refbehind.b[z] & bb)
  886. paint1(r1, bn);
  887. r = r->s1;
  888. if(r == R)
  889. break;
  890. if(r->act.b[z] & bb)
  891. break;
  892. if(!(r->refbehind.b[z] & bb))
  893. break;
  894. }
  895. }
  896. ulong
  897. paint2(Reg *r, int bn)
  898. {
  899. Reg *r1;
  900. int z;
  901. ulong bb, vreg;
  902. z = bn/32;
  903. bb = 1L << (bn%32);
  904. vreg = regbits;
  905. if(!(r->act.b[z] & bb))
  906. return vreg;
  907. for(;;) {
  908. if(!(r->refbehind.b[z] & bb))
  909. break;
  910. r1 = r->p1;
  911. if(r1 == R)
  912. break;
  913. if(!(r1->refahead.b[z] & bb))
  914. break;
  915. if(!(r1->act.b[z] & bb))
  916. break;
  917. r = r1;
  918. }
  919. for(;;) {
  920. r->act.b[z] &= ~bb;
  921. vreg |= r->regu;
  922. if(r->refbehind.b[z] & bb)
  923. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  924. if(r1->refahead.b[z] & bb)
  925. vreg |= paint2(r1, bn);
  926. if(!(r->refahead.b[z] & bb))
  927. break;
  928. r1 = r->s2;
  929. if(r1 != R)
  930. if(r1->refbehind.b[z] & bb)
  931. vreg |= paint2(r1, bn);
  932. r = r->s1;
  933. if(r == R)
  934. break;
  935. if(!(r->act.b[z] & bb))
  936. break;
  937. if(!(r->refbehind.b[z] & bb))
  938. break;
  939. }
  940. return vreg;
  941. }
  942. void
  943. paint3(Reg *r, int bn, long rb, int rn)
  944. {
  945. Reg *r1;
  946. Prog *p;
  947. int z;
  948. ulong bb;
  949. z = bn/32;
  950. bb = 1L << (bn%32);
  951. if(r->act.b[z] & bb)
  952. return;
  953. for(;;) {
  954. if(!(r->refbehind.b[z] & bb))
  955. break;
  956. r1 = r->p1;
  957. if(r1 == R)
  958. break;
  959. if(!(r1->refahead.b[z] & bb))
  960. break;
  961. if(r1->act.b[z] & bb)
  962. break;
  963. r = r1;
  964. }
  965. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  966. addmove(r, bn, rn, 0);
  967. for(;;) {
  968. r->act.b[z] |= bb;
  969. p = r->prog;
  970. if(r->use1.b[z] & bb) {
  971. if(debug['R'])
  972. print("%P", p);
  973. addreg(&p->from, rn);
  974. if(debug['R'])
  975. print("\t.c%P\n", p);
  976. }
  977. if((r->use2.b[z]|r->set.b[z]) & bb) {
  978. if(debug['R'])
  979. print("%P", p);
  980. addreg(&p->to, rn);
  981. if(debug['R'])
  982. print("\t.c%P\n", p);
  983. }
  984. if(STORE(r) & r->regdiff.b[z] & bb)
  985. addmove(r, bn, rn, 1);
  986. r->regu |= rb;
  987. if(r->refbehind.b[z] & bb)
  988. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  989. if(r1->refahead.b[z] & bb)
  990. paint3(r1, bn, rb, rn);
  991. if(!(r->refahead.b[z] & bb))
  992. break;
  993. r1 = r->s2;
  994. if(r1 != R)
  995. if(r1->refbehind.b[z] & bb)
  996. paint3(r1, bn, rb, rn);
  997. r = r->s1;
  998. if(r == R)
  999. break;
  1000. if(r->act.b[z] & bb)
  1001. break;
  1002. if(!(r->refbehind.b[z] & bb))
  1003. break;
  1004. }
  1005. }
  1006. void
  1007. addreg(Adr *a, int rn)
  1008. {
  1009. a->sym = 0;
  1010. a->name = D_NONE;
  1011. a->type = D_REG;
  1012. a->reg = rn;
  1013. if(rn >= NREG) {
  1014. a->type = D_FREG;
  1015. a->reg = rn-NREG;
  1016. }
  1017. }
  1018. /*
  1019. * bit reg
  1020. * 0 R0
  1021. * 1 R1
  1022. * ... ...
  1023. * 10 R10
  1024. */
  1025. long
  1026. RtoB(int r)
  1027. {
  1028. if(r >= REGMIN && r <= REGMAX)
  1029. return 1L << r;
  1030. return 0;
  1031. }
  1032. int
  1033. BtoR(long b)
  1034. {
  1035. b &= 0x01fcL;
  1036. if(b == 0)
  1037. return 0;
  1038. return bitno(b);
  1039. }
  1040. /*
  1041. * bit reg
  1042. * 18 F2
  1043. * 19 F3
  1044. * ... ...
  1045. * 23 F7
  1046. */
  1047. long
  1048. FtoB(int f)
  1049. {
  1050. if(f < 2 || f > NFREG-1)
  1051. return 0;
  1052. return 1L << (f + 16);
  1053. }
  1054. int
  1055. BtoF(long b)
  1056. {
  1057. b &= 0xfc0000L;
  1058. if(b == 0)
  1059. return 0;
  1060. return bitno(b) - 16;
  1061. }