reg.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146
  1. #include "gc.h"
  2. void addsplits(void);
  3. Reg*
  4. rega(void)
  5. {
  6. Reg *r;
  7. r = freer;
  8. if(r == R) {
  9. r = alloc(sizeof(*r));
  10. } else
  11. freer = r->link;
  12. *r = zreg;
  13. return r;
  14. }
  15. int
  16. rcmp(const void *a1, const void *a2)
  17. {
  18. Rgn *p1, *p2;
  19. int c1, c2;
  20. p1 = (Rgn*)a1;
  21. p2 = (Rgn*)a2;
  22. c1 = p2->cost;
  23. c2 = p1->cost;
  24. if(c1 -= c2)
  25. return c1;
  26. return p2->varno - p1->varno;
  27. }
  28. void
  29. regopt(Prog *p)
  30. {
  31. Reg *r, *r1, *r2;
  32. Prog *p1;
  33. int i, z;
  34. long initpc, val, npc;
  35. ulong vreg;
  36. Bits bit;
  37. struct
  38. {
  39. long m;
  40. long c;
  41. Reg* p;
  42. } log5[6], *lp;
  43. firstr = R;
  44. lastr = R;
  45. nvar = 0;
  46. regbits = 0;
  47. for(z=0; z<BITS; z++) {
  48. externs.b[z] = 0;
  49. params.b[z] = 0;
  50. consts.b[z] = 0;
  51. addrs.b[z] = 0;
  52. }
  53. /*
  54. * pass 1
  55. * build aux data structure
  56. * allocate pcs
  57. * find use and set of variables
  58. */
  59. val = 5L * 5L * 5L * 5L * 5L;
  60. lp = log5;
  61. for(i=0; i<5; i++) {
  62. lp->m = val;
  63. lp->c = 0;
  64. lp->p = R;
  65. val /= 5L;
  66. lp++;
  67. }
  68. val = 0;
  69. for(; p != P; p = p->link) {
  70. switch(p->as) {
  71. case ADATA:
  72. case AGLOBL:
  73. case ANAME:
  74. continue;
  75. }
  76. r = rega();
  77. if(firstr == R) {
  78. firstr = r;
  79. lastr = r;
  80. } else {
  81. lastr->link = r;
  82. r->p1 = lastr;
  83. lastr->s1 = r;
  84. lastr = r;
  85. }
  86. r->prog = p;
  87. r->pc = val;
  88. val++;
  89. lp = log5;
  90. for(i=0; i<5; i++) {
  91. lp->c--;
  92. if(lp->c <= 0) {
  93. lp->c = lp->m;
  94. if(lp->p != R)
  95. lp->p->log5 = r;
  96. lp->p = r;
  97. (lp+1)->c = 0;
  98. break;
  99. }
  100. lp++;
  101. }
  102. r1 = r->p1;
  103. if(r1 != R)
  104. switch(r1->prog->as) {
  105. case ARET:
  106. case AJMP:
  107. case ARFE:
  108. r->p1 = R;
  109. r1->s1 = R;
  110. }
  111. /*
  112. * left side always read
  113. */
  114. bit = mkvar(&p->from, p->as==AMOVW);
  115. for(z=0; z<BITS; z++)
  116. r->use1.b[z] |= bit.b[z];
  117. /*
  118. * right side depends on opcode
  119. */
  120. bit = mkvar(&p->to, 0);
  121. if(bany(&bit))
  122. switch(p->as) {
  123. default:
  124. diag(Z, "reg: unknown asop: %A", p->as);
  125. break;
  126. /*
  127. * right side write
  128. */
  129. case ANOP:
  130. case AMOVB:
  131. case AMOVBU:
  132. case AMOVH:
  133. case AMOVHU:
  134. case AMOVW:
  135. case AMOVF:
  136. case AMOVD:
  137. for(z=0; z<BITS; z++)
  138. r->set.b[z] |= bit.b[z];
  139. break;
  140. /*
  141. * funny
  142. */
  143. case AJAL:
  144. for(z=0; z<BITS; z++)
  145. addrs.b[z] |= bit.b[z];
  146. break;
  147. }
  148. }
  149. if(firstr == R)
  150. return;
  151. initpc = pc - val;
  152. npc = val;
  153. /*
  154. * pass 2
  155. * turn branch references to pointers
  156. * build back pointers
  157. */
  158. for(r = firstr; r != R; r = r->link) {
  159. p = r->prog;
  160. if(p->to.type == D_BRANCH) {
  161. val = p->to.offset - initpc;
  162. r1 = firstr;
  163. while(r1 != R) {
  164. r2 = r1->log5;
  165. if(r2 != R && val >= r2->pc) {
  166. r1 = r2;
  167. continue;
  168. }
  169. if(r1->pc == val)
  170. break;
  171. r1 = r1->link;
  172. }
  173. if(r1 == R) {
  174. nearln = p->lineno;
  175. diag(Z, "ref not found\n%P", p);
  176. continue;
  177. }
  178. if(r1 == r) {
  179. nearln = p->lineno;
  180. diag(Z, "ref to self\n%P", p);
  181. continue;
  182. }
  183. r->s2 = r1;
  184. r->p2link = r1->p2;
  185. r1->p2 = r;
  186. }
  187. }
  188. if(debug['R']) {
  189. p = firstr->prog;
  190. print("\n%L %D\n", p->lineno, &p->from);
  191. }
  192. /*
  193. * pass 2.5
  194. * find looping structure
  195. */
  196. for(r = firstr; r != R; r = r->link)
  197. r->active = 0;
  198. change = 0;
  199. loopit(firstr, npc);
  200. /*
  201. * pass 3
  202. * iterate propagating usage
  203. * back until flow graph is complete
  204. */
  205. loop1:
  206. change = 0;
  207. for(r = firstr; r != R; r = r->link)
  208. r->active = 0;
  209. for(r = firstr; r != R; r = r->link)
  210. if(r->prog->as == ARET)
  211. prop(r, zbits, zbits);
  212. loop11:
  213. /* pick up unreachable code */
  214. i = 0;
  215. for(r = firstr; r != R; r = r1) {
  216. r1 = r->link;
  217. if(r1 && r1->active && !r->active) {
  218. prop(r, zbits, zbits);
  219. i = 1;
  220. }
  221. }
  222. if(i)
  223. goto loop11;
  224. if(change)
  225. goto loop1;
  226. /*
  227. * pass 4
  228. * iterate propagating register/variable synchrony
  229. * forward until graph is complete
  230. */
  231. loop2:
  232. change = 0;
  233. for(r = firstr; r != R; r = r->link)
  234. r->active = 0;
  235. synch(firstr, zbits);
  236. if(change)
  237. goto loop2;
  238. addsplits();
  239. if(debug['R'] && debug['v']) {
  240. print("\nprop structure:\n");
  241. for(r = firstr; r != R; r = r->link) {
  242. print("%ld:%P", r->loop, r->prog);
  243. for(z=0; z<BITS; z++)
  244. bit.b[z] = r->set.b[z] |
  245. r->refahead.b[z] | r->calahead.b[z] |
  246. r->refbehind.b[z] | r->calbehind.b[z] |
  247. r->use1.b[z] | r->use2.b[z];
  248. if(bany(&bit)) {
  249. print("\t");
  250. if(bany(&r->use1))
  251. print(" u1=%B", r->use1);
  252. if(bany(&r->use2))
  253. print(" u2=%B", r->use2);
  254. if(bany(&r->set))
  255. print(" st=%B", r->set);
  256. if(bany(&r->refahead))
  257. print(" ra=%B", r->refahead);
  258. if(bany(&r->calahead))
  259. print(" ca=%B", r->calahead);
  260. if(bany(&r->refbehind))
  261. print(" rb=%B", r->refbehind);
  262. if(bany(&r->calbehind))
  263. print(" cb=%B", r->calbehind);
  264. if(bany(&r->regdiff))
  265. print(" rd=%B", r->regdiff);
  266. }
  267. print("\n");
  268. }
  269. }
  270. /*
  271. * pass 5
  272. * isolate regions
  273. * calculate costs (paint1)
  274. */
  275. r = firstr;
  276. if(r) {
  277. for(z=0; z<BITS; z++)
  278. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  279. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  280. if(bany(&bit)) {
  281. nearln = r->prog->lineno;
  282. warn(Z, "used and not set: %B", bit);
  283. if(debug['R'] && !debug['w'])
  284. print("used and not set: %B\n", bit);
  285. }
  286. }
  287. for(r = firstr; r != R; r = r->link)
  288. r->act = zbits;
  289. rgp = region;
  290. nregion = 0;
  291. for(r = firstr; r != R; r = r->link) {
  292. for(z=0; z<BITS; z++)
  293. bit.b[z] = r->set.b[z] &
  294. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  295. if(bany(&bit)) {
  296. nearln = r->prog->lineno;
  297. warn(Z, "set and not used: %B", bit);
  298. if(debug['R'])
  299. print("set and not used: %B\n", bit);
  300. excise(r);
  301. }
  302. for(z=0; z<BITS; z++)
  303. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  304. while(bany(&bit)) {
  305. i = bnum(bit);
  306. rgp->enter = r;
  307. rgp->varno = i;
  308. change = 0;
  309. if(debug['R'] && debug['v'])
  310. print("\n");
  311. paint1(r, i);
  312. bit.b[i/32] &= ~(1L<<(i%32));
  313. if(change <= 0) {
  314. if(debug['R'])
  315. print("%L $%d: %B\n",
  316. r->prog->lineno, change, blsh(i));
  317. continue;
  318. }
  319. rgp->cost = change;
  320. nregion++;
  321. if(nregion >= NRGN) {
  322. warn(Z, "too many regions");
  323. goto brk;
  324. }
  325. rgp++;
  326. }
  327. }
  328. brk:
  329. qsort(region, nregion, sizeof(region[0]), rcmp);
  330. /*
  331. * pass 6
  332. * determine used registers (paint2)
  333. * replace code (paint3)
  334. */
  335. rgp = region;
  336. for(i=0; i<nregion; i++) {
  337. bit = blsh(rgp->varno);
  338. vreg = paint2(rgp->enter, rgp->varno);
  339. vreg = allreg(vreg, rgp);
  340. if(debug['R']) {
  341. if(rgp->regno >= NREG)
  342. print("%L $%d F%d: %B\n",
  343. rgp->enter->prog->lineno,
  344. rgp->cost,
  345. rgp->regno-NREG,
  346. bit);
  347. else
  348. print("%L $%d R%d: %B\n",
  349. rgp->enter->prog->lineno,
  350. rgp->cost,
  351. rgp->regno,
  352. bit);
  353. }
  354. if(rgp->regno != 0)
  355. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  356. rgp++;
  357. }
  358. /*
  359. * pass 7
  360. * peep-hole on basic block
  361. */
  362. if(!debug['R'] || debug['P'])
  363. peep();
  364. /*
  365. * pass 8
  366. * recalculate pc
  367. */
  368. val = initpc;
  369. for(r = firstr; r != R; r = r1) {
  370. r->pc = val;
  371. p = r->prog;
  372. p1 = P;
  373. r1 = r->link;
  374. if(r1 != R)
  375. p1 = r1->prog;
  376. for(; p != p1; p = p->link) {
  377. switch(p->as) {
  378. default:
  379. val++;
  380. break;
  381. case ANOP:
  382. case ADATA:
  383. case AGLOBL:
  384. case ANAME:
  385. break;
  386. }
  387. }
  388. }
  389. pc = val;
  390. /*
  391. * fix up branches
  392. */
  393. if(debug['R'])
  394. if(bany(&addrs))
  395. print("addrs: %B\n", addrs);
  396. r1 = 0; /* set */
  397. for(r = firstr; r != R; r = r->link) {
  398. p = r->prog;
  399. if(p->to.type == D_BRANCH)
  400. p->to.offset = r->s2->pc;
  401. r1 = r;
  402. }
  403. /*
  404. * last pass
  405. * eliminate nops
  406. * free aux structures
  407. */
  408. for(p = firstr->prog; p != P; p = p->link){
  409. while(p->link && p->link->as == ANOP)
  410. p->link = p->link->link;
  411. }
  412. if(r1 != R) {
  413. r1->link = freer;
  414. freer = firstr;
  415. }
  416. }
  417. void
  418. addsplits(void)
  419. {
  420. Reg *r, *r1;
  421. int z, i;
  422. Bits bit;
  423. for(r = firstr; r != R; r = r->link) {
  424. if(r->loop > 1)
  425. continue;
  426. if(r->prog->as == AJAL)
  427. continue;
  428. for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
  429. if(r1->loop <= 1)
  430. continue;
  431. for(z=0; z<BITS; z++)
  432. bit.b[z] = r1->calbehind.b[z] &
  433. (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
  434. ~(r->calahead.b[z] & addrs.b[z]);
  435. while(bany(&bit)) {
  436. i = bnum(bit);
  437. bit.b[i/32] &= ~(1L << (i%32));
  438. }
  439. }
  440. }
  441. }
  442. /*
  443. * add mov b,rn
  444. * just after r
  445. */
  446. void
  447. addmove(Reg *r, int bn, int rn, int f)
  448. {
  449. Prog *p, *p1;
  450. Adr *a;
  451. Var *v;
  452. p1 = alloc(sizeof(*p1));
  453. *p1 = zprog;
  454. p = r->prog;
  455. p1->link = p->link;
  456. p->link = p1;
  457. p1->lineno = p->lineno;
  458. v = var + bn;
  459. a = &p1->to;
  460. a->sym = v->sym;
  461. a->name = v->name;
  462. a->offset = v->offset;
  463. a->etype = v->etype;
  464. a->type = D_OREG;
  465. if(a->etype == TARRAY || a->sym == S)
  466. a->type = D_CONST;
  467. p1->as = AMOVW;
  468. if(v->etype == TCHAR || v->etype == TUCHAR)
  469. p1->as = AMOVB;
  470. if(v->etype == TSHORT || v->etype == TUSHORT)
  471. p1->as = AMOVH;
  472. if(v->etype == TFLOAT)
  473. p1->as = AMOVF;
  474. if(v->etype == TDOUBLE)
  475. p1->as = AMOVD;
  476. p1->from.type = D_REG;
  477. p1->from.reg = rn;
  478. if(rn >= NREG) {
  479. p1->from.type = D_FREG;
  480. p1->from.reg = rn-NREG;
  481. }
  482. if(!f) {
  483. p1->from = *a;
  484. *a = zprog.from;
  485. a->type = D_REG;
  486. a->reg = rn;
  487. if(rn >= NREG) {
  488. a->type = D_FREG;
  489. a->reg = rn-NREG;
  490. }
  491. if(v->etype == TUCHAR)
  492. p1->as = AMOVBU;
  493. if(v->etype == TUSHORT)
  494. p1->as = AMOVHU;
  495. }
  496. if(debug['R'])
  497. print("%P\t.a%P\n", p, p1);
  498. }
  499. Bits
  500. mkvar(Adr *a, int docon)
  501. {
  502. Var *v;
  503. int i, t, n, et, z;
  504. long o;
  505. Bits bit;
  506. Sym *s;
  507. t = a->type;
  508. if(t == D_REG && a->reg != NREG)
  509. regbits |= RtoB(a->reg);
  510. if(t == D_FREG && a->reg != NREG)
  511. regbits |= FtoB(a->reg);
  512. s = a->sym;
  513. o = a->offset;
  514. et = a->etype;
  515. if(s == S) {
  516. if(t != D_CONST || !docon || a->reg != NREG)
  517. goto none;
  518. et = TLONG;
  519. }
  520. if(t == D_CONST) {
  521. if(s == S && sval(o))
  522. goto none;
  523. }
  524. n = a->name;
  525. v = var;
  526. for(i=0; i<nvar; i++) {
  527. if(s == v->sym)
  528. if(n == v->name)
  529. if(o == v->offset)
  530. goto out;
  531. v++;
  532. }
  533. if(s)
  534. if(s->name[0] == '.')
  535. goto none;
  536. if(nvar >= NVAR) {
  537. if(debug['w'] > 1 && s)
  538. warn(Z, "variable not optimized: %s", s->name);
  539. goto none;
  540. }
  541. i = nvar;
  542. nvar++;
  543. v = &var[i];
  544. v->sym = s;
  545. v->offset = o;
  546. v->etype = et;
  547. v->name = n;
  548. if(debug['R'])
  549. print("bit=%2d et=%2d %D\n", i, et, a);
  550. out:
  551. bit = blsh(i);
  552. if(n == D_EXTERN || n == D_STATIC)
  553. for(z=0; z<BITS; z++)
  554. externs.b[z] |= bit.b[z];
  555. if(n == D_PARAM)
  556. for(z=0; z<BITS; z++)
  557. params.b[z] |= bit.b[z];
  558. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  559. for(z=0; z<BITS; z++)
  560. addrs.b[z] |= bit.b[z];
  561. if(t == D_CONST) {
  562. if(s == S) {
  563. for(z=0; z<BITS; z++)
  564. consts.b[z] |= bit.b[z];
  565. return bit;
  566. }
  567. if(et != TARRAY)
  568. for(z=0; z<BITS; z++)
  569. addrs.b[z] |= bit.b[z];
  570. for(z=0; z<BITS; z++)
  571. params.b[z] |= bit.b[z];
  572. return bit;
  573. }
  574. if(t == D_OREG)
  575. return bit;
  576. none:
  577. return zbits;
  578. }
  579. void
  580. prop(Reg *r, Bits ref, Bits cal)
  581. {
  582. Reg *r1, *r2;
  583. int z;
  584. for(r1 = r; r1 != R; r1 = r1->p1) {
  585. for(z=0; z<BITS; z++) {
  586. ref.b[z] |= r1->refahead.b[z];
  587. if(ref.b[z] != r1->refahead.b[z]) {
  588. r1->refahead.b[z] = ref.b[z];
  589. change++;
  590. }
  591. cal.b[z] |= r1->calahead.b[z];
  592. if(cal.b[z] != r1->calahead.b[z]) {
  593. r1->calahead.b[z] = cal.b[z];
  594. change++;
  595. }
  596. }
  597. switch(r1->prog->as) {
  598. case AJAL:
  599. for(z=0; z<BITS; z++) {
  600. cal.b[z] |= ref.b[z] | externs.b[z];
  601. ref.b[z] = 0;
  602. }
  603. break;
  604. case ATEXT:
  605. for(z=0; z<BITS; z++) {
  606. cal.b[z] = 0;
  607. ref.b[z] = 0;
  608. }
  609. break;
  610. case ARET:
  611. for(z=0; z<BITS; z++) {
  612. cal.b[z] = externs.b[z];
  613. ref.b[z] = 0;
  614. }
  615. }
  616. for(z=0; z<BITS; z++) {
  617. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  618. r1->use1.b[z] | r1->use2.b[z];
  619. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  620. r1->refbehind.b[z] = ref.b[z];
  621. r1->calbehind.b[z] = cal.b[z];
  622. }
  623. if(r1->active)
  624. break;
  625. r1->active = 1;
  626. }
  627. for(; r != r1; r = r->p1)
  628. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  629. prop(r2, r->refbehind, r->calbehind);
  630. }
  631. /*
  632. * find looping structure
  633. *
  634. * 1) find reverse postordering
  635. * 2) find approximate dominators,
  636. * the actual dominators if the flow graph is reducible
  637. * otherwise, dominators plus some other non-dominators.
  638. * See Matthew S. Hecht and Jeffrey D. Ullman,
  639. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  640. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  641. * Oct. 1-3, 1973, pp. 207-217.
  642. * 3) find all nodes with a predecessor dominated by the current node.
  643. * such a node is a loop head.
  644. * recursively, all preds with a greater rpo number are in the loop
  645. */
  646. long
  647. postorder(Reg *r, Reg **rpo2r, long n)
  648. {
  649. Reg *r1;
  650. r->rpo = 1;
  651. r1 = r->s1;
  652. if(r1 && !r1->rpo)
  653. n = postorder(r1, rpo2r, n);
  654. r1 = r->s2;
  655. if(r1 && !r1->rpo)
  656. n = postorder(r1, rpo2r, n);
  657. rpo2r[n] = r;
  658. n++;
  659. return n;
  660. }
  661. long
  662. rpolca(long *idom, long rpo1, long rpo2)
  663. {
  664. long t;
  665. if(rpo1 == -1)
  666. return rpo2;
  667. while(rpo1 != rpo2){
  668. if(rpo1 > rpo2){
  669. t = rpo2;
  670. rpo2 = rpo1;
  671. rpo1 = t;
  672. }
  673. while(rpo1 < rpo2){
  674. t = idom[rpo2];
  675. if(t >= rpo2)
  676. sysfatal("bad idom");
  677. rpo2 = t;
  678. }
  679. }
  680. return rpo1;
  681. }
  682. int
  683. doms(long *idom, long r, long s)
  684. {
  685. while(s > r)
  686. s = idom[s];
  687. return s == r;
  688. }
  689. int
  690. loophead(long *idom, Reg *r)
  691. {
  692. long src;
  693. src = r->rpo;
  694. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  695. return 1;
  696. for(r = r->p2; r != R; r = r->p2link)
  697. if(doms(idom, src, r->rpo))
  698. return 1;
  699. return 0;
  700. }
  701. void
  702. loopmark(Reg **rpo2r, long head, Reg *r)
  703. {
  704. if(r->rpo < head || r->active == head)
  705. return;
  706. r->active = head;
  707. r->loop += LOOP;
  708. if(r->p1 != R)
  709. loopmark(rpo2r, head, r->p1);
  710. for(r = r->p2; r != R; r = r->p2link)
  711. loopmark(rpo2r, head, r);
  712. }
  713. void
  714. loopit(Reg *r, long nr)
  715. {
  716. Reg *r1;
  717. long i, d, me;
  718. if(nr > maxnr) {
  719. rpo2r = alloc(nr * sizeof(Reg*));
  720. idom = alloc(nr * sizeof(long));
  721. maxnr = nr;
  722. }
  723. d = postorder(r, rpo2r, 0);
  724. if(d > nr)
  725. sysfatal("too many reg nodes");
  726. nr = d;
  727. for(i = 0; i < nr / 2; i++){
  728. r1 = rpo2r[i];
  729. rpo2r[i] = rpo2r[nr - 1 - i];
  730. rpo2r[nr - 1 - i] = r1;
  731. }
  732. for(i = 0; i < nr; i++)
  733. rpo2r[i]->rpo = i;
  734. idom[0] = 0;
  735. for(i = 0; i < nr; i++){
  736. r1 = rpo2r[i];
  737. me = r1->rpo;
  738. d = -1;
  739. if(r1->p1 != R && r1->p1->rpo < me)
  740. d = r1->p1->rpo;
  741. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  742. if(r1->rpo < me)
  743. d = rpolca(idom, d, r1->rpo);
  744. idom[i] = d;
  745. }
  746. for(i = 0; i < nr; i++){
  747. r1 = rpo2r[i];
  748. r1->loop++;
  749. if(r1->p2 != R && loophead(idom, r1))
  750. loopmark(rpo2r, i, r1);
  751. }
  752. }
  753. void
  754. synch(Reg *r, Bits dif)
  755. {
  756. Reg *r1;
  757. int z;
  758. for(r1 = r; r1 != R; r1 = r1->s1) {
  759. for(z=0; z<BITS; z++) {
  760. dif.b[z] = (dif.b[z] &
  761. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  762. r1->set.b[z] | r1->regdiff.b[z];
  763. if(dif.b[z] != r1->regdiff.b[z]) {
  764. r1->regdiff.b[z] = dif.b[z];
  765. change++;
  766. }
  767. }
  768. if(r1->active)
  769. break;
  770. r1->active = 1;
  771. for(z=0; z<BITS; z++)
  772. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  773. if(r1->s2 != R)
  774. synch(r1->s2, dif);
  775. }
  776. }
  777. ulong
  778. allreg(ulong b, Rgn *r)
  779. {
  780. Var *v;
  781. int i;
  782. v = var + r->varno;
  783. r->regno = 0;
  784. switch(v->etype) {
  785. default:
  786. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  787. break;
  788. case TCHAR:
  789. case TUCHAR:
  790. case TSHORT:
  791. case TUSHORT:
  792. case TINT:
  793. case TUINT:
  794. case TLONG:
  795. case TULONG:
  796. case TIND:
  797. case TARRAY:
  798. i = BtoR(~b);
  799. if(i && r->cost >= 0) {
  800. r->regno = i;
  801. return RtoB(i);
  802. }
  803. break;
  804. case TDOUBLE:
  805. case TFLOAT:
  806. i = BtoF(~b);
  807. if(i && r->cost >= 0) {
  808. r->regno = i+NREG;
  809. return FtoB(i);
  810. }
  811. break;
  812. }
  813. return 0;
  814. }
  815. void
  816. paint1(Reg *r, int bn)
  817. {
  818. Reg *r1;
  819. Prog *p;
  820. int z;
  821. ulong bb;
  822. z = bn/32;
  823. bb = 1L<<(bn%32);
  824. if(r->act.b[z] & bb)
  825. return;
  826. for(;;) {
  827. if(!(r->refbehind.b[z] & bb))
  828. break;
  829. r1 = r->p1;
  830. if(r1 == R)
  831. break;
  832. if(!(r1->refahead.b[z] & bb))
  833. break;
  834. if(r1->act.b[z] & bb)
  835. break;
  836. r = r1;
  837. }
  838. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  839. change -= CLOAD * r->loop;
  840. if(debug['R'] && debug['v'])
  841. print("%ld%P\tld %B $%d\n", r->loop,
  842. r->prog, blsh(bn), change);
  843. }
  844. for(;;) {
  845. r->act.b[z] |= bb;
  846. p = r->prog;
  847. if(r->use1.b[z] & bb) {
  848. change += CREF * r->loop;
  849. if(debug['R'] && debug['v'])
  850. print("%ld%P\tu1 %B $%d\n", r->loop,
  851. p, blsh(bn), change);
  852. }
  853. if((r->use2.b[z]|r->set.b[z]) & bb) {
  854. change += CREF * r->loop;
  855. if(debug['R'] && debug['v'])
  856. print("%ld%P\tu2 %B $%d\n", r->loop,
  857. p, blsh(bn), change);
  858. }
  859. if(STORE(r) & r->regdiff.b[z] & bb) {
  860. change -= CLOAD * r->loop;
  861. if(debug['R'] && debug['v'])
  862. print("%ld%P\tst %B $%d\n", r->loop,
  863. p, blsh(bn), change);
  864. }
  865. if(r->refbehind.b[z] & bb)
  866. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  867. if(r1->refahead.b[z] & bb)
  868. paint1(r1, bn);
  869. if(!(r->refahead.b[z] & bb))
  870. break;
  871. r1 = r->s2;
  872. if(r1 != R)
  873. if(r1->refbehind.b[z] & bb)
  874. paint1(r1, bn);
  875. r = r->s1;
  876. if(r == R)
  877. break;
  878. if(r->act.b[z] & bb)
  879. break;
  880. if(!(r->refbehind.b[z] & bb))
  881. break;
  882. }
  883. }
  884. ulong
  885. paint2(Reg *r, int bn)
  886. {
  887. Reg *r1;
  888. int z;
  889. ulong bb, vreg;
  890. z = bn/32;
  891. bb = 1L << (bn%32);
  892. vreg = regbits;
  893. if(!(r->act.b[z] & bb))
  894. return vreg;
  895. for(;;) {
  896. if(!(r->refbehind.b[z] & bb))
  897. break;
  898. r1 = r->p1;
  899. if(r1 == R)
  900. break;
  901. if(!(r1->refahead.b[z] & bb))
  902. break;
  903. if(!(r1->act.b[z] & bb))
  904. break;
  905. r = r1;
  906. }
  907. for(;;) {
  908. r->act.b[z] &= ~bb;
  909. vreg |= r->regu;
  910. if(r->refbehind.b[z] & bb)
  911. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  912. if(r1->refahead.b[z] & bb)
  913. vreg |= paint2(r1, bn);
  914. if(!(r->refahead.b[z] & bb))
  915. break;
  916. r1 = r->s2;
  917. if(r1 != R)
  918. if(r1->refbehind.b[z] & bb)
  919. vreg |= paint2(r1, bn);
  920. r = r->s1;
  921. if(r == R)
  922. break;
  923. if(!(r->act.b[z] & bb))
  924. break;
  925. if(!(r->refbehind.b[z] & bb))
  926. break;
  927. }
  928. return vreg;
  929. }
  930. void
  931. paint3(Reg *r, int bn, long rb, int rn)
  932. {
  933. Reg *r1;
  934. Prog *p;
  935. int z;
  936. ulong bb;
  937. z = bn/32;
  938. bb = 1L << (bn%32);
  939. if(r->act.b[z] & bb)
  940. return;
  941. for(;;) {
  942. if(!(r->refbehind.b[z] & bb))
  943. break;
  944. r1 = r->p1;
  945. if(r1 == R)
  946. break;
  947. if(!(r1->refahead.b[z] & bb))
  948. break;
  949. if(r1->act.b[z] & bb)
  950. break;
  951. r = r1;
  952. }
  953. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  954. addmove(r, bn, rn, 0);
  955. for(;;) {
  956. r->act.b[z] |= bb;
  957. p = r->prog;
  958. if(r->use1.b[z] & bb) {
  959. if(debug['R'])
  960. print("%P", p);
  961. addreg(&p->from, rn);
  962. if(debug['R'])
  963. print("\t.c%P\n", p);
  964. }
  965. if((r->use2.b[z]|r->set.b[z]) & bb) {
  966. if(debug['R'])
  967. print("%P", p);
  968. addreg(&p->to, rn);
  969. if(debug['R'])
  970. print("\t.c%P\n", p);
  971. }
  972. if(STORE(r) & r->regdiff.b[z] & bb)
  973. addmove(r, bn, rn, 1);
  974. r->regu |= rb;
  975. if(r->refbehind.b[z] & bb)
  976. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  977. if(r1->refahead.b[z] & bb)
  978. paint3(r1, bn, rb, rn);
  979. if(!(r->refahead.b[z] & bb))
  980. break;
  981. r1 = r->s2;
  982. if(r1 != R)
  983. if(r1->refbehind.b[z] & bb)
  984. paint3(r1, bn, rb, rn);
  985. r = r->s1;
  986. if(r == R)
  987. break;
  988. if(r->act.b[z] & bb)
  989. break;
  990. if(!(r->refbehind.b[z] & bb))
  991. break;
  992. }
  993. }
  994. void
  995. addreg(Adr *a, int rn)
  996. {
  997. a->sym = 0;
  998. a->name = D_NONE;
  999. a->type = D_REG;
  1000. a->reg = rn;
  1001. if(rn >= NREG) {
  1002. a->type = D_FREG;
  1003. a->reg = rn-NREG;
  1004. }
  1005. }
  1006. /*
  1007. * bit reg
  1008. * 0 R3
  1009. * 1 R4
  1010. * ... ...
  1011. * 19 R22
  1012. * 20 R23
  1013. */
  1014. long
  1015. RtoB(int r)
  1016. {
  1017. if(r < 3 || r > 23)
  1018. return 0;
  1019. return 1L << (r-3);
  1020. }
  1021. BtoR(long b)
  1022. {
  1023. b &= 0x001fffffL;
  1024. if(b == 0)
  1025. return 0;
  1026. return bitno(b) + 3;
  1027. }
  1028. /*
  1029. * bit reg
  1030. * 22 F4
  1031. * 23 F6
  1032. * ... ...
  1033. * 31 F22
  1034. */
  1035. long
  1036. FtoB(int f)
  1037. {
  1038. if(f < 4 || f > 22 || (f&1))
  1039. return 0;
  1040. return 1L << (f/2 + 20);
  1041. }
  1042. int
  1043. BtoF(long b)
  1044. {
  1045. b &= 0xffc00000L;
  1046. if(b == 0)
  1047. return 0;
  1048. return bitno(b)*2 - 40;
  1049. }