reg.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148
  1. #include "gc.h"
  2. void addsplits(void);
  3. Reg*
  4. rega(void)
  5. {
  6. Reg *r;
  7. r = freer;
  8. if(r == R) {
  9. r = alloc(sizeof(*r));
  10. } else
  11. freer = r->link;
  12. *r = zreg;
  13. return r;
  14. }
  15. int
  16. rcmp(const void *a1, const void *a2)
  17. {
  18. Rgn *p1, *p2;
  19. int c1, c2;
  20. p1 = (Rgn*)a1;
  21. p2 = (Rgn*)a2;
  22. c1 = p2->cost;
  23. c2 = p1->cost;
  24. if(c1 -= c2)
  25. return c1;
  26. return p2->varno - p1->varno;
  27. }
  28. void
  29. regopt(Prog *p)
  30. {
  31. Reg *r, *r1, *r2;
  32. Prog *p1;
  33. int i, z;
  34. long initpc, val, npc;
  35. ulong vreg;
  36. Bits bit;
  37. struct
  38. {
  39. long m;
  40. long c;
  41. Reg* p;
  42. } log5[6], *lp;
  43. firstr = R;
  44. lastr = R;
  45. nvar = 0;
  46. regbits = 0;
  47. for(z=0; z<BITS; z++) {
  48. externs.b[z] = 0;
  49. params.b[z] = 0;
  50. consts.b[z] = 0;
  51. addrs.b[z] = 0;
  52. }
  53. /*
  54. * pass 1
  55. * build aux data structure
  56. * allocate pcs
  57. * find use and set of variables
  58. */
  59. val = 5L * 5L * 5L * 5L * 5L;
  60. lp = log5;
  61. for(i=0; i<5; i++) {
  62. lp->m = val;
  63. lp->c = 0;
  64. lp->p = R;
  65. val /= 5L;
  66. lp++;
  67. }
  68. val = 0;
  69. for(; p != P; p = p->link) {
  70. switch(p->as) {
  71. case ADATA:
  72. case AGLOBL:
  73. case ANAME:
  74. case ASIGNAME:
  75. continue;
  76. }
  77. r = rega();
  78. if(firstr == R) {
  79. firstr = r;
  80. lastr = r;
  81. } else {
  82. lastr->link = r;
  83. r->p1 = lastr;
  84. lastr->s1 = r;
  85. lastr = r;
  86. }
  87. r->prog = p;
  88. r->pc = val;
  89. val++;
  90. lp = log5;
  91. for(i=0; i<5; i++) {
  92. lp->c--;
  93. if(lp->c <= 0) {
  94. lp->c = lp->m;
  95. if(lp->p != R)
  96. lp->p->log5 = r;
  97. lp->p = r;
  98. (lp+1)->c = 0;
  99. break;
  100. }
  101. lp++;
  102. }
  103. r1 = r->p1;
  104. if(r1 != R)
  105. switch(r1->prog->as) {
  106. case ARET:
  107. case AJMP:
  108. case ARFE:
  109. r->p1 = R;
  110. r1->s1 = R;
  111. }
  112. /*
  113. * left side always read
  114. */
  115. bit = mkvar(&p->from, p->as==AMOVW);
  116. for(z=0; z<BITS; z++)
  117. r->use1.b[z] |= bit.b[z];
  118. /*
  119. * right side depends on opcode
  120. */
  121. bit = mkvar(&p->to, 0);
  122. if(bany(&bit))
  123. switch(p->as) {
  124. default:
  125. diag(Z, "reg: unknown asop: %A", p->as);
  126. break;
  127. /*
  128. * right side write
  129. */
  130. case ANOP:
  131. case AMOVB:
  132. case AMOVBU:
  133. case AMOVH:
  134. case AMOVHU:
  135. case AMOVW:
  136. case AMOVF:
  137. case AMOVD:
  138. for(z=0; z<BITS; z++)
  139. r->set.b[z] |= bit.b[z];
  140. break;
  141. /*
  142. * funny
  143. */
  144. case AJAL:
  145. for(z=0; z<BITS; z++)
  146. addrs.b[z] |= bit.b[z];
  147. break;
  148. }
  149. }
  150. if(firstr == R)
  151. return;
  152. initpc = pc - val;
  153. npc = val;
  154. /*
  155. * pass 2
  156. * turn branch references to pointers
  157. * build back pointers
  158. */
  159. for(r = firstr; r != R; r = r->link) {
  160. p = r->prog;
  161. if(p->to.type == D_BRANCH) {
  162. val = p->to.offset - initpc;
  163. r1 = firstr;
  164. while(r1 != R) {
  165. r2 = r1->log5;
  166. if(r2 != R && val >= r2->pc) {
  167. r1 = r2;
  168. continue;
  169. }
  170. if(r1->pc == val)
  171. break;
  172. r1 = r1->link;
  173. }
  174. if(r1 == R) {
  175. nearln = p->lineno;
  176. diag(Z, "ref not found\n%P", p);
  177. continue;
  178. }
  179. if(r1 == r) {
  180. nearln = p->lineno;
  181. diag(Z, "ref to self\n%P", p);
  182. continue;
  183. }
  184. r->s2 = r1;
  185. r->p2link = r1->p2;
  186. r1->p2 = r;
  187. }
  188. }
  189. if(debug['R']) {
  190. p = firstr->prog;
  191. print("\n%L %D\n", p->lineno, &p->from);
  192. }
  193. /*
  194. * pass 2.5
  195. * find looping structure
  196. */
  197. for(r = firstr; r != R; r = r->link)
  198. r->active = 0;
  199. change = 0;
  200. loopit(firstr, npc);
  201. /*
  202. * pass 3
  203. * iterate propagating usage
  204. * back until flow graph is complete
  205. */
  206. loop1:
  207. change = 0;
  208. for(r = firstr; r != R; r = r->link)
  209. r->active = 0;
  210. for(r = firstr; r != R; r = r->link)
  211. if(r->prog->as == ARET)
  212. prop(r, zbits, zbits);
  213. loop11:
  214. /* pick up unreachable code */
  215. i = 0;
  216. for(r = firstr; r != R; r = r1) {
  217. r1 = r->link;
  218. if(r1 && r1->active && !r->active) {
  219. prop(r, zbits, zbits);
  220. i = 1;
  221. }
  222. }
  223. if(i)
  224. goto loop11;
  225. if(change)
  226. goto loop1;
  227. /*
  228. * pass 4
  229. * iterate propagating register/variable synchrony
  230. * forward until graph is complete
  231. */
  232. loop2:
  233. change = 0;
  234. for(r = firstr; r != R; r = r->link)
  235. r->active = 0;
  236. synch(firstr, zbits);
  237. if(change)
  238. goto loop2;
  239. addsplits();
  240. if(debug['R'] && debug['v']) {
  241. print("\nprop structure:\n");
  242. for(r = firstr; r != R; r = r->link) {
  243. print("%ld:%P", r->loop, r->prog);
  244. for(z=0; z<BITS; z++)
  245. bit.b[z] = r->set.b[z] |
  246. r->refahead.b[z] | r->calahead.b[z] |
  247. r->refbehind.b[z] | r->calbehind.b[z] |
  248. r->use1.b[z] | r->use2.b[z];
  249. if(bany(&bit)) {
  250. print("\t");
  251. if(bany(&r->use1))
  252. print(" u1=%B", r->use1);
  253. if(bany(&r->use2))
  254. print(" u2=%B", r->use2);
  255. if(bany(&r->set))
  256. print(" st=%B", r->set);
  257. if(bany(&r->refahead))
  258. print(" ra=%B", r->refahead);
  259. if(bany(&r->calahead))
  260. print(" ca=%B", r->calahead);
  261. if(bany(&r->refbehind))
  262. print(" rb=%B", r->refbehind);
  263. if(bany(&r->calbehind))
  264. print(" cb=%B", r->calbehind);
  265. if(bany(&r->regdiff))
  266. print(" rd=%B", r->regdiff);
  267. }
  268. print("\n");
  269. }
  270. }
  271. /*
  272. * pass 5
  273. * isolate regions
  274. * calculate costs (paint1)
  275. */
  276. r = firstr;
  277. if(r) {
  278. for(z=0; z<BITS; z++)
  279. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  280. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  281. if(bany(&bit)) {
  282. nearln = r->prog->lineno;
  283. warn(Z, "used and not set: %B", bit);
  284. if(debug['R'] && !debug['w'])
  285. print("used and not set: %B\n", bit);
  286. }
  287. }
  288. for(r = firstr; r != R; r = r->link)
  289. r->act = zbits;
  290. rgp = region;
  291. nregion = 0;
  292. for(r = firstr; r != R; r = r->link) {
  293. for(z=0; z<BITS; z++)
  294. bit.b[z] = r->set.b[z] &
  295. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  296. if(bany(&bit)) {
  297. nearln = r->prog->lineno;
  298. warn(Z, "set and not used: %B", bit);
  299. if(debug['R'])
  300. print("set and not used: %B\n", bit);
  301. excise(r);
  302. }
  303. for(z=0; z<BITS; z++)
  304. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  305. while(bany(&bit)) {
  306. i = bnum(bit);
  307. rgp->enter = r;
  308. rgp->varno = i;
  309. change = 0;
  310. if(debug['R'] && debug['v'])
  311. print("\n");
  312. paint1(r, i);
  313. bit.b[i/32] &= ~(1L<<(i%32));
  314. if(change <= 0) {
  315. if(debug['R'])
  316. print("%L $%d: %B\n",
  317. r->prog->lineno, change, blsh(i));
  318. continue;
  319. }
  320. rgp->cost = change;
  321. nregion++;
  322. if(nregion >= NRGN) {
  323. warn(Z, "too many regions");
  324. goto brk;
  325. }
  326. rgp++;
  327. }
  328. }
  329. brk:
  330. qsort(region, nregion, sizeof(region[0]), rcmp);
  331. /*
  332. * pass 6
  333. * determine used registers (paint2)
  334. * replace code (paint3)
  335. */
  336. rgp = region;
  337. for(i=0; i<nregion; i++) {
  338. bit = blsh(rgp->varno);
  339. vreg = paint2(rgp->enter, rgp->varno);
  340. vreg = allreg(vreg, rgp);
  341. if(debug['R']) {
  342. if(rgp->regno >= NREG)
  343. print("%L $%d F%d: %B\n",
  344. rgp->enter->prog->lineno,
  345. rgp->cost,
  346. rgp->regno-NREG,
  347. bit);
  348. else
  349. print("%L $%d R%d: %B\n",
  350. rgp->enter->prog->lineno,
  351. rgp->cost,
  352. rgp->regno,
  353. bit);
  354. }
  355. if(rgp->regno != 0)
  356. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  357. rgp++;
  358. }
  359. /*
  360. * pass 7
  361. * peep-hole on basic block
  362. */
  363. if(!debug['R'] || debug['P'])
  364. peep();
  365. /*
  366. * pass 8
  367. * recalculate pc
  368. */
  369. val = initpc;
  370. for(r = firstr; r != R; r = r1) {
  371. r->pc = val;
  372. p = r->prog;
  373. p1 = P;
  374. r1 = r->link;
  375. if(r1 != R)
  376. p1 = r1->prog;
  377. for(; p != p1; p = p->link) {
  378. switch(p->as) {
  379. default:
  380. val++;
  381. break;
  382. case ANOP:
  383. case ADATA:
  384. case AGLOBL:
  385. case ANAME:
  386. case ASIGNAME:
  387. break;
  388. }
  389. }
  390. }
  391. pc = val;
  392. /*
  393. * fix up branches
  394. */
  395. if(debug['R'])
  396. if(bany(&addrs))
  397. print("addrs: %B\n", addrs);
  398. r1 = 0; /* set */
  399. for(r = firstr; r != R; r = r->link) {
  400. p = r->prog;
  401. if(p->to.type == D_BRANCH)
  402. p->to.offset = r->s2->pc;
  403. r1 = r;
  404. }
  405. /*
  406. * last pass
  407. * eliminate nops
  408. * free aux structures
  409. */
  410. for(p = firstr->prog; p != P; p = p->link){
  411. while(p->link && p->link->as == ANOP)
  412. p->link = p->link->link;
  413. }
  414. if(r1 != R) {
  415. r1->link = freer;
  416. freer = firstr;
  417. }
  418. }
  419. void
  420. addsplits(void)
  421. {
  422. Reg *r, *r1;
  423. int z, i;
  424. Bits bit;
  425. for(r = firstr; r != R; r = r->link) {
  426. if(r->loop > 1)
  427. continue;
  428. if(r->prog->as == AJAL)
  429. continue;
  430. for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
  431. if(r1->loop <= 1)
  432. continue;
  433. for(z=0; z<BITS; z++)
  434. bit.b[z] = r1->calbehind.b[z] &
  435. (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
  436. ~(r->calahead.b[z] & addrs.b[z]);
  437. while(bany(&bit)) {
  438. i = bnum(bit);
  439. bit.b[i/32] &= ~(1L << (i%32));
  440. }
  441. }
  442. }
  443. }
  444. /*
  445. * add mov b,rn
  446. * just after r
  447. */
  448. void
  449. addmove(Reg *r, int bn, int rn, int f)
  450. {
  451. Prog *p, *p1;
  452. Adr *a;
  453. Var *v;
  454. p1 = alloc(sizeof(*p1));
  455. *p1 = zprog;
  456. p = r->prog;
  457. p1->link = p->link;
  458. p->link = p1;
  459. p1->lineno = p->lineno;
  460. v = var + bn;
  461. a = &p1->to;
  462. a->sym = v->sym;
  463. a->name = v->name;
  464. a->offset = v->offset;
  465. a->etype = v->etype;
  466. a->type = D_OREG;
  467. if(a->etype == TARRAY || a->sym == S)
  468. a->type = D_CONST;
  469. p1->as = AMOVW;
  470. if(v->etype == TCHAR || v->etype == TUCHAR)
  471. p1->as = AMOVB;
  472. if(v->etype == TSHORT || v->etype == TUSHORT)
  473. p1->as = AMOVH;
  474. if(v->etype == TFLOAT)
  475. p1->as = AMOVF;
  476. if(v->etype == TDOUBLE)
  477. p1->as = AMOVD;
  478. p1->from.type = D_REG;
  479. p1->from.reg = rn;
  480. if(rn >= NREG) {
  481. p1->from.type = D_FREG;
  482. p1->from.reg = rn-NREG;
  483. }
  484. if(!f) {
  485. p1->from = *a;
  486. *a = zprog.from;
  487. a->type = D_REG;
  488. a->reg = rn;
  489. if(rn >= NREG) {
  490. a->type = D_FREG;
  491. a->reg = rn-NREG;
  492. }
  493. if(v->etype == TUCHAR)
  494. p1->as = AMOVBU;
  495. if(v->etype == TUSHORT)
  496. p1->as = AMOVHU;
  497. }
  498. if(debug['R'])
  499. print("%P\t.a%P\n", p, p1);
  500. }
  501. Bits
  502. mkvar(Adr *a, int docon)
  503. {
  504. Var *v;
  505. int i, t, n, et, z;
  506. long o;
  507. Bits bit;
  508. Sym *s;
  509. t = a->type;
  510. if(t == D_REG && a->reg != NREG)
  511. regbits |= RtoB(a->reg);
  512. if(t == D_FREG && a->reg != NREG)
  513. regbits |= FtoB(a->reg);
  514. s = a->sym;
  515. o = a->offset;
  516. et = a->etype;
  517. if(s == S) {
  518. if(t != D_CONST || !docon || a->reg != NREG)
  519. goto none;
  520. et = TLONG;
  521. }
  522. if(t == D_CONST) {
  523. if(s == S && sval(o))
  524. goto none;
  525. }
  526. n = a->name;
  527. v = var;
  528. for(i=0; i<nvar; i++) {
  529. if(s == v->sym)
  530. if(n == v->name)
  531. if(o == v->offset)
  532. goto out;
  533. v++;
  534. }
  535. if(s)
  536. if(s->name[0] == '.')
  537. goto none;
  538. if(nvar >= NVAR) {
  539. if(debug['w'] > 1 && s)
  540. warn(Z, "variable not optimized: %s", s->name);
  541. goto none;
  542. }
  543. i = nvar;
  544. nvar++;
  545. v = &var[i];
  546. v->sym = s;
  547. v->offset = o;
  548. v->etype = et;
  549. v->name = n;
  550. if(debug['R'])
  551. print("bit=%2d et=%2d %D\n", i, et, a);
  552. out:
  553. bit = blsh(i);
  554. if(n == D_EXTERN || n == D_STATIC)
  555. for(z=0; z<BITS; z++)
  556. externs.b[z] |= bit.b[z];
  557. if(n == D_PARAM)
  558. for(z=0; z<BITS; z++)
  559. params.b[z] |= bit.b[z];
  560. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  561. for(z=0; z<BITS; z++)
  562. addrs.b[z] |= bit.b[z];
  563. if(t == D_CONST) {
  564. if(s == S) {
  565. for(z=0; z<BITS; z++)
  566. consts.b[z] |= bit.b[z];
  567. return bit;
  568. }
  569. if(et != TARRAY)
  570. for(z=0; z<BITS; z++)
  571. addrs.b[z] |= bit.b[z];
  572. for(z=0; z<BITS; z++)
  573. params.b[z] |= bit.b[z];
  574. return bit;
  575. }
  576. if(t == D_OREG)
  577. return bit;
  578. none:
  579. return zbits;
  580. }
  581. void
  582. prop(Reg *r, Bits ref, Bits cal)
  583. {
  584. Reg *r1, *r2;
  585. int z;
  586. for(r1 = r; r1 != R; r1 = r1->p1) {
  587. for(z=0; z<BITS; z++) {
  588. ref.b[z] |= r1->refahead.b[z];
  589. if(ref.b[z] != r1->refahead.b[z]) {
  590. r1->refahead.b[z] = ref.b[z];
  591. change++;
  592. }
  593. cal.b[z] |= r1->calahead.b[z];
  594. if(cal.b[z] != r1->calahead.b[z]) {
  595. r1->calahead.b[z] = cal.b[z];
  596. change++;
  597. }
  598. }
  599. switch(r1->prog->as) {
  600. case AJAL:
  601. for(z=0; z<BITS; z++) {
  602. cal.b[z] |= ref.b[z] | externs.b[z];
  603. ref.b[z] = 0;
  604. }
  605. break;
  606. case ATEXT:
  607. for(z=0; z<BITS; z++) {
  608. cal.b[z] = 0;
  609. ref.b[z] = 0;
  610. }
  611. break;
  612. case ARET:
  613. for(z=0; z<BITS; z++) {
  614. cal.b[z] = externs.b[z];
  615. ref.b[z] = 0;
  616. }
  617. }
  618. for(z=0; z<BITS; z++) {
  619. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  620. r1->use1.b[z] | r1->use2.b[z];
  621. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  622. r1->refbehind.b[z] = ref.b[z];
  623. r1->calbehind.b[z] = cal.b[z];
  624. }
  625. if(r1->active)
  626. break;
  627. r1->active = 1;
  628. }
  629. for(; r != r1; r = r->p1)
  630. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  631. prop(r2, r->refbehind, r->calbehind);
  632. }
  633. /*
  634. * find looping structure
  635. *
  636. * 1) find reverse postordering
  637. * 2) find approximate dominators,
  638. * the actual dominators if the flow graph is reducible
  639. * otherwise, dominators plus some other non-dominators.
  640. * See Matthew S. Hecht and Jeffrey D. Ullman,
  641. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  642. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  643. * Oct. 1-3, 1973, pp. 207-217.
  644. * 3) find all nodes with a predecessor dominated by the current node.
  645. * such a node is a loop head.
  646. * recursively, all preds with a greater rpo number are in the loop
  647. */
  648. long
  649. postorder(Reg *r, Reg **rpo2r, long n)
  650. {
  651. Reg *r1;
  652. r->rpo = 1;
  653. r1 = r->s1;
  654. if(r1 && !r1->rpo)
  655. n = postorder(r1, rpo2r, n);
  656. r1 = r->s2;
  657. if(r1 && !r1->rpo)
  658. n = postorder(r1, rpo2r, n);
  659. rpo2r[n] = r;
  660. n++;
  661. return n;
  662. }
  663. long
  664. rpolca(long *idom, long rpo1, long rpo2)
  665. {
  666. long t;
  667. if(rpo1 == -1)
  668. return rpo2;
  669. while(rpo1 != rpo2){
  670. if(rpo1 > rpo2){
  671. t = rpo2;
  672. rpo2 = rpo1;
  673. rpo1 = t;
  674. }
  675. while(rpo1 < rpo2){
  676. t = idom[rpo2];
  677. if(t >= rpo2)
  678. fatal(Z, "bad idom");
  679. rpo2 = t;
  680. }
  681. }
  682. return rpo1;
  683. }
  684. int
  685. doms(long *idom, long r, long s)
  686. {
  687. while(s > r)
  688. s = idom[s];
  689. return s == r;
  690. }
  691. int
  692. loophead(long *idom, Reg *r)
  693. {
  694. long src;
  695. src = r->rpo;
  696. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  697. return 1;
  698. for(r = r->p2; r != R; r = r->p2link)
  699. if(doms(idom, src, r->rpo))
  700. return 1;
  701. return 0;
  702. }
  703. void
  704. loopmark(Reg **rpo2r, long head, Reg *r)
  705. {
  706. if(r->rpo < head || r->active == head)
  707. return;
  708. r->active = head;
  709. r->loop += LOOP;
  710. if(r->p1 != R)
  711. loopmark(rpo2r, head, r->p1);
  712. for(r = r->p2; r != R; r = r->p2link)
  713. loopmark(rpo2r, head, r);
  714. }
  715. void
  716. loopit(Reg *r, long nr)
  717. {
  718. Reg *r1;
  719. long i, d, me;
  720. if(nr > maxnr) {
  721. rpo2r = alloc(nr * sizeof(Reg*));
  722. idom = alloc(nr * sizeof(long));
  723. maxnr = nr;
  724. }
  725. d = postorder(r, rpo2r, 0);
  726. if(d > nr)
  727. fatal(Z, "too many reg nodes");
  728. nr = d;
  729. for(i = 0; i < nr / 2; i++){
  730. r1 = rpo2r[i];
  731. rpo2r[i] = rpo2r[nr - 1 - i];
  732. rpo2r[nr - 1 - i] = r1;
  733. }
  734. for(i = 0; i < nr; i++)
  735. rpo2r[i]->rpo = i;
  736. idom[0] = 0;
  737. for(i = 0; i < nr; i++){
  738. r1 = rpo2r[i];
  739. me = r1->rpo;
  740. d = -1;
  741. if(r1->p1 != R && r1->p1->rpo < me)
  742. d = r1->p1->rpo;
  743. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  744. if(r1->rpo < me)
  745. d = rpolca(idom, d, r1->rpo);
  746. idom[i] = d;
  747. }
  748. for(i = 0; i < nr; i++){
  749. r1 = rpo2r[i];
  750. r1->loop++;
  751. if(r1->p2 != R && loophead(idom, r1))
  752. loopmark(rpo2r, i, r1);
  753. }
  754. }
  755. void
  756. synch(Reg *r, Bits dif)
  757. {
  758. Reg *r1;
  759. int z;
  760. for(r1 = r; r1 != R; r1 = r1->s1) {
  761. for(z=0; z<BITS; z++) {
  762. dif.b[z] = (dif.b[z] &
  763. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  764. r1->set.b[z] | r1->regdiff.b[z];
  765. if(dif.b[z] != r1->regdiff.b[z]) {
  766. r1->regdiff.b[z] = dif.b[z];
  767. change++;
  768. }
  769. }
  770. if(r1->active)
  771. break;
  772. r1->active = 1;
  773. for(z=0; z<BITS; z++)
  774. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  775. if(r1->s2 != R)
  776. synch(r1->s2, dif);
  777. }
  778. }
  779. ulong
  780. allreg(ulong b, Rgn *r)
  781. {
  782. Var *v;
  783. int i;
  784. v = var + r->varno;
  785. r->regno = 0;
  786. switch(v->etype) {
  787. default:
  788. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  789. break;
  790. case TCHAR:
  791. case TUCHAR:
  792. case TSHORT:
  793. case TUSHORT:
  794. case TINT:
  795. case TUINT:
  796. case TLONG:
  797. case TULONG:
  798. case TIND:
  799. case TARRAY:
  800. i = BtoR(~b);
  801. if(i && r->cost >= 0) {
  802. r->regno = i;
  803. return RtoB(i);
  804. }
  805. break;
  806. case TDOUBLE:
  807. case TFLOAT:
  808. i = BtoF(~b);
  809. if(i && r->cost >= 0) {
  810. r->regno = i+NREG;
  811. return FtoB(i);
  812. }
  813. break;
  814. }
  815. return 0;
  816. }
  817. void
  818. paint1(Reg *r, int bn)
  819. {
  820. Reg *r1;
  821. Prog *p;
  822. int z;
  823. ulong bb;
  824. z = bn/32;
  825. bb = 1L<<(bn%32);
  826. if(r->act.b[z] & bb)
  827. return;
  828. for(;;) {
  829. if(!(r->refbehind.b[z] & bb))
  830. break;
  831. r1 = r->p1;
  832. if(r1 == R)
  833. break;
  834. if(!(r1->refahead.b[z] & bb))
  835. break;
  836. if(r1->act.b[z] & bb)
  837. break;
  838. r = r1;
  839. }
  840. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  841. change -= CLOAD * r->loop;
  842. if(debug['R'] && debug['v'])
  843. print("%ld%P\tld %B $%d\n", r->loop,
  844. r->prog, blsh(bn), change);
  845. }
  846. for(;;) {
  847. r->act.b[z] |= bb;
  848. p = r->prog;
  849. if(r->use1.b[z] & bb) {
  850. change += CREF * r->loop;
  851. if(debug['R'] && debug['v'])
  852. print("%ld%P\tu1 %B $%d\n", r->loop,
  853. p, blsh(bn), change);
  854. }
  855. if((r->use2.b[z]|r->set.b[z]) & bb) {
  856. change += CREF * r->loop;
  857. if(debug['R'] && debug['v'])
  858. print("%ld%P\tu2 %B $%d\n", r->loop,
  859. p, blsh(bn), change);
  860. }
  861. if(STORE(r) & r->regdiff.b[z] & bb) {
  862. change -= CLOAD * r->loop;
  863. if(debug['R'] && debug['v'])
  864. print("%ld%P\tst %B $%d\n", r->loop,
  865. p, blsh(bn), change);
  866. }
  867. if(r->refbehind.b[z] & bb)
  868. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  869. if(r1->refahead.b[z] & bb)
  870. paint1(r1, bn);
  871. if(!(r->refahead.b[z] & bb))
  872. break;
  873. r1 = r->s2;
  874. if(r1 != R)
  875. if(r1->refbehind.b[z] & bb)
  876. paint1(r1, bn);
  877. r = r->s1;
  878. if(r == R)
  879. break;
  880. if(r->act.b[z] & bb)
  881. break;
  882. if(!(r->refbehind.b[z] & bb))
  883. break;
  884. }
  885. }
  886. ulong
  887. paint2(Reg *r, int bn)
  888. {
  889. Reg *r1;
  890. int z;
  891. ulong bb, vreg;
  892. z = bn/32;
  893. bb = 1L << (bn%32);
  894. vreg = regbits;
  895. if(!(r->act.b[z] & bb))
  896. return vreg;
  897. for(;;) {
  898. if(!(r->refbehind.b[z] & bb))
  899. break;
  900. r1 = r->p1;
  901. if(r1 == R)
  902. break;
  903. if(!(r1->refahead.b[z] & bb))
  904. break;
  905. if(!(r1->act.b[z] & bb))
  906. break;
  907. r = r1;
  908. }
  909. for(;;) {
  910. r->act.b[z] &= ~bb;
  911. vreg |= r->regu;
  912. if(r->refbehind.b[z] & bb)
  913. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  914. if(r1->refahead.b[z] & bb)
  915. vreg |= paint2(r1, bn);
  916. if(!(r->refahead.b[z] & bb))
  917. break;
  918. r1 = r->s2;
  919. if(r1 != R)
  920. if(r1->refbehind.b[z] & bb)
  921. vreg |= paint2(r1, bn);
  922. r = r->s1;
  923. if(r == R)
  924. break;
  925. if(!(r->act.b[z] & bb))
  926. break;
  927. if(!(r->refbehind.b[z] & bb))
  928. break;
  929. }
  930. return vreg;
  931. }
  932. void
  933. paint3(Reg *r, int bn, long rb, int rn)
  934. {
  935. Reg *r1;
  936. Prog *p;
  937. int z;
  938. ulong bb;
  939. z = bn/32;
  940. bb = 1L << (bn%32);
  941. if(r->act.b[z] & bb)
  942. return;
  943. for(;;) {
  944. if(!(r->refbehind.b[z] & bb))
  945. break;
  946. r1 = r->p1;
  947. if(r1 == R)
  948. break;
  949. if(!(r1->refahead.b[z] & bb))
  950. break;
  951. if(r1->act.b[z] & bb)
  952. break;
  953. r = r1;
  954. }
  955. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  956. addmove(r, bn, rn, 0);
  957. for(;;) {
  958. r->act.b[z] |= bb;
  959. p = r->prog;
  960. if(r->use1.b[z] & bb) {
  961. if(debug['R'])
  962. print("%P", p);
  963. addreg(&p->from, rn);
  964. if(debug['R'])
  965. print("\t.c%P\n", p);
  966. }
  967. if((r->use2.b[z]|r->set.b[z]) & bb) {
  968. if(debug['R'])
  969. print("%P", p);
  970. addreg(&p->to, rn);
  971. if(debug['R'])
  972. print("\t.c%P\n", p);
  973. }
  974. if(STORE(r) & r->regdiff.b[z] & bb)
  975. addmove(r, bn, rn, 1);
  976. r->regu |= rb;
  977. if(r->refbehind.b[z] & bb)
  978. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  979. if(r1->refahead.b[z] & bb)
  980. paint3(r1, bn, rb, rn);
  981. if(!(r->refahead.b[z] & bb))
  982. break;
  983. r1 = r->s2;
  984. if(r1 != R)
  985. if(r1->refbehind.b[z] & bb)
  986. paint3(r1, bn, rb, rn);
  987. r = r->s1;
  988. if(r == R)
  989. break;
  990. if(r->act.b[z] & bb)
  991. break;
  992. if(!(r->refbehind.b[z] & bb))
  993. break;
  994. }
  995. }
  996. void
  997. addreg(Adr *a, int rn)
  998. {
  999. a->sym = 0;
  1000. a->name = D_NONE;
  1001. a->type = D_REG;
  1002. a->reg = rn;
  1003. if(rn >= NREG) {
  1004. a->type = D_FREG;
  1005. a->reg = rn-NREG;
  1006. }
  1007. }
  1008. /*
  1009. * bit reg
  1010. * 0 R3
  1011. * 1 R4
  1012. * ... ...
  1013. * 19 R22
  1014. * 20 R23
  1015. */
  1016. long
  1017. RtoB(int r)
  1018. {
  1019. if(r < 3 || r > 23)
  1020. return 0;
  1021. return 1L << (r-3);
  1022. }
  1023. BtoR(long b)
  1024. {
  1025. b &= 0x001fffffL;
  1026. if(b == 0)
  1027. return 0;
  1028. return bitno(b) + 3;
  1029. }
  1030. /*
  1031. * bit reg
  1032. * 22 F4
  1033. * 23 F6
  1034. * ... ...
  1035. * 31 F22
  1036. */
  1037. long
  1038. FtoB(int f)
  1039. {
  1040. if(f < 4 || f > 22 || (f&1))
  1041. return 0;
  1042. return 1L << (f/2 + 20);
  1043. }
  1044. int
  1045. BtoF(long b)
  1046. {
  1047. b &= 0xffc00000L;
  1048. if(b == 0)
  1049. return 0;
  1050. return bitno(b)*2 - 40;
  1051. }