reg.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. #include "gc.h"
  2. void addsplits(void);
  3. Reg*
  4. rega(void)
  5. {
  6. Reg *r;
  7. r = freer;
  8. if(r == R) {
  9. r = alloc(sizeof(*r));
  10. } else
  11. freer = r->link;
  12. *r = zreg;
  13. return r;
  14. }
  15. int
  16. rcmp(const void *a1, const void *a2)
  17. {
  18. Rgn *p1, *p2;
  19. int c1, c2;
  20. p1 = (Rgn*)a1;
  21. p2 = (Rgn*)a2;
  22. c1 = p2->cost;
  23. c2 = p1->cost;
  24. if(c1 -= c2)
  25. return c1;
  26. return p2->varno - p1->varno;
  27. }
  28. void
  29. regopt(Prog *p)
  30. {
  31. Reg *r, *r1, *r2;
  32. Prog *p1;
  33. int i, z;
  34. long initpc, val, npc;
  35. ulong vreg;
  36. Bits bit;
  37. struct
  38. {
  39. long m;
  40. long c;
  41. Reg* p;
  42. } log5[6], *lp;
  43. firstr = R;
  44. lastr = R;
  45. nvar = 0;
  46. regbits = 0;
  47. for(z=0; z<BITS; z++) {
  48. externs.b[z] = 0;
  49. params.b[z] = 0;
  50. consts.b[z] = 0;
  51. addrs.b[z] = 0;
  52. }
  53. /*
  54. * pass 1
  55. * build aux data structure
  56. * allocate pcs
  57. * find use and set of variables
  58. */
  59. val = 5L * 5L * 5L * 5L * 5L;
  60. lp = log5;
  61. for(i=0; i<5; i++) {
  62. lp->m = val;
  63. lp->c = 0;
  64. lp->p = R;
  65. val /= 5L;
  66. lp++;
  67. }
  68. val = 0;
  69. for(; p != P; p = p->link) {
  70. switch(p->as) {
  71. case ADATA:
  72. case AGLOBL:
  73. case ANAME:
  74. continue;
  75. }
  76. r = rega();
  77. if(firstr == R) {
  78. firstr = r;
  79. lastr = r;
  80. } else {
  81. lastr->link = r;
  82. r->p1 = lastr;
  83. lastr->s1 = r;
  84. lastr = r;
  85. }
  86. r->prog = p;
  87. r->pc = val;
  88. val++;
  89. lp = log5;
  90. for(i=0; i<5; i++) {
  91. lp->c--;
  92. if(lp->c <= 0) {
  93. lp->c = lp->m;
  94. if(lp->p != R)
  95. lp->p->log5 = r;
  96. lp->p = r;
  97. (lp+1)->c = 0;
  98. break;
  99. }
  100. lp++;
  101. }
  102. r1 = r->p1;
  103. if(r1 != R)
  104. switch(r1->prog->as) {
  105. case ARET:
  106. case AB:
  107. case ARFE:
  108. r->p1 = R;
  109. r1->s1 = R;
  110. }
  111. /*
  112. * left side always read
  113. */
  114. bit = mkvar(&p->from, p->as==AMOVW);
  115. for(z=0; z<BITS; z++)
  116. r->use1.b[z] |= bit.b[z];
  117. /*
  118. * right side depends on opcode
  119. */
  120. bit = mkvar(&p->to, 0);
  121. if(bany(&bit))
  122. switch(p->as) {
  123. default:
  124. diag(Z, "reg: unknown asop: %A", p->as);
  125. break;
  126. /*
  127. * right side write
  128. */
  129. case ANOP:
  130. case AMOVB:
  131. case AMOVBU:
  132. case AMOVH:
  133. case AMOVHU:
  134. case AMOVW:
  135. case AMOVF:
  136. case AMOVD:
  137. for(z=0; z<BITS; z++)
  138. r->set.b[z] |= bit.b[z];
  139. break;
  140. /*
  141. * funny
  142. */
  143. case ABL:
  144. for(z=0; z<BITS; z++)
  145. addrs.b[z] |= bit.b[z];
  146. break;
  147. }
  148. if(p->as == AMOVM) {
  149. if(p->from.type == D_CONST)
  150. z = p->from.offset;
  151. else
  152. z = p->to.offset;
  153. for(i=0; z; i++) {
  154. if(z&1)
  155. regbits |= RtoB(i);
  156. z >>= 1;
  157. }
  158. }
  159. }
  160. if(firstr == R)
  161. return;
  162. initpc = pc - val;
  163. npc = val;
  164. /*
  165. * pass 2
  166. * turn branch references to pointers
  167. * build back pointers
  168. */
  169. for(r = firstr; r != R; r = r->link) {
  170. p = r->prog;
  171. if(p->to.type == D_BRANCH) {
  172. val = p->to.offset - initpc;
  173. r1 = firstr;
  174. while(r1 != R) {
  175. r2 = r1->log5;
  176. if(r2 != R && val >= r2->pc) {
  177. r1 = r2;
  178. continue;
  179. }
  180. if(r1->pc == val)
  181. break;
  182. r1 = r1->link;
  183. }
  184. if(r1 == R) {
  185. nearln = p->lineno;
  186. diag(Z, "ref not found\n%P", p);
  187. continue;
  188. }
  189. if(r1 == r) {
  190. nearln = p->lineno;
  191. diag(Z, "ref to self\n%P", p);
  192. continue;
  193. }
  194. r->s2 = r1;
  195. r->p2link = r1->p2;
  196. r1->p2 = r;
  197. }
  198. }
  199. if(debug['R']) {
  200. p = firstr->prog;
  201. print("\n%L %D\n", p->lineno, &p->from);
  202. }
  203. /*
  204. * pass 2.5
  205. * find looping structure
  206. */
  207. for(r = firstr; r != R; r = r->link)
  208. r->active = 0;
  209. change = 0;
  210. loopit(firstr, npc);
  211. /*
  212. * pass 3
  213. * iterate propagating usage
  214. * back until flow graph is complete
  215. */
  216. loop1:
  217. change = 0;
  218. for(r = firstr; r != R; r = r->link)
  219. r->active = 0;
  220. for(r = firstr; r != R; r = r->link)
  221. if(r->prog->as == ARET)
  222. prop(r, zbits, zbits);
  223. loop11:
  224. /* pick up unreachable code */
  225. i = 0;
  226. for(r = firstr; r != R; r = r1) {
  227. r1 = r->link;
  228. if(r1 && r1->active && !r->active) {
  229. prop(r, zbits, zbits);
  230. i = 1;
  231. }
  232. }
  233. if(i)
  234. goto loop11;
  235. if(change)
  236. goto loop1;
  237. /*
  238. * pass 4
  239. * iterate propagating register/variable synchrony
  240. * forward until graph is complete
  241. */
  242. loop2:
  243. change = 0;
  244. for(r = firstr; r != R; r = r->link)
  245. r->active = 0;
  246. synch(firstr, zbits);
  247. if(change)
  248. goto loop2;
  249. addsplits();
  250. if(debug['R'] && debug['v']) {
  251. print("\nprop structure:\n");
  252. for(r = firstr; r != R; r = r->link) {
  253. print("%ld:%P", r->loop, r->prog);
  254. for(z=0; z<BITS; z++)
  255. bit.b[z] = r->set.b[z] |
  256. r->refahead.b[z] | r->calahead.b[z] |
  257. r->refbehind.b[z] | r->calbehind.b[z] |
  258. r->use1.b[z] | r->use2.b[z];
  259. if(bany(&bit)) {
  260. print("\t");
  261. if(bany(&r->use1))
  262. print(" u1=%B", r->use1);
  263. if(bany(&r->use2))
  264. print(" u2=%B", r->use2);
  265. if(bany(&r->set))
  266. print(" st=%B", r->set);
  267. if(bany(&r->refahead))
  268. print(" ra=%B", r->refahead);
  269. if(bany(&r->calahead))
  270. print(" ca=%B", r->calahead);
  271. if(bany(&r->refbehind))
  272. print(" rb=%B", r->refbehind);
  273. if(bany(&r->calbehind))
  274. print(" cb=%B", r->calbehind);
  275. }
  276. print("\n");
  277. }
  278. }
  279. /*
  280. * pass 5
  281. * isolate regions
  282. * calculate costs (paint1)
  283. */
  284. r = firstr;
  285. if(r) {
  286. for(z=0; z<BITS; z++)
  287. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  288. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  289. if(bany(&bit)) {
  290. nearln = r->prog->lineno;
  291. warn(Z, "used and not set: %B", bit);
  292. if(debug['R'] && !debug['w'])
  293. print("used and not set: %B\n", bit);
  294. }
  295. }
  296. for(r = firstr; r != R; r = r->link)
  297. r->act = zbits;
  298. rgp = region;
  299. nregion = 0;
  300. for(r = firstr; r != R; r = r->link) {
  301. for(z=0; z<BITS; z++)
  302. bit.b[z] = r->set.b[z] &
  303. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  304. if(bany(&bit)) {
  305. nearln = r->prog->lineno;
  306. warn(Z, "set and not used: %B", bit);
  307. if(debug['R'])
  308. print("set and not used: %B\n", bit);
  309. excise(r);
  310. }
  311. for(z=0; z<BITS; z++)
  312. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  313. while(bany(&bit)) {
  314. i = bnum(bit);
  315. rgp->enter = r;
  316. rgp->varno = i;
  317. change = 0;
  318. if(debug['R'] && debug['v'])
  319. print("\n");
  320. paint1(r, i);
  321. bit.b[i/32] &= ~(1L<<(i%32));
  322. if(change <= 0) {
  323. if(debug['R'])
  324. print("%L $%d: %B\n",
  325. r->prog->lineno, change, blsh(i));
  326. continue;
  327. }
  328. rgp->cost = change;
  329. nregion++;
  330. if(nregion >= NRGN) {
  331. warn(Z, "too many regions");
  332. goto brk;
  333. }
  334. rgp++;
  335. }
  336. }
  337. brk:
  338. qsort(region, nregion, sizeof(region[0]), rcmp);
  339. /*
  340. * pass 6
  341. * determine used registers (paint2)
  342. * replace code (paint3)
  343. */
  344. rgp = region;
  345. for(i=0; i<nregion; i++) {
  346. bit = blsh(rgp->varno);
  347. vreg = paint2(rgp->enter, rgp->varno);
  348. vreg = allreg(vreg, rgp);
  349. if(debug['R']) {
  350. if(rgp->regno >= NREG)
  351. print("%L $%d F%d: %B\n",
  352. rgp->enter->prog->lineno,
  353. rgp->cost,
  354. rgp->regno-NREG,
  355. bit);
  356. else
  357. print("%L $%d R%d: %B\n",
  358. rgp->enter->prog->lineno,
  359. rgp->cost,
  360. rgp->regno,
  361. bit);
  362. }
  363. if(rgp->regno != 0)
  364. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  365. rgp++;
  366. }
  367. /*
  368. * pass 7
  369. * peep-hole on basic block
  370. */
  371. if(!debug['R'] || debug['P'])
  372. peep();
  373. /*
  374. * pass 8
  375. * recalculate pc
  376. */
  377. val = initpc;
  378. for(r = firstr; r != R; r = r1) {
  379. r->pc = val;
  380. p = r->prog;
  381. p1 = P;
  382. r1 = r->link;
  383. if(r1 != R)
  384. p1 = r1->prog;
  385. for(; p != p1; p = p->link) {
  386. switch(p->as) {
  387. default:
  388. val++;
  389. break;
  390. case ANOP:
  391. case ADATA:
  392. case AGLOBL:
  393. case ANAME:
  394. break;
  395. }
  396. }
  397. }
  398. pc = val;
  399. /*
  400. * fix up branches
  401. */
  402. if(debug['R'])
  403. if(bany(&addrs))
  404. print("addrs: %B\n", addrs);
  405. r1 = 0; /* set */
  406. for(r = firstr; r != R; r = r->link) {
  407. p = r->prog;
  408. if(p->to.type == D_BRANCH)
  409. p->to.offset = r->s2->pc;
  410. r1 = r;
  411. }
  412. /*
  413. * last pass
  414. * eliminate nops
  415. * free aux structures
  416. */
  417. for(p = firstr->prog; p != P; p = p->link){
  418. while(p->link && p->link->as == ANOP)
  419. p->link = p->link->link;
  420. }
  421. if(r1 != R) {
  422. r1->link = freer;
  423. freer = firstr;
  424. }
  425. }
  426. void
  427. addsplits(void)
  428. {
  429. Reg *r, *r1;
  430. int z, i;
  431. Bits bit;
  432. for(r = firstr; r != R; r = r->link) {
  433. if(r->loop > 1)
  434. continue;
  435. if(r->prog->as == ABL)
  436. continue;
  437. for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
  438. if(r1->loop <= 1)
  439. continue;
  440. for(z=0; z<BITS; z++)
  441. bit.b[z] = r1->calbehind.b[z] &
  442. (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
  443. ~(r->calahead.b[z] & addrs.b[z]);
  444. while(bany(&bit)) {
  445. i = bnum(bit);
  446. bit.b[i/32] &= ~(1L << (i%32));
  447. }
  448. }
  449. }
  450. }
  451. /*
  452. * add mov b,rn
  453. * just after r
  454. */
  455. void
  456. addmove(Reg *r, int bn, int rn, int f)
  457. {
  458. Prog *p, *p1;
  459. Adr *a;
  460. Var *v;
  461. p1 = alloc(sizeof(*p1));
  462. *p1 = zprog;
  463. p = r->prog;
  464. p1->link = p->link;
  465. p->link = p1;
  466. p1->lineno = p->lineno;
  467. v = var + bn;
  468. a = &p1->to;
  469. a->sym = v->sym;
  470. a->name = v->name;
  471. a->offset = v->offset;
  472. a->etype = v->etype;
  473. a->type = D_OREG;
  474. if(a->etype == TARRAY || a->sym == S)
  475. a->type = D_CONST;
  476. p1->as = AMOVW;
  477. if(v->etype == TCHAR || v->etype == TUCHAR)
  478. p1->as = AMOVB;
  479. if(v->etype == TSHORT || v->etype == TUSHORT)
  480. p1->as = AMOVH;
  481. if(v->etype == TFLOAT)
  482. p1->as = AMOVF;
  483. if(v->etype == TDOUBLE)
  484. p1->as = AMOVD;
  485. p1->from.type = D_REG;
  486. p1->from.reg = rn;
  487. if(rn >= NREG) {
  488. p1->from.type = D_FREG;
  489. p1->from.reg = rn-NREG;
  490. }
  491. if(!f) {
  492. p1->from = *a;
  493. *a = zprog.from;
  494. a->type = D_REG;
  495. a->reg = rn;
  496. if(rn >= NREG) {
  497. a->type = D_FREG;
  498. a->reg = rn-NREG;
  499. }
  500. if(v->etype == TUCHAR)
  501. p1->as = AMOVBU;
  502. if(v->etype == TUSHORT)
  503. p1->as = AMOVHU;
  504. }
  505. if(debug['R'])
  506. print("%P\t.a%P\n", p, p1);
  507. }
  508. Bits
  509. mkvar(Adr *a, int docon)
  510. {
  511. Var *v;
  512. int i, t, n, et, z;
  513. long o;
  514. Bits bit;
  515. Sym *s;
  516. t = a->type;
  517. if(t == D_REG && a->reg != NREG)
  518. regbits |= RtoB(a->reg);
  519. if(t == D_FREG && a->reg != NREG)
  520. regbits |= FtoB(a->reg);
  521. s = a->sym;
  522. o = a->offset;
  523. et = a->etype;
  524. if(s == S) {
  525. if(t != D_CONST || !docon || a->reg != NREG)
  526. goto none;
  527. et = TLONG;
  528. }
  529. if(t == D_CONST) {
  530. if(s == S && sval(o))
  531. goto none;
  532. }
  533. n = a->name;
  534. v = var;
  535. for(i=0; i<nvar; i++) {
  536. if(s == v->sym)
  537. if(n == v->name)
  538. if(o == v->offset)
  539. goto out;
  540. v++;
  541. }
  542. if(s)
  543. if(s->name[0] == '.')
  544. goto none;
  545. if(nvar >= NVAR) {
  546. if(debug['w'] > 1 && s)
  547. warn(Z, "variable not optimized: %s", s->name);
  548. goto none;
  549. }
  550. i = nvar;
  551. nvar++;
  552. v = &var[i];
  553. v->sym = s;
  554. v->offset = o;
  555. v->etype = et;
  556. v->name = n;
  557. if(debug['R'])
  558. print("bit=%2d et=%2d %D\n", i, et, a);
  559. out:
  560. bit = blsh(i);
  561. if(n == D_EXTERN || n == D_STATIC)
  562. for(z=0; z<BITS; z++)
  563. externs.b[z] |= bit.b[z];
  564. if(n == D_PARAM)
  565. for(z=0; z<BITS; z++)
  566. params.b[z] |= bit.b[z];
  567. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  568. for(z=0; z<BITS; z++)
  569. addrs.b[z] |= bit.b[z];
  570. if(t == D_CONST) {
  571. if(s == S) {
  572. for(z=0; z<BITS; z++)
  573. consts.b[z] |= bit.b[z];
  574. return bit;
  575. }
  576. if(et != TARRAY)
  577. for(z=0; z<BITS; z++)
  578. addrs.b[z] |= bit.b[z];
  579. for(z=0; z<BITS; z++)
  580. params.b[z] |= bit.b[z];
  581. return bit;
  582. }
  583. if(t == D_OREG)
  584. return bit;
  585. none:
  586. return zbits;
  587. }
  588. void
  589. prop(Reg *r, Bits ref, Bits cal)
  590. {
  591. Reg *r1, *r2;
  592. int z;
  593. for(r1 = r; r1 != R; r1 = r1->p1) {
  594. for(z=0; z<BITS; z++) {
  595. ref.b[z] |= r1->refahead.b[z];
  596. if(ref.b[z] != r1->refahead.b[z]) {
  597. r1->refahead.b[z] = ref.b[z];
  598. change++;
  599. }
  600. cal.b[z] |= r1->calahead.b[z];
  601. if(cal.b[z] != r1->calahead.b[z]) {
  602. r1->calahead.b[z] = cal.b[z];
  603. change++;
  604. }
  605. }
  606. switch(r1->prog->as) {
  607. case ABL:
  608. for(z=0; z<BITS; z++) {
  609. cal.b[z] |= ref.b[z] | externs.b[z];
  610. ref.b[z] = 0;
  611. }
  612. break;
  613. case ATEXT:
  614. for(z=0; z<BITS; z++) {
  615. cal.b[z] = 0;
  616. ref.b[z] = 0;
  617. }
  618. break;
  619. case ARET:
  620. for(z=0; z<BITS; z++) {
  621. cal.b[z] = externs.b[z];
  622. ref.b[z] = 0;
  623. }
  624. }
  625. for(z=0; z<BITS; z++) {
  626. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  627. r1->use1.b[z] | r1->use2.b[z];
  628. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  629. r1->refbehind.b[z] = ref.b[z];
  630. r1->calbehind.b[z] = cal.b[z];
  631. }
  632. if(r1->active)
  633. break;
  634. r1->active = 1;
  635. }
  636. for(; r != r1; r = r->p1)
  637. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  638. prop(r2, r->refbehind, r->calbehind);
  639. }
  640. /*
  641. * find looping structure
  642. *
  643. * 1) find reverse postordering
  644. * 2) find approximate dominators,
  645. * the actual dominators if the flow graph is reducible
  646. * otherwise, dominators plus some other non-dominators.
  647. * See Matthew S. Hecht and Jeffrey D. Ullman,
  648. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  649. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  650. * Oct. 1-3, 1973, pp. 207-217.
  651. * 3) find all nodes with a predecessor dominated by the current node.
  652. * such a node is a loop head.
  653. * recursively, all preds with a greater rpo number are in the loop
  654. */
  655. long
  656. postorder(Reg *r, Reg **rpo2r, long n)
  657. {
  658. Reg *r1;
  659. r->rpo = 1;
  660. r1 = r->s1;
  661. if(r1 && !r1->rpo)
  662. n = postorder(r1, rpo2r, n);
  663. r1 = r->s2;
  664. if(r1 && !r1->rpo)
  665. n = postorder(r1, rpo2r, n);
  666. rpo2r[n] = r;
  667. n++;
  668. return n;
  669. }
  670. long
  671. rpolca(long *idom, long rpo1, long rpo2)
  672. {
  673. long t;
  674. if(rpo1 == -1)
  675. return rpo2;
  676. while(rpo1 != rpo2){
  677. if(rpo1 > rpo2){
  678. t = rpo2;
  679. rpo2 = rpo1;
  680. rpo1 = t;
  681. }
  682. while(rpo1 < rpo2){
  683. t = idom[rpo2];
  684. if(t >= rpo2)
  685. sysfatal("bad idom");
  686. rpo2 = t;
  687. }
  688. }
  689. return rpo1;
  690. }
  691. int
  692. doms(long *idom, long r, long s)
  693. {
  694. while(s > r)
  695. s = idom[s];
  696. return s == r;
  697. }
  698. int
  699. loophead(long *idom, Reg *r)
  700. {
  701. long src;
  702. src = r->rpo;
  703. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  704. return 1;
  705. for(r = r->p2; r != R; r = r->p2link)
  706. if(doms(idom, src, r->rpo))
  707. return 1;
  708. return 0;
  709. }
  710. void
  711. loopmark(Reg **rpo2r, long head, Reg *r)
  712. {
  713. if(r->rpo < head || r->active == head)
  714. return;
  715. r->active = head;
  716. r->loop += LOOP;
  717. if(r->p1 != R)
  718. loopmark(rpo2r, head, r->p1);
  719. for(r = r->p2; r != R; r = r->p2link)
  720. loopmark(rpo2r, head, r);
  721. }
  722. void
  723. loopit(Reg *r, long nr)
  724. {
  725. Reg *r1;
  726. long i, d, me;
  727. if(nr > maxnr) {
  728. rpo2r = alloc(nr * sizeof(Reg*));
  729. idom = alloc(nr * sizeof(long));
  730. maxnr = nr;
  731. }
  732. d = postorder(r, rpo2r, 0);
  733. if(d > nr)
  734. sysfatal("too many reg nodes");
  735. nr = d;
  736. for(i = 0; i < nr / 2; i++){
  737. r1 = rpo2r[i];
  738. rpo2r[i] = rpo2r[nr - 1 - i];
  739. rpo2r[nr - 1 - i] = r1;
  740. }
  741. for(i = 0; i < nr; i++)
  742. rpo2r[i]->rpo = i;
  743. idom[0] = 0;
  744. for(i = 0; i < nr; i++){
  745. r1 = rpo2r[i];
  746. me = r1->rpo;
  747. d = -1;
  748. if(r1->p1 != R && r1->p1->rpo < me)
  749. d = r1->p1->rpo;
  750. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  751. if(r1->rpo < me)
  752. d = rpolca(idom, d, r1->rpo);
  753. idom[i] = d;
  754. }
  755. for(i = 0; i < nr; i++){
  756. r1 = rpo2r[i];
  757. r1->loop++;
  758. if(r1->p2 != R && loophead(idom, r1))
  759. loopmark(rpo2r, i, r1);
  760. }
  761. }
  762. void
  763. synch(Reg *r, Bits dif)
  764. {
  765. Reg *r1;
  766. int z;
  767. for(r1 = r; r1 != R; r1 = r1->s1) {
  768. for(z=0; z<BITS; z++) {
  769. dif.b[z] = (dif.b[z] &
  770. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  771. r1->set.b[z] | r1->regdiff.b[z];
  772. if(dif.b[z] != r1->regdiff.b[z]) {
  773. r1->regdiff.b[z] = dif.b[z];
  774. change++;
  775. }
  776. }
  777. if(r1->active)
  778. break;
  779. r1->active = 1;
  780. for(z=0; z<BITS; z++)
  781. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  782. if(r1->s2 != R)
  783. synch(r1->s2, dif);
  784. }
  785. }
  786. ulong
  787. allreg(ulong b, Rgn *r)
  788. {
  789. Var *v;
  790. int i;
  791. v = var + r->varno;
  792. r->regno = 0;
  793. switch(v->etype) {
  794. default:
  795. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  796. break;
  797. case TCHAR:
  798. case TUCHAR:
  799. case TSHORT:
  800. case TUSHORT:
  801. case TINT:
  802. case TUINT:
  803. case TLONG:
  804. case TULONG:
  805. case TIND:
  806. case TARRAY:
  807. i = BtoR(~b);
  808. if(i && r->cost >= 0) {
  809. r->regno = i;
  810. return RtoB(i);
  811. }
  812. break;
  813. case TVLONG:
  814. case TDOUBLE:
  815. case TFLOAT:
  816. i = BtoF(~b);
  817. if(i && r->cost >= 0) {
  818. r->regno = i+NREG;
  819. return FtoB(i);
  820. }
  821. break;
  822. }
  823. return 0;
  824. }
  825. void
  826. paint1(Reg *r, int bn)
  827. {
  828. Reg *r1;
  829. Prog *p;
  830. int z;
  831. ulong bb;
  832. z = bn/32;
  833. bb = 1L<<(bn%32);
  834. if(r->act.b[z] & bb)
  835. return;
  836. for(;;) {
  837. if(!(r->refbehind.b[z] & bb))
  838. break;
  839. r1 = r->p1;
  840. if(r1 == R)
  841. break;
  842. if(!(r1->refahead.b[z] & bb))
  843. break;
  844. if(r1->act.b[z] & bb)
  845. break;
  846. r = r1;
  847. }
  848. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  849. change -= CLOAD * r->loop;
  850. if(debug['R'] && debug['v'])
  851. print("%ld%P\tld %B $%d\n", r->loop,
  852. r->prog, blsh(bn), change);
  853. }
  854. for(;;) {
  855. r->act.b[z] |= bb;
  856. p = r->prog;
  857. if(r->use1.b[z] & bb) {
  858. change += CREF * r->loop;
  859. if(debug['R'] && debug['v'])
  860. print("%ld%P\tu1 %B $%d\n", r->loop,
  861. p, blsh(bn), change);
  862. }
  863. if((r->use2.b[z]|r->set.b[z]) & bb) {
  864. change += CREF * r->loop;
  865. if(debug['R'] && debug['v'])
  866. print("%ld%P\tu2 %B $%d\n", r->loop,
  867. p, blsh(bn), change);
  868. }
  869. if(STORE(r) & r->regdiff.b[z] & bb) {
  870. change -= CLOAD * r->loop;
  871. if(debug['R'] && debug['v'])
  872. print("%ld%P\tst %B $%d\n", r->loop,
  873. p, blsh(bn), change);
  874. }
  875. if(r->refbehind.b[z] & bb)
  876. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  877. if(r1->refahead.b[z] & bb)
  878. paint1(r1, bn);
  879. if(!(r->refahead.b[z] & bb))
  880. break;
  881. r1 = r->s2;
  882. if(r1 != R)
  883. if(r1->refbehind.b[z] & bb)
  884. paint1(r1, bn);
  885. r = r->s1;
  886. if(r == R)
  887. break;
  888. if(r->act.b[z] & bb)
  889. break;
  890. if(!(r->refbehind.b[z] & bb))
  891. break;
  892. }
  893. }
  894. ulong
  895. paint2(Reg *r, int bn)
  896. {
  897. Reg *r1;
  898. int z;
  899. ulong bb, vreg;
  900. z = bn/32;
  901. bb = 1L << (bn%32);
  902. vreg = regbits;
  903. if(!(r->act.b[z] & bb))
  904. return vreg;
  905. for(;;) {
  906. if(!(r->refbehind.b[z] & bb))
  907. break;
  908. r1 = r->p1;
  909. if(r1 == R)
  910. break;
  911. if(!(r1->refahead.b[z] & bb))
  912. break;
  913. if(!(r1->act.b[z] & bb))
  914. break;
  915. r = r1;
  916. }
  917. for(;;) {
  918. r->act.b[z] &= ~bb;
  919. vreg |= r->regu;
  920. if(r->refbehind.b[z] & bb)
  921. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  922. if(r1->refahead.b[z] & bb)
  923. vreg |= paint2(r1, bn);
  924. if(!(r->refahead.b[z] & bb))
  925. break;
  926. r1 = r->s2;
  927. if(r1 != R)
  928. if(r1->refbehind.b[z] & bb)
  929. vreg |= paint2(r1, bn);
  930. r = r->s1;
  931. if(r == R)
  932. break;
  933. if(!(r->act.b[z] & bb))
  934. break;
  935. if(!(r->refbehind.b[z] & bb))
  936. break;
  937. }
  938. return vreg;
  939. }
  940. void
  941. paint3(Reg *r, int bn, long rb, int rn)
  942. {
  943. Reg *r1;
  944. Prog *p;
  945. int z;
  946. ulong bb;
  947. z = bn/32;
  948. bb = 1L << (bn%32);
  949. if(r->act.b[z] & bb)
  950. return;
  951. for(;;) {
  952. if(!(r->refbehind.b[z] & bb))
  953. break;
  954. r1 = r->p1;
  955. if(r1 == R)
  956. break;
  957. if(!(r1->refahead.b[z] & bb))
  958. break;
  959. if(r1->act.b[z] & bb)
  960. break;
  961. r = r1;
  962. }
  963. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  964. addmove(r, bn, rn, 0);
  965. for(;;) {
  966. r->act.b[z] |= bb;
  967. p = r->prog;
  968. if(r->use1.b[z] & bb) {
  969. if(debug['R'])
  970. print("%P", p);
  971. addreg(&p->from, rn);
  972. if(debug['R'])
  973. print("\t.c%P\n", p);
  974. }
  975. if((r->use2.b[z]|r->set.b[z]) & bb) {
  976. if(debug['R'])
  977. print("%P", p);
  978. addreg(&p->to, rn);
  979. if(debug['R'])
  980. print("\t.c%P\n", p);
  981. }
  982. if(STORE(r) & r->regdiff.b[z] & bb)
  983. addmove(r, bn, rn, 1);
  984. r->regu |= rb;
  985. if(r->refbehind.b[z] & bb)
  986. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  987. if(r1->refahead.b[z] & bb)
  988. paint3(r1, bn, rb, rn);
  989. if(!(r->refahead.b[z] & bb))
  990. break;
  991. r1 = r->s2;
  992. if(r1 != R)
  993. if(r1->refbehind.b[z] & bb)
  994. paint3(r1, bn, rb, rn);
  995. r = r->s1;
  996. if(r == R)
  997. break;
  998. if(r->act.b[z] & bb)
  999. break;
  1000. if(!(r->refbehind.b[z] & bb))
  1001. break;
  1002. }
  1003. }
  1004. void
  1005. addreg(Adr *a, int rn)
  1006. {
  1007. a->sym = 0;
  1008. a->name = D_NONE;
  1009. a->type = D_REG;
  1010. a->reg = rn;
  1011. if(rn >= NREG) {
  1012. a->type = D_FREG;
  1013. a->reg = rn-NREG;
  1014. }
  1015. }
  1016. /*
  1017. * bit reg
  1018. * 0 R0
  1019. * 1 R1
  1020. * ... ...
  1021. * 10 R10
  1022. */
  1023. long
  1024. RtoB(int r)
  1025. {
  1026. if(r < 2 || r >= REGTMP)
  1027. return 0;
  1028. return 1L << r;
  1029. }
  1030. int
  1031. BtoR(long b)
  1032. {
  1033. b &= 0x07fcL;
  1034. if(b == 0)
  1035. return 0;
  1036. return bitno(b);
  1037. }
  1038. /*
  1039. * bit reg
  1040. * 18 F2
  1041. * 19 F3
  1042. * ... ...
  1043. * 23 F7
  1044. */
  1045. long
  1046. FtoB(int f)
  1047. {
  1048. if(f < 2 || f > NFREG-1)
  1049. return 0;
  1050. return 1L << (f + 16);
  1051. }
  1052. int
  1053. BtoF(long b)
  1054. {
  1055. b &= 0xfc0000L;
  1056. if(b == 0)
  1057. return 0;
  1058. return bitno(b) - 16;
  1059. }