reg.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "gc.h"
  10. Reg*
  11. rega(void)
  12. {
  13. Reg *r;
  14. r = freer;
  15. if(r == R) {
  16. r = alloc(sizeof(*r));
  17. } else
  18. freer = r->link;
  19. *r = zreg;
  20. return r;
  21. }
  22. int
  23. rcmp(const void *a1, const void *a2)
  24. {
  25. Rgn *p1, *p2;
  26. int c1, c2;
  27. p1 = (Rgn*)a1;
  28. p2 = (Rgn*)a2;
  29. c1 = p2->cost;
  30. c2 = p1->cost;
  31. if(c1 -= c2)
  32. return c1;
  33. return p2->varno - p1->varno;
  34. }
  35. void
  36. regopt(Prog *p)
  37. {
  38. Reg *r, *r1, *r2;
  39. Prog *p1;
  40. int i, z;
  41. int32_t initpc, val, npc;
  42. uint32_t vreg;
  43. Bits bit;
  44. struct
  45. {
  46. int32_t m;
  47. int32_t c;
  48. Reg* p;
  49. } log5[6], *lp;
  50. firstr = R;
  51. lastr = R;
  52. nvar = 0;
  53. regbits = 0;
  54. for(z=0; z<BITS; z++) {
  55. externs.b[z] = 0;
  56. params.b[z] = 0;
  57. consts.b[z] = 0;
  58. addrs.b[z] = 0;
  59. }
  60. /*
  61. * pass 1
  62. * build aux data structure
  63. * allocate pcs
  64. * find use and set of variables
  65. */
  66. val = 5L * 5L * 5L * 5L * 5L;
  67. lp = log5;
  68. for(i=0; i<5; i++) {
  69. lp->m = val;
  70. lp->c = 0;
  71. lp->p = R;
  72. val /= 5L;
  73. lp++;
  74. }
  75. val = 0;
  76. for(; p != P; p = p->link) {
  77. switch(p->as) {
  78. case ADATA:
  79. case AGLOBL:
  80. case ANAME:
  81. case ASIGNAME:
  82. continue;
  83. }
  84. r = rega();
  85. if(firstr == R) {
  86. firstr = r;
  87. lastr = r;
  88. } else {
  89. lastr->link = r;
  90. r->p1 = lastr;
  91. lastr->s1 = r;
  92. lastr = r;
  93. }
  94. r->prog = p;
  95. r->pc = val;
  96. val++;
  97. lp = log5;
  98. for(i=0; i<5; i++) {
  99. lp->c--;
  100. if(lp->c <= 0) {
  101. lp->c = lp->m;
  102. if(lp->p != R)
  103. lp->p->log5 = r;
  104. lp->p = r;
  105. (lp+1)->c = 0;
  106. break;
  107. }
  108. lp++;
  109. }
  110. r1 = r->p1;
  111. if(r1 != R)
  112. switch(r1->prog->as) {
  113. case ARETURN:
  114. case AJMP:
  115. case ARETT:
  116. r->p1 = R;
  117. r1->s1 = R;
  118. }
  119. /*
  120. * left side always read
  121. */
  122. bit = mkvar(&p->from, p->as==AMOVW);
  123. for(z=0; z<BITS; z++)
  124. r->use1.b[z] |= bit.b[z];
  125. /*
  126. * right side depends on opcode
  127. */
  128. bit = mkvar(&p->to, 0);
  129. if(bany(&bit))
  130. switch(p->as) {
  131. default:
  132. diag(Z, "reg: unknown asop: %A", p->as);
  133. break;
  134. /*
  135. * right side write
  136. */
  137. case ANOP:
  138. case AMOVB:
  139. case AMOVBU:
  140. case AMOVH:
  141. case AMOVHU:
  142. case AMOVW:
  143. case AFMOVF:
  144. case AFMOVD:
  145. for(z=0; z<BITS; z++)
  146. r->set.b[z] |= bit.b[z];
  147. break;
  148. /*
  149. * funny
  150. */
  151. case AJMPL:
  152. for(z=0; z<BITS; z++)
  153. addrs.b[z] |= bit.b[z];
  154. break;
  155. }
  156. }
  157. if(firstr == R)
  158. return;
  159. initpc = pc - val;
  160. npc = val;
  161. /*
  162. * pass 2
  163. * turn branch references to pointers
  164. * build back pointers
  165. */
  166. for(r = firstr; r != R; r = r->link) {
  167. p = r->prog;
  168. if(p->to.type == D_BRANCH) {
  169. val = p->to.offset - initpc;
  170. r1 = firstr;
  171. while(r1 != R) {
  172. r2 = r1->log5;
  173. if(r2 != R && val >= r2->pc) {
  174. r1 = r2;
  175. continue;
  176. }
  177. if(r1->pc == val)
  178. break;
  179. r1 = r1->link;
  180. }
  181. if(r1 == R) {
  182. nearln = p->lineno;
  183. diag(Z, "ref not found\n%P", p);
  184. continue;
  185. }
  186. if(r1 == r) {
  187. nearln = p->lineno;
  188. diag(Z, "ref to self\n%P", p);
  189. continue;
  190. }
  191. r->s2 = r1;
  192. r->p2link = r1->p2;
  193. r1->p2 = r;
  194. }
  195. }
  196. if(debug['R']) {
  197. p = firstr->prog;
  198. print("\n%L %D\n", p->lineno, &p->from);
  199. }
  200. /*
  201. * pass 2.5
  202. * find looping structure
  203. */
  204. for(r = firstr; r != R; r = r->link)
  205. r->active = 0;
  206. change = 0;
  207. loopit(firstr, npc);
  208. if(debug['R'] && debug['v']) {
  209. print("\nlooping structure:\n");
  210. for(r = firstr; r != R; r = r->link) {
  211. print("%ld:%P", r->loop, r->prog);
  212. for(z=0; z<BITS; z++)
  213. bit.b[z] = r->use1.b[z] |
  214. r->use2.b[z] | r->set.b[z];
  215. if(bany(&bit)) {
  216. print("\t");
  217. if(bany(&r->use1))
  218. print(" u1=%B", r->use1);
  219. if(bany(&r->use2))
  220. print(" u2=%B", r->use2);
  221. if(bany(&r->set))
  222. print(" st=%B", r->set);
  223. }
  224. print("\n");
  225. }
  226. }
  227. /*
  228. * pass 3
  229. * iterate propagating usage
  230. * back until flow graph is complete
  231. */
  232. loop1:
  233. change = 0;
  234. for(r = firstr; r != R; r = r->link)
  235. r->active = 0;
  236. for(r = firstr; r != R; r = r->link)
  237. if(r->prog->as == ARETURN)
  238. prop(r, zbits, zbits);
  239. loop11:
  240. /* pick up unreachable code */
  241. i = 0;
  242. for(r = firstr; r != R; r = r1) {
  243. r1 = r->link;
  244. if(r1 && r1->active && !r->active) {
  245. prop(r, zbits, zbits);
  246. i = 1;
  247. }
  248. }
  249. if(i)
  250. goto loop11;
  251. if(change)
  252. goto loop1;
  253. /*
  254. * pass 4
  255. * iterate propagating register/variable synchrony
  256. * forward until graph is complete
  257. */
  258. loop2:
  259. change = 0;
  260. for(r = firstr; r != R; r = r->link)
  261. r->active = 0;
  262. synch(firstr, zbits);
  263. if(change)
  264. goto loop2;
  265. /*
  266. * pass 5
  267. * isolate regions
  268. * calculate costs (paint1)
  269. */
  270. r = firstr;
  271. if(r) {
  272. for(z=0; z<BITS; z++)
  273. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  274. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  275. if(bany(&bit)) {
  276. nearln = r->prog->lineno;
  277. warn(Z, "used and not set: %B", bit);
  278. if(debug['R'] && !debug['w'])
  279. print("used and not set: %B\n", bit);
  280. }
  281. }
  282. if(debug['R'] && debug['v'])
  283. print("\nprop structure:\n");
  284. for(r = firstr; r != R; r = r->link)
  285. r->act = zbits;
  286. rgp = region;
  287. nregion = 0;
  288. for(r = firstr; r != R; r = r->link) {
  289. if(debug['R'] && debug['v'])
  290. print("%P\n set = %B; rah = %B; cal = %B\n",
  291. r->prog, r->set, r->refahead, r->calahead);
  292. for(z=0; z<BITS; z++)
  293. bit.b[z] = r->set.b[z] &
  294. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  295. if(bany(&bit)) {
  296. nearln = r->prog->lineno;
  297. warn(Z, "set and not used: %B", bit);
  298. if(debug['R'])
  299. print("set an not used: %B\n", bit);
  300. excise(r);
  301. }
  302. for(z=0; z<BITS; z++)
  303. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  304. while(bany(&bit)) {
  305. i = bnum(bit);
  306. rgp->enter = r;
  307. rgp->varno = i;
  308. change = 0;
  309. if(debug['R'] && debug['v'])
  310. print("\n");
  311. paint1(r, i);
  312. bit.b[i/32] &= ~(1L<<(i%32));
  313. if(change <= 0) {
  314. if(debug['R'])
  315. print("%L$%d: %B\n",
  316. r->prog->lineno, change, blsh(i));
  317. continue;
  318. }
  319. rgp->cost = change;
  320. nregion++;
  321. if(nregion >= NRGN) {
  322. warn(Z, "too many regions");
  323. goto brk;
  324. }
  325. rgp++;
  326. }
  327. }
  328. brk:
  329. qsort(region, nregion, sizeof(region[0]), rcmp);
  330. /*
  331. * pass 6
  332. * determine used registers (paint2)
  333. * replace code (paint3)
  334. */
  335. rgp = region;
  336. for(i=0; i<nregion; i++) {
  337. bit = blsh(rgp->varno);
  338. vreg = paint2(rgp->enter, rgp->varno);
  339. vreg = allreg(vreg, rgp);
  340. if(debug['R']) {
  341. if(rgp->regno >= NREG)
  342. print("%L$%d F%d: %B\n",
  343. rgp->enter->prog->lineno,
  344. rgp->cost,
  345. rgp->regno-NREG,
  346. bit);
  347. else
  348. print("%L$%d R%d: %B\n",
  349. rgp->enter->prog->lineno,
  350. rgp->cost,
  351. rgp->regno,
  352. bit);
  353. }
  354. if(rgp->regno != 0)
  355. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  356. rgp++;
  357. }
  358. /*
  359. * pass 7
  360. * peep-hole on basic block
  361. */
  362. if(!debug['R'] || debug['P'])
  363. peep();
  364. /*
  365. * pass 8
  366. * recalculate pc
  367. */
  368. val = initpc;
  369. for(r = firstr; r != R; r = r1) {
  370. r->pc = val;
  371. p = r->prog;
  372. p1 = P;
  373. r1 = r->link;
  374. if(r1 != R)
  375. p1 = r1->prog;
  376. for(; p != p1; p = p->link) {
  377. switch(p->as) {
  378. default:
  379. val++;
  380. break;
  381. case ANOP:
  382. case ADATA:
  383. case AGLOBL:
  384. case ANAME:
  385. case ASIGNAME:
  386. break;
  387. }
  388. }
  389. }
  390. pc = val;
  391. /*
  392. * fix up branches
  393. */
  394. if(debug['R'])
  395. if(bany(&addrs))
  396. print("addrs: %B\n", addrs);
  397. r1 = 0; /* set */
  398. for(r = firstr; r != R; r = r->link) {
  399. p = r->prog;
  400. if(p->to.type == D_BRANCH)
  401. p->to.offset = r->s2->pc;
  402. r1 = r;
  403. }
  404. /*
  405. * last pass
  406. * eliminate nops
  407. * free aux structures
  408. */
  409. for(p = firstr->prog; p != P; p = p->link){
  410. while(p->link && p->link->as == ANOP)
  411. p->link = p->link->link;
  412. }
  413. if(r1 != R) {
  414. r1->link = freer;
  415. freer = firstr;
  416. }
  417. }
  418. /*
  419. * add mov b,rn
  420. * just after r
  421. */
  422. void
  423. addmove(Reg *r, int bn, int rn, int f)
  424. {
  425. Prog *p, *p1;
  426. Adr *a;
  427. Var *v;
  428. p1 = alloc(sizeof(*p1));
  429. *p1 = zprog;
  430. p = r->prog;
  431. p1->link = p->link;
  432. p->link = p1;
  433. p1->lineno = p->lineno;
  434. v = var + bn;
  435. a = &p1->to;
  436. a->sym = v->sym;
  437. a->name = v->name;
  438. a->offset = v->offset;
  439. a->etype = v->etype;
  440. a->type = D_OREG;
  441. if(a->etype == TARRAY || a->sym == S)
  442. a->type = D_CONST;
  443. p1->as = AMOVW;
  444. if(v->etype == TCHAR || v->etype == TUCHAR)
  445. p1->as = AMOVB;
  446. if(v->etype == TSHORT || v->etype == TUSHORT)
  447. p1->as = AMOVH;
  448. if(v->etype == TFLOAT)
  449. p1->as = AFMOVF;
  450. if(v->etype == TDOUBLE)
  451. p1->as = AFMOVD;
  452. p1->from.type = D_REG;
  453. p1->from.reg = rn;
  454. if(rn >= NREG) {
  455. p1->from.type = D_FREG;
  456. p1->from.reg = rn-NREG;
  457. }
  458. if(!f) {
  459. p1->from = *a;
  460. *a = zprog.from;
  461. a->type = D_REG;
  462. a->reg = rn;
  463. if(rn >= NREG) {
  464. a->type = D_FREG;
  465. a->reg = rn-NREG;
  466. }
  467. if(v->etype == TUCHAR)
  468. p1->as = AMOVBU;
  469. if(v->etype == TUSHORT)
  470. p1->as = AMOVHU;
  471. }
  472. if(debug['R'])
  473. print("%P\t.a%P\n", p, p1);
  474. }
  475. Bits
  476. mkvar(Adr *a, int docon)
  477. {
  478. Var *v;
  479. int i, t, n, et, z;
  480. int32_t o;
  481. Bits bit;
  482. Sym *s;
  483. t = a->type;
  484. if(t == D_REG && a->reg != NREG)
  485. regbits |= RtoB(a->reg);
  486. if(t == D_FREG && a->reg != NREG)
  487. regbits |= FtoB(a->reg);
  488. s = a->sym;
  489. o = a->offset;
  490. et = a->etype;
  491. if(s == S) {
  492. if(t != D_CONST || !docon || a->reg != NREG)
  493. goto none;
  494. et = TLONG;
  495. }
  496. if(t == D_CONST) {
  497. if(s == S && sval(o))
  498. goto none;
  499. }
  500. n = a->name;
  501. v = var;
  502. for(i=0; i<nvar; i++) {
  503. if(s == v->sym)
  504. if(n == v->name)
  505. if(o == v->offset)
  506. goto out;
  507. v++;
  508. }
  509. if(s)
  510. if(s->name[0] == '.')
  511. goto none;
  512. if(nvar >= NVAR) {
  513. if(debug['w'] > 1 && s)
  514. warn(Z, "variable not optimized: %s", s->name);
  515. goto none;
  516. }
  517. i = nvar;
  518. nvar++;
  519. v = &var[i];
  520. v->sym = s;
  521. v->offset = o;
  522. v->etype = et;
  523. v->name = n;
  524. if(debug['R'])
  525. print("bit=%2d et=%2d %D\n", i, et, a);
  526. out:
  527. bit = blsh(i);
  528. if(n == D_EXTERN || n == D_STATIC)
  529. for(z=0; z<BITS; z++)
  530. externs.b[z] |= bit.b[z];
  531. if(n == D_PARAM)
  532. for(z=0; z<BITS; z++)
  533. params.b[z] |= bit.b[z];
  534. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  535. for(z=0; z<BITS; z++)
  536. addrs.b[z] |= bit.b[z];
  537. if(t == D_CONST) {
  538. if(s == S) {
  539. for(z=0; z<BITS; z++)
  540. consts.b[z] |= bit.b[z];
  541. return bit;
  542. }
  543. if(et != TARRAY)
  544. for(z=0; z<BITS; z++)
  545. addrs.b[z] |= bit.b[z];
  546. for(z=0; z<BITS; z++)
  547. params.b[z] |= bit.b[z];
  548. return bit;
  549. }
  550. if(t == D_OREG)
  551. return bit;
  552. none:
  553. return zbits;
  554. }
  555. void
  556. prop(Reg *r, Bits ref, Bits cal)
  557. {
  558. Reg *r1, *r2;
  559. int z;
  560. for(r1 = r; r1 != R; r1 = r1->p1) {
  561. for(z=0; z<BITS; z++) {
  562. ref.b[z] |= r1->refahead.b[z];
  563. if(ref.b[z] != r1->refahead.b[z]) {
  564. r1->refahead.b[z] = ref.b[z];
  565. change++;
  566. }
  567. cal.b[z] |= r1->calahead.b[z];
  568. if(cal.b[z] != r1->calahead.b[z]) {
  569. r1->calahead.b[z] = cal.b[z];
  570. change++;
  571. }
  572. }
  573. switch(r1->prog->as) {
  574. case AJMPL:
  575. for(z=0; z<BITS; z++) {
  576. cal.b[z] |= ref.b[z] | externs.b[z];
  577. ref.b[z] = 0;
  578. }
  579. break;
  580. case ATEXT:
  581. for(z=0; z<BITS; z++) {
  582. cal.b[z] = 0;
  583. ref.b[z] = 0;
  584. }
  585. break;
  586. case ARETURN:
  587. for(z=0; z<BITS; z++) {
  588. cal.b[z] = externs.b[z];
  589. ref.b[z] = 0;
  590. }
  591. }
  592. for(z=0; z<BITS; z++) {
  593. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  594. r1->use1.b[z] | r1->use2.b[z];
  595. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  596. r1->refbehind.b[z] = ref.b[z];
  597. r1->calbehind.b[z] = cal.b[z];
  598. }
  599. if(r1->active)
  600. break;
  601. r1->active = 1;
  602. }
  603. for(; r != r1; r = r->p1)
  604. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  605. prop(r2, r->refbehind, r->calbehind);
  606. }
  607. /*
  608. * find looping structure
  609. *
  610. * 1) find reverse postordering
  611. * 2) find approximate dominators,
  612. * the actual dominators if the flow graph is reducible
  613. * otherwise, dominators plus some other non-dominators.
  614. * See Matthew S. Hecht and Jeffrey D. Ullman,
  615. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  616. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  617. * Oct. 1-3, 1973, pp. 207-217.
  618. * 3) find all nodes with a predecessor dominated by the current node.
  619. * such a node is a loop head.
  620. * recursively, all preds with a greater rpo number are in the loop
  621. */
  622. int32_t
  623. postorder(Reg *r, Reg **rpo2r, int32_t n)
  624. {
  625. Reg *r1;
  626. r->rpo = 1;
  627. r1 = r->s1;
  628. if(r1 && !r1->rpo)
  629. n = postorder(r1, rpo2r, n);
  630. r1 = r->s2;
  631. if(r1 && !r1->rpo)
  632. n = postorder(r1, rpo2r, n);
  633. rpo2r[n] = r;
  634. n++;
  635. return n;
  636. }
  637. int32_t
  638. rpolca(int32_t *idom, int32_t rpo1, int32_t rpo2)
  639. {
  640. int32_t t;
  641. if(rpo1 == -1)
  642. return rpo2;
  643. while(rpo1 != rpo2){
  644. if(rpo1 > rpo2){
  645. t = rpo2;
  646. rpo2 = rpo1;
  647. rpo1 = t;
  648. }
  649. while(rpo1 < rpo2){
  650. t = idom[rpo2];
  651. if(t >= rpo2)
  652. fatal(Z, "bad idom");
  653. rpo2 = t;
  654. }
  655. }
  656. return rpo1;
  657. }
  658. int
  659. doms(int32_t *idom, int32_t r, int32_t s)
  660. {
  661. while(s > r)
  662. s = idom[s];
  663. return s == r;
  664. }
  665. int
  666. loophead(int32_t *idom, Reg *r)
  667. {
  668. int32_t src;
  669. src = r->rpo;
  670. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  671. return 1;
  672. for(r = r->p2; r != R; r = r->p2link)
  673. if(doms(idom, src, r->rpo))
  674. return 1;
  675. return 0;
  676. }
  677. void
  678. loopmark(Reg **rpo2r, int32_t head, Reg *r)
  679. {
  680. if(r->rpo < head || r->active == head)
  681. return;
  682. r->active = head;
  683. r->loop += LOOP;
  684. if(r->p1 != R)
  685. loopmark(rpo2r, head, r->p1);
  686. for(r = r->p2; r != R; r = r->p2link)
  687. loopmark(rpo2r, head, r);
  688. }
  689. void
  690. loopit(Reg *r, int32_t nr)
  691. {
  692. Reg *r1;
  693. int32_t i, d, me;
  694. if(nr > maxnr) {
  695. rpo2r = alloc(nr * sizeof(Reg*));
  696. idom = alloc(nr * sizeof(int32_t));
  697. maxnr = nr;
  698. }
  699. d = postorder(r, rpo2r, 0);
  700. if(d > nr)
  701. fatal(Z, "too many reg nodes");
  702. nr = d;
  703. for(i = 0; i < nr / 2; i++){
  704. r1 = rpo2r[i];
  705. rpo2r[i] = rpo2r[nr - 1 - i];
  706. rpo2r[nr - 1 - i] = r1;
  707. }
  708. for(i = 0; i < nr; i++)
  709. rpo2r[i]->rpo = i;
  710. idom[0] = 0;
  711. for(i = 0; i < nr; i++){
  712. r1 = rpo2r[i];
  713. me = r1->rpo;
  714. d = -1;
  715. if(r1->p1 != R && r1->p1->rpo < me)
  716. d = r1->p1->rpo;
  717. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  718. if(r1->rpo < me)
  719. d = rpolca(idom, d, r1->rpo);
  720. idom[i] = d;
  721. }
  722. for(i = 0; i < nr; i++){
  723. r1 = rpo2r[i];
  724. r1->loop++;
  725. if(r1->p2 != R && loophead(idom, r1))
  726. loopmark(rpo2r, i, r1);
  727. }
  728. }
  729. void
  730. synch(Reg *r, Bits dif)
  731. {
  732. Reg *r1;
  733. int z;
  734. for(r1 = r; r1 != R; r1 = r1->s1) {
  735. for(z=0; z<BITS; z++) {
  736. dif.b[z] = (dif.b[z] &
  737. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  738. r1->set.b[z] | r1->regdiff.b[z];
  739. if(dif.b[z] != r1->regdiff.b[z]) {
  740. r1->regdiff.b[z] = dif.b[z];
  741. change++;
  742. }
  743. }
  744. if(r1->active)
  745. break;
  746. r1->active = 1;
  747. for(z=0; z<BITS; z++)
  748. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  749. if(r1->s2 != R)
  750. synch(r1->s2, dif);
  751. }
  752. }
  753. uint32_t
  754. allreg(uint32_t b, Rgn *r)
  755. {
  756. Var *v;
  757. int i;
  758. v = var + r->varno;
  759. r->regno = 0;
  760. switch(v->etype) {
  761. default:
  762. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  763. break;
  764. case TCHAR:
  765. case TUCHAR:
  766. case TSHORT:
  767. case TUSHORT:
  768. case TINT:
  769. case TUINT:
  770. case TLONG:
  771. case TULONG:
  772. case TIND:
  773. case TARRAY:
  774. i = BtoR(~b);
  775. if(i && r->cost > 0) {
  776. r->regno = i;
  777. return RtoB(i);
  778. }
  779. break;
  780. case TDOUBLE:
  781. case TFLOAT:
  782. i = BtoF(~b);
  783. if(i && r->cost > 0) {
  784. r->regno = i+NREG;
  785. return FtoB(i);
  786. }
  787. break;
  788. }
  789. return 0;
  790. }
  791. void
  792. paint1(Reg *r, int bn)
  793. {
  794. Reg *r1;
  795. Prog *p;
  796. int z;
  797. uint32_t bb;
  798. z = bn/32;
  799. bb = 1L<<(bn%32);
  800. if(r->act.b[z] & bb)
  801. return;
  802. for(;;) {
  803. if(!(r->refbehind.b[z] & bb))
  804. break;
  805. r1 = r->p1;
  806. if(r1 == R)
  807. break;
  808. if(!(r1->refahead.b[z] & bb))
  809. break;
  810. if(r1->act.b[z] & bb)
  811. break;
  812. r = r1;
  813. }
  814. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  815. change -= CLOAD * r->loop;
  816. if(debug['R'] && debug['v'])
  817. print("%ld%P\tld %B $%d\n", r->loop,
  818. r->prog, blsh(bn), change);
  819. }
  820. for(;;) {
  821. r->act.b[z] |= bb;
  822. p = r->prog;
  823. if(r->use1.b[z] & bb) {
  824. change += CREF * r->loop;
  825. if(p->to.type == D_FREG && p->as == AMOVW)
  826. change = -CINF; /* cant go Rreg to Freg */
  827. if(debug['R'] && debug['v'])
  828. print("%ld%P\tu1 %B $%d\n", r->loop,
  829. p, blsh(bn), change);
  830. }
  831. if((r->use2.b[z]|r->set.b[z]) & bb) {
  832. change += CREF * r->loop;
  833. if(p->from.type == D_FREG && p->as == AMOVW)
  834. change = -CINF; /* cant go Rreg to Freg */
  835. if(debug['R'] && debug['v'])
  836. print("%ld%P\tu2 %B $%d\n", r->loop,
  837. p, blsh(bn), change);
  838. }
  839. if(STORE(r) & r->regdiff.b[z] & bb) {
  840. change -= CLOAD * r->loop;
  841. if(debug['R'] && debug['v'])
  842. print("%ld%P\tst %B $%d\n", r->loop,
  843. p, blsh(bn), change);
  844. }
  845. if(r->refbehind.b[z] & bb)
  846. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  847. if(r1->refahead.b[z] & bb)
  848. paint1(r1, bn);
  849. if(!(r->refahead.b[z] & bb))
  850. break;
  851. r1 = r->s2;
  852. if(r1 != R)
  853. if(r1->refbehind.b[z] & bb)
  854. paint1(r1, bn);
  855. r = r->s1;
  856. if(r == R)
  857. break;
  858. if(r->act.b[z] & bb)
  859. break;
  860. if(!(r->refbehind.b[z] & bb))
  861. break;
  862. }
  863. }
  864. uint32_t
  865. paint2(Reg *r, int bn)
  866. {
  867. Reg *r1;
  868. int z;
  869. uint32_t bb, vreg;
  870. z = bn/32;
  871. bb = 1L << (bn%32);
  872. vreg = regbits;
  873. if(!(r->act.b[z] & bb))
  874. return vreg;
  875. for(;;) {
  876. if(!(r->refbehind.b[z] & bb))
  877. break;
  878. r1 = r->p1;
  879. if(r1 == R)
  880. break;
  881. if(!(r1->refahead.b[z] & bb))
  882. break;
  883. if(!(r1->act.b[z] & bb))
  884. break;
  885. r = r1;
  886. }
  887. for(;;) {
  888. r->act.b[z] &= ~bb;
  889. vreg |= r->regu;
  890. if(r->refbehind.b[z] & bb)
  891. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  892. if(r1->refahead.b[z] & bb)
  893. vreg |= paint2(r1, bn);
  894. if(!(r->refahead.b[z] & bb))
  895. break;
  896. r1 = r->s2;
  897. if(r1 != R)
  898. if(r1->refbehind.b[z] & bb)
  899. vreg |= paint2(r1, bn);
  900. r = r->s1;
  901. if(r == R)
  902. break;
  903. if(!(r->act.b[z] & bb))
  904. break;
  905. if(!(r->refbehind.b[z] & bb))
  906. break;
  907. }
  908. return vreg;
  909. }
  910. void
  911. paint3(Reg *r, int bn, int32_t rb, int rn)
  912. {
  913. Reg *r1;
  914. Prog *p;
  915. int z;
  916. uint32_t bb;
  917. z = bn/32;
  918. bb = 1L << (bn%32);
  919. if(r->act.b[z] & bb)
  920. return;
  921. for(;;) {
  922. if(!(r->refbehind.b[z] & bb))
  923. break;
  924. r1 = r->p1;
  925. if(r1 == R)
  926. break;
  927. if(!(r1->refahead.b[z] & bb))
  928. break;
  929. if(r1->act.b[z] & bb)
  930. break;
  931. r = r1;
  932. }
  933. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  934. addmove(r, bn, rn, 0);
  935. for(;;) {
  936. r->act.b[z] |= bb;
  937. p = r->prog;
  938. if(r->use1.b[z] & bb) {
  939. if(debug['R'])
  940. print("%P", p);
  941. addreg(&p->from, rn);
  942. if(debug['R'])
  943. print("\t.c%P\n", p);
  944. }
  945. if((r->use2.b[z]|r->set.b[z]) & bb) {
  946. if(debug['R'])
  947. print("%P", p);
  948. addreg(&p->to, rn);
  949. if(debug['R'])
  950. print("\t.c%P\n", p);
  951. }
  952. if(STORE(r) & r->regdiff.b[z] & bb)
  953. addmove(r, bn, rn, 1);
  954. r->regu |= rb;
  955. if(r->refbehind.b[z] & bb)
  956. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  957. if(r1->refahead.b[z] & bb)
  958. paint3(r1, bn, rb, rn);
  959. if(!(r->refahead.b[z] & bb))
  960. break;
  961. r1 = r->s2;
  962. if(r1 != R)
  963. if(r1->refbehind.b[z] & bb)
  964. paint3(r1, bn, rb, rn);
  965. r = r->s1;
  966. if(r == R)
  967. break;
  968. if(r->act.b[z] & bb)
  969. break;
  970. if(!(r->refbehind.b[z] & bb))
  971. break;
  972. }
  973. }
  974. void
  975. addreg(Adr *a, int rn)
  976. {
  977. a->sym = 0;
  978. a->name = D_NONE;
  979. a->type = D_REG;
  980. a->reg = rn;
  981. if(rn >= NREG) {
  982. a->type = D_FREG;
  983. a->reg = rn-NREG;
  984. }
  985. }
  986. /*
  987. * bit reg
  988. * 0 R9
  989. * 1 R10
  990. * ... ...
  991. * 4 R13
  992. * 5 R16
  993. * ... ...
  994. * 20 R31
  995. */
  996. int32_t
  997. RtoB(int r)
  998. {
  999. if(r >= 9 && r <= 13)
  1000. return 1L << (r-9);
  1001. if(r >= 16 && r <= 31)
  1002. return 1L << (r-11);
  1003. return 0;
  1004. }
  1005. int
  1006. BtoR(int32_t b)
  1007. {
  1008. int r;
  1009. b &= 0x001fffffL;
  1010. if(b == 0)
  1011. return 0;
  1012. r = bitno(b) + 9;
  1013. if(r >= 14)
  1014. r += 2;
  1015. return r;
  1016. }
  1017. /*
  1018. * bit reg
  1019. * 22 F4
  1020. * 23 F6
  1021. * ... ...
  1022. * 31 F22
  1023. */
  1024. int32_t
  1025. FtoB(int f)
  1026. {
  1027. if(f < 4 || f > 22 || (f&1))
  1028. return 0;
  1029. return 1L << (f/2 + 20);
  1030. }
  1031. BtoF(long b)
  1032. {
  1033. b &= 0xffc00000L;
  1034. if(b == 0)
  1035. return 0;
  1036. return bitno(b)*2 - 40;
  1037. }