reg.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "gc.h"
  10. Reg*
  11. rega(void)
  12. {
  13. Reg *r;
  14. r = freer;
  15. if(r == R) {
  16. r = alloc(sizeof(*r));
  17. } else
  18. freer = r->link;
  19. *r = zreg;
  20. return r;
  21. }
  22. int
  23. rcmp(const void *a1, const void *a2)
  24. {
  25. Rgn *p1, *p2;
  26. int c1, c2;
  27. p1 = a1;
  28. p2 = a2;
  29. c1 = p2->cost;
  30. c2 = p1->cost;
  31. if(c1 -= c2)
  32. return c1;
  33. return p2->varno - p1->varno;
  34. }
  35. void
  36. regopt(Prog *p)
  37. {
  38. Reg *r, *r1, *r2;
  39. Prog *p1;
  40. int i, z;
  41. int32_t initpc, val, npc;
  42. uint32_t vreg;
  43. Bits bit;
  44. struct
  45. {
  46. int32_t m;
  47. int32_t c;
  48. Reg* p;
  49. } log5[6], *lp;
  50. firstr = R;
  51. lastr = R;
  52. nvar = 0;
  53. regbits = 0;
  54. for(z=0; z<BITS; z++) {
  55. externs.b[z] = 0;
  56. params.b[z] = 0;
  57. consts.b[z] = 0;
  58. addrs.b[z] = 0;
  59. }
  60. /*
  61. * pass 1
  62. * build aux data structure
  63. * allocate pcs
  64. * find use and set of variables
  65. */
  66. val = 5L * 5L * 5L * 5L * 5L;
  67. lp = log5;
  68. for(i=0; i<5; i++) {
  69. lp->m = val;
  70. lp->c = 0;
  71. lp->p = R;
  72. val /= 5L;
  73. lp++;
  74. }
  75. val = 0;
  76. for(; p != P; p = p->link) {
  77. switch(p->as) {
  78. case ADATA:
  79. case AGLOBL:
  80. case ANAME:
  81. case ASIGNAME:
  82. continue;
  83. }
  84. r = rega();
  85. if(firstr == R) {
  86. firstr = r;
  87. lastr = r;
  88. } else {
  89. lastr->link = r;
  90. r->p1 = lastr;
  91. lastr->s1 = r;
  92. lastr = r;
  93. }
  94. r->prog = p;
  95. r->pc = val;
  96. val++;
  97. lp = log5;
  98. for(i=0; i<5; i++) {
  99. lp->c--;
  100. if(lp->c <= 0) {
  101. lp->c = lp->m;
  102. if(lp->p != R)
  103. lp->p->log5 = r;
  104. lp->p = r;
  105. (lp+1)->c = 0;
  106. break;
  107. }
  108. lp++;
  109. }
  110. r1 = r->p1;
  111. if(r1 != R)
  112. switch(r1->prog->as) {
  113. case ARETURN:
  114. case ABR:
  115. case ARFI:
  116. r->p1 = R;
  117. r1->s1 = R;
  118. }
  119. /*
  120. * left side always read
  121. */
  122. bit = mkvar(&p->from, p->as==AMOVW);
  123. for(z=0; z<BITS; z++)
  124. r->use1.b[z] |= bit.b[z];
  125. /*
  126. * right side depends on opcode
  127. */
  128. bit = mkvar(&p->to, 0);
  129. if(bany(&bit))
  130. switch(p->as) {
  131. default:
  132. diag(Z, "reg: unknown asop: %A", p->as);
  133. break;
  134. /*
  135. * right side write
  136. */
  137. case ANOP:
  138. case AMOVB:
  139. case AMOVBU:
  140. case AMOVBZ:
  141. case AMOVBZU:
  142. case AMOVH:
  143. case AMOVHBR:
  144. case AMOVHU:
  145. case AMOVHZ:
  146. case AMOVHZU:
  147. case AMOVW:
  148. case AMOVWU:
  149. case AFMOVD:
  150. case AFMOVDCC:
  151. case AFMOVDU:
  152. case AFMOVS:
  153. case AFMOVSU:
  154. case AFRSP:
  155. for(z=0; z<BITS; z++)
  156. r->set.b[z] |= bit.b[z];
  157. break;
  158. /*
  159. * funny
  160. */
  161. case ABL:
  162. for(z=0; z<BITS; z++)
  163. addrs.b[z] |= bit.b[z];
  164. break;
  165. }
  166. }
  167. if(firstr == R)
  168. return;
  169. initpc = pc - val;
  170. npc = val;
  171. /*
  172. * pass 2
  173. * turn branch references to pointers
  174. * build back pointers
  175. */
  176. for(r = firstr; r != R; r = r->link) {
  177. p = r->prog;
  178. if(p->to.type == D_BRANCH) {
  179. val = p->to.offset - initpc;
  180. r1 = firstr;
  181. while(r1 != R) {
  182. r2 = r1->log5;
  183. if(r2 != R && val >= r2->pc) {
  184. r1 = r2;
  185. continue;
  186. }
  187. if(r1->pc == val)
  188. break;
  189. r1 = r1->link;
  190. }
  191. if(r1 == R) {
  192. nearln = p->lineno;
  193. diag(Z, "ref not found\n%P", p);
  194. continue;
  195. }
  196. if(r1 == r) {
  197. nearln = p->lineno;
  198. diag(Z, "ref to self\n%P", p);
  199. continue;
  200. }
  201. r->s2 = r1;
  202. r->p2link = r1->p2;
  203. r1->p2 = r;
  204. }
  205. }
  206. if(debug['R']) {
  207. p = firstr->prog;
  208. print("\n%L %D\n", p->lineno, &p->from);
  209. }
  210. /*
  211. * pass 2.5
  212. * find looping structure
  213. */
  214. for(r = firstr; r != R; r = r->link)
  215. r->active = 0;
  216. change = 0;
  217. loopit(firstr, npc);
  218. if(debug['R'] && debug['v']) {
  219. print("\nlooping structure:\n");
  220. for(r = firstr; r != R; r = r->link) {
  221. print("%ld:%P", r->loop, r->prog);
  222. for(z=0; z<BITS; z++)
  223. bit.b[z] = r->use1.b[z] |
  224. r->use2.b[z] | r->set.b[z];
  225. if(bany(&bit)) {
  226. print("\t");
  227. if(bany(&r->use1))
  228. print(" u1=%B", r->use1);
  229. if(bany(&r->use2))
  230. print(" u2=%B", r->use2);
  231. if(bany(&r->set))
  232. print(" st=%B", r->set);
  233. }
  234. print("\n");
  235. }
  236. }
  237. /*
  238. * pass 3
  239. * iterate propagating usage
  240. * back until flow graph is complete
  241. */
  242. loop1:
  243. change = 0;
  244. for(r = firstr; r != R; r = r->link)
  245. r->active = 0;
  246. for(r = firstr; r != R; r = r->link)
  247. if(r->prog->as == ARETURN)
  248. prop(r, zbits, zbits);
  249. loop11:
  250. /* pick up unreachable code */
  251. i = 0;
  252. for(r = firstr; r != R; r = r1) {
  253. r1 = r->link;
  254. if(r1 && r1->active && !r->active) {
  255. prop(r, zbits, zbits);
  256. i = 1;
  257. }
  258. }
  259. if(i)
  260. goto loop11;
  261. if(change)
  262. goto loop1;
  263. /*
  264. * pass 4
  265. * iterate propagating register/variable synchrony
  266. * forward until graph is complete
  267. */
  268. loop2:
  269. change = 0;
  270. for(r = firstr; r != R; r = r->link)
  271. r->active = 0;
  272. synch(firstr, zbits);
  273. if(change)
  274. goto loop2;
  275. /*
  276. * pass 5
  277. * isolate regions
  278. * calculate costs (paint1)
  279. */
  280. r = firstr;
  281. if(r) {
  282. for(z=0; z<BITS; z++)
  283. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  284. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  285. if(bany(&bit)) {
  286. nearln = r->prog->lineno;
  287. warn(Z, "used and not set: %B", bit);
  288. if(debug['R'] && !debug['w'])
  289. print("used and not set: %B\n", bit);
  290. }
  291. }
  292. if(debug['R'] && debug['v'])
  293. print("\nprop structure:\n");
  294. for(r = firstr; r != R; r = r->link)
  295. r->act = zbits;
  296. rgp = region;
  297. nregion = 0;
  298. for(r = firstr; r != R; r = r->link) {
  299. if(debug['R'] && debug['v'])
  300. print("%P\n set = %B; rah = %B; cal = %B\n",
  301. r->prog, r->set, r->refahead, r->calahead);
  302. for(z=0; z<BITS; z++)
  303. bit.b[z] = r->set.b[z] &
  304. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  305. if(bany(&bit)) {
  306. nearln = r->prog->lineno;
  307. warn(Z, "set and not used: %B", bit);
  308. if(debug['R'])
  309. print("set an not used: %B\n", bit);
  310. excise(r);
  311. }
  312. for(z=0; z<BITS; z++)
  313. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  314. while(bany(&bit)) {
  315. i = bnum(bit);
  316. rgp->enter = r;
  317. rgp->varno = i;
  318. change = 0;
  319. if(debug['R'] && debug['v'])
  320. print("\n");
  321. paint1(r, i);
  322. bit.b[i/32] &= ~(1L<<(i%32));
  323. if(change <= 0) {
  324. if(debug['R'])
  325. print("%L$%d: %B\n",
  326. r->prog->lineno, change, blsh(i));
  327. continue;
  328. }
  329. rgp->cost = change;
  330. nregion++;
  331. if(nregion >= NRGN) {
  332. warn(Z, "too many regions");
  333. goto brk;
  334. }
  335. rgp++;
  336. }
  337. }
  338. brk:
  339. qsort(region, nregion, sizeof(region[0]), rcmp);
  340. /*
  341. * pass 6
  342. * determine used registers (paint2)
  343. * replace code (paint3)
  344. */
  345. rgp = region;
  346. for(i=0; i<nregion; i++) {
  347. bit = blsh(rgp->varno);
  348. vreg = paint2(rgp->enter, rgp->varno);
  349. vreg = allreg(vreg, rgp);
  350. if(debug['R']) {
  351. if(rgp->regno >= NREG)
  352. print("%L$%d F%d: %B\n",
  353. rgp->enter->prog->lineno,
  354. rgp->cost,
  355. rgp->regno-NREG,
  356. bit);
  357. else
  358. print("%L$%d R%d: %B\n",
  359. rgp->enter->prog->lineno,
  360. rgp->cost,
  361. rgp->regno,
  362. bit);
  363. }
  364. if(rgp->regno != 0)
  365. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  366. rgp++;
  367. }
  368. /*
  369. * pass 7
  370. * peep-hole on basic block
  371. */
  372. if(!debug['R'] || debug['P'])
  373. peep();
  374. /*
  375. * pass 8
  376. * recalculate pc
  377. */
  378. val = initpc;
  379. for(r = firstr; r != R; r = r1) {
  380. r->pc = val;
  381. p = r->prog;
  382. p1 = P;
  383. r1 = r->link;
  384. if(r1 != R)
  385. p1 = r1->prog;
  386. for(; p != p1; p = p->link) {
  387. switch(p->as) {
  388. default:
  389. val++;
  390. break;
  391. case ANOP:
  392. case ADATA:
  393. case AGLOBL:
  394. case ANAME:
  395. case ASIGNAME:
  396. break;
  397. }
  398. }
  399. }
  400. pc = val;
  401. /*
  402. * fix up branches
  403. */
  404. if(debug['R'])
  405. if(bany(&addrs))
  406. print("addrs: %B\n", addrs);
  407. r1 = 0; /* set */
  408. for(r = firstr; r != R; r = r->link) {
  409. p = r->prog;
  410. if(p->to.type == D_BRANCH)
  411. p->to.offset = r->s2->pc;
  412. r1 = r;
  413. }
  414. /*
  415. * last pass
  416. * eliminate nops
  417. * free aux structures
  418. */
  419. for(p = firstr->prog; p != P; p = p->link){
  420. while(p->link && p->link->as == ANOP)
  421. p->link = p->link->link;
  422. }
  423. if(r1 != R) {
  424. r1->link = freer;
  425. freer = firstr;
  426. }
  427. }
  428. /*
  429. * add mov b,rn
  430. * just after r
  431. */
  432. void
  433. addmove(Reg *r, int bn, int rn, int f)
  434. {
  435. Prog *p, *p1;
  436. Adr *a;
  437. Var *v;
  438. p1 = alloc(sizeof(*p1));
  439. *p1 = zprog;
  440. p = r->prog;
  441. p1->link = p->link;
  442. p->link = p1;
  443. p1->lineno = p->lineno;
  444. v = var + bn;
  445. a = &p1->to;
  446. a->sym = v->sym;
  447. a->name = v->name;
  448. a->offset = v->offset;
  449. a->etype = v->etype;
  450. a->type = D_OREG;
  451. if(a->etype == TARRAY || a->sym == S)
  452. a->type = D_CONST;
  453. p1->as = AMOVW;
  454. if(v->etype == TCHAR || v->etype == TUCHAR)
  455. p1->as = AMOVB;
  456. if(v->etype == TSHORT || v->etype == TUSHORT)
  457. p1->as = AMOVH;
  458. if(v->etype == TFLOAT)
  459. p1->as = AFMOVS;
  460. if(v->etype == TDOUBLE)
  461. p1->as = AFMOVD;
  462. p1->from.type = D_REG;
  463. p1->from.reg = rn;
  464. if(rn >= NREG) {
  465. p1->from.type = D_FREG;
  466. p1->from.reg = rn-NREG;
  467. }
  468. if(!f) {
  469. p1->from = *a;
  470. *a = zprog.from;
  471. a->type = D_REG;
  472. a->reg = rn;
  473. if(rn >= NREG) {
  474. a->type = D_FREG;
  475. a->reg = rn-NREG;
  476. }
  477. if(v->etype == TUCHAR)
  478. p1->as = AMOVBZ;
  479. if(v->etype == TUSHORT)
  480. p1->as = AMOVHZ;
  481. }
  482. if(debug['R'])
  483. print("%P\t.a%P\n", p, p1);
  484. }
  485. Bits
  486. mkvar(Adr *a, int docon)
  487. {
  488. Var *v;
  489. int i, t, n, et, z;
  490. int32_t o;
  491. Bits bit;
  492. Sym *s;
  493. t = a->type;
  494. if(t == D_REG && a->reg != NREG)
  495. regbits |= RtoB(a->reg);
  496. if(t == D_FREG && a->reg != NREG)
  497. regbits |= FtoB(a->reg);
  498. s = a->sym;
  499. o = a->offset;
  500. et = a->etype;
  501. if(s == S) {
  502. if(t != D_CONST || !docon || a->reg != NREG)
  503. goto none;
  504. et = TLONG;
  505. }
  506. if(t == D_CONST) {
  507. if(s == S && sval(o))
  508. goto none;
  509. }
  510. n = a->name;
  511. v = var;
  512. for(i=0; i<nvar; i++) {
  513. if(s == v->sym)
  514. if(n == v->name)
  515. if(o == v->offset)
  516. goto out;
  517. v++;
  518. }
  519. if(s)
  520. if(s->name[0] == '.')
  521. goto none;
  522. if(nvar >= NVAR) {
  523. if(debug['w'] > 1 && s)
  524. warn(Z, "variable not optimized: %s", s->name);
  525. goto none;
  526. }
  527. i = nvar;
  528. nvar++;
  529. v = &var[i];
  530. v->sym = s;
  531. v->offset = o;
  532. v->etype = et;
  533. v->name = n;
  534. if(debug['R'])
  535. print("bit=%2d et=%2d %D\n", i, et, a);
  536. out:
  537. bit = blsh(i);
  538. if(n == D_EXTERN || n == D_STATIC)
  539. for(z=0; z<BITS; z++)
  540. externs.b[z] |= bit.b[z];
  541. if(n == D_PARAM)
  542. for(z=0; z<BITS; z++)
  543. params.b[z] |= bit.b[z];
  544. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  545. for(z=0; z<BITS; z++)
  546. addrs.b[z] |= bit.b[z];
  547. if(t == D_CONST) {
  548. if(s == S) {
  549. for(z=0; z<BITS; z++)
  550. consts.b[z] |= bit.b[z];
  551. return bit;
  552. }
  553. if(et != TARRAY)
  554. for(z=0; z<BITS; z++)
  555. addrs.b[z] |= bit.b[z];
  556. for(z=0; z<BITS; z++)
  557. params.b[z] |= bit.b[z];
  558. return bit;
  559. }
  560. if(t == D_OREG)
  561. return bit;
  562. none:
  563. return zbits;
  564. }
  565. void
  566. prop(Reg *r, Bits ref, Bits cal)
  567. {
  568. Reg *r1, *r2;
  569. int z;
  570. for(r1 = r; r1 != R; r1 = r1->p1) {
  571. for(z=0; z<BITS; z++) {
  572. ref.b[z] |= r1->refahead.b[z];
  573. if(ref.b[z] != r1->refahead.b[z]) {
  574. r1->refahead.b[z] = ref.b[z];
  575. change++;
  576. }
  577. cal.b[z] |= r1->calahead.b[z];
  578. if(cal.b[z] != r1->calahead.b[z]) {
  579. r1->calahead.b[z] = cal.b[z];
  580. change++;
  581. }
  582. }
  583. switch(r1->prog->as) {
  584. case ABL:
  585. for(z=0; z<BITS; z++) {
  586. cal.b[z] |= ref.b[z] | externs.b[z];
  587. ref.b[z] = 0;
  588. }
  589. break;
  590. case ATEXT:
  591. for(z=0; z<BITS; z++) {
  592. cal.b[z] = 0;
  593. ref.b[z] = 0;
  594. }
  595. break;
  596. case ARETURN:
  597. for(z=0; z<BITS; z++) {
  598. cal.b[z] = externs.b[z];
  599. ref.b[z] = 0;
  600. }
  601. }
  602. for(z=0; z<BITS; z++) {
  603. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  604. r1->use1.b[z] | r1->use2.b[z];
  605. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  606. r1->refbehind.b[z] = ref.b[z];
  607. r1->calbehind.b[z] = cal.b[z];
  608. }
  609. if(r1->active)
  610. break;
  611. r1->active = 1;
  612. }
  613. for(; r != r1; r = r->p1)
  614. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  615. prop(r2, r->refbehind, r->calbehind);
  616. }
  617. /*
  618. * find looping structure
  619. *
  620. * 1) find reverse postordering
  621. * 2) find approximate dominators,
  622. * the actual dominators if the flow graph is reducible
  623. * otherwise, dominators plus some other non-dominators.
  624. * See Matthew S. Hecht and Jeffrey D. Ullman,
  625. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  626. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  627. * Oct. 1-3, 1973, pp. 207-217.
  628. * 3) find all nodes with a predecessor dominated by the current node.
  629. * such a node is a loop head.
  630. * recursively, all preds with a greater rpo number are in the loop
  631. */
  632. int32_t
  633. postorder(Reg *r, Reg **rpo2r, int32_t n)
  634. {
  635. Reg *r1;
  636. r->rpo = 1;
  637. r1 = r->s1;
  638. if(r1 && !r1->rpo)
  639. n = postorder(r1, rpo2r, n);
  640. r1 = r->s2;
  641. if(r1 && !r1->rpo)
  642. n = postorder(r1, rpo2r, n);
  643. rpo2r[n] = r;
  644. n++;
  645. return n;
  646. }
  647. int32_t
  648. rpolca(int32_t *idom, int32_t rpo1, int32_t rpo2)
  649. {
  650. int32_t t;
  651. if(rpo1 == -1)
  652. return rpo2;
  653. while(rpo1 != rpo2){
  654. if(rpo1 > rpo2){
  655. t = rpo2;
  656. rpo2 = rpo1;
  657. rpo1 = t;
  658. }
  659. while(rpo1 < rpo2){
  660. t = idom[rpo2];
  661. if(t >= rpo2)
  662. fatal(Z, "bad idom");
  663. rpo2 = t;
  664. }
  665. }
  666. return rpo1;
  667. }
  668. int
  669. doms(int32_t *idom, int32_t r, int32_t s)
  670. {
  671. while(s > r)
  672. s = idom[s];
  673. return s == r;
  674. }
  675. int
  676. loophead(int32_t *idom, Reg *r)
  677. {
  678. int32_t src;
  679. src = r->rpo;
  680. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  681. return 1;
  682. for(r = r->p2; r != R; r = r->p2link)
  683. if(doms(idom, src, r->rpo))
  684. return 1;
  685. return 0;
  686. }
  687. void
  688. loopmark(Reg **rpo2r, int32_t head, Reg *r)
  689. {
  690. if(r->rpo < head || r->active == head)
  691. return;
  692. r->active = head;
  693. r->loop += LOOP;
  694. if(r->p1 != R)
  695. loopmark(rpo2r, head, r->p1);
  696. for(r = r->p2; r != R; r = r->p2link)
  697. loopmark(rpo2r, head, r);
  698. }
  699. void
  700. loopit(Reg *r, int32_t nr)
  701. {
  702. Reg *r1;
  703. int32_t i, d, me;
  704. if(nr > maxnr) {
  705. rpo2r = alloc(nr * sizeof(Reg*));
  706. idom = alloc(nr * sizeof(int32_t));
  707. maxnr = nr;
  708. }
  709. d = postorder(r, rpo2r, 0);
  710. if(d > nr)
  711. fatal(Z, "too many reg nodes");
  712. nr = d;
  713. for(i = 0; i < nr / 2; i++){
  714. r1 = rpo2r[i];
  715. rpo2r[i] = rpo2r[nr - 1 - i];
  716. rpo2r[nr - 1 - i] = r1;
  717. }
  718. for(i = 0; i < nr; i++)
  719. rpo2r[i]->rpo = i;
  720. idom[0] = 0;
  721. for(i = 0; i < nr; i++){
  722. r1 = rpo2r[i];
  723. me = r1->rpo;
  724. d = -1;
  725. if(r1->p1 != R && r1->p1->rpo < me)
  726. d = r1->p1->rpo;
  727. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  728. if(r1->rpo < me)
  729. d = rpolca(idom, d, r1->rpo);
  730. idom[i] = d;
  731. }
  732. for(i = 0; i < nr; i++){
  733. r1 = rpo2r[i];
  734. r1->loop++;
  735. if(r1->p2 != R && loophead(idom, r1))
  736. loopmark(rpo2r, i, r1);
  737. }
  738. }
  739. void
  740. synch(Reg *r, Bits dif)
  741. {
  742. Reg *r1;
  743. int z;
  744. for(r1 = r; r1 != R; r1 = r1->s1) {
  745. for(z=0; z<BITS; z++) {
  746. dif.b[z] = (dif.b[z] &
  747. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  748. r1->set.b[z] | r1->regdiff.b[z];
  749. if(dif.b[z] != r1->regdiff.b[z]) {
  750. r1->regdiff.b[z] = dif.b[z];
  751. change++;
  752. }
  753. }
  754. if(r1->active)
  755. break;
  756. r1->active = 1;
  757. for(z=0; z<BITS; z++)
  758. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  759. if(r1->s2 != R)
  760. synch(r1->s2, dif);
  761. }
  762. }
  763. uint32_t
  764. allreg(uint32_t b, Rgn *r)
  765. {
  766. Var *v;
  767. int i;
  768. v = var + r->varno;
  769. r->regno = 0;
  770. switch(v->etype) {
  771. default:
  772. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  773. break;
  774. case TCHAR:
  775. case TUCHAR:
  776. case TSHORT:
  777. case TUSHORT:
  778. case TINT:
  779. case TUINT:
  780. case TLONG:
  781. case TULONG:
  782. case TIND:
  783. case TARRAY:
  784. i = BtoR(~b);
  785. if(i && r->cost > 0) {
  786. r->regno = i;
  787. return RtoB(i);
  788. }
  789. break;
  790. case TDOUBLE:
  791. case TFLOAT:
  792. i = BtoF(~b);
  793. if(i && r->cost > 0) {
  794. r->regno = i+NREG;
  795. return FtoB(i);
  796. }
  797. break;
  798. }
  799. return 0;
  800. }
  801. void
  802. paint1(Reg *r, int bn)
  803. {
  804. Reg *r1;
  805. Prog *p;
  806. int z;
  807. uint32_t bb;
  808. z = bn/32;
  809. bb = 1L<<(bn%32);
  810. if(r->act.b[z] & bb)
  811. return;
  812. for(;;) {
  813. if(!(r->refbehind.b[z] & bb))
  814. break;
  815. r1 = r->p1;
  816. if(r1 == R)
  817. break;
  818. if(!(r1->refahead.b[z] & bb))
  819. break;
  820. if(r1->act.b[z] & bb)
  821. break;
  822. r = r1;
  823. }
  824. if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
  825. change -= CLOAD * r->loop;
  826. if(debug['R'] && debug['v'])
  827. print("%ld%P\tld %B $%d\n", r->loop,
  828. r->prog, blsh(bn), change);
  829. }
  830. for(;;) {
  831. r->act.b[z] |= bb;
  832. p = r->prog;
  833. if(r->use1.b[z] & bb) {
  834. change += CREF * r->loop;
  835. if(p->to.type == D_FREG && p->as == AMOVW)
  836. change = -CINF; /* cant go Rreg to Freg */
  837. if(debug['R'] && debug['v'])
  838. print("%ld%P\tu1 %B $%d\n", r->loop,
  839. p, blsh(bn), change);
  840. }
  841. if((r->use2.b[z]|r->set.b[z]) & bb) {
  842. change += CREF * r->loop;
  843. if(p->from.type == D_FREG && p->as == AMOVW)
  844. change = -CINF; /* cant go Rreg to Freg */
  845. if(debug['R'] && debug['v'])
  846. print("%ld%P\tu2 %B $%d\n", r->loop,
  847. p, blsh(bn), change);
  848. }
  849. if(STORE(r) & r->regdiff.b[z] & bb) {
  850. change -= CLOAD * r->loop;
  851. if(debug['R'] && debug['v'])
  852. print("%ld%P\tst %B $%d\n", r->loop,
  853. p, blsh(bn), change);
  854. }
  855. if(r->refbehind.b[z] & bb)
  856. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  857. if(r1->refahead.b[z] & bb)
  858. paint1(r1, bn);
  859. if(!(r->refahead.b[z] & bb))
  860. break;
  861. r1 = r->s2;
  862. if(r1 != R)
  863. if(r1->refbehind.b[z] & bb)
  864. paint1(r1, bn);
  865. r = r->s1;
  866. if(r == R)
  867. break;
  868. if(r->act.b[z] & bb)
  869. break;
  870. if(!(r->refbehind.b[z] & bb))
  871. break;
  872. }
  873. }
  874. uint32_t
  875. paint2(Reg *r, int bn)
  876. {
  877. Reg *r1;
  878. int z;
  879. uint32_t bb, vreg;
  880. z = bn/32;
  881. bb = 1L << (bn%32);
  882. vreg = regbits;
  883. if(!(r->act.b[z] & bb))
  884. return vreg;
  885. for(;;) {
  886. if(!(r->refbehind.b[z] & bb))
  887. break;
  888. r1 = r->p1;
  889. if(r1 == R)
  890. break;
  891. if(!(r1->refahead.b[z] & bb))
  892. break;
  893. if(!(r1->act.b[z] & bb))
  894. break;
  895. r = r1;
  896. }
  897. for(;;) {
  898. r->act.b[z] &= ~bb;
  899. vreg |= r->regu;
  900. if(r->refbehind.b[z] & bb)
  901. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  902. if(r1->refahead.b[z] & bb)
  903. vreg |= paint2(r1, bn);
  904. if(!(r->refahead.b[z] & bb))
  905. break;
  906. r1 = r->s2;
  907. if(r1 != R)
  908. if(r1->refbehind.b[z] & bb)
  909. vreg |= paint2(r1, bn);
  910. r = r->s1;
  911. if(r == R)
  912. break;
  913. if(!(r->act.b[z] & bb))
  914. break;
  915. if(!(r->refbehind.b[z] & bb))
  916. break;
  917. }
  918. return vreg;
  919. }
  920. void
  921. paint3(Reg *r, int bn, int32_t rb, int rn)
  922. {
  923. Reg *r1;
  924. Prog *p;
  925. int z;
  926. uint32_t bb;
  927. z = bn/32;
  928. bb = 1L << (bn%32);
  929. if(r->act.b[z] & bb)
  930. return;
  931. for(;;) {
  932. if(!(r->refbehind.b[z] & bb))
  933. break;
  934. r1 = r->p1;
  935. if(r1 == R)
  936. break;
  937. if(!(r1->refahead.b[z] & bb))
  938. break;
  939. if(r1->act.b[z] & bb)
  940. break;
  941. r = r1;
  942. }
  943. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  944. addmove(r, bn, rn, 0);
  945. for(;;) {
  946. r->act.b[z] |= bb;
  947. p = r->prog;
  948. if(r->use1.b[z] & bb) {
  949. if(debug['R'])
  950. print("%P", p);
  951. addreg(&p->from, rn);
  952. if(debug['R'])
  953. print("\t.c%P\n", p);
  954. }
  955. if((r->use2.b[z]|r->set.b[z]) & bb) {
  956. if(debug['R'])
  957. print("%P", p);
  958. addreg(&p->to, rn);
  959. if(debug['R'])
  960. print("\t.c%P\n", p);
  961. }
  962. if(STORE(r) & r->regdiff.b[z] & bb)
  963. addmove(r, bn, rn, 1);
  964. r->regu |= rb;
  965. if(r->refbehind.b[z] & bb)
  966. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  967. if(r1->refahead.b[z] & bb)
  968. paint3(r1, bn, rb, rn);
  969. if(!(r->refahead.b[z] & bb))
  970. break;
  971. r1 = r->s2;
  972. if(r1 != R)
  973. if(r1->refbehind.b[z] & bb)
  974. paint3(r1, bn, rb, rn);
  975. r = r->s1;
  976. if(r == R)
  977. break;
  978. if(r->act.b[z] & bb)
  979. break;
  980. if(!(r->refbehind.b[z] & bb))
  981. break;
  982. }
  983. }
  984. void
  985. addreg(Adr *a, int rn)
  986. {
  987. a->sym = 0;
  988. a->name = D_NONE;
  989. a->type = D_REG;
  990. a->reg = rn;
  991. if(rn >= NREG) {
  992. a->type = D_FREG;
  993. a->reg = rn-NREG;
  994. }
  995. }
  996. /*
  997. * track register variables including external registers:
  998. * bit reg
  999. * 0 R7
  1000. * 1 R8
  1001. * ... ...
  1002. * 21 R28
  1003. */
  1004. int32_t
  1005. RtoB(int r)
  1006. {
  1007. if(r >= REGMIN && r <= REGMAX)
  1008. return 1L << (r-REGMIN);
  1009. return 0;
  1010. }
  1011. int
  1012. BtoR(int32_t b)
  1013. {
  1014. b &= 0x001fffffL;
  1015. if(b == 0)
  1016. return 0;
  1017. return bitno(b) + REGMIN;
  1018. }
  1019. /*
  1020. * bit reg
  1021. * 22 F17
  1022. * 23 F18
  1023. * ... ...
  1024. * 31 F26
  1025. */
  1026. int32_t
  1027. FtoB(int f)
  1028. {
  1029. if(f < FREGMIN || f > FREGEXT)
  1030. return 0;
  1031. return 1L << (f - FREGMIN + 22);
  1032. }
  1033. int
  1034. BtoF(int32_t b)
  1035. {
  1036. b &= 0xffc00000L;
  1037. if(b == 0)
  1038. return 0;
  1039. return bitno(b) - 22 + FREGMIN;
  1040. }