reg.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "gc.h"
  10. void addsplits(void);
  11. Reg*
  12. rega(void)
  13. {
  14. Reg *r;
  15. r = freer;
  16. if(r == R) {
  17. r = alloc(sizeof(*r));
  18. } else
  19. freer = r->link;
  20. *r = zreg;
  21. return r;
  22. }
  23. int
  24. rcmp(const void *a1, const void *a2)
  25. {
  26. Rgn *p1, *p2;
  27. int c1, c2;
  28. p1 = (Rgn*)a1;
  29. p2 = (Rgn*)a2;
  30. c1 = p2->cost;
  31. c2 = p1->cost;
  32. if(c1 -= c2)
  33. return c1;
  34. return p2->varno - p1->varno;
  35. }
  36. void
  37. regopt(Prog *p)
  38. {
  39. Reg *r, *r1, *r2;
  40. Prog *p1;
  41. int i, z;
  42. int32_t initpc, val, npc;
  43. uint32_t vreg;
  44. Bits bit;
  45. struct
  46. {
  47. int32_t m;
  48. int32_t c;
  49. Reg* p;
  50. } log5[6], *lp;
  51. firstr = R;
  52. lastr = R;
  53. nvar = 0;
  54. regbits = 0;
  55. for(z=0; z<BITS; z++) {
  56. externs.b[z] = 0;
  57. params.b[z] = 0;
  58. consts.b[z] = 0;
  59. addrs.b[z] = 0;
  60. }
  61. /*
  62. * pass 1
  63. * build aux data structure
  64. * allocate pcs
  65. * find use and set of variables
  66. */
  67. val = 5L * 5L * 5L * 5L * 5L;
  68. lp = log5;
  69. for(i=0; i<5; i++) {
  70. lp->m = val;
  71. lp->c = 0;
  72. lp->p = R;
  73. val /= 5L;
  74. lp++;
  75. }
  76. val = 0;
  77. for(; p != P; p = p->link) {
  78. switch(p->as) {
  79. case ADATA:
  80. case AGLOBL:
  81. case ANAME:
  82. case ASIGNAME:
  83. continue;
  84. }
  85. r = rega();
  86. if(firstr == R) {
  87. firstr = r;
  88. lastr = r;
  89. } else {
  90. lastr->link = r;
  91. r->p1 = lastr;
  92. lastr->s1 = r;
  93. lastr = r;
  94. }
  95. r->prog = p;
  96. r->pc = val;
  97. val++;
  98. lp = log5;
  99. for(i=0; i<5; i++) {
  100. lp->c--;
  101. if(lp->c <= 0) {
  102. lp->c = lp->m;
  103. if(lp->p != R)
  104. lp->p->log5 = r;
  105. lp->p = r;
  106. (lp+1)->c = 0;
  107. break;
  108. }
  109. lp++;
  110. }
  111. r1 = r->p1;
  112. if(r1 != R)
  113. switch(r1->prog->as) {
  114. case ARET:
  115. case AJMP:
  116. case ARFE:
  117. r->p1 = R;
  118. r1->s1 = R;
  119. }
  120. /*
  121. * left side always read
  122. */
  123. bit = mkvar(&p->from, p->as==AMOVW);
  124. for(z=0; z<BITS; z++)
  125. r->use1.b[z] |= bit.b[z];
  126. /*
  127. * right side depends on opcode
  128. */
  129. bit = mkvar(&p->to, 0);
  130. if(bany(&bit))
  131. switch(p->as) {
  132. default:
  133. diag(Z, "reg: unknown asop: %A", p->as);
  134. break;
  135. /*
  136. * right side write
  137. */
  138. case ANOP:
  139. case AMOVB:
  140. case AMOVBU:
  141. case AMOVH:
  142. case AMOVHU:
  143. case AMOVW:
  144. case AMOVF:
  145. case AMOVD:
  146. for(z=0; z<BITS; z++)
  147. r->set.b[z] |= bit.b[z];
  148. break;
  149. /*
  150. * funny
  151. */
  152. case AJAL:
  153. for(z=0; z<BITS; z++)
  154. addrs.b[z] |= bit.b[z];
  155. break;
  156. }
  157. }
  158. if(firstr == R)
  159. return;
  160. initpc = pc - val;
  161. npc = val;
  162. /*
  163. * pass 2
  164. * turn branch references to pointers
  165. * build back pointers
  166. */
  167. for(r = firstr; r != R; r = r->link) {
  168. p = r->prog;
  169. if(p->to.type == D_BRANCH) {
  170. val = p->to.offset - initpc;
  171. r1 = firstr;
  172. while(r1 != R) {
  173. r2 = r1->log5;
  174. if(r2 != R && val >= r2->pc) {
  175. r1 = r2;
  176. continue;
  177. }
  178. if(r1->pc == val)
  179. break;
  180. r1 = r1->link;
  181. }
  182. if(r1 == R) {
  183. nearln = p->lineno;
  184. diag(Z, "ref not found\n%P", p);
  185. continue;
  186. }
  187. if(r1 == r) {
  188. nearln = p->lineno;
  189. diag(Z, "ref to self\n%P", p);
  190. continue;
  191. }
  192. r->s2 = r1;
  193. r->p2link = r1->p2;
  194. r1->p2 = r;
  195. }
  196. }
  197. if(debug['R']) {
  198. p = firstr->prog;
  199. print("\n%L %D\n", p->lineno, &p->from);
  200. }
  201. /*
  202. * pass 2.5
  203. * find looping structure
  204. */
  205. for(r = firstr; r != R; r = r->link)
  206. r->active = 0;
  207. change = 0;
  208. loopit(firstr, npc);
  209. /*
  210. * pass 3
  211. * iterate propagating usage
  212. * back until flow graph is complete
  213. */
  214. loop1:
  215. change = 0;
  216. for(r = firstr; r != R; r = r->link)
  217. r->active = 0;
  218. for(r = firstr; r != R; r = r->link)
  219. if(r->prog->as == ARET)
  220. prop(r, zbits, zbits);
  221. loop11:
  222. /* pick up unreachable code */
  223. i = 0;
  224. for(r = firstr; r != R; r = r1) {
  225. r1 = r->link;
  226. if(r1 && r1->active && !r->active) {
  227. prop(r, zbits, zbits);
  228. i = 1;
  229. }
  230. }
  231. if(i)
  232. goto loop11;
  233. if(change)
  234. goto loop1;
  235. /*
  236. * pass 4
  237. * iterate propagating register/variable synchrony
  238. * forward until graph is complete
  239. */
  240. loop2:
  241. change = 0;
  242. for(r = firstr; r != R; r = r->link)
  243. r->active = 0;
  244. synch(firstr, zbits);
  245. if(change)
  246. goto loop2;
  247. addsplits();
  248. if(debug['R'] && debug['v']) {
  249. print("\nprop structure:\n");
  250. for(r = firstr; r != R; r = r->link) {
  251. print("%ld:%P", r->loop, r->prog);
  252. for(z=0; z<BITS; z++)
  253. bit.b[z] = r->set.b[z] |
  254. r->refahead.b[z] | r->calahead.b[z] |
  255. r->refbehind.b[z] | r->calbehind.b[z] |
  256. r->use1.b[z] | r->use2.b[z];
  257. if(bany(&bit)) {
  258. print("\t");
  259. if(bany(&r->use1))
  260. print(" u1=%B", r->use1);
  261. if(bany(&r->use2))
  262. print(" u2=%B", r->use2);
  263. if(bany(&r->set))
  264. print(" st=%B", r->set);
  265. if(bany(&r->refahead))
  266. print(" ra=%B", r->refahead);
  267. if(bany(&r->calahead))
  268. print(" ca=%B", r->calahead);
  269. if(bany(&r->refbehind))
  270. print(" rb=%B", r->refbehind);
  271. if(bany(&r->calbehind))
  272. print(" cb=%B", r->calbehind);
  273. if(bany(&r->regdiff))
  274. print(" rd=%B", r->regdiff);
  275. }
  276. print("\n");
  277. }
  278. }
  279. /*
  280. * pass 5
  281. * isolate regions
  282. * calculate costs (paint1)
  283. */
  284. r = firstr;
  285. if(r) {
  286. for(z=0; z<BITS; z++)
  287. bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
  288. ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
  289. if(bany(&bit)) {
  290. nearln = r->prog->lineno;
  291. warn(Z, "used and not set: %B", bit);
  292. if(debug['R'] && !debug['w'])
  293. print("used and not set: %B\n", bit);
  294. }
  295. }
  296. for(r = firstr; r != R; r = r->link)
  297. r->act = zbits;
  298. rgp = region;
  299. nregion = 0;
  300. for(r = firstr; r != R; r = r->link) {
  301. for(z=0; z<BITS; z++)
  302. bit.b[z] = r->set.b[z] &
  303. ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
  304. if(bany(&bit)) {
  305. nearln = r->prog->lineno;
  306. warn(Z, "set and not used: %B", bit);
  307. if(debug['R'])
  308. print("set and not used: %B\n", bit);
  309. excise(r);
  310. }
  311. for(z=0; z<BITS; z++)
  312. bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
  313. while(bany(&bit)) {
  314. i = bnum(bit);
  315. rgp->enter = r;
  316. rgp->varno = i;
  317. change = 0;
  318. if(debug['R'] && debug['v'])
  319. print("\n");
  320. paint1(r, i);
  321. bit.b[i/32] &= ~(1L<<(i%32));
  322. if(change <= 0) {
  323. if(debug['R'])
  324. print("%L $%d: %B\n",
  325. r->prog->lineno, change, blsh(i));
  326. continue;
  327. }
  328. rgp->cost = change;
  329. nregion++;
  330. if(nregion >= NRGN) {
  331. warn(Z, "too many regions");
  332. goto brk;
  333. }
  334. rgp++;
  335. }
  336. }
  337. brk:
  338. qsort(region, nregion, sizeof(region[0]), rcmp);
  339. /*
  340. * pass 6
  341. * determine used registers (paint2)
  342. * replace code (paint3)
  343. */
  344. rgp = region;
  345. for(i=0; i<nregion; i++) {
  346. bit = blsh(rgp->varno);
  347. vreg = paint2(rgp->enter, rgp->varno);
  348. vreg = allreg(vreg, rgp);
  349. if(debug['R']) {
  350. if(rgp->regno >= NREG)
  351. print("%L $%d F%d: %B\n",
  352. rgp->enter->prog->lineno,
  353. rgp->cost,
  354. rgp->regno-NREG,
  355. bit);
  356. else
  357. print("%L $%d R%d: %B\n",
  358. rgp->enter->prog->lineno,
  359. rgp->cost,
  360. rgp->regno,
  361. bit);
  362. }
  363. if(rgp->regno != 0)
  364. paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
  365. rgp++;
  366. }
  367. /*
  368. * pass 7
  369. * peep-hole on basic block
  370. */
  371. if(!debug['R'] || debug['P'])
  372. peep();
  373. /*
  374. * pass 8
  375. * recalculate pc
  376. */
  377. val = initpc;
  378. for(r = firstr; r != R; r = r1) {
  379. r->pc = val;
  380. p = r->prog;
  381. p1 = P;
  382. r1 = r->link;
  383. if(r1 != R)
  384. p1 = r1->prog;
  385. for(; p != p1; p = p->link) {
  386. switch(p->as) {
  387. default:
  388. val++;
  389. break;
  390. case ANOP:
  391. case ADATA:
  392. case AGLOBL:
  393. case ANAME:
  394. case ASIGNAME:
  395. break;
  396. }
  397. }
  398. }
  399. pc = val;
  400. /*
  401. * fix up branches
  402. */
  403. if(debug['R'])
  404. if(bany(&addrs))
  405. print("addrs: %B\n", addrs);
  406. r1 = 0; /* set */
  407. for(r = firstr; r != R; r = r->link) {
  408. p = r->prog;
  409. if(p->to.type == D_BRANCH)
  410. p->to.offset = r->s2->pc;
  411. r1 = r;
  412. }
  413. /*
  414. * last pass
  415. * eliminate nops
  416. * free aux structures
  417. */
  418. for(p = firstr->prog; p != P; p = p->link){
  419. while(p->link && p->link->as == ANOP)
  420. p->link = p->link->link;
  421. }
  422. if(r1 != R) {
  423. r1->link = freer;
  424. freer = firstr;
  425. }
  426. }
  427. void
  428. addsplits(void)
  429. {
  430. Reg *r, *r1;
  431. int z, i;
  432. Bits bit;
  433. for(r = firstr; r != R; r = r->link) {
  434. if(r->loop > 1)
  435. continue;
  436. if(r->prog->as == AJAL)
  437. continue;
  438. for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
  439. if(r1->loop <= 1)
  440. continue;
  441. for(z=0; z<BITS; z++)
  442. bit.b[z] = r1->calbehind.b[z] &
  443. (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
  444. ~(r->calahead.b[z] & addrs.b[z]);
  445. while(bany(&bit)) {
  446. i = bnum(bit);
  447. bit.b[i/32] &= ~(1L << (i%32));
  448. }
  449. }
  450. }
  451. }
  452. /*
  453. * add mov b,rn
  454. * just after r
  455. */
  456. void
  457. addmove(Reg *r, int bn, int rn, int f)
  458. {
  459. Prog *p, *p1;
  460. Adr *a;
  461. Var *v;
  462. p1 = alloc(sizeof(*p1));
  463. *p1 = zprog;
  464. p = r->prog;
  465. p1->link = p->link;
  466. p->link = p1;
  467. p1->lineno = p->lineno;
  468. v = var + bn;
  469. a = &p1->to;
  470. a->sym = v->sym;
  471. a->name = v->name;
  472. a->offset = v->offset;
  473. a->etype = v->etype;
  474. a->type = D_OREG;
  475. if(a->etype == TARRAY || a->sym == S)
  476. a->type = D_CONST;
  477. p1->as = AMOVW;
  478. if(v->etype == TCHAR || v->etype == TUCHAR)
  479. p1->as = AMOVB;
  480. if(v->etype == TSHORT || v->etype == TUSHORT)
  481. p1->as = AMOVH;
  482. if(v->etype == TFLOAT)
  483. p1->as = AMOVF;
  484. if(v->etype == TDOUBLE)
  485. p1->as = AMOVD;
  486. p1->from.type = D_REG;
  487. p1->from.reg = rn;
  488. if(rn >= NREG) {
  489. p1->from.type = D_FREG;
  490. p1->from.reg = rn-NREG;
  491. }
  492. if(!f) {
  493. p1->from = *a;
  494. *a = zprog.from;
  495. a->type = D_REG;
  496. a->reg = rn;
  497. if(rn >= NREG) {
  498. a->type = D_FREG;
  499. a->reg = rn-NREG;
  500. }
  501. if(v->etype == TUCHAR)
  502. p1->as = AMOVBU;
  503. if(v->etype == TUSHORT)
  504. p1->as = AMOVHU;
  505. }
  506. if(debug['R'])
  507. print("%P\t.a%P\n", p, p1);
  508. }
  509. Bits
  510. mkvar(Adr *a, int docon)
  511. {
  512. Var *v;
  513. int i, t, n, et, z;
  514. int32_t o;
  515. Bits bit;
  516. Sym *s;
  517. t = a->type;
  518. if(t == D_REG && a->reg != NREG)
  519. regbits |= RtoB(a->reg);
  520. if(t == D_FREG && a->reg != NREG)
  521. regbits |= FtoB(a->reg);
  522. s = a->sym;
  523. o = a->offset;
  524. et = a->etype;
  525. if(s == S) {
  526. if(t != D_CONST || !docon || a->reg != NREG)
  527. goto none;
  528. et = TLONG;
  529. }
  530. if(t == D_CONST) {
  531. if(s == S && sval(o))
  532. goto none;
  533. }
  534. n = a->name;
  535. v = var;
  536. for(i=0; i<nvar; i++) {
  537. if(s == v->sym)
  538. if(n == v->name)
  539. if(o == v->offset)
  540. goto out;
  541. v++;
  542. }
  543. if(s)
  544. if(s->name[0] == '.')
  545. goto none;
  546. if(nvar >= NVAR) {
  547. if(debug['w'] > 1 && s)
  548. warn(Z, "variable not optimized: %s", s->name);
  549. goto none;
  550. }
  551. i = nvar;
  552. nvar++;
  553. v = &var[i];
  554. v->sym = s;
  555. v->offset = o;
  556. v->etype = et;
  557. v->name = n;
  558. if(debug['R'])
  559. print("bit=%2d et=%2d %D\n", i, et, a);
  560. out:
  561. bit = blsh(i);
  562. if(n == D_EXTERN || n == D_STATIC)
  563. for(z=0; z<BITS; z++)
  564. externs.b[z] |= bit.b[z];
  565. if(n == D_PARAM)
  566. for(z=0; z<BITS; z++)
  567. params.b[z] |= bit.b[z];
  568. if(v->etype != et || !typechlpfd[et]) /* funny punning */
  569. for(z=0; z<BITS; z++)
  570. addrs.b[z] |= bit.b[z];
  571. if(t == D_CONST) {
  572. if(s == S) {
  573. for(z=0; z<BITS; z++)
  574. consts.b[z] |= bit.b[z];
  575. return bit;
  576. }
  577. if(et != TARRAY)
  578. for(z=0; z<BITS; z++)
  579. addrs.b[z] |= bit.b[z];
  580. for(z=0; z<BITS; z++)
  581. params.b[z] |= bit.b[z];
  582. return bit;
  583. }
  584. if(t == D_OREG)
  585. return bit;
  586. none:
  587. return zbits;
  588. }
  589. void
  590. prop(Reg *r, Bits ref, Bits cal)
  591. {
  592. Reg *r1, *r2;
  593. int z;
  594. for(r1 = r; r1 != R; r1 = r1->p1) {
  595. for(z=0; z<BITS; z++) {
  596. ref.b[z] |= r1->refahead.b[z];
  597. if(ref.b[z] != r1->refahead.b[z]) {
  598. r1->refahead.b[z] = ref.b[z];
  599. change++;
  600. }
  601. cal.b[z] |= r1->calahead.b[z];
  602. if(cal.b[z] != r1->calahead.b[z]) {
  603. r1->calahead.b[z] = cal.b[z];
  604. change++;
  605. }
  606. }
  607. switch(r1->prog->as) {
  608. case AJAL:
  609. for(z=0; z<BITS; z++) {
  610. cal.b[z] |= ref.b[z] | externs.b[z];
  611. ref.b[z] = 0;
  612. }
  613. break;
  614. case ATEXT:
  615. for(z=0; z<BITS; z++) {
  616. cal.b[z] = 0;
  617. ref.b[z] = 0;
  618. }
  619. break;
  620. case ARET:
  621. for(z=0; z<BITS; z++) {
  622. cal.b[z] = externs.b[z];
  623. ref.b[z] = 0;
  624. }
  625. }
  626. for(z=0; z<BITS; z++) {
  627. ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
  628. r1->use1.b[z] | r1->use2.b[z];
  629. cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
  630. r1->refbehind.b[z] = ref.b[z];
  631. r1->calbehind.b[z] = cal.b[z];
  632. }
  633. if(r1->active)
  634. break;
  635. r1->active = 1;
  636. }
  637. for(; r != r1; r = r->p1)
  638. for(r2 = r->p2; r2 != R; r2 = r2->p2link)
  639. prop(r2, r->refbehind, r->calbehind);
  640. }
  641. /*
  642. * find looping structure
  643. *
  644. * 1) find reverse postordering
  645. * 2) find approximate dominators,
  646. * the actual dominators if the flow graph is reducible
  647. * otherwise, dominators plus some other non-dominators.
  648. * See Matthew S. Hecht and Jeffrey D. Ullman,
  649. * "Analysis of a Simple Algorithm for Global Data Flow Problems",
  650. * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
  651. * Oct. 1-3, 1973, pp. 207-217.
  652. * 3) find all nodes with a predecessor dominated by the current node.
  653. * such a node is a loop head.
  654. * recursively, all preds with a greater rpo number are in the loop
  655. */
  656. int32_t
  657. postorder(Reg *r, Reg **rpo2r, int32_t n)
  658. {
  659. Reg *r1;
  660. r->rpo = 1;
  661. r1 = r->s1;
  662. if(r1 && !r1->rpo)
  663. n = postorder(r1, rpo2r, n);
  664. r1 = r->s2;
  665. if(r1 && !r1->rpo)
  666. n = postorder(r1, rpo2r, n);
  667. rpo2r[n] = r;
  668. n++;
  669. return n;
  670. }
  671. int32_t
  672. rpolca(int32_t *idom, int32_t rpo1, int32_t rpo2)
  673. {
  674. int32_t t;
  675. if(rpo1 == -1)
  676. return rpo2;
  677. while(rpo1 != rpo2){
  678. if(rpo1 > rpo2){
  679. t = rpo2;
  680. rpo2 = rpo1;
  681. rpo1 = t;
  682. }
  683. while(rpo1 < rpo2){
  684. t = idom[rpo2];
  685. if(t >= rpo2)
  686. fatal(Z, "bad idom");
  687. rpo2 = t;
  688. }
  689. }
  690. return rpo1;
  691. }
  692. int
  693. doms(int32_t *idom, int32_t r, int32_t s)
  694. {
  695. while(s > r)
  696. s = idom[s];
  697. return s == r;
  698. }
  699. int
  700. loophead(int32_t *idom, Reg *r)
  701. {
  702. int32_t src;
  703. src = r->rpo;
  704. if(r->p1 != R && doms(idom, src, r->p1->rpo))
  705. return 1;
  706. for(r = r->p2; r != R; r = r->p2link)
  707. if(doms(idom, src, r->rpo))
  708. return 1;
  709. return 0;
  710. }
  711. void
  712. loopmark(Reg **rpo2r, int32_t head, Reg *r)
  713. {
  714. if(r->rpo < head || r->active == head)
  715. return;
  716. r->active = head;
  717. r->loop += LOOP;
  718. if(r->p1 != R)
  719. loopmark(rpo2r, head, r->p1);
  720. for(r = r->p2; r != R; r = r->p2link)
  721. loopmark(rpo2r, head, r);
  722. }
  723. void
  724. loopit(Reg *r, int32_t nr)
  725. {
  726. Reg *r1;
  727. int32_t i, d, me;
  728. if(nr > maxnr) {
  729. rpo2r = alloc(nr * sizeof(Reg*));
  730. idom = alloc(nr * sizeof(int32_t));
  731. maxnr = nr;
  732. }
  733. d = postorder(r, rpo2r, 0);
  734. if(d > nr)
  735. fatal(Z, "too many reg nodes");
  736. nr = d;
  737. for(i = 0; i < nr / 2; i++){
  738. r1 = rpo2r[i];
  739. rpo2r[i] = rpo2r[nr - 1 - i];
  740. rpo2r[nr - 1 - i] = r1;
  741. }
  742. for(i = 0; i < nr; i++)
  743. rpo2r[i]->rpo = i;
  744. idom[0] = 0;
  745. for(i = 0; i < nr; i++){
  746. r1 = rpo2r[i];
  747. me = r1->rpo;
  748. d = -1;
  749. if(r1->p1 != R && r1->p1->rpo < me)
  750. d = r1->p1->rpo;
  751. for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
  752. if(r1->rpo < me)
  753. d = rpolca(idom, d, r1->rpo);
  754. idom[i] = d;
  755. }
  756. for(i = 0; i < nr; i++){
  757. r1 = rpo2r[i];
  758. r1->loop++;
  759. if(r1->p2 != R && loophead(idom, r1))
  760. loopmark(rpo2r, i, r1);
  761. }
  762. }
  763. void
  764. synch(Reg *r, Bits dif)
  765. {
  766. Reg *r1;
  767. int z;
  768. for(r1 = r; r1 != R; r1 = r1->s1) {
  769. for(z=0; z<BITS; z++) {
  770. dif.b[z] = (dif.b[z] &
  771. ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
  772. r1->set.b[z] | r1->regdiff.b[z];
  773. if(dif.b[z] != r1->regdiff.b[z]) {
  774. r1->regdiff.b[z] = dif.b[z];
  775. change++;
  776. }
  777. }
  778. if(r1->active)
  779. break;
  780. r1->active = 1;
  781. for(z=0; z<BITS; z++)
  782. dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
  783. if(r1->s2 != R)
  784. synch(r1->s2, dif);
  785. }
  786. }
  787. uint32_t
  788. allreg(uint32_t b, Rgn *r)
  789. {
  790. Var *v;
  791. int i;
  792. v = var + r->varno;
  793. r->regno = 0;
  794. switch(v->etype) {
  795. default:
  796. diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
  797. break;
  798. case TCHAR:
  799. case TUCHAR:
  800. case TSHORT:
  801. case TUSHORT:
  802. case TINT:
  803. case TUINT:
  804. case TLONG:
  805. case TULONG:
  806. case TIND:
  807. case TARRAY:
  808. i = BtoR(~b);
  809. if(i && r->cost >= 0) {
  810. r->regno = i;
  811. return RtoB(i);
  812. }
  813. break;
  814. case TDOUBLE:
  815. case TFLOAT:
  816. i = BtoF(~b);
  817. if(i && r->cost >= 0) {
  818. r->regno = i+NREG;
  819. return FtoB(i);
  820. }
  821. break;
  822. }
  823. return 0;
  824. }
  825. void
  826. paint1(Reg *r, int bn)
  827. {
  828. Reg *r1;
  829. Prog *p;
  830. int z;
  831. uint32_t bb;
  832. z = bn/32;
  833. bb = 1L<<(bn%32);
  834. if(r->act.b[z] & bb)
  835. return;
  836. for(;;) {
  837. if(!(r->refbehind.b[z] & bb))
  838. break;
  839. r1 = r->p1;
  840. if(r1 == R)
  841. break;
  842. if(!(r1->refahead.b[z] & bb))
  843. break;
  844. if(r1->act.b[z] & bb)
  845. break;
  846. r = r1;
  847. }
  848. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
  849. change -= CLOAD * r->loop;
  850. if(debug['R'] && debug['v'])
  851. print("%ld%P\tld %B $%d\n", r->loop,
  852. r->prog, blsh(bn), change);
  853. }
  854. for(;;) {
  855. r->act.b[z] |= bb;
  856. p = r->prog;
  857. if(r->use1.b[z] & bb) {
  858. change += CREF * r->loop;
  859. if(debug['R'] && debug['v'])
  860. print("%ld%P\tu1 %B $%d\n", r->loop,
  861. p, blsh(bn), change);
  862. }
  863. if((r->use2.b[z]|r->set.b[z]) & bb) {
  864. change += CREF * r->loop;
  865. if(debug['R'] && debug['v'])
  866. print("%ld%P\tu2 %B $%d\n", r->loop,
  867. p, blsh(bn), change);
  868. }
  869. if(STORE(r) & r->regdiff.b[z] & bb) {
  870. change -= CLOAD * r->loop;
  871. if(debug['R'] && debug['v'])
  872. print("%ld%P\tst %B $%d\n", r->loop,
  873. p, blsh(bn), change);
  874. }
  875. if(r->refbehind.b[z] & bb)
  876. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  877. if(r1->refahead.b[z] & bb)
  878. paint1(r1, bn);
  879. if(!(r->refahead.b[z] & bb))
  880. break;
  881. r1 = r->s2;
  882. if(r1 != R)
  883. if(r1->refbehind.b[z] & bb)
  884. paint1(r1, bn);
  885. r = r->s1;
  886. if(r == R)
  887. break;
  888. if(r->act.b[z] & bb)
  889. break;
  890. if(!(r->refbehind.b[z] & bb))
  891. break;
  892. }
  893. }
  894. uint32_t
  895. paint2(Reg *r, int bn)
  896. {
  897. Reg *r1;
  898. int z;
  899. uint32_t bb, vreg;
  900. z = bn/32;
  901. bb = 1L << (bn%32);
  902. vreg = regbits;
  903. if(!(r->act.b[z] & bb))
  904. return vreg;
  905. for(;;) {
  906. if(!(r->refbehind.b[z] & bb))
  907. break;
  908. r1 = r->p1;
  909. if(r1 == R)
  910. break;
  911. if(!(r1->refahead.b[z] & bb))
  912. break;
  913. if(!(r1->act.b[z] & bb))
  914. break;
  915. r = r1;
  916. }
  917. for(;;) {
  918. r->act.b[z] &= ~bb;
  919. vreg |= r->regu;
  920. if(r->refbehind.b[z] & bb)
  921. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  922. if(r1->refahead.b[z] & bb)
  923. vreg |= paint2(r1, bn);
  924. if(!(r->refahead.b[z] & bb))
  925. break;
  926. r1 = r->s2;
  927. if(r1 != R)
  928. if(r1->refbehind.b[z] & bb)
  929. vreg |= paint2(r1, bn);
  930. r = r->s1;
  931. if(r == R)
  932. break;
  933. if(!(r->act.b[z] & bb))
  934. break;
  935. if(!(r->refbehind.b[z] & bb))
  936. break;
  937. }
  938. return vreg;
  939. }
  940. void
  941. paint3(Reg *r, int bn, int32_t rb, int rn)
  942. {
  943. Reg *r1;
  944. Prog *p;
  945. int z;
  946. uint32_t bb;
  947. z = bn/32;
  948. bb = 1L << (bn%32);
  949. if(r->act.b[z] & bb)
  950. return;
  951. for(;;) {
  952. if(!(r->refbehind.b[z] & bb))
  953. break;
  954. r1 = r->p1;
  955. if(r1 == R)
  956. break;
  957. if(!(r1->refahead.b[z] & bb))
  958. break;
  959. if(r1->act.b[z] & bb)
  960. break;
  961. r = r1;
  962. }
  963. if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
  964. addmove(r, bn, rn, 0);
  965. for(;;) {
  966. r->act.b[z] |= bb;
  967. p = r->prog;
  968. if(r->use1.b[z] & bb) {
  969. if(debug['R'])
  970. print("%P", p);
  971. addreg(&p->from, rn);
  972. if(debug['R'])
  973. print("\t.c%P\n", p);
  974. }
  975. if((r->use2.b[z]|r->set.b[z]) & bb) {
  976. if(debug['R'])
  977. print("%P", p);
  978. addreg(&p->to, rn);
  979. if(debug['R'])
  980. print("\t.c%P\n", p);
  981. }
  982. if(STORE(r) & r->regdiff.b[z] & bb)
  983. addmove(r, bn, rn, 1);
  984. r->regu |= rb;
  985. if(r->refbehind.b[z] & bb)
  986. for(r1 = r->p2; r1 != R; r1 = r1->p2link)
  987. if(r1->refahead.b[z] & bb)
  988. paint3(r1, bn, rb, rn);
  989. if(!(r->refahead.b[z] & bb))
  990. break;
  991. r1 = r->s2;
  992. if(r1 != R)
  993. if(r1->refbehind.b[z] & bb)
  994. paint3(r1, bn, rb, rn);
  995. r = r->s1;
  996. if(r == R)
  997. break;
  998. if(r->act.b[z] & bb)
  999. break;
  1000. if(!(r->refbehind.b[z] & bb))
  1001. break;
  1002. }
  1003. }
  1004. void
  1005. addreg(Adr *a, int rn)
  1006. {
  1007. a->sym = 0;
  1008. a->name = D_NONE;
  1009. a->type = D_REG;
  1010. a->reg = rn;
  1011. if(rn >= NREG) {
  1012. a->type = D_FREG;
  1013. a->reg = rn-NREG;
  1014. }
  1015. }
  1016. /*
  1017. * bit reg
  1018. * 0 R3
  1019. * 1 R4
  1020. * ... ...
  1021. * 19 R22
  1022. * 20 R23
  1023. */
  1024. int32_t
  1025. RtoB(int r)
  1026. {
  1027. if(r < 3 || r > 23)
  1028. return 0;
  1029. return 1L << (r-3);
  1030. }
  1031. BtoR(long b)
  1032. {
  1033. b &= 0x001fffffL;
  1034. if(b == 0)
  1035. return 0;
  1036. return bitno(b) + 3;
  1037. }
  1038. /*
  1039. * bit reg
  1040. * 22 F4
  1041. * 23 F6
  1042. * ... ...
  1043. * 31 F22
  1044. */
  1045. int32_t
  1046. FtoB(int f)
  1047. {
  1048. if(f < 4 || f > 22 || (f&1))
  1049. return 0;
  1050. return 1L << (f/2 + 20);
  1051. }
  1052. int
  1053. BtoF(int32_t b)
  1054. {
  1055. b &= 0xffc00000L;
  1056. if(b == 0)
  1057. return 0;
  1058. return bitno(b)*2 - 40;
  1059. }