sched.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695
  1. #include "l.h"
  2. enum
  3. {
  4. E_HILO = 1<<0,
  5. E_FCR = 1<<1,
  6. E_MCR = 1<<2,
  7. E_MEM = 1<<3,
  8. E_MEMSP = 1<<4, /* uses offset and size */
  9. E_MEMSB = 1<<5, /* uses offset and size */
  10. ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
  11. DELAY = BRANCH|LOAD|FCMP,
  12. };
  13. typedef struct Sch Sch;
  14. typedef struct Dep Dep;
  15. struct Dep
  16. {
  17. ulong ireg;
  18. ulong freg;
  19. ulong cc;
  20. };
  21. struct Sch
  22. {
  23. Prog p;
  24. Dep set;
  25. Dep used;
  26. long soffset;
  27. char size;
  28. char nop;
  29. char comp;
  30. };
  31. void regsused(Sch*, Prog*);
  32. int depend(Sch*, Sch*);
  33. int conflict(Sch*, Sch*);
  34. int offoverlap(Sch*, Sch*);
  35. void dumpbits(Sch*, Dep*);
  36. void
  37. sched(Prog *p0, Prog *pe)
  38. {
  39. Prog *p, *q;
  40. Sch sch[NSCHED], *s, *t, *u, *se, stmp;
  41. /*
  42. * build side structure
  43. */
  44. s = sch;
  45. for(p=p0;; p=p->link) {
  46. memset(s, 0, sizeof(*s));
  47. s->p = *p;
  48. regsused(s, p);
  49. if(debug['X']) {
  50. Bprint(&bso, "%P\t\tset", &s->p);
  51. dumpbits(s, &s->set);
  52. Bprint(&bso, "; used");
  53. dumpbits(s, &s->used);
  54. if(s->comp)
  55. Bprint(&bso, "; compound");
  56. if(s->p.mark & LOAD)
  57. Bprint(&bso, "; load");
  58. if(s->p.mark & BRANCH)
  59. Bprint(&bso, "; branch");
  60. if(s->p.mark & FCMP)
  61. Bprint(&bso, "; fcmp");
  62. Bprint(&bso, "\n");
  63. }
  64. if(p == pe)
  65. break;
  66. s++;
  67. }
  68. se = s;
  69. /*
  70. * prepass to move things around
  71. * does nothing, but tries to make
  72. * the actual scheduler work better
  73. */
  74. for(s=sch; s<=se; s++) {
  75. if(!(s->p.mark & LOAD))
  76. continue;
  77. /* always good to put nonconflict loads together */
  78. for(t=s+1; t<=se; t++) {
  79. if(!(t->p.mark & LOAD))
  80. continue;
  81. if(t->p.mark & BRANCH)
  82. break;
  83. if(conflict(s, t))
  84. break;
  85. for(u=t-1; u>s; u--)
  86. if(depend(u, t))
  87. goto no11;
  88. u = s+1;
  89. stmp = *t;
  90. memmove(s+2, u, (uchar*)t - (uchar*)u);
  91. *u = stmp;
  92. break;
  93. }
  94. no11:
  95. /* put schedule fodder above load */
  96. for(t=s+1; t<=se; t++) {
  97. if(t->p.mark & BRANCH)
  98. break;
  99. if(s > sch && conflict(s-1, t))
  100. continue;
  101. for(u=t-1; u>=s; u--)
  102. if(depend(t, u))
  103. goto no1;
  104. stmp = *t;
  105. memmove(s+1, s, (uchar*)t - (uchar*)s);
  106. *s = stmp;
  107. if(!(s->p.mark & LOAD))
  108. break;
  109. no1:;
  110. }
  111. }
  112. for(s=se; s>=sch; s--) {
  113. if(!(s->p.mark & DELAY))
  114. continue;
  115. if(s < se)
  116. if(!conflict(s, s+1))
  117. goto out3;
  118. /*
  119. * s is load, s+1 is immediate use of result or end of block
  120. * t is the trial instruction to insert between s and s+1
  121. */
  122. if(!debug['Y'])
  123. for(t=s-1; t>=sch; t--) {
  124. if(t->comp)
  125. if(s->p.mark & BRANCH)
  126. goto no2;
  127. if(t->p.mark & DELAY)
  128. if(s >= se || conflict(t, s+1))
  129. goto no2;
  130. for(u=t+1; u<=s; u++)
  131. if(depend(u, t))
  132. goto no2;
  133. goto out2;
  134. no2:;
  135. }
  136. if(debug['X'])
  137. Bprint(&bso, "?l%P\n", &s->p);
  138. s->nop = 1;
  139. if(debug['v']) {
  140. if(s->p.mark & LOAD) {
  141. nop.load.count++;
  142. nop.load.outof++;
  143. }
  144. if(s->p.mark & BRANCH) {
  145. nop.branch.count++;
  146. nop.branch.outof++;
  147. }
  148. if(s->p.mark & FCMP) {
  149. nop.fcmp.count++;
  150. nop.fcmp.outof++;
  151. }
  152. }
  153. continue;
  154. out2:
  155. if(debug['X']) {
  156. Bprint(&bso, "!l%P\n", &t->p);
  157. Bprint(&bso, "%P\n", &s->p);
  158. }
  159. stmp = *t;
  160. memmove(t, t+1, (uchar*)s - (uchar*)t);
  161. *s = stmp;
  162. s--;
  163. out3:
  164. if(debug['v']) {
  165. if(s->p.mark & LOAD)
  166. nop.load.outof++;
  167. if(s->p.mark & BRANCH)
  168. nop.branch.outof++;
  169. if(s->p.mark & FCMP)
  170. nop.fcmp.outof++;
  171. }
  172. }
  173. /* Avoid HI/LO use->set */
  174. t = sch+1;
  175. for(s=sch; s<se-1; s++, t++) {
  176. if((s->used.cc & E_HILO) == 0)
  177. continue;
  178. if(t->set.cc & E_HILO)
  179. s->nop = 2;
  180. }
  181. /*
  182. * put it all back
  183. */
  184. for(s=sch, p=p0; s<=se; s++, p=q) {
  185. q = p->link;
  186. if(q != s->p.link) {
  187. *p = s->p;
  188. p->link = q;
  189. }
  190. while(s->nop--)
  191. addnop(p);
  192. }
  193. if(debug['X']) {
  194. Bprint(&bso, "\n");
  195. Bflush(&bso);
  196. }
  197. }
  198. void
  199. regsused(Sch *s, Prog *realp)
  200. {
  201. int c, ar, ad, ld, sz;
  202. ulong m;
  203. Prog *p;
  204. p = &s->p;
  205. s->comp = compound(p);
  206. s->nop = 0;
  207. if(s->comp) {
  208. s->set.ireg |= 1<<REGTMP;
  209. s->used.ireg |= 1<<REGTMP;
  210. }
  211. ar = 0; /* dest is really reference */
  212. ad = 0; /* source/dest is really address */
  213. ld = 0; /* opcode is load instruction */
  214. sz = 20; /* size of load/store for overlap computation */
  215. /*
  216. * flags based on opcode
  217. */
  218. switch(p->as) {
  219. case ATEXT:
  220. curtext = realp;
  221. autosize = p->to.offset + 4;
  222. ad = 1;
  223. break;
  224. case AJAL:
  225. c = p->reg;
  226. if(c == NREG)
  227. c = REGLINK;
  228. s->set.ireg |= 1<<c;
  229. ar = 1;
  230. ad = 1;
  231. break;
  232. case ABGEZAL:
  233. case ABLTZAL:
  234. s->set.ireg |= 1<<REGLINK;
  235. case ABEQ:
  236. case ABGEZ:
  237. case ABGTZ:
  238. case ABLEZ:
  239. case ABLTZ:
  240. case ABNE:
  241. ar = 1;
  242. ad = 1;
  243. break;
  244. case ABFPT:
  245. case ABFPF:
  246. ad = 1;
  247. s->used.cc |= E_FCR;
  248. break;
  249. case ACMPEQD:
  250. case ACMPEQF:
  251. case ACMPGED:
  252. case ACMPGEF:
  253. case ACMPGTD:
  254. case ACMPGTF:
  255. ar = 1;
  256. s->set.cc |= E_FCR;
  257. p->mark |= FCMP;
  258. break;
  259. case AJMP:
  260. ar = 1;
  261. ad = 1;
  262. break;
  263. case AMOVB:
  264. case AMOVBU:
  265. sz = 1;
  266. ld = 1;
  267. break;
  268. case AMOVH:
  269. case AMOVHU:
  270. sz = 2;
  271. ld = 1;
  272. break;
  273. case AMOVF:
  274. case AMOVW:
  275. case AMOVWL:
  276. case AMOVWR:
  277. sz = 4;
  278. ld = 1;
  279. break;
  280. case AMOVD:
  281. case AMOVV:
  282. case AMOVVL:
  283. case AMOVVR:
  284. sz = 8;
  285. ld = 1;
  286. break;
  287. case ADIV:
  288. case ADIVU:
  289. case AMUL:
  290. case AMULU:
  291. case AREM:
  292. case AREMU:
  293. s->set.cc = E_HILO;
  294. case AADD:
  295. case AADDU:
  296. case AAND:
  297. case ANOR:
  298. case AOR:
  299. case ASGT:
  300. case ASGTU:
  301. case ASLL:
  302. case ASRA:
  303. case ASRL:
  304. case ASUB:
  305. case ASUBU:
  306. case AXOR:
  307. case AADDD:
  308. case AADDF:
  309. case AADDW:
  310. case ASUBD:
  311. case ASUBF:
  312. case ASUBW:
  313. case AMULF:
  314. case AMULD:
  315. case AMULW:
  316. case ADIVF:
  317. case ADIVD:
  318. case ADIVW:
  319. if(p->reg == NREG) {
  320. if(p->to.type == D_REG || p->to.type == D_FREG)
  321. p->reg = p->to.reg;
  322. if(p->reg == NREG)
  323. print("botch %P\n", p);
  324. }
  325. break;
  326. }
  327. /*
  328. * flags based on 'to' field
  329. */
  330. c = p->to.class;
  331. if(c == 0) {
  332. c = aclass(&p->to) + 1;
  333. p->to.class = c;
  334. }
  335. c--;
  336. switch(c) {
  337. default:
  338. print("unknown class %d %D\n", c, &p->to);
  339. case C_ZCON:
  340. case C_SCON:
  341. case C_ADD0CON:
  342. case C_AND0CON:
  343. case C_ADDCON:
  344. case C_ANDCON:
  345. case C_UCON:
  346. case C_LCON:
  347. case C_NONE:
  348. case C_SBRA:
  349. case C_LBRA:
  350. break;
  351. case C_HI:
  352. case C_LO:
  353. s->set.cc |= E_HILO;
  354. break;
  355. case C_FCREG:
  356. s->set.cc |= E_FCR;
  357. break;
  358. case C_MREG:
  359. s->set.cc |= E_MCR;
  360. break;
  361. case C_ZOREG:
  362. case C_SOREG:
  363. case C_LOREG:
  364. c = p->to.reg;
  365. s->used.ireg |= 1<<c;
  366. if(ad)
  367. break;
  368. s->size = sz;
  369. s->soffset = regoff(&p->to);
  370. m = ANYMEM;
  371. if(c == REGSB)
  372. m = E_MEMSB;
  373. if(c == REGSP)
  374. m = E_MEMSP;
  375. if(ar)
  376. s->used.cc |= m;
  377. else
  378. s->set.cc |= m;
  379. break;
  380. case C_SACON:
  381. case C_LACON:
  382. s->used.ireg |= 1<<REGSP;
  383. break;
  384. case C_SECON:
  385. case C_LECON:
  386. s->used.ireg |= 1<<REGSB;
  387. break;
  388. case C_REG:
  389. if(ar)
  390. s->used.ireg |= 1<<p->to.reg;
  391. else
  392. s->set.ireg |= 1<<p->to.reg;
  393. break;
  394. case C_FREG:
  395. /* do better -- determine double prec */
  396. if(ar) {
  397. s->used.freg |= 1<<p->to.reg;
  398. s->used.freg |= 1<<(p->to.reg|1);
  399. } else {
  400. s->set.freg |= 1<<p->to.reg;
  401. s->set.freg |= 1<<(p->to.reg|1);
  402. }
  403. if(ld && p->from.type == D_REG)
  404. p->mark |= LOAD;
  405. break;
  406. case C_SAUTO:
  407. case C_LAUTO:
  408. s->used.ireg |= 1<<REGSP;
  409. if(ad)
  410. break;
  411. s->size = sz;
  412. s->soffset = regoff(&p->to);
  413. if(ar)
  414. s->used.cc |= E_MEMSP;
  415. else
  416. s->set.cc |= E_MEMSP;
  417. break;
  418. case C_SEXT:
  419. case C_LEXT:
  420. s->used.ireg |= 1<<REGSB;
  421. if(ad)
  422. break;
  423. s->size = sz;
  424. s->soffset = regoff(&p->to);
  425. if(ar)
  426. s->used.cc |= E_MEMSB;
  427. else
  428. s->set.cc |= E_MEMSB;
  429. break;
  430. }
  431. /*
  432. * flags based on 'from' field
  433. */
  434. c = p->from.class;
  435. if(c == 0) {
  436. c = aclass(&p->from) + 1;
  437. p->from.class = c;
  438. }
  439. c--;
  440. switch(c) {
  441. default:
  442. print("unknown class %d %D\n", c, &p->from);
  443. case C_ZCON:
  444. case C_SCON:
  445. case C_ADD0CON:
  446. case C_AND0CON:
  447. case C_ADDCON:
  448. case C_ANDCON:
  449. case C_UCON:
  450. case C_LCON:
  451. case C_NONE:
  452. case C_SBRA:
  453. case C_LBRA:
  454. break;
  455. case C_HI:
  456. case C_LO:
  457. s->used.cc |= E_HILO;
  458. break;
  459. case C_FCREG:
  460. s->used.cc |= E_FCR;
  461. break;
  462. case C_MREG:
  463. s->used.cc |= E_MCR;
  464. break;
  465. case C_ZOREG:
  466. case C_SOREG:
  467. case C_LOREG:
  468. c = p->from.reg;
  469. s->used.ireg |= 1<<c;
  470. if(ld)
  471. p->mark |= LOAD;
  472. s->size = sz;
  473. s->soffset = regoff(&p->from);
  474. m = ANYMEM;
  475. if(c == REGSB)
  476. m = E_MEMSB;
  477. if(c == REGSP)
  478. m = E_MEMSP;
  479. s->used.cc |= m;
  480. break;
  481. case C_SACON:
  482. case C_LACON:
  483. s->used.ireg |= 1<<REGSP;
  484. break;
  485. case C_SECON:
  486. case C_LECON:
  487. s->used.ireg |= 1<<REGSB;
  488. break;
  489. case C_REG:
  490. s->used.ireg |= 1<<p->from.reg;
  491. break;
  492. case C_FREG:
  493. /* do better -- determine double prec */
  494. s->used.freg |= 1<<p->from.reg;
  495. s->used.freg |= 1<<(p->from.reg|1);
  496. if(ld && p->to.type == D_REG)
  497. p->mark |= LOAD;
  498. break;
  499. case C_SAUTO:
  500. case C_LAUTO:
  501. s->used.ireg |= 1<<REGSP;
  502. if(ld)
  503. p->mark |= LOAD;
  504. if(ad)
  505. break;
  506. s->size = sz;
  507. s->soffset = regoff(&p->from);
  508. s->used.cc |= E_MEMSP;
  509. break;
  510. case C_SEXT:
  511. case C_LEXT:
  512. s->used.ireg |= 1<<REGSB;
  513. if(ld)
  514. p->mark |= LOAD;
  515. if(ad)
  516. break;
  517. s->size = sz;
  518. s->soffset = regoff(&p->from);
  519. s->used.cc |= E_MEMSB;
  520. break;
  521. }
  522. c = p->reg;
  523. if(c != NREG) {
  524. if(p->from.type == D_FREG || p->to.type == D_FREG) {
  525. s->used.freg |= 1<<c;
  526. s->used.freg |= 1<<(c|1);
  527. } else
  528. s->used.ireg |= 1<<c;
  529. }
  530. s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */
  531. }
  532. /*
  533. * test to see if 2 instrictions can be
  534. * interchanged without changing semantics
  535. */
  536. int
  537. depend(Sch *sa, Sch *sb)
  538. {
  539. ulong x;
  540. if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
  541. return 1;
  542. if(sb->set.ireg & sa->used.ireg)
  543. return 1;
  544. if(sa->set.freg & (sb->set.freg|sb->used.freg))
  545. return 1;
  546. if(sb->set.freg & sa->used.freg)
  547. return 1;
  548. /*
  549. * special case.
  550. * loads from same address cannot pass.
  551. * this is for hardware fifo's and the like
  552. */
  553. if(sa->used.cc & sb->used.cc & E_MEM)
  554. if(sa->p.reg == sb->p.reg)
  555. if(regoff(&sa->p.from) == regoff(&sb->p.from))
  556. return 1;
  557. x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
  558. (sb->set.cc & sa->used.cc);
  559. if(x) {
  560. /*
  561. * allow SB and SP to pass each other.
  562. * allow SB to pass SB iff doffsets are ok
  563. * anything else conflicts
  564. */
  565. if(x != E_MEMSP && x != E_MEMSB)
  566. return 1;
  567. x = sa->set.cc | sb->set.cc |
  568. sa->used.cc | sb->used.cc;
  569. if(x & E_MEM)
  570. return 1;
  571. if(offoverlap(sa, sb))
  572. return 1;
  573. }
  574. return 0;
  575. }
  576. int
  577. offoverlap(Sch *sa, Sch *sb)
  578. {
  579. if(sa->soffset < sb->soffset) {
  580. if(sa->soffset+sa->size > sb->soffset)
  581. return 1;
  582. return 0;
  583. }
  584. if(sb->soffset+sb->size > sa->soffset)
  585. return 1;
  586. return 0;
  587. }
  588. /*
  589. * test 2 adjacent instructions
  590. * and find out if inserted instructions
  591. * are desired to prevent stalls.
  592. */
  593. int
  594. conflict(Sch *sa, Sch *sb)
  595. {
  596. if(sa->set.ireg & sb->used.ireg)
  597. return 1;
  598. if(sa->set.freg & sb->used.freg)
  599. return 1;
  600. if(sa->set.cc & sb->used.cc)
  601. return 1;
  602. return 0;
  603. }
  604. int
  605. compound(Prog *p)
  606. {
  607. Optab *o;
  608. o = oplook(p);
  609. if(o->size != 4)
  610. return 1;
  611. if(p->to.type == D_REG && p->to.reg == REGSB)
  612. return 1;
  613. return 0;
  614. }
  615. void
  616. dumpbits(Sch *s, Dep *d)
  617. {
  618. int i;
  619. for(i=0; i<32; i++)
  620. if(d->ireg & (1<<i))
  621. Bprint(&bso, " R%d", i);
  622. for(i=0; i<32; i++)
  623. if(d->freg & (1<<i))
  624. Bprint(&bso, " F%d", i);
  625. for(i=0; i<32; i++)
  626. switch(d->cc & (1<<i)) {
  627. default:
  628. break;
  629. case E_HILO:
  630. Bprint(&bso, " HILO");
  631. break;
  632. case E_FCR:
  633. Bprint(&bso, " FCR");
  634. break;
  635. case E_MCR:
  636. Bprint(&bso, " MCR");
  637. break;
  638. case E_MEM:
  639. Bprint(&bso, " MEM%d", s->size);
  640. break;
  641. case E_MEMSB:
  642. Bprint(&bso, " SB%d", s->size);
  643. break;
  644. case E_MEMSP:
  645. Bprint(&bso, " SP%d", s->size);
  646. break;
  647. }
  648. }