sched.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. #include "l.h"
  2. enum
  3. {
  4. E_ICC = 1<<0,
  5. E_FCC = 1<<1,
  6. E_MEM = 1<<2,
  7. E_MEMSP = 1<<3, /* uses offset and size */
  8. E_MEMSB = 1<<4, /* uses offset and size */
  9. ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
  10. ALL = ~0,
  11. };
  12. typedef struct Sch Sch;
  13. typedef struct Dep Dep;
  14. struct Dep
  15. {
  16. ulong ireg;
  17. ulong freg;
  18. ulong cc;
  19. };
  20. struct Sch
  21. {
  22. Prog p;
  23. Dep set;
  24. Dep used;
  25. long offset;
  26. char size;
  27. char nop;
  28. char comp;
  29. };
  30. void regused(Sch*, Prog*);
  31. int depend(Sch*, Sch*);
  32. int conflict(Sch*, Sch*);
  33. int offoverlap(Sch*, Sch*);
  34. void dumpbits(Sch*, Dep*);
  35. void
  36. sched(Prog *p0, Prog *pe)
  37. {
  38. Prog *p, *q;
  39. Sch sch[NSCHED], *s, *t, *u, *se, stmp;
  40. /*
  41. * build side structure
  42. */
  43. s = sch;
  44. for(p=p0;; p=p->link) {
  45. memset(s, 0, sizeof(*s));
  46. s->p = *p;
  47. regused(s, p);
  48. if(debug['X']) {
  49. Bprint(&bso, "%P\tset", &s->p);
  50. dumpbits(s, &s->set);
  51. Bprint(&bso, "; used");
  52. dumpbits(s, &s->used);
  53. if(s->comp)
  54. Bprint(&bso, "; compound");
  55. if(s->p.mark & LOAD)
  56. Bprint(&bso, "; load");
  57. if(s->p.mark & BRANCH)
  58. Bprint(&bso, "; branch");
  59. if(s->p.mark & FCMP)
  60. Bprint(&bso, "; fcmp");
  61. Bprint(&bso, "\n");
  62. }
  63. s++;
  64. if(p == pe)
  65. break;
  66. }
  67. se = s;
  68. for(s=se-1; s>=sch; s--) {
  69. /*
  70. * branch delay slot
  71. */
  72. if(s->p.mark & BRANCH) {
  73. /* t is the trial instruction to use */
  74. for(t=s-1; t>=sch; t--) {
  75. if(t->comp || (t->p.mark & FCMP))
  76. goto no1;
  77. for(u=t+1; u<=s; u++)
  78. if(depend(u, t))
  79. goto no1;
  80. goto out1;
  81. no1:;
  82. }
  83. if(debug['X'])
  84. Bprint(&bso, "?b%P\n", &s->p);
  85. s->nop = 1;
  86. continue;
  87. out1:
  88. if(debug['X']) {
  89. Bprint(&bso, "!b%P\n", &t->p);
  90. Bprint(&bso, "%P\n", &s->p);
  91. }
  92. stmp = *t;
  93. memmove(t, t+1, (uchar*)s - (uchar*)t);
  94. *s = stmp;
  95. s--;
  96. continue;
  97. }
  98. /*
  99. * load delay. interlocked.
  100. */
  101. if(s->p.mark & LOAD) {
  102. if(s >= se-1)
  103. continue;
  104. if(!conflict(s, (s+1)))
  105. continue;
  106. /*
  107. * s is load, s+1 is immediate use of result
  108. * t is the trial instruction to insert between s and s+1
  109. */
  110. for(t=s-1; t>=sch; t--) {
  111. if(t->p.mark & BRANCH)
  112. goto no2;
  113. if(t->p.mark & FCMP)
  114. if((s+1)->p.mark & BRANCH)
  115. goto no2;
  116. if(t->p.mark & LOAD)
  117. if(conflict(t, (s+1)))
  118. goto no2;
  119. for(u=t+1; u<=s; u++)
  120. if(depend(u, t))
  121. goto no2;
  122. goto out2;
  123. no2:;
  124. }
  125. if(debug['X'])
  126. Bprint(&bso, "?l%P\n", &s->p);
  127. continue;
  128. out2:
  129. if(debug['X']) {
  130. Bprint(&bso, "!l%P\n", &t->p);
  131. Bprint(&bso, "%P\n", &s->p);
  132. }
  133. stmp = *t;
  134. memmove(t, t+1, (uchar*)s - (uchar*)t);
  135. *s = stmp;
  136. s--;
  137. continue;
  138. }
  139. /*
  140. * fop2 delay.
  141. */
  142. if(s->p.mark & FCMP) {
  143. if(s >= se-1) {
  144. s->nop = 1;
  145. continue;
  146. }
  147. if(!((s+1)->p.mark & BRANCH))
  148. continue;
  149. /* t is the trial instruction to use */
  150. for(t=s-1; t>=sch; t--) {
  151. for(u=t+1; u<=s; u++)
  152. if(depend(u, t))
  153. goto no3;
  154. goto out3;
  155. no3:;
  156. }
  157. if(debug['X'])
  158. Bprint(&bso, "?f%P\n", &s->p);
  159. s->nop = 1;
  160. continue;
  161. out3:
  162. if(debug['X']) {
  163. Bprint(&bso, "!f%P\n", &t->p);
  164. Bprint(&bso, "%P\n", &s->p);
  165. }
  166. stmp = *t;
  167. memmove(t, t+1, (uchar*)s - (uchar*)t);
  168. *s = stmp;
  169. s--;
  170. continue;
  171. }
  172. }
  173. /*
  174. * put it all back
  175. */
  176. for(s=sch, p=p0; s<se; s++, p=q) {
  177. q = p->link;
  178. if(q != s->p.link) {
  179. *p = s->p;
  180. p->link = q;
  181. }
  182. if(s->nop)
  183. addnop(p);
  184. }
  185. if(debug['X'])
  186. Bprint(&bso, "\n");
  187. }
  188. void
  189. regused(Sch *s, Prog *realp)
  190. {
  191. int c, ar, ad, ld, sz;
  192. ulong m;
  193. Prog *p;
  194. p = &s->p;
  195. s->comp = compound(p);
  196. s->nop = 0;
  197. if(s->comp) {
  198. s->set.ireg |= 1<<REGTMP;
  199. s->used.ireg |= 1<<REGTMP;
  200. }
  201. ar = 0; /* dest is really reference */
  202. ad = 0; /* source/dest is really address */
  203. ld = 0; /* opcode is load instruction */
  204. sz = 20; /* size of load/store for overlap computation */
  205. /*
  206. * flags based on opcode
  207. */
  208. switch(p->as) {
  209. case ATEXT:
  210. curtext = realp;
  211. autosize = p->to.offset + 4;
  212. ad = 1;
  213. break;
  214. case ABL:
  215. c = p->reg;
  216. if(c == NREG)
  217. c = REGLINK;
  218. s->set.ireg |= 1<<c;
  219. ar = 1;
  220. ad = 1;
  221. break;
  222. case ABR:
  223. ar = 1;
  224. ad = 1;
  225. break;
  226. case ACMP:
  227. s->set.cc |= E_ICC;
  228. ar = 1;
  229. break;
  230. case AFCMPD:
  231. case AFCMPED:
  232. case AFCMPEF:
  233. case AFCMPEX:
  234. case AFCMPF:
  235. case AFCMPX:
  236. s->set.cc |= E_FCC;
  237. ar = 1;
  238. break;
  239. case ABE:
  240. case ABNE:
  241. case ABLE:
  242. case ABG:
  243. case ABL:
  244. case ABGE:
  245. case ABLEU:
  246. case ABGU:
  247. case ABCS:
  248. case ABCC:
  249. case ABNEG:
  250. case ABPOS:
  251. case ABVC:
  252. case ABVS:
  253. s->used.cc |= E_ICC;
  254. ar = 1;
  255. break;
  256. case AFBE:
  257. case AFBLG:
  258. case AFBG:
  259. case AFBLE:
  260. case AFBGE:
  261. case AFBL:
  262. s->used.cc |= E_FCC;
  263. ar = 1;
  264. break;
  265. case AMOVB:
  266. case AMOVBU:
  267. sz = 1;
  268. ld = 1;
  269. break;
  270. case AMOVH:
  271. case AMOVHU:
  272. sz = 2;
  273. ld = 1;
  274. break;
  275. case AFMOVF:
  276. case AMOVW:
  277. sz = 4;
  278. ld = 1;
  279. break;
  280. case AMOVD:
  281. case AFMOVD:
  282. sz = 8;
  283. ld = 1;
  284. break;
  285. case AFMOVX: /* gok */
  286. sz = 16;
  287. ld = 1;
  288. break;
  289. case AADDCC:
  290. case AADDXCC:
  291. case AANDCC:
  292. case AANDNCC:
  293. case AORCC:
  294. case AORNCC:
  295. case ASUBCC:
  296. case ASUBXCC:
  297. case ATADDCC:
  298. case ATADDCCTV:
  299. case ATSUBCC:
  300. case ATSUBCCTV:
  301. case AXNORCC:
  302. case AXORCC:
  303. s->set.cc |= E_ICC;
  304. break;
  305. case ADIV:
  306. case ADIVL:
  307. case AMOD:
  308. case AMODL:
  309. case AMUL:
  310. case AMULSCC:
  311. case ATAS:
  312. s->set.ireg = ALL;
  313. s->set.freg = ALL;
  314. s->set.cc = ALL;
  315. break;
  316. }
  317. /*
  318. * flags based on 'to' field
  319. */
  320. c = p->to.class;
  321. if(c == 0) {
  322. c = aclass(&p->to) + 1;
  323. p->to.class = c;
  324. }
  325. c--;
  326. switch(c) {
  327. default:
  328. print("unknown class %d %D\n", c, &p->to);
  329. case C_ZCON:
  330. case C_SCON:
  331. case C_UCON:
  332. case C_LCON:
  333. case C_NONE:
  334. case C_SBRA:
  335. case C_LBRA:
  336. break;
  337. case C_CREG:
  338. case C_FSR:
  339. case C_FQ:
  340. case C_PREG:
  341. s->set.ireg = ALL;
  342. s->set.freg = ALL;
  343. s->set.cc = ALL;
  344. break;
  345. case C_ZOREG:
  346. case C_SOREG:
  347. case C_LOREG:
  348. case C_ASI:
  349. c = p->to.reg;
  350. s->used.ireg |= 1<<c;
  351. if(ad)
  352. break;
  353. s->size = sz;
  354. s->offset = regoff(&p->to);
  355. m = ANYMEM;
  356. if(c == REGSB)
  357. m = E_MEMSB;
  358. if(c == REGSP)
  359. m = E_MEMSP;
  360. if(ar)
  361. s->used.cc |= m;
  362. else
  363. s->set.cc |= m;
  364. break;
  365. case C_SACON:
  366. case C_LACON:
  367. s->used.ireg |= 1<<REGSP;
  368. break;
  369. case C_SECON:
  370. case C_LECON:
  371. s->used.ireg |= 1<<REGSB;
  372. break;
  373. case C_REG:
  374. if(ar)
  375. s->used.ireg |= 1<<p->to.reg;
  376. else
  377. s->set.ireg |= 1<<p->to.reg;
  378. break;
  379. case C_FREG:
  380. /* do better -- determine double prec */
  381. if(ar) {
  382. s->used.freg |= 1<<p->to.reg;
  383. s->used.freg |= 1<<(p->to.reg|1);
  384. } else {
  385. s->set.freg |= 1<<p->to.reg;
  386. s->set.freg |= 1<<(p->to.reg|1);
  387. }
  388. break;
  389. case C_SAUTO:
  390. case C_LAUTO:
  391. case C_ESAUTO:
  392. case C_OSAUTO:
  393. case C_ELAUTO:
  394. case C_OLAUTO:
  395. s->used.ireg |= 1<<REGSP;
  396. if(ad)
  397. break;
  398. s->size = sz;
  399. s->offset = regoff(&p->to);
  400. if(ar)
  401. s->used.cc |= E_MEMSP;
  402. else
  403. s->set.cc |= E_MEMSP;
  404. break;
  405. case C_SEXT:
  406. case C_LEXT:
  407. case C_ESEXT:
  408. case C_OSEXT:
  409. case C_ELEXT:
  410. case C_OLEXT:
  411. s->used.ireg |= 1<<REGSB;
  412. if(ad)
  413. break;
  414. s->size = sz;
  415. s->offset = regoff(&p->to);
  416. if(ar)
  417. s->used.cc |= E_MEMSB;
  418. else
  419. s->set.cc |= E_MEMSB;
  420. break;
  421. }
  422. /*
  423. * flags based on 'from' field
  424. */
  425. c = p->from.class;
  426. if(c == 0) {
  427. c = aclass(&p->from) + 1;
  428. p->from.class = c;
  429. }
  430. c--;
  431. switch(c) {
  432. default:
  433. print("unknown class %d %D\n", c, &p->from);
  434. case C_ZCON:
  435. case C_SCON:
  436. case C_UCON:
  437. case C_LCON:
  438. case C_NONE:
  439. case C_SBRA:
  440. case C_LBRA:
  441. c = p->from.reg;
  442. if(c != NREG)
  443. s->used.ireg |= 1<<c;
  444. break;
  445. case C_CREG:
  446. case C_FSR:
  447. case C_FQ:
  448. case C_PREG:
  449. s->set.ireg = ALL;
  450. s->set.freg = ALL;
  451. s->set.cc = ALL;
  452. break;
  453. case C_ZOREG:
  454. case C_SOREG:
  455. case C_LOREG:
  456. case C_ASI:
  457. c = p->from.reg;
  458. s->used.ireg |= 1<<c;
  459. if(ld)
  460. p->mark |= LOAD;
  461. if(ad)
  462. break;
  463. s->size = sz;
  464. s->offset = regoff(&p->from);
  465. m = ANYMEM;
  466. if(c == REGSB)
  467. m = E_MEMSB;
  468. if(c == REGSP)
  469. m = E_MEMSP;
  470. s->used.cc |= m;
  471. break;
  472. case C_SACON:
  473. case C_LACON:
  474. s->used.ireg |= 1<<REGSP;
  475. break;
  476. case C_SECON:
  477. case C_LECON:
  478. s->used.ireg |= 1<<REGSB;
  479. break;
  480. case C_REG:
  481. s->used.ireg |= 1<<p->from.reg;
  482. break;
  483. case C_FREG:
  484. /* do better -- determine double prec */
  485. s->used.freg |= 1<<p->from.reg;
  486. s->used.freg |= 1<<(p->from.reg|1);
  487. break;
  488. case C_SAUTO:
  489. case C_LAUTO:
  490. case C_ESAUTO:
  491. case C_ELAUTO:
  492. case C_OSAUTO:
  493. case C_OLAUTO:
  494. s->used.ireg |= 1<<REGSP;
  495. if(ld)
  496. p->mark |= LOAD;
  497. if(ad)
  498. break;
  499. s->size = sz;
  500. s->offset = regoff(&p->from);
  501. s->used.cc |= E_MEMSP;
  502. break;
  503. case C_SEXT:
  504. case C_LEXT:
  505. case C_ESEXT:
  506. case C_ELEXT:
  507. case C_OSEXT:
  508. case C_OLEXT:
  509. s->used.ireg |= 1<<REGSB;
  510. if(ld)
  511. p->mark |= LOAD;
  512. if(ad)
  513. break;
  514. s->size = sz;
  515. s->offset = regoff(&p->from);
  516. s->used.cc |= E_MEMSB;
  517. break;
  518. }
  519. c = p->reg;
  520. if(c != NREG) {
  521. if(p->from.type == D_FREG || p->to.type == D_FREG) {
  522. s->used.freg |= 1<<c;
  523. s->used.freg |= 1<<(c|1);
  524. } else
  525. s->used.ireg |= 1<<c;
  526. }
  527. s->set.ireg &= ~(1<<0); /* R0 cant be set */
  528. }
  529. /*
  530. * test to see if 2 instrictions can be
  531. * interchanged without changing semantics
  532. */
  533. int
  534. depend(Sch *sa, Sch *sb)
  535. {
  536. ulong x;
  537. if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
  538. return 1;
  539. if(sb->set.ireg & sa->used.ireg)
  540. return 1;
  541. if(sa->set.freg & (sb->set.freg|sb->used.freg))
  542. return 1;
  543. if(sb->set.freg & sa->used.freg)
  544. return 1;
  545. x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
  546. (sb->set.cc & sa->used.cc);
  547. if(x) {
  548. /*
  549. * allow SB and SP to pass each other.
  550. * allow SB to pass SB iff doffsets are ok
  551. * anything else conflicts
  552. */
  553. if(x != E_MEMSP && x != E_MEMSB)
  554. return 1;
  555. x = sa->set.cc | sb->set.cc |
  556. sa->used.cc | sb->used.cc;
  557. if(x & E_MEM)
  558. return 1;
  559. if(offoverlap(sa, sb))
  560. return 1;
  561. }
  562. return 0;
  563. }
  564. int
  565. offoverlap(Sch *sa, Sch *sb)
  566. {
  567. if(sa->offset < sb->offset) {
  568. if(sa->offset+sa->size > sb->offset)
  569. return 1;
  570. return 0;
  571. }
  572. if(sb->offset+sb->size > sa->offset)
  573. return 1;
  574. return 0;
  575. }
  576. /*
  577. * test 2 adjacent instructions
  578. * and find out if inserted instructions
  579. * are desired to prevent stalls.
  580. * first instruction is a load instruction.
  581. */
  582. int
  583. conflict(Sch *sa, Sch *sb)
  584. {
  585. if(sa->set.ireg & sb->used.ireg)
  586. return 1;
  587. if(sa->set.freg & sb->used.freg)
  588. return 1;
  589. return 0;
  590. }
  591. int
  592. compound(Prog *p)
  593. {
  594. Optab *o;
  595. o = oplook(p);
  596. if(o->size != 4)
  597. return 1;
  598. if(p->to.type == D_REG && p->to.reg == REGSB)
  599. return 1;
  600. return 0;
  601. }
  602. void
  603. dumpbits(Sch *s, Dep *d)
  604. {
  605. int i;
  606. for(i=0; i<32; i++)
  607. if(d->ireg & (1<<i))
  608. Bprint(&bso, " R%d", i);
  609. for(i=0; i<32; i++)
  610. if(d->freg & (1<<i))
  611. Bprint(&bso, " F%d", i);
  612. for(i=0; i<32; i++)
  613. switch(d->cc & (1<<i)) {
  614. default:
  615. break;
  616. case E_ICC:
  617. Bprint(&bso, " ICC");
  618. break;
  619. case E_FCC:
  620. Bprint(&bso, " FCC");
  621. break;
  622. case E_MEM:
  623. Bprint(&bso, " MEM%d", s->size);
  624. break;
  625. case E_MEMSB:
  626. Bprint(&bso, " SB%d", s->size);
  627. break;
  628. case E_MEMSP:
  629. Bprint(&bso, " SP%d", s->size);
  630. break;
  631. }
  632. }