sched.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. #include "l.h"
  2. enum
  3. {
  4. E_MEM = 1<<3,
  5. E_MEMSP = 1<<4, /* uses offset and size */
  6. E_MEMSB = 1<<5, /* uses offset and size */
  7. ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
  8. DELAY = BRANCH|LOAD|FCMP,
  9. NRWORD = 8,
  10. isdouble = 0,
  11. };
  12. typedef struct Sch Sch;
  13. typedef struct Dep Dep;
  14. struct Dep
  15. {
  16. ulong ireg[NRWORD];
  17. ulong cc;
  18. };
  19. struct Sch
  20. {
  21. Prog p;
  22. Dep set;
  23. Dep used;
  24. long offset;
  25. char size;
  26. char nop;
  27. char comp;
  28. };
  29. void bitset(ulong*, int);
  30. int bittest(ulong*, int);
  31. void regused(Sch*, Prog*);
  32. int depend(Sch*, Sch*);
  33. int conflict(Sch*, Sch*);
  34. int offoverlap(Sch*, Sch*);
  35. void dumpbits(Sch*, Dep*);
  36. void
  37. sched(Prog *p0, Prog *pe)
  38. {
  39. Prog *p, *q;
  40. Sch sch[NSCHED], *s, *t, *u, *se, stmp;
  41. /*
  42. * build side structure
  43. */
  44. s = sch;
  45. for(p=p0;; p=p->link) {
  46. memset(s, 0, sizeof(*s));
  47. s->p = *p;
  48. regused(s, p);
  49. if(debug['X']) {
  50. Bprint(&bso, "%P\t\tmark %x set", &s->p, s->p.mark);
  51. dumpbits(s, &s->set);
  52. Bprint(&bso, "; used");
  53. dumpbits(s, &s->used);
  54. if(s->comp)
  55. Bprint(&bso, "; compound");
  56. if(s->p.mark & LOAD)
  57. Bprint(&bso, "; load");
  58. if(s->p.mark & BRANCH)
  59. Bprint(&bso, "; branch");
  60. if(s->p.mark & FCMP)
  61. Bprint(&bso, "; fcmp");
  62. Bprint(&bso, "\n");
  63. }
  64. if(p == pe)
  65. break;
  66. s++;
  67. }
  68. se = s;
  69. /*
  70. * prepass to move things around
  71. * does nothing, but tries to make
  72. * the actual scheduler work better
  73. */
  74. if(!debug['Y'])
  75. for(s=sch; s<=se; s++) {
  76. if(!(s->p.mark & LOAD))
  77. continue;
  78. /* always good to put nonconflict loads together */
  79. for(t=s+1; t<=se; t++) {
  80. if(!(t->p.mark & LOAD))
  81. continue;
  82. if(t->p.mark & BRANCH)
  83. break;
  84. if(conflict(s, t))
  85. break;
  86. for(u=t-1; u>s; u--)
  87. if(depend(u, t))
  88. goto no11;
  89. u = s+1;
  90. stmp = *t;
  91. memmove(s+2, u, (uchar*)t - (uchar*)u);
  92. *u = stmp;
  93. break;
  94. }
  95. no11:
  96. /* put schedule fodder above load */
  97. for(t=s+1; t<=se; t++) {
  98. if(t->p.mark & BRANCH)
  99. break;
  100. if(s > sch && conflict(s-1, t))
  101. continue;
  102. for(u=t-1; u>=s; u--)
  103. if(depend(t, u))
  104. goto no1;
  105. stmp = *t;
  106. memmove(s+1, s, (uchar*)t - (uchar*)s);
  107. *s = stmp;
  108. if(!(s->p.mark & LOAD))
  109. break;
  110. no1:;
  111. }
  112. }
  113. for(s=se; s>=sch; s--) {
  114. if(!(s->p.mark & DELAY))
  115. continue;
  116. if(s < se)
  117. if(!conflict(s, s+1))
  118. goto out3;
  119. /*
  120. * s is load, s+1 is immediate use of result or end of block
  121. * t is the trial instruction to insert between s and s+1
  122. */
  123. if(!debug['Y'])
  124. for(t=s-1; t>=sch; t--) {
  125. if(t->comp)
  126. if(s->p.mark & BRANCH)
  127. goto no2;
  128. if(t->p.mark & DELAY)
  129. if(s >= se || conflict(t, s+1))
  130. goto no2;
  131. for(u=t+1; u<=s; u++)
  132. if(depend(u, t))
  133. goto no2;
  134. goto out2;
  135. no2:;
  136. }
  137. if(debug['X'])
  138. Bprint(&bso, "?l%P\n", &s->p);
  139. if(s->p.mark & BRANCH)
  140. s->nop = 1;
  141. if(debug['v']) {
  142. if(s->p.mark & LOAD) {
  143. nop.load.count++;
  144. nop.load.outof++;
  145. }
  146. if(s->p.mark & BRANCH) {
  147. nop.branch.count++;
  148. nop.branch.outof++;
  149. }
  150. if(s->p.mark & FCMP) {
  151. nop.fcmp.count++;
  152. nop.fcmp.outof++;
  153. }
  154. }
  155. continue;
  156. out2:
  157. if(debug['X']) {
  158. Bprint(&bso, "!l%P\n", &t->p);
  159. Bprint(&bso, "%P\n", &s->p);
  160. }
  161. stmp = *t;
  162. memmove(t, t+1, (uchar*)s - (uchar*)t);
  163. *s = stmp;
  164. s--;
  165. out3:
  166. if(debug['v']) {
  167. if(s->p.mark & LOAD)
  168. nop.load.outof++;
  169. if(s->p.mark & BRANCH)
  170. nop.branch.outof++;
  171. if(s->p.mark & FCMP)
  172. nop.fcmp.outof++;
  173. }
  174. }
  175. /*
  176. * put it all back
  177. */
  178. for(s=sch, p=p0; s<=se; s++, p=q) {
  179. q = p->link;
  180. if(q != s->p.link) {
  181. *p = s->p;
  182. p->link = q;
  183. }
  184. while(s->nop--)
  185. addnop(p);
  186. }
  187. if(debug['X']) {
  188. Bprint(&bso, "\n");
  189. Bflush(&bso);
  190. }
  191. }
  192. void
  193. regused(Sch *s, Prog *realp)
  194. {
  195. int c, ar, ad, ld, sz;
  196. ulong m;
  197. Prog *p;
  198. p = &s->p;
  199. s->comp = compound(p);
  200. s->nop = 0;
  201. if(s->comp) {
  202. bitset(s->set.ireg, REGTMP);
  203. bitset(s->used.ireg, REGTMP);
  204. }
  205. ar = 0; /* dest is really reference */
  206. ad = 0; /* source/dest is really address */
  207. ld = 0; /* opcode is load instruction */
  208. sz = 20; /* size of load/store for overlap computation */
  209. /*
  210. * flags based on opcode
  211. */
  212. switch(p->as) {
  213. case ATEXT:
  214. curtext = realp;
  215. autosize = p->to.offset + 4;
  216. ad = 1;
  217. break;
  218. case ACALL:
  219. c = p->reg;
  220. if(c == NREG)
  221. c = REGLINK;
  222. bitset(s->set.ireg, c);
  223. ar = 1;
  224. ad = 1;
  225. break;
  226. case AJMP:
  227. ar = 1;
  228. ad = 1;
  229. break;
  230. case AMOVB:
  231. case AMOVBU:
  232. sz = 1;
  233. ld = 1;
  234. break;
  235. case AMOVH:
  236. case AMOVHU:
  237. sz = 2;
  238. ld = 1;
  239. break;
  240. case AMOVF:
  241. case AMOVL:
  242. sz = 4;
  243. ld = 1;
  244. break;
  245. case ADIVL:
  246. case ADIVUL:
  247. case AMULL:
  248. case AMULUL:
  249. case AMULML:
  250. case AMULMUL:
  251. case AMSTEPL:
  252. case AMSTEPUL:
  253. case AMSTEPLL:
  254. case ADSTEPL:
  255. case ADSTEP0L:
  256. case ADSTEPLL:
  257. case ADSTEPRL:
  258. bitset(s->set.ireg, REGQ);
  259. bitset(s->used.ireg, REGQ);
  260. break;
  261. case AMOVD:
  262. sz = 8;
  263. ld = 1;
  264. if(p->from.type == D_REG && p->to.type == D_REG)
  265. break;
  266. case ALOADM:
  267. case ASTOREM:
  268. bitset(s->set.ireg, REGC);
  269. bitset(s->used.ireg, REGC);
  270. break;
  271. }
  272. /*
  273. * flags based on 'to' field
  274. */
  275. c = p->to.class;
  276. if(c == 0) {
  277. c = aclass(&p->to) + 1;
  278. p->to.class = c;
  279. }
  280. c--;
  281. switch(c) {
  282. default:
  283. print("unknown class %d %D\n", c, &p->to);
  284. case C_ZCON:
  285. case C_SCON:
  286. case C_MCON:
  287. case C_PCON:
  288. case C_NCON:
  289. case C_LCON:
  290. case C_NONE:
  291. case C_SBRA:
  292. case C_LBRA:
  293. break;
  294. case C_ZOREG:
  295. case C_SOREG:
  296. case C_MOREG:
  297. case C_POREG:
  298. case C_NOREG:
  299. case C_LOREG:
  300. c = p->to.reg;
  301. bitset(s->used.ireg, c);
  302. if(ad)
  303. break;
  304. s->size = sz;
  305. s->offset = regoff(&p->to);
  306. m = ANYMEM;
  307. if(c == REGSP)
  308. m = E_MEMSP;
  309. if(ar)
  310. s->used.cc |= m;
  311. else
  312. s->set.cc |= m;
  313. break;
  314. case C_ZACON:
  315. case C_SACON:
  316. case C_MACON:
  317. case C_PACON:
  318. case C_NACON:
  319. case C_LACON:
  320. bitset(s->used.ireg, REGSP);
  321. break;
  322. case C_REG:
  323. if(ar)
  324. bitset(s->used.ireg, p->to.reg);
  325. else
  326. bitset(s->set.ireg, p->to.reg);
  327. break;
  328. case C_ZAUTO:
  329. case C_SAUTO:
  330. case C_MAUTO:
  331. case C_PAUTO:
  332. case C_NAUTO:
  333. case C_LAUTO:
  334. bitset(s->used.ireg, REGSP);
  335. if(ad)
  336. break;
  337. s->size = sz;
  338. s->offset = regoff(&p->to);
  339. if(ar)
  340. s->used.cc |= E_MEMSP;
  341. else
  342. s->set.cc |= E_MEMSP;
  343. break;
  344. case C_SEXT:
  345. case C_LEXT:
  346. if(ad)
  347. break;
  348. s->size = sz;
  349. s->offset = regoff(&p->to);
  350. if(ar)
  351. s->used.cc |= E_MEMSB;
  352. else
  353. s->set.cc |= E_MEMSB;
  354. break;
  355. }
  356. /*
  357. * flags based on 'from' field
  358. */
  359. c = p->from.class;
  360. if(c == 0) {
  361. c = aclass(&p->from) + 1;
  362. p->from.class = c;
  363. }
  364. c--;
  365. switch(c) {
  366. default:
  367. print("unknown class %d %D\n", c, &p->from);
  368. case C_ZCON:
  369. case C_SCON:
  370. case C_MCON:
  371. case C_PCON:
  372. case C_NCON:
  373. case C_LCON:
  374. case C_NONE:
  375. case C_SBRA:
  376. case C_LBRA:
  377. break;
  378. case C_ZOREG:
  379. case C_SOREG:
  380. case C_MOREG:
  381. case C_POREG:
  382. case C_NOREG:
  383. case C_LOREG:
  384. c = p->from.reg;
  385. bitset(s->used.ireg, c);
  386. if(ld)
  387. p->mark |= LOAD;
  388. s->size = sz;
  389. s->offset = regoff(&p->from);
  390. m = ANYMEM;
  391. if(c == REGSP)
  392. m = E_MEMSP;
  393. s->used.cc |= m;
  394. break;
  395. case C_ZACON:
  396. case C_SACON:
  397. case C_MACON:
  398. case C_PACON:
  399. case C_NACON:
  400. case C_LACON:
  401. bitset(s->used.ireg, REGSP);
  402. break;
  403. case C_REG:
  404. bitset(s->used.ireg, p->from.reg);
  405. if(isdouble)
  406. bitset(s->used.ireg, p->from.reg+1);
  407. break;
  408. case C_ZAUTO:
  409. case C_SAUTO:
  410. case C_MAUTO:
  411. case C_PAUTO:
  412. case C_NAUTO:
  413. case C_LAUTO:
  414. bitset(s->used.ireg, REGSP);
  415. if(ld)
  416. p->mark |= LOAD;
  417. if(ad)
  418. break;
  419. s->size = sz;
  420. s->offset = regoff(&p->from);
  421. s->used.cc |= E_MEMSP;
  422. break;
  423. case C_SEXT:
  424. case C_LEXT:
  425. if(ld)
  426. p->mark |= LOAD;
  427. if(ad)
  428. break;
  429. s->size = sz;
  430. s->offset = regoff(&p->from);
  431. s->used.cc |= E_MEMSB;
  432. break;
  433. }
  434. c = p->reg;
  435. if(c != NREG) {
  436. if(isdouble) {
  437. bitset(s->used.ireg, c);
  438. bitset(s->used.ireg, c+1);
  439. } else
  440. bitset(s->used.ireg, c);
  441. }
  442. }
  443. /*
  444. * test to see if 2 instrictions can be
  445. * interchanged without changing semantics
  446. */
  447. int
  448. depend(Sch *sa, Sch *sb)
  449. {
  450. ulong x;
  451. int i;
  452. for(i=0; i<NRWORD; i++) {
  453. if(sa->set.ireg[i] & (sb->set.ireg[i]|sb->used.ireg[i]))
  454. return 1;
  455. if(sb->set.ireg[i] & sa->used.ireg[i])
  456. return 1;
  457. }
  458. /*
  459. * special case.
  460. * loads from same address cannot pass.
  461. * this is for hardware fifo's and the like
  462. */
  463. if(sa->used.cc & sb->used.cc & E_MEM)
  464. if(sa->p.reg == sb->p.reg)
  465. if(regoff(&sa->p.from) == regoff(&sb->p.from))
  466. return 1;
  467. x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
  468. (sb->set.cc & sa->used.cc);
  469. if(x) {
  470. /*
  471. * allow SB and SP to pass each other.
  472. * allow SB to pass SB iff doffsets are ok
  473. * anything else conflicts
  474. */
  475. if(x != E_MEMSP && x != E_MEMSB)
  476. return 1;
  477. x = sa->set.cc | sb->set.cc |
  478. sa->used.cc | sb->used.cc;
  479. if(x & E_MEM)
  480. return 1;
  481. if(offoverlap(sa, sb))
  482. return 1;
  483. }
  484. return 0;
  485. }
  486. int
  487. offoverlap(Sch *sa, Sch *sb)
  488. {
  489. if(sa->offset < sb->offset) {
  490. if(sa->offset+sa->size > sb->offset)
  491. return 1;
  492. return 0;
  493. }
  494. if(sb->offset+sb->size > sa->offset)
  495. return 1;
  496. return 0;
  497. }
  498. /*
  499. * test 2 adjacent instructions
  500. * and find out if inserted instructions
  501. * are desired to prevent stalls.
  502. */
  503. int
  504. conflict(Sch *sa, Sch *sb)
  505. {
  506. int i;
  507. for(i=0; i<NRWORD; i++)
  508. if(sa->set.ireg[i] & sb->used.ireg[i])
  509. return 1;
  510. if(sa->set.cc & sb->used.cc)
  511. return 1;
  512. return 0;
  513. }
  514. int
  515. compound(Prog *p)
  516. {
  517. Optab *o;
  518. o = oplook(p);
  519. if(o->size != 4)
  520. return 1;
  521. return 0;
  522. }
  523. void
  524. dumpbits(Sch *s, Dep *d)
  525. {
  526. int i;
  527. for(i=0; i<NRWORD*32; i++)
  528. if(bittest(d->ireg, i))
  529. Bprint(&bso, " R%d", i);
  530. for(i=0; i<32; i++)
  531. switch(d->cc & (1<<i)) {
  532. default:
  533. Bprint(&bso, " CC%d", i);
  534. case 0:
  535. break;
  536. case E_MEM:
  537. Bprint(&bso, " MEM.%d", s->size);
  538. break;
  539. case E_MEMSB:
  540. Bprint(&bso, " SB.%ld.%d", s->offset, s->size);
  541. break;
  542. case E_MEMSP:
  543. Bprint(&bso, " SP.%ld.%d", s->offset, s->size);
  544. break;
  545. }
  546. }
  547. void
  548. bitset(ulong *b, int r)
  549. {
  550. b[r>>5] |= 1<<(r&31);
  551. }
  552. int
  553. bittest(ulong *b, int r)
  554. {
  555. if(b[r>>5] & 1<<(r&31))
  556. return 1;
  557. return 0;
  558. }