/* noop.c */
  1. #include "l.h"
  2. /*
  3. * flag: insert nops to prevent three consecutive stores.
  4. * workaround for 24k erratum #48, costs about 10% in text space,
  5. * so only enable this if you need it. test cases are "hoc -e '7^6'"
  6. * and "{ echo moon; echo plot } | scat".
  7. */
  8. enum {
  9. Mips24k = 0,
  10. };
  11. static int
  12. isdblwrdmov(Prog *p)
  13. {
  14. if(p == nil)
  15. return 0;
  16. switch(p->as){
  17. case AMOVD:
  18. case AMOVDF:
  19. case AMOVDW:
  20. case AMOVFD:
  21. case AMOVWD:
  22. case AMOVV:
  23. case AMOVVL:
  24. case AMOVVR:
  25. case AMOVFV:
  26. case AMOVDV:
  27. case AMOVVF:
  28. case AMOVVD:
  29. return 1;
  30. }
  31. return 0;
  32. }
  33. static int
  34. ismove(Prog *p)
  35. {
  36. if(p == nil)
  37. return 0;
  38. switch(p->as){
  39. case AMOVB:
  40. case AMOVBU:
  41. case AMOVF:
  42. case AMOVFW:
  43. case AMOVH:
  44. case AMOVHU:
  45. case AMOVW:
  46. case AMOVWF:
  47. case AMOVWL:
  48. case AMOVWR:
  49. case AMOVWU:
  50. return 1;
  51. }
  52. if(isdblwrdmov(p))
  53. return 1;
  54. return 0;
  55. }
  56. static int
  57. isstore(Prog *p)
  58. {
  59. if(p == nil)
  60. return 0;
  61. if(ismove(p))
  62. switch(p->to.type) {
  63. case D_OREG:
  64. case D_EXTERN:
  65. case D_STATIC:
  66. case D_AUTO:
  67. case D_PARAM:
  68. return 1;
  69. }
  70. return 0;
  71. }
  72. static int
  73. iscondbranch(Prog *p)
  74. {
  75. if(p == nil)
  76. return 0;
  77. switch(p->as){
  78. case ABEQ:
  79. case ABFPF:
  80. case ABFPT:
  81. case ABGEZ:
  82. case ABGEZAL:
  83. case ABGTZ:
  84. case ABLEZ:
  85. case ABLTZ:
  86. case ABLTZAL:
  87. case ABNE:
  88. return 1;
  89. }
  90. return 0;
  91. }
  92. static int
  93. isbranch(Prog *p)
  94. {
  95. if(p == nil)
  96. return 0;
  97. switch(p->as){
  98. case AJAL:
  99. case AJMP:
  100. case ARET:
  101. case ARFE:
  102. return 1;
  103. }
  104. if(iscondbranch(p))
  105. return 1;
  106. return 0;
  107. }
  108. static void
  109. nopafter(Prog *p)
  110. {
  111. p->mark |= LABEL|SYNC;
  112. addnop(p);
  113. }
  114. /*
  115. * workaround for 24k erratum #48, costs about 0.5% in space.
  116. * inserts a NOP before the last of 3 consecutive stores.
  117. * double-word stores complicate things.
  118. */
  119. static int
  120. no3stores(Prog *p)
  121. {
  122. Prog *p1;
  123. if(!isstore(p))
  124. return 0;
  125. p1 = p->link;
  126. if(!isstore(p1))
  127. return 0;
  128. if(isdblwrdmov(p) || isdblwrdmov(p1)) {
  129. nopafter(p);
  130. nop.store.count++;
  131. nop.store.outof++;
  132. return 1;
  133. }
  134. if(isstore(p1->link)) {
  135. nopafter(p1);
  136. nop.store.count++;
  137. nop.store.outof++;
  138. return 1;
  139. }
  140. return 0;
  141. }
  142. /*
  143. * keep stores out of branch delay slots.
  144. * this is costly in space (the other 9.5%), but makes no3stores effective.
  145. * there is undoubtedly a better way to do this.
  146. */
  147. void
  148. storesnosched(void)
  149. {
  150. Prog *p;
  151. for(p = firstp; p != P; p = p->link)
  152. if(isstore(p))
  153. p->mark |= NOSCHED;
  154. }
  155. int
  156. triplestorenops(void)
  157. {
  158. int r;
  159. Prog *p, *p1;
  160. r = 0;
  161. for(p = firstp; p != P; p = p1) {
  162. p1 = p->link;
  163. // if (p->mark & NOSCHED)
  164. // continue;
  165. if(ismove(p) && isstore(p)) {
  166. if (no3stores(p))
  167. r++;
  168. /*
  169. * given storenosched, the next two
  170. * checks shouldn't be necessary.
  171. */
  172. /*
  173. * add nop after first MOV in `MOV; Bcond; MOV'.
  174. */
  175. else if(isbranch(p1) && isstore(p1->link)) {
  176. nopafter(p);
  177. nop.branch.count++;
  178. nop.branch.outof++;
  179. r++;
  180. }
  181. /*
  182. * this may be a branch target, so insert a nop after,
  183. * in case a branch leading here has a store in its
  184. * delay slot and we have consecutive stores here.
  185. */
  186. if(p->mark & (LABEL|SYNC) && !isnop(p1)) {
  187. nopafter(p);
  188. nop.branch.count++;
  189. nop.branch.outof++;
  190. r++;
  191. }
  192. } else if (isbranch(p))
  193. /*
  194. * can't ignore delay slot of a conditional branch;
  195. * the branch could fail and fall through.
  196. */
  197. if (!iscondbranch(p) && p1)
  198. p1 = p1->link; /* skip its delay slot */
  199. }
  200. return r;
  201. }
  202. void
  203. noops(void)
  204. {
  205. Prog *p, *p1, *q, *q1;
  206. int o, curframe, curbecome, maxbecome;
  207. /*
  208. * find leaf subroutines
  209. * become sizes
  210. * frame sizes
  211. * strip NOPs
  212. * expand RET
  213. * expand BECOME pseudo
  214. */
  215. if(debug['v'])
  216. Bprint(&bso, "%5.2f noops\n", cputime());
  217. Bflush(&bso);
  218. curframe = 0;
  219. curbecome = 0;
  220. maxbecome = 0;
  221. curtext = 0;
  222. q = P;
  223. for(p = firstp; p != P; p = p->link) {
  224. /* find out how much arg space is used in this TEXT */
  225. if(p->to.type == D_OREG && p->to.reg == REGSP)
  226. if(p->to.offset > curframe)
  227. curframe = p->to.offset;
  228. switch(p->as) {
  229. case ATEXT:
  230. if(curtext && curtext->from.sym) {
  231. curtext->from.sym->frame = curframe;
  232. curtext->from.sym->become = curbecome;
  233. if(curbecome > maxbecome)
  234. maxbecome = curbecome;
  235. }
  236. curframe = 0;
  237. curbecome = 0;
  238. p->mark |= LABEL|LEAF|SYNC;
  239. if(p->link)
  240. p->link->mark |= LABEL;
  241. curtext = p;
  242. break;
  243. /* too hard, just leave alone */
  244. case AMOVW:
  245. if(p->to.type == D_FCREG ||
  246. p->to.type == D_MREG) {
  247. p->mark |= LABEL|SYNC;
  248. break;
  249. }
  250. if(p->from.type == D_FCREG ||
  251. p->from.type == D_MREG) {
  252. p->mark |= LABEL|SYNC;
  253. addnop(p);
  254. addnop(p);
  255. nop.mfrom.count += 2;
  256. nop.mfrom.outof += 2;
  257. break;
  258. }
  259. break;
  260. /* too hard, just leave alone */
  261. case ACASE:
  262. case ASYSCALL:
  263. case AWORD:
  264. case ATLBWR:
  265. case ATLBWI:
  266. case ATLBP:
  267. case ATLBR:
  268. p->mark |= LABEL|SYNC;
  269. break;
  270. case ANOR:
  271. if(p->to.type == D_REG && p->to.reg == REGZERO)
  272. p->mark |= LABEL|SYNC;
  273. break;
  274. case ARET:
  275. /* special form of RET is BECOME */
  276. if(p->from.type == D_CONST)
  277. if(p->from.offset > curbecome)
  278. curbecome = p->from.offset;
  279. if(p->link != P)
  280. p->link->mark |= LABEL;
  281. break;
  282. case ANOP:
  283. q1 = p->link;
  284. q->link = q1; /* q is non-nop */
  285. q1->mark |= p->mark;
  286. continue;
  287. case ABCASE:
  288. p->mark |= LABEL|SYNC;
  289. goto dstlab;
  290. case ABGEZAL:
  291. case ABLTZAL:
  292. case AJAL:
  293. if(curtext != P)
  294. curtext->mark &= ~LEAF;
  295. case AJMP:
  296. case ABEQ:
  297. case ABGEZ:
  298. case ABGTZ:
  299. case ABLEZ:
  300. case ABLTZ:
  301. case ABNE:
  302. case ABFPT:
  303. case ABFPF:
  304. p->mark |= BRANCH;
  305. dstlab:
  306. q1 = p->cond;
  307. if(q1 != P) {
  308. while(q1->as == ANOP) {
  309. q1 = q1->link;
  310. p->cond = q1;
  311. }
  312. if(!(q1->mark & LEAF))
  313. q1->mark |= LABEL;
  314. } else
  315. p->mark |= LABEL;
  316. q1 = p->link;
  317. if(q1 != P)
  318. q1->mark |= LABEL;
  319. break;
  320. }
  321. q = p;
  322. }
  323. if(curtext && curtext->from.sym) {
  324. curtext->from.sym->frame = curframe;
  325. curtext->from.sym->become = curbecome;
  326. if(curbecome > maxbecome)
  327. maxbecome = curbecome;
  328. }
  329. if(debug['b'])
  330. print("max become = %d\n", maxbecome);
  331. xdefine("ALEFbecome", STEXT, maxbecome);
  332. curtext = 0;
  333. for(p = firstp; p != P; p = p->link) {
  334. switch(p->as) {
  335. case ATEXT:
  336. curtext = p;
  337. break;
  338. case AJAL:
  339. if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
  340. o = maxbecome - curtext->from.sym->frame;
  341. if(o <= 0)
  342. break;
  343. /* calling a become or calling a variable */
  344. if(p->to.sym == S || p->to.sym->become) {
  345. curtext->to.offset += o;
  346. if(debug['b']) {
  347. curp = p;
  348. print("%D calling %D increase %d\n",
  349. &curtext->from, &p->to, o);
  350. }
  351. }
  352. }
  353. break;
  354. }
  355. }
  356. for(p = firstp; p != P; p = p->link) {
  357. o = p->as;
  358. switch(o) {
  359. case ATEXT:
  360. curtext = p;
  361. autosize = p->to.offset + 4;
  362. if(autosize <= 4)
  363. if(curtext->mark & LEAF) {
  364. p->to.offset = -4;
  365. autosize = 0;
  366. }
  367. q = p;
  368. if(autosize) {
  369. q = prg();
  370. q->as = AADD;
  371. q->line = p->line;
  372. q->from.type = D_CONST;
  373. q->from.offset = -autosize;
  374. q->to.type = D_REG;
  375. q->to.reg = REGSP;
  376. q->link = p->link;
  377. p->link = q;
  378. } else
  379. if(!(curtext->mark & LEAF)) {
  380. if(debug['v'])
  381. Bprint(&bso, "save suppressed in: %s\n",
  382. curtext->from.sym->name);
  383. Bflush(&bso);
  384. curtext->mark |= LEAF;
  385. }
  386. if(curtext->mark & LEAF) {
  387. if(curtext->from.sym)
  388. curtext->from.sym->type = SLEAF;
  389. break;
  390. }
  391. q1 = prg();
  392. q1->as = AMOVW;
  393. q1->line = p->line;
  394. q1->from.type = D_REG;
  395. q1->from.reg = REGLINK;
  396. q1->to.type = D_OREG;
  397. q1->from.offset = 0;
  398. q1->to.reg = REGSP;
  399. q1->link = q->link;
  400. q->link = q1;
  401. break;
  402. case ARET:
  403. nocache(p);
  404. if(p->from.type == D_CONST)
  405. goto become;
  406. if(curtext->mark & LEAF) {
  407. if(!autosize) {
  408. p->as = AJMP;
  409. p->from = zprg.from;
  410. p->to.type = D_OREG;
  411. p->to.offset = 0;
  412. p->to.reg = REGLINK;
  413. p->mark |= BRANCH;
  414. break;
  415. }
  416. p->as = AADD;
  417. p->from.type = D_CONST;
  418. p->from.offset = autosize;
  419. p->to.type = D_REG;
  420. p->to.reg = REGSP;
  421. q = prg();
  422. q->as = AJMP;
  423. q->line = p->line;
  424. q->to.type = D_OREG;
  425. q->to.offset = 0;
  426. q->to.reg = REGLINK;
  427. q->mark |= BRANCH;
  428. q->link = p->link;
  429. p->link = q;
  430. break;
  431. }
  432. p->as = AMOVW;
  433. p->from.type = D_OREG;
  434. p->from.offset = 0;
  435. p->from.reg = REGSP;
  436. p->to.type = D_REG;
  437. p->to.reg = 2;
  438. q = p;
  439. if(autosize) {
  440. q = prg();
  441. q->as = AADD;
  442. q->line = p->line;
  443. q->from.type = D_CONST;
  444. q->from.offset = autosize;
  445. q->to.type = D_REG;
  446. q->to.reg = REGSP;
  447. q->link = p->link;
  448. p->link = q;
  449. }
  450. q1 = prg();
  451. q1->as = AJMP;
  452. q1->line = p->line;
  453. q1->to.type = D_OREG;
  454. q1->to.offset = 0;
  455. q1->to.reg = 2;
  456. q1->mark |= BRANCH;
  457. q1->link = q->link;
  458. q->link = q1;
  459. break;
  460. become:
  461. if(curtext->mark & LEAF) {
  462. q = prg();
  463. q->line = p->line;
  464. q->as = AJMP;
  465. q->from = zprg.from;
  466. q->to = p->to;
  467. q->cond = p->cond;
  468. q->link = p->link;
  469. q->mark |= BRANCH;
  470. p->link = q;
  471. p->as = AADD;
  472. p->from = zprg.from;
  473. p->from.type = D_CONST;
  474. p->from.offset = autosize;
  475. p->to = zprg.to;
  476. p->to.type = D_REG;
  477. p->to.reg = REGSP;
  478. break;
  479. }
  480. q = prg();
  481. q->line = p->line;
  482. q->as = AJMP;
  483. q->from = zprg.from;
  484. q->to = p->to;
  485. q->cond = p->cond;
  486. q->link = p->link;
  487. q->mark |= BRANCH;
  488. p->link = q;
  489. q = prg();
  490. q->line = p->line;
  491. q->as = AADD;
  492. q->from.type = D_CONST;
  493. q->from.offset = autosize;
  494. q->to.type = D_REG;
  495. q->to.reg = REGSP;
  496. q->link = p->link;
  497. p->link = q;
  498. p->as = AMOVW;
  499. p->from = zprg.from;
  500. p->from.type = D_OREG;
  501. p->from.offset = 0;
  502. p->from.reg = REGSP;
  503. p->to = zprg.to;
  504. p->to.type = D_REG;
  505. p->to.reg = REGLINK;
  506. break;
  507. }
  508. }
  509. if (Mips24k)
  510. storesnosched();
  511. curtext = P;
  512. q = P; /* p - 1 */
  513. q1 = firstp; /* top of block */
  514. o = 0; /* count of instructions */
  515. for(p = firstp; p != P; p = p1) {
  516. p1 = p->link;
  517. o++;
  518. if(p->mark & NOSCHED){
  519. if(q1 != p){
  520. sched(q1, q);
  521. }
  522. for(; p != P; p = p->link){
  523. if(!(p->mark & NOSCHED))
  524. break;
  525. q = p;
  526. }
  527. p1 = p;
  528. q1 = p;
  529. o = 0;
  530. continue;
  531. }
  532. if(p->mark & (LABEL|SYNC)) {
  533. if(q1 != p)
  534. sched(q1, q);
  535. q1 = p;
  536. o = 1;
  537. }
  538. if(p->mark & (BRANCH|SYNC)) {
  539. sched(q1, p);
  540. q1 = p1;
  541. o = 0;
  542. }
  543. if(o >= NSCHED) {
  544. sched(q1, p);
  545. q1 = p1;
  546. o = 0;
  547. }
  548. q = p;
  549. }
  550. if (Mips24k)
  551. triplestorenops();
  552. }
  553. void
  554. addnop(Prog *p)
  555. {
  556. Prog *q;
  557. q = prg();
  558. q->as = ANOR;
  559. q->line = p->line;
  560. q->from.type = D_REG;
  561. q->from.reg = REGZERO;
  562. q->to.type = D_REG;
  563. q->to.reg = REGZERO;
  564. q->link = p->link;
  565. p->link = q;
  566. }
  567. void
  568. nocache(Prog *p)
  569. {
  570. p->optab = 0;
  571. p->from.class = 0;
  572. p->to.class = 0;
  573. }