noop.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947
  1. #include "l.h"
  2. Prog *divuconst(Prog *, uvlong, int, int, int);
  3. Prog *divconst(Prog *, vlong, int, int, int);
  4. Prog *modconst(Prog *, vlong, int, int, int);
  5. void excise(Prog *);
  6. void
  7. noops(void)
  8. {
  9. Prog *p, *p1, *q, *q1, *q2;
  10. int o, curframe, curbecome, maxbecome, shift;
  11. /*
  12. * find leaf subroutines
  13. * become sizes
  14. * frame sizes
  15. * strip NOPs
  16. * expand RET and other macros
  17. * expand BECOME pseudo
  18. * use conditional moves where appropriate
  19. */
  20. if(debug['v'])
  21. Bprint(&bso, "%5.2f noops\n", cputime());
  22. Bflush(&bso);
  23. curframe = 0;
  24. curbecome = 0;
  25. maxbecome = 0;
  26. curtext = 0;
  27. q = P;
  28. for(p = firstp; p != P; p = p->link) {
  29. /* find out how much arg space is used in this TEXT */
  30. if(p->to.type == D_OREG && p->to.reg == REGSP)
  31. if(p->to.offset > curframe)
  32. curframe = p->to.offset;
  33. switch(p->as) {
  34. case ATEXT:
  35. if(curtext && curtext->from.sym) {
  36. curtext->from.sym->frame = curframe;
  37. curtext->from.sym->become = curbecome;
  38. if(curbecome > maxbecome)
  39. maxbecome = curbecome;
  40. }
  41. curframe = 0;
  42. curbecome = 0;
  43. p->mark |= LABEL|LEAF|SYNC;
  44. if(p->link)
  45. p->link->mark |= LABEL;
  46. curtext = p;
  47. break;
  48. /* don't mess with what we don't understand */
  49. case AWORD:
  50. case ACALL_PAL:
  51. /* etc. */
  52. p->mark |= LABEL;
  53. for(q1=p->link; q1 != P; q1 = q1->link) {
  54. q1->mark |= LABEL;
  55. if(q1->as != AXORNOT) /* used as NOP in PALcode */
  56. break;
  57. }
  58. break;
  59. case ARET:
  60. /* special form of RET is BECOME */
  61. if(p->from.type == D_CONST)
  62. if(p->from.offset > curbecome)
  63. curbecome = p->from.offset;
  64. if(p->link != P)
  65. p->link->mark |= LABEL;
  66. break;
  67. case ANOP:
  68. q1 = p->link;
  69. q->link = q1; /* q is non-nop */
  70. q1->mark |= p->mark;
  71. continue;
  72. case AJSR:
  73. if(curtext != P)
  74. curtext->mark &= ~LEAF;
  75. case ABEQ:
  76. case ABNE:
  77. case ABGE:
  78. case ABGT:
  79. case ABLE:
  80. case ABLT:
  81. case ABLBC:
  82. case ABLBS:
  83. case AFBEQ:
  84. case AFBNE:
  85. case AFBGE:
  86. case AFBGT:
  87. case AFBLE:
  88. case AFBLT:
  89. case AJMP:
  90. p->mark |= BRANCH;
  91. q1 = p->cond;
  92. if(q1 != P) {
  93. while(q1->as == ANOP) {
  94. q1 = q1->link;
  95. p->cond = q1;
  96. }
  97. if(!(q1->mark & LEAF)) {
  98. if (q1->mark & LABEL)
  99. q1->mark |= LABEL2;
  100. else
  101. q1->mark |= LABEL;
  102. }
  103. } else
  104. p->mark |= LABEL;
  105. q1 = p->link;
  106. if(q1 != P) {
  107. if (q1->mark & LABEL)
  108. q1->mark |= LABEL2;
  109. else
  110. q1->mark |= LABEL;
  111. }
  112. else
  113. p->mark |= LABEL; /* ??? */
  114. break;
  115. case ADIVQ:
  116. case ADIVQU:
  117. case AMODQ:
  118. case AMODQU:
  119. case ADIVL:
  120. case ADIVLU:
  121. case AMODL:
  122. case AMODLU:
  123. if(p->from.type == D_CONST /*&& !debug['d']*/)
  124. continue;
  125. if(prog_divq == P)
  126. initdiv();
  127. if(curtext != P)
  128. curtext->mark &= ~LEAF;
  129. break;
  130. }
  131. q = p;
  132. }
  133. if(curtext && curtext->from.sym) {
  134. curtext->from.sym->frame = curframe;
  135. curtext->from.sym->become = curbecome;
  136. if(curbecome > maxbecome)
  137. maxbecome = curbecome;
  138. }
  139. if(debug['b'])
  140. print("max become = %d\n", maxbecome);
  141. xdefine("ALEFbecome", STEXT, maxbecome);
  142. curtext = 0;
  143. for(p = firstp; p != P; p = p->link) {
  144. switch(p->as) {
  145. case ATEXT:
  146. curtext = p;
  147. break;
  148. case AJSR:
  149. if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
  150. o = maxbecome - curtext->from.sym->frame;
  151. if(o <= 0)
  152. break;
  153. /* calling a become or calling a variable */
  154. if(p->to.sym == S || p->to.sym->become) {
  155. curtext->to.offset += o;
  156. if(debug['b']) {
  157. curp = p;
  158. print("%D calling %D increase %d\n",
  159. &curtext->from, &p->to, o);
  160. }
  161. }
  162. }
  163. break;
  164. }
  165. }
  166. for(p = firstp; p != P; p = p->link) {
  167. o = p->as;
  168. switch(o) {
  169. case ATEXT:
  170. curtext = p;
  171. autosize = p->to.offset + 8;
  172. if(autosize <= 8)
  173. if(curtext->mark & LEAF) {
  174. p->to.offset = -8;
  175. autosize = 0;
  176. }
  177. if (autosize & 4)
  178. autosize += 4;
  179. q = p;
  180. if(autosize)
  181. q = genIRR(p, ASUBQ, autosize, NREG, REGSP);
  182. else if(!(curtext->mark & LEAF)) {
  183. if(debug['v'])
  184. Bprint(&bso, "save suppressed in: %s\n",
  185. curtext->from.sym->name);
  186. Bflush(&bso);
  187. curtext->mark |= LEAF;
  188. }
  189. if(curtext->mark & LEAF) {
  190. if(curtext->from.sym)
  191. curtext->from.sym->type = SLEAF;
  192. break;
  193. }
  194. genstore(q, AMOVL, REGLINK, 0LL, REGSP);
  195. break;
  196. case ARET:
  197. nocache(p);
  198. if(p->from.type == D_CONST)
  199. goto become;
  200. if(curtext->mark & LEAF) {
  201. if(!autosize) {
  202. p->as = AJMP;
  203. p->from = zprg.from;
  204. p->to.type = D_OREG;
  205. p->to.offset = 0;
  206. p->to.reg = REGLINK;
  207. break;
  208. }
  209. p->as = AADDQ;
  210. p->from.type = D_CONST;
  211. p->from.offset = autosize;
  212. p->to.type = D_REG;
  213. p->to.reg = REGSP;
  214. q = prg();
  215. q->as = AJMP;
  216. q->line = p->line;
  217. q->to.type = D_OREG;
  218. q->to.offset = 0;
  219. q->to.reg = REGLINK;
  220. q->mark |= BRANCH;
  221. q->link = p->link;
  222. p->link = q;
  223. break;
  224. }
  225. p->as = AMOVL;
  226. p->from.type = D_OREG;
  227. p->from.offset = 0;
  228. p->from.reg = REGSP;
  229. p->to.type = D_REG;
  230. p->to.reg = REGLINK;
  231. q = p;
  232. if(autosize)
  233. q = genIRR(p, AADDQ, autosize, NREG, REGSP);
  234. q1 = prg();
  235. q1->as = AJMP;
  236. q1->line = p->line;
  237. q1->to.type = D_OREG;
  238. q1->to.offset = 0;
  239. q1->to.reg = REGLINK;
  240. q1->mark |= BRANCH;
  241. q1->link = q->link;
  242. q->link = q1;
  243. break;
  244. become:
  245. if(curtext->mark & LEAF) {
  246. q = prg();
  247. q->line = p->line;
  248. q->as = AJMP;
  249. q->from = zprg.from;
  250. q->to = p->to;
  251. q->cond = p->cond;
  252. q->link = p->link;
  253. q->mark |= BRANCH;
  254. p->link = q;
  255. p->as = AADDQ;
  256. p->from = zprg.from;
  257. p->from.type = D_CONST;
  258. p->from.offset = autosize;
  259. p->to = zprg.to;
  260. p->to.type = D_REG;
  261. p->to.reg = REGSP;
  262. break;
  263. }
  264. q = prg();
  265. q->line = p->line;
  266. q->as = AJMP;
  267. q->from = zprg.from;
  268. q->to = p->to;
  269. q->cond = p->cond;
  270. q->link = p->link;
  271. q->mark |= BRANCH;
  272. p->link = q;
  273. q = genIRR(p, AADDQ, autosize, NREG, REGSP);
  274. p->as = AMOVL;
  275. p->from = zprg.from;
  276. p->from.type = D_OREG;
  277. p->from.offset = 0;
  278. p->from.reg = REGSP;
  279. p->to = zprg.to;
  280. p->to.type = D_REG;
  281. p->to.reg = REGLINK;
  282. break;
  283. /* All I wanted was a MOVB... */
  284. case AMOVB:
  285. case AMOVW:
  286. /* rewrite sign extend; could use v3 extension in asmout case 1 */
  287. if (p->to.type == D_REG) {
  288. nocache(p);
  289. shift = (p->as == AMOVB) ? (64-8) : (64-16);
  290. if (p->from.type == D_REG) {
  291. p->as = ASLLQ;
  292. p->reg = p->from.reg;
  293. p->from.type = D_CONST;
  294. p->from.offset = shift;
  295. q = genIRR(p, ASRAQ, shift, p->to.reg, p->to.reg);
  296. break;
  297. }
  298. else {
  299. p->as = (p->as == AMOVB) ? AMOVBU : AMOVWU;
  300. q = genIRR(p, ASLLQ, shift, p->to.reg, p->to.reg);
  301. q = genIRR(q, ASRAQ, shift, p->to.reg, p->to.reg);
  302. }
  303. }
  304. /* fall through... */
  305. case AMOVBU:
  306. case AMOVWU:
  307. if(!debug['x'])
  308. break; /* use BWX extension */
  309. o = p->as;
  310. nocache(p);
  311. if (p->from.type == D_OREG) {
  312. if (p->to.type != D_REG)
  313. break;
  314. p->as = AMOVQU;
  315. q = genXXX(p, AEXTBL, &p->to, REGTMP2, &p->to);
  316. if (o == AMOVW || o == AMOVWU)
  317. q->as = AEXTWL;
  318. p->to.reg = REGTMP2;
  319. if ((p->from.offset & 7) != 0 || aclass(&p->from) != C_SOREG) {
  320. q1 = genXXX(p, AMOVA, &p->from, NREG, &q->to);
  321. q1->from.offset &= 7;
  322. q->from = q->to;
  323. }
  324. else
  325. q->from.reg = p->from.reg;
  326. if (o == AMOVB || o == AMOVW)
  327. genXXX(q, o, &q->to, NREG, &q->to);
  328. }
  329. else if (p->to.type == D_OREG) {
  330. if (aclass(&p->from) == C_ZCON) {
  331. p->from.type = D_REG;
  332. p->from.reg = REGZERO;
  333. }
  334. else if (p->from.type != D_REG)
  335. break;
  336. p->as = AMOVQU;
  337. q = genRRR(p, AMSKBL, p->to.reg, REGTMP2, REGTMP2);
  338. q1 = genRRR(q, AINSBL, p->to.reg, p->from.reg, REGTMP);
  339. if (o == AMOVW || o == AMOVWU) {
  340. q->as = AMSKWL;
  341. q1->as = AINSWL;
  342. }
  343. q2 = genXXX(q1, AOR, &q->to, REGTMP, &q->to);
  344. genXXX(q2, AMOVQU, &q->to, NREG, &p->to);
  345. p->from = p->to;
  346. p->to = q->to;
  347. if ((p->from.offset & 7) != 0 || aclass(&p->from) != C_SOREG) {
  348. q->from.reg = REGTMP;
  349. q1->from.reg = REGTMP;
  350. q = genXXX(p, AMOVA, &p->from, NREG, &q->from);
  351. q->from.offset &= 7;
  352. }
  353. }
  354. break;
  355. case ASLLL:
  356. p->as = ASLLQ;
  357. p = genXXX(p, AADDL, &p->to, REGZERO, &p->to);
  358. break;
  359. case ASRLL:
  360. if (p->to.type != D_REG) {
  361. diag("illegal dest type in %P", p);
  362. break;
  363. }
  364. if (p->reg == NREG)
  365. p->reg = p->to.reg;
  366. q = genXXX(p, ASRLQ, &p->from, REGTMP, &p->to);
  367. p->as = AZAP;
  368. p->from.type = D_CONST;
  369. p->from.offset = 0xf0;
  370. p->to.reg = REGTMP;
  371. p = q;
  372. p = genXXX(p, AADDL, &p->to, REGZERO, &p->to);
  373. break;
  374. case ASRAL:
  375. p->as = ASRAQ;
  376. break;
  377. case ADIVQ:
  378. case ADIVQU:
  379. case AMODQ:
  380. case AMODQU:
  381. case ADIVL:
  382. case ADIVLU:
  383. case AMODL:
  384. case AMODLU:
  385. /* if (debug['d'])
  386. print("%P\n", p); */
  387. if(p->to.type != D_REG)
  388. break;
  389. /*if(debug['d'] && p->from.type == D_CONST) {
  390. q = genRRR(p, p->as, REGTMP, p->reg, p->to.reg);
  391. p->as = AMOVQ;
  392. p->reg = NREG;
  393. p->to.reg = REGTMP;
  394. p = q;
  395. }*/
  396. if(p->from.type == D_CONST) {
  397. if (p->reg == NREG)
  398. p->reg = p->to.reg;
  399. switch (p->as) {
  400. case ADIVQ:
  401. q = divconst(p, p->from.offset, p->reg, p->to.reg, 64);
  402. break;
  403. case ADIVQU:
  404. q = divuconst(p, p->from.offset, p->reg, p->to.reg, 64);
  405. break;
  406. case AMODQ:
  407. q = modconst(p, p->from.offset, p->reg, p->to.reg, 64);
  408. break;
  409. case AMODQU:
  410. q = divuconst(p, p->from.offset, p->reg, REGTMP2, 64);
  411. q = genIRR(q, AMULQ, p->from.offset, REGTMP2, REGTMP2);
  412. q = genRRR(q, ASUBQ, REGTMP2, p->reg, p->to.reg);
  413. break;
  414. case ADIVL:
  415. q = divconst(p, p->from.offset, p->reg, p->to.reg, 32);
  416. break;
  417. case ADIVLU:
  418. q = divuconst(p, p->from.offset, p->reg, p->to.reg, 32);
  419. break;
  420. case AMODL:
  421. q = modconst(p, p->from.offset, p->reg, p->to.reg, 32);
  422. break;
  423. case AMODLU:
  424. q = divuconst(p, p->from.offset, p->reg, REGTMP2, 32);
  425. q = genIRR(q, AMULQ, p->from.offset, REGTMP2, REGTMP2);
  426. q = genRRR(q, ASUBQ, REGTMP2, p->reg, p->to.reg);
  427. break;
  428. }
  429. excise(p);
  430. p = q;
  431. break;
  432. }
  433. if(p->from.type != D_REG){
  434. diag("bad instruction %P", p);
  435. break;
  436. }
  437. o = p->as;
  438. q = genIRR(p, ASUBQ, 16LL, NREG, REGSP);
  439. q = genstore(q, AMOVQ, p->from.reg, 8LL, REGSP);
  440. if (o == ADIVL || o == ADIVL || o == AMODL || o == AMODLU)
  441. q->as = AMOVL;
  442. q = genRRR(q, AMOVQ, p->reg, NREG, REGTMP);
  443. if (p->reg == NREG)
  444. q->from.reg = p->to.reg;
  445. /* CALL appropriate */
  446. q1 = prg();
  447. q1->link = q->link;
  448. q->link = q1;
  449. q1->as = AJSR;
  450. q1->line = p->line;
  451. q1->to.type = D_BRANCH;
  452. q1->cond = divsubr(o);
  453. q1->mark |= BRANCH;
  454. q = q1;
  455. q = genRRR(q, AMOVQ, REGTMP, NREG, p->to.reg);
  456. q = genIRR(q, AADDQ, 16LL, NREG, REGSP);
  457. excise(p);
  458. p = q;
  459. break;
  460. /* Attempt to replace {cond. branch, mov} with a cmov */
  461. /* XXX warning: this is all a bit experimental */
  462. case ABEQ:
  463. case ABNE:
  464. case ABGE:
  465. case ABGT:
  466. case ABLE:
  467. case ABLT:
  468. case ABLBC:
  469. case ABLBS:
  470. q = p->link;
  471. if (q == P)
  472. break;
  473. q1 = q->link;
  474. if (q1 != p->cond || q1 == P)
  475. break;
  476. /*print("%P\n", q); /* */
  477. if (q->to.type != D_REG)
  478. break;
  479. if (q->from.type != D_REG && (q->from.type != D_CONST || q->from.name != D_NONE))
  480. break;
  481. if (q->mark&LABEL2)
  482. break;
  483. /* print("%P\n", q); /* */
  484. if (q->as != AMOVQ) /* XXX can handle more than this! */
  485. break;
  486. q->as = (p->as^1) + ACMOVEQ-ABEQ; /* sleazy hack */
  487. q->reg = p->from.reg; /* XXX check CMOVx operand order! */
  488. excise(p); /* XXX p's LABEL? */
  489. if (!(q1->mark&LABEL2))
  490. q1->mark &= ~LABEL;
  491. break;
  492. case AFBEQ:
  493. case AFBNE:
  494. case AFBGE:
  495. case AFBGT:
  496. case AFBLE:
  497. case AFBLT:
  498. q = p->link;
  499. if (q == P)
  500. break;
  501. q1 = q->link;
  502. if (q1 != p->cond || q1 == P)
  503. break;
  504. if (q->from.type != D_FREG || q->to.type != D_FREG)
  505. break;
  506. /* print("%P\n", q); /* */
  507. if (q->mark&LABEL2)
  508. break;
  509. if (q->as != AMOVT) /* XXX can handle more than this! */
  510. break;
  511. q->as = (p->as^1) + AFCMOVEQ-AFBEQ; /* sleazy hack */
  512. q->reg = p->from.reg; /* XXX check CMOVx operand order! */
  513. excise(p); /* XXX p's LABEL? */
  514. if (!(q1->mark&LABEL2))
  515. q1->mark &= ~LABEL;
  516. break;
  517. }
  518. }
  519. curtext = P;
  520. q = P; /* p - 1 */
  521. q1 = firstp; /* top of block */
  522. o = 0; /* count of instructions */
  523. for(p = firstp; p != P; p = p1) {
  524. p1 = p->link;
  525. o++;
  526. if(p->mark & NOSCHED){
  527. if(q1 != p){
  528. sched(q1, q);
  529. }
  530. for(; p != P; p = p->link){
  531. if(!(p->mark & NOSCHED))
  532. break;
  533. q = p;
  534. }
  535. p1 = p;
  536. q1 = p;
  537. o = 0;
  538. continue;
  539. }
  540. if(p->mark & (LABEL|SYNC)) {
  541. if(q1 != p)
  542. sched(q1, q);
  543. q1 = p;
  544. o = 1;
  545. }
  546. if(p->mark & (BRANCH|SYNC)) {
  547. sched(q1, p);
  548. q1 = p1;
  549. o = 0;
  550. }
  551. if(o >= NSCHED) {
  552. sched(q1, p);
  553. q1 = p1;
  554. o = 0;
  555. }
  556. q = p;
  557. }
  558. }
  559. void
  560. nocache(Prog *p)
  561. {
  562. p->optab = 0;
  563. p->from.class = 0;
  564. p->to.class = 0;
  565. }
  566. /* XXX use of this may lose important LABEL flags, check that this isn't happening (or fix) */
  567. void
  568. excise(Prog *p)
  569. {
  570. Prog *q;
  571. q = p->link;
  572. *p = *q;
  573. }
  574. void
  575. initdiv(void)
  576. {
  577. Sym *s1, *s2, *s3, *s4, *s5, *s6, *s7, *s8;
  578. Prog *p;
  579. s1 = lookup("_divq", 0);
  580. s2 = lookup("_divqu", 0);
  581. s3 = lookup("_modq", 0);
  582. s4 = lookup("_modqu", 0);
  583. s5 = lookup("_divl", 0);
  584. s6 = lookup("_divlu", 0);
  585. s7 = lookup("_modl", 0);
  586. s8 = lookup("_modlu", 0);
  587. for(p = firstp; p != P; p = p->link)
  588. if(p->as == ATEXT) {
  589. if(p->from.sym == s1)
  590. prog_divq = p;
  591. if(p->from.sym == s2)
  592. prog_divqu = p;
  593. if(p->from.sym == s3)
  594. prog_modq = p;
  595. if(p->from.sym == s4)
  596. prog_modqu = p;
  597. if(p->from.sym == s5)
  598. prog_divl = p;
  599. if(p->from.sym == s6)
  600. prog_divlu = p;
  601. if(p->from.sym == s7)
  602. prog_modl = p;
  603. if(p->from.sym == s8)
  604. prog_modlu = p;
  605. }
  606. if(prog_divq == P) {
  607. diag("undefined: %s\n", s1->name);
  608. prog_divq = curtext;
  609. }
  610. if(prog_divqu == P) {
  611. diag("undefined: %s\n", s2->name);
  612. prog_divqu = curtext;
  613. }
  614. if(prog_modq == P) {
  615. diag("undefined: %s\n", s3->name);
  616. prog_modq = curtext;
  617. }
  618. if(prog_modqu == P) {
  619. diag("undefined: %s\n", s4->name);
  620. prog_modqu = curtext;
  621. }
  622. if(prog_divl == P) {
  623. diag("undefined: %s\n", s5->name);
  624. prog_divl = curtext;
  625. }
  626. if(prog_divlu == P) {
  627. diag("undefined: %s\n", s6->name);
  628. prog_divlu = curtext;
  629. }
  630. if(prog_modl == P) {
  631. diag("undefined: %s\n", s7->name);
  632. prog_modl = curtext;
  633. }
  634. if(prog_modlu == P) {
  635. diag("undefined: %s\n", s8->name);
  636. prog_modlu = curtext;
  637. }
  638. }
  639. Prog *
  640. divsubr(int o)
  641. {
  642. switch(o) {
  643. case ADIVQ:
  644. return prog_divq;
  645. case ADIVQU:
  646. return prog_divqu;
  647. case AMODQ:
  648. return prog_modq;
  649. case AMODQU:
  650. return prog_modqu;
  651. case ADIVL:
  652. return prog_divl;
  653. case ADIVLU:
  654. return prog_divlu;
  655. case AMODL:
  656. return prog_modl;
  657. case AMODLU:
  658. return prog_modlu;
  659. default:
  660. diag("bad op %O in divsubr", o);
  661. return prog_modlu;
  662. }
  663. }
  664. Prog*
  665. divuconst(Prog *p, uvlong y, int num, int quot, int bits)
  666. {
  667. int logy, i, shift;
  668. uvlong k, m, n, mult, tmp, msb;
  669. if(num == NREG)
  670. num = quot;
  671. if(y == 0) {
  672. diag("division by zero");
  673. return p;
  674. }
  675. if(y == 1)
  676. return genRRR(p, AMOVQ, num, NREG, quot);
  677. if(num == REGTMP || quot == REGTMP)
  678. diag("bad register in divuconst");
  679. tmp = y;
  680. for(logy = -1; tmp != 0; logy++)
  681. tmp >>= 1;
  682. msb = (1LL << (bits-1));
  683. if((y & (y-1)) == 0) /* power of 2 */
  684. return genIRR(p, ASRLQ, logy, num, quot);
  685. if(y > msb)
  686. return genIRR(p, ACMPUGE, y, num, quot);
  687. /* k = (-2^(bits+logy)) % y */
  688. m = msb/y;
  689. n = msb%y;
  690. if(debug['d'])
  691. Bprint(&bso, "divuconst: y=%lld msb=%lld m=%lld n=%lld\n",
  692. y, msb, m, n);
  693. for(i = 0; i <= logy; i++) {
  694. m *= 2LL;
  695. n *= 2LL;
  696. if(n > y) {
  697. m += 1LL;
  698. n -= y;
  699. }
  700. }
  701. if(debug['d'])
  702. Bprint(&bso, "divuconst: y=%lld msb=%lld m=%lld n=%lld\n",
  703. y, msb, m, n);
  704. k = y - n;
  705. if(k > (1LL << logy)) {
  706. mult = 2LL*m + 1LL;
  707. bits++;
  708. } else
  709. mult = m + 1LL;
  710. shift = bits + logy;
  711. if(debug['d'])
  712. Bprint(&bso, "divuconst: y=%lld mult=%lld shift=%d bits=%d k=%lld\n",
  713. y, mult, shift, bits, k);
  714. if(bits <= 32) {
  715. p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
  716. p = genRRR(p, AEXTLL, REGZERO, num, quot);
  717. p = genRRR(p, AMULQ, REGTMP, quot, quot);
  718. p = genIRR(p, ASRLQ, shift, quot, quot);
  719. p = genRRR(p, AADDL, quot, REGZERO, quot);
  720. return p;
  721. }
  722. if(bits == 33) {
  723. if(shift < 64) {
  724. mult <<= (64-shift);
  725. shift = 64;
  726. }
  727. p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
  728. p = genRRR(p, AEXTLL, REGZERO, num, quot);
  729. p = genRRR(p, AUMULH, REGTMP, quot, quot);
  730. if(shift != 64)
  731. p = genIRR(p, ASRLQ, shift-64, quot, quot);
  732. p = genRRR(p, AADDL, quot, REGZERO, quot);
  733. return p;
  734. }
  735. if(bits <= 64) {
  736. if(shift < 64) {
  737. mult <<= (64-shift);
  738. shift = 64;
  739. }
  740. p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
  741. p = genRRR(p, AUMULH, REGTMP, num, quot);
  742. if(shift != 64)
  743. p = genIRR(p, ASRLQ, shift-64, quot, quot);
  744. return p;
  745. }
  746. p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
  747. p = genRRR(p, AUMULH, REGTMP, num, REGTMP);
  748. p = genRRR(p, AADDQ, num, REGTMP, quot);
  749. p = genRRR(p, ACMPUGT, REGTMP, quot, REGTMP);
  750. p = genIRR(p, ASLLQ, 128-shift, REGTMP, REGTMP);
  751. p = genIRR(p, ASRLQ, shift-64, quot, quot);
  752. p = genRRR(p, AADDQ, REGTMP, quot, quot);
  753. return p;
  754. }
  755. Prog *
  756. divconst(Prog *p, vlong y, int num, int quot, int bits)
  757. {
  758. vlong yabs;
  759. Prog *q;
  760. yabs = y;
  761. if (y < 0)
  762. yabs = -y;
  763. q = genRRR(p, ASUBQ, num, REGZERO, REGTMP2);
  764. if (num != quot)
  765. q = genRRR(q, AMOVQ, num, NREG, quot);
  766. q = genRRR(q, ACMOVGT, REGTMP2, REGTMP2, quot);
  767. q = divuconst(q, yabs, quot, quot, bits-1);
  768. q = genRRR(q, ASUBQ, quot, REGZERO, REGTMP);
  769. q = genRRR(q, (y < 0)? ACMOVLT: ACMOVGT, REGTMP, REGTMP2, quot);
  770. return q;
  771. }
  772. Prog *
  773. modconst(Prog *p, vlong y, int num, int quot, int bits)
  774. {
  775. vlong yabs;
  776. Prog *q;
  777. yabs = y;
  778. if (y < 0)
  779. yabs = -y;
  780. q = genRRR(p, ASUBQ, num, REGZERO, REGTMP2);
  781. q = genRRR(q, ACMOVLT, num, REGTMP2, REGTMP2);
  782. q = divuconst(q, yabs, REGTMP2, REGTMP2, bits-1);
  783. q = genRRR(q, ASUBQ, REGTMP2, REGZERO, REGTMP);
  784. q = genRRR(q, ACMOVLT, REGTMP, num, REGTMP2);
  785. q = genIRR(q, AMULQ, yabs, REGTMP2, REGTMP2);
  786. q = genRRR(q, ASUBQ, REGTMP2, num, quot);
  787. return q;
  788. }
  789. Prog *
  790. genXXX(Prog *q, int op, Adr *from, int reg, Adr *to)
  791. {
  792. Prog *p;
  793. p = prg();
  794. p->as = op;
  795. p->line = q->line;
  796. p->from = *from;
  797. p->to = *to;
  798. p->reg = reg;
  799. p->link = q->link;
  800. q->link = p;
  801. return p;
  802. }
  803. Prog *
  804. genRRR(Prog *q, int op, int from, int reg, int to)
  805. {
  806. Prog *p;
  807. p = prg();
  808. p->as = op;
  809. p->line = q->line;
  810. p->from.type = D_REG;
  811. p->from.reg = from;
  812. p->to.type = D_REG;
  813. p->to.reg = to;
  814. p->reg = reg;
  815. p->link = q->link;
  816. q->link = p;
  817. return p;
  818. }
  819. Prog *
  820. genIRR(Prog *q, int op, vlong v, int reg, int to)
  821. {
  822. Prog *p;
  823. p = prg();
  824. p->as = op;
  825. p->line = q->line;
  826. p->from.type = D_CONST;
  827. p->from.offset = v;
  828. p->to.type = D_REG;
  829. p->to.reg = to;
  830. p->reg = reg;
  831. p->link = q->link;
  832. q->link = p;
  833. return p;
  834. }
  835. Prog *
  836. genstore(Prog *q, int op, int from, vlong offset, int to)
  837. {
  838. Prog *p;
  839. p = prg();
  840. p->as = op;
  841. p->line = q->line;
  842. p->from.type = D_REG;
  843. p->from.reg = from;
  844. p->to.type = D_OREG;
  845. p->to.reg = to;
  846. p->to.offset = offset;
  847. p->reg = NREG;
  848. p->link = q->link;
  849. q->link = p;
  850. return p;
  851. }
  852. Prog *
  853. genload(Prog *q, int op, vlong offset, int from, int to)
  854. {
  855. Prog *p;
  856. p = prg();
  857. p->as = op;
  858. p->line = q->line;
  859. p->from.type = D_OREG;
  860. p->from.offset = offset;
  861. p->from.reg = from;
  862. p->to.type = D_REG;
  863. p->to.reg = to;
  864. p->reg = NREG;
  865. p->link = q->link;
  866. q->link = p;
  867. return p;
  868. }