mul.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "gc.h"
  10. /*
  11. * code sequences for multiply by constant.
  12. * [a-l][0-3]
  13. * lsl $(A-'a'),r0,r1
  14. * [+][0-7]
  15. * add r0,r1,r2
  16. * [-][0-7]
  17. * sub r0,r1,r2
  18. */
  19. static int maxmulops = 3; /* max # of ops to replace mul with */
  20. static int multabp;
  21. static int32_t mulval;
  22. static char* mulcp;
  23. static int32_t valmax;
  24. static int shmax;
  25. static int docode(char *hp, char *cp, int r0, int r1);
  26. static int gen1(int len);
  27. static int gen2(int len, int32_t r1);
  28. static int gen3(int len, int32_t r0, int32_t r1, int flag);
  29. enum
  30. {
  31. SR1 = 1<<0, /* r1 has been shifted */
  32. SR0 = 1<<1, /* r0 has been shifted */
  33. UR1 = 1<<2, /* r1 has not been used */
  34. UR0 = 1<<3, /* r0 has not been used */
  35. };
  36. Multab*
  37. mulcon0(int32_t v)
  38. {
  39. int a1, a2, g;
  40. Multab *m, *m1;
  41. char hint[10];
  42. if(v < 0)
  43. v = -v;
  44. /*
  45. * look in cache
  46. */
  47. m = multab;
  48. for(g=0; g<nelem(multab); g++) {
  49. if(m->val == v) {
  50. if(m->code[0] == 0)
  51. return 0;
  52. return m;
  53. }
  54. m++;
  55. }
  56. /*
  57. * select a spot in cache to overwrite
  58. */
  59. multabp++;
  60. if(multabp < 0 || multabp >= nelem(multab))
  61. multabp = 0;
  62. m = multab+multabp;
  63. m->val = v;
  64. mulval = v;
  65. /*
  66. * look in execption hint table
  67. */
  68. a1 = 0;
  69. a2 = hintabsize;
  70. for(;;) {
  71. if(a1 >= a2)
  72. goto no;
  73. g = (a2 + a1)/2;
  74. if(v < hintab[g].val) {
  75. a2 = g;
  76. continue;
  77. }
  78. if(v > hintab[g].val) {
  79. a1 = g+1;
  80. continue;
  81. }
  82. break;
  83. }
  84. if(docode(hintab[g].hint, m->code, 1, 0))
  85. return m;
  86. print("multiply table failure %ld\n", v);
  87. m->code[0] = 0;
  88. return 0;
  89. no:
  90. /*
  91. * try to search
  92. */
  93. hint[0] = 0;
  94. for(g=1; g<=maxmulops; g++) {
  95. if(g >= maxmulops && v >= 65535)
  96. break;
  97. mulcp = hint+g;
  98. *mulcp = 0;
  99. if(gen1(g)) {
  100. if(docode(hint, m->code, 1, 0))
  101. return m;
  102. print("multiply table failure %ld\n", v);
  103. break;
  104. }
  105. }
  106. /*
  107. * try a recur followed by a shift
  108. */
  109. g = 0;
  110. while(!(v & 1)) {
  111. g++;
  112. v >>= 1;
  113. }
  114. if(g) {
  115. m1 = mulcon0(v);
  116. if(m1) {
  117. strcpy(m->code, m1->code);
  118. sprint(strchr(m->code, 0), "%c0", g+'a');
  119. return m;
  120. }
  121. }
  122. m->code[0] = 0;
  123. return 0;
  124. }
  125. static int
  126. docode(char *hp, char *cp, int r0, int r1)
  127. {
  128. int c, i;
  129. c = *hp++;
  130. *cp = c;
  131. cp += 2;
  132. switch(c) {
  133. default:
  134. c -= 'a';
  135. if(c < 1 || c >= 30)
  136. break;
  137. for(i=0; i<4; i++) {
  138. switch(i) {
  139. case 0:
  140. if(docode(hp, cp, r0<<c, r1))
  141. goto out;
  142. break;
  143. case 1:
  144. if(docode(hp, cp, r1<<c, r1))
  145. goto out;
  146. break;
  147. case 2:
  148. if(docode(hp, cp, r0, r0<<c))
  149. goto out;
  150. break;
  151. case 3:
  152. if(docode(hp, cp, r0, r1<<c))
  153. goto out;
  154. break;
  155. }
  156. }
  157. break;
  158. case '+':
  159. for(i=0; i<8; i++) {
  160. cp[-1] = i+'0';
  161. switch(i) {
  162. case 1:
  163. if(docode(hp, cp, r0+r1, r1))
  164. goto out;
  165. break;
  166. case 5:
  167. if(docode(hp, cp, r0, r0+r1))
  168. goto out;
  169. break;
  170. }
  171. }
  172. break;
  173. case '-':
  174. for(i=0; i<8; i++) {
  175. cp[-1] = i+'0';
  176. switch(i) {
  177. case 1:
  178. if(docode(hp, cp, r0-r1, r1))
  179. goto out;
  180. break;
  181. case 2:
  182. if(docode(hp, cp, r1-r0, r1))
  183. goto out;
  184. break;
  185. case 5:
  186. if(docode(hp, cp, r0, r0-r1))
  187. goto out;
  188. break;
  189. case 6:
  190. if(docode(hp, cp, r0, r1-r0))
  191. goto out;
  192. break;
  193. }
  194. }
  195. break;
  196. case 0:
  197. if(r0 == mulval)
  198. return 1;
  199. }
  200. return 0;
  201. out:
  202. cp[-1] = i+'0';
  203. return 1;
  204. }
  205. static int
  206. gen1(int len)
  207. {
  208. int i;
  209. for(shmax=1; shmax<30; shmax++) {
  210. valmax = 1<<shmax;
  211. if(valmax >= mulval)
  212. break;
  213. }
  214. if(mulval == 1)
  215. return 1;
  216. len--;
  217. for(i=1; i<=shmax; i++)
  218. if(gen2(len, 1<<i)) {
  219. *--mulcp = 'a'+i;
  220. return 1;
  221. }
  222. return 0;
  223. }
  224. static int
  225. gen2(int len, int32_t r1)
  226. {
  227. int i;
  228. if(len <= 0) {
  229. if(r1 == mulval)
  230. return 1;
  231. return 0;
  232. }
  233. len--;
  234. if(len == 0)
  235. goto calcr0;
  236. if(gen3(len, r1, r1+1, UR1)) {
  237. i = '+';
  238. goto out;
  239. }
  240. if(gen3(len, r1-1, r1, UR0)) {
  241. i = '-';
  242. goto out;
  243. }
  244. if(gen3(len, 1, r1+1, UR1)) {
  245. i = '+';
  246. goto out;
  247. }
  248. if(gen3(len, 1, r1-1, UR1)) {
  249. i = '-';
  250. goto out;
  251. }
  252. return 0;
  253. calcr0:
  254. if(mulval == r1+1) {
  255. i = '+';
  256. goto out;
  257. }
  258. if(mulval == r1-1) {
  259. i = '-';
  260. goto out;
  261. }
  262. return 0;
  263. out:
  264. *--mulcp = i;
  265. return 1;
  266. }
  267. static int
  268. gen3(int len, int32_t r0, int32_t r1, int flag)
  269. {
  270. int i, f1, f2;
  271. int32_t x;
  272. if(r0 <= 0 ||
  273. r0 >= r1 ||
  274. r1 > valmax)
  275. return 0;
  276. len--;
  277. if(len == 0)
  278. goto calcr0;
  279. if(!(flag & UR1)) {
  280. f1 = UR1|SR1;
  281. for(i=1; i<=shmax; i++) {
  282. x = r0<<i;
  283. if(x > valmax)
  284. break;
  285. if(gen3(len, r0, x, f1)) {
  286. i += 'a';
  287. goto out;
  288. }
  289. }
  290. }
  291. if(!(flag & UR0)) {
  292. f1 = UR1|SR1;
  293. for(i=1; i<=shmax; i++) {
  294. x = r1<<i;
  295. if(x > valmax)
  296. break;
  297. if(gen3(len, r1, x, f1)) {
  298. i += 'a';
  299. goto out;
  300. }
  301. }
  302. }
  303. if(!(flag & SR1)) {
  304. f1 = UR1|SR1|(flag&UR0);
  305. for(i=1; i<=shmax; i++) {
  306. x = r1<<i;
  307. if(x > valmax)
  308. break;
  309. if(gen3(len, r0, x, f1)) {
  310. i += 'a';
  311. goto out;
  312. }
  313. }
  314. }
  315. if(!(flag & SR0)) {
  316. f1 = UR0|SR0|(flag&(SR1|UR1));
  317. f2 = UR1|SR1;
  318. if(flag & UR1)
  319. f2 |= UR0;
  320. if(flag & SR1)
  321. f2 |= SR0;
  322. for(i=1; i<=shmax; i++) {
  323. x = r0<<i;
  324. if(x > valmax)
  325. break;
  326. if(x > r1) {
  327. if(gen3(len, r1, x, f2)) {
  328. i += 'a';
  329. goto out;
  330. }
  331. } else
  332. if(gen3(len, x, r1, f1)) {
  333. i += 'a';
  334. goto out;
  335. }
  336. }
  337. }
  338. x = r1+r0;
  339. if(gen3(len, r0, x, UR1)) {
  340. i = '+';
  341. goto out;
  342. }
  343. if(gen3(len, r1, x, UR1)) {
  344. i = '+';
  345. goto out;
  346. }
  347. x = r1-r0;
  348. if(gen3(len, x, r1, UR0)) {
  349. i = '-';
  350. goto out;
  351. }
  352. if(x > r0) {
  353. if(gen3(len, r0, x, UR1)) {
  354. i = '-';
  355. goto out;
  356. }
  357. } else
  358. if(gen3(len, x, r0, UR0)) {
  359. i = '-';
  360. goto out;
  361. }
  362. return 0;
  363. calcr0:
  364. f1 = flag & (UR0|UR1);
  365. if(f1 == UR1) {
  366. for(i=1; i<=shmax; i++) {
  367. x = r1<<i;
  368. if(x >= mulval) {
  369. if(x == mulval) {
  370. i += 'a';
  371. goto out;
  372. }
  373. break;
  374. }
  375. }
  376. }
  377. if(mulval == r1+r0) {
  378. i = '+';
  379. goto out;
  380. }
  381. if(mulval == r1-r0) {
  382. i = '-';
  383. goto out;
  384. }
  385. return 0;
  386. out:
  387. *--mulcp = i;
  388. return 1;
  389. }
  390. /*
  391. * hint table has numbers that
  392. * the search algorithm fails on.
  393. * <1000:
  394. * all numbers
  395. * <5000:
  396. * ÷ by 5
  397. * <10000:
  398. * ÷ by 50
  399. * <65536:
  400. * ÷ by 250
  401. */
  402. Hintab hintab[] =
  403. {
  404. 683, "b++d+e+",
  405. 687, "b+e++e-",
  406. 691, "b++d+e+",
  407. 731, "b++d+e+",
  408. 811, "b++d+i+",
  409. 821, "b++e+e+",
  410. 843, "b+d++e+",
  411. 851, "b+f-+e-",
  412. 853, "b++e+e+",
  413. 877, "c++++g-",
  414. 933, "b+c++g-",
  415. 981, "c-+e-d+",
  416. 1375, "b+c+b+h-",
  417. 1675, "d+b++h+",
  418. 2425, "c++f-e+",
  419. 2675, "c+d++f-",
  420. 2750, "b+d-b+h-",
  421. 2775, "c-+g-e-",
  422. 3125, "b++e+g+",
  423. 3275, "b+c+g+e+",
  424. 3350, "c++++i+",
  425. 3475, "c-+e-f-",
  426. 3525, "c-+d+g-",
  427. 3625, "c-+e-j+",
  428. 3675, "b+d+d+e+",
  429. 3725, "b+d-+h+",
  430. 3925, "b+d+f-d-",
  431. 4275, "b+g++e+",
  432. 4325, "b+h-+d+",
  433. 4425, "b+b+g-j-",
  434. 4525, "b+d-d+f+",
  435. 4675, "c++d-g+",
  436. 4775, "b+d+b+g-",
  437. 4825, "c+c-+i-",
  438. 4850, "c++++i-",
  439. 4925, "b++e-g-",
  440. 4975, "c+f++e-",
  441. 5500, "b+g-c+d+",
  442. 6700, "d+b++i+",
  443. 9700, "d++++j-",
  444. 11000, "b+f-c-h-",
  445. 11750, "b+d+g+j-",
  446. 12500, "b+c+e-k+",
  447. 13250, "b+d+e-f+",
  448. 13750, "b+h-c-d+",
  449. 14250, "b+g-c+e-",
  450. 14500, "c+f+j-d-",
  451. 14750, "d-g--f+",
  452. 16750, "b+e-d-n+",
  453. 17750, "c+h-b+e+",
  454. 18250, "d+b+h-d+",
  455. 18750, "b+g-++f+",
  456. 19250, "b+e+b+h+",
  457. 19750, "b++h--f-",
  458. 20250, "b+e-l-c+",
  459. 20750, "c++bi+e-",
  460. 21250, "b+i+l+c+",
  461. 22000, "b+e+d-g-",
  462. 22250, "b+d-h+k-",
  463. 22750, "b+d-e-g+",
  464. 23250, "b+c+h+e-",
  465. 23500, "b+g-c-g-",
  466. 23750, "b+g-b+h-",
  467. 24250, "c++g+m-",
  468. 24750, "b+e+e+j-",
  469. 25000, "b++dh+g+",
  470. 25250, "b+e+d-g-",
  471. 25750, "b+e+b+j+",
  472. 26250, "b+h+c+e+",
  473. 26500, "b+h+c+g+",
  474. 26750, "b+d+e+g-",
  475. 27250, "b+e+e+f+",
  476. 27500, "c-i-c-d+",
  477. 27750, "b+bd++j+",
  478. 28250, "d-d-++i-",
  479. 28500, "c+c-h-e-",
  480. 29000, "b+g-d-f+",
  481. 29500, "c+h+++e-",
  482. 29750, "b+g+f-c+",
  483. 30250, "b+f-g-c+",
  484. 33500, "c-f-d-n+",
  485. 33750, "b+d-b+j-",
  486. 34250, "c+e+++i+",
  487. 35250, "e+b+d+k+",
  488. 35500, "c+e+d-g-",
  489. 35750, "c+i-++e+",
  490. 36250, "b+bh-d+e+",
  491. 36500, "c+c-h-e-",
  492. 36750, "d+e--i+",
  493. 37250, "b+g+g+b+",
  494. 37500, "b+h-b+f+",
  495. 37750, "c+be++j-",
  496. 38500, "b+e+b+i+",
  497. 38750, "d+i-b+d+",
  498. 39250, "b+g-l-+d+",
  499. 39500, "b+g-c+g-",
  500. 39750, "b+bh-c+f-",
  501. 40250, "b+bf+d+g-",
  502. 40500, "b+g-c+g+",
  503. 40750, "c+b+i-e+",
  504. 41250, "d++bf+h+",
  505. 41500, "b+j+c+d-",
  506. 41750, "c+f+b+h-",
  507. 42500, "c+h++g+",
  508. 42750, "b+g+d-f-",
  509. 43250, "b+l-e+d-",
  510. 43750, "c+bd+h+f-",
  511. 44000, "b+f+g-d-",
  512. 44250, "b+d-g--f+",
  513. 44500, "c+e+c+h+",
  514. 44750, "b+e+d-h-",
  515. 45250, "b++g+j-g+",
  516. 45500, "c+d+e-g+",
  517. 45750, "b+d-h-e-",
  518. 46250, "c+bd++j+",
  519. 46500, "b+d-c-j-",
  520. 46750, "e-e-b+g-",
  521. 47000, "b+c+d-j-",
  522. 47250, "b+e+e-g-",
  523. 47500, "b+g-c-h-",
  524. 47750, "b+f-c+h-",
  525. 48250, "d--h+n-",
  526. 48500, "b+c-g+m-",
  527. 48750, "b+e+e-g+",
  528. 49500, "c-f+e+j-",
  529. 49750, "c+c+g++f-",
  530. 50000, "b+e+e+k+",
  531. 50250, "b++i++g+",
  532. 50500, "c+g+f-i+",
  533. 50750, "b+e+d+k-",
  534. 51500, "b+i+c-f+",
  535. 51750, "b+bd+g-e-",
  536. 52250, "b+d+g-j+",
  537. 52500, "c+c+f+g+",
  538. 52750, "b+c+e+i+",
  539. 53000, "b+i+c+g+",
  540. 53500, "c+g+g-n+",
  541. 53750, "b+j+d-c+",
  542. 54250, "b+d-g-j-",
  543. 54500, "c-f+e+f+",
  544. 54750, "b+f-+c+g+",
  545. 55000, "b+g-d-g-",
  546. 55250, "b+e+e+g+",
  547. 55500, "b+cd++j+",
  548. 55750, "b+bh-d-f-",
  549. 56250, "c+d-b+j-",
  550. 56500, "c+d+c+i+",
  551. 56750, "b+e+d++h-",
  552. 57000, "b+d+g-f+",
  553. 57250, "b+f-m+d-",
  554. 57750, "b+i+c+e-",
  555. 58000, "b+e+d+h+",
  556. 58250, "c+b+g+g+",
  557. 58750, "d-e-j--e+",
  558. 59000, "d-i-+e+",
  559. 59250, "e--h-m+",
  560. 59500, "c+c-h+f-",
  561. 59750, "b+bh-e+i-",
  562. 60250, "b+bh-e-e-",
  563. 60500, "c+c-g-g-",
  564. 60750, "b+e-l-e-",
  565. 61250, "b+g-g-c+",
  566. 61750, "b+g-c+g+",
  567. 62250, "f--+c-i-",
  568. 62750, "e+f--+g+",
  569. 64750, "b+f+d+p-",
  570. };
  571. int hintabsize = nelem(hintab);