mips1.s 7.8 KB


  /* This assembler is for R2000/R3000 machines, or higher ones that do
   * not want to do any 64-bit arithmetic.
   * Make sure that the SSLeay bignum library is compiled with
   * THIRTY_TWO_BIT set.
   * This must either be compiled with the system CC, or, if you use GNU gas,
   * cc -E mips1.s|gas -o mips1.o
   */
  /*
   * Assembler modes and register aliases used by the hand-written routines
   * in this file.  The aliases are cpp macros, so the file has to be run
   * through the C preprocessor before it is assembled.
   */
	.set	reorder		/* the assembler takes care of delay slots */
	.set	noat		/* R1 below is $1, so the assembler must not
				 * use that register as its own temporary */

/* Incoming arguments, in order. */
#define P1	$4
#define P2	$5
#define P3	$6
#define P4	$7

/* Running carry; it is still in $2 when the routines return. */
#define CC	$2

/* Four data words per unrolled pass (R1 also serves as a scratch value). */
#define R1	$1
#define R2	$3
#define R3	$8
#define R4	$9

/* Four source words; after a multiply, the low halves of the products. */
#define L1	$10
#define L2	$11
#define L3	$12
#define L4	$13

/* High halves of the four products. */
#define H1	$14
#define H2	$15
#define H3	$24
#define H4	$25
  /*
   * bn_mul_add_words
   *
   * For each of P3 words:  word at P1 += (word at P2) * P4 + carry.
   *
   * In:    P1 = words updated in place
   *        P2 = words that get multiplied
   *        P3 = number of words (nothing is done when it is <= 0)
   *        P4 = 32-bit multiplier
   * Out:   CC ($2) = carry out of the last word
   * Uses:  R1-R4, L1-L4, H1-H4, HI/LO; no stack frame.
   *
   * NOTE(review): the C prototype is presumably
   *   BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
   * -- confirm against the bignum header.
   */
	.align	2
	.ent	bn_mul_add_words
	.globl	bn_mul_add_words
	.text
bn_mul_add_words:
	.frame	$sp,0,$31
	.mask	0x00000000,0
	.fmask	0x00000000,0

	/* Hand-expanded form of "blt P3,4,$maw_short": R1 = P3 - 4. */
	subu	R1,P3,4
	move	CC,$0			# carry starts at zero
	bltz	R1,$maw_short

	/* Four words per pass. */
$maw_unroll4:
	lw	R1,0(P1)
	lw	L1,0(P2)
	lw	R2,4(P1)
	lw	L2,4(P2)
	lw	R3,8(P1)
	lw	L3,8(P2)
	lw	R4,12(P1)
	lw	L4,12(P2)

	/* word 0: two adds, each followed by an sltu that recovers its carry */
	multu	L1,P4
	addu	R1,R1,CC		# add the incoming carry
	mflo	L1
	sltu	CC,R1,CC		# carry out of that add
	addu	R1,R1,L1		# add the low half of the product
	mfhi	H1
	sltu	L1,R1,L1		# carry out of that add
	sw	R1,0(P1)
	addu	CC,CC,L1

	/* word 1 */
	multu	L2,P4
	addu	CC,H1,CC		# carry in = high half + both carries
	mflo	L2
	addu	R2,R2,CC
	sltu	CC,R2,CC
	mfhi	H2
	addu	R2,R2,L2
	addu	P2,P2,16		# source pointer moves on early
	sltu	L2,R2,L2
	sw	R2,4(P1)
	addu	CC,CC,L2

	/* word 2 */
	multu	L3,P4
	addu	CC,H2,CC
	mflo	L3
	addu	R3,R3,CC
	sltu	CC,R3,CC
	mfhi	H3
	addu	R3,R3,L3
	addu	P1,P1,16		# later stores use negative offsets
	sltu	L3,R3,L3
	sw	R3,-8(P1)
	addu	CC,CC,L3

	/* word 3 */
	multu	L4,P4
	addu	CC,H3,CC
	mflo	L4
	addu	R4,R4,CC
	sltu	CC,R4,CC
	mfhi	H4
	addu	R4,R4,L4
	subu	P3,P3,4
	sltu	L4,R4,L4
	addu	CC,CC,L4
	addu	CC,H4,CC

	subu	R1,P3,4			# at least four more words?
	sw	R4,-4(P1)		# last store of this pass
	bgez	R1,$maw_unroll4

	bleu	P3,0,$maw_ret		# nothing left over
	.align	2

	/* One word per pass, for the remaining words. */
$maw_tail:
	lw	L1,0(P2)
	lw	R1,0(P1)
	multu	L1,P4
	addu	R1,R1,CC
	sltu	CC,R1,CC
	addu	P1,P1,4
	mflo	L1
	mfhi	H1
	addu	R1,R1,L1
	addu	P2,P2,4
	sltu	L1,R1,L1
	subu	P3,P3,1
	addu	CC,CC,L1
	sw	R1,-4(P1)
	addu	CC,H1,CC
	bgtz	P3,$maw_tail
	j	$31
	.align	2

$maw_ret:
	j	$31
	.align	2

	/* Fewer than four words on entry. */
$maw_short:
	bgt	P3,0,$maw_tail
	j	$31
	.end	bn_mul_add_words
  /*
   * bn_mul_words
   *
   * For each of P3 words:  word at P1 = low 32 bits of (word at P2) * P4 + carry,
   * with the high half of the product and the add carry passed on.
   *
   * In:    P1 = destination words
   *        P2 = source words
   *        P3 = number of words (nothing is done when it is <= 0)
   *        P4 = 32-bit multiplier
   * Out:   CC ($2) = carry out of the last word
   * Uses:  L1-L4, H1-H4, HI/LO; no stack frame.
   *
   * P3 is kept biased by -4 while the unrolled loop runs and is put back
   * at $mw_rest before the one-word loop.
   */
	.align	2
	.ent	bn_mul_words
	.globl	bn_mul_words
	.text
bn_mul_words:
	.frame	$sp,0,$31
	.mask	0x00000000,0
	.fmask	0x00000000,0

	subu	P3,P3,4			# bias the count
	move	CC,$0			# carry starts at zero
	bltz	P3,$mw_rest		# fewer than four words

	/* Four words per pass. */
$mw_unroll4:
	lw	L1,0(P2)
	lw	L2,4(P2)
	lw	L3,8(P2)
	lw	L4,12(P2)

	multu	L1,P4
	subu	P3,P3,4
	mflo	L1
	mfhi	H1
	addu	L1,L1,CC		# low half + carry in
	multu	L2,P4
	sltu	CC,L1,CC		# carry out of that add
	sw	L1,0(P1)
	addu	CC,H1,CC		# next carry = high half + carry

	mflo	L2
	mfhi	H2
	addu	L2,L2,CC
	multu	L3,P4
	sltu	CC,L2,CC
	sw	L2,4(P1)
	addu	CC,H2,CC

	mflo	L3
	mfhi	H3
	addu	L3,L3,CC
	multu	L4,P4
	sltu	CC,L3,CC
	sw	L3,8(P1)
	addu	CC,H3,CC

	mflo	L4
	mfhi	H4
	addu	L4,L4,CC
	addu	P1,P1,16
	sltu	CC,L4,CC
	addu	P2,P2,16
	addu	CC,H4,CC
	sw	L4,-4(P1)

	bgez	P3,$mw_unroll4		# at least four words still to do
	b	$mw_rest

	/* One word per pass. */
$mw_tail:
	lw	L1,0(P2)
	addu	P1,P1,4
	multu	L1,P4
	addu	P2,P2,4
	mflo	L1
	mfhi	H1
	addu	L1,L1,CC
	subu	P3,P3,1
	sltu	CC,L1,CC
	sw	L1,-4(P1)
	addu	CC,H1,CC
	bgtz	P3,$mw_tail
	j	$31

	/* Undo the bias and handle the leftover words, if any. */
$mw_rest:
	addu	P3,P3,4
	bgtz	P3,$mw_tail
	j	$31
	.align	2
	.end	bn_mul_words
  /*
   * bn_sqr_words
   *
   * For each of P3 source words, store the 64-bit square of the word as
   * two destination words: low half first, then high half.
   *
   * In:    P1 = destination, two words written per source word
   *        P2 = source words
   *        P3 = number of source words (nothing is done when it is <= 0)
   * Out:   nothing is placed in a result register
   * Uses:  L1-L4, H1-H4, HI/LO; no stack frame.
   *
   * P3 is kept biased by -4 while the unrolled loop runs and is put back
   * at $sq_rest before the one-word loop.
   */
	.ent	bn_sqr_words
	.globl	bn_sqr_words
	.text
bn_sqr_words:
	.frame	$sp,0,$31
	.mask	0x00000000,0
	.fmask	0x00000000,0

	subu	P3,P3,4			# bias the count
	bltz	P3,$sq_rest		# fewer than four words

	/* Four words per pass. */
$sq_unroll4:
	lw	L1,0(P2)
	lw	L2,4(P2)
	lw	L3,8(P2)
	lw	L4,12(P2)

	multu	L1,L1
	subu	P3,P3,4
	mflo	L1
	mfhi	H1
	sw	L1,0(P1)
	sw	H1,4(P1)

	multu	L2,L2
	addu	P1,P1,32		# later stores use negative offsets
	mflo	L2
	mfhi	H2
	sw	L2,-24(P1)
	sw	H2,-20(P1)

	multu	L3,L3
	addu	P2,P2,16
	mflo	L3
	mfhi	H3
	sw	L3,-16(P1)
	sw	H3,-12(P1)

	multu	L4,L4
	mflo	L4
	mfhi	H4
	sw	L4,-8(P1)
	sw	H4,-4(P1)

	/*
	 * The biased count is >= 0 exactly when four or more words remain,
	 * so zero must keep the loop going.  Testing with bgtz pushed the
	 * final group of four through the one-word loop instead.
	 */
	bgez	P3,$sq_unroll4
	b	$sq_rest

	/* One word per pass. */
$sq_tail:
	lw	L1,0(P2)
	addu	P1,P1,8
	multu	L1,L1
	addu	P2,P2,4
	subu	P3,P3,1
	mflo	L1
	mfhi	H1
	sw	L1,-8(P1)
	sw	H1,-4(P1)
	bgtz	P3,$sq_tail
	j	$31

	/* Undo the bias and handle the leftover words, if any. */
$sq_rest:
	addu	P3,P3,4
	bgtz	P3,$sq_tail
	j	$31
	.align	2
	.end	bn_sqr_words
  /*
   * bn_add_words
   *
   * For each of P4 words:  word at P1 = (word at P2) + (word at P3) + carry.
   *
   * In:    P1 = destination words
   *        P2 = first addend
   *        P3 = second addend
   *        P4 = number of words (nothing is done when it is <= 0)
   * Out:   CC ($2) = carry out of the last word
   * Uses:  L1-L4, R1-R4; no stack frame.
   *
   * Each word takes two adds (carry in, then the second addend); the sltu
   * after each add recovers its carry, and the two carries are summed.
   * P4 is kept biased by -4 while the unrolled loop runs and is put back
   * at $add_rest before the one-word loop.
   */
	.ent	bn_add_words
	.globl	bn_add_words
	.text
bn_add_words:
	.frame	$sp,0,$31
	.mask	0x00000000,0
	.fmask	0x00000000,0

	subu	P4,P4,4			# bias the count
	move	CC,$0			# carry starts at zero
	bltz	P4,$add_rest		# fewer than four words

	/* Four words per pass; loads are interleaved with the arithmetic. */
$add_unroll4:
	lw	L1,0(P2)
	lw	R1,0(P3)
	lw	L2,4(P2)
	lw	R2,4(P3)

	addu	L1,L1,CC		# word 0: add carry in
	lw	L3,8(P2)
	sltu	CC,L1,CC
	addu	L1,L1,R1		# word 0: add second addend
	sltu	R1,L1,R1
	lw	R3,8(P3)
	addu	CC,CC,R1

	lw	L4,12(P2)
	addu	L2,L2,CC		# word 1
	lw	R4,12(P3)
	sltu	CC,L2,CC
	addu	L2,L2,R2
	sltu	R2,L2,R2
	sw	L1,0(P1)
	addu	CC,CC,R2

	addu	P1,P1,16		# later stores use negative offsets
	addu	L3,L3,CC		# word 2
	sw	L2,-12(P1)
	sltu	CC,L3,CC
	addu	L3,L3,R3
	sltu	R3,L3,R3
	addu	P2,P2,16
	addu	CC,CC,R3

	addu	L4,L4,CC		# word 3
	addu	P3,P3,16
	sltu	CC,L4,CC
	addu	L4,L4,R4
	subu	P4,P4,4
	sltu	R4,L4,R4
	sw	L3,-8(P1)
	addu	CC,CC,R4
	sw	L4,-4(P1)

	/*
	 * The biased count is >= 0 exactly when four or more words remain,
	 * so zero must keep the loop going.  Testing with bgtz pushed the
	 * final group of four through the one-word loop instead.
	 */
	bgez	P4,$add_unroll4
	b	$add_rest

	/* One word per pass. */
$add_tail:
	lw	L1,0(P2)
	lw	R1,0(P3)
	addu	L1,L1,CC
	addu	P1,P1,4
	sltu	CC,L1,CC
	addu	P2,P2,4
	addu	P3,P3,4
	addu	L1,L1,R1
	subu	P4,P4,1
	sltu	R1,L1,R1
	sw	L1,-4(P1)
	addu	CC,CC,R1
	bgtz	P4,$add_tail
	j	$31

	/* Undo the bias and handle the leftover words, if any. */
$add_rest:
	addu	P4,P4,4
	bgtz	P4,$add_tail
	j	$31
	.end	bn_add_words
  /*
   * bn_div64
   *
   * Compiler-generated code for the C routine whose source lines are quoted
   * in the comments below ("NNN:" is the C source line number).
   *
   * In:    $4 = h, $5 = l, $6 = d   (moved to $9, $12, $16 on entry)
   * Out:   $2 = ret, built from two 16-bit quotient digits q;
   *        -1 (all ones) when d == 0
   * Calls: BN_num_bits_word, and abort on the overflow check.
   * Frame: 64 bytes.  56($sp) = return address, 48($sp) = caller $16.
   *        8/16/24/32/40($sp) are spill slots used around the two calls.
   *
   * Register use after entry:
   *   $9 = h    $12 = l    $16 = d     $13 = count   $31 = ret
   *   $3 = i (later t, then tl)        $4 = 32 (later q*dh)
   *   $8 = dh   $10 = dl   $6 = 0xffff0000            $5 = q
   *   $7 = th   $11 = tl
   * $31 holds the C variable ret, so it is spilled around calls and the
   * real return address is reloaded from 56($sp) before returning.
   */
	.set	at
	.set	reorder
	.text
	.align	2
	.globl	bn_div64
	/* 321: { */
	.ent	bn_div64 2
bn_div64:
	subu	$sp, 64
	sw	$31, 56($sp)
	sw	$16, 48($sp)
	/* NOTE(review): the -56 offset does not obviously match the save
	 * slots used above; it only feeds debug/unwind data -- confirm. */
	.mask	0x80010000, -56
	.frame	$sp, 64, $31
	move	$9, $4
	move	$12, $5
	move	$16, $6
	/* 322: BN_ULONG dh,dl,q,ret=0,th,tl,t; */
	move	$31, $0
	/* 323: int i,count=2; */
	li	$13, 2
	/* 325: if (d == 0) return(BN_MASK2); */
	bne	$16, 0, $80
	li	$2, -1
	b	$93
$80:
	/* 327: i=BN_num_bits_word(d); */
	move	$4, $16
	sw	$31, 16($sp)		# spill ret, h, l, count across the call
	sw	$9, 24($sp)
	sw	$12, 32($sp)
	sw	$13, 40($sp)
	.livereg	0x800ff0e,0xfff
	jal	BN_num_bits_word
	li	$4, 32			# BN_BITS2
	lw	$31, 16($sp)
	lw	$9, 24($sp)
	lw	$12, 32($sp)
	lw	$13, 40($sp)
	move	$3, $2
	/* 328: if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) */
	beq	$2, $4, $81
	li	$14, 1
	sll	$15, $14, $2
	bleu	$9, $15, $81
	/* 329: { */
	/* 330: #if !defined(NO_STDIO) && !defined(WIN16) */
	/* 331: fprintf(stderr,"Division would overflow (%d)\n",i); */
	/* 332: #endif */
	/* 333: abort(); */
	sw	$3, 8($sp)
	sw	$9, 24($sp)
	sw	$12, 32($sp)
	sw	$13, 40($sp)
	/*
	 * ret goes into the same word-aligned slot used before the first call.
	 * The previous offset, 26, is not a multiple of 4 (sw/lw fault on such
	 * an address) and also overlapped the slot at 24($sp) holding $9.
	 */
	sw	$31, 16($sp)
	.livereg	0xff0e,0xfff
	jal	abort
	lw	$3, 8($sp)
	li	$4, 32
	lw	$9, 24($sp)
	lw	$12, 32($sp)
	lw	$13, 40($sp)
	lw	$31, 16($sp)
	/* 334: } */
$81:
	/* 335: i=BN_BITS2-i; */
	subu	$3, $4, $3
	/* 336: if (h >= d) h-=d; */
	bltu	$9, $16, $82
	subu	$9, $9, $16
$82:
	/* 338: if (i) */
	beq	$3, 0, $83
	/* 339: { */
	/* 340: d<<=i; */
	sll	$16, $16, $3
	/* 341: h=(h<<i)|(l>>(BN_BITS2-i)); */
	sll	$24, $9, $3
	subu	$25, $4, $3
	srl	$14, $12, $25
	or	$9, $24, $14
	/* 342: l<<=i; */
	sll	$12, $12, $3
	/* 343: } */
$83:
	/* 344: dh=(d&BN_MASK2h)>>BN_BITS4; */
	/* 345: dl=(d&BN_MASK2l); */
	and	$8, $16, -65536
	srl	$8, $8, 16
	and	$10, $16, 65535
	li	$6, -65536		# 0xffff0000, kept for the loop below
$84:
	/* 346: for (;;) */
	/* 347: { */
	/* 348: if ((h>>BN_BITS4) == dh) */
	srl	$15, $9, 16
	bne	$8, $15, $85
	/* 349: q=BN_MASK2l; */
	li	$5, 65535
	b	$86
$85:
	/* 350: else */
	/* 351: q=h/dh; */
	divu	$5, $9, $8
$86:
	/* 353: for (;;) */
	/* 354: { */
	/* 355: t=(h-q*dh); */
	mul	$4, $5, $8		# q*dh stays in $4 for line 367
	subu	$2, $9, $4
	move	$3, $2
	/* 356: if ((t&BN_MASK2h) || */
	/* 357: ((dl*q) <= ( */
	/* 358: (t<<BN_BITS4)+ */
	/* 359: ((l&BN_MASK2h)>>BN_BITS4)))) */
	and	$25, $2, $6
	bne	$25, $0, $87
	mul	$24, $10, $5
	sll	$14, $3, 16
	and	$15, $12, $6
	srl	$25, $15, 16
	addu	$15, $14, $25
	bgtu	$24, $15, $88
$87:
	/* 360: break; */
	mul	$3, $10, $5		# q*dl, used from $89 on
	b	$89
$88:
	/* 361: q--; */
	addu	$5, $5, -1
	/* 362: } */
	b	$86
$89:
	/* 363: th=q*dh; */
	/* 364: tl=q*dl; */
	/* 365: t=(tl>>BN_BITS4); */
	/* 366: tl=(tl<<BN_BITS4)&BN_MASK2h; */
	sll	$14, $3, 16
	and	$2, $14, $6
	move	$11, $2
	/* 367: th+=t; */
	srl	$25, $3, 16
	addu	$7, $4, $25
	/* 369: if (l < tl) th++; */
	bgeu	$12, $2, $90
	addu	$7, $7, 1
$90:
	/* 370: l-=tl; */
	subu	$12, $12, $11
	/* 371: if (h < th) */
	bgeu	$9, $7, $91
	/* 372: { */
	/* 373: h+=d; */
	addu	$9, $9, $16
	/* 374: q--; */
	addu	$5, $5, -1
	/* 375: } */
$91:
	/* 376: h-=th; */
	subu	$9, $9, $7
	/* 378: if (--count == 0) break; */
	addu	$13, $13, -1
	beq	$13, 0, $92
	/* 380: ret=q<<BN_BITS4; */
	sll	$31, $5, 16
	/* 381: h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2; */
	sll	$24, $9, 16
	srl	$15, $12, 16
	or	$9, $24, $15
	/* 382: l=(l&BN_MASK2l)<<BN_BITS4; */
	and	$12, $12, 65535
	sll	$12, $12, 16
	/* 383: } */
	b	$84
$92:
	/* 384: ret|=q; */
	or	$31, $31, $5
	/* 385: return(ret); */
	move	$2, $31
$93:
	lw	$16, 48($sp)
	lw	$31, 56($sp)		# real return address
	addu	$sp, 64
	j	$31
	.end	bn_div64