aes-mips.pl 41 KB


  1. #!/usr/bin/env perl
  2. # ====================================================================
  3. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  4. # project. The module is, however, dual licensed under OpenSSL and
  5. # CRYPTOGAMS licenses depending on where you obtain it. For further
  6. # details see http://www.openssl.org/~appro/cryptogams/.
  7. # ====================================================================
  8. # AES for MIPS
  9. # October 2010
  10. #
  11. # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
  12. # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
  13. # faster than gcc-generated code, which is not very impressive. But
  14. # recall that compressed S-box requires extra processing, namely
  15. # additional rotations. Rotations are implemented with lwl/lwr pairs,
  16. # which are normally used for loading unaligned data. Another cool
  17. # thing about this module is its endian neutrality, which means that
  18. # it processes data without ever changing byte order...
  19. ######################################################################
  20. # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
  21. # widely used. Then there is a new contender: NUBI. It appears that if
  22. # one picks the latter, it's possible to arrange code in ABI neutral
  23. # manner. Therefore let's stick to NUBI register layout:
  24. #
  # NUBI register layout: every symbolic name is bound to the literal
  # assembler operand "$N" for its register number N.
  ($zero,$at,$t0,$t1,$t2)=map('$'.$_, 0..2,24,25);
  ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map('$'.$_, 4..11);
  ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map('$'.$_, 12..23);
  ($gp,$tp,$sp,$fp,$ra)=map('$'.$_, 3,28..31);
  29. #
  30. # The return value is placed in $a0. The following coding rules facilitate
  31. # interoperability:
  32. #
  33. # - never ever touch $tp, "thread pointer", former $gp;
  34. # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
  35. # old code];
  36. # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
  37. #
  38. # For reference here is register layout for N32/64 MIPS ABIs:
  39. #
  40. # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
  41. # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
  42. # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
  43. # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
  44. # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
  45. #
  $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64

  # Select pointer-arithmetic mnemonics, register save/restore mnemonics
  # and the register slot size in one step: the 64-bit forms are used for
  # 64-bit and n32 flavours, the 32-bit forms for everything else.
  ($PTR_ADD,$PTR_SUB,$PTR_SLL,$REG_S,$REG_L,$SZREG) =
      ($flavour =~ /64|n32/i)
	  ? ("dadd","dsub","dsll","sd","ld",8)	# d-forms incidentally work even on n32
	  : ("add","sub","sll","sw","lw",4);

  # Register handed to .cpload/.cpsetup by the public entry points.
  if ($flavour =~ /nubi/i) {
      $pf = $t0;
  } else {
      $pf = $t2;
  }
  63. #
  64. # <appro@openssl.org>
  65. #
  66. ######################################################################
  # Target byte order.  When $CC is set, feed the token MIPSEL through its
  # preprocessor: if the token survives unchanged the target is treated as
  # big-endian, otherwise as little-endian.  Without $CC there is nothing
  # to run, so $big_endian stays undefined and the byte order of the host
  # executing perl is used instead.
  $big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
  if (!defined($big_endian))
      {	$big_endian=(unpack('L',pack('N',1))==1);	}

  # The output file is the first remaining argument that looks like
  # "name.ext"; arguments in front of it are consumed and ignored.  STDOUT
  # is redirected once, and only when such an argument exists, so a run
  # without a file name still writes to the inherited STDOUT instead of
  # ending up with a closed handle.
  while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
  if ($output) {
      open(STDOUT,'>',$output) or die "can't open $output: $!";
  }

  my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
  75. $code.=<<___;
  76. .text
  77. #ifdef OPENSSL_FIPSCANISTER
  78. # include <openssl/fipssyms.h>
  79. #endif
  80. #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
  81. .option pic2
  82. #endif
  83. .set noat
  84. ___
  85. {{{
  # Lexical register aliases used by the AES_encrypt/AES_decrypt generators
  # in this scope.  The stack frame is 16 register-sized slots, and the
  # mask value is what those entry points emit in their .mask directive.
  86. my $FRAMESIZE=16*$SZREG;
  87. my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
  # Arguments, table pointer and the four state words occupy $a0-$a7.
  # NOTE: $s0-$s3 declared here shadow the file-level $s0-$s3 for the rest
  # of this scope, which is why the nubi prologue/epilogue below spell
  # registers 12-15 literally (\$12..\$15).
  88. my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
  # Index temporaries bind to the file-level $at,$t0,$t1,$t2.  This line has
  # to stay ahead of the next one, which re-declares $t0-$t2.
  89. my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
  # Table-lookup temporaries: $t0-$t11 are re-bound to registers 12-23.
  90. my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
  # Round-key pointer and round counter live in $gp and $fp.
  91. my ($key0,$cnt)=($gp,$fp);
  92. # instruction ordering is "stolen" from output from MIPSpro assembler
  93. # invoked with -mips3 -O3 arguments...
  94. $code.=<<___;
  95. .align 5
  96. .ent _mips_AES_encrypt
  97. _mips_AES_encrypt:
  98. .frame $sp,0,$ra
  99. .set reorder
  100. lw $t0,0($key)
  101. lw $t1,4($key)
  102. lw $t2,8($key)
  103. lw $t3,12($key)
  104. lw $cnt,240($key)
  105. $PTR_ADD $key0,$key,16
  106. xor $s0,$t0
  107. xor $s1,$t1
  108. xor $s2,$t2
  109. xor $s3,$t3
  110. sub $cnt,1
  111. _xtr $i0,$s1,16-2
  112. .Loop_enc:
  113. _xtr $i1,$s2,16-2
  114. _xtr $i2,$s3,16-2
  115. _xtr $i3,$s0,16-2
  116. and $i0,0x3fc
  117. and $i1,0x3fc
  118. and $i2,0x3fc
  119. and $i3,0x3fc
  120. $PTR_ADD $i0,$Tbl
  121. $PTR_ADD $i1,$Tbl
  122. $PTR_ADD $i2,$Tbl
  123. $PTR_ADD $i3,$Tbl
  124. lwl $t0,3($i0) # Te1[s1>>16]
  125. lwl $t1,3($i1) # Te1[s2>>16]
  126. lwl $t2,3($i2) # Te1[s3>>16]
  127. lwl $t3,3($i3) # Te1[s0>>16]
  128. lwr $t0,2($i0) # Te1[s1>>16]
  129. lwr $t1,2($i1) # Te1[s2>>16]
  130. lwr $t2,2($i2) # Te1[s3>>16]
  131. lwr $t3,2($i3) # Te1[s0>>16]
  132. _xtr $i0,$s2,8-2
  133. _xtr $i1,$s3,8-2
  134. _xtr $i2,$s0,8-2
  135. _xtr $i3,$s1,8-2
  136. and $i0,0x3fc
  137. and $i1,0x3fc
  138. and $i2,0x3fc
  139. and $i3,0x3fc
  140. $PTR_ADD $i0,$Tbl
  141. $PTR_ADD $i1,$Tbl
  142. $PTR_ADD $i2,$Tbl
  143. $PTR_ADD $i3,$Tbl
  144. lwl $t4,2($i0) # Te2[s2>>8]
  145. lwl $t5,2($i1) # Te2[s3>>8]
  146. lwl $t6,2($i2) # Te2[s0>>8]
  147. lwl $t7,2($i3) # Te2[s1>>8]
  148. lwr $t4,1($i0) # Te2[s2>>8]
  149. lwr $t5,1($i1) # Te2[s3>>8]
  150. lwr $t6,1($i2) # Te2[s0>>8]
  151. lwr $t7,1($i3) # Te2[s1>>8]
  152. _xtr $i0,$s3,0-2
  153. _xtr $i1,$s0,0-2
  154. _xtr $i2,$s1,0-2
  155. _xtr $i3,$s2,0-2
  156. and $i0,0x3fc
  157. and $i1,0x3fc
  158. and $i2,0x3fc
  159. and $i3,0x3fc
  160. $PTR_ADD $i0,$Tbl
  161. $PTR_ADD $i1,$Tbl
  162. $PTR_ADD $i2,$Tbl
  163. $PTR_ADD $i3,$Tbl
  164. lwl $t8,1($i0) # Te3[s3]
  165. lwl $t9,1($i1) # Te3[s0]
  166. lwl $t10,1($i2) # Te3[s1]
  167. lwl $t11,1($i3) # Te3[s2]
  168. lwr $t8,0($i0) # Te3[s3]
  169. lwr $t9,0($i1) # Te3[s0]
  170. lwr $t10,0($i2) # Te3[s1]
  171. lwr $t11,0($i3) # Te3[s2]
  172. _xtr $i0,$s0,24-2
  173. _xtr $i1,$s1,24-2
  174. _xtr $i2,$s2,24-2
  175. _xtr $i3,$s3,24-2
  176. and $i0,0x3fc
  177. and $i1,0x3fc
  178. and $i2,0x3fc
  179. and $i3,0x3fc
  180. $PTR_ADD $i0,$Tbl
  181. $PTR_ADD $i1,$Tbl
  182. $PTR_ADD $i2,$Tbl
  183. $PTR_ADD $i3,$Tbl
  184. xor $t0,$t4
  185. xor $t1,$t5
  186. xor $t2,$t6
  187. xor $t3,$t7
  188. lw $t4,0($i0) # Te0[s0>>24]
  189. lw $t5,0($i1) # Te0[s1>>24]
  190. lw $t6,0($i2) # Te0[s2>>24]
  191. lw $t7,0($i3) # Te0[s3>>24]
  192. lw $s0,0($key0)
  193. lw $s1,4($key0)
  194. lw $s2,8($key0)
  195. lw $s3,12($key0)
  196. xor $t0,$t8
  197. xor $t1,$t9
  198. xor $t2,$t10
  199. xor $t3,$t11
  200. xor $t0,$t4
  201. xor $t1,$t5
  202. xor $t2,$t6
  203. xor $t3,$t7
  204. sub $cnt,1
  205. $PTR_ADD $key0,16
  206. xor $s0,$t0
  207. xor $s1,$t1
  208. xor $s2,$t2
  209. xor $s3,$t3
  210. .set noreorder
  211. bnez $cnt,.Loop_enc
  212. _xtr $i0,$s1,16-2
  213. .set reorder
  214. _xtr $i1,$s2,16-2
  215. _xtr $i2,$s3,16-2
  216. _xtr $i3,$s0,16-2
  217. and $i0,0x3fc
  218. and $i1,0x3fc
  219. and $i2,0x3fc
  220. and $i3,0x3fc
  221. $PTR_ADD $i0,$Tbl
  222. $PTR_ADD $i1,$Tbl
  223. $PTR_ADD $i2,$Tbl
  224. $PTR_ADD $i3,$Tbl
  225. lbu $t0,2($i0) # Te4[s1>>16]
  226. lbu $t1,2($i1) # Te4[s2>>16]
  227. lbu $t2,2($i2) # Te4[s3>>16]
  228. lbu $t3,2($i3) # Te4[s0>>16]
  229. _xtr $i0,$s2,8-2
  230. _xtr $i1,$s3,8-2
  231. _xtr $i2,$s0,8-2
  232. _xtr $i3,$s1,8-2
  233. and $i0,0x3fc
  234. and $i1,0x3fc
  235. and $i2,0x3fc
  236. and $i3,0x3fc
  237. $PTR_ADD $i0,$Tbl
  238. $PTR_ADD $i1,$Tbl
  239. $PTR_ADD $i2,$Tbl
  240. $PTR_ADD $i3,$Tbl
  241. lbu $t4,2($i0) # Te4[s2>>8]
  242. lbu $t5,2($i1) # Te4[s3>>8]
  243. lbu $t6,2($i2) # Te4[s0>>8]
  244. lbu $t7,2($i3) # Te4[s1>>8]
  245. _xtr $i0,$s0,24-2
  246. _xtr $i1,$s1,24-2
  247. _xtr $i2,$s2,24-2
  248. _xtr $i3,$s3,24-2
  249. and $i0,0x3fc
  250. and $i1,0x3fc
  251. and $i2,0x3fc
  252. and $i3,0x3fc
  253. $PTR_ADD $i0,$Tbl
  254. $PTR_ADD $i1,$Tbl
  255. $PTR_ADD $i2,$Tbl
  256. $PTR_ADD $i3,$Tbl
  257. lbu $t8,2($i0) # Te4[s0>>24]
  258. lbu $t9,2($i1) # Te4[s1>>24]
  259. lbu $t10,2($i2) # Te4[s2>>24]
  260. lbu $t11,2($i3) # Te4[s3>>24]
  261. _xtr $i0,$s3,0-2
  262. _xtr $i1,$s0,0-2
  263. _xtr $i2,$s1,0-2
  264. _xtr $i3,$s2,0-2
  265. and $i0,0x3fc
  266. and $i1,0x3fc
  267. and $i2,0x3fc
  268. and $i3,0x3fc
  269. _ins $t0,16
  270. _ins $t1,16
  271. _ins $t2,16
  272. _ins $t3,16
  273. _ins $t4,8
  274. _ins $t5,8
  275. _ins $t6,8
  276. _ins $t7,8
  277. xor $t0,$t4
  278. xor $t1,$t5
  279. xor $t2,$t6
  280. xor $t3,$t7
  281. $PTR_ADD $i0,$Tbl
  282. $PTR_ADD $i1,$Tbl
  283. $PTR_ADD $i2,$Tbl
  284. $PTR_ADD $i3,$Tbl
  285. lbu $t4,2($i0) # Te4[s3]
  286. lbu $t5,2($i1) # Te4[s0]
  287. lbu $t6,2($i2) # Te4[s1]
  288. lbu $t7,2($i3) # Te4[s2]
  289. _ins $t8,24
  290. _ins $t9,24
  291. _ins $t10,24
  292. _ins $t11,24
  293. lw $s0,0($key0)
  294. lw $s1,4($key0)
  295. lw $s2,8($key0)
  296. lw $s3,12($key0)
  297. xor $t0,$t8
  298. xor $t1,$t9
  299. xor $t2,$t10
  300. xor $t3,$t11
  301. _ins $t4,0
  302. _ins $t5,0
  303. _ins $t6,0
  304. _ins $t7,0
  305. xor $t0,$t4
  306. xor $t1,$t5
  307. xor $t2,$t6
  308. xor $t3,$t7
  309. xor $s0,$t0
  310. xor $s1,$t1
  311. xor $s2,$t2
  312. xor $s3,$t3
  313. jr $ra
  314. .end _mips_AES_encrypt
  315. .align 5
  316. .globl AES_encrypt
  317. .ent AES_encrypt
  318. AES_encrypt:
  319. .frame $sp,$FRAMESIZE,$ra
  320. .mask $SAVED_REGS_MASK,-$SZREG
  321. .set noreorder
  322. ___
  323. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  324. .cpload $pf
  325. ___
  326. $code.=<<___;
  327. $PTR_SUB $sp,$FRAMESIZE
  328. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  329. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  330. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
  331. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  332. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  333. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  334. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  335. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  336. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  337. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  338. ___
  339. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  340. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  341. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  342. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  343. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  344. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  345. ___
  346. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  347. .cplocal $Tbl
  348. .cpsetup $pf,$zero,AES_encrypt
  349. ___
  350. $code.=<<___;
  351. .set reorder
  352. la $Tbl,AES_Te # PIC-ified 'load address'
  353. lwl $s0,0+$MSB($inp)
  354. lwl $s1,4+$MSB($inp)
  355. lwl $s2,8+$MSB($inp)
  356. lwl $s3,12+$MSB($inp)
  357. lwr $s0,0+$LSB($inp)
  358. lwr $s1,4+$LSB($inp)
  359. lwr $s2,8+$LSB($inp)
  360. lwr $s3,12+$LSB($inp)
  361. bal _mips_AES_encrypt
  362. swr $s0,0+$LSB($out)
  363. swr $s1,4+$LSB($out)
  364. swr $s2,8+$LSB($out)
  365. swr $s3,12+$LSB($out)
  366. swl $s0,0+$MSB($out)
  367. swl $s1,4+$MSB($out)
  368. swl $s2,8+$MSB($out)
  369. swl $s3,12+$MSB($out)
  370. .set noreorder
  371. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  372. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  373. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  374. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  375. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  376. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  377. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  378. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  379. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  380. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  381. ___
  382. $code.=<<___ if ($flavour =~ /nubi/i);
  383. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  384. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  385. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  386. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  387. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  388. ___
  389. $code.=<<___;
  390. jr $ra
  391. $PTR_ADD $sp,$FRAMESIZE
  392. .end AES_encrypt
  393. ___
  394. $code.=<<___;
  395. .align 5
  396. .ent _mips_AES_decrypt
  397. _mips_AES_decrypt:
  398. .frame $sp,0,$ra
  399. .set reorder
  400. lw $t0,0($key)
  401. lw $t1,4($key)
  402. lw $t2,8($key)
  403. lw $t3,12($key)
  404. lw $cnt,240($key)
  405. $PTR_ADD $key0,$key,16
  406. xor $s0,$t0
  407. xor $s1,$t1
  408. xor $s2,$t2
  409. xor $s3,$t3
  410. sub $cnt,1
  411. _xtr $i0,$s3,16-2
  412. .Loop_dec:
  413. _xtr $i1,$s0,16-2
  414. _xtr $i2,$s1,16-2
  415. _xtr $i3,$s2,16-2
  416. and $i0,0x3fc
  417. and $i1,0x3fc
  418. and $i2,0x3fc
  419. and $i3,0x3fc
  420. $PTR_ADD $i0,$Tbl
  421. $PTR_ADD $i1,$Tbl
  422. $PTR_ADD $i2,$Tbl
  423. $PTR_ADD $i3,$Tbl
  424. lwl $t0,3($i0) # Td1[s3>>16]
  425. lwl $t1,3($i1) # Td1[s0>>16]
  426. lwl $t2,3($i2) # Td1[s1>>16]
  427. lwl $t3,3($i3) # Td1[s2>>16]
  428. lwr $t0,2($i0) # Td1[s3>>16]
  429. lwr $t1,2($i1) # Td1[s0>>16]
  430. lwr $t2,2($i2) # Td1[s1>>16]
  431. lwr $t3,2($i3) # Td1[s2>>16]
  432. _xtr $i0,$s2,8-2
  433. _xtr $i1,$s3,8-2
  434. _xtr $i2,$s0,8-2
  435. _xtr $i3,$s1,8-2
  436. and $i0,0x3fc
  437. and $i1,0x3fc
  438. and $i2,0x3fc
  439. and $i3,0x3fc
  440. $PTR_ADD $i0,$Tbl
  441. $PTR_ADD $i1,$Tbl
  442. $PTR_ADD $i2,$Tbl
  443. $PTR_ADD $i3,$Tbl
  444. lwl $t4,2($i0) # Td2[s2>>8]
  445. lwl $t5,2($i1) # Td2[s3>>8]
  446. lwl $t6,2($i2) # Td2[s0>>8]
  447. lwl $t7,2($i3) # Td2[s1>>8]
  448. lwr $t4,1($i0) # Td2[s2>>8]
  449. lwr $t5,1($i1) # Td2[s3>>8]
  450. lwr $t6,1($i2) # Td2[s0>>8]
  451. lwr $t7,1($i3) # Td2[s1>>8]
  452. _xtr $i0,$s1,0-2
  453. _xtr $i1,$s2,0-2
  454. _xtr $i2,$s3,0-2
  455. _xtr $i3,$s0,0-2
  456. and $i0,0x3fc
  457. and $i1,0x3fc
  458. and $i2,0x3fc
  459. and $i3,0x3fc
  460. $PTR_ADD $i0,$Tbl
  461. $PTR_ADD $i1,$Tbl
  462. $PTR_ADD $i2,$Tbl
  463. $PTR_ADD $i3,$Tbl
  464. lwl $t8,1($i0) # Td3[s1]
  465. lwl $t9,1($i1) # Td3[s2]
  466. lwl $t10,1($i2) # Td3[s3]
  467. lwl $t11,1($i3) # Td3[s0]
  468. lwr $t8,0($i0) # Td3[s1]
  469. lwr $t9,0($i1) # Td3[s2]
  470. lwr $t10,0($i2) # Td3[s3]
  471. lwr $t11,0($i3) # Td3[s0]
  472. _xtr $i0,$s0,24-2
  473. _xtr $i1,$s1,24-2
  474. _xtr $i2,$s2,24-2
  475. _xtr $i3,$s3,24-2
  476. and $i0,0x3fc
  477. and $i1,0x3fc
  478. and $i2,0x3fc
  479. and $i3,0x3fc
  480. $PTR_ADD $i0,$Tbl
  481. $PTR_ADD $i1,$Tbl
  482. $PTR_ADD $i2,$Tbl
  483. $PTR_ADD $i3,$Tbl
  484. xor $t0,$t4
  485. xor $t1,$t5
  486. xor $t2,$t6
  487. xor $t3,$t7
  488. lw $t4,0($i0) # Td0[s0>>24]
  489. lw $t5,0($i1) # Td0[s1>>24]
  490. lw $t6,0($i2) # Td0[s2>>24]
  491. lw $t7,0($i3) # Td0[s3>>24]
  492. lw $s0,0($key0)
  493. lw $s1,4($key0)
  494. lw $s2,8($key0)
  495. lw $s3,12($key0)
  496. xor $t0,$t8
  497. xor $t1,$t9
  498. xor $t2,$t10
  499. xor $t3,$t11
  500. xor $t0,$t4
  501. xor $t1,$t5
  502. xor $t2,$t6
  503. xor $t3,$t7
  504. sub $cnt,1
  505. $PTR_ADD $key0,16
  506. xor $s0,$t0
  507. xor $s1,$t1
  508. xor $s2,$t2
  509. xor $s3,$t3
  510. .set noreorder
  511. bnez $cnt,.Loop_dec
  512. _xtr $i0,$s3,16-2
  513. .set reorder
  514. lw $t4,1024($Tbl) # prefetch Td4
  515. lw $t5,1024+32($Tbl)
  516. lw $t6,1024+64($Tbl)
  517. lw $t7,1024+96($Tbl)
  518. lw $t8,1024+128($Tbl)
  519. lw $t9,1024+160($Tbl)
  520. lw $t10,1024+192($Tbl)
  521. lw $t11,1024+224($Tbl)
  522. _xtr $i0,$s3,16
  523. _xtr $i1,$s0,16
  524. _xtr $i2,$s1,16
  525. _xtr $i3,$s2,16
  526. and $i0,0xff
  527. and $i1,0xff
  528. and $i2,0xff
  529. and $i3,0xff
  530. $PTR_ADD $i0,$Tbl
  531. $PTR_ADD $i1,$Tbl
  532. $PTR_ADD $i2,$Tbl
  533. $PTR_ADD $i3,$Tbl
  534. lbu $t0,1024($i0) # Td4[s3>>16]
  535. lbu $t1,1024($i1) # Td4[s0>>16]
  536. lbu $t2,1024($i2) # Td4[s1>>16]
  537. lbu $t3,1024($i3) # Td4[s2>>16]
  538. _xtr $i0,$s2,8
  539. _xtr $i1,$s3,8
  540. _xtr $i2,$s0,8
  541. _xtr $i3,$s1,8
  542. and $i0,0xff
  543. and $i1,0xff
  544. and $i2,0xff
  545. and $i3,0xff
  546. $PTR_ADD $i0,$Tbl
  547. $PTR_ADD $i1,$Tbl
  548. $PTR_ADD $i2,$Tbl
  549. $PTR_ADD $i3,$Tbl
  550. lbu $t4,1024($i0) # Td4[s2>>8]
  551. lbu $t5,1024($i1) # Td4[s3>>8]
  552. lbu $t6,1024($i2) # Td4[s0>>8]
  553. lbu $t7,1024($i3) # Td4[s1>>8]
  554. _xtr $i0,$s0,24
  555. _xtr $i1,$s1,24
  556. _xtr $i2,$s2,24
  557. _xtr $i3,$s3,24
  558. $PTR_ADD $i0,$Tbl
  559. $PTR_ADD $i1,$Tbl
  560. $PTR_ADD $i2,$Tbl
  561. $PTR_ADD $i3,$Tbl
  562. lbu $t8,1024($i0) # Td4[s0>>24]
  563. lbu $t9,1024($i1) # Td4[s1>>24]
  564. lbu $t10,1024($i2) # Td4[s2>>24]
  565. lbu $t11,1024($i3) # Td4[s3>>24]
  566. _xtr $i0,$s1,0
  567. _xtr $i1,$s2,0
  568. _xtr $i2,$s3,0
  569. _xtr $i3,$s0,0
  570. _ins $t0,16
  571. _ins $t1,16
  572. _ins $t2,16
  573. _ins $t3,16
  574. _ins $t4,8
  575. _ins $t5,8
  576. _ins $t6,8
  577. _ins $t7,8
  578. xor $t0,$t4
  579. xor $t1,$t5
  580. xor $t2,$t6
  581. xor $t3,$t7
  582. $PTR_ADD $i0,$Tbl
  583. $PTR_ADD $i1,$Tbl
  584. $PTR_ADD $i2,$Tbl
  585. $PTR_ADD $i3,$Tbl
  586. lbu $t4,1024($i0) # Td4[s1]
  587. lbu $t5,1024($i1) # Td4[s2]
  588. lbu $t6,1024($i2) # Td4[s3]
  589. lbu $t7,1024($i3) # Td4[s0]
  590. _ins $t8,24
  591. _ins $t9,24
  592. _ins $t10,24
  593. _ins $t11,24
  594. lw $s0,0($key0)
  595. lw $s1,4($key0)
  596. lw $s2,8($key0)
  597. lw $s3,12($key0)
  598. _ins $t4,0
  599. _ins $t5,0
  600. _ins $t6,0
  601. _ins $t7,0
  602. xor $t0,$t8
  603. xor $t1,$t9
  604. xor $t2,$t10
  605. xor $t3,$t11
  606. xor $t0,$t4
  607. xor $t1,$t5
  608. xor $t2,$t6
  609. xor $t3,$t7
  610. xor $s0,$t0
  611. xor $s1,$t1
  612. xor $s2,$t2
  613. xor $s3,$t3
  614. jr $ra
  615. .end _mips_AES_decrypt
  616. .align 5
  617. .globl AES_decrypt
  618. .ent AES_decrypt
  619. AES_decrypt:
  620. .frame $sp,$FRAMESIZE,$ra
  621. .mask $SAVED_REGS_MASK,-$SZREG
  622. .set noreorder
  623. ___
  624. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  625. .cpload $pf
  626. ___
  627. $code.=<<___;
  628. $PTR_SUB $sp,$FRAMESIZE
  629. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  630. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  631. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
  632. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  633. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  634. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  635. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  636. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  637. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  638. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  639. ___
  640. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  641. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  642. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  643. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  644. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  645. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  646. ___
  647. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  648. .cplocal $Tbl
  649. .cpsetup $pf,$zero,AES_decrypt
  650. ___
  651. $code.=<<___;
  652. .set reorder
  653. la $Tbl,AES_Td # PIC-ified 'load address'
  654. lwl $s0,0+$MSB($inp)
  655. lwl $s1,4+$MSB($inp)
  656. lwl $s2,8+$MSB($inp)
  657. lwl $s3,12+$MSB($inp)
  658. lwr $s0,0+$LSB($inp)
  659. lwr $s1,4+$LSB($inp)
  660. lwr $s2,8+$LSB($inp)
  661. lwr $s3,12+$LSB($inp)
  662. bal _mips_AES_decrypt
  663. swr $s0,0+$LSB($out)
  664. swr $s1,4+$LSB($out)
  665. swr $s2,8+$LSB($out)
  666. swr $s3,12+$LSB($out)
  667. swl $s0,0+$MSB($out)
  668. swl $s1,4+$MSB($out)
  669. swl $s2,8+$MSB($out)
  670. swl $s3,12+$MSB($out)
  671. .set noreorder
  672. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  673. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  674. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  675. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  676. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  677. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  678. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  679. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  680. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  681. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  682. ___
  683. $code.=<<___ if ($flavour =~ /nubi/i);
  684. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  685. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  686. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  687. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  688. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  689. ___
  690. $code.=<<___;
  691. jr $ra
  692. $PTR_ADD $sp,$FRAMESIZE
  693. .end AES_decrypt
  694. ___
  695. }}}
  696. {{{
  # Lexical register aliases used by the key-schedule generators in this
  # scope.  The stack frame is 8 register-sized slots, and the mask value
  # is what AES_set_encrypt_key/AES_set_decrypt_key emit in .mask.
  697. my $FRAMESIZE=8*$SZREG;
  698. my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
  # Arguments (user key, key length in bits, key schedule) and table pointer.
  699. my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
  # Up to eight 32-bit key words; $rk4-$rk7 are only loaded for 192/256-bit
  # keys and use the file-level $s0-$s3 (not shadowed in this scope).
  700. my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
  # Index temporaries; $i1 aliases $t0, which also carries the return code.
  701. my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
  # $rcon is set to $Tbl+1024+256 and stepped by 4 per round; $cnt is the
  # loop counter.
  702. my ($rcon,$cnt)=($gp,$fp);
  703. $code.=<<___;
  704. .align 5
  705. .ent _mips_AES_set_encrypt_key
  706. _mips_AES_set_encrypt_key:
  707. .frame $sp,0,$ra
  708. .set noreorder
  709. beqz $inp,.Lekey_done
  710. li $t0,-1
  711. beqz $key,.Lekey_done
  712. $PTR_ADD $rcon,$Tbl,1024+256
  713. .set reorder
  714. lwl $rk0,0+$MSB($inp) # load 128 bits
  715. lwl $rk1,4+$MSB($inp)
  716. lwl $rk2,8+$MSB($inp)
  717. lwl $rk3,12+$MSB($inp)
  718. li $at,128
  719. lwr $rk0,0+$LSB($inp)
  720. lwr $rk1,4+$LSB($inp)
  721. lwr $rk2,8+$LSB($inp)
  722. lwr $rk3,12+$LSB($inp)
  723. .set noreorder
  724. beq $bits,$at,.L128bits
  725. li $cnt,10
  726. .set reorder
  727. lwl $rk4,16+$MSB($inp) # load 192 bits
  728. lwl $rk5,20+$MSB($inp)
  729. li $at,192
  730. lwr $rk4,16+$LSB($inp)
  731. lwr $rk5,20+$LSB($inp)
  732. .set noreorder
  733. beq $bits,$at,.L192bits
  734. li $cnt,8
  735. .set reorder
  736. lwl $rk6,24+$MSB($inp) # load 256 bits
  737. lwl $rk7,28+$MSB($inp)
  738. li $at,256
  739. lwr $rk6,24+$LSB($inp)
  740. lwr $rk7,28+$LSB($inp)
  741. .set noreorder
  742. beq $bits,$at,.L256bits
  743. li $cnt,7
  744. b .Lekey_done
  745. li $t0,-2
  746. .align 4
  747. .L128bits:
  748. .set reorder
  749. srl $i0,$rk3,16
  750. srl $i1,$rk3,8
  751. and $i0,0xff
  752. and $i1,0xff
  753. and $i2,$rk3,0xff
  754. srl $i3,$rk3,24
  755. $PTR_ADD $i0,$Tbl
  756. $PTR_ADD $i1,$Tbl
  757. $PTR_ADD $i2,$Tbl
  758. $PTR_ADD $i3,$Tbl
  759. lbu $i0,1024($i0)
  760. lbu $i1,1024($i1)
  761. lbu $i2,1024($i2)
  762. lbu $i3,1024($i3)
  763. sw $rk0,0($key)
  764. sw $rk1,4($key)
  765. sw $rk2,8($key)
  766. sw $rk3,12($key)
  767. sub $cnt,1
  768. $PTR_ADD $key,16
  769. _bias $i0,24
  770. _bias $i1,16
  771. _bias $i2,8
  772. _bias $i3,0
  773. xor $rk0,$i0
  774. lw $i0,0($rcon)
  775. xor $rk0,$i1
  776. xor $rk0,$i2
  777. xor $rk0,$i3
  778. xor $rk0,$i0
  779. xor $rk1,$rk0
  780. xor $rk2,$rk1
  781. xor $rk3,$rk2
  782. .set noreorder
  783. bnez $cnt,.L128bits
  784. $PTR_ADD $rcon,4
  785. sw $rk0,0($key)
  786. sw $rk1,4($key)
  787. sw $rk2,8($key)
  788. li $cnt,10
  789. sw $rk3,12($key)
  790. li $t0,0
  791. sw $cnt,80($key)
  792. b .Lekey_done
  793. $PTR_SUB $key,10*16
  794. .align 4
  795. .L192bits:
  796. .set reorder
  797. srl $i0,$rk5,16
  798. srl $i1,$rk5,8
  799. and $i0,0xff
  800. and $i1,0xff
  801. and $i2,$rk5,0xff
  802. srl $i3,$rk5,24
  803. $PTR_ADD $i0,$Tbl
  804. $PTR_ADD $i1,$Tbl
  805. $PTR_ADD $i2,$Tbl
  806. $PTR_ADD $i3,$Tbl
  807. lbu $i0,1024($i0)
  808. lbu $i1,1024($i1)
  809. lbu $i2,1024($i2)
  810. lbu $i3,1024($i3)
  811. sw $rk0,0($key)
  812. sw $rk1,4($key)
  813. sw $rk2,8($key)
  814. sw $rk3,12($key)
  815. sw $rk4,16($key)
  816. sw $rk5,20($key)
  817. sub $cnt,1
  818. $PTR_ADD $key,24
  819. _bias $i0,24
  820. _bias $i1,16
  821. _bias $i2,8
  822. _bias $i3,0
  823. xor $rk0,$i0
  824. lw $i0,0($rcon)
  825. xor $rk0,$i1
  826. xor $rk0,$i2
  827. xor $rk0,$i3
  828. xor $rk0,$i0
  829. xor $rk1,$rk0
  830. xor $rk2,$rk1
  831. xor $rk3,$rk2
  832. xor $rk4,$rk3
  833. xor $rk5,$rk4
  834. .set noreorder
  835. bnez $cnt,.L192bits
  836. $PTR_ADD $rcon,4
  837. sw $rk0,0($key)
  838. sw $rk1,4($key)
  839. sw $rk2,8($key)
  840. li $cnt,12
  841. sw $rk3,12($key)
  842. li $t0,0
  843. sw $cnt,48($key)
  844. b .Lekey_done
  845. $PTR_SUB $key,12*16
  846. .align 4
  847. .L256bits:
  848. .set reorder
  849. srl $i0,$rk7,16
  850. srl $i1,$rk7,8
  851. and $i0,0xff
  852. and $i1,0xff
  853. and $i2,$rk7,0xff
  854. srl $i3,$rk7,24
  855. $PTR_ADD $i0,$Tbl
  856. $PTR_ADD $i1,$Tbl
  857. $PTR_ADD $i2,$Tbl
  858. $PTR_ADD $i3,$Tbl
  859. lbu $i0,1024($i0)
  860. lbu $i1,1024($i1)
  861. lbu $i2,1024($i2)
  862. lbu $i3,1024($i3)
  863. sw $rk0,0($key)
  864. sw $rk1,4($key)
  865. sw $rk2,8($key)
  866. sw $rk3,12($key)
  867. sw $rk4,16($key)
  868. sw $rk5,20($key)
  869. sw $rk6,24($key)
  870. sw $rk7,28($key)
  871. sub $cnt,1
  872. _bias $i0,24
  873. _bias $i1,16
  874. _bias $i2,8
  875. _bias $i3,0
  876. xor $rk0,$i0
  877. lw $i0,0($rcon)
  878. xor $rk0,$i1
  879. xor $rk0,$i2
  880. xor $rk0,$i3
  881. xor $rk0,$i0
  882. xor $rk1,$rk0
  883. xor $rk2,$rk1
  884. xor $rk3,$rk2
  885. beqz $cnt,.L256bits_done
  886. srl $i0,$rk3,24
  887. srl $i1,$rk3,16
  888. srl $i2,$rk3,8
  889. and $i3,$rk3,0xff
  890. and $i1,0xff
  891. and $i2,0xff
  892. $PTR_ADD $i0,$Tbl
  893. $PTR_ADD $i1,$Tbl
  894. $PTR_ADD $i2,$Tbl
  895. $PTR_ADD $i3,$Tbl
  896. lbu $i0,1024($i0)
  897. lbu $i1,1024($i1)
  898. lbu $i2,1024($i2)
  899. lbu $i3,1024($i3)
  900. sll $i0,24
  901. sll $i1,16
  902. sll $i2,8
  903. xor $rk4,$i0
  904. xor $rk4,$i1
  905. xor $rk4,$i2
  906. xor $rk4,$i3
  907. xor $rk5,$rk4
  908. xor $rk6,$rk5
  909. xor $rk7,$rk6
  910. $PTR_ADD $key,32
  911. .set noreorder
  912. b .L256bits
  913. $PTR_ADD $rcon,4
  914. .L256bits_done:
  915. sw $rk0,32($key)
  916. sw $rk1,36($key)
  917. sw $rk2,40($key)
  918. li $cnt,14
  919. sw $rk3,44($key)
  920. li $t0,0
  921. sw $cnt,48($key)
  922. $PTR_SUB $key,12*16
  923. .Lekey_done:
  924. jr $ra
  925. nop
  926. .end _mips_AES_set_encrypt_key
  927. .globl AES_set_encrypt_key
  928. .ent AES_set_encrypt_key
  929. AES_set_encrypt_key:
  930. .frame $sp,$FRAMESIZE,$ra
  931. .mask $SAVED_REGS_MASK,-$SZREG
  932. .set noreorder
  933. ___
  934. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  935. .cpload $pf
  936. ___
  937. $code.=<<___;
  938. $PTR_SUB $sp,$FRAMESIZE
  939. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  940. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  941. ___
  942. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  943. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
  944. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  945. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  946. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  947. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  948. ___
  949. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  950. .cplocal $Tbl
  951. .cpsetup $pf,$zero,AES_set_encrypt_key
  952. ___
  953. $code.=<<___;
  954. .set reorder
  955. la $Tbl,AES_Te # PIC-ified 'load address'
  956. bal _mips_AES_set_encrypt_key
  957. .set noreorder
  958. move $a0,$t0
  959. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  960. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  961. ___
  # NUBI epilogue of AES_set_encrypt_key: reload the callee-saved registers
  # from the very slots the prologue stored them in (FRAMESIZE-3*SZREG down
  # to FRAMESIZE-7*SZREG).  The frame is only 8 slots, so the previous
  # -11..-15 offsets resolved to negative displacements below the stack
  # pointer and restored garbage.
  $code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
  969. $code.=<<___;
  970. jr $ra
  971. $PTR_ADD $sp,$FRAMESIZE
  972. .end AES_set_encrypt_key
  973. ___
  # Register aliases for AES_set_decrypt_key.  $head/$tail reuse the first
  # two argument registers as the two ends of the round-key swap loop.
  974. my ($head,$tail)=($inp,$bits);
  # Temporaries of the .Lmix loop; they reuse the registers named
  # $rk0-$rk7 above.
  975. my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
  # Scratch mask plus the three byte-replicated constants.  $x80808080
  # shares $t0 with the return code, which is why $t0 is reloaded with 0
  # once the loop has finished.
  976. my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
  977. $code.=<<___;
  978. .align 5
  979. .globl AES_set_decrypt_key
  980. .ent AES_set_decrypt_key
  981. AES_set_decrypt_key:
  982. .frame $sp,$FRAMESIZE,$ra
  983. .mask $SAVED_REGS_MASK,-$SZREG
  984. .set noreorder
  985. ___
  986. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  987. .cpload $pf
  988. ___
  989. $code.=<<___;
  990. $PTR_SUB $sp,$FRAMESIZE
  991. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  992. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  993. ___
  994. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  995. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
  996. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  997. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  998. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  999. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  1000. ___
  1001. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  1002. .cplocal $Tbl
  1003. .cpsetup $pf,$zero,AES_set_decrypt_key
  1004. ___
  1005. $code.=<<___;
  1006. .set reorder
  1007. la $Tbl,AES_Te # PIC-ified 'load address'
  1008. bal _mips_AES_set_encrypt_key
  1009. bltz $t0,.Ldkey_done
  1010. sll $at,$cnt,4
  1011. $PTR_ADD $head,$key,0
  1012. $PTR_ADD $tail,$key,$at
  1013. .align 4
  1014. .Lswap:
  1015. lw $rk0,0($head)
  1016. lw $rk1,4($head)
  1017. lw $rk2,8($head)
  1018. lw $rk3,12($head)
  1019. lw $rk4,0($tail)
  1020. lw $rk5,4($tail)
  1021. lw $rk6,8($tail)
  1022. lw $rk7,12($tail)
  1023. sw $rk0,0($tail)
  1024. sw $rk1,4($tail)
  1025. sw $rk2,8($tail)
  1026. sw $rk3,12($tail)
  1027. $PTR_ADD $head,16
  1028. $PTR_SUB $tail,16
  1029. sw $rk4,-16($head)
  1030. sw $rk5,-12($head)
  1031. sw $rk6,-8($head)
  1032. sw $rk7,-4($head)
  1033. bne $head,$tail,.Lswap
  1034. lw $tp1,16($key) # modulo-scheduled
  1035. lui $x80808080,0x8080
  1036. sub $cnt,1
  1037. or $x80808080,0x8080
  1038. sll $cnt,2
  1039. $PTR_ADD $key,16
  1040. lui $x1b1b1b1b,0x1b1b
  1041. nor $x7f7f7f7f,$zero,$x80808080
  1042. or $x1b1b1b1b,0x1b1b
  1043. .align 4
  1044. .Lmix:
  1045. and $m,$tp1,$x80808080
  1046. and $tp2,$tp1,$x7f7f7f7f
  1047. srl $tp4,$m,7
  1048. addu $tp2,$tp2 # tp2<<1
  1049. subu $m,$tp4
  1050. and $m,$x1b1b1b1b
  1051. xor $tp2,$m
  1052. and $m,$tp2,$x80808080
  1053. and $tp4,$tp2,$x7f7f7f7f
  1054. srl $tp8,$m,7
  1055. addu $tp4,$tp4 # tp4<<1
  1056. subu $m,$tp8
  1057. and $m,$x1b1b1b1b
  1058. xor $tp4,$m
  1059. and $m,$tp4,$x80808080
  1060. and $tp8,$tp4,$x7f7f7f7f
  1061. srl $tp9,$m,7
  1062. addu $tp8,$tp8 # tp8<<1
  1063. subu $m,$tp9
  1064. and $m,$x1b1b1b1b
  1065. xor $tp8,$m
  1066. xor $tp9,$tp8,$tp1
  1067. xor $tpe,$tp8,$tp4
  1068. xor $tpb,$tp9,$tp2
  1069. xor $tpd,$tp9,$tp4
  1070. _ror $tp1,$tpd,16
  1071. xor $tpe,$tp2
  1072. _ror $tp2,$tpd,-16
  1073. xor $tpe,$tp1
  1074. _ror $tp1,$tp9,8
  1075. xor $tpe,$tp2
  1076. _ror $tp2,$tp9,-24
  1077. xor $tpe,$tp1
  1078. _ror $tp1,$tpb,24
  1079. xor $tpe,$tp2
  1080. _ror $tp2,$tpb,-8
  1081. xor $tpe,$tp1
  1082. lw $tp1,4($key) # modulo-scheduled
  1083. xor $tpe,$tp2
  1084. sub $cnt,1
  1085. sw $tpe,0($key)
  1086. $PTR_ADD $key,4
  1087. bnez $cnt,.Lmix
  1088. li $t0,0
  1089. .Ldkey_done:
  1090. .set noreorder
  1091. move $a0,$t0
  1092. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  1093. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1094. ___
  # NUBI epilogue of AES_set_decrypt_key: reload the callee-saved registers
  # from the very slots the prologue stored them in (FRAMESIZE-3*SZREG down
  # to FRAMESIZE-7*SZREG).  The frame is only 8 slots, so the previous
  # -11..-15 offsets resolved to negative displacements below the stack
  # pointer and restored garbage.
  $code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
  1102. $code.=<<___;
  1103. jr $ra
  1104. $PTR_ADD $sp,$FRAMESIZE
  1105. .end AES_set_decrypt_key
  1106. ___
  1107. }}}
  1108. ######################################################################
  1109. # Tables are kept in endian-neutral manner
  1110. $code.=<<___;
  1111. .rdata
  1112. .align 6
  1113. AES_Te:
  1114. .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
  1115. .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
  1116. .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
  1117. .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
  1118. .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
  1119. .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
  1120. .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
  1121. .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
  1122. .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
  1123. .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
  1124. .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
  1125. .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
  1126. .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
  1127. .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
  1128. .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
  1129. .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
  1130. .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
  1131. .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
  1132. .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
  1133. .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
  1134. .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
  1135. .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
  1136. .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
  1137. .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
  1138. .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
  1139. .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
  1140. .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
  1141. .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
  1142. .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
  1143. .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
  1144. .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
  1145. .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
  1146. .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
  1147. .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
  1148. .byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
  1149. .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
  1150. .byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
  1151. .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
  1152. .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
  1153. .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
  1154. .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
  1155. .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
  1156. .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
  1157. .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
  1158. .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
  1159. .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
  1160. .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
  1161. .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
  1162. .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
  1163. .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
  1164. .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
  1165. .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
  1166. .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
  1167. .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
  1168. .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
  1169. .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
  1170. .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
  1171. .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
  1172. .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
  1173. .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
  1174. .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
  1175. .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
  1176. .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
  1177. .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
  1178. .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
  1179. .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
  1180. .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
  1181. .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
  1182. .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
  1183. .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
  1184. .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
  1185. .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
  1186. .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
  1187. .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
  1188. .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
  1189. .byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
  1190. .byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
  1191. .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
  1192. .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
  1193. .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
  1194. .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
  1195. .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
  1196. .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
  1197. .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
  1198. .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
  1199. .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
  1200. .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
  1201. .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
  1202. .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
  1203. .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
  1204. .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
  1205. .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
  1206. .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
  1207. .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
  1208. .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
  1209. .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
  1210. .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
  1211. .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
  1212. .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
  1213. .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
  1214. .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
  1215. .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
  1216. .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
  1217. .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
  1218. .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
  1219. .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
  1220. .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
  1221. .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
  1222. .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
  1223. .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
  1224. .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
  1225. .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
  1226. .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
  1227. .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
  1228. .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
  1229. .byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
  1230. .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
  1231. .byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
  1232. .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
  1233. .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
  1234. .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
  1235. .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
  1236. .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
  1237. .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
  1238. .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
  1239. .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
  1240. .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
  1241. .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
  1242. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
  1243. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  1244. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  1245. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  1246. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  1247. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  1248. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  1249. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  1250. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  1251. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  1252. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  1253. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  1254. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  1255. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  1256. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  1257. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  1258. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  1259. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  1260. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  1261. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  1262. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  1263. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  1264. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  1265. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  1266. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  1267. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  1268. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  1269. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  1270. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  1271. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  1272. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  1273. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  1274. .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
  1275. .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
  1276. .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
  1277. .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
  1278. .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
  1279. .align 6
  1280. AES_Td:
  1281. .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
  1282. .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
  1283. .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
  1284. .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
  1285. .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
  1286. .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
  1287. .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
  1288. .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
  1289. .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
  1290. .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
  1291. .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
  1292. .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
  1293. .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
  1294. .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
  1295. .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
  1296. .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
  1297. .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
  1298. .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
  1299. .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
  1300. .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
  1301. .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
  1302. .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
  1303. .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
  1304. .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
  1305. .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
  1306. .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
  1307. .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
  1308. .byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
  1309. .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
  1310. .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
  1311. .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
  1312. .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
  1313. .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
  1314. .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
  1315. .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
  1316. .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
  1317. .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
  1318. .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
  1319. .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
  1320. .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
  1321. .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
  1322. .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
  1323. .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
  1324. .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
  1325. .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
  1326. .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
  1327. .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
  1328. .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
  1329. .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
  1330. .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
  1331. .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
  1332. .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
  1333. .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
  1334. .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
  1335. .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
  1336. .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
  1337. .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
  1338. .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
  1339. .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
  1340. .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
  1341. .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
  1342. .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
  1343. .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
  1344. .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
  1345. .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
  1346. .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
  1347. .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
  1348. .byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
  1349. .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
  1350. .byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
  1351. .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
  1352. .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
  1353. .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
  1354. .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
  1355. .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
  1356. .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
  1357. .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
  1358. .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
  1359. .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
  1360. .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
  1361. .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
  1362. .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
  1363. .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
  1364. .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
  1365. .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
  1366. .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
  1367. .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
  1368. .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
  1369. .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
  1370. .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
  1371. .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
  1372. .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
  1373. .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
  1374. .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
  1375. .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
  1376. .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
  1377. .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
  1378. .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
  1379. .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
  1380. .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
  1381. .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
  1382. .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
  1383. .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
  1384. .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
  1385. .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
  1386. .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
  1387. .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
  1388. .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
  1389. .byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
  1390. .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
  1391. .byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
  1392. .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
  1393. .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
  1394. .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
  1395. .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
  1396. .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
  1397. .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
  1398. .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
  1399. .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
  1400. .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
  1401. .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
  1402. .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
  1403. .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
  1404. .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
  1405. .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
  1406. .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
  1407. .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
  1408. .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
  1409. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
  1410. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  1411. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  1412. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  1413. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  1414. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  1415. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  1416. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  1417. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  1418. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  1419. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  1420. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  1421. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  1422. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  1423. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  1424. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  1425. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  1426. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  1427. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  1428. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  1429. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  1430. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  1431. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  1432. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  1433. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  1434. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  1435. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  1436. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  1437. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  1438. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  1439. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  1440. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  1441. ___
  # Post-process the generated assembly held in $code, one line at a
  # time, and write the result to STDOUT.
  foreach (split("\n",$code)) {
      # Replace every `...` span with the result of evaluating its
      # contents as Perl.
      s/\`([^\`]*)\`/eval $1/ge;

      # made-up _instructions, _xtr, _ins, _ror and _bias, cope
      # with byte order dependencies...
      if (/^\s+_/) {
          # Two-operand shorthand "_op $rd,X" becomes "_op $rd,$rd,X";
          # lines that already carry three operands do not match.
          s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

          # Lower the pseudo-instruction to a real shift.  The shift
          # count is used as written when $big_endian is true and is
          # recomputed otherwise.  Only the first substitution that
          # matches is applied.
          s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
              sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
                                                  : eval("24-$3"))/e or
          s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
              sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
                                                  : eval("24-$3"))/e or
          s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
              sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
                                                  : eval("$3*-1"))/e or
          s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
              sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
                                                  : eval("($3-16)&31"))/e;

          # Tidy up what the lowering produced: an srl with a negative
          # count becomes an sll by the same magnitude, an srl by 0
          # becomes "and ...,0xff", and an sll by 0 is commented out.
          s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
              sprintf("sll\t$1,$2,$3")/e or
          s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
              sprintf("and\t$1,$2,0xff")/e or
          s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
      }

      # convert lwl/lwr and swr/swl to little-endian order
      # NOTE(review): the greedy ".*" leaves only the last digit of the
      # offset in $2, and "&" binds looser than "+"/"-", so the whole
      # sum is masked with 3.  Presumably every offset that reaches
      # this point is written so that this is harmless -- confirm
      # before changing either regex.
      if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
          s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
              sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or
          s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
              sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
      }

      print $_,"\n";
  }

  # Buffered write errors (full disk, closed pipe) may only surface when
  # the handle is closed, so a failure here must abort the build instead
  # of silently leaving truncated assembly behind.
  close STDOUT or die "error closing STDOUT: $!";