#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# AES for MIPS
# October 2010
#
# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
# faster than gcc-generated code, which is not very impressive. But
# recall that compressed S-box requires extra processing, namely
# additional rotations. Rotations are implemented with lwl/lwr pairs,
# which are normally used for loading unaligned data. Another cool
# thing about this module is its endian neutrality, which means that
# it processes data without ever changing byte order...
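#
# For example, with a pointer at a word-aligned Te0 entry holding
# 0xc66363a5, the pair "lwl $t0,3($i0)"/"lwr $t0,2($i0)" (as written
# below for the big-endian reading; the filter at the end of this file
# adjusts the offsets for little-endian targets) yields 0xa5c66363,
# i.e. the same entry rotated by 8 bits, so the rotated Te1/Te2/Te3
# tables never have to be stored.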
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift || "o32";	# supported flavours are o32,n32,64,nubi32,nubi64

if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
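# Under the 64-bit flavours a line such as "$PTR_ADD $key0,$key,16"
# below expands to "dadd $key0,$key,16", while o32 gets a plain "add",
# so the same assembly template serves all supported ABIs.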
$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
#
# <appro@openssl.org>
#
######################################################################
for (@ARGV) {	$big_endian=1 if (/\-DB_ENDIAN/);
		$big_endian=0 if (/\-DL_ENDIAN/);
		$output=$_ if (/^\w[\w\-]*\.\w+$/);	}

if (!defined($big_endian))
	{ $big_endian=(unpack('L',pack('N',1))==1); }

open STDOUT,">$output" if (defined($output));

my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian
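# The assembly below is written with big-endian byte numbering
# ($MSB=0, $LSB=3); on a little-endian target the filter at the bottom
# of this file rewrites the partial-word offsets, e.g.
# "lwl $s0,0($inp)"/"lwr $s0,3($inp)" become
# "lwl $s0,3($inp)"/"lwr $s0,0($inp)", which is what keeps the module
# endian-neutral without a second code path.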
$code.=<<___;
.text
.option	pic2
.set	noat
___

{{{
my $FRAMESIZE=16*$SZREG;
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
my ($key0,$cnt)=($gp,$fp);

# instruction ordering is "stolen" from the output of MIPSpro assembler
# invoked with -mips3 -O3 arguments...
  90. $code.=<<___;
  91. .align 5
  92. .ent _mips_AES_encrypt
  93. _mips_AES_encrypt:
  94. .frame $sp,0,$ra
  95. .set reorder
  96. lw $t0,0($key)
  97. lw $t1,4($key)
  98. lw $t2,8($key)
  99. lw $t3,12($key)
  100. lw $cnt,240($key)
  101. $PTR_ADD $key0,$key,16
  102. xor $s0,$t0
  103. xor $s1,$t1
  104. xor $s2,$t2
  105. xor $s3,$t3
  106. sub $cnt,1
  107. _xtr $i0,$s1,16-2
  108. .Loop_enc:
  109. _xtr $i1,$s2,16-2
  110. _xtr $i2,$s3,16-2
  111. _xtr $i3,$s0,16-2
  112. and $i0,0x3fc
  113. and $i1,0x3fc
  114. and $i2,0x3fc
  115. and $i3,0x3fc
  116. $PTR_ADD $i0,$Tbl
  117. $PTR_ADD $i1,$Tbl
  118. $PTR_ADD $i2,$Tbl
  119. $PTR_ADD $i3,$Tbl
  120. lwl $t0,3($i0) # Te1[s1>>16]
  121. lwl $t1,3($i1) # Te1[s2>>16]
  122. lwl $t2,3($i2) # Te1[s3>>16]
  123. lwl $t3,3($i3) # Te1[s0>>16]
  124. lwr $t0,2($i0) # Te1[s1>>16]
  125. lwr $t1,2($i1) # Te1[s2>>16]
  126. lwr $t2,2($i2) # Te1[s3>>16]
  127. lwr $t3,2($i3) # Te1[s0>>16]
  128. _xtr $i0,$s2,8-2
  129. _xtr $i1,$s3,8-2
  130. _xtr $i2,$s0,8-2
  131. _xtr $i3,$s1,8-2
  132. and $i0,0x3fc
  133. and $i1,0x3fc
  134. and $i2,0x3fc
  135. and $i3,0x3fc
  136. $PTR_ADD $i0,$Tbl
  137. $PTR_ADD $i1,$Tbl
  138. $PTR_ADD $i2,$Tbl
  139. $PTR_ADD $i3,$Tbl
  140. lwl $t4,2($i0) # Te2[s2>>8]
  141. lwl $t5,2($i1) # Te2[s3>>8]
  142. lwl $t6,2($i2) # Te2[s0>>8]
  143. lwl $t7,2($i3) # Te2[s1>>8]
  144. lwr $t4,1($i0) # Te2[s2>>8]
  145. lwr $t5,1($i1) # Te2[s3>>8]
  146. lwr $t6,1($i2) # Te2[s0>>8]
  147. lwr $t7,1($i3) # Te2[s1>>8]
  148. _xtr $i0,$s3,0-2
  149. _xtr $i1,$s0,0-2
  150. _xtr $i2,$s1,0-2
  151. _xtr $i3,$s2,0-2
  152. and $i0,0x3fc
  153. and $i1,0x3fc
  154. and $i2,0x3fc
  155. and $i3,0x3fc
  156. $PTR_ADD $i0,$Tbl
  157. $PTR_ADD $i1,$Tbl
  158. $PTR_ADD $i2,$Tbl
  159. $PTR_ADD $i3,$Tbl
  160. lwl $t8,1($i0) # Te3[s3]
  161. lwl $t9,1($i1) # Te3[s0]
  162. lwl $t10,1($i2) # Te3[s1]
  163. lwl $t11,1($i3) # Te3[s2]
  164. lwr $t8,0($i0) # Te3[s3]
  165. lwr $t9,0($i1) # Te3[s0]
  166. lwr $t10,0($i2) # Te3[s1]
  167. lwr $t11,0($i3) # Te3[s2]
  168. _xtr $i0,$s0,24-2
  169. _xtr $i1,$s1,24-2
  170. _xtr $i2,$s2,24-2
  171. _xtr $i3,$s3,24-2
  172. and $i0,0x3fc
  173. and $i1,0x3fc
  174. and $i2,0x3fc
  175. and $i3,0x3fc
  176. $PTR_ADD $i0,$Tbl
  177. $PTR_ADD $i1,$Tbl
  178. $PTR_ADD $i2,$Tbl
  179. $PTR_ADD $i3,$Tbl
  180. xor $t0,$t4
  181. xor $t1,$t5
  182. xor $t2,$t6
  183. xor $t3,$t7
  184. lw $t4,0($i0) # Te0[s0>>24]
  185. lw $t5,0($i1) # Te0[s1>>24]
  186. lw $t6,0($i2) # Te0[s2>>24]
  187. lw $t7,0($i3) # Te0[s3>>24]
  188. lw $s0,0($key0)
  189. lw $s1,4($key0)
  190. lw $s2,8($key0)
  191. lw $s3,12($key0)
  192. xor $t0,$t8
  193. xor $t1,$t9
  194. xor $t2,$t10
  195. xor $t3,$t11
  196. xor $t0,$t4
  197. xor $t1,$t5
  198. xor $t2,$t6
  199. xor $t3,$t7
  200. sub $cnt,1
  201. $PTR_ADD $key0,16
  202. xor $s0,$t0
  203. xor $s1,$t1
  204. xor $s2,$t2
  205. xor $s3,$t3
  206. .set noreorder
  207. bnez $cnt,.Loop_enc
  208. _xtr $i0,$s1,16-2
  209. .set reorder
  210. _xtr $i1,$s2,16-2
  211. _xtr $i2,$s3,16-2
  212. _xtr $i3,$s0,16-2
  213. and $i0,0x3fc
  214. and $i1,0x3fc
  215. and $i2,0x3fc
  216. and $i3,0x3fc
  217. $PTR_ADD $i0,$Tbl
  218. $PTR_ADD $i1,$Tbl
  219. $PTR_ADD $i2,$Tbl
  220. $PTR_ADD $i3,$Tbl
  221. lbu $t0,2($i0) # Te4[s1>>16]
  222. lbu $t1,2($i1) # Te4[s2>>16]
  223. lbu $t2,2($i2) # Te4[s3>>16]
  224. lbu $t3,2($i3) # Te4[s0>>16]
  225. _xtr $i0,$s2,8-2
  226. _xtr $i1,$s3,8-2
  227. _xtr $i2,$s0,8-2
  228. _xtr $i3,$s1,8-2
  229. and $i0,0x3fc
  230. and $i1,0x3fc
  231. and $i2,0x3fc
  232. and $i3,0x3fc
  233. $PTR_ADD $i0,$Tbl
  234. $PTR_ADD $i1,$Tbl
  235. $PTR_ADD $i2,$Tbl
  236. $PTR_ADD $i3,$Tbl
  237. lbu $t4,2($i0) # Te4[s2>>8]
  238. lbu $t5,2($i1) # Te4[s3>>8]
  239. lbu $t6,2($i2) # Te4[s0>>8]
  240. lbu $t7,2($i3) # Te4[s1>>8]
  241. _xtr $i0,$s0,24-2
  242. _xtr $i1,$s1,24-2
  243. _xtr $i2,$s2,24-2
  244. _xtr $i3,$s3,24-2
  245. and $i0,0x3fc
  246. and $i1,0x3fc
  247. and $i2,0x3fc
  248. and $i3,0x3fc
  249. $PTR_ADD $i0,$Tbl
  250. $PTR_ADD $i1,$Tbl
  251. $PTR_ADD $i2,$Tbl
  252. $PTR_ADD $i3,$Tbl
  253. lbu $t8,2($i0) # Te4[s0>>24]
  254. lbu $t9,2($i1) # Te4[s1>>24]
  255. lbu $t10,2($i2) # Te4[s2>>24]
  256. lbu $t11,2($i3) # Te4[s3>>24]
  257. _xtr $i0,$s3,0-2
  258. _xtr $i1,$s0,0-2
  259. _xtr $i2,$s1,0-2
  260. _xtr $i3,$s2,0-2
  261. and $i0,0x3fc
  262. and $i1,0x3fc
  263. and $i2,0x3fc
  264. and $i3,0x3fc
  265. _ins $t0,16
  266. _ins $t1,16
  267. _ins $t2,16
  268. _ins $t3,16
  269. _ins $t4,8
  270. _ins $t5,8
  271. _ins $t6,8
  272. _ins $t7,8
  273. xor $t0,$t4
  274. xor $t1,$t5
  275. xor $t2,$t6
  276. xor $t3,$t7
  277. $PTR_ADD $i0,$Tbl
  278. $PTR_ADD $i1,$Tbl
  279. $PTR_ADD $i2,$Tbl
  280. $PTR_ADD $i3,$Tbl
  281. lbu $t4,2($i0) # Te4[s3]
  282. lbu $t5,2($i1) # Te4[s0]
  283. lbu $t6,2($i2) # Te4[s1]
  284. lbu $t7,2($i3) # Te4[s2]
  285. _ins $t8,24
  286. _ins $t9,24
  287. _ins $t10,24
  288. _ins $t11,24
  289. lw $s0,0($key0)
  290. lw $s1,4($key0)
  291. lw $s2,8($key0)
  292. lw $s3,12($key0)
  293. xor $t0,$t8
  294. xor $t1,$t9
  295. xor $t2,$t10
  296. xor $t3,$t11
  297. _ins $t4,0
  298. _ins $t5,0
  299. _ins $t6,0
  300. _ins $t7,0
  301. xor $t0,$t4
  302. xor $t1,$t5
  303. xor $t2,$t6
  304. xor $t3,$t7
  305. xor $s0,$t0
  306. xor $s1,$t1
  307. xor $s2,$t2
  308. xor $s3,$t3
  309. jr $ra
  310. .end _mips_AES_encrypt
  311. .align 5
  312. .globl AES_encrypt
  313. .ent AES_encrypt
  314. AES_encrypt:
  315. .frame $sp,$FRAMESIZE,$ra
  316. .mask $SAVED_REGS_MASK,-$SZREG
  317. .set noreorder
  318. ___
  319. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  320. .cpload $pf
  321. ___
  322. $code.=<<___;
  323. $PTR_SUB $sp,$FRAMESIZE
  324. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  325. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  326. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
  327. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  328. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  329. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  330. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  331. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  332. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  333. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  334. ___
  335. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  336. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  337. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  338. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  339. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  340. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  341. ___
  342. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  343. .cplocal $Tbl
  344. .cpsetup $pf,$zero,AES_encrypt
  345. ___
  346. $code.=<<___;
  347. .set reorder
  348. la $Tbl,AES_Te # PIC-ified 'load address'
  349. lwl $s0,0+$MSB($inp)
  350. lwl $s1,4+$MSB($inp)
  351. lwl $s2,8+$MSB($inp)
  352. lwl $s3,12+$MSB($inp)
  353. lwr $s0,0+$LSB($inp)
  354. lwr $s1,4+$LSB($inp)
  355. lwr $s2,8+$LSB($inp)
  356. lwr $s3,12+$LSB($inp)
  357. bal _mips_AES_encrypt
  358. swr $s0,0+$LSB($out)
  359. swr $s1,4+$LSB($out)
  360. swr $s2,8+$LSB($out)
  361. swr $s3,12+$LSB($out)
  362. swl $s0,0+$MSB($out)
  363. swl $s1,4+$MSB($out)
  364. swl $s2,8+$MSB($out)
  365. swl $s3,12+$MSB($out)
  366. .set noreorder
  367. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  368. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  369. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  370. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  371. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  372. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  373. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  374. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  375. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  376. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  377. ___
  378. $code.=<<___ if ($flavour =~ /nubi/i);
  379. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  380. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  381. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  382. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  383. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  384. ___
  385. $code.=<<___;
  386. jr $ra
  387. $PTR_ADD $sp,$FRAMESIZE
  388. .end AES_encrypt
  389. ___
  390. $code.=<<___;
  391. .align 5
  392. .ent _mips_AES_decrypt
  393. _mips_AES_decrypt:
  394. .frame $sp,0,$ra
  395. .set reorder
  396. lw $t0,0($key)
  397. lw $t1,4($key)
  398. lw $t2,8($key)
  399. lw $t3,12($key)
  400. lw $cnt,240($key)
  401. $PTR_ADD $key0,$key,16
  402. xor $s0,$t0
  403. xor $s1,$t1
  404. xor $s2,$t2
  405. xor $s3,$t3
  406. sub $cnt,1
  407. _xtr $i0,$s3,16-2
  408. .Loop_dec:
  409. _xtr $i1,$s0,16-2
  410. _xtr $i2,$s1,16-2
  411. _xtr $i3,$s2,16-2
  412. and $i0,0x3fc
  413. and $i1,0x3fc
  414. and $i2,0x3fc
  415. and $i3,0x3fc
  416. $PTR_ADD $i0,$Tbl
  417. $PTR_ADD $i1,$Tbl
  418. $PTR_ADD $i2,$Tbl
  419. $PTR_ADD $i3,$Tbl
  420. lwl $t0,3($i0) # Td1[s3>>16]
  421. lwl $t1,3($i1) # Td1[s0>>16]
  422. lwl $t2,3($i2) # Td1[s1>>16]
  423. lwl $t3,3($i3) # Td1[s2>>16]
  424. lwr $t0,2($i0) # Td1[s3>>16]
  425. lwr $t1,2($i1) # Td1[s0>>16]
  426. lwr $t2,2($i2) # Td1[s1>>16]
  427. lwr $t3,2($i3) # Td1[s2>>16]
  428. _xtr $i0,$s2,8-2
  429. _xtr $i1,$s3,8-2
  430. _xtr $i2,$s0,8-2
  431. _xtr $i3,$s1,8-2
  432. and $i0,0x3fc
  433. and $i1,0x3fc
  434. and $i2,0x3fc
  435. and $i3,0x3fc
  436. $PTR_ADD $i0,$Tbl
  437. $PTR_ADD $i1,$Tbl
  438. $PTR_ADD $i2,$Tbl
  439. $PTR_ADD $i3,$Tbl
  440. lwl $t4,2($i0) # Td2[s2>>8]
  441. lwl $t5,2($i1) # Td2[s3>>8]
  442. lwl $t6,2($i2) # Td2[s0>>8]
  443. lwl $t7,2($i3) # Td2[s1>>8]
  444. lwr $t4,1($i0) # Td2[s2>>8]
  445. lwr $t5,1($i1) # Td2[s3>>8]
  446. lwr $t6,1($i2) # Td2[s0>>8]
  447. lwr $t7,1($i3) # Td2[s1>>8]
  448. _xtr $i0,$s1,0-2
  449. _xtr $i1,$s2,0-2
  450. _xtr $i2,$s3,0-2
  451. _xtr $i3,$s0,0-2
  452. and $i0,0x3fc
  453. and $i1,0x3fc
  454. and $i2,0x3fc
  455. and $i3,0x3fc
  456. $PTR_ADD $i0,$Tbl
  457. $PTR_ADD $i1,$Tbl
  458. $PTR_ADD $i2,$Tbl
  459. $PTR_ADD $i3,$Tbl
  460. lwl $t8,1($i0) # Td3[s1]
  461. lwl $t9,1($i1) # Td3[s2]
  462. lwl $t10,1($i2) # Td3[s3]
  463. lwl $t11,1($i3) # Td3[s0]
  464. lwr $t8,0($i0) # Td3[s1]
  465. lwr $t9,0($i1) # Td3[s2]
  466. lwr $t10,0($i2) # Td3[s3]
  467. lwr $t11,0($i3) # Td3[s0]
  468. _xtr $i0,$s0,24-2
  469. _xtr $i1,$s1,24-2
  470. _xtr $i2,$s2,24-2
  471. _xtr $i3,$s3,24-2
  472. and $i0,0x3fc
  473. and $i1,0x3fc
  474. and $i2,0x3fc
  475. and $i3,0x3fc
  476. $PTR_ADD $i0,$Tbl
  477. $PTR_ADD $i1,$Tbl
  478. $PTR_ADD $i2,$Tbl
  479. $PTR_ADD $i3,$Tbl
  480. xor $t0,$t4
  481. xor $t1,$t5
  482. xor $t2,$t6
  483. xor $t3,$t7
  484. lw $t4,0($i0) # Td0[s0>>24]
  485. lw $t5,0($i1) # Td0[s1>>24]
  486. lw $t6,0($i2) # Td0[s2>>24]
  487. lw $t7,0($i3) # Td0[s3>>24]
  488. lw $s0,0($key0)
  489. lw $s1,4($key0)
  490. lw $s2,8($key0)
  491. lw $s3,12($key0)
  492. xor $t0,$t8
  493. xor $t1,$t9
  494. xor $t2,$t10
  495. xor $t3,$t11
  496. xor $t0,$t4
  497. xor $t1,$t5
  498. xor $t2,$t6
  499. xor $t3,$t7
  500. sub $cnt,1
  501. $PTR_ADD $key0,16
  502. xor $s0,$t0
  503. xor $s1,$t1
  504. xor $s2,$t2
  505. xor $s3,$t3
  506. .set noreorder
  507. bnez $cnt,.Loop_dec
  508. _xtr $i0,$s3,16-2
  509. .set reorder
  510. lw $t4,1024($Tbl) # prefetch Td4
  511. lw $t5,1024+32($Tbl)
  512. lw $t6,1024+64($Tbl)
  513. lw $t7,1024+96($Tbl)
  514. lw $t8,1024+128($Tbl)
  515. lw $t9,1024+160($Tbl)
  516. lw $t10,1024+192($Tbl)
  517. lw $t11,1024+224($Tbl)
  518. _xtr $i0,$s3,16
  519. _xtr $i1,$s0,16
  520. _xtr $i2,$s1,16
  521. _xtr $i3,$s2,16
  522. and $i0,0xff
  523. and $i1,0xff
  524. and $i2,0xff
  525. and $i3,0xff
  526. $PTR_ADD $i0,$Tbl
  527. $PTR_ADD $i1,$Tbl
  528. $PTR_ADD $i2,$Tbl
  529. $PTR_ADD $i3,$Tbl
  530. lbu $t0,1024($i0) # Td4[s3>>16]
  531. lbu $t1,1024($i1) # Td4[s0>>16]
  532. lbu $t2,1024($i2) # Td4[s1>>16]
  533. lbu $t3,1024($i3) # Td4[s2>>16]
  534. _xtr $i0,$s2,8
  535. _xtr $i1,$s3,8
  536. _xtr $i2,$s0,8
  537. _xtr $i3,$s1,8
  538. and $i0,0xff
  539. and $i1,0xff
  540. and $i2,0xff
  541. and $i3,0xff
  542. $PTR_ADD $i0,$Tbl
  543. $PTR_ADD $i1,$Tbl
  544. $PTR_ADD $i2,$Tbl
  545. $PTR_ADD $i3,$Tbl
  546. lbu $t4,1024($i0) # Td4[s2>>8]
  547. lbu $t5,1024($i1) # Td4[s3>>8]
  548. lbu $t6,1024($i2) # Td4[s0>>8]
  549. lbu $t7,1024($i3) # Td4[s1>>8]
  550. _xtr $i0,$s0,24
  551. _xtr $i1,$s1,24
  552. _xtr $i2,$s2,24
  553. _xtr $i3,$s3,24
  554. $PTR_ADD $i0,$Tbl
  555. $PTR_ADD $i1,$Tbl
  556. $PTR_ADD $i2,$Tbl
  557. $PTR_ADD $i3,$Tbl
  558. lbu $t8,1024($i0) # Td4[s0>>24]
  559. lbu $t9,1024($i1) # Td4[s1>>24]
  560. lbu $t10,1024($i2) # Td4[s2>>24]
  561. lbu $t11,1024($i3) # Td4[s3>>24]
  562. _xtr $i0,$s1,0
  563. _xtr $i1,$s2,0
  564. _xtr $i2,$s3,0
  565. _xtr $i3,$s0,0
  566. _ins $t0,16
  567. _ins $t1,16
  568. _ins $t2,16
  569. _ins $t3,16
  570. _ins $t4,8
  571. _ins $t5,8
  572. _ins $t6,8
  573. _ins $t7,8
  574. xor $t0,$t4
  575. xor $t1,$t5
  576. xor $t2,$t6
  577. xor $t3,$t7
  578. $PTR_ADD $i0,$Tbl
  579. $PTR_ADD $i1,$Tbl
  580. $PTR_ADD $i2,$Tbl
  581. $PTR_ADD $i3,$Tbl
  582. lbu $t4,1024($i0) # Td4[s1]
  583. lbu $t5,1024($i1) # Td4[s2]
  584. lbu $t6,1024($i2) # Td4[s3]
  585. lbu $t7,1024($i3) # Td4[s0]
  586. _ins $t8,24
  587. _ins $t9,24
  588. _ins $t10,24
  589. _ins $t11,24
  590. lw $s0,0($key0)
  591. lw $s1,4($key0)
  592. lw $s2,8($key0)
  593. lw $s3,12($key0)
  594. _ins $t4,0
  595. _ins $t5,0
  596. _ins $t6,0
  597. _ins $t7,0
  598. xor $t0,$t8
  599. xor $t1,$t9
  600. xor $t2,$t10
  601. xor $t3,$t11
  602. xor $t0,$t4
  603. xor $t1,$t5
  604. xor $t2,$t6
  605. xor $t3,$t7
  606. xor $s0,$t0
  607. xor $s1,$t1
  608. xor $s2,$t2
  609. xor $s3,$t3
  610. jr $ra
  611. .end _mips_AES_decrypt
  612. .align 5
  613. .globl AES_decrypt
  614. .ent AES_decrypt
  615. AES_decrypt:
  616. .frame $sp,$FRAMESIZE,$ra
  617. .mask $SAVED_REGS_MASK,-$SZREG
  618. .set noreorder
  619. ___
  620. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  621. .cpload $pf
  622. ___
  623. $code.=<<___;
  624. $PTR_SUB $sp,$FRAMESIZE
  625. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  626. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  627. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
  628. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  629. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  630. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  631. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  632. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  633. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  634. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  635. ___
  636. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  637. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  638. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  639. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  640. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  641. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  642. ___
  643. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  644. .cplocal $Tbl
  645. .cpsetup $pf,$zero,AES_decrypt
  646. ___
  647. $code.=<<___;
  648. .set reorder
  649. la $Tbl,AES_Td # PIC-ified 'load address'
  650. lwl $s0,0+$MSB($inp)
  651. lwl $s1,4+$MSB($inp)
  652. lwl $s2,8+$MSB($inp)
  653. lwl $s3,12+$MSB($inp)
  654. lwr $s0,0+$LSB($inp)
  655. lwr $s1,4+$LSB($inp)
  656. lwr $s2,8+$LSB($inp)
  657. lwr $s3,12+$LSB($inp)
  658. bal _mips_AES_decrypt
  659. swr $s0,0+$LSB($out)
  660. swr $s1,4+$LSB($out)
  661. swr $s2,8+$LSB($out)
  662. swr $s3,12+$LSB($out)
  663. swl $s0,0+$MSB($out)
  664. swl $s1,4+$MSB($out)
  665. swl $s2,8+$MSB($out)
  666. swl $s3,12+$MSB($out)
  667. .set noreorder
  668. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  669. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  670. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  671. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  672. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  673. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  674. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  675. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  676. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  677. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  678. ___
  679. $code.=<<___ if ($flavour =~ /nubi/i);
  680. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  681. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  682. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  683. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  684. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  685. ___
  686. $code.=<<___;
  687. jr $ra
  688. $PTR_ADD $sp,$FRAMESIZE
  689. .end AES_decrypt
  690. ___
  691. }}}
  692. {{{
  693. my $FRAMESIZE=8*$SZREG;
  694. my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
  695. my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
  696. my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
  697. my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
  698. my ($rcon,$cnt)=($gp,$fp);
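# The code below is the textbook AES key expansion: each pass of
# .L128bits/.L192bits/.L256bits stores the current round-key words, runs
# the last word through RotWord+SubWord (four Te4 byte loads repositioned
# by the _bias shifts), xors in the round constant loaded from ($rcon),
# and chains the xor through the remaining words of the round key.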
  699. $code.=<<___;
  700. .align 5
  701. .ent _mips_AES_set_encrypt_key
  702. _mips_AES_set_encrypt_key:
  703. .frame $sp,0,$ra
  704. .set noreorder
  705. beqz $inp,.Lekey_done
  706. li $t0,-1
  707. beqz $key,.Lekey_done
  708. $PTR_ADD $rcon,$Tbl,1024+256
  709. .set reorder
  710. lwl $rk0,0+$MSB($inp) # load 128 bits
  711. lwl $rk1,4+$MSB($inp)
  712. lwl $rk2,8+$MSB($inp)
  713. lwl $rk3,12+$MSB($inp)
  714. li $at,128
  715. lwr $rk0,0+$LSB($inp)
  716. lwr $rk1,4+$LSB($inp)
  717. lwr $rk2,8+$LSB($inp)
  718. lwr $rk3,12+$LSB($inp)
  719. .set noreorder
  720. beq $bits,$at,.L128bits
  721. li $cnt,10
  722. .set reorder
  723. lwl $rk4,16+$MSB($inp) # load 192 bits
  724. lwl $rk5,20+$MSB($inp)
  725. li $at,192
  726. lwr $rk4,16+$LSB($inp)
  727. lwr $rk5,20+$LSB($inp)
  728. .set noreorder
  729. beq $bits,$at,.L192bits
  730. li $cnt,8
  731. .set reorder
  732. lwl $rk6,24+$MSB($inp) # load 256 bits
  733. lwl $rk7,28+$MSB($inp)
  734. li $at,256
  735. lwr $rk6,24+$LSB($inp)
  736. lwr $rk7,28+$LSB($inp)
  737. .set noreorder
  738. beq $bits,$at,.L256bits
  739. li $cnt,7
  740. b .Lekey_done
  741. li $t0,-2
  742. .align 4
  743. .L128bits:
  744. .set reorder
  745. srl $i0,$rk3,16
  746. srl $i1,$rk3,8
  747. and $i0,0xff
  748. and $i1,0xff
  749. and $i2,$rk3,0xff
  750. srl $i3,$rk3,24
  751. $PTR_ADD $i0,$Tbl
  752. $PTR_ADD $i1,$Tbl
  753. $PTR_ADD $i2,$Tbl
  754. $PTR_ADD $i3,$Tbl
  755. lbu $i0,1024($i0)
  756. lbu $i1,1024($i1)
  757. lbu $i2,1024($i2)
  758. lbu $i3,1024($i3)
  759. sw $rk0,0($key)
  760. sw $rk1,4($key)
  761. sw $rk2,8($key)
  762. sw $rk3,12($key)
  763. sub $cnt,1
  764. $PTR_ADD $key,16
  765. _bias $i0,24
  766. _bias $i1,16
  767. _bias $i2,8
  768. _bias $i3,0
  769. xor $rk0,$i0
  770. lw $i0,0($rcon)
  771. xor $rk0,$i1
  772. xor $rk0,$i2
  773. xor $rk0,$i3
  774. xor $rk0,$i0
  775. xor $rk1,$rk0
  776. xor $rk2,$rk1
  777. xor $rk3,$rk2
  778. .set noreorder
  779. bnez $cnt,.L128bits
  780. $PTR_ADD $rcon,4
  781. sw $rk0,0($key)
  782. sw $rk1,4($key)
  783. sw $rk2,8($key)
  784. li $cnt,10
  785. sw $rk3,12($key)
  786. li $t0,0
  787. sw $cnt,80($key)
  788. b .Lekey_done
  789. $PTR_SUB $key,10*16
  790. .align 4
  791. .L192bits:
  792. .set reorder
  793. srl $i0,$rk5,16
  794. srl $i1,$rk5,8
  795. and $i0,0xff
  796. and $i1,0xff
  797. and $i2,$rk5,0xff
  798. srl $i3,$rk5,24
  799. $PTR_ADD $i0,$Tbl
  800. $PTR_ADD $i1,$Tbl
  801. $PTR_ADD $i2,$Tbl
  802. $PTR_ADD $i3,$Tbl
  803. lbu $i0,1024($i0)
  804. lbu $i1,1024($i1)
  805. lbu $i2,1024($i2)
  806. lbu $i3,1024($i3)
  807. sw $rk0,0($key)
  808. sw $rk1,4($key)
  809. sw $rk2,8($key)
  810. sw $rk3,12($key)
  811. sw $rk4,16($key)
  812. sw $rk5,20($key)
  813. sub $cnt,1
  814. $PTR_ADD $key,24
  815. _bias $i0,24
  816. _bias $i1,16
  817. _bias $i2,8
  818. _bias $i3,0
  819. xor $rk0,$i0
  820. lw $i0,0($rcon)
  821. xor $rk0,$i1
  822. xor $rk0,$i2
  823. xor $rk0,$i3
  824. xor $rk0,$i0
  825. xor $rk1,$rk0
  826. xor $rk2,$rk1
  827. xor $rk3,$rk2
  828. xor $rk4,$rk3
  829. xor $rk5,$rk4
  830. .set noreorder
  831. bnez $cnt,.L192bits
  832. $PTR_ADD $rcon,4
  833. sw $rk0,0($key)
  834. sw $rk1,4($key)
  835. sw $rk2,8($key)
  836. li $cnt,12
  837. sw $rk3,12($key)
  838. li $t0,0
  839. sw $cnt,48($key)
  840. b .Lekey_done
  841. $PTR_SUB $key,12*16
  842. .align 4
  843. .L256bits:
  844. .set reorder
  845. srl $i0,$rk7,16
  846. srl $i1,$rk7,8
  847. and $i0,0xff
  848. and $i1,0xff
  849. and $i2,$rk7,0xff
  850. srl $i3,$rk7,24
  851. $PTR_ADD $i0,$Tbl
  852. $PTR_ADD $i1,$Tbl
  853. $PTR_ADD $i2,$Tbl
  854. $PTR_ADD $i3,$Tbl
  855. lbu $i0,1024($i0)
  856. lbu $i1,1024($i1)
  857. lbu $i2,1024($i2)
  858. lbu $i3,1024($i3)
  859. sw $rk0,0($key)
  860. sw $rk1,4($key)
  861. sw $rk2,8($key)
  862. sw $rk3,12($key)
  863. sw $rk4,16($key)
  864. sw $rk5,20($key)
  865. sw $rk6,24($key)
  866. sw $rk7,28($key)
  867. sub $cnt,1
  868. _bias $i0,24
  869. _bias $i1,16
  870. _bias $i2,8
  871. _bias $i3,0
  872. xor $rk0,$i0
  873. lw $i0,0($rcon)
  874. xor $rk0,$i1
  875. xor $rk0,$i2
  876. xor $rk0,$i3
  877. xor $rk0,$i0
  878. xor $rk1,$rk0
  879. xor $rk2,$rk1
  880. xor $rk3,$rk2
  881. beqz $cnt,.L256bits_done
  882. srl $i0,$rk3,24
  883. srl $i1,$rk3,16
  884. srl $i2,$rk3,8
  885. and $i3,$rk3,0xff
  886. and $i1,0xff
  887. and $i2,0xff
  888. $PTR_ADD $i0,$Tbl
  889. $PTR_ADD $i1,$Tbl
  890. $PTR_ADD $i2,$Tbl
  891. $PTR_ADD $i3,$Tbl
  892. lbu $i0,1024($i0)
  893. lbu $i1,1024($i1)
  894. lbu $i2,1024($i2)
  895. lbu $i3,1024($i3)
  896. sll $i0,24
  897. sll $i1,16
  898. sll $i2,8
  899. xor $rk4,$i0
  900. xor $rk4,$i1
  901. xor $rk4,$i2
  902. xor $rk4,$i3
  903. xor $rk5,$rk4
  904. xor $rk6,$rk5
  905. xor $rk7,$rk6
  906. $PTR_ADD $key,32
  907. .set noreorder
  908. b .L256bits
  909. $PTR_ADD $rcon,4
  910. .L256bits_done:
  911. sw $rk0,32($key)
  912. sw $rk1,36($key)
  913. sw $rk2,40($key)
  914. li $cnt,14
  915. sw $rk3,44($key)
  916. li $t0,0
  917. sw $cnt,48($key)
  918. $PTR_SUB $key,12*16
  919. .Lekey_done:
  920. jr $ra
  921. nop
  922. .end _mips_AES_set_encrypt_key
  923. .globl AES_set_encrypt_key
  924. .ent AES_set_encrypt_key
  925. AES_set_encrypt_key:
  926. .frame $sp,$FRAMESIZE,$ra
  927. .mask $SAVED_REGS_MASK,-$SZREG
  928. .set noreorder
  929. ___
  930. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  931. .cpload $pf
  932. ___
  933. $code.=<<___;
  934. $PTR_SUB $sp,$FRAMESIZE
  935. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  936. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  937. ___
  938. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  939. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
  940. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  941. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  942. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  943. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  944. ___
  945. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  946. .cplocal $Tbl
  947. .cpsetup $pf,$zero,AES_set_encrypt_key
  948. ___
  949. $code.=<<___;
  950. .set reorder
  951. la $Tbl,AES_Te # PIC-ified 'load address'
  952. bal _mips_AES_set_encrypt_key
  953. .set noreorder
  954. move $a0,$t0
  955. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  956. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  957. ___
  958. $code.=<<___ if ($flavour =~ /nubi/i);
959. $REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
960. $REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
961. $REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
962. $REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
963. $REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
  964. ___
  965. $code.=<<___;
  966. jr $ra
  967. $PTR_ADD $sp,$FRAMESIZE
  968. .end AES_set_encrypt_key
  969. ___
  970. my ($head,$tail)=($inp,$bits);
  971. my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
  972. my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
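# AES_set_decrypt_key first builds the encryption schedule, reverses the
# order of the round keys (.Lswap), then applies InvMixColumns to the
# inner round keys (.Lmix). Doubling four packed bytes in GF(2^8) is done
# branch-free as ((x & 0x7f7f7f7f)<<1) ^ ((m - (m>>7)) & 0x1b1b1b1b) with
# m = x & 0x80808080, so every byte whose top bit was set picks up the
# 0x1b reduction; three such doublings give the 2x, 4x and 8x multiples
# that the InvMixColumns coefficients (0x0e,0x09,0x0d,0x0b) are built from.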
  973. $code.=<<___;
  974. .align 5
  975. .globl AES_set_decrypt_key
  976. .ent AES_set_decrypt_key
  977. AES_set_decrypt_key:
  978. .frame $sp,$FRAMESIZE,$ra
  979. .mask $SAVED_REGS_MASK,-$SZREG
  980. .set noreorder
  981. ___
  982. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  983. .cpload $pf
  984. ___
  985. $code.=<<___;
  986. $PTR_SUB $sp,$FRAMESIZE
  987. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  988. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  989. ___
  990. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  991. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
  992. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  993. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  994. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  995. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  996. ___
  997. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  998. .cplocal $Tbl
  999. .cpsetup $pf,$zero,AES_set_decrypt_key
  1000. ___
  1001. $code.=<<___;
  1002. .set reorder
  1003. la $Tbl,AES_Te # PIC-ified 'load address'
  1004. bal _mips_AES_set_encrypt_key
  1005. bltz $t0,.Ldkey_done
  1006. sll $at,$cnt,4
  1007. $PTR_ADD $head,$key,0
  1008. $PTR_ADD $tail,$key,$at
  1009. .align 4
  1010. .Lswap:
  1011. lw $rk0,0($head)
  1012. lw $rk1,4($head)
  1013. lw $rk2,8($head)
  1014. lw $rk3,12($head)
  1015. lw $rk4,0($tail)
  1016. lw $rk5,4($tail)
  1017. lw $rk6,8($tail)
  1018. lw $rk7,12($tail)
  1019. sw $rk0,0($tail)
  1020. sw $rk1,4($tail)
  1021. sw $rk2,8($tail)
  1022. sw $rk3,12($tail)
  1023. $PTR_ADD $head,16
  1024. $PTR_SUB $tail,16
  1025. sw $rk4,-16($head)
  1026. sw $rk5,-12($head)
  1027. sw $rk6,-8($head)
  1028. sw $rk7,-4($head)
  1029. bne $head,$tail,.Lswap
  1030. lw $tp1,16($key) # modulo-scheduled
  1031. lui $x80808080,0x8080
  1032. sub $cnt,1
  1033. or $x80808080,0x8080
  1034. sll $cnt,2
  1035. $PTR_ADD $key,16
  1036. lui $x1b1b1b1b,0x1b1b
  1037. nor $x7f7f7f7f,$zero,$x80808080
  1038. or $x1b1b1b1b,0x1b1b
  1039. .align 4
  1040. .Lmix:
  1041. and $m,$tp1,$x80808080
  1042. and $tp2,$tp1,$x7f7f7f7f
  1043. srl $tp4,$m,7
  1044. addu $tp2,$tp2 # tp2<<1
  1045. subu $m,$tp4
  1046. and $m,$x1b1b1b1b
  1047. xor $tp2,$m
  1048. and $m,$tp2,$x80808080
  1049. and $tp4,$tp2,$x7f7f7f7f
  1050. srl $tp8,$m,7
  1051. addu $tp4,$tp4 # tp4<<1
  1052. subu $m,$tp8
  1053. and $m,$x1b1b1b1b
  1054. xor $tp4,$m
  1055. and $m,$tp4,$x80808080
  1056. and $tp8,$tp4,$x7f7f7f7f
  1057. srl $tp9,$m,7
  1058. addu $tp8,$tp8 # tp8<<1
  1059. subu $m,$tp9
  1060. and $m,$x1b1b1b1b
  1061. xor $tp8,$m
  1062. xor $tp9,$tp8,$tp1
  1063. xor $tpe,$tp8,$tp4
  1064. xor $tpb,$tp9,$tp2
  1065. xor $tpd,$tp9,$tp4
  1066. _ror $tp1,$tpd,16
  1067. xor $tpe,$tp2
  1068. _ror $tp2,$tpd,-16
  1069. xor $tpe,$tp1
  1070. _ror $tp1,$tp9,8
  1071. xor $tpe,$tp2
  1072. _ror $tp2,$tp9,-24
  1073. xor $tpe,$tp1
  1074. _ror $tp1,$tpb,24
  1075. xor $tpe,$tp2
  1076. _ror $tp2,$tpb,-8
  1077. xor $tpe,$tp1
  1078. lw $tp1,4($key) # modulo-scheduled
  1079. xor $tpe,$tp2
  1080. sub $cnt,1
  1081. sw $tpe,0($key)
  1082. $PTR_ADD $key,4
  1083. bnez $cnt,.Lmix
  1084. li $t0,0
  1085. .Ldkey_done:
  1086. .set noreorder
  1087. move $a0,$t0
  1088. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  1089. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1090. ___
  1091. $code.=<<___ if ($flavour =~ /nubi/i);
1092. $REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
1093. $REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
1094. $REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
1095. $REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
1096. $REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
  1097. ___
  1098. $code.=<<___;
  1099. jr $ra
  1100. $PTR_ADD $sp,$FRAMESIZE
  1101. .end AES_set_decrypt_key
  1102. ___
  1103. }}}
  1104. ######################################################################
  1105. # Tables are kept in endian-neutral manner
  1106. $code.=<<___;
  1107. .rdata
  1108. .align 6
  1109. AES_Te:
  1110. .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
  1111. .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
  1112. .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
  1113. .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
  1114. .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
  1115. .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
  1116. .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
  1117. .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
  1118. .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
  1119. .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
  1120. .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
  1121. .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
  1122. .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
  1123. .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
  1124. .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
  1125. .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
  1126. .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
  1127. .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
  1128. .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
  1129. .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
  1130. .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
  1131. .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
  1132. .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
  1133. .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
  1134. .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
  1135. .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
  1136. .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
  1137. .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
  1138. .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
  1139. .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
  1140. .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
  1141. .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
  1142. .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
  1143. .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
  1144. .byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
  1145. .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
  1146. .byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
  1147. .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
  1148. .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
  1149. .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
  1150. .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
  1151. .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
  1152. .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
  1153. .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
  1154. .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
  1155. .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
  1156. .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
  1157. .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
  1158. .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
  1159. .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
  1160. .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
  1161. .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
  1162. .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
  1163. .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
  1164. .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
  1165. .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
  1166. .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
  1167. .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
  1168. .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
  1169. .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
  1170. .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
  1171. .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
  1172. .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
  1173. .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
  1174. .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
  1175. .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
  1176. .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
  1177. .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
  1178. .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
  1179. .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
  1180. .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
  1181. .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
  1182. .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
  1183. .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
  1184. .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
  1185. .byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
  1186. .byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
  1187. .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
  1188. .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
  1189. .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
  1190. .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
  1191. .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
  1192. .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
  1193. .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
  1194. .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
  1195. .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
  1196. .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
  1197. .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
  1198. .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
  1199. .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
  1200. .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
  1201. .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
  1202. .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
  1203. .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
  1204. .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
  1205. .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
  1206. .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
  1207. .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
  1208. .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
  1209. .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
  1210. .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
  1211. .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
  1212. .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
  1213. .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
  1214. .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
  1215. .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
  1216. .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
  1217. .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
  1218. .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
  1219. .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
  1220. .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
  1221. .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
  1222. .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
  1223. .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
  1224. .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
  1225. .byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
  1226. .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
  1227. .byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
  1228. .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
  1229. .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
  1230. .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
  1231. .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
  1232. .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
  1233. .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
  1234. .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
  1235. .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
  1236. .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
  1237. .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
  1238. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
  1239. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  1240. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  1241. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  1242. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  1243. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  1244. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  1245. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  1246. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  1247. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  1248. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  1249. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  1250. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  1251. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  1252. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  1253. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  1254. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  1255. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  1256. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  1257. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  1258. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  1259. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  1260. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  1261. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  1262. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  1263. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  1264. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  1265. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  1266. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  1267. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  1268. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  1269. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  1270. .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
  1271. .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
  1272. .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
  1273. .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
  1274. .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
  1275. .align 6
  1276. AES_Td:
  1277. .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
  1278. .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
  1279. .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
  1280. .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
  1281. .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
  1282. .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
  1283. .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
  1284. .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
  1285. .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
  1286. .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
  1287. .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
  1288. .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
  1289. .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
  1290. .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
  1291. .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
  1292. .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
  1293. .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
  1294. .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
  1295. .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
  1296. .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
  1297. .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
  1298. .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
  1299. .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
  1300. .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
  1301. .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
  1302. .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
  1303. .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
  1304. .byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
  1305. .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
  1306. .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
  1307. .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
  1308. .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
  1309. .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
  1310. .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
  1311. .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
  1312. .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
  1313. .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
  1314. .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
  1315. .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
  1316. .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
  1317. .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
  1318. .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
  1319. .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
  1320. .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
  1321. .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
  1322. .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
  1323. .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
  1324. .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
  1325. .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
  1326. .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
  1327. .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
  1328. .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
  1329. .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
  1330. .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
  1331. .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
  1332. .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
  1333. .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
  1334. .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
  1335. .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
  1336. .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
  1337. .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
  1338. .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
  1339. .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
  1340. .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
  1341. .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
  1342. .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
  1343. .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
  1344. .byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
  1345. .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
  1346. .byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
  1347. .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
  1348. .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
  1349. .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
  1350. .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
  1351. .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
  1352. .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
  1353. .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
  1354. .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
  1355. .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
  1356. .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
  1357. .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
  1358. .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
  1359. .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
  1360. .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
  1361. .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
  1362. .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
  1363. .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
  1364. .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
  1365. .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
  1366. .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
  1367. .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
  1368. .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
  1369. .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
  1370. .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
  1371. .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
  1372. .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
  1373. .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
  1374. .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
  1375. .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
  1376. .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
  1377. .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
  1378. .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
  1379. .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
  1380. .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
  1381. .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
  1382. .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
  1383. .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
  1384. .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
  1385. .byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
  1386. .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
  1387. .byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
  1388. .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
  1389. .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
  1390. .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
  1391. .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
  1392. .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
  1393. .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
  1394. .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
  1395. .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
  1396. .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
  1397. .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
  1398. .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
  1399. .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
  1400. .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
  1401. .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
  1402. .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
  1403. .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
  1404. .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
  1405. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
  1406. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  1407. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  1408. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  1409. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  1410. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  1411. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  1412. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  1413. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  1414. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  1415. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  1416. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  1417. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  1418. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  1419. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  1420. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  1421. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  1422. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  1423. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  1424. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  1425. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  1426. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  1427. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  1428. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  1429. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  1430. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  1431. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  1432. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  1433. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  1434. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  1435. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  1436. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  1437. ___
  1438. foreach (split("\n",$code)) {
  1439. s/\`([^\`]*)\`/eval $1/ge;
  1440. # made-up _instructions, _xtr, _ins, _ror and _bias, cope
  1441. # with byte order dependencies...
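# For example, "_xtr $i0,$s1,16-2" becomes "srl $i0,$s1,14" on a
# big-endian target and "srl $i0,$s1,6" (24-16-2) on little-endian,
# while a two-operand form such as "_ins $t0,16" is first expanded to
# "_ins $t0,$t0,16" and then turned into "sll $t0,$t0,16" or
# "sll $t0,$t0,8" respectively.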
  1442. if (/^\s+_/) {
  1443. s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
  1444. s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
  1445. sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
  1446. : eval("24-$3"))/e or
  1447. s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  1448. sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
  1449. : eval("24-$3"))/e or
  1450. s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
  1451. sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
  1452. : eval("$3*-1"))/e or
  1453. s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  1454. sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
  1455. : eval("($3-16)&31"))/e;
  1456. s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
  1457. sprintf("sll\t$1,$2,$3")/e or
  1458. s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
  1459. sprintf("and\t$1,$2,0xff")/e or
  1460. s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
  1461. }
  1462. # convert lwl/lwr and swr/swl to little-endian order
  1463. if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
  1464. s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
  1465. sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or
  1466. s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
  1467. sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
  1468. }
  1469. print $_,"\n";
  1470. }
  1471. close STDOUT;