aes-mips.pl 53 KB


  1. #! /usr/bin/env perl
  2. # Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  10. # project. The module is, however, dual licensed under OpenSSL and
  11. # CRYPTOGAMS licenses depending on where you obtain it. For further
  12. # details see http://www.openssl.org/~appro/cryptogams/.
  13. # ====================================================================
  14. # AES for MIPS
  15. # October 2010
  16. #
  17. # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
  18. # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
  19. # faster than gcc-generated code, which is not very impressive. But
  20. # recall that compressed S-box requires extra processing, namely
  21. # additional rotations. Rotations are implemented with lwl/lwr pairs,
  22. # which are normally used for loading unaligned data. Another cool
  23. # thing about this module is its endian neutrality, which means that
  24. # it processes data without ever changing byte order...
  25. # September 2012
  26. #
  27. # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (a further
  28. # ~25% fewer instructions) code. Note that there is no run-time switch;
  29. # instead, the code path is chosen at preprocessing time: pass -mips32r2
  30. # and/or -msmartmips.
  31. # February 2019
  32. #
  33. # Normalize MIPS32R2 AES table address calculation by always using EXT
  34. # instruction. This reduces the standard codebase by another 10%.
  35. ######################################################################
  36. # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
  37. # widely used. Then there is a new contender: NUBI. It appears that if
  38. # one picks the latter, it's possible to arrange code in an ABI-neutral
  39. # manner. Therefore let's stick to the NUBI register layout:
  40. #
  # NUBI register map.  Each variable holds the literal assembler operand
  # "$<n>" for the register number listed on its line.
  ($zero, $at, $t0, $t1, $t2) =
      map { sprintf('$%d', $_) } (0 .. 2, 24, 25);
  ($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) =
      map { sprintf('$%d', $_) } (4 .. 11);
  ($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11) =
      map { sprintf('$%d', $_) } (12 .. 23);
  ($gp, $tp, $sp, $fp, $ra) =
      map { sprintf('$%d', $_) } (3, 28 .. 31);
  45. #
  46. # The return value is placed in $a0. The following coding rules facilitate
  47. # interoperability:
  48. #
  49. # - never ever touch $tp, "thread pointer", former $gp;
  50. # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
  51. # old code];
  52. # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
  53. #
  54. # For reference here is register layout for N32/64 MIPS ABIs:
  55. #
  56. # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
  57. # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
  58. # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
  59. # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
  60. # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
  # Command-line handling:
  #   $output  - the last argument, taken only when it ends in ".<word chars>"
  #              (i.e. it looks like a file name with an extension);
  #   $flavour - the first argument, taken only when it contains no dot.
  $output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV)   : undef;
  $flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift(@ARGV) : undef;
  $flavour ||= "o32"; # supported flavours are o32,n32,64,nubi32,nubi64

  # Pointer-width dependent mnemonics and the size of one register slot.
  # The d-prefixed add/sub/shift forms incidentally work even on n32.
  ($PTR_LA, $PTR_ADD, $PTR_SUB, $PTR_INS, $REG_S, $REG_L, $PTR_SLL, $SZREG) =
      ($flavour =~ /64|n32/i)
          ? ("dla", "daddu", "dsubu", "dins", "sd", "ld", "dsll", 8)
          : ("la",  "addu",  "subu",  "ins",  "sw", "lw", "sll",  4);

  # Register passed to .cpload/.cpsetup later in this file: the global $t0
  # for NUBI flavours, the global $t2 for every other flavour.
  if ($flavour =~ /nubi/i) {
      $pf = $t0;
  }
  else {
      $pf = $t2;
  }
  86. #
  87. # <appro@openssl.org>
  88. #
  89. ######################################################################
  # Target endianness probe.  When $ENV{CC} is set, the literal token MIPSEB is
  # piped through "$CC -E -": the result is 0 if the token comes back unchanged
  # and 1 otherwise (presumably a big-endian toolchain predefines MIPSEB and
  # thereby expands it away -- confirm against the compilers in use).
  $big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

  # Without a compiler hint, fall back to the byte order of the machine running
  # this script: a native 'L' decodes a network-order 'N' as 1 only on a
  # big-endian host.
  if (!defined($big_endian)) {
      $big_endian=(unpack('L',pack('N',1))==1);
  }

  my ($MSB,$LSB)=(0,3); # automatically converted to little-endian

  # Send everything printed from here on to the requested output file.
  # Three-argument open keeps the file name from being parsed for mode
  # characters, and a failed open now aborts generation instead of silently
  # leaving STDOUT where it was.
  if ($output) {
      open(STDOUT, '>', $output)
          or die "can't open $output: $!";
  }
  # Opening lines of the generated assembly, appended to $code: pull in
  # mips_arch.h, select the .text section, request `.option pic2` unless the
  # target is __mips_eabi or a non-PIC VxWorks build, and issue `.set noat`
  # (register $at is handed out as $i0 further down in this file).
  95. $code.=<<___;
  96. #include "mips_arch.h"
  97. .text
  98. #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
  99. .option pic2
  100. #endif
  101. .set noat
  102. ___
  103. {{{
  # Stack frame used by the public entry points: sixteen register-sized slots.
  my $FRAMESIZE = $SZREG * 16;

  # Operand for the .mask directive.  The default sets bits 31, 30 and 23..16;
  # NUBI flavours add bits 15..12 and 3, matching the extra saves of $12-$15
  # and $gp in the prologues below.
  my $SAVED_REGS_MASK = "0xc0ff0000";
  $SAVED_REGS_MASK = "0xc0fff008" if $flavour =~ /nubi/i;

  # $a0-$a3 carry input pointer, output pointer, key pointer and table pointer;
  # $a4-$a7 carry the four 32-bit words loaded from $inp.  The lexical $s0-$s3
  # declared here hide the global NUBI $s0-$s3 for the rest of this scope.
  my ($inp, $out, $key, $Tbl) = ($a0, $a1, $a2, $a3);
  my ($s0, $s1, $s2, $s3)     = ($a4, $a5, $a6, $a7);

  # Order matters: the index registers must capture $at and the *global*
  # $t0-$t2 before the next statement re-declares $t0-$t11 as lexicals that
  # name registers 12..23.
  my ($i0, $i1, $i2, $i3) = ($at, $t0, $t1, $t2);
  my ($t0, $t1, $t2, $t3, $t4, $t5, $t6, $t7, $t8, $t9, $t10, $t11) =
      map { sprintf('$%d', $_) } (12 .. 23);

  # Round-key cursor and round counter reuse the $gp and $fp registers.
  my ($key0, $cnt) = ($gp, $fp);
  110. # Instruction ordering is "stolen" from the output of the MIPSpro
  111. # assembler invoked with -mips3 -O3 arguments...
  112. $code.=<<___;
  113. .align 5
  114. .ent _mips_AES_encrypt
  115. _mips_AES_encrypt:
  116. .frame $sp,0,$ra
  117. .set reorder
  118. lw $t0,0($key)
  119. lw $t1,4($key)
  120. lw $t2,8($key)
  121. lw $t3,12($key)
  122. lw $cnt,240($key)
  123. $PTR_ADD $key0,$key,16
  124. xor $s0,$t0
  125. xor $s1,$t1
  126. xor $s2,$t2
  127. xor $s3,$t3
  128. subu $cnt,1
  129. #if defined(__mips_smartmips)
  130. ext $i0,$s1,16,8
  131. .Loop_enc:
  132. ext $i1,$s2,16,8
  133. ext $i2,$s3,16,8
  134. ext $i3,$s0,16,8
  135. lwxs $t0,$i0($Tbl) # Te1[s1>>16]
  136. ext $i0,$s2,8,8
  137. lwxs $t1,$i1($Tbl) # Te1[s2>>16]
  138. ext $i1,$s3,8,8
  139. lwxs $t2,$i2($Tbl) # Te1[s3>>16]
  140. ext $i2,$s0,8,8
  141. lwxs $t3,$i3($Tbl) # Te1[s0>>16]
  142. ext $i3,$s1,8,8
  143. lwxs $t4,$i0($Tbl) # Te2[s2>>8]
  144. ext $i0,$s3,0,8
  145. lwxs $t5,$i1($Tbl) # Te2[s3>>8]
  146. ext $i1,$s0,0,8
  147. lwxs $t6,$i2($Tbl) # Te2[s0>>8]
  148. ext $i2,$s1,0,8
  149. lwxs $t7,$i3($Tbl) # Te2[s1>>8]
  150. ext $i3,$s2,0,8
  151. lwxs $t8,$i0($Tbl) # Te3[s3]
  152. ext $i0,$s0,24,8
  153. lwxs $t9,$i1($Tbl) # Te3[s0]
  154. ext $i1,$s1,24,8
  155. lwxs $t10,$i2($Tbl) # Te3[s1]
  156. ext $i2,$s2,24,8
  157. lwxs $t11,$i3($Tbl) # Te3[s2]
  158. ext $i3,$s3,24,8
  159. rotr $t0,$t0,8
  160. rotr $t1,$t1,8
  161. rotr $t2,$t2,8
  162. rotr $t3,$t3,8
  163. rotr $t4,$t4,16
  164. rotr $t5,$t5,16
  165. rotr $t6,$t6,16
  166. rotr $t7,$t7,16
  167. xor $t0,$t4
  168. lwxs $t4,$i0($Tbl) # Te0[s0>>24]
  169. xor $t1,$t5
  170. lwxs $t5,$i1($Tbl) # Te0[s1>>24]
  171. xor $t2,$t6
  172. lwxs $t6,$i2($Tbl) # Te0[s2>>24]
  173. xor $t3,$t7
  174. lwxs $t7,$i3($Tbl) # Te0[s3>>24]
  175. rotr $t8,$t8,24
  176. lw $s0,0($key0)
  177. rotr $t9,$t9,24
  178. lw $s1,4($key0)
  179. rotr $t10,$t10,24
  180. lw $s2,8($key0)
  181. rotr $t11,$t11,24
  182. lw $s3,12($key0)
  183. xor $t0,$t8
  184. xor $t1,$t9
  185. xor $t2,$t10
  186. xor $t3,$t11
  187. xor $t0,$t4
  188. xor $t1,$t5
  189. xor $t2,$t6
  190. xor $t3,$t7
  191. subu $cnt,1
  192. $PTR_ADD $key0,16
  193. xor $s0,$t0
  194. xor $s1,$t1
  195. xor $s2,$t2
  196. xor $s3,$t3
  197. .set noreorder
  198. bnez $cnt,.Loop_enc
  199. ext $i0,$s1,16,8
  200. _xtr $i0,$s1,16-2
  201. #else
  202. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  203. move $i0,$Tbl
  204. move $i1,$Tbl
  205. move $i2,$Tbl
  206. move $i3,$Tbl
  207. ext $t0,$s1,16,8
  208. .Loop_enc:
  209. ext $t1,$s2,16,8
  210. ext $t2,$s3,16,8
  211. ext $t3,$s0,16,8
  212. $PTR_INS $i0,$t0,2,8
  213. $PTR_INS $i1,$t1,2,8
  214. $PTR_INS $i2,$t2,2,8
  215. $PTR_INS $i3,$t3,2,8
  216. lw $t0,0($i0) # Te1[s1>>16]
  217. ext $t4,$s2,8,8
  218. lw $t1,0($i1) # Te1[s2>>16]
  219. ext $t5,$s3,8,8
  220. lw $t2,0($i2) # Te1[s3>>16]
  221. ext $t6,$s0,8,8
  222. lw $t3,0($i3) # Te1[s0>>16]
  223. ext $t7,$s1,8,8
  224. $PTR_INS $i0,$t4,2,8
  225. $PTR_INS $i1,$t5,2,8
  226. $PTR_INS $i2,$t6,2,8
  227. $PTR_INS $i3,$t7,2,8
  228. #else
  229. _xtr $i0,$s1,16-2
  230. .Loop_enc:
  231. _xtr $i1,$s2,16-2
  232. _xtr $i2,$s3,16-2
  233. _xtr $i3,$s0,16-2
  234. and $i0,0x3fc
  235. and $i1,0x3fc
  236. and $i2,0x3fc
  237. and $i3,0x3fc
  238. $PTR_ADD $i0,$Tbl
  239. $PTR_ADD $i1,$Tbl
  240. $PTR_ADD $i2,$Tbl
  241. $PTR_ADD $i3,$Tbl
  242. lwl $t0,3($i0) # Te1[s1>>16]
  243. lwl $t1,3($i1) # Te1[s2>>16]
  244. lwl $t2,3($i2) # Te1[s3>>16]
  245. lwl $t3,3($i3) # Te1[s0>>16]
  246. lwr $t0,2($i0) # Te1[s1>>16]
  247. _xtr $i0,$s2,8-2
  248. lwr $t1,2($i1) # Te1[s2>>16]
  249. _xtr $i1,$s3,8-2
  250. lwr $t2,2($i2) # Te1[s3>>16]
  251. _xtr $i2,$s0,8-2
  252. lwr $t3,2($i3) # Te1[s0>>16]
  253. _xtr $i3,$s1,8-2
  254. and $i0,0x3fc
  255. and $i1,0x3fc
  256. and $i2,0x3fc
  257. and $i3,0x3fc
  258. $PTR_ADD $i0,$Tbl
  259. $PTR_ADD $i1,$Tbl
  260. $PTR_ADD $i2,$Tbl
  261. $PTR_ADD $i3,$Tbl
  262. #endif
  263. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  264. rotr $t0,$t0,8
  265. rotr $t1,$t1,8
  266. rotr $t2,$t2,8
  267. rotr $t3,$t3,8
  268. # if defined(_MIPSEL)
  269. lw $t4,0($i0) # Te2[s2>>8]
  270. ext $t8,$s3,0,8
  271. lw $t5,0($i1) # Te2[s3>>8]
  272. ext $t9,$s0,0,8
  273. lw $t6,0($i2) # Te2[s0>>8]
  274. ext $t10,$s1,0,8
  275. lw $t7,0($i3) # Te2[s1>>8]
  276. ext $t11,$s2,0,8
  277. $PTR_INS $i0,$t8,2,8
  278. $PTR_INS $i1,$t9,2,8
  279. $PTR_INS $i2,$t10,2,8
  280. $PTR_INS $i3,$t11,2,8
  281. lw $t8,0($i0) # Te3[s3]
  282. $PTR_INS $i0,$s0,2,8
  283. lw $t9,0($i1) # Te3[s0]
  284. $PTR_INS $i1,$s1,2,8
  285. lw $t10,0($i2) # Te3[s1]
  286. $PTR_INS $i2,$s2,2,8
  287. lw $t11,0($i3) # Te3[s2]
  288. $PTR_INS $i3,$s3,2,8
  289. # else
  290. lw $t4,0($i0) # Te2[s2>>8]
  291. $PTR_INS $i0,$s3,2,8
  292. lw $t5,0($i1) # Te2[s3>>8]
  293. $PTR_INS $i1,$s0,2,8
  294. lw $t6,0($i2) # Te2[s0>>8]
  295. $PTR_INS $i2,$s1,2,8
  296. lw $t7,0($i3) # Te2[s1>>8]
  297. $PTR_INS $i3,$s2,2,8
  298. lw $t8,0($i0) # Te3[s3]
  299. _xtr $i0,$s0,24-2
  300. lw $t9,0($i1) # Te3[s0]
  301. _xtr $i1,$s1,24-2
  302. lw $t10,0($i2) # Te3[s1]
  303. _xtr $i2,$s2,24-2
  304. lw $t11,0($i3) # Te3[s2]
  305. _xtr $i3,$s3,24-2
  306. and $i0,0x3fc
  307. and $i1,0x3fc
  308. and $i2,0x3fc
  309. and $i3,0x3fc
  310. $PTR_ADD $i0,$Tbl
  311. $PTR_ADD $i1,$Tbl
  312. $PTR_ADD $i2,$Tbl
  313. $PTR_ADD $i3,$Tbl
  314. # endif
  315. rotr $t4,$t4,16
  316. rotr $t5,$t5,16
  317. rotr $t6,$t6,16
  318. rotr $t7,$t7,16
  319. rotr $t8,$t8,24
  320. rotr $t9,$t9,24
  321. rotr $t10,$t10,24
  322. rotr $t11,$t11,24
  323. #else
  324. lwl $t4,2($i0) # Te2[s2>>8]
  325. lwl $t5,2($i1) # Te2[s3>>8]
  326. lwl $t6,2($i2) # Te2[s0>>8]
  327. lwl $t7,2($i3) # Te2[s1>>8]
  328. lwr $t4,1($i0) # Te2[s2>>8]
  329. _xtr $i0,$s3,0-2
  330. lwr $t5,1($i1) # Te2[s3>>8]
  331. _xtr $i1,$s0,0-2
  332. lwr $t6,1($i2) # Te2[s0>>8]
  333. _xtr $i2,$s1,0-2
  334. lwr $t7,1($i3) # Te2[s1>>8]
  335. _xtr $i3,$s2,0-2
  336. and $i0,0x3fc
  337. and $i1,0x3fc
  338. and $i2,0x3fc
  339. and $i3,0x3fc
  340. $PTR_ADD $i0,$Tbl
  341. $PTR_ADD $i1,$Tbl
  342. $PTR_ADD $i2,$Tbl
  343. $PTR_ADD $i3,$Tbl
  344. lwl $t8,1($i0) # Te3[s3]
  345. lwl $t9,1($i1) # Te3[s0]
  346. lwl $t10,1($i2) # Te3[s1]
  347. lwl $t11,1($i3) # Te3[s2]
  348. lwr $t8,0($i0) # Te3[s3]
  349. _xtr $i0,$s0,24-2
  350. lwr $t9,0($i1) # Te3[s0]
  351. _xtr $i1,$s1,24-2
  352. lwr $t10,0($i2) # Te3[s1]
  353. _xtr $i2,$s2,24-2
  354. lwr $t11,0($i3) # Te3[s2]
  355. _xtr $i3,$s3,24-2
  356. and $i0,0x3fc
  357. and $i1,0x3fc
  358. and $i2,0x3fc
  359. and $i3,0x3fc
  360. $PTR_ADD $i0,$Tbl
  361. $PTR_ADD $i1,$Tbl
  362. $PTR_ADD $i2,$Tbl
  363. $PTR_ADD $i3,$Tbl
  364. #endif
  365. xor $t0,$t4
  366. lw $t4,0($i0) # Te0[s0>>24]
  367. xor $t1,$t5
  368. lw $t5,0($i1) # Te0[s1>>24]
  369. xor $t2,$t6
  370. lw $t6,0($i2) # Te0[s2>>24]
  371. xor $t3,$t7
  372. lw $t7,0($i3) # Te0[s3>>24]
  373. xor $t0,$t8
  374. lw $s0,0($key0)
  375. xor $t1,$t9
  376. lw $s1,4($key0)
  377. xor $t2,$t10
  378. lw $s2,8($key0)
  379. xor $t3,$t11
  380. lw $s3,12($key0)
  381. xor $t0,$t4
  382. xor $t1,$t5
  383. xor $t2,$t6
  384. xor $t3,$t7
  385. subu $cnt,1
  386. $PTR_ADD $key0,16
  387. xor $s0,$t0
  388. xor $s1,$t1
  389. xor $s2,$t2
  390. xor $s3,$t3
  391. .set noreorder
  392. bnez $cnt,.Loop_enc
  393. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  394. ext $t0,$s1,16,8
  395. #endif
  396. _xtr $i0,$s1,16-2
  397. #endif
  398. .set reorder
  399. _xtr $i1,$s2,16-2
  400. _xtr $i2,$s3,16-2
  401. _xtr $i3,$s0,16-2
  402. and $i0,0x3fc
  403. and $i1,0x3fc
  404. and $i2,0x3fc
  405. and $i3,0x3fc
  406. $PTR_ADD $i0,$Tbl
  407. $PTR_ADD $i1,$Tbl
  408. $PTR_ADD $i2,$Tbl
  409. $PTR_ADD $i3,$Tbl
  410. lbu $t0,2($i0) # Te4[s1>>16]
  411. _xtr $i0,$s2,8-2
  412. lbu $t1,2($i1) # Te4[s2>>16]
  413. _xtr $i1,$s3,8-2
  414. lbu $t2,2($i2) # Te4[s3>>16]
  415. _xtr $i2,$s0,8-2
  416. lbu $t3,2($i3) # Te4[s0>>16]
  417. _xtr $i3,$s1,8-2
  418. and $i0,0x3fc
  419. and $i1,0x3fc
  420. and $i2,0x3fc
  421. and $i3,0x3fc
  422. $PTR_ADD $i0,$Tbl
  423. $PTR_ADD $i1,$Tbl
  424. $PTR_ADD $i2,$Tbl
  425. $PTR_ADD $i3,$Tbl
  426. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  427. # if defined(_MIPSEL)
  428. lbu $t4,2($i0) # Te4[s2>>8]
  429. $PTR_INS $i0,$s0,2,8
  430. lbu $t5,2($i1) # Te4[s3>>8]
  431. $PTR_INS $i1,$s1,2,8
  432. lbu $t6,2($i2) # Te4[s0>>8]
  433. $PTR_INS $i2,$s2,2,8
  434. lbu $t7,2($i3) # Te4[s1>>8]
  435. $PTR_INS $i3,$s3,2,8
  436. lbu $t8,2($i0) # Te4[s0>>24]
  437. _xtr $i0,$s3,0-2
  438. lbu $t9,2($i1) # Te4[s1>>24]
  439. _xtr $i1,$s0,0-2
  440. lbu $t10,2($i2) # Te4[s2>>24]
  441. _xtr $i2,$s1,0-2
  442. lbu $t11,2($i3) # Te4[s3>>24]
  443. _xtr $i3,$s2,0-2
  444. and $i0,0x3fc
  445. and $i1,0x3fc
  446. and $i2,0x3fc
  447. and $i3,0x3fc
  448. $PTR_ADD $i0,$Tbl
  449. $PTR_ADD $i1,$Tbl
  450. $PTR_ADD $i2,$Tbl
  451. $PTR_ADD $i3,$Tbl
  452. # else
  453. lbu $t4,2($i0) # Te4[s2>>8]
  454. _xtr $i0,$s0,24-2
  455. lbu $t5,2($i1) # Te4[s3>>8]
  456. _xtr $i1,$s1,24-2
  457. lbu $t6,2($i2) # Te4[s0>>8]
  458. _xtr $i2,$s2,24-2
  459. lbu $t7,2($i3) # Te4[s1>>8]
  460. _xtr $i3,$s3,24-2
  461. and $i0,0x3fc
  462. and $i1,0x3fc
  463. and $i2,0x3fc
  464. and $i3,0x3fc
  465. $PTR_ADD $i0,$Tbl
  466. $PTR_ADD $i1,$Tbl
  467. $PTR_ADD $i2,$Tbl
  468. $PTR_ADD $i3,$Tbl
  469. lbu $t8,2($i0) # Te4[s0>>24]
  470. $PTR_INS $i0,$s3,2,8
  471. lbu $t9,2($i1) # Te4[s1>>24]
  472. $PTR_INS $i1,$s0,2,8
  473. lbu $t10,2($i2) # Te4[s2>>24]
  474. $PTR_INS $i2,$s1,2,8
  475. lbu $t11,2($i3) # Te4[s3>>24]
  476. $PTR_INS $i3,$s2,2,8
  477. # endif
  478. _ins $t0,16
  479. _ins $t1,16
  480. _ins $t2,16
  481. _ins $t3,16
  482. _ins2 $t0,$t4,8
  483. lbu $t4,2($i0) # Te4[s3]
  484. _ins2 $t1,$t5,8
  485. lbu $t5,2($i1) # Te4[s0]
  486. _ins2 $t2,$t6,8
  487. lbu $t6,2($i2) # Te4[s1]
  488. _ins2 $t3,$t7,8
  489. lbu $t7,2($i3) # Te4[s2]
  490. _ins2 $t0,$t8,24
  491. lw $s0,0($key0)
  492. _ins2 $t1,$t9,24
  493. lw $s1,4($key0)
  494. _ins2 $t2,$t10,24
  495. lw $s2,8($key0)
  496. _ins2 $t3,$t11,24
  497. lw $s3,12($key0)
  498. _ins2 $t0,$t4,0
  499. _ins2 $t1,$t5,0
  500. _ins2 $t2,$t6,0
  501. _ins2 $t3,$t7,0
  502. #else
  503. lbu $t4,2($i0) # Te4[s2>>8]
  504. _xtr $i0,$s0,24-2
  505. lbu $t5,2($i1) # Te4[s3>>8]
  506. _xtr $i1,$s1,24-2
  507. lbu $t6,2($i2) # Te4[s0>>8]
  508. _xtr $i2,$s2,24-2
  509. lbu $t7,2($i3) # Te4[s1>>8]
  510. _xtr $i3,$s3,24-2
  511. and $i0,0x3fc
  512. and $i1,0x3fc
  513. and $i2,0x3fc
  514. and $i3,0x3fc
  515. $PTR_ADD $i0,$Tbl
  516. $PTR_ADD $i1,$Tbl
  517. $PTR_ADD $i2,$Tbl
  518. $PTR_ADD $i3,$Tbl
  519. lbu $t8,2($i0) # Te4[s0>>24]
  520. _xtr $i0,$s3,0-2
  521. lbu $t9,2($i1) # Te4[s1>>24]
  522. _xtr $i1,$s0,0-2
  523. lbu $t10,2($i2) # Te4[s2>>24]
  524. _xtr $i2,$s1,0-2
  525. lbu $t11,2($i3) # Te4[s3>>24]
  526. _xtr $i3,$s2,0-2
  527. and $i0,0x3fc
  528. and $i1,0x3fc
  529. and $i2,0x3fc
  530. and $i3,0x3fc
  531. $PTR_ADD $i0,$Tbl
  532. $PTR_ADD $i1,$Tbl
  533. $PTR_ADD $i2,$Tbl
  534. $PTR_ADD $i3,$Tbl
  535. _ins $t0,16
  536. _ins $t1,16
  537. _ins $t2,16
  538. _ins $t3,16
  539. _ins $t4,8
  540. _ins $t5,8
  541. _ins $t6,8
  542. _ins $t7,8
  543. xor $t0,$t4
  544. lbu $t4,2($i0) # Te4[s3]
  545. xor $t1,$t5
  546. lbu $t5,2($i1) # Te4[s0]
  547. xor $t2,$t6
  548. lbu $t6,2($i2) # Te4[s1]
  549. xor $t3,$t7
  550. lbu $t7,2($i3) # Te4[s2]
  551. _ins $t8,24
  552. lw $s0,0($key0)
  553. _ins $t9,24
  554. lw $s1,4($key0)
  555. _ins $t10,24
  556. lw $s2,8($key0)
  557. _ins $t11,24
  558. lw $s3,12($key0)
  559. xor $t0,$t8
  560. xor $t1,$t9
  561. xor $t2,$t10
  562. xor $t3,$t11
  563. _ins $t4,0
  564. _ins $t5,0
  565. _ins $t6,0
  566. _ins $t7,0
  567. xor $t0,$t4
  568. xor $t1,$t5
  569. xor $t2,$t6
  570. xor $t3,$t7
  571. #endif
  572. xor $s0,$t0
  573. xor $s1,$t1
  574. xor $s2,$t2
  575. xor $s3,$t3
  576. jr $ra
  577. .end _mips_AES_encrypt
  578. .align 5
  579. .globl AES_encrypt
  580. .ent AES_encrypt
  581. AES_encrypt:
  582. .frame $sp,$FRAMESIZE,$ra
  583. .mask $SAVED_REGS_MASK,-$SZREG
  584. .set noreorder
  585. ___
  # AES_encrypt prologue, assembled from four conditional fragments.
  # 1) o32 flavours only: emit .cpload with $pf ahead of the frame setup.
  586. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  587. .cpload $pf
  588. ___
  # 2) Always: lower $sp by $FRAMESIZE and store $ra, $fp and $s11..$s4 in the
  #    ten highest $SZREG-sized slots of the new frame.
  589. $code.=<<___;
  590. $PTR_SUB $sp,$FRAMESIZE
  591. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  592. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  593. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
  594. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  595. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  596. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  597. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  598. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  599. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  600. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  601. ___
  # 3) NUBI flavours only: also store registers 15..12 and $gp (slots 11..15);
  #    $gp is the register used as $key0 by the round function.
  602. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  603. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  604. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  605. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  606. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  607. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  608. ___
  # 4) Every flavour except o32: emit .cplocal for $Tbl and .cpsetup with $pf.
  609. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  610. .cplocal $Tbl
  611. .cpsetup $pf,$zero,AES_encrypt
  612. ___
  # Body of AES_encrypt: load the address of AES_Te into $Tbl, read the four
  # 32-bit input words from $inp into $s0-$s3, call _mips_AES_encrypt, write
  # $s0-$s3 to $out, then reload $ra, $fp and $s11..$s4 from the frame.
  # On MIPS32R6/MIPS64R6 the words are moved with plain lw/sw; otherwise
  # lwl/lwr and swr/swl pairs at offsets $MSB/$LSB are used (the file header
  # notes that lwl/lwr is normally used for unaligned data).
  613. $code.=<<___;
  614. .set reorder
  615. $PTR_LA $Tbl,AES_Te # PIC-ified 'load address'
  616. #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
  617. lw $s0,0($inp)
  618. lw $s1,4($inp)
  619. lw $s2,8($inp)
  620. lw $s3,12($inp)
  621. #else
  622. lwl $s0,0+$MSB($inp)
  623. lwl $s1,4+$MSB($inp)
  624. lwl $s2,8+$MSB($inp)
  625. lwl $s3,12+$MSB($inp)
  626. lwr $s0,0+$LSB($inp)
  627. lwr $s1,4+$LSB($inp)
  628. lwr $s2,8+$LSB($inp)
  629. lwr $s3,12+$LSB($inp)
  630. #endif
  631. bal _mips_AES_encrypt
  632. #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
  633. sw $s0,0($out)
  634. sw $s1,4($out)
  635. sw $s2,8($out)
  636. sw $s3,12($out)
  637. #else
  638. swr $s0,0+$LSB($out)
  639. swr $s1,4+$LSB($out)
  640. swr $s2,8+$LSB($out)
  641. swr $s3,12+$LSB($out)
  642. swl $s0,0+$MSB($out)
  643. swl $s1,4+$MSB($out)
  644. swl $s2,8+$MSB($out)
  645. swl $s3,12+$MSB($out)
  646. #endif
  647. .set noreorder
  648. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  649. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  650. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  651. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  652. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  653. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  654. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  655. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  656. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  657. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  658. ___
  # NUBI flavours only: reload registers 15..12 and $gp from slots 11..15,
  # mirroring the extra stores made in the prologue.
  659. $code.=<<___ if ($flavour =~ /nubi/i);
  660. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  661. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  662. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  663. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  664. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  665. ___
  # Return to the caller and release the frame.  `.set noreorder` is still in
  # effect here, so the $sp adjustment placed after `jr` is presumably meant
  # to occupy the branch delay slot.
  666. $code.=<<___;
  667. jr $ra
  668. $PTR_ADD $sp,$FRAMESIZE
  669. .end AES_encrypt
  670. ___
  671. $code.=<<___;
  672. .align 5
  673. .ent _mips_AES_decrypt
  674. _mips_AES_decrypt:
  675. .frame $sp,0,$ra
  676. .set reorder
  677. lw $t0,0($key)
  678. lw $t1,4($key)
  679. lw $t2,8($key)
  680. lw $t3,12($key)
  681. lw $cnt,240($key)
  682. $PTR_ADD $key0,$key,16
  683. xor $s0,$t0
  684. xor $s1,$t1
  685. xor $s2,$t2
  686. xor $s3,$t3
  687. subu $cnt,1
  688. #if defined(__mips_smartmips)
  689. ext $i0,$s3,16,8
  690. .Loop_dec:
  691. ext $i1,$s0,16,8
  692. ext $i2,$s1,16,8
  693. ext $i3,$s2,16,8
  694. lwxs $t0,$i0($Tbl) # Td1[s3>>16]
  695. ext $i0,$s2,8,8
  696. lwxs $t1,$i1($Tbl) # Td1[s0>>16]
  697. ext $i1,$s3,8,8
  698. lwxs $t2,$i2($Tbl) # Td1[s1>>16]
  699. ext $i2,$s0,8,8
  700. lwxs $t3,$i3($Tbl) # Td1[s2>>16]
  701. ext $i3,$s1,8,8
  702. lwxs $t4,$i0($Tbl) # Td2[s2>>8]
  703. ext $i0,$s1,0,8
  704. lwxs $t5,$i1($Tbl) # Td2[s3>>8]
  705. ext $i1,$s2,0,8
  706. lwxs $t6,$i2($Tbl) # Td2[s0>>8]
  707. ext $i2,$s3,0,8
  708. lwxs $t7,$i3($Tbl) # Td2[s1>>8]
  709. ext $i3,$s0,0,8
  710. lwxs $t8,$i0($Tbl) # Td3[s1]
  711. ext $i0,$s0,24,8
  712. lwxs $t9,$i1($Tbl) # Td3[s2]
  713. ext $i1,$s1,24,8
  714. lwxs $t10,$i2($Tbl) # Td3[s3]
  715. ext $i2,$s2,24,8
  716. lwxs $t11,$i3($Tbl) # Td3[s0]
  717. ext $i3,$s3,24,8
  718. rotr $t0,$t0,8
  719. rotr $t1,$t1,8
  720. rotr $t2,$t2,8
  721. rotr $t3,$t3,8
  722. rotr $t4,$t4,16
  723. rotr $t5,$t5,16
  724. rotr $t6,$t6,16
  725. rotr $t7,$t7,16
  726. xor $t0,$t4
  727. lwxs $t4,$i0($Tbl) # Td0[s0>>24]
  728. xor $t1,$t5
  729. lwxs $t5,$i1($Tbl) # Td0[s1>>24]
  730. xor $t2,$t6
  731. lwxs $t6,$i2($Tbl) # Td0[s2>>24]
  732. xor $t3,$t7
  733. lwxs $t7,$i3($Tbl) # Td0[s3>>24]
  734. rotr $t8,$t8,24
  735. lw $s0,0($key0)
  736. rotr $t9,$t9,24
  737. lw $s1,4($key0)
  738. rotr $t10,$t10,24
  739. lw $s2,8($key0)
  740. rotr $t11,$t11,24
  741. lw $s3,12($key0)
  742. xor $t0,$t8
  743. xor $t1,$t9
  744. xor $t2,$t10
  745. xor $t3,$t11
  746. xor $t0,$t4
  747. xor $t1,$t5
  748. xor $t2,$t6
  749. xor $t3,$t7
  750. subu $cnt,1
  751. $PTR_ADD $key0,16
  752. xor $s0,$t0
  753. xor $s1,$t1
  754. xor $s2,$t2
  755. xor $s3,$t3
  756. .set noreorder
  757. bnez $cnt,.Loop_dec
  758. ext $i0,$s3,16,8
  759. _xtr $i0,$s3,16-2
  760. #else
  761. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  762. move $i0,$Tbl
  763. move $i1,$Tbl
  764. move $i2,$Tbl
  765. move $i3,$Tbl
  766. ext $t0,$s3,16,8
  767. .Loop_dec:
  768. ext $t1,$s0,16,8
  769. ext $t2,$s1,16,8
  770. ext $t3,$s2,16,8
  771. $PTR_INS $i0,$t0,2,8
  772. $PTR_INS $i1,$t1,2,8
  773. $PTR_INS $i2,$t2,2,8
  774. $PTR_INS $i3,$t3,2,8
  775. lw $t0,0($i0) # Td1[s3>>16]
  776. ext $t4,$s2,8,8
  777. lw $t1,0($i1) # Td1[s0>>16]
  778. ext $t5,$s3,8,8
  779. lw $t2,0($i2) # Td1[s1>>16]
  780. ext $t6,$s0,8,8
  781. lw $t3,0($i3) # Td1[s2>>16]
  782. ext $t7,$s1,8,8
  783. $PTR_INS $i0,$t4,2,8
  784. $PTR_INS $i1,$t5,2,8
  785. $PTR_INS $i2,$t6,2,8
  786. $PTR_INS $i3,$t7,2,8
  787. #else
  788. _xtr $i0,$s3,16-2
  789. .Loop_dec:
  790. _xtr $i1,$s0,16-2
  791. _xtr $i2,$s1,16-2
  792. _xtr $i3,$s2,16-2
  793. and $i0,0x3fc
  794. and $i1,0x3fc
  795. and $i2,0x3fc
  796. and $i3,0x3fc
  797. $PTR_ADD $i0,$Tbl
  798. $PTR_ADD $i1,$Tbl
  799. $PTR_ADD $i2,$Tbl
  800. $PTR_ADD $i3,$Tbl
  801. lwl $t0,3($i0) # Td1[s3>>16]
  802. lwl $t1,3($i1) # Td1[s0>>16]
  803. lwl $t2,3($i2) # Td1[s1>>16]
  804. lwl $t3,3($i3) # Td1[s2>>16]
  805. lwr $t0,2($i0) # Td1[s3>>16]
  806. _xtr $i0,$s2,8-2
  807. lwr $t1,2($i1) # Td1[s0>>16]
  808. _xtr $i1,$s3,8-2
  809. lwr $t2,2($i2) # Td1[s1>>16]
  810. _xtr $i2,$s0,8-2
  811. lwr $t3,2($i3) # Td1[s2>>16]
  812. _xtr $i3,$s1,8-2
  813. and $i0,0x3fc
  814. and $i1,0x3fc
  815. and $i2,0x3fc
  816. and $i3,0x3fc
  817. $PTR_ADD $i0,$Tbl
  818. $PTR_ADD $i1,$Tbl
  819. $PTR_ADD $i2,$Tbl
  820. $PTR_ADD $i3,$Tbl
  821. #endif
  822. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  823. rotr $t0,$t0,8
  824. rotr $t1,$t1,8
  825. rotr $t2,$t2,8
  826. rotr $t3,$t3,8
  827. # if defined(_MIPSEL)
  828. lw $t4,0($i0) # Td2[s2>>8]
  829. ext $t8,$s1,0,8
  830. lw $t5,0($i1) # Td2[s3>>8]
  831. ext $t9,$s2,0,8
  832. lw $t6,0($i2) # Td2[s0>>8]
  833. ext $t10,$s3,0,8
  834. lw $t7,0($i3) # Td2[s1>>8]
  835. ext $t11,$s0,0,8
  836. $PTR_INS $i0,$t8,2,8
  837. $PTR_INS $i1,$t9,2,8
  838. $PTR_INS $i2,$t10,2,8
  839. $PTR_INS $i3,$t11,2,8
  840. lw $t8,0($i0) # Td3[s1]
  841. $PTR_INS $i0,$s0,2,8
  842. lw $t9,0($i1) # Td3[s2]
  843. $PTR_INS $i1,$s1,2,8
  844. lw $t10,0($i2) # Td3[s3]
  845. $PTR_INS $i2,$s2,2,8
  846. lw $t11,0($i3) # Td3[s0]
  847. $PTR_INS $i3,$s3,2,8
  848. #else
  849. lw $t4,0($i0) # Td2[s2>>8]
  850. $PTR_INS $i0,$s1,2,8
  851. lw $t5,0($i1) # Td2[s3>>8]
  852. $PTR_INS $i1,$s2,2,8
  853. lw $t6,0($i2) # Td2[s0>>8]
  854. $PTR_INS $i2,$s3,2,8
  855. lw $t7,0($i3) # Td2[s1>>8]
  856. $PTR_INS $i3,$s0,2,8
  857. lw $t8,0($i0) # Td3[s1]
  858. _xtr $i0,$s0,24-2
  859. lw $t9,0($i1) # Td3[s2]
  860. _xtr $i1,$s1,24-2
  861. lw $t10,0($i2) # Td3[s3]
  862. _xtr $i2,$s2,24-2
  863. lw $t11,0($i3) # Td3[s0]
  864. _xtr $i3,$s3,24-2
  865. and $i0,0x3fc
  866. and $i1,0x3fc
  867. and $i2,0x3fc
  868. and $i3,0x3fc
  869. $PTR_ADD $i0,$Tbl
  870. $PTR_ADD $i1,$Tbl
  871. $PTR_ADD $i2,$Tbl
  872. $PTR_ADD $i3,$Tbl
  873. #endif
  874. rotr $t4,$t4,16
  875. rotr $t5,$t5,16
  876. rotr $t6,$t6,16
  877. rotr $t7,$t7,16
  878. rotr $t8,$t8,24
  879. rotr $t9,$t9,24
  880. rotr $t10,$t10,24
  881. rotr $t11,$t11,24
  882. #else
  883. lwl $t4,2($i0) # Td2[s2>>8]
  884. lwl $t5,2($i1) # Td2[s3>>8]
  885. lwl $t6,2($i2) # Td2[s0>>8]
  886. lwl $t7,2($i3) # Td2[s1>>8]
  887. lwr $t4,1($i0) # Td2[s2>>8]
  888. _xtr $i0,$s1,0-2
  889. lwr $t5,1($i1) # Td2[s3>>8]
  890. _xtr $i1,$s2,0-2
  891. lwr $t6,1($i2) # Td2[s0>>8]
  892. _xtr $i2,$s3,0-2
  893. lwr $t7,1($i3) # Td2[s1>>8]
  894. _xtr $i3,$s0,0-2
  895. and $i0,0x3fc
  896. and $i1,0x3fc
  897. and $i2,0x3fc
  898. and $i3,0x3fc
  899. $PTR_ADD $i0,$Tbl
  900. $PTR_ADD $i1,$Tbl
  901. $PTR_ADD $i2,$Tbl
  902. $PTR_ADD $i3,$Tbl
  903. lwl $t8,1($i0) # Td3[s1]
  904. lwl $t9,1($i1) # Td3[s2]
  905. lwl $t10,1($i2) # Td3[s3]
  906. lwl $t11,1($i3) # Td3[s0]
  907. lwr $t8,0($i0) # Td3[s1]
  908. _xtr $i0,$s0,24-2
  909. lwr $t9,0($i1) # Td3[s2]
  910. _xtr $i1,$s1,24-2
  911. lwr $t10,0($i2) # Td3[s3]
  912. _xtr $i2,$s2,24-2
  913. lwr $t11,0($i3) # Td3[s0]
  914. _xtr $i3,$s3,24-2
  915. and $i0,0x3fc
  916. and $i1,0x3fc
  917. and $i2,0x3fc
  918. and $i3,0x3fc
  919. $PTR_ADD $i0,$Tbl
  920. $PTR_ADD $i1,$Tbl
  921. $PTR_ADD $i2,$Tbl
  922. $PTR_ADD $i3,$Tbl
  923. #endif
  924. xor $t0,$t4
  925. lw $t4,0($i0) # Td0[s0>>24]
  926. xor $t1,$t5
  927. lw $t5,0($i1) # Td0[s1>>24]
  928. xor $t2,$t6
  929. lw $t6,0($i2) # Td0[s2>>24]
  930. xor $t3,$t7
  931. lw $t7,0($i3) # Td0[s3>>24]
  932. xor $t0,$t8
  933. lw $s0,0($key0)
  934. xor $t1,$t9
  935. lw $s1,4($key0)
  936. xor $t2,$t10
  937. lw $s2,8($key0)
  938. xor $t3,$t11
  939. lw $s3,12($key0)
  940. xor $t0,$t4
  941. xor $t1,$t5
  942. xor $t2,$t6
  943. xor $t3,$t7
  944. subu $cnt,1
  945. $PTR_ADD $key0,16
  946. xor $s0,$t0
  947. xor $s1,$t1
  948. xor $s2,$t2
  949. xor $s3,$t3
  950. .set noreorder
  951. bnez $cnt,.Loop_dec
  952. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  953. ext $t0,$s3,16,8
  954. #endif
  955. _xtr $i0,$s3,16-2
  956. #endif
  957. .set reorder
  958. lw $t4,1024($Tbl) # prefetch Td4
  959. _xtr $i0,$s3,16
  960. lw $t5,1024+32($Tbl)
  961. _xtr $i1,$s0,16
  962. lw $t6,1024+64($Tbl)
  963. _xtr $i2,$s1,16
  964. lw $t7,1024+96($Tbl)
  965. _xtr $i3,$s2,16
  966. lw $t8,1024+128($Tbl)
  967. and $i0,0xff
  968. lw $t9,1024+160($Tbl)
  969. and $i1,0xff
  970. lw $t10,1024+192($Tbl)
  971. and $i2,0xff
  972. lw $t11,1024+224($Tbl)
  973. and $i3,0xff
  974. $PTR_ADD $i0,$Tbl
  975. $PTR_ADD $i1,$Tbl
  976. $PTR_ADD $i2,$Tbl
  977. $PTR_ADD $i3,$Tbl
  978. lbu $t0,1024($i0) # Td4[s3>>16]
  979. _xtr $i0,$s2,8
  980. lbu $t1,1024($i1) # Td4[s0>>16]
  981. _xtr $i1,$s3,8
  982. lbu $t2,1024($i2) # Td4[s1>>16]
  983. _xtr $i2,$s0,8
  984. lbu $t3,1024($i3) # Td4[s2>>16]
  985. _xtr $i3,$s1,8
  986. and $i0,0xff
  987. and $i1,0xff
  988. and $i2,0xff
  989. and $i3,0xff
  990. $PTR_ADD $i0,$Tbl
  991. $PTR_ADD $i1,$Tbl
  992. $PTR_ADD $i2,$Tbl
  993. $PTR_ADD $i3,$Tbl
  994. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  995. # if defined(_MIPSEL)
  996. lbu $t4,1024($i0) # Td4[s2>>8]
  997. $PTR_INS $i0,$s0,0,8
  998. lbu $t5,1024($i1) # Td4[s3>>8]
  999. $PTR_INS $i1,$s1,0,8
  1000. lbu $t6,1024($i2) # Td4[s0>>8]
  1001. $PTR_INS $i2,$s2,0,8
  1002. lbu $t7,1024($i3) # Td4[s1>>8]
  1003. $PTR_INS $i3,$s3,0,8
  1004. lbu $t8,1024($i0) # Td4[s0>>24]
  1005. _xtr $i0,$s1,0
  1006. lbu $t9,1024($i1) # Td4[s1>>24]
  1007. _xtr $i1,$s2,0
  1008. lbu $t10,1024($i2) # Td4[s2>>24]
  1009. _xtr $i2,$s3,0
  1010. lbu $t11,1024($i3) # Td4[s3>>24]
  1011. _xtr $i3,$s0,0
  1012. $PTR_ADD $i0,$Tbl
  1013. $PTR_ADD $i1,$Tbl
  1014. $PTR_ADD $i2,$Tbl
  1015. $PTR_ADD $i3,$Tbl
  1016. # else
  1017. lbu $t4,1024($i0) # Td4[s2>>8]
  1018. _xtr $i0,$s0,24
  1019. lbu $t5,1024($i1) # Td4[s3>>8]
  1020. _xtr $i1,$s1,24
  1021. lbu $t6,1024($i2) # Td4[s0>>8]
  1022. _xtr $i2,$s2,24
  1023. lbu $t7,1024($i3) # Td4[s1>>8]
  1024. _xtr $i3,$s3,24
  1025. $PTR_ADD $i0,$Tbl
  1026. $PTR_ADD $i1,$Tbl
  1027. $PTR_ADD $i2,$Tbl
  1028. $PTR_ADD $i3,$Tbl
  1029. lbu $t8,1024($i0) # Td4[s0>>24]
  1030. $PTR_INS $i0,$s1,0,8
  1031. lbu $t9,1024($i1) # Td4[s1>>24]
  1032. $PTR_INS $i1,$s2,0,8
  1033. lbu $t10,1024($i2) # Td4[s2>>24]
  1034. $PTR_INS $i2,$s3,0,8
  1035. lbu $t11,1024($i3) # Td4[s3>>24]
  1036. $PTR_INS $i3,$s0,0,8
  1037. # endif
  1038. _ins $t0,16
  1039. _ins $t1,16
  1040. _ins $t2,16
  1041. _ins $t3,16
  1042. _ins2 $t0,$t4,8
  1043. lbu $t4,1024($i0) # Td4[s1]
  1044. _ins2 $t1,$t5,8
  1045. lbu $t5,1024($i1) # Td4[s2]
  1046. _ins2 $t2,$t6,8
  1047. lbu $t6,1024($i2) # Td4[s3]
  1048. _ins2 $t3,$t7,8
  1049. lbu $t7,1024($i3) # Td4[s0]
  1050. _ins2 $t0,$t8,24
  1051. lw $s0,0($key0)
  1052. _ins2 $t1,$t9,24
  1053. lw $s1,4($key0)
  1054. _ins2 $t2,$t10,24
  1055. lw $s2,8($key0)
  1056. _ins2 $t3,$t11,24
  1057. lw $s3,12($key0)
  1058. _ins2 $t0,$t4,0
  1059. _ins2 $t1,$t5,0
  1060. _ins2 $t2,$t6,0
  1061. _ins2 $t3,$t7,0
  1062. #else
  1063. lbu $t4,1024($i0) # Td4[s2>>8]
  1064. _xtr $i0,$s0,24
  1065. lbu $t5,1024($i1) # Td4[s3>>8]
  1066. _xtr $i1,$s1,24
  1067. lbu $t6,1024($i2) # Td4[s0>>8]
  1068. _xtr $i2,$s2,24
  1069. lbu $t7,1024($i3) # Td4[s1>>8]
  1070. _xtr $i3,$s3,24
  1071. $PTR_ADD $i0,$Tbl
  1072. $PTR_ADD $i1,$Tbl
  1073. $PTR_ADD $i2,$Tbl
  1074. $PTR_ADD $i3,$Tbl
  1075. lbu $t8,1024($i0) # Td4[s0>>24]
  1076. _xtr $i0,$s1,0
  1077. lbu $t9,1024($i1) # Td4[s1>>24]
  1078. _xtr $i1,$s2,0
  1079. lbu $t10,1024($i2) # Td4[s2>>24]
  1080. _xtr $i2,$s3,0
  1081. lbu $t11,1024($i3) # Td4[s3>>24]
  1082. _xtr $i3,$s0,0
  1083. $PTR_ADD $i0,$Tbl
  1084. $PTR_ADD $i1,$Tbl
  1085. $PTR_ADD $i2,$Tbl
  1086. $PTR_ADD $i3,$Tbl
  1087. _ins $t0,16
  1088. _ins $t1,16
  1089. _ins $t2,16
  1090. _ins $t3,16
  1091. _ins $t4,8
  1092. _ins $t5,8
  1093. _ins $t6,8
  1094. _ins $t7,8
  1095. xor $t0,$t4
  1096. lbu $t4,1024($i0) # Td4[s1]
  1097. xor $t1,$t5
  1098. lbu $t5,1024($i1) # Td4[s2]
  1099. xor $t2,$t6
  1100. lbu $t6,1024($i2) # Td4[s3]
  1101. xor $t3,$t7
  1102. lbu $t7,1024($i3) # Td4[s0]
  1103. _ins $t8,24
  1104. lw $s0,0($key0)
  1105. _ins $t9,24
  1106. lw $s1,4($key0)
  1107. _ins $t10,24
  1108. lw $s2,8($key0)
  1109. _ins $t11,24
  1110. lw $s3,12($key0)
  1111. xor $t0,$t8
  1112. xor $t1,$t9
  1113. xor $t2,$t10
  1114. xor $t3,$t11
  1115. _ins $t4,0
  1116. _ins $t5,0
  1117. _ins $t6,0
  1118. _ins $t7,0
  1119. xor $t0,$t4
  1120. xor $t1,$t5
  1121. xor $t2,$t6
  1122. xor $t3,$t7
  1123. #endif
  1124. xor $s0,$t0
  1125. xor $s1,$t1
  1126. xor $s2,$t2
  1127. xor $s3,$t3
  1128. jr $ra
  1129. .end _mips_AES_decrypt
  1130. .align 5
  1131. .globl AES_decrypt
  1132. .ent AES_decrypt
  1133. AES_decrypt:
  1134. .frame $sp,$FRAMESIZE,$ra
  1135. .mask $SAVED_REGS_MASK,-$SZREG
  1136. .set noreorder
  1137. ___
  # AES_decrypt: public single-block decryption entry point.
  #
  # Flow: set up PIC addressing, allocate a $FRAMESIZE-byte frame and
  # save registers, point $Tbl at AES_Td, load the 16-byte block at $inp
  # into $s0..$s3, call _mips_AES_decrypt (which leaves its result in
  # $s0..$s3), store those four words at $out, then restore and return.
  #
  # o32 flavours use .cpload for PIC setup; every other flavour uses the
  # .cplocal/.cpsetup pair emitted further down.
  1138. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  1139. .cpload $pf
  1140. ___
  # Prologue: $ra, $fp and $s11..$s4 occupy frame slots 1..10, counted
  # down from the top of the frame.
  1141. $code.=<<___;
  1142. $PTR_SUB $sp,$FRAMESIZE
  1143. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  1144. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  1145. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
  1146. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  1147. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  1148. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  1149. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  1150. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  1151. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  1152. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  1153. ___
  # NUBI flavours additionally save $15..$12 and $gp in slots 11..15;
  # the epilogue below reloads them from the same slots.
  1154. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  1155. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  1156. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  1157. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  1158. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  1159. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  1160. ___
  1161. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  1162. .cplocal $Tbl
  1163. .cpsetup $pf,$zero,AES_decrypt
  1164. ___
  # Body.  The block is read and written either with plain lw/sw (MIPS
  # R6 builds) or with lwl/lwr and swr/swl pairs, the instruction pairs
  # normally used for unaligned access.  The call is made under
  # ".set reorder", so the assembler takes care of the bal delay slot.
  # From ".set noreorder" onwards the epilogue is scheduled by hand.
  1165. $code.=<<___;
  1166. .set reorder
  1167. $PTR_LA $Tbl,AES_Td # PIC-ified 'load address'
  1168. #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
  1169. lw $s0,0($inp)
  1170. lw $s1,4($inp)
  1171. lw $s2,8($inp)
  1172. lw $s3,12($inp)
  1173. #else
  1174. lwl $s0,0+$MSB($inp)
  1175. lwl $s1,4+$MSB($inp)
  1176. lwl $s2,8+$MSB($inp)
  1177. lwl $s3,12+$MSB($inp)
  1178. lwr $s0,0+$LSB($inp)
  1179. lwr $s1,4+$LSB($inp)
  1180. lwr $s2,8+$LSB($inp)
  1181. lwr $s3,12+$LSB($inp)
  1182. #endif
  1183. bal _mips_AES_decrypt
  1184. #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
  1185. sw $s0,0($out)
  1186. sw $s1,4($out)
  1187. sw $s2,8($out)
  1188. sw $s3,12($out)
  1189. #else
  1190. swr $s0,0+$LSB($out)
  1191. swr $s1,4+$LSB($out)
  1192. swr $s2,8+$LSB($out)
  1193. swr $s3,12+$LSB($out)
  1194. swl $s0,0+$MSB($out)
  1195. swl $s1,4+$MSB($out)
  1196. swl $s2,8+$MSB($out)
  1197. swl $s3,12+$MSB($out)
  1198. #endif
  1199. .set noreorder
  1200. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  1201. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1202. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  1203. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  1204. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  1205. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  1206. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  1207. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  1208. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  1209. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  1210. ___
  # NUBI-only restores, mirroring the NUBI-only saves in the prologue.
  1211. $code.=<<___ if ($flavour =~ /nubi/i);
  1212. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  1213. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  1214. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  1215. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  1216. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  1217. ___
  # Return.  Still under ".set noreorder": the stack-pointer adjustment
  # that follows "jr $ra" sits in the jump's delay slot.
  1218. $code.=<<___;
  1219. jr $ra
  1220. $PTR_ADD $sp,$FRAMESIZE
  1221. .end AES_decrypt
  1222. ___
  1223. }}}
  1224. {{{
  # Key-schedule section.  The routines emitted from this scope
  # (_mips_AES_set_encrypt_key, AES_set_encrypt_key, AES_set_decrypt_key)
  # share the frame layout and register aliases declared here.
  #
  # Frame: 8 register-sized slots.  The NUBI .mask value carries extra
  # bits (0xf008) for the additional registers that flavour saves --
  # presumably register numbers 12..15 and 3; confirm against the
  # register map at the top of the file.
  1225. my $FRAMESIZE=8*$SZREG;
  1226. my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";
  # Arguments and table base: user key, key length in bits, output
  # schedule, and the lookup-table pointer (the public wrappers load the
  # address of AES_Te4 into it before calling the worker).
  1227. my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
  # Up to eight working key words (four for 128-bit keys, six for
  # 192-bit keys, eight for 256-bit keys).
  1228. my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
  # Scratch registers for table indices and substituted bytes.  Note that
  # $i1 is $t0, the same register that carries the status code on exit.
  1229. my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
  # Round-constant pointer and loop counter.
  1230. my ($rcon,$cnt)=($gp,$fp);
  # _mips_AES_set_encrypt_key: internal key-expansion worker.  It is a
  # frameless leaf (".frame $sp,0,$ra"); the public wrappers do all the
  # register saving.
  #
  # In:   $inp  - user key bytes (read with lwl/lwr pairs, or plain lw on
  #               MIPS R6 builds)
  #       $bits - 128, 192 or 256
  #       $key  - output key schedule
  #       $Tbl  - byte substitution table; $rcon is set to $Tbl+256, i.e.
  #               the rcon words that follow the 256-byte Te4 table
  # Out:  $t0   - 0 on success, -1 if $inp or $key is zero, -2 if $bits
  #               is none of the three supported sizes
  #       $cnt  - number of rounds (10/12/14) on success; the same value
  #               is also stored at byte offset 240 of the schedule
  #       $key  - rewound to the start of the schedule on success
  #
  # Each of .L128bits/.L192bits/.L256bits stores the current key words,
  # substitutes the four bytes of the last word through the table,
  # positions them with the _bias pseudo-instruction (expanded elsewhere
  # in this file), XORs in the next rcon word and then chains the XOR
  # through the remaining words.  The 256-bit loop has a second
  # substitution step for words 4..7 that uses plain sll shifts and no
  # rcon; it is skipped on the final pass.
  #
  # Instructions that directly follow a branch inside a ".set noreorder"
  # region sit in the branch delay slot and execute whether or not the
  # branch is taken (e.g. "li $t0,-1" after the first beqz).
  1231. $code.=<<___;
  1232. .align 5
  1233. .ent _mips_AES_set_encrypt_key
  1234. _mips_AES_set_encrypt_key:
  1235. .frame $sp,0,$ra
  1236. .set noreorder
  1237. beqz $inp,.Lekey_done
  1238. li $t0,-1
  1239. beqz $key,.Lekey_done
  1240. $PTR_ADD $rcon,$Tbl,256
  1241. .set reorder
  1242. #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
  1243. lw $rk0,0($inp) # load 128 bits
  1244. lw $rk1,4($inp)
  1245. lw $rk2,8($inp)
  1246. lw $rk3,12($inp)
  1247. #else
  1248. lwl $rk0,0+$MSB($inp) # load 128 bits
  1249. lwl $rk1,4+$MSB($inp)
  1250. lwl $rk2,8+$MSB($inp)
  1251. lwl $rk3,12+$MSB($inp)
  1252. lwr $rk0,0+$LSB($inp)
  1253. lwr $rk1,4+$LSB($inp)
  1254. lwr $rk2,8+$LSB($inp)
  1255. lwr $rk3,12+$LSB($inp)
  1256. #endif
  1257. li $at,128
  1258. .set noreorder
  1259. beq $bits,$at,.L128bits
  1260. li $cnt,10
  1261. .set reorder
  1262. #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
  1263. lw $rk4,16($inp) # load 192 bits
  1264. lw $rk5,20($inp)
  1265. #else
  1266. lwl $rk4,16+$MSB($inp) # load 192 bits
  1267. lwl $rk5,20+$MSB($inp)
  1268. lwr $rk4,16+$LSB($inp)
  1269. lwr $rk5,20+$LSB($inp)
  1270. #endif
  1271. li $at,192
  1272. .set noreorder
  1273. beq $bits,$at,.L192bits
  1274. li $cnt,8
  1275. .set reorder
  1276. #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
  1277. lw $rk6,24($inp) # load 256 bits
  1278. lw $rk7,28($inp)
  1279. #else
  1280. lwl $rk6,24+$MSB($inp) # load 256 bits
  1281. lwl $rk7,28+$MSB($inp)
  1282. lwr $rk6,24+$LSB($inp)
  1283. lwr $rk7,28+$LSB($inp)
  1284. #endif
  1285. li $at,256
  1286. .set noreorder
  1287. beq $bits,$at,.L256bits
  1288. li $cnt,7
  1289. b .Lekey_done
  1290. li $t0,-2
  1291. .align 4
  1292. .L128bits:
  1293. .set reorder
  1294. srl $i0,$rk3,16
  1295. srl $i1,$rk3,8
  1296. and $i0,0xff
  1297. and $i1,0xff
  1298. and $i2,$rk3,0xff
  1299. srl $i3,$rk3,24
  1300. $PTR_ADD $i0,$Tbl
  1301. $PTR_ADD $i1,$Tbl
  1302. $PTR_ADD $i2,$Tbl
  1303. $PTR_ADD $i3,$Tbl
  1304. lbu $i0,0($i0)
  1305. lbu $i1,0($i1)
  1306. lbu $i2,0($i2)
  1307. lbu $i3,0($i3)
  1308. sw $rk0,0($key)
  1309. sw $rk1,4($key)
  1310. sw $rk2,8($key)
  1311. sw $rk3,12($key)
  1312. subu $cnt,1
  1313. $PTR_ADD $key,16
  1314. _bias $i0,24
  1315. _bias $i1,16
  1316. _bias $i2,8
  1317. _bias $i3,0
  1318. xor $rk0,$i0
  1319. lw $i0,0($rcon)
  1320. xor $rk0,$i1
  1321. xor $rk0,$i2
  1322. xor $rk0,$i3
  1323. xor $rk0,$i0
  1324. xor $rk1,$rk0
  1325. xor $rk2,$rk1
  1326. xor $rk3,$rk2
  1327. .set noreorder
  1328. bnez $cnt,.L128bits
  1329. $PTR_ADD $rcon,4
  1330. sw $rk0,0($key)
  1331. sw $rk1,4($key)
  1332. sw $rk2,8($key)
  1333. li $cnt,10
  1334. sw $rk3,12($key)
  1335. li $t0,0
  1336. sw $cnt,80($key)
  1337. b .Lekey_done
  1338. $PTR_SUB $key,10*16
  1339. .align 4
  1340. .L192bits:
  1341. .set reorder
  1342. srl $i0,$rk5,16
  1343. srl $i1,$rk5,8
  1344. and $i0,0xff
  1345. and $i1,0xff
  1346. and $i2,$rk5,0xff
  1347. srl $i3,$rk5,24
  1348. $PTR_ADD $i0,$Tbl
  1349. $PTR_ADD $i1,$Tbl
  1350. $PTR_ADD $i2,$Tbl
  1351. $PTR_ADD $i3,$Tbl
  1352. lbu $i0,0($i0)
  1353. lbu $i1,0($i1)
  1354. lbu $i2,0($i2)
  1355. lbu $i3,0($i3)
  1356. sw $rk0,0($key)
  1357. sw $rk1,4($key)
  1358. sw $rk2,8($key)
  1359. sw $rk3,12($key)
  1360. sw $rk4,16($key)
  1361. sw $rk5,20($key)
  1362. subu $cnt,1
  1363. $PTR_ADD $key,24
  1364. _bias $i0,24
  1365. _bias $i1,16
  1366. _bias $i2,8
  1367. _bias $i3,0
  1368. xor $rk0,$i0
  1369. lw $i0,0($rcon)
  1370. xor $rk0,$i1
  1371. xor $rk0,$i2
  1372. xor $rk0,$i3
  1373. xor $rk0,$i0
  1374. xor $rk1,$rk0
  1375. xor $rk2,$rk1
  1376. xor $rk3,$rk2
  1377. xor $rk4,$rk3
  1378. xor $rk5,$rk4
  1379. .set noreorder
  1380. bnez $cnt,.L192bits
  1381. $PTR_ADD $rcon,4
  1382. sw $rk0,0($key)
  1383. sw $rk1,4($key)
  1384. sw $rk2,8($key)
  1385. li $cnt,12
  1386. sw $rk3,12($key)
  1387. li $t0,0
  1388. sw $cnt,48($key)
  1389. b .Lekey_done
  1390. $PTR_SUB $key,12*16
  1391. .align 4
  1392. .L256bits:
  1393. .set reorder
  1394. srl $i0,$rk7,16
  1395. srl $i1,$rk7,8
  1396. and $i0,0xff
  1397. and $i1,0xff
  1398. and $i2,$rk7,0xff
  1399. srl $i3,$rk7,24
  1400. $PTR_ADD $i0,$Tbl
  1401. $PTR_ADD $i1,$Tbl
  1402. $PTR_ADD $i2,$Tbl
  1403. $PTR_ADD $i3,$Tbl
  1404. lbu $i0,0($i0)
  1405. lbu $i1,0($i1)
  1406. lbu $i2,0($i2)
  1407. lbu $i3,0($i3)
  1408. sw $rk0,0($key)
  1409. sw $rk1,4($key)
  1410. sw $rk2,8($key)
  1411. sw $rk3,12($key)
  1412. sw $rk4,16($key)
  1413. sw $rk5,20($key)
  1414. sw $rk6,24($key)
  1415. sw $rk7,28($key)
  1416. subu $cnt,1
  1417. _bias $i0,24
  1418. _bias $i1,16
  1419. _bias $i2,8
  1420. _bias $i3,0
  1421. xor $rk0,$i0
  1422. lw $i0,0($rcon)
  1423. xor $rk0,$i1
  1424. xor $rk0,$i2
  1425. xor $rk0,$i3
  1426. xor $rk0,$i0
  1427. xor $rk1,$rk0
  1428. xor $rk2,$rk1
  1429. xor $rk3,$rk2
  1430. beqz $cnt,.L256bits_done
  1431. srl $i0,$rk3,24
  1432. srl $i1,$rk3,16
  1433. srl $i2,$rk3,8
  1434. and $i3,$rk3,0xff
  1435. and $i1,0xff
  1436. and $i2,0xff
  1437. $PTR_ADD $i0,$Tbl
  1438. $PTR_ADD $i1,$Tbl
  1439. $PTR_ADD $i2,$Tbl
  1440. $PTR_ADD $i3,$Tbl
  1441. lbu $i0,0($i0)
  1442. lbu $i1,0($i1)
  1443. lbu $i2,0($i2)
  1444. lbu $i3,0($i3)
  1445. sll $i0,24
  1446. sll $i1,16
  1447. sll $i2,8
  1448. xor $rk4,$i0
  1449. xor $rk4,$i1
  1450. xor $rk4,$i2
  1451. xor $rk4,$i3
  1452. xor $rk5,$rk4
  1453. xor $rk6,$rk5
  1454. xor $rk7,$rk6
  1455. $PTR_ADD $key,32
  1456. .set noreorder
  1457. b .L256bits
  1458. $PTR_ADD $rcon,4
  1459. .L256bits_done:
  1460. sw $rk0,32($key)
  1461. sw $rk1,36($key)
  1462. sw $rk2,40($key)
  1463. li $cnt,14
  1464. sw $rk3,44($key)
  1465. li $t0,0
  1466. sw $cnt,48($key)
  1467. $PTR_SUB $key,12*16
  1468. .Lekey_done:
  1469. jr $ra
  1470. nop
  1471. .end _mips_AES_set_encrypt_key
  1472. .globl AES_set_encrypt_key
  1473. .ent AES_set_encrypt_key
  1474. AES_set_encrypt_key:
  1475. .frame $sp,$FRAMESIZE,$ra
  1476. .mask $SAVED_REGS_MASK,-$SZREG
  1477. .set noreorder
  1478. ___
  # AES_set_encrypt_key: public entry point.  Sets up PIC addressing,
  # allocates the $FRAMESIZE-byte frame, points $Tbl at AES_Te4, calls
  # the _mips_AES_set_encrypt_key worker and returns the worker's status
  # code (left in $t0) in $a0.
  1479. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  1480. .cpload $pf
  1481. ___
  # Prologue: $ra and $fp occupy frame slots 1 and 2, counted down from
  # the top of the frame.
  1482. $code.=<<___;
  1483. $PTR_SUB $sp,$FRAMESIZE
  1484. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  1485. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  1486. ___
  # NUBI flavours additionally save $s3..$s0 and $gp in slots 3..7.
  1487. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  1488. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
  1489. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  1490. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  1491. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  1492. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  1493. ___
  1494. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  1495. .cplocal $Tbl
  1496. .cpsetup $pf,$zero,AES_set_encrypt_key
  1497. ___
  # The call is made under ".set reorder", so the assembler handles the
  # bal delay slot.  The epilogue after ".set noreorder" is scheduled by
  # hand.
  1498. $code.=<<___;
  1499. .set reorder
  1500. $PTR_LA $Tbl,AES_Te4 # PIC-ified 'load address'
  1501. bal _mips_AES_set_encrypt_key
  1502. .set noreorder
  1503. move $a0,$t0
  1504. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  1505. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1506. ___
  # NUBI-only restores.  The slot numbers must mirror the NUBI-only saves
  # in the prologue (slots 3..7).  The frame holds only 8 slots, so a
  # slot index above 8 would address memory below $sp that this function
  # never wrote.
  1507. $code.=<<___ if ($flavour =~ /nubi/i);
  1508. $REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
  1509. $REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
  1510. $REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
  1511. $REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
  1512. $REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
  1513. ___
  # Return.  Under ".set noreorder" the stack-pointer adjustment that
  # follows "jr $ra" sits in the jump's delay slot.
  1514. $code.=<<___;
  1515. jr $ra
  1516. $PTR_ADD $sp,$FRAMESIZE
  1517. .end AES_set_encrypt_key
  1518. ___
  # AES_set_decrypt_key: public entry point.  Builds the encryption
  # schedule with _mips_AES_set_encrypt_key and converts it in place:
  #   1. .Lswap exchanges 16-byte round keys between the two ends of the
  #      schedule, moving $head up and $tail down until they meet;
  #   2. .Lmix rewrites every word of the inner round keys (all but the
  #      first and the last), 4*(rounds-1) words in total.
  # The worker's status ($t0) is returned in $a0; a negative status
  # skips the conversion entirely.
  #
  # Register reuse: $head/$tail take over the $inp/$bits argument
  # registers, the tp* values take over the rk* registers, and the mask
  # constants take over the i1..i3 scratch registers.  All of these are
  # free once the worker has returned.
  1519. my ($head,$tail)=($inp,$bits);
  1520. my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
  1521. my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
  1522. $code.=<<___;
  1523. .align 5
  1524. .globl AES_set_decrypt_key
  1525. .ent AES_set_decrypt_key
  1526. AES_set_decrypt_key:
  1527. .frame $sp,$FRAMESIZE,$ra
  1528. .mask $SAVED_REGS_MASK,-$SZREG
  1529. .set noreorder
  1530. ___
  1531. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  1532. .cpload $pf
  1533. ___
  # Prologue: $ra and $fp occupy frame slots 1 and 2, counted down from
  # the top of the frame.
  1534. $code.=<<___;
  1535. $PTR_SUB $sp,$FRAMESIZE
  1536. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  1537. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  1538. ___
  # NUBI flavours additionally save $s3..$s0 and $gp in slots 3..7.
  1539. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  1540. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
  1541. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  1542. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  1543. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  1544. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  1545. ___
  1546. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  1547. .cplocal $Tbl
  1548. .cpsetup $pf,$zero,AES_set_decrypt_key
  1549. ___
  # Body.  After the worker returns, $cnt holds the round count and $key
  # points at the start of the schedule, so $tail = $key + 16*rounds is
  # the last round key.  In .Lmix the three srl/addu/subu/and/xor groups
  # each double all four bytes of a word at once in GF(2^8) (tp2, tp4,
  # tp8); the XOR/rotate sequence then combines them into the
  # InvMixColumns image of the word.  The "modulo-scheduled" loads fetch
  # the next input word before the current result is stored.
  1550. $code.=<<___;
  1551. .set reorder
  1552. $PTR_LA $Tbl,AES_Te4 # PIC-ified 'load address'
  1553. bal _mips_AES_set_encrypt_key
  1554. bltz $t0,.Ldkey_done
  1555. sll $at,$cnt,4
  1556. $PTR_ADD $head,$key,0
  1557. $PTR_ADD $tail,$key,$at
  1558. .align 4
  1559. .Lswap:
  1560. lw $rk0,0($head)
  1561. lw $rk1,4($head)
  1562. lw $rk2,8($head)
  1563. lw $rk3,12($head)
  1564. lw $rk4,0($tail)
  1565. lw $rk5,4($tail)
  1566. lw $rk6,8($tail)
  1567. lw $rk7,12($tail)
  1568. sw $rk0,0($tail)
  1569. sw $rk1,4($tail)
  1570. sw $rk2,8($tail)
  1571. sw $rk3,12($tail)
  1572. $PTR_ADD $head,16
  1573. $PTR_SUB $tail,16
  1574. sw $rk4,-16($head)
  1575. sw $rk5,-12($head)
  1576. sw $rk6,-8($head)
  1577. sw $rk7,-4($head)
  1578. bne $head,$tail,.Lswap
  1579. lw $tp1,16($key) # modulo-scheduled
  1580. lui $x80808080,0x8080
  1581. subu $cnt,1
  1582. or $x80808080,0x8080
  1583. sll $cnt,2
  1584. $PTR_ADD $key,16
  1585. lui $x1b1b1b1b,0x1b1b
  1586. nor $x7f7f7f7f,$zero,$x80808080
  1587. or $x1b1b1b1b,0x1b1b
  1588. .align 4
  1589. .Lmix:
  1590. and $m,$tp1,$x80808080
  1591. and $tp2,$tp1,$x7f7f7f7f
  1592. srl $tp4,$m,7
  1593. addu $tp2,$tp2 # tp2<<1
  1594. subu $m,$tp4
  1595. and $m,$x1b1b1b1b
  1596. xor $tp2,$m
  1597. and $m,$tp2,$x80808080
  1598. and $tp4,$tp2,$x7f7f7f7f
  1599. srl $tp8,$m,7
  1600. addu $tp4,$tp4 # tp4<<1
  1601. subu $m,$tp8
  1602. and $m,$x1b1b1b1b
  1603. xor $tp4,$m
  1604. and $m,$tp4,$x80808080
  1605. and $tp8,$tp4,$x7f7f7f7f
  1606. srl $tp9,$m,7
  1607. addu $tp8,$tp8 # tp8<<1
  1608. subu $m,$tp9
  1609. and $m,$x1b1b1b1b
  1610. xor $tp8,$m
  1611. xor $tp9,$tp8,$tp1
  1612. xor $tpe,$tp8,$tp4
  1613. xor $tpb,$tp9,$tp2
  1614. xor $tpd,$tp9,$tp4
  1615. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  1616. rotr $tp1,$tpd,16
  1617. xor $tpe,$tp2
  1618. rotr $tp2,$tp9,8
  1619. xor $tpe,$tp1
  1620. rotr $tp4,$tpb,24
  1621. xor $tpe,$tp2
  1622. lw $tp1,4($key) # modulo-scheduled
  1623. xor $tpe,$tp4
  1624. #else
  1625. _ror $tp1,$tpd,16
  1626. xor $tpe,$tp2
  1627. _ror $tp2,$tpd,-16
  1628. xor $tpe,$tp1
  1629. _ror $tp1,$tp9,8
  1630. xor $tpe,$tp2
  1631. _ror $tp2,$tp9,-24
  1632. xor $tpe,$tp1
  1633. _ror $tp1,$tpb,24
  1634. xor $tpe,$tp2
  1635. _ror $tp2,$tpb,-8
  1636. xor $tpe,$tp1
  1637. lw $tp1,4($key) # modulo-scheduled
  1638. xor $tpe,$tp2
  1639. #endif
  1640. subu $cnt,1
  1641. sw $tpe,0($key)
  1642. $PTR_ADD $key,4
  1643. bnez $cnt,.Lmix
  1644. li $t0,0
  1645. .Ldkey_done:
  1646. .set noreorder
  1647. move $a0,$t0
  1648. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  1649. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1650. ___
  # NUBI-only restores.  The slot numbers must mirror the NUBI-only saves
  # in the prologue (slots 3..7).  The frame holds only 8 slots, so a
  # slot index above 8 would address memory below $sp that this function
  # never wrote.
  1651. $code.=<<___ if ($flavour =~ /nubi/i);
  1652. $REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
  1653. $REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
  1654. $REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
  1655. $REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
  1656. $REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
  1657. ___
  # Return.  Under ".set noreorder" the stack-pointer adjustment that
  # follows "jr $ra" sits in the jump's delay slot.
  1658. $code.=<<___;
  1659. jr $ra
  1660. $PTR_ADD $sp,$FRAMESIZE
  1661. .end AES_set_decrypt_key
  1662. ___
  1663. }}}
  1664. ######################################################################
  1665. # Tables are emitted as raw .byte sequences, so their layout in memory is the same on big- and little-endian targets
  1666. $code.=<<___;
  1667. .rdata
  1668. .align 10
  1669. AES_Te:
  1670. .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
  1671. .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
  1672. .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
  1673. .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
  1674. .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
  1675. .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
  1676. .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
  1677. .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
  1678. .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
  1679. .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
  1680. .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
  1681. .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
  1682. .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
  1683. .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
  1684. .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
  1685. .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
  1686. .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
  1687. .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
  1688. .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
  1689. .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
  1690. .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
  1691. .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
  1692. .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
  1693. .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
  1694. .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
  1695. .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
  1696. .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
  1697. .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
  1698. .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
  1699. .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
  1700. .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
  1701. .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
  1702. .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
  1703. .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
  1704. .byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
  1705. .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
  1706. .byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
  1707. .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
  1708. .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
  1709. .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
  1710. .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
  1711. .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
  1712. .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
  1713. .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
  1714. .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
  1715. .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
  1716. .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
  1717. .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
  1718. .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
  1719. .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
  1720. .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
  1721. .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
  1722. .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
  1723. .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
  1724. .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
  1725. .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
  1726. .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
  1727. .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
  1728. .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
  1729. .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
  1730. .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
  1731. .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
  1732. .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
  1733. .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
  1734. .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
  1735. .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
  1736. .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
  1737. .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
  1738. .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
  1739. .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
  1740. .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
  1741. .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
  1742. .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
  1743. .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
  1744. .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
  1745. .byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
  1746. .byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
  1747. .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
  1748. .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
  1749. .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
  1750. .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
  1751. .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
  1752. .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
  1753. .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
  1754. .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
  1755. .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
  1756. .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
  1757. .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
  1758. .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
  1759. .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
  1760. .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
  1761. .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
  1762. .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
  1763. .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
  1764. .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
  1765. .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
  1766. .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
  1767. .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
  1768. .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
  1769. .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
  1770. .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
  1771. .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
  1772. .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
  1773. .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
  1774. .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
  1775. .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
  1776. .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
  1777. .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
  1778. .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
  1779. .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
  1780. .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
  1781. .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
  1782. .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
  1783. .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
  1784. .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
  1785. .byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
  1786. .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
  1787. .byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
  1788. .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
  1789. .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
  1790. .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
  1791. .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
  1792. .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
  1793. .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
  1794. .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
  1795. .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
  1796. .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
  1797. .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
  1798. AES_Td:
  1799. .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
  1800. .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
  1801. .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
  1802. .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
  1803. .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
  1804. .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
  1805. .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
  1806. .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
  1807. .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
  1808. .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
  1809. .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
  1810. .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
  1811. .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
  1812. .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
  1813. .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
  1814. .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
  1815. .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
  1816. .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
  1817. .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
  1818. .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
  1819. .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
  1820. .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
  1821. .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
  1822. .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
  1823. .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
  1824. .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
  1825. .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
  1826. .byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
  1827. .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
  1828. .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
  1829. .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
  1830. .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
  1831. .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
  1832. .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
  1833. .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
  1834. .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
  1835. .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
  1836. .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
  1837. .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
  1838. .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
  1839. .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
  1840. .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
  1841. .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
  1842. .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
  1843. .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
  1844. .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
  1845. .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
  1846. .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
  1847. .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
  1848. .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
  1849. .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
  1850. .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
  1851. .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
  1852. .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
  1853. .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
  1854. .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
  1855. .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
  1856. .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
  1857. .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
  1858. .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
  1859. .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
  1860. .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
  1861. .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
  1862. .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
  1863. .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
  1864. .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
  1865. .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
  1866. .byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
  1867. .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
  1868. .byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
  1869. .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
  1870. .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
  1871. .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
  1872. .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
  1873. .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
  1874. .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
  1875. .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
  1876. .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
  1877. .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
  1878. .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
  1879. .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
  1880. .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
  1881. .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
  1882. .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
  1883. .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
  1884. .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
  1885. .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
  1886. .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
  1887. .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
  1888. .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
  1889. .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
  1890. .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
  1891. .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
  1892. .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
  1893. .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
  1894. .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
  1895. .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
  1896. .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
  1897. .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
  1898. .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
  1899. .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
  1900. .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
  1901. .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
  1902. .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
  1903. .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
  1904. .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
  1905. .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
  1906. .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
  1907. .byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
  1908. .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
  1909. .byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
  1910. .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
  1911. .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
  1912. .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
  1913. .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
  1914. .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
  1915. .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
  1916. .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
  1917. .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
  1918. .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
  1919. .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
  1920. .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
  1921. .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
  1922. .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
  1923. .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
  1924. .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
  1925. .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
  1926. .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
  1927. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
  1928. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  1929. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  1930. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  1931. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  1932. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  1933. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  1934. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  1935. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  1936. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  1937. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  1938. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  1939. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  1940. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  1941. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  1942. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  1943. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  1944. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  1945. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  1946. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  1947. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  1948. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  1949. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  1950. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  1951. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  1952. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  1953. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  1954. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  1955. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  1956. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  1957. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  1958. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  1959. AES_Te4:
  1960. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
  1961. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  1962. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  1963. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  1964. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  1965. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  1966. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  1967. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  1968. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  1969. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  1970. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  1971. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  1972. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  1973. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  1974. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  1975. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  1976. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  1977. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  1978. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  1979. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  1980. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  1981. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  1982. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  1983. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  1984. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  1985. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  1986. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  1987. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  1988. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  1989. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  1990. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  1991. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  1992. .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
  1993. .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
  1994. .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
  1995. .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
  1996. .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
  1997. ___
  # Post-process the accumulated assembly text in $code one line at a time
  # and print each resulting line. The rewrites below depend on $big_endian,
  # which is set elsewhere in this file.
  1998. foreach (split("\n",$code)) {
  # Replace every `...` span with the result of evaluating its contents as
  # Perl code (/g: all spans on the line, /e: replacement is an expression).
  1999. s/\`([^\`]*)\`/eval $1/ge;
  2000. # made-up _instructions, _xtr, _ins, _ror and _bias, cope
  2001. # with byte order dependencies...
  # Only lines whose first non-blank character is "_" are handled here.
  # Besides the names listed above, _ins2 is also recognised below.
  2002. if (/^\s+_/) {
  # Expand the two-operand form "_op $r,x" to "_op $r,$r,x" by repeating the
  # register. Text matched by the trailing (#.*) group is not carried over
  # into the replacement.
  2003. s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
  # The five substitutions below are chained with "or": the first one that
  # matches rewrites the line and the remaining ones are not attempted.
  #   _xtr  -> srl rd,rs,K     K = N            (big-endian) or 24-N       (otherwise)
  #   _ins  -> sll rd,rs,K     K = N            (big-endian) or 24-N       (otherwise)
  #   _ins2 -> ins rd,rs,K,8   K = N            (big-endian) or 24-N       (otherwise)
  #   _ror  -> srl rd,rs,K     K = N            (big-endian) or N*-1       (otherwise)
  #   _bias -> sll rd,rs,K     K = N            (big-endian) or (N-16)&31  (otherwise)
  # NOTE: the _xtr operand may carry a "-2" suffix (e.g. "16-2"). It is pasted
  # textually into the string "24-$3" before eval, so "16-2" is evaluated as
  # 24-16-2, not 24-(16-2).
  # "_ins\s+" cannot match "_ins2" because a "2" follows "_ins" there, so the
  # relative order of the _ins and _ins2 rules does not cause a clash.
  2004. s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
  2005. sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
  2006. : eval("24-$3"))/e or
  2007. s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  2008. sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
  2009. : eval("24-$3"))/e or
  2010. s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  2011. sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
  2012. : eval("24-$3"))/e or
  2013. s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
  2014. sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
  2015. : eval("$3*-1"))/e or
  2016. s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  2017. sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
  2018. : eval("($3-16)&31"))/e;
  # Clean-up of the shifts produced above; again only the first matching
  # rule in the "or" chain is applied:
  #   srl with a negative amount -N -> sll by N
  #   srl by 0                      -> and rd,rs,0xff
  #   sll by 0                      -> the instruction is prefixed with "#"
  2019. s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
  2020. sprintf("sll\t$1,$2,$3")/e or
  2021. s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
  2022. sprintf("and\t$1,$2,0xff")/e or
  2023. s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
  2024. }
  2025. # convert lwl/lwr and swr/swl to little-endian order
  # Applies only when $big_endian is false and the line starts (after
  # whitespace) with lwl, lwr, swl or swr.
  2026. if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
  # Because ".*" is greedy, $2 captures only the last digit before "(";
  # any earlier offset text (such as "4+") stays in $1 and is kept as is.
  # In Perl "&" binds looser than "+" and "-", so the evaluated strings are
  #   lwl/swl: ($2 - $2%4 + ($2%4 - 1)) & 3
  #   lwr/swr: ($2 - $2%4 + ($2%4 + 1)) & 3
  # The lwr/swr rule is tried only if the lwl/swl rule did not match.
  2027. s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
  2028. sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or
  2029. s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
  2030. sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
  2031. }
  # When $big_endian is false, mirror the immediate operands of rotr and of
  # 8-bit-wide ext: rotr amount N becomes 32-N, ext position P becomes 24-P.
  2032. if (!$big_endian) {
  2033. s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
  2034. s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
  2035. }
  # Emit the (possibly rewritten) line, restoring the newline removed by split.
  2036. print $_,"\n";
  2037. }
  # Close the output stream and abort with the system error text if that fails.
  2038. close STDOUT or die "error closing STDOUT: $!";