des_enc.m4 46 KB


  1. ! des_enc.m4
  2. ! des_enc.S (generated from des_enc.m4)
  3. !
  4. ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
  5. !
  6. ! Version 1.0. 32-bit version.
  7. !
  8. ! June 8, 2000.
  9. !
  10. ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
  11. ! by Andy Polyakov.
  12. !
  13. ! January 1, 2003.
  14. !
  15. ! Assembler version: Copyright Svend Olaf Mikkelsen.
  16. !
  17. ! Original C code: Copyright Eric A. Young.
  18. !
  19. ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
  20. !
  21. ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
  22. !
  23. ! This version can be redistributed.
  24. !
  25. ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
  26. !
  27. ! Global registers 1 to 5 are used. This is the same as done by the
  28. ! cc compiler. The UltraSPARC load/store little endian feature is used.
  29. !
  30. ! Instruction grouping often refers to one CPU cycle.
  31. !
  32. ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
  33. !
  34. ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
  35. !
  36. ! Performance improvement according to './apps/openssl speed des'
  37. !
  38. ! 32-bit build:
  39. ! 23% faster than cc-5.2 -xarch=v8plus -xO5
  40. ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
  41. ! 64-bit build:
  42. ! 50% faster than cc-5.2 -xarch=v9 -xO5
  43. ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
  44. !
  45. .ident "des_enc.m4 2.1"
  46. .file "des_enc-sparc.S"
  47. #include <openssl/opensslconf.h>
  48. #ifdef OPENSSL_FIPSCANISTER
  49. #include <openssl/fipssyms.h>
  50. #endif
  51. #if defined(__SUNPRO_C) && defined(__sparcv9)
  52. # define ABI64 /* They've said -xarch=v9 at command line */
  53. #elif defined(__GNUC__) && defined(__arch64__)
  54. # define ABI64 /* They've said -m64 at command line */
  55. #endif
  56. #ifdef ABI64
  57. .register %g2,#scratch
  58. .register %g3,#scratch
  59. # define FRAME -192 /* frame size given to save */
  60. # define BIAS 2047 /* added to %sp when addressing stack slots */
  61. # define LDPTR ldx /* pointer-sized load */
  62. # define STPTR stx /* pointer-sized store */
  63. # define ARG0 128 /* offset of stack slot 0 from %sp+BIAS */
  64. # define ARGSZ 8 /* size of one stack slot */
  65. #else
  66. # define FRAME -96 /* frame size given to save */
  67. # define BIAS 0 /* added to %sp when addressing stack slots */
  68. # define LDPTR ld /* pointer-sized load */
  69. # define STPTR st /* pointer-sized store */
  70. # define ARG0 68 /* offset of stack slot 0 from %sp+BIAS */
  71. # define ARGSZ 4 /* size of one stack slot */
  72. #endif
  73. #define LOOPS 7 /* start value of the loop counter for the two-round loop */
  74. #define global0 %g0
  75. #define global1 %g1
  76. #define global2 %g2
  77. #define global3 %g3
  78. #define global4 %g4
  79. #define global5 %g5
  80. #define local0 %l0
  81. #define local1 %l1
  82. #define local2 %l2
  83. #define local3 %l3
  84. #define local4 %l4
  85. #define local5 %l5
  86. #define local7 %l6 /* note: local7 is %l6, not %l7 */
  87. #define local6 %l7 /* note: local6 is %l7, not %l6 */
  88. #define in0 %i0
  89. #define in1 %i1
  90. #define in2 %i2
  91. #define in3 %i3
  92. #define in4 %i4
  93. #define in5 %i5
  94. #define in6 %i6
  95. #define in7 %i7
  96. #define out0 %o0
  97. #define out1 %o1
  98. #define out2 %o2
  99. #define out3 %o3
  100. #define out4 %o4
  101. #define out5 %o5
  102. #define out6 %o6
  103. #define out7 %o7
  104. #define stub stb /* stub is assembled as stb */
  105. changequote({,}) ! m4 quote characters are braces from here on
  106. ! Macro definitions:
  107. ! {ip_macro}
  108. !
  109. ! The logic used in initial and final permutations is the same as in
  110. ! the C code. The permutations are done with a clever technique that
  111. ! combines shifts, xors and bitwise ands.
  112. !
  113. ! The macro also sets address sbox 2 to 5 in global 2 to 5, address sbox 6
  114. ! in local6, and address sbox 8 in out3, all computed from sbox 1 in global1.
  115. !
  116. ! Rotates the halves 3 left to bring the sbox bits into convenient positions.
  117. !
  118. ! Loads key first round from address in parameter 5 to out0, out1.
  119. !
  120. ! After the original LibDES initial permutation, the resulting left
  121. ! is in the variable initially used for right and vice versa. The macro
  122. ! implements the possibility to keep the halves in the original registers.
  123. !
  124. ! parameter 1 left
  125. ! parameter 2 right
  126. ! parameter 3 result left (modify in first round)
  127. ! parameter 4 result right (use in first round)
  128. ! parameter 5 key address
  129. ! parameter 6 1/2 for include encryption/decryption
  130. ! parameter 7 1 for move in1 to in3
  131. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  132. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  133. define(ip_macro, {
  134. ! {ip_macro}
  135. ! $1 $2 $4 $3 $5 $6 $7 $8 $9
  136. ld [out2+256], local1 ! mask for the shift 4 step
  137. srl $2, 4, local4
  138. xor local4, $1, local4
  139. ifelse($7,1,{mov in1, in3},{nop})
  140. ld [out2+260], local2 ! mask for the shift 16 step
  141. and local4, local1, local4
  142. ifelse($8,1,{mov in3, in4},{})
  143. ifelse($8,2,{mov in4, in3},{})
  144. ld [out2+280], out4 ! loop counter
  145. sll local4, 4, local1
  146. xor $1, local4, $1
  147. ld [out2+264], local3 ! mask for the shift 2 step
  148. srl $1, 16, local4
  149. xor $2, local1, $2
  150. ifelse($9,1,{LDPTR KS3, in4},{})
  151. xor local4, $2, local4
  152. nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
  153. ifelse($9,1,{LDPTR KS2, in3},{})
  154. and local4, local2, local4
  155. nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
  156. sll local4, 16, local1
  157. xor $2, local4, $2
  158. srl $2, 2, local4
  159. xor $1, local1, $1
  160. sethi %hi(16711680), local5
  161. xor local4, $1, local4
  162. and local4, local3, local4
  163. or local5, 255, local5 ! local5 = 0x00ff00ff mask for the shift 8 step
  164. sll local4, 2, local2
  165. xor $1, local4, $1
  166. srl $1, 8, local4
  167. xor $2, local2, $2
  168. xor local4, $2, local4
  169. add global1, 768, global4 ! address sbox 4
  170. and local4, local5, local4
  171. add global1, 1024, global5 ! address sbox 5
  172. ld [out2+272], local7 ! mask for the shift 1 step
  173. sll local4, 8, local1
  174. xor $2, local4, $2
  175. srl $2, 1, local4
  176. xor $1, local1, $1
  177. ld [$5], out0 ! key 7531
  178. xor local4, $1, local4
  179. add global1, 256, global2 ! address sbox 2
  180. ld [$5+4], out1 ! key 8642
  181. and local4, local7, local4
  182. add global1, 512, global3 ! address sbox 3
  183. sll local4, 1, local1
  184. xor $1, local4, $1
  185. sll $1, 3, local3 ! start of rotate left 3
  186. xor $2, local1, $2
  187. sll $2, 3, local2
  188. add global1, 1280, local6 ! address sbox 6
  189. srl $1, 29, local4
  190. add global1, 1792, out3 ! address sbox 8
  191. srl $2, 29, local1
  192. or local4, local3, $4 ! parameter 1 rotated left 3
  193. or local2, local1, $3 ! parameter 2 rotated left 3
  194. ifelse($6, 1, {
  195. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  196. or local2, local1, $3 ! same or as just above
  197. xor $4, out0, local1
  198. call .des_enc.1 ! enters the encryption rounds at their loop label
  199. and local1, 252, local1 ! delay slot: sbox 1 lookup offset
  200. },{})
  201. ifelse($6, 2, {
  202. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  203. or local2, local1, $3 ! same or as just above
  204. xor $4, out0, local1
  205. call .des_dec.1 ! enters the decryption rounds at their loop label
  206. and local1, 252, local1 ! delay slot: sbox 1 lookup offset
  207. },{})
  208. })
  209. ! {rounds_macro}
  210. !
  211. ! The logic used in the DES rounds is the same as in the C code,
  212. ! except that calculations for sbox 1 and sbox 5 begin before
  213. ! the previous round is finished.
  214. !
  215. ! In each round one half (work) is modified based on key and the
  216. ! other half (use).
  217. !
  218. ! In this version we do two rounds in a loop repeated 7 times
  219. ! and two rounds separately.
  220. !
  221. ! One half has the bits for the sboxes in the following positions:
  222. !
  223. ! 777777xx555555xx333333xx111111xx
  224. !
  225. ! 88xx666666xx444444xx222222xx8888
  226. !
  227. ! The bits for each sbox are xor-ed with the key bits for that box.
  228. ! The above xx bits are cleared, and the result used for lookup in
  229. ! the sbox table. Each sbox entry contains the 4 output bits permuted
  230. ! into 32 bits according to the P permutation.
  231. !
  232. ! In the description of DES, left and right are switched after
  233. ! each round, except after last round. In this code the original
  234. ! left and right are kept in the same register in all rounds, meaning
  235. ! that after the 16 rounds the result for right is in the register
  236. ! originally used for left.
  237. !
  238. ! parameter 1 first work (left in first round)
  239. ! parameter 2 first use (right in first round)
  240. ! parameter 3 enc/dec 1/-1
  241. ! parameter 4 loop label
  242. ! parameter 5 key address register
  243. ! parameter 6 optional address for key next encryption/decryption
  244. ! parameter 7 not empty for include retl
  245. !
  246. ! also compares in2 to 8
  247. define(rounds_macro, {
  248. ! {rounds_macro}
  249. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  250. xor $2, out0, local1
  251. ld [out2+284], local5 ! 0x0000FC00
  252. ba $4 ! enter the loop
  253. and local1, 252, local1 ! delay slot: sbox 1 lookup offset
  254. .align 32
  255. $4:
  256. ! local6 is address sbox 6
  257. ! out3 is address sbox 8
  258. ! out4 is loop counter
  259. ld [global1+local1], local1
  260. xor $2, out1, out1 ! 8642
  261. xor $2, out0, out0 ! 7531
  262. ! fmovs %f0, %f0 ! fxor used for alignment
  263. srl out1, 4, local0 ! rotate 4 right
  264. and out0, local5, local3 ! 3
  265. ! fmovs %f0, %f0
  266. ld [$5+$3*8], local7 ! key 7531 next round
  267. srl local3, 8, local3 ! 3
  268. and local0, 252, local2 ! 2
  269. ! fmovs %f0, %f0
  270. ld [global3+local3],local3 ! 3
  271. sll out1, 28, out1 ! rotate
  272. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  273. ld [global2+local2], local2 ! 2
  274. srl out0, 24, local1 ! 7
  275. or out1, local0, out1 ! rotate
  276. ldub [out2+local1], local1 ! 7 (and 0xFC)
  277. srl out1, 24, local0 ! 8
  278. and out1, local5, local4 ! 4
  279. ldub [out2+local0], local0 ! 8 (and 0xFC)
  280. srl local4, 8, local4 ! 4
  281. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  282. ld [global4+local4],local4 ! 4
  283. srl out1, 16, local2 ! 6
  284. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  285. ld [out3+local0],local0 ! 8
  286. and local2, 252, local2 ! 6
  287. add global1, 1536, local5 ! address sbox 7
  288. ld [local6+local2], local2 ! 6
  289. srl out0, 16, local3 ! 5
  290. xor $1, local4, $1 ! 4 finished
  291. ld [local5+local1],local1 ! 7
  292. and local3, 252, local3 ! 5
  293. xor $1, local0, $1 ! 8 finished
  294. ld [global5+local3],local3 ! 5
  295. xor $1, local2, $1 ! 6 finished
  296. subcc out4, 1, out4 ! count down and set the flags tested by the bne below
  297. ld [$5+$3*8+4], out0 ! key 8642 next round
  298. xor $1, local7, local2 ! sbox 5 next round
  299. xor $1, local1, $1 ! 7 finished
  300. srl local2, 16, local2 ! sbox 5 next round
  301. xor $1, local3, $1 ! 5 finished
  302. ld [$5+$3*16+4], out1 ! key 8642 next round again
  303. and local2, 252, local2 ! sbox5 next round
  304. ! next round
  305. xor $1, local7, local7 ! 7531
  306. ld [global5+local2], local2 ! 5
  307. srl local7, 24, local3 ! 7
  308. xor $1, out0, out0 ! 8642
  309. ldub [out2+local3], local3 ! 7 (and 0xFC)
  310. srl out0, 4, local0 ! rotate 4 right
  311. and local7, 252, local1 ! 1
  312. sll out0, 28, out0 ! rotate
  313. xor $2, local2, $2 ! 5 finished local2 used
  314. srl local0, 8, local4 ! 4
  315. and local0, 252, local2 ! 2
  316. ld [local5+local3], local3 ! 7
  317. srl local0, 16, local5 ! 6
  318. or out0, local0, out0 ! rotate
  319. ld [global2+local2], local2 ! 2
  320. srl out0, 24, local0
  321. ld [$5+$3*16], out0 ! key 7531 next round
  322. and local4, 252, local4 ! 4
  323. and local5, 252, local5 ! 6
  324. ld [global4+local4], local4 ! 4
  325. xor $2, local3, $2 ! 7 finished local3 used
  326. and local0, 252, local0 ! 8
  327. ld [local6+local5], local5 ! 6
  328. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  329. srl local7, 8, local2 ! 3 start
  330. ld [out3+local0], local0 ! 8
  331. xor $2, local4, $2 ! 4 finished
  332. and local2, 252, local2 ! 3
  333. ld [global1+local1], local1 ! 1
  334. xor $2, local5, $2 ! 6 finished local5 used
  335. ld [global3+local2], local2 ! 3
  336. xor $2, local0, $2 ! 8 finished
  337. add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer
  338. ld [out2+284], local5 ! 0x0000FC00
  339. xor $2, out0, local4 ! sbox 1 next round
  340. xor $2, local1, $2 ! 1 finished
  341. xor $2, local2, $2 ! 3 finished
  342. bne $4 ! loop until out4 reaches zero
  343. and local4, 252, local1 ! sbox 1 next round
  344. ! two rounds more:
  345. ld [global1+local1], local1
  346. xor $2, out1, out1
  347. xor $2, out0, out0
  348. srl out1, 4, local0 ! rotate
  349. and out0, local5, local3
  350. ld [$5+$3*8], local7 ! key 7531
  351. srl local3, 8, local3
  352. and local0, 252, local2
  353. ld [global3+local3],local3
  354. sll out1, 28, out1 ! rotate
  355. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  356. ld [global2+local2], local2
  357. srl out0, 24, local1
  358. or out1, local0, out1 ! rotate
  359. ldub [out2+local1], local1
  360. srl out1, 24, local0
  361. and out1, local5, local4
  362. ldub [out2+local0], local0
  363. srl local4, 8, local4
  364. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  365. ld [global4+local4],local4
  366. srl out1, 16, local2
  367. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  368. ld [out3+local0],local0
  369. and local2, 252, local2
  370. add global1, 1536, local5 ! address sbox 7
  371. ld [local6+local2], local2
  372. srl out0, 16, local3
  373. xor $1, local4, $1 ! 4 finished
  374. ld [local5+local1],local1
  375. and local3, 252, local3
  376. xor $1, local0, $1
  377. ld [global5+local3],local3
  378. xor $1, local2, $1 ! 6 finished
  379. cmp in2, 8 ! flags are not consumed inside this macro
  380. ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
  381. xor $1, local7, local2 ! sbox 5 next round
  382. xor $1, local1, $1 ! 7 finished
  383. ld [$5+$3*8+4], out0
  384. srl local2, 16, local2 ! sbox 5 next round
  385. xor $1, local3, $1 ! 5 finished
  386. and local2, 252, local2
  387. ! next round (two rounds more)
  388. xor $1, local7, local7 ! 7531
  389. ld [global5+local2], local2
  390. srl local7, 24, local3
  391. xor $1, out0, out0 ! 8642
  392. ldub [out2+local3], local3
  393. srl out0, 4, local0 ! rotate
  394. and local7, 252, local1
  395. sll out0, 28, out0 ! rotate
  396. xor $2, local2, $2 ! 5 finished local2 used
  397. srl local0, 8, local4
  398. and local0, 252, local2
  399. ld [local5+local3], local3
  400. srl local0, 16, local5
  401. or out0, local0, out0 ! rotate
  402. ld [global2+local2], local2
  403. srl out0, 24, local0
  404. ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
  405. and local4, 252, local4
  406. and local5, 252, local5
  407. ld [global4+local4], local4
  408. xor $2, local3, $2 ! 7 finished local3 used
  409. and local0, 252, local0
  410. ld [local6+local5], local5
  411. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  412. srl local7, 8, local2 ! 3 start
  413. ld [out3+local0], local0
  414. xor $2, local4, $2
  415. and local2, 252, local2
  416. ld [global1+local1], local1
  417. xor $2, local5, $2 ! 6 finished local5 used
  418. ld [global3+local2], local2
  419. srl $1, 3, local3 ! first work shifted right 3 for the final permutation
  420. xor $2, local0, $2
  421. ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
  422. sll $1, 29, local4 ! first work shifted left 29 for the final permutation
  423. xor $2, local1, $2
  424. ifelse($7,{}, {}, {retl})
  425. xor $2, local2, $2 ! 3 finished. In the retl delay slot when parameter 7 is given
  426. })
  427. ! {fp_macro}
  428. !
  429. ! parameter 1 right (original left)
  430. ! parameter 2 left (original right)
  431. ! parameter 3 1 for optional store to [in0]
  432. ! parameter 4 1 for load input/output address to local5/7
  433. !
  434. ! The final permutation logic switches the halves, meaning that
  435. ! left and right end up in the registers originally used.
  436. define(fp_macro, {
  437. ! {fp_macro}
  438. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  439. ! initially undo the rotate 3 left done after initial permutation
  440. ! original left is received shifted 3 right and 29 left in local3/4
  441. sll $2, 29, local1
  442. or local3, local4, $1 ! parameter 1 rebuilt from the two shifted parts
  443. srl $2, 3, $2
  444. sethi %hi(0x55555555), local2
  445. or $2, local1, $2 ! parameter 2 rotated right 3
  446. or local2, %lo(0x55555555), local2
  447. srl $2, 1, local3 ! shift 1 step with mask 0x55555555
  448. sethi %hi(0x00ff00ff), local1
  449. xor local3, $1, local3
  450. or local1, %lo(0x00ff00ff), local1
  451. and local3, local2, local3
  452. sethi %hi(0x33333333), local4
  453. sll local3, 1, local2
  454. xor $1, local3, $1
  455. srl $1, 8, local3 ! shift 8 step with mask 0x00ff00ff
  456. xor $2, local2, $2
  457. xor local3, $2, local3
  458. or local4, %lo(0x33333333), local4
  459. and local3, local1, local3
  460. sethi %hi(0x0000ffff), local1
  461. sll local3, 8, local2
  462. xor $2, local3, $2
  463. srl $2, 2, local3 ! shift 2 step with mask 0x33333333
  464. xor $1, local2, $1
  465. xor local3, $1, local3
  466. or local1, %lo(0x0000ffff), local1
  467. and local3, local4, local3
  468. sethi %hi(0x0f0f0f0f), local4
  469. sll local3, 2, local2
  470. ifelse($4,1, {LDPTR INPUT, local5})
  471. xor $1, local3, $1
  472. ifelse($4,1, {LDPTR OUTPUT, local7})
  473. srl $1, 16, local3 ! shift 16 step with mask 0x0000ffff
  474. xor $2, local2, $2
  475. xor local3, $2, local3
  476. or local4, %lo(0x0f0f0f0f), local4
  477. and local3, local1, local3
  478. sll local3, 16, local2
  479. xor $2, local3, local1 ! parameter 2 value continues in local1
  480. srl local1, 4, local3 ! shift 4 step with mask 0x0f0f0f0f
  481. xor $1, local2, $1
  482. xor local3, $1, local3
  483. and local3, local4, local3
  484. sll local3, 4, local2
  485. xor $1, local3, $1 ! parameter 1 final
  486. ! optional store:
  487. ifelse($3,1, {st $1, [in0]})
  488. xor local1, local2, $2 ! parameter 2 final
  489. ifelse($3,1, {st $2, [in0+4]})
  490. })
  491. ! {fp_ip_macro}
  492. !
  493. ! Does initial permutation for next block mixed with
  494. ! final permutation for current block.
  495. !
  496. ! parameter 1 original left
  497. ! parameter 2 original right
  498. ! parameter 3 left ip
  499. ! parameter 4 right ip
  500. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  501. ! 2: mov in4 to in3
  502. !
  503. ! also adds -8 to length in2 and loads loop counter to out4
  504. define(fp_ip_macro, {
  505. ! {fp_ip_macro}
  506. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  507. define({temp1},{out4})
  508. define({temp2},{local3})
  509. define({ip1},{local1})
  510. define({ip2},{local2})
  511. define({ip4},{local4})
  512. define({ip5},{local5})
  513. ! $1 in local3, local4
  514. ld [out2+256], ip1 ! mask for the shift 4 steps
  515. sll out5, 29, temp1 ! note: reads out5 directly instead of parameter 2
  516. or local3, local4, $1
  517. srl out5, 3, $2 ! note: reads out5 directly instead of parameter 2
  518. ifelse($5,2,{mov in4, in3})
  519. ld [out2+272], ip5 ! mask for the shift 1 steps
  520. srl $4, 4, local0
  521. or $2, temp1, $2
  522. srl $2, 1, temp1
  523. xor temp1, $1, temp1
  524. and temp1, ip5, temp1
  525. xor local0, $3, local0
  526. sll temp1, 1, temp2
  527. xor $1, temp1, $1
  528. and local0, ip1, local0
  529. add in2, -8, in2 ! length minus 8
  530. sll local0, 4, local7
  531. xor $3, local0, $3
  532. ld [out2+268], ip4 ! mask for the shift 8 steps
  533. srl $1, 8, temp1
  534. xor $2, temp2, $2
  535. ld [out2+260], ip2 ! mask for the shift 16 steps
  536. srl $3, 16, local0
  537. xor $4, local7, $4
  538. xor temp1, $2, temp1
  539. xor local0, $4, local0
  540. and temp1, ip4, temp1
  541. and local0, ip2, local0
  542. sll temp1, 8, temp2
  543. xor $2, temp1, $2
  544. sll local0, 16, local7
  545. xor $4, local0, $4
  546. srl $2, 2, temp1
  547. xor $1, temp2, $1
  548. ld [out2+264], temp2 ! ip3
  549. srl $4, 2, local0
  550. xor $3, local7, $3
  551. xor temp1, $1, temp1
  552. xor local0, $3, local0
  553. and temp1, temp2, temp1
  554. and local0, temp2, local0
  555. sll temp1, 2, temp2
  556. xor $1, temp1, $1
  557. sll local0, 2, local7
  558. xor $3, local0, $3
  559. srl $1, 16, temp1
  560. xor $2, temp2, $2
  561. srl $3, 8, local0
  562. xor $4, local7, $4
  563. xor temp1, $2, temp1
  564. xor local0, $4, local0
  565. and temp1, ip2, temp1
  566. and local0, ip4, local0
  567. sll temp1, 16, temp2
  568. xor $2, temp1, local4
  569. sll local0, 8, local7
  570. xor $4, local0, $4
  571. srl $4, 1, local0
  572. xor $3, local7, $3
  573. srl local4, 4, temp1
  574. xor local0, $3, local0
  575. xor $1, temp2, $1
  576. and local0, ip5, local0
  577. sll local0, 1, local7
  578. xor temp1, $1, temp1
  579. xor $3, local0, $3
  580. xor $4, local7, $4
  581. sll $3, 3, local5 ! start of rotate left 3 for the next block
  582. and temp1, ip1, temp1
  583. sll temp1, 4, temp2
  584. xor $1, temp1, $1
  585. ifelse($5,1,{LDPTR KS2, in4})
  586. sll $4, 3, local2
  587. xor local4, temp2, $2
  588. ! reload since used as temporary:
  589. ld [out2+280], out4 ! loop counter
  590. srl $3, 29, local0
  591. ifelse($5,1,{add in4, 120, in4})
  592. ifelse($5,1,{LDPTR KS1, in3})
  593. srl $4, 29, local7
  594. or local0, local5, $4 ! parameter 3 rotated left 3
  595. or local2, local7, $3 ! parameter 4 rotated left 3
  596. })
  597. ! {load_little_endian}
  598. !
  599. ! parameter 1 address
  600. ! parameter 2 destination left
  601. ! parameter 3 destination right
  602. ! parameter 4 temporary
  603. ! parameter 5 label
  604. define(load_little_endian, {
  605. ! {load_little_endian}
  606. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  607. ! first in memory to rightmost in register
  608. $5:
  609. ldub [$1+3], $2 ! byte 3 ends up in bits 31 to 24
  610. ldub [$1+2], $4
  611. sll $2, 8, $2
  612. or $2, $4, $2
  613. ldub [$1+1], $4
  614. sll $2, 8, $2
  615. or $2, $4, $2
  616. ldub [$1+0], $4 ! byte 0 ends up in bits 7 to 0
  617. sll $2, 8, $2
  618. or $2, $4, $2
  619. ldub [$1+3+4], $3 ! second word: same pattern for bytes 4 to 7
  620. ldub [$1+2+4], $4
  621. sll $3, 8, $3
  622. or $3, $4, $3
  623. ldub [$1+1+4], $4
  624. sll $3, 8, $3
  625. or $3, $4, $3
  626. ldub [$1+0+4], $4
  627. sll $3, 8, $3
  628. or $3, $4, $3
  629. $5a:
  630. })
  631. ! {load_little_endian_inc}
  632. !
  633. ! parameter 1 address
  634. ! parameter 2 destination left
  635. ! parameter 3 destination right
  636. ! parameter 4 temporary
  637. ! parameter 5 label
  638. !
  639. ! adds 8 to address
  640. define(load_little_endian_inc, {
  641. ! {load_little_endian_inc}
  642. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  643. ! first in memory to rightmost in register
  644. $5:
  645. ldub [$1+3], $2 ! byte 3 ends up in bits 31 to 24
  646. ldub [$1+2], $4
  647. sll $2, 8, $2
  648. or $2, $4, $2
  649. ldub [$1+1], $4
  650. sll $2, 8, $2
  651. or $2, $4, $2
  652. ldub [$1+0], $4 ! byte 0 ends up in bits 7 to 0
  653. sll $2, 8, $2
  654. or $2, $4, $2
  655. ldub [$1+3+4], $3 ! second word starts with byte 7
  656. add $1, 8, $1 ! address advanced here. Later offsets subtract 8 to compensate
  657. ldub [$1+2+4-8], $4
  658. sll $3, 8, $3
  659. or $3, $4, $3
  660. ldub [$1+1+4-8], $4
  661. sll $3, 8, $3
  662. or $3, $4, $3
  663. ldub [$1+0+4-8], $4
  664. sll $3, 8, $3
  665. or $3, $4, $3
  666. $5a:
  667. })
  668. ! {load_n_bytes}
  669. !
  670. ! Loads 1 to 7 bytes little endian
  671. ! Remaining bytes are zeroed.
  672. !
  673. ! parameter 1 address
  674. ! parameter 2 length
  675. ! parameter 3 destination register left
  676. ! parameter 4 destination register right
  677. ! parameter 5 temp
  678. ! parameter 6 temp2
  679. ! parameter 7 label
  680. ! parameter 8 return label
  681. define(load_n_bytes, {
  682. ! {load_n_bytes}
  683. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  684. $7.0: call .+8 ! %o7 = address of this call
  685. sll $2, 2, $6 ! delay slot: table offset is length times 4
  686. add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
  687. add $5, $6, $5 ! address of the table entry for this length
  688. mov 0, $4 ! clear the register that collects bytes 0 to 3
  689. ld [$5], $5 ! code offset relative to the call
  690. jmp %o7+$5 ! enter the chain below and fall through to the end
  691. mov 0, $3 ! delay slot: clear the register that collects bytes 4 to 6
  692. $7.7:
  693. ldub [$1+6], $5
  694. sll $5, 16, $5
  695. or $3, $5, $3
  696. $7.6:
  697. ldub [$1+5], $5
  698. sll $5, 8, $5
  699. or $3, $5, $3
  700. $7.5:
  701. ldub [$1+4], $5
  702. or $3, $5, $3
  703. $7.4:
  704. ldub [$1+3], $5
  705. sll $5, 24, $5
  706. or $4, $5, $4
  707. $7.3:
  708. ldub [$1+2], $5
  709. sll $5, 16, $5
  710. or $4, $5, $4
  711. $7.2:
  712. ldub [$1+1], $5
  713. sll $5, 8, $5
  714. or $4, $5, $4
  715. $7.1:
  716. ldub [$1+0], $5
  717. ba $8 ! continue at the return label
  718. or $4, $5, $4 ! delay slot
  719. .align 4
  720. $7.jmp.table:
  721. .word 0 ! slot for length 0. NOTE review: offset 0 would jump back to the call itself
  722. .word $7.1-$7.0
  723. .word $7.2-$7.0
  724. .word $7.3-$7.0
  725. .word $7.4-$7.0
  726. .word $7.5-$7.0
  727. .word $7.6-$7.0
  728. .word $7.7-$7.0
  729. })
  730. ! {store_little_endian}
  731. !
  732. ! parameter 1 address
  733. ! parameter 2 source left
  734. ! parameter 3 source right
  735. ! parameter 4 temporary (the body also uses parameter 5 as a label)
  736. define(store_little_endian, {
  737. ! {store_little_endian}
  738. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  739. ! rightmost in register to first in memory
  740. $5:
  741. and $2, 255, $4 ! bits 7 to 0 go to byte 0
  742. stub $4, [$1+0]
  743. srl $2, 8, $4
  744. and $4, 255, $4
  745. stub $4, [$1+1]
  746. srl $2, 16, $4
  747. and $4, 255, $4
  748. stub $4, [$1+2]
  749. srl $2, 24, $4 ! bits 31 to 24 go to byte 3
  750. stub $4, [$1+3]
  751. and $3, 255, $4 ! second word: same pattern for bytes 4 to 7
  752. stub $4, [$1+0+4]
  753. srl $3, 8, $4
  754. and $4, 255, $4
  755. stub $4, [$1+1+4]
  756. srl $3, 16, $4
  757. and $4, 255, $4
  758. stub $4, [$1+2+4]
  759. srl $3, 24, $4
  760. stub $4, [$1+3+4]
  761. $5a:
  762. })
  763. ! {store_n_bytes}
  764. !
  765. ! Stores 1 to 7 bytes little endian
  766. !
  767. ! parameter 1 address
  768. ! parameter 2 length
  769. ! parameter 3 source register left
  770. ! parameter 4 source register right
  771. ! parameter 5 temp
  772. ! parameter 6 temp2
  773. ! parameter 7 label
  774. ! parameter 8 return label
  775. define(store_n_bytes, {
  776. ! {store_n_bytes}
  777. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  778. $7.0: call .+8 ! %o7 = address of this call
  779. sll $2, 2, $6 ! delay slot: table offset is length times 4
  780. add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
  781. add $5, $6, $5 ! address of the table entry for this length
  782. ld [$5], $5 ! code offset relative to the call
  783. jmp %o7+$5 ! enter the chain below and fall through to the end
  784. nop
  785. $7.7:
  786. srl $3, 16, $5
  787. and $5, 0xff, $5
  788. stub $5, [$1+6]
  789. $7.6:
  790. srl $3, 8, $5
  791. and $5, 0xff, $5
  792. stub $5, [$1+5]
  793. $7.5:
  794. and $3, 0xff, $5
  795. stub $5, [$1+4]
  796. $7.4:
  797. srl $4, 24, $5
  798. stub $5, [$1+3]
  799. $7.3:
  800. srl $4, 16, $5
  801. and $5, 0xff, $5
  802. stub $5, [$1+2]
  803. $7.2:
  804. srl $4, 8, $5
  805. and $5, 0xff, $5
  806. stub $5, [$1+1]
  807. $7.1:
  808. and $4, 0xff, $5
  809. ba $8 ! continue at the return label
  810. stub $5, [$1] ! delay slot: byte 0
  811. .align 4
  812. $7.jmp.table:
  813. .word 0 ! slot for length 0. NOTE review: offset 0 would jump back to the call itself
  814. .word $7.1-$7.0
  815. .word $7.2-$7.0
  816. .word $7.3-$7.0
  817. .word $7.4-$7.0
  818. .word $7.5-$7.0
  819. .word $7.6-$7.0
  820. .word $7.7-$7.0
  821. })
  822. define(testvalue,{1})
  823. define(register_init, {
  824. ! For test purposes:
  825. sethi %hi(testvalue), local0
  826. or local0, %lo(testvalue), local0 ! local0 now holds the fill value
  827. ifelse($1,{},{}, {mov local0, $1}) ! copy it to each register named in parameters 1 to 8
  828. ifelse($2,{},{}, {mov local0, $2})
  829. ifelse($3,{},{}, {mov local0, $3})
  830. ifelse($4,{},{}, {mov local0, $4})
  831. ifelse($5,{},{}, {mov local0, $5})
  832. ifelse($6,{},{}, {mov local0, $6})
  833. ifelse($7,{},{}, {mov local0, $7})
  834. ifelse($8,{},{}, {mov local0, $8})
  835. mov local0, local1 ! then to the fixed set of working registers
  836. mov local0, local2
  837. mov local0, local3
  838. mov local0, local4
  839. mov local0, local5
  840. mov local0, local7
  841. mov local0, local6
  842. mov local0, out0
  843. mov local0, out1
  844. mov local0, out2
  845. mov local0, out3
  846. mov local0, out4
  847. mov local0, out5
  848. mov local0, global1
  849. mov local0, global2
  850. mov local0, global3
  851. mov local0, global4
  852. mov local0, global5
  853. })
  854. .section ".text"
  855. .align 32
  856. .des_enc: ! internal subroutine, reached with call
  857. ! key address in3
  858. ! loads key next encryption/decryption first round from [in4]
  859. rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) ! key pointer steps forward, ends with retl
  860. .align 32
  861. .des_dec: ! internal subroutine, reached with call
  862. ! implemented with out5 as first parameter to avoid
  863. ! register exchange in ede modes
  864. ! key address in4
  865. ! loads key next encryption/decryption first round from [in3]
  866. rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) ! key pointer steps backward, ends with retl
  867. ! void DES_encrypt1(data, ks, enc)
  868. ! *******************************
  869. .align 32
  870. .global DES_encrypt1
  871. .type DES_encrypt1,#function
  872. DES_encrypt1:
  873. save %sp, FRAME, %sp
  874. sethi %hi(.PIC.DES_SPtrans-1f),global1
  875. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to the sbox table
  876. 1: call .+8 ! %o7 = address of label 1
  877. add %o7,global1,global1 ! delay slot: global1 = address sbox 1
  878. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  879. ld [in0], in5 ! left
  880. cmp in2, 0 ! enc
  881. be .encrypt.dec ! enc equal to 0 selects decryption
  882. ld [in0+4], out5 ! right, loaded in the delay slot
  883. ! parameter 6 1/2 for include encryption/decryption
  884. ! parameter 7 1 for move in1 to in3
  885. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  886. ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
  887. rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
  888. fp_macro(in5, out5, 1) ! 1 for store to [in0]
  889. ret
  890. restore
  891. .encrypt.dec:
  892. add in1, 120, in3 ! use last subkey for first round
  893. ! parameter 6 1/2 for include encryption/decryption
  894. ! parameter 7 1 for move in1 to in3
  895. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  896. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
  897. fp_macro(out5, in5, 1) ! 1 for store to [in0]
  898. ret
  899. restore
  900. .DES_encrypt1.end:
  901. .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
  902. ! void DES_encrypt2(data, ks, enc)
  903. !*********************************
  904. ! encrypts/decrypts without initial/final permutation
  905. .align 32
  906. .global DES_encrypt2
  907. .type DES_encrypt2,#function
  908. DES_encrypt2:
  909. save %sp, FRAME, %sp
  910. sethi %hi(.PIC.DES_SPtrans-1f),global1
  911. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to the sbox table
  912. 1: call .+8 ! %o7 = address of label 1
  913. add %o7,global1,global1 ! delay slot: global1 = address sbox 1
  914. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  915. ! Set sbox address 1 to 6 and rotate halfs 3 left
  916. ! Errors caught by destest? Yes. Still? *NO*
  917. !sethi %hi(DES_SPtrans), global1 ! address sbox 1
  918. !or global1, %lo(DES_SPtrans), global1 ! sbox 1
  919. add global1, 256, global2 ! sbox 2
  920. add global1, 512, global3 ! sbox 3
  921. ld [in0], out5 ! right
  922. add global1, 768, global4 ! sbox 4
  923. add global1, 1024, global5 ! sbox 5
  924. ld [in0+4], in5 ! left
  925. add global1, 1280, local6 ! sbox 6
  926. add global1, 1792, out3 ! sbox 8
  927. ! rotate
  928. sll in5, 3, local5
  929. mov in1, in3 ! key address to in3
  930. sll out5, 3, local7
  931. srl in5, 29, in5
  932. srl out5, 29, out5
  933. add in5, local5, in5 ! in5 rotated left 3
  934. add out5, local7, out5 ! out5 rotated left 3
  935. cmp in2, 0
  936. ! we use our own stackframe
  937. be .encrypt2.dec ! enc equal to 0 selects decryption
  938. STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: save the data pointer, in0 is reused below
  939. ld [in3], out0 ! key 7531 first round
  940. mov LOOPS, out4 ! loop counter
  941. ld [in3+4], out1 ! key 8642 first round
  942. sethi %hi(0x0000FC00), local5 ! local5 = 0x0000FC00
  943. call .des_enc
  944. mov in3, in4 ! delay slot
  945. ! rotate
  946. sll in5, 29, in0
  947. srl in5, 3, in5
  948. sll out5, 29, in1
  949. add in5, in0, in5 ! in5 rotated right 3
  950. srl out5, 3, out5
  951. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload the data pointer
  952. add out5, in1, out5 ! out5 rotated right 3
  953. st in5, [in0]
  954. st out5, [in0+4]
  955. ret
  956. restore
  957. .encrypt2.dec:
  958. add in3, 120, in4
  959. ld [in4], out0 ! key 7531 first round
  960. mov LOOPS, out4 ! loop counter
  961. ld [in4+4], out1 ! key 8642 first round
  962. sethi %hi(0x0000FC00), local5 ! local5 = 0x0000FC00
  963. mov in5, local1 ! left expected in out5
  964. mov out5, in5
  965. call .des_dec
  966. mov local1, out5 ! delay slot: completes the swap of in5 and out5
  967. .encrypt2.finish:
  968. ! rotate
  969. sll in5, 29, in0
  970. srl in5, 3, in5
  971. sll out5, 29, in1
  972. add in5, in0, in5 ! in5 rotated right 3
  973. srl out5, 3, out5
  974. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload the data pointer
  975. add out5, in1, out5 ! out5 rotated right 3
  976. st out5, [in0] ! stored in the opposite order to the encryption path
  977. st in5, [in0+4]
  978. ret
  979. restore
  980. .DES_encrypt2.end:
  981. .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
  982. ! void DES_encrypt3(data, ks1, ks2, ks3)
  983. ! **************************************
  984. .align 32
  985. .global DES_encrypt3
  986. .type DES_encrypt3,#function
  987. DES_encrypt3:
  988. save %sp, FRAME, %sp
  989. sethi %hi(.PIC.DES_SPtrans-1f),global1
  990. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to the sbox table
  991. 1: call .+8 ! %o7 = address of label 1
  992. add %o7,global1,global1 ! delay slot: global1 = address sbox 1
  993. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  994. ld [in0], in5 ! left
  995. add in2, 120, in4 ! ks2
  996. ld [in0+4], out5 ! right
  997. mov in3, in2 ! save ks3
  998. ! parameter 6 1/2 for include encryption/decryption
  999. ! parameter 7 1 for mov in1 to in3
  1000. ! parameter 8 1 for mov in3 to in4
  1001. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1002. ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
  1003. call .des_dec ! second pass: decryption rounds, key address in4
  1004. mov in2, in3 ! preload ks3
  1005. call .des_enc ! third pass: encryption rounds, key address in3
  1006. nop
  1007. fp_macro(in5, out5, 1)
  1008. ret
  1009. restore
  1010. .DES_encrypt3.end:
  1011. .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
  1012. ! void DES_decrypt3(data, ks1, ks2, ks3)
  1013. ! **************************************
  1014. .align 32
  1015. .global DES_decrypt3
  1016. .type DES_decrypt3,#function
  1017. DES_decrypt3:
  1018. save %sp, FRAME, %sp
  1019. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1020. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to the sbox table
  1021. 1: call .+8 ! %o7 = address of label 1
  1022. add %o7,global1,global1 ! delay slot: global1 = address sbox 1
  1023. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1024. ld [in0], in5 ! left
  1025. add in3, 120, in4 ! ks3
  1026. ld [in0+4], out5 ! right
  1027. mov in2, in3 ! ks2
  1028. ! parameter 6 1/2 for include encryption/decryption
  1029. ! parameter 7 1 for mov in1 to in3
  1030. ! parameter 8 1 for mov in3 to in4
  1031. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1032. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
  1033. call .des_enc ! second pass: encryption rounds, key address in3
  1034. add in1, 120, in4 ! preload ks1
  1035. call .des_dec ! third pass: decryption rounds, key address in4
  1036. nop
  1037. fp_macro(out5, in5, 1)
  1038. ret
  1039. restore
  1040. .DES_decrypt3.end:
  1041. .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
  1042. ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
  1043. ! *****************************************************************
  !
  ! DES in CBC mode over length bytes from input to output.
  ! enc != 0 takes the encryption path, enc == 0 branches to .ncbc.dec.
  ! The chaining value is read from ivec at the start and written back
  ! to ivec on exit. The one exception is the decryption path with
  ! length <= 0, which returns without touching ivec.
  !
  ! Incoming registers, following the prototype order:
  !   in0 input, in1 output, in2 length, in3 schedule, in4 ivec, in5 enc
  !
  ! The ivec pointer is parked in a stack slot straight away because
  ! in4 and in5 are reused as key schedule and data registers.
  !
  ! NOTE(review): the load, store, permutation and rounds macros used
  ! below are written earlier in the file. Remarks about them here rely
  ! on the parameter notes next to each call. Confirm against the macro
  ! bodies.
  1044. .align 32
  1045. .global DES_ncbc_encrypt
  1046. .type DES_ncbc_encrypt,#function
  1047. DES_ncbc_encrypt:
  1048. save %sp, FRAME, %sp
  ! Names for three slots in the argument area of this stack frame.
  1049. define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
  1050. define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
  1051. define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  ! Position independent address of the S-box table: global1 first gets
  ! the distance from label 1 to the table, the call then leaves the
  ! address of label 1 in %o7, and the delay slot adds the two.
  1052. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1053. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1054. 1: call .+8
  1055. add %o7,global1,global1
  1056. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1057. cmp in5, 0 ! enc
  1058. be .ncbc.dec
  ! Delay slot, executed on both paths: park the ivec pointer.
  1059. STPTR in4, IVEC
  ! ---- encryption path ----
  ! in5/out5 carry the chaining value from here on.
  1060. ! addr left right temp label
  1061. load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
  1062. addcc in2, -8, in2 ! bytes missing when first block done
  1063. bl .ncbc.enc.seven.or.less
  1064. mov in3, in4 ! schedule
  1065. .ncbc.enc.next.block:
  1066. load_little_endian(in0, out4, global4, local3, .LLE2) ! block
  ! Entry point for a partial last block, loaded at .ncbc.enc.seven.or.less.
  1067. .ncbc.enc.next.block_1:
  1068. xor in5, out4, in5 ! iv xor
  1069. xor out5, global4, out5 ! iv xor
  1070. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  1071. ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
  ! Entry point when the initial permutation was already done together
  ! with the final permutation of the previous block.
  1072. .ncbc.enc.next.block_2:
  1073. !// call .des_enc ! compares in2 to 8
  1074. ! rounds inlined for alignment purposes
  1075. add global1, 768, global4 ! address sbox 4 since register used below
  1076. rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
  ! Per the note above, the rounds leave the in2 versus 8 comparison in
  ! the condition codes. Less than 8 means no further full block.
  1077. bl .ncbc.enc.next.block_fp
  1078. add in0, 8, in0 ! input address
  1079. ! If 8 or more bytes are to be encrypted after this block,
  1080. ! we combine final permutation for this block with initial
  1081. ! permutation for next block. Load next block:
  1082. load_little_endian(in0, global3, global4, local5, .LLE12)
  1083. ! parameter 1 original left
  1084. ! parameter 2 original right
  1085. ! parameter 3 left ip
  1086. ! parameter 4 right ip
  1087. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1088. ! 2: mov in4 to in3
  1089. !
  1090. ! also adds -8 to length in2 and loads loop counter to out4
  1091. fp_ip_macro(out0, out1, global3, global4, 2)
  1092. store_little_endian(in1, out0, out1, local3, .SLE10) ! block
  1093. ld [in3], out0 ! key 7531 first round next block
  1094. mov in5, local1
  1095. xor global3, out5, in5 ! iv xor next block
  1096. ld [in3+4], out1 ! key 8642
  1097. add global1, 512, global3 ! address sbox 3 since register used
  1098. xor global4, local1, out5 ! iv xor next block
  1099. ba .ncbc.enc.next.block_2
  1100. add in1, 8, in1 ! output address
  ! Last full block, or the padded partial block: plain final permutation.
  1101. .ncbc.enc.next.block_fp:
  1102. fp_macro(in5, out5)
  1103. store_little_endian(in1, in5, out5, local3, .SLE1) ! block
  1104. addcc in2, -8, in2 ! bytes missing when next block done
  1105. bpos .ncbc.enc.next.block
  1106. add in1, 8, in1
  ! in2 is negative here. in2 + 8 is the number of input bytes still
  ! unprocessed. A value of in2 <= -8 means none are left.
  1107. .ncbc.enc.seven.or.less:
  1108. cmp in2, -8
  1109. ble .ncbc.enc.finish
  1110. nop
  1111. add in2, 8, local1 ! bytes to load
  1112. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1113. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
  1114. ! Loads 1 to 7 bytes little endian to global4, out4
  ! Write the final chaining value back through the parked ivec pointer.
  1115. .ncbc.enc.finish:
  1116. LDPTR IVEC, local4
  1117. store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
  1118. ret
  1119. restore
  ! ---- decryption path ----
  ! in0/in1 are reused for the chaining value, so the input and output
  ! pointers are parked in stack slots. The schedule pointer is advanced
  ! by 120 bytes before use.
  1120. .ncbc.dec:
  1121. STPTR in0, INPUT
  1122. cmp in2, 0 ! length
  1123. add in3, 120, in3
  1124. LDPTR IVEC, local7 ! ivec
  1125. ble .ncbc.dec.finish
  1126. mov in3, in4 ! schedule
  1127. STPTR in1, OUTPUT
  1128. mov in0, local5 ! input
  1129. load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
  1130. .ncbc.dec.next.block:
  1131. load_little_endian(local5, in5, out5, local3, .LLE4) ! block
  1132. ! parameter 6 1/2 for include encryption/decryption
  1133. ! parameter 7 1 for mov in1 to in3
  1134. ! parameter 8 1 for mov in3 to in4
  1135. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
  1136. fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
  1137. ! in2 is bytes left to be stored
  1138. ! in2 is compared to 8 in the rounds
  1139. xor out5, in0, out4 ! iv xor
  1140. bl .ncbc.dec.seven.or.less
  1141. xor in5, in1, global4 ! iv xor
  1142. ! Load ivec next block now, since input and output address might be the same.
  1143. load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
  1144. store_little_endian(local7, out4, global4, local3, .SLE3)
  1145. STPTR local5, INPUT
  1146. add local7, 8, local7
  1147. addcc in2, -8, in2
  1148. bg .ncbc.dec.next.block
  1149. STPTR local7, OUTPUT
  ! in0/in1 hold the last ciphertext block read, which becomes the new ivec.
  1150. .ncbc.dec.store.iv:
  1151. LDPTR IVEC, local4 ! ivec
  1152. store_little_endian(local4, in0, in1, local5, .SLE4)
  1153. .ncbc.dec.finish:
  1154. ret
  1155. restore
  ! Fewer than 8 bytes remain to be stored: write only in2 bytes of the
  ! plaintext, then continue at .ncbc.dec.store.iv (the last macro parameter).
  1156. .ncbc.dec.seven.or.less:
  1157. load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
  1158. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
  1159. .DES_ncbc_encrypt.end:
  1160. .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
  1161. ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
  1162. ! **************************************************************************
  !
  ! Triple DES in CBC mode over length bytes from input to output.
  ! Encryption path per block: ks1 encryption, ks2 decryption, ks3
  ! encryption. Decryption path per block: ks3 decryption, ks2 encryption,
  ! ks1 decryption. enc == 0 selects the decryption path.
  !
  ! Incoming registers, following the prototype order:
  !   in0 input, in1 output, in2 length, in3 ks1, in4 ks2, in5 ks3
  ! ivec and enc are fetched from the %fp relative slots 6 and 7.
  !
  ! The three key schedule pointers are parked in stack slots because
  ! in3, in4 and in5 are reused while a block is processed. On the
  ! decryption path ks1 and ks3 are stored already advanced by 120 bytes.
  ! The input and output slot names used on the decryption path are the
  ! m4 definitions made inside DES_ncbc_encrypt earlier in this file.
  !
  ! NOTE(review): the load, store, permutation and rounds macros used
  ! below are written earlier in the file. Remarks about them here rely
  ! on the parameter notes next to each call. Confirm against the macro
  ! bodies.
  1163. .align 32
  1164. .global DES_ede3_cbc_encrypt
  1165. .type DES_ede3_cbc_encrypt,#function
  1166. DES_ede3_cbc_encrypt:
  1167. save %sp, FRAME, %sp
  ! Names for three slots in the argument area of this stack frame.
  1168. define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
  1169. define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  1170. define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
  ! Position independent address of the S-box table: global1 first gets
  ! the distance from label 1 to the table, the call then leaves the
  ! address of label 1 in %o7, and the delay slot adds the two.
  1171. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1172. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1173. 1: call .+8
  1174. add %o7,global1,global1
  1175. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1176. LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
  1177. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1178. cmp local3, 0 ! enc
  1179. be .ede3.dec
  ! Delay slot, executed on both paths: park ks2 unmodified.
  1180. STPTR in4, KS2
  ! ---- encryption path ----
  ! in5/out5 carry the chaining value once ks3 has been parked.
  1181. STPTR in5, KS3
  1182. load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
  1183. addcc in2, -8, in2 ! bytes missing after next block
  1184. bl .ede3.enc.seven.or.less
  1185. STPTR in3, KS1
  1186. .ede3.enc.next.block:
  1187. load_little_endian(in0, out4, global4, local3, .LLE7)
  ! Entry point for a partial last block, loaded at .ede3.enc.seven.or.less.
  1188. .ede3.enc.next.block_1:
  1189. LDPTR KS2, in4
  1190. xor in5, out4, in5 ! iv xor
  1191. xor out5, global4, out5 ! iv xor
  1192. LDPTR KS1, in3
  1193. add in4, 120, in4 ! for decryption we use last subkey first
  1194. nop
  1195. ip_macro(in5, out5, in5, out5, in3)
  ! Entry point when the initial permutation was already done together
  ! with the final permutation of the previous block.
  1196. .ede3.enc.next.block_2:
  1197. call .des_enc ! ks1 in3
  1198. nop
  1199. call .des_dec ! ks2 in4
  1200. LDPTR KS3, in3
  1201. call .des_enc ! ks3 in3 compares in2 to 8
  1202. nop
  ! Less than 8 means no further full block follows this one.
  1203. bl .ede3.enc.next.block_fp
  1204. add in0, 8, in0
  1205. ! If 8 or more bytes are to be encrypted after this block,
  1206. ! we combine final permutation for this block with initial
  1207. ! permutation for next block. Load next block:
  1208. load_little_endian(in0, global3, global4, local5, .LLE11)
  1209. ! parameter 1 original left
  1210. ! parameter 2 original right
  1211. ! parameter 3 left ip
  1212. ! parameter 4 right ip
  1213. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1214. ! 2: mov in4 to in3
  1215. !
  1216. ! also adds -8 to length in2 and loads loop counter to out4
  1217. fp_ip_macro(out0, out1, global3, global4, 1)
  1218. store_little_endian(in1, out0, out1, local3, .SLE9) ! block
  1219. mov in5, local1
  1220. xor global3, out5, in5 ! iv xor next block
  1221. ld [in3], out0 ! key 7531
  1222. add global1, 512, global3 ! address sbox 3
  1223. xor global4, local1, out5 ! iv xor next block
  1224. ld [in3+4], out1 ! key 8642
  1225. add global1, 768, global4 ! address sbox 4
  1226. ba .ede3.enc.next.block_2
  1227. add in1, 8, in1
  ! Last full block, or the padded partial block: plain final permutation.
  1228. .ede3.enc.next.block_fp:
  1229. fp_macro(in5, out5)
  1230. store_little_endian(in1, in5, out5, local3, .SLE5) ! block
  1231. addcc in2, -8, in2 ! bytes missing when next block done
  1232. bpos .ede3.enc.next.block
  1233. add in1, 8, in1
  ! in2 is negative here. in2 + 8 is the number of input bytes still
  ! unprocessed. A value of in2 <= -8 means none are left.
  1234. .ede3.enc.seven.or.less:
  1235. cmp in2, -8
  1236. ble .ede3.enc.finish
  1237. nop
  1238. add in2, 8, local1 ! bytes to load
  1239. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1240. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
  ! Write the final chaining value back to ivec.
  1241. .ede3.enc.finish:
  1242. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1243. store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
  1244. ret
  1245. restore
  ! ---- decryption path ----
  ! in0/in1 are reused for the chaining value, so the input and output
  ! pointers are parked in stack slots. ks1 and ks3 are advanced by 120
  ! bytes before being parked. ks2 was parked unmodified in the delay slot
  ! of the branch that led here.
  1246. .ede3.dec:
  1247. STPTR in0, INPUT
  1248. add in5, 120, in5
  1249. STPTR in1, OUTPUT
  1250. mov in0, local5
  1251. add in3, 120, in3
  1252. STPTR in3, KS1
  1253. cmp in2, 0
  1254. ble .ede3.dec.finish
  1255. STPTR in5, KS3
  1256. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
  1257. load_little_endian(local7, in0, in1, local3, .LLE8)
  1258. .ede3.dec.next.block:
  1259. load_little_endian(local5, in5, out5, local3, .LLE9)
  1260. ! parameter 6 1/2 for include encryption/decryption
  1261. ! parameter 7 1 for mov in1 to in3
  1262. ! parameter 8 1 for mov in3 to in4
  1263. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1264. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
  1265. call .des_enc ! ks2 in3
  1266. LDPTR KS1, in4
  1267. call .des_dec ! ks1 in4
  1268. nop
  1269. fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
  1270. ! in2 is bytes left to be stored
  1271. ! in2 is compared to 8 in the rounds
  1272. xor out5, in0, out4
  1273. bl .ede3.dec.seven.or.less
  1274. xor in5, in1, global4
  1275. load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
  1276. store_little_endian(local7, out4, global4, local3, .SLE7) ! block
  1277. STPTR local5, INPUT
  1278. addcc in2, -8, in2
  1279. add local7, 8, local7
  1280. bg .ede3.dec.next.block
  1281. STPTR local7, OUTPUT
  ! in0/in1 hold the last ciphertext block read, which becomes the new ivec.
  1282. .ede3.dec.store.iv:
  1283. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1284. store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
  1285. .ede3.dec.finish:
  1286. ret
  1287. restore
  ! Fewer than 8 bytes remain to be stored: write only in2 bytes of the
  ! plaintext, then continue at .ede3.dec.store.iv (the last macro parameter).
  1288. .ede3.dec.seven.or.less:
  1289. load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
  1290. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
  1291. .DES_ede3_cbc_encrypt.end:
  1292. .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
  1293. .align 256
  1294. .type .des_and,#object
  ! Object size: 256 bytes of lookup table plus 8 words of constants,
  ! 288 bytes in total. The last word sits at offset 284 and ends at 287.
  1295. .size .des_and,288
  1296. .des_and:
  1297. ! This table is used for AND 0xFC when it is known that register
  1298. ! bits 8-31 are zero. Makes it possible to do three arithmetic
  1299. ! operations in one cycle.
  ! Entry i holds i AND 0xFC, for i = 0..255.
  ! Every routine in this file keeps the address of this object in out2.
  1300. .byte 0, 0, 0, 0, 4, 4, 4, 4
  1301. .byte 8, 8, 8, 8, 12, 12, 12, 12
  1302. .byte 16, 16, 16, 16, 20, 20, 20, 20
  1303. .byte 24, 24, 24, 24, 28, 28, 28, 28
  1304. .byte 32, 32, 32, 32, 36, 36, 36, 36
  1305. .byte 40, 40, 40, 40, 44, 44, 44, 44
  1306. .byte 48, 48, 48, 48, 52, 52, 52, 52
  1307. .byte 56, 56, 56, 56, 60, 60, 60, 60
  1308. .byte 64, 64, 64, 64, 68, 68, 68, 68
  1309. .byte 72, 72, 72, 72, 76, 76, 76, 76
  1310. .byte 80, 80, 80, 80, 84, 84, 84, 84
  1311. .byte 88, 88, 88, 88, 92, 92, 92, 92
  1312. .byte 96, 96, 96, 96, 100, 100, 100, 100
  1313. .byte 104, 104, 104, 104, 108, 108, 108, 108
  1314. .byte 112, 112, 112, 112, 116, 116, 116, 116
  1315. .byte 120, 120, 120, 120, 124, 124, 124, 124
  1316. .byte 128, 128, 128, 128, 132, 132, 132, 132
  1317. .byte 136, 136, 136, 136, 140, 140, 140, 140
  1318. .byte 144, 144, 144, 144, 148, 148, 148, 148
  1319. .byte 152, 152, 152, 152, 156, 156, 156, 156
  1320. .byte 160, 160, 160, 160, 164, 164, 164, 164
  1321. .byte 168, 168, 168, 168, 172, 172, 172, 172
  1322. .byte 176, 176, 176, 176, 180, 180, 180, 180
  1323. .byte 184, 184, 184, 184, 188, 188, 188, 188
  1324. .byte 192, 192, 192, 192, 196, 196, 196, 196
  1325. .byte 200, 200, 200, 200, 204, 204, 204, 204
  1326. .byte 208, 208, 208, 208, 212, 212, 212, 212
  1327. .byte 216, 216, 216, 216, 220, 220, 220, 220
  1328. .byte 224, 224, 224, 224, 228, 228, 228, 228
  1329. .byte 232, 232, 232, 232, 236, 236, 236, 236
  1330. .byte 240, 240, 240, 240, 244, 244, 244, 244
  1331. .byte 248, 248, 248, 248, 252, 252, 252, 252
  1332. ! 5 masks for initial/final permutation, followed by 3 further words
  1333. .word 0x0f0f0f0f ! offset 256
  1334. .word 0x0000ffff ! 260
  1335. .word 0x33333333 ! 264
  1336. .word 0x00ff00ff ! 268
  1337. .word 0x55555555 ! 272
  1338. .word 0 ! 276
  1339. .word LOOPS ! 280, loop counter value
  1340. .word 0x0000FC00 ! 284, last word, ends at offset 287
  1341. .global DES_SPtrans
  1342. .type DES_SPtrans,#object
  1343. .size DES_SPtrans,2048
  1344. .align 64
  1345. DES_SPtrans:
  1346. .PIC.DES_SPtrans:
  ! Eight tables of 64 words each, 256 bytes per table, 2048 bytes in all.
  ! Both labels above mark the same address. The routines in this file
  ! reach the table through the dot-prefixed label and keep its address
  ! in global1. The cbc routines address single tables as global1 plus a
  ! multiple of 256: global1+512 is commented there as sbox 3 and
  ! global1+768 as sbox 4, which are nibble 2 and nibble 3 below.
  1347. ! nibble 0
  1348. .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
  1349. .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
  1350. .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
  1351. .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
  1352. .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
  1353. .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
  1354. .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
  1355. .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
  1356. .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
  1357. .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
  1358. .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
  1359. .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
  1360. .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
  1361. .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
  1362. .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
  1363. .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
  1364. ! nibble 1
  1365. .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
  1366. .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
  1367. .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
  1368. .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
  1369. .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
  1370. .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
  1371. .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
  1372. .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
  1373. .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
  1374. .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
  1375. .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
  1376. .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
  1377. .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
  1378. .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
  1379. .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
  1380. .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
  1381. ! nibble 2
  1382. .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
  1383. .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
  1384. .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
  1385. .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
  1386. .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
  1387. .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
  1388. .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
  1389. .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
  1390. .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
  1391. .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
  1392. .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
  1393. .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
  1394. .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
  1395. .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
  1396. .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
  1397. .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
  1398. ! nibble 3
  1399. .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
  1400. .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
  1401. .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
  1402. .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
  1403. .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
  1404. .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
  1405. .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
  1406. .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
  1407. .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
  1408. .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
  1409. .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
  1410. .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
  1411. .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
  1412. .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
  1413. .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
  1414. .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
  1415. ! nibble 4
  1416. .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
  1417. .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
  1418. .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
  1419. .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
  1420. .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
  1421. .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
  1422. .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
  1423. .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
  1424. .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
  1425. .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
  1426. .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
  1427. .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
  1428. .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
  1429. .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
  1430. .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
  1431. .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
  1432. ! nibble 5
  1433. .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
  1434. .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
  1435. .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
  1436. .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
  1437. .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
  1438. .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
  1439. .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
  1440. .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
  1441. .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
  1442. .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
  1443. .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
  1444. .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
  1445. .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
  1446. .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
  1447. .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
  1448. .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
  1449. ! nibble 6
  1450. .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
  1451. .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
  1452. .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
  1453. .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
  1454. .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
  1455. .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
  1456. .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
  1457. .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
  1458. .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
  1459. .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
  1460. .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
  1461. .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
  1462. .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
  1463. .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
  1464. .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
  1465. .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
  1466. ! nibble 7
  1467. .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
  1468. .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
  1469. .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
  1470. .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
  1471. .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
  1472. .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
  1473. .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
  1474. .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
  1475. .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
  1476. .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
  1477. .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
  1478. .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
  1479. .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
  1480. .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
  1481. .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
  1482. .word 0x20000000, 0x20800080, 0x00020000, 0x00820080