des_enc.m4 46 KB


  1. ! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
  2. !
  3. ! Licensed under the Apache License 2.0 (the "License"). You may not use
  4. ! this file except in compliance with the License. You can obtain a copy
  5. ! in the file LICENSE in the source distribution or at
  6. ! https://www.openssl.org/source/license.html
  7. !
  8. ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
  9. !
  10. ! Global registers 1 to 5 are used. This is the same as done by the
  11. ! cc compiler. The UltraSPARC load/store little endian feature is used.
  12. !
  13. ! Instruction grouping often refers to one CPU cycle.
  14. !
  15. ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
  16. !
  17. ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
  18. !
  19. ! Performance improvement according to './apps/openssl speed des'
  20. !
  21. ! 32-bit build:
  22. ! 23% faster than cc-5.2 -xarch=v8plus -xO5
  23. ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
  24. ! 64-bit build:
  25. ! 50% faster than cc-5.2 -xarch=v9 -xO5
  26. ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
  27. !
  28. .ident "des_enc.m4 2.1"
  29. .file "des_enc-sparc.S"
  ! Pick the 64-bit ABI settings when the compiler reports a 64-bit SPARC target.
  30. #if defined(__SUNPRO_C) && defined(__sparcv9)
  31. # define ABI64 /* They've said -xarch=v9 at command line */
  32. #elif defined(__GNUC__) && defined(__arch64__)
  33. # define ABI64 /* They've said -m64 at command line */
  34. #endif
  ! ABI dependent settings used further down:
  !   FRAME is the frame size handed to the save instruction,
  !   BIAS is added to %sp whenever a stack slot is addressed,
  !   LDPTR and STPTR are the pointer sized load and store,
  !   ARG0 and ARGSZ give the offset of the first argument slot and the slot size.
  35. #ifdef ABI64
  36. .register %g2,#scratch
  37. .register %g3,#scratch
  38. # define FRAME -192
  39. # define BIAS 2047
  40. # define LDPTR ldx
  41. # define STPTR stx
  42. # define ARG0 128
  43. # define ARGSZ 8
  44. #else
  45. # define FRAME -96
  46. # define BIAS 0
  47. # define LDPTR ld
  48. # define STPTR st
  49. # define ARG0 68
  50. # define ARGSZ 4
  51. #endif
  ! Initial value of the round loop counter (see its use in DES_encrypt2).
  52. #define LOOPS 7
  ! Symbolic names for the integer registers.
  53. #define global0 %g0
  54. #define global1 %g1
  55. #define global2 %g2
  56. #define global3 %g3
  57. #define global4 %g4
  58. #define global5 %g5
  59. #define local0 %l0
  60. #define local1 %l1
  61. #define local2 %l2
  62. #define local3 %l3
  63. #define local4 %l4
  64. #define local5 %l5
  ! NOTE(review): the next two names are crossed relative to the hardware
  ! numbering (the name ending in 7 is %l6, the name ending in 6 is %l7).
  ! Presumably intentional - confirm before changing.
  65. #define local7 %l6
  66. #define local6 %l7
  67. #define in0 %i0
  68. #define in1 %i1
  69. #define in2 %i2
  70. #define in3 %i3
  71. #define in4 %i4
  72. #define in5 %i5
  73. #define in6 %i6
  74. #define in7 %i7
  75. #define out0 %o0
  76. #define out1 %o1
  77. #define out2 %o2
  78. #define out3 %o3
  79. #define out4 %o4
  80. #define out5 %o5
  81. #define out6 %o6
  82. #define out7 %o7
  ! Byte store mnemonic used by the store macros.
  83. #define stub stb
  ! From the next line on m4 quotes with curly braces.
  84. changequote({,})
  85. ! Macro definitions:
  86. ! {ip_macro}
  87. !
  88. ! The logic used in initial and final permutations is the same as in
  89. ! the C code. The permutations are done with a clever technique built
  90. ! from shift, xor, and bitwise-and operations.
  91. !
  92. ! The macro also sets the addresses of sbox 2 to 5 in global 2 to 5
  93. ! (derived from the sbox 1 address, which must already be in global1),
  ! the address of sbox 6 in local6, and the address of sbox 8 in out3.
  ! The loop counter is loaded from [out2+280] to out4.
  94. !
  95. ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
  96. !
  97. ! Loads key first round from address in parameter 5 to out0, out1.
  98. !
  99. ! After the original LibDES initial permutation, the resulting left
  100. ! is in the variable initially used for right and vice versa. The macro
  101. ! implements the possibility to keep the halves in the original registers.
  102. !
  103. ! parameter 1 left
  104. ! parameter 2 right
  105. ! parameter 3 result left (modify in first round)
  106. ! parameter 4 result right (use in first round)
  107. ! parameter 5 key address
  108. ! parameter 6 1/2 for include encryption/decryption
  ! (emits a call to .des_enc.1 or .des_dec.1 after the permutation)
  109. ! parameter 7 1 for move in1 to in3
  110. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  111. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  !
  ! Expects out2 to be set up by the caller: masks and constants are read
  ! at offsets 256 to 284 from out2. Uses local1 to local5 and local7 as
  ! scratch registers.
  112. define(ip_macro, {
  113. ! {ip_macro}
  114. ! $1 $2 $4 $3 $5 $6 $7 $8 $9
  115. ld [out2+256], local1 ! mask for the 4 bit swap step
  116. srl $2, 4, local4
  117. xor local4, $1, local4
  118. ifelse($7,1,{mov in1, in3},{nop})
  119. ld [out2+260], local2 ! mask for the 16 bit swap step
  120. and local4, local1, local4
  121. ifelse($8,1,{mov in3, in4},{})
  122. ifelse($8,2,{mov in4, in3},{})
  123. ld [out2+280], out4 ! loop counter
  124. sll local4, 4, local1
  125. xor $1, local4, $1
  126. ld [out2+264], local3 ! mask for the 2 bit swap step
  127. srl $1, 16, local4
  128. xor $2, local1, $2
  129. ifelse($9,1,{LDPTR KS3, in4},{})
  130. xor local4, $2, local4
  131. nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
  132. ifelse($9,1,{LDPTR KS2, in3},{})
  133. and local4, local2, local4
  134. nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
  135. sll local4, 16, local1
  136. xor $2, local4, $2
  137. srl $2, 2, local4
  138. xor $1, local1, $1
  139. sethi %hi(16711680), local5 ! 16711680 is 0x00FF0000
  140. xor local4, $1, local4
  141. and local4, local3, local4
  142. or local5, 255, local5 ! 0x00FF00FF, mask for the 8 bit swap step
  143. sll local4, 2, local2
  144. xor $1, local4, $1
  145. srl $1, 8, local4
  146. xor $2, local2, $2
  147. xor local4, $2, local4
  148. add global1, 768, global4 ! address sbox 4
  149. and local4, local5, local4
  150. add global1, 1024, global5 ! address sbox 5
  151. ld [out2+272], local7 ! mask for the 1 bit swap step
  152. sll local4, 8, local1
  153. xor $2, local4, $2
  154. srl $2, 1, local4
  155. xor $1, local1, $1
  156. ld [$5], out0 ! key 7531
  157. xor local4, $1, local4
  158. add global1, 256, global2 ! address sbox 2
  159. ld [$5+4], out1 ! key 8642
  160. and local4, local7, local4
  161. add global1, 512, global3 ! address sbox 3
  162. sll local4, 1, local1
  163. xor $1, local4, $1
  164. sll $1, 3, local3 ! rotate 3 left, first half
  165. xor $2, local1, $2
  166. sll $2, 3, local2 ! rotate 3 left, second half
  167. add global1, 1280, local6 ! address sbox 6
  168. srl $1, 29, local4
  169. add global1, 1792, out3 ! address sbox 8
  170. srl $2, 29, local1
  171. or local4, local3, $4 ! parameter 4 is parameter 1 rotated 3 left
  172. or local2, local1, $3 ! parameter 3 is parameter 2 rotated 3 left
  ! Optional tail selected by parameter 6: prepare the first sbox 1 lookup
  ! and call into the shared round loop. The and after each call sits in
  ! the call delay slot.
  173. ifelse($6, 1, {
  174. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  175. or local2, local1, $3 ! same or as just above
  176. xor $4, out0, local1
  177. call .des_enc.1
  178. and local1, 252, local1
  179. },{})
  180. ifelse($6, 2, {
  181. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  182. or local2, local1, $3 ! same or as just above
  183. xor $4, out0, local1
  184. call .des_dec.1
  185. and local1, 252, local1
  186. },{})
  187. })
  188. ! {rounds_macro}
  189. !
  190. ! The logic used in the DES rounds is the same as in the C code,
  191. ! except that calculations for sbox 1 and sbox 5 begin before
  192. ! the previous round is finished.
  193. !
  194. ! In each round one half (work) is modified based on key and the
  195. ! other half (use).
  196. !
  197. ! In this version we do two rounds in a loop repeated 7 times
  198. ! and two rounds separately.
  199. !
  200. ! One half has the bits for the sboxes in the following positions:
  201. !
  202. ! 777777xx555555xx333333xx111111xx
  203. !
  204. ! 88xx666666xx444444xx222222xx8888
  205. !
  206. ! The bits for each sbox are xor-ed with the key bits for that box.
  207. ! The above xx bits are cleared, and the result used for lookup in
  208. ! the sbox table. Each sbox entry contains the 4 output bits permuted
  209. ! into 32 bits according to the P permutation.
  210. !
  211. ! In the description of DES, left and right are switched after
  212. ! each round, except after last round. In this code the original
  213. ! left and right are kept in the same register in all rounds, meaning
  214. ! that after the 16 rounds the result for right is in the register
  215. ! originally used for left.
  216. !
  217. ! parameter 1 first work (left in first round)
  218. ! parameter 2 first use (right in first round)
  219. ! parameter 3 enc/dec 1/-1
  ! (sign of every key offset and of the key pointer step)
  220. ! parameter 4 loop label
  221. ! parameter 5 key address register
  222. ! parameter 6 optional address for key next encryption/decryption
  223. ! parameter 7 not empty for include retl
  224. !
  225. ! also compares in2 to 8
  !
  ! On entry the macro expects the first round key in out0 and out1, the
  ! loop counter in out4, the sbox addresses in global1 to global5, local6
  ! and out3, and the constant base in out2.
  ! On exit local3 and local4 hold parameter 1 shifted 3 right and 29
  ! left, which is what {fp_macro} and {fp_ip_macro} expect on entry.
  226. define(rounds_macro, {
  227. ! {rounds_macro}
  228. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  229. xor $2, out0, local1
  230. ld [out2+284], local5 ! 0x0000FC00
  231. ba $4 ! enter the loop; the and below runs in the delay slot
  232. and local1, 252, local1
  233. .align 32
  234. $4:
  235. ! local6 is address sbox 6
  236. ! out3 is address sbox 8
  237. ! out4 is loop counter
  238. ld [global1+local1], local1
  239. xor $2, out1, out1 ! 8642
  240. xor $2, out0, out0 ! 7531
  241. ! fmovs %f0, %f0 ! fxor used for alignment
  242. srl out1, 4, local0 ! rotate 4 right
  243. and out0, local5, local3 ! 3
  244. ! fmovs %f0, %f0
  245. ld [$5+$3*8], local7 ! key 7531 next round
  246. srl local3, 8, local3 ! 3
  247. and local0, 252, local2 ! 2
  248. ! fmovs %f0, %f0
  249. ld [global3+local3],local3 ! 3
  250. sll out1, 28, out1 ! rotate
  251. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  252. ld [global2+local2], local2 ! 2
  253. srl out0, 24, local1 ! 7
  254. or out1, local0, out1 ! rotate
  255. ldub [out2+local1], local1 ! 7 (and 0xFC)
  256. srl out1, 24, local0 ! 8
  257. and out1, local5, local4 ! 4
  258. ldub [out2+local0], local0 ! 8 (and 0xFC)
  259. srl local4, 8, local4 ! 4
  260. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  261. ld [global4+local4],local4 ! 4
  262. srl out1, 16, local2 ! 6
  263. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  264. ld [out3+local0],local0 ! 8
  265. and local2, 252, local2 ! 6
  266. add global1, 1536, local5 ! address sbox 7
  267. ld [local6+local2], local2 ! 6
  268. srl out0, 16, local3 ! 5
  269. xor $1, local4, $1 ! 4 finished
  270. ld [local5+local1],local1 ! 7
  271. and local3, 252, local3 ! 5
  272. xor $1, local0, $1 ! 8 finished
  273. ld [global5+local3],local3 ! 5
  274. xor $1, local2, $1 ! 6 finished
  275. subcc out4, 1, out4 ! count down; the bne at the loop end tests this result
  276. ld [$5+$3*8+4], out0 ! key 8642 next round
  277. xor $1, local7, local2 ! sbox 5 next round
  278. xor $1, local1, $1 ! 7 finished
  279. srl local2, 16, local2 ! sbox 5 next round
  280. xor $1, local3, $1 ! 5 finished
  281. ld [$5+$3*16+4], out1 ! key 8642 next round again
  282. and local2, 252, local2 ! sbox5 next round
  283. ! next round
  284. xor $1, local7, local7 ! 7531
  285. ld [global5+local2], local2 ! 5
  286. srl local7, 24, local3 ! 7
  287. xor $1, out0, out0 ! 8642
  288. ldub [out2+local3], local3 ! 7 (and 0xFC)
  289. srl out0, 4, local0 ! rotate 4 right
  290. and local7, 252, local1 ! 1
  291. sll out0, 28, out0 ! rotate
  292. xor $2, local2, $2 ! 5 finished local2 used
  293. srl local0, 8, local4 ! 4
  294. and local0, 252, local2 ! 2
  295. ld [local5+local3], local3 ! 7
  296. srl local0, 16, local5 ! 6
  297. or out0, local0, out0 ! rotate
  298. ld [global2+local2], local2 ! 2
  299. srl out0, 24, local0
  300. ld [$5+$3*16], out0 ! key 7531 next round
  301. and local4, 252, local4 ! 4
  302. and local5, 252, local5 ! 6
  303. ld [global4+local4], local4 ! 4
  304. xor $2, local3, $2 ! 7 finished local3 used
  305. and local0, 252, local0 ! 8
  306. ld [local6+local5], local5 ! 6
  307. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  308. srl local7, 8, local2 ! 3 start
  309. ld [out3+local0], local0 ! 8
  310. xor $2, local4, $2 ! 4 finished
  311. and local2, 252, local2 ! 3
  312. ld [global1+local1], local1 ! 1
  313. xor $2, local5, $2 ! 6 finished local5 used
  314. ld [global3+local2], local2 ! 3
  315. xor $2, local0, $2 ! 8 finished
  316. add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two 8 byte round keys per pass)
  317. ld [out2+284], local5 ! 0x0000FC00
  318. xor $2, out0, local4 ! sbox 1 next round
  319. xor $2, local1, $2 ! 1 finished
  320. xor $2, local2, $2 ! 3 finished
  321. bne $4 ! loop while the counter is not zero
  322. and local4, 252, local1 ! sbox 1 next round
  323. ! two rounds more:
  324. ld [global1+local1], local1
  325. xor $2, out1, out1
  326. xor $2, out0, out0
  327. srl out1, 4, local0 ! rotate
  328. and out0, local5, local3
  329. ld [$5+$3*8], local7 ! key 7531
  330. srl local3, 8, local3
  331. and local0, 252, local2
  332. ld [global3+local3],local3
  333. sll out1, 28, out1 ! rotate
  334. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  335. ld [global2+local2], local2
  336. srl out0, 24, local1
  337. or out1, local0, out1 ! rotate
  338. ldub [out2+local1], local1
  339. srl out1, 24, local0
  340. and out1, local5, local4
  341. ldub [out2+local0], local0
  342. srl local4, 8, local4
  343. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  344. ld [global4+local4],local4
  345. srl out1, 16, local2
  346. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  347. ld [out3+local0],local0
  348. and local2, 252, local2
  349. add global1, 1536, local5 ! address sbox 7
  350. ld [local6+local2], local2
  351. srl out0, 16, local3
  352. xor $1, local4, $1 ! 4 finished
  353. ld [local5+local1],local1
  354. and local3, 252, local3
  355. xor $1, local0, $1 ! 8 finished
  356. ld [global5+local3],local3
  357. xor $1, local2, $1 ! 6 finished
  358. cmp in2, 8 ! result stays in the condition codes for the code after the macro
  359. ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
  360. xor $1, local7, local2 ! sbox 5 next round
  361. xor $1, local1, $1 ! 7 finished
  362. ld [$5+$3*8+4], out0
  363. srl local2, 16, local2 ! sbox 5 next round
  364. xor $1, local3, $1 ! 5 finished
  365. and local2, 252, local2
  366. ! next round (two rounds more)
  367. xor $1, local7, local7 ! 7531
  368. ld [global5+local2], local2
  369. srl local7, 24, local3
  370. xor $1, out0, out0 ! 8642
  371. ldub [out2+local3], local3
  372. srl out0, 4, local0 ! rotate
  373. and local7, 252, local1
  374. sll out0, 28, out0 ! rotate
  375. xor $2, local2, $2 ! 5 finished local2 used
  376. srl local0, 8, local4
  377. and local0, 252, local2
  378. ld [local5+local3], local3
  379. srl local0, 16, local5
  380. or out0, local0, out0 ! rotate
  381. ld [global2+local2], local2
  382. srl out0, 24, local0
  383. ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
  384. and local4, 252, local4
  385. and local5, 252, local5
  386. ld [global4+local4], local4
  387. xor $2, local3, $2 ! 7 finished local3 used
  388. and local0, 252, local0
  389. ld [local6+local5], local5
  390. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  391. srl local7, 8, local2 ! 3 start
  392. ld [out3+local0], local0
  393. xor $2, local4, $2 ! 4 finished
  394. and local2, 252, local2
  395. ld [global1+local1], local1
  396. xor $2, local5, $2 ! 6 finished local5 used
  397. ld [global3+local2], local2
  398. srl $1, 3, local3 ! parameter 1 shifted 3 right, for the final permutation
  399. xor $2, local0, $2 ! 8 finished
  400. ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
  401. sll $1, 29, local4 ! parameter 1 shifted 29 left, for the final permutation
  402. xor $2, local1, $2 ! 1 finished
  403. ifelse($7,{}, {}, {retl})
  404. xor $2, local2, $2 ! 3 finished (in the delay slot when retl is emitted)
  405. })
  406. ! {fp_macro}
  407. !
  ! Final permutation. Undoes the rotate 3 left applied after the initial
  ! permutation and then runs the five swap steps (1, 8, 2, 16 and 4 bit)
  ! with the masks built in the body.
  !
  408. ! parameter 1 right (original left)
  409. ! parameter 2 left (original right)
  410. ! parameter 3 1 for optional store to [in0]
  411. ! parameter 4 1 for load input/output address to local5/7
  ! (uses the {INPUT} and {OUTPUT} stack slot names, which are set up
  ! elsewhere in the file)
  412. !
  413. ! The final permutation logic switches the halves, meaning that
  414. ! left and right end up in the registers originally used.
  !
  ! Expects parameter 1 shifted 3 right in local3 and shifted 29 left in
  ! local4. Uses local1 to local4 as scratch registers.
  415. define(fp_macro, {
  416. ! {fp_macro}
  417. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  418. ! initially undo the rotate 3 left done after initial permutation
  419. ! original left is received shifted 3 right and 29 left in local3/4
  420. sll $2, 29, local1
  421. or local3, local4, $1 ! parameter 1 rotated 3 right
  422. srl $2, 3, $2
  423. sethi %hi(0x55555555), local2 ! mask for the 1 bit swap step
  424. or $2, local1, $2 ! parameter 2 rotated 3 right
  425. or local2, %lo(0x55555555), local2
  426. srl $2, 1, local3
  427. sethi %hi(0x00ff00ff), local1 ! mask for the 8 bit swap step
  428. xor local3, $1, local3
  429. or local1, %lo(0x00ff00ff), local1
  430. and local3, local2, local3
  431. sethi %hi(0x33333333), local4 ! mask for the 2 bit swap step
  432. sll local3, 1, local2
  433. xor $1, local3, $1
  434. srl $1, 8, local3
  435. xor $2, local2, $2
  436. xor local3, $2, local3
  437. or local4, %lo(0x33333333), local4
  438. and local3, local1, local3
  439. sethi %hi(0x0000ffff), local1 ! mask for the 16 bit swap step
  440. sll local3, 8, local2
  441. xor $2, local3, $2
  442. srl $2, 2, local3
  443. xor $1, local2, $1
  444. xor local3, $1, local3
  445. or local1, %lo(0x0000ffff), local1
  446. and local3, local4, local3
  447. sethi %hi(0x0f0f0f0f), local4 ! mask for the 4 bit swap step
  448. sll local3, 2, local2
  449. ifelse($4,1, {LDPTR INPUT, local5})
  450. xor $1, local3, $1
  451. ifelse($4,1, {LDPTR OUTPUT, local7})
  452. srl $1, 16, local3
  453. xor $2, local2, $2
  454. xor local3, $2, local3
  455. or local4, %lo(0x0f0f0f0f), local4
  456. and local3, local1, local3
  457. sll local3, 16, local2
  458. xor $2, local3, local1 ! parameter 2 continues in local1 from here
  459. srl local1, 4, local3
  460. xor $1, local2, $1
  461. xor local3, $1, local3
  462. and local3, local4, local3
  463. sll local3, 4, local2
  464. xor $1, local3, $1 ! parameter 1 finished
  465. ! optional store:
  466. ifelse($3,1, {st $1, [in0]})
  467. xor local1, local2, $2 ! parameter 2 finished
  468. ifelse($3,1, {st $2, [in0+4]})
  469. })
  470. ! {fp_ip_macro}
  471. !
  472. ! Does initial permutation for next block mixed with
  473. ! final permutation for current block.
  474. !
  475. ! parameter 1 original left
  476. ! parameter 2 original right
  477. ! parameter 3 left ip
  478. ! parameter 4 right ip
  479. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  480. ! 2: mov in4 to in3
  481. !
  482. ! also adds -8 to length in2 and loads loop counter to out4
  !
  ! Expects parameter 1 shifted 3 right in local3 and shifted 29 left in
  ! local4. The other half of the current block is read from out5
  ! directly, not from parameter 2. out4 serves as a temporary and is
  ! reloaded with the loop counter near the end. The swap masks are read
  ! at offsets 256 to 272 from out2.
  483. define(fp_ip_macro, {
  484. ! {fp_ip_macro}
  485. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  486. define({temp1},{out4})
  487. define({temp2},{local3})
  488. define({ip1},{local1})
  489. define({ip2},{local2})
  490. define({ip4},{local4})
  491. define({ip5},{local5})
  492. ! $1 in local3, local4
  493. ld [out2+256], ip1 ! mask for the 4 bit swap step
  494. sll out5, 29, temp1
  495. or local3, local4, $1
  496. srl out5, 3, $2
  497. ifelse($5,2,{mov in4, in3})
  498. ld [out2+272], ip5 ! mask for the 1 bit swap step
  499. srl $4, 4, local0
  500. or $2, temp1, $2
  501. srl $2, 1, temp1
  502. xor temp1, $1, temp1
  503. and temp1, ip5, temp1
  504. xor local0, $3, local0
  505. sll temp1, 1, temp2
  506. xor $1, temp1, $1
  507. and local0, ip1, local0
  508. add in2, -8, in2 ! length minus 8
  509. sll local0, 4, local7
  510. xor $3, local0, $3
  511. ld [out2+268], ip4 ! mask for the 8 bit swap step
  512. srl $1, 8, temp1
  513. xor $2, temp2, $2
  514. ld [out2+260], ip2 ! mask for the 16 bit swap step
  515. srl $3, 16, local0
  516. xor $4, local7, $4
  517. xor temp1, $2, temp1
  518. xor local0, $4, local0
  519. and temp1, ip4, temp1
  520. and local0, ip2, local0
  521. sll temp1, 8, temp2
  522. xor $2, temp1, $2
  523. sll local0, 16, local7
  524. xor $4, local0, $4
  525. srl $2, 2, temp1
  526. xor $1, temp2, $1
  527. ld [out2+264], temp2 ! ip3, mask for the 2 bit swap step
  528. srl $4, 2, local0
  529. xor $3, local7, $3
  530. xor temp1, $1, temp1
  531. xor local0, $3, local0
  532. and temp1, temp2, temp1
  533. and local0, temp2, local0
  534. sll temp1, 2, temp2
  535. xor $1, temp1, $1
  536. sll local0, 2, local7
  537. xor $3, local0, $3
  538. srl $1, 16, temp1
  539. xor $2, temp2, $2
  540. srl $3, 8, local0
  541. xor $4, local7, $4
  542. xor temp1, $2, temp1
  543. xor local0, $4, local0
  544. and temp1, ip2, temp1
  545. and local0, ip4, local0
  546. sll temp1, 16, temp2
  547. xor $2, temp1, local4
  548. sll local0, 8, local7
  549. xor $4, local0, $4
  550. srl $4, 1, local0
  551. xor $3, local7, $3
  552. srl local4, 4, temp1
  553. xor local0, $3, local0
  554. xor $1, temp2, $1
  555. and local0, ip5, local0
  556. sll local0, 1, local7
  557. xor temp1, $1, temp1
  558. xor $3, local0, $3
  559. xor $4, local7, $4
  560. sll $3, 3, local5 ! rotate 3 left of the next block starts here
  561. and temp1, ip1, temp1
  562. sll temp1, 4, temp2
  563. xor $1, temp1, $1
  564. ifelse($5,1,{LDPTR KS2, in4})
  565. sll $4, 3, local2
  566. xor local4, temp2, $2
  567. ! reload since used as temporary:
  568. ld [out2+280], out4 ! loop counter
  569. srl $3, 29, local0
  570. ifelse($5,1,{add in4, 120, in4})
  571. ifelse($5,1,{LDPTR KS1, in3})
  572. srl $4, 29, local7
  573. or local0, local5, $4
  574. or local2, local7, $3
  575. })
  576. ! {load_little_endian}
  577. !
  ! Builds two 32-bit words from eight bytes using byte loads only. In
  ! each word the byte at the lowest offset ends up least significant.
  !
  578. ! parameter 1 address
  579. ! parameter 2 destination left (bytes at offsets 0 to 3)
  580. ! parameter 3 destination right (bytes at offsets 4 to 7)
  581. ! parameter 4 temporary
  582. ! parameter 5 label (emitted before the first load; the same label
  ! with the letter a appended is emitted after the last instruction)
  583. define(load_little_endian, {
  584. ! {load_little_endian}
  585. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  586. ! first in memory to rightmost in register
  587. $5:
  588. ldub [$1+3], $2 ! most significant byte of the first word
  589. ldub [$1+2], $4
  590. sll $2, 8, $2
  591. or $2, $4, $2
  592. ldub [$1+1], $4
  593. sll $2, 8, $2
  594. or $2, $4, $2
  595. ldub [$1+0], $4
  596. sll $2, 8, $2
  597. or $2, $4, $2 ! first word complete
  598. ldub [$1+3+4], $3 ! most significant byte of the second word
  599. ldub [$1+2+4], $4
  600. sll $3, 8, $3
  601. or $3, $4, $3
  602. ldub [$1+1+4], $4
  603. sll $3, 8, $3
  604. or $3, $4, $3
  605. ldub [$1+0+4], $4
  606. sll $3, 8, $3
  607. or $3, $4, $3 ! second word complete
  608. $5a:
  609. })
  610. ! {load_little_endian_inc}
  611. !
  ! Same byte wise load as {load_little_endian}, but the address register
  ! is advanced by 8 part way through; the remaining offsets carry a -8
  ! to compensate.
  !
  612. ! parameter 1 address
  613. ! parameter 2 destination left
  614. ! parameter 3 destination right
  615. ! parameter 4 temporary
  616. ! parameter 5 label
  617. !
  618. ! adds 8 to address
  619. define(load_little_endian_inc, {
  620. ! {load_little_endian_inc}
  621. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  622. ! first in memory to rightmost in register
  623. $5:
  624. ldub [$1+3], $2
  625. ldub [$1+2], $4
  626. sll $2, 8, $2
  627. or $2, $4, $2
  628. ldub [$1+1], $4
  629. sll $2, 8, $2
  630. or $2, $4, $2
  631. ldub [$1+0], $4
  632. sll $2, 8, $2
  633. or $2, $4, $2
  634. ldub [$1+3+4], $3
  635. add $1, 8, $1 ! address now points at the next block
  636. ldub [$1+2+4-8], $4 ! -8 compensates for the add above
  637. sll $3, 8, $3
  638. or $3, $4, $3
  639. ldub [$1+1+4-8], $4
  640. sll $3, 8, $3
  641. or $3, $4, $3
  642. ldub [$1+0+4-8], $4
  643. sll $3, 8, $3
  644. or $3, $4, $3
  645. $5a:
  646. })
  647. ! {load_n_bytes}
  648. !
  649. ! Loads 1 to 7 bytes little endian
  650. ! Remaining bytes are zeroed.
  !
  ! The entry point for the given length is looked up in a table of
  ! offsets relative to label .0 and reached with a computed jump; the
  ! code then falls through the cases for the smaller lengths.
  ! The call instruction used to obtain the current address overwrites %o7.
  ! NOTE(review): table entry 0 is offset 0, so a length of 0 would jump
  ! back to label .0 itself. Callers presumably never pass 0 - confirm.
  651. !
  652. ! parameter 1 address
  653. ! parameter 2 length
  654. ! parameter 3 destination register left
  655. ! parameter 4 destination register right
  ! NOTE(review): the code puts the bytes at offsets 4 to 6 in parameter 3
  ! and the bytes at offsets 0 to 3 in parameter 4, the reverse of the
  ! left/right assignment in {load_little_endian} - confirm the labels.
  656. ! parameter 5 temp
  657. ! parameter 6 temp2
  658. ! parameter 7 label
  659. ! parameter 8 return label
  660. define(load_n_bytes, {
  661. ! {load_n_bytes}
  662. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  663. $7.0: call .+8
  664. sll $2, 2, $6 ! delay slot: table offset is length times 4
  665. add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
  666. add $5, $6, $5
  667. mov 0, $4
  668. ld [$5], $5 ! offset of the entry point relative to label .0
  669. jmp %o7+$5
  670. mov 0, $3 ! delay slot
  671. $7.7:
  672. ldub [$1+6], $5
  673. sll $5, 16, $5
  674. or $3, $5, $3
  675. $7.6:
  676. ldub [$1+5], $5
  677. sll $5, 8, $5
  678. or $3, $5, $3
  679. $7.5:
  680. ldub [$1+4], $5
  681. or $3, $5, $3
  682. $7.4:
  683. ldub [$1+3], $5
  684. sll $5, 24, $5
  685. or $4, $5, $4
  686. $7.3:
  687. ldub [$1+2], $5
  688. sll $5, 16, $5
  689. or $4, $5, $4
  690. $7.2:
  691. ldub [$1+1], $5
  692. sll $5, 8, $5
  693. or $4, $5, $4
  694. $7.1:
  695. ldub [$1+0], $5
  696. ba $8
  697. or $4, $5, $4 ! delay slot
  698. .align 4
  699. $7.jmp.table:
  700. .word 0
  701. .word $7.1-$7.0
  702. .word $7.2-$7.0
  703. .word $7.3-$7.0
  704. .word $7.4-$7.0
  705. .word $7.5-$7.0
  706. .word $7.6-$7.0
  707. .word $7.7-$7.0
  708. })
  709. ! {store_little_endian}
  710. !
  ! Writes two 32-bit words as eight single bytes, least significant
  ! byte of each word at the lowest offset.
  !
  711. ! parameter 1 address
  712. ! parameter 2 source left (stored at offsets 0 to 3)
  713. ! parameter 3 source right (stored at offsets 4 to 7)
  714. ! parameter 4 temporary
  ! parameter 5 label (emitted before the first instruction; the same
  ! label with the letter a appended is emitted after the last one)
  715. define(store_little_endian, {
  716. ! {store_little_endian}
  717. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  718. ! rightmost in register to first in memory
  719. $5:
  720. and $2, 255, $4
  721. stub $4, [$1+0]
  722. srl $2, 8, $4
  723. and $4, 255, $4
  724. stub $4, [$1+1]
  725. srl $2, 16, $4
  726. and $4, 255, $4
  727. stub $4, [$1+2]
  728. srl $2, 24, $4
  729. stub $4, [$1+3]
  730. and $3, 255, $4
  731. stub $4, [$1+0+4]
  732. srl $3, 8, $4
  733. and $4, 255, $4
  734. stub $4, [$1+1+4]
  735. srl $3, 16, $4
  736. and $4, 255, $4
  737. stub $4, [$1+2+4]
  738. srl $3, 24, $4
  739. stub $4, [$1+3+4]
  740. $5a:
  741. })
  742. ! {store_n_bytes}
  743. !
  744. ! Stores 1 to 7 bytes little endian
  !
  ! The entry point for the given length is looked up in a table of
  ! offsets relative to label .0 and reached with a computed jump; the
  ! code then falls through the cases for the smaller lengths.
  ! The call instruction used to obtain the current address overwrites %o7.
  ! NOTE(review): table entry 0 is offset 0, so a length of 0 would jump
  ! back to label .0 itself. Callers presumably never pass 0 - confirm.
  745. !
  746. ! parameter 1 address
  747. ! parameter 2 length
  748. ! parameter 3 source register left
  749. ! parameter 4 source register right
  ! NOTE(review): the code stores parameter 3 at offsets 4 to 6 and
  ! parameter 4 at offsets 0 to 3, the reverse of the left/right
  ! assignment in {store_little_endian} - confirm the labels.
  750. ! parameter 5 temp
  751. ! parameter 6 temp2
  752. ! parameter 7 label
  753. ! parameter 8 return label
  754. define(store_n_bytes, {
  755. ! {store_n_bytes}
  756. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  757. $7.0: call .+8
  758. sll $2, 2, $6 ! delay slot: table offset is length times 4
  759. add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
  760. add $5, $6, $5
  761. ld [$5], $5 ! offset of the entry point relative to label .0
  762. jmp %o7+$5
  763. nop
  764. $7.7:
  765. srl $3, 16, $5
  766. and $5, 0xff, $5
  767. stub $5, [$1+6]
  768. $7.6:
  769. srl $3, 8, $5
  770. and $5, 0xff, $5
  771. stub $5, [$1+5]
  772. $7.5:
  773. and $3, 0xff, $5
  774. stub $5, [$1+4]
  775. $7.4:
  776. srl $4, 24, $5
  777. stub $5, [$1+3]
  778. $7.3:
  779. srl $4, 16, $5
  780. and $5, 0xff, $5
  781. stub $5, [$1+2]
  782. $7.2:
  783. srl $4, 8, $5
  784. and $5, 0xff, $5
  785. stub $5, [$1+1]
  786. $7.1:
  787. and $4, 0xff, $5
  788. ba $8
  789. stub $5, [$1] ! delay slot
  790. .align 4
  791. $7.jmp.table:
  792. .word 0
  793. .word $7.1-$7.0
  794. .word $7.2-$7.0
  795. .word $7.3-$7.0
  796. .word $7.4-$7.0
  797. .word $7.5-$7.0
  798. .word $7.6-$7.0
  799. .word $7.7-$7.0
  800. })
  ! Test helper: the literal 1 below is the fill pattern used by {register_init}.
  801. define(testvalue,{1})
  ! {register_init}
  !
  ! Puts the fill pattern in local0 and copies it to every register given
  ! as parameter 1 to 8 (empty parameters are skipped) and to the fixed
  ! set of local, out and global registers listed in the body.
  802. define(register_init, {
  803. ! For test purposes:
  804. sethi %hi(testvalue), local0
  805. or local0, %lo(testvalue), local0
  806. ifelse($1,{},{}, {mov local0, $1})
  807. ifelse($2,{},{}, {mov local0, $2})
  808. ifelse($3,{},{}, {mov local0, $3})
  809. ifelse($4,{},{}, {mov local0, $4})
  810. ifelse($5,{},{}, {mov local0, $5})
  811. ifelse($6,{},{}, {mov local0, $6})
  812. ifelse($7,{},{}, {mov local0, $7})
  813. ifelse($8,{},{}, {mov local0, $8})
  814. mov local0, local1
  815. mov local0, local2
  816. mov local0, local3
  817. mov local0, local4
  818. mov local0, local5
  819. mov local0, local7
  820. mov local0, local6
  821. mov local0, out0
  822. mov local0, out1
  823. mov local0, out2
  824. mov local0, out3
  825. mov local0, out4
  826. mov local0, out5
  827. mov local0, global1
  828. mov local0, global2
  829. mov local0, global3
  830. mov local0, global4
  831. mov local0, global5
  832. })
  833. .section ".text"
  834. .align 32
  ! Internal subroutine holding the encryption rounds. It ends with retl,
  ! so it is entered with call: either at .des_enc or, from {ip_macro},
  ! directly at the loop label .des_enc.1.
  835. .des_enc:
  836. ! key address in3
  837. ! loads key next encryption/decryption first round from [in4]
  838. rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
  839. .align 32
  ! Internal subroutine holding the decryption rounds (key step -1). Entered
  ! with call at .des_dec or, from {ip_macro}, at the loop label .des_dec.1.
  840. .des_dec:
  841. ! implemented with out5 as first parameter to avoid
  842. ! register exchange in ede modes
  843. ! key address in4
  844. ! loads key next encryption/decryption first round from [in3]
  845. rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
  846. ! void DES_encrypt1(data, ks, enc)
  847. ! *******************************
  ! After the save: in0 is data, in1 is ks, in2 is enc.
  ! Reads the two 32-bit halves at [in0] and [in0+4], runs the initial
  ! permutation, the rounds and the final permutation, and stores the
  ! result back to [in0] and [in0+4]. enc equal to 0 selects decryption.
  848. .align 32
  849. .global DES_encrypt1
  850. .type DES_encrypt1,#function
  851. DES_encrypt1:
  852. save %sp, FRAME, %sp
  ! Position independent address setup: the call leaves the address of
  ! label 1 in %o7, so global1 becomes the run time address of
  ! .PIC.DES_SPtrans and out2 the address of .des_and. Both labels are
  ! defined outside this part of the file.
  853. sethi %hi(.PIC.DES_SPtrans-1f),global1
  854. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  855. 1: call .+8
  856. add %o7,global1,global1
  857. sub global1,.PIC.DES_SPtrans-.des_and,out2
  858. ld [in0], in5 ! left
  859. cmp in2, 0 ! enc
  860. be .encrypt.dec
  861. ld [in0+4], out5 ! right
  862. ! parameter 6 1/2 for include encryption/decryption
  863. ! parameter 7 1 for move in1 to in3
  864. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  ! Encryption: no call is emitted by the permutation; the rounds follow inline.
  865. ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
  866. rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
  867. fp_macro(in5, out5, 1) ! 1 for store to [in0]
  868. ret
  869. restore
  870. .encrypt.dec:
  871. add in1, 120, in3 ! use last subkey for first round
  872. ! parameter 6 1/2 for include encryption/decryption
  873. ! parameter 7 1 for move in1 to in3
  874. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  ! Decryption: the permutation copies in3 to in4 and ends with a call to .des_dec.1.
  875. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
  876. fp_macro(out5, in5, 1) ! 1 for store to [in0]
  877. ret
  878. restore
  879. .DES_encrypt1.end:
  880. .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
  881. ! void DES_encrypt2(data, ks, enc)
  882. !*********************************
  883. ! encrypts/decrypts without initial/final permutation
  ! After the save: in0 is data, in1 is ks, in2 is enc (0 selects
  ! decryption). The data pointer is parked in the first argument stack
  ! slot because in0 and in1 are reused as temporaries after the rounds.
  884. .align 32
  885. .global DES_encrypt2
  886. .type DES_encrypt2,#function
  887. DES_encrypt2:
  888. save %sp, FRAME, %sp
  ! Position independent address setup: global1 becomes the run time
  ! address of .PIC.DES_SPtrans and out2 the address of .des_and. Both
  ! labels are defined outside this part of the file.
  889. sethi %hi(.PIC.DES_SPtrans-1f),global1
  890. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  891. 1: call .+8
  892. add %o7,global1,global1
  893. sub global1,.PIC.DES_SPtrans-.des_and,out2
  894. ! Set sbox addresses 2 to 6 and 8 (sbox 1 is already in global1) and rotate halves 3 left
  895. ! Errors caught by destest? Yes. Still? *NO*
  896. !sethi %hi(DES_SPtrans), global1 ! address sbox 1
  897. !or global1, %lo(DES_SPtrans), global1 ! sbox 1
  898. add global1, 256, global2 ! sbox 2
  899. add global1, 512, global3 ! sbox 3
  900. ld [in0], out5 ! right
  901. add global1, 768, global4 ! sbox 4
  902. add global1, 1024, global5 ! sbox 5
  903. ld [in0+4], in5 ! left
  904. add global1, 1280, local6 ! sbox 6
  905. add global1, 1792, out3 ! sbox 8
  906. ! rotate
  907. sll in5, 3, local5
  908. mov in1, in3 ! key address to in3
  909. sll out5, 3, local7
  910. srl in5, 29, in5
  911. srl out5, 29, out5
  912. add in5, local5, in5
  913. add out5, local7, out5
  914. cmp in2, 0
  915. ! we use our own stackframe
  916. be .encrypt2.dec
  917. STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
  918. ld [in3], out0 ! key 7531 first round
  919. mov LOOPS, out4 ! loop counter
  920. ld [in3+4], out1 ! key 8642 first round
  921. sethi %hi(0x0000FC00), local5
  922. call .des_enc
  923. mov in3, in4
  924. ! rotate
  925. sll in5, 29, in0
  926. srl in5, 3, in5
  927. sll out5, 29, in1
  928. add in5, in0, in5
  929. srl out5, 3, out5
  930. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! data pointer back from the stack slot
  931. add out5, in1, out5
  932. st in5, [in0]
  933. st out5, [in0+4]
  934. ret
  935. restore
  936. .encrypt2.dec:
  937. add in3, 120, in4 ! decryption starts at the last subkey
  938. ld [in4], out0 ! key 7531 first round
  939. mov LOOPS, out4 ! loop counter
  940. ld [in4+4], out1 ! key 8642 first round
  941. sethi %hi(0x0000FC00), local5
  942. mov in5, local1 ! left expected in out5
  943. mov out5, in5
  944. call .des_dec
  945. mov local1, out5 ! delay slot: completes the exchange of in5 and out5
  946. .encrypt2.finish:
  947. ! rotate
  948. sll in5, 29, in0
  949. srl in5, 3, in5
  950. sll out5, 29, in1
  951. add in5, in0, in5
  952. srl out5, 3, out5
  953. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! data pointer back from the stack slot
  954. add out5, in1, out5
  955. st out5, [in0]
  956. st in5, [in0+4]
  957. ret
  958. restore
  959. .DES_encrypt2.end:
  960. .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
  961. ! void DES_encrypt3(data, ks1, ks2, ks3)
  962. ! **************************************
  ! After the save: in0 is data, in1 is ks1, in2 is ks2, in3 is ks3.
  ! Sequence: initial permutation with the encryption rounds on ks1 (called
  ! from inside the permutation), decryption rounds starting at ks2+120,
  ! encryption rounds on ks3, final permutation with store to [in0].
  963. .align 32
  964. .global DES_encrypt3
  965. .type DES_encrypt3,#function
  966. DES_encrypt3:
  967. save %sp, FRAME, %sp
  ! Position independent address setup: global1 becomes the run time
  ! address of .PIC.DES_SPtrans and out2 the address of .des_and. Both
  ! labels are defined outside this part of the file.
  968. sethi %hi(.PIC.DES_SPtrans-1f),global1
  969. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  970. 1: call .+8
  971. add %o7,global1,global1
  972. sub global1,.PIC.DES_SPtrans-.des_and,out2
  973. ld [in0], in5 ! left
  974. add in2, 120, in4 ! ks2
  975. ld [in0+4], out5 ! right
  976. mov in3, in2 ! save ks3
  977. ! parameter 6 1/2 for include encryption/decryption
  978. ! parameter 7 1 for mov in1 to in3
  979. ! parameter 8 1 for mov in3 to in4
  980. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  981. ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
  982. call .des_dec
  983. mov in2, in3 ! preload ks3
  984. call .des_enc
  985. nop
  986. fp_macro(in5, out5, 1)
  987. ret
  988. restore
  989. .DES_encrypt3.end:
  990. .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
  991. ! void DES_decrypt3(data, ks1, ks2, ks3)
  992. ! **************************************
  ! After the save: in0 is data, in1 is ks1, in2 is ks2, in3 is ks3.
  ! Sequence: initial permutation with the decryption rounds starting at
  ! ks3+120 (called from inside the permutation), encryption rounds on
  ! ks2, decryption rounds starting at ks1+120, final permutation with
  ! store to [in0].
  993. .align 32
  994. .global DES_decrypt3
  995. .type DES_decrypt3,#function
  996. DES_decrypt3:
  997. save %sp, FRAME, %sp
  ! Position independent address setup: global1 becomes the run time
  ! address of .PIC.DES_SPtrans and out2 the address of .des_and. Both
  ! labels are defined outside this part of the file.
  998. sethi %hi(.PIC.DES_SPtrans-1f),global1
  999. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1000. 1: call .+8
  1001. add %o7,global1,global1
  1002. sub global1,.PIC.DES_SPtrans-.des_and,out2
  1003. ld [in0], in5 ! left
  1004. add in3, 120, in4 ! ks3
  1005. ld [in0+4], out5 ! right
  1006. mov in2, in3 ! ks2
  1007. ! parameter 6 1/2 for include encryption/decryption
  1008. ! parameter 7 1 for mov in1 to in3
  1009. ! parameter 8 1 for mov in3 to in4
  1010. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1011. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
  1012. call .des_enc
  1013. add in1, 120, in4 ! preload ks1
  1014. call .des_dec
  1015. nop
  1016. fp_macro(out5, in5, 1)
  1017. ret
  1018. restore
  1019. .DES_decrypt3.end:
  1020. .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
  1021. ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
  1022. ! *****************************************************************
  ! CBC mode over a single DES key schedule.
  ! Register arguments on entry: in0 = input, in1 = output, in2 = length,
  ! in3 = schedule, in4 = ivec, in5 = enc. enc equal to zero selects the
  ! decryption path at .ncbc.dec, any other value encrypts.
  ! The chaining value is stored back through the ivec pointer on both
  ! paths, except that decryption with length <= 0 returns without a store.
  1023. .align 32
  1024. .global DES_ncbc_encrypt
  1025. .type DES_ncbc_encrypt,#function
  1026. DES_ncbc_encrypt:
  1027. save %sp, FRAME, %sp
  ! Names for three slots addressed off %sp, that is, in the frame created
  ! by the save above. They are used to spill pointers.
  1028. define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
  1029. define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
  1030. define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  ! Position independent address setup: the call at label 1 leaves its own
  ! address in %o7, so after the add global1 holds the address of
  ! .PIC.DES_SPtrans and out2 the address of .des_and.
  1031. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1032. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1033. 1: call .+8
  1034. add %o7,global1,global1 ! delay slot of the call
  1035. sub global1,.PIC.DES_SPtrans-.des_and,out2
  1036. cmp in5, 0 ! enc
  1037. be .ncbc.dec ! enc is zero: decrypt
  1038. STPTR in4, IVEC ! delay slot: spill the ivec pointer on both paths
  ! Encryption path. in5 and out5 carry the chaining value between blocks.
  1039. ! addr left right temp label
  1040. load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
  1041. addcc in2, -8, in2 ! bytes missing when first block done
  1042. bl .ncbc.enc.seven.or.less ! fewer than 8 bytes in total
  1043. mov in3, in4 ! schedule
  1044. .ncbc.enc.next.block:
  1045. load_little_endian(in0, out4, global4, local3, .LLE2) ! block
  1046. .ncbc.enc.next.block_1:
  1047. xor in5, out4, in5 ! iv xor
  1048. xor out5, global4, out5 ! iv xor
  1049. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  1050. ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
  1051. .ncbc.enc.next.block_2:
  1052. !// call .des_enc ! compares in2 to 8
  1053. ! rounds inlined for alignment purposes
  1054. add global1, 768, global4 ! address sbox 4 since register used below
  1055. rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
  1056. bl .ncbc.enc.next.block_fp ! fewer than 8 bytes follow this block
  1057. add in0, 8, in0 ! input address
  1058. ! If 8 or more bytes are to be encrypted after this block,
  1059. ! we combine final permutation for this block with initial
  1060. ! permutation for next block. Load next block:
  1061. load_little_endian(in0, global3, global4, local5, .LLE12)
  1062. ! parameter 1 original left
  1063. ! parameter 2 original right
  1064. ! parameter 3 left ip
  1065. ! parameter 4 right ip
  1066. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1067. ! 2: mov in4 to in3
  1068. !
  1069. ! also adds -8 to length in2 and loads loop counter to out4
  1070. fp_ip_macro(out0, out1, global3, global4, 2)
  1071. store_little_endian(in1, out0, out1, local3, .SLE10) ! block
  1072. ld [in3], out0 ! key 7531 first round next block
  1073. mov in5, local1
  1074. xor global3, out5, in5 ! iv xor next block
  1075. ld [in3+4], out1 ! key 8642
  1076. add global1, 512, global3 ! address sbox 3 since register used
  1077. xor global4, local1, out5 ! iv xor next block
  1078. ba .ncbc.enc.next.block_2
  1079. add in1, 8, in1 ! output address
  1080. .ncbc.enc.next.block_fp:
  1081. fp_macro(in5, out5)
  1082. store_little_endian(in1, in5, out5, local3, .SLE1) ! block
  1083. addcc in2, -8, in2 ! bytes missing when next block done
  1084. bpos .ncbc.enc.next.block
  1085. add in1, 8, in1
  1086. .ncbc.enc.seven.or.less:
  ! in2 is negative here. A value of -8 or less means no input bytes remain.
  ! Otherwise in2 + 8 bytes, between 1 and 7, are still to be read.
  1087. cmp in2, -8
  1088. ble .ncbc.enc.finish
  1089. nop
  1090. add in2, 8, local1 ! bytes to load
  1091. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1092. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
  1093. ! Loads 1 to 7 bytes little endian to global4, out4
  1094. .ncbc.enc.finish:
  1095. LDPTR IVEC, local4 ! reload the ivec pointer spilled at entry
  1096. store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
  1097. ret
  1098. restore
  ! Decryption path. in0 and in1 are reused for the chaining value, so the
  ! input and output pointers are kept in stack slots and in local5/local7.
  1099. .ncbc.dec:
  1100. STPTR in0, INPUT
  1101. cmp in2, 0 ! length
  1102. add in3, 120, in3 ! decryption uses schedule + 120
  1103. LDPTR IVEC, local7 ! ivec
  1104. ble .ncbc.dec.finish ! length <= 0: nothing to do
  1105. mov in3, in4 ! schedule
  1106. STPTR in1, OUTPUT
  1107. mov in0, local5 ! input
  1108. load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
  1109. .ncbc.dec.next.block:
  1110. load_little_endian(local5, in5, out5, local3, .LLE4) ! block
  1111. ! parameter 6 1/2 for include encryption/decryption
  1112. ! parameter 7 1 for mov in1 to in3
  1113. ! parameter 8 1 for mov in3 to in4
  1114. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
  1115. fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
  1116. ! in2 is bytes left to be stored
  1117. ! in2 is compared to 8 in the rounds
  1118. xor out5, in0, out4 ! iv xor
  1119. bl .ncbc.dec.seven.or.less
  1120. xor in5, in1, global4 ! iv xor
  1121. ! Load ivec next block now, since input and output address might be the same.
  1122. load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
  1123. store_little_endian(local7, out4, global4, local3, .SLE3)
  1124. STPTR local5, INPUT
  1125. add local7, 8, local7
  1126. addcc in2, -8, in2
  1127. bg .ncbc.dec.next.block
  1128. STPTR local7, OUTPUT
  1129. .ncbc.dec.store.iv:
  1130. LDPTR IVEC, local4 ! ivec
  1131. store_little_endian(local4, in0, in1, local5, .SLE4)
  1132. .ncbc.dec.finish:
  1133. ret
  1134. restore
  1135. .ncbc.dec.seven.or.less:
  ! Partial last block: the cipher text just processed becomes the chaining
  ! value, then in2 bytes of plain text are stored.
  ! NOTE(review): control presumably continues at .ncbc.dec.store.iv, which
  ! is passed as the last macro argument below. Confirm in the macro body.
  1136. load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
  1137. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
  1138. .DES_ncbc_encrypt.end:
  1139. .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
  1140. ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
  1141. ! **************************************************************************
  ! CBC mode over triple DES with three key schedules.
  ! Register arguments on entry: in0 = input, in1 = output, in2 = length,
  ! in3 = ks1, in4 = ks2, in5 = ks3. The seventh and eighth arguments,
  ! ivec and enc, are read from memory through %fp.
  ! enc equal to zero selects the decryption path at .ede3.dec.
  ! The chaining value is stored back through the ivec pointer on both
  ! paths, except that decryption with length <= 0 returns without a store.
  1142. .align 32
  1143. .global DES_ede3_cbc_encrypt
  1144. .type DES_ede3_cbc_encrypt,#function
  1145. DES_ede3_cbc_encrypt:
  1146. save %sp, FRAME, %sp
  ! Names for three slots addressed off %sp that keep the key schedule
  ! pointers, since in3, in4 and in5 are reused inside the loops.
  1147. define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
  1148. define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  1149. define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
  ! Position independent address setup: the call at label 1 leaves its own
  ! address in %o7, so after the add global1 holds the address of
  ! .PIC.DES_SPtrans and out2 the address of .des_and.
  1150. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1151. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1152. 1: call .+8
  1153. add %o7,global1,global1 ! delay slot of the call
  1154. sub global1,.PIC.DES_SPtrans-.des_and,out2
  1155. LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
  1156. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1157. cmp local3, 0 ! enc
  1158. be .ede3.dec ! enc is zero: decrypt
  1159. STPTR in4, KS2 ! delay slot: ks2 is spilled unchanged on both paths
  ! Encryption path. in5 and out5 carry the chaining value between blocks.
  1160. STPTR in5, KS3
  1161. load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
  1162. addcc in2, -8, in2 ! bytes missing after next block
  1163. bl .ede3.enc.seven.or.less ! fewer than 8 bytes in total
  1164. STPTR in3, KS1
  1165. .ede3.enc.next.block:
  1166. load_little_endian(in0, out4, global4, local3, .LLE7)
  1167. .ede3.enc.next.block_1:
  1168. LDPTR KS2, in4
  1169. xor in5, out4, in5 ! iv xor
  1170. xor out5, global4, out5 ! iv xor
  1171. LDPTR KS1, in3
  1172. add in4, 120, in4 ! for decryption we use last subkey first
  1173. nop
  1174. ip_macro(in5, out5, in5, out5, in3)
  1175. .ede3.enc.next.block_2:
  1176. call .des_enc ! ks1 in3
  1177. nop
  1178. call .des_dec ! ks2 in4
  1179. LDPTR KS3, in3
  1180. call .des_enc ! ks3 in3 compares in2 to 8
  1181. nop
  1182. bl .ede3.enc.next.block_fp ! fewer than 8 bytes follow this block
  1183. add in0, 8, in0
  1184. ! If 8 or more bytes are to be encrypted after this block,
  1185. ! we combine final permutation for this block with initial
  1186. ! permutation for next block. Load next block:
  1187. load_little_endian(in0, global3, global4, local5, .LLE11)
  1188. ! parameter 1 original left
  1189. ! parameter 2 original right
  1190. ! parameter 3 left ip
  1191. ! parameter 4 right ip
  1192. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1193. ! 2: mov in4 to in3
  1194. !
  1195. ! also adds -8 to length in2 and loads loop counter to out4
  1196. fp_ip_macro(out0, out1, global3, global4, 1)
  1197. store_little_endian(in1, out0, out1, local3, .SLE9) ! block
  1198. mov in5, local1
  1199. xor global3, out5, in5 ! iv xor next block
  1200. ld [in3], out0 ! key 7531
  1201. add global1, 512, global3 ! address sbox 3
  1202. xor global4, local1, out5 ! iv xor next block
  1203. ld [in3+4], out1 ! key 8642
  1204. add global1, 768, global4 ! address sbox 4
  1205. ba .ede3.enc.next.block_2
  1206. add in1, 8, in1
  1207. .ede3.enc.next.block_fp:
  1208. fp_macro(in5, out5)
  1209. store_little_endian(in1, in5, out5, local3, .SLE5) ! block
  1210. addcc in2, -8, in2 ! bytes missing when next block done
  1211. bpos .ede3.enc.next.block
  1212. add in1, 8, in1
  1213. .ede3.enc.seven.or.less:
  ! in2 is negative here. A value of -8 or less means no input bytes remain.
  ! Otherwise in2 + 8 bytes, between 1 and 7, are still to be read.
  1214. cmp in2, -8
  1215. ble .ede3.enc.finish
  1216. nop
  1217. add in2, 8, local1 ! bytes to load
  1218. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1219. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
  1220. .ede3.enc.finish:
  1221. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1222. store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
  1223. ret
  1224. restore
  ! Decryption path. in0 and in1 are reused for the chaining value, so the
  ! input and output pointers are kept in stack slots and in local5/local7.
  ! The two slot names used for them were set up in DES_ncbc_encrypt.
  ! ks1 and ks3 are spilled with 120 added. ks2 was spilled unchanged in the
  ! delay slot of the branch that leads here.
  1225. .ede3.dec:
  1226. STPTR in0, INPUT
  1227. add in5, 120, in5
  1228. STPTR in1, OUTPUT
  1229. mov in0, local5
  1230. add in3, 120, in3
  1231. STPTR in3, KS1
  1232. cmp in2, 0
  1233. ble .ede3.dec.finish ! length <= 0: nothing to do
  1234. STPTR in5, KS3
  1235. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
  1236. load_little_endian(local7, in0, in1, local3, .LLE8)
  1237. .ede3.dec.next.block:
  1238. load_little_endian(local5, in5, out5, local3, .LLE9)
  1239. ! parameter 6 1/2 for include encryption/decryption
  1240. ! parameter 7 1 for mov in1 to in3
  1241. ! parameter 8 1 for mov in3 to in4
  1242. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1243. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
  1244. call .des_enc ! ks2 in3
  1245. LDPTR KS1, in4
  1246. call .des_dec ! ks1 in4
  1247. nop
  1248. fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
  1249. ! in2 is bytes left to be stored
  1250. ! in2 is compared to 8 in the rounds
  1251. xor out5, in0, out4
  1252. bl .ede3.dec.seven.or.less
  1253. xor in5, in1, global4
  1254. load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
  1255. store_little_endian(local7, out4, global4, local3, .SLE7) ! block
  1256. STPTR local5, INPUT
  1257. addcc in2, -8, in2
  1258. add local7, 8, local7
  1259. bg .ede3.dec.next.block
  1260. STPTR local7, OUTPUT
  1261. .ede3.dec.store.iv:
  1262. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1263. store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
  1264. .ede3.dec.finish:
  1265. ret
  1266. restore
  1267. .ede3.dec.seven.or.less:
  ! Partial last block: the cipher text just processed becomes the chaining
  ! value, then in2 bytes of plain text are stored.
  ! NOTE(review): control presumably continues at .ede3.dec.store.iv, which
  ! is passed as the last macro argument below. Confirm in the macro body.
  1268. load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
  1269. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
  1270. .DES_ede3_cbc_encrypt.end:
  1271. .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
  1272. .align 256
  1273. .type .des_and,#object
  1274. .size .des_and,288 ! 256 table bytes + eight 4-byte words
  1275. .des_and:
  1276. ! This table is used for AND 0xFC when it is known that register
  1277. ! bits 8-31 are zero. Makes it possible to do three arithmetic
  1278. ! operations in one cycle.
  ! The byte at offset n holds n with its two low bits cleared, for n from
  ! 0 to 255. The words that follow are addressed by the byte offsets given
  ! in their comments, relative to .des_and.
  1279. .byte 0, 0, 0, 0, 4, 4, 4, 4
  1280. .byte 8, 8, 8, 8, 12, 12, 12, 12
  1281. .byte 16, 16, 16, 16, 20, 20, 20, 20
  1282. .byte 24, 24, 24, 24, 28, 28, 28, 28
  1283. .byte 32, 32, 32, 32, 36, 36, 36, 36
  1284. .byte 40, 40, 40, 40, 44, 44, 44, 44
  1285. .byte 48, 48, 48, 48, 52, 52, 52, 52
  1286. .byte 56, 56, 56, 56, 60, 60, 60, 60
  1287. .byte 64, 64, 64, 64, 68, 68, 68, 68
  1288. .byte 72, 72, 72, 72, 76, 76, 76, 76
  1289. .byte 80, 80, 80, 80, 84, 84, 84, 84
  1290. .byte 88, 88, 88, 88, 92, 92, 92, 92
  1291. .byte 96, 96, 96, 96, 100, 100, 100, 100
  1292. .byte 104, 104, 104, 104, 108, 108, 108, 108
  1293. .byte 112, 112, 112, 112, 116, 116, 116, 116
  1294. .byte 120, 120, 120, 120, 124, 124, 124, 124
  1295. .byte 128, 128, 128, 128, 132, 132, 132, 132
  1296. .byte 136, 136, 136, 136, 140, 140, 140, 140
  1297. .byte 144, 144, 144, 144, 148, 148, 148, 148
  1298. .byte 152, 152, 152, 152, 156, 156, 156, 156
  1299. .byte 160, 160, 160, 160, 164, 164, 164, 164
  1300. .byte 168, 168, 168, 168, 172, 172, 172, 172
  1301. .byte 176, 176, 176, 176, 180, 180, 180, 180
  1302. .byte 184, 184, 184, 184, 188, 188, 188, 188
  1303. .byte 192, 192, 192, 192, 196, 196, 196, 196
  1304. .byte 200, 200, 200, 200, 204, 204, 204, 204
  1305. .byte 208, 208, 208, 208, 212, 212, 212, 212
  1306. .byte 216, 216, 216, 216, 220, 220, 220, 220
  1307. .byte 224, 224, 224, 224, 228, 228, 228, 228
  1308. .byte 232, 232, 232, 232, 236, 236, 236, 236
  1309. .byte 240, 240, 240, 240, 244, 244, 244, 244
  1310. .byte 248, 248, 248, 248, 252, 252, 252, 252
  1311. ! 5 numbers for initial/final permutation
  1312. .word 0x0f0f0f0f ! offset 256
  1313. .word 0x0000ffff ! 260
  1314. .word 0x33333333 ! 264
  1315. .word 0x00ff00ff ! 268
  1316. .word 0x55555555 ! 272
  ! Three further words. The last one ends at offset 288, which is the
  ! object size declared above.
  1317. .word 0 ! 276
  1318. .word LOOPS ! 280
  1319. .word 0x0000FC00 ! 284
  1320. .global DES_SPtrans
  1321. .type DES_SPtrans,#object
  1322. .size DES_SPtrans,2048
  1323. .align 64
  1324. DES_SPtrans:
  1325. .PIC.DES_SPtrans:
  ! Eight sub-tables of 64 words each, 256 bytes per sub-table and 2048
  ! bytes in total. Sub-table n starts at byte offset n * 256 from the label.
  ! The functions in this file address the sub-tables at offsets 512 and 768
  ! as sbox 3 and sbox 4.
  1326. ! nibble 0 (byte offset 0)
  1327. .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
  1328. .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
  1329. .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
  1330. .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
  1331. .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
  1332. .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
  1333. .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
  1334. .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
  1335. .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
  1336. .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
  1337. .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
  1338. .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
  1339. .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
  1340. .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
  1341. .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
  1342. .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
  1343. ! nibble 1 (byte offset 256)
  1344. .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
  1345. .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
  1346. .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
  1347. .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
  1348. .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
  1349. .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
  1350. .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
  1351. .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
  1352. .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
  1353. .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
  1354. .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
  1355. .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
  1356. .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
  1357. .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
  1358. .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
  1359. .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
  1360. ! nibble 2 (byte offset 512, addressed as sbox 3 by the code)
  1361. .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
  1362. .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
  1363. .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
  1364. .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
  1365. .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
  1366. .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
  1367. .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
  1368. .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
  1369. .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
  1370. .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
  1371. .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
  1372. .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
  1373. .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
  1374. .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
  1375. .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
  1376. .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
  1377. ! nibble 3 (byte offset 768, addressed as sbox 4 by the code)
  1378. .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
  1379. .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
  1380. .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
  1381. .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
  1382. .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
  1383. .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
  1384. .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
  1385. .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
  1386. .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
  1387. .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
  1388. .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
  1389. .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
  1390. .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
  1391. .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
  1392. .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
  1393. .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
  1394. ! nibble 4 (byte offset 1024)
  1395. .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
  1396. .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
  1397. .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
  1398. .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
  1399. .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
  1400. .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
  1401. .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
  1402. .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
  1403. .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
  1404. .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
  1405. .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
  1406. .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
  1407. .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
  1408. .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
  1409. .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
  1410. .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
  1411. ! nibble 5 (byte offset 1280)
  1412. .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
  1413. .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
  1414. .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
  1415. .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
  1416. .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
  1417. .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
  1418. .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
  1419. .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
  1420. .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
  1421. .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
  1422. .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
  1423. .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
  1424. .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
  1425. .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
  1426. .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
  1427. .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
  1428. ! nibble 6 (byte offset 1536)
  1429. .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
  1430. .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
  1431. .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
  1432. .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
  1433. .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
  1434. .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
  1435. .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
  1436. .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
  1437. .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
  1438. .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
  1439. .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
  1440. .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
  1441. .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
  1442. .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
  1443. .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
  1444. .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
  1445. ! nibble 7 (byte offset 1792)
  1446. .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
  1447. .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
  1448. .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
  1449. .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
  1450. .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
  1451. .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
  1452. .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
  1453. .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
  1454. .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
  1455. .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
  1456. .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
  1457. .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
  1458. .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
  1459. .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
  1460. .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
  1461. .word 0x20000000, 0x20800080, 0x00020000, 0x00820080