des_enc.m4 48 KB


  1. ! des_enc.m4
  2. ! des_enc.S (generated from des_enc.m4)
  3. !
  4. ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
  5. !
  6. ! Version 1.0. 32-bit version.
  7. !
  8. ! June 8, 2000.
  9. !
  10. ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
  11. ! by Andy Polyakov.
  12. !
  13. ! January 1, 2003.
  14. !
  15. ! Assembler version: Copyright Svend Olaf Mikkelsen.
  16. !
  17. ! Original C code: Copyright Eric A. Young.
  18. !
  19. ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
  20. !
  21. ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
  22. !
  23. ! This version can be redistributed.
  24. !
  25. ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
  26. !
  27. ! Global registers 1 to 5 are used. This is the same as done by the
  28. ! cc compiler. The UltraSPARC load/store little endian feature is used.
  29. !
  30. ! Instruction grouping often refers to one CPU cycle.
  31. !
  32. ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
  33. !
  34. ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
  35. !
  36. ! Performance improvement according to './apps/openssl speed des'
  37. !
  38. ! 32-bit build:
  39. ! 23% faster than cc-5.2 -xarch=v8plus -xO5
  40. ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
  41. ! 64-bit build:
  42. ! 50% faster than cc-5.2 -xarch=v9 -xO5
  43. ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
  44. !
  45. .ident "des_enc.m4 2.1"
  46. .file "des_enc-sparc.S"
  47. #include <openssl/opensslconf.h>
  ! Select the 64-bit ABI when the compiler reports a 64-bit SPARC target.
  48. #if defined(__SUNPRO_C) && defined(__sparcv9)
  49. # define ABI64 /* They've said -xarch=v9 at command line */
  50. #elif defined(__GNUC__) && defined(__arch64__)
  51. # define ABI64 /* They've said -m64 at command line */
  52. #endif
  ! Per-ABI constants used by the entry points below:
  !   FRAME        size operand of the save instruction
  !   BIAS, ARG0   locate the argument save slots relative to %sp
  !   ARGSZ        size of one argument save slot
  !   LDPTR/STPTR  pointer-sized load and store mnemonics
  53. #ifdef ABI64
  ! The 64-bit branch also declares %g2 and %g3 as scratch registers.
  54. .register %g2,#scratch
  55. .register %g3,#scratch
  56. # define FRAME -192
  57. # define BIAS 2047
  58. # define LDPTR ldx
  59. # define STPTR stx
  60. # define ARG0 128
  61. # define ARGSZ 8
  ! A 64-bit build always enables the UltraSPARC specific code paths.
  62. # ifndef OPENSSL_SYSNAME_ULTRASPARC
  63. # define OPENSSL_SYSNAME_ULTRASPARC
  64. # endif
  65. #else
  66. # define FRAME -96
  67. # define BIAS 0
  68. # define LDPTR ld
  69. # define STPTR st
  70. # define ARG0 68
  71. # define ARGSZ 4
  72. #endif
  ! Start value of the round loop counter: the loop does two rounds per pass
  ! and is followed by two more rounds outside the loop.
  73. #define LOOPS 7
  ! Symbolic register names used throughout the file.
  74. #define global0 %g0
  75. #define global1 %g1
  76. #define global2 %g2
  77. #define global3 %g3
  78. #define global4 %g4
  79. #define global5 %g5
  80. #define local0 %l0
  81. #define local1 %l1
  82. #define local2 %l2
  83. #define local3 %l3
  84. #define local4 %l4
  85. #define local5 %l5
  ! Note: the next two names are crossed, local7 is %l6 and local6 is %l7.
  86. #define local7 %l6
  87. #define local6 %l7
  88. #define in0 %i0
  89. #define in1 %i1
  90. #define in2 %i2
  91. #define in3 %i3
  92. #define in4 %i4
  93. #define in5 %i5
  94. #define in6 %i6
  95. #define in7 %i7
  96. #define out0 %o0
  97. #define out1 %o1
  98. #define out2 %o2
  99. #define out3 %o3
  100. #define out4 %o4
  101. #define out5 %o5
  102. #define out6 %o6
  103. #define out7 %o7
  ! The byte store used by the store macros is spelled through this alias.
  104. #define stub stb
  ! From the next line on the m4 quote characters are the curly braces.
  105. changequote({,})
  106. ! Macro definitions:
  107. ! {ip_macro}
  108. !
  109. ! The logic used in initial and final permutations is the same as in
  110. ! the C code. The permutations are done with a clever technique built
  111. ! from shift, xor and bitwise-and operations.
  112. !
  113. ! The macro also loads address sbox 1 to 5 to global 1 to 5, address
  114. ! sbox 6 to local6, and address sbox 8 to out3.
  115. !
  116. ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
  117. !
  118. ! Loads key first round from address in parameter 5 to out0, out1.
  119. !
  120. ! After the original LibDES initial permutation, the resulting left
  121. ! is in the variable initially used for right and vice versa. The macro
  122. ! implements the possibility to keep the halves in the original registers.
  123. !
  124. ! parameter 1 left
  125. ! parameter 2 right
  126. ! parameter 3 result left (modify in first round)
  127. ! parameter 4 result right (use in first round)
  128. ! parameter 5 key address
  129. ! parameter 6 1/2 for include encryption/decryption
  130. ! parameter 7 1 for move in1 to in3
  131. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  132. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  !
  ! Expected on entry: global1 holds the address of sbox 1 and out2 the base
  ! of the constant table; the entry points in this file set both up before
  ! using the macro. The sbox addresses for 2 to 6 and 8 are derived from
  ! global1 by adding multiples of 256.
  ! Mask words are read from out2+256, +260, +264 and +272, the loop counter
  ! from out2+280 and the round mask from out2+284.
  ! Scratch registers: local1 to local5 and local7.
  ! NOTE(review): the KS2 and KS3 operands used for parameter 9 must be
  ! defined elsewhere in the file - confirm before using that option.
  133. define(ip_macro, {
  134. ! {ip_macro}
  135. ! $1 $2 $4 $3 $5 $6 $7 $8 $9
  136. ld [out2+256], local1 ! mask for the 4-bit swap step
  137. srl $2, 4, local4
  138. xor local4, $1, local4
  139. ifelse($7,1,{mov in1, in3},{nop})
  140. ld [out2+260], local2 ! mask for the 16-bit swap step
  141. and local4, local1, local4
  142. ifelse($8,1,{mov in3, in4},{})
  143. ifelse($8,2,{mov in4, in3},{})
  144. ld [out2+280], out4 ! loop counter
  145. sll local4, 4, local1
  146. xor $1, local4, $1
  147. ld [out2+264], local3 ! mask for the 2-bit swap step
  148. srl $1, 16, local4
  149. xor $2, local1, $2
  150. ifelse($9,1,{LDPTR KS3, in4},{})
  151. xor local4, $2, local4
  152. nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
  153. ifelse($9,1,{LDPTR KS2, in3},{})
  154. and local4, local2, local4
  155. nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
  156. sll local4, 16, local1
  157. xor $2, local4, $2
  158. srl $2, 2, local4
  159. xor $1, local1, $1
  160. sethi %hi(16711680), local5 ! 0x00FF0000
  161. xor local4, $1, local4
  162. and local4, local3, local4
  163. or local5, 255, local5 ! local5 = 0x00FF00FF, mask for the 8-bit swap step
  164. sll local4, 2, local2
  165. xor $1, local4, $1
  166. srl $1, 8, local4
  167. xor $2, local2, $2
  168. xor local4, $2, local4
  169. add global1, 768, global4 ! address sbox 4
  170. and local4, local5, local4
  171. add global1, 1024, global5 ! address sbox 5
  172. ld [out2+272], local7 ! mask for the 1-bit swap step
  173. sll local4, 8, local1
  174. xor $2, local4, $2
  175. srl $2, 1, local4
  176. xor $1, local1, $1
  177. ld [$5], out0 ! key 7531
  178. xor local4, $1, local4
  179. add global1, 256, global2 ! address sbox 2
  180. ld [$5+4], out1 ! key 8642
  181. and local4, local7, local4
  182. add global1, 512, global3 ! address sbox 3
  183. sll local4, 1, local1
  184. xor $1, local4, $1
  ! permutation done; now rotate both halves 3 left into the result registers
  185. sll $1, 3, local3
  186. xor $2, local1, $2
  187. sll $2, 3, local2
  188. add global1, 1280, local6 ! address sbox 6
  189. srl $1, 29, local4
  190. add global1, 1792, out3 ! address sbox 8
  191. srl $2, 29, local1
  192. or local4, local3, $4
  193. or local2, local1, $3
  194. ifelse($6, 1, {
  ! optional: continue directly in the shared encryption rounds, entering at
  ! their loop label; the and below executes in the delay slot of the call
  195. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  196. or local2, local1, $3
  197. xor $4, out0, local1
  198. call .des_enc.1
  199. and local1, 252, local1
  200. },{})
  201. ifelse($6, 2, {
  ! optional: same as above for the shared decryption rounds
  202. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  203. or local2, local1, $3
  204. xor $4, out0, local1
  205. call .des_dec.1
  206. and local1, 252, local1
  207. },{})
  208. })
  209. ! {rounds_macro}
  210. !
  211. ! The logic used in the DES rounds is the same as in the C code,
  212. ! except that calculations for sbox 1 and sbox 5 begin before
  213. ! the previous round is finished.
  214. !
  215. ! In each round one half (work) is modified based on key and the
  216. ! other half (use).
  217. !
  218. ! In this version we do two rounds in a loop repeated 7 times
  219. ! and two rounds separately.
  220. !
  221. ! One half has the bits for the sboxes in the following positions:
  222. !
  223. ! 777777xx555555xx333333xx111111xx
  224. !
  225. ! 88xx666666xx444444xx222222xx8888
  226. !
  227. ! The bits for each sbox are xor-ed with the key bits for that box.
  228. ! The above xx bits are cleared, and the result used for lookup in
  229. ! the sbox table. Each sbox entry contains the 4 output bits permuted
  230. ! into 32 bits according to the P permutation.
  231. !
  232. ! In the description of DES, left and right are switched after
  233. ! each round, except after last round. In this code the original
  234. ! left and right are kept in the same register in all rounds, meaning
  235. ! that after the 16 rounds the result for right is in the register
  236. ! originally used for left.
  237. !
  238. ! parameter 1 first work (left in first round)
  239. ! parameter 2 first use (right in first round)
  240. ! parameter 3 enc/dec 1/-1
  241. ! parameter 4 loop label
  242. ! parameter 5 key address register
  243. ! parameter 6 optional address for key next encryption/decryption
  244. ! parameter 7 not empty for include retl
  245. !
  246. ! also compares in2 to 8
  !
  ! Register state expected on entry:
  !   out0, out1    key words for the first round
  !   out2          base of the constant table (byte lookups for sbox 7 and 8,
  !                 loop counter at +280, 0x0000FC00 mask at +284)
  !   out4          loop counter
  !   global1..5    addresses of sbox 1 to 5
  !   local6, out3  addresses of sbox 6 and sbox 8
  ! local5 holds the 0x0000FC00 mask at the top of each pass and is reused
  ! as the sbox 7 address and as a temporary inside the pass.
  ! The key address register is advanced by 16 bytes (times parameter 3)
  ! per loop pass.
  ! On exit local3 and local4 hold the first work register shifted right 3
  ! and left 29, ready for the final permutation.
  247. define(rounds_macro, {
  248. ! {rounds_macro}
  249. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  250. xor $2, out0, local1
  251. ld [out2+284], local5 ! 0x0000FC00
  252. ba $4 ! jump over the alignment padding to the loop label
  253. and local1, 252, local1
  254. .align 32
  255. $4:
  256. ! local6 is address sbox 6
  257. ! out3 is address sbox 8
  258. ! out4 is loop counter
  259. ld [global1+local1], local1
  260. xor $2, out1, out1 ! 8642
  261. xor $2, out0, out0 ! 7531
  262. ! fmovs %f0, %f0 ! fxor used for alignment
  263. srl out1, 4, local0 ! rotate 4 right
  264. and out0, local5, local3 ! 3
  265. ! fmovs %f0, %f0
  266. ld [$5+$3*8], local7 ! key 7531 next round
  267. srl local3, 8, local3 ! 3
  268. and local0, 252, local2 ! 2
  269. ! fmovs %f0, %f0
  270. ld [global3+local3],local3 ! 3
  271. sll out1, 28, out1 ! rotate
  272. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  273. ld [global2+local2], local2 ! 2
  274. srl out0, 24, local1 ! 7
  275. or out1, local0, out1 ! rotate
  276. ldub [out2+local1], local1 ! 7 (and 0xFC)
  277. srl out1, 24, local0 ! 8
  278. and out1, local5, local4 ! 4
  279. ldub [out2+local0], local0 ! 8 (and 0xFC)
  280. srl local4, 8, local4 ! 4
  281. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  282. ld [global4+local4],local4 ! 4
  283. srl out1, 16, local2 ! 6
  284. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  285. ld [out3+local0],local0 ! 8
  286. and local2, 252, local2 ! 6
  287. add global1, 1536, local5 ! address sbox 7
  288. ld [local6+local2], local2 ! 6
  289. srl out0, 16, local3 ! 5
  290. xor $1, local4, $1 ! 4 finished
  291. ld [local5+local1],local1 ! 7
  292. and local3, 252, local3 ! 5
  293. xor $1, local0, $1 ! 8 finished
  294. ld [global5+local3],local3 ! 5
  295. xor $1, local2, $1 ! 6 finished
  ! count down; the condition codes set here are tested by the bne that
  ! closes the loop
  296. subcc out4, 1, out4
  297. ld [$5+$3*8+4], out0 ! key 8642 next round
  298. xor $1, local7, local2 ! sbox 5 next round
  299. xor $1, local1, $1 ! 7 finished
  300. srl local2, 16, local2 ! sbox 5 next round
  301. xor $1, local3, $1 ! 5 finished
  302. ld [$5+$3*16+4], out1 ! key 8642 next round again
  303. and local2, 252, local2 ! sbox5 next round
  304. ! next round
  305. xor $1, local7, local7 ! 7531
  306. ld [global5+local2], local2 ! 5
  307. srl local7, 24, local3 ! 7
  308. xor $1, out0, out0 ! 8642
  309. ldub [out2+local3], local3 ! 7 (and 0xFC)
  310. srl out0, 4, local0 ! rotate 4 right
  311. and local7, 252, local1 ! 1
  312. sll out0, 28, out0 ! rotate
  313. xor $2, local2, $2 ! 5 finished local2 used
  314. srl local0, 8, local4 ! 4
  315. and local0, 252, local2 ! 2
  316. ld [local5+local3], local3 ! 7
  317. srl local0, 16, local5 ! 6
  318. or out0, local0, out0 ! rotate
  319. ld [global2+local2], local2 ! 2
  320. srl out0, 24, local0
  321. ld [$5+$3*16], out0 ! key 7531 next round
  322. and local4, 252, local4 ! 4
  323. and local5, 252, local5 ! 6
  324. ld [global4+local4], local4 ! 4
  325. xor $2, local3, $2 ! 7 finished local3 used
  326. and local0, 252, local0 ! 8
  327. ld [local6+local5], local5 ! 6
  328. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  329. srl local7, 8, local2 ! 3 start
  330. ld [out3+local0], local0 ! 8
  331. xor $2, local4, $2 ! 4 finished
  332. and local2, 252, local2 ! 3
  333. ld [global1+local1], local1 ! 1
  334. xor $2, local5, $2 ! 6 finished local5 used
  335. ld [global3+local2], local2 ! 3
  336. xor $2, local0, $2 ! 8 finished
  337. add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two round keys)
  338. ld [out2+284], local5 ! 0x0000FC00
  339. xor $2, out0, local4 ! sbox 1 next round
  340. xor $2, local1, $2 ! 1 finished
  341. xor $2, local2, $2 ! 3 finished
  342. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  343. bne,pt %icc, $4
  344. #else
  345. bne $4
  346. #endif
  347. and local4, 252, local1 ! sbox 1 next round
  348. ! two rounds more:
  349. ld [global1+local1], local1
  350. xor $2, out1, out1
  351. xor $2, out0, out0
  352. srl out1, 4, local0 ! rotate
  353. and out0, local5, local3
  354. ld [$5+$3*8], local7 ! key 7531
  355. srl local3, 8, local3
  356. and local0, 252, local2
  357. ld [global3+local3],local3
  358. sll out1, 28, out1 ! rotate
  359. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  360. ld [global2+local2], local2
  361. srl out0, 24, local1
  362. or out1, local0, out1 ! rotate
  363. ldub [out2+local1], local1
  364. srl out1, 24, local0
  365. and out1, local5, local4
  366. ldub [out2+local0], local0
  367. srl local4, 8, local4
  368. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  369. ld [global4+local4],local4
  370. srl out1, 16, local2
  371. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  372. ld [out3+local0],local0
  373. and local2, 252, local2
  374. add global1, 1536, local5 ! address sbox 7
  375. ld [local6+local2], local2
  376. srl out0, 16, local3
  377. xor $1, local4, $1 ! 4 finished
  378. ld [local5+local1],local1
  379. and local3, 252, local3
  380. xor $1, local0, $1
  381. ld [global5+local3],local3
  382. xor $1, local2, $1 ! 6 finished
  ! the comparison result is left in the condition codes for the code that
  ! follows the macro; nothing inside the macro tests it
  383. cmp in2, 8
  384. ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
  385. xor $1, local7, local2 ! sbox 5 next round
  386. xor $1, local1, $1 ! 7 finished
  387. ld [$5+$3*8+4], out0
  388. srl local2, 16, local2 ! sbox 5 next round
  389. xor $1, local3, $1 ! 5 finished
  390. and local2, 252, local2
  391. ! next round (two rounds more)
  392. xor $1, local7, local7 ! 7531
  393. ld [global5+local2], local2
  394. srl local7, 24, local3
  395. xor $1, out0, out0 ! 8642
  396. ldub [out2+local3], local3
  397. srl out0, 4, local0 ! rotate
  398. and local7, 252, local1
  399. sll out0, 28, out0 ! rotate
  400. xor $2, local2, $2 ! 5 finished local2 used
  401. srl local0, 8, local4
  402. and local0, 252, local2
  403. ld [local5+local3], local3
  404. srl local0, 16, local5
  405. or out0, local0, out0 ! rotate
  406. ld [global2+local2], local2
  407. srl out0, 24, local0
  408. ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
  409. and local4, 252, local4
  410. and local5, 252, local5
  411. ld [global4+local4], local4
  412. xor $2, local3, $2 ! 7 finished local3 used
  413. and local0, 252, local0
  414. ld [local6+local5], local5
  415. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  416. srl local7, 8, local2 ! 3 start
  417. ld [out3+local0], local0
  418. xor $2, local4, $2
  419. and local2, 252, local2
  420. ld [global1+local1], local1
  421. xor $2, local5, $2 ! 6 finished local5 used
  422. ld [global3+local2], local2
  423. srl $1, 3, local3 ! first work register shifted right 3, for the final permutation
  424. xor $2, local0, $2
  425. ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
  426. sll $1, 29, local4 ! first work register shifted left 29, for the final permutation
  427. xor $2, local1, $2
  428. ifelse($7,{}, {}, {retl})
  429. xor $2, local2, $2 ! 3 finished (executes in the delay slot when retl is included)
  430. })
  431. ! {fp_macro}
  432. !
  433. ! parameter 1 right (original left)
  434. ! parameter 2 left (original right)
  435. ! parameter 3 1 for optional store to [in0]
  436. ! parameter 4 1 for load input/output address to local5/7
  437. !
  438. ! The final permutation logic switches the halves, meaning that
  439. ! left and right end up in the registers originally used.
  !
  ! Expects local3 and local4 to hold the original left shifted right 3 and
  ! left 29, as left behind by the rounds code.
  ! The five permutation masks are built with sethi/or pairs instead of
  ! being loaded from memory. Scratch registers: local1 to local4.
  ! NOTE(review): INPUT and OUTPUT used for parameter 4 must be defined
  ! elsewhere in the file - confirm before using that option.
  440. define(fp_macro, {
  441. ! {fp_macro}
  442. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  443. ! initially undo the rotate 3 left done after initial permutation
  444. ! original left is received shifted 3 right and 29 left in local3/4
  445. sll $2, 29, local1
  446. or local3, local4, $1
  447. srl $2, 3, $2
  448. sethi %hi(0x55555555), local2
  449. or $2, local1, $2
  450. or local2, %lo(0x55555555), local2 ! mask for the 1-bit swap step
  451. srl $2, 1, local3
  452. sethi %hi(0x00ff00ff), local1
  453. xor local3, $1, local3
  454. or local1, %lo(0x00ff00ff), local1 ! mask for the 8-bit swap step
  455. and local3, local2, local3
  456. sethi %hi(0x33333333), local4
  457. sll local3, 1, local2
  458. xor $1, local3, $1
  459. srl $1, 8, local3
  460. xor $2, local2, $2
  461. xor local3, $2, local3
  462. or local4, %lo(0x33333333), local4 ! mask for the 2-bit swap step
  463. and local3, local1, local3
  464. sethi %hi(0x0000ffff), local1
  465. sll local3, 8, local2
  466. xor $2, local3, $2
  467. srl $2, 2, local3
  468. xor $1, local2, $1
  469. xor local3, $1, local3
  470. or local1, %lo(0x0000ffff), local1 ! mask for the 16-bit swap step
  471. and local3, local4, local3
  472. sethi %hi(0x0f0f0f0f), local4
  473. sll local3, 2, local2
  474. ifelse($4,1, {LDPTR INPUT, local5})
  475. xor $1, local3, $1
  476. ifelse($4,1, {LDPTR OUTPUT, local7})
  477. srl $1, 16, local3
  478. xor $2, local2, $2
  479. xor local3, $2, local3
  480. or local4, %lo(0x0f0f0f0f), local4 ! mask for the 4-bit swap step
  481. and local3, local1, local3
  482. sll local3, 16, local2
  483. xor $2, local3, local1 ! intermediate kept in local1, finished by the last xor below
  484. srl local1, 4, local3
  485. xor $1, local2, $1
  486. xor local3, $1, local3
  487. and local3, local4, local3
  488. sll local3, 4, local2
  489. xor $1, local3, $1
  490. ! optional store:
  491. ifelse($3,1, {st $1, [in0]})
  492. xor local1, local2, $2
  493. ifelse($3,1, {st $2, [in0+4]})
  494. })
  495. ! {fp_ip_macro}
  496. !
  497. ! Does initial permutation for next block mixed with
  498. ! final permutation for current block.
  499. !
  500. ! parameter 1 original left
  501. ! parameter 2 original right
  502. ! parameter 3 left ip
  503. ! parameter 4 right ip
  504. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  505. ! 2: mov in4 to in3
  506. !
  507. ! also adds -8 to length in2 and loads loop counter to out4
  !
  ! The half that is un-rotated first is always read from out5, whatever
  ! register parameter 2 names; parameter 2 only receives the result.
  ! The original left is expected shifted right 3 and left 29 in local3 and
  ! local4. The mask words are read from the table at out2+256 to out2+272.
  ! out4 is used as a temporary and reloaded from out2+280 at the end.
  ! NOTE(review): KS1 and KS2 used for parameter 5 must be defined
  ! elsewhere in the file - confirm before using that option.
  508. define(fp_ip_macro, {
  509. ! {fp_ip_macro}
  510. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  ! register aliases; they are created when this macro is expanded
  511. define({temp1},{out4})
  512. define({temp2},{local3})
  513. define({ip1},{local1})
  514. define({ip2},{local2})
  515. define({ip4},{local4})
  516. define({ip5},{local5})
  517. ! $1 in local3, local4
  518. ld [out2+256], ip1
  519. sll out5, 29, temp1
  520. or local3, local4, $1
  521. srl out5, 3, $2
  522. ifelse($5,2,{mov in4, in3})
  523. ld [out2+272], ip5
  524. srl $4, 4, local0
  525. or $2, temp1, $2
  526. srl $2, 1, temp1
  527. xor temp1, $1, temp1
  528. and temp1, ip5, temp1
  529. xor local0, $3, local0
  530. sll temp1, 1, temp2
  531. xor $1, temp1, $1
  532. and local0, ip1, local0
  533. add in2, -8, in2 ! one block of 8 bytes less to do
  534. sll local0, 4, local7
  535. xor $3, local0, $3
  536. ld [out2+268], ip4
  537. srl $1, 8, temp1
  538. xor $2, temp2, $2
  539. ld [out2+260], ip2
  540. srl $3, 16, local0
  541. xor $4, local7, $4
  542. xor temp1, $2, temp1
  543. xor local0, $4, local0
  544. and temp1, ip4, temp1
  545. and local0, ip2, local0
  546. sll temp1, 8, temp2
  547. xor $2, temp1, $2
  548. sll local0, 16, local7
  549. xor $4, local0, $4
  550. srl $2, 2, temp1
  551. xor $1, temp2, $1
  552. ld [out2+264], temp2 ! ip3
  553. srl $4, 2, local0
  554. xor $3, local7, $3
  555. xor temp1, $1, temp1
  556. xor local0, $3, local0
  557. and temp1, temp2, temp1
  558. and local0, temp2, local0
  559. sll temp1, 2, temp2
  560. xor $1, temp1, $1
  561. sll local0, 2, local7
  562. xor $3, local0, $3
  563. srl $1, 16, temp1
  564. xor $2, temp2, $2
  565. srl $3, 8, local0
  566. xor $4, local7, $4
  567. xor temp1, $2, temp1
  568. xor local0, $4, local0
  569. and temp1, ip2, temp1
  570. and local0, ip4, local0
  571. sll temp1, 16, temp2
  572. xor $2, temp1, local4
  573. sll local0, 8, local7
  574. xor $4, local0, $4
  575. srl $4, 1, local0
  576. xor $3, local7, $3
  577. srl local4, 4, temp1
  578. xor local0, $3, local0
  579. xor $1, temp2, $1
  580. and local0, ip5, local0
  581. sll local0, 1, local7
  582. xor temp1, $1, temp1
  583. xor $3, local0, $3
  584. xor $4, local7, $4
  ! initial permutation of the next block done; rotate its halves 3 left
  585. sll $3, 3, local5
  586. and temp1, ip1, temp1
  587. sll temp1, 4, temp2
  588. xor $1, temp1, $1
  589. ifelse($5,1,{LDPTR KS2, in4})
  590. sll $4, 3, local2
  591. xor local4, temp2, $2
  592. ! reload since used as temporary:
  593. ld [out2+280], out4 ! loop counter
  594. srl $3, 29, local0
  595. ifelse($5,1,{add in4, 120, in4})
  596. ifelse($5,1,{LDPTR KS1, in3})
  597. srl $4, 29, local7
  598. or local0, local5, $4
  599. or local2, local7, $3
  600. })
  601. ! {load_little_endian}
  602. !
  603. ! parameter 1 address
  604. ! parameter 2 destination left
  605. ! parameter 3 destination right
  606. ! parameter 4 temporary
  607. ! parameter 5 label
  !
  ! Loads 8 bytes: bytes 0 to 3 go to parameter 2 and bytes 4 to 7 to
  ! parameter 3, each with the lowest address in the least significant byte.
  ! The address register is left unchanged.
  ! Two labels are emitted: parameter 5 itself and parameter 5 followed by
  ! the letter a, so each use needs a distinct label.
  608. define(load_little_endian, {
  609. ! {load_little_endian}
  610. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  611. ! first in memory to rightmost in register
  612. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  ! fast path for a 4-byte aligned address: two word loads through the
  ! little endian address space 0x88; otherwise branch to the byte loads
  613. andcc $1, 3, global0
  614. bne,pn %icc, $5
  615. nop
  616. lda [$1] 0x88, $2
  617. add $1, 4, $4
  618. ba,pt %icc, $5a
  619. lda [$4] 0x88, $3 ! second word, loaded in the branch delay slot
  620. #endif
  621. $5:
  ! byte by byte path, works for any alignment
  622. ldub [$1+3], $2
  623. ldub [$1+2], $4
  624. sll $2, 8, $2
  625. or $2, $4, $2
  626. ldub [$1+1], $4
  627. sll $2, 8, $2
  628. or $2, $4, $2
  629. ldub [$1+0], $4
  630. sll $2, 8, $2
  631. or $2, $4, $2
  632. ldub [$1+3+4], $3
  633. ldub [$1+2+4], $4
  634. sll $3, 8, $3
  635. or $3, $4, $3
  636. ldub [$1+1+4], $4
  637. sll $3, 8, $3
  638. or $3, $4, $3
  639. ldub [$1+0+4], $4
  640. sll $3, 8, $3
  641. or $3, $4, $3
  642. $5a:
  643. })
  644. ! {load_little_endian_inc}
  645. !
  646. ! parameter 1 address
  647. ! parameter 2 destination left
  648. ! parameter 3 destination right
  649. ! parameter 4 temporary
  650. ! parameter 5 label
  651. !
  652. ! adds 8 to address
  !
  ! Bytes 0 to 3 go to parameter 2 and bytes 4 to 7 to parameter 3, each
  ! with the lowest address in the least significant byte. Both code paths
  ! leave the address register advanced by 8.
  ! Two labels are emitted: parameter 5 itself and parameter 5 followed by
  ! the letter a, so each use needs a distinct label.
  653. define(load_little_endian_inc, {
  654. ! {load_little_endian_inc}
  655. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  656. ! first in memory to rightmost in register
  657. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  ! fast path for a 4-byte aligned address: two word loads through the
  ! little endian address space 0x88, stepping the address by 4 each time
  658. andcc $1, 3, global0
  659. bne,pn %icc, $5
  660. nop
  661. lda [$1] 0x88, $2
  662. add $1, 4, $1
  663. lda [$1] 0x88, $3
  664. ba,pt %icc, $5a
  665. add $1, 4, $1 ! second step of 4, done in the branch delay slot
  666. #endif
  667. $5:
  ! byte by byte path, works for any alignment
  668. ldub [$1+3], $2
  669. ldub [$1+2], $4
  670. sll $2, 8, $2
  671. or $2, $4, $2
  672. ldub [$1+1], $4
  673. sll $2, 8, $2
  674. or $2, $4, $2
  675. ldub [$1+0], $4
  676. sll $2, 8, $2
  677. or $2, $4, $2
  678. ldub [$1+3+4], $3
  679. add $1, 8, $1 ! address advanced here; the offsets below compensate with -8
  680. ldub [$1+2+4-8], $4
  681. sll $3, 8, $3
  682. or $3, $4, $3
  683. ldub [$1+1+4-8], $4
  684. sll $3, 8, $3
  685. or $3, $4, $3
  686. ldub [$1+0+4-8], $4
  687. sll $3, 8, $3
  688. or $3, $4, $3
  689. $5a:
  690. })
  691. ! {load_n_bytes}
  692. !
  693. ! Loads 1 to 7 bytes little endian
  694. ! Remaining bytes are zeroed.
  695. !
  696. ! parameter 1 address
  697. ! parameter 2 length
  698. ! parameter 3 destination register left
  699. ! parameter 4 destination register right
  700. ! parameter 5 temp
  701. ! parameter 6 temp2
  702. ! parameter 7 label
  703. ! parameter 8 return label
  !
  ! Bytes 0 to 3 are assembled into parameter 4 and bytes 4 to 6 into
  ! parameter 3; both registers are cleared first.
  ! Dispatch goes through a table of offsets relative to the call label;
  ! control enters at the case for the given length and falls through the
  ! cases for the lower byte positions, then branches to the return label.
  ! The call overwrites %o7.
  ! The length must be 1 to 7: table entry 0 holds offset 0, which would
  ! send the jump back to the call itself.
  704. define(load_n_bytes, {
  705. ! {load_n_bytes}
  706. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  707. $7.0: call .+8 ! only used to get the current address into %o7
  708. sll $2, 2, $6 ! table offset = length * 4 (delay slot)
  709. add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
  710. add $5, $6, $5
  711. mov 0, $4
  712. ld [$5], $5 ! offset of the selected case from the call label
  713. jmp %o7+$5
  714. mov 0, $3
  715. $7.7:
  716. ldub [$1+6], $5
  717. sll $5, 16, $5
  718. or $3, $5, $3
  719. $7.6:
  720. ldub [$1+5], $5
  721. sll $5, 8, $5
  722. or $3, $5, $3
  723. $7.5:
  724. ldub [$1+4], $5
  725. or $3, $5, $3
  726. $7.4:
  727. ldub [$1+3], $5
  728. sll $5, 24, $5
  729. or $4, $5, $4
  730. $7.3:
  731. ldub [$1+2], $5
  732. sll $5, 16, $5
  733. or $4, $5, $4
  734. $7.2:
  735. ldub [$1+1], $5
  736. sll $5, 8, $5
  737. or $4, $5, $4
  738. $7.1:
  739. ldub [$1+0], $5
  740. ba $8
  741. or $4, $5, $4 ! last byte merged in the branch delay slot
  742. .align 4
  743. $7.jmp.table:
  744. .word 0
  745. .word $7.1-$7.0
  746. .word $7.2-$7.0
  747. .word $7.3-$7.0
  748. .word $7.4-$7.0
  749. .word $7.5-$7.0
  750. .word $7.6-$7.0
  751. .word $7.7-$7.0
  752. })
  753. ! {store_little_endian}
  754. !
  755. ! parameter 1 address
  756. ! parameter 2 source left
  757. ! parameter 3 source right
  758. ! parameter 4 temporary
  ! parameter 5 label
  !
  ! Stores 8 bytes: parameter 2 to bytes 0 to 3 and parameter 3 to bytes
  ! 4 to 7, each with the least significant byte at the lowest address.
  ! The address and source registers are left unchanged.
  ! Two labels are emitted: parameter 5 itself and parameter 5 followed by
  ! the letter a, so each use needs a distinct label.
  759. define(store_little_endian, {
  760. ! {store_little_endian}
  761. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  762. ! rightmost in register to first in memory
  763. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  ! fast path for a 4-byte aligned address: two word stores through the
  ! little endian address space 0x88; otherwise branch to the byte stores
  764. andcc $1, 3, global0
  765. bne,pn %icc, $5
  766. nop
  767. sta $2, [$1] 0x88
  768. add $1, 4, $4
  769. ba,pt %icc, $5a
  770. sta $3, [$4] 0x88 ! second word, stored in the branch delay slot
  771. #endif
  772. $5:
  ! byte by byte path, works for any alignment
  773. and $2, 255, $4
  774. stub $4, [$1+0]
  775. srl $2, 8, $4
  776. and $4, 255, $4
  777. stub $4, [$1+1]
  778. srl $2, 16, $4
  779. and $4, 255, $4
  780. stub $4, [$1+2]
  781. srl $2, 24, $4
  782. stub $4, [$1+3]
  783. and $3, 255, $4
  784. stub $4, [$1+0+4]
  785. srl $3, 8, $4
  786. and $4, 255, $4
  787. stub $4, [$1+1+4]
  788. srl $3, 16, $4
  789. and $4, 255, $4
  790. stub $4, [$1+2+4]
  791. srl $3, 24, $4
  792. stub $4, [$1+3+4]
  793. $5a:
  794. })
  795. ! {store_n_bytes}
  796. !
  797. ! Stores 1 to 7 bytes little endian
  798. !
  799. ! parameter 1 address
  800. ! parameter 2 length
  801. ! parameter 3 source register left
  802. ! parameter 4 source register right
  803. ! parameter 5 temp
  804. ! parameter 6 temp2
  805. ! parameter 7 label
  806. ! parameter 8 return label
  !
  ! Bytes 0 to 3 are taken from parameter 4 and bytes 4 to 6 from
  ! parameter 3, least significant byte first.
  ! Dispatch goes through a table of offsets relative to the call label;
  ! control enters at the case for the given length and falls through the
  ! cases for the lower byte positions, then branches to the return label.
  ! The call overwrites %o7.
  ! The length must be 1 to 7: table entry 0 holds offset 0, which would
  ! send the jump back to the call itself.
  807. define(store_n_bytes, {
  808. ! {store_n_bytes}
  809. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  810. $7.0: call .+8 ! only used to get the current address into %o7
  811. sll $2, 2, $6 ! table offset = length * 4 (delay slot)
  812. add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
  813. add $5, $6, $5
  814. ld [$5], $5 ! offset of the selected case from the call label
  815. jmp %o7+$5
  816. nop
  817. $7.7:
  818. srl $3, 16, $5
  819. and $5, 0xff, $5
  820. stub $5, [$1+6]
  821. $7.6:
  822. srl $3, 8, $5
  823. and $5, 0xff, $5
  824. stub $5, [$1+5]
  825. $7.5:
  826. and $3, 0xff, $5
  827. stub $5, [$1+4]
  828. $7.4:
  829. srl $4, 24, $5
  830. stub $5, [$1+3]
  831. $7.3:
  832. srl $4, 16, $5
  833. and $5, 0xff, $5
  834. stub $5, [$1+2]
  835. $7.2:
  836. srl $4, 8, $5
  837. and $5, 0xff, $5
  838. stub $5, [$1+1]
  839. $7.1:
  840. and $4, 0xff, $5
  841. ba $8
  842. stub $5, [$1] ! last byte stored in the branch delay slot
  843. .align 4
  844. $7.jmp.table:
  845. .word 0
  846. .word $7.1-$7.0
  847. .word $7.2-$7.0
  848. .word $7.3-$7.0
  849. .word $7.4-$7.0
  850. .word $7.5-$7.0
  851. .word $7.6-$7.0
  852. .word $7.7-$7.0
  853. })
  854. define(testvalue,{1})
  ! Debugging aid: fills the working registers with the test constant
  ! defined on the line above. Up to eight extra registers can be passed
  ! as parameters 1 to 8; empty parameters are skipped.
  ! Overwrites local0 to local7, out0 to out5 and global1 to global5.
  855. define(register_init, {
  856. ! For test purposes:
  857. sethi %hi(testvalue), local0
  858. or local0, %lo(testvalue), local0
  859. ifelse($1,{},{}, {mov local0, $1})
  860. ifelse($2,{},{}, {mov local0, $2})
  861. ifelse($3,{},{}, {mov local0, $3})
  862. ifelse($4,{},{}, {mov local0, $4})
  863. ifelse($5,{},{}, {mov local0, $5})
  864. ifelse($6,{},{}, {mov local0, $6})
  865. ifelse($7,{},{}, {mov local0, $7})
  866. ifelse($8,{},{}, {mov local0, $8})
  867. mov local0, local1
  868. mov local0, local2
  869. mov local0, local3
  870. mov local0, local4
  871. mov local0, local5
  872. mov local0, local7
  873. mov local0, local6
  874. mov local0, out0
  875. mov local0, out1
  876. mov local0, out2
  877. mov local0, out3
  878. mov local0, out4
  879. mov local0, out5
  880. mov local0, global1
  881. mov local0, global2
  882. mov local0, global3
  883. mov local0, global4
  884. mov local0, global5
  885. })
  886. .section ".text"
  887. .align 32
  ! Internal subroutine: the 16 encryption rounds, ending in retl.
  ! It does not open a register window of its own, so it works on the
  ! registers of its caller: halves in in5 (first work) and out5 (first
  ! use), first round key already in out0/out1, loop counter in out4.
  ! Entered with call at .des_enc, or at the loop label .des_enc.1 by code
  ! that has already prepared local1 and local5.
  888. .des_enc:
  889. ! key address in3
  890. ! loads key next encryption/decryption first round from [in4]
  891. rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
  892. .align 32
  ! Internal subroutine: the 16 decryption rounds, ending in retl.
  ! Same conventions as above with the roles of in5 and out5 exchanged and
  ! the key pointer in4 stepping downwards.
  893. .des_dec:
  894. ! implemented with out5 as first parameter to avoid
  895. ! register exchange in ede modes
  896. ! key address in4
  897. ! loads key next encryption/decryption first round from [in3]
  898. rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
  899. ! void DES_encrypt1(data, ks, enc)
  900. ! *******************************
  ! After the save: in0 = data, in1 = ks, in2 = enc.
  ! Reads the two 32-bit words at [in0] and [in0+4], runs the initial
  ! permutation, the 16 rounds and the final permutation, and stores the
  ! result back to the same two words.
  ! enc equal to 0 selects decryption, which starts at ks+120 and walks the
  ! key schedule downwards.
  901. .align 32
  902. .global DES_encrypt1
  903. .type DES_encrypt1,#function
  904. DES_encrypt1:
  905. save %sp, FRAME, %sp
  ! position independent address computation: the call puts the address of
  ! label 1 into %o7, so global1 becomes the address of .PIC.DES_SPtrans
  ! and out2 the address of .des_and
  906. sethi %hi(.PIC.DES_SPtrans-1f),global1
  907. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  908. 1: call .+8
  909. add %o7,global1,global1
  910. sub global1,.PIC.DES_SPtrans-.des_and,out2
  911. ld [in0], in5 ! left
  912. cmp in2, 0 ! enc
  913. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  914. be,pn %icc, .encrypt.dec ! enc/dec
  915. #else
  916. be .encrypt.dec
  917. #endif
  918. ld [in0+4], out5 ! right
  919. ! parameter 6 1/2 for include encryption/decryption
  920. ! parameter 7 1 for move in1 to in3
  921. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  ! encryption: the rounds are expanded inline here, without retl
  922. ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
  923. rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
  924. fp_macro(in5, out5, 1) ! 1 for store to [in0]
  925. ret
  926. restore
  927. .encrypt.dec:
  928. add in1, 120, in3 ! use last subkey for first round
  929. ! parameter 6 1/2 for include encryption/decryption
  930. ! parameter 7 1 for move in1 to in3
  931. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  ! decryption: the initial permutation ends with a call into the shared
  ! decryption rounds, which return here for the final permutation
  932. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
  933. fp_macro(out5, in5, 1) ! 1 for store to [in0]
  934. ret
  935. restore
  936. .DES_encrypt1.end:
  937. .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
  938. ! void DES_encrypt2(data, ks, enc)
  939. !*********************************
  940. ! encrypts/decrypts without initial/final permutation
  ! After the save: in0 = data, in1 = ks, in2 = enc.
  ! Both words are rotated 3 left before the rounds and 3 right afterwards.
  ! in0 and in1 are reused as temporaries for the final rotate, so the data
  ! pointer is parked in the stack frame and reloaded before the stores.
  ! enc equal to 0 selects decryption, which starts at ks+120.
  941. .align 32
  942. .global DES_encrypt2
  943. .type DES_encrypt2,#function
  944. DES_encrypt2:
  945. save %sp, FRAME, %sp
  ! position independent address computation: the call puts the address of
  ! label 1 into %o7, so global1 becomes the address of .PIC.DES_SPtrans
  ! and out2 the address of .des_and
  946. sethi %hi(.PIC.DES_SPtrans-1f),global1
  947. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  948. 1: call .+8
  949. add %o7,global1,global1
  950. sub global1,.PIC.DES_SPtrans-.des_and,out2
  951. ! Set sbox address 1 to 6 and rotate halfs 3 left
  952. ! Errors caught by destest? Yes. Still? *NO*
  953. !sethi %hi(DES_SPtrans), global1 ! address sbox 1
  954. !or global1, %lo(DES_SPtrans), global1 ! sbox 1
  955. add global1, 256, global2 ! sbox 2
  956. add global1, 512, global3 ! sbox 3
  957. ld [in0], out5 ! right
  958. add global1, 768, global4 ! sbox 4
  959. add global1, 1024, global5 ! sbox 5
  960. ld [in0+4], in5 ! left
  961. add global1, 1280, local6 ! sbox 6
  962. add global1, 1792, out3 ! sbox 8
  963. ! rotate
  964. sll in5, 3, local5
  965. mov in1, in3 ! key address to in3
  966. sll out5, 3, local7
  967. srl in5, 29, in5
  968. srl out5, 29, out5
  969. add in5, local5, in5
  970. add out5, local7, out5
  971. cmp in2, 0
  972. ! we use our own stackframe
  973. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  974. be,pn %icc, .encrypt2.dec ! decryption
  975. #else
  976. be .encrypt2.dec
  977. #endif
  978. STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! park the data pointer (branch delay slot, runs on both paths)
  979. ld [in3], out0 ! key 7531 first round
  980. mov LOOPS, out4 ! loop counter
  981. ld [in3+4], out1 ! key 8642 first round
  982. sethi %hi(0x0000FC00), local5
  983. call .des_enc
  984. mov in3, in4 ! delay slot: address for the next-key load done inside the rounds
  985. ! rotate
  986. sll in5, 29, in0
  987. srl in5, 3, in5
  988. sll out5, 29, in1
  989. add in5, in0, in5
  990. srl out5, 3, out5
  991. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! data pointer back
  992. add out5, in1, out5
  993. st in5, [in0]
  994. st out5, [in0+4]
  995. ret
  996. restore
  997. .encrypt2.dec:
  998. add in3, 120, in4 ! decryption starts at the last subkey
  999. ld [in4], out0 ! key 7531 first round
  1000. mov LOOPS, out4 ! loop counter
  1001. ld [in4+4], out1 ! key 8642 first round
  1002. sethi %hi(0x0000FC00), local5
  1003. mov in5, local1 ! left expected in out5
  1004. mov out5, in5
  1005. call .des_dec
  1006. mov local1, out5 ! delay slot: completes the exchange of in5 and out5
  1007. .encrypt2.finish:
  1008. ! rotate
  1009. sll in5, 29, in0
  1010. srl in5, 3, in5
  1011. sll out5, 29, in1
  1012. add in5, in0, in5
  1013. srl out5, 3, out5
  1014. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! data pointer back
  1015. add out5, in1, out5
  1016. st out5, [in0]
  1017. st in5, [in0+4]
  1018. ret
  1019. restore
  1020. .DES_encrypt2.end:
  1021. .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
  1022. ! void DES_encrypt3(data, ks1, ks2, ks3)
  1023. ! **************************************
  ! After the save: in0 = data, in1 = ks1, in2 = ks2, in3 = ks3.
  ! One initial permutation, then three passes of 16 rounds (encrypt with
  ! ks1, decrypt with ks2 starting at ks2+120, encrypt with ks3), then one
  ! final permutation that stores the result back to [in0] and [in0+4].
  ! Each pass loads the first round key of the following pass itself.
  1024. .align 32
  1025. .global DES_encrypt3
  1026. .type DES_encrypt3,#function
  1027. DES_encrypt3:
  1028. save %sp, FRAME, %sp
  ! position independent address computation: the call puts the address of
  ! label 1 into %o7, so global1 becomes the address of .PIC.DES_SPtrans
  ! and out2 the address of .des_and
  1029. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1030. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1031. 1: call .+8
  1032. add %o7,global1,global1
  1033. sub global1,.PIC.DES_SPtrans-.des_and,out2
  1034. ld [in0], in5 ! left
  1035. add in2, 120, in4 ! ks2
  1036. ld [in0+4], out5 ! right
  1037. mov in3, in2 ! save ks3
  1038. ! parameter 6 1/2 for include encryption/decryption
  1039. ! parameter 7 1 for mov in1 to in3
  1040. ! parameter 8 1 for mov in3 to in4
  1041. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  ! first pass: the initial permutation moves ks1 to in3 and ends with a
  ! call into the shared encryption rounds
  1042. ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
  ! second pass: decryption, key pointer in4
  1043. call .des_dec
  1044. mov in2, in3 ! preload ks3
  ! third pass: encryption, key pointer in3
  1045. call .des_enc
  1046. nop
  1047. fp_macro(in5, out5, 1)
  1048. ret
  1049. restore
  1050. .DES_encrypt3.end:
  1051. .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
  1052. ! void DES_decrypt3(data, ks1, ks2, ks3)
  1053. ! **************************************
  ! Inverse of the triple DES block operation: the two 32-bit words at
  ! [data] and [data+4] go through a decrypt pass with ks3, an encrypt pass
  ! with ks2 and a decrypt pass with ks1. The permutation macros are not
  ! visible in this section; the result is presumably written back to data
  ! by the final permutation, whose third argument is 1 here - TODO confirm.
  1054. .align 32
  1055. .global DES_decrypt3
  1056. .type DES_decrypt3,#function
  1057. DES_decrypt3:
  1058. save %sp, FRAME, %sp ! new register window and stack frame
  ! position independent address lookup: the offset from label 1 to the
  ! sbox table is built as a constant and added to the address of label 1
  1059. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1060. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1061. 1: call .+8 ! only used to get the address of label 1 into %o7
  1062. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  1063. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1064. ld [in0], in5 ! left
  1065. add in3, 120, in4 ! ks3, advanced 120 bytes for the first decrypt pass
  1066. ld [in0+4], out5 ! right
  1067. mov in2, in3 ! ks2, key pointer for the encrypt pass
  1068. ! parameter 6 1/2 for include encryption/decryption
  1069. ! parameter 7 1 for mov in1 to in3
  1070. ! parameter 8 1 for mov in3 to in4
  1071. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1072. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) ! per the notes above: first decrypt pass inlined, key pointer in4
  1073. call .des_enc ! second pass, key pointer in3
  1074. add in1, 120, in4 ! delay slot: preload ks1, advanced 120 bytes
  1075. call .des_dec ! third pass, key pointer in4
  1076. nop ! delay slot
  1077. fp_macro(out5, in5, 1) ! final permutation, left and right swapped relative to the encrypt routine
  1078. ret
  1079. restore ! delay slot: release the register window
  1080. .DES_decrypt3.end:
  1081. .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
  1082. ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
  1083. ! *****************************************************************
  ! DES in CBC mode. A zero enc argument selects the decrypt path at
  ! .ncbc.dec, anything else encrypts.
  ! Encrypt path: the chaining value is loaded from ivec into in5/out5,
  ! each input block is xored into it before the rounds, and the chaining
  ! value left in in5/out5 is stored back to ivec at .ncbc.enc.finish.
  ! A trailing partial block of 1 to 7 bytes is fetched by the n-byte
  ! loader, which is given .ncbc.enc.next.block_1 as its return label.
  ! Decrypt path: a length of zero or less returns at once without storing
  ! to ivec; otherwise in0/in1, which hold the most recently loaded input
  ! block, are stored to ivec at .ncbc.dec.store.iv.
  1084. .align 32
  1085. .global DES_ncbc_encrypt
  1086. .type DES_ncbc_encrypt,#function
  1087. DES_ncbc_encrypt:
  1088. save %sp, FRAME, %sp ! new register window and stack frame
  ! m4 names for the stack slots that hold the input, output and ivec pointers
  1089. define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
  1090. define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
  1091. define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  ! position independent address lookup: the offset from label 1 to the
  ! sbox table is built as a constant and added to the address of label 1
  1092. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1093. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1094. 1: call .+8 ! only used to get the address of label 1 into %o7
  1095. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  1096. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1097. cmp in5, 0 ! enc
  1098. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1099. be,pn %icc, .ncbc.dec
  1100. #else
  1101. be .ncbc.dec
  1102. #endif
  1103. STPTR in4, IVEC ! delay slot, runs on both paths: keep the ivec pointer on the stack
  1104. ! addr left right temp label
  1105. load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
  1106. addcc in2, -8, in2 ! bytes missing when first block done
  1107. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1108. bl,pn %icc, .ncbc.enc.seven.or.less
  1109. #else
  1110. bl .ncbc.enc.seven.or.less
  1111. #endif
  1112. mov in3, in4 ! schedule
  1113. .ncbc.enc.next.block:
  1114. load_little_endian(in0, out4, global4, local3, .LLE2) ! block
  1115. .ncbc.enc.next.block_1:
  1116. xor in5, out4, in5 ! iv xor
  1117. xor out5, global4, out5 ! iv xor
  1118. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  1119. ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
  1120. .ncbc.enc.next.block_2:
  1121. !// call .des_enc ! compares in2 to 8
  1122. ! rounds inlined for alignment purposes
  1123. add global1, 768, global4 ! address sbox 4 since register used below
  1124. rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
  ! the branch below relies on the comparison of in2 with 8 made in the rounds
  1125. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1126. bl,pn %icc, .ncbc.enc.next.block_fp
  1127. #else
  1128. bl .ncbc.enc.next.block_fp
  1129. #endif
  1130. add in0, 8, in0 ! input address
  1131. ! If 8 or more bytes are to be encrypted after this block,
  1132. ! we combine final permutation for this block with initial
  1133. ! permutation for next block. Load next block:
  1134. load_little_endian(in0, global3, global4, local5, .LLE12)
  1135. ! parameter 1 original left
  1136. ! parameter 2 original right
  1137. ! parameter 3 left ip
  1138. ! parameter 4 right ip
  1139. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1140. ! 2: mov in4 to in3
  1141. !
  1142. ! also adds -8 to length in2 and loads loop counter to out4
  1143. fp_ip_macro(out0, out1, global3, global4, 2)
  1144. store_little_endian(in1, out0, out1, local3, .SLE10) ! block
  1145. ld [in3], out0 ! key 7531 first round next block
  1146. mov in5, local1 ! keep in5, it is overwritten by the next xor
  1147. xor global3, out5, in5 ! iv xor next block
  1148. ld [in3+4], out1 ! key 8642
  1149. add global1, 512, global3 ! address sbox 3 since register used
  1150. xor global4, local1, out5 ! iv xor next block
  1151. ba .ncbc.enc.next.block_2
  1152. add in1, 8, in1 ! delay slot: output address
  1153. .ncbc.enc.next.block_fp:
  1154. fp_macro(in5, out5)
  1155. store_little_endian(in1, in5, out5, local3, .SLE1) ! block
  1156. addcc in2, -8, in2 ! bytes missing when next block done
  1157. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1158. bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
  1159. #else
  1160. bpos .ncbc.enc.next.block
  1161. #endif
  1162. add in1, 8, in1 ! delay slot: advance the output address
  1163. .ncbc.enc.seven.or.less:
  1164. cmp in2, -8 ! in2 is bytes left minus 8, so -8 or below means nothing remains
  1165. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1166. ble,pt %icc, .ncbc.enc.finish
  1167. #else
  1168. ble .ncbc.enc.finish
  1169. #endif
  1170. nop ! delay slot
  1171. add in2, 8, local1 ! bytes to load
  1172. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1173. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
  1174. ! Loads 1 to 7 bytes little endian to global4, out4
  1175. .ncbc.enc.finish:
  1176. LDPTR IVEC, local4
  1177. store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
  1178. ret
  1179. restore ! delay slot: release the register window
  1180. .ncbc.dec:
  1181. STPTR in0, INPUT
  1182. cmp in2, 0 ! length
  1183. add in3, 120, in3 ! schedule advanced 120 bytes for decryption
  1184. LDPTR IVEC, local7 ! ivec
  1185. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1186. ble,pn %icc, .ncbc.dec.finish
  1187. #else
  1188. ble .ncbc.dec.finish
  1189. #endif
  1190. mov in3, in4 ! schedule
  1191. STPTR in1, OUTPUT
  1192. mov in0, local5 ! input
  1193. load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
  1194. .ncbc.dec.next.block:
  1195. load_little_endian(local5, in5, out5, local3, .LLE4) ! block
  1196. ! parameter 6 1/2 for include encryption/decryption
  1197. ! parameter 7 1 for mov in1 to in3
  1198. ! parameter 8 1 for mov in3 to in4
  1199. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
  1200. fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
  1201. ! in2 is bytes left to be stored
  1202. ! in2 is compared to 8 in the rounds
  1203. xor out5, in0, out4 ! iv xor
  1204. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1205. bl,pn %icc, .ncbc.dec.seven.or.less
  1206. #else
  1207. bl .ncbc.dec.seven.or.less
  1208. #endif
  1209. xor in5, in1, global4 ! iv xor
  1210. ! Load ivec next block now, since input and output address might be the same.
  1211. load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
  1212. store_little_endian(local7, out4, global4, local3, .SLE3)
  1213. STPTR local5, INPUT
  1214. add local7, 8, local7 ! advance the output address
  1215. addcc in2, -8, in2 ! bytes left after this block
  1216. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1217. bg,pt %icc, .ncbc.dec.next.block
  1218. #else
  1219. bg .ncbc.dec.next.block
  1220. #endif
  1221. STPTR local7, OUTPUT
  1222. .ncbc.dec.store.iv:
  1223. LDPTR IVEC, local4 ! ivec
  1224. store_little_endian(local4, in0, in1, local5, .SLE4)
  1225. .ncbc.dec.finish:
  1226. ret
  1227. restore ! delay slot: release the register window
  1228. .ncbc.dec.seven.or.less:
  1229. load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
  ! partial last block: the n-byte store is given .ncbc.dec.store.iv as its return label
  1230. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
  1231. .DES_ncbc_encrypt.end:
  1232. .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
  1233. ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
  1234. ! **************************************************************************
  ! Triple DES in CBC mode. The ivec and enc arguments are read from memory
  ! relative to %fp; a zero enc selects the decrypt path at .ede3.dec,
  ! anything else encrypts.
  ! Encrypt path: the chaining value is loaded from ivec into in5/out5, each
  ! input block is xored into it, then run through encrypt with ks1, decrypt
  ! with ks2 and encrypt with ks3. The chaining value left in in5/out5 is
  ! stored back to ivec at .ede3.enc.finish. A trailing partial block of
  ! 1 to 7 bytes is fetched by the n-byte loader, which is given
  ! .ede3.enc.next.block_1 as its return label.
  ! Decrypt path: a length of zero or less returns at once without storing
  ! to ivec; otherwise each block goes through decrypt with ks3, encrypt with
  ! ks2 and decrypt with ks1, and in0/in1, which hold the most recently
  ! loaded input block, are stored to ivec at .ede3.dec.store.iv.
  1235. .align 32
  1236. .global DES_ede3_cbc_encrypt
  1237. .type DES_ede3_cbc_encrypt,#function
  1238. DES_ede3_cbc_encrypt:
  1239. save %sp, FRAME, %sp ! new register window and stack frame
  ! m4 names for the stack slots that hold the three key schedule pointers
  1240. define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
  1241. define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  1242. define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
  ! position independent address lookup: the offset from label 1 to the
  ! sbox table is built as a constant and added to the address of label 1
  1243. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1244. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1245. 1: call .+8 ! only used to get the address of label 1 into %o7
  1246. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  1247. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1248. LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
  1249. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1250. cmp local3, 0 ! enc
  1251. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1252. be,pn %icc, .ede3.dec
  1253. #else
  1254. be .ede3.dec
  1255. #endif
  1256. STPTR in4, KS2 ! delay slot, runs on both paths: ks2 pointer to its stack slot
  1257. STPTR in5, KS3
  1258. load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
  1259. addcc in2, -8, in2 ! bytes missing after next block
  1260. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1261. bl,pn %icc, .ede3.enc.seven.or.less
  1262. #else
  1263. bl .ede3.enc.seven.or.less
  1264. #endif
  1265. STPTR in3, KS1
  1266. .ede3.enc.next.block:
  1267. load_little_endian(in0, out4, global4, local3, .LLE7)
  1268. .ede3.enc.next.block_1:
  1269. LDPTR KS2, in4
  1270. xor in5, out4, in5 ! iv xor
  1271. xor out5, global4, out5 ! iv xor
  1272. LDPTR KS1, in3
  1273. add in4, 120, in4 ! for decryption we use last subkey first
  1274. nop
  1275. ip_macro(in5, out5, in5, out5, in3)
  1276. .ede3.enc.next.block_2:
  1277. call .des_enc ! ks1 in3
  1278. nop ! delay slot
  1279. call .des_dec ! ks2 in4
  1280. LDPTR KS3, in3
  1281. call .des_enc ! ks3 in3 compares in2 to 8
  1282. nop ! delay slot
  1283. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1284. bl,pn %icc, .ede3.enc.next.block_fp
  1285. #else
  1286. bl .ede3.enc.next.block_fp
  1287. #endif
  1288. add in0, 8, in0 ! delay slot: advance the input address
  1289. ! If 8 or more bytes are to be encrypted after this block,
  1290. ! we combine final permutation for this block with initial
  1291. ! permutation for next block. Load next block:
  1292. load_little_endian(in0, global3, global4, local5, .LLE11)
  1293. ! parameter 1 original left
  1294. ! parameter 2 original right
  1295. ! parameter 3 left ip
  1296. ! parameter 4 right ip
  1297. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1298. ! 2: mov in4 to in3
  1299. !
  1300. ! also adds -8 to length in2 and loads loop counter to out4
  1301. fp_ip_macro(out0, out1, global3, global4, 1)
  1302. store_little_endian(in1, out0, out1, local3, .SLE9) ! block
  1303. mov in5, local1 ! keep in5, it is overwritten by the next xor
  1304. xor global3, out5, in5 ! iv xor next block
  1305. ld [in3], out0 ! key 7531
  1306. add global1, 512, global3 ! address sbox 3
  1307. xor global4, local1, out5 ! iv xor next block
  1308. ld [in3+4], out1 ! key 8642
  1309. add global1, 768, global4 ! address sbox 4
  1310. ba .ede3.enc.next.block_2
  1311. add in1, 8, in1 ! delay slot: advance the output address
  1312. .ede3.enc.next.block_fp:
  1313. fp_macro(in5, out5)
  1314. store_little_endian(in1, in5, out5, local3, .SLE5) ! block
  1315. addcc in2, -8, in2 ! bytes missing when next block done
  1316. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1317. bpos,pt %icc, .ede3.enc.next.block
  1318. #else
  1319. bpos .ede3.enc.next.block
  1320. #endif
  1321. add in1, 8, in1 ! delay slot: advance the output address
  1322. .ede3.enc.seven.or.less:
  1323. cmp in2, -8 ! in2 is bytes left minus 8, so -8 or below means nothing remains
  1324. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1325. ble,pt %icc, .ede3.enc.finish
  1326. #else
  1327. ble .ede3.enc.finish
  1328. #endif
  1329. nop ! delay slot
  1330. add in2, 8, local1 ! bytes to load
  1331. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1332. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
  1333. .ede3.enc.finish:
  1334. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1335. store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
  1336. ret
  1337. restore ! delay slot: release the register window
  1338. .ede3.dec:
  1339. STPTR in0, INPUT
  1340. add in5, 120, in5 ! ks3 advanced 120 bytes for decryption
  1341. STPTR in1, OUTPUT
  1342. mov in0, local5 ! input
  1343. add in3, 120, in3 ! ks1 advanced 120 bytes for decryption
  1344. STPTR in3, KS1
  1345. cmp in2, 0 ! length
  ! an empty input is the unlikely case, so the V9 branch carries the same
  ! not-taken hint as the matching check on the ncbc decrypt path
  1346. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1347. ble,pn %icc, .ede3.dec.finish
  1348. #else
  1349. ble .ede3.dec.finish
  1350. #endif
  1351. STPTR in5, KS3
  1352. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
  1353. load_little_endian(local7, in0, in1, local3, .LLE8)
  1354. .ede3.dec.next.block:
  1355. load_little_endian(local5, in5, out5, local3, .LLE9)
  1356. ! parameter 6 1/2 for include encryption/decryption
  1357. ! parameter 7 1 for mov in1 to in3
  1358. ! parameter 8 1 for mov in3 to in4
  1359. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1360. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
  1361. call .des_enc ! ks2 in3
  1362. LDPTR KS1, in4
  1363. call .des_dec ! ks1 in4
  1364. nop ! delay slot
  1365. fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
  1366. ! in2 is bytes left to be stored
  1367. ! in2 is compared to 8 in the rounds
  1368. xor out5, in0, out4 ! iv xor
  1369. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1370. bl,pn %icc, .ede3.dec.seven.or.less
  1371. #else
  1372. bl .ede3.dec.seven.or.less
  1373. #endif
  1374. xor in5, in1, global4 ! delay slot: iv xor
  1375. load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
  1376. store_little_endian(local7, out4, global4, local3, .SLE7) ! block
  1377. STPTR local5, INPUT
  1378. addcc in2, -8, in2 ! bytes left after this block
  1379. add local7, 8, local7 ! advance the output address
  1380. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1381. bg,pt %icc, .ede3.dec.next.block
  1382. #else
  1383. bg .ede3.dec.next.block
  1384. #endif
  1385. STPTR local7, OUTPUT
  1386. .ede3.dec.store.iv:
  1387. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1388. store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
  1389. .ede3.dec.finish:
  1390. ret
  1391. restore ! delay slot: release the register window
  1392. .ede3.dec.seven.or.less:
  1393. load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
  ! partial last block: the n-byte store is given .ede3.dec.store.iv as its return label
  1394. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
  1395. .DES_ede3_cbc_encrypt.end:
  1396. .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
  1397. .align 256
  1398. .type .des_and,#object
  1399. .size .des_and,288 ! 256 table bytes plus 8 words of 4 bytes
  1400. .des_and:
  1401. ! This table is used for AND 0xFC when it is known that register
  1402. ! bits 8-31 are zero. Makes it possible to do three arithmetic
  1403. ! operations in one cycle.
  ! Entry i of the 256 byte table holds i with its two low bits cleared.
  1404. .byte 0, 0, 0, 0, 4, 4, 4, 4
  1405. .byte 8, 8, 8, 8, 12, 12, 12, 12
  1406. .byte 16, 16, 16, 16, 20, 20, 20, 20
  1407. .byte 24, 24, 24, 24, 28, 28, 28, 28
  1408. .byte 32, 32, 32, 32, 36, 36, 36, 36
  1409. .byte 40, 40, 40, 40, 44, 44, 44, 44
  1410. .byte 48, 48, 48, 48, 52, 52, 52, 52
  1411. .byte 56, 56, 56, 56, 60, 60, 60, 60
  1412. .byte 64, 64, 64, 64, 68, 68, 68, 68
  1413. .byte 72, 72, 72, 72, 76, 76, 76, 76
  1414. .byte 80, 80, 80, 80, 84, 84, 84, 84
  1415. .byte 88, 88, 88, 88, 92, 92, 92, 92
  1416. .byte 96, 96, 96, 96, 100, 100, 100, 100
  1417. .byte 104, 104, 104, 104, 108, 108, 108, 108
  1418. .byte 112, 112, 112, 112, 116, 116, 116, 116
  1419. .byte 120, 120, 120, 120, 124, 124, 124, 124
  1420. .byte 128, 128, 128, 128, 132, 132, 132, 132
  1421. .byte 136, 136, 136, 136, 140, 140, 140, 140
  1422. .byte 144, 144, 144, 144, 148, 148, 148, 148
  1423. .byte 152, 152, 152, 152, 156, 156, 156, 156
  1424. .byte 160, 160, 160, 160, 164, 164, 164, 164
  1425. .byte 168, 168, 168, 168, 172, 172, 172, 172
  1426. .byte 176, 176, 176, 176, 180, 180, 180, 180
  1427. .byte 184, 184, 184, 184, 188, 188, 188, 188
  1428. .byte 192, 192, 192, 192, 196, 196, 196, 196
  1429. .byte 200, 200, 200, 200, 204, 204, 204, 204
  1430. .byte 208, 208, 208, 208, 212, 212, 212, 212
  1431. .byte 216, 216, 216, 216, 220, 220, 220, 220
  1432. .byte 224, 224, 224, 224, 228, 228, 228, 228
  1433. .byte 232, 232, 232, 232, 236, 236, 236, 236
  1434. .byte 240, 240, 240, 240, 244, 244, 244, 244
  1435. .byte 248, 248, 248, 248, 252, 252, 252, 252
  1436. ! 5 numbers for initial/final permutation, then three further constants
  1437. .word 0x0f0f0f0f ! offset 256
  1438. .word 0x0000ffff ! 260
  1439. .word 0x33333333 ! 264
  1440. .word 0x00ff00ff ! 268
  1441. .word 0x55555555 ! 272
  1442. .word 0 ! 276
  1443. .word LOOPS ! 280
  1444. .word 0x0000FC00 ! 284, last word, so the object ends at offset 288
  1445. .global DES_SPtrans
  1446. .type DES_SPtrans,#object
  1447. .size DES_SPtrans,2048
  1448. .align 64
  ! Eight lookup tables of 64 words each, 256 bytes per table and 2048 bytes
  ! in all. The routines above reach the individual tables at 256 byte steps
  ! from this address, for example +512 is commented there as sbox 3.
  ! .PIC.DES_SPtrans is a second label at the same address, used by the
  ! position independent address computations in the routines.
  1449. DES_SPtrans:
  1450. .PIC.DES_SPtrans:
  1451. ! nibble 0
  1452. .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
  1453. .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
  1454. .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
  1455. .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
  1456. .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
  1457. .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
  1458. .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
  1459. .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
  1460. .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
  1461. .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
  1462. .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
  1463. .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
  1464. .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
  1465. .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
  1466. .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
  1467. .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
  1468. ! nibble 1
  1469. .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
  1470. .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
  1471. .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
  1472. .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
  1473. .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
  1474. .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
  1475. .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
  1476. .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
  1477. .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
  1478. .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
  1479. .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
  1480. .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
  1481. .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
  1482. .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
  1483. .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
  1484. .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
  1485. ! nibble 2
  1486. .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
  1487. .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
  1488. .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
  1489. .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
  1490. .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
  1491. .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
  1492. .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
  1493. .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
  1494. .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
  1495. .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
  1496. .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
  1497. .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
  1498. .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
  1499. .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
  1500. .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
  1501. .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
  1502. ! nibble 3
  1503. .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
  1504. .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
  1505. .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
  1506. .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
  1507. .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
  1508. .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
  1509. .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
  1510. .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
  1511. .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
  1512. .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
  1513. .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
  1514. .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
  1515. .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
  1516. .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
  1517. .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
  1518. .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
  1519. ! nibble 4
  1520. .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
  1521. .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
  1522. .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
  1523. .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
  1524. .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
  1525. .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
  1526. .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
  1527. .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
  1528. .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
  1529. .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
  1530. .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
  1531. .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
  1532. .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
  1533. .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
  1534. .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
  1535. .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
  1536. ! nibble 5
  1537. .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
  1538. .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
  1539. .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
  1540. .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
  1541. .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
  1542. .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
  1543. .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
  1544. .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
  1545. .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
  1546. .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
  1547. .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
  1548. .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
  1549. .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
  1550. .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
  1551. .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
  1552. .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
  1553. ! nibble 6
  1554. .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
  1555. .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
  1556. .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
  1557. .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
  1558. .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
  1559. .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
  1560. .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
  1561. .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
  1562. .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
  1563. .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
  1564. .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
  1565. .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
  1566. .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
  1567. .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
  1568. .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
  1569. .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
  1570. ! nibble 7
  1571. .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
  1572. .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
  1573. .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
  1574. .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
  1575. .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
  1576. .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
  1577. .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
  1578. .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
  1579. .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
  1580. .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
  1581. .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
  1582. .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
  1583. .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
  1584. .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
  1585. .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
  1586. .word 0x20000000, 0x20800080, 0x00020000, 0x00820080