#! /usr/bin/env perl
# Copyright 2014-2022 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2021- IBM Inc. All rights reserved
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com> for OpenSSL Project,
#
# GHASH is based on the Karatsuba multiplication method.
#
#    Xi xor X1
#
#    X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
#      (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
#      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
#      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
#      (X4.h * H.h  + X4.l * H.l  + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
#     ( H.l, H, H.h)
#     ( H^2.l, H^2, H^2.h)
#     ( H^3.l, H^3, H^3.h)
#     ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used,
#     vs0 - vs14 for round keys
#     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses stitched AES-GCM approach to improve overall performance.
# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
#
# Current large block (16384 bytes) performance per second with 128 bit key --
#
#                         Encrypt  Decrypt
# Power10[le] (3.5GHz)    5.32G    5.26G
#
# ===================================================================================
#
  48. # $output is the last argument if it looks like a file (it has an extension)
  49. # $flavour is the first argument if it doesn't look like a file
  50. $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
  51. $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
  52. if ($flavour =~ /64/) {
  53. $SIZE_T=8;
  54. $LRSAVE=2*$SIZE_T;
  55. $STU="stdu";
  56. $POP="ld";
  57. $PUSH="std";
  58. $UCMP="cmpld";
  59. $SHRI="srdi";
  60. } elsif ($flavour =~ /32/) {
  61. $SIZE_T=4;
  62. $LRSAVE=$SIZE_T;
  63. $STU="stwu";
  64. $POP="lwz";
  65. $PUSH="stw";
  66. $UCMP="cmplw";
  67. $SHRI="srwi";
  68. } else { die "nonsense $flavour"; }
  69. $sp="r1";
  70. $FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
  71. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  72. ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  73. ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
  74. die "can't locate ppc-xlate.pl";
  75. open STDOUT,"| $^X $xlate $flavour \"$output\""
  76. or die "can't call $xlate: $!";
  77. $code=<<___;
  78. .machine "any"
  79. .text
  80. # 4x loops
  81. # v15 - v18 - input states
  82. # vs1 - vs9 - round keys
  83. #
  84. .macro Loop_aes_middle4x
  85. xxlor 19+32, 1, 1
  86. xxlor 20+32, 2, 2
  87. xxlor 21+32, 3, 3
  88. xxlor 22+32, 4, 4
  89. vcipher 15, 15, 19
  90. vcipher 16, 16, 19
  91. vcipher 17, 17, 19
  92. vcipher 18, 18, 19
  93. vcipher 15, 15, 20
  94. vcipher 16, 16, 20
  95. vcipher 17, 17, 20
  96. vcipher 18, 18, 20
  97. vcipher 15, 15, 21
  98. vcipher 16, 16, 21
  99. vcipher 17, 17, 21
  100. vcipher 18, 18, 21
  101. vcipher 15, 15, 22
  102. vcipher 16, 16, 22
  103. vcipher 17, 17, 22
  104. vcipher 18, 18, 22
  105. xxlor 19+32, 5, 5
  106. xxlor 20+32, 6, 6
  107. xxlor 21+32, 7, 7
  108. xxlor 22+32, 8, 8
  109. vcipher 15, 15, 19
  110. vcipher 16, 16, 19
  111. vcipher 17, 17, 19
  112. vcipher 18, 18, 19
  113. vcipher 15, 15, 20
  114. vcipher 16, 16, 20
  115. vcipher 17, 17, 20
  116. vcipher 18, 18, 20
  117. vcipher 15, 15, 21
  118. vcipher 16, 16, 21
  119. vcipher 17, 17, 21
  120. vcipher 18, 18, 21
  121. vcipher 15, 15, 22
  122. vcipher 16, 16, 22
  123. vcipher 17, 17, 22
  124. vcipher 18, 18, 22
  125. xxlor 23+32, 9, 9
  126. vcipher 15, 15, 23
  127. vcipher 16, 16, 23
  128. vcipher 17, 17, 23
  129. vcipher 18, 18, 23
  130. .endm
  131. # 8x loops
  132. # v15 - v22 - input states
  133. # vs1 - vs9 - round keys
  134. #
  135. .macro Loop_aes_middle8x
  136. xxlor 23+32, 1, 1
  137. xxlor 24+32, 2, 2
  138. xxlor 25+32, 3, 3
  139. xxlor 26+32, 4, 4
  140. vcipher 15, 15, 23
  141. vcipher 16, 16, 23
  142. vcipher 17, 17, 23
  143. vcipher 18, 18, 23
  144. vcipher 19, 19, 23
  145. vcipher 20, 20, 23
  146. vcipher 21, 21, 23
  147. vcipher 22, 22, 23
  148. vcipher 15, 15, 24
  149. vcipher 16, 16, 24
  150. vcipher 17, 17, 24
  151. vcipher 18, 18, 24
  152. vcipher 19, 19, 24
  153. vcipher 20, 20, 24
  154. vcipher 21, 21, 24
  155. vcipher 22, 22, 24
  156. vcipher 15, 15, 25
  157. vcipher 16, 16, 25
  158. vcipher 17, 17, 25
  159. vcipher 18, 18, 25
  160. vcipher 19, 19, 25
  161. vcipher 20, 20, 25
  162. vcipher 21, 21, 25
  163. vcipher 22, 22, 25
  164. vcipher 15, 15, 26
  165. vcipher 16, 16, 26
  166. vcipher 17, 17, 26
  167. vcipher 18, 18, 26
  168. vcipher 19, 19, 26
  169. vcipher 20, 20, 26
  170. vcipher 21, 21, 26
  171. vcipher 22, 22, 26
  172. xxlor 23+32, 5, 5
  173. xxlor 24+32, 6, 6
  174. xxlor 25+32, 7, 7
  175. xxlor 26+32, 8, 8
  176. vcipher 15, 15, 23
  177. vcipher 16, 16, 23
  178. vcipher 17, 17, 23
  179. vcipher 18, 18, 23
  180. vcipher 19, 19, 23
  181. vcipher 20, 20, 23
  182. vcipher 21, 21, 23
  183. vcipher 22, 22, 23
  184. vcipher 15, 15, 24
  185. vcipher 16, 16, 24
  186. vcipher 17, 17, 24
  187. vcipher 18, 18, 24
  188. vcipher 19, 19, 24
  189. vcipher 20, 20, 24
  190. vcipher 21, 21, 24
  191. vcipher 22, 22, 24
  192. vcipher 15, 15, 25
  193. vcipher 16, 16, 25
  194. vcipher 17, 17, 25
  195. vcipher 18, 18, 25
  196. vcipher 19, 19, 25
  197. vcipher 20, 20, 25
  198. vcipher 21, 21, 25
  199. vcipher 22, 22, 25
  200. vcipher 15, 15, 26
  201. vcipher 16, 16, 26
  202. vcipher 17, 17, 26
  203. vcipher 18, 18, 26
  204. vcipher 19, 19, 26
  205. vcipher 20, 20, 26
  206. vcipher 21, 21, 26
  207. vcipher 22, 22, 26
  208. xxlor 23+32, 9, 9
  209. vcipher 15, 15, 23
  210. vcipher 16, 16, 23
  211. vcipher 17, 17, 23
  212. vcipher 18, 18, 23
  213. vcipher 19, 19, 23
  214. vcipher 20, 20, 23
  215. vcipher 21, 21, 23
  216. vcipher 22, 22, 23
  217. .endm
  218. #
  219. # Compute 4x hash values based on Karatsuba method.
  220. #
  221. ppc_aes_gcm_ghash:
  222. vxor 15, 15, 0
  223. xxlxor 29, 29, 29
  224. vpmsumd 23, 12, 15 # H4.L * X.L
  225. vpmsumd 24, 9, 16
  226. vpmsumd 25, 6, 17
  227. vpmsumd 26, 3, 18
  228. vxor 23, 23, 24
  229. vxor 23, 23, 25
  230. vxor 23, 23, 26 # L
  231. vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
  232. vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
  233. vpmsumd 26, 7, 17
  234. vpmsumd 27, 4, 18
  235. vxor 24, 24, 25
  236. vxor 24, 24, 26
  237. vxor 24, 24, 27 # M
  238. # sum hash and reduction with H Poly
  239. vpmsumd 28, 23, 2 # reduction
  240. xxlor 29+32, 29, 29
  241. vsldoi 26, 24, 29, 8 # mL
  242. vsldoi 29, 29, 24, 8 # mH
  243. vxor 23, 23, 26 # mL + L
  244. vsldoi 23, 23, 23, 8 # swap
  245. vxor 23, 23, 28
  246. vpmsumd 24, 14, 15 # H4.H * X.H
  247. vpmsumd 25, 11, 16
  248. vpmsumd 26, 8, 17
  249. vpmsumd 27, 5, 18
  250. vxor 24, 24, 25
  251. vxor 24, 24, 26
  252. vxor 24, 24, 27
  253. vxor 24, 24, 29
  254. # sum hash and reduction with H Poly
  255. vsldoi 27, 23, 23, 8 # swap
  256. vpmsumd 23, 23, 2
  257. vxor 27, 27, 24
  258. vxor 23, 23, 27
  259. xxlor 32, 23+32, 23+32 # update hash
  260. blr
  261. #
  262. # Combine two 4x ghash
  263. # v15 - v22 - input blocks
  264. #
  265. .macro ppc_aes_gcm_ghash2_4x
  266. # first 4x hash
  267. vxor 15, 15, 0 # Xi + X
  268. xxlxor 29, 29, 29
  269. vpmsumd 23, 12, 15 # H4.L * X.L
  270. vpmsumd 24, 9, 16
  271. vpmsumd 25, 6, 17
  272. vpmsumd 26, 3, 18
  273. vxor 23, 23, 24
  274. vxor 23, 23, 25
  275. vxor 23, 23, 26 # L
  276. vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
  277. vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
  278. vpmsumd 26, 7, 17
  279. vpmsumd 27, 4, 18
  280. vxor 24, 24, 25
  281. vxor 24, 24, 26
  282. # sum hash and reduction with H Poly
  283. vpmsumd 28, 23, 2 # reduction
  284. xxlor 29+32, 29, 29
  285. vxor 24, 24, 27 # M
  286. vsldoi 26, 24, 29, 8 # mL
  287. vsldoi 29, 29, 24, 8 # mH
  288. vxor 23, 23, 26 # mL + L
  289. vsldoi 23, 23, 23, 8 # swap
  290. vxor 23, 23, 28
  291. vpmsumd 24, 14, 15 # H4.H * X.H
  292. vpmsumd 25, 11, 16
  293. vpmsumd 26, 8, 17
  294. vpmsumd 27, 5, 18
  295. vxor 24, 24, 25
  296. vxor 24, 24, 26
  297. vxor 24, 24, 27 # H
  298. vxor 24, 24, 29 # H + mH
  299. # sum hash and reduction with H Poly
  300. vsldoi 27, 23, 23, 8 # swap
  301. vpmsumd 23, 23, 2
  302. vxor 27, 27, 24
  303. vxor 27, 23, 27 # 1st Xi
  304. # 2nd 4x hash
  305. vpmsumd 24, 9, 20
  306. vpmsumd 25, 6, 21
  307. vpmsumd 26, 3, 22
  308. vxor 19, 19, 27 # Xi + X
  309. vpmsumd 23, 12, 19 # H4.L * X.L
  310. vxor 23, 23, 24
  311. vxor 23, 23, 25
  312. vxor 23, 23, 26 # L
  313. vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L
  314. vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L
  315. vpmsumd 26, 7, 21
  316. vpmsumd 27, 4, 22
  317. vxor 24, 24, 25
  318. vxor 24, 24, 26
  319. # sum hash and reduction with H Poly
  320. vpmsumd 28, 23, 2 # reduction
  321. xxlor 29+32, 29, 29
  322. vxor 24, 24, 27 # M
  323. vsldoi 26, 24, 29, 8 # mL
  324. vsldoi 29, 29, 24, 8 # mH
  325. vxor 23, 23, 26 # mL + L
  326. vsldoi 23, 23, 23, 8 # swap
  327. vxor 23, 23, 28
  328. vpmsumd 24, 14, 19 # H4.H * X.H
  329. vpmsumd 25, 11, 20
  330. vpmsumd 26, 8, 21
  331. vpmsumd 27, 5, 22
  332. vxor 24, 24, 25
  333. vxor 24, 24, 26
  334. vxor 24, 24, 27 # H
  335. vxor 24, 24, 29 # H + mH
  336. # sum hash and reduction with H Poly
  337. vsldoi 27, 23, 23, 8 # swap
  338. vpmsumd 23, 23, 2
  339. vxor 27, 27, 24
  340. vxor 23, 23, 27
  341. xxlor 32, 23+32, 23+32 # update hash
  342. .endm
  343. #
  344. # Compute update single hash
  345. #
  346. .macro ppc_update_hash_1x
  347. vxor 28, 28, 0
  348. vxor 19, 19, 19
  349. vpmsumd 22, 3, 28 # L
  350. vpmsumd 23, 4, 28 # M
  351. vpmsumd 24, 5, 28 # H
  352. vpmsumd 27, 22, 2 # reduction
  353. vsldoi 25, 23, 19, 8 # mL
  354. vsldoi 26, 19, 23, 8 # mH
  355. vxor 22, 22, 25 # LL + LL
  356. vxor 24, 24, 26 # HH + HH
  357. vsldoi 22, 22, 22, 8 # swap
  358. vxor 22, 22, 27
  359. vsldoi 20, 22, 22, 8 # swap
  360. vpmsumd 22, 22, 2 # reduction
  361. vxor 20, 20, 24
  362. vxor 22, 22, 20
  363. vmr 0, 22 # update hash
  364. .endm
  365. #
  366. # ppc_aes_gcm_encrypt (const void *inp, void *out, size_t len,
  367. # const AES_KEY *key, unsigned char iv[16],
  368. # void *Xip);
  369. #
  370. # r3 - inp
  371. # r4 - out
  372. # r5 - len
  373. # r6 - AES round keys
  374. # r7 - iv
  375. # r8 - Xi, HPoli, hash keys
  376. #
  377. .global ppc_aes_gcm_encrypt
  378. .align 5
  379. ppc_aes_gcm_encrypt:
  380. _ppc_aes_gcm_encrypt:
  381. stdu 1,-512(1)
  382. mflr 0
  383. std 14,112(1)
  384. std 15,120(1)
  385. std 16,128(1)
  386. std 17,136(1)
  387. std 18,144(1)
  388. std 19,152(1)
  389. std 20,160(1)
  390. std 21,168(1)
  391. li 9, 256
  392. stvx 20, 9, 1
  393. addi 9, 9, 16
  394. stvx 21, 9, 1
  395. addi 9, 9, 16
  396. stvx 22, 9, 1
  397. addi 9, 9, 16
  398. stvx 23, 9, 1
  399. addi 9, 9, 16
  400. stvx 24, 9, 1
  401. addi 9, 9, 16
  402. stvx 25, 9, 1
  403. addi 9, 9, 16
  404. stvx 26, 9, 1
  405. addi 9, 9, 16
  406. stvx 27, 9, 1
  407. addi 9, 9, 16
  408. stvx 28, 9, 1
  409. addi 9, 9, 16
  410. stvx 29, 9, 1
  411. addi 9, 9, 16
  412. stvx 30, 9, 1
  413. addi 9, 9, 16
  414. stvx 31, 9, 1
  415. std 0, 528(1)
  416. # Load Xi
  417. lxvb16x 32, 0, 8 # load Xi
  418. # load Hash - h^4, h^3, h^2, h
  419. li 10, 32
  420. lxvd2x 2+32, 10, 8 # H Poli
  421. li 10, 48
  422. lxvd2x 3+32, 10, 8 # Hl
  423. li 10, 64
  424. lxvd2x 4+32, 10, 8 # H
  425. li 10, 80
  426. lxvd2x 5+32, 10, 8 # Hh
  427. li 10, 96
  428. lxvd2x 6+32, 10, 8 # H^2l
  429. li 10, 112
  430. lxvd2x 7+32, 10, 8 # H^2
  431. li 10, 128
  432. lxvd2x 8+32, 10, 8 # H^2h
  433. li 10, 144
  434. lxvd2x 9+32, 10, 8 # H^3l
  435. li 10, 160
  436. lxvd2x 10+32, 10, 8 # H^3
  437. li 10, 176
  438. lxvd2x 11+32, 10, 8 # H^3h
  439. li 10, 192
  440. lxvd2x 12+32, 10, 8 # H^4l
  441. li 10, 208
  442. lxvd2x 13+32, 10, 8 # H^4
  443. li 10, 224
  444. lxvd2x 14+32, 10, 8 # H^4h
  445. # initialize ICB: GHASH( IV ), IV - r7
  446. lxvb16x 30+32, 0, 7 # load IV - v30
  447. mr 12, 5 # length
  448. li 11, 0 # block index
  449. # counter 1
  450. vxor 31, 31, 31
  451. vspltisb 22, 1
  452. vsldoi 31, 31, 22,1 # counter 1
  453. # load round key to VSR
  454. lxv 0, 0(6)
  455. lxv 1, 0x10(6)
  456. lxv 2, 0x20(6)
  457. lxv 3, 0x30(6)
  458. lxv 4, 0x40(6)
  459. lxv 5, 0x50(6)
  460. lxv 6, 0x60(6)
  461. lxv 7, 0x70(6)
  462. lxv 8, 0x80(6)
  463. lxv 9, 0x90(6)
  464. lxv 10, 0xa0(6)
  465. # load rounds - 10 (128), 12 (192), 14 (256)
  466. lwz 9,240(6)
  467. #
  468. # vxor state, state, w # addroundkey
  469. xxlor 32+29, 0, 0
  470. vxor 15, 30, 29 # IV + round key - add round key 0
  471. cmpdi 9, 10
  472. beq Loop_aes_gcm_8x
  473. # load 2 more round keys (v11, v12)
  474. lxv 11, 0xb0(6)
  475. lxv 12, 0xc0(6)
  476. cmpdi 9, 12
  477. beq Loop_aes_gcm_8x
  478. # load 2 more round keys (v11, v12, v13, v14)
  479. lxv 13, 0xd0(6)
  480. lxv 14, 0xe0(6)
  481. cmpdi 9, 14
  482. beq Loop_aes_gcm_8x
  483. b aes_gcm_out
  484. .align 5
  485. Loop_aes_gcm_8x:
  486. mr 14, 3
  487. mr 9, 4
  488. # n blocks
  489. li 10, 128
  490. divdu 10, 5, 10 # n 128 bytes-blocks
  491. cmpdi 10, 0
  492. beq Loop_last_block
  493. vaddudm 30, 30, 31 # IV + counter
  494. vxor 16, 30, 29
  495. vaddudm 30, 30, 31
  496. vxor 17, 30, 29
  497. vaddudm 30, 30, 31
  498. vxor 18, 30, 29
  499. vaddudm 30, 30, 31
  500. vxor 19, 30, 29
  501. vaddudm 30, 30, 31
  502. vxor 20, 30, 29
  503. vaddudm 30, 30, 31
  504. vxor 21, 30, 29
  505. vaddudm 30, 30, 31
  506. vxor 22, 30, 29
  507. mtctr 10
  508. li 15, 16
  509. li 16, 32
  510. li 17, 48
  511. li 18, 64
  512. li 19, 80
  513. li 20, 96
  514. li 21, 112
  515. lwz 10, 240(6)
  516. Loop_8x_block:
  517. lxvb16x 15, 0, 14 # load block
  518. lxvb16x 16, 15, 14 # load block
  519. lxvb16x 17, 16, 14 # load block
  520. lxvb16x 18, 17, 14 # load block
  521. lxvb16x 19, 18, 14 # load block
  522. lxvb16x 20, 19, 14 # load block
  523. lxvb16x 21, 20, 14 # load block
  524. lxvb16x 22, 21, 14 # load block
  525. addi 14, 14, 128
  526. Loop_aes_middle8x
  527. xxlor 23+32, 10, 10
  528. cmpdi 10, 10
  529. beq Do_next_ghash
  530. # 192 bits
  531. xxlor 24+32, 11, 11
  532. vcipher 15, 15, 23
  533. vcipher 16, 16, 23
  534. vcipher 17, 17, 23
  535. vcipher 18, 18, 23
  536. vcipher 19, 19, 23
  537. vcipher 20, 20, 23
  538. vcipher 21, 21, 23
  539. vcipher 22, 22, 23
  540. vcipher 15, 15, 24
  541. vcipher 16, 16, 24
  542. vcipher 17, 17, 24
  543. vcipher 18, 18, 24
  544. vcipher 19, 19, 24
  545. vcipher 20, 20, 24
  546. vcipher 21, 21, 24
  547. vcipher 22, 22, 24
  548. xxlor 23+32, 12, 12
  549. cmpdi 10, 12
  550. beq Do_next_ghash
  551. # 256 bits
  552. xxlor 24+32, 13, 13
  553. vcipher 15, 15, 23
  554. vcipher 16, 16, 23
  555. vcipher 17, 17, 23
  556. vcipher 18, 18, 23
  557. vcipher 19, 19, 23
  558. vcipher 20, 20, 23
  559. vcipher 21, 21, 23
  560. vcipher 22, 22, 23
  561. vcipher 15, 15, 24
  562. vcipher 16, 16, 24
  563. vcipher 17, 17, 24
  564. vcipher 18, 18, 24
  565. vcipher 19, 19, 24
  566. vcipher 20, 20, 24
  567. vcipher 21, 21, 24
  568. vcipher 22, 22, 24
  569. xxlor 23+32, 14, 14
  570. cmpdi 10, 14
  571. beq Do_next_ghash
  572. b aes_gcm_out
  573. Do_next_ghash:
  574. #
  575. # last round
  576. vcipherlast 15, 15, 23
  577. vcipherlast 16, 16, 23
  578. xxlxor 47, 47, 15
  579. stxvb16x 47, 0, 9 # store output
  580. xxlxor 48, 48, 16
  581. stxvb16x 48, 15, 9 # store output
  582. vcipherlast 17, 17, 23
  583. vcipherlast 18, 18, 23
  584. xxlxor 49, 49, 17
  585. stxvb16x 49, 16, 9 # store output
  586. xxlxor 50, 50, 18
  587. stxvb16x 50, 17, 9 # store output
  588. vcipherlast 19, 19, 23
  589. vcipherlast 20, 20, 23
  590. xxlxor 51, 51, 19
  591. stxvb16x 51, 18, 9 # store output
  592. xxlxor 52, 52, 20
  593. stxvb16x 52, 19, 9 # store output
  594. vcipherlast 21, 21, 23
  595. vcipherlast 22, 22, 23
  596. xxlxor 53, 53, 21
  597. stxvb16x 53, 20, 9 # store output
  598. xxlxor 54, 54, 22
  599. stxvb16x 54, 21, 9 # store output
  600. addi 9, 9, 128
  601. # ghash here
  602. ppc_aes_gcm_ghash2_4x
  603. xxlor 27+32, 0, 0
  604. vaddudm 30, 30, 31 # IV + counter
  605. vmr 29, 30
  606. vxor 15, 30, 27 # add round key
  607. vaddudm 30, 30, 31
  608. vxor 16, 30, 27
  609. vaddudm 30, 30, 31
  610. vxor 17, 30, 27
  611. vaddudm 30, 30, 31
  612. vxor 18, 30, 27
  613. vaddudm 30, 30, 31
  614. vxor 19, 30, 27
  615. vaddudm 30, 30, 31
  616. vxor 20, 30, 27
  617. vaddudm 30, 30, 31
  618. vxor 21, 30, 27
  619. vaddudm 30, 30, 31
  620. vxor 22, 30, 27
  621. addi 12, 12, -128
  622. addi 11, 11, 128
  623. bdnz Loop_8x_block
  624. vmr 30, 29
  625. Loop_last_block:
  626. cmpdi 12, 0
  627. beq aes_gcm_out
  628. # loop last few blocks
  629. li 10, 16
  630. divdu 10, 12, 10
  631. mtctr 10
  632. lwz 10, 240(6)
  633. cmpdi 12, 16
  634. blt Final_block
  635. .macro Loop_aes_middle_1x
  636. xxlor 19+32, 1, 1
  637. xxlor 20+32, 2, 2
  638. xxlor 21+32, 3, 3
  639. xxlor 22+32, 4, 4
  640. vcipher 15, 15, 19
  641. vcipher 15, 15, 20
  642. vcipher 15, 15, 21
  643. vcipher 15, 15, 22
  644. xxlor 19+32, 5, 5
  645. xxlor 20+32, 6, 6
  646. xxlor 21+32, 7, 7
  647. xxlor 22+32, 8, 8
  648. vcipher 15, 15, 19
  649. vcipher 15, 15, 20
  650. vcipher 15, 15, 21
  651. vcipher 15, 15, 22
  652. xxlor 19+32, 9, 9
  653. vcipher 15, 15, 19
  654. .endm
  655. Next_rem_block:
  656. lxvb16x 15, 0, 14 # load block
  657. Loop_aes_middle_1x
  658. xxlor 23+32, 10, 10
  659. cmpdi 10, 10
  660. beq Do_next_1x
  661. # 192 bits
  662. xxlor 24+32, 11, 11
  663. vcipher 15, 15, 23
  664. vcipher 15, 15, 24
  665. xxlor 23+32, 12, 12
  666. cmpdi 10, 12
  667. beq Do_next_1x
  668. # 256 bits
  669. xxlor 24+32, 13, 13
  670. vcipher 15, 15, 23
  671. vcipher 15, 15, 24
  672. xxlor 23+32, 14, 14
  673. cmpdi 10, 14
  674. beq Do_next_1x
  675. Do_next_1x:
  676. vcipherlast 15, 15, 23
  677. xxlxor 47, 47, 15
  678. stxvb16x 47, 0, 9 # store output
  679. addi 14, 14, 16
  680. addi 9, 9, 16
  681. vmr 28, 15
  682. ppc_update_hash_1x
  683. addi 12, 12, -16
  684. addi 11, 11, 16
  685. xxlor 19+32, 0, 0
  686. vaddudm 30, 30, 31 # IV + counter
  687. vxor 15, 30, 19 # add round key
  688. bdnz Next_rem_block
  689. cmpdi 12, 0
  690. beq aes_gcm_out
  691. Final_block:
  692. Loop_aes_middle_1x
  693. xxlor 23+32, 10, 10
  694. cmpdi 10, 10
  695. beq Do_final_1x
  696. # 192 bits
  697. xxlor 24+32, 11, 11
  698. vcipher 15, 15, 23
  699. vcipher 15, 15, 24
  700. xxlor 23+32, 12, 12
  701. cmpdi 10, 12
  702. beq Do_final_1x
  703. # 256 bits
  704. xxlor 24+32, 13, 13
  705. vcipher 15, 15, 23
  706. vcipher 15, 15, 24
  707. xxlor 23+32, 14, 14
  708. cmpdi 10, 14
  709. beq Do_final_1x
  710. Do_final_1x:
  711. vcipherlast 15, 15, 23
  712. lxvb16x 15, 0, 14 # load last block
  713. xxlxor 47, 47, 15
  714. # create partial block mask
  715. li 15, 16
  716. sub 15, 15, 12 # index to the mask
  717. vspltisb 16, -1 # first 16 bytes - 0xffff...ff
  718. vspltisb 17, 0 # second 16 bytes - 0x0000...00
  719. li 10, 192
  720. stvx 16, 10, 1
  721. addi 10, 10, 16
  722. stvx 17, 10, 1
  723. addi 10, 1, 192
  724. lxvb16x 16, 15, 10 # load partial block mask
  725. xxland 47, 47, 16
  726. vmr 28, 15
  727. ppc_update_hash_1x
  728. # * should store only the remaining bytes.
  729. bl Write_partial_block
  730. b aes_gcm_out
  731. #
  732. # Write partial block
  733. # r9 - output
  734. # r12 - remaining bytes
  735. # v15 - partial input data
  736. #
  737. Write_partial_block:
  738. li 10, 192
  739. stxvb16x 15+32, 10, 1 # last block
  740. #add 10, 9, 11 # Output
  741. addi 10, 9, -1
  742. addi 16, 1, 191
  743. mtctr 12 # remaining bytes
  744. li 15, 0
  745. Write_last_byte:
  746. lbzu 14, 1(16)
  747. stbu 14, 1(10)
  748. bdnz Write_last_byte
  749. blr
  750. aes_gcm_out:
  751. # out = state
  752. stxvb16x 32, 0, 8 # write out Xi
  753. add 3, 11, 12 # return count
  754. li 9, 256
  755. lvx 20, 9, 1
  756. addi 9, 9, 16
  757. lvx 21, 9, 1
  758. addi 9, 9, 16
  759. lvx 22, 9, 1
  760. addi 9, 9, 16
  761. lvx 23, 9, 1
  762. addi 9, 9, 16
  763. lvx 24, 9, 1
  764. addi 9, 9, 16
  765. lvx 25, 9, 1
  766. addi 9, 9, 16
  767. lvx 26, 9, 1
  768. addi 9, 9, 16
  769. lvx 27, 9, 1
  770. addi 9, 9, 16
  771. lvx 28, 9, 1
  772. addi 9, 9, 16
  773. lvx 29, 9, 1
  774. addi 9, 9, 16
  775. lvx 30, 9, 1
  776. addi 9, 9, 16
  777. lvx 31, 9, 1
  778. ld 0, 528(1)
  779. ld 14,112(1)
  780. ld 15,120(1)
  781. ld 16,128(1)
  782. ld 17,136(1)
  783. ld 18,144(1)
  784. ld 19,152(1)
  785. ld 20,160(1)
  786. ld 21,168(1)
  787. mtlr 0
  788. addi 1, 1, 512
  789. blr
  790. #
  791. # 8x Decrypt
  792. #
  793. .global ppc_aes_gcm_decrypt
  794. .align 5
  795. ppc_aes_gcm_decrypt:
  796. _ppc_aes_gcm_decrypt:
  797. stdu 1,-512(1)
  798. mflr 0
  799. std 14,112(1)
  800. std 15,120(1)
  801. std 16,128(1)
  802. std 17,136(1)
  803. std 18,144(1)
  804. std 19,152(1)
  805. std 20,160(1)
  806. std 21,168(1)
  807. li 9, 256
  808. stvx 20, 9, 1
  809. addi 9, 9, 16
  810. stvx 21, 9, 1
  811. addi 9, 9, 16
  812. stvx 22, 9, 1
  813. addi 9, 9, 16
  814. stvx 23, 9, 1
  815. addi 9, 9, 16
  816. stvx 24, 9, 1
  817. addi 9, 9, 16
  818. stvx 25, 9, 1
  819. addi 9, 9, 16
  820. stvx 26, 9, 1
  821. addi 9, 9, 16
  822. stvx 27, 9, 1
  823. addi 9, 9, 16
  824. stvx 28, 9, 1
  825. addi 9, 9, 16
  826. stvx 29, 9, 1
  827. addi 9, 9, 16
  828. stvx 30, 9, 1
  829. addi 9, 9, 16
  830. stvx 31, 9, 1
  831. std 0, 528(1)
  832. # Load Xi
  833. lxvb16x 32, 0, 8 # load Xi
  834. # load Hash - h^4, h^3, h^2, h
  835. li 10, 32
  836. lxvd2x 2+32, 10, 8 # H Poli
  837. li 10, 48
  838. lxvd2x 3+32, 10, 8 # Hl
  839. li 10, 64
  840. lxvd2x 4+32, 10, 8 # H
  841. li 10, 80
  842. lxvd2x 5+32, 10, 8 # Hh
  843. li 10, 96
  844. lxvd2x 6+32, 10, 8 # H^2l
  845. li 10, 112
  846. lxvd2x 7+32, 10, 8 # H^2
  847. li 10, 128
  848. lxvd2x 8+32, 10, 8 # H^2h
  849. li 10, 144
  850. lxvd2x 9+32, 10, 8 # H^3l
  851. li 10, 160
  852. lxvd2x 10+32, 10, 8 # H^3
  853. li 10, 176
  854. lxvd2x 11+32, 10, 8 # H^3h
  855. li 10, 192
  856. lxvd2x 12+32, 10, 8 # H^4l
  857. li 10, 208
  858. lxvd2x 13+32, 10, 8 # H^4
  859. li 10, 224
  860. lxvd2x 14+32, 10, 8 # H^4h
  861. # initialize ICB: GHASH( IV ), IV - r7
  862. lxvb16x 30+32, 0, 7 # load IV - v30
  863. mr 12, 5 # length
  864. li 11, 0 # block index
  865. # counter 1
  866. vxor 31, 31, 31
  867. vspltisb 22, 1
  868. vsldoi 31, 31, 22,1 # counter 1
  869. # load round key to VSR
  870. lxv 0, 0(6)
  871. lxv 1, 0x10(6)
  872. lxv 2, 0x20(6)
  873. lxv 3, 0x30(6)
  874. lxv 4, 0x40(6)
  875. lxv 5, 0x50(6)
  876. lxv 6, 0x60(6)
  877. lxv 7, 0x70(6)
  878. lxv 8, 0x80(6)
  879. lxv 9, 0x90(6)
  880. lxv 10, 0xa0(6)
  881. # load rounds - 10 (128), 12 (192), 14 (256)
  882. lwz 9,240(6)
  883. #
  884. # vxor state, state, w # addroundkey
  885. xxlor 32+29, 0, 0
  886. vxor 15, 30, 29 # IV + round key - add round key 0
  887. cmpdi 9, 10
  888. beq Loop_aes_gcm_8x_dec
  889. # load 2 more round keys (v11, v12)
  890. lxv 11, 0xb0(6)
  891. lxv 12, 0xc0(6)
  892. cmpdi 9, 12
  893. beq Loop_aes_gcm_8x_dec
  894. # load 2 more round keys (v11, v12, v13, v14)
  895. lxv 13, 0xd0(6)
  896. lxv 14, 0xe0(6)
  897. cmpdi 9, 14
  898. beq Loop_aes_gcm_8x_dec
  899. b aes_gcm_out
  900. .align 5
  901. Loop_aes_gcm_8x_dec:
  902. mr 14, 3
  903. mr 9, 4
  904. # n blocks
  905. li 10, 128
  906. divdu 10, 5, 10 # n 128 bytes-blocks
  907. cmpdi 10, 0
  908. beq Loop_last_block_dec
  909. vaddudm 30, 30, 31 # IV + counter
  910. vxor 16, 30, 29
  911. vaddudm 30, 30, 31
  912. vxor 17, 30, 29
  913. vaddudm 30, 30, 31
  914. vxor 18, 30, 29
  915. vaddudm 30, 30, 31
  916. vxor 19, 30, 29
  917. vaddudm 30, 30, 31
  918. vxor 20, 30, 29
  919. vaddudm 30, 30, 31
  920. vxor 21, 30, 29
  921. vaddudm 30, 30, 31
  922. vxor 22, 30, 29
  923. mtctr 10
  924. li 15, 16
  925. li 16, 32
  926. li 17, 48
  927. li 18, 64
  928. li 19, 80
  929. li 20, 96
  930. li 21, 112
  931. lwz 10, 240(6)
  932. Loop_8x_block_dec:
  933. lxvb16x 15, 0, 14 # load block
  934. lxvb16x 16, 15, 14 # load block
  935. lxvb16x 17, 16, 14 # load block
  936. lxvb16x 18, 17, 14 # load block
  937. lxvb16x 19, 18, 14 # load block
  938. lxvb16x 20, 19, 14 # load block
  939. lxvb16x 21, 20, 14 # load block
  940. lxvb16x 22, 21, 14 # load block
  941. addi 14, 14, 128
  942. Loop_aes_middle8x
  943. xxlor 23+32, 10, 10
  944. cmpdi 10, 10
  945. beq Do_last_aes_dec
  946. # 192 bits
  947. xxlor 24+32, 11, 11
  948. vcipher 15, 15, 23
  949. vcipher 16, 16, 23
  950. vcipher 17, 17, 23
  951. vcipher 18, 18, 23
  952. vcipher 19, 19, 23
  953. vcipher 20, 20, 23
  954. vcipher 21, 21, 23
  955. vcipher 22, 22, 23
  956. vcipher 15, 15, 24
  957. vcipher 16, 16, 24
  958. vcipher 17, 17, 24
  959. vcipher 18, 18, 24
  960. vcipher 19, 19, 24
  961. vcipher 20, 20, 24
  962. vcipher 21, 21, 24
  963. vcipher 22, 22, 24
  964. xxlor 23+32, 12, 12
  965. cmpdi 10, 12
  966. beq Do_last_aes_dec
  967. # 256 bits
  968. xxlor 24+32, 13, 13
  969. vcipher 15, 15, 23
  970. vcipher 16, 16, 23
  971. vcipher 17, 17, 23
  972. vcipher 18, 18, 23
  973. vcipher 19, 19, 23
  974. vcipher 20, 20, 23
  975. vcipher 21, 21, 23
  976. vcipher 22, 22, 23
  977. vcipher 15, 15, 24
  978. vcipher 16, 16, 24
  979. vcipher 17, 17, 24
  980. vcipher 18, 18, 24
  981. vcipher 19, 19, 24
  982. vcipher 20, 20, 24
  983. vcipher 21, 21, 24
  984. vcipher 22, 22, 24
  985. xxlor 23+32, 14, 14
  986. cmpdi 10, 14
  987. beq Do_last_aes_dec
  988. b aes_gcm_out
  989. Do_last_aes_dec:
  990. #
  991. # last round
  992. vcipherlast 15, 15, 23
  993. vcipherlast 16, 16, 23
  994. xxlxor 47, 47, 15
  995. stxvb16x 47, 0, 9 # store output
  996. xxlxor 48, 48, 16
  997. stxvb16x 48, 15, 9 # store output
  998. vcipherlast 17, 17, 23
  999. vcipherlast 18, 18, 23
  1000. xxlxor 49, 49, 17
  1001. stxvb16x 49, 16, 9 # store output
  1002. xxlxor 50, 50, 18
  1003. stxvb16x 50, 17, 9 # store output
  1004. vcipherlast 19, 19, 23
  1005. vcipherlast 20, 20, 23
  1006. xxlxor 51, 51, 19
  1007. stxvb16x 51, 18, 9 # store output
  1008. xxlxor 52, 52, 20
  1009. stxvb16x 52, 19, 9 # store output
  1010. vcipherlast 21, 21, 23
  1011. vcipherlast 22, 22, 23
  1012. xxlxor 53, 53, 21
  1013. stxvb16x 53, 20, 9 # store output
  1014. xxlxor 54, 54, 22
  1015. stxvb16x 54, 21, 9 # store output
  1016. addi 9, 9, 128
  1017. xxlor 15+32, 15, 15
  1018. xxlor 16+32, 16, 16
  1019. xxlor 17+32, 17, 17
  1020. xxlor 18+32, 18, 18
  1021. xxlor 19+32, 19, 19
  1022. xxlor 20+32, 20, 20
  1023. xxlor 21+32, 21, 21
  1024. xxlor 22+32, 22, 22
  1025. # ghash here
  1026. ppc_aes_gcm_ghash2_4x
  1027. xxlor 27+32, 0, 0
  1028. vaddudm 30, 30, 31 # IV + counter
  1029. vmr 29, 30
  1030. vxor 15, 30, 27 # add round key
  1031. vaddudm 30, 30, 31
  1032. vxor 16, 30, 27
  1033. vaddudm 30, 30, 31
  1034. vxor 17, 30, 27
  1035. vaddudm 30, 30, 31
  1036. vxor 18, 30, 27
  1037. vaddudm 30, 30, 31
  1038. vxor 19, 30, 27
  1039. vaddudm 30, 30, 31
  1040. vxor 20, 30, 27
  1041. vaddudm 30, 30, 31
  1042. vxor 21, 30, 27
  1043. vaddudm 30, 30, 31
  1044. vxor 22, 30, 27
  1045. addi 12, 12, -128
  1046. addi 11, 11, 128
  1047. bdnz Loop_8x_block_dec
  1048. vmr 30, 29
  1049. Loop_last_block_dec:
  1050. cmpdi 12, 0
  1051. beq aes_gcm_out
  1052. # loop last few blocks
  1053. li 10, 16
  1054. divdu 10, 12, 10
  1055. mtctr 10
  1056. lwz 10,240(6)
  1057. cmpdi 12, 16
  1058. blt Final_block_dec
  1059. Next_rem_block_dec:
  1060. lxvb16x 15, 0, 14 # load block
  1061. Loop_aes_middle_1x
  1062. xxlor 23+32, 10, 10
  1063. cmpdi 10, 10
  1064. beq Do_next_1x_dec
  1065. # 192 bits
  1066. xxlor 24+32, 11, 11
  1067. vcipher 15, 15, 23
  1068. vcipher 15, 15, 24
  1069. xxlor 23+32, 12, 12
  1070. cmpdi 10, 12
  1071. beq Do_next_1x_dec
  1072. # 256 bits
  1073. xxlor 24+32, 13, 13
  1074. vcipher 15, 15, 23
  1075. vcipher 15, 15, 24
  1076. xxlor 23+32, 14, 14
  1077. cmpdi 10, 14
  1078. beq Do_next_1x_dec
  1079. Do_next_1x_dec:
  1080. vcipherlast 15, 15, 23
  1081. xxlxor 47, 47, 15
  1082. stxvb16x 47, 0, 9 # store output
  1083. addi 14, 14, 16
  1084. addi 9, 9, 16
  1085. xxlor 28+32, 15, 15
  1086. ppc_update_hash_1x
  1087. addi 12, 12, -16
  1088. addi 11, 11, 16
  1089. xxlor 19+32, 0, 0
  1090. vaddudm 30, 30, 31 # IV + counter
  1091. vxor 15, 30, 19 # add round key
  1092. bdnz Next_rem_block_dec
  1093. cmpdi 12, 0
  1094. beq aes_gcm_out
  1095. Final_block_dec:
  1096. Loop_aes_middle_1x
  1097. xxlor 23+32, 10, 10
  1098. cmpdi 10, 10
  1099. beq Do_final_1x_dec
  1100. # 192 bits
  1101. xxlor 24+32, 11, 11
  1102. vcipher 15, 15, 23
  1103. vcipher 15, 15, 24
  1104. xxlor 23+32, 12, 12
  1105. cmpdi 10, 12
  1106. beq Do_final_1x_dec
  1107. # 256 bits
  1108. xxlor 24+32, 13, 13
  1109. vcipher 15, 15, 23
  1110. vcipher 15, 15, 24
  1111. xxlor 23+32, 14, 14
  1112. cmpdi 10, 14
  1113. beq Do_final_1x_dec
  1114. Do_final_1x_dec:
  1115. vcipherlast 15, 15, 23
  1116. lxvb16x 15, 0, 14 # load block
  1117. xxlxor 47, 47, 15
  1118. # create partial block mask
  1119. li 15, 16
  1120. sub 15, 15, 12 # index to the mask
  1121. vspltisb 16, -1 # first 16 bytes - 0xffff...ff
  1122. vspltisb 17, 0 # second 16 bytes - 0x0000...00
  1123. li 10, 192
  1124. stvx 16, 10, 1
  1125. addi 10, 10, 16
  1126. stvx 17, 10, 1
  1127. addi 10, 1, 192
  1128. lxvb16x 16, 15, 10 # load block mask
  1129. xxland 47, 47, 16
  1130. xxlor 28+32, 15, 15
  1131. ppc_update_hash_1x
  1132. # * should store only the remaining bytes.
  1133. bl Write_partial_block
  1134. b aes_gcm_out
  1135. ___
  1136. foreach (split("\n",$code)) {
  1137. s/\`([^\`]*)\`/eval $1/geo;
  1138. if ($flavour =~ /le$/o) { # little-endian
  1139. s/le\?//o or
  1140. s/be\?/#be#/o;
  1141. } else {
  1142. s/le\?/#le#/o or
  1143. s/be\?//o;
  1144. }
  1145. print $_,"\n";
  1146. }
  1147. close STDOUT or die "error closing STDOUT: $!"; # enforce flush