#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution, and an issued instruction [in this
# case a load from memory is the critical one] has to complete before
# execution flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support: it's basically a "teaser," as
# it can and should be improved in several ways. Most notably, support
# for CBC is not utilized, nor are multiple blocks ever processed.
# Software key schedule setup could also be postponed until hardware
# support is detected... Performance improvement over assembler is
# reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
# proper support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because, being
# dual-issue, z10 makes it impossible to eliminate the interlock
# condition: the critical path is not long enough. Yet it spends ~24
# cycles per byte processed with 128-bit key.
#
# Unlike the previous version, hardware support detection takes place
# only at the moment of key schedule setup, which is denoted in
# key->rounds. This is done because deferred key setup can't be made
# MT-safe, not for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives an incredible performance
# improvement; it was measured to be ~6.6x. It's less than the
# previously mentioned 8x, because the software implementation was
# optimized.

# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over the "generic" counter mode routine
# relying on single-block, also hardware-assisted, AES_encrypt.
# "Up to" refers to the fact that the exact throughput depends on the
# current stack frame alignment within a 4KB page. In the worst case
# you get ~75% of the maximum, but *on average* as much as ~98%.
# Meaning that the worst case is unlikely; it's like hitting a ravine
# on a plateau.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's a "z-CPU". The latter implies that the
# code remains z/Architecture specific. On z990 it was measured to
# perform 2x better than code generated by gcc 4.3.

# December 2010.
#
# Add support for z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...

# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
# instructions, which deliver ~70% improvement at 8KB block size over
# vanilla km-based code, and ~37% at 512-byte block size.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

$output and open STDOUT,">$output";

$softonly=0;	# allow hardware support

$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

$stdframe=16*$SIZE_T+4*8;
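# ($stdframe is the size of the ABI-mandated register save area: 16 GPR
# slots plus 4 FPR slots, i.e. 96 bytes in 31-bit mode, 160 in 64-bit.)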
sub _data_word()
{ my $i;
    while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
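# Each 32-bit table word is emitted twice, so an entry occupies 8 bytes and
# a 4-byte load at byte offset n (0..3) returns the word rotated by n bytes.
# One 2KB table thus serves as Te0..Te3 (likewise Td0..Td3), with indices
# pre-scaled by 8; the raw S-box byte sits at offsets 1-2 of each Te entry,
# and separate 256-byte Te4/Td4 tables follow at offset 2048.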
$code=<<___;
#include "s390x_arch.h"

.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
&_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
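# round constants: 0x01,0x02,...,0x1B,0x36 are successive doublings of 1
# in GF(2^8), left-justified in big-endian words; only ten are needed,
# the remaining six words are zero padding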
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
#		const AES_KEY *key) {
.globl	AES_encrypt
.type	AES_encrypt,\@function
AES_encrypt:
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Lesoft:
___
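# In the hardware path above, 240($key) holds a km/kmc function code
# (18/19/20 for AES-128/192/256, as stored by AES_set_encrypt_key) rather
# than a round count; ".long 0xb92e0042" is a hand-encoded "km %r4,%r2"
# for assemblers that lack the mnemonic, and "brc 1,.-4" re-executes it
# on partial completion (condition code 3).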
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_encrypt,.-AES_encrypt

.type	_s390x_AES_encrypt,\@function
.align	16
_s390x_AES_encrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Lenc_loop
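# Each table entry is 8 bytes wide (the word stored twice), so the byte
# indices extracted below are pre-scaled by 8: shift counts are 8*i-3 and
# the mask is 0xff<<3. Byte offsets 0..3 then select the four rotations
# serving as Te0..Te3.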
.align	16
.Lenc_loop:
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	ngr	$t1,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	ngr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Te0[s0>>24]
	l	$t1,1($t1,$tbl)	# Te3[s0>>0]
	l	$t2,2($t2,$tbl)	# Te2[s0>>8]
	l	$t3,3($t3,$tbl)	# Te1[s0>>16]

	x	$s0,3($i1,$tbl)	# Te1[s1>>16]
	l	$s1,0($s1,$tbl)	# Te0[s1>>24]
	x	$t2,1($i2,$tbl)	# Te3[s1>>0]
	x	$t3,2($i3,$tbl)	# Te2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	nr	$s2,$mask
	ngr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	sllg	$t1,$s3,`0+3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	ngr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Te2[s2>>8]
	x	$s1,3($i2,$tbl)	# Te1[s2>>16]
	l	$s2,0($s2,$tbl)	# Te0[s2>>24]
	x	$t3,1($i3,$tbl)	# Te3[s2>>0]

	srlg	$i3,$s3,`16-3`	# i2
	xr	$s2,$t2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,1($t1,$tbl)	# Te3[s3>>0]
	x	$s1,2($ra,$tbl)	# Te2[s3>>8]
	x	$s2,3($i3,$tbl)	# Te1[s3>>16]
	l	$s3,0($s3,$tbl)	# Te0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Lenc_loop
.align	16

	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	ngr	$t1,$mask
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	ngr	$i2,$mask
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	nr	$i3,$mask

	llgc	$s0,2($s0,$tbl)	# Te4[s0>>24]
	llgc	$t1,2($t1,$tbl)	# Te4[s0>>0]
	sll	$s0,24
	llgc	$t2,2($t2,$tbl)	# Te4[s0>>8]
	llgc	$t3,2($t3,$tbl)	# Te4[s0>>16]
	sll	$t2,8
	sll	$t3,16

	llgc	$i1,2($i1,$tbl)	# Te4[s1>>16]
	llgc	$s1,2($s1,$tbl)	# Te4[s1>>24]
	llgc	$i2,2($i2,$tbl)	# Te4[s1>>0]
	llgc	$i3,2($i3,$tbl)	# Te4[s1>>8]
	sll	$i1,16
	sll	$s1,24
	sll	$i3,8
	or	$s0,$i1
	or	$s1,$t1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	ngr	$i3,$mask
	nr	$s2,$mask

	sllg	$t1,$s3,`0+3`	# i0
	srlg	$ra,$s3,`8-3`	# i1
	ngr	$t1,$mask

	llgc	$i1,2($i1,$tbl)	# Te4[s2>>8]
	llgc	$i2,2($i2,$tbl)	# Te4[s2>>16]
	sll	$i1,8
	llgc	$s2,2($s2,$tbl)	# Te4[s2>>24]
	llgc	$i3,2($i3,$tbl)	# Te4[s2>>0]
	sll	$i2,16
	nr	$ra,$mask
	sll	$s2,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$t3,$i3

	srlg	$i3,$s3,`16-3`	# i2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	l	$t0,16($key)
	l	$t2,20($key)

	llgc	$i1,2($t1,$tbl)	# Te4[s3>>0]
	llgc	$i2,2($ra,$tbl)	# Te4[s3>>8]
	llgc	$i3,2($i3,$tbl)	# Te4[s3>>16]
	llgc	$s3,2($s3,$tbl)	# Te4[s3>>24]
	sll	$i2,8
	sll	$i3,16
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$i3
	or	$s3,$t3

	l${g}	$ra,15*$SIZE_T($sp)
	xr	$s0,$t0
	xr	$s1,$t2
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
___

$code.=<<___;
.type	AES_Td,\@object
.align	256
AES_Td:
___
&_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td

# void AES_decrypt(const unsigned char *inp, unsigned char *out,
#		const AES_KEY *key) {
.globl	AES_decrypt
.type	AES_decrypt,\@function
AES_decrypt:
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Ldsoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Ldsoft:
___
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Td
	bras	$ra,_s390x_AES_decrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_decrypt,.-AES_decrypt

.type	_s390x_AES_decrypt,\@function
.align	16
_s390x_AES_decrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Ldec_loop
.align	16
.Ldec_loop:
	srlg	$t1,$s0,`16-3`
	srlg	$t2,$s0,`8-3`
	sllg	$t3,$s0,`0+3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t1,$mask
	nr	$t2,$mask
	ngr	$t3,$mask

	sllg	$i1,$s1,`0+3`	# i0
	srlg	$i2,$s1,`16-3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	ngr	$i1,$mask
	nr	$s1,$mask
	nr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Td0[s0>>24]
	l	$t1,3($t1,$tbl)	# Td1[s0>>16]
	l	$t2,2($t2,$tbl)	# Td2[s0>>8]
	l	$t3,1($t3,$tbl)	# Td3[s0>>0]

	x	$s0,1($i1,$tbl)	# Td3[s1>>0]
	l	$s1,0($s1,$tbl)	# Td0[s1>>24]
	x	$t2,3($i2,$tbl)	# Td1[s1>>16]
	x	$t3,2($i3,$tbl)	# Td2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	sllg	$i2,$s2,`0+3`	# i1
	srlg	$i3,$s2,`16-3`
	srl	$s2,`24-3`
	nr	$i1,$mask
	ngr	$i2,$mask
	nr	$s2,$mask
	nr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	srlg	$t1,$s3,`16-3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	nr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Td2[s2>>8]
	x	$s1,1($i2,$tbl)	# Td3[s2>>0]
	l	$s2,0($s2,$tbl)	# Td0[s2>>24]
	x	$t3,3($i3,$tbl)	# Td1[s2>>16]

	sllg	$i3,$s3,`0+3`	# i2
	srl	$s3,`24-3`
	ngr	$i3,$mask
	nr	$s3,$mask

	xr	$s2,$t2
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,3($t1,$tbl)	# Td1[s3>>16]
	x	$s1,2($ra,$tbl)	# Td2[s3>>8]
	x	$s2,1($i3,$tbl)	# Td3[s3>>0]
	l	$s3,0($s3,$tbl)	# Td0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Ldec_loop
.align	16

	l	$t1,`2048+0`($tbl)	# prefetch Td4
	l	$t2,`2048+64`($tbl)
	l	$t3,`2048+128`($tbl)
	l	$i1,`2048+192`($tbl)
	llill	$mask,0xff

	srlg	$i3,$s0,24	# i0
	srlg	$t1,$s0,16
	srlg	$t2,$s0,8
	nr	$s0,$mask	# i3
	nr	$t1,$mask

	srlg	$i1,$s1,24
	nr	$t2,$mask
	srlg	$i2,$s1,16
	srlg	$ra,$s1,8
	nr	$s1,$mask	# i0
	nr	$i2,$mask
	nr	$ra,$mask

	llgc	$i3,2048($i3,$tbl)	# Td4[s0>>24]
	llgc	$t1,2048($t1,$tbl)	# Td4[s0>>16]
	llgc	$t2,2048($t2,$tbl)	# Td4[s0>>8]
	sll	$t1,16
	llgc	$t3,2048($s0,$tbl)	# Td4[s0>>0]
	sllg	$s0,$i3,24
	sll	$t2,8

	llgc	$s1,2048($s1,$tbl)	# Td4[s1>>0]
	llgc	$i1,2048($i1,$tbl)	# Td4[s1>>24]
	llgc	$i2,2048($i2,$tbl)	# Td4[s1>>16]
	sll	$i1,24
	llgc	$i3,2048($ra,$tbl)	# Td4[s1>>8]
	sll	$i2,16
	sll	$i3,8
	or	$s0,$s1
	or	$t1,$i1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,8	# i0
	srlg	$i2,$s2,24
	srlg	$i3,$s2,16
	nr	$s2,$mask	# i1
	nr	$i1,$mask
	nr	$i3,$mask
	llgc	$i1,2048($i1,$tbl)	# Td4[s2>>8]
	llgc	$s1,2048($s2,$tbl)	# Td4[s2>>0]
	llgc	$i2,2048($i2,$tbl)	# Td4[s2>>24]
	llgc	$i3,2048($i3,$tbl)	# Td4[s2>>16]
	sll	$i1,8
	sll	$i2,24
	or	$s0,$i1
	sll	$i3,16
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s3,16	# i0
	srlg	$i2,$s3,8	# i1
	srlg	$i3,$s3,24
	nr	$s3,$mask	# i2
	nr	$i1,$mask
	nr	$i2,$mask

	l${g}	$ra,15*$SIZE_T($sp)
	or	$s1,$t1
	l	$t0,16($key)
	l	$t1,20($key)

	llgc	$i1,2048($i1,$tbl)	# Td4[s3>>16]
	llgc	$i2,2048($i2,$tbl)	# Td4[s3>>8]
	sll	$i1,16
	llgc	$s2,2048($s3,$tbl)	# Td4[s3>>0]
	llgc	$s3,2048($i3,$tbl)	# Td4[s3>>24]
	sll	$i2,8
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$s3,$t3

	xr	$s0,$t0
	xr	$s1,$t1
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
___

$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
#		AES_KEY *key) {
.globl	AES_set_encrypt_key
.type	AES_set_encrypt_key,\@function
.align	16
AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
	lghi	$t0,0
	cl${g}r	$inp,$t0
	je	.Lminus1
	cl${g}r	$key,$t0
	je	.Lminus1

	lghi	$t0,128
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,192
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,256
	clr	$bits,$t0
	je	.Lproceed
	lghi	%r2,-2
	br	%r14
.align	16
.Lproceed:
___
$code.=<<___ if (!$softonly);
	# convert bits to km(c) code, [128,192,256]->[18,19,20]
	lhi	%r5,-128
	lhi	%r0,18
	ar	%r5,$bits
	srl	%r5,6
	ar	%r5,%r0
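	# e.g. bits=192: (192-128)>>6 = 1, 18+1 = 19, the km-aes-192 code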
	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000
	srlg	%r0,%r0,0(%r5)
	ng	%r0,S390X_KM(%r1)	# check availability of both km...
	ng	%r0,S390X_KMC(%r1)	# ...and kmc support for given key length
	jz	.Lekey_internal

	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
	stmg	%r0,%r1,0($key)
	lhi	%r0,192
	cr	$bits,%r0
	jl	1f
	lg	%r1,16($inp)
	stg	%r1,16($key)
	je	1f
	lg	%r1,24($inp)
	stg	%r1,24($key)
1:	st	$bits,236($key)	# save bits [for debugging purposes]
	lgr	$t0,%r5
	st	%r5,240($key)	# save km(c) code
	lghi	%r2,0
	br	%r14
___
$code.=<<___;
.align	16
.Lekey_internal:
	stm${g}	%r4,%r13,4*$SIZE_T($sp)	# all non-volatile regs and $key

	larl	$tbl,AES_Te+2048

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	st	$s0,0($key)
	st	$s1,4($key)
	st	$s2,8($key)
	st	$s3,12($key)
	lghi	$t0,128
	cr	$bits,$t0
	jne	.Lnot128

	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,10
	st	$rounds,240($key)

	llgfr	$t2,$s3			# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t2,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L128_loop:
	la	$t2,0($t2,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t2,2,0($t2)		# Te4[rk[3]>>0]<<8
	icm	$t2,4,0($i1)		# Te4[rk[3]>>8]<<16
	icm	$t2,8,0($i2)		# Te4[rk[3]>>16]<<24
	icm	$t2,1,0($i3)		# Te4[rk[3]>>24]
	x	$t2,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t2			# rk[4]=rk[0]^...
	xr	$s1,$s0			# rk[5]=rk[1]^rk[4]
	xr	$s2,$s1			# rk[6]=rk[2]^rk[5]
	xr	$s3,$s2			# rk[7]=rk[3]^rk[6]

	llgfr	$t2,$s3			# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	nr	$t2,$mask
	nr	$i1,$mask
	srlg	$i3,$s3,24
	nr	$i2,$mask

	st	$s0,16($key)
	st	$s1,20($key)
	st	$s2,24($key)
	st	$s3,28($key)
	la	$key,16($key)		# key+=4
	la	$t3,4($t3)		# i++
	brct	$rounds,.L128_loop
	lghi	$t0,10
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.Lnot128:
	llgf	$t0,16($inp)
	llgf	$t1,20($inp)
	st	$t0,16($key)
	st	$t1,20($key)
	lghi	$t0,192
	cr	$bits,$t0
	jne	.Lnot192

	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,12
	st	$rounds,240($key)
	lghi	$rounds,8

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L192_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)		# Te4[rk[5]>>0]<<8
	icm	$t1,4,0($i1)		# Te4[rk[5]>>8]<<16
	icm	$t1,8,0($i2)		# Te4[rk[5]>>16]<<24
	icm	$t1,1,0($i3)		# Te4[rk[5]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1			# rk[6]=rk[0]^...
	xr	$s1,$s0			# rk[7]=rk[1]^rk[6]
	xr	$s2,$s1			# rk[8]=rk[2]^rk[7]
	xr	$s3,$s2			# rk[9]=rk[3]^rk[8]

	st	$s0,24($key)
	st	$s1,28($key)
	st	$s2,32($key)
	st	$s3,36($key)
	brct	$rounds,.L192_continue
	lghi	$t0,12
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L192_continue:
	lgr	$t1,$s3
	x	$t1,16($key)		# rk[10]=rk[4]^rk[9]
	st	$t1,40($key)
	x	$t1,20($key)		# rk[11]=rk[5]^rk[10]
	st	$t1,44($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,24($key)		# key+=6
	la	$t3,4($t3)		# i++
	j	.L192_loop

.align	16
.Lnot192:
	llgf	$t0,24($inp)
	llgf	$t1,28($inp)
	st	$t0,24($key)
	st	$t1,28($key)
	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,14
	st	$rounds,240($key)
	lghi	$rounds,7

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L256_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)		# Te4[rk[7]>>0]<<8
	icm	$t1,4,0($i1)		# Te4[rk[7]>>8]<<16
	icm	$t1,8,0($i2)		# Te4[rk[7]>>16]<<24
	icm	$t1,1,0($i3)		# Te4[rk[7]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1			# rk[8]=rk[0]^...
	xr	$s1,$s0			# rk[9]=rk[1]^rk[8]
	xr	$s2,$s1			# rk[10]=rk[2]^rk[9]
	xr	$s3,$s2			# rk[11]=rk[3]^rk[10]

	st	$s0,32($key)
	st	$s1,36($key)
	st	$s2,40($key)
	st	$s3,44($key)
	brct	$rounds,.L256_continue
	lghi	$t0,14
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L256_continue:
	lgr	$t1,$s3			# temp=rk[11]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	llgc	$t1,0($t1)		# Te4[rk[11]>>0]
	icm	$t1,2,0($i1)		# Te4[rk[11]>>8]<<8
	icm	$t1,4,0($i2)		# Te4[rk[11]>>16]<<16
	icm	$t1,8,0($i3)		# Te4[rk[11]>>24]<<24
	x	$t1,16($key)		# rk[12]=rk[4]^...
	st	$t1,48($key)
	x	$t1,20($key)		# rk[13]=rk[5]^rk[12]
	st	$t1,52($key)
	x	$t1,24($key)		# rk[14]=rk[6]^rk[13]
	st	$t1,56($key)
	x	$t1,28($key)		# rk[15]=rk[7]^rk[14]
	st	$t1,60($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,32($key)		# key+=8
	la	$t3,4($t3)		# i++
	j	.L256_loop

.Lminus1:
	lghi	%r2,-1
	br	$ra
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

# void AES_set_decrypt_key(const unsigned char *in, int bits,
#		AES_KEY *key) {
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,\@function
.align	16
AES_set_decrypt_key:
	#st${g}	$key,4*$SIZE_T($sp)	# I rely on AES_set_encrypt_key to
	st${g}	$ra,14*$SIZE_T($sp)	# save non-volatile registers and $key!
	bras	$ra,_s390x_AES_set_encrypt_key
	#l${g}	$key,4*$SIZE_T($sp)
	l${g}	$ra,14*$SIZE_T($sp)
	ltgr	%r2,%r2
	bnzr	$ra
___
$code.=<<___ if (!$softonly);
	#l	$t0,240($key)
	lhi	$t1,16
	cr	$t0,$t1
	jl	.Lgo
	oill	$t0,S390X_DECRYPT	# set "decrypt" bit
	st	$t0,240($key)
	br	$ra
___
$code.=<<___;
.align	16
.Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
	la	$i1,0($key)
	sllg	$i2,$rounds,4
	la	$i2,0($i2,$key)
	srl	$rounds,1
	lghi	$t1,-16

.align	16
.Linv:	lmg	$s0,$s1,0($i1)
	lmg	$s2,$s3,0($i2)
	stmg	$s0,$s1,0($i2)
	stmg	$s2,$s3,0($i1)
	la	$i1,16($i1)
	la	$i2,0($t1,$i2)
	brct	$rounds,.Linv
___
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
	llgf	$rounds,240($key)
	aghi	$rounds,-1
	sll	$rounds,2		# (rounds-1)*4
	llilh	$mask80,0x8080
	llilh	$mask1b,0x1b1b
	llilh	$maskfe,0xfefe
	oill	$mask80,0x8080
	oill	$mask1b,0x1b1b
	oill	$maskfe,0xfefe
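# The .Lmix loop below applies InvMixColumns to each round-key word,
# converting the encryption schedule into a decryption one. The
# mask80/mask1b/maskfe trio implements branchless GF(2^8) doubling
# ("xtime") on four packed bytes at once; a rough Perl model of one
# doubling step (the name xtime4 is hypothetical):
#
#	sub xtime4 {
#	    my $x = shift;
#	    my $hi = $x & 0x80808080;	# high bit of every byte
#	    (($x << 1) & 0xfefefefe) ^ ((($hi - ($hi >> 7)) & 0x1b1b1b1b));
#	}
#
# tp2/tp4/tp8 are tp1 doubled once/twice/thrice; their rotated
# combinations below reconstitute InvMixColumns.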
.align	16
.Lmix:	l	$s0,16($key)	# tp1
	lr	$s1,$s0
	ngr	$s1,$mask80
	srlg	$t1,$s1,7
	slr	$s1,$t1
	nr	$s1,$mask1b
	sllg	$t1,$s0,1
	nr	$t1,$maskfe
	xr	$s1,$t1		# tp2

	lr	$s2,$s1
	ngr	$s2,$mask80
	srlg	$t1,$s2,7
	slr	$s2,$t1
	nr	$s2,$mask1b
	sllg	$t1,$s1,1
	nr	$t1,$maskfe
	xr	$s2,$t1		# tp4

	lr	$s3,$s2
	ngr	$s3,$mask80
	srlg	$t1,$s3,7
	slr	$s3,$t1
	nr	$s3,$mask1b
	sllg	$t1,$s2,1
	nr	$t1,$maskfe
	xr	$s3,$t1		# tp8

	xr	$s1,$s0		# tp2^tp1
	xr	$s2,$s0		# tp4^tp1
	rll	$s0,$s0,24	# = ROTATE(tp1,8)
	xr	$s2,$s3		# ^=tp8
	xr	$s0,$s1		# ^=tp2^tp1
	xr	$s1,$s3		# tp2^tp1^tp8
	xr	$s0,$s2		# ^=tp4^tp1^tp8
	rll	$s1,$s1,8
	rll	$s2,$s2,16
	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
	rll	$s3,$s3,24
	xr	$s0,$s2		# ^= ROTATE(tp8^tp4^tp1,16)
	xr	$s0,$s3		# ^= ROTATE(tp8,8)

	st	$s0,16($key)
	la	$key,4($key)
	brct	$rounds,.Lmix

	lm${g}	%r6,%r13,6*$SIZE_T($sp)	# as was saved by AES_set_encrypt_key!
	lghi	%r2,0
	br	$ra
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

########################################################################
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#		size_t length, const AES_KEY *key,
#		unsigned char *ivec, const int enc)
{
my $inp="%r2";
my $out="%r4";	# length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function
.align	16
AES_cbc_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
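	# (three exclusive-ors swap two registers without a scratch register)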
___
$code.=<<___ if (!$softonly);
	lhi	%r0,16
	cl	%r0,240($key)
	jh	.Lcbc_software

	lg	%r0,0($ivp)	# copy ivec
	lg	%r1,8($ivp)
	stmg	%r0,%r1,16($sp)
	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
	stmg	%r0,%r1,32($sp)
	lmg	%r0,%r1,16($key)
	stmg	%r0,%r1,48($sp)
	l	%r0,240($key)	# load kmc code
	lghi	$key,15		# res=len%16, len-=res;
	ngr	$key,$len
	sl${g}r	$len,$key
	la	%r1,16($sp)	# parameter block - ivec || key
	jz	.Lkmc_truncated
	.long	0xb92f0042	# kmc %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"
	ltr	$key,$key
	jnz	.Lkmc_truncated
.Lkmc_done:
	lmg	%r0,%r1,16($sp)	# copy ivec to caller
	stg	%r0,0($ivp)
	stg	%r1,8($ivp)
	br	$ra
.align	16
.Lkmc_truncated:
	ahi	$key,-1		# it's the way it's encoded in mvc
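	# the mvc below is run via "ex": EXECUTE ORs the low byte of $key
	# (residue length - 1) into the mvc length field, giving a
	# variable-length copy without a loop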
	tmll	%r0,S390X_DECRYPT
	jnz	.Lkmc_truncated_dec
	lghi	%r1,0
	stg	%r1,16*$SIZE_T($sp)
	stg	%r1,16*$SIZE_T+8($sp)
	bras	%r1,1f
	mvc	16*$SIZE_T(1,$sp),0($inp)
1:	ex	$key,0(%r1)
	la	%r1,16($sp)	# restore parameter block
	la	$inp,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	j	.Lkmc_done
.align	16
.Lkmc_truncated_dec:
	st${g}	$out,4*$SIZE_T($sp)
	la	$out,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	l${g}	$out,4*$SIZE_T($sp)
	bras	%r1,2f
	mvc	0(1,$out),16*$SIZE_T($sp)
2:	ex	$key,0(%r1)
	j	.Lkmc_done
.align	16
.Lcbc_software:
___
$code.=<<___;
	stm${g}	$key,$ra,5*$SIZE_T($sp)
	lhi	%r0,0
	cl	%r0,`$stdframe+$SIZE_T-4`($sp)
	je	.Lcbc_decrypt

	larl	$tbl,AES_Te

	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	llgf	$s3,12($ivp)

	lghi	$t0,16
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
.Lcbc_enc_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	x	$s0,0($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	la	$inp,16($inp)
	la	$out,16($out)
	lghi	$t0,16
	lt${g}r	$len,$len
	jz	.Lcbc_enc_done
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
	j	.Lcbc_enc_loop
.align	16
.Lcbc_enc_done:
	l${g}	$ivp,6*$SIZE_T($sp)
	st	$s0,0($ivp)
	st	$s1,4($ivp)
	st	$s2,8($ivp)
	st	$s3,12($ivp)

	lm${g}	%r7,$ra,7*$SIZE_T($sp)
	br	$ra
.align	16
.Lcbc_enc_tail:
	aghi	$len,15
	lghi	$t0,0
	stg	$t0,16*$SIZE_T($sp)
	stg	$t0,16*$SIZE_T+8($sp)
	bras	$t1,3f
	mvc	16*$SIZE_T(1,$sp),0($inp)
3:	ex	$len,0($t1)
	lghi	$len,0
	la	$inp,16*$SIZE_T($sp)
	j	.Lcbc_enc_loop
.align	16
.Lcbc_decrypt:
	larl	$tbl,AES_Td

	lg	$t0,0($ivp)
	lg	$t1,8($ivp)
	stmg	$t0,$t1,16*$SIZE_T($sp)
.Lcbc_dec_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_decrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	sllg	$s0,$s0,32
	sllg	$s2,$s2,32
	lr	$s0,$s1
	lr	$s2,$s3

	lg	$t0,0($inp)
	lg	$t1,8($inp)
	xg	$s0,16*$SIZE_T($sp)
	xg	$s2,16*$SIZE_T+8($sp)
	lghi	$s1,16
	sl${g}r	$len,$s1
	brc	4,.Lcbc_dec_tail	# if borrow
	brc	2,.Lcbc_dec_done	# if zero
	stg	$s0,0($out)
	stg	$s2,8($out)
	stmg	$t0,$t1,16*$SIZE_T($sp)

	la	$inp,16($inp)
	la	$out,16($out)
	j	.Lcbc_dec_loop
.Lcbc_dec_done:
	stg	$s0,0($out)
	stg	$s2,8($out)
.Lcbc_dec_exit:
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	stmg	$t0,$t1,0($ivp)

	br	$ra
.align	16
.Lcbc_dec_tail:
	aghi	$len,15
	stg	$s0,16*$SIZE_T($sp)
	stg	$s2,16*$SIZE_T+8($sp)
	bras	$s1,4f
	mvc	0(1,$out),16*$SIZE_T($sp)
4:	ex	$len,0($s1)
	j	.Lcbc_dec_exit
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
########################################################################
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
#		size_t blocks, const AES_KEY *key,
#		const unsigned char *ivec)
{
my $inp="%r2";
my $out="%r4";	# blocks and out are swapped
my $len="%r3";
my $key="%r5";	my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";

$code.=<<___;
.globl	AES_ctr32_encrypt
.type	AES_ctr32_encrypt,\@function
.align	16
AES_ctr32_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	$len,$len	# safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lctr32_software

	st${g}	$s2,10*$SIZE_T($sp)
	st${g}	$s3,11*$SIZE_T($sp)

	clr	$len,%r1	# does work even in 64-bit mode
	jle	.Lctr32_nokma	# kma is slower for <= 16 blocks

	larl	%r1,OPENSSL_s390xcap_P
	lr	$s2,%r0
	llihh	$s3,0x8000
	srlg	$s3,$s3,0($s2)
	ng	$s3,S390X_KMA(%r1)	# check kma capability vector
	jz	.Lctr32_nokma

	l${g}hi	%r1,-$stdframe-112
	l${g}r	$s3,$sp
	la	$sp,0(%r1,$sp)	# prepare parameter block

	lhi	%r1,0x0600
	sllg	$len,$len,4
	or	%r0,%r1		# set HS and LAAD flags

	st${g}	$s3,0($sp)	# backchain
	la	%r1,$stdframe($sp)

	lmg	$s2,$s3,0($key)	# copy key
	stg	$s2,$stdframe+80($sp)
	stg	$s3,$stdframe+88($sp)
	lmg	$s2,$s3,16($key)
	stg	$s2,$stdframe+96($sp)
	stg	$s3,$stdframe+104($sp)

	lmg	$s2,$s3,0($ivp)	# copy iv
	stg	$s2,$stdframe+64($sp)
	ahi	$s3,-1		# kma requires counter-1
	stg	$s3,$stdframe+72($sp)
	st	$s3,$stdframe+12($sp)	# copy counter

	lghi	$s2,0		# no AAD
	lghi	$s3,0

	.long	0xb929a042	# kma $out,$s2,$inp
	brc	1,.-4		# pay attention to "partial completion"

	stg	%r0,$stdframe+80($sp)	# wipe key
	stg	%r0,$stdframe+88($sp)
	stg	%r0,$stdframe+96($sp)
	stg	%r0,$stdframe+104($sp)
	la	$sp,$stdframe+112($sp)
	lm${g}	$s2,$s3,10*$SIZE_T($sp)
	br	$ra
.align	16
.Lctr32_nokma:
	stm${g}	%r6,$s1,6*$SIZE_T($sp)

	slgr	$out,$inp
	la	%r1,0($key)	# %r1 is permanent copy of $key
	lg	$iv0,0($ivp)	# load ivec
	lg	$ivp,8($ivp)

	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16	# guarantee at least 256-bytes buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1		# align at page boundary
	slgr	$fp,$s0		# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16
	la	$sp,1024($s0)	# alloca
	srlg	$fp,$fp,4	# convert bytes to blocks, minimum 16
	st${g}	$s2,0($sp)	# back-chain
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lctr32_hw_switch	# not zero, no borrow
	algr	$fp,$len	# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)
.Lctr32_hw_switch:
___
$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
	llgfr	$s0,%r0
	lgr	$s1,%r1
	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000	# check if kmctr supports the function code
	srlg	%r0,%r0,0($s0)
	ng	%r0,S390X_KMCTR(%r1)	# check kmctr capability vector
	lgr	%r0,$s0
	lgr	%r1,$s1
	jz	.Lctr32_km_loop

####### kmctr code
	algr	$out,$inp	# restore $out
	lgr	$s1,$len	# $s1 undertakes $len
	j	.Lctr32_kmctr_loop
.align	16
.Lctr32_kmctr_loop:
	la	$s2,16($sp)
	lgr	$s3,$fp
.Lctr32_kmctr_prepare:
	stg	$iv0,0($s2)
	stg	$ivp,8($s2)
	la	$s2,16($s2)
	ahi	$ivp,1		# 32-bit increment, preserves upper half
	brct	$s3,.Lctr32_kmctr_prepare

	#la	$inp,0($inp)	# inp
	sllg	$len,$fp,4	# len
	#la	$out,0($out)	# out
	la	$s2,16($sp)	# iv
	.long	0xb92da042	# kmctr $out,$s2,$inp
	brc	1,.-4		# pay attention to "partial completion"

	slgr	$s1,$fp
	brc	1,.Lctr32_kmctr_loop	# not zero, no borrow
	algr	$fp,$s1
	lghi	$s1,0
	brc	4+1,.Lctr32_kmctr_loop	# not zero

	l${g}	$sp,0($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
___
$code.=<<___ if (!$softonly);
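# km provides only ECB, so each iteration below materializes $fp counter
# blocks on the stack, encrypts them in place with km, then XORs the
# resulting keystream with the input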
  1385. .Lctr32_km_loop:
  1386. la $s2,16($sp)
  1387. lgr $s3,$fp
  1388. .Lctr32_km_prepare:
  1389. stg $iv0,0($s2)
  1390. stg $ivp,8($s2)
  1391. la $s2,16($s2)
  1392. ahi $ivp,1 # 32-bit increment, preserves upper half
  1393. brct $s3,.Lctr32_km_prepare
  1394. la $s0,16($sp) # inp
  1395. sllg $s1,$fp,4 # len
  1396. la $s2,16($sp) # out
  1397. .long 0xb92e00a8 # km %r10,%r8
  1398. brc 1,.-4 # pay attention to "partial completion"
  1399. la $s2,16($sp)
  1400. lgr $s3,$fp
  1401. slgr $s2,$inp
  1402. .Lctr32_km_xor:
  1403. lg $s0,0($inp)
  1404. lg $s1,8($inp)
  1405. xg $s0,0($s2,$inp)
  1406. xg $s1,8($s2,$inp)
  1407. stg $s0,0($out,$inp)
  1408. stg $s1,8($out,$inp)
  1409. la $inp,16($inp)
  1410. brct $s3,.Lctr32_km_xor
  1411. slgr $len,$fp
  1412. brc 1,.Lctr32_km_loop # not zero, no borrow
  1413. algr $fp,$len
  1414. lghi $len,0
  1415. brc 4+1,.Lctr32_km_loop # not zero
  1416. l${g} $s0,0($sp)
  1417. l${g} $s1,$SIZE_T($sp)
  1418. la $s2,16($sp)
  1419. .Lctr32_km_zap:
  1420. stg $s0,0($s2)
  1421. stg $s0,8($s2)
  1422. la $s2,16($s2)
  1423. brct $s1,.Lctr32_km_zap
  1424. la $sp,0($s0)
  1425. lm${g} %r6,$s3,6*$SIZE_T($sp)
  1426. br $ra
  1427. .align 16
  1428. .Lctr32_software:
  1429. ___
  1430. $code.=<<___;
  1431. stm${g} $key,$ra,5*$SIZE_T($sp)
  1432. sl${g}r $inp,$out
  1433. larl $tbl,AES_Te
  1434. llgf $t1,12($ivp)
  1435. .Lctr32_loop:
  1436. stm${g} $inp,$out,2*$SIZE_T($sp)
  1437. llgf $s0,0($ivp)
  1438. llgf $s1,4($ivp)
  1439. llgf $s2,8($ivp)
  1440. lgr $s3,$t1
  1441. st $t1,16*$SIZE_T($sp)
  1442. lgr %r4,$key
  1443. bras $ra,_s390x_AES_encrypt
  1444. lm${g} $inp,$ivp,2*$SIZE_T($sp)
  1445. llgf $t1,16*$SIZE_T($sp)
  1446. x $s0,0($inp,$out)
  1447. x $s1,4($inp,$out)
  1448. x $s2,8($inp,$out)
  1449. x $s3,12($inp,$out)
  1450. stm $s0,$s3,0($out)
  1451. la $out,16($out)
  1452. ahi $t1,1 # 32-bit increment
  1453. brct $len,.Lctr32_loop
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
___
}

########################################################################
# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
#	const unsigned char iv[16]);
#
{
my $inp="%r2";
my $out="%r4";	# len and out are swapped
my $len="%r3";
my $key1="%r5";	# $i1
my $key2="%r6";	# $i2
my $fp="%r7";	# $i3
my $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...
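# Contract of the _s390x_xts_km helper as used by both entry points
# below: %r0 carries the km function code and %r1 the key schedule on
# entry; on return $len holds the residue len%16, the condition code
# is non-zero iff a partial tail remains, and in that case $s0:$s1
# hold the tweak for the final, stolen block in little-endian order.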
$code.=<<___;
.type	_s390x_xts_km,\@function
.align	16
_s390x_xts_km:
___
$code.=<<___ if(1);
	llgfr	$s0,%r0			# put aside the function code
	lghi	$s1,0x7f
	nr	$s1,%r0
	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000
	srlg	%r0,%r0,32($s1)		# check for 32+function code
	ng	%r0,S390X_KM(%r1)	# check km capability vector
	lgr	%r0,$s0			# restore the function code
	la	%r1,0($key1)		# restore $key1
	jz	.Lxts_km_vanilla

	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
	algr	$out,$inp

	oill	%r0,32			# switch to xts function code
	aghi	$s1,-18			#
	sllg	$s1,$s1,3		# (function code - 18)*8, 0 or 16
	la	%r1,$tweak-16($sp)
	slgr	%r1,$s1			# parameter block position
	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
	stmg	$s0,$s3,0(%r1)		# and copy it to parameter block.
					# yes, it contains junk and overlaps
					# with the tweak in the 128-bit case.
					# this is done to avoid a conditional
					# branch.
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value
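	# km with an XTS function code expects a parameter block of the
	# key (16 bytes for XTS-AES-128, 32 for XTS-AES-256) immediately
	# followed by the 16-byte tweak; "(function code - 18)*8" is 0 or
	# 16, placing the key so the tweak always lands at $tweak($sp)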
	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# pay attention to "partial completion"

	lrvg	$s0,$tweak+0($sp)	# load the last tweak
	lrvg	$s1,$tweak+8($sp)
	stmg	%r0,%r3,$tweak-32($sp)	# wipe copy of the key

	nill	%r0,0xffdf		# switch back to original function code
	la	%r1,0($key1)		# restore pointer to $key1
	slgr	$out,$inp
	llgc	$len,2*$SIZE_T-1($sp)
	nill	$len,0x0f		# $len%=16
	br	$ra

.align	16
.Lxts_km_vanilla:
___
$code.=<<___;
	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16	# guarantee at least 256-byte buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1			# align at page boundary
	slgr	$fp,$s0			# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16		# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1			# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	$sp,1024($s0)		# alloca
	nill	$fp,0xfff0		# round to 16*n
	st${g}	$s2,0($sp)		# back-chain
	nill	$len,0xfff0		# redundant
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lxts_km_go		# not zero, no borrow
	algr	$fp,$len		# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)
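	# in C terms the frame arithmetic above is roughly (a sketch,
	# constants as in the code):
	#
	#	base   = (sp - 1024 - 256 - 16) & ~4095; /* page-aligned */
	#	usable = (sp - base - 1024 - 16) & ~15;	 /* >=256, 16*n  */
	#	sp     = base + 1024;			 /* 1K reserve   */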
.Lxts_km_go:
	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
	lrvg	$s1,$tweak+8($s2)

	la	$s2,16($sp)		# vector of ascending tweak values
	slgr	$s2,$inp
	srlg	$s3,$fp,4
	j	.Lxts_km_start

.Lxts_km_loop:
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_prepare:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
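	# the instructions above multiply the tweak by x in GF(2^128)
	# modulo x^128+x^7+x^2+x+1, on little-endian 64-bit halves
	# $s0 (low) and $s1 (high); in C:
	#
	#	mask = (int64_t)s1 >> 63;	  /* broadcast top bit	*/
	#	s1   = (s1 << 1) | (s0 >> 63);	  /* algr/alcgr		*/
	#	s0   = (s0 << 1) ^ (mask & 0x87); /* conditional reduce	*/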
.Lxts_km_start:
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	stg	$i1,0($s2,$inp)
	stg	$i2,8($s2,$inp)
	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_prepare

	slgr	$inp,$fp		# rewind $inp
	la	$s2,0($out,$inp)
	lgr	$s3,$fp
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# pay attention to "partial completion"

	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_xor:
	lg	$i1,0($out,$inp)
	lg	$i2,8($out,$inp)
	xg	$i1,0($s2,$inp)
	xg	$i2,8($s2,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_xor
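	# per chunk of $fp bytes this is textbook XTS: the tweak vector
	# is staged at 16($sp), PP = P ^ T goes to the output buffer, a
	# single km call encrypts the whole chunk in place, and the pass
	# above applies C = CC ^ T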
	slgr	$len,$fp
	brc	1,.Lxts_km_loop		# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lxts_km_loop	# not zero

	l${g}	$i1,0($sp)		# back-chain
	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
	la	$i2,16($sp)
	srlg	$fp,$fp,4
.Lxts_km_zap:
	stg	$i1,0($i2)
	stg	$i1,8($i2)
	la	$i2,16($i2)
	brct	$fp,.Lxts_km_zap

	la	$sp,0($i1)
	llgc	$len,2*$SIZE_T-1($i1)
	nill	$len,0x0f		# $len%=16
	bzr	$ra

	# generate one more tweak...
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1

	ltr	$len,$len		# clear zero flag
	br	$ra
.size	_s390x_xts_km,.-_s390x_xts_km
.globl	AES_xts_encrypt
.type	AES_xts_encrypt,\@function
.align	16
AES_xts_encrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	srag	$len,$len,4		# formally wrong, because it expands
					# sign byte, but who can afford asking
					# to process more than 2^63-1 bytes?
					# I use it, because it sets condition
					# code...
	bcr	8,$ra			# abort if zero (i.e. less than 16)
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_enc_software

	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	sllg	$len,$len,4		# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed anymore
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore
	bras	$ra,_s390x_xts_km
	jz	.Lxts_enc_km_done

	aghi	$inp,-16		# take one step back
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_enc_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_km_steal
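	# ciphertext stealing for the $len%16 tail: the head of the last
	# full ciphertext block is swapped with the remaining plaintext,
	# and the patched block is re-encrypted under one more tweak.
	# a sketch, with r = len%16 and C[N] the last full block:
	#
	#	for (j = 0; j < r; j++) {
	#		c         = C[N][j];
	#		C[N][j]   = P_tail[j];	/* graft plaintext tail	 */
	#		C_tail[j] = c;		/* stolen ciphertext out */
	#	}
	#	C[N] = XTS_encrypt(C[N], tweak[N+1]);	/* code below	 */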
	la	$s2,0($i3)
	lghi	$s3,16
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($s2)
	xg	$i2,8($s2)
	stg	$i1,0($s2)
	stg	$i2,8($s2)
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# can this happen?

	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($i3)
	xg	$i2,8($i3)
	stg	$i1,0($i3)
	stg	$i2,8($i3)

.Lxts_enc_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_enc_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	j	.Lxts_enc_enter

.align	16
.Lxts_enc_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
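	# the tweak is kept little-endian for the doubling arithmetic,
	# then flipped back and split into the four big-endian 32-bit
	# words that _s390x_AES_encrypt expects in $s0..$s3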
	la	$inp,16($inp)		# $inp+=16
.Lxts_enc_enter:
	x	$s0,0($inp)		# ^=*($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	brct${g}	$len,.Lxts_enc_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_enc_done

	la	$i3,0($inp,$out)	# put aside real $out
.Lxts_enc_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_steal
	la	$out,0($i3)		# restore real $out
	# generate last tweak...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($out)		# ^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,`$tweak+0`($sp)	# ^=tweak
	x	$s1,`$tweak+4`($sp)
	x	$s2,`$tweak+8`($sp)
	x	$s3,`$tweak+12`($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

.Lxts_enc_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_encrypt,.-AES_xts_encrypt
___
# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
#	const unsigned char iv[16]);
#
$code.=<<___;
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,\@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	aghi	$len,-16
	bcr	4,$ra			# abort if less than zero. formally
					# wrong, because $len is unsigned,
					# but who can afford asking to
					# process more than 2^63-1 bytes?
	tmll	$len,0x0f
	jnz	.Lxts_dec_proceed
	aghi	$len,16
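	# decryption must hold back one extra block whenever there is a
	# partial tail: with len = 16*k + r, r != 0, the tail steals from
	# the last complete block, so only k-1 blocks can be processed in
	# bulk; if r == 0 the 16 is added back and all k blocks go
	# through the main loop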
.Lxts_dec_proceed:
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software

	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	nill	$len,0xfff0		# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed past this point
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore

	ltgr	$len,$len
	jz	.Lxts_dec_km_short
	bras	$ra,_s390x_xts_km
	jz	.Lxts_dec_km_done

	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
	j	.Lxts_dec_km_2ndtweak
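	# the tweak order is the mirror image of encryption: the last
	# complete ciphertext block is decrypted with the *second* (one
	# step ahead) tweak, and the re-assembled, stolen block with the
	# *first* one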
.Lxts_dec_km_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%=16
	lrvg	$s0,$tweak+0($sp)	# load the tweak
	lrvg	$s1,$tweak+8($sp)
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1

.Lxts_dec_km_2ndtweak:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1

	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i2,0($out,$inp)
	lghi	$i3,16
	.long	0xb92e0066		# km $i2,$i2
	brc	1,.-4			# can this happen?

	lrvgr	$i1,$s0
	lrvgr	$i2,$s1
	xg	$i1,0($out,$inp)
	xg	$i2,8($out,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_km_steal

	lgr	$s0,$s2
	lgr	$s1,$s3
	xg	$s0,0($i3)
	xg	$s1,8($i3)
	stg	$s0,0($i3)
	stg	$s1,8($i3)
	la	$s0,0($i3)
	lghi	$s1,16
	.long	0xb92e0088		# km $s0,$s0
	brc	1,.-4			# can this happen?

	xg	$s2,0($i3)
	xg	$s3,8($i3)
	stg	$s2,0($i3)
	stg	$s3,8($i3)

.Lxts_dec_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_dec_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	srlg	$len,$len,4
	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	larl	$tbl,AES_Td
	lt${g}r	$len,$len
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	jz	.Lxts_dec_short
	j	.Lxts_dec_enter

.align	16
.Lxts_dec_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
.Lxts_dec_enter:
	x	$s0,0($inp)		# tweak^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$inp,16($inp)
	brct${g}	$len,.Lxts_dec_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_dec_done

	# generate pair of tweaks...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$i2,$s1			# flip byte order
	lrvgr	$i3,$s3
	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak

.align	16
.Lxts_dec_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak-16+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($inp)		# tweak_the_2nd^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
	x	$s1,$tweak-16+4($sp)
	x	$s2,$tweak-16+8($sp)
	x	$s3,$tweak-16+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_steal
	la	$out,0($i3)		# restore real $out

	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
	x	$s0,0($out)		# tweak^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
	stg	$sp,$tweak-16+8($sp)
.Lxts_dec_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_decrypt,.-AES_xts_decrypt
___
}
$code.=<<___;
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print	$code;
close STDOUT or die "error closing STDOUT: $!";	# force flush