#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# AES for s390x.
#
# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with a 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution machines, and an issued instruction [in
# this case a load from memory is the critical one] has to complete
# before execution flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support: it's basically a "teaser," as
# it can and should be improved in several ways. Most notably, support
# for CBC is not utilized, nor are multiple blocks ever processed.
# Then the software key schedule could be postponed till hardware
# support detection... Performance improvement over assembler is
# reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
# proper support is implemented.
#
# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.
#
# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# the critical path is not long enough. Yet it spends ~24 cycles per
# byte processed with a 128-bit key.
#
# Unlike the previous version, hardware support detection takes place
# only at the moment of key schedule setup, and the result is recorded
# in key->rounds. This is done because deferred key setup can't be
# made MT-safe, at least not for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives an incredible performance
# improvement; it was measured to be ~6.6x. It's less than the
# previously mentioned 8x, because the software implementation was
# optimized.
#
# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over the "generic" counter mode routine
# relying on single-block, also hardware-assisted, AES_encrypt. "Up
# to" refers to the fact that exact throughput depends on the current
# stack frame alignment within a 4KB page. In the worst case you get
# ~75% of the maximum, but *on average* it would be as much as ~98%.
# Meaning that the worst case is unlikely; it's like hitting a ravine
# on a plateau.
#
# November 2010.
#
# Adapt for the -m31 build. If the kernel supports what's called the
# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to
# use 64-bit instructions and achieve "64-bit" performance even in a
# 31-bit legacy application context. The feature is not specific to
# any particular processor, as long as it's a "z-CPU". The latter
# implies that the code remains z/Architecture specific. On z990 it
# was measured to perform 2x better than code generated by gcc 4.3.
#
# December 2010.
#
# Add support for the z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...
#
# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for the z196
# km-xts-aes instructions, which deliver ~70% improvement over vanilla
# km-based code at 8KB block size, and ~37% at 512-byte block size.

$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}
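
# Illustrative invocation (the exact flavour strings are supplied by
# the build system; anything matching /3[12]/ selects the 31-bit ABI):
#
#	perl aes-s390x.pl linux64 aes-s390x.S	# $SIZE_T=8, $g="g"
#	perl aes-s390x.pl linux31 aes-s390x.S	# $SIZE_T=4, $g=""
#
# $g selects between 32- and 64-bit instruction forms, e.g. "st${g}"
# expands to st or stg, and $SIZE_T scales the stack-slot offsets.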
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";

$softonly=0;	# allow hardware support

$t0="%r0"; $mask="%r0";
$t1="%r1";
$t2="%r2"; $inp="%r2";
$t3="%r3"; $out="%r3"; $bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

$stdframe=16*$SIZE_T+4*8;

sub _data_word()
{ my $i;
  while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
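
# _data_word emits each table word twice; a single illustrative call
#
#	&_data_word(0xc66363a5);   # emits ".long 0xc66363a5,0xc66363a5"
#
# Doubling every entry to 8 bytes lets the round code read a Te/Td
# word at byte displacements 0..3 and thereby obtain its four byte
# rotations without any rotate instructions (see the 0xff<<3 index
# scaling in _s390x_AES_encrypt below).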
$code=<<___;
.text
.type AES_Te,\@object
.align 256
AES_Te:
___
&_data_word(
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align 256
.size AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_encrypt
.type AES_encrypt,\@function
AES_encrypt:
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Lesoft
la %r1,0($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
brc 1,.-4 # can this happen?
br %r14
.align 64
.Lesoft:
___
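
# A note on the hardware path above: the .long 0xb92e0042 is a
# hand-assembled "km %r4,%r2" (cipher message) instruction, emitted as
# a raw opcode because older assemblers have no mnemonic for it.
# "brc 1,.-4" re-executes the km as long as it ends with condition
# code 3, i.e. "partial completion"; for a single 16-byte block that
# should never occur, hence the "can this happen?" remark.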
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
larl $tbl,AES_Te
bras $ra,_s390x_AES_encrypt
l${g} $out,3*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_encrypt,.-AES_encrypt

.type _s390x_AES_encrypt,\@function
.align 16
_s390x_AES_encrypt:
st${g} $ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
llill $mask,`0xff<<3`
aghi $rounds,-1
j .Lenc_loop
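# Table indices are kept pre-scaled by 8, hence the 0xff<<3 mask:
# every AES_Te entry is 8 bytes because _data_word stores each word
# twice. A load at byte displacement 0, 1, 2 or 3 within an entry then
# yields one of the four byte rotations of the word, so the single
# 2KB table stands in for Te0..Te3 with no rotate instructions.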
.align 16
.Lenc_loop:
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
srlg $t3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
ngr $t1,$mask
nr $t2,$mask
nr $t3,$mask
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
ngr $i2,$mask
nr $i3,$mask
l $s0,0($s0,$tbl) # Te0[s0>>24]
l $t1,1($t1,$tbl) # Te3[s0>>0]
l $t2,2($t2,$tbl) # Te2[s0>>8]
l $t3,3($t3,$tbl) # Te1[s0>>16]
x $s0,3($i1,$tbl) # Te1[s1>>16]
l $s1,0($s1,$tbl) # Te0[s1>>24]
x $t2,1($i2,$tbl) # Te3[s1>>0]
x $t3,2($i3,$tbl) # Te2[s1>>8]
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
nr $s2,$mask
ngr $i3,$mask
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
sllg $t1,$s3,`0+3` # i0
nr $ra,$mask
la $key,16($key)
ngr $t1,$mask
x $s0,2($i1,$tbl) # Te2[s2>>8]
x $s1,3($i2,$tbl) # Te1[s2>>16]
l $s2,0($s2,$tbl) # Te0[s2>>24]
x $t3,1($i3,$tbl) # Te3[s2>>0]
srlg $i3,$s3,`16-3` # i2
xr $s2,$t2
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $t3,12($key)
x $s0,1($t1,$tbl) # Te3[s3>>0]
x $s1,2($ra,$tbl) # Te2[s3>>8]
x $s2,3($i3,$tbl) # Te1[s3>>16]
l $s3,0($s3,$tbl) # Te0[s3>>24]
xr $s3,$t3
brct $rounds,.Lenc_loop
.align 16
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
ngr $t1,$mask
srlg $t3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
nr $t2,$mask
nr $t3,$mask
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
ngr $i2,$mask
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
nr $i3,$mask
llgc $s0,2($s0,$tbl) # Te4[s0>>24]
llgc $t1,2($t1,$tbl) # Te4[s0>>0]
sll $s0,24
llgc $t2,2($t2,$tbl) # Te4[s0>>8]
llgc $t3,2($t3,$tbl) # Te4[s0>>16]
sll $t2,8
sll $t3,16
llgc $i1,2($i1,$tbl) # Te4[s1>>16]
llgc $s1,2($s1,$tbl) # Te4[s1>>24]
llgc $i2,2($i2,$tbl) # Te4[s1>>0]
llgc $i3,2($i3,$tbl) # Te4[s1>>8]
sll $i1,16
sll $s1,24
sll $i3,8
or $s0,$i1
or $s1,$t1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
ngr $i3,$mask
nr $s2,$mask
sllg $t1,$s3,`0+3` # i0
srlg $ra,$s3,`8-3` # i1
ngr $t1,$mask
llgc $i1,2($i1,$tbl) # Te4[s2>>8]
llgc $i2,2($i2,$tbl) # Te4[s2>>16]
sll $i1,8
llgc $s2,2($s2,$tbl) # Te4[s2>>24]
llgc $i3,2($i3,$tbl) # Te4[s2>>0]
sll $i2,16
nr $ra,$mask
sll $s2,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $t3,$i3
srlg $i3,$s3,`16-3` # i2
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
l $t0,16($key)
l $t2,20($key)
llgc $i1,2($t1,$tbl) # Te4[s3>>0]
llgc $i2,2($ra,$tbl) # Te4[s3>>8]
llgc $i3,2($i3,$tbl) # Te4[s3>>16]
llgc $s3,2($s3,$tbl) # Te4[s3>>24]
sll $i2,8
sll $i3,16
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$i3
or $s3,$t3
l${g} $ra,15*$SIZE_T($sp)
xr $s0,$t0
xr $s1,$t2
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
___

$code.=<<___;
.type AES_Td,\@object
.align 256
AES_Td:
___
&_data_word(
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td
# void AES_decrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_decrypt
.type AES_decrypt,\@function
AES_decrypt:
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Ldsoft
la %r1,0($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
brc 1,.-4 # can this happen?
br %r14
.align 64
.Ldsoft:
___
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
larl $tbl,AES_Td
bras $ra,_s390x_AES_decrypt
l${g} $out,3*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_decrypt,.-AES_decrypt

.type _s390x_AES_decrypt,\@function
.align 16
_s390x_AES_decrypt:
st${g} $ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
llill $mask,`0xff<<3`
aghi $rounds,-1
j .Ldec_loop
.align 16
.Ldec_loop:
srlg $t1,$s0,`16-3`
srlg $t2,$s0,`8-3`
sllg $t3,$s0,`0+3`
srl $s0,`24-3`
nr $s0,$mask
nr $t1,$mask
nr $t2,$mask
ngr $t3,$mask
sllg $i1,$s1,`0+3` # i0
srlg $i2,$s1,`16-3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
ngr $i1,$mask
nr $s1,$mask
nr $i2,$mask
nr $i3,$mask
l $s0,0($s0,$tbl) # Td0[s0>>24]
l $t1,3($t1,$tbl) # Td1[s0>>16]
l $t2,2($t2,$tbl) # Td2[s0>>8]
l $t3,1($t3,$tbl) # Td3[s0>>0]
x $s0,1($i1,$tbl) # Td3[s1>>0]
l $s1,0($s1,$tbl) # Td0[s1>>24]
x $t2,3($i2,$tbl) # Td1[s1>>16]
x $t3,2($i3,$tbl) # Td2[s1>>8]
srlg $i1,$s2,`8-3` # i0
sllg $i2,$s2,`0+3` # i1
srlg $i3,$s2,`16-3`
srl $s2,`24-3`
nr $i1,$mask
ngr $i2,$mask
nr $s2,$mask
nr $i3,$mask
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
srlg $t1,$s3,`16-3` # i0
nr $ra,$mask
la $key,16($key)
nr $t1,$mask
x $s0,2($i1,$tbl) # Td2[s2>>8]
x $s1,1($i2,$tbl) # Td3[s2>>0]
l $s2,0($s2,$tbl) # Td0[s2>>24]
x $t3,3($i3,$tbl) # Td1[s2>>16]
sllg $i3,$s3,`0+3` # i2
srl $s3,`24-3`
ngr $i3,$mask
nr $s3,$mask
xr $s2,$t2
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $t3,12($key)
x $s0,3($t1,$tbl) # Td1[s3>>16]
x $s1,2($ra,$tbl) # Td2[s3>>8]
x $s2,1($i3,$tbl) # Td3[s3>>0]
l $s3,0($s3,$tbl) # Td0[s3>>24]
xr $s3,$t3
brct $rounds,.Ldec_loop
.align 16
l $t1,`2048+0`($tbl) # prefetch Td4
l $t2,`2048+64`($tbl)
l $t3,`2048+128`($tbl)
l $i1,`2048+192`($tbl)
llill $mask,0xff
srlg $i3,$s0,24 # i0
srlg $t1,$s0,16
srlg $t2,$s0,8
nr $s0,$mask # i3
nr $t1,$mask
srlg $i1,$s1,24
nr $t2,$mask
srlg $i2,$s1,16
srlg $ra,$s1,8
nr $s1,$mask # i0
nr $i2,$mask
nr $ra,$mask
llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
sll $t1,16
llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
sllg $s0,$i3,24
sll $t2,8
llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
sll $i1,24
llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
sll $i2,16
sll $i3,8
or $s0,$s1
or $t1,$i1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,8 # i0
srlg $i2,$s2,24
srlg $i3,$s2,16
nr $s2,$mask # i1
nr $i1,$mask
nr $i3,$mask
llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
sll $i1,8
sll $i2,24
or $s0,$i1
sll $i3,16
or $t2,$i2
or $t3,$i3
srlg $i1,$s3,16 # i0
srlg $i2,$s3,8 # i1
srlg $i3,$s3,24
nr $s3,$mask # i2
nr $i1,$mask
nr $i2,$mask
l${g} $ra,15*$SIZE_T($sp)
or $s1,$t1
l $t0,16($key)
l $t1,20($key)
llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
sll $i1,16
llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
sll $i2,8
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $s3,$t3
xr $s0,$t0
xr $s1,$t1
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
___
$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_encrypt_key
.type AES_set_encrypt_key,\@function
.align 16
AES_set_encrypt_key:
lghi $t0,0
cl${g}r $inp,$t0
je .Lminus1
cl${g}r $key,$t0
je .Lminus1
lghi $t0,128
clr $bits,$t0
je .Lproceed
lghi $t0,192
clr $bits,$t0
je .Lproceed
lghi $t0,256
clr $bits,$t0
je .Lproceed
lghi %r2,-2
br %r14
.align 16
.Lproceed:
___
$code.=<<___ if (!$softonly);
# convert bits to km code, [128,192,256]->[18,19,20]
lhi %r5,-128
lhi %r0,18
ar %r5,$bits
srl %r5,6
ar %r5,%r0
larl %r1,OPENSSL_s390xcap_P
lg %r0,0(%r1)
tmhl %r0,0x4000 # check for message-security assist
jz .Lekey_internal
lghi %r0,0 # query capability vector
la %r1,16($sp)
.long 0xb92f0042 # kmc %r4,%r2
llihh %r1,0x8000
srlg %r1,%r1,0(%r5)
ng %r1,16($sp)
jz .Lekey_internal
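# The five instructions above follow the standard query convention of
# the km/kmc instruction family: function code 0 in %r0 stores a
# 16-byte capability vector at the address in %r1. The code then
# shifts 0x8000... right by the function code computed in %r5 (18, 19
# or 20 for AES-128/192/256) and ANDs it with the vector, so the
# branch is taken when that key length is not supported in hardware.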
lmg %r0,%r1,0($inp) # just copy 128 bits...
stmg %r0,%r1,0($key)
lhi %r0,192
cr $bits,%r0
jl 1f
lg %r1,16($inp)
stg %r1,16($key)
je 1f
lg %r1,24($inp)
stg %r1,24($key)
1: st $bits,236($key) # save bits
st %r5,240($key) # save km code
lghi %r2,0
br %r14
___
$code.=<<___;
.align 16
.Lekey_internal:
stm${g} %r6,%r13,6*$SIZE_T($sp) # all non-volatile regs
larl $tbl,AES_Te+2048
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
st $s0,0($key)
st $s1,4($key)
st $s2,8($key)
st $s3,12($key)
lghi $t0,128
cr $bits,$t0
jne .Lnot128
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,10
st $rounds,240($key)
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t2,$mask
nr $i1,$mask
nr $i2,$mask
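# Each iteration below is one step of the AES-128 key schedule; in
# illustrative pseudo-C:
#
#	rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon[i];
#	rk[5] = rk[1] ^ rk[4];
#	rk[6] = rk[2] ^ rk[5];
#	rk[7] = rk[3] ^ rk[6];
#
# The four icm (insert characters under mask) instructions assemble
# SubWord(RotWord(temp)) one byte at a time from Te4 lookups.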
.align 16
.L128_loop:
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
icm $t2,1,0($i3) # Te4[rk[3]>>24]
x $t2,256($t3,$tbl) # rcon[i]
xr $s0,$t2 # rk[4]=rk[0]^...
xr $s1,$s0 # rk[5]=rk[1]^rk[4]
xr $s2,$s1 # rk[6]=rk[2]^rk[5]
xr $s3,$s2 # rk[7]=rk[3]^rk[6]
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
nr $t2,$mask
nr $i1,$mask
srlg $i3,$s3,24
nr $i2,$mask
st $s0,16($key)
st $s1,20($key)
st $s2,24($key)
st $s3,28($key)
la $key,16($key) # key+=4
la $t3,4($t3) # i++
brct $rounds,.L128_loop
lghi %r2,0
lm${g} %r6,%r13,6*$SIZE_T($sp)
br $ra
.align 16
.Lnot128:
llgf $t0,16($inp)
llgf $t1,20($inp)
st $t0,16($key)
st $t1,20($key)
lghi $t0,192
cr $bits,$t0
jne .Lnot192
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,12
st $rounds,240($key)
lghi $rounds,8
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
.align 16
.L192_loop:
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[5]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[6]=rk[0]^...
xr $s1,$s0 # rk[7]=rk[1]^rk[6]
xr $s2,$s1 # rk[8]=rk[2]^rk[7]
xr $s3,$s2 # rk[9]=rk[3]^rk[8]
st $s0,24($key)
st $s1,28($key)
st $s2,32($key)
st $s3,36($key)
brct $rounds,.L192_continue
lghi %r2,0
lm${g} %r6,%r13,6*$SIZE_T($sp)
br $ra
.align 16
.L192_continue:
lgr $t1,$s3
x $t1,16($key) # rk[10]=rk[4]^rk[9]
st $t1,40($key)
x $t1,20($key) # rk[11]=rk[5]^rk[10]
st $t1,44($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $key,24($key) # key+=6
la $t3,4($t3) # i++
j .L192_loop
.align 16
.Lnot192:
llgf $t0,24($inp)
llgf $t1,28($inp)
st $t0,24($key)
st $t1,28($key)
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,14
st $rounds,240($key)
lghi $rounds,7
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
.align 16
.L256_loop:
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[7]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[8]=rk[0]^...
xr $s1,$s0 # rk[9]=rk[1]^rk[8]
xr $s2,$s1 # rk[10]=rk[2]^rk[9]
xr $s3,$s2 # rk[11]=rk[3]^rk[10]
st $s0,32($key)
st $s1,36($key)
st $s2,40($key)
st $s3,44($key)
brct $rounds,.L256_continue
lghi %r2,0
lm${g} %r6,%r13,6*$SIZE_T($sp)
br $ra
.align 16
.L256_continue:
lgr $t1,$s3 # temp=rk[11]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
llgc $t1,0($t1) # Te4[rk[11]>>0]
icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
x $t1,16($key) # rk[12]=rk[4]^...
st $t1,48($key)
x $t1,20($key) # rk[13]=rk[5]^rk[12]
st $t1,52($key)
x $t1,24($key) # rk[14]=rk[6]^rk[13]
st $t1,56($key)
x $t1,28($key) # rk[15]=rk[7]^rk[14]
st $t1,60($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $key,32($key) # key+=8
la $t3,4($t3) # i++
j .L256_loop
.Lminus1:
lghi %r2,-1
br $ra
.size AES_set_encrypt_key,.-AES_set_encrypt_key

# void AES_set_decrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,\@function
.align 16
AES_set_decrypt_key:
st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers!
bras $ra,AES_set_encrypt_key
l${g} $key,4*$SIZE_T($sp)
l${g} $ra,14*$SIZE_T($sp)
ltgr %r2,%r2
bnzr $ra
___
$code.=<<___ if (!$softonly);
l $t0,240($key)
lhi $t1,16
cr $t0,$t1
jl .Lgo
oill $t0,0x80 # set "decrypt" bit
st $t0,240($key)
br $ra
.align 16
.Ldkey_internal:
st${g} $key,4*$SIZE_T($sp)
st${g} $ra,14*$SIZE_T($sp)
bras $ra,.Lekey_internal
l${g} $key,4*$SIZE_T($sp)
l${g} $ra,14*$SIZE_T($sp)
___
$code.=<<___;
.Lgo: llgf $rounds,240($key)
la $i1,0($key)
sllg $i2,$rounds,4
la $i2,0($i2,$key)
srl $rounds,1
lghi $t1,-16
.align 16
.Linv: lmg $s0,$s1,0($i1)
lmg $s2,$s3,0($i2)
stmg $s0,$s1,0($i2)
stmg $s2,$s3,0($i1)
la $i1,16($i1)
la $i2,0($t1,$i2)
brct $rounds,.Linv
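# At this point the round keys are in reverse order, as the equivalent
# inverse cipher requires; the code below still has to apply
# InvMixColumns to every round key except the first and the last.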
___
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
llgf $rounds,240($key)
aghi $rounds,-1
sll $rounds,2 # (rounds-1)*4
llilh $mask80,0x8080
llilh $mask1b,0x1b1b
llilh $maskfe,0xfefe
oill $mask80,0x8080
oill $mask1b,0x1b1b
oill $maskfe,0xfefe
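# The .Lmix loop below applies InvMixColumns to each round-key word
# using SIMD-within-a-register arithmetic. Per 32-bit word, the xtime
# step (multiplication by 2 in GF(2^8), four bytes at once) is, in
# illustrative pseudo-C:
#
#	hi  = tp1 & 0x80808080;              /* high bit of each byte */
#	tp2 = ((tp1 << 1) & 0xfefefefe) ^ ((hi - (hi >> 7)) & 0x1b1b1b1b);
#
# which is what the mask80/mask1b/maskfe constants implement; tp4 and
# tp8 repeat the step on tp2 and tp4 respectively.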
.align 16
.Lmix: l $s0,16($key) # tp1
lr $s1,$s0
ngr $s1,$mask80
srlg $t1,$s1,7
slr $s1,$t1
nr $s1,$mask1b
sllg $t1,$s0,1
nr $t1,$maskfe
xr $s1,$t1 # tp2
lr $s2,$s1
ngr $s2,$mask80
srlg $t1,$s2,7
slr $s2,$t1
nr $s2,$mask1b
sllg $t1,$s1,1
nr $t1,$maskfe
xr $s2,$t1 # tp4
lr $s3,$s2
ngr $s3,$mask80
srlg $t1,$s3,7
slr $s3,$t1
nr $s3,$mask1b
sllg $t1,$s2,1
nr $t1,$maskfe
xr $s3,$t1 # tp8
xr $s1,$s0 # tp2^tp1
xr $s2,$s0 # tp4^tp1
rll $s0,$s0,24 # = ROTATE(tp1,8)
xr $s2,$s3 # ^=tp8
xr $s0,$s1 # ^=tp2^tp1
xr $s1,$s3 # tp2^tp1^tp8
xr $s0,$s2 # ^=tp4^tp1^tp8
rll $s1,$s1,8
rll $s2,$s2,16
xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
rll $s3,$s3,24
xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
xr $s0,$s3 # ^= ROTATE(tp8,8)
st $s0,16($key)
la $key,4($key)
brct $rounds,.Lmix
lm${g} %r6,%r13,6*$SIZE_T($sp) # as was saved by AES_set_encrypt_key!
lghi %r2,0
br $ra
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___

########################################################################
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivec, const int enc)
{
my $inp="%r2";
my $out="%r4"; # length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";

$code.=<<___;
.globl AES_cbc_encrypt
.type AES_cbc_encrypt,\@function
.align 16
AES_cbc_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, out and len
xgr %r4,%r3
xgr %r3,%r4
___
$code.=<<___ if (!$softonly);
lhi %r0,16
cl %r0,240($key)
jh .Lcbc_software
lg %r0,0($ivp) # copy ivec
lg %r1,8($ivp)
stmg %r0,%r1,16($sp)
lmg %r0,%r1,0($key) # copy key, cover 256 bit
stmg %r0,%r1,32($sp)
lmg %r0,%r1,16($key)
stmg %r0,%r1,48($sp)
l %r0,240($key) # load kmc code
lghi $key,15 # res=len%16, len-=res;
ngr $key,$len
sl${g}r $len,$key
la %r1,16($sp) # parameter block - ivec || key
jz .Lkmc_truncated
.long 0xb92f0042 # kmc %r4,%r2
brc 1,.-4 # pay attention to "partial completion"
ltr $key,$key
jnz .Lkmc_truncated
.Lkmc_done:
lmg %r0,%r1,16($sp) # copy ivec to caller
stg %r0,0($ivp)
stg %r1,8($ivp)
br $ra
.align 16
.Lkmc_truncated:
ahi $key,-1 # it's the way it's encoded in mvc
tmll %r0,0x80
jnz .Lkmc_truncated_dec
lghi %r1,0
stg %r1,16*$SIZE_T($sp)
stg %r1,16*$SIZE_T+8($sp)
bras %r1,1f
mvc 16*$SIZE_T(1,$sp),0($inp)
1: ex $key,0(%r1)
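# Variable-length copy idiom: "bras %r1,1f" records the address of the
# mvc template and jumps over it, then "ex" executes that mvc with its
# length field OR-ed with the low byte of the residue register (which
# holds res-1, as mvc encodes length minus one). The net effect is a
# copy of exactly res bytes of the trailing partial block into the
# zero-padded 16-byte stack buffer.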
la %r1,16($sp) # restore parameter block
la $inp,16*$SIZE_T($sp)
lghi $len,16
.long 0xb92f0042 # kmc %r4,%r2
j .Lkmc_done
.align 16
.Lkmc_truncated_dec:
st${g} $out,4*$SIZE_T($sp)
la $out,16*$SIZE_T($sp)
lghi $len,16
.long 0xb92f0042 # kmc %r4,%r2
l${g} $out,4*$SIZE_T($sp)
bras %r1,2f
mvc 0(1,$out),16*$SIZE_T($sp)
2: ex $key,0(%r1)
j .Lkmc_done
.align 16
.Lcbc_software:
___
$code.=<<___;
stm${g} $key,$ra,5*$SIZE_T($sp)
lhi %r0,0
cl %r0,`$stdframe+$SIZE_T-4`($sp)
je .Lcbc_decrypt
larl $tbl,AES_Te
llgf $s0,0($ivp)
llgf $s1,4($ivp)
llgf $s2,8($ivp)
llgf $s3,12($ivp)
lghi $t0,16
sl${g}r $len,$t0
brc 4,.Lcbc_enc_tail # if borrow
.Lcbc_enc_loop:
stm${g} $inp,$out,2*$SIZE_T($sp)
x $s0,0($inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
lgr %r4,$key
bras $ra,_s390x_AES_encrypt
lm${g} $inp,$key,2*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
la $inp,16($inp)
la $out,16($out)
lghi $t0,16
lt${g}r $len,$len
jz .Lcbc_enc_done
sl${g}r $len,$t0
brc 4,.Lcbc_enc_tail # if borrow
j .Lcbc_enc_loop
.align 16
.Lcbc_enc_done:
l${g} $ivp,6*$SIZE_T($sp)
st $s0,0($ivp)
st $s1,4($ivp)
st $s2,8($ivp)
st $s3,12($ivp)
lm${g} %r7,$ra,7*$SIZE_T($sp)
br $ra
.align 16
.Lcbc_enc_tail:
aghi $len,15
lghi $t0,0
stg $t0,16*$SIZE_T($sp)
stg $t0,16*$SIZE_T+8($sp)
bras $t1,3f
mvc 16*$SIZE_T(1,$sp),0($inp)
3: ex $len,0($t1)
lghi $len,0
la $inp,16*$SIZE_T($sp)
j .Lcbc_enc_loop
.align 16
.Lcbc_decrypt:
larl $tbl,AES_Td
lg $t0,0($ivp)
lg $t1,8($ivp)
stmg $t0,$t1,16*$SIZE_T($sp)
.Lcbc_dec_loop:
stm${g} $inp,$out,2*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
lgr %r4,$key
bras $ra,_s390x_AES_decrypt
lm${g} $inp,$key,2*$SIZE_T($sp)
sllg $s0,$s0,32
sllg $s2,$s2,32
lr $s0,$s1
lr $s2,$s3
lg $t0,0($inp)
lg $t1,8($inp)
xg $s0,16*$SIZE_T($sp)
xg $s2,16*$SIZE_T+8($sp)
lghi $s1,16
sl${g}r $len,$s1
brc 4,.Lcbc_dec_tail # if borrow
brc 2,.Lcbc_dec_done # if zero
stg $s0,0($out)
stg $s2,8($out)
stmg $t0,$t1,16*$SIZE_T($sp)
la $inp,16($inp)
la $out,16($out)
j .Lcbc_dec_loop
.Lcbc_dec_done:
stg $s0,0($out)
stg $s2,8($out)
.Lcbc_dec_exit:
lm${g} %r6,$ra,6*$SIZE_T($sp)
stmg $t0,$t1,0($ivp)
br $ra
.align 16
.Lcbc_dec_tail:
aghi $len,15
stg $s0,16*$SIZE_T($sp)
stg $s2,16*$SIZE_T+8($sp)
bras $s1,4f
mvc 0(1,$out),16*$SIZE_T($sp)
4: ex $len,0($s1)
j .Lcbc_dec_exit
.size AES_cbc_encrypt,.-AES_cbc_encrypt
___
}

########################################################################
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
# size_t blocks, const AES_KEY *key,
# const unsigned char *ivec)
{
my $inp="%r2";
my $out="%r4"; # blocks and out are swapped
my $len="%r3";
my $key="%r5"; my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";

$code.=<<___;
.globl AES_ctr32_encrypt
.type AES_ctr32_encrypt,\@function
.align 16
AES_ctr32_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Lctr32_software
stm${g} %r6,$s3,6*$SIZE_T($sp)
slgr $out,$inp
la %r1,0($key) # %r1 is permanent copy of $key
lg $iv0,0($ivp) # load ivec
lg $ivp,8($ivp)
# prepare and allocate stack frame at the top of 4K page
# with 1K reserved for eventual signal handling
lghi $s0,-1024-256-16 # guarantee at least 256-byte buffer
lghi $s1,-4096
algr $s0,$sp
lgr $fp,$sp
ngr $s0,$s1 # align at page boundary
slgr $fp,$s0 # total buffer size
lgr $s2,$sp
lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
slgr $fp,$s1 # deduct reservation to get usable buffer size
# buffer size is at least 256 and at most 3072+256-16
la $sp,1024($s0) # alloca
srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
st${g} $s2,0($sp) # back-chain
st${g} $fp,$SIZE_T($sp)
slgr $len,$fp
brc 1,.Lctr32_hw_switch # not zero, no borrow
algr $fp,$len # input is shorter than allocated buffer
lghi $len,0
st${g} $fp,$SIZE_T($sp)
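# Worked example of the frame arithmetic above, for a hypothetical
# entry stack pointer of 0x9040:
#	(0x9040-1024-256-16) & -4096 = 0x8000	page boundary
#	new stack pointer = 0x8000+1024 = 0x8400	1KB kept in reserve
#	usable buffer = 0x9040-0x8000-1024-16 = 0xc30 bytes = 195 blocks
# i.e. the counter-block buffer always lands at the same offset within
# its 4KB page, and at least 256 bytes of it are usable.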
  1297. .Lctr32_hw_switch:
  1298. ___
  1299. $code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower
  1300. larl $s0,OPENSSL_s390xcap_P
  1301. lg $s0,8($s0)
  1302. tmhh $s0,0x0004 # check for message_security-assist-4
  1303. jz .Lctr32_km_loop
  1304. llgfr $s0,%r0
  1305. lgr $s1,%r1
  1306. lghi %r0,0
  1307. la %r1,16($sp)
  1308. .long 0xb92d2042 # kmctr %r4,%r2,%r2
  1309. llihh %r0,0x8000 # check if kmctr supports the function code
  1310. srlg %r0,%r0,0($s0)
  1311. ng %r0,16($sp)
  1312. lgr %r0,$s0
  1313. lgr %r1,$s1
  1314. jz .Lctr32_km_loop
  1315. ####### kmctr code
  1316. algr $out,$inp # restore $out
  1317. lgr $s1,$len # $s1 undertakes $len
  1318. j .Lctr32_kmctr_loop
  1319. .align 16
  1320. .Lctr32_kmctr_loop:
  1321. la $s2,16($sp)
  1322. lgr $s3,$fp
  1323. .Lctr32_kmctr_prepare:
  1324. stg $iv0,0($s2)
  1325. stg $ivp,8($s2)
  1326. la $s2,16($s2)
  1327. ahi $ivp,1 # 32-bit increment, preserves upper half
  1328. brct $s3,.Lctr32_kmctr_prepare
  1329. #la $inp,0($inp) # inp
  1330. sllg $len,$fp,4 # len
  1331. #la $out,0($out) # out
  1332. la $s2,16($sp) # iv
  1333. .long 0xb92da042 # kmctr $out,$s2,$inp
  1334. brc 1,.-4 # pay attention to "partial completion"
  1335. slgr $s1,$fp
  1336. brc 1,.Lctr32_kmctr_loop # not zero, no borrow
  1337. algr $fp,$s1
  1338. lghi $s1,0
  1339. brc 4+1,.Lctr32_kmctr_loop # not zero
  1340. l${g} $sp,0($sp)
  1341. lm${g} %r6,$s3,6*$SIZE_T($sp)
  1342. br $ra
  1343. .align 16
  1344. ___
  1345. $code.=<<___;
  1346. .Lctr32_km_loop:
  1347. la $s2,16($sp)
  1348. lgr $s3,$fp
  1349. .Lctr32_km_prepare:
  1350. stg $iv0,0($s2)
  1351. stg $ivp,8($s2)
  1352. la $s2,16($s2)
  1353. ahi $ivp,1 # 32-bit increment, preserves upper half
  1354. brct $s3,.Lctr32_km_prepare
  1355. la $s0,16($sp) # inp
  1356. sllg $s1,$fp,4 # len
  1357. la $s2,16($sp) # out
  1358. .long 0xb92e00a8 # km %r10,%r8
  1359. brc 1,.-4 # pay attention to "partial completion"
  1360. la $s2,16($sp)
  1361. lgr $s3,$fp
  1362. slgr $s2,$inp
  1363. .Lctr32_km_xor:
  1364. lg $s0,0($inp)
  1365. lg $s1,8($inp)
  1366. xg $s0,0($s2,$inp)
  1367. xg $s1,8($s2,$inp)
  1368. stg $s0,0($out,$inp)
  1369. stg $s1,8($out,$inp)
  1370. la $inp,16($inp)
  1371. brct $s3,.Lctr32_km_xor
  1372. slgr $len,$fp
  1373. brc 1,.Lctr32_km_loop # not zero, no borrow
  1374. algr $fp,$len
  1375. lghi $len,0
  1376. brc 4+1,.Lctr32_km_loop # not zero
  1377. l${g} $s0,0($sp)
  1378. l${g} $s1,$SIZE_T($sp)
  1379. la $s2,16($sp)
  1380. .Lctr32_km_zap:
  1381. stg $s0,0($s2)
  1382. stg $s0,8($s2)
  1383. la $s2,16($s2)
  1384. brct $s1,.Lctr32_km_zap
  1385. la $sp,0($s0)
  1386. lm${g} %r6,$s3,6*$SIZE_T($sp)
  1387. br $ra
  1388. .align 16
  1389. .Lctr32_software:
  1390. ___
  1391. $code.=<<___;
  1392. stm${g} $key,$ra,5*$SIZE_T($sp)
  1393. sl${g}r $inp,$out
  1394. larl $tbl,AES_Te
  1395. llgf $t1,12($ivp)
  1396. .Lctr32_loop:
  1397. stm${g} $inp,$out,2*$SIZE_T($sp)
  1398. llgf $s0,0($ivp)
  1399. llgf $s1,4($ivp)
  1400. llgf $s2,8($ivp)
  1401. lgr $s3,$t1
  1402. st $t1,16*$SIZE_T($sp)
  1403. lgr %r4,$key
  1404. bras $ra,_s390x_AES_encrypt
  1405. lm${g} $inp,$ivp,2*$SIZE_T($sp)
  1406. llgf $t1,16*$SIZE_T($sp)
  1407. x $s0,0($inp,$out)
  1408. x $s1,4($inp,$out)
  1409. x $s2,8($inp,$out)
  1410. x $s3,12($inp,$out)
  1411. stm $s0,$s3,0($out)
  1412. la $out,16($out)
  1413. ahi $t1,1 # 32-bit increment
  1414. brct $len,.Lctr32_loop
  1415. lm${g} %r6,$ra,6*$SIZE_T($sp)
  1416. br $ra
  1417. .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
  1418. ___
  1419. }
  1420. ########################################################################
  1421. # void AES_xts_encrypt(const char *inp,char *out,size_t len,
  1422. # const AES_KEY *key1, const AES_KEY *key2,
  1423. # const unsigned char iv[16]);
  1424. #
  1425. {
  1426. my $inp="%r2";
  1427. my $out="%r4"; # len and out are swapped
  1428. my $len="%r3";
  1429. my $key1="%r5"; # $i1
  1430. my $key2="%r6"; # $i2
  1431. my $fp="%r7"; # $i3
  1432. my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame...
  1433. $code.=<<___;
  1434. .type _s390x_xts_km,\@function
  1435. .align 16
  1436. _s390x_xts_km:
  1437. ___
  1438. $code.=<<___ if(1);
	llgfr	$s0,%r0			# put aside the function code
	lghi	$s1,0x7f
	nr	$s1,%r0
	lghi	%r0,0			# query capability vector
	la	%r1,2*$SIZE_T($sp)
	.long	0xb92e0042		# km %r4,%r2
	llihh	%r1,0x8000
	srlg	%r1,%r1,32($s1)		# check for 32+function code
	ng	%r1,2*$SIZE_T($sp)
	lgr	%r0,$s0			# restore the function code
	la	%r1,0($key1)		# restore $key1
	jz	.Lxts_km_vanilla
	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
	algr	$out,$inp
	oill	%r0,32			# switch to xts function code
	aghi	$s1,-18			#
	sllg	$s1,$s1,3		# (function code - 18)*8, 0 or 16
	la	%r1,$tweak-16($sp)
	slgr	%r1,$s1			# parameter block position
	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
	stmg	$s0,$s3,0(%r1)		# and copy it to parameter block.
					# yes, it contains junk and overlaps
					# with the tweak in 128-bit case.
					# it's done to avoid conditional
					# branch.
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value
	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# pay attention to "partial completion"
	lrvg	$s0,$tweak+0($sp)	# load the last tweak
	lrvg	$s1,$tweak+8($sp)
	stmg	%r0,%r3,$tweak-32(%r1)	# wipe copy of the key
	nill	%r0,0xffdf		# switch back to original function code
	la	%r1,0($key1)		# restore pointer to $key1
	slgr	$out,$inp
	llgc	$len,2*$SIZE_T-1($sp)
	nill	$len,0x0f		# $len%=16
	br	$ra
.align	16
.Lxts_km_vanilla:
___
$code.=<<___;
# prepare and allocate stack frame at the top of 4K page
# with 1K reserved for eventual signal handling
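#
# in C-like terms the arithmetic below is (a sketch of the address
# computation, not literal code from this file):
#
#	base = (sp - 1024 - 256 - 16) & ~4095;	/* page-aligned */
#	size = sp - base;			/* whole reservation */
#	sp   = base + 1024;			/* 1K red zone for signals */
#	size = (size - 1024 - 16) & ~15;	/* usable 16*n buffer */
#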
	lghi	$s0,-1024-256-16	# guarantee at least 256-byte buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1			# align at page boundary
	slgr	$fp,$s0			# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16		# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1			# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16
	la	$sp,1024($s0)		# alloca
	nill	$fp,0xfff0		# round to 16*n
	st${g}	$s2,0($sp)		# back-chain
	nill	$len,0xfff0		# redundant
	st${g}	$fp,$SIZE_T($sp)
	slgr	$len,$fp
	brc	1,.Lxts_km_go		# not zero, no borrow
	algr	$fp,$len		# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)
.Lxts_km_go:
	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
	lrvg	$s1,$tweak+8($s2)
	la	$s2,16($sp)		# vector of ascending tweak values
	slgr	$s2,$inp
	srlg	$s3,$fp,4
	j	.Lxts_km_start
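# Each chunk of $fp bytes is handled XEX-style: the per-block tweaks
# T_i are stored to a scratch vector at 16($sp), XORed into the
# plaintext, one km call encrypts the whole chunk in place, and the
# same tweaks are XORed into the result:
#
#	C_i = E_K1(P_i ^ T_i) ^ T_i,	T_{i+1} = T_i * x in GF(2^128)
#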
.Lxts_km_loop:
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
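# The tweak update multiplies T by x modulo x^128+x^7+x^2+x+1 (the
# XTS polynomial, hence the 0x87). A minimal C sketch over the two
# little-endian 64-bit halves kept in $s0 (lo) and $s1 (hi):
#
#	carry = lo >> 63;
#	rem   = (uint64_t)((int64_t)hi >> 63) & 0x87;
#	lo    = (lo << 1) ^ rem;
#	hi    = (hi << 1) | carry;
#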
.Lxts_km_prepare:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	srlg	$i2,$s0,63		# carry bit from lower half
	sllg	$s0,$s0,1
	sllg	$s1,$s1,1
	xgr	$s0,$i1
	ogr	$s1,$i2
.Lxts_km_start:
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	stg	$i1,0($s2,$inp)
	stg	$i2,8($s2,$inp)
	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_prepare
	slgr	$inp,$fp		# rewind $inp
	la	$s2,0($out,$inp)
	lgr	$s3,$fp
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# pay attention to "partial completion"
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_xor:
	lg	$i1,0($out,$inp)
	lg	$i2,8($out,$inp)
	xg	$i1,0($s2,$inp)
	xg	$i2,8($s2,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_xor
	slgr	$len,$fp
	brc	1,.Lxts_km_loop		# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lxts_km_loop	# not zero
	l${g}	$i1,0($sp)		# back-chain
	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
	la	$i2,16($sp)
	srlg	$fp,$fp,4
.Lxts_km_zap:
	stg	$i1,0($i2)
	stg	$i1,8($i2)
	la	$i2,16($i2)
	brct	$fp,.Lxts_km_zap
	la	$sp,0($i1)
	llgc	$len,2*$SIZE_T-1($i1)
	nill	$len,0x0f		# $len%=16
	bzr	$ra
	# generate one more tweak...
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	srlg	$i2,$s0,63		# carry bit from lower half
	sllg	$s0,$s0,1
	sllg	$s1,$s1,1
	xgr	$s0,$i1
	ogr	$s1,$i2
	ltr	$len,$len		# clear zero flag
	br	$ra
.size	_s390x_xts_km,.-_s390x_xts_km

.globl	AES_xts_encrypt
.type	AES_xts_encrypt,\@function
.align	16
AES_xts_encrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	srag	$len,$len,4		# formally wrong, because it expands
					# the sign bit, but who can afford
					# asking to process more than 2^63-1
					# bytes? I use it, because it sets
					# condition code...
	bcr	8,$ra			# abort if zero (i.e. less than 16)
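# i.e. in C terms:	if ((blocks = (int64_t)len >> 4) == 0) return;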
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_enc_software
	stm${g}	%r6,$s3,6*$SIZE_T($sp)
	st${g}	$ra,14*$SIZE_T($sp)
	sllg	$len,$len,4		# $len&=~15
	slgr	$out,$inp
	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed anymore
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?
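	# the initial tweak is thus T_0 = E_K2(IV), the standard XTS
	# (XEX) construction; only $key1 touches the data blocks from
	# here on.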
	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore
	bras	$ra,_s390x_xts_km
	jz	.Lxts_enc_km_done
	aghi	$inp,-16		# take one step back
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_enc_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_km_steal
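#
# Ciphertext stealing: the first $len bytes of the last full
# ciphertext block become the final partial block, and the tail
# plaintext bytes take their place before the block is re-encrypted
# with the next tweak. As a C sketch, with inp/out pointing at the
# last full block and tail = len % 16:
#
#	for (i = 0; i < tail; i++) {
#		unsigned char c = out[i];	/* last full C block */
#		out[i] = inp[16 + i];		/* steal plaintext byte */
#		out[16 + i] = c;		/* becomes short block */
#	}
#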
	la	$s2,0($i3)
	lghi	$s3,16
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($s2)
	xg	$i2,8($s2)
	stg	$i1,0($s2)
	stg	$i2,8($s2)
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# can this happen?
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($i3)
	xg	$i2,8($i3)
	stg	$i1,0($i3)
	stg	$i2,8($i3)
.Lxts_enc_km_done:
	l${g}	$ra,14*$SIZE_T($sp)
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_enc_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)
	slgr	$out,$inp
	xgr	$s0,$s0			# clear upper half
	xgr	$s1,$s1
	lrv	$s0,$stdframe+4($sp)	# load secno
	lrv	$s1,$stdframe+0($sp)
	xgr	$s2,$s2
	xgr	$s3,$s3
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	j	.Lxts_enc_enter
.align	16
.Lxts_enc_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	srlg	%r0,$s1,63		# carry bit from lower half
	sllg	$s1,$s1,1
	sllg	$s3,$s3,1
	xgr	$s1,%r1
	ogr	$s3,%r0
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
	la	$inp,16($inp)		# $inp+=16
.Lxts_enc_enter:
	x	$s0,0($inp)		# ^=*($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	brct${g}	$len,.Lxts_enc_loop
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_enc_done
	la	$i3,0($inp,$out)	# put aside real $out
.Lxts_enc_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_steal
	la	$out,0($i3)		# restore real $out
	# generate last tweak...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	srlg	%r0,$s1,63		# carry bit from lower half
	sllg	$s1,$s1,1
	sllg	$s3,$s3,1
	xgr	$s1,%r1
	ogr	$s3,%r0
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
	x	$s0,0($out)		# ^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,`$tweak+0`($sp)	# ^=tweak
	x	$s1,`$tweak+4`($sp)
	x	$s2,`$tweak+8`($sp)
	x	$s3,`$tweak+12`($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)
.Lxts_enc_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_encrypt,.-AES_xts_encrypt
___
# void AES_xts_decrypt(const char *inp,char *out,size_t len,
#	const AES_KEY *key1, const AES_KEY *key2,u64 secno);
#
$code.=<<___;
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,\@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	aghi	$len,-16
	bcr	4,$ra			# abort if less than zero. formally
					# wrong, because $len is unsigned,
					# but who can afford asking to
					# process more than 2^63-1 bytes?
	tmll	$len,0x0f
	jnz	.Lxts_dec_proceed
	aghi	$len,16
.Lxts_dec_proceed:
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software
	stm${g}	%r6,$s3,6*$SIZE_T($sp)
	st${g}	$ra,14*$SIZE_T($sp)
	nill	$len,0xfff0		# $len&=~15
	slgr	$out,$inp
	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed past this point
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?
	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore
	ltgr	$len,$len
	jz	.Lxts_dec_km_short
	bras	$ra,_s390x_xts_km
	jz	.Lxts_dec_km_done
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
	j	.Lxts_dec_km_2ndtweak
.Lxts_dec_km_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%=16
	lrvg	$s0,$tweak+0($sp)	# load the tweak
	lrvg	$s1,$tweak+8($sp)
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
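#
# With a partial tail, XTS decryption uses the tweaks in swapped
# order: the last full ciphertext block C_{m-1} is decrypted with the
# *next* tweak T_m (generated below), and the block reassembled by
# stealing is decrypted with the previous tweak T_{m-1} (saved above
# in $s2/$s3). Roughly:
#
#	P_m || tail = D_K1(C_{m-1} ^ T_m) ^ T_m
#	P_{m-1}     = D_K1((C_m || tail) ^ T_{m-1}) ^ T_{m-1}
#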
.Lxts_dec_km_2ndtweak:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	srlg	$i2,$s0,63		# carry bit from lower half
	sllg	$s0,$s0,1
	sllg	$s1,$s1,1
	xgr	$s0,$i1
	ogr	$s1,$i2
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i2,0($out,$inp)
	lghi	$i3,16
	.long	0xb92e0066		# km $i2,$i2
	brc	1,.-4			# can this happen?
	lrvgr	$i1,$s0
	lrvgr	$i2,$s1
	xg	$i1,0($out,$inp)
	xg	$i2,8($out,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_km_steal
	lgr	$s0,$s2
	lgr	$s1,$s3
	xg	$s0,0($i3)
	xg	$s1,8($i3)
	stg	$s0,0($i3)
	stg	$s1,8($i3)
	la	$s0,0($i3)
	lghi	$s1,16
	.long	0xb92e0088		# km $s0,$s0
	brc	1,.-4			# can this happen?
	xg	$s2,0($i3)
	xg	$s3,8($i3)
	stg	$s2,0($i3)
	stg	$s3,8($i3)
.Lxts_dec_km_done:
	l${g}	$ra,14*$SIZE_T($sp)
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_dec_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)
	srlg	$len,$len,4
	slgr	$out,$inp
	xgr	$s0,$s0			# clear upper half
	xgr	$s1,$s1
	lrv	$s0,$stdframe+4($sp)	# load secno
	lrv	$s1,$stdframe+0($sp)
	xgr	$s2,$s2
	xgr	$s3,$s3
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	larl	$tbl,AES_Td
	lt${g}r	$len,$len
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	jz	.Lxts_dec_short
	j	.Lxts_dec_enter
.align	16
.Lxts_dec_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	srlg	%r0,$s1,63		# carry bit from lower half
	sllg	$s1,$s1,1
	sllg	$s3,$s3,1
	xgr	$s1,%r1
	ogr	$s3,%r0
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
.Lxts_dec_enter:
	x	$s0,0($inp)		# tweak^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$inp,16($inp)
	brct${g}	$len,.Lxts_dec_loop
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_dec_done
	# generate pair of tweaks...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	srlg	%r0,$s1,63		# carry bit from lower half
	sllg	$s1,$s1,1
	sllg	$s3,$s3,1
	xgr	$s1,%r1
	ogr	$s3,%r0
	lrvgr	$i2,$s1			# flip byte order
	lrvgr	$i3,$s3
	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak
.align	16
.Lxts_dec_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	srlg	%r0,$s1,63		# carry bit from lower half
	sllg	$s1,$s1,1
	sllg	$s3,$s3,1
	xgr	$s1,%r1
	ogr	$s3,%r0
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak-16+8($sp)
	llgfr	$s3,$s3
	x	$s0,0($inp)		# tweak_the_2nd^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
	x	$s1,$tweak-16+4($sp)
	x	$s2,$tweak-16+8($sp)
	x	$s3,$tweak-16+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_steal
	la	$out,0($i3)		# restore real $out
	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
	x	$s0,0($out)		# tweak^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)
	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
	stg	$sp,$tweak-16+8($sp)
.Lxts_dec_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_decrypt,.-AES_xts_decrypt
___
}
$code.=<<___;
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
.comm	OPENSSL_s390xcap_P,16,8
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;	# force flush