#! /usr/bin/env perl
# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# AES for s390x.
#
# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution, and an issued instruction [in this
# case a load from memory is the critical one] has to complete before
# execution flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support: it's basically a "teaser," as
# it can and should be improved in several ways. Most notably, support
# for CBC is not utilized, nor are multiple blocks ever processed.
# The software key schedule could also be postponed till hardware
# support detection... Performance improvement over assembler is
# reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
# proper support is implemented.
#
# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.
#
# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# the critical path is not long enough. Yet it spends ~24 cycles per
# byte processed with 128-bit key.
#
# Unlike in the previous version, hardware support detection takes
# place only at the moment of key schedule setup, which is denoted in
# key->rounds. This is done because deferred key setup can't be made
# MT-safe, at least not for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives an incredible performance
# improvement; it was measured to be ~6.6x. It's less than the
# previously mentioned 8x, because the software implementation was
# optimized.
#
# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over the "generic" counter mode routine
# relying on single-block, also hardware-assisted, AES_encrypt. "Up
# to" refers to the fact that the exact throughput depends on the
# current stack frame alignment within a 4KB page. In the worst case
# you get ~75% of the maximum, but *on average* as much as ~98%,
# meaning the worst case is unlikely, like hitting a ravine on a
# plateau.
#
# November 2010.
#
# Adapt for -m31 build. If the kernel supports what's called the
# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to
# use 64-bit instructions and achieve "64-bit" performance even in a
# 31-bit legacy application context. The feature is not specific to
# any particular processor, as long as it's a "z-CPU". The latter
# implies that the code remains z/Architecture specific. On z990 it
# was measured to perform 2x better than code generated by gcc 4.3.
#
# December 2010.
#
# Add support for the z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...
#
# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for the z196
# km-xts-aes instructions, which deliver ~70% improvement at 8KB block
# size over vanilla km-based code, and ~37% at 512-byte block size.
$flavour = shift;

if ($flavour =~ /3[12]/) {
$SIZE_T=4;
$g="";
} else {
$SIZE_T=8;
$g="g";
}

while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";

$softonly=0; # allow hardware support

$t0="%r0"; $mask="%r0";
$t1="%r1";
$t2="%r2"; $inp="%r2";
$t3="%r3"; $out="%r3"; $bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

$stdframe=16*$SIZE_T+4*8;
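# (As a side note: $stdframe matches the minimal s390x ABI stack frame,
# 16 register save slots of $SIZE_T bytes plus a 4*8-byte FPR save area,
# i.e. 96 bytes for the 31-bit ABI and 160 bytes for the 64-bit one.)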
sub _data_word()
{ my $i;
while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
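# Each word is emitted twice, so every AES_Te/AES_Td entry occupies 8
# bytes. A 4-byte load at byte offset k (0..3) within an entry then
# returns the same word rotated left by 8k bits, which is how one 2KB
# table serves as Te0..Te3 (the "compressed to 2KB" trick noted above).
# E.g. entry 0 is stored as c6 63 63 a5 c6 63 63 a5; a load at offset 1
# yields 0x6363a5c6, which is exactly Te3[0].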
$code=<<___;
#include "s390x_arch.h"

.text

.type AES_Te,\@object
.align 256
AES_Te:
___
&_data_word(
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align 256
.size AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_encrypt
.type AES_encrypt,\@function
AES_encrypt:
___
$code.=<<___ if (!$softonly);
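# 240($key) holds either the software round count (10/12/14) or, when
# AES_set_encrypt_key below detected hardware support, the KM function
# code (18/19/20 for AES-128/192/256). A value below 16 therefore means
# "software key schedule" and we fall back to the table-driven path.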
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Lesoft
la %r1,0($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
brc 1,.-4 # can this happen?
br %r14
.align 64
.Lesoft:
___
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
larl $tbl,AES_Te
bras $ra,_s390x_AES_encrypt
l${g} $out,3*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_encrypt,.-AES_encrypt

.type _s390x_AES_encrypt,\@function
.align 16
_s390x_AES_encrypt:
st${g} $ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
llill $mask,`0xff<<3`
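# Byte indices into AES_Te are kept pre-scaled by 8 (each table entry is
# 8 bytes), hence the mask `0xff<<3` and the extraction shifts written as
# `24-3`, `16-3`, `8-3` and `0+3` below: the <<3 scaling is folded into
# the shift amount instead of costing a separate instruction.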
aghi $rounds,-1
j .Lenc_loop
.align 16
.Lenc_loop:
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
srlg $t3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
ngr $t1,$mask
nr $t2,$mask
nr $t3,$mask
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
ngr $i2,$mask
nr $i3,$mask
l $s0,0($s0,$tbl) # Te0[s0>>24]
l $t1,1($t1,$tbl) # Te3[s0>>0]
l $t2,2($t2,$tbl) # Te2[s0>>8]
l $t3,3($t3,$tbl) # Te1[s0>>16]
x $s0,3($i1,$tbl) # Te1[s1>>16]
l $s1,0($s1,$tbl) # Te0[s1>>24]
x $t2,1($i2,$tbl) # Te3[s1>>0]
x $t3,2($i3,$tbl) # Te2[s1>>8]
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
nr $s2,$mask
ngr $i3,$mask
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
sllg $t1,$s3,`0+3` # i0
nr $ra,$mask
la $key,16($key)
ngr $t1,$mask
x $s0,2($i1,$tbl) # Te2[s2>>8]
x $s1,3($i2,$tbl) # Te1[s2>>16]
l $s2,0($s2,$tbl) # Te0[s2>>24]
x $t3,1($i3,$tbl) # Te3[s2>>0]
srlg $i3,$s3,`16-3` # i2
xr $s2,$t2
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $t3,12($key)
x $s0,1($t1,$tbl) # Te3[s3>>0]
x $s1,2($ra,$tbl) # Te2[s3>>8]
x $s2,3($i3,$tbl) # Te1[s3>>16]
l $s3,0($s3,$tbl) # Te0[s3>>24]
xr $s3,$t3
brct $rounds,.Lenc_loop
.align 16
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
ngr $t1,$mask
srlg $t3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
nr $t2,$mask
nr $t3,$mask
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
ngr $i2,$mask
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
nr $i3,$mask
llgc $s0,2($s0,$tbl) # Te4[s0>>24]
llgc $t1,2($t1,$tbl) # Te4[s0>>0]
sll $s0,24
llgc $t2,2($t2,$tbl) # Te4[s0>>8]
llgc $t3,2($t3,$tbl) # Te4[s0>>16]
sll $t2,8
sll $t3,16
llgc $i1,2($i1,$tbl) # Te4[s1>>16]
llgc $s1,2($s1,$tbl) # Te4[s1>>24]
llgc $i2,2($i2,$tbl) # Te4[s1>>0]
llgc $i3,2($i3,$tbl) # Te4[s1>>8]
sll $i1,16
sll $s1,24
sll $i3,8
or $s0,$i1
or $s1,$t1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
ngr $i3,$mask
nr $s2,$mask
sllg $t1,$s3,`0+3` # i0
srlg $ra,$s3,`8-3` # i1
ngr $t1,$mask
llgc $i1,2($i1,$tbl) # Te4[s2>>8]
llgc $i2,2($i2,$tbl) # Te4[s2>>16]
sll $i1,8
llgc $s2,2($s2,$tbl) # Te4[s2>>24]
llgc $i3,2($i3,$tbl) # Te4[s2>>0]
sll $i2,16
nr $ra,$mask
sll $s2,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $t3,$i3
srlg $i3,$s3,`16-3` # i2
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
l $t0,16($key)
l $t2,20($key)
llgc $i1,2($t1,$tbl) # Te4[s3>>0]
llgc $i2,2($ra,$tbl) # Te4[s3>>8]
llgc $i3,2($i3,$tbl) # Te4[s3>>16]
llgc $s3,2($s3,$tbl) # Te4[s3>>24]
sll $i2,8
sll $i3,16
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$i3
or $s3,$t3
l${g} $ra,15*$SIZE_T($sp)
xr $s0,$t0
xr $s1,$t2
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
___

$code.=<<___;
.type AES_Td,\@object
.align 256
AES_Td:
___
&_data_word(
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td

# void AES_decrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_decrypt
.type AES_decrypt,\@function
AES_decrypt:
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Ldsoft
la %r1,0($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
brc 1,.-4 # can this happen?
br %r14
.align 64
.Ldsoft:
___
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
larl $tbl,AES_Td
bras $ra,_s390x_AES_decrypt
l${g} $out,3*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_decrypt,.-AES_decrypt

.type _s390x_AES_decrypt,\@function
.align 16
_s390x_AES_decrypt:
st${g} $ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
llill $mask,`0xff<<3`
aghi $rounds,-1
j .Ldec_loop
.align 16
.Ldec_loop:
srlg $t1,$s0,`16-3`
srlg $t2,$s0,`8-3`
sllg $t3,$s0,`0+3`
srl $s0,`24-3`
nr $s0,$mask
nr $t1,$mask
nr $t2,$mask
ngr $t3,$mask
sllg $i1,$s1,`0+3` # i0
srlg $i2,$s1,`16-3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
ngr $i1,$mask
nr $s1,$mask
nr $i2,$mask
nr $i3,$mask
l $s0,0($s0,$tbl) # Td0[s0>>24]
l $t1,3($t1,$tbl) # Td1[s0>>16]
l $t2,2($t2,$tbl) # Td2[s0>>8]
l $t3,1($t3,$tbl) # Td3[s0>>0]
x $s0,1($i1,$tbl) # Td3[s1>>0]
l $s1,0($s1,$tbl) # Td0[s1>>24]
x $t2,3($i2,$tbl) # Td1[s1>>16]
x $t3,2($i3,$tbl) # Td2[s1>>8]
srlg $i1,$s2,`8-3` # i0
sllg $i2,$s2,`0+3` # i1
srlg $i3,$s2,`16-3`
srl $s2,`24-3`
nr $i1,$mask
ngr $i2,$mask
nr $s2,$mask
nr $i3,$mask
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
srlg $t1,$s3,`16-3` # i0
nr $ra,$mask
la $key,16($key)
nr $t1,$mask
x $s0,2($i1,$tbl) # Td2[s2>>8]
x $s1,1($i2,$tbl) # Td3[s2>>0]
l $s2,0($s2,$tbl) # Td0[s2>>24]
x $t3,3($i3,$tbl) # Td1[s2>>16]
sllg $i3,$s3,`0+3` # i2
srl $s3,`24-3`
ngr $i3,$mask
nr $s3,$mask
xr $s2,$t2
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $t3,12($key)
x $s0,3($t1,$tbl) # Td1[s3>>16]
x $s1,2($ra,$tbl) # Td2[s3>>8]
x $s2,1($i3,$tbl) # Td3[s3>>0]
l $s3,0($s3,$tbl) # Td0[s3>>24]
xr $s3,$t3
brct $rounds,.Ldec_loop
.align 16
l $t1,`2048+0`($tbl) # prefetch Td4
l $t2,`2048+64`($tbl)
l $t3,`2048+128`($tbl)
l $i1,`2048+192`($tbl)
llill $mask,0xff
srlg $i3,$s0,24 # i0
srlg $t1,$s0,16
srlg $t2,$s0,8
nr $s0,$mask # i3
nr $t1,$mask
srlg $i1,$s1,24
nr $t2,$mask
srlg $i2,$s1,16
srlg $ra,$s1,8
nr $s1,$mask # i0
nr $i2,$mask
nr $ra,$mask
llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
sll $t1,16
llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
sllg $s0,$i3,24
sll $t2,8
llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
sll $i1,24
llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
sll $i2,16
sll $i3,8
or $s0,$s1
or $t1,$i1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,8 # i0
srlg $i2,$s2,24
srlg $i3,$s2,16
nr $s2,$mask # i1
nr $i1,$mask
nr $i3,$mask
llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
sll $i1,8
sll $i2,24
or $s0,$i1
sll $i3,16
or $t2,$i2
or $t3,$i3
srlg $i1,$s3,16 # i0
srlg $i2,$s3,8 # i1
srlg $i3,$s3,24
nr $s3,$mask # i2
nr $i1,$mask
nr $i2,$mask
l${g} $ra,15*$SIZE_T($sp)
or $s1,$t1
l $t0,16($key)
l $t1,20($key)
llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
sll $i1,16
llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
sll $i2,8
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $s3,$t3
xr $s0,$t0
xr $s1,$t1
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
___
$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_encrypt_key
.type AES_set_encrypt_key,\@function
.align 16
AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
lghi $t0,0
cl${g}r $inp,$t0
je .Lminus1
cl${g}r $key,$t0
je .Lminus1
lghi $t0,128
clr $bits,$t0
je .Lproceed
lghi $t0,192
clr $bits,$t0
je .Lproceed
lghi $t0,256
clr $bits,$t0
je .Lproceed
lghi %r2,-2
br %r14
.align 16
.Lproceed:
___
$code.=<<___ if (!$softonly);
# convert bits to km(c) code, [128,192,256]->[18,19,20]
lhi %r5,-128
lhi %r0,18
ar %r5,$bits
srl %r5,6
ar %r5,%r0
larl %r1,OPENSSL_s390xcap_P
llihh %r0,0x8000
srlg %r0,%r0,0(%r5)
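# e.g. bits=192: (192-128)>>6 = 1, +18 = 19, the km-aes-192 function
# code. The capability test takes a 64-bit word with only the MSB set,
# shifts it right by the function code, and ANDs it with the KM and KMC
# facility words: the result is non-zero only if bit <code> is set in
# both, i.e. both instructions support this key length.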
  746. ng %r0,S390X_KM(%r1) # check availability of both km...
  747. ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length
  748. jz .Lekey_internal
  749. lmg %r0,%r1,0($inp) # just copy 128 bits...
  750. stmg %r0,%r1,0($key)
  751. lhi %r0,192
  752. cr $bits,%r0
  753. jl 1f
  754. lg %r1,16($inp)
  755. stg %r1,16($key)
  756. je 1f
  757. lg %r1,24($inp)
  758. stg %r1,24($key)
  759. 1: st $bits,236($key) # save bits [for debugging purposes]
  760. lgr $t0,%r5
  761. st %r5,240($key) # save km(c) code
  762. lghi %r2,0
  763. br %r14
  764. ___
  765. $code.=<<___;
  766. .align 16
  767. .Lekey_internal:
  768. stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
  769. larl $tbl,AES_Te+2048
  770. llgf $s0,0($inp)
  771. llgf $s1,4($inp)
  772. llgf $s2,8($inp)
  773. llgf $s3,12($inp)
  774. st $s0,0($key)
  775. st $s1,4($key)
  776. st $s2,8($key)
  777. st $s3,12($key)
  778. lghi $t0,128
  779. cr $bits,$t0
  780. jne .Lnot128
  781. llill $mask,0xff
  782. lghi $t3,0 # i=0
  783. lghi $rounds,10
  784. st $rounds,240($key)
  785. llgfr $t2,$s3 # temp=rk[3]
  786. srlg $i1,$s3,8
  787. srlg $i2,$s3,16
  788. srlg $i3,$s3,24
  789. nr $t2,$mask
  790. nr $i1,$mask
  791. nr $i2,$mask
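# Each iteration computes four round-key words per FIPS-197:
# rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon[i], and rk[5..7]
# chain by plain XOR. The icm sequence below assembles
# SubWord(RotWord(temp)) byte by byte from Te4.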
.align 16
.L128_loop:
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
icm $t2,1,0($i3) # Te4[rk[3]>>24]
x $t2,256($t3,$tbl) # rcon[i]
xr $s0,$t2 # rk[4]=rk[0]^...
xr $s1,$s0 # rk[5]=rk[1]^rk[4]
xr $s2,$s1 # rk[6]=rk[2]^rk[5]
xr $s3,$s2 # rk[7]=rk[3]^rk[6]
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
nr $t2,$mask
nr $i1,$mask
srlg $i3,$s3,24
nr $i2,$mask
st $s0,16($key)
st $s1,20($key)
st $s2,24($key)
st $s3,28($key)
la $key,16($key) # key+=4
la $t3,4($t3) # i++
brct $rounds,.L128_loop
lghi $t0,10
lghi %r2,0
lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
.Lnot128:
llgf $t0,16($inp)
llgf $t1,20($inp)
st $t0,16($key)
st $t1,20($key)
lghi $t0,192
cr $bits,$t0
jne .Lnot192
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,12
st $rounds,240($key)
lghi $rounds,8
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
.align 16
.L192_loop:
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[5]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[6]=rk[0]^...
xr $s1,$s0 # rk[7]=rk[1]^rk[6]
xr $s2,$s1 # rk[8]=rk[2]^rk[7]
xr $s3,$s2 # rk[9]=rk[3]^rk[8]
st $s0,24($key)
st $s1,28($key)
st $s2,32($key)
st $s3,36($key)
brct $rounds,.L192_continue
lghi $t0,12
lghi %r2,0
lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
.L192_continue:
lgr $t1,$s3
x $t1,16($key) # rk[10]=rk[4]^rk[9]
st $t1,40($key)
x $t1,20($key) # rk[11]=rk[5]^rk[10]
st $t1,44($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $key,24($key) # key+=6
la $t3,4($t3) # i++
j .L192_loop
.align 16
.Lnot192:
llgf $t0,24($inp)
llgf $t1,28($inp)
st $t0,24($key)
st $t1,28($key)
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,14
st $rounds,240($key)
lghi $rounds,7
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
.align 16
.L256_loop:
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[7]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[8]=rk[0]^...
xr $s1,$s0 # rk[9]=rk[1]^rk[8]
xr $s2,$s1 # rk[10]=rk[2]^rk[9]
xr $s3,$s2 # rk[11]=rk[3]^rk[10]
st $s0,32($key)
st $s1,36($key)
st $s2,40($key)
st $s3,44($key)
brct $rounds,.L256_continue
lghi $t0,14
lghi %r2,0
lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
.L256_continue:
lgr $t1,$s3 # temp=rk[11]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
llgc $t1,0($t1) # Te4[rk[11]>>0]
icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
x $t1,16($key) # rk[12]=rk[4]^...
st $t1,48($key)
x $t1,20($key) # rk[13]=rk[5]^rk[12]
st $t1,52($key)
x $t1,24($key) # rk[14]=rk[6]^rk[13]
st $t1,56($key)
x $t1,28($key) # rk[15]=rk[7]^rk[14]
st $t1,60($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $key,32($key) # key+=8
la $t3,4($t3) # i++
j .L256_loop
.Lminus1:
lghi %r2,-1
br $ra
.size AES_set_encrypt_key,.-AES_set_encrypt_key

# void AES_set_decrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,\@function
.align 16
AES_set_decrypt_key:
#st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
bras $ra,_s390x_AES_set_encrypt_key
#l${g} $key,4*$SIZE_T($sp)
l${g} $ra,14*$SIZE_T($sp)
ltgr %r2,%r2
bnzr $ra
___
$code.=<<___ if (!$softonly);
#l $t0,240($key)
lhi $t1,16
cr $t0,$t1
jl .Lgo
oill $t0,S390X_DECRYPT # set "decrypt" bit
st $t0,240($key)
br $ra
___
$code.=<<___;
.align 16
.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key)
la $i1,0($key)
sllg $i2,$rounds,4
la $i2,0($i2,$key)
srl $rounds,1
lghi $t1,-16
.align 16
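# Reverse the round-key order in place: swap 16-byte round keys
# head-for-tail ($i1 walks up from the first key, $i2 down from the
# last), so the decryption rounds can walk the schedule forward.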
.Linv: lmg $s0,$s1,0($i1)
lmg $s2,$s3,0($i2)
stmg $s0,$s1,0($i2)
stmg $s2,$s3,0($i1)
la $i1,16($i1)
la $i2,0($t1,$i2)
brct $rounds,.Linv
___
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
llgf $rounds,240($key)
aghi $rounds,-1
sll $rounds,2 # (rounds-1)*4
llilh $mask80,0x8080
llilh $mask1b,0x1b1b
llilh $maskfe,0xfefe
oill $mask80,0x8080
oill $mask1b,0x1b1b
oill $maskfe,0xfefe
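# The .Lmix loop applies InvMixColumns to every round-key word, four
# bytes in parallel within one 32-bit register ("SIMD within a
# register"). Per byte, tp2 = xtime(tp1) is computed branch-free as
# ((tp1 & 0x80) ? 0x1b : 0) ^ ((tp1<<1) & 0xfe) using the three masks
# prepared above; tp4 and tp8 repeat the doubling.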
.align 16
.Lmix: l $s0,16($key) # tp1
lr $s1,$s0
ngr $s1,$mask80
srlg $t1,$s1,7
slr $s1,$t1
nr $s1,$mask1b
sllg $t1,$s0,1
nr $t1,$maskfe
xr $s1,$t1 # tp2
lr $s2,$s1
ngr $s2,$mask80
srlg $t1,$s2,7
slr $s2,$t1
nr $s2,$mask1b
sllg $t1,$s1,1
nr $t1,$maskfe
xr $s2,$t1 # tp4
lr $s3,$s2
ngr $s3,$mask80
srlg $t1,$s3,7
slr $s3,$t1
nr $s3,$mask1b
sllg $t1,$s2,1
nr $t1,$maskfe
xr $s3,$t1 # tp8
xr $s1,$s0 # tp2^tp1
xr $s2,$s0 # tp4^tp1
rll $s0,$s0,24 # = ROTATE(tp1,8)
xr $s2,$s3 # ^=tp8
xr $s0,$s1 # ^=tp2^tp1
xr $s1,$s3 # tp2^tp1^tp8
xr $s0,$s2 # ^=tp4^tp1^tp8
rll $s1,$s1,8
rll $s2,$s2,16
xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
rll $s3,$s3,24
xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
xr $s0,$s3 # ^= ROTATE(tp8,8)
st $s0,16($key)
la $key,4($key)
brct $rounds,.Lmix
lm${g} %r6,%r13,6*$SIZE_T($sp) # as was saved by AES_set_encrypt_key!
lghi %r2,0
br $ra
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___

########################################################################
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivec, const int enc)
{
my $inp="%r2";
my $out="%r4"; # length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";

$code.=<<___;
.globl AES_cbc_encrypt
.type AES_cbc_encrypt,\@function
.align 16
AES_cbc_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, out and len
xgr %r4,%r3
xgr %r3,%r4
___
$code.=<<___ if (!$softonly);
lhi %r0,16
cl %r0,240($key)
jh .Lcbc_software
lg %r0,0($ivp) # copy ivec
lg %r1,8($ivp)
stmg %r0,%r1,16($sp)
lmg %r0,%r1,0($key) # copy key, cover 256 bit
stmg %r0,%r1,32($sp)
lmg %r0,%r1,16($key)
stmg %r0,%r1,48($sp)
l %r0,240($key) # load kmc code
lghi $key,15 # res=len%16, len-=res;
ngr $key,$len
sl${g}r $len,$key
la %r1,16($sp) # parameter block - ivec || key
jz .Lkmc_truncated
.long 0xb92f0042 # kmc %r4,%r2
brc 1,.-4 # pay attention to "partial completion"
ltr $key,$key
jnz .Lkmc_truncated
.Lkmc_done:
lmg %r0,%r1,16($sp) # copy ivec to caller
stg %r0,0($ivp)
stg %r1,8($ivp)
br $ra
.align 16
.Lkmc_truncated:
ahi $key,-1 # it's the way it's encoded in mvc
tmll %r0,S390X_DECRYPT
jnz .Lkmc_truncated_dec
lghi %r1,0
stg %r1,16*$SIZE_T($sp)
stg %r1,16*$SIZE_T+8($sp)
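# Copy the 1..15 trailing bytes with an EX-ed MVC: bras loads the
# address of the mvc template into %r1 and branches over it, then ex
# substitutes the real length ($key = res-1, as encoded) into the mvc
# before executing it.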
bras %r1,1f
mvc 16*$SIZE_T(1,$sp),0($inp)
1: ex $key,0(%r1)
la %r1,16($sp) # restore parameter block
la $inp,16*$SIZE_T($sp)
lghi $len,16
.long 0xb92f0042 # kmc %r4,%r2
j .Lkmc_done
.align 16
.Lkmc_truncated_dec:
st${g} $out,4*$SIZE_T($sp)
la $out,16*$SIZE_T($sp)
lghi $len,16
.long 0xb92f0042 # kmc %r4,%r2
l${g} $out,4*$SIZE_T($sp)
bras %r1,2f
mvc 0(1,$out),16*$SIZE_T($sp)
2: ex $key,0(%r1)
j .Lkmc_done
.align 16
.Lcbc_software:
___
$code.=<<___;
stm${g} $key,$ra,5*$SIZE_T($sp)
lhi %r0,0
cl %r0,`$stdframe+$SIZE_T-4`($sp)
je .Lcbc_decrypt
larl $tbl,AES_Te
llgf $s0,0($ivp)
llgf $s1,4($ivp)
llgf $s2,8($ivp)
llgf $s3,12($ivp)
lghi $t0,16
sl${g}r $len,$t0
brc 4,.Lcbc_enc_tail # if borrow
.Lcbc_enc_loop:
stm${g} $inp,$out,2*$SIZE_T($sp)
x $s0,0($inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
lgr %r4,$key
bras $ra,_s390x_AES_encrypt
lm${g} $inp,$key,2*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
la $inp,16($inp)
la $out,16($out)
lghi $t0,16
lt${g}r $len,$len
jz .Lcbc_enc_done
sl${g}r $len,$t0
brc 4,.Lcbc_enc_tail # if borrow
j .Lcbc_enc_loop
.align 16
.Lcbc_enc_done:
l${g} $ivp,6*$SIZE_T($sp)
st $s0,0($ivp)
st $s1,4($ivp)
st $s2,8($ivp)
st $s3,12($ivp)
lm${g} %r7,$ra,7*$SIZE_T($sp)
br $ra
.align 16
.Lcbc_enc_tail:
aghi $len,15
lghi $t0,0
stg $t0,16*$SIZE_T($sp)
stg $t0,16*$SIZE_T+8($sp)
bras $t1,3f
mvc 16*$SIZE_T(1,$sp),0($inp)
3: ex $len,0($t1)
lghi $len,0
la $inp,16*$SIZE_T($sp)
j .Lcbc_enc_loop
.align 16
.Lcbc_decrypt:
larl $tbl,AES_Td
lg $t0,0($ivp)
lg $t1,8($ivp)
stmg $t0,$t1,16*$SIZE_T($sp)
.Lcbc_dec_loop:
stm${g} $inp,$out,2*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
lgr %r4,$key
bras $ra,_s390x_AES_decrypt
lm${g} $inp,$key,2*$SIZE_T($sp)
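# Pack the four 32-bit results into two 64-bit registers so the IV xor
# and the 8-byte stores below can work on halves: lr only replaces the
# low 32 bits, so $s0 ends up holding s0||s1 and $s2 holds s2||s3.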
sllg $s0,$s0,32
sllg $s2,$s2,32
lr $s0,$s1
lr $s2,$s3
lg $t0,0($inp)
lg $t1,8($inp)
xg $s0,16*$SIZE_T($sp)
xg $s2,16*$SIZE_T+8($sp)
lghi $s1,16
sl${g}r $len,$s1
brc 4,.Lcbc_dec_tail # if borrow
brc 2,.Lcbc_dec_done # if zero
stg $s0,0($out)
stg $s2,8($out)
stmg $t0,$t1,16*$SIZE_T($sp)
la $inp,16($inp)
la $out,16($out)
j .Lcbc_dec_loop
.Lcbc_dec_done:
stg $s0,0($out)
stg $s2,8($out)
.Lcbc_dec_exit:
lm${g} %r6,$ra,6*$SIZE_T($sp)
stmg $t0,$t1,0($ivp)
br $ra
.align 16
.Lcbc_dec_tail:
aghi $len,15
stg $s0,16*$SIZE_T($sp)
stg $s2,16*$SIZE_T+8($sp)
bras $s1,4f
mvc 0(1,$out),16*$SIZE_T($sp)
4: ex $len,0($s1)
j .Lcbc_dec_exit
.size AES_cbc_encrypt,.-AES_cbc_encrypt
___
}

########################################################################
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
# size_t blocks, const AES_KEY *key,
# const unsigned char *ivec)
{
my $inp="%r2";
my $out="%r4"; # blocks and out are swapped
my $len="%r3";
my $key="%r5"; my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";

$code.=<<___;
.globl AES_ctr32_encrypt
.type AES_ctr32_encrypt,\@function
.align 16
AES_ctr32_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Lctr32_software
st${g} $s2,10*$SIZE_T($sp)
st${g} $s3,11*$SIZE_T($sp)
clr $len,%r1 # does work even in 64-bit mode
jle .Lctr32_nokma # kma is slower for <= 16 blocks
larl %r1,OPENSSL_s390xcap_P
lr $s2,%r0
llihh $s3,0x8000
srlg $s3,$s3,0($s2)
ng $s3,S390X_KMA(%r1) # check kma capability vector
jz .Lctr32_nokma
l${g}hi %r1,-$stdframe-112
l${g}r $s3,$sp
la $sp,0(%r1,$sp) # prepare parameter block
lhi %r1,0x0600
sllg $len,$len,4
or %r0,%r1 # set HS and LAAD flags
st${g} $s3,0($sp) # backchain
la %r1,$stdframe($sp)
lmg $s2,$s3,0($key) # copy key
stg $s2,$stdframe+80($sp)
stg $s3,$stdframe+88($sp)
lmg $s2,$s3,16($key)
stg $s2,$stdframe+96($sp)
stg $s3,$stdframe+104($sp)
lmg $s2,$s3,0($ivp) # copy iv
stg $s2,$stdframe+64($sp)
ahi $s3,-1 # kma requires counter-1
stg $s3,$stdframe+72($sp)
st $s3,$stdframe+12($sp) # copy counter
lghi $s2,0 # no AAD
lghi $s3,0
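# KMA is the AES-GCM instruction; with zero-length AAD and the LAAD
# flag set, GCM's encryption pass degenerates to plain AES-CTR, which
# is all that is used here (the authentication tag it computes is
# simply never read back).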
.long 0xb929a042 # kma $out,$s2,$inp
brc 1,.-4 # pay attention to "partial completion"
stg %r0,$stdframe+80($sp) # wipe key
stg %r0,$stdframe+88($sp)
stg %r0,$stdframe+96($sp)
stg %r0,$stdframe+104($sp)
la $sp,$stdframe+112($sp)
lm${g} $s2,$s3,10*$SIZE_T($sp)
br $ra
.align 16
.Lctr32_nokma:
stm${g} %r6,$s1,6*$SIZE_T($sp)
slgr $out,$inp
la %r1,0($key) # %r1 is permanent copy of $key
lg $iv0,0($ivp) # load ivec
lg $ivp,8($ivp)
# prepare and allocate stack frame at the top of 4K page
# with 1K reserved for eventual signal handling
lghi $s0,-1024-256-16 # guarantee at least 256-byte buffer
lghi $s1,-4096
algr $s0,$sp
lgr $fp,$sp
ngr $s0,$s1 # align at page boundary
slgr $fp,$s0 # total buffer size
lgr $s2,$sp
lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
slgr $fp,$s1 # deduct reservation to get usable buffer size
# buffer size is at least 256 and at most 3072+256-16
la $sp,1024($s0) # alloca
srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
st${g} $s2,0($sp) # back-chain
st${g} $fp,$SIZE_T($sp)
slgr $len,$fp
brc 1,.Lctr32_hw_switch # not zero, no borrow
algr $fp,$len # input is shorter than allocated buffer
lghi $len,0
st${g} $fp,$SIZE_T($sp)
.Lctr32_hw_switch:
___
  1342. $code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
  1343. llgfr $s0,%r0
  1344. lgr $s1,%r1
  1345. larl %r1,OPENSSL_s390xcap_P
  1346. llihh %r0,0x8000 # check if kmctr supports the function code
  1347. srlg %r0,%r0,0($s0)
  1348. ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector
  1349. lgr %r0,$s0
  1350. lgr %r1,$s1
  1351. jz .Lctr32_km_loop
  1352. ####### kmctr code
  1353. algr $out,$inp # restore $out
  1354. lgr $s1,$len # $s1 undertakes $len
  1355. j .Lctr32_kmctr_loop
  1356. .align 16
  1357. .Lctr32_kmctr_loop:
  1358. la $s2,16($sp)
  1359. lgr $s3,$fp
  1360. .Lctr32_kmctr_prepare:
  1361. stg $iv0,0($s2)
  1362. stg $ivp,8($s2)
  1363. la $s2,16($s2)
  1364. ahi $ivp,1 # 32-bit increment, preserves upper half
  1365. brct $s3,.Lctr32_kmctr_prepare
  1366. #la $inp,0($inp) # inp
  1367. sllg $len,$fp,4 # len
  1368. #la $out,0($out) # out
  1369. la $s2,16($sp) # iv
  1370. .long 0xb92da042 # kmctr $out,$s2,$inp
  1371. brc 1,.-4 # pay attention to "partial completion"
  1372. slgr $s1,$fp
  1373. brc 1,.Lctr32_kmctr_loop # not zero, no borrow
  1374. algr $fp,$s1
  1375. lghi $s1,0
  1376. brc 4+1,.Lctr32_kmctr_loop # not zero
  1377. l${g} $sp,0($sp)
  1378. lm${g} %r6,$s3,6*$SIZE_T($sp)
  1379. br $ra
  1380. .align 16
  1381. ___
  1382. $code.=<<___ if (!$softonly);
  1383. .Lctr32_km_loop:
  1384. la $s2,16($sp)
  1385. lgr $s3,$fp
  1386. .Lctr32_km_prepare:
  1387. stg $iv0,0($s2)
  1388. stg $ivp,8($s2)
  1389. la $s2,16($s2)
  1390. ahi $ivp,1 # 32-bit increment, preserves upper half
  1391. brct $s3,.Lctr32_km_prepare
  1392. la $s0,16($sp) # inp
  1393. sllg $s1,$fp,4 # len
  1394. la $s2,16($sp) # out
  1395. .long 0xb92e00a8 # km %r10,%r8
  1396. brc 1,.-4 # pay attention to "partial completion"
  1397. la $s2,16($sp)
  1398. lgr $s3,$fp
  1399. slgr $s2,$inp
  1400. .Lctr32_km_xor:
  1401. lg $s0,0($inp)
  1402. lg $s1,8($inp)
  1403. xg $s0,0($s2,$inp)
  1404. xg $s1,8($s2,$inp)
  1405. stg $s0,0($out,$inp)
  1406. stg $s1,8($out,$inp)
  1407. la $inp,16($inp)
  1408. brct $s3,.Lctr32_km_xor
  1409. slgr $len,$fp
  1410. brc 1,.Lctr32_km_loop # not zero, no borrow
  1411. algr $fp,$len
  1412. lghi $len,0
  1413. brc 4+1,.Lctr32_km_loop # not zero
  1414. l${g} $s0,0($sp)
  1415. l${g} $s1,$SIZE_T($sp)
  1416. la $s2,16($sp)
  1417. .Lctr32_km_zap:
  1418. stg $s0,0($s2)
  1419. stg $s0,8($s2)
  1420. la $s2,16($s2)
  1421. brct $s1,.Lctr32_km_zap
  1422. la $sp,0($s0)
  1423. lm${g} %r6,$s3,6*$SIZE_T($sp)
  1424. br $ra
  1425. .align 16
  1426. .Lctr32_software:
  1427. ___
  1428. $code.=<<___;
  1429. stm${g} $key,$ra,5*$SIZE_T($sp)
  1430. sl${g}r $inp,$out
  1431. larl $tbl,AES_Te
  1432. llgf $t1,12($ivp)
  1433. .Lctr32_loop:
  1434. stm${g} $inp,$out,2*$SIZE_T($sp)
  1435. llgf $s0,0($ivp)
  1436. llgf $s1,4($ivp)
  1437. llgf $s2,8($ivp)
  1438. lgr $s3,$t1
  1439. st $t1,16*$SIZE_T($sp)
  1440. lgr %r4,$key
  1441. bras $ra,_s390x_AES_encrypt
  1442. lm${g} $inp,$ivp,2*$SIZE_T($sp)
  1443. llgf $t1,16*$SIZE_T($sp)
  1444. x $s0,0($inp,$out)
  1445. x $s1,4($inp,$out)
  1446. x $s2,8($inp,$out)
  1447. x $s3,12($inp,$out)
  1448. stm $s0,$s3,0($out)
  1449. la $out,16($out)
  1450. ahi $t1,1 # 32-bit increment
  1451. brct $len,.Lctr32_loop
  1452. lm${g} %r6,$ra,6*$SIZE_T($sp)
  1453. br $ra
  1454. .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
  1455. ___
  1456. }
########################################################################
# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
#	const unsigned char iv[16]);
#
{
my $inp="%r2";
my $out="%r4";	# len and out are swapped
my $len="%r3";
my $key1="%r5";	# $i1
my $key2="%r6";	# $i2
my $fp="%r7";	# $i3
my $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...

$code.=<<___;
.type	_s390x_xts_km,\@function
.align	16
_s390x_xts_km:
___
$code.=<<___ if(1);
	llgfr	$s0,%r0			# put aside the function code
	lghi	$s1,0x7f
	nr	$s1,%r0
	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000
	srlg	%r0,%r0,32($s1)		# check for 32+function code
	ng	%r0,S390X_KM(%r1)	# check km capability vector
	lgr	%r0,$s0			# restore the function code
	la	%r1,0($key1)		# restore $key1
	jz	.Lxts_km_vanilla

	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
	algr	$out,$inp

	oill	%r0,32			# switch to xts function code
	aghi	$s1,-18
	sllg	$s1,$s1,3		# (function code - 18)*8, 0 or 16
	la	%r1,$tweak-16($sp)
	slgr	%r1,$s1			# parameter block position
	lmg	$s0,$s3,0($key1)	# load 256 bits of key material and
	stmg	$s0,$s3,0(%r1)		# copy them to the parameter block.
					# yes, in the 128-bit case the copy
					# contains junk and overlaps with the
					# tweak, but it's done this way to
					# avoid a conditional branch.
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value

	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# pay attention to "partial completion"

	lrvg	$s0,$tweak+0($sp)	# load the last tweak
	lrvg	$s1,$tweak+8($sp)
	stmg	%r0,%r3,$tweak-32($sp)	# wipe copy of the key

	nill	%r0,0xffdf		# switch back to original function code
	la	%r1,0($key1)		# restore pointer to $key1
	slgr	$out,$inp

	llgc	$len,2*$SIZE_T-1($sp)
	nill	$len,0x0f		# $len%=16
	br	$ra

.align	16
.Lxts_km_vanilla:
___
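# The probe at the top of _s390x_xts_km tests bit (32 + function code) of
# the KM capability doubleword: the KM-AES function codes are 18/19/20, and
# the matching KM-XTS-AES codes, 50 and 52, sit exactly 32 higher. An
# equivalent test in Perl, assuming $km_cap holds the 64-bit capability word
# with the most significant bit numbered 0 (hypothetical helper, illustration
# only):
sub _km_xts_capable {
	my ($km_cap, $fc) = @_;		# $fc: 18 for AES-128, 20 for AES-256
	return ($km_cap >> (63 - (32 + $fc))) & 1;
}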
$code.=<<___;
	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16	# guarantee at least 256-byte buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1			# align at page boundary
	slgr	$fp,$s0			# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16		# sl[g]fi needs extended-immediate facility
	slgr	$fp,$s1			# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	$sp,1024($s0)		# alloca
	nill	$fp,0xfff0		# round to 16*n
	st${g}	$s2,0($sp)		# back-chain
	nill	$len,0xfff0		# redundant
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lxts_km_go		# not zero, no borrow
	algr	$fp,$len		# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)

.Lxts_km_go:
	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
	lrvg	$s1,$tweak+8($s2)

	la	$s2,16($sp)		# vector of ascending tweak values
	slgr	$s2,$inp
	srlg	$s3,$fp,4
	j	.Lxts_km_start

.Lxts_km_loop:
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_prepare:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
.Lxts_km_start:
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	stg	$i1,0($s2,$inp)
	stg	$i2,8($s2,$inp)
	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_prepare

	slgr	$inp,$fp		# rewind $inp
	la	$s2,0($out,$inp)
	lgr	$s3,$fp
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# pay attention to "partial completion"

	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_xor:
	lg	$i1,0($out,$inp)
	lg	$i2,8($out,$inp)
	xg	$i1,0($s2,$inp)
	xg	$i2,8($s2,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_xor

	slgr	$len,$fp
	brc	1,.Lxts_km_loop		# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lxts_km_loop	# not zero

	l${g}	$i1,0($sp)		# back-chain
	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
	la	$i2,16($sp)
	srlg	$fp,$fp,4
.Lxts_km_zap:
	stg	$i1,0($i2)
	stg	$i1,8($i2)
	la	$i2,16($i2)
	brct	$fp,.Lxts_km_zap

	la	$sp,0($i1)
	llgc	$len,2*$SIZE_T-1($i1)
	nill	$len,0x0f		# $len%=16
	bzr	$ra			# return if no partial block is left

	# generate one more tweak...
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1

	ltr	$len,$len		# clear zero flag
	br	$ra
.size	_s390x_xts_km,.-_s390x_xts_km

.globl	AES_xts_encrypt
.type	AES_xts_encrypt,\@function
.align	16
AES_xts_encrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
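# .Lxts_km_vanilla above carves a scratch buffer out of the current 4K stack
# page, keeping 1K in reserve for signal handlers. The same size arithmetic
# in Perl, assuming a numeric stack pointer (hypothetical helper,
# illustration only):
sub _xts_vanilla_alloca {
	my ($sp) = @_;
	my $base  = ($sp - 1024 - 256 - 16) & ~4095;	# page-aligned base
	my $avail = ($sp - $base - 1024 - 16) & ~15;	# usable size, 16*n, >=256
	return ($base + 1024, $avail);			# (new $sp, buffer size)
}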
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len		# zero-extend $len on 31-bit ABI
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	srag	$len,$len,4		# formally wrong, because it propagates
					# the sign bit, but who can afford
					# asking to process more than 2^63-1
					# bytes? I use it, because it sets
					# the condition code...
	bcr	8,$ra			# abort if zero (i.e. less than 16)
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_enc_software

	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	sllg	$len,$len,4		# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed anymore
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore
	bras	$ra,_s390x_xts_km
	jz	.Lxts_enc_km_done

	aghi	$inp,-16		# take one step back
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_enc_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_km_steal

	la	$s2,0($i3)
	lghi	$s3,16
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($s2)
	xg	$i2,8($s2)
	stg	$i1,0($s2)
	stg	$i2,8($s2)
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# can this happen?

	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($i3)
	xg	$i2,8($i3)
	stg	$i1,0($i3)
	stg	$i2,8($i3)

.Lxts_enc_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_enc_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	j	.Lxts_enc_enter

.align	16
.Lxts_enc_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
	la	$inp,16($inp)		# $inp+=16
.Lxts_enc_enter:
	x	$s0,0($inp)		# ^=*($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	brct${g}	$len,.Lxts_enc_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_enc_done

	la	$i3,0($inp,$out)	# put aside real $out
.Lxts_enc_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_steal
	la	$out,0($i3)		# restore real $out

	# generate last tweak...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($out)		# ^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,`$tweak+0`($sp)	# ^=tweak
	x	$s1,`$tweak+4`($sp)
	x	$s2,`$tweak+8`($sp)
	x	$s3,`$tweak+12`($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

.Lxts_enc_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_encrypt,.-AES_xts_encrypt
___
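# Each pass through the tweak-update sequence above (lghi/srag/ngr/algr/
# alcgr/xgr) multiplies the 128-bit tweak by x in GF(2^128) modulo
# x^128 + x^7 + x^2 + x + 1, operating on the little-endian value held in
# two 64-bit halves. The same step in Perl, assuming 64-bit native integers
# (hypothetical helper, illustration only):
sub _xts_tweak_double {
	my ($lo, $hi) = @_;			# little-endian 64-bit halves
	my $rem = ($hi >> 63) ? 0x87 : 0;	# reduction term if the top bit falls off
	$hi = (($hi & 0x7fffffffffffffff) << 1) | ($lo >> 63);
	$lo = (($lo & 0x7fffffffffffffff) << 1) ^ $rem;
	return ($lo, $hi);
}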
# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
#	const unsigned char iv[16]);
#
$code.=<<___;
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,\@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len		# zero-extend $len on 31-bit ABI
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	aghi	$len,-16
	bcr	4,$ra			# abort if negative. formally wrong,
					# because $len is unsigned, but who
					# can afford asking to process more
					# than 2^63-1 bytes?
	tmll	$len,0x0f
	jnz	.Lxts_dec_proceed
	aghi	$len,16
.Lxts_dec_proceed:
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software

	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	nill	$len,0xfff0		# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed past this point
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore

	ltgr	$len,$len
	jz	.Lxts_dec_km_short
	bras	$ra,_s390x_xts_km
	jz	.Lxts_dec_km_done

	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
	j	.Lxts_dec_km_2ndtweak

.Lxts_dec_km_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%=16
	lrvg	$s0,$tweak+0($sp)	# load the tweak
	lrvg	$s1,$tweak+8($sp)
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1

.Lxts_dec_km_2ndtweak:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1

	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i2,0($out,$inp)
	lghi	$i3,16
	.long	0xb92e0066		# km $i2,$i2
	brc	1,.-4			# can this happen?
	lrvgr	$i1,$s0
	lrvgr	$i2,$s1
	xg	$i1,0($out,$inp)
	xg	$i2,8($out,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_km_steal

	lgr	$s0,$s2
	lgr	$s1,$s3
	xg	$s0,0($i3)
	xg	$s1,8($i3)
	stg	$s0,0($i3)
	stg	$s1,8($i3)
	la	$s0,0($i3)
	lghi	$s1,16
	.long	0xb92e0088		# km $s0,$s0
	brc	1,.-4			# can this happen?
	xg	$s2,0($i3)
	xg	$s3,8($i3)
	stg	$s2,0($i3)
	stg	$s3,8($i3)
.Lxts_dec_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_dec_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	srlg	$len,$len,4
	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	larl	$tbl,AES_Td
	lt${g}r	$len,$len
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	jz	.Lxts_dec_short
	j	.Lxts_dec_enter

.align	16
.Lxts_dec_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
.Lxts_dec_enter:
	x	$s0,0($inp)		# tweak^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$inp,16($inp)
	brct${g}	$len,.Lxts_dec_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_dec_done

	# generate pair of tweaks...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$i2,$s1			# flip byte order
	lrvgr	$i3,$s3
	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak

.align	16
.Lxts_dec_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak-16+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($inp)		# tweak_the_2nd^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
	x	$s1,$tweak-16+4($sp)
	x	$s2,$tweak-16+8($sp)
	x	$s3,$tweak-16+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_steal
	la	$out,0($i3)		# restore real $out

	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
	x	$s0,0($out)		# tweak^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
	stg	$sp,$tweak-16+8($sp)
.Lxts_dec_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_decrypt,.-AES_xts_decrypt
___
}
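# Both XTS paths handle a trailing partial block with ciphertext stealing:
# the *_steal loops swap the tail of the last full ciphertext block with the
# remaining input bytes, and the patched block is pushed through the cipher
# once more. The byte shuffle in Perl (hypothetical helper, illustration
# only):
sub _xts_steal {
	my ($last_blk, $tail) = @_;	# 16-byte block, 1..15 trailing input bytes
	my $n = length($tail);
	my $stolen  = substr($last_blk, 0, $n);		# emitted as the final partial block
	my $patched = $tail . substr($last_blk, $n);	# goes through the cipher again
	return ($patched, $stolen);
}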
$code.=<<___;
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;	# force flush