#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and an issued instruction [in this case
# the critical one is a load from memory] has to complete before
# execution flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support: it's basically a "teaser," as
# it can and should be improved in several ways. Most notably, support
# for CBC is not utilized, nor are multiple blocks ever processed.
# Also, the software key schedule could be postponed till hardware
# support detection... Performance improvement over assembler is
# reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
# proper support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# the critical path is not long enough. Yet it spends ~24 cycles per
# byte processed with 128-bit key.
#
# Unlike in the previous version, hardware support detection takes
# place only at the moment of key schedule setup, and the outcome is
# recorded in key->rounds. This is done because deferred key setup
# can't be made MT-safe, not for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives an incredible performance
# improvement; it was measured to be ~6.6x. It's less than the
# previously mentioned 8x, because the software implementation was
# optimized.

# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over the "generic" counter mode routine
# relying on single-block, also hardware-assisted, AES_encrypt. "Up
# to" refers to the fact that the exact throughput depends on the
# current stack frame alignment within the 4KB page. In the worst case
# you get ~75% of the maximum, but *on average* it is as much as ~98%.
# Meaning that the worst case is unlikely; it's like hitting a ravine
# on a plateau.

# November 2010.
#
# Adapt for -m31 build. If the kernel supports what's called the
# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to
# use 64-bit instructions and achieve "64-bit" performance even in a
# 31-bit legacy application context. The feature is not specific to
# any particular processor, as long as it's a "z-CPU". The latter
# implies that the code remains z/Architecture specific. On z990 it
# was measured to perform 2x better than code generated by gcc 4.3.

# December 2010.
#
# Add support for the z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...

# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
# instructions, which deliver ~70% improvement at 8KB block size over
# vanilla km-based code, and ~37% at 512-byte block size.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}
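
# The flavour argument selects the ABI: a 3[12] flavour builds for the
# 31-bit ABI, anything else for 64-bit. $g is interpolated into the
# mnemonics below (e.g. st${g} assembles as stg), so a single source
# covers both register widths.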
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";

$softonly=0;	# allow hardware support

$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

$stdframe=16*$SIZE_T+4*8;
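
# $stdframe above is the minimal ABI stack frame: 16 pointer-sized GPR
# save slots plus 4 8-byte FPR slots, i.e. 96 bytes in 31-bit mode and
# 160 in 64-bit mode.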
sub _data_word()
{ my $i;
	while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
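
# Note that _data_word emits every table word twice, making each entry
# 8 bytes wide: a 4-byte load at byte offset k (k=0..3) within an entry
# returns the word rotated left by 8*k bits, so the Te1/Te2/Te3 (and
# Td1/Td2/Td3) rotations come out of one physical table addressed at
# byte offsets 3/2/1. That is how the S-boxes stay "compressed to
# 2KB[+256B]" as advertised above.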
$code=<<___;
.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
&_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
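# rcon[i] is x^i in GF(2^8), placed in the most significant byte; ten
# constants cover the ten expansion steps of AES-128, the rest of the
# row is zero padding.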
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
#	 const AES_KEY *key) {
.globl	AES_encrypt
.type	AES_encrypt,\@function
AES_encrypt:
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
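# km is hand-assembled as .long, presumably so the module builds even
# with assemblers that predate the crypto mnemonics: 0xb92e is the KM
# opcode and 0x42 encodes the register pair %r4,%r2. brc 1 re-executes
# it on condition code 3 ("partial completion"), which should not
# occur for a single 16-byte block.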
.align	64
.Lesoft:
___
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_encrypt,.-AES_encrypt

.type	_s390x_AES_encrypt,\@function
.align	16
_s390x_AES_encrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Lenc_loop
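# Indices into AES_Te are kept pre-scaled by 8 (shift counts carry a
# built-in 3 and $mask is 0xff<<3) because every table entry occupies
# 8 bytes; the byte offsets 0..3 in the loads below then select the
# Te0/Te3/Te2/Te1 rotations of the doubled words.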
.align	16
.Lenc_loop:
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	ngr	$t1,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	ngr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Te0[s0>>24]
	l	$t1,1($t1,$tbl)	# Te3[s0>>0]
	l	$t2,2($t2,$tbl)	# Te2[s0>>8]
	l	$t3,3($t3,$tbl)	# Te1[s0>>16]

	x	$s0,3($i1,$tbl)	# Te1[s1>>16]
	l	$s1,0($s1,$tbl)	# Te0[s1>>24]
	x	$t2,1($i2,$tbl)	# Te3[s1>>0]
	x	$t3,2($i3,$tbl)	# Te2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	nr	$s2,$mask
	ngr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	sllg	$t1,$s3,`0+3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	ngr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Te2[s2>>8]
	x	$s1,3($i2,$tbl)	# Te1[s2>>16]
	l	$s2,0($s2,$tbl)	# Te0[s2>>24]
	x	$t3,1($i3,$tbl)	# Te3[s2>>0]

	srlg	$i3,$s3,`16-3`	# i2
	xr	$s2,$t2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,1($t1,$tbl)	# Te3[s3>>0]
	x	$s1,2($ra,$tbl)	# Te2[s3>>8]
	x	$s2,3($i3,$tbl)	# Te1[s3>>16]
	l	$s3,0($s3,$tbl)	# Te0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Lenc_loop
.align	16

	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	ngr	$t1,$mask
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	ngr	$i2,$mask
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	nr	$i3,$mask

	llgc	$s0,2($s0,$tbl)	# Te4[s0>>24]
	llgc	$t1,2($t1,$tbl)	# Te4[s0>>0]
	sll	$s0,24
	llgc	$t2,2($t2,$tbl)	# Te4[s0>>8]
	llgc	$t3,2($t3,$tbl)	# Te4[s0>>16]
	sll	$t2,8
	sll	$t3,16

	llgc	$i1,2($i1,$tbl)	# Te4[s1>>16]
	llgc	$s1,2($s1,$tbl)	# Te4[s1>>24]
	llgc	$i2,2($i2,$tbl)	# Te4[s1>>0]
	llgc	$i3,2($i3,$tbl)	# Te4[s1>>8]
	sll	$i1,16
	sll	$s1,24
	sll	$i3,8
	or	$s0,$i1
	or	$s1,$t1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	ngr	$i3,$mask
	nr	$s2,$mask

	sllg	$t1,$s3,`0+3`	# i0
	srlg	$ra,$s3,`8-3`	# i1
	ngr	$t1,$mask

	llgc	$i1,2($i1,$tbl)	# Te4[s2>>8]
	llgc	$i2,2($i2,$tbl)	# Te4[s2>>16]
	sll	$i1,8
	llgc	$s2,2($s2,$tbl)	# Te4[s2>>24]
	llgc	$i3,2($i3,$tbl)	# Te4[s2>>0]
	sll	$i2,16
	nr	$ra,$mask
	sll	$s2,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$t3,$i3

	srlg	$i3,$s3,`16-3`	# i2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	l	$t0,16($key)
	l	$t2,20($key)

	llgc	$i1,2($t1,$tbl)	# Te4[s3>>0]
	llgc	$i2,2($ra,$tbl)	# Te4[s3>>8]
	llgc	$i3,2($i3,$tbl)	# Te4[s3>>16]
	llgc	$s3,2($s3,$tbl)	# Te4[s3>>24]
	sll	$i2,8
	sll	$i3,16
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$i3
	or	$s3,$t3

	l${g}	$ra,15*$SIZE_T($sp)
	xr	$s0,$t0
	xr	$s1,$t2
	x	$s2,24($key)
	x	$s3,28($key)
	br	$ra
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
___
$code.=<<___;
.type	AES_Td,\@object
.align	256
AES_Td:
___
&_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td
# void AES_decrypt(const unsigned char *inp, unsigned char *out,
#	 const AES_KEY *key) {
.globl	AES_decrypt
.type	AES_decrypt,\@function
AES_decrypt:
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Ldsoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Ldsoft:
___
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Td
	bras	$ra,_s390x_AES_decrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_decrypt,.-AES_decrypt

.type	_s390x_AES_decrypt,\@function
.align	16
_s390x_AES_decrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Ldec_loop
.align	16
.Ldec_loop:
	srlg	$t1,$s0,`16-3`
	srlg	$t2,$s0,`8-3`
	sllg	$t3,$s0,`0+3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t1,$mask
	nr	$t2,$mask
	ngr	$t3,$mask

	sllg	$i1,$s1,`0+3`	# i0
	srlg	$i2,$s1,`16-3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	ngr	$i1,$mask
	nr	$s1,$mask
	nr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Td0[s0>>24]
	l	$t1,3($t1,$tbl)	# Td1[s0>>16]
	l	$t2,2($t2,$tbl)	# Td2[s0>>8]
	l	$t3,1($t3,$tbl)	# Td3[s0>>0]

	x	$s0,1($i1,$tbl)	# Td3[s1>>0]
	l	$s1,0($s1,$tbl)	# Td0[s1>>24]
	x	$t2,3($i2,$tbl)	# Td1[s1>>16]
	x	$t3,2($i3,$tbl)	# Td2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	sllg	$i2,$s2,`0+3`	# i1
	srlg	$i3,$s2,`16-3`
	srl	$s2,`24-3`
	nr	$i1,$mask
	ngr	$i2,$mask
	nr	$s2,$mask
	nr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	srlg	$t1,$s3,`16-3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	nr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Td2[s2>>8]
	x	$s1,1($i2,$tbl)	# Td3[s2>>0]
	l	$s2,0($s2,$tbl)	# Td0[s2>>24]
	x	$t3,3($i3,$tbl)	# Td1[s2>>16]

	sllg	$i3,$s3,`0+3`	# i2
	srl	$s3,`24-3`
	ngr	$i3,$mask
	nr	$s3,$mask

	xr	$s2,$t2
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,3($t1,$tbl)	# Td1[s3>>16]
	x	$s1,2($ra,$tbl)	# Td2[s3>>8]
	x	$s2,1($i3,$tbl)	# Td3[s3>>0]
	l	$s3,0($s3,$tbl)	# Td0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Ldec_loop
.align	16

	l	$t1,`2048+0`($tbl)	# prefetch Td4
	l	$t2,`2048+64`($tbl)
	l	$t3,`2048+128`($tbl)
	l	$i1,`2048+192`($tbl)
	llill	$mask,0xff

	srlg	$i3,$s0,24	# i0
	srlg	$t1,$s0,16
	srlg	$t2,$s0,8
	nr	$s0,$mask	# i3
	nr	$t1,$mask

	srlg	$i1,$s1,24
	nr	$t2,$mask
	srlg	$i2,$s1,16
	srlg	$ra,$s1,8
	nr	$s1,$mask	# i0
	nr	$i2,$mask
	nr	$ra,$mask

	llgc	$i3,2048($i3,$tbl)	# Td4[s0>>24]
	llgc	$t1,2048($t1,$tbl)	# Td4[s0>>16]
	llgc	$t2,2048($t2,$tbl)	# Td4[s0>>8]
	sll	$t1,16
	llgc	$t3,2048($s0,$tbl)	# Td4[s0>>0]
	sllg	$s0,$i3,24
	sll	$t2,8

	llgc	$s1,2048($s1,$tbl)	# Td4[s1>>0]
	llgc	$i1,2048($i1,$tbl)	# Td4[s1>>24]
	llgc	$i2,2048($i2,$tbl)	# Td4[s1>>16]
	sll	$i1,24
	llgc	$i3,2048($ra,$tbl)	# Td4[s1>>8]
	sll	$i2,16
	sll	$i3,8
	or	$s0,$s1
	or	$t1,$i1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,8	# i0
	srlg	$i2,$s2,24
	srlg	$i3,$s2,16
	nr	$s2,$mask	# i1
	nr	$i1,$mask
	nr	$i3,$mask
	llgc	$i1,2048($i1,$tbl)	# Td4[s2>>8]
	llgc	$s1,2048($s2,$tbl)	# Td4[s2>>0]
	llgc	$i2,2048($i2,$tbl)	# Td4[s2>>24]
	llgc	$i3,2048($i3,$tbl)	# Td4[s2>>16]
	sll	$i1,8
	sll	$i2,24
	or	$s0,$i1
	sll	$i3,16
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s3,16	# i0
	srlg	$i2,$s3,8	# i1
	srlg	$i3,$s3,24
	nr	$s3,$mask	# i2
	nr	$i1,$mask
	nr	$i2,$mask

	l${g}	$ra,15*$SIZE_T($sp)
	or	$s1,$t1
	l	$t0,16($key)
	l	$t1,20($key)

	llgc	$i1,2048($i1,$tbl)	# Td4[s3>>16]
	llgc	$i2,2048($i2,$tbl)	# Td4[s3>>8]
	sll	$i1,16
	llgc	$s2,2048($s3,$tbl)	# Td4[s3>>0]
	llgc	$s3,2048($i3,$tbl)	# Td4[s3>>24]
	sll	$i2,8
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$s3,$t3

	xr	$s0,$t0
	xr	$s1,$t1
	x	$s2,24($key)
	x	$s3,28($key)
	br	$ra
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
___
$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
#	 AES_KEY *key) {
.globl	AES_set_encrypt_key
.type	AES_set_encrypt_key,\@function
.align	16
AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
	lghi	$t0,0
	cl${g}r	$inp,$t0
	je	.Lminus1
	cl${g}r	$key,$t0
	je	.Lminus1

	lghi	$t0,128
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,192
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,256
	clr	$bits,$t0
	je	.Lproceed
	lghi	%r2,-2
	br	%r14
.align	16
.Lproceed:
___
$code.=<<___ if (!$softonly);
	# convert bits to km code, [128,192,256]->[18,19,20]
	lhi	%r5,-128
	lhi	%r0,18
	ar	%r5,$bits
	srl	%r5,6
	ar	%r5,%r0

	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security assist
	jz	.Lekey_internal

	lghi	%r0,0		# query capability vector
	la	%r1,16($sp)
	.long	0xb92f0042	# kmc %r4,%r2

	llihh	%r1,0x8000
	srlg	%r1,%r1,0(%r5)
	ng	%r1,16($sp)
	jz	.Lekey_internal
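	# function code 0 is the query: km/kmc store a 16-byte capability
	# vector at the parameter block address in %r1. Bit n of that
	# vector, counted MSB-first, reports support for function code n,
	# hence the top-bit constant shifted right by the code computed
	# above (18/19/20 for AES-128/192/256).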
	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
	stmg	%r0,%r1,0($key)
	lhi	%r0,192
	cr	$bits,%r0
	jl	1f
	lg	%r1,16($inp)
	stg	%r1,16($key)
	je	1f
	lg	%r1,24($inp)
	stg	%r1,24($key)
1:	st	$bits,236($key)	# save bits [for debugging purposes]
	lgr	$t0,%r5
	st	%r5,240($key)	# save km code
	lghi	%r2,0
	br	%r14
___
$code.=<<___;
.align	16
.Lekey_internal:
	stm${g}	%r4,%r13,4*$SIZE_T($sp)	# all non-volatile regs and $key

	larl	$tbl,AES_Te+2048

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	st	$s0,0($key)
	st	$s1,4($key)
	st	$s2,8($key)
	st	$s3,12($key)
	lghi	$t0,128
	cr	$bits,$t0
	jne	.Lnot128

	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,10
	st	$rounds,240($key)

	llgfr	$t2,$s3			# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t2,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L128_loop:
	la	$t2,0($t2,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t2,2,0($t2)		# Te4[rk[3]>>0]<<8
	icm	$t2,4,0($i1)		# Te4[rk[3]>>8]<<16
	icm	$t2,8,0($i2)		# Te4[rk[3]>>16]<<24
	icm	$t2,1,0($i3)		# Te4[rk[3]>>24]
	x	$t2,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t2			# rk[4]=rk[0]^...
	xr	$s1,$s0			# rk[5]=rk[1]^rk[4]
	xr	$s2,$s1			# rk[6]=rk[2]^rk[5]
	xr	$s3,$s2			# rk[7]=rk[3]^rk[6]

	llgfr	$t2,$s3			# temp=rk[3]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	nr	$t2,$mask
	nr	$i1,$mask
	srlg	$i3,$s3,24
	nr	$i2,$mask

	st	$s0,16($key)
	st	$s1,20($key)
	st	$s2,24($key)
	st	$s3,28($key)
	la	$key,16($key)		# key+=4
	la	$t3,4($t3)		# i++
	brct	$rounds,.L128_loop
	lghi	$t0,10
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.Lnot128:
	llgf	$t0,16($inp)
	llgf	$t1,20($inp)
	st	$t0,16($key)
	st	$t1,20($key)
	lghi	$t0,192
	cr	$bits,$t0
	jne	.Lnot192

	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,12
	st	$rounds,240($key)
	lghi	$rounds,8

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L192_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)		# Te4[rk[5]>>0]<<8
	icm	$t1,4,0($i1)		# Te4[rk[5]>>8]<<16
	icm	$t1,8,0($i2)		# Te4[rk[5]>>16]<<24
	icm	$t1,1,0($i3)		# Te4[rk[5]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1			# rk[6]=rk[0]^...
	xr	$s1,$s0			# rk[7]=rk[1]^rk[6]
	xr	$s2,$s1			# rk[8]=rk[2]^rk[7]
	xr	$s3,$s2			# rk[9]=rk[3]^rk[8]

	st	$s0,24($key)
	st	$s1,28($key)
	st	$s2,32($key)
	st	$s3,36($key)
	brct	$rounds,.L192_continue
	lghi	$t0,12
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L192_continue:
	lgr	$t1,$s3
	x	$t1,16($key)		# rk[10]=rk[4]^rk[9]
	st	$t1,40($key)
	x	$t1,20($key)		# rk[11]=rk[5]^rk[10]
	st	$t1,44($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,24($key)		# key+=6
	la	$t3,4($t3)		# i++
	j	.L192_loop
.align	16
.Lnot192:
	llgf	$t0,24($inp)
	llgf	$t1,28($inp)
	st	$t0,24($key)
	st	$t1,28($key)
	llill	$mask,0xff
	lghi	$t3,0			# i=0
	lghi	$rounds,14
	st	$rounds,240($key)
	lghi	$rounds,7

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

.align	16
.L256_loop:
	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	icm	$t1,2,0($t1)		# Te4[rk[7]>>0]<<8
	icm	$t1,4,0($i1)		# Te4[rk[7]>>8]<<16
	icm	$t1,8,0($i2)		# Te4[rk[7]>>16]<<24
	icm	$t1,1,0($i3)		# Te4[rk[7]>>24]
	x	$t1,256($t3,$tbl)	# rcon[i]
	xr	$s0,$t1			# rk[8]=rk[0]^...
	xr	$s1,$s0			# rk[9]=rk[1]^rk[8]
	xr	$s2,$s1			# rk[10]=rk[2]^rk[9]
	xr	$s3,$s2			# rk[11]=rk[3]^rk[10]

	st	$s0,32($key)
	st	$s1,36($key)
	st	$s2,40($key)
	st	$s3,44($key)
	brct	$rounds,.L256_continue
	lghi	$t0,14
	lghi	%r2,0
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
	br	$ra

.align	16
.L256_continue:
	lgr	$t1,$s3			# temp=rk[11]
	srlg	$i1,$s3,8
	srlg	$i2,$s3,16
	srlg	$i3,$s3,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$t1,0($t1,$tbl)
	la	$i1,0($i1,$tbl)
	la	$i2,0($i2,$tbl)
	la	$i3,0($i3,$tbl)
	llgc	$t1,0($t1)		# Te4[rk[11]>>0]
	icm	$t1,2,0($i1)		# Te4[rk[11]>>8]<<8
	icm	$t1,4,0($i2)		# Te4[rk[11]>>16]<<16
	icm	$t1,8,0($i3)		# Te4[rk[11]>>24]<<24
	x	$t1,16($key)		# rk[12]=rk[4]^...
	st	$t1,48($key)
	x	$t1,20($key)		# rk[13]=rk[5]^rk[12]
	st	$t1,52($key)
	x	$t1,24($key)		# rk[14]=rk[6]^rk[13]
	st	$t1,56($key)
	x	$t1,28($key)		# rk[15]=rk[7]^rk[14]
	st	$t1,60($key)

	srlg	$i1,$t1,8
	srlg	$i2,$t1,16
	srlg	$i3,$t1,24
	nr	$t1,$mask
	nr	$i1,$mask
	nr	$i2,$mask

	la	$key,32($key)		# key+=8
	la	$t3,4($t3)		# i++
	j	.L256_loop

.Lminus1:
	lghi	%r2,-1
	br	$ra
.size	AES_set_encrypt_key,.-AES_set_encrypt_key
# void AES_set_decrypt_key(const unsigned char *in, int bits,
#	 AES_KEY *key) {
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,\@function
.align	16
AES_set_decrypt_key:
	#st${g}	$key,4*$SIZE_T($sp)	# I rely on AES_set_encrypt_key to
	st${g}	$ra,14*$SIZE_T($sp)	# save non-volatile registers and $key!
	bras	$ra,_s390x_AES_set_encrypt_key
	#l${g}	$key,4*$SIZE_T($sp)
	l${g}	$ra,14*$SIZE_T($sp)
	ltgr	%r2,%r2
	bnzr	$ra
___
$code.=<<___ if (!$softonly);
	#l	$t0,240($key)
	lhi	$t1,16
	cr	$t0,$t1
	jl	.Lgo
	oill	$t0,0x80	# set "decrypt" bit
	st	$t0,240($key)
	br	$ra
___
$code.=<<___;
.align	16
.Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
	la	$i1,0($key)
	sllg	$i2,$rounds,4
	la	$i2,0($i2,$key)
	srl	$rounds,1
	lghi	$t1,-16

.align	16
.Linv:	lmg	$s0,$s1,0($i1)
	lmg	$s2,$s3,0($i2)
	stmg	$s0,$s1,0($i2)
	stmg	$s2,$s3,0($i1)
	la	$i1,16($i1)
	la	$i2,0($t1,$i2)
	brct	$rounds,.Linv
___
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
	llgf	$rounds,240($key)
	aghi	$rounds,-1
	sll	$rounds,2	# (rounds-1)*4
	llilh	$mask80,0x8080
	llilh	$mask1b,0x1b1b
	llilh	$maskfe,0xfefe
	oill	$mask80,0x8080
	oill	$mask1b,0x1b1b
	oill	$maskfe,0xfefe
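	# the three masks implement four GF(2^8) doublings (xtime) in
	# parallel within one 32-bit word: 0x80808080 picks out each
	# byte's top bit, which is turned into a 0x1b reduction term,
	# while 0xfefefefe keeps the left shift from bleeding across
	# byte lanes. tp2/tp4/tp8 below are 2x, 4x and 8x tp1, from
	# which InvMixColumns of each round key word is assembled.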
.align	16
.Lmix:	l	$s0,16($key)	# tp1
	lr	$s1,$s0
	ngr	$s1,$mask80
	srlg	$t1,$s1,7
	slr	$s1,$t1
	nr	$s1,$mask1b
	sllg	$t1,$s0,1
	nr	$t1,$maskfe
	xr	$s1,$t1		# tp2

	lr	$s2,$s1
	ngr	$s2,$mask80
	srlg	$t1,$s2,7
	slr	$s2,$t1
	nr	$s2,$mask1b
	sllg	$t1,$s1,1
	nr	$t1,$maskfe
	xr	$s2,$t1		# tp4

	lr	$s3,$s2
	ngr	$s3,$mask80
	srlg	$t1,$s3,7
	slr	$s3,$t1
	nr	$s3,$mask1b
	sllg	$t1,$s2,1
	nr	$t1,$maskfe
	xr	$s3,$t1		# tp8

	xr	$s1,$s0		# tp2^tp1
	xr	$s2,$s0		# tp4^tp1
	rll	$s0,$s0,24	# = ROTATE(tp1,8)
	xr	$s2,$s3		# ^=tp8
	xr	$s0,$s1		# ^=tp2^tp1
	xr	$s1,$s3		# tp2^tp1^tp8
	xr	$s0,$s2		# ^=tp4^tp1^tp8
	rll	$s1,$s1,8
	rll	$s2,$s2,16
	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
	rll	$s3,$s3,24
	xr	$s0,$s2		# ^= ROTATE(tp8^tp4^tp1,16)
	xr	$s0,$s3		# ^= ROTATE(tp8,8)

	st	$s0,16($key)
	la	$key,4($key)
	brct	$rounds,.Lmix

	lm${g}	%r6,%r13,6*$SIZE_T($sp)	# as was saved by AES_set_encrypt_key!
	lghi	%r2,0
	br	$ra
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___
########################################################################
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#	 size_t length, const AES_KEY *key,
#	 unsigned char *ivec, const int enc)
{
my $inp="%r2";
my $out="%r4";	# length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function
.align	16
AES_cbc_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if (!$softonly);
	lhi	%r0,16
	cl	%r0,240($key)
	jh	.Lcbc_software

	lg	%r0,0($ivp)	# copy ivec
	lg	%r1,8($ivp)
	stmg	%r0,%r1,16($sp)
	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
	stmg	%r0,%r1,32($sp)
	lmg	%r0,%r1,16($key)
	stmg	%r0,%r1,48($sp)
	l	%r0,240($key)	# load kmc code
	lghi	$key,15		# res=len%16, len-=res;
	ngr	$key,$len
	sl${g}r	$len,$key
	la	%r1,16($sp)	# parameter block - ivec || key
	jz	.Lkmc_truncated
	.long	0xb92f0042	# kmc %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"
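	# kmc may stop with condition code 3 after processing only part
	# of the operand (e.g. when interrupted); brc 1,.-4 re-executes
	# it until done. The instruction updates its registers as it
	# goes, so resumption is transparent.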
	ltr	$key,$key
	jnz	.Lkmc_truncated
.Lkmc_done:
	lmg	%r0,%r1,16($sp)	# copy ivec to caller
	stg	%r0,0($ivp)
	stg	%r1,8($ivp)
	br	$ra
.align	16
.Lkmc_truncated:
	ahi	$key,-1		# it's the way it's encoded in mvc
	tmll	%r0,0x80
	jnz	.Lkmc_truncated_dec
	lghi	%r1,0
	stg	%r1,16*$SIZE_T($sp)
	stg	%r1,16*$SIZE_T+8($sp)
	bras	%r1,1f
	mvc	16*$SIZE_T(1,$sp),0($inp)
1:	ex	$key,0(%r1)
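	# ex patches the length byte of the mvc template above with $key
	# (the residue minus one; mvc encodes length-1) and executes it,
	# i.e. a variable-length copy of the trailing partial block into
	# the zero-padded stack buffer.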
	la	%r1,16($sp)	# restore parameter block
	la	$inp,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	j	.Lkmc_done
.align	16
.Lkmc_truncated_dec:
	st${g}	$out,4*$SIZE_T($sp)
	la	$out,16*$SIZE_T($sp)
	lghi	$len,16
	.long	0xb92f0042	# kmc %r4,%r2
	l${g}	$out,4*$SIZE_T($sp)
	bras	%r1,2f
	mvc	0(1,$out),16*$SIZE_T($sp)
2:	ex	$key,0(%r1)
	j	.Lkmc_done
.align	16
.Lcbc_software:
___
$code.=<<___;
	stm${g}	$key,$ra,5*$SIZE_T($sp)
	lhi	%r0,0
	cl	%r0,`$stdframe+$SIZE_T-4`($sp)
	je	.Lcbc_decrypt

	larl	$tbl,AES_Te

	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	llgf	$s3,12($ivp)

	lghi	$t0,16
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
.Lcbc_enc_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	x	$s0,0($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	la	$inp,16($inp)
	la	$out,16($out)
	lghi	$t0,16
	lt${g}r	$len,$len
	jz	.Lcbc_enc_done
	sl${g}r	$len,$t0
	brc	4,.Lcbc_enc_tail	# if borrow
	j	.Lcbc_enc_loop
.align	16
.Lcbc_enc_done:
	l${g}	$ivp,6*$SIZE_T($sp)
	st	$s0,0($ivp)
	st	$s1,4($ivp)
	st	$s2,8($ivp)
	st	$s3,12($ivp)

	lm${g}	%r7,$ra,7*$SIZE_T($sp)
	br	$ra
.align	16
.Lcbc_enc_tail:
	aghi	$len,15
	lghi	$t0,0
	stg	$t0,16*$SIZE_T($sp)
	stg	$t0,16*$SIZE_T+8($sp)
	bras	$t1,3f
	mvc	16*$SIZE_T(1,$sp),0($inp)
3:	ex	$len,0($t1)
	lghi	$len,0
	la	$inp,16*$SIZE_T($sp)
	j	.Lcbc_enc_loop
.align	16
.Lcbc_decrypt:
	larl	$tbl,AES_Td

	lg	$t0,0($ivp)
	lg	$t1,8($ivp)
	stmg	$t0,$t1,16*$SIZE_T($sp)

.Lcbc_dec_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_decrypt

	lm${g}	$inp,$key,2*$SIZE_T($sp)
	sllg	$s0,$s0,32
	sllg	$s2,$s2,32
	lr	$s0,$s1
	lr	$s2,$s3
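	# lr writes only the low 32 bits, so after the sllg pair $s0 and
	# $s2 hold the two output doublewords (s0||s1 and s2||s3), ready
	# for 64-bit XOR with the saved IV and for 8-byte stores.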
	lg	$t0,0($inp)
	lg	$t1,8($inp)
	xg	$s0,16*$SIZE_T($sp)
	xg	$s2,16*$SIZE_T+8($sp)
	lghi	$s1,16
	sl${g}r	$len,$s1
	brc	4,.Lcbc_dec_tail	# if borrow
	brc	2,.Lcbc_dec_done	# if zero
	stg	$s0,0($out)
	stg	$s2,8($out)
	stmg	$t0,$t1,16*$SIZE_T($sp)

	la	$inp,16($inp)
	la	$out,16($out)
	j	.Lcbc_dec_loop
.Lcbc_dec_done:
	stg	$s0,0($out)
	stg	$s2,8($out)
.Lcbc_dec_exit:
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	stmg	$t0,$t1,0($ivp)
	br	$ra
.align	16
.Lcbc_dec_tail:
	aghi	$len,15
	stg	$s0,16*$SIZE_T($sp)
	stg	$s2,16*$SIZE_T+8($sp)
	bras	$s1,4f
	mvc	0(1,$out),16*$SIZE_T($sp)
4:	ex	$len,0($s1)
	j	.Lcbc_dec_exit
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
########################################################################
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
#	 size_t blocks, const AES_KEY *key,
#	 const unsigned char *ivec)
{
my $inp="%r2";
my $out="%r4";	# blocks and out are swapped
my $len="%r3";
my $key="%r5";	my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";

$code.=<<___;
.globl	AES_ctr32_encrypt
.type	AES_ctr32_encrypt,\@function
.align	16
AES_ctr32_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	$len,$len	# safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lctr32_software

	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	slgr	$out,$inp
	la	%r1,0($key)	# %r1 is permanent copy of $key
	lg	$iv0,0($ivp)	# load ivec
	lg	$ivp,8($ivp)

	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16	# guarantee at least 256-bytes buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1		# align at page boundary
	slgr	$fp,$s0		# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
	slgr	$fp,$s1		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16
	la	$sp,1024($s0)	# alloca
	srlg	$fp,$fp,4	# convert bytes to blocks, minimum 16
	st${g}	$s2,0($sp)	# back-chain
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
	brc	1,.Lctr32_hw_switch	# not zero, no borrow
	algr	$fp,$len	# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)
.Lctr32_hw_switch:
___
$code.=<<___ if (0);	######### kmctr code was measured to be ~12% slower
	larl	$s0,OPENSSL_s390xcap_P
	lg	$s0,8($s0)
	tmhh	$s0,0x0004	# check for message_security-assist-4
	jz	.Lctr32_km_loop

	llgfr	$s0,%r0
	lgr	$s1,%r1
	lghi	%r0,0
	la	%r1,16($sp)
	.long	0xb92d2042	# kmctr %r4,%r2,%r2

	llihh	%r0,0x8000	# check if kmctr supports the function code
	srlg	%r0,%r0,0($s0)
	ng	%r0,16($sp)
	lgr	%r0,$s0
	lgr	%r1,$s1
	jz	.Lctr32_km_loop

####### kmctr code
	algr	$out,$inp	# restore $out
	lgr	$s1,$len	# $s1 undertakes $len
	j	.Lctr32_kmctr_loop
.align	16
.Lctr32_kmctr_loop:
	la	$s2,16($sp)
	lgr	$s3,$fp
.Lctr32_kmctr_prepare:
	stg	$iv0,0($s2)
	stg	$ivp,8($s2)
	la	$s2,16($s2)
	ahi	$ivp,1		# 32-bit increment, preserves upper half
	brct	$s3,.Lctr32_kmctr_prepare

	#la	$inp,0($inp)	# inp
	sllg	$len,$fp,4	# len
	#la	$out,0($out)	# out
	la	$s2,16($sp)	# iv
	.long	0xb92da042	# kmctr $out,$s2,$inp
	brc	1,.-4		# pay attention to "partial completion"

	slgr	$s1,$fp
	brc	1,.Lctr32_kmctr_loop	# not zero, no borrow
	algr	$fp,$s1
	lghi	$s1,0
	brc	4+1,.Lctr32_kmctr_loop	# not zero

	l${g}	$sp,0($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
___
$code.=<<___;
.Lctr32_km_loop:
	la	$s2,16($sp)
	lgr	$s3,$fp
.Lctr32_km_prepare:
	stg	$iv0,0($s2)
	stg	$ivp,8($s2)
	la	$s2,16($s2)
	ahi	$ivp,1		# 32-bit increment, preserves upper half
	brct	$s3,.Lctr32_km_prepare

	la	$s0,16($sp)	# inp
	sllg	$s1,$fp,4	# len
	la	$s2,16($sp)	# out
	.long	0xb92e00a8	# km %r10,%r8
	brc	1,.-4		# pay attention to "partial completion"

	la	$s2,16($sp)
	lgr	$s3,$fp
	slgr	$s2,$inp
.Lctr32_km_xor:
	lg	$s0,0($inp)
	lg	$s1,8($inp)
	xg	$s0,0($s2,$inp)
	xg	$s1,8($s2,$inp)
	stg	$s0,0($out,$inp)
	stg	$s1,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lctr32_km_xor

	slgr	$len,$fp
	brc	1,.Lctr32_km_loop	# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lctr32_km_loop	# not zero

	l${g}	$s0,0($sp)
	l${g}	$s1,$SIZE_T($sp)
	la	$s2,16($sp)
.Lctr32_km_zap:
	stg	$s0,0($s2)
	stg	$s0,8($s2)
	la	$s2,16($s2)
	brct	$s1,.Lctr32_km_zap

	la	$sp,0($s0)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lctr32_software:
___
$code.=<<___;
	stm${g}	$key,$ra,5*$SIZE_T($sp)
	sl${g}r	$inp,$out
	larl	$tbl,AES_Te
	llgf	$t1,12($ivp)

.Lctr32_loop:
	stm${g}	$inp,$out,2*$SIZE_T($sp)
	llgf	$s0,0($ivp)
	llgf	$s1,4($ivp)
	llgf	$s2,8($ivp)
	lgr	$s3,$t1
	st	$t1,16*$SIZE_T($sp)
	lgr	%r4,$key

	bras	$ra,_s390x_AES_encrypt

	lm${g}	$inp,$ivp,2*$SIZE_T($sp)
	llgf	$t1,16*$SIZE_T($sp)
	x	$s0,0($inp,$out)
	x	$s1,4($inp,$out)
	x	$s2,8($inp,$out)
	x	$s3,12($inp,$out)
	stm	$s0,$s3,0($out)

	la	$out,16($out)
	ahi	$t1,1		# 32-bit increment
	brct	$len,.Lctr32_loop
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
___
}
########################################################################
# void AES_xts_encrypt(const char *inp,char *out,size_t len,
#	 const AES_KEY *key1, const AES_KEY *key2,
#	 const unsigned char iv[16]);
#
{
my $inp="%r2";
my $out="%r4";	# len and out are swapped
my $len="%r3";
my $key1="%r5";	# $i1
my $key2="%r6";	# $i2
my $fp="%r7";	# $i3
my $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...

$code.=<<___;
.type	_s390x_xts_km,\@function
.align	16
_s390x_xts_km:
___
$code.=<<___ if(1);
	llgfr	$s0,%r0			# put aside the function code
	lghi	$s1,0x7f
	nr	$s1,%r0
	lghi	%r0,0			# query capability vector
	la	%r1,$tweak-16($sp)
	.long	0xb92e0042		# km %r4,%r2
	llihh	%r1,0x8000
	srlg	%r1,%r1,32($s1)		# check for 32+function code
	ng	%r1,$tweak-16($sp)
	lgr	%r0,$s0			# restore the function code
	la	%r1,0($key1)		# restore $key1
	jz	.Lxts_km_vanilla

	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
	algr	$out,$inp

	oill	%r0,32			# switch to xts function code
	aghi	$s1,-18			#
	sllg	$s1,$s1,3		# (function code - 18)*8, 0 or 16
	la	%r1,$tweak-16($sp)
	slgr	%r1,$s1			# parameter block position
	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
	stmg	$s0,$s3,0(%r1)		# and copy it to parameter block.
					# yes, it contains junk and overlaps
					# with the tweak in 128-bit case.
					# it's done to avoid conditional
					# branch.
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value

	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# pay attention to "partial completion"

	lrvg	$s0,$tweak+0($sp)	# load the last tweak
	lrvg	$s1,$tweak+8($sp)
	stmg	%r0,%r3,$tweak-32($sp)	# wipe copy of the key

	nill	%r0,0xffdf		# switch back to original function code
	la	%r1,0($key1)		# restore pointer to $key1
	slgr	$out,$inp

	llgc	$len,2*$SIZE_T-1($sp)
	nill	$len,0x0f		# $len%=16
	br	$ra
.align	16
.Lxts_km_vanilla:
___
$code.=<<___;
	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	$s0,-1024-256-16	# guarantee at least a 256-byte buffer
	lghi	$s1,-4096
	algr	$s0,$sp
	lgr	$fp,$sp
	ngr	$s0,$s1			# align at page boundary
	slgr	$fp,$s0			# total buffer size
	lgr	$s2,$sp
	lghi	$s1,1024+16		# sl[g]fi would need the extended-immediate facility
	slgr	$fp,$s1			# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16
	la	$sp,1024($s0)		# alloca
	nill	$fp,0xfff0		# round to 16*n
	st${g}	$s2,0($sp)		# back-chain
	nill	$len,0xfff0		# redundant: $len is already a multiple of 16
	st${g}	$fp,$SIZE_T($sp)
	slgr	$len,$fp
	brc	1,.Lxts_km_go		# not zero, no borrow
	algr	$fp,$len		# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)
.Lxts_km_go:
	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
	lrvg	$s1,$tweak+8($s2)
	la	$s2,16($sp)		# vector of ascending tweak values
	slgr	$s2,$inp
	srlg	$s3,$fp,4
	j	.Lxts_km_start
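	# note the biased addressing in the loops below: $s2 is kept as
	# (tweak vector)-(input pointer) and $out as (output)-(input),
	# so a single register, $inp, advances while 0($s2,$inp) hits
	# the tweak vector and 0($out,$inp) the output buffer.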
.Lxts_km_loop:
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_prepare:
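	# multiply the tweak by x in GF(2^128): the tweak sits in
	# $s0 (low half) and $s1 (high half) after byte-reversed loads,
	# and a carry out of the top bit is folded back as 0x87, since
	# x^128 = x^7+x^2+x+1.  Roughly, in C:
	#	carry = (hi>>63) ? 0x87 : 0;
	#	hi = hi<<1 | lo>>63;
	#	lo = lo<<1 ^ carry;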
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
.Lxts_km_start:
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	stg	$i1,0($s2,$inp)
	stg	$i2,8($s2,$inp)
	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_prepare
	slgr	$inp,$fp		# rewind $inp
	la	$s2,0($out,$inp)
	lgr	$s3,$fp
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# pay attention to "partial completion"
	la	$s2,16($sp)
	slgr	$s2,$inp
	srlg	$s3,$fp,4
.Lxts_km_xor:
	lg	$i1,0($out,$inp)
	lg	$i2,8($out,$inp)
	xg	$i1,0($s2,$inp)
	xg	$i2,8($s2,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$inp,16($inp)
	brct	$s3,.Lxts_km_xor
	slgr	$len,$fp
	brc	1,.Lxts_km_loop		# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
	brc	4+1,.Lxts_km_loop	# not zero
	l${g}	$i1,0($sp)		# back-chain
	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
	la	$i2,16($sp)
	srlg	$fp,$fp,4
.Lxts_km_zap:
	stg	$i1,0($i2)
	stg	$i1,8($i2)
	la	$i2,16($i2)
	brct	$fp,.Lxts_km_zap
	la	$sp,0($i1)
	llgc	$len,2*$SIZE_T-1($i1)
	nill	$len,0x0f		# $len%=16
	bzr	$ra
	# generate one more tweak...
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
	ltr	$len,$len		# clear zero flag
	br	$ra
.size	_s390x_xts_km,.-_s390x_xts_km
.globl	AES_xts_encrypt
.type	AES_xts_encrypt,\@function
.align	16
AES_xts_encrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	srag	$len,$len,4		# formally wrong, because it extends
					# the sign bit, but who can afford
					# processing more than 2^63-1 bytes?
					# I use it because it sets the
					# condition code...
	bcr	8,$ra			# abort if zero (i.e. less than 16)
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_enc_software
	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)
	sllg	$len,$len,4		# $len&=~15
	slgr	$out,$inp
	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed anymore
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?
	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore
	bras	$ra,_s390x_xts_km
	jz	.Lxts_enc_km_done
	aghi	$inp,-16		# take one step back
	la	$i3,0($out,$inp)	# put aside real $out
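	# ciphertext stealing: the leading len%16 bytes of the last full
	# ciphertext block migrate to the final partial block, the
	# remaining plaintext tail takes their place, and the re-built
	# block is encrypted once more under the next tweak.  Roughly:
	#	for (i=0; i<tail; i++) {
	#		out[16+i] = out[i];	/* steal cipher-text */
	#		out[i] = inp[16+i];	/* graft plain-text  */
	#	}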
.Lxts_enc_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_km_steal
	la	$s2,0($i3)
	lghi	$s3,16
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($s2)
	xg	$i2,8($s2)
	stg	$i1,0($s2)
	stg	$i2,8($s2)
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# can this happen?
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($i3)
	xg	$i2,8($i3)
	stg	$i1,0($i3)
	stg	$i2,8($i3)
.Lxts_enc_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_enc_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)
	slgr	$out,$inp
	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	j	.Lxts_enc_enter
.align	16
.Lxts_enc_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
	la	$inp,16($inp)		# $inp+=16
.Lxts_enc_enter:
	x	$s0,0($inp)		# ^=*($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	brct${g}	$len,.Lxts_enc_loop
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_enc_done
	la	$i3,0($inp,$out)	# put aside real $out
.Lxts_enc_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_steal
	la	$out,0($i3)		# restore real $out
	# generate last tweak...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
	x	$s0,0($out)		# ^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,`$tweak+0`($sp)	# ^=tweak
	x	$s1,`$tweak+4`($sp)
	x	$s2,`$tweak+8`($sp)
	x	$s3,`$tweak+12`($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)
.Lxts_enc_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_encrypt,.-AES_xts_encrypt
___
# void AES_xts_decrypt(const char *inp,char *out,size_t len,
#	const AES_KEY *key1, const AES_KEY *key2,
#	const unsigned char iv[16]);
#
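# On the decrypt side ciphertext stealing reverses the tweak order:
# the last full ciphertext block is decrypted under the *second*
# (incremented) tweak, the tail bytes are swapped as in the encrypt
# case, and the re-assembled block is decrypted under the first tweak.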
$code.=<<___;
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,\@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	aghi	$len,-16
	bcr	4,$ra			# abort if less than zero. formally
					# wrong, because $len is unsigned,
					# but who can afford asking to
					# process more than 2^63-1 bytes?
	tmll	$len,0x0f
	jnz	.Lxts_dec_proceed
	aghi	$len,16
.Lxts_dec_proceed:
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software
	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)
	nill	$len,0xfff0		# $len&=~15
	slgr	$out,$inp
	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed past this point
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?
	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore
	ltgr	$len,$len
	jz	.Lxts_dec_km_short
	bras	$ra,_s390x_xts_km
	jz	.Lxts_dec_km_done
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
	j	.Lxts_dec_km_2ndtweak
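	# short-input path: with a single full block (plus stolen tail)
	# there is nothing for _s390x_xts_km to bulk-process, so the
	# initial tweak is reloaded from the stack and the pair-of-tweaks
	# code below is entered directly.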
.Lxts_dec_km_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%=16
	lrvg	$s0,$tweak+0($sp)	# load the tweak
	lrvg	$s1,$tweak+8($sp)
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
.Lxts_dec_km_2ndtweak:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i2,0($out,$inp)
	lghi	$i3,16
	.long	0xb92e0066		# km $i2,$i2
	brc	1,.-4			# can this happen?
	lrvgr	$i1,$s0
	lrvgr	$i2,$s1
	xg	$i1,0($out,$inp)
	xg	$i2,8($out,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_km_steal
	lgr	$s0,$s2
	lgr	$s1,$s3
	xg	$s0,0($i3)
	xg	$s1,8($i3)
	stg	$s0,0($i3)
	stg	$s1,8($i3)
	la	$s0,0($i3)
	lghi	$s1,16
	.long	0xb92e0088		# km $s0,$s0
	brc	1,.-4			# can this happen?
	xg	$s2,0($i3)
	xg	$s3,8($i3)
	stg	$s2,0($i3)
	stg	$s3,8($i3)
.Lxts_dec_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_dec_software:
___
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)
	srlg	$len,$len,4
	slgr	$out,$inp
	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	larl	$tbl,AES_Td
	lt${g}r	$len,$len
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	jz	.Lxts_dec_short
	j	.Lxts_dec_enter
.align	16
.Lxts_dec_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
.Lxts_dec_enter:
	x	$s0,0($inp)		# tweak^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$inp,16($inp)
	brct${g}	$len,.Lxts_dec_loop
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_dec_done
	# generate pair of tweaks...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$i2,$s1			# flip byte order
	lrvgr	$i3,$s3
	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak
.align	16
.Lxts_dec_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak-16+8($sp)
	llgfr	$s3,$s3
	x	$s0,0($inp)		# tweak_the_2nd^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
	x	$s1,$tweak-16+4($sp)
	x	$s2,$tweak-16+8($sp)
	x	$s3,$tweak-16+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_steal
	la	$out,0($i3)		# restore real $out
	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
	x	$s0,0($out)		# tweak^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)
	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
	stg	$sp,$tweak-16+8($sp)
.Lxts_dec_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_decrypt,.-AES_xts_decrypt
___
}
$code.=<<___;
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
.comm	OPENSSL_s390xcap_P,16,8
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;	# force flush