ppccap.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. /*
  2. * Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
  3. *
  4. * Licensed under the OpenSSL license (the "License"). You may not use
  5. * this file except in compliance with the License. You can obtain a copy
  6. * in the file LICENSE in the source distribution or at
  7. * https://www.openssl.org/source/license.html
  8. */
#include <setjmp.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
  15. #if defined(__linux) || defined(_AIX)
  16. # include <sys/utsname.h>
  17. #endif
  18. #if defined(_AIX53) /* defined even on post-5.3 */
  19. # include <sys/systemcfg.h>
  20. # if !defined(__power_set)
  21. # define __power_set(a) (_system_configuration.implementation & (a))
  22. # endif
  23. #endif
  24. #if defined(__APPLE__) && defined(__MACH__)
  25. # include <sys/types.h>
  26. # include <sys/sysctl.h>
  27. #endif
  28. #include <openssl/crypto.h>
  29. #include <openssl/bn.h>
  30. #include "ppc_arch.h"
/*
 * Bitmask of PPC_* capability flags. OPENSSL_cpuid_setup() fills it in and
 * the dispatch wrappers in this file test it to choose an implementation.
 */
unsigned int OPENSSL_ppccap_P = 0;
/*
 * Signal mask that OPENSSL_cpuid_setup() builds and installs while it runs
 * the instruction probes.
 */
static sigset_t all_masked;
  33. #ifdef OPENSSL_BN_ASM_MONT
  34. int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
  35. const BN_ULONG *np, const BN_ULONG *n0, int num)
  36. {
  37. int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
  38. const BN_ULONG *np, const BN_ULONG *n0, int num);
  39. int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
  40. const BN_ULONG *np, const BN_ULONG *n0, int num);
  41. if (num < 4)
  42. return 0;
  43. if ((num & 3) == 0)
  44. return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);
  45. /*
  46. * There used to be [optional] call to bn_mul_mont_fpu64 here,
  47. * but above subroutine is faster on contemporary processors.
  48. * Formulation means that there might be old processors where
  49. * FPU code path would be faster, POWER6 perhaps, but there was
  50. * no opportunity to figure it out...
  51. */
  52. return bn_mul_mont_int(rp, ap, bp, np, n0, num);
  53. }
  54. #endif
  55. void sha256_block_p8(void *ctx, const void *inp, size_t len);
  56. void sha256_block_ppc(void *ctx, const void *inp, size_t len);
  57. void sha256_block_data_order(void *ctx, const void *inp, size_t len)
  58. {
  59. OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha256_block_p8(ctx, inp, len) :
  60. sha256_block_ppc(ctx, inp, len);
  61. }
  62. void sha512_block_p8(void *ctx, const void *inp, size_t len);
  63. void sha512_block_ppc(void *ctx, const void *inp, size_t len);
  64. void sha512_block_data_order(void *ctx, const void *inp, size_t len)
  65. {
  66. OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha512_block_p8(ctx, inp, len) :
  67. sha512_block_ppc(ctx, inp, len);
  68. }
  69. #ifndef OPENSSL_NO_CHACHA
  70. void ChaCha20_ctr32_int(unsigned char *out, const unsigned char *inp,
  71. size_t len, const unsigned int key[8],
  72. const unsigned int counter[4]);
  73. void ChaCha20_ctr32_vmx(unsigned char *out, const unsigned char *inp,
  74. size_t len, const unsigned int key[8],
  75. const unsigned int counter[4]);
  76. void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
  77. size_t len, const unsigned int key[8],
  78. const unsigned int counter[4])
  79. {
  80. OPENSSL_ppccap_P & PPC_ALTIVEC
  81. ? ChaCha20_ctr32_vmx(out, inp, len, key, counter)
  82. : ChaCha20_ctr32_int(out, inp, len, key, counter);
  83. }
  84. #endif
  85. #ifndef OPENSSL_NO_POLY1305
  86. void poly1305_init_int(void *ctx, const unsigned char key[16]);
  87. void poly1305_blocks(void *ctx, const unsigned char *inp, size_t len,
  88. unsigned int padbit);
  89. void poly1305_emit(void *ctx, unsigned char mac[16],
  90. const unsigned int nonce[4]);
  91. void poly1305_init_fpu(void *ctx, const unsigned char key[16]);
  92. void poly1305_blocks_fpu(void *ctx, const unsigned char *inp, size_t len,
  93. unsigned int padbit);
  94. void poly1305_emit_fpu(void *ctx, unsigned char mac[16],
  95. const unsigned int nonce[4]);
  96. int poly1305_init(void *ctx, const unsigned char key[16], void *func[2])
  97. {
  98. if (sizeof(size_t) == 4 && (OPENSSL_ppccap_P & PPC_FPU)) {
  99. poly1305_init_fpu(ctx, key);
  100. func[0] = poly1305_blocks_fpu;
  101. func[1] = poly1305_emit_fpu;
  102. } else {
  103. poly1305_init_int(ctx, key);
  104. func[0] = poly1305_blocks;
  105. func[1] = poly1305_emit;
  106. }
  107. return 1;
  108. }
  109. #endif
  110. #ifdef ECP_NISTZ256_ASM
  111. void ecp_nistz256_mul_mont(unsigned long res[4], const unsigned long a[4],
  112. const unsigned long b[4]);
  113. void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4]);
  114. void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4])
  115. {
  116. static const unsigned long RR[] = { 0x0000000000000003U,
  117. 0xfffffffbffffffffU,
  118. 0xfffffffffffffffeU,
  119. 0x00000004fffffffdU };
  120. ecp_nistz256_mul_mont(res, in, RR);
  121. }
  122. void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4]);
  123. void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4])
  124. {
  125. static const unsigned long one[] = { 1, 0, 0, 0 };
  126. ecp_nistz256_mul_mont(res, in, one);
  127. }
  128. #endif
  129. static sigjmp_buf ill_jmp;
  130. static void ill_handler(int sig)
  131. {
  132. siglongjmp(ill_jmp, sig);
  133. }
/*
 * Capability probes, defined outside this file. OPENSSL_cpuid_setup() calls
 * each one after a sigsetjmp() with a SIGILL handler installed, and sets the
 * matching PPC_* bit only if the call returns normally:
 *   OPENSSL_fpu_probe       -> PPC_FPU
 *   OPENSSL_ppc64_probe     -> PPC_FPU64
 *   OPENSSL_altivec_probe   -> PPC_ALTIVEC
 *   OPENSSL_crypto207_probe -> PPC_CRYPTO207
 *   OPENSSL_madd300_probe   -> PPC_MADD300
 */
void OPENSSL_fpu_probe(void);
void OPENSSL_ppc64_probe(void);
void OPENSSL_altivec_probe(void);
void OPENSSL_crypto207_probe(void);
void OPENSSL_madd300_probe(void);
  139. /*
  140. * Use a weak reference to getauxval() so we can use it if it is available
  141. * but don't break the build if it is not. Note that this is *link-time*
  142. * feature detection, not *run-time*. In other words if we link with
  143. * symbol present, it's expected to be present even at run-time.
  144. */
#if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__)
/* Weak declaration: OPENSSL_cpuid_setup() compares it with NULL before use. */
extern unsigned long getauxval(unsigned long type) __attribute__ ((weak));
#else
/*
 * No weak-symbol support assumed here: a NULL function pointer stands in, so
 * the getauxval() path in OPENSSL_cpuid_setup() is skipped.
 */
static unsigned long (*getauxval) (unsigned long) = NULL;
#endif
/* I wish <sys/auxv.h> was universally available */
/* getauxval() key for the first capability word, followed by bits in it. */
#define HWCAP 16 /* AT_HWCAP */
#define HWCAP_PPC64 (1U << 30)
#define HWCAP_ALTIVEC (1U << 28)
#define HWCAP_FPU (1U << 27)
#define HWCAP_POWER6_EXT (1U << 9)
#define HWCAP_VSX (1U << 7)
/*
 * getauxval() key for the second capability word, followed by bits in it
 * (PPC_FEATURE2_VEC_CRYPTO and PPC_FEATURE2_ARCH_3_00 in the Linux headers).
 */
#define HWCAP2 26 /* AT_HWCAP2 */
#define HWCAP_VEC_CRYPTO (1U << 25)
#define HWCAP_ARCH_3_00 (1U << 23)
  160. # if defined(__GNUC__) && __GNUC__>=2
  161. __attribute__ ((constructor))
  162. # endif
  163. void OPENSSL_cpuid_setup(void)
  164. {
  165. char *e;
  166. struct sigaction ill_oact, ill_act;
  167. sigset_t oset;
  168. static int trigger = 0;
  169. if (trigger)
  170. return;
  171. trigger = 1;
  172. if ((e = getenv("OPENSSL_ppccap"))) {
  173. OPENSSL_ppccap_P = strtoul(e, NULL, 0);
  174. return;
  175. }
  176. OPENSSL_ppccap_P = 0;
  177. #if defined(_AIX)
  178. OPENSSL_ppccap_P |= PPC_FPU;
  179. if (sizeof(size_t) == 4) {
  180. struct utsname uts;
  181. # if defined(_SC_AIX_KERNEL_BITMODE)
  182. if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64)
  183. return;
  184. # endif
  185. if (uname(&uts) != 0 || atoi(uts.version) < 6)
  186. return;
  187. }
  188. # if defined(__power_set)
  189. /*
  190. * Value used in __power_set is a single-bit 1<<n one denoting
  191. * specific processor class. Incidentally 0xffffffff<<n can be
  192. * used to denote specific processor and its successors.
  193. */
  194. if (sizeof(size_t) == 4) {
  195. /* In 32-bit case PPC_FPU64 is always fastest [if option] */
  196. if (__power_set(0xffffffffU<<13)) /* POWER5 and later */
  197. OPENSSL_ppccap_P |= PPC_FPU64;
  198. } else {
  199. /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
  200. if (__power_set(0x1U<<14)) /* POWER6 */
  201. OPENSSL_ppccap_P |= PPC_FPU64;
  202. }
  203. if (__power_set(0xffffffffU<<14)) /* POWER6 and later */
  204. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  205. if (__power_set(0xffffffffU<<16)) /* POWER8 and later */
  206. OPENSSL_ppccap_P |= PPC_CRYPTO207;
  207. if (__power_set(0xffffffffU<<17)) /* POWER9 and later */
  208. OPENSSL_ppccap_P |= PPC_MADD300;
  209. return;
  210. # endif
  211. #endif
  212. #if defined(__APPLE__) && defined(__MACH__)
  213. OPENSSL_ppccap_P |= PPC_FPU;
  214. {
  215. int val;
  216. size_t len = sizeof(val);
  217. if (sysctlbyname("hw.optional.64bitops", &val, &len, NULL, 0) == 0) {
  218. if (val)
  219. OPENSSL_ppccap_P |= PPC_FPU64;
  220. }
  221. len = sizeof(val);
  222. if (sysctlbyname("hw.optional.altivec", &val, &len, NULL, 0) == 0) {
  223. if (val)
  224. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  225. }
  226. return;
  227. }
  228. #endif
  229. if (getauxval != NULL) {
  230. unsigned long hwcap = getauxval(HWCAP);
  231. if (hwcap & HWCAP_FPU) {
  232. OPENSSL_ppccap_P |= PPC_FPU;
  233. if (sizeof(size_t) == 4) {
  234. /* In 32-bit case PPC_FPU64 is always fastest [if option] */
  235. if (hwcap & HWCAP_PPC64)
  236. OPENSSL_ppccap_P |= PPC_FPU64;
  237. } else {
  238. /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
  239. if (hwcap & HWCAP_POWER6_EXT)
  240. OPENSSL_ppccap_P |= PPC_FPU64;
  241. }
  242. }
  243. if (hwcap & HWCAP_ALTIVEC) {
  244. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  245. if ((hwcap & HWCAP_VSX) && (getauxval(HWCAP2) & HWCAP_VEC_CRYPTO))
  246. OPENSSL_ppccap_P |= PPC_CRYPTO207;
  247. }
  248. if (hwcap & HWCAP_ARCH_3_00) {
  249. OPENSSL_ppccap_P |= PPC_MADD300;
  250. }
  251. return;
  252. }
  253. sigfillset(&all_masked);
  254. sigdelset(&all_masked, SIGILL);
  255. sigdelset(&all_masked, SIGTRAP);
  256. #ifdef SIGEMT
  257. sigdelset(&all_masked, SIGEMT);
  258. #endif
  259. sigdelset(&all_masked, SIGFPE);
  260. sigdelset(&all_masked, SIGBUS);
  261. sigdelset(&all_masked, SIGSEGV);
  262. memset(&ill_act, 0, sizeof(ill_act));
  263. ill_act.sa_handler = ill_handler;
  264. ill_act.sa_mask = all_masked;
  265. sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
  266. sigaction(SIGILL, &ill_act, &ill_oact);
  267. if (sigsetjmp(ill_jmp,1) == 0) {
  268. OPENSSL_fpu_probe();
  269. OPENSSL_ppccap_P |= PPC_FPU;
  270. if (sizeof(size_t) == 4) {
  271. #ifdef __linux
  272. struct utsname uts;
  273. if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0)
  274. #endif
  275. if (sigsetjmp(ill_jmp, 1) == 0) {
  276. OPENSSL_ppc64_probe();
  277. OPENSSL_ppccap_P |= PPC_FPU64;
  278. }
  279. } else {
  280. /*
  281. * Wanted code detecting POWER6 CPU and setting PPC_FPU64
  282. */
  283. }
  284. }
  285. if (sigsetjmp(ill_jmp, 1) == 0) {
  286. OPENSSL_altivec_probe();
  287. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  288. if (sigsetjmp(ill_jmp, 1) == 0) {
  289. OPENSSL_crypto207_probe();
  290. OPENSSL_ppccap_P |= PPC_CRYPTO207;
  291. }
  292. }
  293. if (sigsetjmp(ill_jmp, 1) == 0) {
  294. OPENSSL_madd300_probe();
  295. OPENSSL_ppccap_P |= PPC_MADD300;
  296. }
  297. sigaction(SIGILL, &ill_oact, NULL);
  298. sigprocmask(SIG_SETMASK, &oset, NULL);
  299. }