ppccap.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. /*
  2. * Copyright 2009-2021 The OpenSSL Project Authors. All Rights Reserved.
  3. *
  4. * Licensed under the Apache License 2.0 (the "License"). You may not use
  5. * this file except in compliance with the License. You can obtain a copy
  6. * in the file LICENSE in the source distribution or at
  7. * https://www.openssl.org/source/license.html
  8. */
  9. #include <stdio.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <setjmp.h>
  13. #include <signal.h>
  14. #include <unistd.h>
  15. #if defined(__linux) || defined(_AIX)
  16. # include <sys/utsname.h>
  17. #endif
  18. #if defined(_AIX53) /* defined even on post-5.3 */
  19. # include <sys/systemcfg.h>
  20. # if !defined(__power_set)
  21. # define __power_set(a) (_system_configuration.implementation & (a))
  22. # endif
  23. #endif
  24. #if defined(__APPLE__) && defined(__MACH__)
  25. # include <sys/types.h>
  26. # include <sys/sysctl.h>
  27. #endif
  28. #include <openssl/crypto.h>
  29. #include <openssl/bn.h>
  30. #include "internal/cryptlib.h"
  31. #include "crypto/chacha.h"
  32. #include "bn/bn_local.h"
  33. #include "ppc_arch.h"
  34. unsigned int OPENSSL_ppccap_P = 0;
  35. static sigset_t all_masked;
  36. #ifdef OPENSSL_BN_ASM_MONT
  37. int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
  38. const BN_ULONG *np, const BN_ULONG *n0, int num)
  39. {
  40. int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
  41. const BN_ULONG *np, const BN_ULONG *n0, int num);
  42. int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
  43. const BN_ULONG *np, const BN_ULONG *n0, int num);
  44. int bn_mul_mont_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap,
  45. const BN_ULONG *bp, const BN_ULONG *np,
  46. const BN_ULONG *n0, int num);
  47. int bn_mul_mont_300_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap,
  48. const BN_ULONG *bp, const BN_ULONG *np,
  49. const BN_ULONG *n0, int num);
  50. if (num < 4)
  51. return 0;
  52. if ((num & 3) == 0)
  53. return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);
  54. /*
  55. * There used to be [optional] call to bn_mul_mont_fpu64 here,
  56. * but above subroutine is faster on contemporary processors.
  57. * Formulation means that there might be old processors where
  58. * FPU code path would be faster, POWER6 perhaps, but there was
  59. * no opportunity to figure it out...
  60. */
  61. if (num == 6) {
  62. if (OPENSSL_ppccap_P & PPC_MADD300)
  63. return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num);
  64. else
  65. return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num);
  66. }
  67. return bn_mul_mont_int(rp, ap, bp, np, n0, num);
  68. }
  69. #endif
  70. void sha256_block_p8(void *ctx, const void *inp, size_t len);
  71. void sha256_block_ppc(void *ctx, const void *inp, size_t len);
  72. void sha256_block_data_order(void *ctx, const void *inp, size_t len);
  73. void sha256_block_data_order(void *ctx, const void *inp, size_t len)
  74. {
  75. OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha256_block_p8(ctx, inp, len) :
  76. sha256_block_ppc(ctx, inp, len);
  77. }
  78. void sha512_block_p8(void *ctx, const void *inp, size_t len);
  79. void sha512_block_ppc(void *ctx, const void *inp, size_t len);
  80. void sha512_block_data_order(void *ctx, const void *inp, size_t len);
  81. void sha512_block_data_order(void *ctx, const void *inp, size_t len)
  82. {
  83. OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha512_block_p8(ctx, inp, len) :
  84. sha512_block_ppc(ctx, inp, len);
  85. }
  86. #ifndef FIPS_MODULE
  87. # ifndef OPENSSL_NO_CHACHA
  88. void ChaCha20_ctr32_int(unsigned char *out, const unsigned char *inp,
  89. size_t len, const unsigned int key[8],
  90. const unsigned int counter[4]);
  91. void ChaCha20_ctr32_vmx(unsigned char *out, const unsigned char *inp,
  92. size_t len, const unsigned int key[8],
  93. const unsigned int counter[4]);
  94. void ChaCha20_ctr32_vsx(unsigned char *out, const unsigned char *inp,
  95. size_t len, const unsigned int key[8],
  96. const unsigned int counter[4]);
  97. void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
  98. size_t len, const unsigned int key[8],
  99. const unsigned int counter[4])
  100. {
  101. OPENSSL_ppccap_P & PPC_CRYPTO207
  102. ? ChaCha20_ctr32_vsx(out, inp, len, key, counter)
  103. : OPENSSL_ppccap_P & PPC_ALTIVEC
  104. ? ChaCha20_ctr32_vmx(out, inp, len, key, counter)
  105. : ChaCha20_ctr32_int(out, inp, len, key, counter);
  106. }
  107. # endif
  108. # ifndef OPENSSL_NO_POLY1305
  109. void poly1305_init_int(void *ctx, const unsigned char key[16]);
  110. void poly1305_blocks(void *ctx, const unsigned char *inp, size_t len,
  111. unsigned int padbit);
  112. void poly1305_emit(void *ctx, unsigned char mac[16],
  113. const unsigned int nonce[4]);
  114. void poly1305_init_fpu(void *ctx, const unsigned char key[16]);
  115. void poly1305_blocks_fpu(void *ctx, const unsigned char *inp, size_t len,
  116. unsigned int padbit);
  117. void poly1305_emit_fpu(void *ctx, unsigned char mac[16],
  118. const unsigned int nonce[4]);
  119. void poly1305_init_vsx(void *ctx, const unsigned char key[16]);
  120. void poly1305_blocks_vsx(void *ctx, const unsigned char *inp, size_t len,
  121. unsigned int padbit);
  122. void poly1305_emit_vsx(void *ctx, unsigned char mac[16],
  123. const unsigned int nonce[4]);
  124. int poly1305_init(void *ctx, const unsigned char key[16], void *func[2]);
  125. int poly1305_init(void *ctx, const unsigned char key[16], void *func[2])
  126. {
  127. if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
  128. poly1305_init_int(ctx, key);
  129. func[0] = (void*)(uintptr_t)poly1305_blocks_vsx;
  130. func[1] = (void*)(uintptr_t)poly1305_emit;
  131. } else if (sizeof(size_t) == 4 && (OPENSSL_ppccap_P & PPC_FPU)) {
  132. poly1305_init_fpu(ctx, key);
  133. func[0] = (void*)(uintptr_t)poly1305_blocks_fpu;
  134. func[1] = (void*)(uintptr_t)poly1305_emit_fpu;
  135. } else {
  136. poly1305_init_int(ctx, key);
  137. func[0] = (void*)(uintptr_t)poly1305_blocks;
  138. func[1] = (void*)(uintptr_t)poly1305_emit;
  139. }
  140. return 1;
  141. }
  142. # endif
  143. #endif /* FIPS_MODULE */
  144. #ifdef ECP_NISTZ256_ASM
  void ecp_nistz256_mul_mont(unsigned long res[4], const unsigned long a[4],
                             const unsigned long b[4]);
  void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4]);

  /*
   * Computes res = ecp_nistz256_mul_mont(in, RR), where RR is the fixed
   * four-word constant below.
   * NOTE(review): RR is presumably R^2 mod p for the P-256 Montgomery
   * domain, which would make this the conversion into Montgomery form;
   * confirm against the assembly module.
   */
  void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4])
  {
      static const unsigned long mont_rr[] = {
          0x0000000000000003U,
          0xfffffffbffffffffU,
          0xfffffffffffffffeU,
          0x00000004fffffffdU
      };

      ecp_nistz256_mul_mont(res, in, mont_rr);
  }
  void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4]);

  /*
   * Computes res = ecp_nistz256_mul_mont(in, 1), i.e. a Montgomery
   * multiplication of |in| by the four-word value { 1, 0, 0, 0 }.
   */
  void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4])
  {
      static const unsigned long plain_one[] = { 1, 0, 0, 0 };

      ecp_nistz256_mul_mont(res, in, plain_one);
  }
  162. #endif
  163. static sigjmp_buf ill_jmp;
  164. static void ill_handler(int sig)
  165. {
  166. siglongjmp(ill_jmp, sig);
  167. }
  168. void OPENSSL_fpu_probe(void);
  169. void OPENSSL_ppc64_probe(void);
  170. void OPENSSL_altivec_probe(void);
  171. void OPENSSL_crypto207_probe(void);
  172. void OPENSSL_madd300_probe(void);
  173. long OPENSSL_rdtsc_mftb(void);
  174. long OPENSSL_rdtsc_mfspr268(void);
  175. uint32_t OPENSSL_rdtsc(void)
  176. {
  177. if (OPENSSL_ppccap_P & PPC_MFTB)
  178. return OPENSSL_rdtsc_mftb();
  179. else if (OPENSSL_ppccap_P & PPC_MFSPR268)
  180. return OPENSSL_rdtsc_mfspr268();
  181. else
  182. return 0;
  183. }
  184. size_t OPENSSL_instrument_bus_mftb(unsigned int *, size_t);
  185. size_t OPENSSL_instrument_bus_mfspr268(unsigned int *, size_t);
  186. size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt)
  187. {
  188. if (OPENSSL_ppccap_P & PPC_MFTB)
  189. return OPENSSL_instrument_bus_mftb(out, cnt);
  190. else if (OPENSSL_ppccap_P & PPC_MFSPR268)
  191. return OPENSSL_instrument_bus_mfspr268(out, cnt);
  192. else
  193. return 0;
  194. }
  195. size_t OPENSSL_instrument_bus2_mftb(unsigned int *, size_t, size_t);
  196. size_t OPENSSL_instrument_bus2_mfspr268(unsigned int *, size_t, size_t);
  197. size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
  198. {
  199. if (OPENSSL_ppccap_P & PPC_MFTB)
  200. return OPENSSL_instrument_bus2_mftb(out, cnt, max);
  201. else if (OPENSSL_ppccap_P & PPC_MFSPR268)
  202. return OPENSSL_instrument_bus2_mfspr268(out, cnt, max);
  203. else
  204. return 0;
  205. }
  206. #if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
  207. # if __GLIBC_PREREQ(2, 16)
  208. # include <sys/auxv.h>
  209. # define OSSL_IMPLEMENT_GETAUXVAL
  210. # endif
  211. #endif
  212. #if defined(__FreeBSD__)
  213. # include <sys/param.h>
  214. # if __FreeBSD_version >= 1200000
  215. # include <sys/auxv.h>
  216. # define OSSL_IMPLEMENT_GETAUXVAL
  /*
   * getauxval() stand-in built on elf_aux_info(): returns the auxiliary
   * vector entry for |key|, or 0 when elf_aux_info() reports failure.
   */
  static unsigned long getauxval(unsigned long key)
  {
      unsigned long entry = 0ul;

      return elf_aux_info((int)key, &entry, sizeof(entry)) == 0 ? entry : 0ul;
  }
  224. # endif
  225. #endif
  226. /* I wish <sys/auxv.h> was universally available */
  227. #define HWCAP 16 /* AT_HWCAP */
  228. #define HWCAP_PPC64 (1U << 30)
  229. #define HWCAP_ALTIVEC (1U << 28)
  230. #define HWCAP_FPU (1U << 27)
  231. #define HWCAP_POWER6_EXT (1U << 9)
  232. #define HWCAP_VSX (1U << 7)
  233. #define HWCAP2 26 /* AT_HWCAP2 */
  234. #define HWCAP_VEC_CRYPTO (1U << 25)
  235. #define HWCAP_ARCH_3_00 (1U << 23)
  236. # if defined(__GNUC__) && __GNUC__>=2
  237. __attribute__ ((constructor))
  238. # endif
  239. void OPENSSL_cpuid_setup(void)
  240. {
  241. char *e;
  242. struct sigaction ill_oact, ill_act;
  243. sigset_t oset;
  244. static int trigger = 0;
  245. if (trigger)
  246. return;
  247. trigger = 1;
  248. if ((e = getenv("OPENSSL_ppccap"))) {
  249. OPENSSL_ppccap_P = strtoul(e, NULL, 0);
  250. return;
  251. }
  252. OPENSSL_ppccap_P = 0;
  253. #if defined(_AIX)
  254. OPENSSL_ppccap_P |= PPC_FPU;
  255. if (sizeof(size_t) == 4) {
  256. struct utsname uts;
  257. # if defined(_SC_AIX_KERNEL_BITMODE)
  258. if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64)
  259. return;
  260. # endif
  261. if (uname(&uts) != 0 || atoi(uts.version) < 6)
  262. return;
  263. }
  264. # if defined(__power_set)
  265. /*
  266. * Value used in __power_set is a single-bit 1<<n one denoting
  267. * specific processor class. Incidentally 0xffffffff<<n can be
  268. * used to denote specific processor and its successors.
  269. */
  270. if (sizeof(size_t) == 4) {
  271. /* In 32-bit case PPC_FPU64 is always fastest [if option] */
  272. if (__power_set(0xffffffffU<<13)) /* POWER5 and later */
  273. OPENSSL_ppccap_P |= PPC_FPU64;
  274. } else {
  275. /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
  276. if (__power_set(0x1U<<14)) /* POWER6 */
  277. OPENSSL_ppccap_P |= PPC_FPU64;
  278. }
  279. if (__power_set(0xffffffffU<<14)) /* POWER6 and later */
  280. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  281. if (__power_set(0xffffffffU<<16)) /* POWER8 and later */
  282. OPENSSL_ppccap_P |= PPC_CRYPTO207;
  283. if (__power_set(0xffffffffU<<17)) /* POWER9 and later */
  284. OPENSSL_ppccap_P |= PPC_MADD300;
  285. return;
  286. # endif
  287. #endif
  288. #if defined(__APPLE__) && defined(__MACH__)
  289. OPENSSL_ppccap_P |= PPC_FPU;
  290. {
  291. int val;
  292. size_t len = sizeof(val);
  293. if (sysctlbyname("hw.optional.64bitops", &val, &len, NULL, 0) == 0) {
  294. if (val)
  295. OPENSSL_ppccap_P |= PPC_FPU64;
  296. }
  297. len = sizeof(val);
  298. if (sysctlbyname("hw.optional.altivec", &val, &len, NULL, 0) == 0) {
  299. if (val)
  300. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  301. }
  302. return;
  303. }
  304. #endif
  305. #ifdef OSSL_IMPLEMENT_GETAUXVAL
  306. {
  307. unsigned long hwcap = getauxval(HWCAP);
  308. unsigned long hwcap2 = getauxval(HWCAP2);
  309. if (hwcap & HWCAP_FPU) {
  310. OPENSSL_ppccap_P |= PPC_FPU;
  311. if (sizeof(size_t) == 4) {
  312. /* In 32-bit case PPC_FPU64 is always fastest [if option] */
  313. if (hwcap & HWCAP_PPC64)
  314. OPENSSL_ppccap_P |= PPC_FPU64;
  315. } else {
  316. /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
  317. if (hwcap & HWCAP_POWER6_EXT)
  318. OPENSSL_ppccap_P |= PPC_FPU64;
  319. }
  320. }
  321. if (hwcap & HWCAP_ALTIVEC) {
  322. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  323. if ((hwcap & HWCAP_VSX) && (hwcap2 & HWCAP_VEC_CRYPTO))
  324. OPENSSL_ppccap_P |= PPC_CRYPTO207;
  325. }
  326. if (hwcap2 & HWCAP_ARCH_3_00) {
  327. OPENSSL_ppccap_P |= PPC_MADD300;
  328. }
  329. }
  330. #endif
  331. sigfillset(&all_masked);
  332. sigdelset(&all_masked, SIGILL);
  333. sigdelset(&all_masked, SIGTRAP);
  334. #ifdef SIGEMT
  335. sigdelset(&all_masked, SIGEMT);
  336. #endif
  337. sigdelset(&all_masked, SIGFPE);
  338. sigdelset(&all_masked, SIGBUS);
  339. sigdelset(&all_masked, SIGSEGV);
  340. memset(&ill_act, 0, sizeof(ill_act));
  341. ill_act.sa_handler = ill_handler;
  342. ill_act.sa_mask = all_masked;
  343. sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
  344. sigaction(SIGILL, &ill_act, &ill_oact);
  345. #ifndef OSSL_IMPLEMENT_GETAUXVAL
  346. if (sigsetjmp(ill_jmp,1) == 0) {
  347. OPENSSL_fpu_probe();
  348. OPENSSL_ppccap_P |= PPC_FPU;
  349. if (sizeof(size_t) == 4) {
  350. # ifdef __linux
  351. struct utsname uts;
  352. if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0)
  353. # endif
  354. if (sigsetjmp(ill_jmp, 1) == 0) {
  355. OPENSSL_ppc64_probe();
  356. OPENSSL_ppccap_P |= PPC_FPU64;
  357. }
  358. } else {
  359. /*
  360. * Wanted code detecting POWER6 CPU and setting PPC_FPU64
  361. */
  362. }
  363. }
  364. if (sigsetjmp(ill_jmp, 1) == 0) {
  365. OPENSSL_altivec_probe();
  366. OPENSSL_ppccap_P |= PPC_ALTIVEC;
  367. if (sigsetjmp(ill_jmp, 1) == 0) {
  368. OPENSSL_crypto207_probe();
  369. OPENSSL_ppccap_P |= PPC_CRYPTO207;
  370. }
  371. }
  372. if (sigsetjmp(ill_jmp, 1) == 0) {
  373. OPENSSL_madd300_probe();
  374. OPENSSL_ppccap_P |= PPC_MADD300;
  375. }
  376. #endif
  377. if (sigsetjmp(ill_jmp, 1) == 0) {
  378. OPENSSL_rdtsc_mftb();
  379. OPENSSL_ppccap_P |= PPC_MFTB;
  380. } else if (sigsetjmp(ill_jmp, 1) == 0) {
  381. OPENSSL_rdtsc_mfspr268();
  382. OPENSSL_ppccap_P |= PPC_MFSPR268;
  383. }
  384. sigaction(SIGILL, &ill_oact, NULL);
  385. sigprocmask(SIG_SETMASK, &oset, NULL);
  386. }