threads_pthread.c

/*
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__sun)
# include <atomic.h>
#endif

#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE
 * and __ATOMIC_ACQ_REL but which expects only one parameter for
 * __atomic_is_lock_free() rather than two; its signature there is
 * __atomic_is_lock_free(sizeof(_Atomic(T))).
 * All of this makes it impossible to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif

#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

# ifdef PTHREAD_RWLOCK_INITIALIZER
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type. However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function. In the GNU/clang case, that
 * parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;
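/*
 * Illustrative note (not used by the code): with the fallbacks below in
 * effect, an invocation such as
 *
 *     ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE)
 *
 * expands to fallback_atomic_load_n_uint64_t(&lock->reader_idx), i.e. the
 * type parameter selects the per-type fallback and the memory-order argument
 * is dropped.  With the GNU/clang builtins available, the same invocation
 * expands to __atomic_load_n(&lock->reader_idx, __ATOMIC_ACQUIRE) and the
 * type parameter is ignored.
 */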
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
    && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
/*
 * For pointers, the Apple M1 virtualized cpu seems to have some problem using
 * the ldapr instruction (see https://github.com/openssl/openssl/pull/23974).
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad. So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__)
 * Then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction instead, which produces the proper
 * sequencing guarantees
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

#  define IMPL_fallback_atomic_load_n(t)                  \
    static ossl_inline t fallback_atomic_load_n_##t(t *p) \
    {                                                     \
        t ret;                                            \
                                                          \
        pthread_mutex_lock(&atomic_sim_lock);             \
        ret = *p;                                         \
        pthread_mutex_unlock(&atomic_sim_lock);           \
        return ret;                                       \
    }
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                       \
    static ossl_inline t fallback_atomic_store_n_##t(t *p, t v) \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        *p = v;                                                 \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                            \
    static ossl_inline void fallback_atomic_store_##t(t *p, t *v)  \
    {                                                              \
        pthread_mutex_lock(&atomic_sim_lock);                      \
        *p = *v;                                                   \
        pthread_mutex_unlock(&atomic_sim_lock);                    \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                       \
    static ossl_inline t fallback_atomic_exchange_n_##t(t *p, t v) \
    {                                                              \
        t ret;                                                     \
                                                                   \
        pthread_mutex_lock(&atomic_sim_lock);                      \
        ret = *p;                                                  \
        *p = v;                                                    \
        pthread_mutex_unlock(&atomic_sim_lock);                    \
        return ret;                                                \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

/*
 * The fallbacks that follow don't need any per type implementation, as
 * they are designed for uint64_t only. If there comes a time when multiple
 * types need to be covered, it's relatively easy to refactor them the same
 * way as the fallbacks above.
 */

static ossl_inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)

static ossl_inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif

static CRYPTO_THREAD_LOCAL rcu_thr_key;

/*
 * users is broken up into 2 parts
 *     bits 0-15:  current readers
 *     bits 32-63: ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x)       (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x)       ((uint64_t)x << ID_SHIFT)
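/*
 * Worked example (for illustration only): a users value built as
 *
 *     uint64_t users = VAL_ID(5) | (3 * VAL_READER);
 *
 * decomposes as ID_VAL(users) == 5 and READER_COUNT(users) == 3, i.e. the
 * qp carries generation id 5 and currently has 3 readers holding it.
 */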
/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. Users is the 64 bit value that
 * stores the READERS/ID as defined above
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};

/*
 * This is the internal version of a CRYPTO_RCU_LOCK
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};
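/*
 * Usage sketch (illustrative only, not part of this file): a typical
 * reader/writer pairing over a shared pointer guarded by one of these locks,
 * using only the functions defined below. The names rcu_lock, shared_ptr,
 * new_obj and old_obj are hypothetical.
 *
 *     // reader side
 *     ossl_rcu_read_lock(rcu_lock);
 *     struct foo *obj = ossl_rcu_uptr_deref((void **)&shared_ptr);
 *     // ... use obj; it cannot be freed out from under us ...
 *     ossl_rcu_read_unlock(rcu_lock);
 *
 *     // writer side
 *     ossl_rcu_write_lock(rcu_lock);
 *     old_obj = shared_ptr;
 *     ossl_rcu_assign_uptr((void **)&shared_ptr, (void **)&new_obj);
 *     ossl_rcu_write_unlock(rcu_lock);
 *     ossl_synchronize_rcu(rcu_lock);   // wait for pre-existing readers
 *     OPENSSL_free(old_obj);
 */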
/*
 * Called on thread exit to free the pthread key
 * associated with this thread, if any
 */
static void free_rcu_thr_data(void *ptr)
{
    struct rcu_thr_data *data =
        (struct rcu_thr_data *)CRYPTO_THREAD_get_local(&rcu_thr_key);

    OPENSSL_free(data);
    CRYPTO_THREAD_set_local(&rcu_thr_key, NULL);
}

static void ossl_rcu_init(void)
{
    CRYPTO_THREAD_init_local(&rcu_thr_key, NULL);
}

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         *    this operation. Specifically, we don't want the below re-load of
         *    qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write side
         *    of the lock is flushed from a local cpu cache so that we see any
         *    updates prior to the load. This is a non-issue on cache coherent
         *    systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for the
         * reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(&rcu_thr_key);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(&rcu_thr_key, data);
        ossl_init_thread_start(NULL, NULL, free_rcu_thr_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(&rcu_thr_key);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users, VAL_READER,
                                       __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }
    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}

/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);
    /*
     * Even though we are under a write side lock here
     * we need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Update the reader index to be the prior qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}

static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_unlock(&lock->write_lock);
}

void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    /*
     * __ATOMIC_ACQ_REL is used here to ensure that we get any prior published
     * writes before we read, and publish our write immediately
     */
    cb_items = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, NULL,
                                 __ATOMIC_ACQ_REL);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}

int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}
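/*
 * Illustrative use of ossl_rcu_call (the callback and pointer names are
 * hypothetical): rather than blocking in ossl_synchronize_rcu itself, a
 * writer may defer freeing a retired object to the next synchronization
 * point:
 *
 *     static void free_old_foo(void *arg)
 *     {
 *         OPENSSL_free(arg);
 *     }
 *
 *     ossl_rcu_write_lock(rcu_lock);
 *     old_obj = shared_ptr;
 *     ossl_rcu_assign_uptr((void **)&shared_ptr, (void **)&new_obj);
 *     ossl_rcu_call(rcu_lock, free_old_foo, old_obj);
 *     ossl_rcu_write_unlock(rcu_lock);
 *     // old_obj is freed by the next ossl_synchronize_rcu(rcu_lock)
 */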
void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}

static CRYPTO_ONCE rcu_init_once = CRYPTO_ONCE_STATIC_INIT;

CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers)
{
    struct rcu_lock_st *new;

    if (!CRYPTO_THREAD_run_once(&rcu_init_once, ossl_rcu_init))
        return NULL;

    if (num_writers < 1)
        num_writers = 1;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}

void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}

__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}
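/*
 * Usage sketch for the CRYPTO_THREAD lock API above (illustrative only;
 * the variable and the surrounding error handling are hypothetical):
 *
 *     CRYPTO_RWLOCK *lk = CRYPTO_THREAD_lock_new();
 *
 *     if (lk != NULL && CRYPTO_THREAD_write_lock(lk)) {
 *         // ... modify shared state ...
 *         CRYPTO_THREAD_unlock(lk);
 *     }
 *     CRYPTO_THREAD_lock_free(lk);
 */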
int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}

CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
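/*
 * Illustrative call of CRYPTO_atomic_add (hypothetical variables): increment
 * a shared counter, falling back to the supplied lock when lock-free atomics
 * are not available on this platform:
 *
 *     int counter = 0, result = 0;
 *     CRYPTO_RWLOCK *lk = CRYPTO_THREAD_lock_new();
 *
 *     if (lk != NULL && CRYPTO_atomic_add(&counter, 1, &result, lk)) {
 *         // result now holds the post-increment value
 *     }
 *     CRYPTO_THREAD_lock_free(lk);
 */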
int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val |= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif