threads_pthread.c

/*
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__sun)
# include <atomic.h>
#endif

#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS X 10.7 and 10.8 shipped a quirky version of clang that defines
 * __ATOMIC_ACQUIRE and __ATOMIC_ACQ_REL but expects only one parameter for
 * __atomic_is_lock_free() rather than two, i.e. it has the signature
 * __atomic_is_lock_free(sizeof(_Atomic(T))).
 * All of this makes it impossible to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif

#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

# ifdef PTHREAD_RWLOCK_INITIALIZER
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 *
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type.  However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as the first parameter,
 * so they can map to the correct fallback function.  In the GNU/clang case,
 * that parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;
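
/*
 * Illustrative note (not part of the implementation): with the fallback
 * definitions below, a call such as
 *
 *     ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE)
 *
 * expands to fallback_atomic_load_n_uint64_t(&lock->reader_idx) and the
 * memory-order argument is dropped, whereas in the GNU/clang branch it maps
 * to __atomic_load_n(&lock->reader_idx, __ATOMIC_ACQUIRE) and the type
 * parameter is dropped instead.
 */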

# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
    && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
/*
 * For pointers, the Apple M1 virtualized cpu seems to have a problem with the
 * ldapr instruction (see https://github.com/openssl/openssl/pull/23974).
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad.  So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__)
 * then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction instead, which produces the proper
 * sequencing guarantees
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

#  define IMPL_fallback_atomic_load_n(t)                   \
    static inline t fallback_atomic_load_n_##t(t *p)       \
    {                                                      \
        t ret;                                             \
                                                           \
        pthread_mutex_lock(&atomic_sim_lock);              \
        ret = *p;                                          \
        pthread_mutex_unlock(&atomic_sim_lock);            \
        return ret;                                        \
    }
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                  \
    static inline t fallback_atomic_store_n_##t(t *p, t v) \
    {                                                      \
        t ret;                                             \
                                                           \
        pthread_mutex_lock(&atomic_sim_lock);              \
        ret = *p;                                          \
        *p = v;                                            \
        pthread_mutex_unlock(&atomic_sim_lock);            \
        return ret;                                        \
    }
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                      \
    static inline void fallback_atomic_store_##t(t *p, t *v) \
    {                                                        \
        pthread_mutex_lock(&atomic_sim_lock);                \
        *p = *v;                                             \
        pthread_mutex_unlock(&atomic_sim_lock);              \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                  \
    static inline t fallback_atomic_exchange_n_##t(t *p, t v) \
    {                                                         \
        t ret;                                                \
                                                              \
        pthread_mutex_lock(&atomic_sim_lock);                 \
        ret = *p;                                             \
        *p = v;                                               \
        pthread_mutex_unlock(&atomic_sim_lock);               \
        return ret;                                           \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

/*
 * The fallbacks that follow don't need any per type implementation, as
 * they are designed for uint64_t only.  If there comes a time when multiple
 * types need to be covered, it's relatively easy to refactor them the same
 * way as the fallbacks above.
 */

static inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)

static inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif

/*
 * users is broken up into 2 parts
 * bits 0-15:  current reader count
 * bits 32-63: ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x)       (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x)       ((uint64_t)x << ID_SHIFT)
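
/*
 * Worked example (illustrative only): with three readers holding the current
 * qp under generation id 7,
 *
 *     users == VAL_ID(7) | (3 * VAL_READER) == 0x0000000700000003
 *
 * so READER_COUNT(users) == 3 and ID_VAL(users) == 7.
 */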

/*
 * This is the core of an rcu lock.  It tracks the readers and writers for the
 * current quiescence point for a given lock.  users is the 64-bit value that
 * stores the READERS/ID as defined above.
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};

/*
 * This is the internal version of a CRYPTO_RCU_LOCK;
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* The context we are being created against */
    OSSL_LIB_CTX *ctx;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};
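
/*
 * Design note (illustrative): qp_group is treated as a small ring of
 * quiescence points.  allocate_new_qp_group() sizes it as num_writers + 1
 * entries and update_qp() advances current_alloc_idx modulo group_count, so
 * with the default single writer the writer simply alternates reader_idx
 * between two qps, parking new readers on one while it waits for the other's
 * reader count to drain to zero.
 */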

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them
         *    above this operation.  Specifically, we don't want the below
         *    re-load of qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write
         *    side of the lock is flushed from a local cpu cache so that we
         *    see any updates prior to the load.  This is a non-issue on cache
         *    coherent systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for
         * the reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

static void ossl_rcu_free_local_data(void *arg)
{
    OSSL_LIB_CTX *ctx = arg;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);

    OPENSSL_free(data);
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users, VAL_READER,
                                       __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }
    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}
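
/*
 * Illustrative read-side usage (a sketch only; 'shared_foo' and 'struct foo'
 * are hypothetical, and callers normally go through the helpers declared in
 * internal/rcu.h):
 *
 *     struct foo *f;
 *
 *     ossl_rcu_read_lock(lock);
 *     f = ossl_rcu_uptr_deref((void **)&shared_foo);
 *     ... read-only use of *f ...
 *     ossl_rcu_read_unlock(lock);
 *
 * Read-side sections nest: a repeated ossl_rcu_read_lock() on the same lock
 * only bumps the per-thread depth counter maintained above.
 */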

/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);
    /*
     * Even though we are under a write side lock here,
     * we need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Update the reader index to be the prior qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}

static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_unlock(&lock->write_lock);
}

void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    /*
     * __ATOMIC_ACQ_REL is used here to ensure that we get any prior published
     * writes before we read, and publish our write immediately
     */
    cb_items = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, NULL,
                                 __ATOMIC_ACQ_REL);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}
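
/*
 * Illustrative write-side usage (a sketch only; 'shared_foo', 'new_foo' and
 * 'old_foo' are hypothetical):
 *
 *     ossl_rcu_write_lock(lock);
 *     old_foo = ossl_rcu_uptr_deref((void **)&shared_foo);
 *     ossl_rcu_assign_uptr((void **)&shared_foo, (void **)&new_foo);
 *     ossl_rcu_write_unlock(lock);
 *
 *     ossl_synchronize_rcu(lock);    <- wait for pre-existing readers
 *     OPENSSL_free(old_foo);         <- now safe, no reader can still see it
 *
 * Note that ossl_rcu_assign_uptr() takes the address of the new pointer
 * value, since it stores through both arguments via ATOMIC_STORE().
 */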

int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}
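
/*
 * Illustrative deferred-free usage (a sketch only; free_foo_cb, 'shared_foo',
 * 'new_foo' and 'old_foo' are hypothetical).  Instead of blocking in
 * ossl_synchronize_rcu(), a writer can queue a callback that runs during the
 * next synchronization on this lock, once the current readers have drained:
 *
 *     static void free_foo_cb(void *data)
 *     {
 *         OPENSSL_free(data);
 *     }
 *
 *     ossl_rcu_write_lock(lock);
 *     old_foo = ossl_rcu_uptr_deref((void **)&shared_foo);
 *     ossl_rcu_assign_uptr((void **)&shared_foo, (void **)&new_foo);
 *     ossl_rcu_call(lock, free_foo_cb, old_foo);
 *     ossl_rcu_write_unlock(lock);
 */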

void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}

CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
{
    struct rcu_lock_st *new;

    if (num_writers < 1)
        num_writers = 1;

    ctx = ossl_lib_ctx_get_concrete(ctx);
    if (ctx == NULL)
        return NULL;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    new->ctx = ctx;
    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}

void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}
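
/*
 * Illustrative lifecycle (a sketch only; a lock sized for one writer is used
 * here just as an example):
 *
 *     CRYPTO_RCU_LOCK *lock = ossl_rcu_lock_new(1, ctx);
 *
 *     if (lock == NULL)
 *         ... handle the allocation failure ...
 *     ...
 *     ossl_rcu_lock_free(lock);    <- synchronizes one last time, then frees
 */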

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}

__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}
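
/*
 * Illustrative usage of the CRYPTO_THREAD_* lock API (a sketch only).  Here
 * the lock is a pthread rwlock when PTHREAD_RWLOCK_INITIALIZER is available
 * and a (possibly error-checking) mutex otherwise:
 *
 *     CRYPTO_RWLOCK *l = CRYPTO_THREAD_lock_new();
 *
 *     if (l != NULL && CRYPTO_THREAD_write_lock(l)) {
 *         ... modify shared state ...
 *         CRYPTO_THREAD_unlock(l);
 *     }
 *     CRYPTO_THREAD_lock_free(l);    <- NULL-safe
 */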

int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}
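
/*
 * Illustrative thread-local usage (a sketch only; 'key', 'destroy_state' and
 * 'state' are hypothetical):
 *
 *     static CRYPTO_THREAD_LOCAL key;
 *     void *state;
 *
 *     CRYPTO_THREAD_init_local(&key, destroy_state);
 *     CRYPTO_THREAD_set_local(&key, state);
 *     state = CRYPTO_THREAD_get_local(&key);
 *     ...
 *     CRYPTO_THREAD_cleanup_local(&key);
 *
 * As with pthread_key_create(), the cleanup function runs at thread exit for
 * threads holding a non-NULL value, not when the key itself is deleted.
 */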

CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret  = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
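
/*
 * Illustrative usage (a sketch only; 'refcount' and 'refcount_lock' are
 * hypothetical).  The lock argument is only used as a fallback when neither
 * the GNU/clang builtins nor the Solaris atomics are usable:
 *
 *     int n;
 *
 *     if (!CRYPTO_atomic_add(&refcount, 1, &n, refcount_lock))
 *         ... handle failure, e.g. refcount_lock == NULL on a fallback build
 */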

int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val |= op;
    *ret  = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif