bg_bf.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. /*
  2. This file is part of GNUnet
  3. Copyright (C) 2017 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file block/bg_bf.c
  18. * @brief implementation of a block group using a Bloom filter
  19. * to drop duplicate blocks
  20. * @author Christian Grothoff
  21. */
  22. #include "platform.h"
  23. #include "gnunet_util_lib.h"
  24. #include "gnunet_block_group_lib.h"
  25. #include "gnunet_block_plugin.h"
  26. /**
  27. * Internal data structure for a block group.
  28. */
  29. struct BfGroupInternals {
  30. /**
  31. * A Bloom filter to weed out duplicate replies probabilistically.
  32. */
  33. struct GNUNET_CONTAINER_BloomFilter *bf;
  34. /**
  35. * Set from the nonce to mingle the hashes before going into the @e bf.
  36. */
  37. uint32_t bf_mutator;
  38. /**
  39. * Size of @a bf.
  40. */
  41. uint32_t bf_size;
  42. };
  43. /**
  44. * Serialize state of a block group.
  45. *
  46. * @param bg group to serialize
  47. * @param[out] nonce set to the nonce of the @a bg
  48. * @param[out] raw_data set to the serialized state
  49. * @param[out] raw_data_size set to the number of bytes in @a raw_data
  50. * @return #GNUNET_OK on success, #GNUNET_NO if serialization is not
  51. * supported, #GNUNET_SYSERR on error
  52. */
  53. static int
  54. bf_group_serialize_cb(struct GNUNET_BLOCK_Group *bg,
  55. uint32_t *nonce,
  56. void **raw_data,
  57. size_t *raw_data_size)
  58. {
  59. struct BfGroupInternals *gi = bg->internal_cls;
  60. char *raw;
  61. raw = GNUNET_malloc(gi->bf_size);
  62. if (GNUNET_OK !=
  63. GNUNET_CONTAINER_bloomfilter_get_raw_data(gi->bf,
  64. raw,
  65. gi->bf_size))
  66. {
  67. GNUNET_free(raw);
  68. GNUNET_break(0);
  69. return GNUNET_SYSERR;
  70. }
  71. *nonce = gi->bf_mutator;
  72. *raw_data = raw;
  73. *raw_data_size = gi->bf_size;
  74. return GNUNET_OK;
  75. }
  76. /**
  77. * Mark elements as "seen" using a hash of the element. Not supported
  78. * by all block plugins.
  79. *
  80. * @param bg group to update
  81. * @param seen_results results already seen
  82. * @param seen_results_count number of entries in @a seen_results
  83. */
  84. static void
  85. bf_group_mark_seen_cb(struct GNUNET_BLOCK_Group *bg,
  86. const struct GNUNET_HashCode *seen_results,
  87. unsigned int seen_results_count)
  88. {
  89. struct BfGroupInternals *gi = bg->internal_cls;
  90. for (unsigned int i = 0; i < seen_results_count; i++)
  91. {
  92. struct GNUNET_HashCode mhash;
  93. GNUNET_BLOCK_mingle_hash(&seen_results[i],
  94. gi->bf_mutator,
  95. &mhash);
  96. GNUNET_CONTAINER_bloomfilter_add(gi->bf,
  97. &mhash);
  98. }
  99. }
  100. /**
  101. * Merge two groups, if possible. Not supported by all block plugins,
  102. * can also fail if the nonces were different.
  103. *
  104. * @param bg1 group to update
  105. * @param bg2 group to merge into @a bg1
  106. * @return #GNUNET_OK on success, #GNUNET_NO if the nonces were different and thus
  107. * we failed.
  108. */
  109. static int
  110. bf_group_merge_cb(struct GNUNET_BLOCK_Group *bg1,
  111. const struct GNUNET_BLOCK_Group *bg2)
  112. {
  113. struct BfGroupInternals *gi1 = bg1->internal_cls;
  114. struct BfGroupInternals *gi2 = bg2->internal_cls;
  115. if (gi1->bf_mutator != gi2->bf_mutator)
  116. return GNUNET_NO;
  117. if (gi1->bf_size != gi2->bf_size)
  118. return GNUNET_NO;
  119. GNUNET_CONTAINER_bloomfilter_or2(gi1->bf,
  120. gi2->bf);
  121. return GNUNET_OK;
  122. }
  123. /**
  124. * Destroy resources used by a block group.
  125. *
  126. * @param bg group to destroy, NULL is allowed
  127. */
  128. static void
  129. bf_group_destroy_cb(struct GNUNET_BLOCK_Group *bg)
  130. {
  131. struct BfGroupInternals *gi = bg->internal_cls;
  132. GNUNET_CONTAINER_bloomfilter_free(gi->bf);
  133. GNUNET_free(gi);
  134. GNUNET_free(bg);
  135. }
  136. /**
  137. * Create a new block group that filters duplicates using a Bloom filter.
  138. *
  139. * @param ctx block context in which the block group is created
  140. * @param bf_size size of the Bloom filter
  141. * @param bf_k K-value for the Bloom filter
  142. * @param type block type
  143. * @param nonce random value used to seed the group creation
  144. * @param raw_data optional serialized prior state of the group, NULL if unavailable/fresh
  145. * @param raw_data_size number of bytes in @a raw_data, 0 if unavailable/fresh
  146. * @return block group handle, NULL if block groups are not supported
  147. * by this @a type of block (this is not an error)
  148. */
  149. struct GNUNET_BLOCK_Group *
  150. GNUNET_BLOCK_GROUP_bf_create(void *cls,
  151. size_t bf_size,
  152. unsigned int bf_k,
  153. enum GNUNET_BLOCK_Type type,
  154. uint32_t nonce,
  155. const void *raw_data,
  156. size_t raw_data_size)
  157. {
  158. struct BfGroupInternals *gi;
  159. struct GNUNET_BLOCK_Group *bg;
  160. gi = GNUNET_new(struct BfGroupInternals);
  161. gi->bf = GNUNET_CONTAINER_bloomfilter_init((bf_size != raw_data_size) ? NULL : raw_data,
  162. bf_size,
  163. bf_k);
  164. gi->bf_mutator = nonce;
  165. gi->bf_size = bf_size;
  166. bg = GNUNET_new(struct GNUNET_BLOCK_Group);
  167. bg->type = type;
  168. bg->serialize_cb = &bf_group_serialize_cb;
  169. bg->mark_seen_cb = &bf_group_mark_seen_cb;
  170. bg->merge_cb = &bf_group_merge_cb;
  171. bg->destroy_cb = &bf_group_destroy_cb;
  172. bg->internal_cls = gi;
  173. return bg;
  174. }
  175. /**
  176. * Test if @a hc is contained in the Bloom filter of @a bg. If so,
  177. * return #GNUNET_YES. If not, add @a hc to the Bloom filter and
  178. * return #GNUNET_NO.
  179. *
  180. * @param bg block group to use for testing
  181. * @param hc hash of element to evaluate
  182. * @return #GNUNET_YES if @a hc is (likely) a duplicate
  183. * #GNUNET_NO if @a hc was definitively not in @bg (but now is)
  184. */
  185. int
  186. GNUNET_BLOCK_GROUP_bf_test_and_set(struct GNUNET_BLOCK_Group *bg,
  187. const struct GNUNET_HashCode *hc)
  188. {
  189. struct BfGroupInternals *gi;
  190. struct GNUNET_HashCode mhash;
  191. if (NULL == bg)
  192. return GNUNET_NO;
  193. gi = bg->internal_cls;
  194. GNUNET_BLOCK_mingle_hash(hc,
  195. gi->bf_mutator,
  196. &mhash);
  197. if (GNUNET_YES ==
  198. GNUNET_CONTAINER_bloomfilter_test(gi->bf,
  199. &mhash))
  200. return GNUNET_YES;
  201. GNUNET_CONTAINER_bloomfilter_add(gi->bf,
  202. &mhash);
  203. return GNUNET_NO;
  204. }
  205. /**
  206. * How many bytes should a bloomfilter be if we have already seen
  207. * entry_count responses? Sized so that do not have to
  208. * re-size the filter too often (to keep it cheap).
  209. *
  210. * Since other peers will also add entries but not resize the filter,
  211. * we should generally pick a slightly larger size than what the
  212. * strict math would suggest.
  213. *
  214. * @param entry_count expected number of entries in the Bloom filter
  215. * @param k number of bits set per entry
  216. * @return must be a power of two and smaller or equal to 2^15.
  217. */
  218. size_t
  219. GNUNET_BLOCK_GROUP_compute_bloomfilter_size(unsigned int entry_count,
  220. unsigned int k)
  221. {
  222. size_t size;
  223. unsigned int ideal = (entry_count * k) / 4;
  224. uint16_t max = 1 << 15;
  225. if (entry_count > max)
  226. return max;
  227. size = 8;
  228. while ((size < max) && (size < ideal))
  229. size *= 2;
  230. if (size > max)
  231. return max;
  232. return size;
  233. }
  234. /* end of bg_bf.c */