ibf.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. This file is part of GNUnet
  3. Copyright (C) 2012 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file set/ibf.c
  18. * @brief implementation of the invertible Bloom filter
  19. * @author Florian Dold
  20. */
  21. #include "ibf.h"
  22. /**
  23. * Compute the key's hash from the key.
  24. * Redefine to use a different hash function.
  25. */
  26. #define IBF_KEY_HASH_VAL(k) (GNUNET_CRYPTO_crc32_n (&(k), sizeof(struct \
  27. IBF_KeyHash)))
  28. /**
  29. * Create a key from a hashcode.
  30. *
  31. * @param hash the hashcode
  32. * @return a key
  33. */
  34. struct IBF_Key
  35. ibf_key_from_hashcode (const struct GNUNET_HashCode *hash)
  36. {
  37. return *(struct IBF_Key *) hash;
  38. }
  39. /**
  40. * Create a hashcode from a key, by replicating the key
  41. * until the hascode is filled
  42. *
  43. * @param key the key
  44. * @param dst hashcode to store the result in
  45. */
  46. void
  47. ibf_hashcode_from_key (struct IBF_Key key,
  48. struct GNUNET_HashCode *dst)
  49. {
  50. struct IBF_Key *p;
  51. const unsigned int keys_per_hashcode = sizeof(struct GNUNET_HashCode)
  52. / sizeof(struct IBF_Key);
  53. p = (struct IBF_Key *) dst;
  54. for (unsigned int i = 0; i < keys_per_hashcode; i++)
  55. *p++ = key;
  56. }
  57. /**
  58. * Create an invertible bloom filter.
  59. *
  60. * @param size number of IBF buckets
  61. * @param hash_num number of buckets one element is hashed in
  62. * @return the newly created invertible bloom filter, NULL on error
  63. */
  64. struct InvertibleBloomFilter *
  65. ibf_create (uint32_t size,
  66. uint8_t hash_num)
  67. {
  68. struct InvertibleBloomFilter *ibf;
  69. GNUNET_assert (0 != size);
  70. ibf = GNUNET_new (struct InvertibleBloomFilter);
  71. ibf->count = GNUNET_malloc_large (size * sizeof(uint8_t));
  72. if (NULL == ibf->count)
  73. {
  74. GNUNET_free (ibf);
  75. return NULL;
  76. }
  77. ibf->key_sum = GNUNET_malloc_large (size * sizeof(struct IBF_Key));
  78. if (NULL == ibf->key_sum)
  79. {
  80. GNUNET_free (ibf->count);
  81. GNUNET_free (ibf);
  82. return NULL;
  83. }
  84. ibf->key_hash_sum = GNUNET_malloc_large (size * sizeof(struct IBF_KeyHash));
  85. if (NULL == ibf->key_hash_sum)
  86. {
  87. GNUNET_free (ibf->key_sum);
  88. GNUNET_free (ibf->count);
  89. GNUNET_free (ibf);
  90. return NULL;
  91. }
  92. ibf->size = size;
  93. ibf->hash_num = hash_num;
  94. return ibf;
  95. }
  96. /**
  97. * Store unique bucket indices for the specified @a key in @a dst.
  98. */
  99. static void
  100. ibf_get_indices (const struct InvertibleBloomFilter *ibf,
  101. struct IBF_Key key,
  102. int *dst)
  103. {
  104. uint32_t filled;
  105. uint32_t i;
  106. uint32_t bucket;
  107. bucket = GNUNET_CRYPTO_crc32_n (&key,
  108. sizeof (key));
  109. for (i = 0, filled = 0; filled < ibf->hash_num; i++)
  110. {
  111. uint64_t x;
  112. for (unsigned int j = 0; j < filled; j++)
  113. if (dst[j] == bucket % ibf->size)
  114. goto try_next;
  115. dst[filled++] = bucket % ibf->size;
  116. try_next:
  117. x = ((uint64_t) bucket << 32) | i;
  118. bucket = GNUNET_CRYPTO_crc32_n (&x,
  119. sizeof (x));
  120. }
  121. }
  122. static void
  123. ibf_insert_into (struct InvertibleBloomFilter *ibf,
  124. struct IBF_Key key,
  125. const int *buckets,
  126. int side)
  127. {
  128. for (unsigned int i = 0; i < ibf->hash_num; i++)
  129. {
  130. const int bucket = buckets[i];
  131. ibf->count[bucket].count_val += side;
  132. ibf->key_sum[bucket].key_val ^= key.key_val;
  133. ibf->key_hash_sum[bucket].key_hash_val
  134. ^= IBF_KEY_HASH_VAL (key);
  135. }
  136. }
  137. /**
  138. * Insert a key into an IBF.
  139. *
  140. * @param ibf the IBF
  141. * @param key the element's hash code
  142. */
  143. void
  144. ibf_insert (struct InvertibleBloomFilter *ibf,
  145. struct IBF_Key key)
  146. {
  147. int buckets[ibf->hash_num];
  148. GNUNET_assert (ibf->hash_num <= ibf->size);
  149. ibf_get_indices (ibf,
  150. key,
  151. buckets);
  152. ibf_insert_into (ibf,
  153. key,
  154. buckets,
  155. 1);
  156. }
  157. /**
  158. * Remove a key from an IBF.
  159. *
  160. * @param ibf the IBF
  161. * @param key the element's hash code
  162. */
  163. void
  164. ibf_remove (struct InvertibleBloomFilter *ibf,
  165. struct IBF_Key key)
  166. {
  167. int buckets[ibf->hash_num];
  168. GNUNET_assert (ibf->hash_num <= ibf->size);
  169. ibf_get_indices (ibf,
  170. key,
  171. buckets);
  172. ibf_insert_into (ibf,
  173. key,
  174. buckets,
  175. -1);
  176. }
  177. /**
  178. * Test is the IBF is empty, i.e. all counts, keys and key hashes are zero.
  179. */
  180. static int
  181. ibf_is_empty (struct InvertibleBloomFilter *ibf)
  182. {
  183. for (uint32_t i = 0; i < ibf->size; i++)
  184. {
  185. if (0 != ibf->count[i].count_val)
  186. return GNUNET_NO;
  187. if (0 != ibf->key_hash_sum[i].key_hash_val)
  188. return GNUNET_NO;
  189. if (0 != ibf->key_sum[i].key_val)
  190. return GNUNET_NO;
  191. }
  192. return GNUNET_YES;
  193. }
  194. /**
  195. * Decode and remove an element from the IBF, if possible.
  196. *
  197. * @param ibf the invertible bloom filter to decode
  198. * @param ret_side sign of the cell's count where the decoded element came from.
  199. * A negative sign indicates that the element was recovered
  200. * resides in an IBF that was previously subtracted from.
  201. * @param ret_id receives the hash code of the decoded element, if successful
  202. * @return #GNUNET_YES if decoding an element was successful,
  203. * #GNUNET_NO if the IBF is empty,
  204. * #GNUNET_SYSERR if the decoding has failed
  205. */
  206. int
  207. ibf_decode (struct InvertibleBloomFilter *ibf,
  208. int *ret_side,
  209. struct IBF_Key *ret_id)
  210. {
  211. struct IBF_KeyHash hash;
  212. int buckets[ibf->hash_num];
  213. for (uint32_t i = 0; i < ibf->size; i++)
  214. {
  215. /* we can only decode from pure buckets */
  216. if ( (1 != ibf->count[i].count_val) &&
  217. (-1 != ibf->count[i].count_val) )
  218. continue;
  219. hash.key_hash_val = IBF_KEY_HASH_VAL (ibf->key_sum[i]);
  220. /* test if the hash matches the key */
  221. if (hash.key_hash_val != ibf->key_hash_sum[i].key_hash_val)
  222. continue;
  223. /* test if key in bucket hits its own location,
  224. * if not, the key hash was subject to collision */
  225. {
  226. bool hit = false;
  227. ibf_get_indices (ibf,
  228. ibf->key_sum[i],
  229. buckets);
  230. for (int j = 0; j < ibf->hash_num; j++)
  231. if (buckets[j] == i)
  232. {
  233. hit = true;
  234. break;
  235. }
  236. if (! hit)
  237. continue;
  238. }
  239. if (NULL != ret_side)
  240. *ret_side = ibf->count[i].count_val;
  241. if (NULL != ret_id)
  242. *ret_id = ibf->key_sum[i];
  243. /* insert on the opposite side, effectively removing the element */
  244. ibf_insert_into (ibf,
  245. ibf->key_sum[i], buckets,
  246. -ibf->count[i].count_val);
  247. return GNUNET_YES;
  248. }
  249. if (GNUNET_YES == ibf_is_empty (ibf))
  250. return GNUNET_NO;
  251. return GNUNET_SYSERR;
  252. }
  253. /**
  254. * Write buckets from an ibf to a buffer.
  255. * Exactly (IBF_BUCKET_SIZE*ibf->size) bytes are written to buf.
  256. *
  257. * @param ibf the ibf to write
  258. * @param start with which bucket to start
  259. * @param count how many buckets to write
  260. * @param buf buffer to write the data to
  261. */
  262. void
  263. ibf_write_slice (const struct InvertibleBloomFilter *ibf,
  264. uint32_t start,
  265. uint32_t count,
  266. void *buf)
  267. {
  268. struct IBF_Key *key_dst;
  269. struct IBF_KeyHash *key_hash_dst;
  270. struct IBF_Count *count_dst;
  271. GNUNET_assert (start + count <= ibf->size);
  272. /* copy keys */
  273. key_dst = (struct IBF_Key *) buf;
  274. GNUNET_memcpy (key_dst,
  275. ibf->key_sum + start,
  276. count * sizeof *key_dst);
  277. key_dst += count;
  278. /* copy key hashes */
  279. key_hash_dst = (struct IBF_KeyHash *) key_dst;
  280. GNUNET_memcpy (key_hash_dst,
  281. ibf->key_hash_sum + start,
  282. count * sizeof *key_hash_dst);
  283. key_hash_dst += count;
  284. /* copy counts */
  285. count_dst = (struct IBF_Count *) key_hash_dst;
  286. GNUNET_memcpy (count_dst,
  287. ibf->count + start,
  288. count * sizeof *count_dst);
  289. }
  290. /**
  291. * Read buckets from a buffer into an ibf.
  292. *
  293. * @param buf pointer to the buffer to read from
  294. * @param start which bucket to start at
  295. * @param count how many buckets to read
  296. * @param ibf the ibf to read from
  297. */
  298. void
  299. ibf_read_slice (const void *buf,
  300. uint32_t start,
  301. uint32_t count,
  302. struct InvertibleBloomFilter *ibf)
  303. {
  304. struct IBF_Key *key_src;
  305. struct IBF_KeyHash *key_hash_src;
  306. struct IBF_Count *count_src;
  307. GNUNET_assert (count > 0);
  308. GNUNET_assert (start + count <= ibf->size);
  309. /* copy keys */
  310. key_src = (struct IBF_Key *) buf;
  311. GNUNET_memcpy (ibf->key_sum + start,
  312. key_src,
  313. count * sizeof *key_src);
  314. key_src += count;
  315. /* copy key hashes */
  316. key_hash_src = (struct IBF_KeyHash *) key_src;
  317. GNUNET_memcpy (ibf->key_hash_sum + start,
  318. key_hash_src,
  319. count * sizeof *key_hash_src);
  320. key_hash_src += count;
  321. /* copy counts */
  322. count_src = (struct IBF_Count *) key_hash_src;
  323. GNUNET_memcpy (ibf->count + start,
  324. count_src,
  325. count * sizeof *count_src);
  326. }
  327. /**
  328. * Subtract ibf2 from ibf1, storing the result in ibf1.
  329. * The two IBF's must have the same parameters size and hash_num.
  330. *
  331. * @param ibf1 IBF that is subtracted from
  332. * @param ibf2 IBF that will be subtracted from ibf1
  333. */
  334. void
  335. ibf_subtract (struct InvertibleBloomFilter *ibf1,
  336. const struct InvertibleBloomFilter *ibf2)
  337. {
  338. GNUNET_assert (ibf1->size == ibf2->size);
  339. GNUNET_assert (ibf1->hash_num == ibf2->hash_num);
  340. for (uint32_t i = 0; i < ibf1->size; i++)
  341. {
  342. ibf1->count[i].count_val -= ibf2->count[i].count_val;
  343. ibf1->key_hash_sum[i].key_hash_val ^= ibf2->key_hash_sum[i].key_hash_val;
  344. ibf1->key_sum[i].key_val ^= ibf2->key_sum[i].key_val;
  345. }
  346. }
  347. /**
  348. * Create a copy of an IBF, the copy has to be destroyed properly.
  349. *
  350. * @param ibf the IBF to copy
  351. */
  352. struct InvertibleBloomFilter *
  353. ibf_dup (const struct InvertibleBloomFilter *ibf)
  354. {
  355. struct InvertibleBloomFilter *copy;
  356. copy = GNUNET_malloc (sizeof *copy);
  357. copy->hash_num = ibf->hash_num;
  358. copy->size = ibf->size;
  359. copy->key_hash_sum = GNUNET_memdup (ibf->key_hash_sum,
  360. ibf->size * sizeof(struct IBF_KeyHash));
  361. copy->key_sum = GNUNET_memdup (ibf->key_sum,
  362. ibf->size * sizeof(struct IBF_Key));
  363. copy->count = GNUNET_memdup (ibf->count,
  364. ibf->size * sizeof(struct IBF_Count));
  365. return copy;
  366. }
  367. /**
  368. * Destroy all resources associated with the invertible bloom filter.
  369. * No more ibf_*-functions may be called on ibf after calling destroy.
  370. *
  371. * @param ibf the intertible bloom filter to destroy
  372. */
  373. void
  374. ibf_destroy (struct InvertibleBloomFilter *ibf)
  375. {
  376. GNUNET_free (ibf->key_sum);
  377. GNUNET_free (ibf->key_hash_sum);
  378. GNUNET_free (ibf->count);
  379. GNUNET_free (ibf);
  380. }