ibf.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /*
  2. This file is part of GNUnet
  3. Copyright (C) 2012 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file set/ibf.c
  18. * @brief implementation of the invertible bloom filter
  19. * @author Florian Dold
  20. */
  21. #include "ibf.h"
  22. /**
  23. * Compute the key's hash from the key.
  24. * Redefine to use a different hash function.
  25. */
  26. #define IBF_KEY_HASH_VAL(k) (GNUNET_CRYPTO_crc32_n (&(k), sizeof(struct \
  27. IBF_KeyHash)))
  28. /**
  29. * Create a key from a hashcode.
  30. *
  31. * @param hash the hashcode
  32. * @return a key
  33. */
  34. struct IBF_Key
  35. ibf_key_from_hashcode (const struct GNUNET_HashCode *hash)
  36. {
  37. return *(struct IBF_Key *) hash;
  38. }
  39. /**
  40. * Create a hashcode from a key, by replicating the key
  41. * until the hascode is filled
  42. *
  43. * @param key the key
  44. * @param dst hashcode to store the result in
  45. */
  46. void
  47. ibf_hashcode_from_key (struct IBF_Key key,
  48. struct GNUNET_HashCode *dst)
  49. {
  50. struct IBF_Key *p;
  51. unsigned int i;
  52. const unsigned int keys_per_hashcode = sizeof(struct GNUNET_HashCode)
  53. / sizeof(struct IBF_Key);
  54. p = (struct IBF_Key *) dst;
  55. for (i = 0; i < keys_per_hashcode; i++)
  56. *p++ = key;
  57. }
  58. /**
  59. * Create an invertible bloom filter.
  60. *
  61. * @param size number of IBF buckets
  62. * @param hash_num number of buckets one element is hashed in
  63. * @return the newly created invertible bloom filter, NULL on error
  64. */
  65. struct InvertibleBloomFilter *
  66. ibf_create (uint32_t size, uint8_t hash_num)
  67. {
  68. struct InvertibleBloomFilter *ibf;
  69. GNUNET_assert (0 != size);
  70. ibf = GNUNET_new (struct InvertibleBloomFilter);
  71. ibf->count = GNUNET_malloc_large (size * sizeof(uint8_t));
  72. if (NULL == ibf->count)
  73. {
  74. GNUNET_free (ibf);
  75. return NULL;
  76. }
  77. ibf->key_sum = GNUNET_malloc_large (size * sizeof(struct IBF_Key));
  78. if (NULL == ibf->key_sum)
  79. {
  80. GNUNET_free (ibf->count);
  81. GNUNET_free (ibf);
  82. return NULL;
  83. }
  84. ibf->key_hash_sum = GNUNET_malloc_large (size * sizeof(struct IBF_KeyHash));
  85. if (NULL == ibf->key_hash_sum)
  86. {
  87. GNUNET_free (ibf->key_sum);
  88. GNUNET_free (ibf->count);
  89. GNUNET_free (ibf);
  90. return NULL;
  91. }
  92. ibf->size = size;
  93. ibf->hash_num = hash_num;
  94. return ibf;
  95. }
  96. /**
  97. * Store unique bucket indices for the specified key in dst.
  98. */
  99. static void
  100. ibf_get_indices (const struct InvertibleBloomFilter *ibf,
  101. struct IBF_Key key,
  102. int *dst)
  103. {
  104. uint32_t filled;
  105. uint32_t i;
  106. uint32_t bucket;
  107. bucket = GNUNET_CRYPTO_crc32_n (&key, sizeof key);
  108. for (i = 0, filled = 0; filled < ibf->hash_num; i++)
  109. {
  110. unsigned int j;
  111. uint64_t x;
  112. for (j = 0; j < filled; j++)
  113. if (dst[j] == bucket)
  114. goto try_next;
  115. dst[filled++] = bucket % ibf->size;
  116. try_next:;
  117. x = ((uint64_t) bucket << 32) | i;
  118. bucket = GNUNET_CRYPTO_crc32_n (&x, sizeof x);
  119. }
  120. }
  121. static void
  122. ibf_insert_into (struct InvertibleBloomFilter *ibf,
  123. struct IBF_Key key,
  124. const int *buckets, int side)
  125. {
  126. int i;
  127. for (i = 0; i < ibf->hash_num; i++)
  128. {
  129. const int bucket = buckets[i];
  130. ibf->count[bucket].count_val += side;
  131. ibf->key_sum[bucket].key_val ^= key.key_val;
  132. ibf->key_hash_sum[bucket].key_hash_val
  133. ^= IBF_KEY_HASH_VAL (key);
  134. }
  135. }
  136. /**
  137. * Insert a key into an IBF.
  138. *
  139. * @param ibf the IBF
  140. * @param key the element's hash code
  141. */
  142. void
  143. ibf_insert (struct InvertibleBloomFilter *ibf, struct IBF_Key key)
  144. {
  145. int buckets[ibf->hash_num];
  146. GNUNET_assert (ibf->hash_num <= ibf->size);
  147. ibf_get_indices (ibf, key, buckets);
  148. ibf_insert_into (ibf, key, buckets, 1);
  149. }
  150. /**
  151. * Remove a key from an IBF.
  152. *
  153. * @param ibf the IBF
  154. * @param key the element's hash code
  155. */
  156. void
  157. ibf_remove (struct InvertibleBloomFilter *ibf, struct IBF_Key key)
  158. {
  159. int buckets[ibf->hash_num];
  160. GNUNET_assert (ibf->hash_num <= ibf->size);
  161. ibf_get_indices (ibf, key, buckets);
  162. ibf_insert_into (ibf, key, buckets, -1);
  163. }
  164. /**
  165. * Test is the IBF is empty, i.e. all counts, keys and key hashes are zero.
  166. */
  167. static int
  168. ibf_is_empty (struct InvertibleBloomFilter *ibf)
  169. {
  170. int i;
  171. for (i = 0; i < ibf->size; i++)
  172. {
  173. if (0 != ibf->count[i].count_val)
  174. return GNUNET_NO;
  175. if (0 != ibf->key_hash_sum[i].key_hash_val)
  176. return GNUNET_NO;
  177. if (0 != ibf->key_sum[i].key_val)
  178. return GNUNET_NO;
  179. }
  180. return GNUNET_YES;
  181. }
  182. /**
  183. * Decode and remove an element from the IBF, if possible.
  184. *
  185. * @param ibf the invertible bloom filter to decode
  186. * @param ret_side sign of the cell's count where the decoded element came from.
  187. * A negative sign indicates that the element was recovered
  188. * resides in an IBF that was previously subtracted from.
  189. * @param ret_id receives the hash code of the decoded element, if successful
  190. * @return GNUNET_YES if decoding an element was successful,
  191. * GNUNET_NO if the IBF is empty,
  192. * GNUNET_SYSERR if the decoding has failed
  193. */
  194. int
  195. ibf_decode (struct InvertibleBloomFilter *ibf,
  196. int *ret_side, struct IBF_Key *ret_id)
  197. {
  198. struct IBF_KeyHash hash;
  199. int i;
  200. int buckets[ibf->hash_num];
  201. GNUNET_assert (NULL != ibf);
  202. for (i = 0; i < ibf->size; i++)
  203. {
  204. int j;
  205. int hit;
  206. /* we can only decode from pure buckets */
  207. if ((1 != ibf->count[i].count_val) && (-1 != ibf->count[i].count_val))
  208. continue;
  209. hash.key_hash_val = IBF_KEY_HASH_VAL (ibf->key_sum[i]);
  210. /* test if the hash matches the key */
  211. if (hash.key_hash_val != ibf->key_hash_sum[i].key_hash_val)
  212. continue;
  213. /* test if key in bucket hits its own location,
  214. * if not, the key hash was subject to collision */
  215. hit = GNUNET_NO;
  216. ibf_get_indices (ibf, ibf->key_sum[i], buckets);
  217. for (j = 0; j < ibf->hash_num; j++)
  218. if (buckets[j] == i)
  219. hit = GNUNET_YES;
  220. if (GNUNET_NO == hit)
  221. continue;
  222. if (NULL != ret_side)
  223. *ret_side = ibf->count[i].count_val;
  224. if (NULL != ret_id)
  225. *ret_id = ibf->key_sum[i];
  226. /* insert on the opposite side, effectively removing the element */
  227. ibf_insert_into (ibf, ibf->key_sum[i], buckets, -ibf->count[i].count_val);
  228. return GNUNET_YES;
  229. }
  230. if (GNUNET_YES == ibf_is_empty (ibf))
  231. return GNUNET_NO;
  232. return GNUNET_SYSERR;
  233. }
  234. /**
  235. * Write buckets from an ibf to a buffer.
  236. * Exactly (IBF_BUCKET_SIZE*ibf->size) bytes are written to buf.
  237. *
  238. * @param ibf the ibf to write
  239. * @param start with which bucket to start
  240. * @param count how many buckets to write
  241. * @param buf buffer to write the data to
  242. */
  243. void
  244. ibf_write_slice (const struct InvertibleBloomFilter *ibf, uint32_t start,
  245. uint32_t count, void *buf)
  246. {
  247. struct IBF_Key *key_dst;
  248. struct IBF_KeyHash *key_hash_dst;
  249. struct IBF_Count *count_dst;
  250. GNUNET_assert (start + count <= ibf->size);
  251. /* copy keys */
  252. key_dst = (struct IBF_Key *) buf;
  253. GNUNET_memcpy (key_dst, ibf->key_sum + start, count * sizeof *key_dst);
  254. key_dst += count;
  255. /* copy key hashes */
  256. key_hash_dst = (struct IBF_KeyHash *) key_dst;
  257. GNUNET_memcpy (key_hash_dst, ibf->key_hash_sum + start, count
  258. * sizeof *key_hash_dst);
  259. key_hash_dst += count;
  260. /* copy counts */
  261. count_dst = (struct IBF_Count *) key_hash_dst;
  262. GNUNET_memcpy (count_dst, ibf->count + start, count * sizeof *count_dst);
  263. }
  264. /**
  265. * Read buckets from a buffer into an ibf.
  266. *
  267. * @param buf pointer to the buffer to read from
  268. * @param start which bucket to start at
  269. * @param count how many buckets to read
  270. * @param ibf the ibf to read from
  271. */
  272. void
  273. ibf_read_slice (const void *buf, uint32_t start, uint32_t count, struct
  274. InvertibleBloomFilter *ibf)
  275. {
  276. struct IBF_Key *key_src;
  277. struct IBF_KeyHash *key_hash_src;
  278. struct IBF_Count *count_src;
  279. GNUNET_assert (count > 0);
  280. GNUNET_assert (start + count <= ibf->size);
  281. /* copy keys */
  282. key_src = (struct IBF_Key *) buf;
  283. GNUNET_memcpy (ibf->key_sum + start, key_src, count * sizeof *key_src);
  284. key_src += count;
  285. /* copy key hashes */
  286. key_hash_src = (struct IBF_KeyHash *) key_src;
  287. GNUNET_memcpy (ibf->key_hash_sum + start, key_hash_src, count
  288. * sizeof *key_hash_src);
  289. key_hash_src += count;
  290. /* copy counts */
  291. count_src = (struct IBF_Count *) key_hash_src;
  292. GNUNET_memcpy (ibf->count + start, count_src, count * sizeof *count_src);
  293. }
  294. /**
  295. * Subtract ibf2 from ibf1, storing the result in ibf1.
  296. * The two IBF's must have the same parameters size and hash_num.
  297. *
  298. * @param ibf1 IBF that is subtracted from
  299. * @param ibf2 IBF that will be subtracted from ibf1
  300. */
  301. void
  302. ibf_subtract (struct InvertibleBloomFilter *ibf1, const struct
  303. InvertibleBloomFilter *ibf2)
  304. {
  305. int i;
  306. GNUNET_assert (ibf1->size == ibf2->size);
  307. GNUNET_assert (ibf1->hash_num == ibf2->hash_num);
  308. for (i = 0; i < ibf1->size; i++)
  309. {
  310. ibf1->count[i].count_val -= ibf2->count[i].count_val;
  311. ibf1->key_hash_sum[i].key_hash_val ^= ibf2->key_hash_sum[i].key_hash_val;
  312. ibf1->key_sum[i].key_val ^= ibf2->key_sum[i].key_val;
  313. }
  314. }
  315. /**
  316. * Create a copy of an IBF, the copy has to be destroyed properly.
  317. *
  318. * @param ibf the IBF to copy
  319. */
  320. struct InvertibleBloomFilter *
  321. ibf_dup (const struct InvertibleBloomFilter *ibf)
  322. {
  323. struct InvertibleBloomFilter *copy;
  324. copy = GNUNET_malloc (sizeof *copy);
  325. copy->hash_num = ibf->hash_num;
  326. copy->size = ibf->size;
  327. copy->key_hash_sum = GNUNET_memdup (ibf->key_hash_sum, ibf->size
  328. * sizeof(struct IBF_KeyHash));
  329. copy->key_sum = GNUNET_memdup (ibf->key_sum, ibf->size * sizeof(struct
  330. IBF_Key));
  331. copy->count = GNUNET_memdup (ibf->count, ibf->size * sizeof(struct
  332. IBF_Count));
  333. return copy;
  334. }
  335. /**
  336. * Destroy all resources associated with the invertible bloom filter.
  337. * No more ibf_*-functions may be called on ibf after calling destroy.
  338. *
  339. * @param ibf the intertible bloom filter to destroy
  340. */
  341. void
  342. ibf_destroy (struct InvertibleBloomFilter *ibf)
  343. {
  344. GNUNET_free (ibf->key_sum);
  345. GNUNET_free (ibf->key_hash_sum);
  346. GNUNET_free (ibf->count);
  347. GNUNET_free (ibf);
  348. }