container_bloomfilter.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
  1. /*
  2. This file is part of GNUnet.
  3. Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file util/container_bloomfilter.c
  18. * @brief data structure used to reduce disk accesses.
  19. *
  20. * The idea basically: Create a signature for each element in the
  21. * database. Add those signatures to a bit array. When doing a lookup,
  22. * check if the bit array matches the signature of the requested
  23. * element. If yes, address the disk, otherwise return 'not found'.
  24. *
  25. * A property of the bloom filter is that sometimes we will have
  26. * a match even if the element is not on the disk (then we do
  27. * an unnecessary disk access), but what's most important is that
  28. * we never get a single "false negative".
  29. *
  30. * To be able to delete entries from the bloom filter, we maintain
  31. * a 4 bit counter in the file on the drive (we still use only one
  32. * bit in memory).
  33. *
  34. * @author Igor Wronsky
  35. * @author Christian Grothoff
  36. */
  37. #include "platform.h"
  38. #include "gnunet_util_lib.h"
  39. #define LOG(kind,...) GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__)
  40. #define LOG_STRERROR(kind,syscall) GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall)
  41. #define LOG_STRERROR_FILE(kind,syscall,filename) GNUNET_log_from_strerror_file (kind, "util-container-bloomfilter", syscall, filename)
  42. struct GNUNET_CONTAINER_BloomFilter
  43. {
  44. /**
  45. * The actual bloomfilter bit array
  46. */
  47. char *bitArray;
  48. /**
  49. * Filename of the filter
  50. */
  51. char *filename;
  52. /**
  53. * The bit counter file on disk
  54. */
  55. struct GNUNET_DISK_FileHandle *fh;
  56. /**
  57. * How many bits we set for each stored element
  58. */
  59. unsigned int addressesPerElement;
  60. /**
  61. * Size of bitArray in bytes
  62. */
  63. size_t bitArraySize;
  64. };
  65. /**
  66. * Get the number of the addresses set per element in the bloom filter.
  67. *
  68. * @param bf the filter
  69. * @return addresses set per element in the bf
  70. */
  71. size_t
  72. GNUNET_CONTAINER_bloomfilter_get_element_addresses (const struct GNUNET_CONTAINER_BloomFilter
  73. *bf)
  74. {
  75. if (bf == NULL)
  76. return 0;
  77. return bf->addressesPerElement;
  78. }
  79. /**
  80. * Get size of the bloom filter.
  81. *
  82. * @param bf the filter
  83. * @return number of bytes used for the data of the bloom filter
  84. */
  85. size_t
  86. GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
  87. *bf)
  88. {
  89. if (bf == NULL)
  90. return 0;
  91. return bf->bitArraySize;
  92. }
  93. /**
  94. * Copy an existing memory. Any association with a file
  95. * on-disk will be lost in the process.
  96. * @param bf the filter to copy
  97. * @return copy of the bf
  98. */
  99. struct GNUNET_CONTAINER_BloomFilter *
  100. GNUNET_CONTAINER_bloomfilter_copy (const struct GNUNET_CONTAINER_BloomFilter
  101. *bf)
  102. {
  103. return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, bf->bitArraySize,
  104. bf->addressesPerElement);
  105. }
  106. /**
  107. * Sets a bit active in the bitArray. Increment bit-specific
  108. * usage counter on disk only if below 4bit max (==15).
  109. *
  110. * @param bitArray memory area to set the bit in
  111. * @param bitIdx which bit to set
  112. */
  113. static void
  114. setBit (char *bitArray, unsigned int bitIdx)
  115. {
  116. size_t arraySlot;
  117. unsigned int targetBit;
  118. arraySlot = bitIdx / 8;
  119. targetBit = (1L << (bitIdx % 8));
  120. bitArray[arraySlot] |= targetBit;
  121. }
  122. /**
  123. * Clears a bit from bitArray. Bit is cleared from the array
  124. * only if the respective usage counter on the disk hits/is zero.
  125. *
  126. * @param bitArray memory area to set the bit in
  127. * @param bitIdx which bit to unset
  128. */
  129. static void
  130. clearBit (char *bitArray, unsigned int bitIdx)
  131. {
  132. size_t slot;
  133. unsigned int targetBit;
  134. slot = bitIdx / 8;
  135. targetBit = (1L << (bitIdx % 8));
  136. bitArray[slot] = bitArray[slot] & (~targetBit);
  137. }
  138. /**
  139. * Checks if a bit is active in the bitArray
  140. *
  141. * @param bitArray memory area to set the bit in
  142. * @param bitIdx which bit to test
  143. * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
  144. */
  145. static int
  146. testBit (char *bitArray, unsigned int bitIdx)
  147. {
  148. size_t slot;
  149. unsigned int targetBit;
  150. slot = bitIdx / 8;
  151. targetBit = (1L << (bitIdx % 8));
  152. if (bitArray[slot] & targetBit)
  153. return GNUNET_YES;
  154. else
  155. return GNUNET_NO;
  156. }
  157. /**
  158. * Sets a bit active in the bitArray and increments
  159. * bit-specific usage counter on disk (but only if
  160. * the counter was below 4 bit max (==15)).
  161. *
  162. * @param bitArray memory area to set the bit in
  163. * @param bitIdx which bit to test
  164. * @param fh A file to keep the 4 bit address usage counters in
  165. */
  166. static void
  167. incrementBit (char *bitArray, unsigned int bitIdx,
  168. const struct GNUNET_DISK_FileHandle *fh)
  169. {
  170. off_t fileSlot;
  171. unsigned char value;
  172. unsigned int high;
  173. unsigned int low;
  174. unsigned int targetLoc;
  175. setBit (bitArray, bitIdx);
  176. if (GNUNET_DISK_handle_invalid (fh))
  177. return;
  178. /* Update the counter file on disk */
  179. fileSlot = bitIdx / 2;
  180. targetLoc = bitIdx % 2;
  181. GNUNET_assert (fileSlot ==
  182. GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
  183. if (1 != GNUNET_DISK_file_read (fh, &value, 1))
  184. value = 0;
  185. low = value & 0xF;
  186. high = (value & (~0xF)) >> 4;
  187. if (targetLoc == 0)
  188. {
  189. if (low < 0xF)
  190. low++;
  191. }
  192. else
  193. {
  194. if (high < 0xF)
  195. high++;
  196. }
  197. value = ((high << 4) | low);
  198. GNUNET_assert (fileSlot ==
  199. GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
  200. GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
  201. }
  202. /**
  203. * Clears a bit from bitArray if the respective usage
  204. * counter on the disk hits/is zero.
  205. *
  206. * @param bitArray memory area to set the bit in
  207. * @param bitIdx which bit to test
  208. * @param fh A file to keep the 4bit address usage counters in
  209. */
  210. static void
  211. decrementBit (char *bitArray, unsigned int bitIdx,
  212. const struct GNUNET_DISK_FileHandle *fh)
  213. {
  214. off_t fileslot;
  215. unsigned char value;
  216. unsigned int high;
  217. unsigned int low;
  218. unsigned int targetLoc;
  219. if (GNUNET_DISK_handle_invalid (fh))
  220. return; /* cannot decrement! */
  221. /* Each char slot in the counter file holds two 4 bit counters */
  222. fileslot = bitIdx / 2;
  223. targetLoc = bitIdx % 2;
  224. if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
  225. {
  226. GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
  227. return;
  228. }
  229. if (1 != GNUNET_DISK_file_read (fh, &value, 1))
  230. value = 0;
  231. low = value & 0xF;
  232. high = (value & 0xF0) >> 4;
  233. /* decrement, but once we have reached the max, never go back! */
  234. if (targetLoc == 0)
  235. {
  236. if ((low > 0) && (low < 0xF))
  237. low--;
  238. if (low == 0)
  239. {
  240. clearBit (bitArray, bitIdx);
  241. }
  242. }
  243. else
  244. {
  245. if ((high > 0) && (high < 0xF))
  246. high--;
  247. if (high == 0)
  248. {
  249. clearBit (bitArray, bitIdx);
  250. }
  251. }
  252. value = ((high << 4) | low);
  253. if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
  254. {
  255. GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
  256. return;
  257. }
  258. GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
  259. }
  260. #define BUFFSIZE 65536
  261. /**
  262. * Creates a file filled with zeroes
  263. *
  264. * @param fh the file handle
  265. * @param size the size of the file
  266. * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
  267. */
  268. static int
  269. make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
  270. {
  271. char buffer[BUFFSIZE];
  272. size_t bytesleft = size;
  273. int res = 0;
  274. if (GNUNET_DISK_handle_invalid (fh))
  275. return GNUNET_SYSERR;
  276. memset (buffer, 0, sizeof (buffer));
  277. GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
  278. while (bytesleft > 0)
  279. {
  280. if (bytesleft > sizeof (buffer))
  281. {
  282. res = GNUNET_DISK_file_write (fh, buffer, sizeof (buffer));
  283. if (res >= 0)
  284. bytesleft -= res;
  285. }
  286. else
  287. {
  288. res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
  289. if (res >= 0)
  290. bytesleft -= res;
  291. }
  292. if (GNUNET_SYSERR == res)
  293. return GNUNET_SYSERR;
  294. }
  295. return GNUNET_OK;
  296. }
  297. /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
  298. /**
  299. * Iterator (callback) method to be called by the
  300. * bloomfilter iterator on each bit that is to be
  301. * set or tested for the key.
  302. *
  303. * @param cls closure
  304. * @param bf the filter to manipulate
  305. * @param bit the current bit
  306. * @return GNUNET_YES to continue, GNUNET_NO to stop early
  307. */
  308. typedef int (*BitIterator) (void *cls,
  309. const struct GNUNET_CONTAINER_BloomFilter * bf,
  310. unsigned int bit);
  311. /**
  312. * Call an iterator for each bit that the bloomfilter
  313. * must test or set for this element.
  314. *
  315. * @param bf the filter
  316. * @param callback the method to call
  317. * @param arg extra argument to callback
  318. * @param key the key for which we iterate over the BF bits
  319. */
  320. static void
  321. iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
  322. BitIterator callback, void *arg, const struct GNUNET_HashCode *key)
  323. {
  324. struct GNUNET_HashCode tmp[2];
  325. int bitCount;
  326. unsigned int round;
  327. unsigned int slot = 0;
  328. bitCount = bf->addressesPerElement;
  329. tmp[0] = *key;
  330. round = 0;
  331. GNUNET_assert (bf->bitArraySize > 0);
  332. GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize);
  333. while (bitCount > 0)
  334. {
  335. while (slot < (sizeof (struct GNUNET_HashCode) / sizeof (uint32_t)))
  336. {
  337. if (GNUNET_YES !=
  338. callback (arg, bf,
  339. ntohl ((((uint32_t *) & tmp[round & 1])[slot])) %
  340. ((bf->bitArraySize * 8LL))))
  341. return;
  342. slot++;
  343. bitCount--;
  344. if (bitCount == 0)
  345. break;
  346. }
  347. if (bitCount > 0)
  348. {
  349. GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (struct GNUNET_HashCode),
  350. &tmp[(round + 1) & 1]);
  351. round++;
  352. slot = 0;
  353. }
  354. }
  355. }
  356. /**
  357. * Callback: increment bit
  358. *
  359. * @param cls pointer to writeable form of bf
  360. * @param bf the filter to manipulate
  361. * @param bit the bit to increment
  362. * @return GNUNET_YES
  363. */
  364. static int
  365. incrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
  366. unsigned int bit)
  367. {
  368. struct GNUNET_CONTAINER_BloomFilter *b = cls;
  369. incrementBit (b->bitArray, bit, bf->fh);
  370. return GNUNET_YES;
  371. }
  372. /**
  373. * Callback: decrement bit
  374. *
  375. * @param cls pointer to writeable form of bf
  376. * @param bf the filter to manipulate
  377. * @param bit the bit to decrement
  378. * @return GNUNET_YES
  379. */
  380. static int
  381. decrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
  382. unsigned int bit)
  383. {
  384. struct GNUNET_CONTAINER_BloomFilter *b = cls;
  385. decrementBit (b->bitArray, bit, bf->fh);
  386. return GNUNET_YES;
  387. }
  388. /**
  389. * Callback: test if all bits are set
  390. *
  391. * @param cls pointer set to GNUNET_NO if bit is not set
  392. * @param bf the filter
  393. * @param bit the bit to test
  394. * @return YES if the bit is set, NO if not
  395. */
  396. static int
  397. testBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
  398. unsigned int bit)
  399. {
  400. int *arg = cls;
  401. if (GNUNET_NO == testBit (bf->bitArray, bit))
  402. {
  403. *arg = GNUNET_NO;
  404. return GNUNET_NO;
  405. }
  406. return GNUNET_YES;
  407. }
  408. /* *********************** INTERFACE **************** */
  409. /**
  410. * Load a bloom-filter from a file.
  411. *
  412. * @param filename the name of the file (or the prefix)
  413. * @param size the size of the bloom-filter (number of
  414. * bytes of storage space to use); will be rounded up
  415. * to next power of 2
  416. * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
  417. * element (number of bits set per element in the set)
  418. * @return the bloomfilter
  419. */
  420. struct GNUNET_CONTAINER_BloomFilter *
  421. GNUNET_CONTAINER_bloomfilter_load (const char *filename, size_t size,
  422. unsigned int k)
  423. {
  424. struct GNUNET_CONTAINER_BloomFilter *bf;
  425. char *rbuff;
  426. off_t pos;
  427. int i;
  428. size_t ui;
  429. off_t fsize;
  430. int must_read;
  431. GNUNET_assert (NULL != filename);
  432. if ((k == 0) || (size == 0))
  433. return NULL;
  434. if (size < BUFFSIZE)
  435. size = BUFFSIZE;
  436. ui = 1;
  437. while ( (ui < size) &&
  438. (ui * 2 > ui) )
  439. ui *= 2;
  440. size = ui; /* make sure it's a power of 2 */
  441. bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
  442. /* Try to open a bloomfilter file */
  443. if (GNUNET_YES == GNUNET_DISK_file_test (filename))
  444. bf->fh =
  445. GNUNET_DISK_file_open (filename,
  446. GNUNET_DISK_OPEN_READWRITE,
  447. GNUNET_DISK_PERM_USER_READ |
  448. GNUNET_DISK_PERM_USER_WRITE);
  449. if (NULL != bf->fh)
  450. {
  451. /* file existed, try to read it! */
  452. must_read = GNUNET_YES;
  453. if (GNUNET_OK !=
  454. GNUNET_DISK_file_handle_size (bf->fh, &fsize))
  455. {
  456. GNUNET_DISK_file_close (bf->fh);
  457. GNUNET_free (bf);
  458. return NULL;
  459. }
  460. if (0 == fsize)
  461. {
  462. /* found existing empty file, just overwrite */
  463. if (GNUNET_OK !=
  464. make_empty_file (bf->fh, size * 4LL))
  465. {
  466. GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
  467. "write");
  468. GNUNET_DISK_file_close (bf->fh);
  469. GNUNET_free (bf);
  470. return NULL;
  471. }
  472. }
  473. else if (fsize != ((off_t) size) * 4LL)
  474. {
  475. GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
  476. _("Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
  477. (unsigned long long) (size * 4LL),
  478. (unsigned long long) fsize);
  479. GNUNET_DISK_file_close (bf->fh);
  480. GNUNET_free (bf);
  481. return NULL;
  482. }
  483. }
  484. else
  485. {
  486. /* file did not exist, don't read, just create */
  487. must_read = GNUNET_NO;
  488. bf->fh =
  489. GNUNET_DISK_file_open (filename,
  490. GNUNET_DISK_OPEN_CREATE |
  491. GNUNET_DISK_OPEN_READWRITE,
  492. GNUNET_DISK_PERM_USER_READ |
  493. GNUNET_DISK_PERM_USER_WRITE);
  494. if (NULL == bf->fh)
  495. {
  496. GNUNET_free (bf);
  497. return NULL;
  498. }
  499. if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
  500. {
  501. GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
  502. "write");
  503. GNUNET_DISK_file_close (bf->fh);
  504. GNUNET_free (bf);
  505. return NULL;
  506. }
  507. }
  508. bf->filename = GNUNET_strdup (filename);
  509. /* Alloc block */
  510. bf->bitArray = GNUNET_malloc_large (size);
  511. if (NULL == bf->bitArray)
  512. {
  513. if (NULL != bf->fh)
  514. GNUNET_DISK_file_close (bf->fh);
  515. GNUNET_free (bf->filename);
  516. GNUNET_free (bf);
  517. return NULL;
  518. }
  519. bf->bitArraySize = size;
  520. bf->addressesPerElement = k;
  521. if (GNUNET_YES != must_read)
  522. return bf; /* already done! */
  523. /* Read from the file what bits we can */
  524. rbuff = GNUNET_malloc (BUFFSIZE);
  525. pos = 0;
  526. while (pos < ((off_t) size) * 8LL)
  527. {
  528. int res;
  529. res = GNUNET_DISK_file_read (bf->fh,
  530. rbuff,
  531. BUFFSIZE);
  532. if (res == -1)
  533. {
  534. LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING,
  535. "read",
  536. bf->filename);
  537. GNUNET_free (rbuff);
  538. GNUNET_free (bf->filename);
  539. GNUNET_DISK_file_close (bf->fh);
  540. GNUNET_free (bf);
  541. return NULL;
  542. }
  543. if (res == 0)
  544. break; /* is ok! we just did not use that many bits yet */
  545. for (i = 0; i < res; i++)
  546. {
  547. if ((rbuff[i] & 0x0F) != 0)
  548. setBit (bf->bitArray, pos + i * 2);
  549. if ((rbuff[i] & 0xF0) != 0)
  550. setBit (bf->bitArray, pos + i * 2 + 1);
  551. }
  552. if (res < BUFFSIZE)
  553. break;
  554. pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
  555. }
  556. GNUNET_free (rbuff);
  557. return bf;
  558. }
  559. /**
  560. * Create a bloom filter from raw bits.
  561. *
  562. * @param data the raw bits in memory (maybe NULL,
  563. * in which case all bits should be considered
  564. * to be zero).
  565. * @param size the size of the bloom-filter (number of
  566. * bytes of storage space to use); also size of data
  567. * -- unless data is NULL
  568. * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
  569. * element (number of bits set per element in the set)
  570. * @return the bloomfilter
  571. */
  572. struct GNUNET_CONTAINER_BloomFilter *
  573. GNUNET_CONTAINER_bloomfilter_init (const char *data, size_t size,
  574. unsigned int k)
  575. {
  576. struct GNUNET_CONTAINER_BloomFilter *bf;
  577. if ((0 == k) || (0 == size))
  578. return NULL;
  579. bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
  580. bf->filename = NULL;
  581. bf->fh = NULL;
  582. bf->bitArray = GNUNET_malloc_large (size);
  583. if (NULL == bf->bitArray)
  584. {
  585. GNUNET_free (bf);
  586. return NULL;
  587. }
  588. bf->bitArraySize = size;
  589. bf->addressesPerElement = k;
  590. if (NULL != data)
  591. GNUNET_memcpy (bf->bitArray, data, size);
  592. return bf;
  593. }
  594. /**
  595. * Copy the raw data of this bloomfilter into
  596. * the given data array.
  597. *
  598. * @param bf bloomfilter to take the raw data from
  599. * @param data where to write the data
  600. * @param size the size of the given data array
  601. * @return #GNUNET_SYSERR if the data array is not big enough
  602. */
  603. int
  604. GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct
  605. GNUNET_CONTAINER_BloomFilter *bf,
  606. char *data, size_t size)
  607. {
  608. if (NULL == bf)
  609. return GNUNET_SYSERR;
  610. if (bf->bitArraySize != size)
  611. return GNUNET_SYSERR;
  612. GNUNET_memcpy (data, bf->bitArray, size);
  613. return GNUNET_OK;
  614. }
  615. /**
  616. * Free the space associated with a filter
  617. * in memory, flush to drive if needed (do not
  618. * free the space on the drive)
  619. *
  620. * @param bf the filter
  621. */
  622. void
  623. GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
  624. {
  625. if (NULL == bf)
  626. return;
  627. if (bf->fh != NULL)
  628. GNUNET_DISK_file_close (bf->fh);
  629. GNUNET_free_non_null (bf->filename);
  630. GNUNET_free (bf->bitArray);
  631. GNUNET_free (bf);
  632. }
  633. /**
  634. * Reset a bloom filter to empty. Clears the file on disk.
  635. *
  636. * @param bf the filter
  637. */
  638. void
  639. GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
  640. {
  641. if (NULL == bf)
  642. return;
  643. memset (bf->bitArray, 0, bf->bitArraySize);
  644. if (bf->filename != NULL)
  645. make_empty_file (bf->fh, bf->bitArraySize * 4LL);
  646. }
  647. /**
  648. * Test if an element is in the filter.
  649. *
  650. * @param e the element
  651. * @param bf the filter
  652. * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not
  653. */
  654. int
  655. GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter *bf,
  656. const struct GNUNET_HashCode * e)
  657. {
  658. int res;
  659. if (NULL == bf)
  660. return GNUNET_YES;
  661. res = GNUNET_YES;
  662. iterateBits (bf, &testBitCallback, &res, e);
  663. return res;
  664. }
  665. /**
  666. * Add an element to the filter
  667. *
  668. * @param bf the filter
  669. * @param e the element
  670. */
  671. void
  672. GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
  673. const struct GNUNET_HashCode * e)
  674. {
  675. if (NULL == bf)
  676. return;
  677. iterateBits (bf, &incrementBitCallback, bf, e);
  678. }
  679. /**
  680. * Or the entries of the given raw data array with the
  681. * data of the given bloom filter. Assumes that
  682. * the size of the data array and the current filter
  683. * match.
  684. *
  685. * @param bf the filter
  686. * @param data the data to or-in
  687. * @param size number of bytes in data
  688. */
  689. int
  690. GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
  691. const char *data,
  692. size_t size)
  693. {
  694. unsigned int i;
  695. unsigned int n;
  696. unsigned long long *fc;
  697. const unsigned long long *dc;
  698. if (NULL == bf)
  699. return GNUNET_YES;
  700. if (bf->bitArraySize != size)
  701. return GNUNET_SYSERR;
  702. fc = (unsigned long long *) bf->bitArray;
  703. dc = (const unsigned long long *) data;
  704. n = size / sizeof (unsigned long long);
  705. for (i = 0; i < n; i++)
  706. fc[i] |= dc[i];
  707. for (i = n * sizeof (unsigned long long); i < size; i++)
  708. bf->bitArray[i] |= data[i];
  709. return GNUNET_OK;
  710. }
  711. /**
  712. * Or the entries of the given raw data array with the
  713. * data of the given bloom filter. Assumes that
  714. * the size of the data array and the current filter
  715. * match.
  716. *
  717. * @param bf the filter
  718. * @param to_or the bloomfilter to or-in
  719. * @return #GNUNET_OK on success
  720. */
  721. int
  722. GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
  723. const struct GNUNET_CONTAINER_BloomFilter *to_or)
  724. {
  725. unsigned int i;
  726. unsigned int n;
  727. unsigned long long *fc;
  728. const unsigned long long *dc;
  729. size_t size;
  730. if (NULL == bf)
  731. return GNUNET_OK;
  732. if (bf->bitArraySize != to_or->bitArraySize)
  733. {
  734. GNUNET_break (0);
  735. return GNUNET_SYSERR;
  736. }
  737. size = bf->bitArraySize;
  738. fc = (unsigned long long *) bf->bitArray;
  739. dc = (const unsigned long long *) to_or->bitArray;
  740. n = size / sizeof (unsigned long long);
  741. for (i = 0; i < n; i++)
  742. fc[i] |= dc[i];
  743. for (i = n * sizeof (unsigned long long); i < size; i++)
  744. bf->bitArray[i] |= to_or->bitArray[i];
  745. return GNUNET_OK;
  746. }
  747. /**
  748. * Remove an element from the filter.
  749. *
  750. * @param bf the filter
  751. * @param e the element to remove
  752. */
  753. void
  754. GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
  755. const struct GNUNET_HashCode *e)
  756. {
  757. if (NULL == bf)
  758. return;
  759. if (NULL == bf->filename)
  760. return;
  761. iterateBits (bf,
  762. &decrementBitCallback,
  763. bf,
  764. e);
  765. }
  766. /**
  767. * Resize a bloom filter. Note that this operation
  768. * is pretty costly. Essentially, the bloom filter
  769. * needs to be completely re-build.
  770. *
  771. * @param bf the filter
  772. * @param iterator an iterator over all elements stored in the BF
  773. * @param iterator_cls argument to the iterator function
  774. * @param size the new size for the filter
  775. * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
  776. */
  777. void
  778. GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
  779. GNUNET_CONTAINER_HashCodeIterator iterator,
  780. void *iterator_cls,
  781. size_t size,
  782. unsigned int k)
  783. {
  784. struct GNUNET_HashCode hc;
  785. unsigned int i;
  786. GNUNET_free (bf->bitArray);
  787. i = 1;
  788. while (i < size)
  789. i *= 2;
  790. size = i; /* make sure it's a power of 2 */
  791. bf->addressesPerElement = k;
  792. bf->bitArraySize = size;
  793. bf->bitArray = GNUNET_malloc (size);
  794. if (NULL != bf->filename)
  795. make_empty_file (bf->fh,
  796. bf->bitArraySize * 4LL);
  797. while (GNUNET_YES == iterator (iterator_cls,
  798. &hc))
  799. GNUNET_CONTAINER_bloomfilter_add (bf,
  800. &hc);
  801. }
  802. /* end of container_bloomfilter.c */