container_bloomfilter.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. /*
  2. This file is part of GNUnet.
  3. Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file util/container_bloomfilter.c
  18. * @brief data structure used to reduce disk accesses.
  19. *
  20. * The idea basically: Create a signature for each element in the
  21. * database. Add those signatures to a bit array. When doing a lookup,
  22. * check if the bit array matches the signature of the requested
  23. * element. If yes, address the disk, otherwise return 'not found'.
  24. *
  25. * A property of the bloom filter is that sometimes we will have
  26. * a match even if the element is not on the disk (then we do
  27. * an unnecessary disk access), but what's most important is that
  28. * we never get a single "false negative".
  29. *
  30. * To be able to delete entries from the bloom filter, we maintain
  31. * a 4 bit counter in the file on the drive (we still use only one
  32. * bit in memory).
  33. *
  34. * @author Igor Wronsky
  35. * @author Christian Grothoff
  36. */
  37. #include "platform.h"
  38. #include "gnunet_util_lib.h"
  39. #define LOG(kind, ...) \
  40. GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__)
  41. #define LOG_STRERROR(kind, syscall) \
  42. GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall)
  43. #define LOG_STRERROR_FILE(kind, syscall, filename) \
  44. GNUNET_log_from_strerror_file (kind, \
  45. "util-container-bloomfilter", \
  46. syscall, \
  47. filename)
  48. struct GNUNET_CONTAINER_BloomFilter
  49. {
  50. /**
  51. * The actual bloomfilter bit array
  52. */
  53. char *bitArray;
  54. /**
  55. * Filename of the filter
  56. */
  57. char *filename;
  58. /**
  59. * The bit counter file on disk
  60. */
  61. struct GNUNET_DISK_FileHandle *fh;
  62. /**
  63. * How many bits we set for each stored element
  64. */
  65. unsigned int addressesPerElement;
  66. /**
  67. * Size of bitArray in bytes
  68. */
  69. size_t bitArraySize;
  70. };
  71. /**
  72. * Get the number of the addresses set per element in the bloom filter.
  73. *
  74. * @param bf the filter
  75. * @return addresses set per element in the bf
  76. */
  77. size_t
  78. GNUNET_CONTAINER_bloomfilter_get_element_addresses (
  79. const struct GNUNET_CONTAINER_BloomFilter *bf)
  80. {
  81. if (bf == NULL)
  82. return 0;
  83. return bf->addressesPerElement;
  84. }
  85. /**
  86. * Get size of the bloom filter.
  87. *
  88. * @param bf the filter
  89. * @return number of bytes used for the data of the bloom filter
  90. */
  91. size_t
  92. GNUNET_CONTAINER_bloomfilter_get_size (
  93. const struct GNUNET_CONTAINER_BloomFilter *bf)
  94. {
  95. if (bf == NULL)
  96. return 0;
  97. return bf->bitArraySize;
  98. }
  99. /**
  100. * Copy an existing memory. Any association with a file
  101. * on-disk will be lost in the process.
  102. * @param bf the filter to copy
  103. * @return copy of the bf
  104. */
  105. struct GNUNET_CONTAINER_BloomFilter *
  106. GNUNET_CONTAINER_bloomfilter_copy (
  107. const struct GNUNET_CONTAINER_BloomFilter *bf)
  108. {
  109. return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray,
  110. bf->bitArraySize,
  111. bf->addressesPerElement);
  112. }
  113. /**
  114. * Sets a bit active in the bitArray. Increment bit-specific
  115. * usage counter on disk only if below 4bit max (==15).
  116. *
  117. * @param bitArray memory area to set the bit in
  118. * @param bitIdx which bit to set
  119. */
  120. static void
  121. setBit (char *bitArray, unsigned int bitIdx)
  122. {
  123. size_t arraySlot;
  124. unsigned int targetBit;
  125. arraySlot = bitIdx / 8;
  126. targetBit = (1L << (bitIdx % 8));
  127. bitArray[arraySlot] |= targetBit;
  128. }
  129. /**
  130. * Clears a bit from bitArray. Bit is cleared from the array
  131. * only if the respective usage counter on the disk hits/is zero.
  132. *
  133. * @param bitArray memory area to set the bit in
  134. * @param bitIdx which bit to unset
  135. */
  136. static void
  137. clearBit (char *bitArray, unsigned int bitIdx)
  138. {
  139. size_t slot;
  140. unsigned int targetBit;
  141. slot = bitIdx / 8;
  142. targetBit = (1L << (bitIdx % 8));
  143. bitArray[slot] = bitArray[slot] & (~targetBit);
  144. }
  145. /**
  146. * Checks if a bit is active in the bitArray
  147. *
  148. * @param bitArray memory area to set the bit in
  149. * @param bitIdx which bit to test
  150. * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
  151. */
  152. static int
  153. testBit (char *bitArray, unsigned int bitIdx)
  154. {
  155. size_t slot;
  156. unsigned int targetBit;
  157. slot = bitIdx / 8;
  158. targetBit = (1L << (bitIdx % 8));
  159. if (bitArray[slot] & targetBit)
  160. return GNUNET_YES;
  161. else
  162. return GNUNET_NO;
  163. }
  164. /**
  165. * Sets a bit active in the bitArray and increments
  166. * bit-specific usage counter on disk (but only if
  167. * the counter was below 4 bit max (==15)).
  168. *
  169. * @param bitArray memory area to set the bit in
  170. * @param bitIdx which bit to test
  171. * @param fh A file to keep the 4 bit address usage counters in
  172. */
  173. static void
  174. incrementBit (char *bitArray,
  175. unsigned int bitIdx,
  176. const struct GNUNET_DISK_FileHandle *fh)
  177. {
  178. off_t fileSlot;
  179. unsigned char value;
  180. unsigned int high;
  181. unsigned int low;
  182. unsigned int targetLoc;
  183. setBit (bitArray, bitIdx);
  184. if (GNUNET_DISK_handle_invalid (fh))
  185. return;
  186. /* Update the counter file on disk */
  187. fileSlot = bitIdx / 2;
  188. targetLoc = bitIdx % 2;
  189. GNUNET_assert (fileSlot ==
  190. GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
  191. if (1 != GNUNET_DISK_file_read (fh, &value, 1))
  192. value = 0;
  193. low = value & 0xF;
  194. high = (value & (~0xF)) >> 4;
  195. if (targetLoc == 0)
  196. {
  197. if (low < 0xF)
  198. low++;
  199. }
  200. else
  201. {
  202. if (high < 0xF)
  203. high++;
  204. }
  205. value = ((high << 4) | low);
  206. GNUNET_assert (fileSlot ==
  207. GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
  208. GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
  209. }
  210. /**
  211. * Clears a bit from bitArray if the respective usage
  212. * counter on the disk hits/is zero.
  213. *
  214. * @param bitArray memory area to set the bit in
  215. * @param bitIdx which bit to test
  216. * @param fh A file to keep the 4bit address usage counters in
  217. */
  218. static void
  219. decrementBit (char *bitArray,
  220. unsigned int bitIdx,
  221. const struct GNUNET_DISK_FileHandle *fh)
  222. {
  223. off_t fileslot;
  224. unsigned char value;
  225. unsigned int high;
  226. unsigned int low;
  227. unsigned int targetLoc;
  228. if (GNUNET_DISK_handle_invalid (fh))
  229. return; /* cannot decrement! */
  230. /* Each char slot in the counter file holds two 4 bit counters */
  231. fileslot = bitIdx / 2;
  232. targetLoc = bitIdx % 2;
  233. if (GNUNET_SYSERR ==
  234. GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
  235. {
  236. GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
  237. return;
  238. }
  239. if (1 != GNUNET_DISK_file_read (fh, &value, 1))
  240. value = 0;
  241. low = value & 0xF;
  242. high = (value & 0xF0) >> 4;
  243. /* decrement, but once we have reached the max, never go back! */
  244. if (targetLoc == 0)
  245. {
  246. if ((low > 0) && (low < 0xF))
  247. low--;
  248. if (low == 0)
  249. {
  250. clearBit (bitArray, bitIdx);
  251. }
  252. }
  253. else
  254. {
  255. if ((high > 0) && (high < 0xF))
  256. high--;
  257. if (high == 0)
  258. {
  259. clearBit (bitArray, bitIdx);
  260. }
  261. }
  262. value = ((high << 4) | low);
  263. if (GNUNET_SYSERR ==
  264. GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
  265. {
  266. GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
  267. return;
  268. }
  269. GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
  270. }
  271. #define BUFFSIZE 65536
  272. /**
  273. * Creates a file filled with zeroes
  274. *
  275. * @param fh the file handle
  276. * @param size the size of the file
  277. * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
  278. */
  279. static int
  280. make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
  281. {
  282. char buffer[BUFFSIZE];
  283. size_t bytesleft = size;
  284. int res = 0;
  285. if (GNUNET_DISK_handle_invalid (fh))
  286. return GNUNET_SYSERR;
  287. memset (buffer, 0, sizeof(buffer));
  288. GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
  289. while (bytesleft > 0)
  290. {
  291. if (bytesleft > sizeof(buffer))
  292. {
  293. res = GNUNET_DISK_file_write (fh, buffer, sizeof(buffer));
  294. if (res >= 0)
  295. bytesleft -= res;
  296. }
  297. else
  298. {
  299. res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
  300. if (res >= 0)
  301. bytesleft -= res;
  302. }
  303. if (GNUNET_SYSERR == res)
  304. return GNUNET_SYSERR;
  305. }
  306. return GNUNET_OK;
  307. }
  308. /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
  309. /**
  310. * Iterator (callback) method to be called by the
  311. * bloomfilter iterator on each bit that is to be
  312. * set or tested for the key.
  313. *
  314. * @param cls closure
  315. * @param bf the filter to manipulate
  316. * @param bit the current bit
  317. * @return GNUNET_YES to continue, GNUNET_NO to stop early
  318. */
  319. typedef int (*BitIterator) (void *cls,
  320. const struct GNUNET_CONTAINER_BloomFilter *bf,
  321. unsigned int bit);
  322. /**
  323. * Call an iterator for each bit that the bloomfilter
  324. * must test or set for this element.
  325. *
  326. * @param bf the filter
  327. * @param callback the method to call
  328. * @param arg extra argument to callback
  329. * @param key the key for which we iterate over the BF bits
  330. */
  331. static void
  332. iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
  333. BitIterator callback,
  334. void *arg,
  335. const struct GNUNET_HashCode *key)
  336. {
  337. struct GNUNET_HashCode tmp[2];
  338. int bitCount;
  339. unsigned int round;
  340. unsigned int slot = 0;
  341. bitCount = bf->addressesPerElement;
  342. tmp[0] = *key;
  343. round = 0;
  344. GNUNET_assert (bf->bitArraySize > 0);
  345. GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize);
  346. while (bitCount > 0)
  347. {
  348. while (slot < (sizeof(struct GNUNET_HashCode) / sizeof(uint32_t)))
  349. {
  350. if (GNUNET_YES !=
  351. callback (arg,
  352. bf,
  353. ntohl ((((uint32_t *) &tmp[round & 1])[slot]))
  354. % ((bf->bitArraySize * 8LL))))
  355. return;
  356. slot++;
  357. bitCount--;
  358. if (bitCount == 0)
  359. break;
  360. }
  361. if (bitCount > 0)
  362. {
  363. GNUNET_CRYPTO_hash (&tmp[round & 1],
  364. sizeof(struct GNUNET_HashCode),
  365. &tmp[(round + 1) & 1]);
  366. round++;
  367. slot = 0;
  368. }
  369. }
  370. }
  371. /**
  372. * Callback: increment bit
  373. *
  374. * @param cls pointer to writeable form of bf
  375. * @param bf the filter to manipulate
  376. * @param bit the bit to increment
  377. * @return GNUNET_YES
  378. */
  379. static int
  380. incrementBitCallback (void *cls,
  381. const struct GNUNET_CONTAINER_BloomFilter *bf,
  382. unsigned int bit)
  383. {
  384. struct GNUNET_CONTAINER_BloomFilter *b = cls;
  385. incrementBit (b->bitArray, bit, bf->fh);
  386. return GNUNET_YES;
  387. }
  388. /**
  389. * Callback: decrement bit
  390. *
  391. * @param cls pointer to writeable form of bf
  392. * @param bf the filter to manipulate
  393. * @param bit the bit to decrement
  394. * @return GNUNET_YES
  395. */
  396. static int
  397. decrementBitCallback (void *cls,
  398. const struct GNUNET_CONTAINER_BloomFilter *bf,
  399. unsigned int bit)
  400. {
  401. struct GNUNET_CONTAINER_BloomFilter *b = cls;
  402. decrementBit (b->bitArray, bit, bf->fh);
  403. return GNUNET_YES;
  404. }
  405. /**
  406. * Callback: test if all bits are set
  407. *
  408. * @param cls pointer set to GNUNET_NO if bit is not set
  409. * @param bf the filter
  410. * @param bit the bit to test
  411. * @return YES if the bit is set, NO if not
  412. */
  413. static int
  414. testBitCallback (void *cls,
  415. const struct GNUNET_CONTAINER_BloomFilter *bf,
  416. unsigned int bit)
  417. {
  418. int *arg = cls;
  419. if (GNUNET_NO == testBit (bf->bitArray, bit))
  420. {
  421. *arg = GNUNET_NO;
  422. return GNUNET_NO;
  423. }
  424. return GNUNET_YES;
  425. }
  426. /* *********************** INTERFACE **************** */
  427. /**
  428. * Load a bloom-filter from a file.
  429. *
  430. * @param filename the name of the file (or the prefix)
  431. * @param size the size of the bloom-filter (number of
  432. * bytes of storage space to use); will be rounded up
  433. * to next power of 2
  434. * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
  435. * element (number of bits set per element in the set)
  436. * @return the bloomfilter
  437. */
  438. struct GNUNET_CONTAINER_BloomFilter *
  439. GNUNET_CONTAINER_bloomfilter_load (const char *filename,
  440. size_t size,
  441. unsigned int k)
  442. {
  443. struct GNUNET_CONTAINER_BloomFilter *bf;
  444. char *rbuff;
  445. off_t pos;
  446. int i;
  447. size_t ui;
  448. off_t fsize;
  449. int must_read;
  450. GNUNET_assert (NULL != filename);
  451. if ((k == 0) || (size == 0))
  452. return NULL;
  453. if (size < BUFFSIZE)
  454. size = BUFFSIZE;
  455. ui = 1;
  456. while ((ui < size) && (ui * 2 > ui))
  457. ui *= 2;
  458. size = ui; /* make sure it's a power of 2 */
  459. bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
  460. /* Try to open a bloomfilter file */
  461. if (GNUNET_YES == GNUNET_DISK_file_test (filename))
  462. bf->fh = GNUNET_DISK_file_open (filename,
  463. GNUNET_DISK_OPEN_READWRITE,
  464. GNUNET_DISK_PERM_USER_READ
  465. | GNUNET_DISK_PERM_USER_WRITE);
  466. if (NULL != bf->fh)
  467. {
  468. /* file existed, try to read it! */
  469. must_read = GNUNET_YES;
  470. if (GNUNET_OK != GNUNET_DISK_file_handle_size (bf->fh, &fsize))
  471. {
  472. GNUNET_DISK_file_close (bf->fh);
  473. GNUNET_free (bf);
  474. return NULL;
  475. }
  476. if (0 == fsize)
  477. {
  478. /* found existing empty file, just overwrite */
  479. if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
  480. {
  481. GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write");
  482. GNUNET_DISK_file_close (bf->fh);
  483. GNUNET_free (bf);
  484. return NULL;
  485. }
  486. }
  487. else if (fsize != ((off_t) size) * 4LL)
  488. {
  489. GNUNET_log (
  490. GNUNET_ERROR_TYPE_ERROR,
  491. _ (
  492. "Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
  493. (unsigned long long) (size * 4LL),
  494. (unsigned long long) fsize);
  495. GNUNET_DISK_file_close (bf->fh);
  496. GNUNET_free (bf);
  497. return NULL;
  498. }
  499. }
  500. else
  501. {
  502. /* file did not exist, don't read, just create */
  503. must_read = GNUNET_NO;
  504. bf->fh = GNUNET_DISK_file_open (filename,
  505. GNUNET_DISK_OPEN_CREATE
  506. | GNUNET_DISK_OPEN_READWRITE,
  507. GNUNET_DISK_PERM_USER_READ
  508. | GNUNET_DISK_PERM_USER_WRITE);
  509. if (NULL == bf->fh)
  510. {
  511. GNUNET_free (bf);
  512. return NULL;
  513. }
  514. if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
  515. {
  516. GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write");
  517. GNUNET_DISK_file_close (bf->fh);
  518. GNUNET_free (bf);
  519. return NULL;
  520. }
  521. }
  522. bf->filename = GNUNET_strdup (filename);
  523. /* Alloc block */
  524. bf->bitArray = GNUNET_malloc_large (size);
  525. if (NULL == bf->bitArray)
  526. {
  527. if (NULL != bf->fh)
  528. GNUNET_DISK_file_close (bf->fh);
  529. GNUNET_free (bf->filename);
  530. GNUNET_free (bf);
  531. return NULL;
  532. }
  533. bf->bitArraySize = size;
  534. bf->addressesPerElement = k;
  535. if (GNUNET_YES != must_read)
  536. return bf; /* already done! */
  537. /* Read from the file what bits we can */
  538. rbuff = GNUNET_malloc (BUFFSIZE);
  539. pos = 0;
  540. while (pos < ((off_t) size) * 8LL)
  541. {
  542. int res;
  543. res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE);
  544. if (res == -1)
  545. {
  546. LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING, "read", bf->filename);
  547. GNUNET_free (rbuff);
  548. GNUNET_free (bf->filename);
  549. GNUNET_DISK_file_close (bf->fh);
  550. GNUNET_free (bf);
  551. return NULL;
  552. }
  553. if (res == 0)
  554. break; /* is ok! we just did not use that many bits yet */
  555. for (i = 0; i < res; i++)
  556. {
  557. if ((rbuff[i] & 0x0F) != 0)
  558. setBit (bf->bitArray, pos + i * 2);
  559. if ((rbuff[i] & 0xF0) != 0)
  560. setBit (bf->bitArray, pos + i * 2 + 1);
  561. }
  562. if (res < BUFFSIZE)
  563. break;
  564. pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
  565. }
  566. GNUNET_free (rbuff);
  567. return bf;
  568. }
  569. /**
  570. * Create a bloom filter from raw bits.
  571. *
  572. * @param data the raw bits in memory (maybe NULL,
  573. * in which case all bits should be considered
  574. * to be zero).
  575. * @param size the size of the bloom-filter (number of
  576. * bytes of storage space to use); also size of data
  577. * -- unless data is NULL
  578. * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
  579. * element (number of bits set per element in the set)
  580. * @return the bloomfilter
  581. */
  582. struct GNUNET_CONTAINER_BloomFilter *
  583. GNUNET_CONTAINER_bloomfilter_init (const char *data,
  584. size_t size,
  585. unsigned int k)
  586. {
  587. struct GNUNET_CONTAINER_BloomFilter *bf;
  588. if ((0 == k) || (0 == size))
  589. return NULL;
  590. bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
  591. bf->filename = NULL;
  592. bf->fh = NULL;
  593. bf->bitArray = GNUNET_malloc_large (size);
  594. if (NULL == bf->bitArray)
  595. {
  596. GNUNET_free (bf);
  597. return NULL;
  598. }
  599. bf->bitArraySize = size;
  600. bf->addressesPerElement = k;
  601. if (NULL != data)
  602. GNUNET_memcpy (bf->bitArray, data, size);
  603. return bf;
  604. }
  605. /**
  606. * Copy the raw data of this bloomfilter into
  607. * the given data array.
  608. *
  609. * @param bf bloomfilter to take the raw data from
  610. * @param data where to write the data
  611. * @param size the size of the given data array
  612. * @return #GNUNET_SYSERR if the data array is not big enough
  613. */
  614. int
  615. GNUNET_CONTAINER_bloomfilter_get_raw_data (
  616. const struct GNUNET_CONTAINER_BloomFilter *bf,
  617. char *data,
  618. size_t size)
  619. {
  620. if (NULL == bf)
  621. return GNUNET_SYSERR;
  622. if (bf->bitArraySize != size)
  623. return GNUNET_SYSERR;
  624. GNUNET_memcpy (data, bf->bitArray, size);
  625. return GNUNET_OK;
  626. }
  627. /**
  628. * Free the space associated with a filter
  629. * in memory, flush to drive if needed (do not
  630. * free the space on the drive)
  631. *
  632. * @param bf the filter
  633. */
  634. void
  635. GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
  636. {
  637. if (NULL == bf)
  638. return;
  639. if (bf->fh != NULL)
  640. GNUNET_DISK_file_close (bf->fh);
  641. GNUNET_free (bf->filename);
  642. GNUNET_free (bf->bitArray);
  643. GNUNET_free (bf);
  644. }
  645. /**
  646. * Reset a bloom filter to empty. Clears the file on disk.
  647. *
  648. * @param bf the filter
  649. */
  650. void
  651. GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
  652. {
  653. if (NULL == bf)
  654. return;
  655. memset (bf->bitArray, 0, bf->bitArraySize);
  656. if (bf->filename != NULL)
  657. make_empty_file (bf->fh, bf->bitArraySize * 4LL);
  658. }
  659. /**
  660. * Test if an element is in the filter.
  661. *
  662. * @param e the element
  663. * @param bf the filter
  664. * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not
  665. */
  666. int
  667. GNUNET_CONTAINER_bloomfilter_test (
  668. const struct GNUNET_CONTAINER_BloomFilter *bf,
  669. const struct GNUNET_HashCode *e)
  670. {
  671. int res;
  672. if (NULL == bf)
  673. return GNUNET_YES;
  674. res = GNUNET_YES;
  675. iterateBits (bf, &testBitCallback, &res, e);
  676. return res;
  677. }
  678. /**
  679. * Add an element to the filter
  680. *
  681. * @param bf the filter
  682. * @param e the element
  683. */
  684. void
  685. GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
  686. const struct GNUNET_HashCode *e)
  687. {
  688. if (NULL == bf)
  689. return;
  690. iterateBits (bf, &incrementBitCallback, bf, e);
  691. }
  692. /**
  693. * Or the entries of the given raw data array with the
  694. * data of the given bloom filter. Assumes that
  695. * the size of the data array and the current filter
  696. * match.
  697. *
  698. * @param bf the filter
  699. * @param data the data to or-in
  700. * @param size number of bytes in data
  701. */
  702. int
  703. GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
  704. const char *data,
  705. size_t size)
  706. {
  707. unsigned int i;
  708. unsigned int n;
  709. unsigned long long *fc;
  710. const unsigned long long *dc;
  711. if (NULL == bf)
  712. return GNUNET_YES;
  713. if (bf->bitArraySize != size)
  714. return GNUNET_SYSERR;
  715. fc = (unsigned long long *) bf->bitArray;
  716. dc = (const unsigned long long *) data;
  717. n = size / sizeof(unsigned long long);
  718. for (i = 0; i < n; i++)
  719. fc[i] |= dc[i];
  720. for (i = n * sizeof(unsigned long long); i < size; i++)
  721. bf->bitArray[i] |= data[i];
  722. return GNUNET_OK;
  723. }
  724. /**
  725. * Or the entries of the given raw data array with the
  726. * data of the given bloom filter. Assumes that
  727. * the size of the data array and the current filter
  728. * match.
  729. *
  730. * @param bf the filter
  731. * @param to_or the bloomfilter to or-in
  732. * @return #GNUNET_OK on success
  733. */
  734. int
  735. GNUNET_CONTAINER_bloomfilter_or2 (
  736. struct GNUNET_CONTAINER_BloomFilter *bf,
  737. const struct GNUNET_CONTAINER_BloomFilter *to_or)
  738. {
  739. unsigned int i;
  740. unsigned int n;
  741. unsigned long long *fc;
  742. const unsigned long long *dc;
  743. size_t size;
  744. if (NULL == bf)
  745. return GNUNET_OK;
  746. if (bf->bitArraySize != to_or->bitArraySize)
  747. {
  748. GNUNET_break (0);
  749. return GNUNET_SYSERR;
  750. }
  751. size = bf->bitArraySize;
  752. fc = (unsigned long long *) bf->bitArray;
  753. dc = (const unsigned long long *) to_or->bitArray;
  754. n = size / sizeof(unsigned long long);
  755. for (i = 0; i < n; i++)
  756. fc[i] |= dc[i];
  757. for (i = n * sizeof(unsigned long long); i < size; i++)
  758. bf->bitArray[i] |= to_or->bitArray[i];
  759. return GNUNET_OK;
  760. }
  761. /**
  762. * Remove an element from the filter.
  763. *
  764. * @param bf the filter
  765. * @param e the element to remove
  766. */
  767. void
  768. GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
  769. const struct GNUNET_HashCode *e)
  770. {
  771. if (NULL == bf)
  772. return;
  773. if (NULL == bf->filename)
  774. return;
  775. iterateBits (bf, &decrementBitCallback, bf, e);
  776. }
  777. /**
  778. * Resize a bloom filter. Note that this operation
  779. * is pretty costly. Essentially, the bloom filter
  780. * needs to be completely re-build.
  781. *
  782. * @param bf the filter
  783. * @param iterator an iterator over all elements stored in the BF
  784. * @param iterator_cls argument to the iterator function
  785. * @param size the new size for the filter
  786. * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
  787. */
  788. void
  789. GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
  790. GNUNET_CONTAINER_HashCodeIterator iterator,
  791. void *iterator_cls,
  792. size_t size,
  793. unsigned int k)
  794. {
  795. struct GNUNET_HashCode hc;
  796. unsigned int i;
  797. GNUNET_free (bf->bitArray);
  798. i = 1;
  799. while (i < size)
  800. i *= 2;
  801. size = i; /* make sure it's a power of 2 */
  802. bf->addressesPerElement = k;
  803. bf->bitArraySize = size;
  804. bf->bitArray = GNUNET_malloc (size);
  805. if (NULL != bf->filename)
  806. make_empty_file (bf->fh, bf->bitArraySize * 4LL);
  807. while (GNUNET_YES == iterator (iterator_cls, &hc))
  808. GNUNET_CONTAINER_bloomfilter_add (bf, &hc);
  809. }
  810. /* end of container_bloomfilter.c */