ibf_sim.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /*
  2. This file is part of GNUnet
  3. Copyright (C) 2013 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file set/ibf_sim.c
  18. * @brief implementation of simulation for invertible bloom filter
  19. * @author Florian Dold
  20. *
  21. * This code was used for some internal experiments, it is not
  22. * build or shipped as part of the GNUnet system.
  23. */
  24. #include <stdlib.h>
  25. #include <stdio.h>
  26. #include <string.h>
  27. #define MAX_IBF_DECODE 16
  28. /* report average over how many rounds? */
  29. #define ROUNDS 100000
  30. /* enable one of the three below */
  31. // simple fix
  32. #define FIX1 0
  33. // possibly slightly better fix for large IBF_DECODE values
  34. #define FIX2 1
  35. // SIGCOMM algorithm
  36. #define STRATA 0
  37. // print each value?
  38. #define VERBOSE 0
  39. // avoid assembly? (ASM is about 50% faster)
  40. #define SLOW 0
  41. int
  42. main(int argc, char **argv)
  43. {
  44. unsigned int round;
  45. unsigned int buckets[31]; // max is 2^31 as 'random' returns only between 0 and 2^31
  46. unsigned int i;
  47. int j;
  48. unsigned int r;
  49. unsigned int ret;
  50. unsigned long long total;
  51. unsigned int want;
  52. double predict;
  53. srandom (time (NULL));
  54. total = 0;
  55. want = atoi (argv[1]);
  56. for (round=0;round<ROUNDS;round++)
  57. {
  58. memset (buckets, 0, sizeof (buckets));
  59. for (i=0;i<want;i++)
  60. {
  61. /* FIXME: might want to use 'better' PRNG to avoid
  62. PRNG-induced biases */
  63. r = random ();
  64. if (0 == r)
  65. continue;
  66. #if SLOW
  67. for (j=0;(j < 31) && (0 == (r & (1 << j)));j++) ;
  68. #else
  69. /* use assembly / gcc */
  70. j = __builtin_ffs (r) - 1;
  71. #endif
  72. buckets[j]++;
  73. }
  74. ret = 0;
  75. predict = 0.0;
  76. for (j=31;j >= 0; j--)
  77. {
  78. #if FIX1
  79. /* improved algorithm, for 1000 elements with IBF-DECODE 8, I
  80. get 990/1000 elements on average over 1 million runs; key
  81. idea being to stop short of the 'last' possible IBF as
  82. otherwise a "lowball" per-chance would unduely influence the
  83. result */
  84. if ( (j > 0) &&
  85. (buckets[j - 1] > MAX_IBF_DECODE) )
  86. {
  87. ret *= (1 << (j + 1));
  88. break;
  89. }
  90. #endif
  91. #if FIX2
  92. /* another improvement: don't just always cut off the last one,
  93. but rather try to predict based on all previous values where
  94. that "last" one is; additional prediction can only really
  95. work if MAX_IBF_DECODE is sufficiently high */
  96. if ( (j > 0) &&
  97. ( (buckets[j - 1] > MAX_IBF_DECODE) ||
  98. (predict > MAX_IBF_DECODE) ) )
  99. {
  100. ret *= (1 << (j + 1));
  101. break;
  102. }
  103. #endif
  104. #if STRATA
  105. /* original algorithm, for 1000 elements with IBF-DECODE 8,
  106. I get 920/1000 elements on average over 1 million runs */
  107. if (buckets[j] > MAX_IBF_DECODE)
  108. {
  109. ret *= (1 << (j+1));
  110. break;
  111. }
  112. #endif
  113. ret += buckets[j];
  114. predict = (buckets[j] + 2.0 * predict) / 2.0;
  115. }
  116. #if VERBOSE
  117. fprintf (stderr, "%u ", ret);
  118. #endif
  119. total += ret;
  120. }
  121. fprintf (stderr, "\n");
  122. fprintf (stdout, "average %llu\n", total / ROUNDS);
  123. return 0;
  124. }
  125. /* TODO: should calculate stddev of the results to also be able to
  126. say something about the stability of the results, outside of
  127. large-scale averages -- gaining 8% precision at the expense of
  128. 50% additional variance might not be worth it... */