bloom.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. /*
  2. * Bloom filter tracking which scores are present in our arenas
  3. * and (more importantly) which are not.
  4. */
  5. #include "stdinc.h"
  6. #include "dat.h"
  7. #include "fns.h"
  8. int ignorebloom;
  9. int
  10. bloominit(Bloom *b, vlong vsize, u8int *data)
  11. {
  12. ulong size;
  13. size = vsize;
  14. if(size != vsize){ /* truncation */
  15. werrstr("bloom data too big");
  16. return -1;
  17. }
  18. b->size = size;
  19. b->nhash = 32; /* will be fixed by caller on initialization */
  20. if(data != nil)
  21. if(unpackbloomhead(b, data) < 0)
  22. return -1;
  23. b->bitmask = (b->size<<3) - 1;
  24. b->data = data;
  25. return 0;
  26. }
  27. void
  28. wbbloomhead(Bloom *b)
  29. {
  30. packbloomhead(b, b->data);
  31. }
  32. Bloom*
  33. readbloom(Part *p)
  34. {
  35. uchar buf[512];
  36. Bloom *b;
  37. b = vtmallocz(sizeof *b);
  38. if(readpart(p, 0, buf, sizeof buf) < 0)
  39. return nil;
  40. /*
  41. * pass buf as b->data so that bloominit
  42. * can parse header. won't be used for
  43. * accessing bits (cleared below).
  44. */
  45. if(bloominit(b, 0, buf) < 0){
  46. vtfree(b);
  47. freepart(p);
  48. return nil;
  49. }else{
  50. /*
  51. * default block size is system page size.
  52. * the bloom filter is usually very big.
  53. * bump the block size up to speed i/o.
  54. */
  55. if(p->blocksize < (1<<20)){
  56. p->blocksize = 1<<20;
  57. if(p->blocksize > p->size)
  58. p->blocksize = p->size;
  59. }
  60. }
  61. b->part = p;
  62. b->data = nil;
  63. return b;
  64. }
  65. int
  66. resetbloom(Bloom *b)
  67. {
  68. uchar *data;
  69. data = vtmallocz(b->size);
  70. b->data = data;
  71. if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
  72. addstat(StatBloomBits, b->size*8-1);
  73. else
  74. addstat(StatBloomBits, b->size*8);
  75. return 0;
  76. }
  77. int
  78. loadbloom(Bloom *b)
  79. {
  80. int i, n;
  81. uint ones;
  82. uchar *data;
  83. u32int *a;
  84. data = vtmallocz(b->size);
  85. if(readpart(b->part, 0, data, b->size) < 0){
  86. vtfree(b);
  87. vtfree(data);
  88. return -1;
  89. }
  90. b->data = data;
  91. a = (u32int*)b->data;
  92. n = b->size/4;
  93. ones = 0;
  94. for(i=0; i<n; i++)
  95. ones += countbits(a[i]);
  96. addstat(StatBloomOnes, ones);
  97. if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
  98. addstat(StatBloomBits, b->size*8-1);
  99. else
  100. addstat(StatBloomBits, b->size*8);
  101. return 0;
  102. }
  103. int
  104. writebloom(Bloom *b)
  105. {
  106. wbbloomhead(b);
  107. if(writepart(b->part, 0, b->data, b->size) < 0)
  108. return -1;
  109. if(flushpart(b->part) < 0)
  110. return -1;
  111. return 0;
  112. }
  113. /*
  114. * Derive two random 32-bit quantities a, b from the score
  115. * and then use a+b*i as a sequence of bloom filter indices.
  116. * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
  117. * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
  118. */
  119. static void
  120. gethashes(u8int *score, ulong *h)
  121. {
  122. int i;
  123. u32int a, b;
  124. a = 0;
  125. b = 0;
  126. for(i=4; i+8<=VtScoreSize; i+=8){
  127. a ^= *(u32int*)(score+i);
  128. b ^= *(u32int*)(score+i+4);
  129. }
  130. if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
  131. a ^= *(u32int*)(score+i);
  132. for(i=0; i<BloomMaxHash; i++, a+=b)
  133. h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
  134. }
  135. static void
  136. _markbloomfilter(Bloom *b, u8int *score)
  137. {
  138. int i, nnew;
  139. ulong h[BloomMaxHash];
  140. u32int x, *y, z, *tab;
  141. trace("markbloomfilter", "markbloomfilter %V", score);
  142. gethashes(score, h);
  143. nnew = 0;
  144. tab = (u32int*)b->data;
  145. for(i=0; i<b->nhash; i++){
  146. x = h[i];
  147. y = &tab[(x&b->bitmask)>>5];
  148. z = 1<<(x&31);
  149. if(!(*y&z)){
  150. nnew++;
  151. *y |= z;
  152. }
  153. }
  154. if(nnew)
  155. addstat(StatBloomOnes, nnew);
  156. trace("markbloomfilter", "markbloomfilter exit");
  157. }
  158. static int
  159. _inbloomfilter(Bloom *b, u8int *score)
  160. {
  161. int i;
  162. ulong h[BloomMaxHash], x;
  163. u32int *tab;
  164. gethashes(score, h);
  165. tab = (u32int*)b->data;
  166. for(i=0; i<b->nhash; i++){
  167. x = h[i];
  168. if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
  169. return 0;
  170. }
  171. return 1;
  172. }
  173. int
  174. inbloomfilter(Bloom *b, u8int *score)
  175. {
  176. int r;
  177. uint ms;
  178. if(b == nil || b->data == nil)
  179. return 1;
  180. if(ignorebloom)
  181. return 1;
  182. ms = msec();
  183. rlock(&b->lk);
  184. r = _inbloomfilter(b, score);
  185. runlock(&b->lk);
  186. ms = ms - msec();
  187. addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
  188. if(r)
  189. addstat(StatBloomMiss, 1);
  190. else
  191. addstat(StatBloomHit, 1);
  192. return r;
  193. }
  194. void
  195. markbloomfilter(Bloom *b, u8int *score)
  196. {
  197. if(b == nil || b->data == nil)
  198. return;
  199. rlock(&b->lk);
  200. qlock(&b->mod);
  201. _markbloomfilter(b, score);
  202. qunlock(&b->mod);
  203. runlock(&b->lk);
  204. }
  205. static void
  206. bloomwriteproc(void *v)
  207. {
  208. int ret;
  209. Bloom *b;
  210. threadsetname("bloomwriteproc");
  211. b = v;
  212. for(;;){
  213. recv(b->writechan, 0);
  214. if((ret=writebloom(b)) < 0)
  215. fprint(2, "oops! writing bloom: %r\n");
  216. else
  217. ret = 0;
  218. sendul(b->writedonechan, ret);
  219. }
  220. }
  221. void
  222. startbloomproc(Bloom *b)
  223. {
  224. b->writechan = chancreate(sizeof(void*), 0);
  225. b->writedonechan = chancreate(sizeof(void*), 0);
  226. vtproc(bloomwriteproc, b);
  227. }