123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- /*
- * Bloom filter tracking which scores are present in our arenas
- * and (more importantly) which are not.
- */
- #include "stdinc.h"
- #include "dat.h"
- #include "fns.h"
- int ignorebloom;
- int
- bloominit(Bloom *b, vlong vsize, u8int *data)
- {
- ulong size;
-
- size = vsize;
- if(size != vsize){ /* truncation */
- werrstr("bloom data too big");
- return -1;
- }
-
- b->size = size;
- b->nhash = 32; /* will be fixed by caller on initialization */
- if(data != nil)
- if(unpackbloomhead(b, data) < 0)
- return -1;
-
- b->bitmask = (b->size<<3) - 1;
- b->data = data;
- return 0;
- }
- void
- wbbloomhead(Bloom *b)
- {
- packbloomhead(b, b->data);
- }
- Bloom*
- readbloom(Part *p)
- {
- uchar buf[512];
- Bloom *b;
-
- b = vtmallocz(sizeof *b);
- if(readpart(p, 0, buf, sizeof buf) < 0)
- return nil;
- /*
- * pass buf as b->data so that bloominit
- * can parse header. won't be used for
- * accessing bits (cleared below).
- */
- if(bloominit(b, 0, buf) < 0){
- vtfree(b);
- return nil;
- }else{
- /*
- * default block size is system page size.
- * the bloom filter is usually very big.
- * bump the block size up to speed i/o.
- */
- if(p->blocksize < (1<<20)){
- p->blocksize = 1<<20;
- if(p->blocksize > p->size)
- p->blocksize = p->size;
- }
- }
- b->part = p;
- b->data = nil;
- return b;
- }
- int
- resetbloom(Bloom *b)
- {
- uchar *data;
-
- data = vtmallocz(b->size);
- b->data = data;
- if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
- addstat(StatBloomBits, b->size*8-1);
- else
- addstat(StatBloomBits, b->size*8);
- return 0;
- }
- int
- loadbloom(Bloom *b)
- {
- int i, n;
- uint ones;
- uchar *data;
- u32int *a;
-
- data = vtmallocz(b->size);
- if(readpart(b->part, 0, data, b->size) < 0){
- vtfree(b);
- vtfree(data);
- return -1;
- }
- b->data = data;
- a = (u32int*)b->data;
- n = b->size/4;
- ones = 0;
- for(i=0; i<n; i++)
- ones += countbits(a[i]);
- addstat(StatBloomOnes, ones);
- if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
- addstat(StatBloomBits, b->size*8-1);
- else
- addstat(StatBloomBits, b->size*8);
-
- return 0;
- }
- int
- writebloom(Bloom *b)
- {
- wbbloomhead(b);
- if(writepart(b->part, 0, b->data, b->size) < 0)
- return -1;
- if(flushpart(b->part) < 0)
- return -1;
- return 0;
- }
- /*
- * Derive two random 32-bit quantities a, b from the score
- * and then use a+b*i as a sequence of bloom filter indices.
- * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
- * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
- */
- static void
- gethashes(u8int *score, ulong *h)
- {
- int i;
- u32int a, b;
- a = 0;
- b = 0;
- for(i=4; i+8<=VtScoreSize; i+=8){
- a ^= *(u32int*)(score+i);
- b ^= *(u32int*)(score+i+4);
- }
- if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
- a ^= *(u32int*)(score+i);
- for(i=0; i<BloomMaxHash; i++, a+=b)
- h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
- }
- static void
- _markbloomfilter(Bloom *b, u8int *score)
- {
- int i, nnew;
- ulong h[BloomMaxHash];
- u32int x, *y, z, *tab;
- trace("markbloomfilter", "markbloomfilter %V", score);
- gethashes(score, h);
- nnew = 0;
- tab = (u32int*)b->data;
- for(i=0; i<b->nhash; i++){
- x = h[i];
- y = &tab[(x&b->bitmask)>>5];
- z = 1<<(x&31);
- if(!(*y&z)){
- nnew++;
- *y |= z;
- }
- }
- if(nnew)
- addstat(StatBloomOnes, nnew);
- trace("markbloomfilter", "markbloomfilter exit");
- }
- static int
- _inbloomfilter(Bloom *b, u8int *score)
- {
- int i;
- ulong h[BloomMaxHash], x;
- u32int *tab;
- gethashes(score, h);
- tab = (u32int*)b->data;
- for(i=0; i<b->nhash; i++){
- x = h[i];
- if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
- return 0;
- }
- return 1;
- }
- int
- inbloomfilter(Bloom *b, u8int *score)
- {
- int r;
- uint ms;
- if(b == nil || b->data == nil)
- return 1;
- if(ignorebloom)
- return 1;
-
- ms = msec();
- rlock(&b->lk);
- r = _inbloomfilter(b, score);
- runlock(&b->lk);
- ms = ms - msec();
- addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
- if(r)
- addstat(StatBloomMiss, 1);
- else
- addstat(StatBloomHit, 1);
- return r;
- }
- void
- markbloomfilter(Bloom *b, u8int *score)
- {
- if(b == nil || b->data == nil)
- return;
- rlock(&b->lk);
- qlock(&b->mod);
- _markbloomfilter(b, score);
- qunlock(&b->mod);
- runlock(&b->lk);
- }
- static void
- bloomwriteproc(void *v)
- {
- int ret;
- Bloom *b;
- threadsetname("bloomwriteproc");
- b = v;
- for(;;){
- recv(b->writechan, 0);
- if((ret=writebloom(b)) < 0)
- fprint(2, "oops! writing bloom: %r\n");
- else
- ret = 0;
- sendul(b->writedonechan, ret);
- }
- }
- void
- startbloomproc(Bloom *b)
- {
- b->writechan = chancreate(sizeof(void*), 0);
- b->writedonechan = chancreate(sizeof(void*), 0);
- vtproc(bloomwriteproc, b);
- }
|