123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- #include "stdinc.h"
- #include "dat.h"
- #include "fns.h"
- Bloom b;
/*
 * Print the command-line synopsis on file descriptor 2 and terminate
 * every thread in the program.  Does not return.
 *
 * The exit string is non-nil so that a bad invocation is reported to
 * the caller as a failure rather than as a successful run.
 */
void
usage(void)
{
	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
	threadexitsall("usage");
}
- void
- threadmain(int argc, char *argv[])
- {
- Part *part;
- char *file;
- vlong bits, size, size2;
- int nhash;
- vlong nblocks;
-
- ventifmtinstall();
- statsinit();
- size = 0;
- nhash = 0;
- nblocks = 0;
- ARGBEGIN{
- case 'n':
- if(nhash || nblocks)
- usage();
- nblocks = unittoull(EARGF(usage()));
- break;
- case 'N':
- if(nhash || nblocks)
- usage();
- nhash = unittoull(EARGF(usage()));
- if(nhash > BloomMaxHash){
- fprint(2, "maximum possible is -N %d", BloomMaxHash);
- usage();
- }
- break;
- case 's':
- size = unittoull(ARGF());
- if(size == ~0)
- usage();
- break;
- default:
- usage();
- break;
- }ARGEND
- if(argc != 1)
- usage();
- file = argv[0];
- part = initpart(file, ORDWR|ODIRECT);
- if(part == nil)
- sysfatal("can't open partition %s: %r", file);
- if(size == 0)
- size = part->size;
-
- if(size < 1024*1024)
- sysfatal("bloom filter too small");
- if(size > MaxBloomSize){
- fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
- size, (vlong)MaxBloomSize);
- size = MaxBloomSize;
- }
- if(size&(size-1)){
- for(size2=1; size2<size; size2*=2)
- ;
- size = size2/2;
- fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
- }
- if(nblocks){
- /*
- * no use for more than 32 bits per block
- * shoot for less than 64 bits per block
- */
- size2 = size;
- while(size2*8 >= nblocks*64)
- size2 >>= 1;
- if(size2 != size){
- size = size2;
- fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
- size/1024/1024);
- }
- /*
- * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
- */
- bits = (8*size)/nblocks;
- nhash = bits*7/10;
- if(nhash > BloomMaxHash)
- nhash = BloomMaxHash;
- }
- if(!nhash)
- nhash = BloomMaxHash;
- if(bloominit(&b, size, nil) < 0)
- sysfatal("bloominit: %r");
- b.nhash = nhash;
- bits = nhash*10/7;
- nblocks = (8*size)/bits;
- fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
- b.data = vtmallocz(size);
- b.part = part;
- if(writebloom(&b) < 0)
- sysfatal("writing %s: %r", file);
- threadexitsall(0);
- }
|