fmtbloom.c 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. #include "stdinc.h"
  2. #include "dat.h"
  3. #include "fns.h"
  4. Bloom b;	/* the filter being formatted: set up by bloominit in threadmain, then given nhash, data and part and passed to writebloom */
  5. void
  6. usage(void)
  7. {
  8. fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
  9. threadexitsall(0);
  10. }
  11. void
  12. threadmain(int argc, char *argv[])
  13. {
  14. Part *part;
  15. char *file;
  16. vlong bits, size, size2;
  17. int nhash;
  18. vlong nblocks;
  19. ventifmtinstall();
  20. statsinit();
  21. size = 0;
  22. nhash = 0;
  23. nblocks = 0;
  24. ARGBEGIN{
  25. case 'n':
  26. if(nhash || nblocks)
  27. usage();
  28. nblocks = unittoull(EARGF(usage()));
  29. break;
  30. case 'N':
  31. if(nhash || nblocks)
  32. usage();
  33. nhash = unittoull(EARGF(usage()));
  34. if(nhash > BloomMaxHash){
  35. fprint(2, "maximum possible is -N %d", BloomMaxHash);
  36. usage();
  37. }
  38. break;
  39. case 's':
  40. size = unittoull(ARGF());
  41. if(size == ~0)
  42. usage();
  43. break;
  44. default:
  45. usage();
  46. break;
  47. }ARGEND
  48. if(argc != 1)
  49. usage();
  50. file = argv[0];
  51. part = initpart(file, ORDWR|ODIRECT);
  52. if(part == nil)
  53. sysfatal("can't open partition %s: %r", file);
  54. if(size == 0)
  55. size = part->size;
  56. if(size < 1024*1024)
  57. sysfatal("bloom filter too small");
  58. if(size > MaxBloomSize){
  59. fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
  60. size, (vlong)MaxBloomSize);
  61. size = MaxBloomSize;
  62. }
  63. if(size&(size-1)){
  64. for(size2=1; size2<size; size2*=2)
  65. ;
  66. size = size2/2;
  67. fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
  68. }
  69. if(nblocks){
  70. /*
  71. * no use for more than 32 bits per block
  72. * shoot for less than 64 bits per block
  73. */
  74. size2 = size;
  75. while(size2*8 >= nblocks*64)
  76. size2 >>= 1;
  77. if(size2 != size){
  78. size = size2;
  79. fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
  80. size/1024/1024);
  81. }
  82. /*
  83. * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
  84. */
  85. bits = (8*size)/nblocks;
  86. nhash = bits*7/10;
  87. if(nhash > BloomMaxHash)
  88. nhash = BloomMaxHash;
  89. }
  90. if(!nhash)
  91. nhash = BloomMaxHash;
  92. if(bloominit(&b, size, nil) < 0)
  93. sysfatal("bloominit: %r");
  94. b.nhash = nhash;
  95. bits = nhash*10/7;
  96. nblocks = (8*size)/bits;
  97. fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
  98. b.data = vtmallocz(size);
  99. b.part = part;
  100. if(writebloom(&b) < 0)
  101. sysfatal("writing %s: %r", file);
  102. threadexitsall(0);
  103. }