buildbuck.c 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. #include "stdinc.h"
  2. #include "dat.h"
  3. #include "fns.h"
  4. /*
  5. * An IEStream is a sorted list of index entries.
  6. */
  7. struct IEStream
  8. {
  9. Part *part;
  10. u64int off; /* read position within part */
  11. u64int n; /* number of valid ientries left to read */
  12. u32int size; /* allocated space in buffer */
  13. u8int *buf;
  14. u8int *pos; /* current place in buffer */
  15. u8int *epos; /* end of valid buffer contents */
  16. };
  17. IEStream*
  18. initiestream(Part *part, u64int off, u64int clumps, u32int size)
  19. {
  20. IEStream *ies;
  21. /* out of memory? */
  22. ies = MKZ(IEStream);
  23. ies->buf = MKN(u8int, size);
  24. ies->epos = ies->buf;
  25. ies->pos = ies->epos;
  26. ies->off = off;
  27. ies->n = clumps;
  28. ies->size = size;
  29. ies->part = part;
  30. return ies;
  31. }
  32. void
  33. freeiestream(IEStream *ies)
  34. {
  35. if(ies == nil)
  36. return;
  37. free(ies->buf);
  38. free(ies);
  39. }
  40. /*
  41. * Return the next IEntry (still packed) in the stream.
  42. */
  43. static u8int*
  44. peekientry(IEStream *ies)
  45. {
  46. u32int n, nn;
  47. n = ies->epos - ies->pos;
  48. if(n < IEntrySize){
  49. memmove(ies->buf, ies->pos, n);
  50. ies->epos = &ies->buf[n];
  51. ies->pos = ies->buf;
  52. nn = ies->size;
  53. if(nn > ies->n * IEntrySize)
  54. nn = ies->n * IEntrySize;
  55. nn -= n;
  56. if(nn == 0)
  57. return nil;
  58. //fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
  59. if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
  60. seterr(EOk, "can't read sorted index entries: %r");
  61. return nil;
  62. }
  63. ies->epos += nn;
  64. ies->off += nn;
  65. }
  66. return ies->pos;
  67. }
  68. /*
  69. * Compute the bucket number for the given IEntry.
  70. * Knows that the score is the first thing in the packed
  71. * representation.
  72. */
  73. static u32int
  74. iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies)
  75. {
  76. USED(ies);
  77. USED(ib);
  78. return hashbits(b, 32) / ix->div;
  79. }
  80. /*
  81. * Fill ib with the next bucket in the stream.
  82. */
  83. u32int
  84. buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
  85. {
  86. IEntry ie1, ie2;
  87. u8int *b;
  88. u32int buck;
  89. buck = TWID32;
  90. ib->n = 0;
  91. while(ies->n){
  92. b = peekientry(ies);
  93. if(b == nil)
  94. return TWID32;
  95. /* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */
  96. if(ib->n == 0)
  97. buck = iebuck(ix, b, ib, ies);
  98. else{
  99. if(buck != iebuck(ix, b, ib, ies))
  100. break;
  101. if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
  102. /*
  103. * guess that the larger address is the correct one to use
  104. */
  105. unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
  106. unpackientry(&ie2, b);
  107. seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
  108. ib->n--;
  109. if(ie1.ia.addr > ie2.ia.addr)
  110. memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
  111. }
  112. }
  113. if((ib->n+1)*IEntrySize > maxdata){
  114. seterr(EOk, "bucket overflow");
  115. return TWID32;
  116. }
  117. memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
  118. ib->n++;
  119. ies->n--;
  120. ies->pos += IEntrySize;
  121. }
  122. return buck;
  123. }