buildbuck.c 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "stdinc.h"
  10. #include "dat.h"
  11. #include "fns.h"
  12. /*
  13. * An IEStream is a sorted list of index entries.
  14. */
  15. struct IEStream
  16. {
  17. Part *part;
  18. uint64_t off; /* read position within part */
  19. uint64_t n; /* number of valid ientries left to read */
  20. uint32_t size; /* allocated space in buffer */
  21. uint8_t *buf;
  22. uint8_t *pos; /* current place in buffer */
  23. uint8_t *epos; /* end of valid buffer contents */
  24. };
  25. IEStream*
  26. initiestream(Part *part, uint64_t off, uint64_t clumps, uint32_t size)
  27. {
  28. IEStream *ies;
  29. /* out of memory? */
  30. ies = MKZ(IEStream);
  31. ies->buf = MKN(uint8_t, size);
  32. ies->epos = ies->buf;
  33. ies->pos = ies->epos;
  34. ies->off = off;
  35. ies->n = clumps;
  36. ies->size = size;
  37. ies->part = part;
  38. return ies;
  39. }
  40. void
  41. freeiestream(IEStream *ies)
  42. {
  43. if(ies == nil)
  44. return;
  45. free(ies->buf);
  46. free(ies);
  47. }
  48. /*
  49. * Return the next IEntry (still packed) in the stream.
  50. */
  51. static uint8_t*
  52. peekientry(IEStream *ies)
  53. {
  54. uint32_t n, nn;
  55. n = ies->epos - ies->pos;
  56. if(n < IEntrySize){
  57. memmove(ies->buf, ies->pos, n);
  58. ies->epos = &ies->buf[n];
  59. ies->pos = ies->buf;
  60. nn = ies->size;
  61. if(nn > ies->n * IEntrySize)
  62. nn = ies->n * IEntrySize;
  63. nn -= n;
  64. if(nn == 0)
  65. return nil;
  66. //fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
  67. if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
  68. seterr(EOk, "can't read sorted index entries: %r");
  69. return nil;
  70. }
  71. ies->epos += nn;
  72. ies->off += nn;
  73. }
  74. return ies->pos;
  75. }
  76. /*
  77. * Compute the bucket number for the given IEntry.
  78. * Knows that the score is the first thing in the packed
  79. * representation.
  80. */
  81. static uint32_t
  82. iebuck(Index *ix, uint8_t *b, IBucket *ib, IEStream *ies)
  83. {
  84. USED(ies);
  85. USED(ib);
  86. return hashbits(b, 32) / ix->div;
  87. }
  88. /*
  89. * Fill ib with the next bucket in the stream.
  90. */
  91. uint32_t
  92. buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
  93. {
  94. IEntry ie1, ie2;
  95. uint8_t *b;
  96. uint32_t buck;
  97. buck = TWID32;
  98. ib->n = 0;
  99. while(ies->n){
  100. b = peekientry(ies);
  101. if(b == nil)
  102. return TWID32;
  103. /* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */
  104. if(ib->n == 0)
  105. buck = iebuck(ix, b, ib, ies);
  106. else{
  107. if(buck != iebuck(ix, b, ib, ies))
  108. break;
  109. if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
  110. /*
  111. * guess that the larger address is the correct one to use
  112. */
  113. unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
  114. unpackientry(&ie2, b);
  115. seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
  116. ib->n--;
  117. if(ie1.ia.addr > ie2.ia.addr)
  118. memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
  119. }
  120. }
  121. if((ib->n+1)*IEntrySize > maxdata){
  122. seterr(EOk, "bucket overflow");
  123. return TWID32;
  124. }
  125. memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
  126. ib->n++;
  127. ies->n--;
  128. ies->pos += IEntrySize;
  129. }
  130. return buck;
  131. }