index.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862
  1. /*
  2. * Index, mapping scores to log positions.
  3. *
  4. * The index is made up of some number of index sections, each of
  5. * which is typically stored on a different disk. The blocks in all the
  6. * index sections are logically numbered, with each index section
  7. * responsible for a range of blocks. Blocks are typically 8kB.
  8. *
  9. * The N index blocks are treated as a giant hash table. The top 32 bits
  10. * of score are used as the key for a lookup. Each index block holds
  11. * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
  12. *
  13. * The index is sized so that a particular bucket is extraordinarily
  14. * unlikely to overflow: assuming compressed data blocks are 4kB
  15. * on disk, and assuming each block has a 40 byte index entry,
  16. * the index data will be 1% of the total data. Since scores are essentially
  17. * random, all buckets should be about the same fullness.
  18. * A factor of 5 gives us a wide comfort boundary to account for
  19. * random variation. So the index disk space should be 5% of the arena disk space.
  20. */
  21. #include "stdinc.h"
  22. #include "dat.h"
  23. #include "fns.h"
  24. static int initindex1(Index*);
  25. static ISect *initisect1(ISect *is);
  26. #define KEY(k,d) ((d) ? (k)>>(32-(d)) : 0)
  27. static char IndexMagic[] = "venti index configuration";
  28. Index*
  29. initindex(char *name, ISect **sects, int n)
  30. {
  31. IFile f;
  32. Index *ix;
  33. ISect *is;
  34. u32int last, blocksize, tabsize;
  35. int i;
  36. if(n <= 0){
  37. fprint(2, "bad n\n");
  38. seterr(EOk, "no index sections to initialize index");
  39. return nil;
  40. }
  41. ix = MKZ(Index);
  42. if(ix == nil){
  43. fprint(2, "no mem\n");
  44. seterr(EOk, "can't initialize index: out of memory");
  45. freeindex(ix);
  46. return nil;
  47. }
  48. tabsize = sects[0]->tabsize;
  49. if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
  50. return nil;
  51. if(parseindex(&f, ix) < 0){
  52. freeifile(&f);
  53. freeindex(ix);
  54. return nil;
  55. }
  56. freeifile(&f);
  57. if(namecmp(ix->name, name) != 0){
  58. seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
  59. return nil;
  60. }
  61. if(ix->nsects != n){
  62. seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
  63. freeindex(ix);
  64. return nil;
  65. }
  66. ix->sects = sects;
  67. last = 0;
  68. blocksize = ix->blocksize;
  69. for(i = 0; i < ix->nsects; i++){
  70. is = sects[i];
  71. if(namecmp(ix->name, is->index) != 0
  72. || is->blocksize != blocksize
  73. || is->tabsize != tabsize
  74. || namecmp(is->name, ix->smap[i].name) != 0
  75. || is->start != ix->smap[i].start
  76. || is->stop != ix->smap[i].stop
  77. || last != is->start
  78. || is->start > is->stop){
  79. seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
  80. freeindex(ix);
  81. return nil;
  82. }
  83. last = is->stop;
  84. }
  85. ix->tabsize = tabsize;
  86. ix->buckets = last;
  87. if(initindex1(ix) < 0){
  88. freeindex(ix);
  89. return nil;
  90. }
  91. ix->arenas = MKNZ(Arena*, ix->narenas);
  92. if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
  93. freeindex(ix);
  94. return nil;
  95. }
  96. return ix;
  97. }
  98. static int
  99. initindex1(Index *ix)
  100. {
  101. u32int buckets;
  102. ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
  103. buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
  104. if(buckets != ix->buckets){
  105. seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
  106. return -1;
  107. }
  108. return 0;
  109. }
  110. int
  111. wbindex(Index *ix)
  112. {
  113. Fmt f;
  114. ZBlock *b;
  115. int i;
  116. if(ix->nsects == 0){
  117. seterr(EOk, "no sections in index %s", ix->name);
  118. return -1;
  119. }
  120. b = alloczblock(ix->tabsize, 1, ix->blocksize);
  121. if(b == nil){
  122. seterr(EOk, "can't write index configuration: out of memory");
  123. return -1;
  124. }
  125. fmtzbinit(&f, b);
  126. if(outputindex(&f, ix) < 0){
  127. seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
  128. freezblock(b);
  129. return -1;
  130. }
  131. for(i = 0; i < ix->nsects; i++){
  132. if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0
  133. || flushpart(ix->sects[i]->part) < 0){
  134. seterr(EOk, "can't write index: %r");
  135. freezblock(b);
  136. return -1;
  137. }
  138. }
  139. freezblock(b);
  140. for(i = 0; i < ix->nsects; i++)
  141. if(wbisect(ix->sects[i]) < 0)
  142. return -1;
  143. return 0;
  144. }
  145. /*
  146. * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
  147. * version, blocksize: u32int
  148. * name: max. ANameSize string
  149. * sections, arenas: AMap
  150. */
  151. int
  152. outputindex(Fmt *f, Index *ix)
  153. {
  154. if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
  155. || outputamap(f, ix->smap, ix->nsects) < 0
  156. || outputamap(f, ix->amap, ix->narenas) < 0)
  157. return -1;
  158. return 0;
  159. }
  160. int
  161. parseindex(IFile *f, Index *ix)
  162. {
  163. AMapN amn;
  164. u32int v;
  165. char *s;
  166. /*
  167. * magic
  168. */
  169. s = ifileline(f);
  170. if(s == nil || strcmp(s, IndexMagic) != 0){
  171. seterr(ECorrupt, "bad index magic for %s", f->name);
  172. return -1;
  173. }
  174. /*
  175. * version
  176. */
  177. if(ifileu32int(f, &v) < 0){
  178. seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
  179. return -1;
  180. }
  181. ix->version = v;
  182. if(ix->version != IndexVersion){
  183. seterr(ECorrupt, "bad version number in %s", f->name);
  184. return -1;
  185. }
  186. /*
  187. * name
  188. */
  189. if(ifilename(f, ix->name) < 0){
  190. seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
  191. return -1;
  192. }
  193. /*
  194. * block size
  195. */
  196. if(ifileu32int(f, &v) < 0){
  197. seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
  198. return -1;
  199. }
  200. ix->blocksize = v;
  201. if(parseamap(f, &amn) < 0)
  202. return -1;
  203. ix->nsects = amn.n;
  204. ix->smap = amn.map;
  205. if(parseamap(f, &amn) < 0)
  206. return -1;
  207. ix->narenas = amn.n;
  208. ix->amap = amn.map;
  209. return 0;
  210. }
  211. /*
  212. * initialize an entirely new index
  213. */
  214. Index *
  215. newindex(char *name, ISect **sects, int n)
  216. {
  217. Index *ix;
  218. AMap *smap;
  219. u64int nb;
  220. u32int div, ub, xb, start, stop, blocksize, tabsize;
  221. int i, j;
  222. if(n < 1){
  223. seterr(EOk, "creating index with no index sections");
  224. return nil;
  225. }
  226. /*
  227. * compute the total buckets available in the index,
  228. * and the total buckets which are used.
  229. */
  230. nb = 0;
  231. blocksize = sects[0]->blocksize;
  232. tabsize = sects[0]->tabsize;
  233. for(i = 0; i < n; i++){
  234. /*
  235. * allow index, start, and stop to be set if index is correct
  236. * and start and stop are what we would have picked.
  237. * this allows calling fmtindex to reformat the index after
  238. * replacing a bad index section with a freshly formatted one.
  239. * start and stop are checked below.
  240. */
  241. if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){
  242. seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
  243. return nil;
  244. }
  245. if(blocksize != sects[i]->blocksize){
  246. seterr(EOk, "mismatched block sizes in index sections");
  247. return nil;
  248. }
  249. if(tabsize != sects[i]->tabsize){
  250. seterr(EOk, "mismatched config table sizes in index sections");
  251. return nil;
  252. }
  253. nb += sects[i]->blocks;
  254. }
  255. /*
  256. * check for duplicate names
  257. */
  258. for(i = 0; i < n; i++){
  259. for(j = i + 1; j < n; j++){
  260. if(namecmp(sects[i]->name, sects[j]->name) == 0){
  261. seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
  262. return nil;
  263. }
  264. }
  265. }
  266. if(nb >= ((u64int)1 << 32)){
  267. seterr(EBug, "index too large");
  268. return nil;
  269. }
  270. div = (((u64int)1 << 32) + nb - 1) / nb;
  271. ub = (((u64int)1 << 32) - 1) / div + 1;
  272. if(div < 100){
  273. seterr(EBug, "index divisor too coarse");
  274. return nil;
  275. }
  276. if(ub > nb){
  277. seterr(EBug, "index initialization math wrong");
  278. return nil;
  279. }
  280. xb = nb - ub;
  281. /*
  282. * initialize each of the index sections
  283. * and the section map table
  284. */
  285. smap = MKNZ(AMap, n);
  286. if(smap == nil){
  287. seterr(EOk, "can't create new index: out of memory");
  288. return nil;
  289. }
  290. start = 0;
  291. for(i = 0; i < n; i++){
  292. stop = start + sects[i]->blocks - xb / n;
  293. if(i == n - 1)
  294. stop = ub;
  295. if(sects[i]->start != 0 || sects[i]->stop != 0)
  296. if(sects[i]->start != start || sects[i]->stop != stop){
  297. seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
  298. return nil;
  299. }
  300. sects[i]->start = start;
  301. sects[i]->stop = stop;
  302. namecp(sects[i]->index, name);
  303. smap[i].start = start;
  304. smap[i].stop = stop;
  305. namecp(smap[i].name, sects[i]->name);
  306. start = stop;
  307. }
  308. /*
  309. * initialize the index itself
  310. */
  311. ix = MKZ(Index);
  312. if(ix == nil){
  313. seterr(EOk, "can't create new index: out of memory");
  314. free(smap);
  315. return nil;
  316. }
  317. ix->version = IndexVersion;
  318. namecp(ix->name, name);
  319. ix->sects = sects;
  320. ix->smap = smap;
  321. ix->nsects = n;
  322. ix->blocksize = blocksize;
  323. ix->buckets = ub;
  324. ix->tabsize = tabsize;
  325. ix->div = div;
  326. if(initindex1(ix) < 0){
  327. free(smap);
  328. return nil;
  329. }
  330. return ix;
  331. }
  332. ISect*
  333. initisect(Part *part)
  334. {
  335. ISect *is;
  336. ZBlock *b;
  337. int ok;
  338. b = alloczblock(HeadSize, 0, 0);
  339. if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
  340. seterr(EAdmin, "can't read index section header: %r");
  341. return nil;
  342. }
  343. is = MKZ(ISect);
  344. if(is == nil){
  345. freezblock(b);
  346. return nil;
  347. }
  348. is->part = part;
  349. ok = unpackisect(is, b->data);
  350. freezblock(b);
  351. if(ok < 0){
  352. seterr(ECorrupt, "corrupted index section header: %r");
  353. freeisect(is);
  354. return nil;
  355. }
  356. if(is->version != ISectVersion1 && is->version != ISectVersion2){
  357. seterr(EAdmin, "unknown index section version %d", is->version);
  358. freeisect(is);
  359. return nil;
  360. }
  361. return initisect1(is);
  362. }
  363. ISect*
  364. newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
  365. {
  366. ISect *is;
  367. u32int tabbase;
  368. is = MKZ(ISect);
  369. if(is == nil)
  370. return nil;
  371. namecp(is->name, name);
  372. is->version = vers;
  373. is->part = part;
  374. is->blocksize = blocksize;
  375. is->start = 0;
  376. is->stop = 0;
  377. tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
  378. is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
  379. is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
  380. is->bucketmagic = 0;
  381. if(is->version == ISectVersion2){
  382. do{
  383. is->bucketmagic = fastrand();
  384. }while(is->bucketmagic==0);
  385. }
  386. is = initisect1(is);
  387. if(is == nil)
  388. return nil;
  389. return is;
  390. }
  391. /*
  392. * initialize the computed parameters for an index
  393. */
  394. static ISect*
  395. initisect1(ISect *is)
  396. {
  397. u64int v;
  398. is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
  399. is->blocklog = u64log2(is->blocksize);
  400. if(is->blocksize != (1 << is->blocklog)){
  401. seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
  402. freeisect(is);
  403. return nil;
  404. }
  405. partblocksize(is->part, is->blocksize);
  406. is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
  407. if(is->tabbase >= is->blockbase){
  408. seterr(ECorrupt, "index section config table overlaps bucket storage");
  409. freeisect(is);
  410. return nil;
  411. }
  412. is->tabsize = is->blockbase - is->tabbase;
  413. v = is->part->size & ~(u64int)(is->blocksize - 1);
  414. if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
  415. seterr(ECorrupt, "invalid blocks in index section %s", is->name);
  416. /* ZZZ what to do?
  417. freeisect(is);
  418. return nil;
  419. */
  420. }
  421. if(is->stop - is->start > is->blocks){
  422. seterr(ECorrupt, "index section overflows available space");
  423. freeisect(is);
  424. return nil;
  425. }
  426. if(is->start > is->stop){
  427. seterr(ECorrupt, "invalid index section range");
  428. freeisect(is);
  429. return nil;
  430. }
  431. return is;
  432. }
  433. int
  434. wbisect(ISect *is)
  435. {
  436. ZBlock *b;
  437. b = alloczblock(HeadSize, 1, 0);
  438. if(b == nil){
  439. /* ZZZ set error? */
  440. return -1;
  441. }
  442. if(packisect(is, b->data) < 0){
  443. seterr(ECorrupt, "can't make index section header: %r");
  444. freezblock(b);
  445. return -1;
  446. }
  447. if(writepart(is->part, PartBlank, b->data, HeadSize) < 0 || flushpart(is->part) < 0){
  448. seterr(EAdmin, "can't write index section header: %r");
  449. freezblock(b);
  450. return -1;
  451. }
  452. freezblock(b);
  453. return 0;
  454. }
  455. void
  456. freeisect(ISect *is)
  457. {
  458. if(is == nil)
  459. return;
  460. free(is);
  461. }
  462. void
  463. freeindex(Index *ix)
  464. {
  465. int i;
  466. if(ix == nil)
  467. return;
  468. free(ix->amap);
  469. free(ix->arenas);
  470. if(ix->sects)
  471. for(i = 0; i < ix->nsects; i++)
  472. freeisect(ix->sects[i]);
  473. free(ix->sects);
  474. free(ix->smap);
  475. free(ix);
  476. }
  477. /*
  478. * write a clump to an available arena in the index
  479. * and return the address of the clump within the index.
  480. ZZZ question: should this distinguish between an arena
  481. filling up and real errors writing the clump?
  482. */
  483. u64int
  484. writeiclump(Index *ix, Clump *c, u8int *clbuf)
  485. {
  486. u64int a;
  487. int i;
  488. IAddr ia;
  489. AState as;
  490. trace(TraceLump, "writeiclump enter");
  491. qlock(&ix->writing);
  492. for(i = ix->mapalloc; i < ix->narenas; i++){
  493. a = writeaclump(ix->arenas[i], c, clbuf);
  494. if(a != TWID64){
  495. ix->mapalloc = i;
  496. ia.addr = ix->amap[i].start + a;
  497. ia.type = c->info.type;
  498. ia.size = c->info.uncsize;
  499. ia.blocks = (c->info.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
  500. as.arena = ix->arenas[i];
  501. as.aa = ia.addr;
  502. as.stats = as.arena->memstats;
  503. insertscore(c->info.score, &ia, IEDirty, &as);
  504. qunlock(&ix->writing);
  505. trace(TraceLump, "writeiclump exit");
  506. return ia.addr;
  507. }
  508. }
  509. qunlock(&ix->writing);
  510. seterr(EAdmin, "no space left in arenas");
  511. trace(TraceLump, "writeiclump failed");
  512. return TWID64;
  513. }
  514. /*
  515. * convert an arena index to an relative arena address
  516. */
  517. Arena*
  518. amapitoa(Index *ix, u64int a, u64int *aa)
  519. {
  520. int i, r, l, m;
  521. l = 1;
  522. r = ix->narenas - 1;
  523. while(l <= r){
  524. m = (r + l) / 2;
  525. if(ix->amap[m].start <= a)
  526. l = m + 1;
  527. else
  528. r = m - 1;
  529. }
  530. l--;
  531. if(a > ix->amap[l].stop){
  532. for(i=0; i<ix->narenas; i++)
  533. print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
  534. print("want arena %d for %llux\n", l, a);
  535. seterr(ECrash, "unmapped address passed to amapitoa");
  536. return nil;
  537. }
  538. if(ix->arenas[l] == nil){
  539. seterr(ECrash, "unmapped arena selected in amapitoa");
  540. return nil;
  541. }
  542. *aa = a - ix->amap[l].start;
  543. return ix->arenas[l];
  544. }
  545. /*
  546. * convert an arena index to the bounds of the containing arena group.
  547. */
  548. Arena*
  549. amapitoag(Index *ix, u64int a, u64int *gstart, u64int *glimit, int *g)
  550. {
  551. u64int aa;
  552. Arena *arena;
  553. arena = amapitoa(ix, a, &aa);
  554. if(arena == nil)
  555. return nil;
  556. if(arenatog(arena, aa, gstart, glimit, g) < 0)
  557. return nil;
  558. *gstart += a - aa;
  559. *glimit += a - aa;
  560. return arena;
  561. }
  562. int
  563. iaddrcmp(IAddr *ia1, IAddr *ia2)
  564. {
  565. return ia1->type != ia2->type
  566. || ia1->size != ia2->size
  567. || ia1->blocks != ia2->blocks
  568. || ia1->addr != ia2->addr;
  569. }
  570. /*
  571. * lookup the score in the partition
  572. *
  573. * nothing needs to be explicitly locked:
  574. * only static parts of ix are used, and
  575. * the bucket is locked by the DBlock lock.
  576. */
  577. int
  578. loadientry(Index *ix, u8int *score, int type, IEntry *ie)
  579. {
  580. ISect *is;
  581. DBlock *b;
  582. IBucket ib;
  583. u32int buck;
  584. int h, ok;
  585. ok = -1;
  586. trace(TraceLump, "loadientry enter");
  587. /*
  588. qlock(&stats.lock);
  589. stats.indexreads++;
  590. qunlock(&stats.lock);
  591. */
  592. if(!inbloomfilter(mainindex->bloom, score)){
  593. trace(TraceLump, "loadientry bloomhit");
  594. return -1;
  595. }
  596. trace(TraceLump, "loadientry loadibucket");
  597. b = loadibucket(ix, score, &is, &buck, &ib);
  598. trace(TraceLump, "loadientry loadedibucket");
  599. if(b == nil)
  600. return -1;
  601. if(okibucket(&ib, is) < 0){
  602. trace(TraceLump, "loadientry badbucket");
  603. goto out;
  604. }
  605. h = bucklook(score, type, ib.data, ib.n);
  606. if(h & 1){
  607. h ^= 1;
  608. trace(TraceLump, "loadientry found");
  609. unpackientry(ie, &ib.data[h]);
  610. ok = 0;
  611. goto out;
  612. }
  613. trace(TraceLump, "loadientry notfound");
  614. addstat(StatBloomFalseMiss, 1);
  615. out:
  616. putdblock(b);
  617. trace(TraceLump, "loadientry exit");
  618. return ok;
  619. }
  620. int
  621. okibucket(IBucket *ib, ISect *is)
  622. {
  623. if(ib->n <= is->buckmax)
  624. return 0;
  625. seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
  626. ib->n, is->buckmax, is->start, is->stop);
  627. return -1;
  628. }
  629. /*
  630. * look for score within data;
  631. * return 1 | byte index of matching index,
  632. * or 0 | index of least element > score
  633. */
  634. int
  635. bucklook(u8int *score, int otype, u8int *data, int n)
  636. {
  637. int i, r, l, m, h, c, cc, type;
  638. if(otype == -1)
  639. type = -1;
  640. else
  641. type = vttodisktype(otype);
  642. l = 0;
  643. r = n - 1;
  644. while(l <= r){
  645. m = (r + l) >> 1;
  646. h = m * IEntrySize;
  647. for(i = 0; i < VtScoreSize; i++){
  648. c = score[i];
  649. cc = data[h + i];
  650. if(c != cc){
  651. if(c > cc)
  652. l = m + 1;
  653. else
  654. r = m - 1;
  655. goto cont;
  656. }
  657. }
  658. cc = data[h + IEntryTypeOff];
  659. if(type != cc && type != -1){
  660. if(type > cc)
  661. l = m + 1;
  662. else
  663. r = m - 1;
  664. goto cont;
  665. }
  666. return h | 1;
  667. cont:;
  668. }
  669. return l * IEntrySize;
  670. }
  671. /*
  672. * compare two IEntries; consistent with bucklook
  673. */
  674. int
  675. ientrycmp(const void *vie1, const void *vie2)
  676. {
  677. u8int *ie1, *ie2;
  678. int i, v1, v2;
  679. ie1 = (u8int*)vie1;
  680. ie2 = (u8int*)vie2;
  681. for(i = 0; i < VtScoreSize; i++){
  682. v1 = ie1[i];
  683. v2 = ie2[i];
  684. if(v1 != v2){
  685. if(v1 < v2)
  686. return -1;
  687. return 1;
  688. }
  689. }
  690. v1 = ie1[IEntryTypeOff];
  691. v2 = ie2[IEntryTypeOff];
  692. if(v1 != v2){
  693. if(v1 < v2)
  694. return -1;
  695. return 1;
  696. }
  697. return 0;
  698. }
  699. /*
  700. * find the number of the index section holding bucket #buck
  701. */
  702. int
  703. indexsect0(Index *ix, u32int buck)
  704. {
  705. int r, l, m;
  706. l = 1;
  707. r = ix->nsects - 1;
  708. while(l <= r){
  709. m = (r + l) >> 1;
  710. if(ix->sects[m]->start <= buck)
  711. l = m + 1;
  712. else
  713. r = m - 1;
  714. }
  715. return l - 1;
  716. }
  717. /*
  718. * load the index block at bucket #buck
  719. */
  720. static DBlock*
  721. loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
  722. {
  723. ISect *is;
  724. DBlock *b;
  725. is = ix->sects[indexsect0(ix, buck)];
  726. if(buck < is->start || is->stop <= buck){
  727. seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
  728. return nil;
  729. }
  730. buck -= is->start;
  731. if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), mode)) == nil)
  732. return nil;
  733. if(pis)
  734. *pis = is;
  735. if(pbuck)
  736. *pbuck = buck;
  737. if(ib)
  738. unpackibucket(ib, b->data, is->bucketmagic);
  739. return b;
  740. }
  741. /*
  742. * find the number of the index section holding score
  743. */
  744. int
  745. indexsect1(Index *ix, u8int *score)
  746. {
  747. return indexsect0(ix, hashbits(score, 32) / ix->div);
  748. }
  749. /*
  750. * load the index block responsible for score.
  751. */
  752. static DBlock*
  753. loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
  754. {
  755. return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
  756. }
  757. int
  758. indexsect(Index *ix, u8int *score)
  759. {
  760. return indexsect1(ix, score);
  761. }
  762. DBlock*
  763. loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
  764. {
  765. return loadibucket1(ix, score, pis, pbuck, ib);
  766. }