index.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * Index, mapping scores to log positions.
  11. *
  12. * The index is made up of some number of index sections, each of
  13. * which is typically stored on a different disk. The blocks in all the
  14. * index sections are logically numbered, with each index section
  15. * responsible for a range of blocks. Blocks are typically 8kB.
  16. *
  17. * The N index blocks are treated as a giant hash table. The top 32 bits
  18. * of score are used as the key for a lookup. Each index block holds
  19. * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
  20. *
  21. * The index is sized so that a particular bucket is extraordinarily
  22. * unlikely to overflow: assuming compressed data blocks are 4kB
  23. * on disk, and assuming each block has a 40 byte index entry,
  24. * the index data will be 1% of the total data. Since scores are essentially
  25. * random, all buckets should be about the same fullness.
  26. * A factor of 5 gives us a wide comfort boundary to account for
  27. * random variation. So the index disk space should be 5% of the arena disk space.
  28. */
  29. #include "stdinc.h"
  30. #include "dat.h"
  31. #include "fns.h"
  32. static int initindex1(Index*);
  33. static ISect *initisect1(ISect *is);
  34. #define KEY(k,d) ((d) ? (k)>>(32-(d)) : 0)
  35. static char IndexMagic[] = "venti index configuration";
  36. Index*
  37. initindex(char *name, ISect **sects, int n)
  38. {
  39. IFile f;
  40. Index *ix;
  41. ISect *is;
  42. uint32_t last, blocksize, tabsize;
  43. int i;
  44. if(n <= 0){
  45. fprint(2, "bad n\n");
  46. seterr(EOk, "no index sections to initialize index");
  47. return nil;
  48. }
  49. ix = MKZ(Index);
  50. if(ix == nil){
  51. fprint(2, "no mem\n");
  52. seterr(EOk, "can't initialize index: out of memory");
  53. freeindex(ix);
  54. return nil;
  55. }
  56. tabsize = sects[0]->tabsize;
  57. if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
  58. return nil;
  59. if(parseindex(&f, ix) < 0){
  60. freeifile(&f);
  61. freeindex(ix);
  62. return nil;
  63. }
  64. freeifile(&f);
  65. if(namecmp(ix->name, name) != 0){
  66. seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
  67. return nil;
  68. }
  69. if(ix->nsects != n){
  70. seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
  71. freeindex(ix);
  72. return nil;
  73. }
  74. ix->sects = sects;
  75. last = 0;
  76. blocksize = ix->blocksize;
  77. for(i = 0; i < ix->nsects; i++){
  78. is = sects[i];
  79. if(namecmp(ix->name, is->index) != 0
  80. || is->blocksize != blocksize
  81. || is->tabsize != tabsize
  82. || namecmp(is->name, ix->smap[i].name) != 0
  83. || is->start != ix->smap[i].start
  84. || is->stop != ix->smap[i].stop
  85. || last != is->start
  86. || is->start > is->stop){
  87. seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
  88. freeindex(ix);
  89. return nil;
  90. }
  91. last = is->stop;
  92. }
  93. ix->tabsize = tabsize;
  94. ix->buckets = last;
  95. if(initindex1(ix) < 0){
  96. freeindex(ix);
  97. return nil;
  98. }
  99. ix->arenas = MKNZ(Arena*, ix->narenas);
  100. if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
  101. freeindex(ix);
  102. return nil;
  103. }
  104. return ix;
  105. }
  106. static int
  107. initindex1(Index *ix)
  108. {
  109. uint32_t buckets;
  110. ix->div = (((uint64_t)1 << 32) + ix->buckets - 1) / ix->buckets;
  111. buckets = (((uint64_t)1 << 32) - 1) / ix->div + 1;
  112. if(buckets != ix->buckets){
  113. seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
  114. return -1;
  115. }
  116. return 0;
  117. }
  118. int
  119. wbindex(Index *ix)
  120. {
  121. Fmt f;
  122. ZBlock *b;
  123. int i;
  124. if(ix->nsects == 0){
  125. seterr(EOk, "no sections in index %s", ix->name);
  126. return -1;
  127. }
  128. b = alloczblock(ix->tabsize, 1, ix->blocksize);
  129. if(b == nil){
  130. seterr(EOk, "can't write index configuration: out of memory");
  131. return -1;
  132. }
  133. fmtzbinit(&f, b);
  134. if(outputindex(&f, ix) < 0){
  135. seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
  136. freezblock(b);
  137. return -1;
  138. }
  139. for(i = 0; i < ix->nsects; i++){
  140. if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0
  141. || flushpart(ix->sects[i]->part) < 0){
  142. seterr(EOk, "can't write index: %r");
  143. freezblock(b);
  144. return -1;
  145. }
  146. }
  147. freezblock(b);
  148. for(i = 0; i < ix->nsects; i++)
  149. if(wbisect(ix->sects[i]) < 0)
  150. return -1;
  151. return 0;
  152. }
  153. /*
  154. * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
  155. * version, blocksize: uint32_t
  156. * name: max. ANameSize string
  157. * sections, arenas: AMap
  158. */
  159. int
  160. outputindex(Fmt *f, Index *ix)
  161. {
  162. if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
  163. || outputamap(f, ix->smap, ix->nsects) < 0
  164. || outputamap(f, ix->amap, ix->narenas) < 0)
  165. return -1;
  166. return 0;
  167. }
  168. int
  169. parseindex(IFile *f, Index *ix)
  170. {
  171. AMapN amn;
  172. uint32_t v;
  173. char *s;
  174. /*
  175. * magic
  176. */
  177. s = ifileline(f);
  178. if(s == nil || strcmp(s, IndexMagic) != 0){
  179. seterr(ECorrupt, "bad index magic for %s", f->name);
  180. return -1;
  181. }
  182. /*
  183. * version
  184. */
  185. if(ifileuint32_t(f, &v) < 0){
  186. seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
  187. return -1;
  188. }
  189. ix->version = v;
  190. if(ix->version != IndexVersion){
  191. seterr(ECorrupt, "bad version number in %s", f->name);
  192. return -1;
  193. }
  194. /*
  195. * name
  196. */
  197. if(ifilename(f, ix->name) < 0){
  198. seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
  199. return -1;
  200. }
  201. /*
  202. * block size
  203. */
  204. if(ifileuint32_t(f, &v) < 0){
  205. seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
  206. return -1;
  207. }
  208. ix->blocksize = v;
  209. if(parseamap(f, &amn) < 0)
  210. return -1;
  211. ix->nsects = amn.n;
  212. ix->smap = amn.map;
  213. if(parseamap(f, &amn) < 0)
  214. return -1;
  215. ix->narenas = amn.n;
  216. ix->amap = amn.map;
  217. return 0;
  218. }
  219. /*
  220. * initialize an entirely new index
  221. */
  222. Index *
  223. newindex(char *name, ISect **sects, int n)
  224. {
  225. Index *ix;
  226. AMap *smap;
  227. uint64_t nb;
  228. uint32_t div, ub, xb, start, stop, blocksize, tabsize;
  229. int i, j;
  230. if(n < 1){
  231. seterr(EOk, "creating index with no index sections");
  232. return nil;
  233. }
  234. /*
  235. * compute the total buckets available in the index,
  236. * and the total buckets which are used.
  237. */
  238. nb = 0;
  239. blocksize = sects[0]->blocksize;
  240. tabsize = sects[0]->tabsize;
  241. for(i = 0; i < n; i++){
  242. /*
  243. * allow index, start, and stop to be set if index is correct
  244. * and start and stop are what we would have picked.
  245. * this allows calling fmtindex to reformat the index after
  246. * replacing a bad index section with a freshly formatted one.
  247. * start and stop are checked below.
  248. */
  249. if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){
  250. seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
  251. return nil;
  252. }
  253. if(blocksize != sects[i]->blocksize){
  254. seterr(EOk, "mismatched block sizes in index sections");
  255. return nil;
  256. }
  257. if(tabsize != sects[i]->tabsize){
  258. seterr(EOk, "mismatched config table sizes in index sections");
  259. return nil;
  260. }
  261. nb += sects[i]->blocks;
  262. }
  263. /*
  264. * check for duplicate names
  265. */
  266. for(i = 0; i < n; i++){
  267. for(j = i + 1; j < n; j++){
  268. if(namecmp(sects[i]->name, sects[j]->name) == 0){
  269. seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
  270. return nil;
  271. }
  272. }
  273. }
  274. if(nb >= ((uint64_t)1 << 32)){
  275. fprint(2, "%s: index is 2^32 blocks or more; ignoring some of it\n",
  276. argv0);
  277. nb = ((uint64_t)1 << 32) - 1;
  278. }
  279. div = (((uint64_t)1 << 32) + nb - 1) / nb;
  280. if(div < 100){
  281. fprint(2, "%s: index divisor %d too coarse; "
  282. "index larger than needed, ignoring some of it\n",
  283. argv0, div);
  284. div = 100;
  285. nb = (((uint64_t)1 << 32) - 1) / (100 - 1);
  286. }
  287. ub = (((uint64_t)1 << 32) - 1) / div + 1;
  288. if(ub > nb){
  289. seterr(EBug, "index initialization math wrong");
  290. return nil;
  291. }
  292. xb = nb - ub;
  293. /*
  294. * initialize each of the index sections
  295. * and the section map table
  296. */
  297. smap = MKNZ(AMap, n);
  298. if(smap == nil){
  299. seterr(EOk, "can't create new index: out of memory");
  300. return nil;
  301. }
  302. start = 0;
  303. for(i = 0; i < n; i++){
  304. stop = start + sects[i]->blocks - xb / n;
  305. if(i == n - 1)
  306. stop = ub;
  307. if(sects[i]->start != 0 || sects[i]->stop != 0)
  308. if(sects[i]->start != start || sects[i]->stop != stop){
  309. seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
  310. return nil;
  311. }
  312. sects[i]->start = start;
  313. sects[i]->stop = stop;
  314. namecp(sects[i]->index, name);
  315. smap[i].start = start;
  316. smap[i].stop = stop;
  317. namecp(smap[i].name, sects[i]->name);
  318. start = stop;
  319. }
  320. /*
  321. * initialize the index itself
  322. */
  323. ix = MKZ(Index);
  324. if(ix == nil){
  325. seterr(EOk, "can't create new index: out of memory");
  326. free(smap);
  327. return nil;
  328. }
  329. ix->version = IndexVersion;
  330. namecp(ix->name, name);
  331. ix->sects = sects;
  332. ix->smap = smap;
  333. ix->nsects = n;
  334. ix->blocksize = blocksize;
  335. ix->buckets = ub;
  336. ix->tabsize = tabsize;
  337. ix->div = div;
  338. if(initindex1(ix) < 0){
  339. free(smap);
  340. return nil;
  341. }
  342. return ix;
  343. }
  344. ISect*
  345. initisect(Part *part)
  346. {
  347. ISect *is;
  348. ZBlock *b;
  349. int ok;
  350. b = alloczblock(HeadSize, 0, 0);
  351. if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
  352. seterr(EAdmin, "can't read index section header: %r");
  353. return nil;
  354. }
  355. is = MKZ(ISect);
  356. if(is == nil){
  357. freezblock(b);
  358. return nil;
  359. }
  360. is->part = part;
  361. ok = unpackisect(is, b->data);
  362. freezblock(b);
  363. if(ok < 0){
  364. seterr(ECorrupt, "corrupted index section header: %r");
  365. freeisect(is);
  366. return nil;
  367. }
  368. if(is->version != ISectVersion1 && is->version != ISectVersion2){
  369. seterr(EAdmin, "unknown index section version %d", is->version);
  370. freeisect(is);
  371. return nil;
  372. }
  373. return initisect1(is);
  374. }
  375. ISect*
  376. newisect(Part *part, uint32_t vers, char *name, uint32_t blocksize,
  377. uint32_t tabsize)
  378. {
  379. ISect *is;
  380. uint32_t tabbase;
  381. is = MKZ(ISect);
  382. if(is == nil)
  383. return nil;
  384. namecp(is->name, name);
  385. is->version = vers;
  386. is->part = part;
  387. is->blocksize = blocksize;
  388. is->start = 0;
  389. is->stop = 0;
  390. tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
  391. is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
  392. is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
  393. is->bucketmagic = 0;
  394. if(is->version == ISectVersion2){
  395. do{
  396. is->bucketmagic = fastrand();
  397. }while(is->bucketmagic==0);
  398. }
  399. is = initisect1(is);
  400. if(is == nil)
  401. return nil;
  402. return is;
  403. }
  404. /*
  405. * initialize the computed parameters for an index
  406. */
  407. static ISect*
  408. initisect1(ISect *is)
  409. {
  410. uint64_t v;
  411. is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
  412. is->blocklog = u64log2(is->blocksize);
  413. if(is->blocksize != (1 << is->blocklog)){
  414. seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
  415. freeisect(is);
  416. return nil;
  417. }
  418. partblocksize(is->part, is->blocksize);
  419. is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
  420. if(is->tabbase >= is->blockbase){
  421. seterr(ECorrupt, "index section config table overlaps bucket storage");
  422. freeisect(is);
  423. return nil;
  424. }
  425. is->tabsize = is->blockbase - is->tabbase;
  426. v = is->part->size & ~(uint64_t)(is->blocksize - 1);
  427. if(is->blockbase + (uint64_t)is->blocks * is->blocksize != v){
  428. seterr(ECorrupt, "invalid blocks in index section %s", is->name);
  429. /* ZZZ what to do?
  430. freeisect(is);
  431. return nil;
  432. */
  433. }
  434. if(is->stop - is->start > is->blocks){
  435. seterr(ECorrupt, "index section overflows available space");
  436. freeisect(is);
  437. return nil;
  438. }
  439. if(is->start > is->stop){
  440. seterr(ECorrupt, "invalid index section range");
  441. freeisect(is);
  442. return nil;
  443. }
  444. return is;
  445. }
  446. int
  447. wbisect(ISect *is)
  448. {
  449. ZBlock *b;
  450. b = alloczblock(HeadSize, 1, 0);
  451. if(b == nil){
  452. /* ZZZ set error? */
  453. return -1;
  454. }
  455. if(packisect(is, b->data) < 0){
  456. seterr(ECorrupt, "can't make index section header: %r");
  457. freezblock(b);
  458. return -1;
  459. }
  460. if(writepart(is->part, PartBlank, b->data, HeadSize) < 0 || flushpart(is->part) < 0){
  461. seterr(EAdmin, "can't write index section header: %r");
  462. freezblock(b);
  463. return -1;
  464. }
  465. freezblock(b);
  466. return 0;
  467. }
  468. void
  469. freeisect(ISect *is)
  470. {
  471. if(is == nil)
  472. return;
  473. free(is);
  474. }
  475. void
  476. freeindex(Index *ix)
  477. {
  478. int i;
  479. if(ix == nil)
  480. return;
  481. free(ix->amap);
  482. free(ix->arenas);
  483. if(ix->sects)
  484. for(i = 0; i < ix->nsects; i++)
  485. freeisect(ix->sects[i]);
  486. free(ix->sects);
  487. free(ix->smap);
  488. free(ix);
  489. }
  490. /*
  491. * write a clump to an available arena in the index
  492. * and return the address of the clump within the index.
  493. ZZZ question: should this distinguish between an arena
  494. filling up and real errors writing the clump?
  495. */
  496. uint64_t
  497. writeiclump(Index *ix, Clump *c, uint8_t *clbuf)
  498. {
  499. uint64_t a;
  500. int i;
  501. IAddr ia;
  502. AState as;
  503. trace(TraceLump, "writeiclump enter");
  504. qlock(&ix->writing);
  505. for(i = ix->mapalloc; i < ix->narenas; i++){
  506. a = writeaclump(ix->arenas[i], c, clbuf);
  507. if(a != TWID64){
  508. ix->mapalloc = i;
  509. ia.addr = ix->amap[i].start + a;
  510. ia.type = c->info.type;
  511. ia.size = c->info.uncsize;
  512. ia.blocks = (c->info.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
  513. as.arena = ix->arenas[i];
  514. as.aa = ia.addr;
  515. as.stats = as.arena->memstats;
  516. insertscore(c->info.score, &ia, IEDirty, &as);
  517. qunlock(&ix->writing);
  518. trace(TraceLump, "writeiclump exit");
  519. return ia.addr;
  520. }
  521. }
  522. qunlock(&ix->writing);
  523. seterr(EAdmin, "no space left in arenas");
  524. trace(TraceLump, "writeiclump failed");
  525. return TWID64;
  526. }
  527. /*
  528. * convert an arena index to an relative arena address
  529. */
  530. Arena*
  531. amapitoa(Index *ix, uint64_t a, uint64_t *aa)
  532. {
  533. int i, r, l, m;
  534. l = 1;
  535. r = ix->narenas - 1;
  536. while(l <= r){
  537. m = (r + l) / 2;
  538. if(ix->amap[m].start <= a)
  539. l = m + 1;
  540. else
  541. r = m - 1;
  542. }
  543. l--;
  544. if(a > ix->amap[l].stop){
  545. for(i=0; i<ix->narenas; i++)
  546. print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
  547. print("want arena %d for %llux\n", l, a);
  548. seterr(ECrash, "unmapped address passed to amapitoa");
  549. return nil;
  550. }
  551. if(ix->arenas[l] == nil){
  552. seterr(ECrash, "unmapped arena selected in amapitoa");
  553. return nil;
  554. }
  555. *aa = a - ix->amap[l].start;
  556. return ix->arenas[l];
  557. }
  558. /*
  559. * convert an arena index to the bounds of the containing arena group.
  560. */
  561. Arena*
  562. amapitoag(Index *ix, uint64_t a, uint64_t *gstart, uint64_t *glimit,
  563. int *g)
  564. {
  565. uint64_t aa;
  566. Arena *arena;
  567. arena = amapitoa(ix, a, &aa);
  568. if(arena == nil)
  569. return nil;
  570. if(arenatog(arena, aa, gstart, glimit, g) < 0)
  571. return nil;
  572. *gstart += a - aa;
  573. *glimit += a - aa;
  574. return arena;
  575. }
  576. int
  577. iaddrcmp(IAddr *ia1, IAddr *ia2)
  578. {
  579. return ia1->type != ia2->type
  580. || ia1->size != ia2->size
  581. || ia1->blocks != ia2->blocks
  582. || ia1->addr != ia2->addr;
  583. }
  584. /*
  585. * lookup the score in the partition
  586. *
  587. * nothing needs to be explicitly locked:
  588. * only static parts of ix are used, and
  589. * the bucket is locked by the DBlock lock.
  590. */
  591. int
  592. loadientry(Index *ix, uint8_t *score, int type, IEntry *ie)
  593. {
  594. ISect *is;
  595. DBlock *b;
  596. IBucket ib;
  597. uint32_t buck;
  598. int h, ok;
  599. ok = -1;
  600. trace(TraceLump, "loadientry enter");
  601. /*
  602. qlock(&stats.lock);
  603. stats.indexreads++;
  604. qunlock(&stats.lock);
  605. */
  606. if(!inbloomfilter(mainindex->bloom, score)){
  607. trace(TraceLump, "loadientry bloomhit");
  608. return -1;
  609. }
  610. trace(TraceLump, "loadientry loadibucket");
  611. b = loadibucket(ix, score, &is, &buck, &ib);
  612. trace(TraceLump, "loadientry loadedibucket");
  613. if(b == nil)
  614. return -1;
  615. if(okibucket(&ib, is) < 0){
  616. trace(TraceLump, "loadientry badbucket");
  617. goto out;
  618. }
  619. h = bucklook(score, type, ib.data, ib.n);
  620. if(h & 1){
  621. h ^= 1;
  622. trace(TraceLump, "loadientry found");
  623. unpackientry(ie, &ib.data[h]);
  624. ok = 0;
  625. goto out;
  626. }
  627. trace(TraceLump, "loadientry notfound");
  628. addstat(StatBloomFalseMiss, 1);
  629. out:
  630. putdblock(b);
  631. trace(TraceLump, "loadientry exit");
  632. return ok;
  633. }
  634. int
  635. okibucket(IBucket *ib, ISect *is)
  636. {
  637. if(ib->n <= is->buckmax)
  638. return 0;
  639. seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
  640. ib->n, is->buckmax, is->start, is->stop);
  641. return -1;
  642. }
  643. /*
  644. * look for score within data;
  645. * return 1 | byte index of matching index,
  646. * or 0 | index of least element > score
  647. */
  648. int
  649. bucklook(uint8_t *score, int otype, uint8_t *data, int n)
  650. {
  651. int i, r, l, m, h, c, cc, type;
  652. if(otype == -1)
  653. type = -1;
  654. else
  655. type = vttodisktype(otype);
  656. l = 0;
  657. r = n - 1;
  658. while(l <= r){
  659. m = (r + l) >> 1;
  660. h = m * IEntrySize;
  661. for(i = 0; i < VtScoreSize; i++){
  662. c = score[i];
  663. cc = data[h + i];
  664. if(c != cc){
  665. if(c > cc)
  666. l = m + 1;
  667. else
  668. r = m - 1;
  669. goto cont;
  670. }
  671. }
  672. cc = data[h + IEntryTypeOff];
  673. if(type != cc && type != -1){
  674. if(type > cc)
  675. l = m + 1;
  676. else
  677. r = m - 1;
  678. goto cont;
  679. }
  680. return h | 1;
  681. cont:;
  682. }
  683. return l * IEntrySize;
  684. }
  685. /*
  686. * compare two IEntries; consistent with bucklook
  687. */
  688. int
  689. ientrycmp(const void *vie1, const void *vie2)
  690. {
  691. uint8_t *ie1, *ie2;
  692. int i, v1, v2;
  693. ie1 = (uint8_t*)vie1;
  694. ie2 = (uint8_t*)vie2;
  695. for(i = 0; i < VtScoreSize; i++){
  696. v1 = ie1[i];
  697. v2 = ie2[i];
  698. if(v1 != v2){
  699. if(v1 < v2)
  700. return -1;
  701. return 1;
  702. }
  703. }
  704. v1 = ie1[IEntryTypeOff];
  705. v2 = ie2[IEntryTypeOff];
  706. if(v1 != v2){
  707. if(v1 < v2)
  708. return -1;
  709. return 1;
  710. }
  711. return 0;
  712. }
  713. /*
  714. * find the number of the index section holding bucket #buck
  715. */
  716. int
  717. indexsect0(Index *ix, uint32_t buck)
  718. {
  719. int r, l, m;
  720. l = 1;
  721. r = ix->nsects - 1;
  722. while(l <= r){
  723. m = (r + l) >> 1;
  724. if(ix->sects[m]->start <= buck)
  725. l = m + 1;
  726. else
  727. r = m - 1;
  728. }
  729. return l - 1;
  730. }
  731. /*
  732. * load the index block at bucket #buck
  733. */
  734. static DBlock*
  735. loadibucket0(Index *ix, uint32_t buck, ISect **pis, uint32_t *pbuck,
  736. IBucket *ib, int mode)
  737. {
  738. ISect *is;
  739. DBlock *b;
  740. is = ix->sects[indexsect0(ix, buck)];
  741. if(buck < is->start || is->stop <= buck){
  742. seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
  743. return nil;
  744. }
  745. buck -= is->start;
  746. if((b = getdblock(is->part, is->blockbase + ((uint64_t)buck << is->blocklog), mode)) == nil)
  747. return nil;
  748. if(pis)
  749. *pis = is;
  750. if(pbuck)
  751. *pbuck = buck;
  752. if(ib)
  753. unpackibucket(ib, b->data, is->bucketmagic);
  754. return b;
  755. }
  756. /*
  757. * find the number of the index section holding score
  758. */
  759. int
  760. indexsect1(Index *ix, uint8_t *score)
  761. {
  762. return indexsect0(ix, hashbits(score, 32) / ix->div);
  763. }
  764. /*
  765. * load the index block responsible for score.
  766. */
  767. static DBlock*
  768. loadibucket1(Index *ix, uint8_t *score, ISect **pis, uint32_t *pbuck,
  769. IBucket *ib)
  770. {
  771. return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
  772. }
  773. int
  774. indexsect(Index *ix, uint8_t *score)
  775. {
  776. return indexsect1(ix, score);
  777. }
  778. DBlock*
  779. loadibucket(Index *ix, uint8_t *score, ISect **pis, uint32_t *pbuck,
  780. IBucket *ib)
  781. {
  782. return loadibucket1(ix, score, pis, pbuck, ib);
  783. }