123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826 |
- /*
- * CDE - Common Desktop Environment
- *
- * Copyright (c) 1993-2012, The Open Group. All rights reserved.
- *
- * These libraries and programs are free software; you can
- * redistribute them and/or modify them under the terms of the GNU
- * Lesser General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * These libraries and programs are distributed in the hope that
- * they will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU Lesser General Public License for more
- * details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with these libraries and programs; if not, write
- * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
- * Floor, Boston, MA 02110-1301 USA
- */
- /* $XConsortium: bt_split.c /main/3 1996/06/11 17:13:10 cde-hal $ */
- /*-
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #if defined(LIBC_SCCS) && !defined(lint)
- static char sccsid[] = "@(#)bt_split.c 8.1 (Berkeley) 6/4/93";
- #endif /* LIBC_SCCS and not lint */
- #include <sys/types.h>
- #define __DBINTERFACE_PRIVATE
- #include <limits.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <db.h>
- #include "btree.h"
- static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *));
- static PAGE *bt_page
- __P((BTREE *, PAGE *, PAGE **, PAGE **, u_int *, size_t));
- static int bt_preserve __P((BTREE *, pgno_t));
- static PAGE *bt_psplit
- __P((BTREE *, PAGE *, PAGE *, PAGE *, u_int *, size_t));
- static PAGE *bt_root
- __P((BTREE *, PAGE *, PAGE **, PAGE **, u_int *, size_t));
- static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *));
- static recno_t rec_total __P((PAGE *));
- #ifdef STATISTICS
- u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
- #endif
- /*
- * __BT_SPLIT -- Split the tree.
- *
- * Parameters:
- * t: tree
- * sp: page to split
- * key: key to insert
- * data: data to insert
- * flags: BIGKEY/BIGDATA flags
- * ilen: insert length
- * skip: index to leave open
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
- */
- int
- __bt_split(BTREE *t, PAGE *sp, const DBT *key, const DBT *data, u_long flags, size_t ilen, u_int skip)
- {
- BINTERNAL *bi = NULL;
- BLEAF *bl = NULL;
- BLEAF *tbl;
- DBT a, b;
- EPGNO *parent;
- PAGE *h, *l, *r, *lchild, *rchild;
- indx_t nxtindex;
- size_t n, nbytes;
- size_t nksize = 0;
- int parentsplit;
- char *dest;
- /*
- * Split the page into two pages, l and r. The split routines return
- * a pointer to the page into which the key should be inserted and with
- * skip set to the offset which should be used. Additionally, l and r
- * are pinned.
- */
- h = sp->pgno == P_ROOT ?
- bt_root(t, sp, &l, &r, &skip, ilen) :
- bt_page(t, sp, &l, &r, &skip, ilen);
- if (h == NULL)
- return (RET_ERROR);
- /*
- * Insert the new key/data pair into the leaf page. (Key inserts
- * always cause a leaf page to split first.)
- */
- h->linp[skip] = h->upper -= ilen;
- dest = (char *)h + h->upper;
- if (ISSET(t, R_RECNO))
- WR_RLEAF(dest, data, flags)
- else
- WR_BLEAF(dest, key, data, flags)
- /* If the root page was split, make it look right. */
- if (sp->pgno == P_ROOT &&
- (ISSET(t, R_RECNO) ?
- bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
- goto err2;
- /*
- * Now we walk the parent page stack -- a LIFO stack of the pages that
- * were traversed when we searched for the page that split. Each stack
- * entry is a page number and a page index offset. The offset is for
- * the page traversed on the search. We've just split a page, so we
- * have to insert a new key into the parent page.
- *
- * If the insert into the parent page causes it to split, may have to
- * continue splitting all the way up the tree. We stop if the root
- * splits or the page inserted into didn't have to split to hold the
- * new key. Some algorithms replace the key for the old page as well
- * as the new page. We don't, as there's no reason to believe that the
- * first key on the old page is any better than the key we have, and,
- * in the case of a key being placed at index 0 causing the split, the
- * key is unavailable.
- *
- * There are a maximum of 5 pages pinned at any time. We keep the left
- * and right pages pinned while working on the parent. The 5 are the
- * two children, left parent and right parent (when the parent splits)
- * and the root page or the overflow key page when calling bt_preserve.
- * This code must make sure that all pins are released other than the
- * root page or overflow page which is unlocked elsewhere.
- */
- while ((parent = BT_POP(t)) != NULL) {
- lchild = l;
- rchild = r;
- /* Get the parent page. */
- if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
- goto err2;
- /*
- * The new key goes ONE AFTER the index, because the split
- * was to the right.
- */
- skip = parent->index + 1;
- /*
- * Calculate the space needed on the parent page.
- *
- * Prefix trees: space hack when inserting into BINTERNAL
- * pages. Retain only what's needed to distinguish between
- * the new entry and the LAST entry on the page to its left.
- * If the keys compare equal, retain the entire key. Note,
- * we don't touch overflow keys, and the entire key must be
- * retained for the next-to-left most key on the leftmost
- * page of each level, or the search will fail. Applicable
- * ONLY to internal pages that have leaf pages as children.
- * Further reduction of the key between pairs of internal
- * pages loses too much information.
- */
- switch (rchild->flags & P_TYPE) {
- case P_BINTERNAL:
- bi = GETBINTERNAL(rchild, 0);
- nbytes = NBINTERNAL(bi->ksize);
- break;
- case P_BLEAF:
- bl = GETBLEAF(rchild, 0);
- nbytes = NBINTERNAL(bl->ksize);
- if (t->bt_pfx && !(bl->flags & P_BIGKEY) &&
- (h->prevpg != P_INVALID || skip > 1)) {
- tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1);
- a.size = tbl->ksize;
- a.data = tbl->bytes;
- b.size = bl->ksize;
- b.data = bl->bytes;
- nksize = t->bt_pfx(&a, &b);
- n = NBINTERNAL(nksize);
- if (n < nbytes) {
- #ifdef STATISTICS
- bt_pfxsaved += nbytes - n;
- #endif
- nbytes = n;
- } else
- nksize = 0;
- } else
- nksize = 0;
- break;
- case P_RINTERNAL:
- case P_RLEAF:
- nbytes = NRINTERNAL;
- break;
- default:
- abort();
- }
- /* Split the parent page if necessary or shift the indices. */
- if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
- sp = h;
- h = h->pgno == P_ROOT ?
- bt_root(t, h, &l, &r, &skip, nbytes) :
- bt_page(t, h, &l, &r, &skip, nbytes);
- if (h == NULL)
- goto err1;
- parentsplit = 1;
- } else {
- if (skip < (nxtindex = NEXTINDEX(h)))
- memmove(h->linp + skip + 1, h->linp + skip,
- (nxtindex - skip) * sizeof(indx_t));
- h->lower += sizeof(indx_t);
- parentsplit = 0;
- }
- /* Insert the key into the parent page. */
- switch(rchild->flags & P_TYPE) {
- case P_BINTERNAL:
- h->linp[skip] = h->upper -= nbytes;
- dest = (char *)h + h->linp[skip];
- memmove(dest, bi, nbytes);
- ((BINTERNAL *)dest)->pgno = rchild->pgno;
- break;
- case P_BLEAF:
- h->linp[skip] = h->upper -= nbytes;
- dest = (char *)h + h->linp[skip];
- WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
- rchild->pgno, bl->flags & P_BIGKEY);
- memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
- if (bl->flags & P_BIGKEY &&
- bt_preserve(t, *(char *)bl->bytes) == RET_ERROR)
- goto err1;
- break;
- case P_RINTERNAL:
- /*
- * Update the left page count. If split
- * added at index 0, fix the correct page.
- */
- if (skip > 0)
- dest = (char *)h + h->linp[skip - 1];
- else
- dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
- ((RINTERNAL *)dest)->nrecs = rec_total(lchild);
- ((RINTERNAL *)dest)->pgno = lchild->pgno;
- /* Update the right page count. */
- h->linp[skip] = h->upper -= nbytes;
- dest = (char *)h + h->linp[skip];
- ((RINTERNAL *)dest)->nrecs = rec_total(rchild);
- ((RINTERNAL *)dest)->pgno = rchild->pgno;
- break;
- case P_RLEAF:
- /*
- * Update the left page count. If split
- * added at index 0, fix the correct page.
- */
- if (skip > 0)
- dest = (char *)h + h->linp[skip - 1];
- else
- dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
- ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild);
- ((RINTERNAL *)dest)->pgno = lchild->pgno;
- /* Update the right page count. */
- h->linp[skip] = h->upper -= nbytes;
- dest = (char *)h + h->linp[skip];
- ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild);
- ((RINTERNAL *)dest)->pgno = rchild->pgno;
- break;
- default:
- abort();
- }
- /* Unpin the held pages. */
- if (!parentsplit) {
- mpool_put(t->bt_mp, h, MPOOL_DIRTY);
- break;
- }
- /* If the root page was split, make it look right. */
- if (sp->pgno == P_ROOT &&
- (ISSET(t, R_RECNO) ?
- bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
- goto err1;
- mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
- mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
- }
- /* Unpin the held pages. */
- mpool_put(t->bt_mp, l, MPOOL_DIRTY);
- mpool_put(t->bt_mp, r, MPOOL_DIRTY);
- /* Clear any pages left on the stack. */
- return (RET_SUCCESS);
- /*
- * If something fails in the above loop we were already walking back
- * up the tree and the tree is now inconsistent. Nothing much we can
- * do about it but release any memory we're holding.
- */
- err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
- mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
- err2: mpool_put(t->bt_mp, l, 0);
- mpool_put(t->bt_mp, r, 0);
- __dbpanic(t->bt_dbp);
- return (RET_ERROR);
- }
- /*
- * BT_PAGE -- Split a non-root page of a btree.
- *
- * Parameters:
- * t: tree
- * h: root page
- * lp: pointer to left page pointer
- * rp: pointer to right page pointer
- * skip: pointer to index to leave open
- * ilen: insert length
- *
- * Returns:
- * Pointer to page in which to insert or NULL on error.
- */
- static PAGE *
- bt_page(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, u_int *skip, size_t ilen)
- {
- PAGE *l, *r, *tp;
- pgno_t npg;
- #ifdef STATISTICS
- ++bt_split;
- #endif
- /* Put the new right page for the split into place. */
- if ((r = __bt_new(t, &npg)) == NULL)
- return (NULL);
- r->pgno = npg;
- r->lower = BTDATAOFF;
- r->upper = t->bt_psize;
- r->nextpg = h->nextpg;
- r->prevpg = h->pgno;
- r->flags = h->flags & P_TYPE;
- /*
- * If we're splitting the last page on a level because we're appending
- * a key to it (skip is NEXTINDEX()), it's likely that the data is
- * sorted. Adding an empty page on the side of the level is less work
- * and can push the fill factor much higher than normal. If we're
- * wrong it's no big deal, we'll just do the split the right way next
- * time. It may look like it's equally easy to do a similar hack for
- * reverse sorted data, that is, split the tree left, but it's not.
- * Don't even try.
- */
- if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) {
- #ifdef STATISTICS
- ++bt_sortsplit;
- #endif
- h->nextpg = r->pgno;
- r->lower = BTDATAOFF + sizeof(indx_t);
- *skip = 0;
- *lp = h;
- *rp = r;
- return (r);
- }
- /* Put the new left page for the split into place. */
- if ((l = malloc(t->bt_psize)) == NULL) {
- mpool_put(t->bt_mp, r, 0);
- return (NULL);
- }
- l->pgno = h->pgno;
- l->nextpg = r->pgno;
- l->prevpg = h->prevpg;
- l->lower = BTDATAOFF;
- l->upper = t->bt_psize;
- l->flags = h->flags & P_TYPE;
- /* Fix up the previous pointer of the page after the split page. */
- if (h->nextpg != P_INVALID) {
- if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) {
- free(l);
- /* XXX mpool_free(t->bt_mp, r->pgno); */
- return (NULL);
- }
- tp->prevpg = r->pgno;
- mpool_put(t->bt_mp, tp, 0);
- }
- /*
- * Split right. The key/data pairs aren't sorted in the btree page so
- * it's simpler to copy the data from the split page onto two new pages
- * instead of copying half the data to the right page and compacting
- * the left page in place. Since the left page can't change, we have
- * to swap the original and the allocated left page after the split.
- */
- tp = bt_psplit(t, h, l, r, skip, ilen);
- /* Move the new left page onto the old left page. */
- memmove(h, l, t->bt_psize);
- if (tp == l)
- tp = h;
- free(l);
- *lp = h;
- *rp = r;
- return (tp);
- }
- /*
- * BT_ROOT -- Split the root page of a btree.
- *
- * Parameters:
- * t: tree
- * h: root page
- * lp: pointer to left page pointer
- * rp: pointer to right page pointer
- * skip: pointer to index to leave open
- * ilen: insert length
- *
- * Returns:
- * Pointer to page in which to insert or NULL on error.
- */
- static PAGE *
- bt_root(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, u_int *skip, size_t ilen)
- {
- PAGE *l, *r, *tp;
- pgno_t lnpg, rnpg;
- #ifdef STATISTICS
- ++bt_split;
- ++bt_rootsplit;
- #endif
- /* Put the new left and right pages for the split into place. */
- if ((l = __bt_new(t, &lnpg)) == NULL ||
- (r = __bt_new(t, &rnpg)) == NULL)
- return (NULL);
- l->pgno = lnpg;
- r->pgno = rnpg;
- l->nextpg = r->pgno;
- r->prevpg = l->pgno;
- l->prevpg = r->nextpg = P_INVALID;
- l->lower = r->lower = BTDATAOFF;
- l->upper = r->upper = t->bt_psize;
- l->flags = r->flags = h->flags & P_TYPE;
- /* Split the root page. */
- tp = bt_psplit(t, h, l, r, skip, ilen);
- *lp = l;
- *rp = r;
- return (tp);
- }
- /*
- * BT_RROOT -- Fix up the recno root page after it has been split.
- *
- * Parameters:
- * t: tree
- * h: root page
- * l: left page
- * r: right page
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
- */
- static int
- bt_rroot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
- {
- char *dest;
- /* Insert the left and right keys, set the header information. */
- h->linp[0] = h->upper = t->bt_psize - NRINTERNAL;
- dest = (char *)h + h->upper;
- WR_RINTERNAL(dest,
- l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno);
- h->linp[1] = h->upper -= NRINTERNAL;
- dest = (char *)h + h->upper;
- WR_RINTERNAL(dest,
- r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno);
- h->lower = BTDATAOFF + 2 * sizeof(indx_t);
- /* Unpin the root page, set to recno internal page. */
- h->flags &= ~P_TYPE;
- h->flags |= P_RINTERNAL;
- mpool_put(t->bt_mp, h, MPOOL_DIRTY);
- return (RET_SUCCESS);
- }
- /*
- * BT_BROOT -- Fix up the btree root page after it has been split.
- *
- * Parameters:
- * t: tree
- * h: root page
- * l: left page
- * r: right page
- *
- * Returns:
- * RET_ERROR, RET_SUCCESS
- */
- static int
- bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
- {
- BINTERNAL *bi;
- BLEAF *bl;
- size_t nbytes;
- char *dest;
- /*
- * If the root page was a leaf page, change it into an internal page.
- * We copy the key we split on (but not the key's data, in the case of
- * a leaf page) to the new root page.
- *
- * The btree comparison code guarantees that the left-most key on any
- * level of the tree is never used, so it doesn't need to be filled in.
- */
- nbytes = NBINTERNAL(0);
- h->linp[0] = h->upper = t->bt_psize - nbytes;
- dest = (char *)h + h->upper;
- WR_BINTERNAL(dest, 0, l->pgno, 0);
- switch(h->flags & P_TYPE) {
- case P_BLEAF:
- bl = GETBLEAF(r, 0);
- nbytes = NBINTERNAL(bl->ksize);
- h->linp[1] = h->upper -= nbytes;
- dest = (char *)h + h->upper;
- WR_BINTERNAL(dest, bl->ksize, r->pgno, 0);
- memmove(dest, bl->bytes, bl->ksize);
- /*
- * If the key is on an overflow page, mark the overflow chain
- * so it isn't deleted when the leaf copy of the key is deleted.
- */
- if (bl->flags & P_BIGKEY &&
- bt_preserve(t, *(char *)bl->bytes) == RET_ERROR)
- return (RET_ERROR);
- break;
- case P_BINTERNAL:
- bi = GETBINTERNAL(r, 0);
- nbytes = NBINTERNAL(bi->ksize);
- h->linp[1] = h->upper -= nbytes;
- dest = (char *)h + h->upper;
- memmove(dest, bi, nbytes);
- ((BINTERNAL *)dest)->pgno = r->pgno;
- break;
- default:
- abort();
- }
- /* There are two keys on the page. */
- h->lower = BTDATAOFF + 2 * sizeof(indx_t);
- /* Unpin the root page, set to btree internal page. */
- h->flags &= ~P_TYPE;
- h->flags |= P_BINTERNAL;
- mpool_put(t->bt_mp, h, MPOOL_DIRTY);
- return (RET_SUCCESS);
- }
- /*
- * BT_PSPLIT -- Do the real work of splitting the page.
- *
- * Parameters:
- * t: tree
- * h: page to be split
- * l: page to put lower half of data
- * r: page to put upper half of data
- * pskip: pointer to index to leave open
- * ilen: insert length
- *
- * Returns:
- * Pointer to page in which to insert.
- */
- static PAGE *
- bt_psplit(BTREE *t, PAGE *h, PAGE *l, PAGE *r, u_int *pskip, size_t ilen)
- {
- BINTERNAL *bi;
- BLEAF *bl;
- RLEAF *rl;
- EPGNO *c;
- PAGE *rval;
- void *src = NULL;
- indx_t full, half, nxt, off, skip, top, used;
- size_t nbytes;
- int bigkeycnt, isbigkey;
- /*
- * Split the data to the left and right pages. Leave the skip index
- * open. Additionally, make some effort not to split on an overflow
- * key. This makes internal page processing faster and can save
- * space as overflow keys used by internal pages are never deleted.
- */
- bigkeycnt = 0;
- skip = *pskip;
- full = t->bt_psize - BTDATAOFF;
- half = full / 2;
- used = 0;
- for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
- if (skip == off) {
- nbytes = ilen;
- isbigkey = 0; /* XXX: not really known. */
- } else
- switch (h->flags & P_TYPE) {
- case P_BINTERNAL:
- src = bi = GETBINTERNAL(h, nxt);
- nbytes = NBINTERNAL(bi->ksize);
- isbigkey = bi->flags & P_BIGKEY;
- break;
- case P_BLEAF:
- src = bl = GETBLEAF(h, nxt);
- nbytes = NBLEAF(bl);
- isbigkey = bl->flags & P_BIGKEY;
- break;
- case P_RINTERNAL:
- src = GETRINTERNAL(h, nxt);
- nbytes = NRINTERNAL;
- isbigkey = 0;
- break;
- case P_RLEAF:
- src = rl = GETRLEAF(h, nxt);
- nbytes = NRLEAF(rl);
- isbigkey = 0;
- break;
- default:
- abort();
- }
- /*
- * If the key/data pairs are substantial fractions of the max
- * possible size for the page, it's possible to get situations
- * where we decide to try and copy too much onto the left page.
- * Make sure that doesn't happen.
- */
- if (skip <= off && used + nbytes >= full) {
- --off;
- break;
- }
- /* Copy the key/data pair, if not the skipped index. */
- if (skip != off) {
- ++nxt;
- l->linp[off] = l->upper -= nbytes;
- memmove((char *)l + l->upper, src, nbytes);
- }
- used += nbytes;
- if (used >= half) {
- if (!isbigkey || bigkeycnt == 3)
- break;
- else
- ++bigkeycnt;
- }
- }
- /*
- * Off is the last offset that's valid for the left page.
- * Nxt is the first offset to be placed on the right page.
- */
- l->lower += (off + 1) * sizeof(indx_t);
- /*
- * If splitting the page that the cursor was on, the cursor has to be
- * adjusted to point to the same record as before the split. If the
- * cursor is at or past the skipped slot, the cursor is incremented by
- * one. If the cursor is on the right page, it is decremented by the
- * number of records split to the left page.
- *
- * Don't bother checking for the B_SEQINIT flag, the page number will
- * be P_INVALID.
- */
- c = &t->bt_bcursor;
- if (c->pgno == h->pgno) {
- if (c->index >= skip)
- ++c->index;
- if (c->index < nxt) /* Left page. */
- c->pgno = l->pgno;
- else { /* Right page. */
- c->pgno = r->pgno;
- c->index -= nxt;
- }
- }
- /*
- * If the skipped index was on the left page, just return that page.
- * Otherwise, adjust the skip index to reflect the new position on
- * the right page.
- */
- if (skip <= off) {
- skip = 0;
- rval = l;
- } else {
- rval = r;
- *pskip -= nxt;
- }
- for (off = 0; nxt < top; ++off) {
- if (skip == nxt) {
- ++off;
- skip = 0;
- }
- switch (h->flags & P_TYPE) {
- case P_BINTERNAL:
- src = bi = GETBINTERNAL(h, nxt);
- nbytes = NBINTERNAL(bi->ksize);
- break;
- case P_BLEAF:
- src = bl = GETBLEAF(h, nxt);
- nbytes = NBLEAF(bl);
- break;
- case P_RINTERNAL:
- src = GETRINTERNAL(h, nxt);
- nbytes = NRINTERNAL;
- break;
- case P_RLEAF:
- src = rl = GETRLEAF(h, nxt);
- nbytes = NRLEAF(rl);
- break;
- default:
- abort();
- }
- ++nxt;
- r->linp[off] = r->upper -= nbytes;
- memmove((char *)r + r->upper, src, nbytes);
- }
- r->lower += off * sizeof(indx_t);
- /* If the key is being appended to the page, adjust the index. */
- if (skip == top)
- r->lower += sizeof(indx_t);
- return (rval);
- }
- /*
- * BT_PRESERVE -- Mark a chain of pages as used by an internal node.
- *
- * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the
- * record that references them gets deleted. Chains pointed to by internal
- * pages never get deleted. This routine marks a chain as pointed to by an
- * internal page.
- *
- * Parameters:
- * t: tree
- * pg: page number of first page in the chain.
- *
- * Returns:
- * RET_SUCCESS, RET_ERROR.
- */
- static int
- bt_preserve(BTREE *t, pgno_t pg)
- {
- PAGE *h;
- if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
- return (RET_ERROR);
- h->flags |= P_PRESERVE;
- mpool_put(t->bt_mp, h, MPOOL_DIRTY);
- return (RET_SUCCESS);
- }
- /*
- * REC_TOTAL -- Return the number of recno entries below a page.
- *
- * Parameters:
- * h: page
- *
- * Returns:
- * The number of recno entries below a page.
- *
- * XXX
- * These values could be set by the bt_psplit routine. The problem is that the
- * entry has to be popped off of the stack etc. or the values have to be passed
- * all the way back to bt_split/bt_rroot and it's not very clean.
- */
- static recno_t
- rec_total(PAGE *h)
- {
- recno_t recs;
- indx_t nxt, top;
- for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt)
- recs += GETRINTERNAL(h, nxt)->nrecs;
- return (recs);
- }
|