123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477 |
- /*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
- /*
- * Archiver. In charge of sending blocks to Venti.
- */
- #include "stdinc.h"
- #include "dat.h"
- #include "fns.h"
- #include "error.h"
- #include "9.h" /* for consPrint */
- #define DEBUG 0
- static void archThread(void*);
- struct Arch
- {
- int ref;
- uint blockSize;
- uint diskSize;
- Cache *c;
- Fs *fs;
- VtSession *z;
- VtLock *lk;
- VtRendez *starve;
- VtRendez *die;
- };
- Arch *
- archInit(Cache *c, Disk *disk, Fs *fs, VtSession *z)
- {
- Arch *a;
- a = vtMemAllocZ(sizeof(Arch));
- a->c = c;
- a->z = z;
- a->fs = fs;
- a->blockSize = diskBlockSize(disk);
- a->lk = vtLockAlloc();
- a->starve = vtRendezAlloc(a->lk);
- a->ref = 2;
- vtThread(archThread, a);
- return a;
- }
- void
- archFree(Arch *a)
- {
- /* kill slave */
- vtLock(a->lk);
- a->die = vtRendezAlloc(a->lk);
- vtWakeup(a->starve);
- while(a->ref > 1)
- vtSleep(a->die);
- vtUnlock(a->lk);
- vtRendezFree(a->starve);
- vtRendezFree(a->die);
- vtLockFree(a->lk);
- vtMemFree(a);
- }
- static int
- ventiSend(Arch *a, Block *b, uint8_t *data)
- {
- uint n;
- uint8_t score[VtScoreSize];
- if(DEBUG > 1)
- fprint(2, "ventiSend: sending %#x %L to venti\n", b->addr, &b->l);
- n = vtZeroTruncate(vtType[b->l.type], data, a->blockSize);
- if(DEBUG > 1)
- fprint(2, "ventiSend: truncate %d to %d\n", a->blockSize, n);
- if(!vtWrite(a->z, score, vtType[b->l.type], data, n)){
- fprint(2, "ventiSend: vtWrite block %#x failed: %R\n", b->addr);
- return 0;
- }
- if(!vtSha1Check(score, data, n)){
- uint8_t score2[VtScoreSize];
- vtSha1(score2, data, n);
- fprint(2, "ventiSend: vtWrite block %#x failed vtSha1Check %V %V\n",
- b->addr, score, score2);
- return 0;
- }
- if(!vtSync(a->z))
- return 0;
- return 1;
- }
- /*
- * parameters for recursion; there are so many,
- * and some only change occasionally. this is
- * easier than spelling things out at each call.
- */
- typedef struct Param Param;
- struct Param
- {
- /* these never change */
- uint snapEpoch; /* epoch for snapshot being archived */
- uint blockSize;
- Cache *c;
- Arch *a;
- /* changes on every call */
- uint depth;
- /* statistics */
- uint nfixed;
- uint nsend;
- uint nvisit;
- uint nfailsend;
- uint maxdepth;
- uint nreclaim;
- uint nfake;
- uint nreal;
- /* these occasionally change (must save old values and put back) */
- uint dsize;
- uint psize;
- /* return value; avoids using stack space */
- Label l;
- uint8_t score[VtScoreSize];
- };
- static void
- shaBlock(uint8_t score[VtScoreSize], Block *b, uint8_t *data, uint bsize)
- {
- vtSha1(score, data, vtZeroTruncate(vtType[b->l.type], data, bsize));
- }
- /* Not used?
- static uint
- etype(Entry *e)
- {
- uint t;
- if(e->flags&VtEntryDir)
- t = BtDir;
- else
- t = BtData;
- return t+e->depth;
- }
- */
- static uint8_t*
- copyBlock(Block *b, uint32_t blockSize)
- {
- uint8_t *data;
- data = vtMemAlloc(blockSize);
- if(data == nil)
- return nil;
- memmove(data, b->data, blockSize);
- return data;
- }
- /*
- * Walk over the block tree, archiving it to Venti.
- *
- * We don't archive the snapshots. Instead we zero the
- * entries in a temporary copy of the block and archive that.
- *
- * Return value is:
- *
- * ArchFailure some error occurred
- * ArchSuccess block and all children archived
- * ArchFaked success, but block or children got copied
- */
/* archWalk result codes */
enum
{
	ArchFailure = 0,	/* some error occurred */
	ArchSuccess = 1,	/* block and all children archived */
	ArchFaked = 2,		/* success, but block or children got copied */
};
- static int
- archWalk(Param *p, uint32_t addr, uint8_t type, uint32_t tag)
- {
- int ret, i, x, psize, dsize;
- uint8_t *data, score[VtScoreSize];
- Block *b;
- Label l;
- Entry *e;
- WalkPtr w;
- p->nvisit++;
- b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0);
- if(b == nil){
- fprint(2, "archive(%u, %#x): cannot find block: %R\n", p->snapEpoch, addr);
- if(strcmp(vtGetError(), ELabelMismatch) == 0){
- /* might as well plod on so we write _something_ to Venti */
- memmove(p->score, vtZeroScore, VtScoreSize);
- return ArchFaked;
- }
- return ArchFailure;
- }
- if(DEBUG) fprint(2, "%*sarchive(%u, %#x): block label %L\n",
- p->depth*2, "", p->snapEpoch, b->addr, &b->l);
- p->depth++;
- if(p->depth > p->maxdepth)
- p->maxdepth = p->depth;
- data = b->data;
- if((b->l.state&BsVenti) == 0){
- initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize);
- for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){
- if(e){
- if(!(e->flags&VtEntryActive))
- continue;
- if((e->snap && !e->archive)
- || (e->flags&VtEntryNoArchive)){
- if(0) fprint(2, "snap; faking %#x\n", b->addr);
- if(data == b->data){
- data = copyBlock(b, p->blockSize);
- if(data == nil){
- ret = ArchFailure;
- goto Out;
- }
- w.data = data;
- }
- memmove(e->score, vtZeroScore, VtScoreSize);
- e->depth = 0;
- e->size = 0;
- e->tag = 0;
- e->flags &= ~VtEntryLocal;
- entryPack(e, data, w.n-1);
- continue;
- }
- }
- addr = globalToLocal(score);
- if(addr == NilBlock)
- continue;
- dsize = p->dsize;
- psize = p->psize;
- if(e){
- p->dsize= e->dsize;
- p->psize = e->psize;
- }
- vtUnlock(b->lk);
- x = archWalk(p, addr, type, tag);
- vtLock(b->lk);
- if(e){
- p->dsize = dsize;
- p->psize = psize;
- }
- while(b->iostate != BioClean && b->iostate != BioDirty)
- vtSleep(b->ioready);
- switch(x){
- case ArchFailure:
- fprint(2, "archWalk %#x failed; ptr is in %#x offset %d\n",
- addr, b->addr, i);
- ret = ArchFailure;
- goto Out;
- case ArchFaked:
- /*
- * When we're writing the entry for an archive directory
- * (like /archive/2003/1215) then even if we've faked
- * any data, record the score unconditionally.
- * This way, we will always record the Venti score here.
- * Otherwise, temporary data or corrupted file system
- * would cause us to keep holding onto the on-disk
- * copy of the archive.
- */
- if(e==nil || !e->archive)
- if(data == b->data){
- if(0) fprint(2, "faked %#x, faking %#x (%V)\n", addr, b->addr, p->score);
- data = copyBlock(b, p->blockSize);
- if(data == nil){
- ret = ArchFailure;
- goto Out;
- }
- w.data = data;
- }
- /* fall through */
- if(0) fprint(2, "falling\n");
- case ArchSuccess:
- if(e){
- memmove(e->score, p->score, VtScoreSize);
- e->flags &= ~VtEntryLocal;
- entryPack(e, data, w.n-1);
- }else
- memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize);
- if(data == b->data){
- blockDirty(b);
- /*
- * If b is in the active tree, then we need to note that we've
- * just removed addr from the active tree (replacing it with the
- * copy we just stored to Venti). If addr is in other snapshots,
- * this will close addr but not free it, since it has a non-empty
- * epoch range.
- *
- * If b is in the active tree but has been copied (this can happen
- * if we get killed at just the right moment), then we will
- * mistakenly leak its kids.
- *
- * The children of an archive directory (e.g., /archive/2004/0604)
- * are not treated as in the active tree.
- */
- if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0))
- blockRemoveLink(b, addr, p->l.type, p->l.tag, 0);
- }
- break;
- }
- }
- if(!ventiSend(p->a, b, data)){
- p->nfailsend++;
- ret = ArchFailure;
- goto Out;
- }
- p->nsend++;
- if(data != b->data)
- p->nfake++;
- if(data == b->data){ /* not faking it, so update state */
- p->nreal++;
- l = b->l;
- l.state |= BsVenti;
- if(!blockSetLabel(b, &l, 0)){
- ret = ArchFailure;
- goto Out;
- }
- }
- }
- shaBlock(p->score, b, data, p->blockSize);
- if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data);
- ret = data!=b->data ? ArchFaked : ArchSuccess;
- p->l = b->l;
- Out:
- if(data != b->data)
- vtMemFree(data);
- p->depth--;
- blockPut(b);
- return ret;
- }
- static void
- archThread(void *v)
- {
- Arch *a = v;
- Block *b;
- Param p;
- Super super;
- int ret;
- uint32_t addr;
- uint8_t rbuf[VtRootSize];
- VtRoot root;
- vtThreadSetName("arch");
- for(;;){
- /* look for work */
- vtLock(a->fs->elk);
- b = superGet(a->c, &super);
- if(b == nil){
- vtUnlock(a->fs->elk);
- fprint(2, "archThread: superGet: %R\n");
- sleep(60*1000);
- continue;
- }
- addr = super.next;
- if(addr != NilBlock && super.current == NilBlock){
- super.current = addr;
- super.next = (int64_t)NilBlock;
- superPack(&super, b->data);
- blockDirty(b);
- }else
- addr = super.current;
- blockPut(b);
- vtUnlock(a->fs->elk);
- if(addr == NilBlock){
- /* wait for work */
- vtLock(a->lk);
- vtSleep(a->starve);
- if(a->die != nil)
- goto Done;
- vtUnlock(a->lk);
- continue;
- }
- sleep(10*1000); /* window of opportunity to provoke races */
- /* do work */
- memset(&p, 0, sizeof p);
- p.blockSize = a->blockSize;
- p.dsize = 3*VtEntrySize; /* root has three Entries */
- p.c = a->c;
- p.a = a;
- ret = archWalk(&p, addr, BtDir, RootTag);
- switch(ret){
- default:
- abort();
- case ArchFailure:
- fprint(2, "archiveBlock %#x: %R\n", addr);
- sleep(60*1000);
- continue;
- case ArchSuccess:
- case ArchFaked:
- break;
- }
- if(0) fprint(2, "archiveSnapshot 0x%#x: maxdepth %u nfixed %u"
- " send %u nfailsend %u nvisit %u"
- " nreclaim %u nfake %u nreal %u\n",
- addr, p.maxdepth, p.nfixed,
- p.nsend, p.nfailsend, p.nvisit,
- p.nreclaim, p.nfake, p.nreal);
- if(0) fprint(2, "archiveBlock %V (%u)\n", p.score, p.blockSize);
- /* tie up vac root */
- memset(&root, 0, sizeof root);
- root.version = VtRootVersion;
- strecpy(root.type, root.type+sizeof root.type, "vac");
- strecpy(root.name, root.name+sizeof root.name, "fossil");
- memmove(root.score, p.score, VtScoreSize);
- memmove(root.prev, super.last, VtScoreSize);
- root.blockSize = a->blockSize;
- vtRootPack(&root, rbuf);
- if(!vtWrite(a->z, p.score, VtRootType, rbuf, VtRootSize)
- || !vtSha1Check(p.score, rbuf, VtRootSize)){
- fprint(2, "vtWriteBlock %#x: %R\n", addr);
- sleep(60*1000);
- continue;
- }
- /* record success */
- vtLock(a->fs->elk);
- b = superGet(a->c, &super);
- if(b == nil){
- vtUnlock(a->fs->elk);
- fprint(2, "archThread: superGet: %R\n");
- sleep(60*1000);
- continue;
- }
- super.current = (int64_t)NilBlock;
- memmove(super.last, p.score, VtScoreSize);
- superPack(&super, b->data);
- blockDirty(b);
- blockPut(b);
- vtUnlock(a->fs->elk);
- consPrint("archive vac:%V\n", p.score);
- }
- Done:
- a->ref--;
- vtWakeup(a->die);
- vtUnlock(a->lk);
- }
- void
- archKick(Arch *a)
- {
- if(a == nil){
- fprint(2, "warning: archKick nil\n");
- return;
- }
- vtLock(a->lk);
- vtWakeup(a->starve);
- vtUnlock(a->lk);
- }
|