/* dat.h */
  /*
   * Forward declarations: each struct tag gets a typedef of the same
   * name, so pointers to these types can be declared without the full
   * struct definition being visible.
   */
  typedef struct Arch Arch;
  typedef struct BList BList;
  typedef struct Block Block;
  typedef struct Cache Cache;
  typedef struct Disk Disk;
  typedef struct Entry Entry;
  typedef struct Header Header;
  typedef struct Label Label;
  typedef struct Periodic Periodic;
  typedef struct Snap Snap;
  typedef struct Source Source;
  typedef struct Super Super;
  typedef struct WalkPtr WalkPtr;
  /* tunable parameters - probably should not be constants */
  enum {
      BytesPerEntry = 100,   /* estimated bytes per dir entry - determines number of index entries in the block */
      FullPercentage = 80,   /* don't allocate in block if more than this percentage full */
      FlushSize = 200,       /* number of blocks to flush */
      DirtyPercentage = 50,  /* maximum percentage of dirty blocks */
  };
  /*
   * Block address limits.
   * Block addresses are kept in 32-bit fields (see the u32int addr in
   * struct Block), so NilBlock is written as an explicit 32-bit all-ones
   * value.  Writing it as ~0UL ties its width to unsigned long, which
   * is 64 bits on LP64 systems and would then never compare equal to
   * a 32-bit address.  Where unsigned long is 32 bits the value is
   * unchanged.
   */
  enum {
      NilBlock = 0xFFFFFFFFUL,  /* all 32 bits set */
      MaxBlock = (1UL<<31),
  };
  /*
   * Constants for the Header, Super and Label structures:
   * magic numbers, versions, sizes and the header offset.
   */
  enum {
      HeaderMagic = 0x3776ae89,
      HeaderVersion = 1,
      HeaderOffset = 128*1024,
      HeaderSize = 512,
      SuperMagic = 0x2340a3b1,
      SuperSize = 512,
      SuperVersion = 1,
      LabelSize = 14,  /* equals the summed field widths of struct Label: 1+1+4+4+4 */
  };
  /* well known tags */
  enum {
      BadTag = 0,    /* this tag should not be used */
      RootTag = 1,   /* root of fs */
      EnumTag,       /* root of a dir listing; implicitly RootTag+1 == 2 */
      UserTag = 32,  /* all other tags should be >= UserTag */
  };
  42. struct Super {
  43. u16int version;
  44. u32int epochLow;
  45. u32int epochHigh;
  46. u64int qid; /* next qid */
  47. u32int active; /* root of active file system */
  48. u32int next; /* root of next snapshot to archive */
  49. u32int current; /* root of snapshot currently archiving */
  50. uchar last[VtScoreSize]; /* last snapshot successfully archived */
  51. char name[128]; /* label */
  52. };
  53. struct Fs {
  54. Arch *arch; /* immutable */
  55. Cache *cache; /* immutable */
  56. int mode; /* immutable */
  57. int blockSize; /* immutable */
  58. VtSession *z; /* immutable */
  59. Snap *snap; /* immutable */
  60. Periodic *metaFlush; /* periodically flushes meta data cached in files */
  61. /*
  62. * epoch lock.
  63. * Most operations on the fs require a read lock of elk, ensuring that
  64. * the current high and low epochs do not change under foot.
  65. * This lock is mostly acquired via a call to fileLock or fileRlock.
  66. * Deletion and creation of snapshots occurs under a write lock of elk,
  67. * ensuring no file operations are occurring concurrently.
  68. */
  69. VtLock *elk; /* epoch lock */
  70. u32int ehi; /* epoch high */
  71. u32int elo; /* epoch low */
  72. int halted; /* epoch lock is held to halt (console initiated) */
  73. Source *source; /* immutable: root of sources */
  74. File *file; /* immutable: root of files */
  75. };
  76. /*
  77. * variant on VtEntry
  78. * there are extra fields when stored locally
  79. */
  80. struct Entry {
  81. u32int gen; /* generation number */
  82. ushort psize; /* pointer block size */
  83. ushort dsize; /* data block size */
  84. uchar depth; /* unpacked from flags */
  85. uchar flags;
  86. uvlong size;
  87. uchar score[VtScoreSize];
  88. u32int tag; /* tag for local blocks: zero if stored on Venti */
  89. u32int snap; /* non zero -> entering snapshot of given epoch */
  90. uchar archive; /* archive this snapshot: only valid for snap != 0 */
  91. };
  92. struct Source {
  93. Fs *fs; /* immutable */
  94. int mode; /* immutable */
  95. u32int gen; /* immutable */
  96. int dsize; /* immutable */
  97. int dir; /* immutable */
  98. Source *parent; /* immutable */
  99. VtLock *lk;
  100. int ref;
  101. /*
  102. * epoch for the source
  103. * for ReadWrite sources, epoch is used to lazily notice
  104. * sources that must be split from the snapshots.
  105. * for ReadOnly sources, the epoch represents the minimum epoch
  106. * along the chain from the root, and is used to lazily notice
  107. * sources that have become invalid because they belong to an old
  108. * snapshot.
  109. */
  110. u32int epoch;
  111. Block *b; /* block containing this source */
  112. uchar score[VtScoreSize]; /* score of block containing this source */
  113. u32int scoreEpoch; /* epoch of block containing this source */
  114. int epb; /* immutable: entries per block in parent */
  115. u32int tag; /* immutable: tag of parent */
  116. u32int offset; /* immutable: entry offset in parent */
  117. };
  118. struct Header {
  119. ushort version;
  120. ushort blockSize;
  121. ulong super; /* super blocks */
  122. ulong label; /* start of labels */
  123. ulong data; /* end of labels - start of data blocks */
  124. ulong end; /* end of data blocks */
  125. };
  126. /*
  127. * contains a one block buffer
  128. * to avoid problems of the block changing underfoot
  129. * and to enable an interface that supports unget.
  130. */
  131. struct DirEntryEnum {
  132. File *file;
  133. u32int boff; /* block offset */
  134. int i, n;
  135. DirEntry *buf;
  136. };
  /*
   * Block states (Bs*).
   * BsFree and BsBad are whole-value states; the remaining constants
   * are independent bit flags, and BsMask is the union of those flags.
   */
  enum {
      BsFree = 0,         /* available for allocation */
      BsBad = 0xFF,       /* something is wrong with this block */

      /* bit fields */
      BsAlloc = 1<<0,     /* block is in use */
      BsCopied = 1<<1,    /* block has been copied */
      BsVenti = 1<<2,     /* block has been stored on Venti */
      BsClosed = 1<<3,    /* block has been unlinked from active file system */
      BsMask = BsAlloc|BsCopied|BsVenti|BsClosed,
  };
  148. /*
  149. * Each block has a state and generation
  150. * The following invariants are maintained
  151. * Each block has no more than one parent per generation
  152. * For Active*, no child has a parent of a greater generation
  153. * For Snap*, there is a snap parent of given generation and there are
  154. * no parents of greater gen - implies no children snaps
  155. * of a lesser gen
  156. * For *RO, the block is fixed - no change can be made - all pointers
  157. * are valid venti addresses
  158. * For *A, the block is on the venti server
  159. * There are no pointers to Zombie blocks
  160. *
  161. * Transitions
  162. * Archiver at generation g
  163. * Mutator at generation h
  164. *
  165. * Want to modify a block
  166. * Venti: create new Active(h)
  167. * Active(x): x == h: do nothing
  168. * Active(x): x < h: change to Snap(h-1) + add Active(h)
  169. * ActiveRO(x): change to SnapRO(h-1) + add Active(h)
  170. * ActiveA(x): add Active(h)
  171. * Snap*(x): should not occur
  172. * Zombie(x): should not occur
  173. * Want to archive
  174. * Active(x): x != g: should never happen
  175. * Active(x): x == g fix children and free them: move to ActiveRO(g);
  176. * ActiveRO(x): x != g: should never happen
  177. * ActiveRO(x): x == g: wait until it hits ActiveA or SnapA
  178. * ActiveA(x): done
  179. * Snap(x): x < g: should never happen
  180. * Snap(x): x >= g: fix children, freeing all SnapA(y) x == y;
  181. * SnapRO(x): wait until it hits SnapA
  182. *
  183. */
  /*
   * block types
   * more regular than Venti block types
   * bit 3 -> directory block (set) or data block (clear)
   * bits 2-0 -> level of block
   */
  enum {
      BtData,             /* 0: data block at level 0 */
      BtDir = 1<<3,       /* directory bit */
      BtLevelMask = 7,    /* mask for the level in bits 2-0 */
      BtMax = 1<<4,       /* one past the largest type value */
  };
  /* io states; values are consecutive from 0, BioMax is the count */
  enum {
      BioEmpty,       /* label & data are not valid */
      BioLabel,       /* label is good */
      BioClean,       /* data is on the disk */
      BioDirty,       /* data is not yet on the disk */
      BioReading,     /* in process of reading data */
      BioWriting,     /* in process of writing data */
      BioReadError,   /* error reading: assume disk always handles write errors */
      BioVentiError,  /* error reading from venti (probably disconnected) */
      BioMax
  };
  208. struct Label {
  209. uchar type;
  210. uchar state;
  211. u32int tag;
  212. u32int epoch;
  213. u32int epochClose;
  214. };
  215. struct Block {
  216. Cache *c;
  217. int ref;
  218. int nlock;
  219. ulong pc; /* pc that fetched this block from the cache */
  220. VtLock *lk;
  221. int part;
  222. u32int addr;
  223. uchar score[VtScoreSize]; /* score */
  224. Label l;
  225. uchar *dmap;
  226. uchar *data;
  227. /* the following is private; used by cache */
  228. Block *next; /* doubly linked hash chains */
  229. Block **prev;
  230. u32int heap; /* index in heap table */
  231. u32int used; /* last reference times */
  232. u32int vers; /* version of dirty flag */
  233. BList *uhead; /* blocks to unlink when this block is written */
  234. BList *utail;
  235. /* block ordering for cache -> disk */
  236. BList *prior; /* list of blocks before this one */
  237. Block *ionext;
  238. int iostate;
  239. VtRendez *ioready;
  240. };
  241. /* tree walker, for gc and archiver */
  242. struct WalkPtr
  243. {
  244. uchar *data;
  245. int isEntry;
  246. int n;
  247. int m;
  248. Entry e;
  249. uchar type;
  250. u32int tag;
  251. };
  /* disk partitions; values are consecutive from 0 */
  enum {
      PartError,
      PartSuper,
      PartLabel,
      PartData,
      PartVenti,  /* fake partition */
  };
  260. extern vtType[BtMax];