dat.h 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. typedef struct Arch Arch;
  2. typedef struct BList BList;
  3. typedef struct Block Block;
  4. typedef struct Cache Cache;
  5. typedef struct Disk Disk;
  6. typedef struct Entry Entry;
  7. typedef struct Header Header;
  8. typedef struct Label Label;
  9. typedef struct Periodic Periodic;
  10. typedef struct Snap Snap;
  11. typedef struct Source Source;
  12. typedef struct Super Super;
  13. typedef struct WalkPtr WalkPtr;
  14. /* tuneable parameters - probably should not be constants */
  15. enum {
  16. BytesPerEntry = 100, /* estimate of bytes per dir entries - determines number of index entries in the block */
  17. FullPercentage = 80, /* don't allocate in block if more than this percentage full */
  18. FlushSize = 200, /* number of blocks to flush */
  19. DirtyPercentage = 50, /* maximum percentage of dirty blocks */
  20. };
  21. enum {
  22. NilBlock = (~0UL),
  23. MaxBlock = (1UL<<31),
  24. };
  25. enum {
  26. HeaderMagic = 0x3776ae89,
  27. HeaderVersion = 1,
  28. HeaderOffset = 128*1024,
  29. HeaderSize = 512,
  30. SuperMagic = 0x2340a3b1,
  31. SuperSize = 512,
  32. SuperVersion = 1,
  33. LabelSize = 14,
  34. };
  35. /* well known tags */
  36. enum {
  37. BadTag = 0, /* this tag should not be used */
  38. RootTag = 1, /* root of fs */
  39. EnumTag, /* root of a dir listing */
  40. UserTag = 32, /* all other tags should be >= UserTag */
  41. };
  42. struct Super {
  43. u16int version;
  44. u32int epochLow;
  45. u32int epochHigh;
  46. u64int qid; /* next qid */
  47. u32int active; /* root of active file system */
  48. u32int next; /* root of next snapshot to archive */
  49. u32int current; /* root of snapshot currently archiving */
  50. uchar last[VtScoreSize]; /* last snapshot successfully archived */
  51. char name[128]; /* label */
  52. };
  53. struct Fs {
  54. Arch *arch; /* immutable */
  55. Cache *cache; /* immutable */
  56. int mode; /* immutable */
  57. int blockSize; /* immutable */
  58. VtSession *z; /* immutable */
  59. Snap *snap; /* immutable */
  60. Periodic *metaFlush; /* periodically flushes meta data cached in files */
  61. /*
  62. * epoch lock.
  63. * Most operations on the fs require a read lock of elk, ensuring that
  64. * the current high and low epochs do not change under foot.
  65. * This lock is mostly acquired via a call to fileLock or fileRlock.
  66. * Deletion and creation of snapshots occurs under a write lock of elk,
  67. * ensuring no file operations are occurring concurrently.
  68. */
  69. VtLock *elk; /* epoch lock */
  70. u32int ehi; /* epoch high */
  71. u32int elo; /* epoch low */
  72. Source *source; /* immutable: root of sources */
  73. File *file; /* immutable: root of files */
  74. };
  75. /*
  76. * variant on VtEntry
  77. * there are extra fields when stored locally
  78. */
  79. struct Entry {
  80. u32int gen; /* generation number */
  81. ushort psize; /* pointer block size */
  82. ushort dsize; /* data block size */
  83. uchar depth; /* unpacked from flags */
  84. uchar flags;
  85. uvlong size;
  86. uchar score[VtScoreSize];
  87. u32int tag; /* tag for local blocks: zero if stored on Venti */
  88. u32int snap; /* non zero -> entering snapshot of given epoch */
  89. uchar archive; /* archive this snapshot: only valid for snap != 0 */
  90. };
  91. struct Source {
  92. Fs *fs; /* immutable */
  93. int mode; /* immutable */
  94. u32int gen; /* immutable */
  95. int dsize; /* immutable */
  96. int dir; /* immutable */
  97. Source *parent; /* immutable */
  98. VtLock *lk;
  99. int ref;
  100. /*
  101. * epoch for the source
  102. * for ReadWrite sources, epoch is used to lazily notice
  103. * sources that must be split from the snapshots.
  104. * for ReadOnly sources, the epoch represents the minimum epoch
  105. * along the chain from the root, and is used to lazily notice
  106. * sources that have become invalid because they belong to an old
  107. * snapshot.
  108. */
  109. u32int epoch;
  110. Block *b; /* block containing this source */
  111. uchar score[VtScoreSize]; /* score of block containing this source */
  112. u32int scoreEpoch; /* epoch of block containing this source */
  113. int epb; /* immutable: entries per block in parent */
  114. u32int tag; /* immutable: tag of parent */
  115. u32int offset; /* immutable: entry offset in parent */
  116. };
  117. struct Header {
  118. ushort version;
  119. ushort blockSize;
  120. ulong super; /* super blocks */
  121. ulong label; /* start of labels */
  122. ulong data; /* end of labels - start of data blocks */
  123. ulong end; /* end of data blocks */
  124. };
  125. /*
  126. * contains a one block buffer
  127. * to avoid problems of the block changing underfoot
  128. * and to enable an interface that supports unget.
  129. */
  130. struct DirEntryEnum {
  131. File *file;
  132. u32int boff; /* block offset */
  133. int i, n;
  134. DirEntry *buf;
  135. };
  136. /* Block states; two orthogonal fields, Bv* and Ba* */
  137. enum {
  138. BsFree = 0, /* available for allocation */
  139. BsBad = 0xFF, /* something is wrong with this block */
  140. /* bit fields */
  141. BsAlloc = 1<<0, /* block is in use */
  142. BsCopied = 1<<1, /* block has been copied */
  143. BsVenti = 1<<2, /* block has been stored on Venti */
  144. BsClosed = 1<<3, /* block has been unlinked from active file system */
  145. BsMask = BsAlloc|BsCopied|BsVenti|BsClosed,
  146. };
/*
 * Each block has a state and generation
 * The following invariants are maintained
 *	Each block has no more than one parent per generation
 *	For Active*, no child has a parent of a greater generation
 *	For Snap*, there is a snap parent of given generation and there are
 *		no parents of greater gen - implies no children snaps
 *		of a lesser gen
 *	For *RO, the block is fixed - no change can be made - all pointers
 *		are valid venti addresses
 *	For *A, the block is on the venti server
 *	There are no pointers to Zombie blocks
 *
 * Transitions
 *	Archiver at generation g
 *	Mutator at generation h
 *
 *	Want to modify a block
 *		Venti: create new Active(h)
 *		Active(x): x == h: do nothing
 *		Active(x): x < h: change to Snap(h-1) + add Active(h)
 *		ActiveRO(x): change to SnapRO(h-1) + add Active(h)
 *		ActiveA(x): add Active(h)
 *		Snap*(x): should not occur
 *		Zombie(x): should not occur
 *	Want to archive
 *		Active(x): x != g: should never happen
 *		Active(x): x == g fix children and free them: move to ActiveRO(g);
 *		ActiveRO(x): x != g: should never happen
 *		ActiveRO(x): x == g: wait until it hits ActiveA or SnapA
 *		ActiveA(x): done
 *		Snap(x): x < g: should never happen
 *		Snap(x): x >= g: fix children, freeing all SnapA(y) x == y;
 *		SnapRO(x): wait until it hits SnapA
 *
 */
  183. /*
  184. * block types
  185. * more regular than Venti block types
  186. * bit 3 -> block or data block
  187. * bits 2-0 -> level of block
  188. */
  189. enum {
  190. BtData,
  191. BtDir = 1<<3,
  192. BtLevelMask = 7,
  193. BtMax = 1<<4,
  194. };
  195. /* io states */
  196. enum {
  197. BioEmpty, /* label & data are not valid */
  198. BioLabel, /* label is good */
  199. BioClean, /* data is on the disk */
  200. BioDirty, /* data is not yet on the disk */
  201. BioReading, /* in process of reading data */
  202. BioWriting, /* in process of writing data */
  203. BioReadError, /* error reading: assume disk always handles write errors */
  204. BioVentiError, /* error reading from venti (probably disconnected) */
  205. BioMax
  206. };
  207. struct Label {
  208. uchar type;
  209. uchar state;
  210. u32int tag;
  211. u32int epoch;
  212. u32int epochClose;
  213. };
  214. struct Block {
  215. Cache *c;
  216. int ref;
  217. int nlock;
  218. ulong pc; /* pc that fetched this block from the cache */
  219. VtLock *lk;
  220. int part;
  221. u32int addr;
  222. uchar score[VtScoreSize]; /* score */
  223. Label l;
  224. uchar *dmap;
  225. uchar *data;
  226. /* the following is private; used by cache */
  227. Block *next; /* doubly linked hash chains */
  228. Block **prev;
  229. u32int heap; /* index in heap table */
  230. u32int used; /* last reference times */
  231. u32int vers; /* version of dirty flag */
  232. BList *uhead; /* blocks to unlink when this block is written */
  233. BList *utail;
  234. /* block ordering for cache -> disk */
  235. BList *prior; /* list of blocks before this one */
  236. Block *ionext;
  237. int iostate;
  238. VtRendez *ioready;
  239. };
  240. /* tree walker, for gc and archiver */
  241. struct WalkPtr
  242. {
  243. uchar *data;
  244. int isEntry;
  245. int n;
  246. int m;
  247. Entry e;
  248. uchar type;
  249. u32int tag;
  250. };
  251. /* disk partitions */
  252. enum {
  253. PartError,
  254. PartSuper,
  255. PartLabel,
  256. PartData,
  257. PartVenti, /* fake partition */
  258. };
  259. extern vtType[BtMax];