Browse Source

Plan 9 from Bell Labs 2007-09-06

David du Colombier 16 years ago
parent
commit
6db58b0c9b
100 changed files with 17372 additions and 7330 deletions
  1. 206 97
      dist/replica/_plan9.db
  2. 204 99
      dist/replica/plan9.db
  3. 275 0
      dist/replica/plan9.log
  4. 271 0
      sys/include/oventi.h
  5. 457 232
      sys/include/venti.h
  6. 158 0
      sys/man/1/venti
  7. 246 0
      sys/man/2/venti-cache
  8. 190 0
      sys/man/2/venti-client
  9. 200 0
      sys/man/2/venti-conn
  10. 275 0
      sys/man/2/venti-fcall
  11. 325 0
      sys/man/2/venti-file
  12. 136 0
      sys/man/2/venti-log
  13. 66 0
      sys/man/2/venti-mem
  14. 281 0
      sys/man/2/venti-packet
  15. 122 0
      sys/man/2/venti-server
  16. 56 0
      sys/man/2/venti-zero
  17. 3 0
      sys/man/4/cwfs
  18. 451 0
      sys/man/6/venti
  19. 426 164
      sys/man/8/venti
  20. 106 0
      sys/man/8/venti-backup
  21. 404 0
      sys/man/8/venti-fmt
  22. 0 507
      sys/man/8/ventiaux
  23. 3 6
      sys/src/cmd/aquarela/nblistener.c
  24. 10 3
      sys/src/cmd/cwfs/main.c
  25. 1 1
      sys/src/cmd/fossil/mkfile
  26. 1 1
      sys/src/cmd/fossil/stdinc.h
  27. 1 0
      sys/src/cmd/oventi/README
  28. 0 0
      sys/src/cmd/oventi/backup.example
  29. 0 0
      sys/src/cmd/oventi/conf.rc
  30. 193 0
      sys/src/cmd/oventi/copy.c
  31. 0 0
      sys/src/cmd/oventi/dat.h
  32. 0 0
      sys/src/cmd/oventi/dumpvacroots
  33. 0 0
      sys/src/cmd/oventi/fns.h
  34. 20 0
      sys/src/cmd/oventi/mkfile
  35. 0 0
      sys/src/cmd/oventi/notes
  36. 1 1
      sys/src/cmd/oventi/stdinc.h
  37. 0 0
      sys/src/cmd/oventi/venti.conf
  38. 0 0
      sys/src/cmd/oventi/wrtape
  39. 1 0
      sys/src/cmd/vac/cache.c
  40. 3 1
      sys/src/cmd/vac/fs.c
  41. 1 1
      sys/src/cmd/vac/mkfile
  42. 1 1
      sys/src/cmd/vac/stdinc.h
  43. 0 649
      sys/src/cmd/venti/arena.c
  44. 0 404
      sys/src/cmd/venti/arenas.c
  45. 0 147
      sys/src/cmd/venti/buildindex.c
  46. 0 122
      sys/src/cmd/venti/checkarenas.c
  47. 0 189
      sys/src/cmd/venti/checkindex.c
  48. 0 197
      sys/src/cmd/venti/clump.c
  49. 0 471
      sys/src/cmd/venti/conv.c
  50. 165 95
      sys/src/cmd/venti/copy.c
  51. 0 373
      sys/src/cmd/venti/dcache.c
  52. 79 0
      sys/src/cmd/venti/devnull.c
  53. 0 365
      sys/src/cmd/venti/dumparena.c
  54. 0 137
      sys/src/cmd/venti/findscore.c
  55. 0 109
      sys/src/cmd/venti/fmtarenas.c
  56. 0 75
      sys/src/cmd/venti/fmtisect.c
  57. 0 438
      sys/src/cmd/venti/httpd.c
  58. 0 201
      sys/src/cmd/venti/icache.c
  59. 0 781
      sys/src/cmd/venti/index.c
  60. 0 205
      sys/src/cmd/venti/lump.c
  61. 0 382
      sys/src/cmd/venti/lumpcache.c
  62. 0 153
      sys/src/cmd/venti/lumpqueue.c
  63. 11 113
      sys/src/cmd/venti/mkfile
  64. 27 28
      sys/src/cmd/venti/mkroot.c
  65. 0 47
      sys/src/cmd/venti/mkxml
  66. 0 42
      sys/src/cmd/venti/mkxml.elems
  67. 0 128
      sys/src/cmd/venti/part.c
  68. 0 137
      sys/src/cmd/venti/printarena.c
  69. 335 0
      sys/src/cmd/venti/randtest.c
  70. 36 64
      sys/src/cmd/venti/read.c
  71. 112 0
      sys/src/cmd/venti/readlist.c
  72. 112 0
      sys/src/cmd/venti/ro.c
  73. 72 0
      sys/src/cmd/venti/root.c
  74. 756 0
      sys/src/cmd/venti/srv/arena.c
  75. 415 0
      sys/src/cmd/venti/srv/arenas.c
  76. 260 0
      sys/src/cmd/venti/srv/bloom.c
  77. 38 18
      sys/src/cmd/venti/srv/buildbuck.c
  78. 945 0
      sys/src/cmd/venti/srv/buildindex.c
  79. 137 0
      sys/src/cmd/venti/srv/checkarenas.c
  80. 295 0
      sys/src/cmd/venti/srv/checkindex.c
  81. 227 0
      sys/src/cmd/venti/srv/clump.c
  82. 29 33
      sys/src/cmd/venti/srv/clumpstats.c
  83. 249 0
      sys/src/cmd/venti/srv/cmparena.c
  84. 317 0
      sys/src/cmd/venti/srv/cmparenas.c
  85. 96 69
      sys/src/cmd/venti/srv/config.c
  86. 698 0
      sys/src/cmd/venti/srv/conv.c
  87. 731 0
      sys/src/cmd/venti/srv/dat.h
  88. 862 0
      sys/src/cmd/venti/srv/dcache.c
  89. 88 0
      sys/src/cmd/venti/srv/disksched.c
  90. 13 13
      sys/src/cmd/venti/srv/dump.c
  91. 122 0
      sys/src/cmd/venti/srv/findscore.c
  92. 1911 0
      sys/src/cmd/venti/srv/fixarenas.c
  93. 1660 0
      sys/src/cmd/venti/srv/fixarenas0.c
  94. 132 0
      sys/src/cmd/venti/srv/fmtarenas.c
  95. 116 0
      sys/src/cmd/venti/srv/fmtbloom.c
  96. 31 31
      sys/src/cmd/venti/srv/fmtindex.c
  97. 83 0
      sys/src/cmd/venti/srv/fmtisect.c
  98. 225 0
      sys/src/cmd/venti/srv/fns.h
  99. 197 0
      sys/src/cmd/venti/srv/graph.c
  100. 696 0
      sys/src/cmd/venti/srv/hdisk.c

+ 206 - 97
dist/replica/_plan9.db

@@ -120,7 +120,7 @@
 386/bin/aux/ms2 - 775 sys sys 1188530147 88499
 386/bin/aux/msexceltables - 775 sys sys 1168402283 82921
 386/bin/aux/mswordstrings - 775 sys sys 1168402283 65541
-386/bin/aux/na - 775 sys sys 1168402284 154423
+386/bin/aux/na - 775 sys sys 1189023884 154372
 386/bin/aux/nfsmount - 775 sys sys 1184731233 233217
 386/bin/aux/nfsserver - 775 sys sys 1182223285 184719
 386/bin/aux/olefs - 775 sys sys 1179372075 148111
@@ -191,7 +191,7 @@
 386/bin/cpp - 775 sys sys 1188446882 149057
 386/bin/cpu - 775 sys sys 1188446885 139216
 386/bin/crop - 775 sys sys 1168402298 116374
-386/bin/cwfs - 775 sys sys 1188446894 365325
+386/bin/cwfs - 775 sys sys 1189033352 365605
 386/bin/date - 775 sys sys 1178568265 41845
 386/bin/db - 775 sys sys 1188499570 349188
 386/bin/dc - 775 sys sys 1168402299 99260
@@ -569,6 +569,7 @@
 386/lib/libmemlayer.a - 664 sys sys 1168402369 47360
 386/lib/libmp.a - 664 sys sys 1188447454 79978
 386/lib/libndb.a - 664 sys sys 1187061208 64470
+386/lib/liboventi.a - 664 sys sys 1188621781 99062
 386/lib/libplumb.a - 664 sys sys 1168402370 19408
 386/lib/libregexp.a - 664 sys sys 1181507273 37290
 386/lib/libscribble.a - 664 sys sys 1175972562 107238
@@ -576,7 +577,7 @@
 386/lib/libstdio.a - 664 sys sys 1176432133 126062
 386/lib/libsunrpc.a - 664 sys sys 1187061209 353148
 386/lib/libthread.a - 664 sys sys 1184731247 71918
-386/lib/libventi.a - 664 sys sys 1188447470 97858
+386/lib/libventi.a - 664 sys sys 1188621843 191156
 386/mbr - 775 sys sys 1131317338 407
 386/mkfile - 664 sys sys 948141303 46
 386/pbs - 775 sys sys 1143465402 495
@@ -6093,6 +6094,7 @@ sys/include/mouse.h - 664 sys sys 1035232010 1003
 sys/include/mp.h - 664 sys sys 1176499134 4803
 sys/include/ndb.h - 664 sys sys 1144174492 4412
 sys/include/nfs3.h - 664 sys sys 1045589438 15082
+sys/include/oventi.h - 664 sys sys 1188621731 7152
 sys/include/plumb.h - 664 sys sys 1014929065 989
 sys/include/pool.h - 664 sys sys 1102093074 1219
 sys/include/rdbg.h - 664 sys sys 1014929066 95
@@ -6104,7 +6106,7 @@ sys/include/sunrpc.h - 664 sys sys 1046367129 7219
 sys/include/thread.h - 664 sys sys 1184471649 3586
 sys/include/tos.h - 664 sys sys 1091904418 575
 sys/include/trace.h - 664 sys sys 1138460022 640
-sys/include/venti.h - 664 sys sys 1091904426 7150
+sys/include/venti.h - 664 sys sys 1188621741 9941
 sys/lib - 20000000775 sys sys 1105564897 0
 sys/lib/acid - 20000000775 sys sys 1114524607 0
 sys/lib/acid/386 - 664 sys sys 1168035402 3188
@@ -7476,6 +7478,7 @@ sys/man/1/uniq - 664 sys sys 944959674 995
 sys/man/1/units - 664 sys sys 1113743326 2046
 sys/man/1/uptime - 664 sys sys 1074733782 380
 sys/man/1/vac - 664 sys sys 1162102172 3244
+sys/man/1/venti - 664 sys sys 1188620038 2445
 sys/man/1/vi - 664 sys sys 1101668051 2904
 sys/man/1/vnc - 664 sys sys 1158063994 4313
 sys/man/1/vt - 664 sys sys 1186695303 2424
@@ -7630,6 +7633,16 @@ sys/man/2/symbol - 664 sys sys 950892874 9423
 sys/man/2/thread - 664 sys sys 1188841215 11916
 sys/man/2/time - 664 sys sys 1182980757 736
 sys/man/2/tmpfile - 664 sys sys 1048637161 1157
+sys/man/2/venti-cache - 664 sys sys 1188620038 5022
+sys/man/2/venti-client - 664 sys sys 1188620038 3339
+sys/man/2/venti-conn - 664 sys sys 1188620038 3600
+sys/man/2/venti-fcall - 664 sys sys 1188620038 4739
+sys/man/2/venti-file - 664 sys sys 1188620038 5840
+sys/man/2/venti-log - 664 sys sys 1188620038 2313
+sys/man/2/venti-mem - 664 sys sys 1188620038 1023
+sys/man/2/venti-packet - 664 sys sys 1188620038 4573
+sys/man/2/venti-server - 664 sys sys 1188620038 2201
+sys/man/2/venti-zero - 664 sys sys 1188620038 1064
 sys/man/2/wait - 664 sys sys 1163004064 2525
 sys/man/2/window - 664 sys sys 950593499 5522
 sys/man/3 - 20000000775 sys sys 1123100836 0
@@ -7680,7 +7693,7 @@ sys/man/4/archfs - 664 sys sys 960000712 533
 sys/man/4/cdfs - 664 sys sys 1026846913 3638
 sys/man/4/cfs - 664 sys sys 1172762903 1813
 sys/man/4/consolefs - 664 sys sys 1144424854 4245
-sys/man/4/cwfs - 664 sys sys 1178224996 6225
+sys/man/4/cwfs - 664 sys sys 1189029640 6255
 sys/man/4/dossrv - 664 sys sys 1168307403 4334
 sys/man/4/execnet - 664 sys sys 1019866708 1069
 sys/man/4/exportfs - 664 sys sys 1145881912 4655
@@ -7763,7 +7776,8 @@ sys/man/6/snap - 664 sys sys 1132452694 2402
 sys/man/6/thumbprint - 664 sys sys 1019866709 1124
 sys/man/6/users - 664 sys sys 1130912014 1392
 sys/man/6/utf - 664 sys sys 1146582112 2430
-sys/man/6/venti.conf - 664 sys sys 1164860945 1929
+sys/man/6/venti - 664 sys sys 1188620038 10695
+sys/man/6/venti.conf - 664 sys sys 1164860473 1929
 sys/man/6/vgadb - 664 sys sys 960256513 10972
 sys/man/7 - 20000000775 sys sys 1103794042 0
 sys/man/7/0intro - 664 sys sys 944959677 256
@@ -7842,8 +7856,9 @@ sys/man/8/tlssrv - 664 sys sys 1165623041 2589
 sys/man/8/trampoline - 664 sys sys 1126104844 1199
 sys/man/8/udpecho - 664 sys sys 954305553 303
 sys/man/8/update - 664 sys sys 961259288 2336
-sys/man/8/venti - 664 sys sys 1164860929 5434
-sys/man/8/ventiaux - 664 sys sys 1159419552 10571
+sys/man/8/venti - 664 sys sys 1188620038 12188
+sys/man/8/venti-backup - 664 sys sys 1188620038 2102
+sys/man/8/venti-fmt - 664 sys sys 1188620038 8626
 sys/man/8/vga - 664 sys sys 1141660952 3856
 sys/man/fonts - 664 sys sys 944959700 218
 sys/man/index.html - 664 sys sys 1019918444 1859
@@ -10071,7 +10086,7 @@ sys/src/cmd/cwfs/io.h - 664 sys sys 1174280312 866
 sys/src/cmd/cwfs/iobuf.c - 664 sys sys 1176500092 4722
 sys/src/cmd/cwfs/juke.c - 664 sys sys 1176500144 28859
 sys/src/cmd/cwfs/lrand.c - 664 sys sys 1171160167 1070
-sys/src/cmd/cwfs/main.c - 664 sys sys 1174799729 9976
+sys/src/cmd/cwfs/main.c - 664 sys sys 1189033288 10146
 sys/src/cmd/cwfs/malloc.c - 664 sys sys 1174281557 2360
 sys/src/cmd/cwfs/mkfile - 664 sys sys 1174941889 201
 sys/src/cmd/cwfs/mworm.c - 664 sys sys 1174370308 4311
@@ -10369,13 +10384,13 @@ sys/src/cmd/fossil/fs.h - 664 sys sys 1139667269 1581
 sys/src/cmd/fossil/history - 664 sys sys 1055703793 1400
 sys/src/cmd/fossil/invariants - 664 sys sys 1042005509 4073
 sys/src/cmd/fossil/last.c - 664 sys sys 1087005593 812
-sys/src/cmd/fossil/mkfile - 664 sys sys 1139667242 2640
+sys/src/cmd/fossil/mkfile - 664 sys sys 1189020178 2641
 sys/src/cmd/fossil/nobwatch.c - 664 sys sys 1042005509 329
 sys/src/cmd/fossil/pack.c - 664 sys sys 1187136436 4710
 sys/src/cmd/fossil/periodic.c - 664 sys sys 1061530726 1087
 sys/src/cmd/fossil/source.c - 664 sys sys 1187135867 20578
 sys/src/cmd/fossil/srcload.c - 664 sys sys 1042005510 4178
-sys/src/cmd/fossil/stdinc.h - 664 sys sys 1042005510 155
+sys/src/cmd/fossil/stdinc.h - 664 sys sys 1189020178 156
 sys/src/cmd/fossil/trunc.c - 664 sys sys 1042005511 280
 sys/src/cmd/fossil/unpack - 775 sys sys 1042005511 286
 sys/src/cmd/fossil/vac.c - 664 sys sys 1061530727 12483
@@ -12781,6 +12796,19 @@ sys/src/cmd/nfs.c - 664 sys sys 1050068720 31096
 sys/src/cmd/nm.c - 664 sys sys 1148106943 5322
 sys/src/cmd/nntpfs.c - 664 sys sys 1143695271 18860
 sys/src/cmd/ns.c - 664 sys sys 984717934 3558
+sys/src/cmd/oventi - 20000000775 sys sys 1189019154 0
+sys/src/cmd/oventi/README - 664 sys sys 1189018077 63
+sys/src/cmd/oventi/backup.example - 775 sys sys 1045503662 521
+sys/src/cmd/oventi/conf.rc - 775 sys sys 1055707446 1416
+sys/src/cmd/oventi/copy.c - 664 sys sys 1158618728 3713
+sys/src/cmd/oventi/dat.h - 664 sys sys 1128337707 15346
+sys/src/cmd/oventi/dumpvacroots - 775 sys sys 1163468045 571
+sys/src/cmd/oventi/fns.h - 664 sys sys 1055707422 6793
+sys/src/cmd/oventi/mkfile - 664 sys sys 1189018610 256
+sys/src/cmd/oventi/notes - 664 sys sys 1068500911 4032
+sys/src/cmd/oventi/stdinc.h - 664 sys sys 1188621931 140
+sys/src/cmd/oventi/venti.conf - 664 sys sys 1019867537 397
+sys/src/cmd/oventi/wrtape - 775 sys sys 1019678881 555
 sys/src/cmd/p.c - 664 sys sys 1121977162 1504
 sys/src/cmd/page - 20000000775 sys sys 1045606937 0
 sys/src/cmd/page/filter.c - 664 sys sys 1069793856 2166
@@ -14228,19 +14256,19 @@ sys/src/cmd/va/l.s - 664 sys sys 944961340 12696
 sys/src/cmd/va/lex.c - 664 sys sys 1143293804 12095
 sys/src/cmd/va/mkfile - 664 sys sys 944961340 215
 sys/src/cmd/vac - 20000000775 sys sys 1055699701 0
-sys/src/cmd/vac/cache.c - 664 sys sys 1071245791 15806
+sys/src/cmd/vac/cache.c - 664 sys sys 1189020077 15843
 sys/src/cmd/vac/dat.h - 664 sys sys 1162951177 4029
 sys/src/cmd/vac/error.c - 664 sys sys 1036006057 633
 sys/src/cmd/vac/error.h - 664 sys sys 1036024048 327
 sys/src/cmd/vac/file.c - 664 sys sys 1168307519 19922
 sys/src/cmd/vac/fns.h - 664 sys sys 1036006061 1746
-sys/src/cmd/vac/fs.c - 664 sys sys 1162327826 2902
-sys/src/cmd/vac/mkfile - 664 sys sys 1036024045 434
+sys/src/cmd/vac/fs.c - 664 sys sys 1189020075 2950
+sys/src/cmd/vac/mkfile - 664 sys sys 1189020070 435
 sys/src/cmd/vac/pack.c - 664 sys sys 1036006059 10126
 sys/src/cmd/vac/rtest.c - 664 sys sys 1019678787 1116
 sys/src/cmd/vac/source.c - 664 sys sys 1162327879 6767
 sys/src/cmd/vac/srcload.c - 664 sys sys 1036024047 4925
-sys/src/cmd/vac/stdinc.h - 664 sys sys 1036006059 121
+sys/src/cmd/vac/stdinc.h - 664 sys sys 1189020065 122
 sys/src/cmd/vac/util.c - 664 sys sys 1019678787 930
 sys/src/cmd/vac/vac.c - 664 sys sys 1162350579 23803
 sys/src/cmd/vac/vac.h - 664 sys sys 1091904424 3598
@@ -14264,68 +14292,92 @@ sys/src/cmd/vc/swt.c - 664 sys sys 1143384791 10367
 sys/src/cmd/vc/txt.c - 664 sys sys 1168702346 22737
 sys/src/cmd/vc/v.out.h - 664 sys sys 1089299165 2630
 sys/src/cmd/venti - 20000000775 sys sys 1068500904 0
-sys/src/cmd/venti/arena.c - 664 sys sys 1045503653 13221
-sys/src/cmd/venti/arenas.c - 664 sys sys 1063854192 7701
-sys/src/cmd/venti/backup.example - 775 sys sys 1045503662 521
-sys/src/cmd/venti/buildbuck.c - 664 sys sys 1019678877 2282
-sys/src/cmd/venti/buildindex.c - 664 sys sys 1143759340 3162
-sys/src/cmd/venti/checkarenas.c - 664 sys sys 1019854295 1894
-sys/src/cmd/venti/checkindex.c - 664 sys sys 1143759337 4094
-sys/src/cmd/venti/clump.c - 664 sys sys 1019678877 4148
-sys/src/cmd/venti/clumpstats.c - 664 sys sys 1055707430 2239
-sys/src/cmd/venti/conf.rc - 775 sys sys 1055707446 1416
-sys/src/cmd/venti/config.c - 664 sys sys 1138471074 4930
-sys/src/cmd/venti/conv.c - 664 sys sys 1019678878 8202
-sys/src/cmd/venti/copy.c - 664 sys sys 1158618908 3713
-sys/src/cmd/venti/dat.h - 664 sys sys 1132452191 15346
-sys/src/cmd/venti/dcache.c - 664 sys sys 1019678878 7374
-sys/src/cmd/venti/dump.c - 664 sys sys 1068520313 1578
-sys/src/cmd/venti/dumparena.c - 664 sys sys 1019678878 6381
-sys/src/cmd/venti/dumpvacroots - 775 sys sys 1167777001 571
-sys/src/cmd/venti/findscore.c - 664 sys sys 1121977913 2605
-sys/src/cmd/venti/fmtarenas.c - 664 sys sys 1132452192 2213
-sys/src/cmd/venti/fmtindex.c - 664 sys sys 1138471072 2524
-sys/src/cmd/venti/fmtisect.c - 664 sys sys 1132452192 1326
-sys/src/cmd/venti/fns.h - 664 sys sys 1055707422 6793
-sys/src/cmd/venti/httpd.c - 664 sys sys 1045503654 9107
-sys/src/cmd/venti/icache.c - 664 sys sys 1091904425 3974
-sys/src/cmd/venti/ifile.c - 664 sys sys 1055707440 2288
-sys/src/cmd/venti/index.c - 664 sys sys 1045503654 15232
-sys/src/cmd/venti/lump.c - 664 sys sys 1055707441 3844
-sys/src/cmd/venti/lumpcache.c - 664 sys sys 1019678879 7585
-sys/src/cmd/venti/lumpqueue.c - 664 sys sys 1168307536 2278
-sys/src/cmd/venti/mkfile - 664 sys sys 1139839228 1795
-sys/src/cmd/venti/mkroot.c - 664 sys sys 1045503630 1172
-sys/src/cmd/venti/mkxml - 775 sys sys 1019678879 1078
-sys/src/cmd/venti/mkxml.elems - 775 sys sys 1019678880 786
-sys/src/cmd/venti/notes - 664 sys sys 1068500911 4032
-sys/src/cmd/venti/part.c - 664 sys sys 1036389637 2532
-sys/src/cmd/venti/printarena.c - 664 sys sys 1109511493 2675
-sys/src/cmd/venti/rdarena.c - 664 sys sys 1055707432 1621
-sys/src/cmd/venti/read.c - 664 sys sys 1055707434 1723
-sys/src/cmd/venti/score.c - 664 sys sys 1045503655 588
-sys/src/cmd/venti/sortientry.c - 664 sys sys 1143759340 7436
-sys/src/cmd/venti/stats.c - 664 sys sys 1019678880 2099
-sys/src/cmd/venti/stdinc.h - 664 sys sys 1019678880 139
-sys/src/cmd/venti/sync.c - 664 sys sys 1036470213 674
-sys/src/cmd/venti/syncarena.c - 664 sys sys 1183441016 4206
-sys/src/cmd/venti/syncindex.c - 664 sys sys 1055707433 983
-sys/src/cmd/venti/syncindex0.c - 664 sys sys 1036470283 3044
-sys/src/cmd/venti/unittoull.c - 664 sys sys 1019678881 398
-sys/src/cmd/venti/unwhack.c - 664 sys sys 1019678881 3191
-sys/src/cmd/venti/utils.c - 664 sys sys 1127527222 4002
-sys/src/cmd/venti/venti.c - 664 sys sys 1069101830 4626
-sys/src/cmd/venti/venti.conf - 664 sys sys 1019867537 397
-sys/src/cmd/venti/ventifs.c - 664 sys sys 1063854129 6157
-sys/src/cmd/venti/verifyarena.c - 664 sys sys 1019678881 2705
-sys/src/cmd/venti/whack.c - 664 sys sys 1019678881 6375
-sys/src/cmd/venti/whack.h - 664 sys sys 1019678881 966
-sys/src/cmd/venti/wrarena.c - 664 sys sys 1038536023 2948
-sys/src/cmd/venti/write.c - 664 sys sys 1036470214 1051
-sys/src/cmd/venti/wrtape - 775 sys sys 1019678881 555
-sys/src/cmd/venti/xml.c - 664 sys sys 1091904425 2183
-sys/src/cmd/venti/xml.h - 664 sys sys 1019678881 450
-sys/src/cmd/venti/zeropart.c - 664 sys sys 1132452194 877
+sys/src/cmd/venti/copy.c - 664 sys sys 1189017556 5275
+sys/src/cmd/venti/devnull.c - 664 sys sys 1177189434 1225
+sys/src/cmd/venti/mkfile - 664 sys sys 1189017596 358
+sys/src/cmd/venti/mkroot.c - 664 sys sys 1177189435 1178
+sys/src/cmd/venti/randtest.c - 664 sys sys 1177189435 5656
+sys/src/cmd/venti/read.c - 664 sys sys 1177189435 1289
+sys/src/cmd/venti/readlist.c - 664 sys sys 1177189435 1934
+sys/src/cmd/venti/ro.c - 664 sys sys 1177189435 1886
+sys/src/cmd/venti/root.c - 664 sys sys 1177189435 1329
+sys/src/cmd/venti/srv - 20000000775 sys sys 1189020012 0
+sys/src/cmd/venti/srv/arena.c - 664 sys sys 1178160303 16402
+sys/src/cmd/venti/srv/arenas.c - 664 sys sys 1178160303 8039
+sys/src/cmd/venti/srv/bloom.c - 664 sys sys 1178160303 4529
+sys/src/cmd/venti/srv/buildbuck.c - 664 sys sys 1177189435 2837
+sys/src/cmd/venti/srv/buildindex.c - 664 sys sys 1182131900 21117
+sys/src/cmd/venti/srv/checkarenas.c - 664 sys sys 1177189435 2313
+sys/src/cmd/venti/srv/checkindex.c - 664 sys sys 1179342633 5978
+sys/src/cmd/venti/srv/clump.c - 664 sys sys 1177189435 5329
+sys/src/cmd/venti/srv/clumpstats.c - 664 sys sys 1142736351 2234
+sys/src/cmd/venti/srv/cmparena.c - 664 sys sys 1142736351 5471
+sys/src/cmd/venti/srv/cmparenas.c - 664 sys sys 1177189435 7215
+sys/src/cmd/venti/srv/config.c - 664 sys sys 1178160303 5566
+sys/src/cmd/venti/srv/conv.c - 664 sys sys 1178160303 14052
+sys/src/cmd/venti/srv/dat.h - 664 sys sys 1178160303 19018
+sys/src/cmd/venti/srv/dcache.c - 664 sys sys 1178160304 18409
+sys/src/cmd/venti/srv/disksched.c - 664 sys sys 1142736352 2125
+sys/src/cmd/venti/srv/dump.c - 664 sys sys 1142736352 1642
+sys/src/cmd/venti/srv/findscore.c - 664 sys sys 1179863768 2195
+sys/src/cmd/venti/srv/fixarenas.c - 664 sys sys 1178160904 40515
+sys/src/cmd/venti/srv/fixarenas0.c - 664 sys sys 1142736352 36271
+sys/src/cmd/venti/srv/fmtarenas.c - 664 sys sys 1177189436 2702
+sys/src/cmd/venti/srv/fmtbloom.c - 664 sys sys 1142736352 2293
+sys/src/cmd/venti/srv/fmtindex.c - 664 sys sys 1178160304 2599
+sys/src/cmd/venti/srv/fmtisect.c - 664 sys sys 1177189436 1454
+sys/src/cmd/venti/srv/fns.h - 664 sys sys 1178160304 9179
+sys/src/cmd/venti/srv/graph.c - 664 sys sys 1177189436 4190
+sys/src/cmd/venti/srv/hdisk.c - 664 sys sys 1177189437 16510
+sys/src/cmd/venti/srv/httpd.c - 664 sys sys 1177189437 22784
+sys/src/cmd/venti/srv/icache.c - 664 sys sys 1186114652 8778
+sys/src/cmd/venti/srv/icachewrite.c - 664 sys sys 1178160304 7655
+sys/src/cmd/venti/srv/ifile.c - 664 sys sys 1177535026 2327
+sys/src/cmd/venti/srv/index.c - 664 sys sys 1178160304 17239
+sys/src/cmd/venti/srv/index2.c - 664 sys sys 1142736354 21620
+sys/src/cmd/venti/srv/lump.c - 664 sys sys 1177189437 5799
+sys/src/cmd/venti/srv/lumpcache.c - 664 sys sys 1177189437 8811
+sys/src/cmd/venti/srv/lumpqueue.c - 664 sys sys 1142736354 2722
+sys/src/cmd/venti/srv/mirrorarenas.c - 664 sys sys 1178160304 10544
+sys/src/cmd/venti/srv/mkfile - 664 sys sys 1188622141 1032
+sys/src/cmd/venti/srv/part.c - 664 sys sys 1180244694 5662
+sys/src/cmd/venti/srv/png.c - 664 sys sys 1142736354 3729
+sys/src/cmd/venti/srv/printarena.c - 664 sys sys 1177189438 2673
+sys/src/cmd/venti/srv/printarenapart.c - 664 sys sys 1178160304 3544
+sys/src/cmd/venti/srv/printarenas.c - 664 sys sys 1142736355 2074
+sys/src/cmd/venti/srv/printindex.c - 664 sys sys 1142736355 1746
+sys/src/cmd/venti/srv/printmap.c - 664 sys sys 1142736355 542
+sys/src/cmd/venti/srv/rdarena.c - 664 sys sys 1178161030 1637
+sys/src/cmd/venti/srv/readifile.c - 664 sys sys 1177534667 411
+sys/src/cmd/venti/srv/reseal.c - 664 sys sys 1177189438 6682
+sys/src/cmd/venti/srv/round.c - 664 sys sys 1142736355 1577
+sys/src/cmd/venti/srv/score.c - 664 sys sys 1178160305 740
+sys/src/cmd/venti/srv/sortientry.c - 664 sys sys 1177189439 8325
+sys/src/cmd/venti/srv/stats.c - 664 sys sys 1177189439 3912
+sys/src/cmd/venti/srv/stdinc.h - 664 sys sys 1177189439 169
+sys/src/cmd/venti/srv/syncarena.c - 664 sys sys 1177249927 4834
+sys/src/cmd/venti/srv/syncindex.c - 664 sys sys 1177249927 1622
+sys/src/cmd/venti/srv/syncindex0.c - 664 sys sys 1177249927 4447
+sys/src/cmd/venti/srv/trace.c - 664 sys sys 1142736356 709
+sys/src/cmd/venti/srv/unittoull.c - 664 sys sys 1142736356 471
+sys/src/cmd/venti/srv/unwhack.c - 664 sys sys 1142736356 3191
+sys/src/cmd/venti/srv/utils.c - 664 sys sys 1178160305 3755
+sys/src/cmd/venti/srv/venti.c - 664 sys sys 1188624404 5882
+sys/src/cmd/venti/srv/verifyarena.c - 664 sys sys 1178160305 5666
+sys/src/cmd/venti/srv/whack.c - 664 sys sys 1142736357 6419
+sys/src/cmd/venti/srv/whack.h - 664 sys sys 1142736357 966
+sys/src/cmd/venti/srv/wrarena.c - 664 sys sys 1183678902 4647
+sys/src/cmd/venti/srv/www - 20000000775 sys sys 1189019173 0
+sys/src/cmd/venti/srv/www/stats.html - 664 sys sys 1177190826 869
+sys/src/cmd/venti/srv/www/stats.js - 664 sys sys 1177190826 9427
+sys/src/cmd/venti/srv/www/status.js - 664 sys sys 1177190826 452
+sys/src/cmd/venti/srv/www/status1.js - 664 sys sys 1177190826 419
+sys/src/cmd/venti/srv/xml.c - 664 sys sys 1142736357 2236
+sys/src/cmd/venti/srv/xml.h - 664 sys sys 1142736357 450
+sys/src/cmd/venti/srv/zblock.c - 664 sys sys 1178160305 1659
+sys/src/cmd/venti/srv/zeropart.c - 664 sys sys 1178160305 707
+sys/src/cmd/venti/sync.c - 664 sys sys 1177189440 726
+sys/src/cmd/venti/write.c - 664 sys sys 1177189440 1106
 sys/src/cmd/vi - 20000000775 sys sys 1039727599 0
 sys/src/cmd/vi/bpt.c - 664 sys sys 944961341 2216
 sys/src/cmd/vi/cmd.c - 664 sys sys 944961342 9150
@@ -15376,6 +15428,44 @@ sys/src/libndb/ndbopen.c - 664 sys sys 1173737251 2717
 sys/src/libndb/ndbparse.c - 664 sys sys 1144174490 1207
 sys/src/libndb/ndbreorder.c - 664 sys sys 1078618600 966
 sys/src/libndb/ndbsubstitute.c - 664 sys sys 1144174491 867
+sys/src/liboventi - 20000000775 sys sys 1189019654 0
+sys/src/liboventi/client.8 - 664 sys sys 1188621755 11972
+sys/src/liboventi/client.c - 664 sys sys 1188621652 5494
+sys/src/liboventi/debug.8 - 664 sys sys 1188621755 3582
+sys/src/liboventi/debug.c - 664 sys sys 1188621652 1259
+sys/src/liboventi/errfmt.8 - 664 sys sys 1188621755 449
+sys/src/liboventi/errfmt.c - 664 sys sys 1188621652 134
+sys/src/liboventi/fatal.8 - 664 sys sys 1188621755 778
+sys/src/liboventi/fatal.c - 664 sys sys 1188621652 226
+sys/src/liboventi/mkfile - 664 sys sys 1188621776 577
+sys/src/liboventi/pack.8 - 664 sys sys 1188621755 5477
+sys/src/liboventi/pack.c - 664 sys sys 1188621652 2852
+sys/src/liboventi/packet.8 - 664 sys sys 1188621755 24906
+sys/src/liboventi/packet.c - 664 sys sys 1188621652 13229
+sys/src/liboventi/packet.h - 664 sys sys 1019678692 641
+sys/src/liboventi/parsescore.8 - 664 sys sys 1188621755 1208
+sys/src/liboventi/parsescore.c - 664 sys sys 1188621652 517
+sys/src/liboventi/plan9-io.8 - 664 sys sys 1188621756 3811
+sys/src/liboventi/plan9-io.c - 664 sys sys 1188621652 1759
+sys/src/liboventi/plan9-sha1.8 - 664 sys sys 1188621756 2021
+sys/src/liboventi/plan9-sha1.c - 664 sys sys 1188621652 1059
+sys/src/liboventi/plan9-thread.8 - 664 sys sys 1188621756 9994
+sys/src/liboventi/plan9-thread.acid - 664 sys sys 1138756224 9254
+sys/src/liboventi/plan9-thread.c - 664 sys sys 1188621652 6714
+sys/src/liboventi/readfully.8 - 664 sys sys 1188621755 740
+sys/src/liboventi/readfully.c - 664 sys sys 1188621652 238
+sys/src/liboventi/rpc.8 - 664 sys sys 1188621755 16009
+sys/src/liboventi/rpc.c - 664 sys sys 1188621652 7297
+sys/src/liboventi/scorefmt.8 - 664 sys sys 1188621755 862
+sys/src/liboventi/scorefmt.c - 664 sys sys 1188621652 256
+sys/src/liboventi/server.8 - 664 sys sys 1188621756 10569
+sys/src/liboventi/server.c - 664 sys sys 1188621652 4508
+sys/src/liboventi/session.h - 664 sys sys 1063853749 936
+sys/src/liboventi/strdup.8 - 664 sys sys 1188621756 640
+sys/src/liboventi/strdup.c - 664 sys sys 1188621652 204
+sys/src/liboventi/venti.txt - 664 sys sys 1045502097 4347
+sys/src/liboventi/zero.8 - 664 sys sys 1188621756 3268
+sys/src/liboventi/zero.c - 664 sys sys 1188621652 1510
 sys/src/libplumb - 20000000775 sys sys 1123099015 0
 sys/src/libplumb/event.c - 664 sys sys 947358887 1861
 sys/src/libplumb/mesg.c - 664 sys sys 1133279518 7080
@@ -15589,26 +15679,41 @@ sys/src/libthread/xincmips.s - 664 sys sys 1014928160 674
 sys/src/libthread/xincport.h - 664 sys sys 1127405405 211
 sys/src/libthread/xincpower.s - 664 sys sys 1048645448 342
 sys/src/libventi - 20000000775 sys sys 1063853749 0
-sys/src/libventi/client.c - 664 sys sys 1138191441 5493
-sys/src/libventi/debug.c - 664 sys sys 1045502093 1258
-sys/src/libventi/errfmt.c - 664 sys sys 1019678691 133
-sys/src/libventi/fatal.c - 664 sys sys 1084468118 225
-sys/src/libventi/mkfile - 664 sys sys 1045502094 576
-sys/src/libventi/pack.c - 664 sys sys 1045502094 2851
-sys/src/libventi/packet.c - 664 sys sys 1143695310 13228
-sys/src/libventi/packet.h - 664 sys sys 1019678692 641
-sys/src/libventi/parsescore.c - 664 sys sys 1045502095 516
-sys/src/libventi/plan9-io.c - 664 sys sys 1135487933 1758
-sys/src/libventi/plan9-sha1.c - 664 sys sys 1045502095 1058
-sys/src/libventi/plan9-thread.c - 664 sys sys 1135487955 6713
-sys/src/libventi/readfully.c - 664 sys sys 1045502095 237
-sys/src/libventi/rpc.c - 664 sys sys 1166821900 7296
-sys/src/libventi/scorefmt.c - 664 sys sys 1045502096 255
-sys/src/libventi/server.c - 664 sys sys 1045502096 4507
-sys/src/libventi/session.h - 664 sys sys 1063853749 936
-sys/src/libventi/strdup.c - 664 sys sys 1045502096 203
-sys/src/libventi/venti.txt - 664 sys sys 1045502097 4347
-sys/src/libventi/zero.c - 664 sys sys 1045502097 1509
+sys/src/libventi/cache.acid - 664 sys sys 1177189440 11960
+sys/src/libventi/cache.c - 664 sys sys 1177189440 11925
+sys/src/libventi/client.c - 664 sys sys 1177189440 3197
+sys/src/libventi/conn.c - 664 sys sys 1177189440 895
+sys/src/libventi/cvt.h - 664 sys sys 1142736171 610
+sys/src/libventi/debug.c - 664 sys sys 1177189440 201
+sys/src/libventi/debugpacket.c - 664 sys sys 1177189441 3838
+sys/src/libventi/dial.c - 664 sys sys 1177189441 378
+sys/src/libventi/dtype.c - 664 sys sys 1177189441 1117
+sys/src/libventi/entry.c - 664 sys sys 1177189441 1779
+sys/src/libventi/fcall.c - 664 sys sys 1177189441 3765
+sys/src/libventi/fcallfmt.c - 664 sys sys 1177189441 1912
+sys/src/libventi/file.c - 664 sys sys 1177189441 23509
+sys/src/libventi/hangup.c - 664 sys sys 1177189441 547
+sys/src/libventi/log.c - 664 sys sys 1179957535 3742
+sys/src/libventi/mem.c - 664 sys sys 1177189441 1184
+sys/src/libventi/mkfile - 664 sys sys 1188621815 512
+sys/src/libventi/packet.acid - 664 sys sys 1143389340 21446
+sys/src/libventi/packet.c - 664 sys sys 1177189441 15894
+sys/src/libventi/parsescore.c - 664 sys sys 1177189441 719
+sys/src/libventi/queue.c - 664 sys sys 1177189441 1571
+sys/src/libventi/queue.h - 664 sys sys 1177189441 233
+sys/src/libventi/root.c - 664 sys sys 1177189441 1218
+sys/src/libventi/rpc.acid - 664 sys sys 1143491745 12592
+sys/src/libventi/rpc.c - 664 sys sys 1177189442 3174
+sys/src/libventi/scorefmt.c - 664 sys sys 1177189442 248
+sys/src/libventi/send.c - 664 sys sys 1177189442 4395
+sys/src/libventi/server.c - 664 sys sys 1179957527 3693
+sys/src/libventi/srvhello.c - 664 sys sys 1177189442 833
+sys/src/libventi/strdup.c - 664 sys sys 1177189442 201
+sys/src/libventi/string.c - 664 sys sys 1177189442 737
+sys/src/libventi/time.c - 664 sys sys 1177189442 497
+sys/src/libventi/version.c - 664 sys sys 1177189442 2174
+sys/src/libventi/zero.c - 664 sys sys 1177189442 899
+sys/src/libventi/zeroscore.c - 664 sys sys 1177189442 248
 sys/src/mkfile - 664 sys sys 1110437421 1005
 sys/src/mkfile.proto - 664 sys sys 1105121349 265
 tmp - 20000000555 sys sys 1020896384 0
@@ -15631,3 +15736,7 @@ usr/glenda/lib/profile - 664 glenda glenda 1105128663 890
 usr/glenda/readme.acme - 664 glenda glenda 1019860628 4753
 usr/glenda/readme.rio - 664 glenda glenda 1019860628 6370
 usr/glenda/tmp - 20000000775 glenda glenda 1018802620 0
+386/bin/cwfs - 775 sys sys 1189048120 365605
+386/lib/libventi.a - 664 sys sys 1189048123 189448
+sys/src/cmd/aquarela/nblistener.c - 664 sys sys 1189049390 687
+sys/src/mkfile - 664 sys sys 1189049203 1017

+ 204 - 99
dist/replica/plan9.db

@@ -120,7 +120,7 @@
 386/bin/aux/ms2 - 775 sys sys 1188530147 88499
 386/bin/aux/msexceltables - 775 sys sys 1168402283 82921
 386/bin/aux/mswordstrings - 775 sys sys 1168402283 65541
-386/bin/aux/na - 775 sys sys 1168402284 154423
+386/bin/aux/na - 775 sys sys 1189023884 154372
 386/bin/aux/nfsmount - 775 sys sys 1184731233 233217
 386/bin/aux/nfsserver - 775 sys sys 1182223285 184719
 386/bin/aux/olefs - 775 sys sys 1179372075 148111
@@ -191,7 +191,7 @@
 386/bin/cpp - 775 sys sys 1188446882 149057
 386/bin/cpu - 775 sys sys 1188446885 139216
 386/bin/crop - 775 sys sys 1168402298 116374
-386/bin/cwfs - 775 sys sys 1188446894 365325
+386/bin/cwfs - 775 sys sys 1189048120 365605
 386/bin/date - 775 sys sys 1178568265 41845
 386/bin/db - 775 sys sys 1188499570 349188
 386/bin/dc - 775 sys sys 1168402299 99260
@@ -569,6 +569,7 @@
 386/lib/libmemlayer.a - 664 sys sys 1168402369 47360
 386/lib/libmp.a - 664 sys sys 1188447454 79978
 386/lib/libndb.a - 664 sys sys 1187061208 64470
+386/lib/liboventi.a - 664 sys sys 1188621781 99062
 386/lib/libplumb.a - 664 sys sys 1168402370 19408
 386/lib/libregexp.a - 664 sys sys 1181507273 37290
 386/lib/libscribble.a - 664 sys sys 1175972562 107238
@@ -576,7 +577,7 @@
 386/lib/libstdio.a - 664 sys sys 1176432133 126062
 386/lib/libsunrpc.a - 664 sys sys 1187061209 353148
 386/lib/libthread.a - 664 sys sys 1184731247 71918
-386/lib/libventi.a - 664 sys sys 1188447470 97858
+386/lib/libventi.a - 664 sys sys 1189048123 189448
 386/mbr - 775 sys sys 1131317338 407
 386/mkfile - 664 sys sys 948141303 46
 386/pbs - 775 sys sys 1143465402 495
@@ -6093,6 +6094,7 @@ sys/include/mouse.h - 664 sys sys 1035232010 1003
 sys/include/mp.h - 664 sys sys 1176499134 4803
 sys/include/ndb.h - 664 sys sys 1144174492 4412
 sys/include/nfs3.h - 664 sys sys 1045589438 15082
+sys/include/oventi.h - 664 sys sys 1188621731 7152
 sys/include/plumb.h - 664 sys sys 1014929065 989
 sys/include/pool.h - 664 sys sys 1102093074 1219
 sys/include/rdbg.h - 664 sys sys 1014929066 95
@@ -6104,7 +6106,7 @@ sys/include/sunrpc.h - 664 sys sys 1046367129 7219
 sys/include/thread.h - 664 sys sys 1184471649 3586
 sys/include/tos.h - 664 sys sys 1091904418 575
 sys/include/trace.h - 664 sys sys 1138460022 640
-sys/include/venti.h - 664 sys sys 1091904426 7150
+sys/include/venti.h - 664 sys sys 1188621741 9941
 sys/lib - 20000000775 sys sys 1105564897 0
 sys/lib/acid - 20000000775 sys sys 1114524607 0
 sys/lib/acid/386 - 664 sys sys 1168035402 3188
@@ -7476,6 +7478,7 @@ sys/man/1/uniq - 664 sys sys 944959674 995
 sys/man/1/units - 664 sys sys 1113743326 2046
 sys/man/1/uptime - 664 sys sys 1074733782 380
 sys/man/1/vac - 664 sys sys 1162102172 3244
+sys/man/1/venti - 664 sys sys 1188620038 2445
 sys/man/1/vi - 664 sys sys 1101668051 2904
 sys/man/1/vnc - 664 sys sys 1158063994 4313
 sys/man/1/vt - 664 sys sys 1186695303 2424
@@ -7630,6 +7633,16 @@ sys/man/2/symbol - 664 sys sys 950892874 9423
 sys/man/2/thread - 664 sys sys 1188841215 11916
 sys/man/2/time - 664 sys sys 1182980757 736
 sys/man/2/tmpfile - 664 sys sys 1048637161 1157
+sys/man/2/venti-cache - 664 sys sys 1188620038 5022
+sys/man/2/venti-client - 664 sys sys 1188620038 3339
+sys/man/2/venti-conn - 664 sys sys 1188620038 3600
+sys/man/2/venti-fcall - 664 sys sys 1188620038 4739
+sys/man/2/venti-file - 664 sys sys 1188620038 5840
+sys/man/2/venti-log - 664 sys sys 1188620038 2313
+sys/man/2/venti-mem - 664 sys sys 1188620038 1023
+sys/man/2/venti-packet - 664 sys sys 1188620038 4573
+sys/man/2/venti-server - 664 sys sys 1188620038 2201
+sys/man/2/venti-zero - 664 sys sys 1188620038 1064
 sys/man/2/wait - 664 sys sys 1163004064 2525
 sys/man/2/window - 664 sys sys 950593499 5522
 sys/man/3 - 20000000775 sys sys 1123100836 0
@@ -7680,7 +7693,7 @@ sys/man/4/archfs - 664 sys sys 960000712 533
 sys/man/4/cdfs - 664 sys sys 1026846913 3638
 sys/man/4/cfs - 664 sys sys 1172762903 1813
 sys/man/4/consolefs - 664 sys sys 1144424854 4245
-sys/man/4/cwfs - 664 sys sys 1178224996 6225
+sys/man/4/cwfs - 664 sys sys 1189029640 6255
 sys/man/4/dossrv - 664 sys sys 1168307403 4334
 sys/man/4/execnet - 664 sys sys 1019866708 1069
 sys/man/4/exportfs - 664 sys sys 1145881912 4655
@@ -7763,7 +7776,8 @@ sys/man/6/snap - 664 sys sys 1132452694 2402
 sys/man/6/thumbprint - 664 sys sys 1019866709 1124
 sys/man/6/users - 664 sys sys 1130912014 1392
 sys/man/6/utf - 664 sys sys 1146582112 2430
-sys/man/6/venti.conf - 664 sys sys 1164860945 1929
+sys/man/6/venti - 664 sys sys 1188620038 10695
+sys/man/6/venti.conf - 664 sys sys 1164860473 1929
 sys/man/6/vgadb - 664 sys sys 960256513 10972
 sys/man/7 - 20000000775 sys sys 1103794042 0
 sys/man/7/0intro - 664 sys sys 944959677 256
@@ -7842,8 +7856,9 @@ sys/man/8/tlssrv - 664 sys sys 1165623041 2589
 sys/man/8/trampoline - 664 sys sys 1126104844 1199
 sys/man/8/udpecho - 664 sys sys 954305553 303
 sys/man/8/update - 664 sys sys 961259288 2336
-sys/man/8/venti - 664 sys sys 1164860929 5434
-sys/man/8/ventiaux - 664 sys sys 1159419552 10571
+sys/man/8/venti - 664 sys sys 1188620038 12188
+sys/man/8/venti-backup - 664 sys sys 1188620038 2102
+sys/man/8/venti-fmt - 664 sys sys 1188620038 8626
 sys/man/8/vga - 664 sys sys 1141660952 3856
 sys/man/fonts - 664 sys sys 944959700 218
 sys/man/index.html - 664 sys sys 1019918444 1859
@@ -9453,7 +9468,7 @@ sys/src/cmd/aquarela/nb.c - 664 sys sys 1135892109 538
 sys/src/cmd/aquarela/nbdgram.c - 664 sys sys 1135892109 4364
 sys/src/cmd/aquarela/nbdgramconv.c - 664 sys sys 1135892109 2339
 sys/src/cmd/aquarela/nbdgramdump.c - 664 sys sys 1135892109 506
-sys/src/cmd/aquarela/nblistener.c - 664 sys sys 1135892109 738
+sys/src/cmd/aquarela/nblistener.c - 664 sys sys 1189049390 687
 sys/src/cmd/aquarela/nbname.c - 664 sys sys 1135892109 5457
 sys/src/cmd/aquarela/nbns.c - 664 sys sys 1135892109 2747
 sys/src/cmd/aquarela/nbnsconv.c - 664 sys sys 1135892109 6257
@@ -10071,7 +10086,7 @@ sys/src/cmd/cwfs/io.h - 664 sys sys 1174280312 866
 sys/src/cmd/cwfs/iobuf.c - 664 sys sys 1176500092 4722
 sys/src/cmd/cwfs/juke.c - 664 sys sys 1176500144 28859
 sys/src/cmd/cwfs/lrand.c - 664 sys sys 1171160167 1070
-sys/src/cmd/cwfs/main.c - 664 sys sys 1174799729 9976
+sys/src/cmd/cwfs/main.c - 664 sys sys 1189033288 10146
 sys/src/cmd/cwfs/malloc.c - 664 sys sys 1174281557 2360
 sys/src/cmd/cwfs/mkfile - 664 sys sys 1174941889 201
 sys/src/cmd/cwfs/mworm.c - 664 sys sys 1174370308 4311
@@ -10369,13 +10384,13 @@ sys/src/cmd/fossil/fs.h - 664 sys sys 1139667269 1581
 sys/src/cmd/fossil/history - 664 sys sys 1055703793 1400
 sys/src/cmd/fossil/invariants - 664 sys sys 1042005509 4073
 sys/src/cmd/fossil/last.c - 664 sys sys 1087005593 812
-sys/src/cmd/fossil/mkfile - 664 sys sys 1139667242 2640
+sys/src/cmd/fossil/mkfile - 664 sys sys 1189020178 2641
 sys/src/cmd/fossil/nobwatch.c - 664 sys sys 1042005509 329
 sys/src/cmd/fossil/pack.c - 664 sys sys 1187136436 4710
 sys/src/cmd/fossil/periodic.c - 664 sys sys 1061530726 1087
 sys/src/cmd/fossil/source.c - 664 sys sys 1187135867 20578
 sys/src/cmd/fossil/srcload.c - 664 sys sys 1042005510 4178
-sys/src/cmd/fossil/stdinc.h - 664 sys sys 1042005510 155
+sys/src/cmd/fossil/stdinc.h - 664 sys sys 1189020178 156
 sys/src/cmd/fossil/trunc.c - 664 sys sys 1042005511 280
 sys/src/cmd/fossil/unpack - 775 sys sys 1042005511 286
 sys/src/cmd/fossil/vac.c - 664 sys sys 1061530727 12483
@@ -12781,6 +12796,19 @@ sys/src/cmd/nfs.c - 664 sys sys 1050068720 31096
 sys/src/cmd/nm.c - 664 sys sys 1148106943 5322
 sys/src/cmd/nntpfs.c - 664 sys sys 1143695271 18860
 sys/src/cmd/ns.c - 664 sys sys 984717934 3558
+sys/src/cmd/oventi - 20000000775 sys sys 1189019154 0
+sys/src/cmd/oventi/README - 664 sys sys 1189018077 63
+sys/src/cmd/oventi/backup.example - 775 sys sys 1045503662 521
+sys/src/cmd/oventi/conf.rc - 775 sys sys 1055707446 1416
+sys/src/cmd/oventi/copy.c - 664 sys sys 1158618728 3713
+sys/src/cmd/oventi/dat.h - 664 sys sys 1128337707 15346
+sys/src/cmd/oventi/dumpvacroots - 775 sys sys 1163468045 571
+sys/src/cmd/oventi/fns.h - 664 sys sys 1055707422 6793
+sys/src/cmd/oventi/mkfile - 664 sys sys 1189018610 256
+sys/src/cmd/oventi/notes - 664 sys sys 1068500911 4032
+sys/src/cmd/oventi/stdinc.h - 664 sys sys 1188621931 140
+sys/src/cmd/oventi/venti.conf - 664 sys sys 1019867537 397
+sys/src/cmd/oventi/wrtape - 775 sys sys 1019678881 555
 sys/src/cmd/p.c - 664 sys sys 1121977162 1504
 sys/src/cmd/page - 20000000775 sys sys 1045606937 0
 sys/src/cmd/page/filter.c - 664 sys sys 1069793856 2166
@@ -14228,19 +14256,19 @@ sys/src/cmd/va/l.s - 664 sys sys 944961340 12696
 sys/src/cmd/va/lex.c - 664 sys sys 1143293804 12095
 sys/src/cmd/va/mkfile - 664 sys sys 944961340 215
 sys/src/cmd/vac - 20000000775 sys sys 1055699701 0
-sys/src/cmd/vac/cache.c - 664 sys sys 1071245791 15806
+sys/src/cmd/vac/cache.c - 664 sys sys 1189020077 15843
 sys/src/cmd/vac/dat.h - 664 sys sys 1162951177 4029
 sys/src/cmd/vac/error.c - 664 sys sys 1036006057 633
 sys/src/cmd/vac/error.h - 664 sys sys 1036024048 327
 sys/src/cmd/vac/file.c - 664 sys sys 1168307519 19922
 sys/src/cmd/vac/fns.h - 664 sys sys 1036006061 1746
-sys/src/cmd/vac/fs.c - 664 sys sys 1162327826 2902
-sys/src/cmd/vac/mkfile - 664 sys sys 1036024045 434
+sys/src/cmd/vac/fs.c - 664 sys sys 1189020075 2950
+sys/src/cmd/vac/mkfile - 664 sys sys 1189020070 435
 sys/src/cmd/vac/pack.c - 664 sys sys 1036006059 10126
 sys/src/cmd/vac/rtest.c - 664 sys sys 1019678787 1116
 sys/src/cmd/vac/source.c - 664 sys sys 1162327879 6767
 sys/src/cmd/vac/srcload.c - 664 sys sys 1036024047 4925
-sys/src/cmd/vac/stdinc.h - 664 sys sys 1036006059 121
+sys/src/cmd/vac/stdinc.h - 664 sys sys 1189020065 122
 sys/src/cmd/vac/util.c - 664 sys sys 1019678787 930
 sys/src/cmd/vac/vac.c - 664 sys sys 1162350579 23803
 sys/src/cmd/vac/vac.h - 664 sys sys 1091904424 3598
@@ -14264,68 +14292,92 @@ sys/src/cmd/vc/swt.c - 664 sys sys 1143384791 10367
 sys/src/cmd/vc/txt.c - 664 sys sys 1168702346 22737
 sys/src/cmd/vc/v.out.h - 664 sys sys 1089299165 2630
 sys/src/cmd/venti - 20000000775 sys sys 1068500904 0
-sys/src/cmd/venti/arena.c - 664 sys sys 1045503653 13221
-sys/src/cmd/venti/arenas.c - 664 sys sys 1063854192 7701
-sys/src/cmd/venti/backup.example - 775 sys sys 1045503662 521
-sys/src/cmd/venti/buildbuck.c - 664 sys sys 1019678877 2282
-sys/src/cmd/venti/buildindex.c - 664 sys sys 1143759340 3162
-sys/src/cmd/venti/checkarenas.c - 664 sys sys 1019854295 1894
-sys/src/cmd/venti/checkindex.c - 664 sys sys 1143759337 4094
-sys/src/cmd/venti/clump.c - 664 sys sys 1019678877 4148
-sys/src/cmd/venti/clumpstats.c - 664 sys sys 1055707430 2239
-sys/src/cmd/venti/conf.rc - 775 sys sys 1055707446 1416
-sys/src/cmd/venti/config.c - 664 sys sys 1138471074 4930
-sys/src/cmd/venti/conv.c - 664 sys sys 1019678878 8202
-sys/src/cmd/venti/copy.c - 664 sys sys 1158618908 3713
-sys/src/cmd/venti/dat.h - 664 sys sys 1132452191 15346
-sys/src/cmd/venti/dcache.c - 664 sys sys 1019678878 7374
-sys/src/cmd/venti/dump.c - 664 sys sys 1068520313 1578
-sys/src/cmd/venti/dumparena.c - 664 sys sys 1019678878 6381
-sys/src/cmd/venti/dumpvacroots - 775 sys sys 1167777001 571
-sys/src/cmd/venti/findscore.c - 664 sys sys 1121977913 2605
-sys/src/cmd/venti/fmtarenas.c - 664 sys sys 1132452192 2213
-sys/src/cmd/venti/fmtindex.c - 664 sys sys 1138471072 2524
-sys/src/cmd/venti/fmtisect.c - 664 sys sys 1132452192 1326
-sys/src/cmd/venti/fns.h - 664 sys sys 1055707422 6793
-sys/src/cmd/venti/httpd.c - 664 sys sys 1045503654 9107
-sys/src/cmd/venti/icache.c - 664 sys sys 1091904425 3974
-sys/src/cmd/venti/ifile.c - 664 sys sys 1055707440 2288
-sys/src/cmd/venti/index.c - 664 sys sys 1045503654 15232
-sys/src/cmd/venti/lump.c - 664 sys sys 1055707441 3844
-sys/src/cmd/venti/lumpcache.c - 664 sys sys 1019678879 7585
-sys/src/cmd/venti/lumpqueue.c - 664 sys sys 1168307536 2278
-sys/src/cmd/venti/mkfile - 664 sys sys 1139839228 1795
-sys/src/cmd/venti/mkroot.c - 664 sys sys 1045503630 1172
-sys/src/cmd/venti/mkxml - 775 sys sys 1019678879 1078
-sys/src/cmd/venti/mkxml.elems - 775 sys sys 1019678880 786
-sys/src/cmd/venti/notes - 664 sys sys 1068500911 4032
-sys/src/cmd/venti/part.c - 664 sys sys 1036389637 2532
-sys/src/cmd/venti/printarena.c - 664 sys sys 1109511493 2675
-sys/src/cmd/venti/rdarena.c - 664 sys sys 1055707432 1621
-sys/src/cmd/venti/read.c - 664 sys sys 1055707434 1723
-sys/src/cmd/venti/score.c - 664 sys sys 1045503655 588
-sys/src/cmd/venti/sortientry.c - 664 sys sys 1143759340 7436
-sys/src/cmd/venti/stats.c - 664 sys sys 1019678880 2099
-sys/src/cmd/venti/stdinc.h - 664 sys sys 1019678880 139
-sys/src/cmd/venti/sync.c - 664 sys sys 1036470213 674
-sys/src/cmd/venti/syncarena.c - 664 sys sys 1183441016 4206
-sys/src/cmd/venti/syncindex.c - 664 sys sys 1055707433 983
-sys/src/cmd/venti/syncindex0.c - 664 sys sys 1036470283 3044
-sys/src/cmd/venti/unittoull.c - 664 sys sys 1019678881 398
-sys/src/cmd/venti/unwhack.c - 664 sys sys 1019678881 3191
-sys/src/cmd/venti/utils.c - 664 sys sys 1127527222 4002
-sys/src/cmd/venti/venti.c - 664 sys sys 1069101830 4626
-sys/src/cmd/venti/venti.conf - 664 sys sys 1019867537 397
-sys/src/cmd/venti/ventifs.c - 664 sys sys 1063854129 6157
-sys/src/cmd/venti/verifyarena.c - 664 sys sys 1019678881 2705
-sys/src/cmd/venti/whack.c - 664 sys sys 1019678881 6375
-sys/src/cmd/venti/whack.h - 664 sys sys 1019678881 966
-sys/src/cmd/venti/wrarena.c - 664 sys sys 1038536023 2948
-sys/src/cmd/venti/write.c - 664 sys sys 1036470214 1051
-sys/src/cmd/venti/wrtape - 775 sys sys 1019678881 555
-sys/src/cmd/venti/xml.c - 664 sys sys 1091904425 2183
-sys/src/cmd/venti/xml.h - 664 sys sys 1019678881 450
-sys/src/cmd/venti/zeropart.c - 664 sys sys 1132452194 877
+sys/src/cmd/venti/copy.c - 664 sys sys 1189017556 5275
+sys/src/cmd/venti/devnull.c - 664 sys sys 1177189434 1225
+sys/src/cmd/venti/mkfile - 664 sys sys 1189017596 358
+sys/src/cmd/venti/mkroot.c - 664 sys sys 1177189435 1178
+sys/src/cmd/venti/randtest.c - 664 sys sys 1177189435 5656
+sys/src/cmd/venti/read.c - 664 sys sys 1177189435 1289
+sys/src/cmd/venti/readlist.c - 664 sys sys 1177189435 1934
+sys/src/cmd/venti/ro.c - 664 sys sys 1177189435 1886
+sys/src/cmd/venti/root.c - 664 sys sys 1177189435 1329
+sys/src/cmd/venti/srv - 20000000775 sys sys 1189020012 0
+sys/src/cmd/venti/srv/arena.c - 664 sys sys 1178160303 16402
+sys/src/cmd/venti/srv/arenas.c - 664 sys sys 1178160303 8039
+sys/src/cmd/venti/srv/bloom.c - 664 sys sys 1178160303 4529
+sys/src/cmd/venti/srv/buildbuck.c - 664 sys sys 1177189435 2837
+sys/src/cmd/venti/srv/buildindex.c - 664 sys sys 1182131900 21117
+sys/src/cmd/venti/srv/checkarenas.c - 664 sys sys 1177189435 2313
+sys/src/cmd/venti/srv/checkindex.c - 664 sys sys 1179342633 5978
+sys/src/cmd/venti/srv/clump.c - 664 sys sys 1177189435 5329
+sys/src/cmd/venti/srv/clumpstats.c - 664 sys sys 1142736351 2234
+sys/src/cmd/venti/srv/cmparena.c - 664 sys sys 1142736351 5471
+sys/src/cmd/venti/srv/cmparenas.c - 664 sys sys 1177189435 7215
+sys/src/cmd/venti/srv/config.c - 664 sys sys 1178160303 5566
+sys/src/cmd/venti/srv/conv.c - 664 sys sys 1178160303 14052
+sys/src/cmd/venti/srv/dat.h - 664 sys sys 1178160303 19018
+sys/src/cmd/venti/srv/dcache.c - 664 sys sys 1178160304 18409
+sys/src/cmd/venti/srv/disksched.c - 664 sys sys 1142736352 2125
+sys/src/cmd/venti/srv/dump.c - 664 sys sys 1142736352 1642
+sys/src/cmd/venti/srv/findscore.c - 664 sys sys 1179863768 2195
+sys/src/cmd/venti/srv/fixarenas.c - 664 sys sys 1178160904 40515
+sys/src/cmd/venti/srv/fixarenas0.c - 664 sys sys 1142736352 36271
+sys/src/cmd/venti/srv/fmtarenas.c - 664 sys sys 1177189436 2702
+sys/src/cmd/venti/srv/fmtbloom.c - 664 sys sys 1142736352 2293
+sys/src/cmd/venti/srv/fmtindex.c - 664 sys sys 1178160304 2599
+sys/src/cmd/venti/srv/fmtisect.c - 664 sys sys 1177189436 1454
+sys/src/cmd/venti/srv/fns.h - 664 sys sys 1178160304 9179
+sys/src/cmd/venti/srv/graph.c - 664 sys sys 1177189436 4190
+sys/src/cmd/venti/srv/hdisk.c - 664 sys sys 1177189437 16510
+sys/src/cmd/venti/srv/httpd.c - 664 sys sys 1177189437 22784
+sys/src/cmd/venti/srv/icache.c - 664 sys sys 1186114652 8778
+sys/src/cmd/venti/srv/icachewrite.c - 664 sys sys 1178160304 7655
+sys/src/cmd/venti/srv/ifile.c - 664 sys sys 1177535026 2327
+sys/src/cmd/venti/srv/index.c - 664 sys sys 1178160304 17239
+sys/src/cmd/venti/srv/index2.c - 664 sys sys 1142736354 21620
+sys/src/cmd/venti/srv/lump.c - 664 sys sys 1177189437 5799
+sys/src/cmd/venti/srv/lumpcache.c - 664 sys sys 1177189437 8811
+sys/src/cmd/venti/srv/lumpqueue.c - 664 sys sys 1142736354 2722
+sys/src/cmd/venti/srv/mirrorarenas.c - 664 sys sys 1178160304 10544
+sys/src/cmd/venti/srv/mkfile - 664 sys sys 1188622141 1032
+sys/src/cmd/venti/srv/part.c - 664 sys sys 1180244694 5662
+sys/src/cmd/venti/srv/png.c - 664 sys sys 1142736354 3729
+sys/src/cmd/venti/srv/printarena.c - 664 sys sys 1177189438 2673
+sys/src/cmd/venti/srv/printarenapart.c - 664 sys sys 1178160304 3544
+sys/src/cmd/venti/srv/printarenas.c - 664 sys sys 1142736355 2074
+sys/src/cmd/venti/srv/printindex.c - 664 sys sys 1142736355 1746
+sys/src/cmd/venti/srv/printmap.c - 664 sys sys 1142736355 542
+sys/src/cmd/venti/srv/rdarena.c - 664 sys sys 1178161030 1637
+sys/src/cmd/venti/srv/readifile.c - 664 sys sys 1177534667 411
+sys/src/cmd/venti/srv/reseal.c - 664 sys sys 1177189438 6682
+sys/src/cmd/venti/srv/round.c - 664 sys sys 1142736355 1577
+sys/src/cmd/venti/srv/score.c - 664 sys sys 1178160305 740
+sys/src/cmd/venti/srv/sortientry.c - 664 sys sys 1177189439 8325
+sys/src/cmd/venti/srv/stats.c - 664 sys sys 1177189439 3912
+sys/src/cmd/venti/srv/stdinc.h - 664 sys sys 1177189439 169
+sys/src/cmd/venti/srv/syncarena.c - 664 sys sys 1177249927 4834
+sys/src/cmd/venti/srv/syncindex.c - 664 sys sys 1177249927 1622
+sys/src/cmd/venti/srv/syncindex0.c - 664 sys sys 1177249927 4447
+sys/src/cmd/venti/srv/trace.c - 664 sys sys 1142736356 709
+sys/src/cmd/venti/srv/unittoull.c - 664 sys sys 1142736356 471
+sys/src/cmd/venti/srv/unwhack.c - 664 sys sys 1142736356 3191
+sys/src/cmd/venti/srv/utils.c - 664 sys sys 1178160305 3755
+sys/src/cmd/venti/srv/venti.c - 664 sys sys 1188624404 5882
+sys/src/cmd/venti/srv/verifyarena.c - 664 sys sys 1178160305 5666
+sys/src/cmd/venti/srv/whack.c - 664 sys sys 1142736357 6419
+sys/src/cmd/venti/srv/whack.h - 664 sys sys 1142736357 966
+sys/src/cmd/venti/srv/wrarena.c - 664 sys sys 1183678902 4647
+sys/src/cmd/venti/srv/www - 20000000775 sys sys 1189019173 0
+sys/src/cmd/venti/srv/www/stats.html - 664 sys sys 1177190826 869
+sys/src/cmd/venti/srv/www/stats.js - 664 sys sys 1177190826 9427
+sys/src/cmd/venti/srv/www/status.js - 664 sys sys 1177190826 452
+sys/src/cmd/venti/srv/www/status1.js - 664 sys sys 1177190826 419
+sys/src/cmd/venti/srv/xml.c - 664 sys sys 1142736357 2236
+sys/src/cmd/venti/srv/xml.h - 664 sys sys 1142736357 450
+sys/src/cmd/venti/srv/zblock.c - 664 sys sys 1178160305 1659
+sys/src/cmd/venti/srv/zeropart.c - 664 sys sys 1178160305 707
+sys/src/cmd/venti/sync.c - 664 sys sys 1177189440 726
+sys/src/cmd/venti/write.c - 664 sys sys 1177189440 1106
 sys/src/cmd/vi - 20000000775 sys sys 1039727599 0
 sys/src/cmd/vi/bpt.c - 664 sys sys 944961341 2216
 sys/src/cmd/vi/cmd.c - 664 sys sys 944961342 9150
@@ -15376,6 +15428,44 @@ sys/src/libndb/ndbopen.c - 664 sys sys 1173737251 2717
 sys/src/libndb/ndbparse.c - 664 sys sys 1144174490 1207
 sys/src/libndb/ndbreorder.c - 664 sys sys 1078618600 966
 sys/src/libndb/ndbsubstitute.c - 664 sys sys 1144174491 867
+sys/src/liboventi - 20000000775 sys sys 1189019654 0
+sys/src/liboventi/client.8 - 664 sys sys 1188621755 11972
+sys/src/liboventi/client.c - 664 sys sys 1188621652 5494
+sys/src/liboventi/debug.8 - 664 sys sys 1188621755 3582
+sys/src/liboventi/debug.c - 664 sys sys 1188621652 1259
+sys/src/liboventi/errfmt.8 - 664 sys sys 1188621755 449
+sys/src/liboventi/errfmt.c - 664 sys sys 1188621652 134
+sys/src/liboventi/fatal.8 - 664 sys sys 1188621755 778
+sys/src/liboventi/fatal.c - 664 sys sys 1188621652 226
+sys/src/liboventi/mkfile - 664 sys sys 1188621776 577
+sys/src/liboventi/pack.8 - 664 sys sys 1188621755 5477
+sys/src/liboventi/pack.c - 664 sys sys 1188621652 2852
+sys/src/liboventi/packet.8 - 664 sys sys 1188621755 24906
+sys/src/liboventi/packet.c - 664 sys sys 1188621652 13229
+sys/src/liboventi/packet.h - 664 sys sys 1019678692 641
+sys/src/liboventi/parsescore.8 - 664 sys sys 1188621755 1208
+sys/src/liboventi/parsescore.c - 664 sys sys 1188621652 517
+sys/src/liboventi/plan9-io.8 - 664 sys sys 1188621756 3811
+sys/src/liboventi/plan9-io.c - 664 sys sys 1188621652 1759
+sys/src/liboventi/plan9-sha1.8 - 664 sys sys 1188621756 2021
+sys/src/liboventi/plan9-sha1.c - 664 sys sys 1188621652 1059
+sys/src/liboventi/plan9-thread.8 - 664 sys sys 1188621756 9994
+sys/src/liboventi/plan9-thread.acid - 664 sys sys 1138756224 9254
+sys/src/liboventi/plan9-thread.c - 664 sys sys 1188621652 6714
+sys/src/liboventi/readfully.8 - 664 sys sys 1188621755 740
+sys/src/liboventi/readfully.c - 664 sys sys 1188621652 238
+sys/src/liboventi/rpc.8 - 664 sys sys 1188621755 16009
+sys/src/liboventi/rpc.c - 664 sys sys 1188621652 7297
+sys/src/liboventi/scorefmt.8 - 664 sys sys 1188621755 862
+sys/src/liboventi/scorefmt.c - 664 sys sys 1188621652 256
+sys/src/liboventi/server.8 - 664 sys sys 1188621756 10569
+sys/src/liboventi/server.c - 664 sys sys 1188621652 4508
+sys/src/liboventi/session.h - 664 sys sys 1063853749 936
+sys/src/liboventi/strdup.8 - 664 sys sys 1188621756 640
+sys/src/liboventi/strdup.c - 664 sys sys 1188621652 204
+sys/src/liboventi/venti.txt - 664 sys sys 1045502097 4347
+sys/src/liboventi/zero.8 - 664 sys sys 1188621756 3268
+sys/src/liboventi/zero.c - 664 sys sys 1188621652 1510
 sys/src/libplumb - 20000000775 sys sys 1123099015 0
 sys/src/libplumb/event.c - 664 sys sys 947358887 1861
 sys/src/libplumb/mesg.c - 664 sys sys 1133279518 7080
@@ -15589,27 +15679,42 @@ sys/src/libthread/xincmips.s - 664 sys sys 1014928160 674
 sys/src/libthread/xincport.h - 664 sys sys 1127405405 211
 sys/src/libthread/xincpower.s - 664 sys sys 1048645448 342
 sys/src/libventi - 20000000775 sys sys 1063853749 0
-sys/src/libventi/client.c - 664 sys sys 1138191441 5493
-sys/src/libventi/debug.c - 664 sys sys 1045502093 1258
-sys/src/libventi/errfmt.c - 664 sys sys 1019678691 133
-sys/src/libventi/fatal.c - 664 sys sys 1084468118 225
-sys/src/libventi/mkfile - 664 sys sys 1045502094 576
-sys/src/libventi/pack.c - 664 sys sys 1045502094 2851
-sys/src/libventi/packet.c - 664 sys sys 1143695310 13228
-sys/src/libventi/packet.h - 664 sys sys 1019678692 641
-sys/src/libventi/parsescore.c - 664 sys sys 1045502095 516
-sys/src/libventi/plan9-io.c - 664 sys sys 1135487933 1758
-sys/src/libventi/plan9-sha1.c - 664 sys sys 1045502095 1058
-sys/src/libventi/plan9-thread.c - 664 sys sys 1135487955 6713
-sys/src/libventi/readfully.c - 664 sys sys 1045502095 237
-sys/src/libventi/rpc.c - 664 sys sys 1166821900 7296
-sys/src/libventi/scorefmt.c - 664 sys sys 1045502096 255
-sys/src/libventi/server.c - 664 sys sys 1045502096 4507
-sys/src/libventi/session.h - 664 sys sys 1063853749 936
-sys/src/libventi/strdup.c - 664 sys sys 1045502096 203
-sys/src/libventi/venti.txt - 664 sys sys 1045502097 4347
-sys/src/libventi/zero.c - 664 sys sys 1045502097 1509
-sys/src/mkfile - 664 sys sys 1110437421 1005
+sys/src/libventi/cache.acid - 664 sys sys 1177189440 11960
+sys/src/libventi/cache.c - 664 sys sys 1177189440 11925
+sys/src/libventi/client.c - 664 sys sys 1177189440 3197
+sys/src/libventi/conn.c - 664 sys sys 1177189440 895
+sys/src/libventi/cvt.h - 664 sys sys 1142736171 610
+sys/src/libventi/debug.c - 664 sys sys 1177189440 201
+sys/src/libventi/debugpacket.c - 664 sys sys 1177189441 3838
+sys/src/libventi/dial.c - 664 sys sys 1177189441 378
+sys/src/libventi/dtype.c - 664 sys sys 1177189441 1117
+sys/src/libventi/entry.c - 664 sys sys 1177189441 1779
+sys/src/libventi/fcall.c - 664 sys sys 1177189441 3765
+sys/src/libventi/fcallfmt.c - 664 sys sys 1177189441 1912
+sys/src/libventi/file.c - 664 sys sys 1177189441 23509
+sys/src/libventi/hangup.c - 664 sys sys 1177189441 547
+sys/src/libventi/log.c - 664 sys sys 1179957535 3742
+sys/src/libventi/mem.c - 664 sys sys 1177189441 1184
+sys/src/libventi/mkfile - 664 sys sys 1188621815 512
+sys/src/libventi/packet.acid - 664 sys sys 1143389340 21446
+sys/src/libventi/packet.c - 664 sys sys 1177189441 15894
+sys/src/libventi/parsescore.c - 664 sys sys 1177189441 719
+sys/src/libventi/queue.c - 664 sys sys 1177189441 1571
+sys/src/libventi/queue.h - 664 sys sys 1177189441 233
+sys/src/libventi/root.c - 664 sys sys 1177189441 1218
+sys/src/libventi/rpc.acid - 664 sys sys 1143491745 12592
+sys/src/libventi/rpc.c - 664 sys sys 1177189442 3174
+sys/src/libventi/scorefmt.c - 664 sys sys 1177189442 248
+sys/src/libventi/send.c - 664 sys sys 1177189442 4395
+sys/src/libventi/server.c - 664 sys sys 1179957527 3693
+sys/src/libventi/srvhello.c - 664 sys sys 1177189442 833
+sys/src/libventi/strdup.c - 664 sys sys 1177189442 201
+sys/src/libventi/string.c - 664 sys sys 1177189442 737
+sys/src/libventi/time.c - 664 sys sys 1177189442 497
+sys/src/libventi/version.c - 664 sys sys 1177189442 2174
+sys/src/libventi/zero.c - 664 sys sys 1177189442 899
+sys/src/libventi/zeroscore.c - 664 sys sys 1177189442 248
+sys/src/mkfile - 664 sys sys 1189049203 1017
 sys/src/mkfile.proto - 664 sys sys 1105121349 265
 tmp - 20000000555 sys sys 1020896384 0
 usr - 20000000775 sys sys 953406542 0

+ 275 - 0
dist/replica/plan9.log

@@ -52378,3 +52378,278 @@
 1188923403 1 c sys/src/9/port/sysproc.c - 664 sys sys 1188922870 21419
 1188955804 0 c sys/man/2/exec - 664 sys sys 1188955837 4230
 1188957605 0 c sys/src/cmd/aux/na/na.y - 775 sys sys 1188957547 25129
+1189020603 0 a 386/lib/liboventi.a - 664 sys sys 1188621781 99062
+1189020603 1 c 386/lib/libventi.a - 664 sys sys 1188621843 191156
+1189020603 2 a sys/include/oventi.h - 664 sys sys 1188621731 7152
+1189020603 3 c sys/include/venti.h - 664 sys sys 1188621741 9941
+1189020603 4 a sys/man/1/venti - 664 sys sys 1188620038 2445
+1189020603 5 a sys/man/2/venti-cache - 664 sys sys 1188620038 5022
+1189020603 6 a sys/man/2/venti-client - 664 sys sys 1188620038 3339
+1189020603 7 a sys/man/2/venti-conn - 664 sys sys 1188620038 3600
+1189020603 8 a sys/man/2/venti-fcall - 664 sys sys 1188620038 4739
+1189020603 9 a sys/man/2/venti-file - 664 sys sys 1188620038 5840
+1189020603 10 a sys/man/2/venti-log - 664 sys sys 1188620038 2313
+1189020603 11 a sys/man/2/venti-mem - 664 sys sys 1188620038 1023
+1189020603 12 a sys/man/2/venti-packet - 664 sys sys 1188620038 4573
+1189020603 13 a sys/man/2/venti-server - 664 sys sys 1188620038 2201
+1189020603 14 a sys/man/2/venti-zero - 664 sys sys 1188620038 1064
+1189020603 15 a sys/man/6/venti - 664 sys sys 1188620038 10695
+1189020603 16 c sys/man/6/venti.conf - 664 sys sys 1164860473 1929
+1189020603 17 c sys/man/8/venti - 664 sys sys 1188620038 12188
+1189020603 18 a sys/man/8/venti-backup - 664 sys sys 1188620038 2102
+1189020603 19 a sys/man/8/venti-fmt - 664 sys sys 1188620038 8626
+1189020603 20 c sys/src/cmd/vac/cache.c - 664 sys sys 1189020077 15843
+1189020603 21 c sys/src/cmd/vac/fs.c - 664 sys sys 1189020075 2950
+1189020603 22 c sys/src/cmd/vac/mkfile - 664 sys sys 1189020070 435
+1189020603 23 c sys/src/cmd/vac/stdinc.h - 664 sys sys 1189020065 122
+1189020603 24 c sys/src/cmd/venti/copy.c - 664 sys sys 1189017556 5275
+1189020603 25 a sys/src/cmd/venti/devnull.c - 664 sys sys 1177189434 1225
+1189020603 26 c sys/src/cmd/venti/mkfile - 664 sys sys 1189017596 358
+1189020603 27 c sys/src/cmd/venti/mkroot.c - 664 sys sys 1177189435 1178
+1189020603 28 a sys/src/cmd/venti/randtest.c - 664 sys sys 1177189435 5656
+1189020603 29 c sys/src/cmd/venti/read.c - 664 sys sys 1177189435 1289
+1189020603 30 a sys/src/cmd/venti/readlist.c - 664 sys sys 1177189435 1934
+1189020603 31 a sys/src/cmd/venti/ro.c - 664 sys sys 1177189435 1886
+1189020603 32 a sys/src/cmd/venti/root.c - 664 sys sys 1177189435 1329
+1189020603 33 a sys/src/cmd/venti/srv - 20000000775 sys sys 1189020012 0
+1189020603 34 a sys/src/cmd/venti/srv/arena.c - 664 sys sys 1178160303 16402
+1189020603 35 a sys/src/cmd/venti/srv/arenas.c - 664 sys sys 1178160303 8039
+1189020603 36 a sys/src/cmd/venti/srv/bloom.c - 664 sys sys 1178160303 4529
+1189020603 37 a sys/src/cmd/venti/srv/buildbuck.c - 664 sys sys 1177189435 2837
+1189020603 38 a sys/src/cmd/venti/srv/buildindex.c - 664 sys sys 1182131900 21117
+1189020603 39 a sys/src/cmd/venti/srv/checkarenas.c - 664 sys sys 1177189435 2313
+1189020603 40 a sys/src/cmd/venti/srv/checkindex.c - 664 sys sys 1179342633 5978
+1189020603 41 a sys/src/cmd/venti/srv/clump.c - 664 sys sys 1177189435 5329
+1189020603 42 a sys/src/cmd/venti/srv/clumpstats.c - 664 sys sys 1142736351 2234
+1189020603 43 a sys/src/cmd/venti/srv/cmparena.c - 664 sys sys 1142736351 5471
+1189020603 44 a sys/src/cmd/venti/srv/cmparenas.c - 664 sys sys 1177189435 7215
+1189020603 45 a sys/src/cmd/venti/srv/config.c - 664 sys sys 1178160303 5566
+1189020603 46 a sys/src/cmd/venti/srv/conv.c - 664 sys sys 1178160303 14052
+1189020603 47 a sys/src/cmd/venti/srv/dat.h - 664 sys sys 1178160303 19018
+1189020603 48 a sys/src/cmd/venti/srv/dcache.c - 664 sys sys 1178160304 18409
+1189020603 49 a sys/src/cmd/venti/srv/disksched.c - 664 sys sys 1142736352 2125
+1189020603 50 a sys/src/cmd/venti/srv/dump.c - 664 sys sys 1142736352 1642
+1189020603 51 a sys/src/cmd/venti/srv/findscore.c - 664 sys sys 1179863768 2195
+1189020603 52 a sys/src/cmd/venti/srv/fixarenas.c - 664 sys sys 1178160904 40515
+1189020603 53 a sys/src/cmd/venti/srv/fixarenas0.c - 664 sys sys 1142736352 36271
+1189020603 54 a sys/src/cmd/venti/srv/fmtarenas.c - 664 sys sys 1177189436 2702
+1189020603 55 a sys/src/cmd/venti/srv/fmtbloom.c - 664 sys sys 1142736352 2293
+1189020603 56 a sys/src/cmd/venti/srv/fmtindex.c - 664 sys sys 1178160304 2599
+1189020603 57 a sys/src/cmd/venti/srv/fmtisect.c - 664 sys sys 1177189436 1454
+1189020603 58 a sys/src/cmd/venti/srv/fns.h - 664 sys sys 1178160304 9179
+1189020603 59 a sys/src/cmd/venti/srv/graph.c - 664 sys sys 1177189436 4190
+1189020603 60 a sys/src/cmd/venti/srv/hdisk.c - 664 sys sys 1177189437 16510
+1189020603 61 a sys/src/cmd/venti/srv/httpd.c - 664 sys sys 1177189437 22784
+1189020603 62 a sys/src/cmd/venti/srv/icache.c - 664 sys sys 1186114652 8778
+1189020603 63 a sys/src/cmd/venti/srv/icachewrite.c - 664 sys sys 1178160304 7655
+1189020603 64 a sys/src/cmd/venti/srv/ifile.c - 664 sys sys 1177535026 2327
+1189020603 65 a sys/src/cmd/venti/srv/index.c - 664 sys sys 1178160304 17239
+1189020603 66 a sys/src/cmd/venti/srv/index2.c - 664 sys sys 1142736354 21620
+1189020603 67 a sys/src/cmd/venti/srv/lump.c - 664 sys sys 1177189437 5799
+1189020603 68 a sys/src/cmd/venti/srv/lumpcache.c - 664 sys sys 1177189437 8811
+1189020603 69 a sys/src/cmd/venti/srv/lumpqueue.c - 664 sys sys 1142736354 2722
+1189020603 70 a sys/src/cmd/venti/srv/mirrorarenas.c - 664 sys sys 1178160304 10544
+1189020603 71 a sys/src/cmd/venti/srv/mkfile - 664 sys sys 1188622141 1032
+1189020603 72 a sys/src/cmd/venti/srv/part.c - 664 sys sys 1180244694 5662
+1189020603 73 a sys/src/cmd/venti/srv/png.c - 664 sys sys 1142736354 3729
+1189020603 74 a sys/src/cmd/venti/srv/printarena.c - 664 sys sys 1177189438 2673
+1189020603 75 a sys/src/cmd/venti/srv/printarenapart.c - 664 sys sys 1178160304 3544
+1189020603 76 a sys/src/cmd/venti/srv/printarenas.c - 664 sys sys 1142736355 2074
+1189020603 77 a sys/src/cmd/venti/srv/printindex.c - 664 sys sys 1142736355 1746
+1189020603 78 a sys/src/cmd/venti/srv/printmap.c - 664 sys sys 1142736355 542
+1189020603 79 a sys/src/cmd/venti/srv/rdarena.c - 664 sys sys 1178161030 1637
+1189020603 80 a sys/src/cmd/venti/srv/readifile.c - 664 sys sys 1177534667 411
+1189020603 81 a sys/src/cmd/venti/srv/reseal.c - 664 sys sys 1177189438 6682
+1189020603 82 a sys/src/cmd/venti/srv/round.c - 664 sys sys 1142736355 1577
+1189020603 83 a sys/src/cmd/venti/srv/score.c - 664 sys sys 1178160305 740
+1189020603 84 a sys/src/cmd/venti/srv/sortientry.c - 664 sys sys 1177189439 8325
+1189020603 85 a sys/src/cmd/venti/srv/stats.c - 664 sys sys 1177189439 3912
+1189020603 86 a sys/src/cmd/venti/srv/stdinc.h - 664 sys sys 1177189439 169
+1189020603 87 a sys/src/cmd/venti/srv/syncarena.c - 664 sys sys 1177249927 4834
+1189020603 88 a sys/src/cmd/venti/srv/syncindex.c - 664 sys sys 1177249927 1622
+1189020603 89 a sys/src/cmd/venti/srv/syncindex0.c - 664 sys sys 1177249927 4447
+1189020603 90 a sys/src/cmd/venti/srv/trace.c - 664 sys sys 1142736356 709
+1189020603 91 a sys/src/cmd/venti/srv/unittoull.c - 664 sys sys 1142736356 471
+1189020603 92 a sys/src/cmd/venti/srv/unwhack.c - 664 sys sys 1142736356 3191
+1189020603 93 a sys/src/cmd/venti/srv/utils.c - 664 sys sys 1178160305 3755
+1189020603 94 a sys/src/cmd/venti/srv/venti.c - 664 sys sys 1188624404 5882
+1189020603 95 a sys/src/cmd/venti/srv/verifyarena.c - 664 sys sys 1178160305 5666
+1189020603 96 a sys/src/cmd/venti/srv/whack.c - 664 sys sys 1142736357 6419
+1189020603 97 a sys/src/cmd/venti/srv/whack.h - 664 sys sys 1142736357 966
+1189020603 98 a sys/src/cmd/venti/srv/wrarena.c - 664 sys sys 1183678902 4647
+1189020603 99 a sys/src/cmd/venti/srv/www - 20000000775 sys sys 1189019173 0
+1189020603 100 a sys/src/cmd/venti/srv/www/stats.html - 664 sys sys 1177190826 869
+1189020603 101 a sys/src/cmd/venti/srv/www/stats.js - 664 sys sys 1177190826 9427
+1189020603 102 a sys/src/cmd/venti/srv/www/status.js - 664 sys sys 1177190826 452
+1189020603 103 a sys/src/cmd/venti/srv/www/status1.js - 664 sys sys 1177190826 419
+1189020603 104 a sys/src/cmd/venti/srv/xml.c - 664 sys sys 1142736357 2236
+1189020603 105 a sys/src/cmd/venti/srv/xml.h - 664 sys sys 1142736357 450
+1189020603 106 a sys/src/cmd/venti/srv/zblock.c - 664 sys sys 1178160305 1659
+1189020603 107 a sys/src/cmd/venti/srv/zeropart.c - 664 sys sys 1178160305 707
+1189020603 108 c sys/src/cmd/venti/sync.c - 664 sys sys 1177189440 726
+1189020603 109 c sys/src/cmd/venti/write.c - 664 sys sys 1177189440 1106
+1189020603 110 c sys/src/cmd/fossil/mkfile - 664 sys sys 1189020178 2641
+1189020603 111 c sys/src/cmd/fossil/stdinc.h - 664 sys sys 1189020178 156
+1189020603 112 a sys/src/cmd/oventi - 20000000775 sys sys 1189019154 0
+1189020603 113 a sys/src/cmd/oventi/README - 664 sys sys 1189018077 63
+1189020603 114 a sys/src/cmd/oventi/backup.example - 775 sys sys 1045503662 521
+1189020603 115 a sys/src/cmd/oventi/conf.rc - 775 sys sys 1055707446 1416
+1189020603 116 a sys/src/cmd/oventi/copy.c - 664 sys sys 1158618728 3713
+1189020603 117 a sys/src/cmd/oventi/dat.h - 664 sys sys 1128337707 15346
+1189020603 118 a sys/src/cmd/oventi/dumpvacroots - 775 sys sys 1163468045 571
+1189020603 119 a sys/src/cmd/oventi/fns.h - 664 sys sys 1055707422 6793
+1189020603 120 a sys/src/cmd/oventi/mkfile - 664 sys sys 1189018610 256
+1189020603 121 a sys/src/cmd/oventi/notes - 664 sys sys 1068500911 4032
+1189020603 122 a sys/src/cmd/oventi/stdinc.h - 664 sys sys 1188621931 140
+1189020603 123 a sys/src/cmd/oventi/venti.conf - 664 sys sys 1019867537 397
+1189020603 124 a sys/src/cmd/oventi/wrtape - 775 sys sys 1019678881 555
+1189020603 125 a sys/src/liboventi - 20000000775 sys sys 1189019654 0
+1189020603 126 a sys/src/liboventi/client.8 - 664 sys sys 1188621755 11972
+1189020603 127 a sys/src/liboventi/client.c - 664 sys sys 1188621652 5494
+1189020603 128 a sys/src/liboventi/debug.8 - 664 sys sys 1188621755 3582
+1189020603 129 a sys/src/liboventi/debug.c - 664 sys sys 1188621652 1259
+1189020603 130 a sys/src/liboventi/errfmt.8 - 664 sys sys 1188621755 449
+1189020603 131 a sys/src/liboventi/errfmt.c - 664 sys sys 1188621652 134
+1189020603 132 a sys/src/liboventi/fatal.8 - 664 sys sys 1188621755 778
+1189020603 133 a sys/src/liboventi/fatal.c - 664 sys sys 1188621652 226
+1189020603 134 a sys/src/liboventi/mkfile - 664 sys sys 1188621776 577
+1189020603 135 a sys/src/liboventi/pack.8 - 664 sys sys 1188621755 5477
+1189020603 136 a sys/src/liboventi/pack.c - 664 sys sys 1188621652 2852
+1189020603 137 a sys/src/liboventi/packet.8 - 664 sys sys 1188621755 24906
+1189020603 138 a sys/src/liboventi/packet.c - 664 sys sys 1188621652 13229
+1189020603 139 a sys/src/liboventi/packet.h - 664 sys sys 1019678692 641
+1189020603 140 a sys/src/liboventi/parsescore.8 - 664 sys sys 1188621755 1208
+1189020603 141 a sys/src/liboventi/parsescore.c - 664 sys sys 1188621652 517
+1189020603 142 a sys/src/liboventi/plan9-io.8 - 664 sys sys 1188621756 3811
+1189020603 143 a sys/src/liboventi/plan9-io.c - 664 sys sys 1188621652 1759
+1189020603 144 a sys/src/liboventi/plan9-sha1.8 - 664 sys sys 1188621756 2021
+1189020603 145 a sys/src/liboventi/plan9-sha1.c - 664 sys sys 1188621652 1059
+1189020603 146 a sys/src/liboventi/plan9-thread.8 - 664 sys sys 1188621756 9994
+1189020603 147 a sys/src/liboventi/plan9-thread.acid - 664 sys sys 1138756224 9254
+1189020603 148 a sys/src/liboventi/plan9-thread.c - 664 sys sys 1188621652 6714
+1189020603 149 a sys/src/liboventi/readfully.8 - 664 sys sys 1188621755 740
+1189020603 150 a sys/src/liboventi/readfully.c - 664 sys sys 1188621652 238
+1189020603 151 a sys/src/liboventi/rpc.8 - 664 sys sys 1188621755 16009
+1189020603 152 a sys/src/liboventi/rpc.c - 664 sys sys 1188621652 7297
+1189020603 153 a sys/src/liboventi/scorefmt.8 - 664 sys sys 1188621755 862
+1189020603 154 a sys/src/liboventi/scorefmt.c - 664 sys sys 1188621652 256
+1189020603 155 a sys/src/liboventi/server.8 - 664 sys sys 1188621756 10569
+1189020603 156 a sys/src/liboventi/server.c - 664 sys sys 1188621652 4508
+1189020603 157 a sys/src/liboventi/session.h - 664 sys sys 1063853749 936
+1189020603 158 a sys/src/liboventi/strdup.8 - 664 sys sys 1188621756 640
+1189020603 159 a sys/src/liboventi/strdup.c - 664 sys sys 1188621652 204
+1189020603 160 a sys/src/liboventi/venti.txt - 664 sys sys 1045502097 4347
+1189020603 161 a sys/src/liboventi/zero.8 - 664 sys sys 1188621756 3268
+1189020603 162 a sys/src/liboventi/zero.c - 664 sys sys 1188621652 1510
+1189020603 163 a sys/src/libventi/cache.acid - 664 sys sys 1177189440 11960
+1189020603 164 a sys/src/libventi/cache.c - 664 sys sys 1177189440 11925
+1189020603 165 c sys/src/libventi/client.c - 664 sys sys 1177189440 3197
+1189020603 166 a sys/src/libventi/conn.c - 664 sys sys 1177189440 895
+1189020603 167 a sys/src/libventi/cvt.h - 664 sys sys 1142736171 610
+1189020603 168 c sys/src/libventi/debug.c - 664 sys sys 1177189440 201
+1189020603 169 a sys/src/libventi/debugpacket.c - 664 sys sys 1177189441 3838
+1189020603 170 a sys/src/libventi/dial.c - 664 sys sys 1177189441 378
+1189020603 171 a sys/src/libventi/dtype.c - 664 sys sys 1177189441 1117
+1189020603 172 a sys/src/libventi/entry.c - 664 sys sys 1177189441 1779
+1189020603 173 a sys/src/libventi/fcall.c - 664 sys sys 1177189441 3765
+1189020603 174 a sys/src/libventi/fcallfmt.c - 664 sys sys 1177189441 1912
+1189020603 175 a sys/src/libventi/file.c - 664 sys sys 1177189441 23509
+1189020603 176 a sys/src/libventi/hangup.c - 664 sys sys 1177189441 547
+1189020603 177 a sys/src/libventi/log.c - 664 sys sys 1179957535 3742
+1189020603 178 a sys/src/libventi/mem.c - 664 sys sys 1177189441 1184
+1189020603 179 c sys/src/libventi/mkfile - 664 sys sys 1188621815 512
+1189020603 180 a sys/src/libventi/packet.acid - 664 sys sys 1143389340 21446
+1189020603 181 c sys/src/libventi/packet.c - 664 sys sys 1177189441 15894
+1189020603 182 c sys/src/libventi/parsescore.c - 664 sys sys 1177189441 719
+1189020603 183 a sys/src/libventi/queue.c - 664 sys sys 1177189441 1571
+1189020603 184 a sys/src/libventi/queue.h - 664 sys sys 1177189441 233
+1189020603 185 a sys/src/libventi/root.c - 664 sys sys 1177189441 1218
+1189020603 186 a sys/src/libventi/rpc.acid - 664 sys sys 1143491745 12592
+1189020603 187 c sys/src/libventi/rpc.c - 664 sys sys 1177189442 3174
+1189020603 188 c sys/src/libventi/scorefmt.c - 664 sys sys 1177189442 248
+1189020603 189 a sys/src/libventi/send.c - 664 sys sys 1177189442 4395
+1189020603 190 c sys/src/libventi/server.c - 664 sys sys 1179957527 3693
+1189020603 191 a sys/src/libventi/srvhello.c - 664 sys sys 1177189442 833
+1189020603 192 c sys/src/libventi/strdup.c - 664 sys sys 1177189442 201
+1189020603 193 a sys/src/libventi/string.c - 664 sys sys 1177189442 737
+1189020603 194 a sys/src/libventi/time.c - 664 sys sys 1177189442 497
+1189020603 195 a sys/src/libventi/version.c - 664 sys sys 1177189442 2174
+1189020603 196 c sys/src/libventi/zero.c - 664 sys sys 1177189442 899
+1189020603 197 a sys/src/libventi/zeroscore.c - 664 sys sys 1177189442 248
+1189020603 198 d sys/src/libventi/venti.txt - 664 sys sys 1045502097 0
+1189020603 199 d sys/src/libventi/session.h - 664 sys sys 1063853749 0
+1189020603 200 d sys/src/libventi/readfully.c - 664 sys sys 1045502095 0
+1189020603 201 d sys/src/libventi/plan9-thread.c - 664 sys sys 1135487955 0
+1189020603 202 d sys/src/libventi/plan9-sha1.c - 664 sys sys 1045502095 0
+1189020603 203 d sys/src/libventi/plan9-io.c - 664 sys sys 1135487933 0
+1189020603 204 d sys/src/libventi/packet.h - 664 sys sys 1019678692 0
+1189020603 205 d sys/src/libventi/pack.c - 664 sys sys 1045502094 0
+1189020603 206 d sys/src/libventi/fatal.c - 664 sys sys 1084468118 0
+1189020603 207 d sys/src/libventi/errfmt.c - 664 sys sys 1019678691 0
+1189020603 208 d sys/src/cmd/venti/zeropart.c - 664 sys sys 1132452194 0
+1189020603 209 d sys/src/cmd/venti/xml.h - 664 sys sys 1019678881 0
+1189020603 210 d sys/src/cmd/venti/xml.c - 664 sys sys 1091904425 0
+1189020603 211 d sys/src/cmd/venti/wrtape - 775 sys sys 1019678881 0
+1189020603 212 d sys/src/cmd/venti/wrarena.c - 664 sys sys 1038536023 0
+1189020603 213 d sys/src/cmd/venti/whack.h - 664 sys sys 1019678881 0
+1189020603 214 d sys/src/cmd/venti/whack.c - 664 sys sys 1019678881 0
+1189020603 215 d sys/src/cmd/venti/verifyarena.c - 664 sys sys 1019678881 0
+1189020603 216 d sys/src/cmd/venti/ventifs.c - 664 sys sys 1063854129 0
+1189020603 217 d sys/src/cmd/venti/venti.conf - 664 sys sys 1019867537 0
+1189020603 218 d sys/src/cmd/venti/venti.c - 664 sys sys 1069101830 0
+1189020603 219 d sys/src/cmd/venti/utils.c - 664 sys sys 1127527222 0
+1189020603 220 d sys/src/cmd/venti/unwhack.c - 664 sys sys 1019678881 0
+1189020603 221 d sys/src/cmd/venti/unittoull.c - 664 sys sys 1019678881 0
+1189020603 222 d sys/src/cmd/venti/syncindex0.c - 664 sys sys 1036470283 0
+1189020603 223 d sys/src/cmd/venti/syncindex.c - 664 sys sys 1055707433 0
+1189020603 224 d sys/src/cmd/venti/syncarena.c - 664 sys sys 1183441016 0
+1189020603 225 d sys/src/cmd/venti/stdinc.h - 664 sys sys 1019678880 0
+1189020603 226 d sys/src/cmd/venti/stats.c - 664 sys sys 1019678880 0
+1189020603 227 d sys/src/cmd/venti/sortientry.c - 664 sys sys 1143759340 0
+1189020603 228 d sys/src/cmd/venti/score.c - 664 sys sys 1045503655 0
+1189020603 229 d sys/src/cmd/venti/rdarena.c - 664 sys sys 1055707432 0
+1189020603 230 d sys/src/cmd/venti/printarena.c - 664 sys sys 1109511493 0
+1189020603 231 d sys/src/cmd/venti/part.c - 664 sys sys 1036389637 0
+1189020603 232 d sys/src/cmd/venti/notes - 664 sys sys 1068500911 0
+1189020603 233 d sys/src/cmd/venti/mkxml.elems - 775 sys sys 1019678880 0
+1189020603 234 d sys/src/cmd/venti/mkxml - 775 sys sys 1019678879 0
+1189020603 235 d sys/src/cmd/venti/lumpqueue.c - 664 sys sys 1168307536 0
+1189020603 236 d sys/src/cmd/venti/lumpcache.c - 664 sys sys 1019678879 0
+1189020603 237 d sys/src/cmd/venti/lump.c - 664 sys sys 1055707441 0
+1189020603 238 d sys/src/cmd/venti/index.c - 664 sys sys 1045503654 0
+1189020603 239 d sys/src/cmd/venti/ifile.c - 664 sys sys 1055707440 0
+1189020603 240 d sys/src/cmd/venti/icache.c - 664 sys sys 1091904425 0
+1189020603 241 d sys/src/cmd/venti/httpd.c - 664 sys sys 1045503654 0
+1189020603 242 d sys/src/cmd/venti/fns.h - 664 sys sys 1055707422 0
+1189020603 243 d sys/src/cmd/venti/fmtisect.c - 664 sys sys 1132452192 0
+1189020603 244 d sys/src/cmd/venti/fmtindex.c - 664 sys sys 1138471072 0
+1189020603 245 d sys/src/cmd/venti/fmtarenas.c - 664 sys sys 1132452192 0
+1189020603 246 d sys/src/cmd/venti/findscore.c - 664 sys sys 1121977913 0
+1189020603 247 d sys/src/cmd/venti/dumpvacroots - 775 sys sys 1167777001 0
+1189020603 248 d sys/src/cmd/venti/dumparena.c - 664 sys sys 1019678878 0
+1189020603 249 d sys/src/cmd/venti/dump.c - 664 sys sys 1068520313 0
+1189020603 250 d sys/src/cmd/venti/dcache.c - 664 sys sys 1019678878 0
+1189020603 251 d sys/src/cmd/venti/dat.h - 664 sys sys 1132452191 0
+1189020603 252 d sys/src/cmd/venti/conv.c - 664 sys sys 1019678878 0
+1189020603 253 d sys/src/cmd/venti/config.c - 664 sys sys 1138471074 0
+1189020603 254 d sys/src/cmd/venti/conf.rc - 775 sys sys 1055707446 0
+1189020603 255 d sys/src/cmd/venti/clumpstats.c - 664 sys sys 1055707430 0
+1189020603 256 d sys/src/cmd/venti/clump.c - 664 sys sys 1019678877 0
+1189020603 257 d sys/src/cmd/venti/checkindex.c - 664 sys sys 1143759337 0
+1189020603 258 d sys/src/cmd/venti/checkarenas.c - 664 sys sys 1019854295 0
+1189020603 259 d sys/src/cmd/venti/buildindex.c - 664 sys sys 1143759340 0
+1189020603 260 d sys/src/cmd/venti/buildbuck.c - 664 sys sys 1019678877 0
+1189020603 261 d sys/src/cmd/venti/backup.example - 775 sys sys 1045503662 0
+1189020603 262 d sys/src/cmd/venti/arenas.c - 664 sys sys 1063854192 0
+1189020603 263 d sys/src/cmd/venti/arena.c - 664 sys sys 1045503653 0
+1189020603 264 d sys/man/8/ventiaux - 664 sys sys 1159419552 0
+1189024204 0 c 386/bin/aux/na - 775 sys sys 1189023884 154372
+1189029603 0 c sys/man/4/cwfs - 664 sys sys 1189029640 6255
+1189029603 1 c sys/src/cmd/cwfs/main.c - 664 sys sys 1189029628 9977
+1189033207 0 c 386/bin/cwfs - 775 sys sys 1189032952 365566
+1189033207 1 c sys/src/cmd/cwfs/main.c - 664 sys sys 1189033288 10146
+1189035005 0 c 386/bin/cwfs - 775 sys sys 1189033352 365605
+1189049404 0 c 386/bin/cwfs - 775 sys sys 1189048120 365605
+1189049404 1 c 386/lib/libventi.a - 664 sys sys 1189048123 189448
+1189049404 2 c sys/src/cmd/aquarela/nblistener.c - 664 sys sys 1189049390 687
+1189049404 3 c sys/src/mkfile - 664 sys sys 1189049203 1017

+ 271 - 0
sys/include/oventi.h

@@ -0,0 +1,271 @@
+#pragma	lib	"liboventi.a"
+#pragma	src	"/sys/src/liboventi"
+
+typedef struct VtSession	VtSession;
+typedef struct VtSha1		VtSha1;
+typedef struct Packet		Packet;
+typedef struct VtLock 		VtLock;
+typedef struct VtRendez		VtRendez;
+typedef struct VtRoot		VtRoot;
+typedef struct VtEntry		VtEntry;
+typedef struct VtServerVtbl	VtServerVtbl;
+
+#pragma incomplete VtSession
+#pragma incomplete VtSha1
+#pragma incomplete Packet
+#pragma incomplete VtLock
+#pragma incomplete VtRendez
+
+enum {
+	VtScoreSize	= 20, /* Venti */
+	VtMaxLumpSize	= 56*1024,
+	VtPointerDepth	= 7,	
+	VtEntrySize	= 40,
+	VtRootSize 	= 300,
+	VtMaxStringSize	= 1000,
+	VtAuthSize 	= 1024,  /* size of auth group - in bits - must be multiple of 8 */
+	MaxFragSize 	= 9*1024,
+	VtMaxFileSize	= (1ULL<<48) - 1,
+	VtRootVersion	= 2,
+};
+
+/* crypto strengths */
+enum {
+	VtCryptoStrengthNone,
+	VtCryptoStrengthAuth,
+	VtCryptoStrengthWeak,
+	VtCryptoStrengthStrong,
+};
+
+/* crypto suites */
+enum {
+	VtCryptoNone,
+	VtCryptoSSL3,
+	VtCryptoTLS1,
+
+	VtCryptoMax
+};
+
+/* codecs */
+enum {
+	VtCodecNone,
+
+	VtCodecDeflate,
+	VtCodecThwack,
+
+	VtCodecMax
+};
+
+/* Lump Types */
+enum {
+	VtErrType,		/* illegal */
+
+	VtRootType,
+	VtDirType,
+	VtPointerType0,
+	VtPointerType1,
+	VtPointerType2,
+	VtPointerType3,
+	VtPointerType4,
+	VtPointerType5,
+	VtPointerType6,
+	VtPointerType7,		/* not used */
+	VtPointerType8,		/* not used */
+	VtPointerType9,		/* not used */
+	VtDataType,
+
+	VtMaxType
+};
+
+/* Dir Entry flags */
+enum {
+	VtEntryActive = (1<<0),		/* entry is in use */
+	VtEntryDir = (1<<1),		/* a directory */
+	VtEntryDepthShift = 2,		/* shift for pointer depth */
+	VtEntryDepthMask = (0x7<<2),	/* mask for pointer depth */
+	VtEntryLocal = (1<<5),		/* used for local storage: should not be set for Venti blocks */
+	VtEntryNoArchive = (1<<6),	/* used for local storage: should not be set for Venti blocks */
+};
+
+struct VtRoot {
+	ushort version;
+	char name[128];
+	char type[128];
+	uchar score[VtScoreSize];	/* to a Dir block */
+	ushort blockSize;		/* maximum block size */
+	uchar prev[VtScoreSize];	/* last root block */
+};
+
+struct VtEntry {
+	ulong gen;			/* generation number */
+	ushort psize;			/* pointer block size */
+	ushort dsize;			/* data block size */
+	uchar depth;			/* unpacked from flags */
+	uchar flags;
+	uvlong size;
+	uchar score[VtScoreSize];
+};
+
+struct VtServerVtbl {
+	Packet *(*read)(VtSession*, uchar score[VtScoreSize], int type, int n);
+	int (*write)(VtSession*, uchar score[VtScoreSize], int type, Packet *p);
+	void (*closing)(VtSession*, int clean);
+	void (*sync)(VtSession*);
+};
+
+/* versions */
+enum {
+	/* experimental versions */
+	VtVersion01 = 1,
+	VtVersion02,
+};
+
+/* score of zero length block */
+extern uchar vtZeroScore[VtScoreSize];	
+
+/* both sides */
+void vtAttach(void);
+void vtDetach(void);
+void vtClose(VtSession *s);
+void vtFree(VtSession *s);
+char *vtGetUid(VtSession *s);
+char *vtGetSid(VtSession *s);
+int vtSetDebug(VtSession *s, int);
+int vtGetDebug(VtSession *s);
+int vtSetFd(VtSession *s, int fd);
+int vtGetFd(VtSession *s);
+int vtConnect(VtSession *s, char *password);
+int vtSetCryptoStrength(VtSession *s, int);
+int vtGetCryptoStrength(VtSession *s);
+int vtSetCompression(VtSession *s, int);
+int vtGetCompression(VtSession *s);
+int vtGetCrypto(VtSession *s);
+int vtGetCodec(VtSession *s);
+char *vtGetVersion(VtSession *s);
+char *vtGetError(void);
+int vtErrFmt(Fmt *fmt);
+void vtDebug(VtSession*, char *, ...);
+void vtDebugMesg(VtSession *z, Packet *p, char *s);
+
+/* internal */
+VtSession *vtAlloc(void);
+void vtReset(VtSession*);
+int vtAddString(Packet*, char*);
+int vtGetString(Packet*, char**);
+int vtSendPacket(VtSession*, Packet*);
+Packet *vtRecvPacket(VtSession*);
+void vtDisconnect(VtSession*, int);
+int vtHello(VtSession*);
+
+/* client side */
+VtSession *vtClientAlloc(void);
+VtSession *vtDial(char *server, int canfail);
+int vtRedial(VtSession*, char *server);
+VtSession *vtStdioServer(char *server);
+int vtPing(VtSession *s);
+int vtSetUid(VtSession*, char *uid);
+int vtRead(VtSession*, uchar score[VtScoreSize], int type, uchar *buf, int n);
+int vtWrite(VtSession*, uchar score[VtScoreSize], int type, uchar *buf, int n);
+Packet *vtReadPacket(VtSession*, uchar score[VtScoreSize], int type, int n);
+int vtWritePacket(VtSession*, uchar score[VtScoreSize], int type, Packet *p);
+int vtSync(VtSession *s);
+
+int vtZeroExtend(int type, uchar *buf, int n, int nn);
+int vtZeroTruncate(int type, uchar *buf, int n);
+int vtParseScore(char*, uint, uchar[VtScoreSize]);
+
+void vtRootPack(VtRoot*, uchar*);
+int vtRootUnpack(VtRoot*, uchar*);
+void vtEntryPack(VtEntry*, uchar*, int index);
+int vtEntryUnpack(VtEntry*, uchar*, int index);
+
+/* server side */
+VtSession *vtServerAlloc(VtServerVtbl*);
+int vtSetSid(VtSession *s, char *sid);
+int vtExport(VtSession *s);
+
+/* sha1 */
+VtSha1* vtSha1Alloc(void);
+void vtSha1Free(VtSha1*);
+void vtSha1Init(VtSha1*);
+void vtSha1Update(VtSha1*, uchar *, int n);
+void vtSha1Final(VtSha1*, uchar sha1[VtScoreSize]);
+void vtSha1(uchar score[VtScoreSize], uchar *, int);
+int vtSha1Check(uchar score[VtScoreSize], uchar *, int);
+int vtScoreFmt(Fmt *fmt);
+
+/* Packet */
+Packet *packetAlloc(void);
+void packetFree(Packet*);
+Packet *packetForeign(uchar *buf, int n, void (*free)(void *a), void *a);
+Packet *packetDup(Packet*, int offset, int n);
+Packet *packetSplit(Packet*, int n);
+int packetConsume(Packet*, uchar *buf, int n);
+int packetTrim(Packet*, int offset, int n);
+uchar *packetHeader(Packet*, int n);
+uchar *packetTrailer(Packet*, int n);
+int packetPrefix(Packet*, uchar *buf, int n);
+int packetAppend(Packet*, uchar *buf, int n);
+int packetConcat(Packet*, Packet*);
+uchar *packetPeek(Packet*, uchar *buf, int offset, int n);
+int packetCopy(Packet*, uchar *buf, int offset, int n);
+int packetFragments(Packet*, IOchunk*, int nio, int offset);
+int packetSize(Packet*);
+int packetAllocatedSize(Packet*);
+void packetSha1(Packet*, uchar sha1[VtScoreSize]);
+int packetCompact(Packet*);
+int packetCmp(Packet*, Packet*);
+void packetStats(void);
+
+/* portability stuff - should be a separate library */
+
+void vtMemFree(void *);
+void *vtMemAlloc(int);
+void *vtMemAllocZ(int);
+void *vtMemRealloc(void *p, int);
+void *vtMemBrk(int n);
+char *vtStrDup(char *);
+void vtFatal(char *, ...);
+char *vtGetError(void);
+char *vtSetError(char *, ...);
+char *vtOSError(void);
+
+/* locking/threads */
+int vtThread(void (*f)(void*), void *rock);
+void vtThreadSetName(char*);
+
+VtLock *vtLockAlloc(void);
+/* void vtLockInit(VtLock**); */
+void vtLock(VtLock*);
+int vtCanLock(VtLock*);
+void vtRLock(VtLock*);
+int vtCanRLock(VtLock*);
+void vtUnlock(VtLock*);
+void vtRUnlock(VtLock*);
+void vtLockFree(VtLock*);
+
+VtRendez *vtRendezAlloc(VtLock*);
+void vtRendezFree(VtRendez*);
+int vtSleep(VtRendez*);
+int vtWakeup(VtRendez*);
+int vtWakeupAll(VtRendez*);
+
+/* fd functions - really network (socket) functions */
+void vtFdClose(int);
+int vtFdRead(int, uchar*, int);
+int vtFdReadFully(int, uchar*, int);
+int vtFdWrite(int, uchar*, int);
+
+/*
+ * formatting
+ * except where noted, these formats all ignore
+ * the width and precision arguments, and all flags
+ *
+ * V	a venti score
+ * R	venti error
+ */
+#pragma	varargck	type	"V"		uchar*
+#pragma	varargck	type	"R"		void
+
+#pragma	varargck	argpos	vtSetError	1
+

+ 457 - 232
sys/include/venti.h

@@ -1,271 +1,496 @@
-#pragma	lib	"libventi.a"
-#pragma	src	"/sys/src/libventi"
-
-typedef struct VtSession	VtSession;
-typedef struct VtSha1		VtSha1;
-typedef struct Packet		Packet;
-typedef struct VtLock 		VtLock;
-typedef struct VtRendez		VtRendez;
-typedef struct VtRoot		VtRoot;
-typedef struct VtEntry		VtEntry;
-typedef struct VtServerVtbl	VtServerVtbl;
-
-#pragma incomplete VtSession
-#pragma incomplete VtSha1
+#pragma lib "libventi.a"
+#pragma src "/sys/src/libventi"
+
+
+/* XXX should be own library? */
+/*
+ * Packets
+ */
+enum
+{
+	MaxFragSize = 9*1024
+};
+
+typedef struct Packet Packet;
 #pragma incomplete Packet
-#pragma incomplete VtLock
-#pragma incomplete VtRendez
 
-enum {
-	VtScoreSize	= 20, /* Venti */
+Packet*	packetalloc(void);
+void	packetappend(Packet*, uchar *buf, int n);
+uint	packetasize(Packet*);
+int	packetcmp(Packet*, Packet*);
+int	packetcompact(Packet*);
+void	packetconcat(Packet*, Packet*);
+int	packetconsume(Packet*, uchar *buf, int n);
+int	packetcopy(Packet*, uchar *buf, int offset, int n);
+Packet*	packetdup(Packet*, int offset, int n);
+Packet*	packetforeign(uchar *buf, int n, void (*free)(void *a), void *a);
+int	packetfragments(Packet*, IOchunk*, int nio, int offset);
+void	packetfree(Packet*);
+uchar*	packetheader(Packet*, int n);
+uchar*	packetpeek(Packet*, uchar *buf, int offset, int n);
+void	packetprefix(Packet*, uchar *buf, int n);
+void	packetsha1(Packet*, uchar sha1[20]);
+uint	packetsize(Packet*);
+Packet*	packetsplit(Packet*, int n);
+void	packetstats(void);
+uchar*	packettrailer(Packet*, int n);
+int	packettrim(Packet*, int offset, int n);
+
+/* XXX should be own library? */
+/*
+ * Logging
+ */
+typedef struct VtLog VtLog;
+typedef struct VtLogChunk VtLogChunk;
+
+struct VtLog
+{
+	VtLog	*next;		/* in hash table */
+	char	*name;
+	VtLogChunk *chunk;
+	uint	nchunk;
+	VtLogChunk *w;
+	QLock	lk;
+	int	ref;
+};
+
+struct VtLogChunk
+{
+	char	*p;
+	char	*ep;
+	char	*wp;
+};
+
+VtLog*	vtlogopen(char *name, uint size);
+void	vtlogprint(VtLog *log, char *fmt, ...);
+void	vtlog(char *name, char *fmt, ...);
+void	vtlogclose(VtLog*);
+void	vtlogremove(char *name);
+char**	vtlognames(int*);
+void	vtlogdump(int fd, VtLog*);
+
+/* XXX begin actual venti.h */
+
+typedef struct VtFcall VtFcall;
+typedef struct VtConn VtConn;
+typedef struct VtEntry VtEntry;
+typedef struct VtRoot VtRoot;
+
+/*
+ * Fundamental constants.
+ */
+enum
+{
+	VtScoreSize	= 20,
+	VtMaxStringSize = 1024,
 	VtMaxLumpSize	= 56*1024,
-	VtPointerDepth	= 7,	
-	VtEntrySize	= 40,
-	VtRootSize 	= 300,
-	VtMaxStringSize	= 1000,
-	VtAuthSize 	= 1024,  /* size of auth group - in bits - must be multiple of 8 */
-	MaxFragSize 	= 9*1024,
-	VtMaxFileSize	= (1ULL<<48) - 1,
-	VtRootVersion	= 2,
+	VtPointerDepth	= 7
+};
+#define VtMaxFileSize ((1ULL<<48)-1)
+
+
+/* 
+ * Strings in packets.
+ */
+int vtputstring(Packet*, char*);
+int vtgetstring(Packet*, char**);
+
+/*
+ * Block types.
+ * 
+ * The initial Venti protocol had a much
+ * less regular list of block types.
+ * vttodisktype converts from new to old.
+ */
+enum
+{
+	VtDataType	= 0<<3,
+	/* VtDataType+1, ... */
+	VtDirType	= 1<<3,
+	/* VtDirType+1, ... */
+	VtRootType	= 2<<3,
+	VtMaxType,
+	VtCorruptType = 0xFF,
+
+	VtTypeDepthMask = 7,
+	VtTypeBaseMask = ~VtTypeDepthMask
+};
+
+/* convert to/from on-disk type numbers */
+uint vttodisktype(uint);
+uint vtfromdisktype(uint);
+
+/*
+ * VtEntry describes a Venti stream
+ *
+ * The _ enums are only used on the wire.
+ * They are not present in the VtEntry structure
+ * and should not be used by client programs.
+ * (The info is in the type field.)
+ */
+enum
+{
+	VtEntryActive = 1<<0,		/* entry is in use */
+	_VtEntryDir = 1<<1,		/* a directory */
+	_VtEntryDepthShift = 2,		/* shift for pointer depth */
+	_VtEntryDepthMask = 7<<2,	/* mask for pointer depth */
+	VtEntryLocal = 1<<5		/* for local storage only */
+};
+enum
+{
+	VtEntrySize = 40
+};
+struct VtEntry
+{
+	ulong	gen;			/* generation number */
+	ushort	psize;			/* pointer block size */
+	ushort	dsize;			/* data block size */
+	uchar	type;
+	uchar	flags;
+	uvlong	size;
+	uchar	score[VtScoreSize];
 };
 
-/* crypto strengths */
-enum {
+void vtentrypack(VtEntry*, uchar*, int index);
+int vtentryunpack(VtEntry*, uchar*, int index);
+
+struct VtRoot
+{
+	char	name[128];
+	char	type[128];
+	uchar	score[VtScoreSize];	/* to a Dir block */
+	ushort	blocksize;		/* maximum block size */
+	uchar	prev[VtScoreSize];	/* last root block */
+};
+
+enum
+{
+	VtRootSize = 300,
+	VtRootVersion = 2
+};
+
+void vtrootpack(VtRoot*, uchar*);
+int vtrootunpack(VtRoot*, uchar*);
+
+/*
+ * score of zero length block
+ */
+extern uchar vtzeroscore[VtScoreSize];
+
+/*
+ * zero extend and truncate blocks
+ */
+void vtzeroextend(int type, uchar *buf, uint n, uint nn);
+uint vtzerotruncate(int type, uchar *buf, uint n);
+
+/*
+ * parse score: mungs s
+ */
+int vtparsescore(char *s, char **prefix, uchar[VtScoreSize]);
+
+/*
+ * formatting
+ * other than noted, these formats all ignore
+ * the width and precision arguments, and all flags
+ *
+ * V	a venti score
+ */
+#pragma	varargck	type	"V"	uchar*
+#pragma	varargck	type	"F"	VtFcall*
+#pragma	varargck	type	"T"	void
+#pragma	varargck	type	"lT"	void
+
+int vtscorefmt(Fmt*);
+
+/*
+ * error-checking malloc et al.
+ */
+void	vtfree(void *);
+void*	vtmalloc(int);
+void*	vtmallocz(int);
+void*	vtrealloc(void *p, int);
+void*	vtbrk(int n);
+char*	vtstrdup(char *);
+
+/*
+ * Venti protocol
+ */
+
+/*
+ * Crypto strengths
+ */
+enum
+{
 	VtCryptoStrengthNone,
 	VtCryptoStrengthAuth,
 	VtCryptoStrengthWeak,
-	VtCryptoStrengthStrong,
+	VtCryptoStrengthStrong
 };
 
-/* crypto suites */
-enum {
+/*
+ * Crypto suites
+ */
+enum
+{
 	VtCryptoNone,
 	VtCryptoSSL3,
 	VtCryptoTLS1,
-
 	VtCryptoMax
 };
 
-/* codecs */
-enum {
+/* 
+ * Codecs
+ */
+enum
+{
 	VtCodecNone,
-
 	VtCodecDeflate,
 	VtCodecThwack,
-
 	VtCodecMax
 };
 
-/* Lump Types */
-enum {
-	VtErrType,		/* illegal */
-
-	VtRootType,
-	VtDirType,
-	VtPointerType0,
-	VtPointerType1,
-	VtPointerType2,
-	VtPointerType3,
-	VtPointerType4,
-	VtPointerType5,
-	VtPointerType6,
-	VtPointerType7,		/* not used */
-	VtPointerType8,		/* not used */
-	VtPointerType9,		/* not used */
-	VtDataType,
-
-	VtMaxType
+enum
+{
+	VtRerror	= 1,
+	VtTping		= 2,
+	VtRping,
+	VtThello	= 4,
+	VtRhello,
+	VtTgoodbye	= 6,
+	VtRgoodbye,	/* not used */
+	VtTauth0	= 8,
+	VtRauth0,
+	VtTauth1	= 10,
+	VtRauth1,
+	VtTread		= 12,
+	VtRread,
+	VtTwrite	= 14,
+	VtRwrite,
+	VtTsync		= 16,
+	VtRsync,
+
+	VtTmax
 };
 
-/* Dir Entry flags */
-enum {
-	VtEntryActive = (1<<0),		/* entry is in use */
-	VtEntryDir = (1<<1),		/* a directory */
-	VtEntryDepthShift = 2,		/* shift for pointer depth */
-	VtEntryDepthMask = (0x7<<2),	/* mask for pointer depth */
-	VtEntryLocal = (1<<5),		/* used for local storage: should not be set for Venti blocks */
-	VtEntryNoArchive = (1<<6),	/* used for local storage: should not be set for Venti blocks */
+struct VtFcall
+{
+	uchar	msgtype;
+	uchar	tag;
+
+	char	*error;		/* Rerror */
+
+	char	*version;	/* Thello */
+	char	*uid;		/* Thello */
+	uchar	strength;	/* Thello */
+	uchar	*crypto;	/* Thello */
+	uint	ncrypto;	/* Thello */
+	uchar	*codec;		/* Thello */
+	uint	ncodec;		/* Thello */
+	char	*sid;		/* Rhello */
+	uchar	rcrypto;	/* Rhello */
+	uchar	rcodec;		/* Rhello */
+	uchar	*auth;		/* TauthX, RauthX */
+	uint	nauth;		/* TauthX, RauthX */
+	uchar	score[VtScoreSize];	/* Tread, Rwrite */
+	uchar	blocktype;	/* Tread, Twrite */
+	ushort	count;		/* Tread */
+	Packet	*data;		/* Rread, Twrite */
+};
+
+Packet*	vtfcallpack(VtFcall*);
+int	vtfcallunpack(VtFcall*, Packet*);
+void	vtfcallclear(VtFcall*);
+int	vtfcallfmt(Fmt*);
+
+enum
+{
+	VtStateAlloc,
+	VtStateConnected,
+	VtStateClosed
 };
 
-struct VtRoot {
-	ushort version;
-	char name[128];
-	char type[128];
-	uchar score[VtScoreSize];	/* to a Dir block */
-	ushort blockSize;		/* maximum block size */
-	uchar prev[VtScoreSize];	/* last root block */
+struct VtConn
+{
+	QLock	lk;
+	QLock	inlk;
+	QLock	outlk;
+	int	debug;
+	int	infd;
+	int	outfd;
+	int	muxer;
+	void	*writeq;
+	void	*readq;
+	int	state;
+	void	*wait[256];
+	uint	ntag;
+	uint	nsleep;
+	Packet	*part;
+	Rendez	tagrend;
+	Rendez	rpcfork;
+	char	*version;
+	char	*uid;
+	char	*sid;
+	char	addr[256];	/* address of other side */
 };
 
-struct VtEntry {
-	ulong gen;			/* generation number */
-	ushort psize;			/* pointer block size */
-	ushort dsize;			/* data block size */
-	uchar depth;			/* unpacked from flags */
-	uchar flags;
-	uvlong size;
-	uchar score[VtScoreSize];
+VtConn*	vtconn(int infd, int outfd);
+VtConn*	vtdial(char*);
+void	vtfreeconn(VtConn*);
+int	vtsend(VtConn*, Packet*);
+Packet*	vtrecv(VtConn*);
+int	vtversion(VtConn* z);
+void	vtdebug(VtConn* z, char*, ...);
+void	vthangup(VtConn* z);
+int	vtgoodbye(VtConn* z);
+
+/* #pragma varargck argpos vtdebug 2 */
+
+/* server */
+typedef struct VtSrv VtSrv;
+#pragma incomplete VtSrv
+typedef struct VtReq VtReq;
+struct VtReq
+{
+	VtFcall	tx;
+	VtFcall	rx;
+/* private */
+	VtSrv	*srv;
+	void	*sc;
 };
 
-struct VtServerVtbl {
-	Packet *(*read)(VtSession*, uchar score[VtScoreSize], int type, int n);
-	int (*write)(VtSession*, uchar score[VtScoreSize], int type, Packet *p);
-	void (*closing)(VtSession*, int clean);
-	void (*sync)(VtSession*);
+int	vtsrvhello(VtConn*);
+VtSrv*	vtlisten(char *addr);
+VtReq*	vtgetreq(VtSrv*);
+void	vtrespond(VtReq*);
+
+/* client */
+Packet*	vtrpc(VtConn*, Packet*);
+Packet*	_vtrpc(VtConn*, Packet*, VtFcall*);
+void	vtrecvproc(void*);	/* VtConn */
+void	vtsendproc(void*);	/* VtConn */
+
+int	vtconnect(VtConn*);
+int	vthello(VtConn*);
+int	vtread(VtConn*, uchar score[VtScoreSize], uint type, uchar *buf, int n);
+int	vtwrite(VtConn*, uchar score[VtScoreSize], uint type, uchar *buf, int n);
+Packet*	vtreadpacket(VtConn*, uchar score[VtScoreSize], uint type, int n);
+int	vtwritepacket(VtConn*, uchar score[VtScoreSize], uint type, Packet *p);
+int	vtsync(VtConn*);
+int	vtping(VtConn*);
+
+/*
+ * Data blocks and block cache.
+ */
+enum
+{
+	NilBlock = ~0
 };
 
-/* versions */
-enum {
-	/* experimental versions */
-	VtVersion01 = 1,
-	VtVersion02,
+typedef struct VtBlock VtBlock;
+typedef struct VtCache VtCache;
+#pragma incomplete VtCache
+
+struct VtBlock
+{
+	VtCache	*c;
+	QLock	lk;
+
+	uchar	*data;
+	uchar	score[VtScoreSize];
+	uchar	type;			/* BtXXX */
+
+	/* internal to cache */
+	int	nlock;
+	int	iostate;
+	int	ref;
+	u32int	heap;
+	VtBlock	*next;
+	VtBlock	**prev;
+	u32int	used;
+	u32int	used2;
+	u32int	addr;
+	uintptr	pc;
 };
 
-/* score of zero length block */
-extern uchar vtZeroScore[VtScoreSize];	
-
-/* both sides */
-void vtAttach(void);
-void vtDetach(void);
-void vtClose(VtSession *s);
-void vtFree(VtSession *s);
-char *vtGetUid(VtSession *s);
-char *vtGetSid(VtSession *s);
-int vtSetDebug(VtSession *s, int);
-int vtGetDebug(VtSession *s);
-int vtSetFd(VtSession *s, int fd);
-int vtGetFd(VtSession *s);
-int vtConnect(VtSession *s, char *password);
-int vtSetCryptoStrength(VtSession *s, int);
-int vtGetCryptoStrength(VtSession *s);
-int vtSetCompression(VtSession *s, int);
-int vtGetCompression(VtSession *s);
-int vtGetCrypto(VtSession *s);
-int vtGetCodec(VtSession *s);
-char *vtGetVersion(VtSession *s);
-char *vtGetError(void);
-int vtErrFmt(Fmt *fmt);
-void vtDebug(VtSession*, char *, ...);
-void vtDebugMesg(VtSession *z, Packet *p, char *s);
-
-/* internal */
-VtSession *vtAlloc(void);
-void vtReset(VtSession*);
-int vtAddString(Packet*, char*);
-int vtGetString(Packet*, char**);
-int vtSendPacket(VtSession*, Packet*);
-Packet *vtRecvPacket(VtSession*);
-void vtDisconnect(VtSession*, int);
-int vtHello(VtSession*);
-
-/* client side */
-VtSession *vtClientAlloc(void);
-VtSession *vtDial(char *server, int canfail);
-int vtRedial(VtSession*, char *server);
-VtSession *vtStdioServer(char *server);
-int vtPing(VtSession *s);
-int vtSetUid(VtSession*, char *uid);
-int vtRead(VtSession*, uchar score[VtScoreSize], int type, uchar *buf, int n);
-int vtWrite(VtSession*, uchar score[VtScoreSize], int type, uchar *buf, int n);
-Packet *vtReadPacket(VtSession*, uchar score[VtScoreSize], int type, int n);
-int vtWritePacket(VtSession*, uchar score[VtScoreSize], int type, Packet *p);
-int vtSync(VtSession *s);
-
-int vtZeroExtend(int type, uchar *buf, int n, int nn);
-int vtZeroTruncate(int type, uchar *buf, int n);
-int vtParseScore(char*, uint, uchar[VtScoreSize]);
-
-void vtRootPack(VtRoot*, uchar*);
-int vtRootUnpack(VtRoot*, uchar*);
-void vtEntryPack(VtEntry*, uchar*, int index);
-int vtEntryUnpack(VtEntry*, uchar*, int index);
-
-/* server side */
-VtSession *vtServerAlloc(VtServerVtbl*);
-int vtSetSid(VtSession *s, char *sid);
-int vtExport(VtSession *s);
-
-/* sha1 */
-VtSha1* vtSha1Alloc(void);
-void vtSha1Free(VtSha1*);
-void vtSha1Init(VtSha1*);
-void vtSha1Update(VtSha1*, uchar *, int n);
-void vtSha1Final(VtSha1*, uchar sha1[VtScoreSize]);
-void vtSha1(uchar score[VtScoreSize], uchar *, int);
-int vtSha1Check(uchar score[VtScoreSize], uchar *, int);
-int vtScoreFmt(Fmt *fmt);
-
-/* Packet */
-Packet *packetAlloc(void);
-void packetFree(Packet*);
-Packet *packetForeign(uchar *buf, int n, void (*free)(void *a), void *a);
-Packet *packetDup(Packet*, int offset, int n);
-Packet *packetSplit(Packet*, int n);
-int packetConsume(Packet*, uchar *buf, int n);
-int packetTrim(Packet*, int offset, int n);
-uchar *packetHeader(Packet*, int n);
-uchar *packetTrailer(Packet*, int n);
-int packetPrefix(Packet*, uchar *buf, int n);
-int packetAppend(Packet*, uchar *buf, int n);
-int packetConcat(Packet*, Packet*);
-uchar *packetPeek(Packet*, uchar *buf, int offset, int n);
-int packetCopy(Packet*, uchar *buf, int offset, int n);
-int packetFragments(Packet*, IOchunk*, int nio, int offset);
-int packetSize(Packet*);
-int packetAllocatedSize(Packet*);
-void packetSha1(Packet*, uchar sha1[VtScoreSize]);
-int packetCompact(Packet*);
-int packetCmp(Packet*, Packet*);
-void packetStats(void);
-
-/* portability stuff - should be a seperate library */
-
-void vtMemFree(void *);
-void *vtMemAlloc(int);
-void *vtMemAllocZ(int);
-void *vtMemRealloc(void *p, int);
-void *vtMemBrk(int n);
-char *vtStrDup(char *);
-void vtFatal(char *, ...);
-char *vtGetError(void);
-char *vtSetError(char *, ...);
-char *vtOSError(void);
-
-/* locking/threads */
-int vtThread(void (*f)(void*), void *rock);
-void vtThreadSetName(char*);
-
-VtLock *vtLockAlloc(void);
-/* void vtLockInit(VtLock**); */
-void vtLock(VtLock*);
-int vtCanLock(VtLock*);
-void vtRLock(VtLock*);
-int vtCanRLock(VtLock*);
-void vtUnlock(VtLock*);
-void vtRUnlock(VtLock*);
-void vtLockFree(VtLock*);
-
-VtRendez *vtRendezAlloc(VtLock*);
-void vtRendezFree(VtRendez*);
-int vtSleep(VtRendez*);
-int vtWakeup(VtRendez*);
-int vtWakeupAll(VtRendez*);
-
-/* fd functions - really network (socket) functions */
-void vtFdClose(int);
-int vtFdRead(int, uchar*, int);
-int vtFdReadFully(int, uchar*, int);
-int vtFdWrite(int, uchar*, int);
+u32int	vtglobaltolocal(uchar[VtScoreSize]);
+void	vtlocaltoglobal(u32int, uchar[VtScoreSize]);
+
+VtCache*vtcachealloc(VtConn*, int blocksize, ulong nblocks);
+void	vtcachefree(VtCache*);
+VtBlock*vtcachelocal(VtCache*, u32int addr, int type);
+VtBlock*vtcacheglobal(VtCache*, uchar[VtScoreSize], int type);
+VtBlock*vtcacheallocblock(VtCache*, int type);
+void	vtcachesetwrite(VtCache*,
+	int(*)(VtConn*, uchar[VtScoreSize], uint, uchar*, int));
+void	vtblockput(VtBlock*);
+u32int	vtcacheblocksize(VtCache*);
+int	vtblockwrite(VtBlock*);
+VtBlock*vtblockcopy(VtBlock*);
+void	vtblockduplock(VtBlock*);
+
+extern int vtcachencopy, vtcachenread, vtcachenwrite;
+extern int vttracelevel;
 
 /*
- * formatting
- * other than noted, these formats all ignore
- * the width and precision arguments, and all flags
- *
- * V	a venti score
- * R	venti error
+ * Hash tree file tree.
  */
-#pragma	varargck	type	"V"		uchar*
-#pragma	varargck	type	"R"		void
+typedef struct VtFile VtFile;
+struct VtFile
+{
+	QLock	lk;
+	int	ref;
+	int	local;
+	VtBlock	*b;			/* block containing this file */
+	uchar	score[VtScoreSize];	/* score of block containing this file */
+
+/* immutable */
+	VtCache	*c;
+	int	mode;
+	u32int	gen;
+	int	dsize;
+	int	psize;
+	int	dir;
+	VtFile	*parent;
+	int	epb;			/* entries per block in parent */
+	u32int	offset; 		/* entry offset in parent */
+};
 
-#pragma	varargck	argpos	vtSetError	1
+enum
+{
+	VtOREAD,
+	VtOWRITE,
+	VtORDWR
+};
 
+VtBlock*vtfileblock(VtFile*, u32int, int mode);
+int	vtfileblockscore(VtFile*, u32int, uchar[VtScoreSize]);
+void	vtfileclose(VtFile*);
+VtFile*	_vtfilecreate(VtFile*, int offset, int psize, int dsize, int dir);
+VtFile*	vtfilecreate(VtFile*, int psize, int dsize, int dir);
+VtFile*	vtfilecreateroot(VtCache*, int psize, int dsize, int type);
+int	vtfileflush(VtFile*);
+int	vtfileflushbefore(VtFile*, u64int);
+u32int	vtfilegetdirsize(VtFile*);
+int	vtfilegetentry(VtFile*, VtEntry*);
+uvlong	vtfilegetsize(VtFile*);
+void	vtfileincref(VtFile*);
+int	vtfilelock2(VtFile*, VtFile*, int);
+int	vtfilelock(VtFile*, int);
+VtFile*	vtfileopen(VtFile*, u32int, int);
+VtFile*	vtfileopenroot(VtCache*, VtEntry*);
+long	vtfileread(VtFile*, void*, long, vlong);
+int	vtfileremove(VtFile*);
+int	vtfilesetdirsize(VtFile*, u32int);
+int	vtfilesetentry(VtFile*, VtEntry*);
+int	vtfilesetsize(VtFile*, u64int);
+int	vtfiletruncate(VtFile*);
+void	vtfileunlock(VtFile*);
+long	vtfilewrite(VtFile*, void*, long, vlong);
+
+int	vttimefmt(Fmt*);
+
+extern int chattyventi;
+extern int ventidoublechecksha1;
+extern int ventilogging;
+
+extern char *VtServerLog;

+ 158 - 0
sys/man/1/venti

@@ -0,0 +1,158 @@
+.TH VENTI 1
+.SH NAME
+read, write, copy \- simple Venti clients
+.SH SYNOPSIS
+.B venti/read
+[
+.B -h
+.I host
+]
+[
+.B -t
+.I type
+]
+.I score
+.br
+.B venti/write
+[
+.B -z
+]
+[
+.B -h
+.I host
+]
+[
+.B -t
+.I type
+]
+.br
+.B venti/copy
+[
+.B -fir
+]
+[
+.B -t
+.I type
+]
+.I srchost
+.I dsthost
+.I score
+.SH DESCRIPTION
+Venti is a SHA1-addressed block storage server.
+See 
+.IR venti (6)
+for a full introduction.
+.PP
+.I Read
+reads a block with the given
+.I score
+and numeric
+.I type 
+from the server
+.I host
+and prints the block to standard output.
+If the
+.B -h
+option is omitted,
+.I read
+consults the environment variable
+.B $venti
+for the name of the Venti server.
+If the
+.B -t
+option is omitted,
+.I read
+will try each type, one at a time, until it finds
+one that works.
+It prints the corresponding
+.B read
+.B -t
+command to standard error
+to indicate the type of the block.
+.PP
+.I Write
+writes at most 56 kilobytes of data from standard input 
+to the server
+.I host
+and prints the resulting score to standard output.
+If the
+.B -t
+option is omitted,
+.I write
+uses type 0,
+denoting a data block.
+If the
+.B -z
+option is given,
+.I write
+zero truncates the block before writing it to the server.
+.PP
+.I Copy
+expects
+.I score
+to be the score of a 
+.B VtRoot
+block.
+It copies the entire tree of blocks reachable from
+the root block from the server
+.I srchost
+to the server
+.IR dsthost .
+.PP
+The
+.B -f
+option causes
+.I copy
+to run in `fast' mode,
+assuming that if a block already exists on the
+destination Venti server, all its children also
+exist and need not be checked.
+.PP
+The
+.B -i
+and
+.B -r
+options control
+.IR copy 's
+reaction to errors reading
+from
+.IR srchost .
+.I Copy
+always prints information to standard error
+about each read error.
+By default,
+.I copy
+exits after printing the first error.
+If the
+.B -i
+option is given, read errors are ignored.
+This is dangerous behavior because it breaks the 
+assumption made by `fast' mode.
+If the
+.B -r
+option is given, 
+.I copy
+replaces pointers to unreadable blocks with
+pointers to the zero block.
+It writes the new root score to standard output.
+.SH SOURCE
+.B /sys/src/cmd/venti
+.SH SEE ALSO
+.IR vac (1),
+.IR venti (2),
+.IR vacfs (4),
+.IR venti (6),
+.IR venti (8),
+.IR venti-backup (8),
+.IR venti-fmt (8)
+.SH BUGS
+Currently
+.I venti/copy
+doesn't seem to copy the block corresponding to
+.IR score
+and possibly others, so use
+.I venti/copy
+for now.
+.PP
+There should be programs to read and write
+venti files and directories.

+ 246 - 0
sys/man/2/venti-cache

@@ -0,0 +1,246 @@
+.TH VENTI-CACHE 2
+.SH NAME
+VtBlock, VtCache, 
+vtblockcopy,
+vtblockdirty,
+vtblockduplock,
+vtblockput,
+vtblockwrite,
+vtcachealloc,
+vtcacheallocblock,
+vtcacheblocksize,
+vtcachefree,
+vtcacheglobal,
+vtcachelocal,
+vtcachesetwrite,
+vtglobaltolocal,
+vtlocaltoglobal \- Venti block cache
+.SH SYNOPSIS
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLxxxx 'u
+.PP
+.ft L
+.nf
+typedef struct VtBlock
+{
+	uchar *data;
+	uchar type;
+	uchar score[VtScoreSize];
+	u32int addr;
+	...
+} VtBlock;
+.ta +\w'\fLVtBlock* 'u +\w'\fLxxxxxxxx'u
+.PP
+.B
+VtCache*	vtcachealloc(VtConn *z, int blocksize, ulong nblocks);
+.PP
+.B
+void	vtcachefree(VtCache *c);
+.PP
+.B
+u32int	vtcacheblocksize(VtCache *c);
+.PP
+.B
+u32int	vtglobaltolocal(uchar score[VtScoreSize])
+.br
+.B
+void	vtlocaltoglobal(u32int local, uchar score[VtScoreSize])
+.PP
+.B
+VtBlock*	vtcacheallocblock(VtCache *c, int type);
+.PP
+.B
+VtBlock*	vtcachelocal(VtCache *c, u32int addr, int type);
+.PP
+.B
+VtBlock*	vtcacheglobal(VtCache *c, uchar[VtScoreSize], int type);
+.PP
+.B
+void	vtblockput(VtBlock *b);
+.PP
+.B
+void	vtblockduplock(VtBlock *b);
+.PP
+.B
+int	vtblockwrite(VtBlock *b);
+.PP
+.B
+void	vtcachesetwrite(VtCache *c,
+.br
+.B
+	   int (*write)(VtConn*, uchar[VtScoreSize], uint, uchar*, int));
+.PP
+.B
+VtBlock*	vtblockcopy(VtBlock *b);
+.PP
+.B
+int	vtblockdirty(VtBlock *b);
+.SH DESCRIPTION
+These functions provide access to a simple in-memory
+cache of blocks already stored on a Venti server
+and blocks that will eventually be stored on a Venti server.
+.PP
+A 
+.B VtBlock
+represents a venti data block.
+Blocks stored on a venti server,
+called
+.IR "global blocks" ,
+are named by the SHA1 hash of their contents.
+This hash is recorded as the block's
+.IR score .
+Such blocks are immutable.
+The cache also stores mutable blocks that have not yet been
+written to a venti server.  These blocks are called
+.IR "local blocks" ,
+and have special scores that are 16 zero bytes
+followed by a 4-byte big-endian
+.IR address .
+The address is an index into the internal set of cache blocks.
+.PP
+The user-visible contents of a
+.B VtBlock
+are
+.BR data ,
+a pointer to the data;
+.BR type ,
+the venti block type;
+.BR score ,
+the block's score;
+and
+.BR addr ,
+the block's cache address.
+.PP
+.I Vtcachealloc
+allocates a new cache using the client connection
+.I z
+(see
+.IR venti-conn (2)
+and
+.IR venti-client (2)),
+with room for
+.I nblocks
+of maximum block size
+.IR blocksize .
+.PP
+.I Vtcachefree
+frees a cache and all the associated blocks.
+.PP
+.I Vtcacheblocksize
+returns the cache's maximum block size.
+.PP
+.I Vtglobaltolocal
+returns the local address corresponding to the given
+local
+.IR score .
+If passed a global score,
+.I vtglobaltolocal
+returns the special constant
+.B NilBlock
+.RB ( ~0 ).
+.I Vtlocaltoglobal
+is the opposite, setting
+.I score
+to the local score for the cache address
+.IR local .
+.PP
+.I Vtcacheallocblock
+allocates a new local block with the given
+.IR type .
+.PP
+.I Vtcachelocal
+retrieves the local block at address
+.I addr
+from the cache.
+The given
+.I type
+must match the type of the block found at
+.IR addr .
+.PP
+.I Vtcacheglobal
+retrieves the block with the given
+.I score
+and
+.I type
+from the cache, consulting the Venti server
+if necessary.
+If passed a local score,
+.I vtcacheglobal
+invokes
+.I vtcachelocal
+appropriately.
+.PP
+The block references returned by
+.IR vtcacheallocblock ,
+.IR vtcachelocal ,
+and
+.I vtcacheglobal
+must be released when no longer needed.
+.I Vtblockput
+releases such a reference.
+.PP
+It is occasionally convenient to have multiple variables
+refer to the same block.
+.I Vtblockduplock
+increments the block's reference count so that
+an extra 
+.I vtblockput
+will be required in order to release the block.
+.PP
+.I Vtblockwrite
+writes a local block to the Venti server,
+changing the block to a global block.
+It calls the cache's
+.I write
+function
+to write the block to the server.
+The default
+.I write
+function is 
+.I vtwrite
+(see
+.IR venti-client (2));
+.I vtcachesetwrite
+sets it.
+.I Vtcachesetwrite
+is used by clients to install replacement functions 
+that run writes in the background or perform other
+additional processing.
+.PP
+.I Vtblockcopy
+copies a block in preparation for modifying its contents.
+The old block may be a local or global block, 
+but the new block will be a local block.
+.PP
+The cache only evicts global blocks.
+Local blocks can only leave the cache via
+.IR vtblockwrite ,
+which turns them into global blocks, making them candidates for
+eviction.
+.PP
+If a new cache block must be allocated (for
+.IR vtcacheallocblock ,
+.IR vtcachelocal ,
+.IR vtcacheglobal ,
+or
+.IR vtblockcopy ),
+but the cache is filled (with local blocks and blocks that
+have not yet been released with
+.IR vtblockput ),
+the library prints the score and reference count of
+every block in the cache and then aborts.
+A full cache indicates either that the cache is too small,
+or, more commonly, that cache blocks are being leaked.
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (2),
+.IR venti-client (2),
+.IR venti-conn (2),
+.IR venti-file (2),
+.IR venti (6)

+ 190 - 0
sys/man/2/venti-client

@@ -0,0 +1,190 @@
+.TH VENTI-CLIENT 2
+.SH NAME
+vtconnect, vthello, vtread, vtwrite, vtreadpacket, vtwritepacket, vtsync, vtping, vtrpc, ventidoublechecksha1 \- Venti client
+.SH SYNOPSIS
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLPacket* 'u +\w'\fLxxxxxxxx'u
+.PP
+.B
+Packet*	vtrpc(VtConn *z, Packet *p)
+.PP
+.B
+int	vthello(VtConn *z)
+.PP
+.B
+int	vtconnect(VtConn *z)
+.PP
+.B
+int	vtread(VtConn *z, uchar score[VtScoreSize],
+.br
+.B
+		uint type, uchar *buf, int n)
+.PP
+.B
+int	vtwrite(VtConn *z, uchar score[VtScoreSize],
+.br
+.B
+		uint type, uchar *buf, int n)
+.PP
+.B
+Packet*	vtreadpacket(VtConn *z, uchar score[VtScoreSize],
+.br
+.B
+		uint type, int n)
+.PP
+.B
+int	vtwritepacket(VtConn *z, uchar score[VtScoreSize],
+.br
+.B
+		uint type, Packet *p)
+.PP
+.B
+int	vtsync(VtConn *z)
+.PP
+.B
+int	vtping(VtConn *z)
+.PP
+.B
+extern int ventidoublechecksha1;  /* default 1 */
+.SH DESCRIPTION
+These routines execute the client side of the 
+.IR venti (6)
+protocol.
+.PP
+.I Vtrpc
+executes a single Venti RPC transaction, sending the request 
+packet
+.IR p
+and then waiting for and returning the response packet.
+.I Vtrpc
+will set the tag in the packet.
+.I Vtrpc
+frees
+.IR p ,
+even on error.
+.I Vtrpc
+is typically called only indirectly, via the functions below.
+.PP
+.I Vthello
+executes a
+.B hello
+transaction, setting
+.IB z ->sid
+to the name used by the server.
+.I Vthello
+is typically called only indirectly, via
+.IR vtconnect .
+.PP
+.I Vtconnect
+calls
+.I vtversion
+(see
+.IR venti-conn (2))
+and
+.IR vthello ,
+in that order, returning success only
+if both succeed.
+This sequence (calling 
+.I vtversion
+and then
+.IR vthello )
+must be done before the functions below can be called.
+.PP
+.I Vtread
+reads the block with the given
+.I score
+and
+.I type
+from the server,
+stores the returned data
+in memory at
+.IR buf ,
+and returns the number of bytes read.
+If the server's block has size larger than
+.IR n ,
+.I vtread 
+does not modify
+.I buf
+and
+returns an error.
+.PP
+.I Vtwrite
+writes the
+.I n
+bytes in 
+.I buf
+as a block of the given
+.IR type ,
+setting 
+.IR score .
+.PP
+.I Vtreadpacket
+and
+.I vtwritepacket
+are like
+.I vtread
+and
+.I vtwrite
+but return or accept the block contents in the
+form of a
+.BR Packet .
+They avoid making a copy of the data.
+.PP
+.I Vtsync
+causes the server to flush all pending write requests
+to disk before returning.
+.PP
+.I Vtping
+executes a ping transaction with the server.
+.PP
+By default,
+.I vtread
+and
+.I vtreadpacket
+check that the SHA1 hash of the returned data 
+matches the requested
+.IR score ,
+and
+.I vtwrite
+and
+.I vtwritepacket
+check that the returned
+.I score
+matches the SHA1 hash of the written data.
+Setting
+.I ventidoublechecksha1
+to zero disables these extra checks,
+mainly for benchmarking purposes.
+Doing so in production code is not recommended.
+.PP
+These functions can be called from multiple threads
+or procs simultaneously to issue requests 
+in parallel.
+Programs that issue requests from multiple threads
+in the same proc should start separate procs running
+.I vtsendproc
+and
+.I vtrecvproc
+as described in
+.IR venti-conn (2).
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (2),
+.IR venti-conn (2),
+.IR venti-packet (2),
+.IR venti (6)
+.SH DIAGNOSTICS
+.I Vtrpc
+and
+.I vtreadpacket
+return nil on error.
+The other routines return \-1 on error.
+.PP
+.I Vtwrite
+returns 0 on success: there are no partial writes.

+ 200 - 0
sys/man/2/venti-conn

@@ -0,0 +1,200 @@
+.TH VENTI-CONN 2
+.SH NAME
+VtConn, vtconn, vtdial, vtfreeconn, vtsend, vtrecv, vtversion,
+vtdebug, vthangup \- Venti network connections
+.SH SYNOPSIS
+.PP
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.PP
+.ft L
+.nf
+.ta +\w'\fL    'u
+typedef struct VtConn {
+	int  debug;
+	char *version;
+	char *uid;
+	char *sid;
+	char addr[256];
+	...
+} VtConn;
+.PP
+.ta \w'\fLextern int 'u
+.B
+VtConn*	vtconn(int infd, int outfd)
+.PP
+.B
+VtConn*	vtdial(char *addr)
+.PP
+.B
+int	vtversion(VtConn *z)
+.PP
+.B
+int	vtsend(VtConn *z, Packet *p)
+.PP
+.B
+Packet*	vtrecv(VtConn *z)
+.PP
+.B
+void	vtrecvproc(void *z)
+.PP
+.B
+void	vtsendproc(void *z)
+.PP
+.B
+void	vtdebug(VtConn *z, char *fmt, ...)
+.PP
+.B
+void	vthangup(VtConn *z)
+.PP
+.B
+void	vtfreeconn(VtConn *z)
+.PP
+.B
+extern int	chattyventi;	/* default 0 */
+.SH DESCRIPTION
+A
+.B VtConn
+structure represents a connection to a Venti server
+(when used by a client) or to a client (when used by a server).
+It contains the following user-visible fields:
+.BR debug ,
+a flag enabling debugging prints;
+.BR version ,
+the protocol version in use;
+.BR uid ,
+the (unverified) name of the client;
+.BR sid ,
+the (unverified) name of the server;
+and
+.BR addr ,
+the network address of the remote side.
+.PP
+.I Vtconn
+initializes a new connection structure using file descriptors
+.I infd
+and
+.I outfd
+(which may be the same)
+for reading and writing.
+.I Vtdial
+dials the given network address
+(see
+.IR dial (2))
+and returns a corresponding connection.
+It returns nil if the connection cannot be established.
+.PP
+.I Vtversion
+exchanges version information with the remote side
+as described in
+.IR venti (6).
+The negotiated version is stored in
+.IB z ->version \fR.
+.PP
+.I Vtsend
+writes a packet
+(see
+.IR venti-packet (2))
+on the connection
+.IR z .
+The packet
+.IR p
+should be a formatted Venti message as might
+be returned by
+.IR vtfcallpack ;
+.I vtsend
+will add the two-byte length field
+(see
+.IR venti (6))
+at the beginning.
+.I Vtsend
+frees
+.IR p ,
+even on error.
+.PP
+.I Vtrecv
+reads a packet from the connection
+.IR z .
+Analogous to
+.IR vtsend ,
+the data read from the connection must start with
+a two-byte length, but the returned packet will omit it.
+.PP
+By default, 
+.I vtsend
+and
+.I vtrecv
+block until the packet can be written or read from the network.
+In a threaded program
+(see
+.IR thread (2)),
+this may not be desirable.
+If the caller arranges for
+.IR vtsendproc
+and
+.IR vtrecvproc
+to run in their own procs
+(typically by calling
+.IR proccreate ),
+then
+.I vtsend
+and
+.I vtrecv
+will yield the proc in which they are run
+to other threads when waiting on the network.
+The
+.B void*
+argument to
+.I vtsendproc
+and
+.I vtrecvproc
+must be the connection structure
+.IR z .
+.PP
+.I Vtdebug
+prints the formatted message to standard error
+when
+.IB z ->debug
+is set.  Otherwise it is a no-op.
+.PP
+.I Vthangup
+hangs up a connection.
+It closes the associated file descriptors
+and shuts down send and receive procs if they have been
+started.
+Future calls to
+.IR vtrecv
+or
+.IR vtsend
+will return errors.
+Additional calls to
+.I vthangup
+will have no effect.
+.PP
+.I Vtfreeconn
+frees the connection structure, hanging it up first
+if necessary.
+.PP
+If the global variable
+.I chattyventi
+is set, the library prints all Venti RPCs to standard error
+as they are sent or received.
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (1),
+.IR venti (2),
+.IR venti-client (2),
+.IR venti-packet (2),
+.IR venti-server (2),
+.IR venti (6)
+.SH DIAGNOSTICS
+Routines that return pointers return nil on error.
+Routines returning integers return 0 on success, \-1 on error.
+All routines set
+.I errstr
+on error.

+ 275 - 0
sys/man/2/venti-fcall

@@ -0,0 +1,275 @@
+.TH VENTI-FCALL 2
+.SH NAME
+VtEntry, VtFcall, VtRoot,
+vtentrypack,
+vtentryunpack,
+vtfcallclear,
+vtfcallfmt,
+vtfcallpack,
+vtfcallunpack,
+vtfromdisktype,
+vttodisktype,
+vtgetstring,
+vtputstring,
+vtrootpack,
+vtrootunpack,
+vtparsescore,
+vtscorefmt \- venti data formats
+.SH SYNOPSIS
+.PP
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLxxxx'u
+.PP
+.ft L
+.nf
+enum
+{
+	VtEntrySize = 40,
+	VtRootSize = 300,
+	VtScoreSize = 20,
+};
+.PP
+.ft L
+.nf
+typedef struct VtEntry
+{
+	ulong gen;    /* generation number */
+	ushort psize;   /* pointer block size */
+	ushort dsize;   /* data block size */
+	uchar type;
+	uchar flags;
+	uvlong size;
+	uchar score[VtScoreSize];
+} VtEntry;
+.PP
+.ft L
+.nf
+typedef struct VtRoot
+{
+	char name[128];
+	char type[128];
+	uchar score[VtScoreSize];  /* to a Dir block */
+	ushort blocksize;          /* maximum block size */
+	uchar prev[VtScoreSize];   /* previous root block */
+} VtRoot;
+.ta +\w'\fLPacket* 'u
+.PP
+.B
+void	vtentrypack(VtEntry *e, uchar *buf, int index)
+.br
+.B
+int	vtentryunpack(VtEntry *e, uchar *buf, int index)
+.PP
+.B
+Packet*	vtfcallpack(VtFcall *f)
+.br
+.B
+int	vtfcallunpack(VtFcall *f, Packet *p)
+.PP
+.B
+void vtfcallclear(VtFcall *f)
+.PP
+.B
+uint	vttodisktype(uint type)
+.br
+.B
+uint	vtfromdisktype(uint type)
+.PP
+.B
+int	vtputstring(Packet *p, char *s)
+.br
+.B
+int	vtgetstring(Packet *p, char **s)
+.PP
+.B
+void	vtrootpack(VtRoot *r, uchar *buf)
+.br
+.B
+int	vtrootunpack(VtRoot *r, uchar *buf)
+.PP
+.B
+int	vtparsescore(char *s, char **prefix, uchar score[VtScoreSize])
+.PP
+.B
+int	vtfcallfmt(Fmt *fmt)
+.B
+int	vtscorefmt(Fmt *fmt)
+.SH DESCRIPTION
+These routines convert between C representations of Venti
+structures and serialized representations used on disk and
+on the network.
+.PP
+.I Vtentrypack
+converts a
+.B VtEntry
+structure describing a Venti file
+(see
+.IR venti (6))
+into a 40-byte
+.RB ( VtEntrySize )
+structure at
+.IB buf + index *40 \fR.
+.I Vtentryunpack
+does the reverse conversion.
+.PP
+.I Vtfcallpack
+converts a
+.B VtFcall
+structure describing a Venti protocol message
+(see
+.IR venti (6))
+into a packet.
+.I Vtfcallunpack
+does the reverse conversion.
+.PP
+The fields in a
+.B VtFcall
+are named after the protocol fields described in
+.IR venti (6),
+except that the
+.B type
+field is renamed
+.BR blocktype .
+The
+.B msgtype
+field holds the one-byte message type:
+.BR VtThello ,
+.BR VtRhello ,
+and so on.
+.PP
+.I Vtfcallclear
+frees the strings
+.IB f ->error \fR,
+.IB f ->version \fR,
+.IB f ->uid \fR,
+.IB f ->sid \fR,
+the buffers
+.IB f ->crypto
+and
+.IB f ->codec \fR,
+and the packet
+.IB f ->data \fR.
+.PP
+The block type enumeration defined in
+.B <venti.h>
+(presented in 
+.IR venti (6))
+differs from the one used on disk and in the network
+protocol.
+The disk and network representation uses different
+constants and does not distinguish between
+.BI VtDataType+ n
+and
+.BI VtDirType+ n
+blocks.
+.I Vttodisktype
+converts a
+.B <venti.h>
+enumeration value to the disk value;
+.I vtfromdisktype
+converts a disk value to the enumeration value,
+always using the
+.B VtDirType
+pointers.
+The
+.B VtFcall
+field
+.B blocktype
+is an enumeration value
+.RI ( vtfcallpack
+and
+.I vtfcallunpack
+convert to and from the disk values used in packets
+automatically),
+so most programs will not need to call these functions.
+.PP
+.I Vtputstring
+appends the Venti protocol representation of the string
+.I s
+to the packet
+.IR p .
+.I Vtgetstring
+reads a string from the packet, returning a pointer to a copy
+of the string in
+.BI * s \fR.
+The copy must be freed by the caller.
+These functions are used by
+.I vtfcallpack
+and
+.IR vtfcallunpack ;
+most programs will not need to call them directly.
+.PP
+.I Vtrootpack
+converts a
+.B VtRoot
+structure describing a Venti file tree
+into the 300-byte 
+.RB ( VtRootSize )
+buffer pointed to by
+.IR buf .
+.I Vtrootunpack
+does the reverse conversion.
+.PP
+.I Vtparsescore
+parses the 40-digit hexadecimal string
+.IR s ,
+writing its value
+into
+.IR score .
+If the hexadecimal string is prefixed with
+a text label followed by a colon, a copy of that
+label is returned in
+.BI * prefix \fR.
+If
+.I prefix
+is nil, the label is ignored.
+.PP
+.I Vtfcallfmt
+and
+.I vtscorefmt
+are
+.IR print (2)
+formatters to print
+.B VtFcall
+structures and scores.
+.I Vtfcallfmt
+assumes that
+.I vtscorefmt
+is installed as
+.BR %V .
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (1),
+.IR venti (2),
+.IR venti (6)
+.SH DIAGNOSTICS
+.IR Vtentrypack ,
+.IR vtfcallpack ,
+.IR vtrootpack ,
+and
+.I vtfcallclear
+cannot fail.
+.PP
+.IR Vtentryunpack ,
+.IR vtrootunpack ,
+.IR vtputstring ,
+.IR vtgetstring ,
+and
+.I vtparsescore
+return 0 on success, \-1 on error.
+.PP
+.I Vtfcallpack
+returns a packet on success, nil on error.
+.PP
+.I Vttodisktype
+and
+.I vtfromdisktype
+return
+.B VtCorruptType
+(255)
+when presented with invalid input.

+ 325 - 0
sys/man/2/venti-file

@@ -0,0 +1,325 @@
+.TH VENTI-FILE 2
+.SH NAME
+VtFile,
+vtfileblock,
+vtfileblockscore,
+vtfileclose,
+vtfilecreate,
+vtfilecreateroot,
+vtfileflush,
+vtfileflushbefore,
+vtfilegetdirsize,
+vtfilegetentry,
+vtfilegetsize,
+vtfileincref,
+vtfilelock,
+vtfilelock2,
+vtfileopen,
+vtfileopenroot,
+vtfileread, 
+vtfileremove,
+vtfilesetdirsize,
+vtfilesetentry,
+vtfilesetsize,
+vtfiletruncate,
+vtfileunlock,
+vtfilewrite \- Venti files
+.SH SYNOPSIS
+.ta +\w'\fLVtBlock* 'u
+.PP
+.B
+VtFile*	vtfilecreateroot(VtCache *c, int psize, int dsize, int type);
+.PP
+.B
+VtFile*	vtfileopenroot(VtCache *c, VtEntry *e);
+.PP
+.B
+VtFile*	vtfileopen(VtFile *f, u32int n, int mode);
+.PP
+.B
+VtFile*	vtfilecreate(VtFile *f, int psize, int dsize, int type);
+.PP
+.B
+void	vtfileincref(VtFile *f);
+.PP
+.B
+void	vtfileclose(VtFile *f);
+.PP
+.B
+int	vtfileremove(VtFile *f);
+.PP
+.B
+VtBlock*	vtfileblock(VtFile *f, u32int n, int mode);
+.PP
+.B
+long	vtfileread(VtFile *f, void *buf, long n, vlong offset);
+.PP
+.B
+long	vtfilewrite(VtFile *f, void *buf, long n, vlong offset);
+.PP
+.B
+int	vtfileflush(VtFile *f);
+.PP
+.B
+int	vtfileflushbefore(VtFile *f, vlong offset);
+.PP
+.B
+int	vtfiletruncate(VtFile *f);
+.PP
+.B
+uvlong	vtfilegetsize(VtFile *f);
+.PP
+.B
+int	vtfilesetsize(VtFile *f, vlong size);
+.PP
+.B
+u32int	vtfilegetdirsize(VtFile *f);
+.PP
+.B
+int	vtfilesetdirsize(VtFile *f, u32int size);
+.PP
+.B
+int	vtfilegetentry(VtFile *f, VtEntry *e);
+.PP
+.B
+int	vtfilesetentry(VtFile *f, VtEntry *e);
+.PP
+.B
+int	vtfileblockscore(VtFile *f, u32int n, 
+	    uchar score[VtScoreSize]);
+.PP
+.B
+int	vtfilelock(VtFile *f, int mode);
+.PP
+.B
+int	vtfilelock2(VtFile *f1, VtFile *f2, int mode);
+.PP
+.B
+void	vtfileunlock(VtFile *f);
+.SH DESCRIPTION
+These routines provide a simple interface to create and
+manipulate Venti file trees (see
+.IR venti (6)).
+.PP
+.I Vtfilecreateroot
+creates a new Venti file.
+.I Type
+must be either
+.B VtDataType
+or
+.BR VtDirType ,
+specifying a data or directory file.
+.I Dsize
+is the block size to use for leaf (data or directory) blocks in the hash tree;
+.I psize
+is the block size to use for internal (pointer) blocks.
+.PP
+.I Vtfileopenroot
+opens an existing Venti file described by
+.IR e .
+.PP
+.I Vtfileopen
+opens the Venti file described by the
+.IR n th
+entry in the directory
+.IR f .
+.I Mode
+should be one of
+.BR VtOREAD ,
+.BR VtOWRITE ,
+or
+.BR VtORDWR ,
+indicating how the returned file is to be used.
+The
+.BR VtOWRITE
+and
+.BR VtORDWR
+modes can only be used if
+.IR f
+is open with mode
+.BR VtORDWR .
+.PP
+.I Vtfilecreate
+creates a new file in the directory
+.I f
+with block type
+.I type
+and block sizes
+.I dsize
+and
+.I psize
+(see
+.I vtfilecreateroot
+above).
+.PP
+Each file has an associated reference count
+and holds a reference to its parent in the file tree.
+.I Vtfileincref
+increments this reference count.
+.I Vtfileclose
+decrements the reference count.
+If there are no other references,
+.I vtfileclose
+releases the reference to
+.IR f 's
+parent and then frees the in-memory structure
+.IR f .
+The data stored in 
+.I f
+is still accessible by reopening it.
+.PP
+.I Vtfileremove
+removes the file
+.I f
+from its parent directory.
+It also acts as 
+.IR vtfileclose ,
+releasing the reference to
+.I f
+and potentially freeing the structure.
+.PP
+.I Vtfileblock
+returns the
+.IR n th
+block in the file
+.IR f .
+If there are not 
+.I n
+blocks in the file and
+.I mode
+is 
+.BR VtOREAD ,
+.I vtfileblock
+returns nil.
+If the mode is
+.B VtOWRITE
+or
+.BR VtORDWR ,
+.I vtfileblock
+grows the file as needed and then returns the block.
+.PP
+.I Vtfileread
+reads at most
+.I n
+bytes at offset
+.I offset
+from
+.I f
+into memory at
+.IR buf .
+It returns the number of bytes read.
+.PP
+.I Vtfilewrite
+writes the 
+.I n
+bytes in memory at
+.I buf
+into the file
+.I f
+at offset 
+.IR offset .
+It returns the number of bytes written,
+or \-1 on error.
+Writing fewer bytes than requested will only happen
+if an error is encountered.
+.PP
+.I Vtfilewrite
+writes to an in-memory copy of the data blocks
+(see
+.IR venti-cache (2))
+instead of writing directly to Venti.
+.I Vtfileflush
+writes all copied blocks associated with 
+.I f
+to the Venti server.
+.I Vtfileflushbefore
+flushes only those blocks corresponding to data in the file before
+byte
+.IR offset .
+Loops that call
+.I vtfilewrite
+should call
+.I vtfileflushbefore
+regularly to avoid filling the block cache with unwritten blocks.
+.PP
+.I Vtfiletruncate
+changes the file
+.I f
+to have zero length.
+.PP
+.I Vtfilegetsize
+returns the length (in bytes) of file
+.IR f .
+.PP
+.I Vtfilesetsize
+sets the length (in bytes) of file
+.IR f .
+.PP
+.I Vtfilegetdirsize
+returns the length (in directory entries)
+of the directory
+.IR f .
+.PP
+.I Vtfilesetdirsize
+sets the length (in directory entries)
+of the directory
+.IR f .
+.PP
+.I Vtfilegetentry
+fills
+.I e
+with an entry that can be passed to
+.IR vtfileopenroot
+to reopen
+.I f
+at a later time.
+.PP
+.I Vtfilesetentry
+sets the entry associated with
+.I f
+to be
+.IR e .
+.PP
+.I Vtfileblockscore
+returns in
+.I score
+the score of the
+.IR n th
+block in the file
+.IR f .
+.PP
+Venti files are locked and unlocked
+via
+.I vtfilelock
+and
+.I vtfileunlock
+to moderate concurrent access.
+Only one thread at a time\(emthe one that has the file locked\(emcan
+read or modify the file.
+The functions that return files
+.RI ( vtfilecreateroot ,
+.IR vtfileopenroot ,
+.IR vtfilecreate ,
+and
+.IR vtfileopen )
+return them unlocked.
+When files are passed to any of the functions documented in 
+this manual page, it is the caller's responsibility to ensure that
+they are already locked.
+.PP
+Internally, a file is locked by locking the
+block that contains its directory entry.
+When two files in the same
+directory both need to be locked,
+.I vtfilelock2
+must be used.
+It locks both its arguments, taking special care
+not to deadlock if their entries are stored
+in the same directory block.
+.SH SOURCE
+.B /sys/src/libventi/file.c
+.SH SEE ALSO
+.IR venti-cache (2),
+.IR venti-conn (2),
+.IR venti-client (2),
+.IR venti (6)

+ 136 - 0
sys/man/2/venti-log

@@ -0,0 +1,136 @@
+.TH VENTI-LOG 2
+.SH NAME
+VtLog,
+VtLogChunk, 
+vtlog,
+vtlogclose,
+vtlogdump,
+vtlognames,
+vtlogopen,
+vtlogprint,
+vtlogremove,
+ventilogging \- Venti logs
+.SH SYNOPSIS
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLVtLog* 'u
+.PP
+.B
+VtLog*	vtlogopen(char *name, uint size);
+.PP
+.B
+void	vtlogprint(VtLog *log, char *fmt, ...);
+.PP
+.B
+void	vtlogclose(VtLog *log);
+.PP
+.B
+void	vtlog(char *name, char *fmt, ...);
+.PP
+.B
+void	vtlogremove(char *name);
+.PP
+.B
+char**	vtlognames(int *n);
+.PP
+.B
+void	vtlogdump(int fd, VtLog *log);
+.PP
+.B
+extern int ventilogging;    /* default 0 */
+.PP
+.B
+extern char *VtServerLog;    /* "libventi/server" */
+.SH DESCRIPTION
+These routines provide an in-memory circular log
+structure used by the Venti library and the Venti server
+to record events for debugging purposes.
+The logs are named by UTF strings.
+.PP
+.I Vtlogopen
+returns a reference to the log with the given
+.IR name .
+If a log with that name does not exist and
+.I size 
+is non-zero,
+.I vtlogopen
+creates a new log capable of holding at
+least
+.I size
+bytes and returns it.
+.I Vtlogclose
+releases the reference returned by
+.IR vtlogopen .
+.PP
+.I Vtlogprint
+writes to
+.IR log ,
+which must be open.
+.PP
+.I Vtlog
+is a convenient packaging of
+.I vtlogopen
+followed by
+.I vtlogprint
+and
+.IR vtlogclose .
+.PP
+.I Vtlogremove
+removes the log with the given
+.IR name ,
+freeing any associated storage.
+.PP
+.I Vtlognames
+returns a list of the names of all the logs.
+The length of the list is returned in
+.BI * n \fR.
+The list
+should be freed
+by calling
+.I vtfree
+on the returned pointer.
+The strings in the list will be freed by this call as well.
+(It is an error to call
+.I vtfree
+on any of the strings in the list.)
+.PP
+.I Vtlogdump
+prints
+.IR log ,
+which must be open, to the file descriptor
+.IR fd .
+.PP
+If
+.I ventilogging
+is set to zero (the default),
+.I vtlognames
+and
+.I vtlogdump
+can inspect existing logs, but
+.I vtlogopen
+always returns nil
+and
+.I vtlog
+is a no-op.
+The other functions are no-ops when
+passed nil log structures.
+.PP
+The server library
+(see
+.IR venti-conn (2)
+and
+.IR venti-server (2))
+writes debugging information to the log named
+.IR VtServerLog ,
+which defaults to the string
+.RB ` libventi/server '.
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (2),
+.IR venti (8)

+ 66 - 0
sys/man/2/venti-mem

@@ -0,0 +1,66 @@
+.TH VENTI-MEM 2
+.SH NAME
+vtbrk,
+vtmalloc,
+vtmallocz,
+vtrealloc,
+vtstrdup,
+vtfree \- error-checking memory allocators
+.SH SYNOPSIS
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLvoid* 'u
+.PP
+.B
+void*	vtbrk(int size)
+.PP
+.B
+void*	vtmalloc(int size)
+.PP
+.B
+void*	vtmallocz(int size)
+.PP
+.B
+void*	vtrealloc(void *ptr, int size)
+.PP
+.B 
+char*	vtstrdup(char *s)
+.PP
+.B
+void	vtfree(void *ptr)
+.SH DESCRIPTION
+These routines allocate and free memory.
+On failure, they print an error message and call
+.IR sysfatal (2).
+They do not return.
+.PP
+.I Vtbrk
+returns a pointer to a new, permanently allocated block of at least
+.I size
+bytes.
+.PP
+.IR Vtmalloc ,
+.IR vtrealloc ,
+and
+.I vtstrdup
+are like
+.IR malloc ,
+.IR realloc ,
+and
+.IR strdup ,
+but, as noted above, do not return on error.
+.I Vtmallocz
+is like
+.I vtmalloc
+but zeros the block before returning it.
+Memory allocated with all four should be freed with
+.I vtfree
+when no longer needed.
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (2)

+ 281 - 0
sys/man/2/venti-packet

@@ -0,0 +1,281 @@
+.TH VENTI-PACKET 2
+.SH NAME
+Packet,
+packetalloc,
+packetappend,
+packetasize,
+packetcmp,
+packetconcat,
+packetconsume,
+packetcopy,
+packetdup,
+packetforeign,
+packetfragments,
+packetfree,
+packetheader,
+packetpeek,
+packetprefix,
+packetsha1,
+packetsize,
+packetsplit,
+packetstats,
+packettrailer,
+packettrim \- zero-copy network buffers
+.SH SYNOPSIS
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLPacket* 'u +\w'\fLxxxx'u
+.PP
+.B
+.PP
+.B
+Packet*	packetalloc(void);
+.PP
+.B
+void	packetappend(Packet *p, uchar *buf, int n)
+.PP
+.B
+uint	packetasize(Packet *p)
+.PP
+.B
+int	packetcmp(Packet *p, Packet *q)
+.PP
+.B
+void	packetconcat(Packet *p, Packet *q)
+.PP
+.B
+int	packetconsume(Packet *p, uchar *buf, int n)
+.PP
+.B
+int	packetcopy(Packet *p, uchar *buf, int offset, int n)
+.PP
+.B
+Packet*	packetdup(Packet *p, int offset, int n)
+.PP
+.B
+Packet*	packetforeign(uchar *buf, int n,
+.br
+.B
+		void (*free)(void *a), void *a)
+.PP
+.B
+int	packetfragments(Packet *p, IOchunk *io, int nio,
+.br
+.B
+		int offset)
+.PP
+.B
+void	packetfree(Packet *p)
+.PP
+.B
+uchar*	packetheader(Packet *p, int n)
+.PP
+.B
+uchar*	packetpeek(Packet *p, uchar *buf, int offset, int n)
+.PP
+.B
+void	packetprefix(Packet *p, uchar *buf, int n)
+.PP
+.B
+void	packetsha1(Packet *p, uchar sha1[20])
+.PP
+.B
+uint	packetsize(Packet *p)
+.PP
+.B
+Packet*	packetsplit(Packet *p, int n)
+.PP
+.B
+void	packetstats(void)
+.PP
+.B
+uchar*	packettrailer(Packet *p, int n)
+.PP
+.B
+int	packettrim(Packet *p, int offset, int n)
+.SH DESCRIPTION
+A
+.B Packet
+is a chain of blocks of data.
+Each block, called a fragment,
+is contiguous in memory, but the entire packet
+may not be.
+This representation helps avoid unnecessary memory copies.
+.PP
+.I Packetalloc
+allocates an empty packet.
+.PP
+.I Packetappend
+appends the
+.I n
+bytes at
+.I buf
+to the end of
+.IR p .
+.PP
+.I Packetasize
+returns the number of data bytes allocated to
+.IR p .
+This may be larger than the number of bytes stored
+in
+.IR p
+because fragments may not be filled completely.
+.PP
+.I Packetcmp
+compares the data sections of two packets as
+.I memcmp
+(see
+.IR memory (2))
+would.
+.PP
+.I Packetconcat
+removes all data from
+.IR q ,
+appending it to
+.IR p .
+.PP
+.I Packetconsume
+removes
+.I n
+bytes from the beginning of 
+.IR p ,
+storing them into
+.IR buf .
+.PP
+.I Packetcopy
+copies
+.I n
+bytes at
+.I offset
+in
+.I p
+to
+.IR buf .
+.PP
+.I Packetdup
+creates a new packet initialized with
+.I n
+bytes from
+.I offset
+in
+.IR p .
+.PP
+.I Packetforeign
+allocates a packet containing `foreign' data: the
+.I n
+bytes pointed to by
+.IR buf .
+Once the bytes are no longer needed, they are freed by calling
+.IB free ( a )\fR.
+.PP
+.I Packetfragments
+initializes up to
+.I nio
+of the
+.I io
+structures with pointers to the data in
+.IR p ,
+starting at
+.IR offset .
+It returns the total number of bytes represented
+by the returned structures.
+.I Packetfragments
+initializes any unused
+.I io
+structures with nil pointers and zero lengths.
+.PP
+.I Packetfree
+frees the packet
+.IR p .
+.PP
+.I Packetheader
+returns a pointer to the first
+.I n
+bytes of 
+.IR p ,
+making them contiguous in memory
+if necessary.
+.PP
+.I Packetpeek
+returns a pointer to the
+.I n
+bytes at
+.I offset
+in
+.IR p .
+If the requested bytes are already stored contiguously in memory,
+the returned pointer points at the internal data storage for
+.IR p .
+Otherwise, the bytes are copied into
+.IR buf ,
+and
+.I packetpeek
+returns
+.IR buf .
+.PP
+.I Packetprefix
+inserts a copy of the
+.I n
+bytes at
+.I buf
+at the beginning of
+.IR p .
+.PP
+.I Packetsha1
+computes the SHA1 hash of the data contained in
+.IR p .
+.PP
+.I Packetsize
+returns the length, in bytes, of the data contained in
+.IR p .
+.PP
+.I Packetsplit
+returns a new packet initialized with
+.I n
+bytes removed from the beginning of 
+.IR p .
+.PP
+.I Packetstats
+prints run-time statistics to standard output.
+.PP
+.I Packettrailer
+returns a pointer to the last
+.I n
+bytes of
+.IR p ,
+making them contiguous in memory
+if necessary.
+.PP
+.I Packettrim
+deletes all bytes from the packet
+.I p
+except the
+.I n
+bytes at offset
+.IR offset .
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (2)
+.SH DIAGNOSTICS
+These functions return errors only when passed
+invalid inputs,
+.IR e.g. ,
+requests for data at negative offsets or beyond the end of a packet.
+.PP
+Functions returning pointers return nil on error;
+functions returning integers return \-1 on error.
+Most functions returning integers return 0 on success.
+The exceptions are
+.I packetfragments
+and
+.IR packetcmp ,
+whose return values are described above.
+.PP
+When these functions run out of memory, they
+print error messages and call
+.IR sysfatal .

+ 122 - 0
sys/man/2/venti-server

@@ -0,0 +1,122 @@
+.TH VENTI-SERVER 2
+.SH NAME
+vtsrvhello, vtlisten, vtgetreq, vtrespond \- Venti server
+.SH SYNOPSIS
+.PP
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLVtReq* 'u
+.PP
+.ft L
+.nf
+typedef struct VtReq
+{
+	VtFcall tx;
+	VtFcall rx;
+	...
+} VtReq;
+.PP
+.B
+int	vtsrvhello(VtConn *z)
+.PP
+.B
+VtSrv*	vtlisten(char *addr)
+.PP
+.B
+VtReq*	vtgetreq(VtSrv *srv)
+.PP
+.B
+void	vtrespond(VtReq *req)
+.SH DESCRIPTION
+These routines execute the server side of the
+.IR venti (6)
+protocol.
+.PP
+.I Vtsrvhello
+executes the server side of the initial
+.B hello
+transaction.
+It sets
+.IB z -> uid
+with the user name claimed by the other side.
+Each new connection must be initialized by running
+.I vtversion
+and then
+.IR vtsrvhello .
+The framework below takes care of this detail automatically;
+.I vtsrvhello
+is provided for programs that do not use the functions below.
+.PP
+.IR Vtlisten ,
+.IR vtgetreq ,
+and
+.I vtrespond
+provide a simple framework for writing Venti servers.
+.PP
+.I Vtlisten
+announces at the network address
+.IR addr ,
+returning a fresh
+.B VtSrv
+structure representing the service.
+.PP
+.I Vtgetreq
+waits for and returns
+the next 
+.BR read ,
+.BR write ,
+.BR sync ,
+or
+.B ping
+request from any client connected to
+the service
+.IR srv .
+.B Hello
+and
+.B goodbye
+messages are handled internally and not returned to the client.
+The interface does not distinguish between the
+different clients that may be connected at any given time.
+The request can be found in the
+.I tx
+field of the returned
+.BR VtReq .
+.PP
+Once a request has been served and a response stored in
+.IB r ->rx \fR,
+the server should call
+.IR vtrespond
+to send the response to the client.
+.I Vtrespond
+frees the structure
+.I r
+as well as the packets
+.IB r ->tx.data
+and
+.IB r ->rx.data \fR.
+.SH EXAMPLE
+.B /sys/src/cmd/venti
+contains two simple Venti servers 
+.B ro.c
+and
+.B devnull.c
+written using these routines.
+.I Ro
+is a read-only Venti proxy (it rejects
+.B write
+requests).
+.I Devnull
+is a dangerous write-only Venti server: it discards all
+blocks written to it and returns error on all reads.
+.SH SOURCE
+.B /sys/src/libventi
+.SH SEE ALSO
+.IR venti (2),
+.IR venti-conn (2),
+.IR venti-packet (2),
+.IR venti (6),
+.IR venti (8)

+ 56 - 0
sys/man/2/venti-zero

@@ -0,0 +1,56 @@
+.TH VENTI-ZERO 2
+.SH NAME
+vtzerotruncate, vtzeroextend, vtzeroscore \- Venti block truncation
+.SH SYNOPSIS
+.ft L
+#include <u.h>
+.br
+#include <libc.h>
+.br
+#include <venti.h>
+.ta +\w'\fLuint 'u
+.PP
+.B
+uint	vtzerotruncate(int type, uchar *buf, uint size)
+.PP
+.B
+void	vtzeroextend(int type, uchar *buf, uint size, uint newsize)
+.PP
+.B
+extern uchar vtzeroscore[VtScoreSize];
+.SH DESCRIPTION
+These utility functions compute how to truncate or replace
+trailing zeros (for data blocks) or trailing zero scores
+(for pointer blocks) to canonicalize the blocks before
+storing them to Venti.
+.PP
+.I Vtzerotruncate
+returns the size of the 
+.IR size -byte
+buffer pointed to by
+.I buf
+ignoring trailing zeros or zero scores,
+according to the given
+.IR type .
+.PP
+.I Vtzeroextend
+pads
+.I buf
+with zeros or zero scores,
+according to the given
+.IR type ,
+to grow it from
+.I size
+bytes to
+.I newsize
+bytes.
+.PP
+.I Vtzeroscore
+is the score of the zero-length block.
+.SH SOURCE
+.B /sys/src/libventi/zero.c
+.br
+.B /sys/src/libventi/zeroscore.c
+.SH SEE ALSO
+.IR venti (2),
+.IR venti (6)

+ 3 - 0
sys/man/4/cwfs

@@ -74,6 +74,9 @@ The device name is in the usual
 notation of
 .IR fsconfig (8).
 The replacement can be the name of an existing file
+(which
+.I cwfs
+will not grow)
 or another such device name.
 For example, the file
 .RS

+ 451 - 0
sys/man/6/venti

@@ -0,0 +1,451 @@
+.TH VENTI 6
+.SH NAME
+venti \- archival storage server
+.SH DESCRIPTION
+Venti is a block storage server intended for archival data.
+In a Venti server, the SHA1 hash of a block's contents acts
+as the block identifier for read and write operations.
+This approach enforces a write-once policy, preventing
+accidental or malicious destruction of data.  In addition,
+duplicate copies of a block are coalesced, reducing the
+consumption of storage and simplifying the implementation
+of clients.
+.PP
+This manual page documents the basic concepts of
+block storage using Venti as well as the Venti network protocol.
+.PP
+.IR Venti (1)
+documents some simple clients.
+.IR Vac (1)
+and
+.IR vacfs (4)
+are more complex clients.
+.PP
+.IR Venti (2)
+describes a C library interface for accessing
+Venti servers and manipulating Venti data structures.
+.PP
+.IR Venti (8)
+describes the programs used to run a Venti server.
+.PP
+.SS "Scores
+The SHA1 hash that identifies a block is called its
+.IR score .
+The score of the zero-length block is called the
+.IR "zero score" .
+.PP
+Scores may have an optional 
+.IB label :
+prefix, typically used to
+describe the format of the data.
+For example, 
+.IR vac (1)
+uses a
+.B vac:
+prefix, while
+.IR vbackup (8)
+uses prefixes corresponding to the file system
+types: 
+.BR ext2: ,
+.BR ffs: ,
+and so on.
+.SS "Files and Directories
+Venti accepts blocks up to 56 kilobytes in size.  
+By convention, Venti clients use hash trees of blocks to
+represent arbitrary-size data
+.IR files .
+The data to be stored is split into fixed-size
+blocks and written to the server, producing a list
+of scores.
+The resulting list of scores is split into fixed-size pointer
+blocks (using only an integral number of scores per block)
+and written to the server, producing a smaller list
+of scores.
+The process continues, eventually ending with the
+score for the hash tree's top-most block.
+Each file stored this way is summarized by
+a
+.B VtEntry
+structure recording the top-most score, the depth
+of the tree, the data block size, and the pointer block size.
+One or more 
+.B VtEntry
+structures can be concatenated
+and stored as a special file called a
+.IR directory .
+In this
+manner, arbitrary trees of files can be constructed
+and stored.
+.PP
+Scores passed between programs conventionally refer
+to
+.B VtRoot
+blocks, which contain descriptive information
+as well as the score of a directory block containing a small number
+of directory entries.
+.PP
+Conventionally, programs do not mix data and directory entries
+in the same file.  Instead, they keep two separate files, one with
+directory entries and one with metadata referencing those
+entries by position.
+Keeping this parallel representation is a minor annoyance
+but makes it possible for general programs like
+.I venti/copy
+(see
+.IR venti (1))
+to traverse the block tree without knowing the specific details
+of any particular program's data.
+.SS "Block Types
+To allow programs to traverse these structures without
+needing to understand their higher-level meanings,
+Venti tags each block with a type.  The types are:
+.PP
+.nf
+.ft L
+    VtDataType     000  \f1data\fL
+    VtDataType+1   001  \fRscores of \fPVtDataType\fR blocks\fL
+    VtDataType+2   002  \fRscores of \fPVtDataType+1\fR blocks\fL
+    \fR\&...\fL
+    VtDirType      010  VtEntry\fR structures\fL
+    VtDirType+1    011  \fRscores of \fLVtDirType\fR blocks\fL
+    VtDirType+2    012  \fRscores of \fLVtDirType+1\fR blocks\fL
+    \fR\&...\fL
+    VtRootType     020  VtRoot\fR structure\fL
+.fi
+.PP
+The octal numbers listed are the type numbers used
+by the commands below.
+(For historical reasons, the type numbers used on
+disk and on the wire are different from the above.
+They do not distinguish
+.BI VtDataType+ n
+blocks from
+.BI VtDirType+ n
+blocks.)
+.SS "Zero Truncation
+To avoid storing the same short data blocks padded with
+differing numbers of zeros, Venti clients working with fixed-size
+blocks conventionally
+`zero truncate' the blocks before writing them to the server.
+For example, if a 1024-byte data block contains the 
+11-byte string 
+.RB ` hello " " world '
+followed by 1013 zero bytes,
+a client would store only the 11-byte block.
+When the client later read the block from the server,
+it would append zero bytes to the end as necessary to
+reach the expected size.
+.PP
+When truncating pointer blocks
+.RB ( VtDataType+ \fIn
+and
+.BI VtDirType+ n
+blocks),
+trailing zero scores are removed
+instead of trailing zero bytes.
+.PP
+Because of the truncation convention,
+any file consisting entirely of zero bytes,
+no matter what its length, will be represented by the zero score:
+the data blocks contain all zeros and are thus truncated
+to the empty block, and the pointer blocks contain all zero scores
+and are thus also truncated to the empty block, 
+and so on up the hash tree.
+.SS Network Protocol
+A Venti session begins when a
+.I client
+connects to the network address served by a Venti
+.IR server ;
+the conventional address is
+.BI tcp! server !venti
+(the
+.B venti
+port is 17034).
+Both client and server begin by sending a version
+string of the form
+.BI venti- versions - comment \en \fR.
+The
+.I versions
+field is a list of acceptable versions separated by
+colons.
+The protocol described here is version
+.BR 02 .
+The client is responsible for choosing a common
+version and sending it in the
+.B VtThello
+message, described below.
+.PP
+After the initial version exchange, the client transmits
+.I requests
+.RI ( T-messages )
+to the server, which subsequently returns
+.I replies
+.RI ( R-messages )
+to the client.
+The combined act of transmitting (receiving) a request
+of a particular type, and receiving (transmitting) its reply
+is called a
+.I transaction
+of that type.
+.PP
+Each message consists of a sequence of bytes.
+Two-byte fields hold unsigned integers represented
+in big-endian order (most significant byte first).
+Data items of variable lengths are represented by
+a one-byte field specifying a count,
+.IR n ,
+followed by
+.I n
+bytes of data.
+Text strings are represented similarly,
+using a two-byte count with
+the text itself stored as a UTF-encoded sequence
+of Unicode characters (see
+.IR utf (6)).
+Text strings are not
+.SM NUL\c
+-terminated:
+.I n
+counts the bytes of UTF data, which include no final
+zero byte.
+The
+.SM NUL
+character is illegal in text strings in the Venti protocol.
+The maximum string length in Venti is 1024 bytes.
+.PP
+Each Venti message begins with a two-byte size field 
+specifying the length in bytes of the message,
+not including the length field itself.
+The next byte is the message type, one of the constants
+in the enumeration in the include file
+.BR <venti.h> .
+The next byte is an identifying
+.IR tag ,
+used to match responses to requests.
+The remaining bytes are parameters of different sizes.
+In the message descriptions, the number of bytes in a field
+is given in brackets after the field name.
+The notation
+.IR parameter [ n ]
+where
+.I n
+is not a constant represents a variable-length parameter:
+.IR n [1]
+followed by
+.I n
+bytes of data forming the
+.IR parameter .
+The notation
+.IR string [ s ]
+(using a literal
+.I s
+character)
+is shorthand for
+.IR s [2]
+followed by
+.I s
+bytes of UTF-8 text.
+The notation
+.IR parameter []
+where 
+.I parameter
+is the last field in the message represents a 
+variable-length field that comprises all remaining
+bytes in the message.
+.PP
+All Venti RPC messages are prefixed with a field
+.IR size [2]
+giving the length of the message that follows
+(not including the
+.I size
+field itself).
+The message bodies are:
+.ta \w'\fLVtTgoodbye 'u
+.IP
+.ne 2v
+.B VtThello
+.IR tag [1]
+.IR version [ s ]
+.IR uid [ s ]
+.IR strength [1]
+.IR crypto [ n ]
+.IR codec [ n ]
+.br
+.B VtRhello
+.IR tag [1]
+.IR sid [ s ] 
+.IR rcrypto [1]
+.IR rcodec [1]
+.IP
+.ne 2v
+.B VtTping
+.IR tag [1]
+.br
+.B VtRping
+.IR tag [1]
+.IP
+.ne 2v
+.B VtTread
+.IR tag [1]
+.IR score [20]
+.IR type [1]
+.IR pad [1]
+.IR count [2]
+.br
+.B VtRread
+.IR tag [1]
+.IR data []
+.IP
+.ne 2v
+.B VtTwrite
+.IR tag [1]
+.IR type [1]
+.IR pad [3]
+.IR data []
+.br
+.B VtRwrite
+.IR tag [1]
+.IR score [20]
+.IP
+.ne 2v
+.B VtTsync
+.IR tag [1]
+.br
+.B VtRsync
+.IR tag [1]
+.IP
+.ne 2v
+.B VtRerror
+.IR tag [1]
+.IR error [ s ]
+.IP
+.ne 2v
+.B VtTgoodbye
+.IR tag [1]
+.PP
+Each T-message has a one-byte
+.I tag
+field, chosen and used by the client to identify the message.
+The server will echo the request's
+.I tag
+field in the reply.
+Clients should arrange that no two outstanding
+messages have the same tag field so that responses
+can be distinguished.
+.PP
+The type of an R-message will either be one greater than
+the type of the corresponding T-message or
+.BR VtRerror ,
+indicating that the request failed.
+In the latter case, the
+.I error
+field contains a string describing the reason for failure.
+.PP
+Venti connections must begin with a 
+.B hello
+transaction.
+The
+.B VtThello
+message contains the protocol
+.I version
+that the client has chosen to use.
+The fields
+.IR strength ,
+.IR crypto ,
+and
+.IR codec
+could be used to add authentication, encryption,
+and compression to the Venti session
+but are currently ignored.
+The 
+.I rcrypto
+and
+.I rcodec
+fields in the 
+.B VtRhello
+response are similarly ignored.
+The
+.IR uid 
+and
+.IR sid
+fields are intended to be the identity
+of the client and server but, given the lack of
+authentication, should be treated only as advisory.
+The initial
+.B hello
+should be the only
+.B hello
+transaction during the session.
+.PP
+The
+.B ping
+message has no effect and 
+is used mainly for debugging.
+Servers should respond immediately to pings.
+.PP
+The
+.B read
+message requests a block with the given
+.I score
+and
+.IR type .
+Use
+.I vttodisktype
+and
+.I vtfromdisktype
+(see
+.IR venti (2))
+to convert a block type enumeration value
+.RB ( VtDataType ,
+etc.)
+to the 
+.I type
+used on disk and in the protocol.
+The
+.I count
+field specifies the maximum expected size
+of the block.
+The
+.I data
+in the reply is the block's contents.
+.PP
+The
+.B write
+message writes a new block of the given
+.I type
+with contents
+.I data
+to the server.
+The response includes the
+.I score
+to use to read the block,
+which should be the SHA1 hash of 
+.IR data .
+.PP
+The Venti server may buffer written blocks in memory,
+waiting until after responding to the
+.B write
+message before writing them to
+permanent storage.
+The server will delay the response to a
+.B sync
+message until after all blocks in earlier
+.B write
+messages have been written to permanent storage.
+.PP
+The
+.B goodbye
+message ends a session.  There is no
+.BR VtRgoodbye :
+upon receiving the
+.BR VtTgoodbye
+message, the server hangs up the connection.
+.SH SEE ALSO
+.IR venti (1),
+.IR venti (2),
+.IR venti (8)
+.br
+Sean Quinlan and Sean Dorward,
+``Venti: a new approach to archival storage'',
+.I "Usenix Conference on File and Storage Technologies" ,
+2002.

+ 426 - 164
sys/man/8/venti

@@ -1,14 +1,16 @@
 .TH VENTI 8
 .SH NAME
-venti \- an archival block storage server
+venti \- archival storage server
 .SH SYNOPSIS
+.in +0.25i
+.ti -0.25i
 .B venti/venti
 [
-.B -dsw
+.B -Ldrs
 ]
 [
 .B -a
-.I ventiaddress
+.I address
 ]
 [
 .B -B
@@ -20,7 +22,7 @@ venti \- an archival block storage server
 ]
 [
 .B -C
-.I cachesize
+.I lumpcachesize
 ]
 [
 .B -h
@@ -28,206 +30,466 @@ venti \- an archival block storage server
 ]
 [
 .B -I
-.I icachesize
+.I indexcachesize
 ]
-.PP
-.B venti/sync
 [
-.B -h
-.I host
+.B -W
+.I webroot
 ]
 .SH DESCRIPTION
-.I Venti
-is a block storage server intended for archival data.
-In a Venti server,
-the SHA1 hash of a block's contents acts as the block
-identifier for read and write operations.
-This approach enforces a write-once policy, preventing accidental or
-malicious destruction of data.  In addition, duplicate copies of a
-block are coalesced, reducing the consumption of storage and
-simplifying the implementation of clients.
-.PP
-Storage for
-.I venti
-consists of a data log and an index, both of which
-can be spread across multiple files.
-The files containing the data log are themselves divided into self-contained sections called arenas.
-Each arena contains a large number of data blocks and is sized to
-facilitate operations such as copying to removable media.
-The index provides a mapping between the a Sha1 fingerprint and
-the location of the corresponding block in the data log.
-.PP
-The index and data log are typically stored on raw disk partitions.
-To improve the robustness, the data log should be stored on
-a device that provides RAID functionality.  The index does
-not require such protection, since if necessary, it can
-can be regenerated from the data log.
-The performance of
-.I venti
-is typically limited to the random access performance
-of the index.  This performance can be improved by spreading the
-index accross multiple disks.  
-.PP
-The storage for
-.I venti
-is initialized using
-.IR fmtarenas ,
-.IR fmtisect ,
+Venti
+is a SHA1-addressed archival storage server.
+See 
+.IR venti (6)
+for a full introduction to the system.
+This page documents the structure and operation of the server.
+.PP
+A venti server requires multiple disks or disk partitions,
+each of which must be properly formatted before the server
+can be run.
+.SS Disk 
+The venti server maintains three disk structures, typically
+stored on raw disk partitions:
+the append-only
+.IR "data log" ,
+which holds, in sequential order,
+the contents of every block written to the server;
+the 
+.IR index ,
+which helps locate a block in the data log given its score;
+and optionally the 
+.IR "bloom filter" ,
+a concise summary of which scores are present in the index.
+The data log is the primary storage.
+To improve the robustness, it should be stored on
+a device that provides RAID functionality.
+The index and the bloom filter are optimizations 
+employed to access the data log efficiently and can be rebuilt
+if lost or damaged.
+.PP
+The data log is logically split into sections called
+.IR arenas ,
+typically sized for easy offline backup
+(e.g., 500MB).
+A data log may comprise many disks, each storing
+one or more arenas.
+Such disks are called
+.IR "arena partitions" .
+Arena partitions are filled in the order given in the configuration.
+.PP
+The index is logically split into block-sized pieces called
+.IR buckets ,
+each of which is responsible for a particular range of scores.
+An index may be split across many disks, each storing many buckets.
+Such disks are called
+.IR "index sections" .
+.PP
+The index must be sized so that no bucket is full.
+When a bucket fills, the server must be shut down and
+the index made larger.
+Since scores appear random, each bucket will contain
+approximately the same number of entries.
+Index entries are 40 bytes long.  Assuming that a typical block
+being written to the server is 8192 bytes and compresses to 4096
+bytes, the active index is expected to be about 1% of
+the active data log.
+Storing smaller blocks increases the relative index footprint;
+storing larger blocks decreases it.
+To allow variation in both block size and the random distribution
+of scores to buckets, the suggested index size is 5% of
+the active data log.
+.PP
+The (optional) bloom filter is a large bitmap that is stored on disk but
+also kept completely in memory while the venti server runs.
+It helps the venti server efficiently detect scores that are
+.I not
+already stored in the index.
+The bloom filter starts out zeroed.
+Each score recorded in the bloom filter is hashed to choose
+.I nhash
+bits to set in the bloom filter.
+A score is definitely not stored in the index if any of its
+.I nhash 
+bits are not set.
+The bloom filter thus has two parameters: 
+.I nhash
+(maximum 32)
+and the total bitmap size 
+(maximum 512MB, 2\s-2\u32\d\s+2 bits).
+.PP
+The bloom filter should be sized so that
+.I nhash
+\(mu
+.I nblock
+\(<=
+0.7 \(mu
+.IR b ,
+where
+.I nblock
+is the expected number of blocks stored on the server
 and
-.I fmtindex
+.I b
+is the bitmap size in bits.
+The false positive rate of the bloom filter when sized
+this way is approximately 2\s-2\u\-\fInhash\fR\d\s+2.
+Values of
+.I nhash
+less than 10 are not very useful;
+values greater than 24 are probably a waste of memory.
+.I Fmtbloom
 (see
-.IR ventiaux (8)).
-A configuration file,
-.IR venti.conf (6),
-ties the index sections and data arenas together.
-.PP
-A Venti
-server is accessed via an undocumented network protocol.
-Two client applications are included in this distribution:
-.IR vac (1)
-and
-.IR vacfs (4).
-.I Vac
-copies files from a Plan 9 file system to Venti, creating an
-archive and returning the fingerprint of the root.
-This archive can be mounted in Plan 9 using 
-.IR vacfs .
-These two commands enable a rudimentary backup system.
-A future release will include a Plan 9 file system that uses
-Venti as a replacement for the WORM device of 
-.IR fs (4).
+.IR venti-fmt (8))
+can be given either
+.I nhash
+or
+.IR nblock ;
+if given
+.IR nblock ,
+it will derive an appropriate
+.IR nhash .
+.SS Memory
+Venti can make effective use of large amounts of memory
+for various caches.
+.PP
+The
+.I "lump cache
+holds recently-accessed venti data blocks, which the server refers to as 
+.IR lumps .
+The lump cache should be at least 1MB but can profitably be much larger.
+The lump cache can be thought of as the level-1 cache:
+read requests handled by the lump cache can
+be served instantly.
+.PP
+The
+.I "block cache
+holds recently-accessed
+.I disk
+blocks from the arena partitions.
+The block cache needs to be able to simultaneously hold two blocks
+from each arena plus four blocks for the currently-filling arena.
+The block cache can be thought of as the level-2 cache:
+read requests handled by the block cache are slower than those
+handled by the lump cache, since the lump data must be extracted
+from the raw disk blocks and possibly decompressed, but no
+disk accesses are necessary.
 .PP
 The
-.I venti
-server provides rudimentary status information via
-a built-in http server.  The URL files it serves are:
+.I "index cache
+holds recently-accessed or prefetched
+index entries.
+The index cache needs to be able to hold index entries
+for three or four arenas, at least, in order for prefetching
+to work properly.  Each index entry is 50 bytes.
+Assuming 500MB arenas of
+128,000 blocks that are 4096 bytes each after compression,
+the minimum index cache size is about 6MB.
+The index cache can be thought of as the level-3 cache:
+read requests handled by the index cache must still go
+to disk to fetch the arena blocks, but the costly random
+access to the index is avoided.
+.PP
+The size of the index cache determines how long venti
+can sustain its `burst' write throughput, during which time
+the only disk accesses on the critical path
+are sequential writes to the arena partitions.
+For example, if you want to be able to sustain 10MB/s
+for an hour, you need enough index cache to hold entries
+for 36GB of blocks.  Assuming 8192-byte blocks,
+you need room for almost five million index entries.
+Since index entries are 50 bytes each, you need 250MB
+of index cache.
+If the background index update process can make a single
+pass through the index in an hour, which is possible,
+then you can sustain the 10MB/s indefinitely (at least until
+the arenas are all filled).
+.PP
+The
+.I "bloom filter
+requires memory equal to its size on disk,
+as discussed above.
+.PP
+A reasonable starting allocation is to
+divide memory equally (in thirds) between
+the bloom filter, the index cache, and the lump and block caches;
+the third of memory allocated to the lump and block caches 
+should be split unevenly, with more (say, two thirds)
+going to the block cache.
+.SS Network
+The venti server announces two network services, one 
+(conventionally TCP port
+.BR venti ,
+17034) serving
+the venti protocol as described in
+.IR venti (6),
+and one serving HTTP
+(conventionally TCP port
+.BR http ,
+80).
+.PP
+The venti web server provides the following 
+URLs for accessing status information:
+.TP
+.B /index
+A summary of the usage of the arenas and index sections.
+.TP
+.B /xindex
+An XML version of
+.BR /index .
+.TP
+.B /storage
+Brief storage totals.
 .TP
-.B stats
-Various internal statistics.
+.BI /set/ variable
+The current integer value of
+.IR variable .
+Variables are:
+.BR compress ,
+whether or not to compress blocks
+(for debugging);
+.BR logging ,
+whether to write entries to the debugging logs;
+.BR stats ,
+whether to collect run-time statistics;
+.BR icachesleeptime ,
+the time in milliseconds between successive updates
+of megabytes of the index cache;
+.BR arenasumsleeptime ,
+the time in milliseconds between reads while
+checksumming an arena in the background.
+The two sleep times should be (but are not) managed by venti;
+they exist to provide more experience with their effects.
+The other variables exist only for debugging and
+performance measurement.
 .TP
-.B index
-An enumeration of the index sections and all non empty arenas, including various statistics.
+.BI /set/ variable / value
+Set
+.I variable
+to
+.IR value .
 .TP
-.B storage
-A summary of the state of the data log.
+.BI /graph/ name / param / param / \fR...
+A PNG image graphing the named run-time statistic over time.
+The details of names and parameters are undocumented;
+see
+.B httpd.c
+in the venti sources.
 .TP
-.B xindex
-An enumeration of the index sections and all non empty arenas, in XML format.
+.B /log
+A list of all debugging logs present in the server's memory.
+.TP
+.BI /log/ name
+The contents of the debugging log with the given
+.IR name .
+.TP
+.B /flushicache
+Force venti to begin flushing the index cache to disk.
+The request response will not be sent until the flush
+has completed.
+.TP
+.B /flushdcache
+Force venti to begin flushing the arena block cache to disk.
+The request response will not be sent until the flush
+has completed.
+.PD
 .PP
-Several auxiliary utilities (see
-.IR ventiaux (8))
-aid in maintaining the storage for Venti.
-With the exception of
-.I rdarena ,
-these utilities should generally be run after killing the
-.I venti
-server.
-The utilities are:
+Requests for other files are served by consulting a
+directory named in the configuration file
+(see
+.B webroot
+below).
+.SS Configuration File
+A venti configuration file 
+enumerates the various index sections and
+arenas that constitute a venti system.
+The components are indicated by the name of the file, typically
+a disk partition, in which they reside.  The configuration
+file is the only place where file names are used.  Internally,
+venti uses the names assigned when the components were formatted
+with 
+.I fmtarenas
+or 
+.I fmtisect
+(see
+.IR venti-fmt (8)).
+In particular, only the configuration needs to be
+changed if a component is moved to a different file.
+.PP
+The configuration file consists of lines in the form described below.
+Lines starting with
+.B #
+are comments.
 .TP
-.I checkarenas
-Check the integrity, and optionally fix, Venti arenas.
+.BI index " name
+Names the index for the system.
 .TP
-.I checkindex
-Check the integrity, and optionally fix, a Venti index.
+.BI arenas " file
+.I File
+is an arena partition, formatted using
+.IR fmtarenas .
 .TP
-.I buildindex
-Rebuild a Venti index from scratch.
+.BI isect " file
+.I File
+is an index section, formatted using
+.IR fmtisect .
 .TP
-.I rdarena
-Extract a Venti arena and write to standard output.
+.BI bloom " file
+.I File
+is a bloom filter, formatted using
+.IR fmtbloom .
 .PD
 .PP
-Options to 
-.I venti
-are:
+After formatting a venti system using
+.IR fmtindex ,
+the order of arenas and index sections should not be changed.
+Additional arenas can be appended to the configuration;
+run
+.I fmtindex
+with the
+.B -a
+flag to update the index.
+.PP
+The configuration file also holds configuration parameters
+for the venti server itself.
+These are:
+.TF httpaddr netaddr
 .TP
-.BI -a " ventiaddress
-The network address on which the server listens for incoming connections.
-The default is
-.LR tcp!*!venti .
+.BI mem " size
+lump cache size
 .TP
-.BI -B " blockcachesize
-The size, in bytes, of memory allocated to caching raw disk blocks.
+.BI bcmem " size
+block cache size
 .TP
-.BI -c " config
-Specifies the
-Venti
+.BI icmem " size
+index cache size
+.TP
+.BI addr " netaddr
+network address to announce venti service
+(default
+.BR tcp!*!venti )
+.TP
+.BI httpaddr " netaddr
+network address to announce HTTP service
+(default
+.BR tcp!*!http )
+.TP
+.B queuewrites
+queue writes in memory
+(default is not to queue)
+.TP
+.BI webroot " dir
+directory tree containing files for HTTP server
+to consult for unrecognized URLs
+.PD
+.PP
+The units for the various cache sizes above can be specified by appending a
+.LR k ,
+.LR m ,
+or
+.LR g
+(case-insensitive)
+to indicate kilobytes, megabytes, or gigabytes respectively.
+.PP
+The
+.I file
+name in the configuration lines above can be of the form
+.IB file : lo - hi
+to specify a range of the file. 
+.I Lo
+and
+.I hi
+are specified in bytes but can have the usual
+.BR k ,
+.BR m ,
+or
+.B g
+suffixes.
+Either
+.I lo
+or
+.I hi
+may be omitted.
+This notation eliminates the need to
+partition raw disks on non-Plan 9 systems.
+.SS Command Line
+Many of the options to Venti duplicate parameters that
+can be specified in the configuration file.
+The command line options override those found in a
 configuration file.
-Defaults to
-.LR venti.conf .
+Additional options are:
 .TP
-.BI -C " cachesize
-The size, in bytes, of memory allocated to caching 
-Venti
-blocks.
+.BI -c " config
+The server configuration file
+(default
+.BR venti.conf )
 .TP
-.BI -d
+.B -d
 Produce various debugging information on standard error.
+Implies
+.BR -s .
 .TP
-.BI -h " httpaddress
-The network address of Venti's built-in
-http
-server.
-The default is
-.LR tcp!*!http .
+.B -L
+Enable logging.  By default all logging is disabled.
+Logging slows server operation considerably.
 .TP
-.BI -I " icachesize
-The size, in bytes, of memory allocated to caching the index mapping fingerprints
-to locations in 
-.IR venti 's
-data log.
+.B -r
+Allow only read access to the venti data.
 .TP
 .B -s
 Do not run in the background.
 Normally,
 the foreground process will exit once the Venti server
 is initialized and ready for connections.
-.TP
-.B -w
-Enable write buffering.  This option increase the performance of writes to
-.I venti
-at the cost of returning success to the client application before the
-data has been written to disk.
-The server implements a
-.I sync
-rpc that waits for completion of all the writes buffered at the time
-the rpc was received.
-Applications such as
-.IR vac (1)
-and the
-.I sync
-command described below
-use this rpc to make sure that the data is correctly written to disk.
-Use of this option is recommended.
 .PD
+.SH EXAMPLE
+A simple configuration:
+.IP
+.EX
+% cat venti.conf
+index main
+isect /tmp/disks/isect0
+isect /tmp/disks/isect1
+arenas /tmp/disks/arenas
+bloom /tmp/disks/bloom
+mem 10M
+bcmem 20M
+icmem 30M
+% 
+.EE
 .PP
-The units for the various cache sizes above can be specified by appending a
-.LR k ,
-.LR m ,
-or
-.LR g
-to indicate kilobytes, megabytes, or gigabytes respectively.
-The command line options override options found in the
-.IR venti.conf (6)
-file.
-.PP
-.I Sync
-connects to a running Venti server and executes a sync rpc
-(described with the
-.B -w
-option above). 
-If sync exits successfully, it means that all writes buffered at the
-time the command was issued are now on disk.
+Format the index sections, the arena partition, the bloom filter,
+and finally the main index:
+.IP
+.EX
+% venti/fmtisect isect0. /tmp/disks/isect0 &
+% venti/fmtisect isect1. /tmp/disks/isect1 &
+% venti/fmtarenas arenas0. /tmp/disks/arenas &
+% venti/fmtbloom /tmp/disks/bloom &
+% wait
+% venti/fmtindex venti.conf
+% 
+.EE
+.PP
+Start the server and check the storage statistics:
+.IP
+.EX
+% venti/venti
+% hget http://$sysname/storage
+.EE
 .SH SOURCE
-.B /sys/src/cmd/venti
+.B /sys/src/cmd/venti/srv
 .SH "SEE ALSO"
-.IR venti.conf (6),
-.IR ventiaux (8),
-.IR fs (3),
-.IR vac (1),
-.IR vacfs (4).
+.IR venti (1),
+.IR venti (2),
+.IR venti (6),
+.IR venti-backup (8),
+.IR venti-fmt (8)
 .br
 Sean Quinlan and Sean Dorward,
 ``Venti: a new approach to archival storage'',
 .I "Usenix Conference on File and Storage Technologies" ,
 2002.
+.SH BUGS
+Setting up a venti server is too complicated.
+.PP
+Venti should not require the user to decide how to
+partition its memory usage.

+ 106 - 0
sys/man/8/venti-backup

@@ -0,0 +1,106 @@
+.TH VENTI-BACKUP 8
+.SH NAME
+rdarena, wrarena \- copy arenas between venti servers
+.SH SYNOPSIS
+.PP
+.B venti/rdarena
+[
+.B -v
+]
+.I arenapart
+.I arenaname
+.PP
+.B venti/wrarena
+[
+.B -o
+.I fileoffset
+]
+[
+.B -h
+.I host
+]
+.I arenafile
+[
+.I clumpoffset
+]
+.SH DESCRIPTION
+.PP
+.I Rdarena
+extracts the named
+.I arena
+from the arena partition
+.I arenapart
+and writes this arena to standard output.
+This command is typically used to back up an arena to external media.
+The
+.B -v
+option generates more verbose output on standard error.
+.PP
+.I Wrarena
+writes the blocks contained in the arena
+.I arenafile
+(typically, the output of
+.IR rdarena )
+to a Venti server.
+It is typically used to reinitialize a Venti server from backups of the arenas.
+For example,
+.IP
+.EX
+venti/rdarena /dev/sdC0/arenas arena.0 >external.media
+venti/wrarena -h venti2 external.media
+.EE
+.LP
+writes the blocks contained in
+.B arena.0
+to the Venti server
+.B venti2
+(typically not the one using
+.BR /dev/sdC0/arenas ).
+.PP
+The
+.B -o
+option specifies that the arena starts at byte
+.I fileoffset
+(default
+.BR 0 )
+in
+.IR arenafile .
+This is useful for reading directly from
+the Venti arena partition:
+.IP
+.EX
+venti/wrarena -h venti2 -o 335872 /dev/sdC0/arenas
+.EE
+.LP
+(In this example, 335872 is the offset shown in the Venti
+server's index list (344064) minus one block (8192).
+You will need to substitute your own arena offsets
+and block size.)
+.PP
+Finally, the optional
+.I clumpoffset
+argument specifies that the writing should begin with the
+clump starting at
+.I clumpoffset
+within the arena.
+.I Wrarena
+prints the offset it stopped at (because there were no more data blocks).
+This could be used to incrementally back up a Venti server
+to another Venti server:
+.IP
+.EX
+last=`{cat last}
+venti/wrarena -h venti2 -o 335872 /dev/sdC0/arenas $last >output
+awk '/^end offset/ { print $3 }' output >last
+.EE
+.LP
+Of course, one would need to add wrapper code to keep track
+of which arenas have been processed.
+See
+.B /sys/src/cmd/venti/backup.example
+for a version that does this.
+.SH SOURCE
+.B /sys/src/cmd/venti/srv
+.SH SEE ALSO
+.IR venti (6),
+.IR venti (8)

+ 404 - 0
sys/man/8/venti-fmt

@@ -0,0 +1,404 @@
+.TH VENTI-FMT 8
+.SH NAME
+buildindex,
+checkarenas,
+checkindex,
+conf,
+fmtarenas,
+fmtbloom,
+fmtindex,
+fmtisect,
+syncindex \- prepare and maintain a venti server
+.SH SYNOPSIS
+.PP
+.B venti/fmtarenas
+[
+.B -4Z
+]
+[
+.B -a
+.I arenasize
+]
+[
+.B -b
+.I blocksize
+]
+.I name
+.I file
+.PP
+.B venti/fmtisect
+[
+.B -1Z
+]
+[
+.B -b
+.I blocksize
+]
+.I name
+.I file
+.PP
+.B venti/fmtbloom
+[
+.B -n
+.I nblock
+|
+.B -N
+.I nhash
+]
+[
+.B -s
+.I size
+]
+.I file
+.PP
+.B venti/fmtindex
+[
+.B -a
+]
+.I venti.conf
+.PP
+.B venti/conf
+[
+.B -w
+]
+.I partition
+[
+.I configfile
+]
+.if t .sp 0.5
+.PP
+.B venti/buildindex
+[
+.B -bd
+] [
+.B -i
+.I isect
+] ... [
+.B -M
+.I imemsize
+]
+.I venti.conf
+.PP
+.B venti/checkindex
+[
+.B -f
+]
+[
+.B -B
+.I blockcachesize
+]
+.I venti.conf
+.I tmp
+.PP
+.B venti/checkarenas
+[
+.B -afv 
+]
+.I file
+.SH DESCRIPTION
+These commands aid in the setup, maintenance, and debugging of
+venti servers.
+See
+.IR venti (6)
+for an overview of the venti system and
+.IR venti (8)
+for an overview of the data structures used by the venti server.
+.PP
+Note that the units for the various sizes in the following
+commands can be specified by appending
+.LR k ,
+.LR m ,
+or
+.LR g
+to indicate kilobytes, megabytes, or gigabytes respectively.
+.SS Formatting
+To prepare a server for its initial use, the arena partitions and
+the index sections must be formatted individually, with
+.I fmtarenas
+and
+.IR fmtisect .
+Then the 
+collection of index sections must be combined into a venti
+index with 
+.IR fmtindex .
+.PP
+.I Fmtarenas
+formats the given
+.IR file ,
+typically a disk partition, into an arena partition.
+The arenas in the partition are given names of the form
+.IR name%d ,
+where
+.I %d
+is replaced with a sequential number starting at 0.
+.PP
+Options to 
+.I fmtarenas
+are:
+.TP
+.BI -a " arenasize
+The arenas are of
+.I arenasize
+bytes.  The default is
+.BR 512M ,
+which was selected to provide a balance
+between the number of arenas and the ability to copy an arena to external
+media such as recordable CDs and tapes.
+.TP
+.BI -b " blocksize
+The size, in bytes, for read and write operations to the file.
+The size is recorded in the file, and is used by applications that access the arenas.
+The default is
+.BR 8k .
+.TP
+.B -4
+Create a `version 4' arena partition for backwards compatibility with old servers.
+The default is version 5, used by the current venti server.
+.TP
+.B -Z
+Do not zero the data sections of the arenas.
+Using this option reduces the formatting time
+but should only be used when it is known that the file was already zeroed.
+(Version 4 only; version 5 sections are not and do not need to be zeroed.)
+.PD
+.PP
+.I Fmtisect
+formats the given
+.IR file ,
+typically a disk partition, as a venti index section with the specified
+.IR name .
+Each of the index sections in a venti configuration must have a unique name.
+.PP
+Options to 
+.I fmtisect
+are:
+.TP
+.BI -b " bucketsize
+The size of an index bucket, in bytes.
+All the index sections within an index must have the same bucket size.
+The default is
+.BR 8k .
+.TP
+.B -1
+Create a `version 1' index section for backwards compatibility with old servers.
+The default is version 2, used by the current venti server.
+.TP
+.B -Z
+Do not zero the index.
+Using this option reduces the formatting time
+but should only be used when it is known that the file was already zeroed.
+(Version 1 only; version 2 sections are not and do not need to be zeroed.)
+.PD
+.PP
+.I Fmtbloom
+formats the given
+.I file
+as a bloom filter
+(see
+.IR venti (8)).
+The options are:
+.TP
+.BI -n " nblock \fR| " -N " nhash
+The number of blocks expected to be indexed by the filter
+or the number of hash functions to use.
+If the
+.B -n
+option
+is given, it is used, along with the total size of the filter,
+to compute an appropriate
+.IR nhash .
+.TP
+.BI -s " size
+The size of the bloom filter.  The default is the total size of the file.
+In either case,
+.I size
+is rounded down to a power of two.
+.PD
+.PP
+The
+.I file
+argument in the commands above can be of the form
+.IB file : lo - hi
+to specify a range of the file. 
+.I Lo
+and
+.I hi
+are specified in bytes but can have the usual
+.BR k ,
+.BR m ,
+or
+.B g
+suffixes.
+Either
+.I lo
+or
+.I hi
+may be omitted.
+This notation eliminates the need to
+partition raw disks on non-Plan 9 systems.
+.PP
+.I Fmtindex
+reads the configuration file
+.I venti.conf
+and initializes the index sections to form a usable index structure.
+The arena files and index sections must have previously been formatted
+using 
+.I fmtarenas
+and 
+.I fmtisect
+respectively.
+.PP
+The function of a venti index is to map a SHA1 fingerprint to a location
+in the data section of one of the arenas.  The index is composed of
+blocks, each of which contains the mapping for a fixed range of possible
+fingerprint values.
+.I Fmtindex
+determines the mapping between SHA1 values and the blocks
+of the collection of index sections.  Once this mapping has been determined,
+it cannot be changed without rebuilding the index. 
+The basic assumption in the current implementation is that the index
+structure is sufficiently empty that individual blocks of the index will rarely
+overflow.  The total size of the index should be about 2% to 10% of
+the total size of the arenas, but the exact percentage depends both on the
+index block size and the compressed size of blocks stored.
+See the discussion in
+.IR venti (8)
+for more.
+.PP
+.I Fmtindex
+also computes a mapping between a linear address space and
+the data section of the collection of arenas.  The
+.B -a
+option can be used to add additional arenas to an index.
+To use this feature,
+add the new arenas to
+.I venti.conf
+after the existing arenas and then run
+.I fmtindex
+.BR -a .
+.PP
+A copy of the above mappings is stored in the header for each of the index sections.
+These copies enable
+.I buildindex
+to restore a single index section without rebuilding the entire index.
+.PP
+To make it easier to bootstrap servers, the configuration
+file can be stored in otherwise empty space
+at the beginning of any venti partitions using
+.IR conf .
+A partition so branded with a configuration file can
+be used in place of a configuration file when invoking any
+of the venti commands.
+By default,
+.I conf
+prints the configuration stored in
+.IR partition .
+When invoked with the
+.B -w
+flag,
+.I conf
+reads a configuration file from 
+.I configfile
+(or else standard input)
+and stores it in
+.IR partition .
+.SS Checking and Rebuilding
+.PP
+.I Buildindex
+populates the index for the Venti system described in
+.IR venti.conf .
+The index must have previously been formatted using
+.IR fmtindex .
+This command is typically used to build a new index for a Venti
+system when the old index becomes too small, or to rebuild
+an index after media failure.
+Small errors in an index can usually be fixed with
+.IR checkindex ,
+but 
+.I checkindex
+requires a large temporary workspace and 
+.I buildindex
+does not.
+.PP
+Options to 
+.I buildindex
+are:
+.TP
+.B -b
+Reinitialise the Bloom filter, if any.
+.TP
+.B -d
+`Dumb' mode; run all three passes.
+.TP
+.BI -i " isect
+Only rebuild index section
+.IR isect ;
+may be repeated to rebuild multiple sections.
+The name
+.L none
+is special and just reads the arenas.
+.TP
+.BI -M " imemsize
+The amount of memory, in bytes, to use for caching raw disk accesses while running
+.IR buildindex .
+(This is not a property of the created index.)
+The usual suffixes apply.
+The default is 256M.
+.PD
+.PP
+.I Checkindex
+examines the Venti index described in
+.IR venti.conf .
+The program detects various error conditions including:
+blocks that are not indexed, index entries for blocks that do not exist,
+and duplicate index entries.
+If requested, an attempt can be made to fix errors that are found.
+.PP
+The
+.I tmp
+file, usually a disk partition, must be large enough to store a copy of the index.
+This temporary space is used to perform a merge sort of index entries
+generated by reading the arenas.
+.PP
+Options to 
+.I checkindex
+are:
+.TP
+.BI -B " blockcachesize
+The amount of memory, in bytes, to use for caching raw disk accesses while running
+.IR checkindex .
+The default is 8k.
+.TP
+.B -f
+Attempt to fix any errors that are found.
+.PD
+.PP
+.I Checkarenas
+examines the Venti arenas contained in the given
+.IR file .
+The program detects various error conditions, and optionally attempts
+to fix any errors that are found.
+.PP
+Options to 
+.I checkarenas
+are:
+.TP
+.B -a
+For each arena, scan the entire data section.
+If this option is omitted, only the end section of
+the arena is examined.
+.TP
+.B -f
+Attempt to fix any errors that are found.
+.TP
+.B -v
+Increase the verbosity of output.
+.PD
+.SH SOURCE
+.B /sys/src/cmd/venti/srv
+.SH SEE ALSO
+.IR venti (6),
+.IR venti (8)
+.SH BUGS
+.I Buildindex
+should allow an individual index section to be rebuilt.

+ 0 - 507
sys/man/8/ventiaux

@@ -1,507 +0,0 @@
-.TH VENTIAUX 8
-.SH NAME
-buildindex,
-checkarenas,
-checkindex,
-conf,
-copy,
-fmtarenas,
-fmtindex,
-fmtisect,
-rdarena,
-rdarenablocks,
-read,
-wrarenablocks,
-write \- Venti maintenance and debugging commands
-.SH SYNOPSIS
-.B venti/buildindex
-[
-.B -B
-.I blockcachesize
-]
-[
-.B -Z
-]
-.I venti.config
-.I tmp
-.PP
-.B venti/checkarenas
-[
-.B -afv 
-]
-.I file
-.PP
-.B venti/checkindex
-[
-.B -f
-]
-[
-.B -B
-.I blockcachesize
-]
-.I venti.config
-.I tmp
-.PP
-.B venti/conf
-[
-.B -w
-]
-.I partition
-[
-.I configfile
-]
-.PP
-.B venti/copy
-[
-.B -fq
-]
-.I src
-.I dst
-.I score
-[
-.I type
-]
-.PP
-.B venti/fmtarenas
-[
-.B -Z
-]
-[
-.B -a
-.I arenasize
-]
-[
-.B -b
-.I blocksize
-]
-.I name
-.I file
-.PP
-.B venti/fmtindex
-[
-.B -a
-]
-.I venti.config
-.PP
-.B venti/fmtisect
-[
-.B -Z
-]
-[
-.B -b
-.I blocksize
-]
-.I name
-.I file
-.PP
-.B venti/rdarena
-[
-.B -v
-]
-.I arenapart
-.I arenaname
-.PP
-.B venti/read
-[
-.B -h
-.I host
-]
-.I score
-[
-.I type
-]
-.PP
-.B venti/wrarena
-[
-.B -o
-.I fileoffset
-]
-[
-.B -h
-.I host
-]
-.I arenafile
-[
-.I clumpoffset
-]
-.PP
-.B venti/write
-[
-.B -h
-.I host
-]
-[
-.B -t
-.I type
-]
-[
-.B -z
-]
-.SH DESCRIPTION
-These commands aid in the setup, maintenance, and debugging of
-Venti servers.
-See
-.IR venti (8)
-and
-.IR venti.conf (6)
-for an overview of the data structures stored by Venti.
-.PP
-Note that the units for the various sizes in the following
-commands can be specified by appending
-.LR k ,
-.LR m ,
-or
-.LR g
-to indicate kilobytes, megabytes, or gigabytes respectively.
-.PP
-.I Buildindex
-populates the index for the Venti system described in
-.IR venti.config .
-The index must have previously been formatted using
-.IR fmtindex .
-This command is typically used to build a new index for a Venti
-system when the old index becomes too small, or to rebuild
-an index after media failure.
-Small errors in an index can usually be fixed with
-.IR checkindex .
-.PP
-The
-.I tmp
-file, usually a disk partition, must be large enough to store a copy of the index.
-This temporary space is used to perform a merge sort of index entries
-generated by reading the arenas.
-.PP
-Options to 
-.I buildindex
-are:
-.TP
-.BI -B " blockcachesize
-The amount of memory, in bytes, to use for caching raw disk accesses while running
-.IR buildindex .
-(This is not a property of the created index.)
-The default is 8k.
-.TP
-.B -Z
-Do not zero the index.
-This option should only be used when it is known that the index was already zeroed.
-.PD
-.PP
-.I Checkarenas
-examines the Venti arenas contained in the given
-.IR file .
-The program detects various error conditions, and optionally attempts
-to fix any errors that are found.
-.PP
-Options to 
-.I checkarenas
-are:
-.TP
-.B -a
-For each arena, scan the entire data section.
-If this option is omitted, only the end section of
-the arena is examined.
-.TP
-.B -f
-Attempt to fix any errors that are found.
-.TP
-.B -v
-Increase the verbosity of output.
-.PD
-.PP
-.I Checkindex
-examines the Venti index described in
-.IR venti.config .
-The program detects various error conditions including:
-blocks that are not indexed, index entries for blocks that do not exist,
-and duplicate index entries.
-If requested, an attempt can be made to fix errors that are found.
-.PP
-The
-.I tmp
-file, usually a disk partition, must be large enough to store a copy of the index.
-This temporary space is used to perform a merge sort of index entries
-generated by reading the arenas.
-.PP
-Options to 
-.I checkindex
-are:
-.TP
-.BI -B " blockcachesize
-The amount of memory, in bytes, to use for caching raw disk accesses while running
-.IR checkindex .
-The default is 8k.
-.TP
-.B -f
-Attempt to fix any errors that are found.
-.PD
-.PP
-.I Fmtarenas
-formats the given
-.IR file ,
-typically a disk partition, into a number of
-Venti
-arenas.
-The arenas are given names of the form
-.IR name%d ,
-where
-.I %d
-is replaced with a sequential number starting at 0.
-.PP
-Options to 
-.I fmtarenas
-are:
-.TP
-.BI -a " arenasize
-The arenas are of
-.I arenasize
-bytes.  The default is 512 megabytes, which was selected to provide a balance
-between the number of arenas and the ability to copy an arena to external
-media such as recordable CDs and tapes.
-.TP
-.BI -b " blocksize
-The size, in bytes, for read and write operations to the file.
-The size is recorded in the file, and is used by applications that access the arenas.
-The default is 8k.
-.TP
-.B -Z
-Do not zero the data sections of the arenas.
-Using this option reduces the formatting time
-but should only be used when it is known that the file was already zeroed.
-.PD
-.I Fmtindex
-takes the
-.IR venti.conf (6)
-file
-.I venti.config
-and initializes the index sections to form a usable index structure.
-The arena files and index sections must have previously been formatted
-using 
-.I fmtarenas
-and 
-.I fmtisect
-respectively.
-.PP
-The function of a Venti index is to map a SHA1 fingerprint to a location
-in the data section of one of the arenas.  The index is composed of
-blocks, each of which contains the mapping for a fixed range of possible
-fingerprint values.
-.I Fmtindex
-determines the mapping between SHA1 values and the blocks
-of the collection of index sections.  Once this mapping has been determined,
-it cannot be changed without rebuilding the index. 
-The basic assumption in the current implementation is that the index
-structure is sufficiently empty that individual blocks of the index will rarely
-overflow.  The total size of the index should be about 2% to 10% of
-the total size of the arenas, but the exact depends both the index block size
-and the compressed size of block stored to Venti.
-.PP
-.I Fmtindex
-also computes a mapping between a linear address space and
-the data section of the collection of arenas.  The
-.B -a
-option can be used to add additional arenas to an index.
-To use this feature,
-add the new arenas to
-.I venti.config
-after the existing arenas and then run
-.I fmtindex
-.BR -a .
-.PP
-A copy of the above mappings is stored in the header for each of the index sections.
-These copies enable
-.I buildindex
-to restore a single index section without rebuilding the entire index.
-.PP
-.I Fmtisect
-formats the given
-.IR file ,
-typically a disk partition, as a Venti index section with the specified
-.IR name .
-One or more formatted index sections are combined into a Venti
-index using 
-.IR fmtindex .
-Each of the index sections within an index must have a unique name.
-.PP
-Options to 
-.I fmtisect
-are:
-.TP
-.BI -b " blocksize
-The size, in bytes, for read and write operations to the file.
-All the index sections within a index must have the same block size.
-The default is 8k.
-.TP
-.B -Z
-Do not zero the index.
-Using this option reduces the formatting time
-but should only be used when it is known that the file was already zeroed.
-.PD
-.PP
-.I Rdarena
-extracts the named
-.I arena
-from the arena partition
-.I arenapart
-and writes this arena to standard output.
-This command is typically used to back up an arena to external media.
-The
-.B -v
-option generates more verbose output on standard error.
-.PP
-.I Wrarena
-writes the blocks contained in the arena
-.I arenafile
-(typically, the output of
-.IR rdarena )
-to a Venti server.
-It is typically used to reinitialize a Venti server from backups of the arenas.
-For example,
-.IP
-.EX
-venti/rdarena /dev/sdC0/arenas arena.0 >external.media
-venti/wrarena -h venti2 external.media
-.EE
-.LP
-writes the blocks contained in
-.B arena.0
-to the Venti server
-.B venti2
-(typically not the one using
-.BR /dev/sdC0/arenas ).
-.PP
-The
-.B -o
-option specifies that the arena starts at byte
-.I fileoffset
-(default
-.BR 0 )
-in
-.I arenafile .
-This is useful for reading directly from
-the Venti arena partition:
-.IP
-.EX
-venti/wrarena -h venti2 -o 335872 /dev/sdC0/arenas
-.EE
-.LP
-(In this example, 335872 is the offset shown in the Venti
-server's index list (344064) minus one block (8192).
-You will need to substitute your own arena offsets
-and block size.)
-.PP
-Finally, the optional
-.I offset
-argument specifies that the writing should begin with the
-clump starting at
-.I offset
-within the arena.
-.I Wrarena
-prints the offset it stopped at (because there were no more data blocks).
-This could be used to incrementally back up a Venti server
-to another Venti server:
-.IP
-.EX
-last=`{cat last}
-venti/wrarena -h venti2 -o 335872 /dev/sdC0/arenas $last >output
-awk '/^end offset/ { print $3 }' offset >last
-.EE
-.LP
-Of course, one would need to add wrapper code to keep track
-of which arenas have been processed.
-See
-.B /sys/src/cmd/venti/backup.example
-for a version that does this.
-.PP
-.I Read
-and
-.I write
-read and write blocks from a running Venti server.
-They are intended to ease debugging of the server.
-The default
-.I host
-is the environment variable
-.BR $venti ,
-followed by the network metaname
-.BR $venti .
-The
-.I type
-is the decimal type of block to be read or written.
-If no 
-.I type
-is specified for
-.I read ,
-all types are tried, and a command-line is printed to
-show the type that eventually worked.
-If no
-.I type
-is specified for
-.I write ,
-.B VtDataType
-(13)
-is used.
-.I Read
-reads the block named by
-.I score
-(a SHA1 hash)
-from the Venti server and writes it to standard output.
-.I Write
-reads a block from standard input and attempts to write
-it to the Venti server.
-If successful, it prints the score of the block on the server.
-.PP
-.I Copy
-walks the entire tree of blocks rooted at
-.I score ,
-copying all the blocks visited during the walk from
-the Venti server at network address
-.I src
-to the Venti server at network address
-.I dst .
-If
-.I type
-(a decimal block type for
-.IR score )
-is omitted, all types will be tried in sequence
-until one is found that works.
-The
-.B -f
-flag runs the copy in ``fast'' mode: if a block is already on
-.IR dst ,
-the walk does not descend below it, on the assumption that all its
-children are also already on
-.IR dst .
-Without this flag, the copy often transfers many times more
-data than necessary.
-The
-.B -q
-option suppresses messages in fast mode about skipping duplicate blocks.
-.PP
-To make it easier to bootstrap servers, the configuration
-file can be stored at the beginning of any Venti partitions using
-.IR conf .
-A partition so branded with a configuration file can
-be used in place of a configuration file when invoking any
-of the venti commands.
-By default,
-.I conf
-prints the configuration stored in
-.IR partition .
-When invoked with the
-.B -w
-flag,
-.I conf
-reads a configuration file from 
-.I configfile
-(or else standard input)
-and stores it in
-.IR partition .
-.SH SOURCE
-.B /sys/src/cmd/venti
-.SH "SEE ALSO"
-.IR venti (8),
-.IR venti.conf (6)
-.SH BUGS
-.I Buildindex
-should allow an individual index section to be rebuilt.
-The merge sort could be performed in the space used to store the
-index rather than requiring a temporary file.

+ 3 - 6
sys/src/cmd/aquarela/nblistener.c

@@ -1,7 +1,6 @@
 #include "headers.h"
 
 static char *hmsg = "headers";
-static char *ohmsg = "oldheaders";
 
 int nbudphdrsize;
 
@@ -9,9 +8,8 @@ char *
 nbudpannounce(ushort port, int *fdp)
 {
 	int data, ctl;
-	char dir[64];
-	char datafile[64+6];
-	char addr[NETPATHLEN];
+	char dir[64], datafile[64+6], addr[NETPATHLEN];
+
 	snprint(addr, sizeof(addr), "udp!*!%d", port);
 	/* get a udp port */
 	ctl = announce(addr, dir);
@@ -21,9 +19,8 @@ nbudpannounce(ushort port, int *fdp)
 
 	/* turn on header style interface */
 	nbudphdrsize = Udphdrsize;
-	if (write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)) {
+	if (write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg))
 		return "failed to turn on headers";
-	}
 	data = open(datafile, ORDWR);
 	if (data < 0) {
 		close(ctl);

+ 10 - 3
sys/src/cmd/cwfs/main.c

@@ -105,8 +105,15 @@ mapinit(char *mapfile)
 		map->from = strdup(fields[0]);
 		map->to =   strdup(fields[1]);
 		map->fdev = iconfig(fields[0]);
-		if(testconfig(fields[1]) == 0)
-			map->tdev = iconfig(fields[1]);
+		map->tdev = nil;
+		if (access(map->to, AEXIST) < 0) {
+			/*
+			 * map->to isn't an existing file, so it had better be
+			 * a config string for a device.
+			 */
+			if(testconfig(fields[1]) == 0)
+				map->tdev = iconfig(fields[1]);
+		}
 		/* else map->to is the replacement file name */
 		map->next = devmap;
 		devmap = map;
@@ -555,7 +562,7 @@ inqsize(char *file)
 	strcpy(data, file);
 	end = strstr(data, "/data");
 	if (end == nil)
-		strcat(end, "/ctl");
+		strcat(data, "/ctl");
 	else
 		strcpy(end, "/ctl");
 	bp = Bopen(data, OREAD);

+ 1 - 1
sys/src/cmd/fossil/mkfile

@@ -37,7 +37,7 @@ LIBOFILES=${LIBFILES:%=%.$O}
 LIB=libfs.a.$O
 
 HFILES=\
-	/sys/include/venti.h\
+	/sys/include/oventi.h\
 	stdinc.h\
 	vac.h\
 	dat.h\

+ 1 - 1
sys/src/cmd/fossil/stdinc.h

@@ -5,7 +5,7 @@ typedef uvlong	u64int;
 typedef	uchar	u8int;
 typedef ushort	u16int;
 
-#include "venti.h"
+#include "oventi.h"
 #include "vac.h"
 #include "fs.h"
 

+ 1 - 0
sys/src/cmd/oventi/README

@@ -0,0 +1 @@
+This is the original venti, stripped down to just oventi/copy.

+ 0 - 0
sys/src/cmd/venti/backup.example → sys/src/cmd/oventi/backup.example


+ 0 - 0
sys/src/cmd/venti/conf.rc → sys/src/cmd/oventi/conf.rc


+ 193 - 0
sys/src/cmd/oventi/copy.c

@@ -0,0 +1,193 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int fast;
+static int quiet;
+
+VtSession *zsrc, *zdst;
+
+/*
+ * Print the command-line synopsis on file descriptor 2 and exit
+ * with the status string "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: copy [-fq] src-host dst-host score [type]\n");
+	exits("usage");
+}
+
+/*
+ * Decode the first VtScoreSize*2 characters of buf, which must all be
+ * hexadecimal digits (either case), into the VtScoreSize-byte score.
+ * n is the number of characters available in buf; characters beyond
+ * the first VtScoreSize*2 are not examined.
+ * Returns 1 on success.  Returns 0 if buf is too short (score is then
+ * all zeros) or if a non-hex character is met (score then holds
+ * whatever was decoded before that character, the rest zero).
+ */
+int
+parseScore(uchar *score, char *buf, int n)
+{
+	int pos, digit;
+	char ch;
+
+	memset(score, 0, VtScoreSize);
+	if(n < VtScoreSize*2)
+		return 0;
+
+	for(pos = 0; pos < VtScoreSize*2; pos++){
+		ch = buf[pos];
+		if('0' <= ch && ch <= '9')
+			digit = ch - '0';
+		else if('a' <= ch && ch <= 'f')
+			digit = 10 + (ch - 'a');
+		else if('A' <= ch && ch <= 'F')
+			digit = 10 + (ch - 'A');
+		else
+			return 0;
+
+		/* even positions supply the high nibble, odd positions the low */
+		if(pos % 2 == 0)
+			score[pos/2] |= digit << 4;
+		else
+			score[pos/2] |= digit;
+	}
+	return 1;
+}
+
+/*
+ * Copy the block named by score, of the given type, from zsrc to
+ * zdst.  The blocks it refers to are walked first, so a block is
+ * written to zdst only after its children have been visited.
+ * Base is the type given to the leaves beneath pointer blocks: the
+ * scores inside a VtPointerType0 block are walked as type base.
+ *
+ * The zero score is skipped.  In fast mode a block that zdst can
+ * already supply is skipped together with everything beneath it.
+ * Problems are reported as warnings on file descriptor 2; a block
+ * that cannot be read from zsrc is skipped along with its subtree.
+ */
+void
+walk(uchar score[VtScoreSize], uint type, int base)
+{
+	int i, n, sub;
+	uchar *buf;
+	VtEntry e;
+	VtRoot root;
+
+	if(memcmp(score, vtZeroScore, VtScoreSize) == 0)
+		return;
+
+	buf = vtMemAllocZ(VtMaxLumpSize);
+	if(fast && vtRead(zdst, score, type, buf, VtMaxLumpSize) >= 0){
+		if(!quiet)
+			fprint(2, "%V already exists on dst server; skipping.\n", score);
+		free(buf);
+		return;
+	}
+
+	n = vtRead(zsrc, score, type, buf, VtMaxLumpSize);
+	/*
+	 * we usually see this at the end of a venti/copy of a vac tree:
+	 * warning: could not read block \
+	 * 0000000000000000000000000000000000000000 1: \
+	 * no block with that score exists
+	 * maybe it's harmless.
+	 */
+	if(n < 0){
+		fprint(2, "warning: could not read block %V %d: %R\n",
+			score, type);
+		/* release the buffer on this path too; it used to leak here */
+		free(buf);
+		return;
+	}
+
+	switch(type){
+	case VtRootType:
+		if(!vtRootUnpack(&root, buf)){
+			fprint(2, "warning: could not unpack root in %V %d\n", score, type);
+			break;
+		}
+		/* a root names a directory tree and the previous root */
+		walk(root.score, VtDirType, 0);
+		walk(root.prev, VtRootType, 0);
+		break;
+
+	case VtDirType:
+		for(i=0; i<n/VtEntrySize; i++){
+			if(!vtEntryUnpack(&e, buf, i)){
+				fprint(2, "warning: could not unpack entry #%d in %V %d\n", i, score, type);
+				continue;
+			}
+			if(!(e.flags & VtEntryActive))
+				continue;
+			/* the entry's flags decide what its leaves are */
+			if(e.flags&VtEntryDir)
+				base = VtDirType;
+			else
+				base = VtDataType;
+			/* depth 0: the score names a leaf; otherwise a pointer block */
+			if(e.depth == 0)
+				sub = base;
+			else
+				sub = VtPointerType0+e.depth-1;
+			walk(e.score, sub, base);
+		}
+		break;
+
+	case VtDataType:
+		break;
+
+	default:	/* pointers */
+		/* each pointer level refers to the level below; level 0 refers to leaves */
+		if(type == VtPointerType0)
+			sub = base;
+		else
+			sub = type-1;
+		for(i=0; i<n; i+=VtScoreSize)
+			if(memcmp(buf+i, vtZeroScore, VtScoreSize) != 0)
+				walk(buf+i, sub, base);
+		break;
+	}
+
+	if(!vtWrite(zdst, score, type, buf, n))
+		fprint(2, "warning: could not write block %V %d: %R\n", score, type);
+	free(buf);
+}
+
+/*
+ * copy [-fq] src-host dst-host score [type]
+ *
+ * Dial both Venti servers, then walk the tree of blocks rooted at
+ * score, copying it from the source to the destination.  If no type
+ * is given, each type from 0 up to VtMaxType is tried against the
+ * source until a read succeeds.  The destination is synced before
+ * exiting.
+ */
+void
+main(int argc, char *argv[])
+{
+	int type, nread;
+	uchar score[VtScoreSize];
+	uchar *probe;
+
+	ARGBEGIN{
+	case 'q':
+		quiet = 1;
+		break;
+	case 'f':
+		fast = 1;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc < 3 || argc > 4)
+		usage();
+
+	vtAttach();
+
+	fmtinstall('V', vtScoreFmt);
+	fmtinstall('R', vtErrFmt);
+
+	if(!parseScore(score, argv[2], strlen(argv[2])))
+		vtFatal("could not parse score: %s", vtGetError());
+
+	/* scratch space for the probe reads below; walk reads the block again itself */
+	probe = vtMemAllocZ(VtMaxLumpSize);
+
+	zsrc = vtDial(argv[0], 0);
+	if(zsrc == nil)
+		vtFatal("could not dial src server: %R");
+	if(!vtConnect(zsrc, 0))
+		sysfatal("vtConnect src: %r");
+
+	zdst = vtDial(argv[1], 0);
+	if(zdst == nil)
+		vtFatal("could not dial dst server: %R");
+	if(!vtConnect(zdst, 0))
+		sysfatal("vtConnect dst: %r");
+
+	if(argc != 4){
+		/* no type given: take the first type the source can read */
+		type = 0;
+		while(type < VtMaxType){
+			nread = vtRead(zsrc, score, type, probe, VtMaxLumpSize);
+			if(nread >= 0)
+				break;
+			type++;
+		}
+		if(type == VtMaxType)
+			vtFatal("could not find block %V of any type", score);
+	}else{
+		/* explicit type: the root block must be readable as that type */
+		type = atoi(argv[3]);
+		nread = vtRead(zsrc, score, type, probe, VtMaxLumpSize);
+		if(nread < 0)
+			vtFatal("could not read block: %R");
+	}
+
+	walk(score, type, VtDirType);
+
+	if(!vtSync(zdst))
+		vtFatal("could not sync dst server: %R");
+
+	vtDetach();
+	exits(0);
+}

+ 0 - 0
sys/src/cmd/venti/dat.h → sys/src/cmd/oventi/dat.h


+ 0 - 0
sys/src/cmd/venti/dumpvacroots → sys/src/cmd/oventi/dumpvacroots


+ 0 - 0
sys/src/cmd/venti/fns.h → sys/src/cmd/oventi/fns.h


+ 20 - 0
sys/src/cmd/oventi/mkfile

@@ -0,0 +1,20 @@
+</$objtype/mkfile
+
+TARG=copy
+HFILES=	dat.h\
+	fns.h\
+	stdinc.h\
+
+CFILES=${TARG:%=%.c}
+OFILES=${TARG:%=%.$O}
+
+UPDATE=mkfile\
+	$HFILES\
+	$CFILES\
+
+BIN=/$objtype/bin/oventi
+
+</sys/src/cmd/mkone
+
+# Generate acid type definitions from copy.c, the only C source this
+# mkfile builds (CFILES is derived from TARG=copy).
+acid:D:	$HFILES copy.c
+	$CC -a copy.c >$target || rm $target

+ 0 - 0
sys/src/cmd/venti/notes → sys/src/cmd/oventi/notes


+ 1 - 1
sys/src/cmd/venti/stdinc.h → sys/src/cmd/oventi/stdinc.h

@@ -1,7 +1,7 @@
 #include <u.h>
 #include <libc.h>
 #include <stdio.h>
-#include <venti.h>
+#include <oventi.h>
 
 typedef uvlong	u64int;
 typedef	uchar	u8int;

+ 0 - 0
sys/src/cmd/venti/venti.conf → sys/src/cmd/oventi/venti.conf


+ 0 - 0
sys/src/cmd/venti/wrtape → sys/src/cmd/oventi/wrtape


+ 1 - 0
sys/src/cmd/vac/cache.c

@@ -448,6 +448,7 @@ found:
 	
 	n = vtRead(c->z, score, type, b->data, size);
 	if(n < 0) {
+		fprint(2, "read %V: %r\n", score);
 		lumpDecRef(b, 1);
 		return nil;
 	}

+ 3 - 1
sys/src/cmd/vac/fs.c

@@ -73,8 +73,10 @@ vfsOpen(VtSession *z, char *file, int readOnly, long ncache)
 		return nil;
 
 	n = vtRead(z, score, VtRootType, buf, VtRootSize);
-	if(n < 0)
+	if(n < 0){
+		fprint(2, "cannot read %V: %r\n", score);
 		return nil;
+	}
 	if(n != VtRootSize) {
 		vtSetError("vtRead on root too short");
 		return nil;

+ 1 - 1
sys/src/cmd/vac/mkfile

@@ -11,7 +11,7 @@ LIBFILES=\
 LIB=${LIBFILES:%=%.$O}
 
 HFILES=\
-	/sys/include/venti.h\
+	/sys/include/oventi.h\
 	stdinc.h\
 	error.h\
 	vac.h\

+ 1 - 1
sys/src/cmd/vac/stdinc.h

@@ -1,7 +1,7 @@
 #include <u.h>
 #include <libc.h>
 
-#include "venti.h"
+#include "oventi.h"
 
 typedef uvlong	u64int;
 typedef	uchar	u8int;

+ 0 - 649
sys/src/cmd/venti/arena.c

@@ -1,649 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-typedef struct ASum ASum;
-
-struct ASum
-{
-	Arena	*arena;
-	ASum	*next;
-};
-
-static void	sealArena(Arena *arena);
-static int	okArena(Arena *arena);
-static int	loadArena(Arena *arena);
-static CIBlock	*getCIB(Arena *arena, int clump, int writing, CIBlock *rock);
-static void	putCIB(Arena *arena, CIBlock *cib);
-static void	doASum(void *);
-
-static VtLock	*sumLock;
-static VtRendez	*sumWait;
-static ASum	*sumq;
-
-int
-initArenaSum(void)
-{
-	sumLock = vtLockAlloc();
-	sumWait = vtRendezAlloc(sumLock);
-	if(vtThread(doASum, nil) < 0){
-		setErr(EOk, "can't start arena checksum slave: %R");
-		return 0;
-	}
-	return 1;
-}
-
-/*
- * make an Arena, and initialize it based upon the disk header and trailer.
- */
-Arena*
-initArena(Part *part, u64int base, u64int size, u32int blockSize)
-{
-	Arena *arena;
-
-	arena = MKZ(Arena);
-	arena->part = part;
-	arena->blockSize = blockSize;
-	arena->clumpMax = arena->blockSize / ClumpInfoSize;
-	arena->base = base + blockSize;
-	arena->size = size - 2 * blockSize;
-	arena->lock = vtLockAlloc();
-
-	if(!loadArena(arena)){
-		setErr(ECorrupt, "arena header or trailer corrupted");
-		freeArena(arena);
-		return nil;
-	}
-	if(!okArena(arena)){
-		freeArena(arena);
-		return nil;
-	}
-
-	if(arena->sealed && scoreEq(zeroScore, arena->score))
-		backSumArena(arena);
-
-	return arena;
-}
-
-void
-freeArena(Arena *arena)
-{
-	if(arena == nil)
-		return;
-	if(arena->cib.data != nil){
-		putDBlock(arena->cib.data);
-		arena->cib.data = nil;
-	}
-	vtLockFree(arena->lock);
-	free(arena);
-}
-
-Arena*
-newArena(Part *part, char *name, u64int base, u64int size, u32int blockSize)
-{
-	Arena *arena;
-
-	if(!nameOk(name)){
-		setErr(EOk, "illegal arena name", name);
-		return nil;
-	}
-	arena = MKZ(Arena);
-	arena->part = part;
-	arena->version = ArenaVersion;
-	arena->blockSize = blockSize;
-	arena->clumpMax = arena->blockSize / ClumpInfoSize;
-	arena->base = base + blockSize;
-	arena->size = size - 2 * blockSize;
-	arena->lock = vtLockAlloc();
-
-	nameCp(arena->name, name);
-
-	if(!wbArena(arena) || !wbArenaHead(arena)){
-		freeArena(arena);
-		return nil;
-	}
-
-	return arena;
-}
-
-int
-readClumpInfo(Arena *arena, int clump, ClumpInfo *ci)
-{
-	CIBlock *cib, r;
-
-	cib = getCIB(arena, clump, 0, &r);
-	if(cib == nil)
-		return 0;
-	unpackClumpInfo(ci, &cib->data->data[cib->offset]);
-	putCIB(arena, cib);
-	return 1;
-}
-
-int
-readClumpInfos(Arena *arena, int clump, ClumpInfo *cis, int n)
-{
-	CIBlock *cib, r;
-	int i;
-
-	for(i = 0; i < n; i++){
-		cib = getCIB(arena, clump + i, 0, &r);
-		if(cib == nil)
-			break;
-		unpackClumpInfo(&cis[i], &cib->data->data[cib->offset]);
-		putCIB(arena, cib);
-	}
-	return i;
-}
-
-/*
- * write directory information for one clump
- * must be called the arena locked
- */
-int
-writeClumpInfo(Arena *arena, int clump, ClumpInfo *ci)
-{
-	CIBlock *cib, r;
-
-	cib = getCIB(arena, clump, 1, &r);
-	if(cib == nil)
-		return 0;
-	packClumpInfo(ci, &cib->data->data[cib->offset]);
-	putCIB(arena, cib);
-	return 1;
-}
-
-u64int
-arenaDirSize(Arena *arena, u32int clumps)
-{
-	return ((clumps / arena->clumpMax) + 1) * arena->blockSize;
-}
-
-/*
- * read a clump of data
- * n is a hint of the size of the data, not including the header
- * make sure it won't run off the end, then return the number of bytes actually read
- */
-u32int
-readArena(Arena *arena, u64int aa, u8int *buf, long n)
-{
-	DBlock *b;
-	u64int a;
-	u32int blockSize, off, m;
-	long nn;
-
-	if(n == 0)
-		return 0;
-
-	vtLock(arena->lock);
-	a = arena->size - arenaDirSize(arena, arena->clumps);
-	vtUnlock(arena->lock);
-	if(aa >= a){
-		setErr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->clumps, aa, a, arena->size - arenaDirSize(arena, arena->clumps - 1));
-		return 0;
-	}
-	if(aa + n > a)
-		n = a - aa;
-
-	blockSize = arena->blockSize;
-	a = arena->base + aa;
-	off = a & (blockSize - 1);
-	a -= off;
-	nn = 0;
-	for(;;){
-		b = getDBlock(arena->part, a, 1);
-		if(b == nil)
-			return 0;
-		m = blockSize - off;
-		if(m > n - nn)
-			m = n - nn;
-		memmove(&buf[nn], &b->data[off], m);
-		putDBlock(b);
-		nn += m;
-		if(nn == n)
-			break;
-		off = 0;
-		a += blockSize;
-	}
-	return n;
-}
-
-/*
- * write some data to the clump section at a given offset
- * used to fix up corrupted arenas.
- */
-u32int
-writeArena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
-{
-	DBlock *b;
-	u64int a;
-	u32int blockSize, off, m;
-	long nn;
-	int ok;
-
-	if(n == 0)
-		return 0;
-
-	vtLock(arena->lock);
-	a = arena->size - arenaDirSize(arena, arena->clumps);
-	if(aa >= a || aa + n > a){
-		vtUnlock(arena->lock);
-		setErr(EOk, "writing beyond arena clump storage");
-		return 0;
-	}
-
-	blockSize = arena->blockSize;
-	a = arena->base + aa;
-	off = a & (blockSize - 1);
-	a -= off;
-	nn = 0;
-	for(;;){
-		b = getDBlock(arena->part, a, off != 0 || off + n < blockSize);
-		if(b == nil){
-			vtUnlock(arena->lock);
-			return 0;
-		}
-		m = blockSize - off;
-		if(m > n - nn)
-			m = n - nn;
-		memmove(&b->data[off], &clbuf[nn], m);
-		ok = writePart(arena->part, a, b->data, blockSize);
-		putDBlock(b);
-		if(!ok){
-			vtUnlock(arena->lock);
-			return 0;
-		}
-		nn += m;
-		if(nn == n)
-			break;
-		off = 0;
-		a += blockSize;
-	}
-	vtUnlock(arena->lock);
-	return n;
-}
-
-/*
- * allocate space for the clump and write it,
- * updating the arena directory
-ZZZ question: should this distinguish between an arena
-filling up and real errors writing the clump?
- */
-u64int
-writeAClump(Arena *arena, Clump *c, u8int *clbuf)
-{
-	DBlock *b;
-	u64int a, aa;
-	u32int clump, n, nn, m, off, blockSize;
-	int ok;
-
-	n = c->info.size + ClumpSize;
-	vtLock(arena->lock);
-	aa = arena->used;
-	if(arena->sealed
-	|| aa + n + U32Size + arenaDirSize(arena, arena->clumps + 1) > arena->size){
-		if(!arena->sealed)
-			sealArena(arena);
-		vtUnlock(arena->lock);
-		return TWID64;
-	}
-	if(!packClump(c, &clbuf[0])){
-		vtUnlock(arena->lock);
-		return TWID64;
-	}
-
-	/*
-	 * write the data out one block at a time
-	 */
-	blockSize = arena->blockSize;
-	a = arena->base + aa;
-	off = a & (blockSize - 1);
-	a -= off;
-	nn = 0;
-	for(;;){
-		b = getDBlock(arena->part, a, off != 0);
-		if(b == nil){
-			vtUnlock(arena->lock);
-			return TWID64;
-		}
-		m = blockSize - off;
-		if(m > n - nn)
-			m = n - nn;
-		memmove(&b->data[off], &clbuf[nn], m);
-		ok = writePart(arena->part, a, b->data, blockSize);
-		putDBlock(b);
-		if(!ok){
-			vtUnlock(arena->lock);
-			return TWID64;
-		}
-		nn += m;
-		if(nn == n)
-			break;
-		off = 0;
-		a += blockSize;
-	}
-
-	arena->used += c->info.size + ClumpSize;
-	arena->uncsize += c->info.uncsize;
-	if(c->info.size < c->info.uncsize)
-		arena->cclumps++;
-
-	clump = arena->clumps++;
-	if(arena->clumps == 0)
-		fatal("clumps wrapped\n");
-	arena->wtime = now();
-	if(arena->ctime == 0)
-		arena->ctime = arena->wtime;
-
-	writeClumpInfo(arena, clump, &c->info);
-//ZZZ make this an enum param
-	if((clump & 0x1ff) == 0x1ff){
-		flushCIBlocks(arena);
-		wbArena(arena);
-	}
-
-	vtUnlock(arena->lock);
-	return aa;
-}
-
-/*
- * once sealed, an arena never has any data added to it.
- * it should only be changed to fix errors.
- * this also syncs the clump directory.
- */
-static void
-sealArena(Arena *arena)
-{
-	flushCIBlocks(arena);
-	arena->sealed = 1;
-	wbArena(arena);
-	backSumArena(arena);
-}
-
-void
-backSumArena(Arena *arena)
-{
-	ASum *as;
-
-	if(sumLock == nil)
-		return;
-	as = MK(ASum);
-	if(as == nil)
-		return;
-	vtLock(sumLock);
-	as->arena = arena;
-	as->next = sumq;
-	sumq = as;
-	vtWakeup(sumWait);
-	vtUnlock(sumLock);
-}
-
-static void
-doASum(void *unused)
-{
-	ASum *as;
-	Arena *arena;
-
-	if(unused){;}
-
-	for(;;){
-		vtLock(sumLock);
-		while(sumq == nil)
-			vtSleep(sumWait);
-		as = sumq;
-		sumq = as->next;
-		vtUnlock(sumLock);
-		arena = as->arena;
-		free(as);
-
-		sumArena(arena);
-	}
-}
-
-void
-sumArena(Arena *arena)
-{
-	ZBlock *b;
-	VtSha1 *s;
-	u64int a, e;
-	u32int bs;
-	u8int score[VtScoreSize];
-
-	bs = MaxIoSize;
-	if(bs < arena->blockSize)
-		bs = arena->blockSize;
-
-	s = vtSha1Alloc();
-	if(s == nil){
-		logErr(EOk, "sumArena can't initialize sha1 state");
-		return;
-	}
-
-	/*
-	 * read & sum all blocks except the last one
-	 */
-	vtSha1Init(s);
-	b = allocZBlock(bs, 0);
-	e = arena->base + arena->size;
-	for(a = arena->base - arena->blockSize; a + arena->blockSize <= e; a += bs){
-		if(a + bs > e)
-			bs = arena->blockSize;
-		if(!readPart(arena->part, a, b->data, bs))
-			goto ReadErr;
-		vtSha1Update(s, b->data, bs);
-	}
-
-	/*
-	 * the last one is special, since it may already have the checksum included
-	 */
-	bs = arena->blockSize;
-	if(!readPart(arena->part, e, b->data, bs)){
-ReadErr:
-		logErr(EOk, "sumArena can't sum %s, read at %lld failed: %r", arena->name, a);
-		freeZBlock(b);
-		vtSha1Free(s);
-		return;
-	}
-
-	vtSha1Update(s, b->data, bs - VtScoreSize);
-	vtSha1Update(s, zeroScore, VtScoreSize);
-	vtSha1Final(s, score);
-	vtSha1Free(s);
-
-	/*
-	 * check for no checksum or the same
-	 */
-	if(!scoreEq(score, &b->data[bs - VtScoreSize])){
-		if(!scoreEq(zeroScore, &b->data[bs - VtScoreSize]))
-			logErr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
-				arena->name, &b->data[bs - VtScoreSize], score);
-		scoreCp(&b->data[bs - VtScoreSize], score);
-		if(!writePart(arena->part, e, b->data, bs))
-			logErr(EOk, "sumArena can't write sum for %s: %r", arena->name);
-	}
-	freeZBlock(b);
-
-	vtLock(arena->lock);
-	scoreCp(arena->score, score);
-	vtUnlock(arena->lock);
-}
-
-/*
- * write the arena trailer block to the partition
- */
-int
-wbArena(Arena *arena)
-{
-	ZBlock *b;
-	int ok;
-
-	b = allocZBlock(arena->blockSize, 1);
-	if(b == nil){
-		logErr(EAdmin, "can't write arena trailer: %R");
-///ZZZ add error message?
-		return 0;
-	}
-	ok = okArena(arena) && packArena(arena, b->data)
-		&& writePart(arena->part, arena->base + arena->size, b->data, arena->blockSize);
-	freeZBlock(b);
-	return ok;
-}
-
-int
-wbArenaHead(Arena *arena)
-{
-	ZBlock *b;
-	ArenaHead head;
-	int ok;
-
-	nameCp(head.name, arena->name);
-	head.version = arena->version;
-	head.size = arena->size + 2 * arena->blockSize;
-	head.blockSize = arena->blockSize;
-	b = allocZBlock(arena->blockSize, 1);
-	if(b == nil){
-		logErr(EAdmin, "can't write arena header: %R");
-///ZZZ add error message?
-		return 0;
-	}
-	ok = packArenaHead(&head, b->data)
-		&& writePart(arena->part, arena->base - arena->blockSize, b->data, arena->blockSize);
-	freeZBlock(b);
-	return ok;
-}
-
-/*
- * read the arena header and trailer blocks from disk
- */
-static int
-loadArena(Arena *arena)
-{
-	ArenaHead head;
-	ZBlock *b;
-
-	b = allocZBlock(arena->blockSize, 0);
-	if(b == nil)
-		return 0;
-	if(!readPart(arena->part, arena->base + arena->size, b->data, arena->blockSize)){
-		freeZBlock(b);
-		return 0;
-	}
-	if(!unpackArena(arena, b->data)){
-		freeZBlock(b);
-		return 0;
-	}
-	if(arena->version != ArenaVersion){
-		setErr(EAdmin, "unknown arena version %d", arena->version);
-		freeZBlock(b);
-		return 0;
-	}
-	scoreCp(arena->score, &b->data[arena->blockSize - VtScoreSize]);
-
-	if(!readPart(arena->part, arena->base - arena->blockSize, b->data, arena->blockSize)){
-		logErr(EAdmin, "can't read arena header: %R");
-		freeZBlock(b);
-		return 1;
-	}
-	if(!unpackArenaHead(&head, b->data))
-		logErr(ECorrupt, "corrupted arena header: %R");
-	else if(!nameEq(arena->name, head.name)
-	     || arena->version != head.version
-	     || arena->blockSize != head.blockSize
-	     || arena->size + 2 * arena->blockSize != head.size)
-		logErr(ECorrupt, "arena header inconsistent with arena data");
-	freeZBlock(b);
-
-	return 1;
-}
-
-static int
-okArena(Arena *arena)
-{
-	u64int dsize;
-	int ok;
-
-	ok = 1;
-	dsize = arenaDirSize(arena, arena->clumps);
-	if(arena->used + dsize > arena->size){
-		setErr(ECorrupt, "arena used > size");
-		ok = 0;
-	}
-
-	if(arena->cclumps > arena->clumps)
-		logErr(ECorrupt, "arena has more compressed clumps than total clumps");
-
-	if(arena->uncsize + arena->clumps * ClumpSize + arena->blockSize < arena->used)
-		logErr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", arena->uncsize, arena->clumps, arena->used);
-
-	if(arena->ctime > arena->wtime)
-		logErr(ECorrupt, "arena creation time after last write time");
-
-	return ok;
-}
-
-static CIBlock*
-getCIB(Arena *arena, int clump, int writing, CIBlock *rock)
-{
-	CIBlock *cib;
-	u32int block, off;
-
-	if(clump >= arena->clumps){
-		setErr(EOk, "clump directory access out of range");
-		return nil;
-	}
-	block = clump / arena->clumpMax;
-	off = (clump - block * arena->clumpMax) * ClumpInfoSize;
-
-	if(arena->cib.block == block
-	&& arena->cib.data != nil){
-		arena->cib.offset = off;
-		return &arena->cib;
-	}
-
-	if(writing){
-		flushCIBlocks(arena);
-		cib = &arena->cib;
-	}else
-		cib = rock;
-
-	vtLock(stats.lock);
-	stats.ciReads++;
-	vtUnlock(stats.lock);
-
-	cib->block = block;
-	cib->offset = off;
-	cib->data = getDBlock(arena->part, arena->base + arena->size - (block + 1) * arena->blockSize, arena->blockSize);
-	if(cib->data == nil)
-		return nil;
-	return cib;
-}
-
-static void
-putCIB(Arena *arena, CIBlock *cib)
-{
-	if(cib != &arena->cib){
-		putDBlock(cib->data);
-		cib->data = nil;
-	}
-}
-
-/*
- * must be called with arena locked
- */
-int
-flushCIBlocks(Arena *arena)
-{
-	int ok;
-
-	if(arena->cib.data == nil)
-		return 1;
-	vtLock(stats.lock);
-	stats.ciWrites++;
-	vtUnlock(stats.lock);
-	ok = writePart(arena->part, arena->base + arena->size - (arena->cib.block + 1) * arena->blockSize, arena->cib.data->data, arena->blockSize);
-
-	if(!ok)
-		setErr(EAdmin, "failed writing arena directory block");
-	putDBlock(arena->cib.data);
-	arena->cib.data = nil;
-	return ok;
-}

+ 0 - 404
sys/src/cmd/venti/arenas.c

@@ -1,404 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-typedef struct AHash	AHash;
-
-/*
- * hash table for finding arenas based on their names.
- * each bucket is a singly linked chain of AHash entries;
- * an entry only points at its arena.
- */
-struct AHash
-{
-	AHash	*next;		/* next entry in the same bucket */
-	Arena	*arena;		/* arena registered under this entry */
-};
-
-enum
-{
-	AHashSize	= 512	/* bucket count; users mask with AHashSize - 1 */
-};
-
-static AHash	*ahash[AHashSize];
-
-/*
- * hash a nul-terminated string to a 32-bit value.
- * callers reduce the result with a mask to pick a bucket.
- */
-static u32int
-hashStr(char *s)
-{
-	u32int hash;
-	int c;
-
-	hash = 0;
-	while((c = *s) != 0){
-		c ^= c << 6;
-		hash += (c << 11) ^ (c >> 1);
-		/* second mixing step works on the unmodified character */
-		c = *s++;
-		hash ^= (c << 14) + (c << 7) + (c << 4) + c;
-	}
-	return hash;
-}
-
-/*
- * register arena in the name hash table.
- * the new entry goes at the head of its bucket.
- * returns 1 on success, 0 if the entry can't be allocated.
- */
-int
-addArena(Arena *arena)
-{
-	AHash *ent, **bucket;
-
-	bucket = &ahash[hashStr(arena->name) & (AHashSize - 1)];
-	ent = MK(AHash);
-	if(ent == nil)
-		return 0;
-	ent->arena = arena;
-	ent->next = *bucket;
-	*bucket = ent;
-	return 1;
-}
-
-/*
- * look up a registered arena by exact name.
- * returns nil if no arena with that name has been added.
- */
-Arena*
-findArena(char *name)
-{
-	AHash *ent;
-
-	ent = ahash[hashStr(name) & (AHashSize - 1)];
-	while(ent != nil){
-		if(strcmp(ent->arena->name, name) == 0)
-			return ent->arena;
-		ent = ent->next;
-	}
-	return nil;
-}
-
-/*
- * remove arena from the name hash table.
- * entries are matched by arena pointer, not by name.
- * returns 1 if an entry was removed, 0 if the arena wasn't registered.
- */
-int
-delArena(Arena *arena)
-{
-	AHash *ent, **link;
-
-	/* link always points at the pointer that refers to ent */
-	link = &ahash[hashStr(arena->name) & (AHashSize - 1)];
-	while((ent = *link) != nil){
-		if(ent->arena == arena){
-			*link = ent->next;
-			free(ent);
-			return 1;
-		}
-		link = &ent->next;
-	}
-	return 0;
-}
-
-/*
- * read and validate the arena partition stored on part:
- * unpack the header, load and check the arena map, initialize
- * every arena it lists, and register the arenas in the name table.
- * returns the new ArenaPart, or nil (with the error set) on failure.
- * on failure everything allocated here is released.
- */
-ArenaPart*
-initArenaPart(Part *part)
-{
-	AMapN amn;
-	ArenaPart *ap;
-	ZBlock *b;
-	u32int i;
-	int ok;
-
-	b = allocZBlock(HeadSize, 0);
-	if(b == nil || !readPart(part, PartBlank, b->data, HeadSize)){
-		setErr(EAdmin, "can't read arena partition header: %R");
-		/* don't leak the header block when only the read failed */
-		freeZBlock(b);
-		return nil;
-	}
-
-	ap = MKZ(ArenaPart);
-	if(ap == nil){
-		freeZBlock(b);
-		return nil;
-	}
-	ap->part = part;
-	ok = unpackArenaPart(ap, b->data);
-	freeZBlock(b);
-	if(!ok){
-		setErr(ECorrupt, "corrupted arena partition header: %R");
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-
-	/* the arena table starts at the first block boundary after the header */
-	ap->tabBase = (PartBlank + HeadSize + ap->blockSize - 1) & ~(ap->blockSize - 1);
-	if(ap->version != ArenaPartVersion){
-		setErr(ECorrupt, "unknown arena partition version %d", ap->version);
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-	if(ap->blockSize & (ap->blockSize - 1)){
-		setErr(ECorrupt, "illegal non-power-of-2 block size %d\n", ap->blockSize);
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-	if(ap->tabBase >= ap->arenaBase){
-		setErr(ECorrupt, "arena partition table overlaps with arena storage");
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-	ap->tabSize = ap->arenaBase - ap->tabBase;
-	partBlockSize(part, ap->blockSize);
-	ap->size = ap->part->size & ~(u64int)(ap->blockSize - 1);
-
-	if(!readArenaMap(&amn, part, ap->tabBase, ap->tabSize)){
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-	ap->narenas = amn.n;
-	ap->map = amn.map;
-	if(!okAMap(ap->map, ap->narenas, ap->arenaBase, ap->size, "arena table")){
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-
-	ap->arenas = MKNZ(Arena*, ap->narenas);
-	if(ap->arenas == nil && ap->narenas != 0){
-		/* the loop below would otherwise index a nil array */
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-	for(i = 0; i < ap->narenas; i++){
-		ap->arenas[i] = initArena(part, ap->map[i].start, ap->map[i].stop - ap->map[i].start, ap->blockSize);
-		if(ap->arenas[i] == nil){
-			freeArenaPart(ap, 1);
-			return nil;
-		}
-		if(!nameEq(ap->map[i].name, ap->arenas[i]->name)){
-			setErr(ECorrupt, "arena name mismatches with expected name: %s vs. %s",
-				ap->map[i].name, ap->arenas[i]->name);
-			freeArenaPart(ap, 1);
-			return nil;
-		}
-		/*
-		 * NOTE(review): this only catches a clash with arenas that are
-		 * already registered; arenas of this partition are registered
-		 * after the loop, so two equal names inside one partition are
-		 * not detected here.
-		 */
-		if(findArena(ap->arenas[i]->name)){
-			setErr(ECorrupt, "duplicate arena name %s in %s",
-				ap->map[i].name, ap->part->name);
-			freeArenaPart(ap, 1);
-			return nil;
-		}
-	}
-
-	for(i = 0; i < ap->narenas; i++)
-		addArena(ap->arenas[i]);
-
-	return ap;
-}
-
-/*
- * create an empty arena partition on part and write its header
- * and (empty) arena table to disk.
- * blockSize must be a power of 2; tabSize is the space wanted for
- * the arena table, which is rounded up to a block boundary.
- * returns the new ArenaPart, or nil on failure.
- */
-ArenaPart*
-newArenaPart(Part *part, u32int blockSize, u32int tabSize)
-{
-	ArenaPart *ap;
-
-	if(blockSize & (blockSize - 1)){
-		setErr(ECorrupt, "illegal non-power-of-2 block size %d\n", blockSize);
-		return nil;
-	}
-	ap = MKZ(ArenaPart);
-	if(ap == nil)
-		return nil;
-
-	ap->version = ArenaPartVersion;
-	ap->part = part;
-	ap->blockSize = blockSize;
-	partBlockSize(part, blockSize);
-	/* usable size is the partition size rounded down to whole blocks */
-	ap->size = part->size & ~(u64int)(blockSize - 1);
-	/* table follows the header, arenas follow the table; both block aligned */
-	ap->tabBase = (PartBlank + HeadSize + blockSize - 1) & ~(blockSize - 1);
-	ap->arenaBase = (ap->tabBase + tabSize + blockSize - 1) & ~(blockSize - 1);
-	ap->tabSize = ap->arenaBase - ap->tabBase;
-	ap->narenas = 0;
-
-	if(!wbArenaPart(ap)){
-		freeArenaPart(ap, 0);
-		return nil;
-	}
-
-	return ap;
-}
-
-/*
- * write the arena partition header and then the arena table to disk.
- * the arena map is validated first.
- * returns 1 on success, 0 on failure.
- */
-int
-wbArenaPart(ArenaPart *ap)
-{
-	ZBlock *hdr;
-	int ok;
-
-	if(!okAMap(ap->map, ap->narenas, ap->arenaBase, ap->size, "arena table"))
-		return 0;
-
-	hdr = allocZBlock(HeadSize, 1);
-	if(hdr == nil)
-		return 0;	/* no error message is set here */
-
-	ok = 1;
-	if(!packArenaPart(ap, hdr->data)){
-		setErr(ECorrupt, "can't make arena partition header: %R");
-		ok = 0;
-	}else if(!writePart(ap->part, PartBlank, hdr->data, HeadSize)){
-		setErr(EAdmin, "can't write arena partition header: %R");
-		ok = 0;
-	}
-	freeZBlock(hdr);
-	if(!ok)
-		return 0;
-
-	return wbArenaMap(ap->map, ap->narenas, ap->part, ap->tabBase, ap->tabSize);
-}
-
-/*
- * release an ArenaPart.  when freeArenas is set, each non-nil arena
- * is also removed from the name table and freed.
- * a nil ap is ignored.
- */
-void
-freeArenaPart(ArenaPart *ap, int freeArenas)
-{
-	Arena *arena;
-	int i;
-
-	if(ap == nil)
-		return;
-	for(i = 0; freeArenas && i < ap->narenas; i++){
-		arena = ap->arenas[i];
-		if(arena != nil){
-			delArena(arena);
-			freeArena(arena);
-		}
-	}
-	free(ap->map);
-	free(ap->arenas);
-	free(ap);
-}
-
-/*
- * check that the n ranges in am are each well formed (start <= stop),
- * appear in increasing order without overlap, and all lie
- * within [start, stop].  what names the map in error messages.
- * returns 1 if the map is acceptable, 0 (with the error set) otherwise.
- */
-int
-okAMap(AMap *am, int n, u64int start, u64int stop, char *what)
-{
-	AMap *m;
-	u64int prev;
-	u32int i;
-
-	prev = start;
-	for(i = 0, m = am; i < n; i++, m++){
-		if(m->start < prev){
-			/* the first entry can only collide with the lower bound */
-			if(i != 0)
-				setErr(ECorrupt, "overlapping ranges in %s", what);
-			else
-				setErr(ECorrupt, "invalid start address in %s", what);
-			return 0;
-		}
-		if(m->stop < m->start){
-			setErr(ECorrupt, "invalid range in %s", what);
-			return 0;
-		}
-		prev = m->stop;
-	}
-	if(prev <= stop)
-		return 1;
-	setErr(ECorrupt, "invalid ending address in %s", what);
-	return 0;
-}
-
-/*
- * fill arenas[0..n-1] with the registered arenas named by the map am.
- * what names the owner of the map in the error message.
- * returns 1 if every name was found, 0 at the first missing arena.
- */
-int
-mapArenas(AMap *am, Arena **arenas, int n, char *what)
-{
-	Arena *found;
-	u32int i;
-
-	for(i = 0; i < n; i++){
-		found = findArena(am[i].name);
-		arenas[i] = found;
-		if(found != nil)
-			continue;
-		setErr(EAdmin, "can't find arena '%s' for '%s'\n", am[i].name, what);
-		return 0;
-	}
-	return 1;
-}
-
-/*
- * read and parse the arena map stored in size bytes at base on part.
- * the parsed map is returned through amn.
- * returns nonzero on success, 0 on failure.
- */
-int
-readArenaMap(AMapN *amn, Part *part, u64int base, u32int size)
-{
-	IFile in;
-	u32int parsed;
-
-	if(partIFile(&in, part, base, size)){
-		parsed = parseAMap(&in, amn);
-		freeIFile(&in);
-		return parsed;
-	}
-	return 0;
-}
-
-/*
- * format the n-entry map am as text into a zeroed block of size bytes
- * and write it at base on part.
- * returns 1 on success, 0 if the block can't be allocated, the text
- * doesn't fit, or the write fails.
- */
-int
-wbArenaMap(AMap *am, int n, Part *part, u64int base, u64int size)
-{
-	Fmt out;
-	ZBlock *blk;
-	int ok;
-
-	blk = allocZBlock(size, 1);
-	if(blk == nil)
-		return 0;
-
-	fmtZBInit(&out, blk);
-
-	ok = 0;
-	if(!outputAMap(&out, am, n))
-		setErr(ECorrupt, "arena set size too small");
-	else if(!writePart(part, base, blk->data, size))
-		setErr(EAdmin, "can't write arena set: %R");
-	else
-		ok = 1;
-
-	freeZBlock(blk);
-	return ok;
-}
-
-/*
- * parse a textual arena map from f into a freshly allocated array,
- * returned through amn (amn->map, amn->n).
- * returns 1 on success; on failure returns 0, frees the array,
- * and leaves amn untouched.
- *
- * amap: n '\n' amapelem * n
- * n: u32int
- * amapelem: name '\t' astart '\t' astop '\n'
- * astart, astop: u64int
- */
-int
-parseAMap(IFile *f, AMapN *amn)
-{
-	AMap *am;
-	u64int v64;
-	u32int v;
-	char *s, *flds[4];
-	int i, n;
-
-	/*
-	 * arenas
-	 */
-	if(!ifileU32Int(f, &v)){
-		setErr(ECorrupt, "syntax error: bad number of elements in %s", f->name);
-		return 0;
-	}
-	/* range check the unsigned count before it is narrowed to int */
-	if(v > MaxAMap){
-		setErr(ECorrupt, "illegal number of elements in %s", f->name);
-		return 0;
-	}
-	n = v;
-	am = MKNZ(AMap, n);
-	if(am == nil)
-		return 0;
-	for(i = 0; i < n; i++){
-		s = ifileLine(f);
-		if(s == nil || getfields(s, flds, 4, 0, "\t") != 3){
-			setErr(ECorrupt, "syntax error: bad arena map entry in %s", f->name);
-			free(am);
-			return 0;
-		}
-		if(!nameOk(flds[0])){
-			/* NOTE(review): no error is set here; presumably nameOk reports its own - confirm */
-			free(am);
-			return 0;
-		}
-		nameCp(am[i].name, flds[0]);
-		if(!strU64Int(flds[1], &v64)){
-			setErr(ECorrupt, "syntax error: bad arena base address in %s", f->name);
-			free(am);
-			return 0;
-		}
-		am[i].start = v64;
-		if(!strU64Int(flds[2], &v64)){
-			setErr(ECorrupt, "syntax error: bad arena size in %s", f->name);
-			free(am);
-			return 0;
-		}
-		am[i].stop = v64;
-	}
-
-	amn->map = am;
-	amn->n = n;
-	return 1;
-}
-
-/*
- * print the n-entry map am to f in the text format parseAMap reads:
- * the count on its own line, then one name/start/stop line per entry.
- * returns 1 on success, 0 as soon as a print fails.
- */
-int
-outputAMap(Fmt *f, AMap *am, int n)
-{
-	AMap *m, *end;
-
-	if(fmtprint(f, "%ud\n", n) < 0)
-		return 0;
-	end = am + n;
-	for(m = am; m < end; m++){
-		if(fmtprint(f, "%s\t%llud\t%llud\n", m->name, m->start, m->stop) < 0)
-			return 0;
-	}
-	return 1;
-}

+ 0 - 147
sys/src/cmd/venti/buildindex.c

@@ -1,147 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-/*
- * pack bucket ib into block b and write it to the index section
- * that holds bucket number buck.
- * returns the result of the write, or 0 if no section covers buck
- * or buck lies outside the section that was found.
- */
-static int
-writeBucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
-{
-	ISect *is;
-
-	is = findISect(ix, buck);
-	if(is == nil){
-		setErr(EAdmin, "bad math in writeBucket");
-		return 0;
-	}
-	if(buck < is->start || buck >= is->stop){
-		setErr(EAdmin, "index write out of bounds: %d not in [%d,%d)\n",
-				buck, is->start, is->stop);
-		/* refuse to write outside the section instead of only reporting it */
-		return 0;
-	}
-	/* bucket numbers are relative to the start of their section */
-	buck -= is->start;
-	vtLock(stats.lock);
-	stats.indexWrites++;
-	vtUnlock(stats.lock);
-	packIBucket(ib, b->data);
-	return writePart(is->part, is->blockBase + ((u64int)buck << is->blockLog), b->data, is->blockSize);
-}
-
-/*
- * build the on-disk index ix from the sorted stream of clumps index
- * entries found at off on part.  buckets are produced in order by
- * buildBucket and written with writeBucket; when zero is set, every
- * bucket skipped between two produced buckets is written out empty.
- * returns 1 if everything was written, 0 if anything failed.
- */
-static int
-buildIndex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
-{
-	IEStream *ies;
-	IBucket ib, zib;
-	ZBlock *z, *b;
-	u32int next, buck;
-	int ok;
-	u64int found = 0;
-
-//ZZZ make buffer size configurable
-	b = allocZBlock(ix->blockSize, 0);
-	z = allocZBlock(ix->blockSize, 1);
-	ies = initIEStream(part, off, clumps, 64*1024);
-	if(b == nil || z == nil || ies == nil){
-		ok = 0;
-		goto breakout;
-	}
-	ok = 1;
-	next = 0;
-	/* entry data lives after the bucket header in each block */
-	ib.data = b->data + IBucketSize;
-	zib.data = z->data + IBucketSize;
-	zib.n = 0;
-	zib.next = 0;
-	for(;;){
-		buck = buildBucket(ix, ies, &ib);
-		found += ib.n;
-		if(zero){
-			/* fill the gap up to buck with empty buckets */
-			for(; next != buck; next++){
-				if(next == ix->buckets){
-					/* TWID32 is the end-of-stream marker; anything else is an error */
-					if(buck != TWID32){
-						fprint(2, "bucket out of range\n");
-						ok = 0;
-					}
-					goto breakout;
-				}
-				if(!writeBucket(ix, next, &zib, z)){
-					fprint(2, "can't write zero bucket to buck=%d: %R\n", next);
-					ok = 0;
-				}
-			}
-		}
-		if(buck >= ix->buckets){
-			if(buck == TWID32)
-				break;
-			fprint(2, "bucket out of range\n");
-			ok = 0;
-			goto breakout;
-		}
-		if(!writeBucket(ix, buck, &ib, b)){
-			fprint(2, "bad bucket found=%lld: %R\n", found);
-			ok = 0;
-		}
-		next = buck + 1;
-	}
-breakout:;
-	fprint(2, "constructed index with %lld entries\n", found);
-	freeIEStream(ies);
-	freeZBlock(z);
-	freeZBlock(b);
-	return ok;
-}
-
-/*
- * print the command synopsis and exit with a failure status.
- */
-void
-usage(void)
-{
-	fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
-	exits("usage");
-}
-
-/*
- * buildindex: rebuild the venti index named in the config file
- * (argv[0]) using the partition argv[1] as temporary storage.
- * -B sets the disk block cache size; -Z turns off writing of
- * empty buckets.
- */
-int
-main(int argc, char *argv[])
-{
-	Part *part;
-	u64int clumps, base;
-	u32int bcmem;
-	int zero;
-
-	vtAttach();
-
-	zero = 1;
-	bcmem = 0;
-	ARGBEGIN{
-	case 'B':
-		/* EARGF prints usage instead of handing nil to unittoull when the size is missing */
-		bcmem = unittoull(EARGF(usage()));
-		break;
-	case 'Z':
-		zero = 0;
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	if(argc != 2)
-		usage();
-
-	if(!initVenti(argv[0], nil))
-		fatal("can't init venti: %R");
-
-	/* enforce a minimum cache size derived from the index layout */
-	if(bcmem < maxBlockSize * (mainIndex->narenas + mainIndex->nsects * 4 + 16))
-		bcmem = maxBlockSize * (mainIndex->narenas + mainIndex->nsects * 4 + 16);
-	fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
-	initDCache(bcmem);
-
-	fprint(2, "building a new index %s using %s for temporary storage\n", mainIndex->name, argv[1]);
-
-	part = initPart(argv[1], 1);
-	if(part == nil)
-		fatal("can't initialize temporary partition: %R");
-
-	clumps = sortRawIEntries(mainIndex, part, &base);
-	if(clumps == TWID64)
-		fatal("can't build sorted index: %R");
-	fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
-
-	if(!buildIndex(mainIndex, part, base, clumps, zero))
-		fatal("can't build new index: %R");
-	
-	exits(0);
-	return 0;	/* shut up stupid compiler */
-}

+ 0 - 122
sys/src/cmd/venti/checkarenas.c

@@ -1,122 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-static int	verbose;	/* incremented once per -v flag */
-
-/*
- * check one arena by re-syncing it and comparing the resulting
- * header counters with the ones it had on entry.
- * scan: start from zeroed counters so the whole arena is walked.
- * fix: passed on to syncArena, and if any problem was found the
- * header is rewritten with wbArena.
- */
-static void
-checkArena(Arena *arena, int scan, int fix)
-{
-	Arena old;
-	int err, e;
-
-	if(verbose && arena->clumps)
-		printArena(2, arena);
-
-	/* remember the header as read, for the comparison below */
-	old = *arena;
-
-	if(scan){
-		arena->used = 0;
-		arena->clumps = 0;
-		arena->cclumps = 0;
-		arena->uncsize = 0;
-	}
-
-	/* keep syncing for as long as syncArena reports SyncHeader */
-	err = 0;
-	for(;;){
-		e = syncArena(arena, 1000, 0, fix);
-		err |= e;
-		if(!(e & SyncHeader))
-			break;
-		if(verbose && arena->clumps)
-			fprint(2, ".");
-	}
-	if(verbose && arena->clumps)
-		fprint(2, "\n");
-
-	/* SyncHeader from the loop is dropped; it is set again only on a real mismatch */
-	err &= ~SyncHeader;
-	if(arena->used != old.used
-	|| arena->clumps != old.clumps
-	|| arena->cclumps != old.cclumps
-	|| arena->uncsize != old.uncsize){
-		fprint(2, "incorrect arena header fields\n");
-		printArena(2, arena);
-		err |= SyncHeader;
-	}
-
-	if(!err || !fix)
-		return;
-
-	fprint(2, "writing fixed arena header fields\n");
-	if(!wbArena(arena))
-		fprint(2, "arena header write failed: %r\n");
-}
-
-/*
- * print the command synopsis and exit with a failure status.
- */
-void
-usage(void)
-{
-	fprint(2, "usage: checkarenas [-afv] file\n");
-	exits("usage");
-}
-
-/*
- * checkarenas: check every arena in the arena partition stored in file.
- * -f fix problems (without it the global readonly flag is set)
- * -a scan each arena from the beginning
- * -v be verbose; given twice, also print the partition and statistics
- */
-int
-main(int argc, char *argv[])
-{
-	ArenaPart *ap;
-	Part *part;
-	char *file;
-	int i, fix, scan;
-
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
-	vtAttach();
-	statsInit();
-
-	fix = 0;
-	scan = 0;
-	ARGBEGIN{
-	case 'f':
-		fix++;
-		break;
-	case 'a':
-		scan = 1;
-		break;
-	case 'v':
-		verbose++;
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	/* without -f nothing may be written */
-	if(!fix)
-		readonly = 1;
-
-	if(argc != 1)
-		usage();
-
-	file = argv[0];
-
-	part = initPart(file, 0);
-	if(part == nil)
-		fatal("can't open partition %s: %r", file);
-
-	ap = initArenaPart(part);
-	if(ap == nil)
-		fatal("can't initialize arena partition in %s: %R", file);
-
-	if(verbose > 1){
-		printArenaPart(2, ap);
-		fprint(2, "\n");
-	}
-
-	initDCache(8 * MaxDiskBlock);
-
-	for(i = 0; i < ap->narenas; i++)
-		checkArena(ap->arenas[i], scan, fix);
-
-	if(verbose > 1)
-		printStats();
-	exits(0);
-	return 0;	/* shut up stupid compiler */
-}

+ 0 - 189
sys/src/cmd/venti/checkindex.c

@@ -1,189 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-/*
- * compare the expected contents of bucket buck (ib) with the bucket
- * stored in the index (read and unpacked into eib).
- * differences are reported on fd 2: entries with equal keys but
- * different addresses, entries only on disk ("spurious"), and entries
- * only in ib ("missing").
- * returns 1 if no spurious or missing entry was seen, 0 otherwise
- * or if the bucket can't be located or read.
- * NOTE(review): an address mismatch is printed but does not clear ok.
- * NOTE(review): the walk advances through both lists in step, so it
- * presumably relies on both being sorted by ientryCmp - confirm.
- */
-static int
-checkBucket(Index *ix, u32int buck, IBucket *ib)
-{
-	ISect *is;
-	DBlock *eb;
-	IBucket eib;
-	IEntry ie, eie;
-	int i, ei, ok, c;
-
-	is = findISect(ix, buck);
-	if(is == nil){
-		setErr(EAdmin, "bad math in checkBuckets");
-		return 0;
-	}
-	/* bucket numbers are relative to the start of their section */
-	buck -= is->start;
-	eb = getDBlock(is->part, is->blockBase + ((u64int)buck << is->blockLog), 1);
-	if(eb == nil)
-		return 0;
-	unpackIBucket(&eib, eb->data);
-
-	ok = 1;
-	ei = 0;
-	for(i = 0; i < ib->n; i++){
-		/* advance through the on-disk entries until one matches or passes entry i */
-		while(ei < eib.n){
-			c = ientryCmp(&ib->data[i * IEntrySize], &eib.data[ei * IEntrySize]);
-			if(c == 0){
-				unpackIEntry(&ie, &ib->data[i * IEntrySize]);
-				unpackIEntry(&eie, &eib.data[ei * IEntrySize]);
-				if(!iAddrEq(&ie.ia, &eie.ia)){
-					fprint(2, "bad entry in index for score=%V\n", &ib->data[i * IEntrySize]);
-					fprint(2, "\taddr=%lld type=%d size=%d blocks=%d\n",
-						ie.ia.addr, ie.ia.type, ie.ia.size, ie.ia.blocks);
-					fprint(2, "\taddr=%lld type=%d size=%d blocks=%d\n",
-						eie.ia.addr, eie.ia.type, eie.ia.size, eie.ia.blocks);
-				}
-				ei++;
-				goto cont;
-			}
-			/* expected entry sorts before the on-disk one: it is missing */
-			if(c < 0)
-				break;
-if(1)
-			fprint(2, "spurious entry in index for score=%V type=%d\n",
-				&eib.data[ei * IEntrySize], eib.data[ei * IEntrySize + IEntryTypeOff]);
-			ei++;
-			ok = 0;
-		}
-		fprint(2, "missing entry in index for score=%V type=%d\n",
-			&ib->data[i * IEntrySize], ib->data[i * IEntrySize + IEntryTypeOff]);
-		ok = 0;
-	cont:;
-	}
-	/* anything left on disk has no expected counterpart */
-	for(; ei < eib.n; ei++){
-if(1)		fprint(2, "spurious entry in index for score=%V; found %d entries expected %d\n",
-			&eib.data[ei * IEntrySize], eib.n, ib->n);
-		ok = 0;
-	}
-	putDBlock(eb);
-	return ok;
-}
-
-/*
- * check the on-disk index ix against the sorted stream of clumps
- * index entries found at off on part.  expected buckets are produced
- * in order by buildBucket and compared with checkBucket; when zero is
- * set, every bucket skipped between two produced buckets is checked
- * against an empty bucket.
- * returns 1 if the index matches, 0 on any mismatch or failure.
- */
-int
-checkIndex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
-{
-	IEStream *ies;
-	IBucket ib, zib;
-	ZBlock *z, *b;
-	u32int next, buck;
-	int ok, bok;
-	u64int found = 0;
-
-//ZZZ make buffer size configurable
-	b = allocZBlock(ix->blockSize, 0);
-	z = allocZBlock(ix->blockSize, 1);
-	ies = initIEStream(part, off, clumps, 64*1024);
-	if(b == nil || z == nil || ies == nil){
-		ok = 0;
-		goto breakout;
-	}
-	ok = 1;
-	next = 0;
-	ib.data = b->data;
-	zib.data = z->data;
-	zib.n = 0;
-	zib.next = 0;
-	for(;;){
-		buck = buildBucket(ix, ies, &ib);
-		found += ib.n;
-		if(zero){
-			/* buckets in the gap up to buck are expected to be empty */
-			for(; next != buck; next++){
-				if(next == ix->buckets){
-					/* TWID32 is the end-of-stream marker; anything else is a failure */
-					if(buck != TWID32){
-						fprint(2, "bucket out of range\n");
-						ok = 0;
-					}
-					goto breakout;
-				}
-				bok = checkBucket(ix, next, &zib);
-				if(!bok){
-					fprint(2, "bad bucket=%d found: %R\n", next);
-					ok = 0;
-				}
-			}
-		}
-		if(buck >= ix->buckets){
-			if(buck == TWID32)
-				break;
-			fprint(2, "bucket out of range\n");
-			ok = 0;
-			goto breakout;
-		}
-		bok = checkBucket(ix, buck, &ib);
-		if(!bok){
-			fprint(2, "bad bucket found=%lld: %R\n", found);
-			ok = 0;
-		}
-		next = buck + 1;
-	}
-breakout:;
-	fprint(2, "found %lld entries in sorted list\n", found);
-	freeIEStream(ies);
-	freeZBlock(z);
-	freeZBlock(b);
-	return ok;
-}
-
-/*
- * print the command synopsis (including -Z, which main accepts)
- * and exit with a failure status.
- */
-void
-usage(void)
-{
-	fprint(2, "usage: checkindex [-fZ] [-B blockcachesize] config tmp\n");
-	exits("usage");
-}
-
-/*
- * checkindex: compare the venti index named in the config file
- * (argv[0]) with the index entries found in the arenas, using the
- * partition argv[1] as temporary storage.
- * -B sets the disk block cache size; without -f the global readonly
- * flag is set; -Z skips the check of buckets expected to be empty.
- * NOTE(review): the result of checkIndex does not affect the exit status.
- */
-int
-main(int argc, char *argv[])
-{
-	Part *part;
-	u64int clumps, base;
-	u32int bcmem;
-	int fix, skipz;
-
-	vtAttach();
-
-	fix = 0;
-	bcmem = 0;
-	skipz = 0;
-	ARGBEGIN{
-	case 'B':
-		/* EARGF prints usage instead of handing nil to unittoull when the size is missing */
-		bcmem = unittoull(EARGF(usage()));
-		break;
-	case 'f':
-		fix++;
-		break;
-	case 'Z':
-		skipz = 1;
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	if(!fix)
-		readonly = 1;
-
-	if(argc != 2)
-		usage();
-
-	if(!initVenti(argv[0], nil))
-		fatal("can't init venti: %R");
-
-	/* enforce a minimum cache size derived from the index layout */
-	if(bcmem < maxBlockSize * (mainIndex->narenas + mainIndex->nsects * 4 + 16))
-		bcmem = maxBlockSize * (mainIndex->narenas + mainIndex->nsects * 4 + 16);
-	fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
-	initDCache(bcmem);
-
-	part = initPart(argv[1], 1);
-	if(part == nil)
-		fatal("can't initialize temporary partition: %R");
-
-	clumps = sortRawIEntries(mainIndex, part, &base);
-	if(clumps == TWID64)
-		fatal("can't build sorted index: %R");
-	fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
-	checkIndex(mainIndex, part, base, clumps, !skipz);
-	
-	exits(0);
-	return 0;	/* shut up stupid compiler */
-}

+ 0 - 197
sys/src/cmd/venti/clump.c

@@ -1,197 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-#include "whack.h"
-
-/*
- * writes a lump to disk
- * returns the address in amap of the clump
- *
- * zb holds the uncompressed data, sc its expected score, type the
- * venti block type, and creator is recorded in the clump header.
- * the data is stored compressed when whackblock makes it smaller,
- * otherwise verbatim.  on success the clump's location and size are
- * filled into ia and 1 is returned; 0 is returned on any failure.
- */
-int
-storeClump(Index *ix, ZBlock *zb, u8int *sc, int type, u32int creator, IAddr *ia)
-{
-	ZBlock *cb;
-	Clump cl;
-	u64int a;
-	u8int bh[VtScoreSize];
-	int size, dsize;
-
-	size = zb->len;
-	if(size > VtMaxLumpSize){
-		setErr(EStrange, "lump too large");
-		return 0;
-	}
-	if(!vtTypeValid(type)){
-		setErr(EStrange, "invalid lump type");
-		return 0;
-	}
-
-	/* refuse to store data that doesn't hash to the score claimed for it */
-	if(1){
-		scoreMem(bh, zb->data, size);
-		if(!scoreEq(sc, bh)){
-			setErr(ECorrupt, "storing clump: corrupted; expected=%V got=%V, size=%d", sc, bh, size);
-			return 0;
-		}
-	}
-
-	/* room for the clump header followed by at most size bytes of data */
-	cb = allocZBlock(size + ClumpSize, 0);
-	if(cb == nil)
-		return 0;
-
-	cl.info.type = type;
-	cl.info.uncsize = size;
-	cl.creator = creator;
-	cl.time = now();
-	scoreCp(cl.info.score, sc);
-
-	/* keep the compressed form only if it is strictly smaller */
-	dsize = whackblock(&cb->data[ClumpSize], zb->data, size);
-	if(dsize > 0 && dsize < size){
-		cl.encoding = ClumpECompress;
-	}else{
-		cl.encoding = ClumpENone;
-		dsize = size;
-		memmove(&cb->data[ClumpSize], zb->data, size);
-	}
-	cl.info.size = dsize;
-
-	a = writeIClump(ix, &cl, cb->data);
-
-	freeZBlock(cb);
-	/* an address of 0 from writeIClump is treated as failure */
-	if(a == 0)
-		return 0;
-
-	vtLock(stats.lock);
-	stats.clumpWrites++;
-	stats.clumpBWrites += size;
-	stats.clumpBComp += dsize;
-	vtUnlock(stats.lock);
-
-	ia->addr = a;
-	ia->type = type;
-	ia->size = size;
-	/* on-disk length (header + data) rounded up to whole arena blocks */
-	ia->blocks = (dsize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
-
-	return 1;
-}
-
-/*
- * read the 32-bit magic number stored at address aa in arena.
- * returns TWID32 if the read fails.
- */
-u32int
-clumpMagic(Arena *arena, u64int aa)
-{
-	u8int magic[U32Size];
-
-	if(readArena(arena, aa, magic, U32Size))
-		return unpackMagic(magic);
-	return TWID32;
-}
-
-/*
- * fetch a block based at addr.
- * score is filled in with the block's score.
- * blocks is roughly the length of the clump on disk;
- * if zero, the length is unknown.
- *
- * the clump header is unpacked into cl.  the data is returned
- * uncompressed in a new ZBlock; with verify set, its score and
- * type are checked as well.  returns nil on any failure.
- */
-ZBlock*
-loadClump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify)
-{
-	Unwhack uw;
-	ZBlock *zb, *cb;
-	u8int bh[VtScoreSize], *buf;
-	u32int n;
-	int nunc;
-
-	vtLock(stats.lock);
-	stats.clumpReads++;
-	vtUnlock(stats.lock);
-
-	/* unknown length: start with a single block */
-	if(blocks <= 0)
-		blocks = 1;
-
-	cb = allocZBlock(blocks << ABlockLog, 0);
-	if(cb == nil)
-		return nil;
-	n = readArena(arena, aa, cb->data, blocks << ABlockLog);
-	if(n < ClumpSize){
-		if(n != 0)
-			setErr(ECorrupt, "loadClump read less than a header");
-		freeZBlock(cb);
-		return nil;
-	}
-	if(!unpackClump(cl, cb->data)){
-		freeZBlock(cb);
-		return nil;
-	}
-	n -= ClumpSize;
-	if(n < cl->info.size){
-		/* the first read was too short: reread just the data, sized from the header */
-		freeZBlock(cb);
-		n = cl->info.size;
-		cb = allocZBlock(n, 0);
-		if(cb == nil)
-			return nil;
-		if(readArena(arena, aa + ClumpSize, cb->data, n) != n){
-			setErr(ECorrupt, "loadClump read too little data");
-			freeZBlock(cb);
-			return nil;
-		}
-		buf = cb->data;
-	}else
-		buf = cb->data + ClumpSize;
-
-	scoreCp(score, cl->info.score);
-
-	zb = allocZBlock(cl->info.uncsize, 0);
-	if(zb == nil){
-		freeZBlock(cb);
-		return nil;
-	}
-	switch(cl->encoding){
-	case ClumpECompress:
-		unwhackinit(&uw);
-		nunc = unwhack(&uw, zb->data, cl->info.uncsize, buf, cl->info.size);
-		if(nunc != cl->info.uncsize){
-			if(nunc < 0)
-				setErr(ECorrupt, "decompression failed: %s", uw.err);
-			else
-				setErr(ECorrupt, "decompression gave partial block: %d/%d\n", nunc, cl->info.uncsize);
-			freeZBlock(cb);
-			freeZBlock(zb);
-			return nil;
-		}
-		break;
-	case ClumpENone:
-		if(cl->info.size != cl->info.uncsize){
-			setErr(ECorrupt, "loading clump: bad uncompressed size for uncompressed block");
-			freeZBlock(cb);
-			freeZBlock(zb);
-			return nil;
-		}
-		memmove(zb->data, buf, cl->info.uncsize);
-		break;
-	default:
-		setErr(ECorrupt, "unknown encoding in loadLump");
-		freeZBlock(cb);
-		freeZBlock(zb);
-		return nil;
-	}
-	freeZBlock(cb);
-
-	if(verify){
-		/* the uncompressed data must hash to the score in the header */
-		scoreMem(bh, zb->data, cl->info.uncsize);
-		if(!scoreEq(cl->info.score, bh)){
-			setErr(ECorrupt, "loading clump: corrupted; expected=%V got=%V", cl->info.score, bh);
-			freeZBlock(zb);
-			return nil;
-		}
-		if(!vtTypeValid(cl->info.type)){
-			setErr(ECorrupt, "loading lump: invalid lump type %d", cl->info.type);
-			freeZBlock(zb);
-			return nil;
-		}
-	}
-
-	vtLock(stats.lock);
-	stats.clumpBReads += cl->info.size;
-	stats.clumpBUncomp += cl->info.uncsize;
-	vtUnlock(stats.lock);
-
-	return zb;
-}

+ 0 - 471
sys/src/cmd/venti/conv.c

@@ -1,471 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-/*
- * disk structure conversion routines
- */
-#define	U8GET(p)	((p)[0])
-#define	U16GET(p)	(((p)[0]<<8)|(p)[1])
-#define	U32GET(p)	(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
-#define	U64GET(p)	(((vlong)U32GET(p)<<32)|(vlong)U32GET((p)+4))
-
-#define	U8PUT(p,v)	(p)[0]=(v)
-#define	U16PUT(p,v)	(p)[0]=(v)>>8;(p)[1]=(v)
-#define	U32PUT(p,v)	(p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)
-#define	U64PUT(p,v,t32)	t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
-
-/*
- * read the big-endian 32-bit magic number at the start of buf.
- */
-u32int
-unpackMagic(u8int *buf)
-{
-	u32int magic;
-
-	magic = U32GET(buf);
-	return magic;
-}
-
-/*
- * store magic as a big-endian 32-bit number at the start of buf.
- */
-void
-packMagic(u32int magic, u8int *buf)
-{
-	buf[0] = magic >> 24;
-	buf[1] = magic >> 16;
-	buf[2] = magic >> 8;
-	buf[3] = magic;
-}
-
-/*
- * decode an arena partition header from buf into ap.
- * fields, in order: magic, version, blockSize, arenaBase (all 32 bits).
- * returns 1 on success, 0 if the magic number is wrong.
- */
-int
-unpackArenaPart(ArenaPart *ap, u8int *buf)
-{
-	u32int magic;
-	int o;
-
-	magic = U32GET(buf);
-	if(magic != ArenaPartMagic){
-		setErr(ECorrupt, "arena set has wrong magic number: %lux expected %lux", magic, ArenaPartMagic);
-		return 0;
-	}
-	o = U32Size;
-	ap->version = U32GET(buf + o);
-	o += U32Size;
-	ap->blockSize = U32GET(buf + o);
-	o += U32Size;
-	ap->arenaBase = U32GET(buf + o);
-	o += U32Size;
-
-	/* the fields consumed must add up to the declared header size */
-	if(o != ArenaPartSize)
-		fatal("unpackArenaPart unpacked wrong amount");
-
-	return 1;
-}
-
-/*
- * encode the arena partition header of ap into buf.
- * fields, in order: magic, version, blockSize, arenaBase (all 32 bits).
- * always returns 1.
- */
-int
-packArenaPart(ArenaPart *ap, u8int *buf)
-{
-	int o;
-
-	o = 0;
-	U32PUT(buf + o, ArenaPartMagic);
-	o += U32Size;
-	U32PUT(buf + o, ap->version);
-	o += U32Size;
-	U32PUT(buf + o, ap->blockSize);
-	o += U32Size;
-	U32PUT(buf + o, ap->arenaBase);
-	o += U32Size;
-
-	/* the fields written must add up to the declared header size */
-	if(o != ArenaPartSize)
-		fatal("packArenaPart packed wrong amount");
-
-	return 1;
-}
-
-/*
- * decode an arena trailer from buf into arena.
- * fields, in order: magic, version, name, clumps, cclumps,
- * ctime, wtime, used, uncsize, sealed.
- * returns 1 on success, 0 if the magic number is wrong.
- */
-int
-unpackArena(Arena *arena, u8int *buf)
-{
-	u32int magic;
-	int o;
-
-	magic = U32GET(buf);
-	if(magic != ArenaMagic){
-		setErr(ECorrupt, "arena has wrong magic number: %lux expected %lux", magic, ArenaMagic);
-		return 0;
-	}
-	o = U32Size;
-	arena->version = U32GET(buf + o);
-	o += U32Size;
-	nameCp(arena->name, (char*)(buf + o));
-	o += ANameSize;
-	arena->clumps = U32GET(buf + o);
-	o += U32Size;
-	arena->cclumps = U32GET(buf + o);
-	o += U32Size;
-	arena->ctime = U32GET(buf + o);
-	o += U32Size;
-	arena->wtime = U32GET(buf + o);
-	o += U32Size;
-	arena->used = U64GET(buf + o);
-	o += U64Size;
-	arena->uncsize = U64GET(buf + o);
-	o += U64Size;
-	arena->sealed = U8GET(buf + o);
-	o += U8Size;
-
-	/* the fields consumed must add up to the declared size */
-	if(o != ArenaSize)
-		fatal("unpackArena unpacked wrong amount");
-
-	return 1;
-}
-
-/*
- * encode the arena trailer of arena into buf.
- * fields, in order: magic, version, name, clumps, cclumps,
- * ctime, wtime, used, uncsize, sealed.
- * always returns 1.
- */
-int
-packArena(Arena *arena, u8int *buf)
-{
-	u32int t32;
-	int o;
-
-	o = 0;
-	U32PUT(buf + o, ArenaMagic);
-	o += U32Size;
-	U32PUT(buf + o, arena->version);
-	o += U32Size;
-	nameCp((char*)(buf + o), arena->name);
-	o += ANameSize;
-	U32PUT(buf + o, arena->clumps);
-	o += U32Size;
-	U32PUT(buf + o, arena->cclumps);
-	o += U32Size;
-	U32PUT(buf + o, arena->ctime);
-	o += U32Size;
-	U32PUT(buf + o, arena->wtime);
-	o += U32Size;
-	U64PUT(buf + o, arena->used, t32);
-	o += U64Size;
-	U64PUT(buf + o, arena->uncsize, t32);
-	o += U64Size;
-	U8PUT(buf + o, arena->sealed);
-	o += U8Size;
-
-	/* the fields written must add up to the declared size */
-	if(o != ArenaSize)
-		fatal("packArena packed wrong amount");
-
-	return 1;
-}
-
-/*
- * decode an arena header from buf into head.
- * fields, in order: magic, version, name, blockSize, size.
- * returns 1 on success, 0 if the magic number is wrong.
- */
-int
-unpackArenaHead(ArenaHead *head, u8int *buf)
-{
-	u32int magic;
-	int o;
-
-	magic = U32GET(buf);
-	if(magic != ArenaHeadMagic){
-		setErr(ECorrupt, "arena has wrong magic number: %lux expected %lux", magic, ArenaHeadMagic);
-		return 0;
-	}
-	o = U32Size;
-	head->version = U32GET(buf + o);
-	o += U32Size;
-	nameCp(head->name, (char*)(buf + o));
-	o += ANameSize;
-	head->blockSize = U32GET(buf + o);
-	o += U32Size;
-	head->size = U64GET(buf + o);
-	o += U64Size;
-
-	/* the fields consumed must add up to the declared size */
-	if(o != ArenaHeadSize)
-		fatal("unpackArenaHead unpacked wrong amount");
-
-	return 1;
-}
-
-/*
- * encode the arena header head into buf.
- * fields, in order: magic, version, name, blockSize, size.
- * always returns 1.
- */
-int
-packArenaHead(ArenaHead *head, u8int *buf)
-{
-	u32int t32;
-	int o;
-
-	o = 0;
-	U32PUT(buf + o, ArenaHeadMagic);
-	o += U32Size;
-	U32PUT(buf + o, head->version);
-	o += U32Size;
-	nameCp((char*)(buf + o), head->name);
-	o += ANameSize;
-	U32PUT(buf + o, head->blockSize);
-	o += U32Size;
-	U64PUT(buf + o, head->size, t32);
-	o += U64Size;
-
-	/* the fields written must add up to the declared size */
-	if(o != ArenaHeadSize)
-		fatal("packArenaHead packed wrong amount");
-
-	return 1;
-}
-
-/*
- * check that a clump's stored and uncompressed sizes agree with
- * its encoding: equal when uncompressed, strictly smaller when
- * compressed.  returns 1 if consistent, 0 (with the error set) if not.
- */
-static int
-checkClump(Clump *w)
-{
-	switch(w->encoding){
-	case ClumpENone:
-		if(w->info.size == w->info.uncsize)
-			return 1;
-		setErr(ECorrupt, "uncompressed wad size mismatch");
-		return 0;
-	case ClumpECompress:
-		if(w->info.size < w->info.uncsize)
-			return 1;
-		setErr(ECorrupt, "compressed lump has inconsistent block sizes %d %d", w->info.size, w->info.uncsize);
-		return 0;
-	default:
-		setErr(ECorrupt, "clump has illegal encoding");
-		return 0;
-	}
-}
-
-/*
- * decode a clump header from buf into c.
- * fields, in order: magic, type, size, uncsize, score,
- * encoding, creator, time.
- * returns 0 if the magic number is wrong, otherwise the result
- * of the consistency check on the decoded header.
- */
-int
-unpackClump(Clump *c, u8int *buf)
-{
-	u32int magic;
-	int o;
-
-	magic = U32GET(buf);
-	if(magic != ClumpMagic){
-		setErr(ECorrupt, "clump has bad magic number=%#8.8ux", magic);
-		return 0;
-	}
-	o = U32Size;
-
-	c->info.type = U8GET(buf + o);
-	o += U8Size;
-	c->info.size = U16GET(buf + o);
-	o += U16Size;
-	c->info.uncsize = U16GET(buf + o);
-	o += U16Size;
-	scoreCp(c->info.score, buf + o);
-	o += VtScoreSize;
-
-	c->encoding = U8GET(buf + o);
-	o += U8Size;
-	c->creator = U32GET(buf + o);
-	o += U32Size;
-	c->time = U32GET(buf + o);
-	o += U32Size;
-
-	/* the fields consumed must add up to the declared size */
-	if(o != ClumpSize)
-		fatal("unpackClump unpacked wrong amount");
-
-	return checkClump(c);
-}
-
-/*
- * encode the clump header c into buf.
- * fields, in order: magic, type, size, uncsize, score,
- * encoding, creator, time.
- * returns the result of the consistency check on c; the header
- * is written to buf either way.
- */
-int
-packClump(Clump *c, u8int *buf)
-{
-	int o;
-
-	o = 0;
-	U32PUT(buf + o, ClumpMagic);
-	o += U32Size;
-
-	U8PUT(buf + o, c->info.type);
-	o += U8Size;
-	U16PUT(buf + o, c->info.size);
-	o += U16Size;
-	U16PUT(buf + o, c->info.uncsize);
-	o += U16Size;
-	scoreCp(buf + o, c->info.score);
-	o += VtScoreSize;
-
-	U8PUT(buf + o, c->encoding);
-	o += U8Size;
-	U32PUT(buf + o, c->creator);
-	o += U32Size;
-	U32PUT(buf + o, c->time);
-	o += U32Size;
-
-	/* the fields written must add up to the declared size */
-	if(o != ClumpSize)
-		fatal("packClump packed wrong amount");
-
-	return checkClump(c);
-}
-
-/*
- * decode a clump directory entry from buf into ci.
- * fields, in order: type, size, uncsize, score.
- */
-void
-unpackClumpInfo(ClumpInfo *ci, u8int *buf)
-{
-	int o;
-
-	o = 0;
-	ci->type = U8GET(buf + o);
-	o += U8Size;
-	ci->size = U16GET(buf + o);
-	o += U16Size;
-	ci->uncsize = U16GET(buf + o);
-	o += U16Size;
-	scoreCp(ci->score, buf + o);
-	o += VtScoreSize;
-
-	/* the fields consumed must add up to the declared size */
-	if(o != ClumpInfoSize)
-		fatal("unpackClumpInfo unpacked wrong amount");
-}
-
-/*
- * encode the clump directory entry ci into buf.
- * fields, in order: type, size, uncsize, score.
- */
-void
-packClumpInfo(ClumpInfo *ci, u8int *buf)
-{
-	int o;
-
-	o = 0;
-	U8PUT(buf + o, ci->type);
-	o += U8Size;
-	U16PUT(buf + o, ci->size);
-	o += U16Size;
-	U16PUT(buf + o, ci->uncsize);
-	o += U16Size;
-	scoreCp(buf + o, ci->score);
-	o += VtScoreSize;
-
-	/* the fields written must add up to the declared size */
-	if(o != ClumpInfoSize)
-		fatal("packClumpInfo packed wrong amount");
-}
-
-/*
- * decode an index section header from buf into is.
- * fields, in order: magic, version, name, index, blockSize,
- * blockBase, blocks, start, stop.
- * returns 1 on success, 0 if the magic number is wrong.
- */
-int
-unpackISect(ISect *is, u8int *buf)
-{
-	u32int magic;
-	int o;
-
-	magic = U32GET(buf);
-	if(magic != ISectMagic){
-		setErr(ECorrupt, "index section has wrong magic number: %lux expected %lux", magic, ISectMagic);
-		return 0;
-	}
-	o = U32Size;
-	is->version = U32GET(buf + o);
-	o += U32Size;
-	nameCp(is->name, (char*)(buf + o));
-	o += ANameSize;
-	nameCp(is->index, (char*)(buf + o));
-	o += ANameSize;
-	is->blockSize = U32GET(buf + o);
-	o += U32Size;
-	is->blockBase = U32GET(buf + o);
-	o += U32Size;
-	is->blocks = U32GET(buf + o);
-	o += U32Size;
-	is->start = U32GET(buf + o);
-	o += U32Size;
-	is->stop = U32GET(buf + o);
-	o += U32Size;
-
-	/* the fields consumed must add up to the declared size */
-	if(o != ISectSize)
-		fatal("unpackISect unpacked wrong amount");
-
-	return 1;
-}
-
-/*
- * encode the index section header is into buf.
- * fields, in order: magic, version, name, index, blockSize,
- * blockBase, blocks, start, stop.
- * always returns 1.
- */
-int
-packISect(ISect *is, u8int *buf)
-{
-	int o;
-
-	o = 0;
-	U32PUT(buf + o, ISectMagic);
-	o += U32Size;
-	U32PUT(buf + o, is->version);
-	o += U32Size;
-	nameCp((char*)(buf + o), is->name);
-	o += ANameSize;
-	nameCp((char*)(buf + o), is->index);
-	o += ANameSize;
-	U32PUT(buf + o, is->blockSize);
-	o += U32Size;
-	U32PUT(buf + o, is->blockBase);
-	o += U32Size;
-	U32PUT(buf + o, is->blocks);
-	o += U32Size;
-	U32PUT(buf + o, is->start);
-	o += U32Size;
-	U32PUT(buf + o, is->stop);
-	o += U32Size;
-
-	/* the fields written must add up to the declared size */
-	if(o != ISectSize)
-		fatal("packISect packed wrong amount");
-
-	return 1;
-}
-
-/*
- * decode an index entry from buf into ie.
- * fields, in order: score, wtime, train, addr, size, type, blocks.
- * the type byte must sit at offset IEntryTypeOff.
- */
-void
-unpackIEntry(IEntry *ie, u8int *buf)
-{
-	int o;
-
-	o = 0;
-	scoreCp(ie->score, buf + o);
-	o += VtScoreSize;
-	ie->wtime = U32GET(buf + o);
-	o += U32Size;
-	ie->train = U16GET(buf + o);
-	o += U16Size;
-	ie->ia.addr = U64GET(buf + o);
-	o += U64Size;
-	ie->ia.size = U16GET(buf + o);
-	o += U16Size;
-	/* other code reads the type byte directly at this offset */
-	if(o != IEntryTypeOff)
-		fatal("unpackIEntry bad IEntryTypeOff amount");
-	ie->ia.type = U8GET(buf + o);
-	o += U8Size;
-	ie->ia.blocks = U8GET(buf + o);
-	o += U8Size;
-
-	if(o != IEntrySize)
-		fatal("unpackIEntry unpacked wrong amount");
-}
-
-/*
- * encode the index entry ie into buf.
- * fields, in order: score, wtime, train, addr, size, type, blocks.
- */
-void
-packIEntry(IEntry *ie, u8int *buf)
-{
-	u32int t32;
-	int o;
-
-	o = 0;
-	scoreCp(buf + o, ie->score);
-	o += VtScoreSize;
-	U32PUT(buf + o, ie->wtime);
-	o += U32Size;
-	U16PUT(buf + o, ie->train);
-	o += U16Size;
-	U64PUT(buf + o, ie->ia.addr, t32);
-	o += U64Size;
-	U16PUT(buf + o, ie->ia.size);
-	o += U16Size;
-	U8PUT(buf + o, ie->ia.type);
-	o += U8Size;
-	U8PUT(buf + o, ie->ia.blocks);
-	o += U8Size;
-
-	/* the fields written must add up to the declared size */
-	if(o != IEntrySize)
-		fatal("packIEntry packed wrong amount");
-}
-
-/*
- * decode an index bucket header from buf into b.
- * the entry data is not copied: b->data points into buf,
- * just past the header.
- */
-void
-unpackIBucket(IBucket *b, u8int *buf)
-{
-	u8int *p;
-
-	p = buf;
-	b->n = U16GET(p);
-	p += U16Size;
-	b->next = U32GET(p);
-	b->data = buf + IBucketSize;
-}
-
-/*
- * encode the header (n, next) of index bucket b into buf.
- * the entry data is not written here.
- */
-void
-packIBucket(IBucket *b, u8int *buf)
-{
-	u8int *p;
-
-	p = buf;
-	U16PUT(p, b->n);
-	p += U16Size;
-	U32PUT(p, b->next);
-}

+ 165 - 95
sys/src/cmd/venti/copy.c

@@ -1,107 +1,146 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
+/* venti copy.  this doesn't work very well; see ../oventi/copy.c instead */
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <avl.h>
+#include <bin.h>
 
-static int fast;
-static int quiet;
+int changes;
+int rewrite;
+int ignoreerrors;
+int fast;
+int verbose;
+int nskip;
+int nwrite;
 
-VtSession *zsrc, *zdst;
+VtConn *zsrc, *zdst;
+uchar zeroscore[VtScoreSize];	/* all zeros */
 
-void
-usage(void)
+typedef struct ScoreTree ScoreTree;
+struct ScoreTree
 {
-	fprint(2, "usage: copy [-fq] src-host dst-host score [type]\n");
-	exits("usage");
-}
+	Avl avl;
+	uchar score[VtScoreSize];
+	int type;
+};
 
-int
-parseScore(uchar *score, char *buf, int n)
+Avltree *scoretree;
+Bin *scorebin;
+
+static int
+scoretreecmp(Avl *va, Avl *vb)
 {
-	int i, c;
+	ScoreTree *a, *b;
+	int i;
+
+	a = (ScoreTree*)va;
+	b = (ScoreTree*)vb;
 
-	memset(score, 0, VtScoreSize);
+	i = memcmp(a->score, b->score, VtScoreSize);
+	if(i != 0)
+		return i;
+	return a->type - b->type;
+}
 
-	if(n < VtScoreSize*2)
+static int
+havevisited(uchar score[VtScoreSize], int type)
+{
+	ScoreTree a;
+	
+	if(scoretree == nil)
 		return 0;
-	for(i=0; i<VtScoreSize*2; i++) {
-		if(buf[i] >= '0' && buf[i] <= '9')
-			c = buf[i] - '0';
-		else if(buf[i] >= 'a' && buf[i] <= 'f')
-			c = buf[i] - 'a' + 10;
-		else if(buf[i] >= 'A' && buf[i] <= 'F')
-			c = buf[i] - 'A' + 10;
-		else {
-			return 0;
-		}
+	memmove(a.score, score, VtScoreSize);
+	a.type = type;
+	return lookupavl(scoretree, &a.avl) != nil;
+}
 
-		if((i & 1) == 0)
-			c <<= 4;
+static void
+markvisited(uchar score[VtScoreSize], int type)
+{
+	ScoreTree *a;
+	Avl *old;
 
-		score[i>>1] |= c;
-	}
-	return 1;
+	if(scoretree == nil)
+		return;
+	a = binalloc(&scorebin, sizeof *a, 1);
+	memmove(a->score, score, VtScoreSize);
+	a->type = type;
+	insertavl(scoretree, &a->avl, &old);
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: copy [-fir] [-t type] srchost dsthost score\n");
+	exits("usage");
 }
 
 void
 walk(uchar score[VtScoreSize], uint type, int base)
 {
-	int i, n, sub;
+	int i, n;
 	uchar *buf;
+	uchar nscore[VtScoreSize];
 	VtEntry e;
 	VtRoot root;
 
-	if(memcmp(score, vtZeroScore, VtScoreSize) == 0)
+	if(memcmp(score, vtzeroscore, VtScoreSize) == 0 || memcmp(score, zeroscore, VtScoreSize) == 0)
+		return;
+	
+	if(havevisited(score, type)){
+		nskip++;
 		return;
+	}
 
-	buf = vtMemAllocZ(VtMaxLumpSize);
-	if(fast && vtRead(zdst, score, type, buf, VtMaxLumpSize) >= 0){
-		if(!quiet)
-			fprint(2, "%V already exists on dst server; skipping.\n", score);
+	buf = vtmallocz(VtMaxLumpSize);
+	if(fast && vtread(zdst, score, type, buf, VtMaxLumpSize) >= 0){
+		if(verbose)
+			fprint(2, "skip %V\n", score);
 		free(buf);
 		return;
 	}
 
-	n = vtRead(zsrc, score, type, buf, VtMaxLumpSize);
-	/*
-	 * we usually see this at the end of a venti/copy of a vac tree:
-	 * warning: could not read block \
-	 * 0000000000000000000000000000000000000000 1: \
-	 * no block with that score exists
-	 * maybe it's harmless.
-	 */
+	n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
 	if(n < 0){
-		fprint(2, "warning: could not read block %V %d: %R\n",
-			score, type);
+		if(rewrite){
+			changes++;
+			memmove(score, vtzeroscore, VtScoreSize);
+		}else if(!ignoreerrors)
+			sysfatal("reading block %V (type %d): %r", score, type);
 		return;
 	}
 
 	switch(type){
 	case VtRootType:
-		if(!vtRootUnpack(&root, buf)){
+		if(vtrootunpack(&root, buf) < 0){
 			fprint(2, "warning: could not unpack root in %V %d\n", score, type);
 			break;
 		}
-		walk(root.score, VtDirType, 0);
 		walk(root.prev, VtRootType, 0);
+		walk(root.score, VtDirType, 0);
+		if(rewrite)
+			vtrootpack(&root, buf);	/* walk might have changed score */
 		break;
 
 	case VtDirType:
 		for(i=0; i<n/VtEntrySize; i++){
-			if(!vtEntryUnpack(&e, buf, i)){
+			if(vtentryunpack(&e, buf, i) < 0){
 				fprint(2, "warning: could not unpack entry #%d in %V %d\n", i, score, type);
 				continue;
 			}
 			if(!(e.flags & VtEntryActive))
 				continue;
-			if(e.flags&VtEntryDir)
-				base = VtDirType;
-			else
-				base = VtDataType;
-			if(e.depth == 0)
-				sub = base;
-			else
-				sub = VtPointerType0+e.depth-1;
-			walk(e.score, sub, base);
+			walk(e.score, e.type, e.type&VtTypeBaseMask);
+			/*
+			 * Don't repack unless we're rewriting -- some old 
+			 * vac files have psize==0 and dsize==0, and these
+			 * get rewritten by vtentryunpack to have less strange
+			 * block sizes.  So vtentryunpack; vtentrypack does not
+			 * guarantee to preserve the exact bytes in buf.
+			 */
+			if(rewrite)
+				vtentrypack(&e, buf, i);
 		}
 		break;
 
@@ -109,18 +148,27 @@ walk(uchar score[VtScoreSize], uint type, int base)
 		break;
 
 	default:	/* pointers */
-		if(type == VtPointerType0)
-			sub = base;
-		else
-			sub = type-1;
 		for(i=0; i<n; i+=VtScoreSize)
-			if(memcmp(buf+i, vtZeroScore, VtScoreSize) != 0)
-				walk(buf+i, sub, base);
+			if(memcmp(buf+i, vtzeroscore, VtScoreSize) != 0)
+				walk(buf+i, type-1, base);
 		break;
 	}
 
-	if(!vtWrite(zdst, score, type, buf, n))
-		fprint(2, "warning: could not write block %V %d: %R\n", score, type);
+	nwrite++;
+	if(vtwrite(zdst, nscore, type, buf, n) < 0){
+		/* figure out score for better error message */
+		/* can't use input argument - might have changed contents */
+		n = vtzerotruncate(type, buf, n);
+		sha1(buf, n, score, nil);
+		sysfatal("writing block %V (type %d): %r", score, type);
+	}
+	if(!rewrite && memcmp(score, nscore, VtScoreSize) != 0){
+		fprint(2, "not rewriting: wrote %V got %V\n", score, nscore);
+		abort();
+		sysfatal("not rewriting: wrote %V got %V", score, nscore);
+	}
+	
+	markvisited(score, type);
 	free(buf);
 }
 
@@ -130,64 +178,86 @@ main(int argc, char *argv[])
 	int type, n;
 	uchar score[VtScoreSize];
 	uchar *buf;
+	char *prefix;
+
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
 
+	type = -1;
 	ARGBEGIN{
+	case 'V':
+		chattyventi++;
+		break;
 	case 'f':
 		fast = 1;
 		break;
-	case 'q':
-		quiet = 1;
+	case 'i':
+		if(rewrite)
+			usage();
+		ignoreerrors = 1;
+		break;
+	case 'm':
+		scoretree = mkavltree(scoretreecmp);
+		break;
+	case 'r':
+		if(ignoreerrors)
+			usage();
+		rewrite = 1;
+		break;
+	case 't':
+		type = atoi(EARGF(usage()));
+		break;
+	case 'v':
+		verbose = 1;
 		break;
 	default:
 		usage();
 		break;
 	}ARGEND
 
-	if(argc != 3 && argc != 4)
+	if(argc != 3)
 		usage();
 
-	vtAttach();
-
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
+	if(vtparsescore(argv[2], &prefix, score) < 0)
+		sysfatal("could not parse score: %r");
 
-	if(!parseScore(score, argv[2], strlen(argv[2])))
-		vtFatal("could not parse score: %s", vtGetError());
+	buf = vtmallocz(VtMaxLumpSize);
 
-	buf = vtMemAllocZ(VtMaxLumpSize);
-
-	zsrc = vtDial(argv[0], 0);
+	zsrc = vtdial(argv[0]);
 	if(zsrc == nil)
-		vtFatal("could not dial src server: %R");
-	if(!vtConnect(zsrc, 0))
-		sysfatal("vtConnect src: %r");
+		sysfatal("could not dial src server: %r");
+	if(vtconnect(zsrc) < 0)
+		sysfatal("vtconnect src: %r");
 
-	zdst = vtDial(argv[1], 0);
+	zdst = vtdial(argv[1]);
 	if(zdst == nil)
-		vtFatal("could not dial dst server: %R");
-	if(!vtConnect(zdst, 0))
-		sysfatal("vtConnect dst: %r");
+		sysfatal("could not dial dst server: %r");
+	if(vtconnect(zdst) < 0)
+		sysfatal("vtconnect dst: %r");
 
-	if(argc == 4){
-		type = atoi(argv[3]);
-		n = vtRead(zsrc, score, type, buf, VtMaxLumpSize);
+	if(type != -1){
+		n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
 		if(n < 0)
-			vtFatal("could not read block: %R");
+			sysfatal("could not read block: %r");
 	}else{
 		for(type=0; type<VtMaxType; type++){
-			n = vtRead(zsrc, score, type, buf, VtMaxLumpSize);
+			n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
 			if(n >= 0)
 				break;
 		}
 		if(type == VtMaxType)
-			vtFatal("could not find block %V of any type", score);
+			sysfatal("could not find block %V of any type", score);
 	}
 
 	walk(score, type, VtDirType);
+	if(changes)
+		print("%s:%V (%d pointers rewritten)\n", prefix, score, changes);
+
+	if(verbose)
+		print("%d skipped, %d written\n", nskip, nwrite);
 
-	if(!vtSync(zdst))
-		vtFatal("could not sync dst server: %R");
+	if(vtsync(zdst) < 0)
+		sysfatal("could not sync dst server: %r");
 
-	vtDetach();
 	exits(0);
 }

+ 0 - 373
sys/src/cmd/venti/dcache.c

@@ -1,373 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-typedef struct DCache	DCache;
-
-enum
-{
-	HashLog		= 9,
-	HashSize	= 1<<HashLog,
-	HashMask	= HashSize - 1,
-};
-
-struct DCache
-{
-	VtLock		*lock;
-	VtRendez	*full;
-	DBlock		*free;			/* list of available lumps */
-	u32int		now;			/* ticks for usage timestamps */
-	int		size;			/* max. size of any block; allocated to each block */
-	DBlock		**heads;		/* hash table for finding address */
-	int		nheap;			/* number of available victims */
-	DBlock		**heap;			/* heap for locating victims */
-	int		nblocks;		/* number of blocks allocated */
-	DBlock		*blocks;		/* array of block descriptors */
-	u8int		*mem;			/* memory for all block descriptors */
-};
-
-static DCache	dCache;
-
-static int	downHeap(int i, DBlock *b);
-static int	upHeap(int i, DBlock *b);
-static DBlock	*bumpDBlock(void);
-static void	delHeap(DBlock *db);
-static void	fixHeap(int i, DBlock *b);
-
-void
-initDCache(u32int mem)
-{
-	DBlock *b, *last;
-	u32int nblocks, blockSize;
-	int i;
-
-	if(mem < maxBlockSize * 2)
-		fatal("need at least %d bytes for the disk cache", maxBlockSize * 2);
-	if(maxBlockSize == 0)
-		fatal("no max. block size given for disk cache");
-	blockSize = maxBlockSize;
-	nblocks = mem / blockSize;
-	if(0)
-		fprint(2, "initialize disk cache with %d blocks of %d bytes\n", nblocks, blockSize);
-	dCache.lock = vtLockAlloc();
-	dCache.full = vtRendezAlloc(dCache.lock);
-	dCache.nblocks = nblocks;
-	dCache.size = blockSize;
-	dCache.heads = MKNZ(DBlock*, HashSize);
-	dCache.heap = MKNZ(DBlock*, nblocks);
-	dCache.blocks = MKNZ(DBlock, nblocks);
-	dCache.mem = MKNZ(u8int, nblocks * blockSize);
-
-	last = nil;
-	for(i = 0; i < nblocks; i++){
-		b = &dCache.blocks[i];
-		b->data = &dCache.mem[i * blockSize];
-		b->heap = TWID32;
-		b->lock = vtLockAlloc();
-		b->next = last;
-		last = b;
-	}
-	dCache.free = last;
-	dCache.nheap = 0;
-}
-
-static u32int
-pbHash(u64int addr)
-{
-	u32int h;
-
-#define hashit(c)	((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
-	h = (addr >> 32) ^ addr;
-	return hashit(h);
-}
-
-DBlock*
-getDBlock(Part *part, u64int addr, int read)
-{
-	DBlock *b;
-	u32int h, size;
-
-	size = part->blockSize;
-	if(size > dCache.size){
-		setErr(EAdmin, "block size %d too big for cache", size);
-		return nil;
-	}
-	h = pbHash(addr);
-
-	/*
-	 * look for the block in the cache
-	 */
-//checkDCache();
-	vtLock(dCache.lock);
-again:
-	for(b = dCache.heads[h]; b != nil; b = b->next){
-		if(b->part == part && b->addr == addr){
-			vtLock(stats.lock);
-			stats.pcHit++;
-			vtUnlock(stats.lock);
-			goto found;
-		}
-	}
-	vtLock(stats.lock);
-	stats.pcMiss++;
-	vtUnlock(stats.lock);
-
-	/*
-	 * missed: locate the block with the oldest second to last use.
-	 * remove it from the heap, and fix up the heap.
-	 */
-	b = bumpDBlock();
-	if(b == nil){
-		logErr(EAdmin, "all disk cache blocks in use");
-		vtSleep(dCache.full);
-		goto again;
-	}
-
-	/*
-	 * the new block has no last use, so assume it happens sometime in the middle
-ZZZ this is not reasonable
-	 */
-	b->used = (b->used2 + dCache.now) / 2;
-
-	/*
-	 * rechain the block on the correct hash chain
-	 */
-	b->next = dCache.heads[h];
-	dCache.heads[h] = b;
-	if(b->next != nil)
-		b->next->prev = b;
-	b->prev = nil;
-
-	b->addr = addr;
-	b->part = part;
-	b->size = 0;
-
-found:
-	b->ref++;
-	b->used2 = b->used;
-	b->used = dCache.now++;
-	if(b->heap != TWID32)
-		fixHeap(b->heap, b);
-
-	vtUnlock(dCache.lock);
-//checkDCache();
-
-	vtLock(b->lock);
-	if(b->size != size){
-		if(b->size < size){
-			if(!read)
-				memset(&b->data[b->size], 0, size - b->size);
-			else if(readPart(part, addr + b->size, &b->data[b->size], size - b->size)){
-				vtLock(stats.lock);
-				stats.pcReads++;
-				stats.pcBReads += size - b->size;
-				vtUnlock(stats.lock);
-			}else{
-				putDBlock(b);
-				return nil;
-			}
-		}
-		b->size = size;
-	}
-
-	return b;
-}
-
-void
-putDBlock(DBlock *b)
-{
-	if(b == nil)
-		return;
-
-	vtUnlock(b->lock);
-//checkDCache();
-	vtLock(dCache.lock);
-	if(--b->ref == 0){
-		if(b->heap == TWID32)
-			upHeap(dCache.nheap++, b);
-		vtWakeup(dCache.full);
-	}
-
-	vtUnlock(dCache.lock);
-//checkDCache();
-}
-
-/*
- * remove some block from use and update the free list and counters
- */
-static DBlock*
-bumpDBlock(void)
-{
-	DBlock *b;
-	ulong h;
-
-	b = dCache.free;
-	if(b != nil){
-		dCache.free = b->next;
-		return b;
-	}
-
-	/*
-	 * remove blocks until we find one that is unused
-	 * referenced blocks are left in the heap even though
-	 * they can't be scavenged; this is simple a speed optimization
-	 */
-	for(;;){
-		if(dCache.nheap == 0)
-			return nil;
-		b = dCache.heap[0];
-		delHeap(b);
-		if(!b->ref)
-			break;
-	}
-
-	/*
-	 * unchain the block
-	 */
-	if(b->prev == nil){
-		h = pbHash(b->addr);
-		if(dCache.heads[h] != b)
-			fatal("bad hash chains in disk cache");
-		dCache.heads[h] = b->next;
-	}else
-		b->prev->next = b->next;
-	if(b->next != nil)
-		b->next->prev = b->prev;
-
-	return b;
-}
-
-/*
- * delete an arbitrary block from the heap
- */
-static void
-delHeap(DBlock *db)
-{
-	fixHeap(db->heap, dCache.heap[--dCache.nheap]);
-	db->heap = TWID32;
-}
-
-/*
- * push an element up or down to it's correct new location
- */
-static void
-fixHeap(int i, DBlock *b)
-{
-	if(upHeap(i, b) == i)
-		downHeap(i, b);
-}
-
-static int
-upHeap(int i, DBlock *b)
-{
-	DBlock *bb;
-	u32int now;
-	int p;
-
-	now = dCache.now;
-	for(; i != 0; i = p){
-		p = (i - 1) >> 1;
-		bb = dCache.heap[p];
-		if(b->used2 - now >= bb->used2 - now)
-			break;
-		dCache.heap[i] = bb;
-		bb->heap = i;
-	}
-
-	dCache.heap[i] = b;
-	b->heap = i;
-	return i;
-}
-
-static int
-downHeap(int i, DBlock *b)
-{
-	DBlock *bb;
-	u32int now;
-	int k;
-
-	now = dCache.now;
-	for(; ; i = k){
-		k = (i << 1) + 1;
-		if(k >= dCache.nheap)
-			break;
-		if(k + 1 < dCache.nheap && dCache.heap[k]->used2 - now > dCache.heap[k + 1]->used2 - now)
-			k++;
-		bb = dCache.heap[k];
-		if(b->used2 - now <= bb->used2 - now)
-			break;
-		dCache.heap[i] = bb;
-		bb->heap = i;
-	}
-
-	dCache.heap[i] = b;
-	b->heap = i;
-	return i;
-}
-
-static void
-findBlock(DBlock *bb)
-{
-	DBlock *b, *last;
-	int h;
-
-	last = nil;
-	h = pbHash(bb->addr);
-	for(b = dCache.heads[h]; b != nil; b = b->next){
-		if(last != b->prev)
-			fatal("bad prev link");
-		if(b == bb)
-			return;
-		last = b;
-	}
-	fatal("block missing from hash table");
-}
-
-void
-checkDCache(void)
-{
-	DBlock *b;
-	u32int size, now;
-	int i, k, refed, nfree;
-
-	vtLock(dCache.lock);
-	size = dCache.size;
-	now = dCache.now;
-	for(i = 0; i < dCache.nheap; i++){
-		if(dCache.heap[i]->heap != i)
-			fatal("dc: mis-heaped at %d: %d", i, dCache.heap[i]->heap);
-		if(i > 0 && dCache.heap[(i - 1) >> 1]->used2 - now > dCache.heap[i]->used2 - now)
-			fatal("dc: bad heap ordering");
-		k = (i << 1) + 1;
-		if(k < dCache.nheap && dCache.heap[i]->used2 - now > dCache.heap[k]->used2 - now)
-			fatal("dc: bad heap ordering");
-		k++;
-		if(k < dCache.nheap && dCache.heap[i]->used2 - now > dCache.heap[k]->used2 - now)
-			fatal("dc: bad heap ordering");
-	}
-
-	refed = 0;
-	for(i = 0; i < dCache.nblocks; i++){
-		b = &dCache.blocks[i];
-		if(b->data != &dCache.mem[i * size])
-			fatal("dc: mis-blocked at %d", i);
-		if(b->ref && b->heap == TWID32)
-			refed++;
-		if(b->addr)
-			findBlock(b);
-		if(b->heap != TWID32
-		&& dCache.heap[b->heap] != b)
-			fatal("dc: spurious heap value");
-	}
-
-	nfree = 0;
-	for(b = dCache.free; b != nil; b = b->next){
-		if(b->addr != 0 || b->heap != TWID32)
-			fatal("dc: bad free list");
-		nfree++;
-	}
-
-	if(dCache.nheap + nfree + refed != dCache.nblocks)
-		fatal("dc: missing blocks: %d %d %d", dCache.nheap, refed, dCache.nblocks);
-	vtUnlock(dCache.lock);
-}

+ 79 - 0
sys/src/cmd/venti/devnull.c

@@ -0,0 +1,79 @@
+/* Copyright (c) 2004 Russ Cox */
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <thread.h>
+#include <libsec.h>
+
+#ifndef _UNISTD_H_
+#pragma varargck type "F" VtFcall*
+#pragma varargck type "T" void
+#endif
+
+int verbose;
+
+enum
+{
+	STACK = 8192
+};
+
+void
+usage(void)
+{
+	fprint(2, "usage: venti/devnull [-v] [-a address]\n");
+	threadexitsall("usage");
+}
+
+void
+threadmain(int argc, char **argv)
+{
+	VtReq *r;
+	VtSrv *srv;
+	char *address;
+
+	fmtinstall('V', vtscorefmt);
+	fmtinstall('F', vtfcallfmt);
+	
+	address = "tcp!*!venti";
+
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	case 'a':
+		address = EARGF(usage());
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	srv = vtlisten(address);
+	if(srv == nil)
+		sysfatal("vtlisten %s: %r", address);
+
+	while((r = vtgetreq(srv)) != nil){
+		r->rx.msgtype = r->tx.msgtype+1;
+		if(verbose)
+			fprint(2, "<- %F\n", &r->tx);
+		switch(r->tx.msgtype){
+		case VtTping:
+			break;
+		case VtTgoodbye:
+			break;
+		case VtTread:
+			r->rx.error = vtstrdup("no such block");
+			r->rx.msgtype = VtRerror;
+			break;
+		case VtTwrite:
+			packetsha1(r->tx.data, r->rx.score);
+			break;
+		case VtTsync:
+			break;
+		}
+		if(verbose)
+			fprint(2, "-> %F\n", &r->rx);
+		vtrespond(r);
+	}
+	threadexitsall(nil);
+}
+

+ 0 - 365
sys/src/cmd/venti/dumparena.c

@@ -1,365 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-#include <disk.h>
-
-typedef struct Label Label;
-
-struct Label {
-	ulong ver;
-	ulong trailer;
-	ulong tape;
-	ulong slot;
-};
-
-static void usage(void);
-static void verifyArena(void);
-static int writeBlock(uchar *buf);
-static int writeLabel(Label*);
-static int readBlock(uchar *buf);
-static int readLabel(Label*);
-static int writeMark(void);
-static int rewind(int);
-static int space(int);
-
-int tape;
-int slot;
-int pos;
-Scsi *dev;
-
-enum {
-	LabelMagic = 0x4b5474d2,
-	BlockSize = 16*1024,
-	Version = 1,
-};
-
-void
-main(int argc, char *argv[])
-{
-	char *p;
-	int overwrite = 0;
-
-	fmtregister('V', vtScoreFmt);
-	fmtregister('R', vtErrFmt);
-	vtAttach();
-	statsInit();
-
-	ARGBEGIN{
-	default:
-		usage();
-		break;
-	case 's':
-		p = ARGF();
-		if(p == nil)
-			usage();
-		slot = atoi(p);
-		break;
-	case 't':
-		p = ARGF();
-		if(p == nil)
-			usage();
-		tape = atoi(p);
-		break;
-	case 'o':
-		overwrite++;
-		break;
-	case '
-	}ARGEND
-
-	readonly = 1;
-
-	if(argc != 1)
-		usage();
-
-       	dev = openscsi(argv[0]);
-	if(dev == nil)
-		fatal("could not open scsi device: %r");
-
-	if(!initPos() && !rewind())
-		fatal("could not rewind: %r");
-
-fprint(2, "pos = %d\n", pos);
-
-	if(pos != 0) {
-		if(!movetoSlot(slot)) {
-			if(!rewind())
-				fatal("could not rewind: %r");
-			pos = 0;
-		}
-	}
-
-	if(pos != slot && !movetoSlot(slot))
-		fatal("could not seek to slot: %r");
-
-	if(!overwrite) {
-		if(readLabel(&lb))
-			fatal("tape not empty: tape=%d", lb->tape);
-	}
-
-	memset(&lb, 0, sizeof(lb));
-	lb.ver = Version;
-	lb.tape = tape;
-	lb.slot = slot;
-	
-	if(!writeLabel(&lb))
-		fatal("could not write header: %r");
-	
-	if(!writeArena(score))
-		fatal("could not write arena: %r");
-	
-	lb.trailer = 1;
-	if(!writeLabel(&lb))
-		fatal("could not write header: %r");
-	if(!writeArena(score))
-		fatal("could not write arena: %r");
-	
-
-	exits(0);
-}
-
-static void
-usage(void)
-{
-	fprint(2, "usage: dumparena [-o] [-t tape] [-s slot] device\n");
-	exits("usage");
-}
-
-static int
-initPos(void)
-{
-	Label lb;
-	int i;
-
-	for(i=0; i<4; i++) {
-		slot(-2);
-		if(space(1))
-		if(readLabel(&lb))
-		if(lb->tape == tape && lb->trailer) {
-			pos = lb->slot;
-			return 1;
-		}
-	}
-	return 0;
-}		
-
-static void
-readBlock(uchar *buf, int n)
-{
-	int nr, m;
-
-	for(nr = 0; nr < n; nr += m){
-		m = n - nr;
-		m = read(0, &buf[nr], m);
-		if(m <= 0)
-			fatal("can't read arena from standard input: %r");
-	}
-}
-
-static void
-verifyArena(void)
-{
-	Arena arena;
-	ArenaHead head;
-	ZBlock *b;
-	VtSha1 *s;
-	u64int n, e;
-	u32int bs;
-	u8int score[VtScoreSize];
-
-	memset(&arena, 0, sizeof arena);
-
-	fprint(2, "verify arena from standard input\n");
-	s = vtSha1Alloc();
-	if(s == nil)
-		fatal("can't initialize sha1 state");
-	vtSha1Init(s);
-
-	/*
-	 * read the little bit, which will included the header
-	 */
-	bs = MaxIoSize;
-	b = allocZBlock(bs, 0);
-	readBlock(b->data, HeadSize);
-	vtSha1Update(s, b->data, HeadSize);
-	if(!unpackArenaHead(&head, b->data))
-		fatal("corrupted arena header: %R");
-	if(head.version != ArenaVersion)
-		fatal("unknown arena version %d", head.version);
-
-	/*
-	 * now we know how much to read
-	 * read everything but the last block, which is special
-	 */
-	e = head.size - head.blockSize;
-	for(n = HeadSize; n < e; n += bs){
-		if(n + bs > e)
-			bs = e - n;
-		readBlock(b->data, bs);
-		vtSha1Update(s, b->data, bs);
-	}
-
-	/*
-	 * read the last block update the sum.
-	 * the sum is calculated assuming the slot for the sum is zero.
-	 */
-	bs = head.blockSize;
-	readBlock(b->data, bs);
-	vtSha1Update(s, b->data, bs - VtScoreSize);
-	vtSha1Update(s, zeroScore, VtScoreSize);
-	vtSha1Final(s, score);
-	vtSha1Free(s);
-
-	/*
-	 * validity check on the trailer
-	 */
-	arena.blockSize = head.blockSize;
-	if(!unpackArena(&arena, b->data))
-		fatal("corrupted arena trailer: %R");
-	scoreCp(arena.score, &b->data[arena.blockSize - VtScoreSize]);
-
-	if(!nameEq(arena.name, head.name))
-		fatal("arena header and trailer names clash: %s vs. %s\n", head.name, arena.name);
-	if(arena.version != head.version)
-		fatal("arena header and trailer versions clash: %d vs. %d\n", head.version, arena.version);
-	arena.size = head.size - 2 * head.blockSize;
-
-	/*
-	 * check for no checksum or the same
-	 */
-	if(!scoreEq(score, arena.score)){
-		if(!scoreEq(zeroScore, arena.score))
-			fprint(2, "warning: mismatched checksums for arena=%s, found=%V calculated=%V",
-				arena.name, arena.score, score);
-		scoreCp(arena.score, score);
-	}else
-		fprint(2, "matched score\n");
-
-	printArena(2, &arena);
-}
-
-static int
-writeBlock(uchar *buf)
-{
-	uchar cmd[6];
-
-	memset(cmd, 0, sizeof(cmd));
-	cmd[0] = 0x0a;
-	cmd[2] = BlockSize>>16;
-	cmd[3] = BlockSize>>8;
-	cmd[4] = BlockSize;
-
-	if(scsi(dev, cmd, 6, buf, BlockSize, Swrite) < 0)
-		return 0;
-	return 1;
-}
-
-static int
-readBlock(uchar *buf)
-{
-	uchar cmd[6];
-
-	memset(cmd, 0, sizeof(cmd));
-	cmd[0] = 0x08;
-	cmd[2] = BlockSize>>16;
-	cmd[3] = BlockSize>>8;
-	cmd[4] = BlockSize;
-
-	if(scsi(dev, cmd, 6, buf, BlockSize, Sread) < 0)
-		return 0;
-	return 1;
-}
-
-static int
-writeMark(void)
-{
-	uchar cmd[6];
-
-	memset(cmd, 0, sizeof(cmd));
-	cmd[0] = 0x10;
-
-	if(scsi(dev, cmd, 6, buf, BlockSize, Sread) < 0)
-		return 0;
-	return 1;
-}
-
-static int
-rewind(int n)
-{
-	uchar cmd[6];
-
-	if(n > 225)
-		n = 255;
-
-	memset(cmd, 0, sizeof(cmd));
-	cmd[0] = 0x01;
-	cmd[4] = n;
-
-	if(scsi(dev, cmd, 6, buf, BlockSize, Sread) < 0)
-		return 0;
-	return 1;
-}
-
-static int
-space(int off)
-{
-	uchar cmd[6];
-
-	memset(cmd, 0, sizeof(cmd));
-	cmd[0] = 0x11;
-	cmd[1] = 0x01;	/* file marks */
-	cmd[2] = off>>16;
-	cmd[3] = off>>8;
-	cmd[4] = off;
-	
-	if(scsi(dev, cmd, 6, buf, BlockSize, Sread) < 0)
-		return 0;
-	return 1;
-}
-
-static int
-writeLabel(Label *lb)
-{
-	uchar block[BlockSize];
-	
-	if(lb->ver != Version) {
-		vtSerError("unknown header version");
-		return 0;
-	}
-
-	memset(block, 0, blockSize);
-	vtPutUint32(block+0, Magic);
-	vtPutUint32(block+4, lb->ver);
-	vtPutUint32(block+8, lb->trailer);
-	vtPutUint32(block+12, lb->tape);
-	vtPutUint32(block+16, lb->slot);
-
-	if(!writeBlock(block))
-		return 0;
-	if(!writeMark(1))
-		return 0;
-	return 1;
-}
-
-static int
-readLabel(Label *lb)
-{
-	uchar block[BlockSize];
-
-	if(!readBlock(block))
-		return 0;
-	if(vtGetUint32(block+0) != Magic) {
-		vtSetError("bad magic in header");
-		return 0;
-	}
-	lb->ver = vtGetUint32(block+4);
-	if(lb->ver != Version) {
-		vtSerError("unknown header version");
-		return 0;
-	}
-	lb->trailer = vtPutUint32(block+8);
-	lb->tape = vtGetUint32(block+12);
-	lb->slot = vtGetUint32(block+16);
-
-	return 1;
-}
-

+ 0 - 137
sys/src/cmd/venti/findscore.c

@@ -1,137 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-enum
-{
-	ClumpChunks	= 32*1024
-};
-
-static int	verbose;
-
-static int	writeClumpHead(Arena *arena, u64int aa, Clump *cl);
-static int	writeClumpMagic(Arena *arena, u64int aa, u32int magic);
-
-int
-clumpInfoEq(ClumpInfo *c, ClumpInfo *d)
-{
-	return c->type == d->type
-		&& c->size == d->size
-		&& c->uncsize == d->uncsize
-		&& scoreEq(c->score, d->score);
-}
-
-/*
- * synchronize the clump info directory with
- * with the clumps actually stored in the arena.
- * the directory should be at least as up to date
- * as the arena's trailer.
- *
- * checks/updates at most n clumps.
- *
- * returns 1 if ok, -1 if an error occured, 0 if blocks were updated
- */
-int
-findscore(Arena *arena, uchar *score)
-{
-	IEntry ie;
-	ClumpInfo *ci, *cis;
-	u64int a;
-	u32int clump;
-	int i, n, found;
-
-	if(arena->clumps && verbose)
-		fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->clumps);
-
-	cis = MKN(ClumpInfo, ClumpChunks);
-	found = 0;
-	a = 0;
-	memset(&ie, 0, sizeof(IEntry));
-	for(clump = 0; clump < arena->clumps; clump += n){
-		n = ClumpChunks;
-		if(n > arena->clumps - clump)
-			n = arena->clumps - clump;
-		if(readClumpInfos(arena, clump, cis, n) != n){
-			setErr(EOk, "arena directory read failed: %R");
-			break;
-		}
-
-		for(i = 0; i < n; i++){
-			ci = &cis[i];
-			if(scoreEq(score, ci->score)){
-				fprint(2, "found at clump=%d with type=%d size=%d csize=%d position=%lld\n",
-					clump + i, ci->type, ci->uncsize, ci->size, a);
-				found++;
-			}
-			a += ci->size + ClumpSize;
-		}
-	}
-	free(cis);
-	return found;
-}
-
-void
-usage(void)
-{
-	fprint(2, "usage: findscore [-v] arenafile score\n");
-	exits(0);
-}
-
-int
-main(int argc, char *argv[])
-{
-	ArenaPart *ap;
-	Part *part;
-	char *file;
-	u8int score[VtScoreSize];
-	int i, found;
-
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
-	vtAttach();
-	statsInit();
-
-	ARGBEGIN{
-	case 'v':
-		verbose++;
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	readonly = 1;
-
-	if(argc != 2)
-		usage();
-
-	file = argv[0];
-	if(!strScore(argv[1], score))
-		fatal("bad score %s\n", argv[1]);
-
-	part = initPart(file, 0);
-	if(part == nil)
-		fatal("can't open partition %s: %r", file);
-
-	ap = initArenaPart(part);
-	if(ap == nil)
-		fatal("can't initialize arena partition in %s: %R", file);
-
-	if(verbose > 1){
-		printArenaPart(2, ap);
-		fprint(2, "\n");
-	}
-
-	initDCache(8 * MaxDiskBlock);
-
-	found = 0;
-	for(i = 0; i < ap->narenas; i++)
-		found += findscore(ap->arenas[i], score);
-
-	print("found %d occurences of %V\n", found, score);
-
-	if(verbose > 1)
-		printStats();
-	exits(0);
-	return 0;	/* shut up stupid compiler */
-}

+ 0 - 109
sys/src/cmd/venti/fmtarenas.c

@@ -1,109 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-void
-usage(void)
-{
-	fprint(2, "usage: fmtarenas [-Z] [-b blocksize] [-a arenasize] name file\n");
-	exits(0);
-}
-
-int
-main(int argc, char *argv[])
-{
-	ArenaPart *ap;
-	Part *part;
-	Arena *arena;
-	u64int addr, limit, asize, apsize;
-	char *file, *name, aname[ANameSize];
-	int i, n, blockSize, tabSize, zero;
-
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
-	vtAttach();
-	statsInit();
-
-	blockSize = 8 * 1024;
-	asize = 512 * 1024 *1024;
-	tabSize = 64 * 1024;		/* BUG: should be determine from number of arenas */
-	zero = 1;
-	ARGBEGIN{
-	case 'd':
-		chattyzero++;
-		break;
-	case 'a':
-		asize = unittoull(ARGF());
-		if(asize == TWID64)
-			usage();
-		break;
-	case 'b':
-		blockSize = unittoull(ARGF());
-		if(blockSize == ~0)
-			usage();
-		if(blockSize > MaxDiskBlock){
-			fprint(2, "block size too large, max %d\n", MaxDiskBlock);
-			exits("usage");
-		}
-		break;
-	case 'Z':
-		zero = 0;
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	if(argc != 2)
-		usage();
-
-	name = argv[0];
-	file = argv[1];
-
-	if(!nameOk(name))
-		fatal("illegal name template %s", name);
-
-	part = initPart(file, 1);
-	if(part == nil)
-		fatal("can't open partition %s: %r", file);
-
-	if(zero)
-		zeroPart(part, blockSize);
-
-	ap = newArenaPart(part, blockSize, tabSize);
-	if(ap == nil)
-		fatal("can't initialize arena: %R");
-
-	apsize = ap->size - ap->arenaBase;
-	n = apsize / asize;
-
-	fprint(2, "configuring %s with arenas=%d for a total storage of bytes=%lld and directory bytes=%d\n",
-		file, n, apsize, ap->tabSize);
-
-	ap->narenas = n;
-	ap->map = MKNZ(AMap, n);
-	ap->arenas = MKNZ(Arena*, n);
-
-	addr = ap->arenaBase;
-	for(i = 0; i < n; i++){
-		limit = addr + asize;
-		snprint(aname, ANameSize, "%s%d", name, i);
-		fprint(2, "adding arena %s at [%lld,%lld)\n", aname, addr, limit);
-		arena = newArena(part, aname, addr, limit - addr, blockSize);
-		if(!arena)
-			fprint(2, "can't make new arena %s: %r", aname);
-		freeArena(arena);
-
-		ap->map[i].start = addr;
-		ap->map[i].stop = limit;
-		nameCp(ap->map[i].name, aname);
-
-		addr = limit;
-	}
-
-	if(!wbArenaPart(ap))
-		fprint(2, "can't write back arena partition header for %s: %R\n", file);
-
-	exits(0);
-	return 0;	/* shut up stupid compiler */
-}

+ 0 - 75
sys/src/cmd/venti/fmtisect.c

@@ -1,75 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-void
-usage(void)
-{
-	fprint(2, "usage: fmtisect [-Z] [-b blocksize] name file\n");
-	exits(0);
-}
-
-int
-main(int argc, char *argv[])
-{
-	ISect *is;
-	Part *part;
-	char *file, *name;
-	int blockSize, setSize, zero;
-
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
-	vtAttach();
-	statsInit();
-
-	blockSize = 8 * 1024;
-	setSize = 64 * 1024;
-	zero = 1;
-	ARGBEGIN{
-	case 'd':
-		chattyzero++;
-		break;
-	case 'b':
-		blockSize = unittoull(ARGF());
-		if(blockSize == ~0)
-			usage();
-		if(blockSize > MaxDiskBlock){
-			fprint(2, "block size too large, max %d\n", MaxDiskBlock);
-			exits("usage");
-		}
-		break;
-	case 'Z':
-		zero = 0;
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	if(argc != 2)
-		usage();
-
-	name = argv[0];
-	file = argv[1];
-
-	if(!nameOk(name))
-		fatal("illegal name %s", name);
-
-	part = initPart(file, 0);
-	if(part == nil)
-		fatal("can't open partition %s: %r", file);
-
-	if(zero)
-		zeroPart(part, blockSize);
-
-	fprint(2, "configuring index section %s with space for index config bytes=%d\n", name, setSize);
-	is = newISect(part, name, blockSize, setSize);
-	if(is == nil)
-		fatal("can't initialize new index: %R");
-
-	if(!wbISect(is))
-		fprint(2, "can't write back index section header for %s: %R\n", file);
-
-	exits(0);
-	return 0;	/* shut up stupid compiler */
-}

+ 0 - 438
sys/src/cmd/venti/httpd.c

@@ -1,438 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-#include "httpd.h"
-#include "xml.h"
-
-typedef struct HttpObj	HttpObj;
-
-enum
-{
-	ObjNameSize	= 64,
-	MaxObjs		= 16
-};
-
-struct HttpObj
-{
-	char	name[ObjNameSize];
-	int	(*f)(HConnect*);
-};
-
-static HttpObj	objs[MaxObjs];
-
-static	void		dolisten(void*);
-static	int		estats(HConnect *c);
-static	int		dindex(HConnect *c);
-static	int		xindex(HConnect *c);
-static	int		sindex(HConnect *c);
-static	int		notfound(HConnect *c);
-static	int		httpdObj(char *name, int (*f)(HConnect*));
-
-int
-httpdInit(char *address)
-{
-	fmtinstall('D', hdatefmt);
-	fmtinstall('H', httpfmt);
-	fmtinstall('U', hurlfmt);
-
-	if(address == nil)
-		address = "tcp!*!http";
-
-	httpdObj("/stats", estats);
-	httpdObj("/index", dindex);
-	httpdObj("/storage", sindex);
-	httpdObj("/xindex", xindex);
-
-	if(vtThread(dolisten, address) < 0)
-		return 0;
-	return 1;
-}
-
-static int
-httpdObj(char *name, int (*f)(HConnect*))
-{
-	int i;
-
-	if(name == nil || strlen(name) >= ObjNameSize)
-		return 0;
-	for(i = 0; i < MaxObjs; i++){
-		if(objs[i].name[0] == '\0'){
-			strcpy(objs[i].name, name);
-			objs[i].f = f;
-			return 1;
-		}
-		if(strcmp(objs[i].name, name) == 0)
-			return 0;
-	}
-	return 0;
-}
-
-static HConnect*
-mkconnect(void)
-{
-	HConnect *c;
-
-	c = mallocz(sizeof(HConnect), 1);
-	if(c == nil)
-		sysfatal("out of memory");
-	c->replog = nil;
-	c->hpos = c->header;
-	c->hstop = c->header;
-	return c;
-}
-
-static void
-dolisten(void *vaddress)
-{
-	HConnect *c;
-	char *address, ndir[NETPATHLEN], dir[NETPATHLEN];
-	int ctl, nctl, data, ok, t, i;
-
-	address = vaddress;
-	ctl = announce(address, dir);
-	if(ctl < 0){
-		fprint(2, "venti: httpd can't announce on %s: %r\n", address);
-		return;
-	}
-
-	for(;;){
-
-		/*
-		 *  wait for a call (or an error)
-		 */
-		nctl = listen(dir, ndir);
-		if(nctl < 0){
-			fprint(2, "venti: httpd can't listen on %s: %r\n", address);
-			return;
-		}
-
-		/*
-		 *  start a process for the service
-		 */
-		switch(rfork(RFFDG|RFPROC|RFNOWAIT|RFNAMEG|RFMEM)){
-		case -1:
-			close(nctl);
-			continue;
-		case 0:
-			/*
-			 *  see if we know the service requested
-			 */
-			data = accept(ctl, ndir);
-			if(data < 0){
-				fprint(2, "venti: httpd can't open %s/data: %r", ndir);
-				exits(nil);
-			}
-			close(ctl);
-			close(nctl);
-
-			c = mkconnect();
-
-			hinit(&c->hin, data, Hread);
-			hinit(&c->hout, data, Hwrite);
-
-			for(t = 15*60*1000; ; t = 15*1000){
-				if(hparsereq(c, t) <= 0)
-					break;
-
-				ok = -1;
-				for(i = 0; i < MaxObjs && objs[i].name[0]; i++){
-					if(strcmp(c->req.uri, objs[i].name) == 0){
-						ok = (*objs[i].f)(c);
-						break;
-					}
-				}
-				if(i == MaxObjs)
-					ok = notfound(c);
-				if(c->head.closeit)
-					ok = -1;
-				hreqcleanup(c);
-
-				if(ok < 0)
-					break;
-			}
-			hreqcleanup(c);
-			free(c);
-			exits(nil);
-		default:
-			close(nctl);
-			break;
-		}
-	}
-}
-
-static int
-percent(long v, long total)
-{
-	if(total == 0)
-		total = 1;
-	if(v < 1000*1000)
-		return (v * 100) / total;
-	total /= 100;
-	if(total == 0)
-		total = 1;
-	return v / total;
-}
-
-static int
-preq(HConnect *c)
-{
-	if(hparseheaders(c, 15*60*1000) < 0)
-		return -1;
-	if(strcmp(c->req.meth, "GET") != 0
-	&& strcmp(c->req.meth, "HEAD") != 0)
-		return hunallowed(c, "GET, HEAD");
-	if(c->head.expectother || c->head.expectcont)
-		return hfail(c, HExpectFail, nil);
-	return 1;
-}
-
-static int
-preqtext(HConnect *c)
-{
-	Hio *hout;
-	int r;
-
-	r = preq(c);
-	if(r <= 0)
-		return r;
-
-	hout = &c->hout;
-	if(c->req.vermaj){
-		hokheaders(c);
-		hprint(hout, "Content-type: text/plain\r\n");
-		if(http11(c))
-			hprint(hout, "Transfer-Encoding: chunked\r\n");
-		hprint(hout, "\r\n");
-	}
-
-	if(http11(c))
-		hxferenc(hout, 1);
-	else
-		c->head.closeit = 1;
-	return 1;
-}
-
-static int
-notfound(HConnect *c)
-{
-	int r;
-
-	r = preq(c);
-	if(r <= 0)
-		return r;
-	return hfail(c, HNotFound, c->req.uri);
-}
-
-static int
-estats(HConnect *c)
-{
-	Hio *hout;
-	int r;
-
-	r = preqtext(c);
-	if(r <= 0)
-		return r;
-
-	hout = &c->hout;
-	hprint(hout, "lump writes=%,ld\n", stats.lumpWrites);
-	hprint(hout, "lump reads=%,ld\n", stats.lumpReads);
-	hprint(hout, "lump cache read hits=%,ld\n", stats.lumpHit);
-	hprint(hout, "lump cache read misses=%,ld\n", stats.lumpMiss);
-
-	hprint(hout, "clump disk writes=%,ld\n", stats.clumpWrites);
-	hprint(hout, "clump disk bytes written=%,lld\n", stats.clumpBWrites);
-	hprint(hout, "clump disk bytes compressed=%,lld\n", stats.clumpBComp);
-	hprint(hout, "clump disk reads=%,ld\n", stats.clumpReads);
-	hprint(hout, "clump disk bytes read=%,lld\n", stats.clumpBReads);
-	hprint(hout, "clump disk bytes uncompressed=%,lld\n", stats.clumpBUncomp);
-
-	hprint(hout, "clump directory disk writes=%,ld\n", stats.ciWrites);
-	hprint(hout, "clump directory disk reads=%,ld\n", stats.ciReads);
-
-	hprint(hout, "index disk writes=%,ld\n", stats.indexWrites);
-	hprint(hout, "index disk reads=%,ld\n", stats.indexReads);
-	hprint(hout, "index disk reads for modify=%,ld\n", stats.indexWReads);
-	hprint(hout, "index disk reads for allocation=%,ld\n", stats.indexAReads);
-
-	hprint(hout, "index cache lookups=%,ld\n", stats.icLookups);
-	hprint(hout, "index cache hits=%,ld %d%%\n", stats.icHits,
-		percent(stats.icHits, stats.icLookups));
-	hprint(hout, "index cache fills=%,ld %d%%\n", stats.icFills,
-		percent(stats.icFills, stats.icLookups));
-	hprint(hout, "index cache inserts=%,ld\n", stats.icInserts);
-
-	hprint(hout, "disk cache hits=%,ld\n", stats.pcHit);
-	hprint(hout, "disk cache misses=%,ld\n", stats.pcMiss);
-	hprint(hout, "disk cache reads=%,ld\n", stats.pcReads);
-	hprint(hout, "disk cache bytes read=%,lld\n", stats.pcBReads);
-
-	hprint(hout, "disk writes=%,ld\n", stats.diskWrites);
-	hprint(hout, "disk bytes written=%,lld\n", stats.diskBWrites);
-	hprint(hout, "disk reads=%,ld\n", stats.diskReads);
-	hprint(hout, "disk bytes read=%,lld\n", stats.diskBReads);
-
-	hflush(hout);
-	return 1;
-}
-
-static int
-sindex(HConnect *c)
-{
-	Hio *hout;
-	Index *ix;
-	Arena *arena;
-	vlong clumps, cclumps, uncsize, used, size;
-	int i, r, active;
-
-	r = preqtext(c);
-	if(r <= 0)
-		return r;
-	hout = &c->hout;
-
-	ix = mainIndex;
-
-	hprint(hout, "index=%s\n", ix->name);
-
-	active = 0;
-	clumps = 0;
-	cclumps = 0;
-	uncsize = 0;
-	used = 0;
-	size = 0;
-	for(i = 0; i < ix->narenas; i++){
-		arena = ix->arenas[i];
-		if(arena != nil && arena->clumps != 0){
-			active++;
-			clumps += arena->clumps;
-			cclumps += arena->cclumps;
-			uncsize += arena->uncsize;
-			used += arena->used;
-		}
-		size += arena->size;
-	}
-	hprint(hout, "total arenas=%d active=%d\n", ix->narenas, active);
-	hprint(hout, "total space=%lld used=%lld\n", size, used + clumps * ClumpInfoSize);
-	hprint(hout, "clumps=%lld compressed clumps=%lld data=%lld compressed data=%lld\n",
-		clumps, cclumps, uncsize, used - clumps * ClumpSize);
-	hflush(hout);
-	return 1;
-}
-
-static void
-dArena(Hio *hout, Arena *arena)
-{
-	hprint(hout, "arena='%s' on %s at [%lld,%lld)\n\tversion=%d created=%d modified=%d",
-		arena->name, arena->part->name, arena->base, arena->base + arena->size + 2 * arena->blockSize,
-		arena->version, arena->ctime, arena->wtime);
-	if(arena->sealed)
-		hprint(hout, " sealed\n");
-	else
-		hprint(hout, "\n");
-	if(!scoreEq(zeroScore, arena->score))
-		hprint(hout, "\tscore=%V\n", arena->score);
-
-	hprint(hout, "\tclumps=%d compressed clumps=%d data=%lld compressed data=%lld disk storage=%lld\n",
-		arena->clumps, arena->cclumps, arena->uncsize,
-		arena->used - arena->clumps * ClumpSize,
-		arena->used + arena->clumps * ClumpInfoSize);
-}
-
-static int
-dindex(HConnect *c)
-{
-	Hio *hout;
-	Index *ix;
-	int i, r;
-
-	r = preqtext(c);
-	if(r <= 0)
-		return r;
-	hout = &c->hout;
-
-
-	ix = mainIndex;
-	hprint(hout, "index=%s version=%d blockSize=%d tabSize=%d\n",
-		ix->name, ix->version, ix->blockSize, ix->tabSize);
-	hprint(hout, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);
-	for(i = 0; i < ix->nsects; i++)
-		hprint(hout, "\tsect=%s for buckets [%lld,%lld)\n", ix->smap[i].name, ix->smap[i].start, ix->smap[i].stop);
-	for(i = 0; i < ix->narenas; i++){
-		if(ix->arenas[i] != nil && ix->arenas[i]->clumps != 0){
-			hprint(hout, "arena=%s at index [%lld,%lld)\n\t", ix->amap[i].name, ix->amap[i].start, ix->amap[i].stop);
-			dArena(hout, ix->arenas[i]);
-		}
-	}
-	hflush(hout);
-	return 1;
-}
-
-static int
-xindex(HConnect *c)
-{
-	Hio *hout;
-	int r;
-
-	r = preq(c);
-	if(r <= 0)
-		return r;
-
-	hout = &c->hout;
-	if(c->req.vermaj){
-		hokheaders(c);
-		hprint(hout, "Content-type: text/xml\r\n");
-		if(http11(c))
-			hprint(hout, "Transfer-Encoding: chunked\r\n");
-		hprint(hout, "\r\n");
-	}
-
-	if(http11(c))
-		hxferenc(hout, 1);
-	else
-		c->head.closeit = 1;
-	xmlIndex(hout, mainIndex, "index", 0);
-	hflush(hout);
-	return 1;
-}
-
-void
-xmlIndent(Hio *hout, int indent)
-{
-	int i;
-
-	for(i = 0; i < indent; i++)
-		hputc(hout, '\t');
-}
-
-void
-xmlAName(Hio *hout, char *v, char *tag)
-{
-	hprint(hout, " %s=\"%s\"", tag, v);
-}
-
-void
-xmlScore(Hio *hout, u8int *v, char *tag)
-{
-	if(scoreEq(zeroScore, v))
-		return;
-	hprint(hout, " %s=\"%V\"", tag, v);
-}
-
-void
-xmlSealed(Hio *hout, int v, char *tag)
-{
-	if(!v)
-		return;
-	hprint(hout, " %s=\"yes\"", tag);
-}
-
-void
-xmlU32int(Hio *hout, u32int v, char *tag)
-{
-	hprint(hout, " %s=\"%ud\"", tag, v);
-}
-
-void
-xmlU64int(Hio *hout, u64int v, char *tag)
-{
-	hprint(hout, " %s=\"%llud\"", tag, v);
-}

+ 0 - 201
sys/src/cmd/venti/icache.c

@@ -1,201 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-typedef struct ICache		ICache;
-
-struct ICache
-{
-	VtLock	*lock;			/* locks hash table & all associated data */
-	IEntry	**heads;		/* heads of all the hash chains */
-	int	bits;			/* bits to use for indexing heads */
-	u32int	size;			/* number of heads; == 1 << bits, should be < entries */
-	IEntry	*base;			/* all allocated hash table entries */
-	u32int	entries;		/* elements in base */
-	u32int	unused;			/* index of first unused element in base */
-	u32int	stolen;			/* last head from which an element was stolen */
-};
-
-static ICache icache;
-
-static IEntry	*icacheAlloc(IAddr *ia, u8int *score);
-
-/*
- * bits is the number of bits in the icache hash table
- * depth is the average depth
- * memory usage is about (1<<bits) * depth * sizeof(IEntry) + (1<<bits) * sizeof(IEntry*)
- */
-void
-initICache(int bits, int depth)
-{
-	icache.lock = vtLockAlloc();
-	icache.bits = bits;
-	icache.size = 1 << bits;
-	icache.entries = depth * icache.size;
-	icache.base = MKNZ(IEntry, icache.entries);
-	icache.heads = MKNZ(IEntry*, icache.size);
-}
-
-u32int
-hashBits(u8int *sc, int bits)
-{
-	u32int v;
-
-	v = (sc[0] << 24) | (sc[1] << 16) | (sc[2] << 8) | sc[3];
-	if(bits < 32)
-		 v >>= (32 - bits);
-	return v;
-}
-
-/*
-ZZZ need to think about evicting the correct IEntry,
-and writing back the wtime.
- * look up data score in the index cache
- * if this fails, pull it in from the disk index table, if it exists.
- *
- * must be called with the lump for this score locked
- */
-int
-lookupScore(u8int *score, int type, IAddr *ia, int *rac)
-{
-	IEntry d, *ie, *last;
-	u32int h;
-
-	vtLock(stats.lock);
-	stats.icLookups++;
-	vtUnlock(stats.lock);
-
-	vtLock(icache.lock);
-	h = hashBits(score, icache.bits);
-	last = nil;
-	for(ie = icache.heads[h]; ie != nil; ie = ie->next){
-		if(ie->ia.type == type && scoreEq(ie->score, score)){
-			if(last != nil)
-				last->next = ie->next;
-			else
-				icache.heads[h] = ie->next;
-			vtLock(stats.lock);
-			stats.icHits++;
-			vtUnlock(stats.lock);
-			ie->rac = 1;
-			goto found;
-		}
-		last = ie;
-	}
-
-	vtUnlock(icache.lock);
-
-	if(!loadIEntry(mainIndex, score, type, &d))
-		return 0;
-
-	/*
-	 * no one else can load an entry for this score,
-	 * since we have the overall score lock.
-	 */
-	vtLock(stats.lock);
-	stats.icFills++;
-	vtUnlock(stats.lock);
-
-	vtLock(icache.lock);
-
-	ie = icacheAlloc(&d.ia, score);
-
-found:
-	ie->next = icache.heads[h];
-	icache.heads[h] = ie;
-
-	*ia = ie->ia;
-	*rac = ie->rac;
-
-	vtUnlock(icache.lock);
-
-	return 1;
-}
-
-/*
- * insert a new element in the hash table.
- */
-int
-insertScore(u8int *score, IAddr *ia, int write)
-{
-	IEntry *ie, se;
-	u32int h;
-
-	vtLock(stats.lock);
-	stats.icInserts++;
-	vtUnlock(stats.lock);
-
-	vtLock(icache.lock);
-	h = hashBits(score, icache.bits);
-
-	ie = icacheAlloc(ia, score);
-
-	ie->next = icache.heads[h];
-	icache.heads[h] = ie;
-
-	se = *ie;
-
-	vtUnlock(icache.lock);
-
-	if(!write)
-		return 1;
-
-	return storeIEntry(mainIndex, &se);
-}
-
-/*
- * allocate a index cache entry which hasn't been used in a while.
- * must be called with icache.lock locked
- * if the score is already in the table, update the entry.
- */
-static IEntry *
-icacheAlloc(IAddr *ia, u8int *score)
-{
-	IEntry *ie, *last, *next;
-	u32int h;
-
-	h = hashBits(score, icache.bits);
-	last = nil;
-	for(ie = icache.heads[h]; ie != nil; ie = ie->next){
-		if(ie->ia.type == ia->type && scoreEq(ie->score, score)){
-			if(last != nil)
-				last->next = ie->next;
-			else
-				icache.heads[h] = ie->next;
-			ie->rac = 1;
-			return ie;
-		}
-		last = ie;
-	}
-
-	h = icache.unused;
-	if(h < icache.entries){
-		ie = &icache.base[h++];
-		icache.unused = h;
-		goto Found;
-	}
-
-	h = icache.stolen;
-	for(;;){
-		h++;
-		if(h >= icache.size)
-			h = 0;
-		ie = icache.heads[h];
-		if(ie != nil){
-			last = nil;
-			for(; next = ie->next; ie = next)
-				last = ie;
-			if(last != nil)
-				last->next = nil;
-			else
-				icache.heads[h] = nil;
-			icache.stolen = h;
-			goto Found;
-		}
-	}
-Found:
-	ie->ia = *ia;
-	scoreCp(ie->score, score);
-	ie->rac = 0;
-	return ie;
-}

+ 0 - 781
sys/src/cmd/venti/index.c

@@ -1,781 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-static int	buckLook(u8int *score, int type, u8int *data, int n);
-static int	writeBucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
-static int	okIBucket(IBucket *ib, ISect *is);
-static ISect	*initISect1(ISect *is);
-
-static VtLock	*indexLock;	//ZZZ
-
-static char IndexMagic[] = "venti index configuration";
-
-Index*
-initIndex(char *name, ISect **sects, int n)
-{
-	IFile f;
-	Index *ix;
-	ISect *is;
-	u32int last, blockSize, tabSize;
-	int i;
-
-	if(n <= 0){
-		setErr(EOk, "no index sections to initialize index");
-		return nil;
-	}
-	ix = MKZ(Index);
-	if(ix == nil){
-		setErr(EOk, "can't initialize index: out of memory");
-		freeIndex(ix);
-		return nil;
-	}
-
-	tabSize = sects[0]->tabSize;
-	if(!partIFile(&f, sects[0]->part, sects[0]->tabBase, tabSize))
-		return 0;
-	if(!parseIndex(&f, ix)){
-		freeIFile(&f);
-		freeIndex(ix);
-		return nil;
-	}
-	freeIFile(&f);
-	if(!nameEq(ix->name, name)){
-		setErr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
-		return nil;
-	}
-	ix->sects = sects;
-	if(ix->nsects != n){
-		setErr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
-		freeIndex(ix);
-		return nil;
-	}
-	last = 0;
-	blockSize = ix->blockSize;
-	for(i = 0; i < ix->nsects; i++){
-		is = sects[i];
-		if(!nameEq(ix->name, is->index)
-		|| is->blockSize != blockSize
-		|| is->tabSize != tabSize
-		|| !nameEq(is->name, ix->smap[i].name)
-		|| is->start != ix->smap[i].start
-		|| is->stop != ix->smap[i].stop
-		|| last != is->start
-		|| is->start > is->stop){
-			setErr(ECorrupt, "inconsistent index sections in %s", ix->name);
-			freeIndex(ix);
-			return nil;
-		}
-		last = is->stop;
-	}
-	ix->tabSize = tabSize;
-	ix->buckets = last;
-
-	ix->div = (((u64int)1 << 32) + last - 1) / last;
-	last = (((u64int)1 << 32) - 1) / ix->div + 1;
-	if(last != ix->buckets){
-		setErr(ECorrupt, "inconsistent math for buckets in %s", ix->name);
-		freeIndex(ix);
-		return nil;
-	}
-
-	ix->arenas = MKNZ(Arena*, ix->narenas);
-	if(!mapArenas(ix->amap, ix->arenas, ix->narenas, ix->name)){
-		freeIndex(ix);
-		return nil;
-	}
-	return ix;
-}
-
-int
-wbIndex(Index *ix)
-{
-	Fmt f;
-	ZBlock *b;
-	int i;
-
-	if(ix->nsects == 0){
-		setErr(EOk, "no sections in index %s", ix->name);
-		return 0;
-	}
-	b = allocZBlock(ix->tabSize, 1);
-	if(b == nil){
-		setErr(EOk, "can't write index configuration: out of memory");
-		return 0;
-	}
-	fmtZBInit(&f, b);
-	if(!outputIndex(&f, ix)){
-		setErr(EOk, "can't make index configuration: table storage too small %d", ix->tabSize);
-		freeZBlock(b);
-		return 0;
-	}
-	for(i = 0; i < ix->nsects; i++){
-		if(!writePart(ix->sects[i]->part, ix->sects[i]->tabBase, b->data, ix->tabSize)){
-			setErr(EOk, "can't write index: %R");
-			freeZBlock(b);
-			return 0;
-		}
-	}
-	freeZBlock(b);
-
-	for(i = 0; i < ix->nsects; i++)
-		if(!wbISect(ix->sects[i]))
-			return 0;
-
-	return 1;
-}
-
-/*
- * index: IndexMagic '\n' version '\n' name '\n' blockSize '\n' sections arenas
- * version, blockSize: u32int
- * name: max. ANameSize string
- * sections, arenas: AMap
- */
-int
-outputIndex(Fmt *f, Index *ix)
-{
-	if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blockSize) < 0)
-		return 0;
-	return outputAMap(f, ix->smap, ix->nsects) && outputAMap(f, ix->amap, ix->narenas);
-}
-
-int
-parseIndex(IFile *f, Index *ix)
-{
-	AMapN amn;
-	u32int v;
-	char *s;
-
-	/*
-	 * magic
-	 */
-	s = ifileLine(f);
-	if(s == nil || strcmp(s, IndexMagic) != 0){
-		setErr(ECorrupt, "bad index magic for %s", f->name);
-		return 0;
-	}
-
-	/*
-	 * version
-	 */
-	if(!ifileU32Int(f, &v)){
-		setErr(ECorrupt, "syntax error: bad version number in %s", f->name);
-		return 0;
-	}
-	ix->version = v;
-	if(ix->version != IndexVersion){
-		setErr(ECorrupt, "bad version number in %s", f->name);
-		return 0;
-	}
-
-	/*
-	 * name
-	 */
-	if(!ifileName(f, ix->name)){
-		setErr(ECorrupt, "syntax error: bad index name in %s", f->name);
-		return 0;
-	}
-
-	/*
-	 * block size
-	 */
-	if(!ifileU32Int(f, &v)){
-		setErr(ECorrupt, "syntax error: bad version number in %s", f->name);
-		return 0;
-	}
-	ix->blockSize = v;
-
-	if(!parseAMap(f, &amn))
-		return 0;
-	ix->nsects = amn.n;
-	ix->smap = amn.map;
-
-	if(!parseAMap(f, &amn))
-		return 0;
-	ix->narenas = amn.n;
-	ix->amap = amn.map;
-
-	return 1;
-}
-
-/*
- * initialize an entirely new index
- */
-Index *
-newIndex(char *name, ISect **sects, int n)
-{
-	Index *ix;
-	AMap *smap;
-	u64int nb;
-	u32int div, ub, xb, start, stop, blockSize, tabSize;
-	int i, j;
-
-	if(n < 1){
-		setErr(EOk, "creating index with no index sections");
-		return nil;
-	}
-
-	/*
-	 * compute the total buckets available in the index,
-	 * and the total buckets which are used.
-	 */
-	nb = 0;
-	blockSize = sects[0]->blockSize;
-	tabSize = sects[0]->tabSize;
-	for(i = 0; i < n; i++){
-		if(sects[i]->start != 0 || sects[i]->stop != 0
-		|| sects[i]->index[0] != '\0'){
-			setErr(EOk, "creating new index using non-empty section %s", sects[i]->name);
-			return nil;
-		}
-		if(blockSize != sects[i]->blockSize){
-			setErr(EOk, "mismatched block sizes in index sections");
-			return nil;
-		}
-		if(tabSize != sects[i]->tabSize){
-			setErr(EOk, "mismatched config table sizes in index sections");
-			return nil;
-		}
-		nb += sects[i]->blocks;
-	}
-
-	/*
-	 * check for duplicate names
-	 */
-	for(i = 0; i < n; i++){
-		for(j = i + 1; j < n; j++){
-			if(nameEq(sects[i]->name, sects[j]->name)){
-				setErr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
-				return nil;
-			}
-		}
-	}
-
-	if(nb >= ((u64int)1 << 32)){
-		setErr(EBug, "index too large");
-		return nil;
-	}
-	div = (((u64int)1 << 32) + nb - 1) / nb;
-	ub = (((u64int)1 << 32) - 1) / div + 1;
-	if(div < 100){
-		setErr(EBug, "index divisor too coarse");
-		return nil;
-	}
-	if(ub > nb){
-		setErr(EBug, "index initialization math wrong");
-		return nil;
-	}
-
-	/*
-	 * initialize each of the index sections
-	 * and the section map table
-	 */
-	smap = MKNZ(AMap, n);
-	if(smap == nil){
-		setErr(EOk, "can't create new index: out of memory");
-		return nil;
-	}
-	xb = nb - ub;
-	start = 0;
-	for(i = 0; i < n; i++){
-		stop = start + sects[i]->blocks - xb / n;
-		if(i == n - 1)
-			stop = ub;
-		sects[i]->start = start;
-		sects[i]->stop = stop;
-		nameCp(sects[i]->index, name);
-
-		smap[i].start = start;
-		smap[i].stop = stop;
-		nameCp(smap[i].name, sects[i]->name);
-		start = stop;
-	}
-
-	/*
-	 * initialize the index itself
-	 */
-	ix = MKZ(Index);
-	if(ix == nil){
-		setErr(EOk, "can't create new index: out of memory");
-		free(smap);
-		return nil;
-	}
-	ix->version = IndexVersion;
-	nameCp(ix->name, name);
-	ix->sects = sects;
-	ix->smap = smap;
-	ix->nsects = n;
-	ix->blockSize = blockSize;
-	ix->div = div;
-	ix->buckets = ub;
-	ix->tabSize = tabSize;
-	return ix;
-}
-
-ISect*
-initISect(Part *part)
-{
-	ISect *is;
-	ZBlock *b;
-	int ok;
-
-	b = allocZBlock(HeadSize, 0);
-	if(b == nil || !readPart(part, PartBlank, b->data, HeadSize)){
-		setErr(EAdmin, "can't read index section header: %R");
-		return nil;
-	}
-
-	is = MKZ(ISect);
-	if(is == nil){
-		freeZBlock(b);
-		return nil;
-	}
-	is->part = part;
-	ok = unpackISect(is, b->data);
-	freeZBlock(b);
-	if(!ok){
-		setErr(ECorrupt, "corrupted index section header: %R");
-		freeISect(is);
-		return nil;
-	}
-
-	if(is->version != ISectVersion){
-		setErr(EAdmin, "unknown index section version %d", is->version);
-		freeISect(is);
-		return nil;
-	}
-
-	return initISect1(is);
-}
-
-ISect*
-newISect(Part *part, char *name, u32int blockSize, u32int tabSize)
-{
-	ISect *is;
-	u32int tabBase;
-
-	is = MKZ(ISect);
-	if(is == nil)
-		return nil;
-
-	nameCp(is->name, name);
-	is->version = ISectVersion;
-	is->part = part;
-	is->blockSize = blockSize;
-	is->start = 0;
-	is->stop = 0;
-	tabBase = (PartBlank + HeadSize + blockSize - 1) & ~(blockSize - 1);
-	is->blockBase = (tabBase + tabSize + blockSize - 1) & ~(blockSize - 1);
-	is->blocks = is->part->size / blockSize - is->blockBase / blockSize;
-
-	is = initISect1(is);
-	if(is == nil)
-		return nil;
-
-	return is;
-}
-
-/*
- * initialize the computed paramaters for an index
- */
-static ISect*
-initISect1(ISect *is)
-{
-	u64int v;
-
-	is->buckMax = (is->blockSize - IBucketSize) / IEntrySize;
-	is->blockLog = u64log2(is->blockSize);
-	if(is->blockSize != (1 << is->blockLog)){
-		setErr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blockSize);
-		freeISect(is);
-		return nil;
-	}
-	partBlockSize(is->part, is->blockSize);
-	is->tabBase = (PartBlank + HeadSize + is->blockSize - 1) & ~(is->blockSize - 1);
-	if(is->tabBase >= is->blockBase){
-		setErr(ECorrupt, "index section config table overlaps bucket storage");
-		freeISect(is);
-		return nil;
-	}
-	is->tabSize = is->blockBase - is->tabBase;
-	v = is->part->size & ~(u64int)(is->blockSize - 1);
-	if(is->blockBase + (u64int)is->blocks * is->blockSize != v){
-		setErr(ECorrupt, "invalid blocks in index section %s", is->name);
-//ZZZZZZZZZ
-//		freeISect(is);
-//		return nil;
-	}
-
-	if(is->stop - is->start > is->blocks){
-		setErr(ECorrupt, "index section overflows available space");
-		freeISect(is);
-		return nil;
-	}
-	if(is->start > is->stop){
-		setErr(ECorrupt, "invalid index section range");
-		freeISect(is);
-		return nil;
-	}
-
-if(indexLock == nil)indexLock = vtLockAlloc();
-	return is;
-}
-
-int
-wbISect(ISect *is)
-{
-	ZBlock *b;
-
-	b = allocZBlock(HeadSize, 1);
-	if(b == nil)
-//ZZZ set error?
-		return 0;
-
-	if(!packISect(is, b->data)){
-		setErr(ECorrupt, "can't make index section header: %R");
-		freeZBlock(b);
-		return 0;
-	}
-	if(!writePart(is->part, PartBlank, b->data, HeadSize)){
-		setErr(EAdmin, "can't write index section header: %R");
-		freeZBlock(b);
-		return 0;
-	}
-	freeZBlock(b);
-
-	return 1;
-}
-
-void
-freeISect(ISect *is)
-{
-	if(is == nil)
-		return;
-	free(is);
-}
-
-void
-freeIndex(Index *ix)
-{
-	int i;
-
-	if(ix == nil)
-		return;
-	free(ix->amap);
-	free(ix->arenas);
-	for(i = 0; i < ix->nsects; i++)
-		freeISect(ix->sects[i]);
-	free(ix->sects);
-	free(ix->smap);
-	free(ix);
-}
-
-/*
- * write a clump to an available arena in the index
- * and return the address of the clump within the index.
-ZZZ question: should this distinguish between an arena
-filling up and real errors writing the clump?
- */
-u64int
-writeIClump(Index *ix, Clump *c, u8int *clbuf)
-{
-	u64int a;
-	int i;
-
-	for(i = ix->mapAlloc; i < ix->narenas; i++){
-		a = writeAClump(ix->arenas[i], c, clbuf);
-		if(a != TWID64)
-			return a + ix->amap[i].start;
-	}
-
-	setErr(EAdmin, "no space left in arenas");
-	return 0;
-}
-
-/*
- * convert an arena index to an relative address address
- */
-Arena*
-amapItoA(Index *ix, u64int a, u64int *aa)
-{
-	int r, l, m;
-
-	l = 1;
-	r = ix->narenas - 1;
-	while(l <= r){
-		m = (r + l) / 2;
-		if(ix->amap[m].start <= a)
-			l = m + 1;
-		else
-			r = m - 1;
-	}
-	l--;
-
-	if(a > ix->amap[l].stop){
-		setErr(ECrash, "unmapped address passed to amapItoA");
-		return nil;
-	}
-
-	if(ix->arenas[l] == nil){
-		setErr(ECrash, "unmapped arena selected in amapItoA");
-		return nil;
-	}
-	*aa = a - ix->amap[l].start;
-	return ix->arenas[l];
-}
-
-int
-iAddrEq(IAddr *ia1, IAddr *ia2)
-{
-	return ia1->type == ia2->type
-		&& ia1->size == ia2->size
-		&& ia1->blocks == ia2->blocks
-		&& ia1->addr == ia2->addr;
-}
-
-/*
- * lookup the the score int the partition
- *
- * nothing needs to be explicitly locked:
- * only static parts of ix are used, and
- * the bucket is locked by the DBlock lock.
- */
-int
-loadIEntry(Index *ix, u8int *score, int type, IEntry *ie)
-{
-	ISect *is;
-	DBlock *b;
-	IBucket ib;
-	u32int buck;
-	int h, ok;
-
-	buck = hashBits(score, 32) / ix->div;
-	ok = 0;
-	for(;;){
-		vtLock(stats.lock);
-		stats.indexReads++;
-		vtUnlock(stats.lock);
-		is = findISect(ix, buck);
-		if(is == nil){
-			setErr(EAdmin, "bad math in loadIEntry");
-			return 0;
-		}
-		buck -= is->start;
-		b = getDBlock(is->part, is->blockBase + ((u64int)buck << is->blockLog), 1);
-		if(b == nil)
-			break;
-
-		unpackIBucket(&ib, b->data);
-		if(!okIBucket(&ib, is))
-			break;
-
-		h = buckLook(score, type, ib.data, ib.n);
-		if(h & 1){
-			h ^= 1;
-			unpackIEntry(ie, &ib.data[h]);
-			ok = 1;
-			break;
-		}
-
-		break;
-	}
-	putDBlock(b);
-	return ok;
-}
-
-/*
- * insert or update an index entry into the appropriate bucket
- */
-int
-storeIEntry(Index *ix, IEntry *ie)
-{
-	ISect *is;
-	DBlock *b;
-	IBucket ib;
-	u32int buck;
-	int h, ok;
-
-	buck = hashBits(ie->score, 32) / ix->div;
-	ok = 0;
-	for(;;){
-		vtLock(stats.lock);
-		stats.indexWReads++;
-		vtUnlock(stats.lock);
-		is = findISect(ix, buck);
-		if(is == nil){
-			setErr(EAdmin, "bad math in storeIEntry");
-			return 0;
-		}
-		buck -= is->start;
-		b = getDBlock(is->part, is->blockBase + ((u64int)buck << is->blockLog), 1);
-		if(b == nil)
-			break;
-
-		unpackIBucket(&ib, b->data);
-		if(!okIBucket(&ib, is))
-			break;
-
-		h = buckLook(ie->score, ie->ia.type, ib.data, ib.n);
-		if(h & 1){
-			h ^= 1;
-			packIEntry(ie, &ib.data[h]);
-			ok = writeBucket(is, buck, &ib, b);
-			break;
-		}
-
-		if(ib.n < is->buckMax){
-			memmove(&ib.data[h + IEntrySize], &ib.data[h], ib.n * IEntrySize - h);
-			ib.n++;
-
-			packIEntry(ie, &ib.data[h]);
-			ok = writeBucket(is, buck, &ib, b);
-			break;
-		}
-
-		break;
-	}
-
-	putDBlock(b);
-	return ok;
-}
-
-static int
-writeBucket(ISect *is, u32int buck, IBucket *ib, DBlock *b)
-{
-	if(buck >= is->blocks)
-		setErr(EAdmin, "index write out of bounds: %d >= %d\n",
-				buck, is->blocks);
-	vtLock(stats.lock);
-	stats.indexWrites++;
-	vtUnlock(stats.lock);
-	packIBucket(ib, b->data);
-	return writePart(is->part, is->blockBase + ((u64int)buck << is->blockLog), b->data, is->blockSize);
-}
-
-/*
- * find the number of the index section holding score
- */
-int
-indexSect(Index *ix, u8int *score)
-{
-	u32int buck;
-	int r, l, m;
-
-	buck = hashBits(score, 32) / ix->div;
-	l = 1;
-	r = ix->nsects - 1;
-	while(l <= r){
-		m = (r + l) >> 1;
-		if(ix->sects[m]->start <= buck)
-			l = m + 1;
-		else
-			r = m - 1;
-	}
-	return l - 1;
-}
-
-/*
- * find the index section which holds buck
- */
-ISect*
-findISect(Index *ix, u32int buck)
-{
-	ISect *is;
-	int r, l, m;
-
-	l = 1;
-	r = ix->nsects - 1;
-	while(l <= r){
-		m = (r + l) >> 1;
-		if(ix->sects[m]->start <= buck)
-			l = m + 1;
-		else
-			r = m - 1;
-	}
-	is = ix->sects[l - 1];
-	if(is->start <= buck && is->stop > buck)
-		return is;
-	return nil;
-}
-
-static int
-okIBucket(IBucket *ib, ISect *is)
-{
-	if(ib->n <= is->buckMax && (ib->next == 0 || ib->next >= is->start && ib->next < is->stop))
-		return 1;
-
-	setErr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, next=%lud range=[%lud,%lud)",
-		ib->n, is->buckMax, ib->next, is->start, is->stop);
-	return 0;
-}
-
-/*
- * look for score within data;
- * return 1 | byte index of matching index,
- * or 0 | index of least element > score
- */
-static int
-buckLook(u8int *score, int type, u8int *data, int n)
-{
-	int i, r, l, m, h, c, cc;
-
-	l = 0;
-	r = n - 1;
-	while(l <= r){
-		m = (r + l) >> 1;
-		h = m * IEntrySize;
-		for(i = 0; i < VtScoreSize; i++){
-			c = score[i];
-			cc = data[h + i];
-			if(c != cc){
-				if(c > cc)
-					l = m + 1;
-				else
-					r = m - 1;
-				goto cont;
-			}
-		}
-		cc = data[h + IEntryTypeOff];
-		if(type != cc){
-			if(type > cc)
-				l = m + 1;
-			else
-				r = m - 1;
-			goto cont;
-		}
-		return h | 1;
-	cont:;
-	}
-
-	return l * IEntrySize;
-}
-
-/*
- * compare two IEntries; consistent with buckLook
- */
-int
-ientryCmp(void *vie1, void *vie2)
-{
-	u8int *ie1, *ie2;
-	int i, v1, v2;
-
-	ie1 = vie1;
-	ie2 = vie2;
-	for(i = 0; i < VtScoreSize; i++){
-		v1 = ie1[i];
-		v2 = ie2[i];
-		if(v1 != v2){
-			if(v1 < v2)
-				return -1;
-			return 1;
-		}
-	}
-	v1 = ie1[IEntryTypeOff];
-	v2 = ie2[IEntryTypeOff];
-	if(v1 != v2){
-		if(v1 < v2)
-			return -1;
-		return 1;
-	}
-	return 0;
-}

+ 0 - 205
sys/src/cmd/venti/lump.c

@@ -1,205 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-int			queueWrites = 0;
-
-static Packet		*readILump(Lump *u, IAddr *ia, u8int *score, int rac);
-
-Packet*
-readLump(u8int *score, int type, u32int size)
-{
-	Lump *u;
-	Packet *p;
-	IAddr ia;
-	u32int n;
-	int rac;
-
-	vtLock(stats.lock);
-	stats.lumpReads++;
-	vtUnlock(stats.lock);
-	u = lookupLump(score, type);
-	if(u->data != nil){
-		n = packetSize(u->data);
-		if(n > size){
-			setErr(EOk, "read too small: asked for %d need at least %d", size, n);
-			putLump(u);
-
-			return nil;
-		}
-		p = packetDup(u->data, 0, n);
-		putLump(u);
-
-		return p;
-	}
-
-	if(!lookupScore(score, type, &ia, &rac)){
-		//ZZZ place to check for someone trying to guess scores
-		setErr(EOk, "no block with that score exists");
-
-		putLump(u);
-		return nil;
-	}
-	if(ia.size > size){
-		setErr(EOk, "read too small 1: asked for %d need at least %d", size, ia.size);
-
-		putLump(u);
-		return nil;
-	}
-
-	p = readILump(u, &ia, score, rac);
-	putLump(u);
-
-	return p;
-}
-
-/*
- * save away a lump, and return it's score.
- * doesn't store duplicates, but checks that the data is really the same.
- */
-int
-writeLump(Packet *p, u8int *score, int type, u32int creator)
-{
-	Lump *u;
-	int ok;
-
-	vtLock(stats.lock);
-	stats.lumpWrites++;
-	vtUnlock(stats.lock);
-
-	packetSha1(p, score);
-
-	u = lookupLump(score, type);
-	if(u->data != nil){
-		ok = 1;
-		if(packetCmp(p, u->data) != 0){
-			setErr(EStrange, "score collision");
-			ok = 0;
-		}
-		packetFree(p);
-		putLump(u);
-		return ok;
-	}
-
-	if(queueWrites)
-		return queueWrite(u, p, creator);
-
-	ok = writeQLump(u, p, creator);
-
-	putLump(u);
-	return ok;
-}
-
-int
-writeQLump(Lump *u, Packet *p, int creator)
-{
-	ZBlock *flat;
-	Packet *old;
-	IAddr ia;
-	int ok;
-	int rac;
-
-	if(lookupScore(u->score, u->type, &ia, &rac)){
-		/*
-		 * if the read fails,
-		 * assume it was corrupted data and store the block again
-		 */
-		old = readILump(u, &ia, u->score, rac);
-		if(old != nil){
-			ok = 1;
-			if(packetCmp(p, old) != 0){
-				setErr(EStrange, "score collision");
-				ok = 0;
-			}
-			packetFree(p);
-			packetFree(old);
-
-			return ok;
-		}
-		logErr(EAdmin, "writelump: read %V failed, rewriting: %R\n", u->score);
-	}
-
-	flat = packet2ZBlock(p, packetSize(p));
-	ok = storeClump(mainIndex, flat, u->score, u->type, creator, &ia);
-	freeZBlock(flat);
-	if(ok)
-		ok = insertScore(u->score, &ia, 1);
-	if(ok)
-		insertLump(u, p);
-	else
-		packetFree(p);
-
-	return ok;
-}
-
-static void
-readAhead(u64int a, Arena *arena, u64int aa, int n)
-{
-	u8int buf[ClumpSize];
-	Clump cl;
-	IAddr ia;
-
-	while(n > 0) {
-		if (aa >= arena->used)
-			break;
-		if(readArena(arena, aa, buf, ClumpSize) < ClumpSize)
-			break;
-		if(!unpackClump(&cl, buf))
-			break;
-		ia.addr = a;
-		ia.type = cl.info.type;
-		ia.size = cl.info.uncsize;
-		ia.blocks = (cl.info.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
-		insertScore(cl.info.score, &ia, 0);
-		a += ClumpSize + cl.info.size;
-		aa += ClumpSize + cl.info.size;
-		n--;
-	}
-}
-
-static Packet*
-readILump(Lump *u, IAddr *ia, u8int *score, int rac)
-{
-	Arena *arena;
-	ZBlock *zb;
-	Packet *p, *pp;
-	Clump cl;
-	u64int a, aa;
-	u8int sc[VtScoreSize];
-
-	arena = amapItoA(mainIndex, ia->addr, &aa);
-	if(arena == nil)
-		return nil;
-
-	zb = loadClump(arena, aa, ia->blocks, &cl, sc, paranoid);
-	if(zb == nil)
-		return nil;
-
-	if(ia->size != cl.info.uncsize){
-		setErr(EInconsist, "index and clump size mismatch");
-		freeZBlock(zb);
-		return nil;
-	}
-	if(ia->type != cl.info.type){
-		setErr(EInconsist, "index and clump type mismatch");
-		freeZBlock(zb);
-		return nil;
-	}
-	if(!scoreEq(score, sc)){
-		setErr(ECrash, "score mismatch");
-		freeZBlock(zb);
-		return nil;
-	}
-
-	if(rac == 0) {
-		a = ia->addr + ClumpSize + cl.info.size;
-		aa += ClumpSize + cl.info.size;
-		readAhead(a, arena, aa, 20);
-	}
-
-	p = zblock2Packet(zb, cl.info.uncsize);
-	freeZBlock(zb);
-	pp = packetDup(p, 0, packetSize(p));
-	insertLump(u, pp);
-	return p;
-}

+ 0 - 382
sys/src/cmd/venti/lumpcache.c

@@ -1,382 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-typedef struct LumpCache	LumpCache;
-
-enum
-{
-	HashLog		= 9,
-	HashSize	= 1<<HashLog,
-	HashMask	= HashSize - 1,
-};
-
-struct LumpCache
-{
-	VtLock		*lock;
-	VtRendez	*full;
-	Lump		*free;			/* list of available lumps */
-	u32int		allowed;		/* total allowable space for packets */
-	u32int		avail;			/* remaining space for packets */
-	u32int		now;			/* ticks for usage timestamps */
-	Lump		**heads;		/* hash table for finding address */
-	int		nheap;			/* number of available victims */
-	Lump		**heap;			/* heap for locating victims */
-	int		nblocks;		/* number of blocks allocated */
-	Lump		*blocks;		/* array of block descriptors */
-};
-
-static LumpCache	lumpCache;
-
-static void	delHeap(Lump *db);
-static int	downHeap(int i, Lump *b);
-static void	fixHeap(int i, Lump *b);
-static int	upHeap(int i, Lump *b);
-static Lump	*bumpLump(void);
-
-void
-initLumpCache(u32int size, u32int nblocks)
-{
-	Lump *last, *b;
-	int i;
-
-	lumpCache.lock = vtLockAlloc();
-	lumpCache.full = vtRendezAlloc(lumpCache.lock);
-	lumpCache.nblocks = nblocks;
-	lumpCache.allowed = size;
-	lumpCache.avail = size;
-	lumpCache.heads = MKNZ(Lump*, HashSize);
-	lumpCache.heap = MKNZ(Lump*, nblocks);
-	lumpCache.blocks = MKNZ(Lump, nblocks);
-
-	last = nil;
-	for(i = 0; i < nblocks; i++){
-		b = &lumpCache.blocks[i];
-		b->type = TWID8;
-		b->heap = TWID32;
-		b->lock = vtLockAlloc();
-		b->next = last;
-		last = b;
-	}
-	lumpCache.free = last;
-	lumpCache.nheap = 0;
-}
-
-Lump*
-lookupLump(u8int *score, int type)
-{
-	Lump *b;
-	u32int h;
-
-	h = hashBits(score, HashLog);
-
-	/*
-	 * look for the block in the cache
-	 */
-//checkLumpCache();
-	vtLock(lumpCache.lock);
-again:
-	for(b = lumpCache.heads[h]; b != nil; b = b->next){
-		if(scoreEq(score, b->score) && type == b->type){
-			vtLock(stats.lock);
-			stats.lumpHit++;
-			vtUnlock(stats.lock);
-			goto found;
-		}
-	}
-
-	/*
-	 * missed: locate the block with the oldest second to last use.
-	 * remove it from the heap, and fix up the heap.
-	 */
-	while(lumpCache.free == nil){
-		if(bumpLump() == nil){
-			logErr(EAdmin, "all lump cache blocks in use");
-			vtSleep(lumpCache.full);
-			goto again;
-		}
-	}
-	vtLock(stats.lock);
-	stats.lumpMiss++;
-	vtUnlock(stats.lock);
-
-	b = lumpCache.free;
-	lumpCache.free = b->next;
-
-	/*
-	 * the new block has no last use, so assume it happens sometime in the middle
-ZZZ this is not reasonable
-	 */
-	b->used = (b->used2 + lumpCache.now) / 2;
-
-	/*
-	 * rechain the block on the correct hash chain
-	 */
-	b->next = lumpCache.heads[h];
-	lumpCache.heads[h] = b;
-	if(b->next != nil)
-		b->next->prev = b;
-	b->prev = nil;
-
-	scoreCp(b->score, score);
-	b->type = type;
-	b->size = 0;
-	b->data = nil;
-
-found:
-	b->ref++;
-	b->used2 = b->used;
-	b->used = lumpCache.now++;
-	if(b->heap != TWID32)
-		fixHeap(b->heap, b);
-	vtUnlock(lumpCache.lock);
-
-//checkLumpCache();
-
-	vtLock(b->lock);
-
-	return b;
-}
-
-void
-insertLump(Lump *b, Packet *p)
-{
-	u32int size;
-
-	/*
-	 * look for the block in the cache
-	 */
-//checkLumpCache();
-	vtLock(lumpCache.lock);
-again:
-
-	/*
-	 * missed: locate the block with the oldest second to last use.
-	 * remove it from the heap, and fix up the heap.
-	 */
-	size = packetAllocatedSize(p);
-//ZZZ
-	while(lumpCache.avail < size){
-		if(bumpLump() == nil){
-			logErr(EAdmin, "all lump cache blocks in use");
-			vtSleep(lumpCache.full);
-			goto again;
-		}
-	}
-	b->data = p;
-	b->size = size;
-	lumpCache.avail -= size;
-
-	vtUnlock(lumpCache.lock);
-//checkLumpCache();
-}
-
-void
-putLump(Lump *b)
-{
-	if(b == nil)
-		return;
-
-	vtUnlock(b->lock);
-//checkLumpCache();
-	vtLock(lumpCache.lock);
-	if(--b->ref == 0){
-		if(b->heap == TWID32)
-			upHeap(lumpCache.nheap++, b);
-		vtWakeup(lumpCache.full);
-	}
-
-	vtUnlock(lumpCache.lock);
-//checkLumpCache();
-}
-
-/*
- * remove some lump from use and update the free list and counters
- */
-static Lump*
-bumpLump(void)
-{
-	Lump *b;
-	u32int h;
-
-	/*
-	 * remove blocks until we find one that is unused
-	 * referenced blocks are left in the heap even though
-	 * they can't be scavenged; this is simple a speed optimization
-	 */
-	for(;;){
-		if(lumpCache.nheap == 0)
-			return nil;
-		b = lumpCache.heap[0];
-		delHeap(b);
-		if(!b->ref){
-			vtWakeup(lumpCache.full);
-			break;
-		}
-	}
-
-	/*
-	 * unchain the block
-	 */
-	if(b->prev == nil){
-		h = hashBits(b->score, HashLog);
-		if(lumpCache.heads[h] != b)
-			fatal("bad hash chains in lump cache");
-		lumpCache.heads[h] = b->next;
-	}else
-		b->prev->next = b->next;
-	if(b->next != nil)
-		b->next->prev = b->prev;
-
-	if(b->data != nil){
-		packetFree(b->data);
-		b->data = nil;
-		lumpCache.avail += b->size;
-		b->size = 0;
-	}
-	b->type = TWID8;
-
-	b->next = lumpCache.free;
-	lumpCache.free = b;
-
-	return b;
-}
-
-/*
- * delete an arbitrary block from the heap
- */
-static void
-delHeap(Lump *db)
-{
-	fixHeap(db->heap, lumpCache.heap[--lumpCache.nheap]);
-	db->heap = TWID32;
-}
-
-/*
- * push an element up or down to it's correct new location
- */
-static void
-fixHeap(int i, Lump *b)
-{
-	if(upHeap(i, b) == i)
-		downHeap(i, b);
-}
-
-static int
-upHeap(int i, Lump *b)
-{
-	Lump *bb;
-	u32int now;
-	int p;
-
-	now = lumpCache.now;
-	for(; i != 0; i = p){
-		p = (i - 1) >> 1;
-		bb = lumpCache.heap[p];
-		if(b->used2 - now >= bb->used2 - now)
-			break;
-		lumpCache.heap[i] = bb;
-		bb->heap = i;
-	}
-
-	lumpCache.heap[i] = b;
-	b->heap = i;
-	return i;
-}
-
-static int
-downHeap(int i, Lump *b)
-{
-	Lump *bb;
-	u32int now;
-	int k;
-
-	now = lumpCache.now;
-	for(; ; i = k){
-		k = (i << 1) + 1;
-		if(k >= lumpCache.nheap)
-			break;
-		if(k + 1 < lumpCache.nheap && lumpCache.heap[k]->used2 - now > lumpCache.heap[k + 1]->used2 - now)
-			k++;
-		bb = lumpCache.heap[k];
-		if(b->used2 - now <= bb->used2 - now)
-			break;
-		lumpCache.heap[i] = bb;
-		bb->heap = i;
-	}
-
-	lumpCache.heap[i] = b;
-	b->heap = i;
-	return i;
-}
-
-static void
-findBlock(Lump *bb)
-{
-	Lump *b, *last;
-	int h;
-
-	last = nil;
-	h = hashBits(bb->score, HashLog);
-	for(b = lumpCache.heads[h]; b != nil; b = b->next){
-		if(last != b->prev)
-			fatal("bad prev link");
-		if(b == bb)
-			return;
-		last = b;
-	}
-	fatal("block score=%V type=%#x missing from hash table", bb->score, bb->type);
-}
-
-void
-checkLumpCache(void)
-{
-	Lump *b;
-	u32int size, now, nfree;
-	int i, k, refed;
-
-	vtLock(lumpCache.lock);
-	now = lumpCache.now;
-	for(i = 0; i < lumpCache.nheap; i++){
-		if(lumpCache.heap[i]->heap != i)
-			fatal("lc: mis-heaped at %d: %d", i, lumpCache.heap[i]->heap);
-		if(i > 0 && lumpCache.heap[(i - 1) >> 1]->used2 - now > lumpCache.heap[i]->used2 - now)
-			fatal("lc: bad heap ordering");
-		k = (i << 1) + 1;
-		if(k < lumpCache.nheap && lumpCache.heap[i]->used2 - now > lumpCache.heap[k]->used2 - now)
-			fatal("lc: bad heap ordering");
-		k++;
-		if(k < lumpCache.nheap && lumpCache.heap[i]->used2 - now > lumpCache.heap[k]->used2 - now)
-			fatal("lc: bad heap ordering");
-	}
-
-	refed = 0;
-	size = 0;
-	for(i = 0; i < lumpCache.nblocks; i++){
-		b = &lumpCache.blocks[i];
-		if(b->data == nil && b->size != 0)
-			fatal("bad size: %d data=%p", b->size, b->data);
-		if(b->ref && b->heap == TWID32)
-			refed++;
-		if(b->type != TWID8){
-			findBlock(b);
-			size += b->size;
-		}
-		if(b->heap != TWID32
-		&& lumpCache.heap[b->heap] != b)
-			fatal("lc: spurious heap value");
-	}
-	if(lumpCache.avail != lumpCache.allowed - size)
-		fatal("mismatched available=%d and allowed=%d - used=%d space", lumpCache.avail, lumpCache.allowed, size);
-
-	nfree = 0;
-	for(b = lumpCache.free; b != nil; b = b->next){
-		if(b->type != TWID8 || b->heap != TWID32)
-			fatal("lc: bad free list");
-		nfree++;
-	}
-
-	if(lumpCache.nheap + nfree + refed != lumpCache.nblocks)
-		fatal("lc: missing blocks: %d %d %d %d", lumpCache.nheap, refed, nfree, lumpCache.nblocks);
-	vtUnlock(lumpCache.lock);
-}

+ 0 - 153
sys/src/cmd/venti/lumpqueue.c

@@ -1,153 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-typedef struct LumpQueue	LumpQueue;
-typedef struct WLump		WLump;
-
-enum
-{
-	MaxLumpQ	= 1 << 3	/* max. lumps on a single write queue, must be pow 2 */
-};
-
-struct WLump
-{
-	Lump	*u;
-	Packet	*p;
-	int	creator;
-	int	gen;
-};
-
-struct LumpQueue
-{
-	VtLock		*lock;
-	VtRendez *flush;
-	VtRendez	*full;
-	VtRendez	*empty;
-	WLump		q[MaxLumpQ];
-	int		w;
-	int		r;
-};
-
-static LumpQueue	*lumpqs;
-static int		nqs;
-
-static VtLock	*glk;
-static int		gen;
-
-static void	doQueue(void *vq);
-
-int
-initLumpQueues(int nq)
-{
-	LumpQueue *q;
-
-	int i;
-	nqs = nq;
-
-	glk = vtLockAlloc();
-	lumpqs = MKNZ(LumpQueue, nq);
-
-	for(i = 0; i < nq; i++){
-		q = &lumpqs[i];
-		q->lock = vtLockAlloc();
-		q->full = vtRendezAlloc(q->lock);
-		q->empty = vtRendezAlloc(q->lock);
-		q->flush = vtRendezAlloc(q->lock);
-
-		if(vtThread(doQueue, q) < 0){
-			setErr(EOk, "can't start write queue slave: %R");
-			return 0;
-		}
-	}
-
-	return 1;
-}
-
-/*
- * queue a lump & it's packet data for writing
- */
-int
-queueWrite(Lump *u, Packet *p, int creator)
-{
-	LumpQueue *q;
-	int i;
-
-	i = indexSect(mainIndex, u->score);
-	if(i < 0 || i >= nqs){
-		setErr(EBug, "internal error: illegal index section in queueWrite");
-		return 0;
-	}
-
-	q = &lumpqs[i];
-
-	vtLock(q->lock);
-	while(q->r == ((q->w + 1) & (MaxLumpQ - 1)))
-		vtSleep(q->full);
-
-	q->q[q->w].u = u;
-	q->q[q->w].p = p;
-	q->q[q->w].creator = creator;
-	q->q[q->w].gen = gen;
-	q->w = (q->w + 1) & (MaxLumpQ - 1);
-
-	vtWakeup(q->empty);
-
-	vtUnlock(q->lock);
-
-	return 1;
-}
-
-void
-queueFlush(void)
-{
-	int i;
-	LumpQueue *q;
-
-	vtLock(glk);
-	gen++;
-	vtUnlock(glk);
-
-	for(i=0; i<mainIndex->nsects; i++){
-		q = &lumpqs[i];
-		vtLock(q->lock);
-		while(q->w != q->r && gen - q->q[q->r].gen > 0)
-			vtSleep(q->flush);
-		vtUnlock(q->lock);
-	}
-}
-
-static void
-doQueue(void *vq)
-{
-	LumpQueue *q;
-	Lump *u;
-	Packet *p;
-	int creator;
-
-	q = vq;
-	for(;;){
-		vtLock(q->lock);
-		while(q->w == q->r)
-			vtSleep(q->empty);
-
-		u = q->q[q->r].u;
-		p = q->q[q->r].p;
-		creator = q->q[q->r].creator;
-
-		vtWakeup(q->full);
-
-		vtUnlock(q->lock);
-
-		if(!writeQLump(u, p, creator))
-			fprint(2, "%s: failed to write lump for %V: %R\n",
-				argv0, u->score);
-
-		vtLock(q->lock);
-		q->r = (q->r + 1) & (MaxLumpQ - 1);
-		vtWakeup(q->flush);
-		vtUnlock(q->lock);
-
-		putLump(u);
-	}
-}

+ 11 - 113
sys/src/cmd/venti/mkfile

@@ -1,127 +1,25 @@
 </$objtype/mkfile
 
-CFLAGS=$CFLAGS
-
-UPDATEFLAGS=
-
-FILES=\
-	arena\
-	arenas\
-	buildbuck\
-	clump\
-	config\
-	conv\
-	dcache\
-	dump\
-	httpd\
-	icache\
-	ifile\
-	index\
-	lump\
-	lumpcache\
-	lumpqueue\
-	part\
-	score\
-	sortientry\
-	stats\
-	syncarena\
-	syncindex0\
-	unwhack\
-	utils\
-	unittoull\
-	whack\
-	xml\
-	zeropart\
-
-LIBCFILES=${FILES:%=%.c}
-LIBOFILES=${FILES:%=%.$O}
-
-SLIB=libvs.a.$O
-
-LIB=$SLIB\
-
-HFILES=	dat.h\
-	fns.h\
-	stdinc.h\
-
 TARG=\
-	venti\
-	fmtarenas\
-	fmtisect\
-	fmtindex\
-	buildindex\
-	checkarenas\
-	checkindex\
-	clumpstats\
-	findscore\
-	rdarena\
-	wrarena\
-	syncindex\
-	verifyarena\
-	sync\
 	read\
+	sync\
 	write\
-	copy\
-	conf\
-	printarena\
-#	dumparena\
-
-CFILES=${TARG:%=%.c} $LIBCFILES
 
-UPDATE=mkfile\
-	mkxml\
-	$HFILES\
-	$CFILES\
+#	copy\
 
 BIN=/$objtype/bin/venti
 
-it:V: all
-
 </sys/src/cmd/mkmany
 
-INC=-I../include -I../lib/venti
-CFLAGS=$INC $CFLAGS
-
-acid:	$HFILES icache.c
-	$CC $INC -a icache.c > acid || rm acid
-
-xml.c:	mkxml dat.h
-	mkxml dat.h > xml.c
+CFLAGS=$CFLAGS -I.
 
-$SLIB(%.$O):N: %.$O
-$SLIB:	${LIBOFILES:%=$SLIB(%)}
-	names = `{echo $newprereq |sed 's/ /\n/g' |sed -n 's/'$SLIB'\(([^)]+)\)/\1/gp'}
-	ar vu $SLIB $names
-#	rm $names
+extra:V: $O.devnull $O.mkroot $O.randtest $O.readlist $O.ro $O.root
 
-test:V: all
-	slay $O.venti | rc
-	rm -f /tmp/arenas /tmp/isect	# zero them
-	{syscall seek 1 64000000 0; echo} >>/tmp/arenas
-	{syscall seek 1 3000000 0; echo} >>/tmp/isect
-	$O.fmtarenas -Z arena. /tmp/arenas
-	$O.fmtisect -Z isect0 /tmp/isect
-	$O.conf -w /tmp/arenas <{echo '
-	mem 1m
-	icmem 1m
-	bcmem 1m
-	index main
-	isect /tmp/isect
-	arenas /tmp/arenas
-	'}
-	$O.fmtindex /tmp/arenas
-	echo 
-	echo
-	echo starting venti
-	echo
-	echo
-	$O.venti -c /tmp/arenas -h tcp!127.1!888 -a tcp!127.1!777
+all:V:		srv.all.dir
+install:V:	srv.install.dir
+installall:V:	srv.installall.dir
+clean:V:	srv.clean.dir
+nuke:V:		srv.nuke.dir
 
-$O.conf:D: conf.rc
-	{
-		echo '#!/bin/rc'
-		echo '# THIS FILE IS AUTOMATICALLY GENERATED'
-		echo '# FROM /sys/src/cmd/fossil/conf.rc.  DO NOT EDIT.'
-		echo 
-		sed 1d conf.rc
-	} >$target && chmod +x $target
+srv.%.dir:V:
+	@{ cd srv && mk $stem }

+ 27 - 28
sys/src/cmd/venti/mkroot.c

@@ -1,6 +1,7 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <thread.h>
 
 char *host;
 
@@ -8,15 +9,15 @@ void
 usage(void)
 {
 	fprint(2, "usage: mkroot [-h host] name type score blocksize prev\n");
-	exits("usage");
+	threadexitsall("usage");
 }
 
-int
-main(int argc, char *argv[])
+void
+threadmain(int argc, char *argv[])
 {
 	uchar score[VtScoreSize];
 	uchar buf[VtRootSize];
-	VtSession *z;
+	VtConn *z;
 	VtRoot root;
 
 	ARGBEGIN{
@@ -31,32 +32,30 @@ main(int argc, char *argv[])
 	if(argc != 5)
 		usage();
 
-	vtAttach();
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
+	fmtinstall('V', vtscorefmt);
+	fmtinstall('F', vtfcallfmt);
 
-	root.version = VtRootVersion;
 	strecpy(root.name, root.name+sizeof root.name, argv[0]);
 	strecpy(root.type, root.type+sizeof root.type, argv[1]);
-	if(!vtParseScore(argv[2], strlen(argv[2]), root.score))
-		vtFatal("bad score '%s'", argv[2]);
-	root.blockSize = atoi(argv[3]);
-	if(!vtParseScore(argv[4], strlen(argv[4]), root.prev))
-		vtFatal("bad score '%s'", argv[4]);
-	vtRootPack(&root, buf);
-
-	z = vtDial(host, 0);
+	if(vtparsescore(argv[2], nil, root.score) < 0)
+		sysfatal("bad score '%s'", argv[2]);
+	root.blocksize = atoi(argv[3]);
+	if(vtparsescore(argv[4], nil, root.prev) < 0)
+		sysfatal("bad score '%s'", argv[4]);
+	vtrootpack(&root, buf);
+
+	z = vtdial(host);
 	if(z == nil)
-		vtFatal("could not connect to server: %R");
+		sysfatal("could not connect to server: %r");
 
-	if(!vtConnect(z, 0))
-		sysfatal("vtConnect: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
 
-	if(!vtWrite(z, score, VtRootType, buf, VtRootSize))
-		vtFatal("vtWrite: %R");
-	vtClose(z);
+	if(vtwrite(z, score, VtRootType, buf, VtRootSize) < 0)
+		sysfatal("vtwrite: %r");
+	if(vtsync(z) < 0)
+		sysfatal("vtsync: %r");
+	vthangup(z);
 	print("%V\n", score);
-	vtDetach();
-	exits(0);
-	return 0;
+	threadexitsall(0);
 }

+ 0 - 47
sys/src/cmd/venti/mkxml

@@ -1,47 +0,0 @@
-#!/bin/rc
-
-echo \
-'#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-#include "httpd.h"
-#include "xml.h"
-'
-
-sed -n '/<struct/,/<\/struct>/{
-	s/^[^<]+</</
-	t cleart
-:cleart
-	s/<struct name="(.*)" type="(.*)">.*/void xml\1(Hio *hout, \2s, char *tag, int indent){\n	xmlIndent(hout, indent);\n	hprint(hout, "<%s", tag);\n	hprint(hout, ">\\n");/
-	t start
-	s/<field name="(.*)" val="(.*)" type="(.*)"\/>.*/	xml\3(hout, \2, "\1");\n	hprint(hout, ">\\n");/
-	t field
-	s/<array name="(.*)" val="(.*)" elems="(.*)" type="(.*)"\/>.*/	xmlIndent(hout, indent + 1);\n	hprint(hout, "<\1s>\\n");\n	for(i = 0; i < \3; i++)\n		xml\4(hout, \2, "\1", indent + 2);\n	xmlIndent(hout, indent + 1);\n	hprint(hout, "<\/\1s>\\n");/
-	t array
-	s/<\/struct>/	xmlIndent(hout, indent);\n	hprint(hout, "<\/%s>\\n", tag);\n}\n/
-	t produce
-}
-b
-:start
-	h
-	b
-:field
-	x
-	s/	hprint\(hout, ">\\n"\);//g
-	x
-	H
-	b
-:array
-	H
-	x
-	s/{\n/&	int i;\n\n/
-	s/^	int i;\n\n	int i;\n/	int i;\n/
-	x
-	b
-:produce
-	H
-	g
-	s/\n\n/\n/g
-	s/	hprint.hout, ">\\n".;\n	xmlIndent.hout, indent.;\n	hprint.*/	hprint(hout, "\/>\\n");/
-	p
-' $1

+ 0 - 42
sys/src/cmd/venti/mkxml.elems

@@ -1,42 +0,0 @@
-#!/bin/rc
-
-echo \
-'#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-#include "httpd.h"
-#include "xml.h"
-'
-
-sed -n '/<struct/,/<\/struct>/{
-	s/^[^<]+</</
-	t cleart
-:cleart
-	s/<struct name="(.*)" type="(.*)">.*/void xml\1(Hio *hout, \2s, char *tag, int indent){\n	xmlIndent(hout, indent);\n	hprint(hout, "<%s>\\n", tag);/
-	t start
-	s/<field name="(.*)" val="(.*)" type="(.*)"\/>.*/	xml\3(hout, \2, "\1", indent + 1);/
-	t field
-	s/<array name="(.*)" val="(.*)" elems="(.*)" type="(.*)"\/>.*/	for(i = 0; i < \3; i++)\n		xml\4(hout, \2, "\1", indent + 1);/
-	t array
-	s/<\/struct>/	xmlIndent(hout, indent);\n	hprint(hout, "<\/%s>\\n", tag);\n}\n/
-	t produce
-}
-b
-:start
-	h
-	b
-:field
-	H
-	b
-:array
-	H
-	x
-	s/{\n/&	int i;\n\n/
-	s/^	int i;\n\n	int i;\n/	int i;\n/
-	x
-	b
-:produce
-	H
-	g
-	p
-' $1

+ 0 - 128
sys/src/cmd/venti/part.c

@@ -1,128 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-u32int	maxBlockSize;
-int	readonly;
-
-Part*
-initPart(char *name, int writable)
-{
-	Part *part;
-	Dir *dir;
-	int how;
-
-	part = MK(Part);
-	part->name = estrdup(name);
-	if(!writable && readonly)
-		how = OREAD;
-	else
-		how = ORDWR;
-	part->fd = open(name, how);
-	if(part->fd < 0){
-		if(how == ORDWR)
-			part->fd = open(name, OREAD);
-		if(part->fd < 0){
-			freePart(part);
-			setErr(EOk, "can't open partition='%s': %r", name);
-			return nil;
-		}
-		fprint(2, "warning: %s opened for reading only\n", name);
-	}
-	dir = dirfstat(part->fd);
-	if(dir == nil){
-		freePart(part);
-		setErr(EOk, "can't stat partition='%s': %r", name);
-		return nil;
-	}
-	part->size = dir->length;
-	part->blockSize = 0;
-	free(dir);
-	return part;
-}
-
-void
-freePart(Part *part)
-{
-	if(part == nil)
-		return;
-	close(part->fd);
-	free(part->name);
-	free(part);
-}
-
-void
-partBlockSize(Part *part, u32int blockSize)
-{
-	if(part->blockSize)
-		fatal("resetting partition=%s's block size", part->name);
-	part->blockSize = blockSize;
-	if(blockSize > maxBlockSize)
-		maxBlockSize = blockSize;
-}
-
-int
-writePart(Part *part, u64int addr, u8int *buf, u32int n)
-{
-	long m, mm, nn;
-
-	vtLock(stats.lock);
-	stats.diskWrites++;
-	stats.diskBWrites += n;
-	vtUnlock(stats.lock);
-
-	if(addr > part->size || addr + n > part->size){
-		setErr(ECorrupt, "out of bounds write to partition='%s'", part->name);
-		return 0;
-	}
-	for(nn = 0; nn < n; nn += m){
-		mm = n - nn;
-		if(mm > MaxIo)
-			mm = MaxIo;
-		m = pwrite(part->fd, &buf[nn], mm, addr + nn);
-		if(m != mm){
-			if(m < 0){
-				setErr(EOk, "can't write partition='%s': %r", part->name);
-				return 0;
-			}
-			logErr(EOk, "truncated write to partition='%s' n=%ld wrote=%ld", part->name, mm, m);
-		}
-	}
-	return 1;
-}
-
-int
-readPart(Part *part, u64int addr, u8int *buf, u32int n)
-{
-	long m, mm, nn;
-	int i;
-
-	vtLock(stats.lock);
-	stats.diskReads++;
-	stats.diskBReads += n;
-	vtUnlock(stats.lock);
-
-	if(addr > part->size || addr + n > part->size){
-		setErr(ECorrupt, "out of bounds read from partition='%s': addr=%lld n=%d size=%lld", part->name, addr, n, part->size);
-		return 0;
-	}
-	for(nn = 0; nn < n; nn += m){
-		mm = n - nn;
-		if(mm > MaxIo)
-			mm = MaxIo;
-		m = -1;
-		for(i=0; i<4; i++) {
-			m = pread(part->fd, &buf[nn], mm, addr + nn);
-			if(m == mm)
-				break;
-		}
-		if(m != mm){
-			if(m < 0){
-				setErr(EOk, "can't read partition='%s': %r", part->name);
-				return 0;
-			}
-			logErr(EOk, "warning: truncated read from partition='%s' n=%ld read=%ld", part->name, mm, m);
-		}
-	}
-	return 1;
-}

+ 0 - 137
sys/src/cmd/venti/printarena.c

@@ -1,137 +0,0 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-int wanttype;
-int readonly = 1;	/* for part.c */
-
-void
-usage(void)
-{
-	fprint(2, "usage: printarena arenafile [offset]\n");
-	exits("usage");
-}
-
-static void
-rdArena(Arena *arena, u64int offset)
-{
-	u64int a, aa, e;
-	u32int magic;
-	Clump cl;
-	uchar score[VtScoreSize];
-	ZBlock *lump;
-
-	printArena(2, arena);
-
-	a = arena->base;
-	e = arena->base + arena->size;
-	if(offset != ~(u64int)0) {
-		if(offset >= e-a)
-			vtFatal("bad offset %llud >= %llud\n",
-				offset, e-a);
-		aa = offset;
-	} else
-		aa = 0;
-
-	for(; aa < e; aa += ClumpSize+cl.info.size) {
-		magic = clumpMagic(arena, aa);
-		if(magic == ClumpFreeMagic)
-			break;
-		if(magic != ClumpMagic) {
-			fprint(2, "illegal clump magic number %#8.8ux offset %llud\n",
-				magic, aa);
-			break;
-		}
-		lump = loadClump(arena, aa, 0, &cl, score, 0);
-		if(lump == nil) {
-			fprint(2, "clump %llud failed to read: %R\n", aa);
-			break;
-		}
-		if(cl.info.type != VtTypeCorrupt) {
-			scoreMem(score, lump->data, cl.info.uncsize);
-			if(!scoreEq(cl.info.score, score)) {
-				fprint(2, "clump %llud has mismatched score\n", aa);
-				break;
-			}
-			if(!vtTypeValid(cl.info.type)) {
-				fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
-				break;
-			}
-		}
-		if(wanttype == 0 || cl.info.type == wanttype)
-			print("%V %d\n", score, cl.info.type);
-		freeZBlock(lump);
-	}
-	print("end offset %llud\n", aa);
-}
-
-int
-main(int argc, char *argv[])
-{
-	char *file;
-	Arena *arena;
-	u64int offset, aoffset;
-	Part *part;
-	Dir *d;
-	uchar buf[8192];
-	ArenaHead head;
-
-	aoffset = 0;
-	ARGBEGIN{
-	case 't':
-		wanttype = atoi(EARGF(usage()));
-		break;
-	case 'o':
-		aoffset = strtoull(EARGF(usage()), 0, 0);
-		break;
-	default:
-		usage();
-		break;
-	}ARGEND
-
-	offset = ~(u64int)0;
-	switch(argc) {
-	default:
-		usage();
-	case 2:
-		offset = strtoull(argv[1], 0, 0);
-		/* fall through */
-	case 1:
-		file = argv[0];
-	}
-
-	vtAttach();
-
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
-
-	statsInit();
-
-	if((d = dirstat(file)) == nil)
-		vtFatal("can't stat file %s: %r", file);
-
-	part = initPart(file, 0);
-	if(part == nil)
-		vtFatal("can't open file %s: %R", file);
-	if(!readPart(part, aoffset, buf, sizeof buf))
-		vtFatal("can't read file %s: %R", file);
-
-	if(!unpackArenaHead(&head, buf))
-		vtFatal("corrupted arena header: %R");
-
-	if(aoffset+head.size > d->length)
-		vtFatal("arena is truncated: want %llud bytes have %llud\n",
-			head.size, d->length);
-
-	partBlockSize(part, head.blockSize);
-	initDCache(8 * MaxDiskBlock);
-
-	arena = initArena(part, aoffset, head.size, head.blockSize);
-	if(arena == nil)
-		vtFatal("initArena: %R");
-
-	rdArena(arena, offset);
-	vtDetach();
-	exits(0);
-	return 0;
-}

+ 335 - 0
sys/src/cmd/venti/randtest.c

@@ -0,0 +1,335 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <thread.h>
+
+
+enum { STACK = 32768 };
+void xxxsrand(long);
+long xxxlrand(void);
+
+Channel *cw;
+Channel *cr;
+char *host;
+int blocksize, seed, randpct;
+int doread, dowrite, packets, permute;
+vlong totalbytes, cur;
+VtConn *z;
+int multi;
+int maxpackets;
+int sequence;
+int doublecheck = 1;
+uint *order;
+
+/*
+ * Print the command-line synopsis on standard error and exit
+ * through threadexitsall with status "usage".
+ * The flags listed are the ones threadmain's ARGBEGIN switch handles.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: randtest [-PSVrw] [-h host] [-s seed] [-b blocksize] [-p randpct] [-n totalbytes] [-M maxblocks] [-m nprocs]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * Write one blocksize-byte block from buf over the venti connection z.
+ * When doublecheck is set, the SHA1 of the block is computed locally
+ * and compared with the score vtwrite hands back; a mismatch is fatal.
+ * buf2 is unused; it is accepted so that wr and rd have the same
+ * signature and can both be passed to run().
+ */
+void
+wr(char *buf, char *buf2)
+{
+	uchar score[VtScoreSize], score2[VtScoreSize];
+	DigestState ds;
+
+	USED(buf2);
+	/*
+	 * score is filled in only when doublecheck is set, but the
+	 * vtwrite failure message prints it unconditionally; zero it
+	 * so that message never formats uninitialized stack bytes.
+	 */
+	memset(score, 0, sizeof score);
+	memset(&ds, 0, sizeof ds);
+	if(doublecheck)
+		sha1((uchar*)buf, blocksize, score, &ds);
+	if(vtwrite(z, score2, VtDataType, (uchar*)buf, blocksize) < 0)
+		sysfatal("vtwrite %V at %,lld: %r", score, cur);
+	if(doublecheck && memcmp(score, score2, VtScoreSize) != 0)
+		sysfatal("score mismatch! %V %V", score, score2);
+}
+
+/*
+ * Writer proc body: take block pointers from channel cw, write each
+ * one with wr and free it.  A nil pointer ends the loop.
+ */
+void
+wrthread(void *v)
+{
+	char *blk;
+
+	USED(v);
+	for(;;){
+		blk = recvp(cw);
+		if(blk == nil)
+			break;
+		wr(blk, nil);
+		free(blk);
+	}
+}
+
+/*
+ * Read back one block: compute the SHA1 score of the expected
+ * contents in buf, fetch that score from the venti connection z
+ * into buf2, and die if the read fails or the first blocksize
+ * bytes of the two buffers differ.
+ */
+void
+rd(char *buf, char *buf2)
+{
+	DigestState state;
+	uchar want[VtScoreSize];
+
+	memset(&state, 0, sizeof state);
+	sha1((uchar*)buf, blocksize, want, &state);
+	if(vtread(z, want, VtDataType, (uchar*)buf2, blocksize) < 0)
+		sysfatal("vtread %V at %,lld: %r", want, cur);
+	if(memcmp(buf, buf2, blocksize) == 0)
+		return;
+	sysfatal("bad data read! %V", want);
+}
+
+/*
+ * Reader proc body: take expected-content block pointers from
+ * channel cr, verify each one with rd and free it.  A nil pointer
+ * ends the loop.  One private scratch buffer receives every read
+ * and is released when the loop ends.
+ */
+void
+rdthread(void *v)
+{
+	char *p, *buf2;
+
+	USED(v);
+	buf2 = vtmalloc(blocksize);
+	while((p = recvp(cr)) != nil){
+		rd(p, buf2);
+		free(p);
+	}
+	free(buf2);
+}
+
+char *template;
+
+/*
+ * Generate the test blocks and hand each one to a worker.
+ *
+ * totalbytes/blocksize blocks are produced (at most maxpackets of
+ * them; maxpackets defaults to the full count when it is 0).  Every
+ * block is a copy of template with its sequence number stored in the
+ * leading uint, so each block has distinct contents.  With permute
+ * set the sequence numbers are visited in a shuffled order.
+ *
+ * If c is non-nil each block is sent on c and a fresh buffer is
+ * allocated for the next one; the receiving wrthread/rdthread frees
+ * what it receives.  Otherwise fn(buf, buf2) is called directly.
+ */
+void
+run(void (*fn)(char*, char*), Channel *c)
+{
+	int i, t, j, npackets;
+	char *buf2, *buf;
+
+	buf2 = vtmalloc(blocksize);
+	buf = vtmalloc(blocksize);
+	cur = 0;
+	npackets = totalbytes/blocksize;
+	if(maxpackets == 0)
+		maxpackets = npackets;
+	order = vtmalloc(npackets*sizeof order[0]);
+	for(i=0; i<npackets; i++)
+		order[i] = i;
+	if(permute){
+		/* swap each entry with a randomly chosen entry at or before it */
+		for(i=1; i<npackets; i++){
+			j = nrand(i+1);
+			t = order[i];
+			order[i] = order[j];
+			order[j] = t;
+		}
+	}
+	for(i=0; i<npackets && i<maxpackets; i++){
+		memmove(buf, template, blocksize);
+		*(uint*)buf = order[i];
+		if(c){
+			sendp(c, buf);
+			buf = vtmalloc(blocksize);
+		}else
+			(*fn)(buf, buf2);
+		cur += blocksize;
+	}
+	free(order);
+	order = nil;
+	/*
+	 * buf is never one that was sent: after a send it is replaced
+	 * by a fresh allocation, so it is still owned here.
+	 */
+	free(buf);
+	free(buf2);
+}
+
+#define TWID64	((u64int)~(u64int)0)
+
+/*
+ * Parse a number (any base strtoull accepts with base 0) followed by
+ * an optional one-letter binary scale: k, m, g or t in either case,
+ * meaning 2^10, 2^20, 2^30 or 2^40.
+ * Returns TWID64 if s is nil or anything follows the number and
+ * its optional suffix.
+ */
+u64int
+unittoull(char *s)
+{
+	char *es;
+	u64int n;
+
+	if(s == nil)
+		return TWID64;
+	/* strtoull, not strtoul: the result is 64 bits wide */
+	n = strtoull(s, &es, 0);
+	if(*es == 'k' || *es == 'K'){
+		n *= 1024;
+		es++;
+	}else if(*es == 'm' || *es == 'M'){
+		n *= 1024*1024;
+		es++;
+	}else if(*es == 'g' || *es == 'G'){
+		n *= 1024*1024*1024;
+		es++;
+	}else if(*es == 't' || *es == 'T'){
+		/* two steps: 2^40 does not fit in an int constant expression */
+		n *= 1024*1024;
+		n *= 1024*1024;
+		/* consume the suffix, or the check below rejects every "t" value */
+		es++;
+	}
+	if(*es != '\0')
+		return TWID64;
+	return n;
+}
+
+/*
+ * randtest entry point.
+ *
+ * Parses the flags, connects to the venti server named by -h (nil
+ * host when the flag is absent), builds a blocksize-byte template of
+ * pseudo-random bytes from the -s seed, then runs a write pass
+ * and/or a read pass over totalbytes of data and prints the
+ * throughput of each.  With neither -r nor -w, both passes run.
+ * With -m n, n writer procs and n reader procs are started and the
+ * blocks are passed to them over the channels cw and cr.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int i, max;
+	vlong t0;
+	double t;
+
+	blocksize = 8192;
+	seed = 0;
+	randpct = 50;
+	host = nil;
+	doread = 0;
+	dowrite = 0;
+	totalbytes = 1*1024*1024*1024;
+	fmtinstall('V', vtscorefmt);
+	fmtinstall('F', vtfcallfmt);
+
+	ARGBEGIN{
+	case 'b':
+		blocksize = unittoull(EARGF(usage()));
+		break;
+	case 'h':
+		host = EARGF(usage());
+		break;
+	case 'M':
+		maxpackets = unittoull(EARGF(usage()));
+		break;
+	case 'm':
+		multi = atoi(EARGF(usage()));
+		break;
+	case 'n':
+		totalbytes = unittoull(EARGF(usage()));
+		break;
+	case 'p':
+		randpct = atoi(EARGF(usage()));
+		break;
+	case 'P':
+		permute = 1;
+		break;
+	case 'S':
+		doublecheck = 0;
+		ventidoublechecksha1 = 0;
+		break;
+	case 's':
+		seed = atoi(EARGF(usage()));
+		break;
+	case 'r':
+		doread = 1;
+		break;
+	case 'w':
+		dowrite = 1;
+		break;
+	case 'V':
+		chattyventi++;
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	/*
+	 * run() divides by blocksize and stores a uint at the start of
+	 * every block, and unittoull's TWID64 error value turns negative
+	 * once assigned to the int blocksize; reject all of those here.
+	 */
+	if(blocksize < (int)sizeof(uint))
+		sysfatal("bad block size %d", blocksize);
+
+	if(doread==0 && dowrite==0){
+		doread = 1;
+		dowrite = 1;
+	}
+
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	if(multi){
+		cr = chancreate(sizeof(void*), 0);
+		cw = chancreate(sizeof(void*), 0);
+		for(i=0; i<multi; i++){
+			proccreate(wrthread, nil, STACK);
+			proccreate(rdthread, nil, STACK);
+		}
+	}
+
+	template = vtmalloc(blocksize);
+	xxxsrand(seed);
+	/* randpct limits how many distinct byte values the template uses */
+	max = (256*randpct)/100;
+	if(max == 0)
+		max = 1;
+	for(i=0; i<blocksize; i++)
+		template[i] = xxxlrand()%max;
+	if(dowrite){
+		t0 = nsec();
+		run(wr, cw);
+		/* one nil per writer proc ends its receive loop */
+		for(i=0; i<multi; i++)
+			sendp(cw, nil);
+		t = (nsec() - t0)/1.e9;
+		print("write: %lld bytes / %.3f seconds = %.6f MB/s\n",
+			totalbytes, t, (double)totalbytes/1e6/t);
+	}
+	if(doread){
+		t0 = nsec();
+		run(rd, cr);
+		/* one nil per reader proc ends its receive loop */
+		for(i=0; i<multi; i++)
+			sendp(cr, nil);
+		t = (nsec() - t0)/1.e9;
+		print("read: %lld bytes / %.3f seconds = %.6f MB/s\n",
+			totalbytes, t, (double)totalbytes/1e6/t);
+	}
+	threadexitsall(nil);
+}
+
+
+/*
+ *	algorithm by
+ *	D. P. Mitchell & J. A. Reeds
+ */
+
+#define	LEN	607
+#define	TAP	273
+#define	MASK	0x7fffffffL
+#define	A	48271
+#define	M	2147483647
+#define	Q	44488
+#define	R	3399
+#define	NORM	(1.0/(1.0+MASK))
+
+static	ulong	rng_vec[LEN];
+static	ulong*	rng_tap = rng_vec;
+static	ulong*	rng_feed = 0;
+
+/*
+ * Seed the generator: reset the tap and feed pointers and fill
+ * rng_vec with LEN successive values of the recurrence below.
+ * The seed is first reduced modulo M to a non-negative value;
+ * a seed that reduces to 0 is replaced by the constant 89482311.
+ */
+static void
+isrand(long seed)
+{
+	long lo, hi, x;
+	int i;
+
+	rng_tap = rng_vec;
+	rng_feed = rng_vec+LEN-TAP;
+	seed = seed%M;
+	if(seed < 0)
+		seed += M;
+	if(seed == 0)
+		seed = 89482311;
+	x = seed;
+	/*
+	 *	Initialize by x[n+1] = 48271 * x[n] mod (2**31 - 1)
+	 *	The product is built from the quotient and remainder of
+	 *	x by Q, adding M back when the result goes negative,
+	 *	instead of multiplying x by A directly.
+	 *	The first 20 values (i < 0) are computed but not stored.
+	 */
+	for(i = -20; i < LEN; i++) {
+		hi = x / Q;
+		lo = x % Q;
+		x = A*lo - R*hi;
+		if(x < 0)
+			x += M;
+		if(i >= 0)
+			rng_vec[i] = x;
+	}
+}
+
+/*
+ * Public seeding entry point for this file's private generator;
+ * it only forwards the seed to isrand.
+ */
+void
+xxxsrand(long seed)
+{
+	isrand(seed);
+}
+
+/*
+ * Return the next pseudo-random value, masked to the range 0..MASK.
+ * Both pointers step backwards through rng_vec and wrap to its end;
+ * the masked sum of the two entries they address is stored back
+ * through rng_feed and returned.
+ */
+long
+xxxlrand(void)
+{
+	ulong x;
+
+	rng_tap--;
+	if(rng_tap < rng_vec) {
+		/* rng_feed is still 0 only if isrand has never run: seed with 1 */
+		if(rng_feed == 0) {
+			isrand(1);
+			/* isrand reset rng_tap to rng_vec; step back again before wrapping */
+			rng_tap--;
+		}
+		rng_tap += LEN;
+	}
+	rng_feed--;
+	if(rng_feed < rng_vec)
+		rng_feed += LEN;
+	x = (*rng_feed + *rng_tap) & MASK;
+	*rng_feed = x;
+
+	return x;
+}
+

+ 36 - 64
sys/src/cmd/venti/read.c

@@ -1,102 +1,74 @@
-#include "stdinc.h"
-#include "dat.h"
-#include "fns.h"
-
-char *host;
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <thread.h>
 
 void
 usage(void)
 {
-	fprint(2, "usage: read [-h host] score [type]\n");
-	exits("usage");
-}
-
-int
-parseScore(uchar *score, char *buf, int n)
-{
-	int i, c;
-
-	memset(score, 0, VtScoreSize);
-
-	if(n < VtScoreSize*2)
-		return 0;
-	for(i=0; i<VtScoreSize*2; i++) {
-		if(buf[i] >= '0' && buf[i] <= '9')
-			c = buf[i] - '0';
-		else if(buf[i] >= 'a' && buf[i] <= 'f')
-			c = buf[i] - 'a' + 10;
-		else if(buf[i] >= 'A' && buf[i] <= 'F')
-			c = buf[i] - 'A' + 10;
-		else {
-			return 0;
-		}
-
-		if((i & 1) == 0)
-			c <<= 4;
-
-		score[i>>1] |= c;
-	}
-	return 1;
+	fprint(2, "usage: read [-h host] [-t type] score\n");
+	threadexitsall("usage");
 }
 
-int
-main(int argc, char *argv[])
+void
+threadmain(int argc, char *argv[])
 {
 	int type, n;
 	uchar score[VtScoreSize];
 	uchar *buf;
-	VtSession *z;
+	VtConn *z;
+	char *host;
 
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
+
+	host = nil;
+	type = -1;
 	ARGBEGIN{
 	case 'h':
 		host = EARGF(usage());
 		break;
+	case 't':
+		type = atoi(EARGF(usage()));
+		break;
 	default:
 		usage();
 		break;
 	}ARGEND
 
-	if(argc != 1 && argc != 2)
+	if(argc != 1)
 		usage();
 
-	vtAttach();
+	if(vtparsescore(argv[0], nil, score) < 0)
+		sysfatal("could not parse score '%s': %r", argv[0]);
 
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
+	buf = vtmallocz(VtMaxLumpSize);
 
-	if(!parseScore(score, argv[0], strlen(argv[0])))
-		vtFatal("could not parse score: %s", vtGetError());
-
-	buf = vtMemAllocZ(VtMaxLumpSize);
-
-	z = vtDial(host, 0);
+	z = vtdial(host);
 	if(z == nil)
-		vtFatal("could not connect to server: %R");
+		sysfatal("could not connect to server: %r");
 
-	if(!vtConnect(z, 0))
-		sysfatal("vtConnect: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
 
-	if(argc == 1){
+	if(type == -1){
 		n = -1;
 		for(type=0; type<VtMaxType; type++){
-			n = vtRead(z, score, type, buf, VtMaxLumpSize);
+			n = vtread(z, score, type, buf, VtMaxLumpSize);
 			if(n >= 0){
 				fprint(2, "venti/read%s%s %V %d\n", host ? " -h" : "", host ? host : "",
 					score, type);
 				break;
 			}
 		}
-	}else{
-		type = atoi(argv[1]);
-		n = vtRead(z, score, type, buf, VtMaxLumpSize);
-	}
-	vtClose(z);
+	}else
+		n = vtread(z, score, type, buf, VtMaxLumpSize);
+
+	vthangup(z);
 	if(n < 0)
-		vtFatal("could not read block: %s", vtGetError());
+		sysfatal("could not read block: %r");
 	if(write(1, buf, n) != n)
-		vtFatal("write: %r");
-
-	vtDetach();
-	exits(0);
-	return 0;	/* shut up compiler */
+		sysfatal("write: %r");
+	threadexitsall(0);
 }

+ 112 - 0
sys/src/cmd/venti/readlist.c

@@ -0,0 +1,112 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <venti.h>
+#include <bio.h>
+
+char *host;
+Biobuf b;
+VtConn *z;
+uchar *buf;
+void run(Biobuf*);
+int nn;
+
+/*
+ * Print the command synopsis on standard error and exit
+ * with the status "usage".
+ */
+void
+usage(void)
+{
+	static char synopsis[] = "usage: readlist [-h host] list\n";
+
+	fprint(2, "%s", synopsis);
+	threadexitsall("usage");
+}
+
+/*
+ * Convert the n-character hexadecimal string buf into the
+ * VtScoreSize-byte binary score.  score is zeroed first.
+ * Returns 0 on success; on a wrong length or a non-hex
+ * character it sets the error string and returns -1.
+ */
+int
+parsescore(uchar *score, char *buf, int n)
+{
+	int pos, ch, nib;
+
+	memset(score, 0, VtScoreSize);
+
+	if(n != VtScoreSize*2){
+		werrstr("score wrong length %d", n);
+		return -1;
+	}
+	for(pos = 0; pos < VtScoreSize*2; pos++){
+		ch = buf[pos];
+		if('0' <= ch && ch <= '9')
+			nib = ch - '0';
+		else if('a' <= ch && ch <= 'f')
+			nib = ch - 'a' + 10;
+		else if('A' <= ch && ch <= 'F')
+			nib = ch - 'A' + 10;
+		else{
+			werrstr("bad score char %d '%c'", ch, ch);
+			return -1;
+		}
+
+		/* even positions hold the high nibble of each byte */
+		if(pos % 2 == 0)
+			score[pos/2] |= nib << 4;
+		else
+			score[pos/2] |= nib;
+	}
+	return 0;
+}
+
+/*
+ * Connect to the venti server (host is handed to vtdial; it is
+ * nil unless -h is given) and read every block named in the work
+ * lists: standard input when no file arguments are given,
+ * otherwise each named file in turn.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int fd, i;
+
+	ARGBEGIN{
+	case 'h':
+		host = EARGF(usage());
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	fmtinstall('V', vtscorefmt);
+	buf = vtmallocz(VtMaxLumpSize);
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	if(argc == 0){
+		Binit(&b, 0, OREAD);
+		run(&b);
+		Bterm(&b);
+	}else{
+		for(i=0; i<argc; i++){
+			if((fd = open(argv[i], OREAD)) < 0)
+				sysfatal("open %s: %r", argv[i]);
+			Binit(&b, fd, OREAD);
+			run(&b);
+			/*
+			 * the Biobuf was set up with Binit, so we still own fd:
+			 * release the buffer and close the descriptor before
+			 * moving on, or every list file leaks a descriptor.
+			 */
+			Bterm(&b);
+			close(fd);
+		}
+	}
+	/* hang up the connection before exiting, as read.c and root.c do */
+	vthangup(z);
+	threadexitsall(nil);
+}
+
+/*
+ * Process one work list.  Each line must hold exactly two fields,
+ * a hexadecimal score and a decimal block type; the block is read
+ * from the server z into buf and then discarded.  A malformed line
+ * or a failed read is fatal.  A running count is printed after
+ * every 1000 reads.
+ */
+void
+run(Biobuf *b)
+{
+	char *p, *f[10];
+	int nf;
+	uchar score[VtScoreSize];	/* parsescore fills VtScoreSize bytes; do not hard-code 20 */
+	int type, n;
+
+	while((p = Brdline(b, '\n')) != nil){
+		/* replace the trailing newline so the line is a C string */
+		p[Blinelen(b)-1] = 0;
+		nf = tokenize(p, f, nelem(f));
+		if(nf != 2)
+			sysfatal("syntax error in work list");
+		if(parsescore(score, f[0], strlen(f[0])) < 0)
+			sysfatal("bad score %s in work list", f[0]);
+		type = atoi(f[1]);
+		n = vtread(z, score, type, buf, VtMaxLumpSize);
+		if(n < 0)
+			sysfatal("could not read %s %s: %r", f[0], f[1]);
+		/* write(1, buf, n); */
+		if(++nn%1000 == 0)
+			print("%d...", nn);
+	}
+}

+ 112 - 0
sys/src/cmd/venti/ro.c

@@ -0,0 +1,112 @@
+/* Copyright (c) 2004 Russ Cox */
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <thread.h>
+#include <libsec.h>
+
+#ifndef _UNISTD_H_
+#pragma varargck type "F" VtFcall*
+#pragma varargck type "T" void
+#endif
+
+VtConn *z;
+int verbose;
+
+enum
+{
+	STACK = 8192
+};
+
+/*
+ * Print the command synopsis on standard error and exit
+ * with the status "usage".
+ */
+void
+usage(void)
+{
+	static char synopsis[] = "usage: venti/ro [-v] [-a address] [-h address]\n";
+
+	fprint(2, "%s", synopsis);
+	threadexitsall("usage");
+}
+
+/*
+ * Thread body serving one read request: forward the read to the
+ * upstream connection z and answer with either the data or the
+ * error string.  v is the VtReq being served.
+ */
+void
+readthread(void *v)
+{
+	VtReq *req;
+	uchar *data;
+	char ebuf[ERRMAX];
+	int len;
+
+	req = v;
+	data = vtmalloc(req->tx.count);
+	len = vtread(z, req->tx.score, req->tx.blocktype, data, req->tx.count);
+	if(len >= 0){
+		/* free and data are passed as the packet's release callback and argument */
+		req->rx.data = packetforeign(data, len, free, data);
+	}else{
+		req->rx.msgtype = VtRerror;
+		rerrstr(ebuf, sizeof ebuf);
+		req->rx.error = vtstrdup(ebuf);
+		free(data);
+	}
+	if(verbose)
+		fprint(2, "-> %F\n", &req->rx);
+	vtrespond(req);
+}
+
+void	/* read-only front end: listen on address and forward reads to the server dialed via ventiaddress */
+threadmain(int argc, char **argv)
+{
+	VtReq *r;
+	VtSrv *srv;
+	char *address, *ventiaddress;
+
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
+	
+	address = "tcp!*!venti";	/* where we listen; -a overrides */
+	ventiaddress = nil;	/* handed to vtdial; -h overrides */
+	
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	case 'a':
+		address = EARGF(usage());
+		break;
+	case 'h':
+		ventiaddress = EARGF(usage());
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	if((z = vtdial(ventiaddress)) == nil)
+		sysfatal("vtdial %s: %r", ventiaddress);
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	srv = vtlisten(address);
+	if(srv == nil)
+		sysfatal("vtlisten %s: %r", address);
+
+	while((r = vtgetreq(srv)) != nil){
+		r->rx.msgtype = r->tx.msgtype+1;	/* default reply type is the request type plus one */
+		if(verbose)
+			fprint(2, "<- %F\n", &r->tx);
+		switch(r->tx.msgtype){
+		case VtTping:
+			break;
+		case VtTgoodbye:
+			break;
+		case VtTread:
+			threadcreate(readthread, r, 16384);	/* readthread responds itself, hence the continue; note the STACK enum is not used here */
+			continue;
+		case VtTwrite:
+			r->rx.error = vtstrdup("read-only server");	/* writes are always refused */
+			r->rx.msgtype = VtRerror;
+			break;
+		case VtTsync:
+			break;
+		}
+		if(verbose)
+			fprint(2, "-> %F\n", &r->rx);
+		vtrespond(r);
+	}
+	threadexitsall(nil);
+}
+

+ 72 - 0
sys/src/cmd/venti/root.c

@@ -0,0 +1,72 @@
+#include <u.h>
+#include <libc.h>
+#include <venti.h>
+#include <libsec.h>
+#include <thread.h>
+
+/*
+ * Print the command synopsis on standard error and exit
+ * with the status "usage".
+ */
+void
+usage(void)
+{
+	static char synopsis[] = "usage: root [-h host] score\n";
+
+	fprint(2, "%s", synopsis);
+	threadexitsall("usage");
+}
+
+/*
+ * For each score argument, read the block as a VtRootType block
+ * from the venti server, unpack it and print its fields.  A score
+ * that cannot be parsed, read or unpacked is reported on standard
+ * error and skipped; the remaining scores are still processed.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int i, n;
+	uchar score[VtScoreSize];
+	uchar *buf;
+	VtConn *z;
+	char *host;
+	VtRoot root;
+
+	fmtinstall('F', vtfcallfmt);
+	fmtinstall('V', vtscorefmt);
+	quotefmtinstall();
+
+	host = nil;
+	ARGBEGIN{
+	case 'h':
+		host = EARGF(usage());
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc == 0)
+		usage();
+
+	buf = vtmallocz(VtMaxLumpSize);
+
+	z = vtdial(host);
+	if(z == nil)
+		sysfatal("could not connect to server: %r");
+
+	if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	for(i=0; i<argc; i++){
+		if(vtparsescore(argv[i], nil, score) < 0){
+			fprint(2, "cannot parse score '%s': %r\n", argv[i]);
+			continue;
+		}
+		n = vtread(z, score, VtRootType, buf, VtMaxLumpSize);
+		if(n < 0){
+			fprint(2, "could not read block %V: %r\n", score);
+			continue;
+		}
+		if(n != VtRootSize){
+			/* report the size actually tested against, not a hard-coded number */
+			fprint(2, "block %V is wrong size %d != %d\n", score, n, VtRootSize);
+			continue;
+		}
+		if(vtrootunpack(&root, buf) < 0){
+			fprint(2, "unpacking block %V: %r\n", score);
+			continue;
+		}
+		print("%V: %q %q %V %d %V\n", score, root.name, root.type, root.score, root.blocksize, root.prev);
+	}
+	vthangup(z);
+	threadexitsall(0);
+}

+ 756 - 0
sys/src/cmd/venti/srv/arena.c

@@ -0,0 +1,756 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct ASum ASum;
+
+struct ASum	/* one entry in the queue of arenas waiting for sumproc */
+{
+	Arena	*arena;	/* arena to be passed to sumarena */
+	ASum	*next;	/* next queued entry, nil at the tail */
+};
+
+static void	sealarena(Arena *arena);
+static int	okarena(Arena *arena);
+static int	loadarena(Arena *arena);
+static CIBlock	*getcib(Arena *arena, int clump, int writing, CIBlock *rock);
+static void	putcib(Arena *arena, CIBlock *cib);
+static void	sumproc(void *);
+
+static QLock	sumlock;
+static Rendez	sumwait;
+static ASum	*sumq;
+static ASum	*sumqtail;
+static uchar zero[8192];
+
+int	arenasumsleeptime;
+
+/*
+ * Start the background proc that checksums arenas queued by
+ * backsumarena.  Returns 0 on success, -1 (with the error set)
+ * if the proc cannot be started.
+ */
+int
+initarenasum(void)
+{
+	needzeroscore();  /* OS X */
+
+	/* sumwait is slept on and woken while holding sumlock */
+	sumwait.l = &sumlock;
+
+	if(vtproc(sumproc, nil) >= 0)
+		return 0;
+
+	seterr(EOk, "can't start arena checksum slave: %r");
+	return -1;
+}
+
+/*
+ * Allocate an Arena for the size bytes starting at base in part
+ * and fill it in from the on-disk header and trailer.
+ * Returns nil if the arena cannot be loaded or fails okarena.
+ */
+Arena*
+initarena(Part *part, u64int base, u64int size, u32int blocksize)
+{
+	Arena *a;
+
+	a = MKZ(Arena);
+	a->part = part;
+	a->blocksize = blocksize;
+	a->clumpmax = a->blocksize / ClumpInfoSize;
+	/* base and size leave out one block at each end of the arena */
+	a->base = base + blocksize;
+	a->size = size - 2 * blocksize;
+
+	if(loadarena(a) < 0){
+		seterr(ECorrupt, "arena header or trailer corrupted");
+		goto Bad;
+	}
+	if(okarena(a) < 0)
+		goto Bad;
+
+	/* sealed, but the stored score is still the zero score: queue it for summing */
+	if(a->diskstats.sealed && scorecmp(zeroscore, a->score)==0)
+		backsumarena(a);
+
+	return a;
+
+Bad:
+	freearena(a);
+	return nil;
+}
+
+/*
+ * Release an Arena structure; a nil arena is ignored.
+ */
+void
+freearena(Arena *arena)
+{
+	if(arena != nil)
+		free(arena);
+}
+
+/*
+ * Create a brand new arena named name covering size bytes at base
+ * in part, and write its trailer, its header and a zeroed first
+ * stretch of clump storage.  Version 4 arenas use the fixed
+ * _ClumpMagic; other versions get a random clump magic that is
+ * neither _ClumpMagic nor 0.
+ * Returns nil if the name is illegal or any write fails.
+ */
+Arena*
+newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
+{
+	int bsize;
+	Arena *arena;
+
+	if(nameok(name) < 0){
+		/* the original passed name without a verb, so it was never printed */
+		seterr(EOk, "illegal arena name %s", name);
+		return nil;
+	}
+	arena = MKZ(Arena);
+	arena->part = part;
+	arena->version = vers;
+	if(vers == ArenaVersion4)
+		arena->clumpmagic = _ClumpMagic;
+	else{
+		do
+			arena->clumpmagic = fastrand();
+		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
+	}
+	arena->blocksize = blocksize;
+	arena->clumpmax = arena->blocksize / ClumpInfoSize;
+	/* base and size leave out one block at each end of the arena */
+	arena->base = base + blocksize;
+	arena->size = size - 2 * blocksize;
+
+	namecp(arena->name, name);
+
+	/* zero at most one block, and never more than the zero buffer holds */
+	bsize = sizeof zero;
+	if(bsize > arena->blocksize)
+		bsize = arena->blocksize;
+
+	if(wbarena(arena)<0 || wbarenahead(arena)<0
+	|| writepart(arena->part, arena->base, zero, bsize)<0){
+		freearena(arena);
+		return nil;
+	}
+
+	return arena;
+}
+
+/*
+ * Fetch the directory entry for clump number clump into ci.
+ * Returns 0 on success, -1 if the entry cannot be fetched.
+ */
+int
+readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
+{
+	CIBlock rock, *cib;
+
+	if((cib = getcib(arena, clump, 0, &rock)) == nil)
+		return -1;
+	unpackclumpinfo(ci, cib->data->data + cib->offset);
+	putcib(arena, cib);
+	return 0;
+}
+
+/*
+ * Fetch up to n consecutive directory entries, starting with
+ * clump number clump, into cis.  Stops at the first entry that
+ * cannot be fetched and returns the number of entries read.
+ */
+int
+readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
+{
+	CIBlock rock, *cib;
+	int nread;
+
+	nread = 0;
+	while(nread < n){
+		cib = getcib(arena, clump + nread, 0, &rock);
+		if(cib == nil)
+			break;
+		unpackclumpinfo(&cis[nread], cib->data->data + cib->offset);
+		putcib(arena, cib);
+		nread++;
+	}
+	return nread;
+}
+
+/*
+ * write the directory information for one clump.
+ * must be called with the arena locked.
+ * returns 0 on success, -1 if the directory block is unavailable.
+ */
+int
+writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
+{
+	CIBlock rock, *cib;
+
+	if((cib = getcib(arena, clump, 1, &rock)) == nil)
+		return -1;
+	dirtydblock(cib->data, DirtyArenaCib);
+	packclumpinfo(ci, cib->data->data + cib->offset);
+	putcib(arena, cib);
+	return 0;
+}
+
+u64int	/* bytes of clump directory for `clumps` entries: whole blocks of clumpmax entries each, always at least one block */
+arenadirsize(Arena *arena, u32int clumps)
+{
+	return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
+}
+
+/*
+ * read a clump of data
+ * n is a hint of the size of the data, not including the header
+ * make sure it won't run off the end, then return the number of bytes actually read
+ *
+ * aa is an offset within the arena's clump storage.
+ * the result is (u32int)-1 when n is 0, when aa lies at or beyond
+ * the end of the clump storage, or when a block cannot be fetched.
+ */
+u32int
+readarena(Arena *arena, u64int aa, u8int *buf, long n)
+{
+	DBlock *b;
+	u64int a;
+	u32int blocksize, off, m;
+	long nn;
+
+	if(n == 0)
+		return -1;
+
+	qlock(&arena->lock);
+	a = arena->size - arenadirsize(arena, arena->memstats.clumps);	/* end of clump storage: the directory occupies the tail of the arena */
+	qunlock(&arena->lock);
+	if(aa >= a){
+		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
+		return -1;
+	}
+	if(aa + n > a)
+		n = a - aa;	/* clip the request to the end of clump storage */
+
+	blocksize = arena->blocksize;
+	a = arena->base + aa;
+	off = a & (blocksize - 1);	/* offset of the data within its first block */
+	a -= off;	/* a is now block aligned */
+	nn = 0;	/* bytes copied so far */
+	for(;;){
+		b = getdblock(arena->part, a, OREAD);
+		if(b == nil)
+			return -1;
+		m = blocksize - off;
+		if(m > n - nn)
+			m = n - nn;
+		memmove(&buf[nn], &b->data[off], m);
+		putdblock(b);
+		nn += m;
+		if(nn == n)
+			break;
+		off = 0;	/* later blocks are copied from their start */
+		a += blocksize;
+	}
+	return n;
+}
+
+/*
+ * write some data to the clump section at a given offset
+ * used to fix up corrupted arenas.
+ *
+ * aa is an offset within the arena's clump storage; the n bytes
+ * at clbuf must fit entirely inside that storage.
+ * returns n on success; the result is (u32int)-1 when n is 0,
+ * when the range is out of bounds, or when a block cannot be
+ * fetched.
+ */
+u32int
+writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
+{
+	DBlock *b;
+	u64int a;
+	u32int blocksize, off, m, nn;
+
+	if(n == 0)
+		return -1;
+
+	qlock(&arena->lock);
+	/* end of clump storage: the directory occupies the tail of the arena */
+	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
+	if(aa >= a || aa + n > a){
+		qunlock(&arena->lock);
+		seterr(EOk, "writing beyond arena clump storage");
+		return -1;
+	}
+
+	blocksize = arena->blocksize;
+	a = arena->base + aa;
+	off = a & (blocksize - 1);
+	a -= off;
+	nn = 0;
+	for(;;){
+		m = blocksize - off;
+		if(m > n - nn)
+			m = n - nn;
+		/*
+		 * a block that is only partly overwritten must be fetched
+		 * ORDWR so its untouched bytes are preserved; only a block
+		 * replaced in full may be fetched OWRITE.  the decision
+		 * has to use the bytes going into this block (m), not the
+		 * total n, or the partial last block of a multi-block
+		 * write would be treated as a full overwrite.
+		 */
+		b = getdblock(arena->part, a, m < blocksize ? ORDWR : OWRITE);
+		if(b == nil){
+			qunlock(&arena->lock);
+			return -1;
+		}
+		dirtydblock(b, DirtyArena);
+		memmove(&b->data[off], &clbuf[nn], m);
+		putdblock(b);
+		nn += m;
+		if(nn == n)
+			break;
+		off = 0;
+		a += blocksize;
+	}
+	qunlock(&arena->lock);
+	return n;
+}
+
+/*
+ * allocate space for the clump and write it,
+ * updating the arena directory
+ *
+ * on success returns the clump's offset within the arena's clump
+ * storage and stores start plus that offset in *pa.
+ * returns TWID64 when the arena is sealed or too full (sealing it
+ * in memory if it was not already), when the clump cannot be
+ * packed, or when a data block cannot be fetched.
+ *
+ * ZZZ question: should this distinguish between an arena
+ * filling up and real errors writing the clump?
+ */
+u64int
+writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
+{
+	DBlock *b;
+	u64int a, aa;
+	u32int clump, n, nn, m, off, blocksize;
+	int ok;
+	AState as;
+
+	n = c->info.size + ClumpSize + U32Size;	/* bytes written; note `used` below advances by U32Size less */
+	qlock(&arena->lock);
+	aa = arena->memstats.used;	/* the new clump goes at the current end of used space */
+	if(arena->memstats.sealed
+	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
+		if(!arena->memstats.sealed){
+			logerr(EOk, "seal memstats %s", arena->name);
+			arena->memstats.sealed = 1;
+			as.arena = arena;
+			as.aa = start+aa;
+			as.stats = arena->memstats;
+			setdcachestate(&as);
+		}
+		qunlock(&arena->lock);
+		return TWID64;
+	}
+	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
+		qunlock(&arena->lock);
+		return TWID64;
+	}
+
+	/*
+	 * write the data out one block at a time
+	 */
+	blocksize = arena->blocksize;
+	a = arena->base + aa;
+	off = a & (blocksize - 1);
+	a -= off;
+	nn = 0;
+	for(;;){
+		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);	/* only a block entered part way through is opened ORDWR */
+		if(b == nil){
+			qunlock(&arena->lock);
+			return TWID64;
+		}
+		dirtydblock(b, DirtyArena);
+		m = blocksize - off;
+		if(m > n - nn)
+			m = n - nn;
+		memmove(&b->data[off], &clbuf[nn], m);
+		ok = 0;	/* never set to a failure value, so the check below cannot fire */
+		putdblock(b);
+		if(ok < 0){
+			qunlock(&arena->lock);
+			return TWID64;
+		}
+		nn += m;
+		if(nn == n)
+			break;
+		off = 0;
+		a += blocksize;
+	}
+
+	arena->memstats.used += c->info.size + ClumpSize;
+	arena->memstats.uncsize += c->info.uncsize;
+	if(c->info.size < c->info.uncsize)
+		arena->memstats.cclumps++;	/* stored smaller than its uncompressed size: counted as compressed */
+
+	clump = arena->memstats.clumps++;
+	if(arena->memstats.clumps == 0)
+		sysfatal("clumps wrapped");
+	arena->wtime = now();
+	if(arena->ctime == 0)
+		arena->ctime = arena->wtime;
+
+	writeclumpinfo(arena, clump, &c->info);	/* NOTE(review): return value ignored */
+	wbarena(arena);	/* NOTE(review): return value ignored */
+
+	/* set up for call to setdcachestate */
+	as.arena = arena;
+	as.aa = start+arena->memstats.used;
+	as.stats = arena->memstats;
+
+	/* update this before calling setdcachestate so it cannot be behind dcache.diskstate */
+	*pa = start+aa;
+	setdcachestate(&as);
+	qunlock(&arena->lock);
+
+	return aa;
+}
+
+/*
+ * Order two sets of arena tail statistics.  Returns -1, 0 or 1
+ * as a is less than, equal to or greater than b, comparing used,
+ * clumps, cclumps, uncsize and sealed in that order.
+ */
+int
+atailcmp(ATailStats *a, ATailStats *b)
+{
+	/* good test */
+	if(a->used != b->used)
+		return a->used < b->used ? -1 : 1;
+
+	/* suspect tests - why order this way? (no one cares) */
+	if(a->clumps != b->clumps)
+		return a->clumps < b->clumps ? -1 : 1;
+	if(a->cclumps != b->cclumps)
+		return a->cclumps < b->cclumps ? -1 : 1;
+	if(a->uncsize != b->uncsize)
+		return a->uncsize < b->uncsize ? -1 : 1;
+	if(a->sealed != b->sealed)
+		return a->sealed < b->sealed ? -1 : 1;
+
+	/* everything matches */
+	return 0;
+}
+
+void	/* record as->stats as the on-disk tail state of as->arena, and catch up every earlier arena in the index */
+setatailstate(AState *as)
+{
+	int i, j, osealed;
+	Arena *a;
+	Index *ix;
+
+	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);
+
+	/*
+	 * Look up as->arena to find index.
+	 */
+	ix = mainindex;
+	for(i=0; i<ix->narenas; i++)
+		if(ix->arenas[i] == as->arena)
+			break;
+	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){	/* unknown arena, or address outside its mapped range */
+		fprint(2, "funny settailstate 0x%llux\n", as->aa);
+		return;
+	}
+
+	for(j=0; j<=i; j++){
+		a = ix->arenas[j];
+		if(atailcmp(&a->diskstats, &a->memstats) == 0)	/* already in step; compared without holding a->lock */
+			continue;
+		qlock(&a->lock);
+		osealed = a->diskstats.sealed;
+		if(j == i)
+			a->diskstats = as->stats;	/* the target arena takes the supplied snapshot */
+		else
+			a->diskstats = a->memstats;	/* earlier arenas take their full in-memory state */
+		wbarena(a);
+		if(a->diskstats.sealed != osealed && !a->inqueue)	/* sealed flag changed and not yet queued */
+			sealarena(a);
+		qunlock(&a->lock);
+	}
+}
+
+/*
+ * once sealed, an arena never has any data added to it.
+ * it should only be changed to fix errors.
+ * this also syncs the clump directory.
+ *
+ * NOTE(review): no sync is visible in this function as written;
+ * it only marks the arena as queued and hands it to backsumarena.
+ */
+static void
+sealarena(Arena *arena)
+{
+	arena->inqueue = 1;	/* setatailstate tests this to avoid queueing the arena twice */
+	backsumarena(arena);
+}
+
+/*
+ * Append arena to the queue served by sumproc and wake it.
+ * Does nothing if initarenasum has not set up sumwait, or if
+ * the queue entry cannot be allocated.
+ */
+void
+backsumarena(Arena *arena)
+{
+	ASum *ent;
+
+	if(sumwait.l == nil)
+		return;
+
+	if((ent = MK(ASum)) == nil)
+		return;
+	ent->arena = arena;
+	ent->next = nil;
+
+	qlock(&sumlock);
+	if(sumq == nil)
+		sumq = ent;
+	else
+		sumqtail->next = ent;
+	sumqtail = ent;
+	rwakeup(&sumwait);
+	qunlock(&sumlock);
+}
+
+/*
+ * Proc body: forever take the arena at the head of the sumq
+ * queue, sleeping on sumwait while the queue is empty, and
+ * checksum it with sumarena.
+ */
+static void
+sumproc(void *unused)
+{
+	ASum *ent;
+	Arena *arena;
+
+	USED(unused);
+
+	for(;;){
+		qlock(&sumlock);
+		while((ent = sumq) == nil)
+			rsleep(&sumwait);
+		sumq = ent->next;
+		qunlock(&sumlock);
+
+		/* the entry is ours now; the summing runs without sumlock */
+		arena = ent->arena;
+		free(ent);
+		sumarena(arena);
+	}
+}
+
+void	/* compute the SHA1 of the whole arena, header block through trailer, and store it at the end of the trailer block and in arena->score */
+sumarena(Arena *arena)
+{
+	ZBlock *b;
+	DigestState s;
+	u64int a, e;
+	u32int bs;
+	int t;
+	u8int score[VtScoreSize];
+
+	bs = MaxIoSize;
+	if(bs < arena->blocksize)
+		bs = arena->blocksize;	/* read at least one arena block at a time */
+
+	/*
+	 * read & sum all blocks except the last one
+	 */
+	memset(&s, 0, sizeof s);
+	b = alloczblock(bs, 0, arena->part->blocksize);	/* NOTE(review): result is not checked for nil */
+	e = arena->base + arena->size;	/* address of the trailer block */
+	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){	/* starts at the header block, one block before base */
+		disksched();
+		while((t=arenasumsleeptime) == SleepForever){	/* summing is paused while the setting is SleepForever */
+			sleep(1000);
+			disksched();
+		}
+		sleep(t);
+		if(a + bs > e)
+			bs = arena->blocksize;	/* near the end, fall back to single blocks so the read stops short of the trailer */
+		if(readpart(arena->part, a, b->data, bs) < 0)
+			goto ReadErr;
+		addstat(StatSumRead, 1);
+		addstat(StatSumReadBytes, bs);
+		sha1(b->data, bs, nil, &s);
+	}
+
+	/*
+	 * the last one is special, since it may already have the checksum included
+	 */
+	bs = arena->blocksize;
+	if(readpart(arena->part, e, b->data, bs) < 0){
+ReadErr:
+		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
+		freezblock(b);
+		return;
+	}
+	addstat(StatSumRead, 1);
+	addstat(StatSumReadBytes, bs);
+
+	sha1(b->data, bs-VtScoreSize, nil, &s);
+	sha1(zeroscore, VtScoreSize, nil, &s);	/* the score field itself is summed as if it held the zero score */
+	sha1(nil, 0, score, &s);	/* finish the digest into score */
+
+	/*
+	 * check for no checksum or the same
+	 *
+	 * the writepart is okay because we flushed the dcache in sealarena
+	 * NOTE(review): sealarena as written in this file shows no flush; confirm.
+	 */
+	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0){
+		if(scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)	/* a different, non-zero score was already stored */
+			logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
+				arena->name, &b->data[bs - VtScoreSize], score);
+		scorecp(&b->data[bs - VtScoreSize], score);
+		if(writepart(arena->part, e, b->data, bs) < 0)
+			logerr(EOk, "sumarena can't write sum for %s: %r", arena->name);
+	}
+	freezblock(b);
+
+	qlock(&arena->lock);
+	scorecp(arena->score, score);
+	qunlock(&arena->lock);
+}
+
+/*
+ * write the arena trailer block to the partition.
+ * returns 0 on success, -1 if the block cannot be fetched,
+ * the arena fails okarena, or the trailer cannot be packed.
+ */
+int
+wbarena(Arena *arena)
+{
+	DBlock *blk;
+	int rv;
+
+	blk = getdblock(arena->part, arena->base + arena->size, OWRITE);
+	if(blk == nil){
+		logerr(EAdmin, "can't write arena trailer: %r");
+		return -1;
+	}
+	dirtydblock(blk, DirtyArenaTrailer);
+
+	rv = 0;
+	if(okarena(arena) < 0 || packarena(arena, blk->data) < 0)
+		rv = -1;
+	putdblock(blk);
+	return rv;
+}
+
+/*
+ * Build the arena header from the in-memory arena and write it
+ * straight to the partition, in the block just before arena->base.
+ * Returns 0 on success, -1 on any failure.
+ */
+int
+wbarenahead(Arena *arena)
+{
+	ArenaHead head;
+	ZBlock *zb;
+	int rv;
+
+	namecp(head.name, arena->name);
+	head.version = arena->version;
+	/* the header records the full size, including the header and trailer blocks */
+	head.size = arena->size + 2 * arena->blocksize;
+	head.blocksize = arena->blocksize;
+	head.clumpmagic = arena->clumpmagic;
+
+	zb = alloczblock(arena->blocksize, 1, arena->part->blocksize);
+	if(zb == nil){
+		logerr(EAdmin, "can't write arena header: %r");
+/* ZZZ add error message? */
+		return -1;
+	}
+
+	/*
+	 * this writepart is okay because it only happens
+	 * during initialization.
+	 */
+	rv = -1;
+	if(packarenahead(&head, zb->data) >= 0
+	&& writepart(arena->part, arena->base - arena->blocksize, zb->data, arena->blocksize) >= 0
+	&& flushpart(arena->part) >= 0)
+		rv = 0;
+	freezblock(zb);
+	return rv;
+}
+
+/*
+ * read the arena header and trailer blocks from disk
+ *
+ * the trailer is authoritative: failing to read or unpack it, or
+ * an unknown arena version, returns -1.  the header is only
+ * cross-checked against the trailer; an unreadable, corrupt or
+ * inconsistent header is logged and 0 is still returned.
+ */
+static int
+loadarena(Arena *arena)
+{
+	ArenaHead head;
+	ZBlock *b;
+
+	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
+	if(b == nil)
+		return -1;
+	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
+		freezblock(b);
+		return -1;
+	}
+	if(unpackarena(arena, b->data) < 0){
+		freezblock(b);
+		return -1;
+	}
+	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
+		seterr(EAdmin, "unknown arena version %d", arena->version);
+		freezblock(b);
+		return -1;
+	}
+	/* the arena's score is kept in the last VtScoreSize bytes of the trailer block */
+	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);
+
+	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
+		logerr(EAdmin, "can't read arena header: %r");
+		freezblock(b);
+		return 0;
+	}
+	if(unpackarenahead(&head, b->data) < 0)
+		logerr(ECorrupt, "corrupted arena header: %r");
+	else if(namecmp(arena->name, head.name)!=0
+	     || arena->clumpmagic != head.clumpmagic
+	     || arena->version != head.version
+	     || arena->blocksize != head.blocksize
+	     || arena->size + 2 * arena->blocksize != head.size){
+		if(namecmp(arena->name, head.name)!=0)
+			logerr(ECorrupt, "arena tail name %s head %s", 
+				arena->name, head.name);
+		else if(arena->clumpmagic != head.clumpmagic)
+			logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
+				(ulong)arena->clumpmagic, (ulong)head.clumpmagic);
+		else if(arena->version != head.version)
+			logerr(ECorrupt, "arena tail version %d head version %d",
+				arena->version, head.version);
+		else if(arena->blocksize != head.blocksize)
+			logerr(ECorrupt, "arena tail block size %d head %d",
+				arena->blocksize, head.blocksize);
+		else if(arena->size+2*arena->blocksize != head.size)
+			/*
+			 * sizes are 64-bit: print both with %llud and matching
+			 * casts rather than truncating one to ulong and passing
+			 * the other to %lud unconverted.
+			 */
+			logerr(ECorrupt, "arena tail size %llud head %llud",
+				(u64int)(arena->size+2*arena->blocksize), (u64int)head.size);
+		else
+			logerr(ECorrupt, "arena header inconsistent with arena data");
+	}
+	freezblock(b);
+
+	return 0;
+}
+
+static int	/* sanity check the on-disk stats; only used+directory exceeding the arena size returns -1, the other findings are just logged */
+okarena(Arena *arena)
+{
+	u64int dsize;
+	int ok;
+
+	ok = 0;
+	dsize = arenadirsize(arena, arena->diskstats.clumps);
+	if(arena->diskstats.used + dsize > arena->size){	/* clump data and directory would overlap */
+		seterr(ECorrupt, "arena %s used > size", arena->name);
+		ok = -1;
+	}
+
+	if(arena->diskstats.cclumps > arena->diskstats.clumps)
+		logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);
+
+	/*
+	 * This need not be true if some of the disk is corrupted.
+	 *
+	if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
+		logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
+	 */
+
+	if(arena->ctime > arena->wtime)
+		logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
+
+	return ok;
+}
+
+static CIBlock*	/* locate and fetch the directory block holding entry `clump`; fills in and returns rock, or nil on failure */
+getcib(Arena *arena, int clump, int writing, CIBlock *rock)
+{
+	int mode;
+	CIBlock *cib;
+	u32int block, off;
+
+	if(clump >= arena->memstats.clumps){	/* only entries for existing clumps may be accessed */
+		seterr(EOk, "clump directory access out of range");
+		return nil;
+	}
+	block = clump / arena->clumpmax;	/* which directory block */
+	off = (clump - block * arena->clumpmax) * ClumpInfoSize;	/* byte offset of the entry within that block */
+	cib = rock;
+	cib->block = block;
+	cib->offset = off;
+
+	if(writing){
+		if(off == 0 && clump == arena->memstats.clumps-1)	/* first entry of a block and the newest clump: presumably nothing in the block to preserve */
+			mode = OWRITE;
+		else
+			mode = ORDWR;
+	}else
+		mode = OREAD;
+
+	cib->data = getdblock(arena->part,
+		arena->base + arena->size - (block + 1) * arena->blocksize, mode);	/* directory blocks run backwards from the end of the arena */
+	if(cib->data == nil)
+		return nil;
+	return cib;
+}
+
+static void	/* release the directory block obtained by getcib */
+putcib(Arena *arena, CIBlock *cib)
+{
+	USED(arena);
+
+	putdblock(cib->data);
+	cib->data = nil;	/* the CIBlock no longer refers to a block */
+}

+ 415 - 0
sys/src/cmd/venti/srv/arenas.c

@@ -0,0 +1,415 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct AHash	AHash;
+
+/*
+ * hash table for finding arenas by name.
+ */
+struct AHash	/* one entry in a hash bucket chain */
+{
+	AHash	*next;	/* next entry in the same bucket */
+	Arena	*arena;	/* arena stored in this entry */
+};
+
+enum
+{
+	AHashSize	= 512
+};
+
+static AHash	*ahash[AHashSize];
+
+static u32int	/* hash the NUL-terminated string s; callers mask the result down to a bucket index */
+hashstr(char *s)
+{
+	u32int h;
+	int c;
+
+	h = 0;
+	for(; c = *s; s++){	/* assignment intended: stop at the terminating NUL */
+		c ^= c << 6;
+		h += (c << 11) ^ (c >> 1);
+		c = *s;	/* reload the unmodified character for the second mixing step */
+		h ^= (c << 14) + (c << 7) + (c << 4) + c;
+	}
+	return h;
+}
+
+/*
+ * Enter arena in the name hash table, at the head of its bucket.
+ * Returns 0 on success, -1 if the entry cannot be allocated.
+ */
+int
+addarena(Arena *arena)
+{
+	AHash *ent, **bucket;
+
+	bucket = &ahash[hashstr(arena->name) & (AHashSize - 1)];
+	ent = MK(AHash);
+	if(ent == nil)
+		return -1;
+	ent->arena = arena;
+	ent->next = *bucket;
+	*bucket = ent;
+	return 0;
+}
+
+/*
+ * Look up an arena by name in the hash table.
+ * Returns the arena, or nil if no arena has that name.
+ */
+Arena*
+findarena(char *name)
+{
+	AHash *ent;
+
+	ent = ahash[hashstr(name) & (AHashSize - 1)];
+	while(ent != nil){
+		if(strcmp(ent->arena->name, name) == 0)
+			return ent->arena;
+		ent = ent->next;
+	}
+	return nil;
+}
+
+/*
+ * Remove arena (matched by pointer, not by name) from the hash
+ * table.  Returns 0 if it was found and removed, -1 otherwise.
+ */
+int
+delarena(Arena *arena)
+{
+	AHash **link, *ent;
+
+	link = &ahash[hashstr(arena->name) & (AHashSize - 1)];
+	for(; (ent = *link) != nil; link = &ent->next){
+		if(ent->arena != arena)
+			continue;
+		/* unlink through whichever pointer led to this entry */
+		*link = ent->next;
+		free(ent);
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Read the arena partition header and arena table from part,
+ * validate them, initialize every arena listed in the table and
+ * enter the arenas in the name hash table.
+ * Returns nil, with the error set, on any failure.
+ */
+ArenaPart*
+initarenapart(Part *part)
+{
+	AMapN amn;
+	ArenaPart *ap;
+	ZBlock *b;
+	u32int i;
+	int ok;
+
+	b = alloczblock(HeadSize, 0, 0);
+	if(b == nil){
+		seterr(EAdmin, "can't read arena partition header: %r");
+		return nil;
+	}
+	if(readpart(part, PartBlank, b->data, HeadSize) < 0){
+		/* set the error first so %r still reflects the read failure */
+		seterr(EAdmin, "can't read arena partition header: %r");
+		freezblock(b);
+		return nil;
+	}
+
+	ap = MKZ(ArenaPart);
+	if(ap == nil){
+		freezblock(b);
+		return nil;
+	}
+	ap->part = part;
+	ok = unpackarenapart(ap, b->data);
+	freezblock(b);
+	if(ok < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+
+	/* the arena table starts at the first block boundary after the header */
+	ap->tabbase = (PartBlank + HeadSize + ap->blocksize - 1) & ~(ap->blocksize - 1);
+	if(ap->version != ArenaPartVersion){
+		seterr(ECorrupt, "unknown arena partition version %d", ap->version);
+		freearenapart(ap, 0);
+		return nil;
+	}
+	if(ap->blocksize & (ap->blocksize - 1)){
+		seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", ap->blocksize);
+		freearenapart(ap, 0);
+		return nil;
+	}
+	if(ap->tabbase >= ap->arenabase){
+		seterr(ECorrupt, "arena partition table overlaps with arena storage");
+		freearenapart(ap, 0);
+		return nil;
+	}
+	ap->tabsize = ap->arenabase - ap->tabbase;
+	partblocksize(part, ap->blocksize);
+	/* usable size is the partition size rounded down to a whole block */
+	ap->size = ap->part->size & ~(u64int)(ap->blocksize - 1);
+
+	if(readarenamap(&amn, part, ap->tabbase, ap->tabsize) < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+	ap->narenas = amn.n;
+	ap->map = amn.map;
+	if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+
+	ap->arenas = MKNZ(Arena*, ap->narenas);
+	for(i = 0; i < ap->narenas; i++){
+		ap->arenas[i] = initarena(part, ap->map[i].start, ap->map[i].stop - ap->map[i].start, ap->blocksize);
+		if(ap->arenas[i] == nil){
+			seterr(ECorrupt, "%s: %r", ap->map[i].name);
+			freearenapart(ap, 1);
+			return nil;
+		}
+		if(namecmp(ap->map[i].name, ap->arenas[i]->name) != 0){
+			seterr(ECorrupt, "arena name mismatches with expected name: %s vs. %s",
+				ap->map[i].name, ap->arenas[i]->name);
+			freearenapart(ap, 1);
+			return nil;
+		}
+		if(findarena(ap->arenas[i]->name)){
+			seterr(ECorrupt, "duplicate arena name %s in %s",
+				ap->map[i].name, ap->part->name);
+			freearenapart(ap, 1);
+			return nil;
+		}
+	}
+
+	/* only enter the arenas in the hash table once all of them loaded */
+	for(i = 0; i < ap->narenas; i++)
+		addarena(ap->arenas[i]);
+
+	return ap;
+}
+
+ArenaPart*	/* lay out a new, empty arena partition on part and write its header and empty table; nil on failure */
+newarenapart(Part *part, u32int blocksize, u32int tabsize)
+{
+	ArenaPart *ap;
+
+	if(blocksize & (blocksize - 1)){	/* more than one bit set: not a power of two */
+		seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", blocksize);
+		return nil;
+	}
+	ap = MKZ(ArenaPart);
+	if(ap == nil)
+		return nil;
+
+	ap->version = ArenaPartVersion;
+	ap->part = part;
+	ap->blocksize = blocksize;
+	partblocksize(part, blocksize);
+	ap->size = part->size & ~(u64int)(blocksize - 1);	/* partition size rounded down to a whole block */
+	ap->tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);	/* table starts at the first block boundary after the header */
+	ap->arenabase = (ap->tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);	/* arenas start at the first block boundary after the table */
+	ap->tabsize = ap->arenabase - ap->tabbase;	/* so the table may be larger than the tabsize requested */
+	ap->narenas = 0;
+
+	if(wbarenapart(ap) < 0){
+		freearenapart(ap, 0);
+		return nil;
+	}
+
+	return ap;
+}
+
+int	/* validate the arena map, then write the partition header followed by the arena table; 0 on success, -1 on failure */
+wbarenapart(ArenaPart *ap)
+{
+	ZBlock *b;
+
+	if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0)
+		return -1;
+	b = alloczblock(HeadSize, 1, 0);
+	if(b == nil)
+/* ZZZ set error message? */
+		return -1;
+
+	if(packarenapart(ap, b->data) < 0){
+		seterr(ECorrupt, "can't make arena partition header: %r");
+		freezblock(b);
+		return -1;
+	}
+	if(writepart(ap->part, PartBlank, b->data, HeadSize) < 0 ||
+	   flushpart(ap->part) < 0){
+		seterr(EAdmin, "can't write arena partition header: %r");
+		freezblock(b);
+		return -1;
+	}
+	freezblock(b);
+
+	return wbarenamap(ap->map, ap->narenas, ap->part, ap->tabbase, ap->tabsize);	/* the table follows the header at tabbase */
+}
+
+void	/* free ap with its map and arena array; when freearenas is set, also unhash and free each arena */
+freearenapart(ArenaPart *ap, int freearenas)
+{
+	int i;
+
+	if(ap == nil)
+		return;
+	if(freearenas){
+		for(i = 0; i < ap->narenas; i++){
+			if(ap->arenas[i] == nil)	/* slot not filled in, e.g. initialization stopped early */
+				continue;
+			delarena(ap->arenas[i]);	/* result ignored: the arena may not have been hashed yet */
+			freearena(ap->arenas[i]);
+		}
+	}
+	free(ap->map);
+	free(ap->arenas);
+	free(ap);
+}
+
+/*
+ * Check that the n ranges in am are well formed: each starts at
+ * or after the end of the one before (the first at or after
+ * start), none ends before it starts, and the last ends at or
+ * before stop.  what names the map in error messages.
+ * Returns 0 if the map is valid, -1 with the error set otherwise.
+ */
+int
+okamap(AMap *am, int n, u64int start, u64int stop, char *what)
+{
+	u64int prev;
+	u32int i;
+	AMap *m;
+
+	prev = start;
+	for(i = 0; i < n; i++){
+		m = &am[i];
+		if(m->start < prev){
+			if(i != 0)
+				seterr(ECorrupt, "overlapping ranges in %s", what);
+			else
+				seterr(ECorrupt, "invalid start address in %s", what);
+			return -1;
+		}
+		if(m->stop < m->start){
+			seterr(ECorrupt, "invalid range in %s", what);
+			return -1;
+		}
+		prev = m->stop;
+	}
+	if(prev <= stop)
+		return 0;
+
+	seterr(ECorrupt, "invalid ending address in %s", what);
+	return -1;
+}
+
+/*
+ * Resolve each of the n map entries in am to its arena by name,
+ * storing the results in arenas.  what names the map in error
+ * messages.  Returns 0 on success, -1 at the first name that
+ * cannot be found.
+ */
+int
+maparenas(AMap *am, Arena **arenas, int n, char *what)
+{
+	u32int i;
+
+	for(i = 0; i < n; i++){
+		if((arenas[i] = findarena(am[i].name)) != nil)
+			continue;
+		seterr(EAdmin, "can't find arena '%s' for '%s'\n", am[i].name, what);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Parse the arena map stored in the size bytes at base in part
+ * into amn.  Returns the result of parseamap, or -1 if the
+ * region cannot be opened.
+ */
+int
+readarenamap(AMapN *amn, Part *part, u64int base, u32int size)
+{
+	IFile f;
+	int ok;	/* signed: parseamap reports failure as -1 */
+
+	if(partifile(&f, part, base, size) < 0)
+		return -1;
+	ok = parseamap(&f, amn);
+	freeifile(&f);
+	return ok;
+}
+
+/*
+ * Format the n entries of am as text and write them to the size
+ * bytes at base in part.  Returns 0 on success, -1 if the buffer
+ * cannot be allocated, the map does not fit, or the write fails.
+ */
+int
+wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size)
+{
+	Fmt fmt;
+	ZBlock *zb;
+
+	zb = alloczblock(size, 1, part->blocksize);
+	if(zb == nil)
+		return -1;
+
+	fmtzbinit(&fmt, zb);
+
+	if(outputamap(&fmt, am, n) < 0){
+		seterr(ECorrupt, "arena set size too small");
+		goto Err;
+	}
+	if(writepart(part, base, zb->data, size) < 0 || flushpart(part) < 0){
+		seterr(EAdmin, "can't write arena set: %r");
+		goto Err;
+	}
+	freezblock(zb);
+	return 0;
+
+Err:
+	freezblock(zb);
+	return -1;
+}
+
+/*
+ * amap: n '\n' amapelem * n
+ * n: u32int
+ * amapelem: name '\t' astart '\t' astop '\n'
+ * astart, astop: u64int
+ *
+ * on success stores the allocated map and its length in amn and
+ * returns 0; on any error returns -1 and frees the partial map.
+ */
+int
+parseamap(IFile *f, AMapN *amn)
+{
+	AMap *am;
+	u64int v64;
+	u32int v;
+	char *s, *t, *flds[4];
+	int i, n;
+
+	/*
+	 * arenas
+	 */
+	if(ifileu32int(f, &v) < 0){
+		seterr(ECorrupt, "syntax error: bad number of elements in %s", f->name);
+		return -1;
+	}
+	/* check the unsigned count before converting, so a huge value cannot turn negative */
+	if(v > MaxAMap){
+		seterr(ECorrupt, "illegal number of elements in %s", f->name);
+		return -1;
+	}
+	n = v;
+	am = MKNZ(AMap, n);
+	if(am == nil){
+		fprint(2, "out of memory\n");
+		return -1;
+	}
+	for(i = 0; i < n; i++){
+		s = ifileline(f);
+		/* getfields splits s in place; keep a copy of the line for the diagnostic */
+		if(s)
+			t = estrdup(s);
+		else
+			t = nil;
+		if(s == nil || getfields(s, flds, 4, 0, "\t") != 3){
+			fprint(2, "early eof after %d of %d, %s:#%d: %s\n", i, n, f->name, f->pos, t);
+			free(t);
+			free(am);
+			return -1;
+		}
+		free(t);
+		if(nameok(flds[0]) < 0){
+			free(am);
+			return -1;
+		}
+		namecp(am[i].name, flds[0]);
+		if(stru64int(flds[1], &v64) < 0){
+			seterr(ECorrupt, "syntax error: bad arena base address in %s", f->name);
+			free(am);
+			return -1;
+		}
+		am[i].start = v64;
+		if(stru64int(flds[2], &v64) < 0){
+			seterr(ECorrupt, "syntax error: bad arena size in %s", f->name);
+			free(am);
+			return -1;
+		}
+		am[i].stop = v64;
+	}
+
+	amn->map = am;
+	amn->n = n;
+	return 0;
+}
+
+/*
+ * Print the n entries of am to f in the text format read by
+ * parseamap: the count on one line, then one tab-separated
+ * "name start stop" line per entry.
+ * Returns 0 on success, -1 as soon as a print fails.
+ */
+int
+outputamap(Fmt *f, AMap *am, int n)
+{
+	int i;
+
+	if(fmtprint(f, "%ud\n", n) < 0)
+		return -1;
+	i = 0;
+	while(i < n){
+		if(fmtprint(f, "%s\t%llud\t%llud\n", am[i].name, am[i].start, am[i].stop) < 0)
+			return -1;
+		i++;
+	}
+	return 0;
+}

+ 260 - 0
sys/src/cmd/venti/srv/bloom.c

@@ -0,0 +1,260 @@
+/*
+ * Bloom filter tracking which scores are present in our arenas
+ * and (more importantly) which are not.  
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+int ignorebloom;
+
+/*
+ * Set up b to describe a filter of vsize bytes.  If data is non-nil,
+ * its header is unpacked into b with unpackbloomhead after the
+ * defaults are stored, and data is recorded as the filter storage.
+ * Returns 0, or -1 if vsize does not fit in a ulong or the header
+ * cannot be unpacked.
+ */
+int
+bloominit(Bloom *b, vlong vsize, u8int *data)
+{
+	ulong nbytes;
+
+	nbytes = vsize;
+	if(nbytes != vsize){	/* value changed in the conversion: too big */
+		werrstr("bloom data too big");
+		return -1;
+	}
+
+	b->size = nbytes;
+	b->nhash = 32;	/* default; to be fixed by caller on initialization */
+	if(data != nil && unpackbloomhead(b, data) < 0)
+		return -1;
+
+	b->bitmask = (b->size<<3) - 1;
+	b->data = data;
+	return 0;
+}
+
+/*
+ * Pack b's header fields into b->data.
+ */
+void
+wbbloomhead(Bloom *b)
+{
+	u8int *dst;
+
+	dst = b->data;
+	packbloomhead(b, dst);
+}
+
+/*
+ * Read the bloom filter header from the start of partition p and
+ * return a newly allocated Bloom describing it.  Only the first 512
+ * bytes are read; b->data is left nil, so the filter bits have to be
+ * attached later.  Returns nil on failure.
+ */
+Bloom*
+readbloom(Part *p)
+{
+	uchar buf[512];
+	Bloom *b;
+	
+	b = vtmallocz(sizeof *b);
+	if(readpart(p, 0, buf, sizeof buf) < 0){
+		vtfree(b);	/* do not leak the descriptor on a read error */
+		return nil;
+	}
+	/*
+	 * pass buf as b->data so that bloominit
+	 * can parse header.  won't be used for
+	 * accessing bits (cleared below).
+	 */
+	if(bloominit(b, 0, buf) < 0){
+		vtfree(b);
+		freepart(p);
+		return nil;
+	}else{
+		/*
+		 * default block size is system page size.
+		 * the bloom filter is usually very big.
+		 * bump the block size up to speed i/o.
+		 */
+		if(p->blocksize < (1<<20)){
+			p->blocksize = 1<<20;
+			if(p->blocksize > p->size)
+				p->blocksize = p->size;
+		}
+	}
+	b->part = p;
+	b->data = nil;	/* buf is a stack buffer; never keep a pointer to it */
+	return b;
+}
+
+/*
+ * Give b a fresh zeroed bit array of b->size bytes and account for
+ * its bits in the statistics.  Always returns 0.
+ */
+int
+resetbloom(Bloom *b)
+{
+	b->data = vtmallocz(b->size);
+	/* 2^32 overflows ulong, so report one bit less at the maximum size */
+	addstat(StatBloomBits, b->size == MaxBloomSize ? b->size*8-1 : b->size*8);
+	return 0;
+}
+
+/*
+ * Read the whole filter (b->size bytes from offset 0 of b->part)
+ * into a new buffer, attach it as b->data, and record the number of
+ * set bits and the total bit count in the statistics.
+ * Returns 0 on success.  On a read error returns -1 and leaves b
+ * itself intact: b belongs to the caller, so only the buffer
+ * allocated here is released.
+ */
+int
+loadbloom(Bloom *b)
+{
+	int i, n;
+	uint ones;
+	uchar *data;
+	u32int *a;
+	
+	data = vtmallocz(b->size);
+	if(readpart(b->part, 0, data, b->size) < 0){
+		vtfree(data);
+		return -1;
+	}
+	b->data = data;
+
+	/* count the bits already set, one 32-bit word at a time */
+	a = (u32int*)b->data;
+	n = b->size/4;
+	ones = 0;
+	for(i=0; i<n; i++)
+		ones += countbits(a[i]); 
+	addstat(StatBloomOnes, ones);
+
+	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
+		addstat(StatBloomBits, b->size*8-1);
+	else
+		addstat(StatBloomBits, b->size*8);
+		
+	return 0;
+}
+
+/*
+ * Refresh the header inside b->data, write the whole filter to
+ * offset 0 of b->part and flush the partition.
+ * Returns 0 on success, -1 if the write or the flush fails.
+ */
+int
+writebloom(Bloom *b)
+{
+	wbbloomhead(b);
+	if(writepart(b->part, 0, b->data, b->size) < 0 || flushpart(b->part) < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Derive two 32-bit quantities a, b from the score by xoring
+ * together its 32-bit words (starting at byte 4), then use
+ * a, a+b, a+2b, ... as the sequence of bloom filter indices.
+ * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
+ * The bottom BloomHeadSize*8 bits are reserved for the header, so
+ * any smaller index is replaced by BloomHeadSize*8.
+ */
+static void
+gethashes(u8int *score, ulong *h)
+{
+	int off, k;
+	u32int a, b;
+
+	a = 0;
+	b = 0;
+	off = 4;
+	while(off+8 <= VtScoreSize){
+		a ^= *(u32int*)(score+off);
+		b ^= *(u32int*)(score+off+4);
+		off += 8;
+	}
+	/* fold in one more word if a whole one remains */
+	if(off+4 <= VtScoreSize)
+		a ^= *(u32int*)(score+off);
+	for(k=0; k<BloomMaxHash; k++){
+		if(a < BloomHeadSize*8)
+			h[k] = BloomHeadSize*8;
+		else
+			h[k] = a;
+		a += b;
+	}
+}
+
+/*
+ * Set the first b->nhash hash bits for score in b->data and add the
+ * number of bits that were newly set to StatBloomOnes.
+ * No locking is done here.
+ */
+static void
+_markbloomfilter(Bloom *b, u8int *score)
+{
+	int k, added;
+	ulong h[BloomMaxHash];
+	u32int bit, *word, mask, *bits;
+
+	trace("markbloomfilter", "markbloomfilter %V", score);
+	gethashes(score, h);
+	added = 0;
+	bits = (u32int*)b->data;
+	for(k=0; k<b->nhash; k++){
+		bit = h[k];
+		word = &bits[(bit&b->bitmask)>>5];
+		mask = 1<<(bit&31);
+		if(*word & mask)
+			continue;	/* already set */
+		added++;
+		*word |= mask;
+	}
+	if(added)
+		addstat(StatBloomOnes, added);
+
+	trace("markbloomfilter", "markbloomfilter exit");
+}
+
+/*
+ * Return 1 if every one of the first b->nhash hash bits for score
+ * is set in b->data, 0 as soon as one is found clear.
+ * No locking is done here.
+ */
+static int
+_inbloomfilter(Bloom *b, u8int *score)
+{
+	int k;
+	ulong h[BloomMaxHash], bit;
+	u32int *bits, word;
+
+	gethashes(score, h);
+	bits = (u32int*)b->data;
+	k = 0;
+	while(k < b->nhash){
+		bit = h[k++];
+		word = bits[(bit&b->bitmask)>>5];
+		if((word & (1<<(bit&31))) == 0)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Report whether score may be present according to the filter.
+ * Returns 1 ("maybe") when there is no usable filter (b or b->data
+ * is nil) or when ignorebloom is set; otherwise returns the result
+ * of the bit test made under the read lock.  Each real lookup adds
+ * its count and elapsed milliseconds to the lookup statistics, and
+ * counts as StatBloomMiss when the result is 1 and StatBloomHit
+ * when it is 0.
+ */
+int
+inbloomfilter(Bloom *b, u8int *score)
+{
+	int r;
+	uint ms;
+
+	if(b == nil || b->data == nil)
+		return 1;
+
+	if(ignorebloom)
+		return 1;
+	
+	ms = msec();
+	rlock(&b->lk);
+	r = _inbloomfilter(b, score);
+	runlock(&b->lk);
+	/* elapsed = end - start; the reverse wraps to a huge unsigned value */
+	ms = msec() - ms;
+	addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
+	if(r)
+		addstat(StatBloomMiss, 1);
+	else
+		addstat(StatBloomHit, 1);
+	return r;
+}
+
+/*
+ * Record score in the filter.  Does nothing when there is no filter
+ * or its bits are not loaded.  Takes the read lock b->lk and then
+ * the b->mod qlock around the update.
+ */
+void
+markbloomfilter(Bloom *b, u8int *score)
+{
+	if(b != nil && b->data != nil){
+		rlock(&b->lk);
+		qlock(&b->mod);
+		_markbloomfilter(b, score);
+		qunlock(&b->mod);
+		runlock(&b->lk);
+	}
+}
+
+/*
+ * Filter writer proc: for every message received on b->writechan,
+ * write the filter out and answer on b->writedonechan with 0 on
+ * success or the negative writebloom result on failure.
+ * Never returns.
+ */
+static void
+bloomwriteproc(void *v)
+{
+	int ret;
+	Bloom *b;
+
+	threadsetname("bloomwriteproc");	
+	b = v;
+	for(;;){
+		recv(b->writechan, 0);
+		ret = writebloom(b);
+		if(ret >= 0)
+			ret = 0;
+		else
+			fprint(2, "oops! writing bloom: %r\n");
+		sendul(b->writedonechan, ret);
+	}
+}
+
+/*
+ * Create the unbuffered request and completion channels for b and
+ * start bloomwriteproc on it.
+ */
+void
+startbloomproc(Bloom *b)
+{
+	Channel *req, *done;
+
+	req = chancreate(sizeof(void*), 0);
+	done = chancreate(sizeof(void*), 0);
+	b->writechan = req;
+	b->writedonechan = done;
+	vtproc(bloomwriteproc, b);	
+}

+ 38 - 18
sys/src/cmd/venti/buildbuck.c → sys/src/cmd/venti/srv/buildbuck.c

@@ -2,6 +2,9 @@
 #include "dat.h"
 #include "fns.h"
 
+/*
+ * An IEStream is a sorted list of index entries.
+ */
 struct IEStream
 {
 	Part	*part;
@@ -14,11 +17,11 @@ struct IEStream
 };
 
 IEStream*
-initIEStream(Part *part, u64int off, u64int clumps, u32int size)
+initiestream(Part *part, u64int off, u64int clumps, u32int size)
 {
 	IEStream *ies;
 
-//ZZZ out of memory?
+/* out of memory? */
 	ies = MKZ(IEStream);
 	ies->buf = MKN(u8int, size);
 	ies->epos = ies->buf;
@@ -31,7 +34,7 @@ initIEStream(Part *part, u64int off, u64int clumps, u32int size)
 }
 
 void
-freeIEStream(IEStream *ies)
+freeiestream(IEStream *ies)
 {
 	if(ies == nil)
 		return;
@@ -39,8 +42,11 @@ freeIEStream(IEStream *ies)
 	free(ies);
 }
 
+/*
+ * Return the next IEntry (still packed) in the stream.
+ */
 static u8int*
-peekIEntry(IEStream *ies)
+peekientry(IEStream *ies)
 {
 	u32int n, nn;
 
@@ -55,8 +61,9 @@ peekIEntry(IEStream *ies)
 		nn -= n;
 		if(nn == 0)
 			return nil;
-		if(!readPart(ies->part, ies->off, ies->epos, nn)){
-			setErr(EOk, "can't read sorted index entries: %R");
+//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
+		if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
+			seterr(EOk, "can't read sorted index entries: %r");
 			return nil;
 		}
 		ies->epos += nn;
@@ -65,14 +72,24 @@ peekIEntry(IEStream *ies)
 	return ies->pos;
 }
 
+/*
+ * Compute the bucket number for the given IEntry.
+ * Knows that the score is the first thing in the packed
+ * representation.
+ */
 static u32int
-ieBuck(Index *ix, u8int *b)
+iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies)
 {
-	return hashBits(b, 32) / ix->div;
+	USED(ies);
+	USED(ib);
+	return hashbits(b, 32) / ix->div;
 }
 
+/*
+ * Fill ib with the next bucket in the stream.
+ */
 u32int
-buildBucket(Index *ix, IEStream *ies, IBucket *ib)
+buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
 {
 	IEntry ie1, ie2;
 	u8int *b;
@@ -80,29 +97,32 @@ buildBucket(Index *ix, IEStream *ies, IBucket *ib)
 
 	buck = TWID32;
 	ib->n = 0;
-	ib->next = 0;
 	while(ies->n){
-		b = peekIEntry(ies);
+		b = peekientry(ies);
 		if(b == nil)
 			return TWID32;
-//fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, ieBuck(ix, b), b);
+/* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */
 		if(ib->n == 0)
-			buck = ieBuck(ix, b);
+			buck = iebuck(ix, b, ib, ies);
 		else{
-			if(buck != ieBuck(ix, b))
+			if(buck != iebuck(ix, b, ib, ies))
 				break;
-			if(ientryCmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
+			if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
 				/*
 				 * guess that the larger address is the correct one to use
 				 */
-				unpackIEntry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
-				unpackIEntry(&ie2, b);
-				setErr(EOk, "duplicate index entry for score=%V type=%d\n", ie1.score, ie1.ia.type);
+				unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
+				unpackientry(&ie2, b);
+				seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
 				ib->n--;
 				if(ie1.ia.addr > ie2.ia.addr)
 					memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
 			}
 		}
+		if((ib->n+1)*IEntrySize > maxdata){
+			seterr(EOk, "bucket overflow");
+			return TWID32;
+		}
 		memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
 		ib->n++;
 		ies->n--;

+ 945 - 0
sys/src/cmd/venti/srv/buildindex.c

@@ -0,0 +1,945 @@
+/*
+ * Rebuild the index from scratch, in place.
+ */
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	MinBufSize = 64*1024,
+	MaxBufSize = 4*1024*1024,
+};
+
+int		dumb;
+int		errors;
+char		**isect;
+int		nisect;
+int		bloom;
+int		zero;
+
+u32int	isectmem;
+u64int	totalbuckets;
+u64int	totalclumps;
+Channel	*arenadonechan;
+Channel	*isectdonechan;
+Index	*ix;
+
+u64int	arenaentries;
+u64int	skipentries;
+u64int	indexentries;
+
+static int shouldprocess(ISect*);
+static void	isectproc(void*);
+static void	arenapartproc(void*);
+
+/*
+ * Print the command synopsis on standard error and exit all
+ * threads with status "usage".
+ */
+void
+usage(void)
+{
+	static char synopsis[] = "usage: buildindex [-bd] [-i isect]... [-M imem] venti.conf\n";
+
+	fprint(2, "%s", synopsis);
+	threadexitsall("usage");
+}
+
+/*
+ * buildindex entry point.
+ *
+ * Parses the flags, initializes venti from the configuration file,
+ * reopens every arena partition read-only, then starts one isectproc
+ * per selected index section and one arenapartproc per arena
+ * partition.  When all arena procs have reported on arenadonechan,
+ * each index proc is sent a zero entry to make it finish; once they
+ * have all answered on isectdonechan the bloom filter (if in use) is
+ * written and a summary is printed.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int fd, i, napart;
+	u32int bcmem, imem;
+	Config conf;
+	Part *p;
+	
+	ventifmtinstall();
+	imem = 256*1024*1024;
+	ARGBEGIN{
+	case 'b':
+		bloom = 1;
+		break;
+	case 'd':	/* debugging - make sure to run all 3 passes */
+		dumb = 1;
+		break;
+	case 'i':
+		isect = vtrealloc(isect, (nisect+1)*sizeof(isect[0]));
+		isect[nisect++] = EARGF(usage());
+		break;
+	case 'M':
+		imem = unittoull(EARGF(usage()));
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+	ix = mainindex;
+	/* rebuilding every section implies rebuilding the bloom filter too */
+	if(nisect == 0 && ix->bloom)
+		bloom = 1;
+	if(bloom && ix->bloom && resetbloom(ix->bloom) < 0)
+		sysfatal("resetbloom: %r");
+	if(bloom && !ix->bloom)
+		sysfatal("-b specified but no bloom filter");
+	if(!bloom)
+		ix->bloom = nil;
+	isectmem = imem/ix->nsects;
+
+	/*
+	 * safety first - only need read access to arenas
+	 */
+	p = nil;
+	for(i=0; i<ix->narenas; i++){
+		if(ix->arenas[i]->part != p){
+			p = ix->arenas[i]->part;
+			if((fd = open(p->filename, OREAD)) < 0)
+				sysfatal("cannot reopen %s: %r", p->filename);
+			/* if the dup fails the old descriptor stays in place */
+			if(dup(fd, p->fd) < 0)
+				sysfatal("cannot dup %s: %r", p->filename);
+			close(fd);
+		}
+	}
+	
+	/*
+	 * need a block for every arena
+	 */
+	bcmem = maxblocksize * (mainindex->narenas + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+	
+	totalclumps = 0;
+	for(i=0; i<ix->narenas; i++)
+		totalclumps += ix->arenas[i]->diskstats.clumps;
+	
+	totalbuckets = 0;
+	for(i=0; i<ix->nsects; i++)
+		totalbuckets += ix->sects[i]->blocks;
+	fprint(2, "%,lld clumps, %,lld buckets\n", totalclumps, totalbuckets);
+
+	/* start index procs */
+	fprint(2, "%T read index\n");
+	isectdonechan = chancreate(sizeof(void*), 0);
+	for(i=0; i<ix->nsects; i++){
+		if(shouldprocess(ix->sects[i])){
+			ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0);
+			vtproc(isectproc, ix->sects[i]);
+		}
+	}
+	
+	/* shouldprocess clears each -i name it matches; what is left was not found */
+	for(i=0; i<nisect; i++)
+		if(isect[i])
+			fprint(2, "warning: did not find index section %s\n", isect[i]);
+
+	/* start arena procs */
+	p = nil;
+	napart = 0;
+	arenadonechan = chancreate(sizeof(void*), 0);
+	for(i=0; i<ix->narenas; i++){
+		if(ix->arenas[i]->part != p){
+			p = ix->arenas[i]->part;
+			vtproc(arenapartproc, p);
+			napart++;
+		}
+	}
+
+	/* wait for arena procs to finish */
+	for(i=0; i<napart; i++)
+		recvp(arenadonechan);
+
+	/* tell index procs to finish */
+	for(i=0; i<ix->nsects; i++)
+		if(ix->sects[i]->writechan)
+			send(ix->sects[i]->writechan, nil);
+
+	/* wait for index procs to finish */
+	for(i=0; i<ix->nsects; i++)
+		if(ix->sects[i]->writechan)
+			recvp(isectdonechan);
+
+	if(ix->bloom && writebloom(ix->bloom) < 0)
+		fprint(2, "writing bloom filter: %r\n");
+
+	fprint(2, "%T done arenaentries=%,lld indexed=%,lld (nskip=%,lld)\n", 
+		arenaentries, indexentries, skipentries);
+	threadexitsall(nil);
+}
+
+/*
+ * Decide whether index section is is to be rebuilt.  With no -i
+ * flags every section is; otherwise only a section whose name is
+ * still present in isect[].  A matched name is cleared so that
+ * whatever remains afterwards is known not to have been found.
+ */
+static int
+shouldprocess(ISect *is)
+{
+	char **want;
+
+	if(nisect == 0)
+		return 1;
+
+	for(want = isect; want < isect+nisect; want++){
+		if(*want == nil || strcmp(*want, is->name) != 0)
+			continue;
+		*want = nil;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Add n to the counter *a while holding a lock shared by all callers.
+ */
+static void
+add(u64int *a, u64int n)
+{
+	static Lock addlock;
+	
+	lock(&addlock);
+	*a = *a + n;
+	unlock(&addlock);
+}
+
+/*
+ * Read through an arena partition and send each of its IEntries
+ * to the appropriate index section.  When finished, send on
+ * arenadonechan.
+ *
+ * Clump directory entries are read ClumpChunks at a time.  Entries
+ * of type VtCorruptType are only counted (skipentries); all others
+ * are sent to the owning section's writechan, if that section is
+ * being rebuilt, and marked in the bloom filter, if one is in use.
+ */
+enum
+{
+	ClumpChunks = 32*1024,
+};
+static void
+arenapartproc(void *v)
+{
+	int i, j, n, nskip, x;
+	u32int clump;
+	u64int addr, tot;
+	Arena *a;
+	ClumpInfo *ci, *cis;
+	IEntry ie;
+	Part *p;
+	
+	p = v;
+	threadsetname("arenaproc %s", p->name);
+
+	nskip = 0;
+	tot = 0;
+	cis = MKN(ClumpInfo, ClumpChunks);
+	for(i=0; i<ix->narenas; i++){
+		a = ix->arenas[i];
+		if(a->part != p)
+			continue;
+		if(a->memstats.clumps)
+			fprint(2, "%T arena %s: %d entries\n", 
+				a->name, a->memstats.clumps);
+		/* index addresses of this arena's clumps start at its map base */
+		addr = ix->amap[i].start;
+		for(clump=0; clump<a->memstats.clumps; clump+=n){
+			n = ClumpChunks;
+			if(n > a->memstats.clumps - clump)
+				n = a->memstats.clumps - clump;
+			if(readclumpinfos(a, clump, cis, n) != n){
+				fprint(2, "%T arena %s: directory read: %r\n", a->name);
+				errors = 1;
+				break;
+			}
+			for(j=0; j<n; j++){
+				ci = &cis[j];
+				ie.ia.type = ci->type;
+				ie.ia.size = ci->uncsize;
+				ie.ia.addr = addr;
+				addr += ci->size + ClumpSize;
+				/* clump footprint rounded up to whole 1<<ABlockLog units */
+				ie.ia.blocks = (ci->size + ClumpSize + (1<<ABlockLog)-1) >> ABlockLog;
+				scorecp(ie.score, ci->score);
+				if(ci->type == VtCorruptType)
+					nskip++;
+				else{
+					tot++;
+					x = indexsect(ix, ie.score);
+					assert(0 <= x && x < ix->nsects);
+					if(ix->sects[x]->writechan)
+						send(ix->sects[x]->writechan, &ie);
+					if(ix->bloom)
+						markbloomfilter(ix->bloom, ie.score);
+				}
+			}
+		}
+	}
+	free(cis);	/* the chunk buffer is private to this proc */
+	add(&arenaentries, tot);
+	add(&skipentries, nskip);
+	sendp(arenadonechan, p);
+}
+
+/*
+ * Map a score to its bucket number relative to the start of is.
+ * A packed ientry may be passed instead of a score, since the score
+ * comes first in it.  A bucket outside [is->start, is->stop) is
+ * reported on standard error and then trips the assertion.
+ */
+static u32int
+score2bucket(ISect *is, uchar *score)
+{
+	u32int b;
+	
+	b = hashbits(score, 32)/ix->div;
+	if(!(is->start <= b && b < is->stop))
+		fprint(2, "score2bucket: score=%V div=%d b=%ud start=%ud stop=%ud\n",
+			score, ix->div, b, is->start, is->stop);
+	assert(is->start <= b && b < is->stop);
+	return b - is->start;
+}
+
+/*
+ * Map a byte offset in the index section to the relative number of
+ * the bucket containing it.  The offset must not lie below
+ * is->blockbase and must fall inside the section's buckets.
+ */
+static u32int
+offset2bucket(ISect *is, u64int offset)
+{
+	u32int b;
+	
+	assert(is->blockbase <= offset);
+	b = (offset - is->blockbase)/is->blocksize;
+	assert(b < is->stop-is->start);
+	return b;
+}
+
+/*
+ * Map a relative bucket number to its byte offset in the section.
+ * b may equal the bucket count, which yields the offset just past
+ * the last bucket.
+ */
+static u64int
+bucket2offset(ISect *is, u32int b)
+{
+	u64int rel;
+
+	assert(b <= is->stop-is->start);
+	rel = (u64int)b*is->blocksize;
+	return is->blockbase + rel;
+}
+
+/* 
+ * IEntry buffers to hold initial round of spraying.
+ *
+ * Each Buf pairs an in-memory block [bp, ep) with a region
+ * [boffset, eoffset) of the index partition.  bwrite packs entries
+ * at wp; bflush writes the whole block at woffset, advances woffset
+ * by the block size and clears the block.
+ */
+typedef struct Buf Buf;
+struct Buf
+{
+	Part *part;			/* partition being written */
+	uchar *bp;		/* current block */
+	uchar *ep;		/* end of block */
+	uchar *wp;		/* write position in block */
+	u64int boffset;		/* start offset */
+	u64int woffset;		/* next write offset */
+	u64int eoffset;		/* end offset */
+	u32int nentry;		/* number of entries written */
+};
+
+/*
+ * Write buf's whole block to the partition at woffset, advance
+ * woffset past it and reset the block to empty and zeroed.
+ * A write beyond the region reserved for this buffer is fatal;
+ * a failed write is reported and recorded in errors.
+ */
+static void
+bflush(Buf *buf)
+{
+	u32int len;
+	
+	if(buf->woffset >= buf->eoffset)
+		sysfatal("buf index chunk overflow - need bigger index");
+	len = buf->ep - buf->bp;
+	if(writepart(buf->part, buf->woffset, buf->bp, len) < 0){
+		fprint(2, "write %s: %r\n", buf->part->name);
+		errors = 1;
+	}
+	buf->wp = buf->bp;
+	memset(buf->wp, 0, len);
+	buf->woffset += len;
+}
+
+/*
+ * Append the packed form of ie to buf, flushing the block first
+ * when fewer than IEntrySize bytes of room remain in it.
+ */
+static void
+bwrite(Buf *buf, IEntry *ie)
+{
+	if(buf->ep - buf->wp < IEntrySize)
+		bflush(buf);
+	assert(buf->bp <= buf->wp && buf->wp < buf->ep);
+	packientry(ie, buf->wp);
+	buf->nentry++;
+	buf->wp += IEntrySize;
+	assert(buf->bp <= buf->wp && buf->wp <= buf->ep);
+}
+
+/*
+ * Minibuffer.  In-memory data structure holds our place
+ * in the buffer but has no block data.  We are writing and
+ * reading the minibuffers at the same time.  (Careful!)
+ *
+ * All offsets are byte offsets in the index partition.
+ * ipoolloadblock asserts roffset >= woffset, i.e. reading
+ * never falls behind writing within a minibuffer.
+ */
+typedef struct Minibuf Minibuf;
+struct Minibuf
+{
+	u64int boffset;		/* start offset */
+	u64int roffset;		/* read offset */
+	u64int woffset;		/* write offset */
+	u64int eoffset;		/* end offset */
+	u32int nentry;		/* # entries left to read */
+	u32int nwentry;	/* # entries written */
+};
+
+/*
+ * Index entry pool.  Used when trying to shuffle around 
+ * the entries in a big buffer into the corresponding M minibuffers.
+ * Sized to hold M*EntriesPerBlock entries, so that there will always
+ * either be room in the pool for another block worth of entries
+ * or there will be an entire block worth of sorted entries to 
+ * write out.
+ *
+ * An IPool is a single allocation made by mkipool: the entry array,
+ * the per-minibuf list heads and counts, and the read and write
+ * block buffers all live in the memory following the struct.
+ */
+typedef struct IEntryLink IEntryLink;
+typedef struct IPool IPool;
+
+/* one pooled entry: sits either on the free list or on one mlist[] chain */
+struct IEntryLink
+{
+	uchar ie[IEntrySize];		/* raw IEntry */
+	IEntryLink *next;		/* next in chain */
+};
+
+struct IPool
+{
+	ISect *isect;
+	u32int buck0;			/* first bucket in pool */
+	u32int mbufbuckets;	/* buckets per minibuf */
+	IEntryLink *entry;		/* all IEntryLinks */
+	u32int nentry;			/* # of IEntryLinks */
+	IEntryLink *free;		/* free list */
+	u32int nfree;			/* # on free list */
+	Minibuf *mbuf;			/* all minibufs */
+	u32int nmbuf;			/* # of minibufs */
+	IEntryLink **mlist;		/* lists for each minibuf */
+	u32int *mcount;		/* # on each mlist[i] */
+	u32int bufsize;			/* block buffer size */
+	uchar *rbuf;			/* read buffer */
+	uchar *wbuf;			/* write buffer */
+	u32int epbuf;			/* entries per block buffer */
+};
+
+/*
+static int
+countsokay(IPool *p)
+{
+	int i;
+	u64int n;
+	
+	n = 0;
+	for(i=0; i<p->nmbuf; i++)
+		n += p->mcount[i];
+	n += p->nfree;
+	if(n != p->nentry){
+		print("free %ud:", p->nfree);
+		for(i=0; i<p->nmbuf; i++)
+			print(" %ud", p->mcount[i]);
+		print(" = %lld nentry: %ud\n", n, p->nentry);
+	}
+	return n == p->nentry;
+}
+*/
+
+/*
+ * Allocate and initialize an entry pool for shuffling the entries
+ * of one big buffer into nmbuf minibuffers.
+ *
+ * Everything lives in one zeroed allocation, laid out as:
+ *	IPool header
+ *	nentry IEntryLinks		(p->entry)
+ *	nmbuf list heads		(p->mlist)
+ *	nmbuf list counts		(p->mcount)
+ *	padding up to a bufsize boundary
+ *	read buffer, write buffer	(p->rbuf, p->wbuf)
+ * The 3*bufsize term covers the padding (at most bufsize) plus the
+ * two block buffers.  All entries start out on the free list.
+ * buck0 is left zero for the caller to set.
+ */
+static IPool*
+mkipool(ISect *isect, Minibuf *mbuf, u32int nmbuf, 
+	u32int mbufbuckets, u32int bufsize)
+{
+	u32int i, nentry;
+	uchar *data;
+	IPool *p;
+	IEntryLink *l;
+	
+	/* one block worth of entries per minibuf, plus one block to spare */
+	nentry = (nmbuf+1)*bufsize / IEntrySize;
+	/* the array holds IEntryLinks, so that is the element size to use */
+	p = ezmalloc(sizeof(IPool)
+		+nentry*sizeof(IEntryLink)
+		+nmbuf*sizeof(IEntryLink*)
+		+nmbuf*sizeof(u32int)
+		+3*bufsize);
+	
+	p->isect = isect;
+	p->mbufbuckets = mbufbuckets;
+	p->bufsize = bufsize;
+	p->entry = (IEntryLink*)(p+1);
+	p->nentry = nentry;
+	p->mlist = (IEntryLink**)(p->entry+nentry);
+	p->mcount = (u32int*)(p->mlist+nmbuf);
+	p->nmbuf = nmbuf;
+	p->mbuf = mbuf;
+	data = (uchar*)(p->mcount+nmbuf);
+	/* advance to the next bufsize boundary (a full bufsize if already aligned) */
+	data += bufsize - (uintptr)data%bufsize;
+	p->rbuf = data;
+	p->wbuf = data+bufsize;
+	p->epbuf = bufsize/IEntrySize;
+
+	for(i=0; i<p->nentry; i++){
+		l = &p->entry[i];
+		l->next = p->free;
+		p->free = l;
+		p->nfree++;
+	}
+	return p;
+}
+
+/* 
+ * Move the packed index entry ie into pool p, chaining it on the
+ * list of the minibuffer its bucket belongs to.
+ * The caller must know the free list is not empty.
+ */
+static void
+ipoolinsert(IPool *p, uchar *ie)
+{
+	u32int buck, x;
+	IEntryLink *node;
+
+	assert(p->free != nil);
+
+	buck = score2bucket(p->isect, ie);
+	x = (buck-p->buck0) / p->mbufbuckets;
+	if(x >= p->nmbuf)
+		fprint(2, "buck=%ud mbufbucket=%ud x=%ud\n",
+			buck, p->mbufbuckets, x);
+	assert(x < p->nmbuf);
+
+	/* take a node off the free list */
+	node = p->free;
+	p->free = node->next;
+	p->nfree--;
+
+	/* fill it in and push it on minibuf x's list */
+	memmove(node->ie, ie, IEntrySize);
+	node->next = p->mlist[x];
+	p->mlist[x] = node;
+	p->mcount[x]++;
+}	
+
+/*
+ * Fill the pool's write buffer with as many entries queued for
+ * minibuffer x as fit, returning their nodes to the free list.
+ * The unused tail of the buffer is zeroed.
+ * Returns the number of entries copied.
+ */
+static u32int
+ipoolgetbuf(IPool *p, u32int x)
+{
+	uchar *wp, *ep;
+	IEntryLink *node;
+	u32int n;
+	
+	assert(x < p->nmbuf);
+	wp = p->wbuf;
+	ep = wp + p->bufsize;
+	n = 0;
+	while(wp+IEntrySize <= ep && (node = p->mlist[x]) != nil){
+		/* unlink from the minibuf list */
+		p->mlist[x] = node->next;
+		p->mcount[x]--;
+		memmove(wp, node->ie, IEntrySize);
+		/* hand the node back to the free list */
+		node->next = p->free;
+		p->free = node;
+		p->nfree++;
+		wp += IEntrySize;
+		n++;
+	}
+	memset(wp, 0, ep-wp);
+	return n;
+}
+
+/*
+ * Read the next block of minibuf mb from disk and insert its
+ * entries (at most mb->nentry of them) into the pool.  The caller
+ * must know the pool has room.  The minibuf's read position and
+ * remaining count advance even when the read fails; in that case
+ * the failure is reported and no entries are inserted.
+ */
+static void
+ipoolloadblock(IPool *p, Minibuf *mb)
+{
+	u32int i, n;
+	uchar *src;
+	
+	assert(mb->nentry > 0);
+	assert(mb->roffset >= mb->woffset);
+	assert(mb->roffset < mb->eoffset);
+
+	n = p->bufsize/IEntrySize;
+	if(n > mb->nentry)
+		n = mb->nentry;
+	if(readpart(p->isect->part, mb->roffset, p->rbuf, p->bufsize) >= 0){
+		src = p->rbuf;
+		for(i=0; i<n; i++, src+=IEntrySize)
+			ipoolinsert(p, src);
+	}else
+		fprint(2, "readpart %s: %r\n", p->isect->part->name);
+	mb->nentry -= n;
+	mb->roffset += p->bufsize;
+}
+
+/*
+ * Write out a block worth of entries to minibuffer x.
+ * If necessary, pick up the data there before overwriting it.
+ *
+ * The order of the steps matters: the outgoing entries are staged
+ * in pool->wbuf first, then the on-disk block about to be
+ * overwritten is read into the pool (only when unread entries
+ * remain and the read position has not moved past the write
+ * position), and only then is wbuf written.
+ */
+static void
+ipoolflush0(IPool *pool, u32int x)
+{
+	u32int bufsize;
+	Minibuf *mb;
+	
+	mb = pool->mbuf+x;
+	bufsize = pool->bufsize;
+	/* stage up to one block of minibuf x's entries in wbuf */
+	mb->nwentry += ipoolgetbuf(pool, x);
+	if(mb->nentry > 0 && mb->roffset == mb->woffset){
+		assert(pool->nfree >= pool->bufsize/IEntrySize);
+		/*
+		 * There will be room in the pool -- we just 
+		 * removed a block worth.
+		 */
+		ipoolloadblock(pool, mb);
+	}
+	/* a failed write is reported but does not stop the pass */
+	if(writepart(pool->isect->part, mb->woffset, pool->wbuf, bufsize) < 0)
+		fprint(2, "writepart %s: %r\n", pool->isect->part->name);
+	mb->woffset += bufsize;
+}
+
+/*
+ * Write out one full block of entries: the first minibuffer that
+ * has at least a block worth queued is flushed.
+ * (There must be one -- the pool is almost full!)
+ */
+static void
+ipoolflush1(IPool *pool)
+{
+	u32int i;
+
+	assert(pool->nfree <= pool->epbuf);
+
+	i = 0;
+	while(i < pool->nmbuf && pool->mcount[i] < pool->epbuf)
+		i++;
+	/* can't happen - someone must be full */
+	if(i == pool->nmbuf)
+		sysfatal("ipoolflush1");
+	ipoolflush0(pool, i);
+}
+
+/*
+ * Drain the pool: keep flushing each minibuffer, in order, until
+ * its list is empty.  Nothing more is to be read from disk, so
+ * afterwards every entry must be back on the free list.
+ */
+static void
+ipoolflush(IPool *pool)
+{
+	u32int i;
+	
+	i = 0;
+	while(i < pool->nmbuf){
+		if(pool->mlist[i] != nil)
+			ipoolflush0(pool, i);
+		else
+			i++;
+	}
+	assert(pool->nfree == pool->nentry);
+}
+
+/*
+ * Third pass.  Pick up each minibuffer from disk into
+ * memory and then write out the buckets.
+ */
+
+/*
+ * qsort comparison for packed index entries.
+ * Orders by ientrycmp; entries that compare equal are ordered by
+ * the 8 bytes at IEntryAddrOff in descending order, so that higher
+ * index addresses come first (assumes duplicates are due to
+ * corruption in the lower addresses).
+ */
+static int
+ientrycmpaddr(const void *va, const void *vb)
+{
+	uchar *a, *b;
+	int order;
+	
+	a = (uchar*)va;
+	b = (uchar*)vb;
+	order = ientrycmp(a, b);
+	if(order == 0)
+		order = -memcmp(a+IEntryAddrOff, b+IEntryAddrOff, 8);
+	return order;
+}
+
+/*
+ * Overwrite the byte range [o, e) of partition p with zeros, at
+ * most MaxIoSize bytes per write.  A failed write is reported and
+ * the loop carries on with the next piece.
+ */
+static void
+zerorange(Part *p, u64int o, u64int e)
+{
+	static uchar zeros[MaxIoSize];
+	u32int n;
+	
+	while(o < e){
+		n = sizeof zeros;
+		if(e-o < n)
+			n = e-o;
+		if(writepart(p, o, zeros, n) < 0)
+			fprint(2, "writepart %s: %r\n", p->name);
+		o += n;
+	}
+}
+
+/*
+ * Load a minibuffer into memory and write out the 
+ * corresponding buckets.
+ *
+ * buf (nbuf bytes) receives the written part of the minibuffer,
+ * [mb->boffset, mb->woffset); bufsize is the block size the entries
+ * were written with.  The entries are compacted, sorted with
+ * ientrycmpaddr and written out one bucket per index block.  When
+ * the global zero flag is set, the blocks of buckets that received
+ * no entries are zeroed as well.
+ * An empty minibuffer is a no-op.  A read error is reported,
+ * recorded in errors, and ends the call.
+ */
+static void
+sortminibuffer(ISect *is, Minibuf *mb, uchar *buf, u32int nbuf, u32int bufsize)
+{
+	uchar *buckdata, *p, *q, *ep;
+	u32int b, lastb, memsize, n;
+	u64int o;
+	IBucket ib;
+	Part *part;
+	
+	part = is->part;
+	
+	if(mb->nwentry == 0)
+		return;
+
+	/*
+	 * read entire buffer.
+	 */
+	assert(mb->nwentry*IEntrySize <= mb->woffset-mb->boffset);
+	assert(mb->woffset-mb->boffset <= nbuf);
+	if(readpart(part, mb->boffset, buf, mb->woffset-mb->boffset) < 0){
+		fprint(2, "readpart %s: %r\n", part->name);
+		errors = 1;
+		return;
+	}
+	assert(*(uint*)buf != 0xa5a5a5a5);
+	
+	/*
+	 * remove fragmentation due to IEntrySize
+	 * not evenly dividing Bufsize
+	 */
+	memsize = (bufsize/IEntrySize)*IEntrySize;
+	for(o=mb->boffset, p=q=buf; o<mb->woffset; o+=bufsize){
+		memmove(p, q, memsize);
+		p += memsize;
+		q += bufsize;
+	}
+	ep = buf + mb->nwentry*IEntrySize;
+	assert(ep <= buf+nbuf);
+
+	/* 
+	 * sort entries
+	 */
+	qsort(buf, mb->nwentry, IEntrySize, ientrycmpaddr);
+
+	/*
+	 * write buckets out.
+	 * the bucket block is allocated only now, after the early
+	 * returns above, so that those paths cannot leak it.
+	 */
+	buckdata = emalloc(is->blocksize);
+	n = 0;
+	lastb = offset2bucket(is, mb->boffset);
+	for(p=buf; p<ep; p=q){
+		/* [p, q) is the run of entries falling in bucket b */
+		b = score2bucket(is, p);
+		for(q=p; q<ep && score2bucket(is, q)==b; q+=IEntrySize)
+			;
+		if(lastb+1 < b && zero)
+			zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, b));
+		if(IBucketSize+(q-p) > is->blocksize)
+			sysfatal("bucket overflow - make index bigger");
+		memmove(buckdata+IBucketSize, p, q-p);
+		ib.n = (q-p)/IEntrySize;
+		n += ib.n;
+		packibucket(&ib, buckdata, is->bucketmagic);
+		if(writepart(part, bucket2offset(is, b), buckdata, is->blocksize) < 0)
+			fprint(2, "write %s: %r\n", part->name);
+		lastb = b;
+	}
+	if(lastb+1 < is->stop-is->start && zero)
+		zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, is->stop - is->start));
+
+	if(n != mb->nwentry)
+		fprint(2, "sortminibuffer bug: n=%ud nwentry=%ud have=%ld\n", n, mb->nwentry, (ep-buf)/IEntrySize);
+
+	free(buckdata);
+}
+
+/*
+ * Index section proc: rebuild the index section passed in v.
+ *
+ * Receives IEntries on is->writechan until an entry with a zero
+ * address arrives, spraying them into nbuf large on-disk groups;
+ * then, group by group, optionally reshuffles the entries into
+ * minibuffers through an IPool and finally sorts each minibuffer
+ * and writes its buckets.  Sends is on isectdonechan when done.
+ *
+ * NOTE(review): buf and mbuf are not freed before the proc returns.
+ */
+static void
+isectproc(void *v)
+{
+	u32int buck, bufbuckets, bufsize, epbuf, i, j;
+	u32int mbufbuckets, n, nbucket, nn, space;
+	u32int nbuf, nminibuf, xminiclump, prod;
+	u64int blocksize, offset, xclump;
+	uchar *data, *p;
+	Buf *buf;
+	IEntry ie;
+	IPool *ipool;
+	ISect *is;
+	Minibuf *mbuf, *mb;
+	
+	is = v;
+	blocksize = is->blocksize;
+	nbucket = is->stop - is->start;
+
+	/*
+	 * Three passes:
+	 *	pass 1 - write index entries from arenas into 
+	 *		large sequential sections on index disk.
+	 *		requires nbuf * bufsize memory.
+	 *
+	 *	pass 2 - split each section into minibufs.
+	 *		requires nminibuf * bufsize memory.
+	 *
+	 *	pass 3 - read each minibuf into memory and
+	 *		write buckets out. 
+	 *		requires entries/minibuf * IEntrySize memory.
+	 * 
+	 * The larger we set bufsize the less seeking hurts us.
+	 * 
+	 * The fewer sections and minibufs we have, the less
+	 * seeking hurts us.
+	 * 
+	 * The fewer sections and minibufs we have, the 
+	 * more entries we end up with in each minibuf
+	 * at the end.  
+	 *
+	 * Shoot for using half our memory to hold each
+	 * minibuf.  The chance of a random distribution 
+	 * getting off by 2x is quite low.  
+	 *
+	 * Once that is decided, figure out the smallest 
+	 * nminibuf and nsection/biggest bufsize we can use
+	 * and still fit in the memory constraints.
+	 */
+	
+	/* expected number of clump index entries we'll see */
+	xclump = nbucket * (double)totalclumps/totalbuckets;
+	
+	/* number of clumps we want to see in a minibuf */
+	xminiclump = isectmem/2/IEntrySize;
+	
+	/* total number of minibufs we need */
+	prod = (xclump+xminiclump-1) / xminiclump;
+	
+	/* if possible, skip second pass */
+	if(!dumb && prod*MinBufSize < isectmem){
+		nbuf = prod;
+		nminibuf = 1;
+	}else{
+		/* otherwise use nsection = sqrt(nmini) */
+		for(nbuf=1; nbuf*nbuf<prod; nbuf++)
+			;
+		if(nbuf*MinBufSize > isectmem)
+			sysfatal("not enough memory");
+		nminibuf = nbuf;
+	}
+	/* size buffer to use extra memory */
+	bufsize = MinBufSize;
+	while(bufsize*2*nbuf <= isectmem && bufsize < MaxBufSize)
+		bufsize *= 2;
+	data = emalloc(nbuf*bufsize);
+	epbuf = bufsize/IEntrySize;
+	fprint(2, "%T %s: %,ud buckets, %,ud groups, %,ud minigroups, %,ud buffer\n",
+		is->part->name, nbucket, nbuf, nminibuf, bufsize);
+	/*
+	 * Accept index entries from arena procs.
+	 */
+	buf = MKNZ(Buf, nbuf);
+	p = data;
+	offset = is->blockbase;
+	/* buckets per group, rounded up; the last group takes what is left */
+	bufbuckets = (nbucket+nbuf-1)/nbuf;
+	for(i=0; i<nbuf; i++){
+		buf[i].part = is->part;
+		buf[i].bp = p;
+		buf[i].wp = p;
+		p += bufsize;
+		buf[i].ep = p;
+		buf[i].boffset = offset;
+		buf[i].woffset = offset;
+		if(i < nbuf-1){
+			offset += bufbuckets*blocksize;
+			buf[i].eoffset = offset;
+		}else{
+			offset = is->blockbase + nbucket*blocksize;
+			buf[i].eoffset = offset;
+		}
+	}
+	assert(p == data+nbuf*bufsize);
+
+	n = 0;
+	while(recv(is->writechan, &ie) == 1){
+		/* an entry with a zero address ends the stream */
+		if(ie.ia.addr == 0)
+			break;
+		buck = score2bucket(is, ie.score);
+		i = buck/bufbuckets;
+		assert(i < nbuf);
+		bwrite(&buf[i], &ie);
+		n++;
+	}
+	add(&indexentries, n);
+	
+	/* flush the final partial block of every group and cross-check the count */
+	nn = 0;
+	for(i=0; i<nbuf; i++){
+		bflush(&buf[i]);
+		buf[i].bp = nil;
+		buf[i].ep = nil;
+		buf[i].wp = nil;
+		nn += buf[i].nentry;
+	}
+	if(n != nn)
+		fprint(2, "isectproc bug: n=%ud nn=%ud\n", n, nn);
+		
+	free(data);
+
+	fprint(2, "%T %s: reordering\n", is->part->name);
+	
+	/*
+	 * Rearrange entries into minibuffers and then
+	 * split each minibuffer into buckets.
+	 * The minibuffer must be sized so that it is 
+	 * a multiple of blocksize -- ipoolloadblock assumes
+	 * that each minibuf starts aligned on a blocksize
+	 * boundary.
+	 */
+	mbuf = MKN(Minibuf, nminibuf);
+	mbufbuckets = (bufbuckets+nminibuf-1)/nminibuf;
+	/* grow until a minibuf's byte size is a whole number of bufsize blocks */
+	while(mbufbuckets*blocksize % bufsize)
+		mbufbuckets++;
+	for(i=0; i<nbuf; i++){
+		/*
+		 * Set up descriptors.
+		 */
+		n = buf[i].nentry;
+		nn = 0;
+		offset = buf[i].boffset;
+		memset(mbuf, 0, nminibuf*sizeof(mbuf[0]));
+		for(j=0; j<nminibuf; j++){
+			mb = &mbuf[j];
+			mb->boffset = offset;
+			offset += mbufbuckets*blocksize;
+			if(offset > buf[i].eoffset)
+				offset = buf[i].eoffset;
+			mb->eoffset = offset;
+			mb->roffset = mb->boffset;
+			mb->woffset = mb->boffset;
+			/* entries pass 1 left in this stretch, capped by what remains unassigned */
+			mb->nentry = epbuf * (mb->eoffset - mb->boffset)/bufsize;
+			if(mb->nentry > buf[i].nentry)
+				mb->nentry = buf[i].nentry;
+			buf[i].nentry -= mb->nentry;
+			nn += mb->nentry;
+		}
+		if(n != nn)
+			fprint(2, "isectproc bug2: n=%ud nn=%ud (i=%d)\n", n, nn, i);;
+		/*
+		 * Rearrange.
+		 */
+		if(!dumb && nminibuf == 1){
+			/* a single minibuf: nothing to shuffle, the group is the minibuf */
+			mbuf[0].nwentry = mbuf[0].nentry;
+			mbuf[0].woffset = buf[i].woffset;
+		}else{
+			ipool = mkipool(is, mbuf, nminibuf, mbufbuckets, bufsize);
+			ipool->buck0 = bufbuckets*i;
+			for(j=0; j<nminibuf; j++){
+				mb = &mbuf[j];
+				while(mb->nentry > 0){
+					if(ipool->nfree < epbuf){
+						ipoolflush1(ipool);
+						/* ipoolflush1 might change mb->nentry */	
+						continue;
+					}
+					assert(ipool->nfree >= epbuf);
+					ipoolloadblock(ipool, mb);
+				}
+			}
+			ipoolflush(ipool);
+			nn = 0;
+			for(j=0; j<nminibuf; j++)
+				nn += mbuf[j].nwentry;
+			if(n != nn)
+				fprint(2, "isectproc bug3: n=%ud nn=%ud (i=%d)\n", n, nn, i);
+			free(ipool);
+		}
+
+		/*
+		 * Make buckets.
+		 */
+		/* one scratch buffer, sized for the largest written minibuf */
+		space = 0;
+		for(j=0; j<nminibuf; j++)
+			if(space < mbuf[j].woffset - mbuf[j].boffset)
+				space = mbuf[j].woffset - mbuf[j].boffset;
+
+		data = emalloc(space);
+		for(j=0; j<nminibuf; j++){
+			mb = &mbuf[j];
+			sortminibuffer(is, mb, data, space, bufsize);
+		}
+		free(data);
+	}
+		
+	sendp(isectdonechan, is);
+}
+
+
+

+ 137 - 0
sys/src/cmd/venti/srv/checkarenas.c

@@ -0,0 +1,137 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int	verbose;
+
+/*
+ * Check one arena by running syncarena over it until it stops
+ * reporting SyncHeader, then compare the resulting in-memory
+ * statistics with the ones the arena started with.  With scan set,
+ * the statistics are zeroed first.  If anything was wrong and fix
+ * is set, the corrected header is written back and the disk cache
+ * flushed.
+ */
+static void
+checkarena(Arena *arena, int scan, int fix)
+{
+	ATailStats old;
+	int err, e, more, changed;
+
+	if(verbose && arena->memstats.clumps)
+		printarena(2, arena);
+
+	old = arena->memstats;
+
+	if(scan){
+		arena->memstats.used = 0;
+		arena->memstats.clumps = 0;
+		arena->memstats.cclumps = 0;
+		arena->memstats.uncsize = 0;
+	}
+
+	err = 0;
+	do{
+		e = syncarena(arena, 0, 1000, 0, fix);
+		err |= e;
+		more = e & SyncHeader;
+		/* one dot of progress per round that asks for another */
+		if(more && verbose && arena->memstats.clumps)
+			fprint(2, ".");
+	}while(more);
+	if(verbose && arena->memstats.clumps)
+		fprint(2, "\n");
+
+	/* SyncHeader only drove the loop; recompute it from the statistics */
+	err &= ~SyncHeader;
+	changed = arena->memstats.used != old.used
+		|| arena->memstats.clumps != old.clumps
+		|| arena->memstats.cclumps != old.cclumps
+		|| arena->memstats.uncsize != old.uncsize;
+	if(changed){
+		fprint(2, "%s: incorrect arena header fields\n", arena->name);
+		printarena(2, arena);
+		err |= SyncHeader;
+	}
+
+	if(err && fix){
+		fprint(2, "%s: writing fixed arena header fields\n", arena->name);
+		arena->diskstats = arena->memstats;
+		if(wbarena(arena) < 0)
+			fprint(2, "arena header write failed: %r\n");
+		flushdcache();
+	}
+}
+
+/*
+ * Print the command synopsis on standard error and exit all
+ * threads with the failure status "usage", so that a bad
+ * invocation is not reported as success.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: checkarenas [-afv] file [arenaname...]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * Report whether the arena called name is to be checked: always
+ * when no names were given (argc == 0), otherwise only when name
+ * equals one of the argc strings in argv.
+ */
+int
+should(char *name, int argc, char **argv)
+{
+	int i, found;
+
+	found = (argc == 0);
+	for(i=0; !found && i<argc; i++)
+		found = strcmp(name, argv[i]) == 0;
+	return found;
+}
+
+/*
+ * checkarenas entry point: open the arena partition named by the
+ * first argument (read-write only with -f), then check every arena
+ * in it, or only those named by the remaining arguments.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	Arena *a;
+	ArenaPart *ap;
+	Part *part;
+	char *file;
+	int i, fix, scan, mode;
+
+	ventifmtinstall();
+	statsinit();
+
+	fix = 0;
+	scan = 0;
+	ARGBEGIN{
+	case 'a':
+		scan = 1;
+		break;
+	case 'f':
+		fix++;
+		break;
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(!fix)
+		readonly = 1;
+
+	if(argc < 1)
+		usage();
+
+	/* first argument is the partition; the rest are arena names */
+	file = *argv++;
+	argc--;
+
+	mode = fix ? ORDWR : OREAD;
+	part = initpart(file, mode|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	ap = initarenapart(part);
+	if(ap == nil)
+		sysfatal("can't initialize arena partition in %s: %r", file);
+
+	if(verbose > 1){
+		printarenapart(2, ap);
+		fprint(2, "\n");
+	}
+
+	initdcache(8 * MaxDiskBlock);
+
+	for(i = 0; i < ap->narenas; i++){
+		a = ap->arenas[i];
+		if(should(a->name, argc, argv))
+			checkarena(a, scan, fix);
+	}
+
+	if(verbose > 1)
+		printstats();
+	threadexitsall(0);
+}

+ 295 - 0
sys/src/cmd/venti/srv/checkindex.c

@@ -0,0 +1,295 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int extra, missing, wrong;
+
+/*
+ * Print the heading that introduces the differences found in index
+ * block eb.  The very first call also prints a one-line legend for
+ * the diff-style output that follows.
+ */
+static void
+phdr(DBlock *eb)
+{
+	static int legend;
+
+	if(legend == 0){
+		legend = 1;
+		print("# diff actual correct\n");
+	}
+
+	print("%s block 0x%llux\n", eb->part->name, eb->addr);
+}
+
+/*
+ * Print one index entry on a single line: the marker character c,
+ * the entry's score, and the addr, type, size and blocks fields of
+ * its index address.
+ */
+static void
+pie(IEntry *ie, char c)
+{
+	IAddr *ia;
+
+	ia = &ie->ia;
+	print("%c %V %22lld %3d %5d %3d\n",
+		c, ie->score, ia->addr, ia->type, ia->size, ia->blocks);
+}
+
+/*
+ * Compare the expected contents of index bucket buck, supplied in ib,
+ * with the bucket stored on disk, printing the differences in a
+ * diff-like form: '<' lines show entries found on disk and '>' lines
+ * show the entries that were expected.
+ *
+ * The file-wide counters are updated as differences are found:
+ * extra for on-disk entries with no expected counterpart, missing for
+ * expected entries absent from disk, and wrong for entries present in
+ * both whose index addresses differ.
+ *
+ * Returns 0 when nothing is extra or missing, and -1 when something is,
+ * or when the bucket cannot be located or read.  An address mismatch
+ * is printed and counted but does not affect the return value.
+ */
+static int
+checkbucket(Index *ix, u32int buck, IBucket *ib)
+{
+	ISect *is;
+	DBlock *eb;
+	IBucket eib;
+	IEntry ie, eie;
+	int i, ei, ok, c, hdr;
+
+	is = ix->sects[indexsect0(ix, buck)];
+	if(buck < is->start || buck >= is->stop){
+		seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
+		return -1;
+	}
+	buck -= is->start;
+	eb = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), OREAD);
+	if(eb == nil)
+		return -1;
+	unpackibucket(&eib, eb->data, is->bucketmagic);
+
+	ok = 0;
+	ei = 0;
+	hdr = 0;
+	/*
+	 * Walk both entry lists in step, as in a merge; this presumes
+	 * that each list is ordered consistently with ientrycmp.
+	 */
+	for(i = 0; i < ib->n; i++){
+		while(ei < eib.n){
+			c = ientrycmp(&ib->data[i * IEntrySize], &eib.data[ei * IEntrySize]);
+			if(c == 0){
+				/* same entry on both sides: the addresses should agree */
+				unpackientry(&ie, &ib->data[i * IEntrySize]);
+				unpackientry(&eie, &eib.data[ei * IEntrySize]);
+				if(iaddrcmp(&ie.ia, &eie.ia) != 0){
+					if(!hdr){
+						phdr(eb);
+						hdr = 1;
+					}
+					wrong++;
+					pie(&eie, '<');
+					pie(&ie, '>');
+				}
+				ei++;
+				goto cont;
+			}
+			if(c < 0)
+				break;
+			/* on-disk entry sorts before the expected one: spurious */
+			if(!hdr){
+				phdr(eb);
+				hdr = 1;
+			}
+			unpackientry(&eie, &eib.data[ei*IEntrySize]);
+			extra++;
+			pie(&eie, '<');
+			ei++;
+			ok = -1;
+		}
+		/* expected entry has no on-disk counterpart: missing */
+		if(!hdr){
+			phdr(eb);
+			hdr = 1;
+		}
+		unpackientry(&ie, &ib->data[i*IEntrySize]);
+		missing++;
+		pie(&ie, '>');
+		ok = -1;
+	cont:;
+	}
+	/* anything left on disk after the expected list ends is spurious too */
+	for(; ei < eib.n; ei++){
+		if(!hdr){
+			phdr(eb);
+			hdr = 1;
+		}
+		unpackientry(&eie, &eib.data[ei*IEntrySize]);
+		extra++;
+		pie(&eie, '<');
+		ok = -1;
+	}
+	putdblock(eb);
+	return ok;
+}
+
+/*
+ * Check the on-disk index ix against the stream of clumps index entries
+ * stored at offset off of part.
+ *
+ * Buckets are built one at a time from the stream with buildbucket and
+ * compared with the disk by checkbucket.  When zero is set, every bucket
+ * the stream skips over is also checked against an empty bucket.
+ *
+ * Returns 0 if every checked bucket matched, -1 otherwise; on some
+ * failure paths the error string is set with werrstr.
+ */
+int
+checkindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
+{
+	IEStream *ies;
+	IBucket ib, zib;
+	ZBlock *z, *b;
+	u32int next, buck;
+	int ok, bok;
+u64int found = 0;
+
+/* ZZZ make buffer size configurable */
+	/* b backs the bucket being built; z backs the empty bucket zib */
+	b = alloczblock(ix->blocksize, 0, ix->blocksize);
+	z = alloczblock(ix->blocksize, 1, ix->blocksize);
+	ies = initiestream(part, off, clumps, 64*1024);
+	if(b == nil || z == nil || ies == nil){
+		werrstr("allocating: %r");
+		ok = -1;
+		goto out;
+	}
+	ok = 0;
+	next = 0;	/* first bucket not yet checked */
+	memset(&ib, 0, sizeof ib);
+	ib.data = b->data;
+	zib.data = z->data;
+	zib.n = 0;
+	zib.buck = 0;
+	for(;;){
+		/* buck is the bucket just built, or TWID32 once the stream is done */
+		buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
+		found += ib.n;
+		if(zero){
+			/* buckets in [next, buck) got no entries: expect them empty */
+			for(; next != buck; next++){
+				if(next == ix->buckets){
+					/* ran off the end of the index: done */
+					if(buck != TWID32){
+						ok = -1;
+						werrstr("internal error: bucket out of range");
+					}
+					if(ok < 0)
+						werrstr("%d spurious entries, %d missing, %d wrong", extra, missing, wrong);
+					goto out;
+				}
+				bok = checkbucket(ix, next, &zib);
+				if(bok < 0)
+					ok = -1;
+			}
+		}
+		if(buck >= ix->buckets){
+			if(buck == TWID32)
+				break;
+			werrstr("internal error: bucket out of range");
+			ok = -1;
+			goto out;
+		}
+		bok = checkbucket(ix, buck, &ib);
+		if(bok < 0)
+			ok = -1;
+		next = buck + 1;
+	}
+out:
+	freeiestream(ies);
+	freezblock(z);
+	freezblock(b);
+	return ok;
+}
+
+/*
+ * Compare bloom filter b1 with bloom filter b2 and report on standard
+ * error how many bits are set only in b1 (spurious) and how many only
+ * in b2 (missing).
+ *
+ * Two nil filters compare equal; exactly one nil filter, or differing
+ * headers, is an error.  Without fix, missing bits are an error and
+ * spurious bits are tolerated.  With fix, any difference causes b2's
+ * data to be copied into b1 and b1 to be written with writebloom.
+ *
+ * Returns 0 on success, -1 on error, or writebloom's result after a fix.
+ */
+int
+checkbloom(Bloom *b1, Bloom *b2, int fix)
+{
+	u32int *have, *want, hw, ww;
+	int k, nwords, nextra, nmissing;
+
+	if(b1==nil || b2==nil){
+		if(b1 == b2)
+			return 0;
+		werrstr("nil/non-nil");
+		return -1;
+	}
+
+	wbbloomhead(b1);
+	wbbloomhead(b2);
+	if(memcmp(b1->data, b2->data, BloomHeadSize) != 0){
+		werrstr("bloom header mismatch");
+		return -1;
+	}
+
+	/* compare the filter bodies one 32-bit word at a time */
+	have = (u32int*)b1->data;
+	want = (u32int*)b2->data;
+	nwords = b1->size/4;
+	nextra = 0;
+	nmissing = 0;
+	for(k=BloomHeadSize/4; k<nwords; k++){
+		hw = have[k];
+		ww = want[k];
+		if(hw == ww)
+			continue;
+		nextra   += countbits(hw & ~ww);
+		nmissing += countbits(ww & ~hw);
+	}
+
+	if(nextra == 0 && nmissing == 0)
+		fprint(2, "bloom filter: correct\n");
+	else
+		fprint(2, "bloom filter: %d spurious bits, %d missing bits\n",
+			nextra, nmissing);
+
+	if(fix){
+		if(nextra == 0 && nmissing == 0)
+			return 0;
+		memmove(b1->data, b2->data, b1->size);
+		return writebloom(b1);
+	}
+	if(nmissing){
+		werrstr("missing bits");
+		return -1;
+	}
+	return 0;
+}
+
+
+/*
+ * Print the command synopsis, including the -Z flag that threadmain
+ * accepts, on standard error and terminate all threads.  The exit
+ * status is the non-empty string "usage" so that a malformed command
+ * line is not mistaken for a successful check.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: checkindex [-fZ] [-B blockcachesize] config tmp\n");
+	threadexitsall("usage");
+}
+
+Config conf;
+
+/*
+ * Entry point for checkindex: rebuild a sorted list of index entries
+ * on the temporary partition argv[1] and compare it, bucket by bucket,
+ * with the index described by the configuration file argv[0]; then
+ * compare the configured bloom filter, if any, with a freshly built one.
+ *
+ * Flags: -B sets the disk block cache size (raised to a computed
+ * minimum if too small), -f sets fix, which is passed to checkbloom,
+ * and -Z skips the check that otherwise-unmentioned buckets are empty.
+ * Exits through sysfatal if any errors were found.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	Bloom *oldbloom, *newbloom;
+	Part *part;
+	u64int clumps, base;
+	u32int bcmem;
+	int fix, skipz, ok;
+
+	fix = 0;
+	bcmem = 0;
+	skipz = 0;
+	ARGBEGIN{
+	case 'B':
+		/* EARGF rather than ARGF: a missing size must not reach unittoull as nil */
+		bcmem = unittoull(EARGF(usage()));
+		break;
+	case 'f':
+		fix++;
+		break;
+	case 'Z':
+		skipz = 1;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 2)
+		usage();
+
+	ventifmtinstall();
+
+	part = initpart(argv[1], ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't initialize temporary partition: %r");
+
+	/* without -f, set the global readonly flag */
+	if(!fix)
+		readonly = 1;
+
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
+	if(mainindex->bloom && loadbloom(mainindex->bloom) < 0)
+		sysfatal("can't load bloom filter: %r");
+	oldbloom = mainindex->bloom;
+	newbloom = nil;
+	if(oldbloom){
+		/* scratch filter of the same size, filled in by sortrawientries */
+		newbloom = vtmallocz(sizeof *newbloom);
+		bloominit(newbloom, oldbloom->size, nil);
+		newbloom->data = vtmallocz(oldbloom->size);
+	}
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
+
+	fprint(2, "checkindex: building entry list\n");
+	clumps = sortrawientries(mainindex, part, &base, newbloom);
+	if(clumps == TWID64)
+		sysfatal("can't build sorted index: %r");
+	fprint(2, "checkindex: checking %lld entries at %lld\n", clumps, base);
+	ok = 0;
+	if(checkindex(mainindex, part, base, clumps, !skipz) < 0){
+		fprint(2, "checkindex: %r\n");
+		ok = -1;
+	}
+	if(checkbloom(oldbloom, newbloom, fix) < 0){
+		fprint(2, "checkbloom: %r\n");
+		ok = -1;
+	}
+	if(ok < 0)
+		sysfatal("errors found");
+	fprint(2, "checkindex: index is correct\n");
+	threadexitsall(0);
+}

+ 227 - 0
sys/src/cmd/venti/srv/clump.c

@@ -0,0 +1,227 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "whack.h"
+
+/*
+ * Write a lump to disk.  Updates ia with an index address
+ * for the newly-written lump.  Upon return, the lump will
+ * have been placed in the disk cache but will likely not be on disk yet.
+ *
+ * zb holds the lump data, sc its score, type its venti block type and
+ * creator the value recorded in the clump header.  The data is stored
+ * compressed when whackblock makes it smaller, otherwise verbatim.
+ * Returns 0 on success and -1 on error (lump too large, invalid type,
+ * allocation failure, or a failed write).
+ */
+int
+storeclump(Index *ix, ZBlock *zb, u8int *sc, int type, u32int creator, IAddr *ia)
+{
+	ZBlock *cb;
+	Clump cl;
+	u64int a;
+	u8int bh[VtScoreSize];
+	int size, dsize;
+
+	trace(TraceLump, "storeclump enter", sc, type);
+	size = zb->len;
+	if(size > VtMaxLumpSize){
+		seterr(EStrange, "lump too large");
+		return -1;
+	}
+	if(vttypevalid(type) < 0){
+		seterr(EStrange, "invalid lump type");
+		return -1;
+	}
+
+	/* disabled: recompute the score and compare it with sc */
+	if(0){
+		scoremem(bh, zb->data, size);
+		if(scorecmp(sc, bh) != 0){
+			seterr(ECorrupt, "storing clump: corrupted; expected=%V got=%V, size=%d", sc, bh, size);
+			return -1;
+		}
+	}
+
+	/* room for ClumpSize bytes ahead of the data and U32Size bytes after it */
+	cb = alloczblock(size + ClumpSize + U32Size, 0, 0);
+	if(cb == nil)
+		return -1;
+
+	cl.info.type = type;
+	cl.info.uncsize = size;
+	cl.creator = creator;
+	cl.time = now();
+	scorecp(cl.info.score, sc);
+
+	trace(TraceLump, "storeclump whackblock");
+	dsize = whackblock(&cb->data[ClumpSize], zb->data, size);
+	if(dsize > 0 && dsize < size){
+		cl.encoding = ClumpECompress;
+	}else{
+		/* compression did not help: store the data as is */
+		if(dsize > size){
+			fprint(2, "whack error: dsize=%d size=%d\n", dsize, size);
+			abort();
+		}
+		cl.encoding = ClumpENone;
+		dsize = size;
+		memmove(&cb->data[ClumpSize], zb->data, size);
+	}
+	/* zero the four bytes that follow the stored data */
+	memset(cb->data+ClumpSize+dsize, 0, 4);
+	cl.info.size = dsize;
+
+	ia->addr = 0;
+	ia->type = type;
+	ia->size = size;
+	/* header plus stored data, rounded up to whole 1<<ABlockLog byte blocks */
+	ia->blocks = (dsize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
+
+	a = writeiclump(ix, &cl, cb->data, &ia->addr);
+
+	trace(TraceLump, "storeclump exit %lld", a);
+
+	freezblock(cb);
+	if(a == TWID64)
+		return -1;
+
+/*
+	qlock(&stats.lock);
+	stats.clumpwrites++;
+	stats.clumpbwrites += size;
+	stats.clumpbcomp += dsize;
+	qunlock(&stats.lock);
+*/
+
+	return 0;
+}
+
+/*
+ * Read U32Size bytes at address aa of arena and return them decoded
+ * by unpackmagic.  Returns TWID32 if readarena reports TWID32.
+ */
+u32int
+clumpmagic(Arena *arena, u64int aa)
+{
+	u8int raw[U32Size];
+	u32int n;
+
+	n = readarena(arena, aa, raw, U32Size);
+	if(n != TWID32)
+		return unpackmagic(raw);
+	return TWID32;
+}
+
+/*
+ * fetch a block based at addr.
+ * score is filled in with the block's score.
+ * blocks is roughly the length of the clump on disk;
+ * if zero, the length is unknown.
+ *
+ * cl receives the unpacked clump header.  If verify is set, the
+ * score of the returned data and the validity of the lump type are
+ * checked before returning.  Returns the uncompressed data in a newly
+ * allocated ZBlock, or nil on error.
+ */
+ZBlock*
+loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify)
+{
+	Unwhack uw;
+	ZBlock *zb, *cb;
+	u8int bh[VtScoreSize], *buf;
+	u32int n;
+	int nunc;
+
+/*
+	qlock(&stats.lock);
+	stats.clumpreads++;
+	qunlock(&stats.lock);
+*/
+
+	if(blocks <= 0)
+		blocks = 1;
+
+	trace(TraceLump, "loadclump enter");
+
+	cb = alloczblock(blocks << ABlockLog, 0, 0);
+	if(cb == nil)
+		return nil;
+	n = readarena(arena, aa, cb->data, blocks << ABlockLog);
+	if(n == TWID32){
+		/*
+		 * TWID32 is readarena's failure value (see clumpmagic); as an
+		 * unsigned count it would pass the size test below and the
+		 * unfilled buffer would be unpacked.  The error string is
+		 * left as readarena left it.
+		 */
+		freezblock(cb);
+		return nil;
+	}
+	if(n < ClumpSize){
+		if(n != 0)
+			seterr(ECorrupt, "loadclump read less than a header");
+		freezblock(cb);
+		return nil;
+	}
+	trace(TraceLump, "loadclump unpack");
+	if(unpackclump(cl, cb->data, arena->clumpmagic) < 0){
+		seterr(ECorrupt, "loadclump %s %llud: %r", arena->name, aa);
+		freezblock(cb);
+		return nil;
+	}
+	if(cl->info.type == VtCorruptType){
+		seterr(EOk, "clump is marked corrupt");
+		freezblock(cb);
+		return nil;
+	}
+	n -= ClumpSize;
+	if(n < cl->info.size){
+		/* first read was too short: reread just the data at full length */
+		freezblock(cb);
+		n = cl->info.size;
+		cb = alloczblock(n, 0, 0);
+		if(cb == nil)
+			return nil;
+		if(readarena(arena, aa + ClumpSize, cb->data, n) != n){
+			seterr(ECorrupt, "loadclump read too little data");
+			freezblock(cb);
+			return nil;
+		}
+		buf = cb->data;
+	}else
+		buf = cb->data + ClumpSize;
+
+	scorecp(score, cl->info.score);
+
+	zb = alloczblock(cl->info.uncsize, 0, 0);
+	if(zb == nil){
+		freezblock(cb);
+		return nil;
+	}
+	switch(cl->encoding){
+	case ClumpECompress:
+		trace(TraceLump, "loadclump decompress");
+		unwhackinit(&uw);
+		nunc = unwhack(&uw, zb->data, cl->info.uncsize, buf, cl->info.size);
+		if(nunc != cl->info.uncsize){
+			if(nunc < 0)
+				seterr(ECorrupt, "decompression of %llud failed: %s", aa, uw.err);
+			else
+				seterr(ECorrupt, "decompression of %llud gave partial block: %d/%d\n", aa, nunc, cl->info.uncsize);
+			freezblock(cb);
+			freezblock(zb);
+			return nil;
+		}
+		break;
+	case ClumpENone:
+		if(cl->info.size != cl->info.uncsize){
+			seterr(ECorrupt, "loading clump: bad uncompressed size for uncompressed block %llud", aa);
+			freezblock(cb);
+			freezblock(zb);
+			return nil;
+		}
+		/*
+		 * A mismatch here only records the error; the data is still
+		 * copied and, unless verify is set, returned.
+		 */
+		scoremem(bh, buf, cl->info.uncsize);
+		if(scorecmp(cl->info.score, bh) != 0)
+			seterr(ECorrupt, "pre-copy sha1 wrong at %s %llud: expected=%V got=%V", arena->name, aa, cl->info.score, bh);
+		memmove(zb->data, buf, cl->info.uncsize);
+		break;
+	default:
+		seterr(ECorrupt, "unknown encoding in loadlump %llud", aa);
+		freezblock(cb);
+		freezblock(zb);
+		return nil;
+	}
+	freezblock(cb);
+
+	if(verify){
+		trace(TraceLump, "loadclump verify");
+		scoremem(bh, zb->data, cl->info.uncsize);
+		if(scorecmp(cl->info.score, bh) != 0){
+			seterr(ECorrupt, "loading clump: corrupted at %s %llud; expected=%V got=%V", arena->name, aa, cl->info.score, bh);
+			freezblock(zb);
+			return nil;
+		}
+		if(vttypevalid(cl->info.type) < 0){
+			seterr(ECorrupt, "loading lump at %s %llud: invalid lump type %d", arena->name, aa, cl->info.type);
+			freezblock(zb);
+			return nil;
+		}
+	}
+
+	trace(TraceLump, "loadclump exit");
+/*
+	qlock(&stats.lock);
+	stats.clumpbreads += cl->info.size;
+	stats.clumpbuncomp += cl->info.uncsize;
+	qunlock(&stats.lock);
+*/
+	return zb;
+}

+ 29 - 33
sys/src/cmd/venti/clumpstats.c → sys/src/cmd/venti/srv/clumpstats.c

@@ -3,33 +3,34 @@
 #include "fns.h"
 
 int	count[VtMaxLumpSize][VtMaxType];
+Config conf;
 
 enum
 {
 	ClumpChunks	= 32*1024
 };
 
-static
-readArenaInfo(Arena *arena)
+static int
+readarenainfo(Arena *arena)
 {
 	ClumpInfo *ci, *cis;
 	u32int clump;
 	int i, n, ok;
 
-	if(arena->clumps)
-		fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->clumps);
+	if(arena->memstats.clumps)
+		fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);
 
 	cis = MKN(ClumpInfo, ClumpChunks);
-	ok = 1;
-	for(clump = 0; clump < arena->clumps; clump += n){
+	ok = 0;
+	for(clump = 0; clump < arena->memstats.clumps; clump += n){
 		n = ClumpChunks;
 
-		if(n > arena->clumps - clump)
-			n = arena->clumps - clump;
+		if(n > arena->memstats.clumps - clump)
+			n = arena->memstats.clumps - clump;
 
-		if(readClumpInfos(arena, clump, cis, n) != n){
-			setErr(EOk, "arena directory read failed: %R");
-			ok = 0;
+		if((i=readclumpinfos(arena, clump, cis, n)) != n){
+			seterr(EOk, "arena directory read failed %d not %d: %r", i, n);
+			ok = -1;
 			break;
 		}
 
@@ -39,36 +40,34 @@ readArenaInfo(Arena *arena)
 				fprint(2, "bad clump: %d: type = %d: size = %d\n", clump+i, ci->type, ci->uncsize);
 				continue;
 			}
-if(ci->uncsize == 422)
-print("%s: %d: %V\n", arena->name, clump+i, ci->score);
 			count[ci->uncsize][ci->type]++;
 		}
 	}
 	free(cis);
-	if(!ok)
+	if(ok < 0)
 		return TWID32;
 	return clump;
 }
 
 static void
-clumpStats(Index *ix)
+clumpstats(Index *ix)
 {
 	int ok;
 	ulong clumps, n;
 	int i, j, t;
 
-	ok = 1;
+	ok = 0;
 	clumps = 0;
 	for(i = 0; i < ix->narenas; i++){
-		n = readArenaInfo(ix->arenas[i]);
+		n = readarenainfo(ix->arenas[i]);
 		if(n == TWID32){
-			ok = 0;
+			ok = -1;
 			break;
 		}
 		clumps += n;
 	}
 
-	if(!ok)
+	if(ok < 0)
 		return;
 
 	print("clumps = %ld\n", clumps);
@@ -90,16 +89,14 @@ void
 usage(void)
 {
 	fprint(2, "usage: clumpstats [-B blockcachesize] config\n");
-	exits(0);
+	threadexitsall(0);
 }
 
-int
-main(int argc, char *argv[])
+void
+threadmain(int argc, char *argv[])
 {
 	u32int bcmem;
 
-	vtAttach();
-
 	bcmem = 0;
 
 	ARGBEGIN{
@@ -116,16 +113,15 @@ main(int argc, char *argv[])
 	if(argc != 1)
 		usage();
 
-	if(!initVenti(argv[0], nil))
-		fatal("can't init venti: %R");
+	if(initventi(argv[0], &conf) < 0)
+		sysfatal("can't init venti: %r");
 
-	if(bcmem < maxBlockSize * (mainIndex->narenas + mainIndex->nsects * 4 + 16))
-		bcmem = maxBlockSize * (mainIndex->narenas + mainIndex->nsects * 4 + 16);
-	fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
-	initDCache(bcmem);
+	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
+		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
+	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
+	initdcache(bcmem);
 
-	clumpStats(mainIndex);
+	clumpstats(mainindex);
 	
-	exits(0);
-	return 0;	/* shut up stupid compiler */
+	threadexitsall(0);
 }

+ 249 - 0
sys/src/cmd/venti/srv/cmparena.c

@@ -0,0 +1,249 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int	verbose;
+static int	fd;
+static int	fd1;
+static uchar	*data;
+static uchar	*data1;
+static int	blocksize;
+static int	sleepms;
+
+/*
+ * Print the command synopsis on standard error and terminate all threads.
+ * The exit status is the non-empty string "usage" so that a malformed
+ * command line is not reported to the caller as a successful run.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: cmparena [-b blocksize] [-s ms] [-v] arenapart1 arenapart2 [name...]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * Read exactly n bytes from fd at offset off into buf, repeating
+ * pread until the request is satisfied.  Returns 0 on success and
+ * -1 on a read error or premature end of file; in the latter case
+ * the error string is set to "early eof".
+ */
+static int
+preadblock(int fd, uchar *buf, int n, vlong off)
+{
+	int done, got;
+
+	done = 0;
+	while(done < n){
+		got = pread(fd, buf + done, n - done, off + done);
+		if(got == 0){
+			werrstr("early eof");
+			return -1;
+		}
+		if(got < 0)
+			return -1;
+		done += got;
+	}
+	return 0;
+}
+
+/*
+ * Read exactly n bytes from the current position of fd into buf,
+ * repeating read until the request is satisfied.  Returns 0 on
+ * success and -1 on a read error or premature end of file; in the
+ * latter case the error string is set to "early eof".
+ */
+static int
+readblock(int fd, uchar *buf, int n)
+{
+	int done, got;
+
+	done = 0;
+	while(done < n){
+		got = read(fd, buf + done, n - done);
+		if(got == 0){
+			werrstr("early eof");
+			return -1;
+		}
+		if(got < 0)
+			return -1;
+		done += got;
+	}
+	return 0;
+}
+
+/*
+ * Compare the arena called name as stored in the two open partitions.
+ * Both fd and fd1 must already be positioned at the start of the arena.
+ * len is the arena length taken from the partition table; if non-zero
+ * it is checked against the size recorded in each arena header.
+ *
+ * Header oddities are reported as warnings.  The arena is then read
+ * from both files in blocksize pieces and the offset of every piece
+ * that differs is printed.  Read errors abandon the comparison.
+ */
+static void
+cmparena(char *name, vlong len)
+{
+	ArenaHead head;
+	u64int n, e;
+	u32int bs;
+
+	fprint(2, "verify %s\n", name);
+
+	/*
+	 * read a little bit, which will include the header
+	 */
+	if(readblock(fd, data, HeadSize) < 0){
+		fprint(2, "%s: reading header: %r\n", name);
+		return;
+	}
+	if(unpackarenahead(&head, data) < 0){
+		fprint(2, "%s: corrupt arena header: %r\n", name);
+		return;
+	}
+	if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
+		fprint(2, "%s: warning: unknown arena version %d\n", name, head.version);
+	if(len != 0 && len != head.size)
+		fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len);
+	if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
+		fprint(2, "%s: warning: unexpected name %s\n", name, head.name);
+
+	/*
+	 * same checks for the second copy; its header was read into
+	 * data1, so that is the buffer that must be unpacked
+	 */
+	if(readblock(fd1, data1, HeadSize) < 0){
+		fprint(2, "%s: reading header: %r\n", name);
+		return;
+	}
+	if(unpackarenahead(&head, data1) < 0){
+		fprint(2, "%s: corrupt arena header: %r\n", name);
+		return;
+	}
+	if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
+		fprint(2, "%s: warning: unknown arena version %d\n", name, head.version);
+	if(len != 0 && len != head.size)
+		fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len);
+	if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
+		fprint(2, "%s: warning: unexpected name %s\n", name, head.name);
+
+	/* rewind both files to the start of the arena */
+	seek(fd, -HeadSize, 1);
+	seek(fd1, -HeadSize, 1);
+
+	/*
+	 * now we know how much to read:
+	 * compare head.size bytes, blocksize at a time,
+	 * shortening the final piece as needed
+	 */
+	e = head.size;
+	bs = blocksize;
+	for(n = 0; n < e; n += bs){
+		if(n + bs > e)
+			bs = e - n;
+		if(readblock(fd, data, bs) < 0){
+			fprint(2, "%s: read data: %r\n", name);
+			return;
+		}
+		if(readblock(fd1, data1, bs) < 0){
+			fprint(2, "%s: read data: %r\n", name);
+			return;
+		}
+		if(memcmp(data, data1, bs) != 0){
+			fprint(2, "mismatch at %llx\n", n);
+		}
+	}
+}
+
+/*
+ * Decide whether the arena called name should be compared.  With an
+ * empty list (n == 0) every arena qualifies.  Otherwise name must
+ * equal one of the n strings in s; the matching slot is then set to
+ * nil so that it is not matched a second time.
+ * Returns 1 if the arena should be compared, 0 if not.
+ */
+static int
+shouldcheck(char *name, char **s, int n)
+{
+	int k;
+
+	if(n == 0)
+		return 1;
+
+	for(k=0; k<n; k++){
+		if(s[k] == nil)
+			continue;
+		if(strcmp(name, s[k]) != 0)
+			continue;
+		s[k] = nil;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Read the arena partition header from fd into ap and return the
+ * partition's arena table as a newly allocated, NUL-terminated string.
+ * The header is read through the global buffer data.  Any failure,
+ * including running out of memory, is fatal.
+ */
+char *
+readap(int fd, ArenaPart *ap)
+{
+	char *table;
+	
+	if(preadblock(fd, data, 8192, PartBlank) < 0)
+		sysfatal("read arena part header: %r");
+	if(unpackarenapart(ap, data) < 0)
+		sysfatal("corrupted arena part header: %r");
+	fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n",
+		ap->version, ap->blocksize, ap->arenabase);
+	/* the table runs from the first block boundary after the header up to arenabase */
+	ap->tabbase = (PartBlank+HeadSize+ap->blocksize-1)&~(ap->blocksize-1);
+	ap->tabsize = ap->arenabase - ap->tabbase;
+	table = malloc(ap->tabsize+1);
+	if(table == nil)
+		sysfatal("allocating arena part directory: %r");
+	if(preadblock(fd, (uchar*)table, ap->tabsize, ap->tabbase) < 0)
+		sysfatal("reading arena part directory: %r");
+	table[ap->tabsize] = 0;
+	return table;
+}
+
+/*
+ * Entry point for cmparena: compare the arenas of two arena partitions.
+ *
+ * argv[0] and argv[1] name the partitions; any further arguments are
+ * the names of the arenas to compare, and with none every arena in the
+ * table is compared.  The two partitions must have identical arena
+ * tables.  Flags: -b sets the comparison block size, -s sets sleepms,
+ * and -v raises verbose.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int i, nline;
+	char *p, *q, *table, *table1, *f[10], line[256];
+	vlong start, stop;
+	ArenaPart ap;
+	ArenaPart ap1;
+	
+	ventifmtinstall();
+	blocksize = MaxIoSize;
+	ARGBEGIN{
+	case 'b':
+		blocksize = unittoull(EARGF(usage()));
+		break;
+	case 's':
+		sleepms = atoi(EARGF(usage()));
+		break;
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc < 2)
+		usage();
+
+	data = vtmalloc(blocksize);
+	data1 = vtmalloc(blocksize);
+	if((fd = open(argv[0], OREAD)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+	if((fd1 = open(argv[1], OREAD)) < 0)
+		sysfatal("open %s: %r", argv[1]);
+
+	table = readap(fd, &ap);
+	table1 = readap(fd1, &ap1);
+	if(strcmp(table, table1) != 0)
+		sysfatal("arena partitions do not have identical tables");
+
+	/* the table is a line count followed by one "name start stop" line per arena */
+	nline = atoi(table);
+	p = strchr(table, '\n');
+	if(p)
+		p++;
+	for(i=0; i<nline; i++){
+		if(p == nil){
+			fprint(2, "warning: unexpected arena table end\n");
+			break;
+		}
+		q = strchr(p, '\n');
+		if(q)
+			*q++ = 0;
+		if(strlen(p) >= sizeof line){
+			fprint(2, "warning: long arena table line: %s\n", p);
+			p = q;
+			continue;
+		}
+		strcpy(line, p);
+		memset(f, 0, sizeof f);
+		if(tokenize(line, f, nelem(f)) < 3){
+			fprint(2, "warning: bad arena table line: %s\n", p);
+			p = q;
+			continue;
+		}
+		p = q;
+		/* arena names start at argv[2]; argv[0] and argv[1] are the partitions */
+		if(shouldcheck(f[0], argv+2, argc-2)){
+			start = strtoull(f[1], 0, 0);
+			stop = strtoull(f[2], 0, 0);
+			if(stop <= start){
+				fprint(2, "%s: bad start,stop %lld,%lld\n", f[0], stop, start);
+				continue;
+			}
+			if(seek(fd, start, 0) < 0)
+				fprint(2, "%s: seek to start: %r\n", f[0]);
+			if(seek(fd1, start, 0) < 0)
+				fprint(2, "%s: seek to start: %r\n", f[0]);
+			cmparena(f[0], stop - start);
+		}
+	}
+	/* shouldcheck clears each name it matches; whatever is left was never found */
+	for(i=2; i<argc; i++)
+		if(argv[i] != 0)
+			fprint(2, "%s: did not find arena\n", argv[i]);
+
+	threadexitsall(nil);
+}

+ 317 - 0
sys/src/cmd/venti/srv/cmparenas.c

@@ -0,0 +1,317 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static int	verbose;
+static int	fd;
+static int	fd1;
+static uchar	*data;
+static uchar	*data1;
+static int	blocksize;
+static int	sleepms;
+
+/*
+ * Print the command synopsis on standard error and terminate all threads.
+ * The exit status is the non-empty string "usage" so that a malformed
+ * command line is not reported to the caller as a successful run.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: cmparenas [-b blocksize] [-s ms] [-v] arenapart1 arenapart2 [name...]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * Fill buf with exactly n bytes taken from fd starting at offset off.
+ * pread is repeated until everything has arrived.  Returns 0 on
+ * success; returns -1 if pread fails or if end of file comes first,
+ * setting the error string to "early eof" in the latter case.
+ */
+static int
+preadblock(int fd, uchar *buf, int n, vlong off)
+{
+	int have, m;
+
+	have = 0;
+	while(have < n){
+		m = pread(fd, buf + have, n - have, off + have);
+		if(m > 0){
+			have += m;
+			continue;
+		}
+		if(m == 0)
+			werrstr("early eof");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Fill buf with exactly n bytes read sequentially from fd.
+ * read is repeated until everything has arrived.  Returns 0 on
+ * success; returns -1 if read fails or if end of file comes first,
+ * setting the error string to "early eof" in the latter case.
+ */
+static int
+readblock(int fd, uchar *buf, int n)
+{
+	int have, m;
+
+	have = 0;
+	while(have < n){
+		m = read(fd, buf + have, n - have);
+		if(m > 0){
+			have += m;
+			continue;
+		}
+		if(m == 0)
+			werrstr("early eof");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Print on standard error a summary of the layout of the arena that
+ * starts at the current offset of fd: the byte ranges of its head,
+ * data, clump directory and tail, followed by the unpacked arena
+ * trailer as formatted by printarena.
+ *
+ * head is the already unpacked arena header.  The trailer is read
+ * from the last head->blocksize bytes of the arena into the global
+ * buffer data.  Returns 0 on success, -1 if the trailer cannot be
+ * read or unpacked.
+ */
+static int
+printheader(char *name, ArenaHead *head, int fd)
+{
+	Arena arena;
+	vlong baseoff, lo, hi, off;
+	int clumpmax;
+	
+	/* remember where the arena starts, fetch its last block, then come back */
+	off = seek(fd, 0, 1);
+	seek(fd, off + head->size - head->blocksize, 0);
+	if(readblock(fd, data, head->blocksize) < 0){
+		fprint(2, "%s: reading arena tail: %r\n", name);
+		return -1;
+	}
+	seek(fd, off, 0);
+
+	memset(&arena, 0, sizeof arena);
+	if(unpackarena(&arena, data) < 0){
+		fprint(2, "%s: unpack arena tail: %r\n", name);
+		return -1;
+	}
+	/* fill in the fields that are derived from the header */
+	arena.blocksize = head->blocksize;
+	arena.base = off + head->blocksize;
+	arena.clumpmax = arena.blocksize / ClumpInfoSize;
+	arena.size = head->size - 2*head->blocksize;
+
+	fprint(2, "%s: base=%llx size=%llx blocksize=%x\n", name, off, head->size, head->blocksize);
+
+	/* the ranges printed below are relative to the start of the arena */
+	baseoff = head->blocksize;
+	fprint(2, "\t%llx-%llx: head\n", (vlong)0, baseoff);
+	lo = baseoff;
+	hi = baseoff + arena.diskstats.used;
+	fprint(2, "\t%llx-%llx: data (%llx)\n", lo, hi, hi - lo);
+	hi = head->size - head->blocksize;
+	clumpmax = head->blocksize / ClumpInfoSize;
+	/* clump directory size: whole blocks only, the division truncates */
+	if(clumpmax > 0)
+		lo = hi - (u64int)arena.diskstats.clumps/clumpmax * head->blocksize;
+	else
+		lo = hi;
+	fprint(2, "\t%llx-%llx: clumps (%llx)\n", lo, hi, hi - lo);
+	fprint(2, "\t%llx-%llx: tail\n", hi, hi + head->blocksize);
+	
+	fprint(2, "arena:\n");
+	printarena(2, &arena);
+	return 0;
+}
+
+/*
+ * Compare the arena called name as stored in the two open partitions.
+ * Both fd and fd1 must already be positioned at the start of the arena.
+ * len is the arena length taken from the partition table; if non-zero
+ * it is checked against the size recorded in each arena header.
+ *
+ * Header oddities are reported as warnings and the arena layout is
+ * printed with printheader.  The arena is then read from both files in
+ * blocksize pieces; for every piece that differs, each differing row
+ * of up to 16 bytes is printed in hex, first copy above second.
+ * Read errors abandon the comparison.
+ */
+static void
+cmparena(char *name, vlong len)
+{
+	ArenaHead head;
+	u64int n, e;
+	u32int bs;
+	int i, j, w;
+	char buf[20];
+
+	fprint(2, "cmp %s\n", name);
+
+	/*
+	 * read a little bit, which will include the header
+	 */
+	if(readblock(fd, data, HeadSize) < 0){
+		fprint(2, "%s: reading header: %r\n", name);
+		return;
+	}
+	if(unpackarenahead(&head, data) < 0){
+		fprint(2, "%s: corrupt arena header: %r\n", name);
+		return;
+	}
+	if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
+		fprint(2, "%s: warning: unknown arena version %d\n", name, head.version);
+	if(len != 0 && len != head.size)
+		fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len);
+	if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
+		fprint(2, "%s: warning: unexpected name %s\n", name, head.name);
+
+	/*
+	 * same checks for the second copy; its header was read into
+	 * data1, so that is the buffer that must be unpacked
+	 */
+	if(readblock(fd1, data1, HeadSize) < 0){
+		fprint(2, "%s: reading header: %r\n", name);
+		return;
+	}
+	if(unpackarenahead(&head, data1) < 0){
+		fprint(2, "%s: corrupt arena header: %r\n", name);
+		return;
+	}
+	if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
+		fprint(2, "%s: warning: unknown arena version %d\n", name, head.version);
+	if(len != 0 && len != head.size)
+		fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len);
+	if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
+		fprint(2, "%s: warning: unexpected name %s\n", name, head.name);
+
+	/* rewind both files to the start of the arena */
+	seek(fd, -HeadSize, 1);
+	seek(fd1, -HeadSize, 1);
+
+	if(printheader(name, &head, fd) < 0)
+		return;
+	
+	/*
+	 * now we know how much to read:
+	 * compare head.size bytes, blocksize at a time,
+	 * shortening the final piece as needed
+	 */
+	e = head.size;
+	bs = blocksize;
+	for(n = 0; n < e; n += bs){
+		if(n + bs > e)
+			bs = e - n;
+		if(readblock(fd, data, bs) < 0){
+			fprint(2, "%s: read data: %r\n", name);
+			return;
+		}
+		if(readblock(fd1, data1, bs) < 0){
+			fprint(2, "%s: read data: %r\n", name);
+			return;
+		}
+		if(memcmp(data, data1, bs) != 0){
+			print("mismatch at %llx\n", n);
+			for(i=0; i<bs; i+=16){
+				/* the last row may be short: never look past bs */
+				w = bs - i;
+				if(w > 16)
+					w = 16;
+				if(memcmp(data+i, data1+i, w) != 0){
+					snprint(buf, sizeof buf, "%llx", n+i);
+					print("%s ", buf);
+					for(j=0; j<w; j++){
+						print(" %.2ux", data[i+j]);
+						if(j == 7)
+							print(" -");
+					}
+					print("\n");
+					/* indent the second copy to line up under the first */
+					print("%*s ", (int)strlen(buf), "");
+					for(j=0; j<w; j++){
+						print(" %.2ux", data1[i+j]);
+						if(j == 7)
+							print(" -");
+					}
+					print("\n");
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Report whether the arena called name is wanted.  An empty list
+ * (n == 0) means every arena is wanted.  Otherwise the first non-nil
+ * entry of s equal to name is cleared to nil, marking it as found,
+ * and 1 is returned; 0 is returned if no entry matches.
+ */
+static int
+shouldcheck(char *name, char **s, int n)
+{
+	int k;
+
+	if(n == 0)
+		return 1;
+
+	k = 0;
+	while(k < n){
+		if(s[k] != nil && strcmp(name, s[k]) == 0){
+			s[k] = nil;
+			return 1;
+		}
+		k++;
+	}
+	return 0;
+}
+
+/*
+ * Read the arena partition header from fd into ap and return the
+ * partition's arena table as a newly allocated, NUL-terminated string.
+ * The header is read through the global buffer data.  Any failure,
+ * including running out of memory, is fatal.
+ */
+char *
+readap(int fd, ArenaPart *ap)
+{
+	char *table;
+	
+	if(preadblock(fd, data, 8192, PartBlank) < 0)
+		sysfatal("read arena part header: %r");
+	if(unpackarenapart(ap, data) < 0)
+		sysfatal("corrupted arena part header: %r");
+	fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n",
+		ap->version, ap->blocksize, ap->arenabase);
+	/* the table runs from the first block boundary after the header up to arenabase */
+	ap->tabbase = (PartBlank+HeadSize+ap->blocksize-1)&~(ap->blocksize-1);
+	ap->tabsize = ap->arenabase - ap->tabbase;
+	table = malloc(ap->tabsize+1);
+	if(table == nil)
+		sysfatal("allocating arena part directory: %r");
+	if(preadblock(fd, (uchar*)table, ap->tabsize, ap->tabbase) < 0)
+		sysfatal("reading arena part directory: %r");
+	table[ap->tabsize] = 0;
+	return table;
+}
+
+/*
+ * Entry point for cmparenas: compare the arenas of two arena partitions.
+ *
+ * argv[0] and argv[1] name the partitions; any further arguments are
+ * the names of the arenas to compare, and with none every arena in the
+ * table is compared.  The two partitions must have identical arena
+ * tables.  Flags: -b sets the comparison block size, -s sets sleepms,
+ * and -v raises verbose.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int i, nline;
+	char *p, *q, *table, *table1, *f[10], line[256];
+	vlong start, stop;
+	ArenaPart ap;
+	ArenaPart ap1;
+	
+	ventifmtinstall();
+	blocksize = MaxIoSize;
+	ARGBEGIN{
+	case 'b':
+		blocksize = unittoull(EARGF(usage()));
+		break;
+	case 's':
+		sleepms = atoi(EARGF(usage()));
+		break;
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc < 2)
+		usage();
+
+	data = vtmalloc(blocksize);
+	data1 = vtmalloc(blocksize);
+	if((fd = open(argv[0], OREAD)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+	if((fd1 = open(argv[1], OREAD)) < 0)
+		sysfatal("open %s: %r", argv[1]);
+
+	table = readap(fd, &ap);
+	table1 = readap(fd1, &ap1);
+	if(strcmp(table, table1) != 0)
+		sysfatal("arena partitions do not have identical tables");
+
+	/* the table is a line count followed by one "name start stop" line per arena */
+	nline = atoi(table);
+	p = strchr(table, '\n');
+	if(p)
+		p++;
+	for(i=0; i<nline; i++){
+		if(p == nil){
+			fprint(2, "warning: unexpected arena table end\n");
+			break;
+		}
+		q = strchr(p, '\n');
+		if(q)
+			*q++ = 0;
+		if(strlen(p) >= sizeof line){
+			fprint(2, "warning: long arena table line: %s\n", p);
+			p = q;
+			continue;
+		}
+		strcpy(line, p);
+		memset(f, 0, sizeof f);
+		if(tokenize(line, f, nelem(f)) < 3){
+			fprint(2, "warning: bad arena table line: %s\n", p);
+			p = q;
+			continue;
+		}
+		p = q;
+		/* arena names start at argv[2]; argv[0] and argv[1] are the partitions */
+		if(shouldcheck(f[0], argv+2, argc-2)){
+			start = strtoull(f[1], 0, 0);
+			stop = strtoull(f[2], 0, 0);
+			if(stop <= start){
+				fprint(2, "%s: bad start,stop %lld,%lld\n", f[0], stop, start);
+				continue;
+			}
+			if(seek(fd, start, 0) < 0)
+				fprint(2, "%s: seek to start: %r\n", f[0]);
+			if(seek(fd1, start, 0) < 0)
+				fprint(2, "%s: seek to start: %r\n", f[0]);
+			cmparena(f[0], stop - start);
+		}
+	}
+	/* shouldcheck clears each name it matches; whatever is left was never found */
+	for(i=2; i<argc; i++)
+		if(argv[i] != 0)
+			fprint(2, "%s: did not find arena\n", argv[i]);
+
+	threadexitsall(nil);
+}

+ 96 - 69
sys/src/cmd/venti/config.c → sys/src/cmd/venti/srv/config.c

@@ -2,50 +2,45 @@
 #include "dat.h"
 #include "fns.h"
 
-Index			*mainIndex;
+Index			*mainindex;
 int			paranoid = 1;		/* should verify hashes on disk read */
 
-static ArenaPart	*configArenas(char *file);
-static ISect		*configISect(char *file);
+static ArenaPart	*configarenas(char *file);
+static ISect		*configisect(char *file);
+static Bloom		*configbloom(char *file);
 
 int
-initVenti(char *file, Config *pconf)
+initventi(char *file, Config *conf)
 {
-	Config conf;
-
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
-
-	statsInit();
+	statsinit();
 
 	if(file == nil){
-		setErr(EOk, "no configuration file");
-		return 0;
+		seterr(EOk, "no configuration file");
+		return -1;
 	}
-	if(!runConfig(file, &conf)){
-		setErr(EOk, "can't initialize venti: %R");
-		return 0;
+	if(runconfig(file, conf) < 0){
+		seterr(EOk, "can't initialize venti: %r");
+		return -1;
 	}
-	mainIndex = initIndex(conf.index, conf.sects, conf.nsects);
-	if(mainIndex == nil)
-		return 0;
-	if(pconf)
-		*pconf = conf;
-	return 1;
+	mainindex = initindex(conf->index, conf->sects, conf->nsects);
+	if(mainindex == nil)
+		return -1;
+	mainindex->bloom = conf->bloom;
+	return 0;
 }
 
 static int
-numOk(char *s)
+numok(char *s)
 {
 	char *p;
 
 	strtoull(s, &p, 0);
 	if(p == s)
-		return 0;
+		return -1;
 	if(*p == 0)
-		return 1;
+		return 0;
 	if(p[1] == 0 && strchr("MmGgKk", *p))
-		return 1;
+		return 0;
 	return 0;
 }
 
@@ -69,7 +64,7 @@ enum
 	MaxArgs	= 2
 };
 int
-runConfig(char *file, Config *config)
+runconfig(char *file, Config *config)
 {
 	ArenaPart **av;
 	ISect **sv;
@@ -77,16 +72,16 @@ runConfig(char *file, Config *config)
 	char *s, *line, *flds[MaxArgs + 1];
 	int i, ok;
 
-	if(!readIFile(&f, file))
-		return 0;
+	if(readifile(&f, file) < 0)
+		return -1;
 	memset(config, 0, sizeof *config);
 	config->mem = 0xFFFFFFFFUL;
-	ok = 0;
+	ok = -1;
 	line = nil;
 	for(;;){
-		s = ifileLine(&f);
+		s = ifileline(&f);
 		if(s == nil){
-			ok = 1;
+			ok = 0;
 			break;
 		}
 		line = estrdup(s);
@@ -97,7 +92,7 @@ runConfig(char *file, Config *config)
 				sv[i] = config->sects[i];
 			free(config->sects);
 			config->sects = sv;
-			config->sects[config->nsects] = configISect(flds[1]);
+			config->sects[config->nsects] = configisect(flds[1]);
 			if(config->sects[config->nsects] == nil)
 				break;
 			config->nsects++;
@@ -107,85 +102,90 @@ runConfig(char *file, Config *config)
 				av[i] = config->aparts[i];
 			free(config->aparts);
 			config->aparts = av;
-			config->aparts[config->naparts] = configArenas(flds[1]);
+			config->aparts[config->naparts] = configarenas(flds[1]);
 			if(config->aparts[config->naparts] == nil)
 				break;
 			config->naparts++;
+		}else if(i == 2 && strcmp(flds[0], "bloom") == 0){
+			if(config->bloom){
+				seterr(EAdmin, "duplicate bloom lines in configuration file %s", file);
+				break;
+			}
+			if((config->bloom = configbloom(flds[1])) == nil)
+				break;
 		}else if(i == 2 && strcmp(flds[0], "index") == 0){
-			if(!nameOk(flds[1])){
-				setErr(EAdmin, "illegal index name %s in config file %s", flds[1], file);
+			if(nameok(flds[1]) < 0){
+				seterr(EAdmin, "illegal index name %s in config file %s", flds[1], file);
 				break;
 			}
 			if(config->index != nil){
-				setErr(EAdmin, "duplicate indices in config file %s", file);
+				seterr(EAdmin, "duplicate indices in config file %s", file);
 				break;
 			}
 			config->index = estrdup(flds[1]);
 		}else if(i == 2 && strcmp(flds[0], "bcmem") == 0){
-			if(!numOk(flds[1])){
-				setErr(EAdmin, "illegal size %s in config file %s",
+			if(numok(flds[1]) < 0){
+				seterr(EAdmin, "illegal size %s in config file %s",
 					flds[1], file);
 				break;
 			}
 			if(config->bcmem != 0){
-				setErr(EAdmin, "duplicate bcmem lines in config file %s", file);
+				seterr(EAdmin, "duplicate bcmem lines in config file %s", file);
 				break;
 			}
 			config->bcmem = unittoull(flds[1]);
 		}else if(i == 2 && strcmp(flds[0], "mem") == 0){
-			if(!numOk(flds[1])){
-				setErr(EAdmin, "illegal size %s in config file %s",
+			if(numok(flds[1]) < 0){
+				seterr(EAdmin, "illegal size %s in config file %s",
 					flds[1], file);
 				break;
 			}
 			if(config->mem != 0xFFFFFFFFUL){
-				setErr(EAdmin, "duplicate mem lines in config file %s", file);
+				seterr(EAdmin, "duplicate mem lines in config file %s", file);
 				break;
 			}
 			config->mem = unittoull(flds[1]);
 		}else if(i == 2 && strcmp(flds[0], "icmem") == 0){
-			if(!numOk(flds[1])){
-				setErr(EAdmin, "illegal size %s in config file %s",
+			if(numok(flds[1]) < 0){
+				seterr(EAdmin, "illegal size %s in config file %s",
 					flds[1], file);
 				break;
 			}
 			if(config->icmem != 0){
-				setErr(EAdmin, "duplicate icmem lines in config file %s", file);
+				seterr(EAdmin, "duplicate icmem lines in config file %s", file);
 				break;
 			}
 			config->icmem = unittoull(flds[1]);
 		}else if(i == 1 && strcmp(flds[0], "queuewrites") == 0){
-			config->queueWrites = 1;
+			config->queuewrites = 1;
 		}else if(i == 2 && strcmp(flds[0], "httpaddr") == 0){
-			if(!nameOk(flds[1])){
-				setErr(EAdmin, "illegal http address '%s' in configuration file %s", flds[1], file);
-				break;
-			}
 			if(config->haddr){
-				setErr(EAdmin, "duplicate httpaddr lines in configuration file %s", file);
+				seterr(EAdmin, "duplicate httpaddr lines in configuration file %s", file);
 				break;
 			}
 			config->haddr = estrdup(flds[1]);
-		}else if(i == 2 && strcmp(flds[0], "addr") == 0){
-			if(!nameOk(flds[1])){
-				setErr(EAdmin, "illegal venti address '%s' in configuration file %s", flds[1], file);
+		}else if(i == 2 && strcmp(flds[0], "webroot") == 0){
+			if(config->webroot){
+				seterr(EAdmin, "duplicate webroot lines in configuration file %s", file);
 				break;
 			}
+			config->webroot = estrdup(flds[1]);
+		}else if(i == 2 && strcmp(flds[0], "addr") == 0){
 			if(config->vaddr){
-				setErr(EAdmin, "duplicate addr lines in configuration file %s", file);
+				seterr(EAdmin, "duplicate addr lines in configuration file %s", file);
 				break;
 			}
 			config->vaddr = estrdup(flds[1]);
 		}else{
-			setErr(EAdmin, "illegal line '%s' in configuration file %s", line, file);
+			seterr(EAdmin, "illegal line '%s' in configuration file %s", line, file);
 			break;
 		}
 		free(line);
 		line = nil;
 	}
 	free(line);
-	freeIFile(&f);
-	if(!ok){
+	freeifile(&f);
+	if(ok < 0){
 		free(config->sects);
 		config->sects = nil;
 		free(config->aparts);
@@ -195,26 +195,53 @@ runConfig(char *file, Config *config)
 }
 
+/*
+ * open the partition named by file read/write with ODIRECT and
+ * initialize an index section from it.
+ * returns nil on failure; when initisect fails the error string
+ * is prefixed with the file name.
+ * NOTE(review): unlike configbloom, part is not freed when
+ * initisect fails -- confirm whether initisect releases it.
+ */
 static ISect*
-configISect(char *file)
+configisect(char *file)
 {
 	Part *part;
+	ISect *is;
+	
+	if(0) fprint(2, "configure index section in %s\n", file);
 
-//	fprint(2, "configure index section in %s\n", file);
-
-	part = initPart(file, 0);
+	part = initpart(file, ORDWR|ODIRECT);
 	if(part == nil)
-		return 0;
-	return initISect(part);
+		return nil;
+	is = initisect(part);
+	if(is == nil)
+		werrstr("%s: %r", file);
+	return is;
 }
 
+/*
+ * open the partition named by file read/write with ODIRECT and
+ * initialize an arena partition from it.
+ * returns nil on failure; when initarenapart fails the error
+ * string is prefixed with the file name.
+ * NOTE(review): unlike configbloom, part is not freed when
+ * initarenapart fails -- confirm whether initarenapart releases it.
+ */
 static ArenaPart*
-configArenas(char *file)
+configarenas(char *file)
 {
+	ArenaPart *ap;
 	Part *part;
 
-//	fprint(2, "configure arenas in %s\n", file);
-	part = initPart(file, 0);
+	if(0) fprint(2, "configure arenas in %s\n", file);
+	part = initpart(file, ORDWR|ODIRECT);
 	if(part == nil)
-		return 0;
-	return initArenaPart(part);
+		return nil;
+	ap = initarenapart(part);
+	if(ap == nil)
+		werrstr("%s: %r", file);
+	return ap;
+}
+
+/*
+ * open the partition named by file read/write with ODIRECT and
+ * read a bloom filter from it.
+ * returns nil on failure; when readbloom fails the error string
+ * is prefixed with the file name and the partition is freed.
+ */
+static Bloom*
+configbloom(char *file)
+{
+	Bloom *b;
+	Part *part;
+
+	if(0) fprint(2, "configure bloom in %s\n", file);
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		return nil;
+	b = readbloom(part);
+	if(b == nil){
+		werrstr("%s: %r", file);
+		freepart(part);
+	}
+	return b;
 }
+

+ 698 - 0
sys/src/cmd/venti/srv/conv.c

@@ -0,0 +1,698 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * disk structure conversion routines
+ *
+ * integers are stored most significant byte first.
+ * the GET macros yield the value found at byte pointer p.
+ * the PUT macros store v at p; the 16, 32 and 64 bit forms expand
+ * to several statements and evaluate p and v more than once, so
+ * they must be used as complete statements with side-effect free
+ * arguments, never as the unbraced body of an if, else or loop.
+ * U64PUT also needs a u32int scratch variable, passed as t32.
+ */
+#define	U8GET(p)	((p)[0])
+#define	U16GET(p)	(((p)[0]<<8)|(p)[1])
+#define	U32GET(p)	((u32int)(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3]))
+#define	U64GET(p)	(((u64int)U32GET(p)<<32)|(u64int)U32GET((p)+4))
+
+#define	U8PUT(p,v)	(p)[0]=(v)&0xFF
+#define	U16PUT(p,v)	(p)[0]=((v)>>8)&0xFF;(p)[1]=(v)&0xFF
+#define	U32PUT(p,v)	(p)[0]=((v)>>24)&0xFF;(p)[1]=((v)>>16)&0xFF;(p)[2]=((v)>>8)&0xFF;(p)[3]=(v)&0xFF
+#define	U64PUT(p,v,t32)	t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
+
+/*
+ * table pairing each known on-disk magic number with its
+ * symbolic name; searched by fmtmagic.
+ */
+static struct {
+	u32int m;
+	char *s;
+} magics[] = {
+	ArenaPartMagic, "ArenaPartMagic",
+	ArenaHeadMagic, "ArenaHeadMagic",
+	ArenaMagic, "ArenaMagic",
+	ISectMagic, "ISectMagic",
+	BloomMagic, "BloomMagic",
+};
+
+/*
+ * return a printable form of magic number m: the symbolic name
+ * from the magics table if m is known, otherwise m formatted in
+ * hex into the caller's buffer s, which is then returned.
+ * s must be big enough for the formatted number; the callers in
+ * this file pass a 20 byte buffer.
+ */
+static char*
+fmtmagic(char *s, u32int m)
+{
+	int i;
+
+	for(i=0; i<nelem(magics); i++)
+		if(magics[i].m == m)
+			return magics[i].s;
+	sprint(s, "%#08ux", m);
+	return s;
+}
+
+/*
+ * return the 32 bit big-endian magic number stored in the
+ * first four bytes of buf.
+ */
+u32int
+unpackmagic(u8int *buf)
+{
+	return U32GET(buf);
+}
+
+/*
+ * store magic, big-endian, in the first four bytes of buf.
+ */
+void
+packmagic(u32int magic, u8int *buf)
+{
+	U32PUT(buf, magic);
+}
+
+/*
+ * decode an arena partition header from buf into ap.
+ * disk layout: magic, version, blocksize, arenabase,
+ * each 32 bits, ArenaPartSize bytes in all.
+ * returns -1 with the error set if the magic number is wrong,
+ * 0 otherwise; the version is stored but not checked here.
+ */
+int
+unpackarenapart(ArenaPart *ap, u8int *buf)
+{
+	u8int *p;
+	u32int m;
+	char fbuf[20];
+
+	p = buf;
+
+	m = U32GET(p);
+	if(m != ArenaPartMagic){
+		seterr(ECorrupt, "arena set has wrong magic number: %s expected ArenaPartMagic (%#lux)", fmtmagic(fbuf, m), ArenaPartMagic);
+		return -1;
+	}
+	p += U32Size;
+	ap->version = U32GET(p);
+	p += U32Size;
+	ap->blocksize = U32GET(p);
+	p += U32Size;
+	ap->arenabase = U32GET(p);
+	p += U32Size;
+
+	/* internal consistency check on the layout constants */
+	if(buf + ArenaPartSize != p)
+		sysfatal("unpackarenapart unpacked wrong amount");
+
+	return 0;
+}
+
+/*
+ * encode the on-disk fields of ap into buf as an arena partition
+ * header: ArenaPartMagic, version, blocksize, arenabase, each
+ * 32 bits.  writes ArenaPartSize bytes and always returns 0.
+ */
+int
+packarenapart(ArenaPart *ap, u8int *buf)
+{
+	u8int *p;
+
+	p = buf;
+
+	U32PUT(p, ArenaPartMagic);
+	p += U32Size;
+	U32PUT(p, ap->version);
+	p += U32Size;
+	U32PUT(p, ap->blocksize);
+	p += U32Size;
+	U32PUT(p, ap->arenabase);
+	p += U32Size;
+
+	/* internal consistency check on the layout constants */
+	if(buf + ArenaPartSize != p)
+		sysfatal("packarenapart packed wrong amount");
+
+	return 0;
+}
+
+/*
+ * decode an arena trailer from buf into arena.
+ * disk layout: magic, version, name (ANameSize bytes), clumps,
+ * cclumps, ctime, wtime, clumpmagic (version 5 only), used,
+ * uncsize, sealed, then the optional memstats extension
+ * described below.
+ * version 4 trailers carry no clump magic, so _ClumpMagic is
+ * assumed for them.
+ * returns -1 with the error set on a wrong magic number or an
+ * unknown version, 0 otherwise.  note that the fixed fields are
+ * decoded before the version is validated, so arena may be
+ * partly overwritten when -1 is returned for a bad version.
+ */
+int
+unpackarena(Arena *arena, u8int *buf)
+{
+	int sz;
+	u8int *p;
+	u32int m;
+	char fbuf[20];
+
+	p = buf;
+
+	m = U32GET(p);
+	if(m != ArenaMagic){
+		seterr(ECorrupt, "arena has wrong magic number: %s expected ArenaMagic (%#lux)", fmtmagic(fbuf, m), ArenaMagic);
+		return -1;
+	}
+	p += U32Size;
+	arena->version = U32GET(p);
+	p += U32Size;
+	namecp(arena->name, (char*)p);
+	p += ANameSize;
+	arena->diskstats.clumps = U32GET(p);
+	p += U32Size;
+	arena->diskstats.cclumps = U32GET(p);
+	p += U32Size;
+	arena->ctime = U32GET(p);
+	p += U32Size;
+	arena->wtime = U32GET(p);
+	p += U32Size;
+	if(arena->version == ArenaVersion5){
+		arena->clumpmagic = U32GET(p);
+		p += U32Size;
+	}
+	arena->diskstats.used = U64GET(p);
+	p += U64Size;
+	arena->diskstats.uncsize = U64GET(p);
+	p += U64Size;
+	arena->diskstats.sealed = U8GET(p);
+	p += U8Size;
+	switch(arena->version){
+	case ArenaVersion4:
+		sz = ArenaSize4;
+		arena->clumpmagic = _ClumpMagic;
+		break;
+	case ArenaVersion5:
+		sz = ArenaSize5;
+		break;
+	default:
+		seterr(ECorrupt, "arena has bad version number %d", arena->version);
+		return -1;
+	}
+	/*
+	 * Additional fields for the memstats version of the stats.
+	 * Diskstats reflects what is committed to the index.
+	 * Memstats reflects what is in the arena.  Originally intended
+	 * this to be a version 5 extension, but might as well use for
+	 * all the existing version 4 arenas too.
+	 *
+	 * To maintain backwards compatibility with existing venti
+	 * installations using the older format, we define that if 
+	 * memstats == diskstats, then the extension fields are not
+	 * included (see packarena below).  That is, only partially
+	 * indexed arenas have these fields.  Fully indexed arenas
+	 * (in particular, sealed arenas) do not.
+	 */
+	/* a byte of 1 right after the fixed fields marks the extension */
+	if(U8GET(p) == 1){
+		sz += ArenaSize5a-ArenaSize5;
+		p += U8Size;
+		arena->memstats.clumps = U32GET(p);
+		p += U32Size;
+		arena->memstats.cclumps = U32GET(p);
+		p += U32Size;
+		arena->memstats.used = U64GET(p);
+		p += U64Size;
+		arena->memstats.uncsize = U64GET(p);
+		p += U64Size;
+		arena->memstats.sealed = U8GET(p);
+		p += U8Size;
+	}else
+		arena->memstats = arena->diskstats;
+	if(buf + sz != p)
+		sysfatal("unpackarena unpacked wrong amount");
+
+	return 0;
+}
+
+/*
+ * encode arena as an arena trailer in buf, including the
+ * memstats extension only when memstats differs from diskstats.
+ * equivalent to _packarena(arena, buf, 0).
+ */
+int
+packarena(Arena *arena, u8int *buf)
+{
+	return _packarena(arena, buf, 0);
+}
+
+/*
+ * encode arena as an arena trailer in buf, in the layout read
+ * back by unpackarena.
+ * the memstats extension is written when forceext is non-zero or
+ * when any memstats field differs from the matching diskstats
+ * field.
+ * a version 4 trailer has no room for the clump magic; a warning
+ * is printed if arena->clumpmagic is not the default _ClumpMagic.
+ * an unknown version is fatal.  returns 0 otherwise.
+ */
+int
+_packarena(Arena *arena, u8int *buf, int forceext)
+{
+	int sz;
+	u8int *p;
+	u32int t32;
+
+	switch(arena->version){
+	case ArenaVersion4:
+		sz = ArenaSize4;
+		if(arena->clumpmagic != _ClumpMagic)
+			fprint(2, "warning: writing old arena tail loses clump magic 0x%lux != 0x%lux\n",
+				(ulong)arena->clumpmagic, (ulong)_ClumpMagic);
+		break;
+	case ArenaVersion5:
+		sz = ArenaSize5;
+		break;
+	default:
+		sysfatal("packarena unknown version %d", arena->version);
+		return -1;
+	}
+
+	p = buf;
+
+	U32PUT(p, ArenaMagic);
+	p += U32Size;
+	U32PUT(p, arena->version);
+	p += U32Size;
+	namecp((char*)p, arena->name);
+	p += ANameSize;
+	U32PUT(p, arena->diskstats.clumps);
+	p += U32Size;
+	U32PUT(p, arena->diskstats.cclumps);
+	p += U32Size;
+	U32PUT(p, arena->ctime);
+	p += U32Size;
+	U32PUT(p, arena->wtime);
+	p += U32Size;
+	if(arena->version == ArenaVersion5){
+		U32PUT(p, arena->clumpmagic);
+		p += U32Size;
+	}
+	U64PUT(p, arena->diskstats.used, t32);
+	p += U64Size;
+	U64PUT(p, arena->diskstats.uncsize, t32);
+	p += U64Size;
+	U8PUT(p, arena->diskstats.sealed);
+	p += U8Size;
+	
+	/*
+	 * Extension fields; see above.
+	 */
+	if(forceext
+	|| arena->memstats.clumps != arena->diskstats.clumps
+	|| arena->memstats.cclumps != arena->diskstats.cclumps
+	|| arena->memstats.used != arena->diskstats.used
+	|| arena->memstats.uncsize != arena->diskstats.uncsize
+	|| arena->memstats.sealed != arena->diskstats.sealed){
+		sz += ArenaSize5a - ArenaSize5;
+		U8PUT(p, 1);	/* marker byte tested by unpackarena */
+		p += U8Size;
+		U32PUT(p, arena->memstats.clumps);
+		p += U32Size;
+		U32PUT(p, arena->memstats.cclumps);
+		p += U32Size;
+		U64PUT(p, arena->memstats.used, t32);	
+		p += U64Size;
+		U64PUT(p, arena->memstats.uncsize, t32);
+		p += U64Size;
+		U8PUT(p, arena->memstats.sealed);
+		p += U8Size;
+	}
+
+	if(buf + sz != p)
+		sysfatal("packarena packed wrong amount");
+
+	return 0;
+}
+
+/*
+ * decode an arena header from buf into head.
+ * disk layout: magic, version, name (ANameSize bytes), blocksize,
+ * size (64 bits), clumpmagic (version 5 only).
+ * version 4 headers carry no clump magic, so _ClumpMagic is
+ * assumed for them.
+ * returns -1 with the error set on a wrong magic number or an
+ * unexpected version, 0 otherwise.
+ */
+int
+unpackarenahead(ArenaHead *head, u8int *buf)
+{
+	u8int *p;
+	u32int m;
+	int sz;
+	char fbuf[20];
+
+	p = buf;
+
+	m = U32GET(p);
+	if(m != ArenaHeadMagic){
+		seterr(ECorrupt, "arena has wrong magic number: %s expected ArenaHeadMagic (%#lux)", fmtmagic(fbuf, m), ArenaHeadMagic);
+		return -1;
+	}
+
+	p += U32Size;
+	head->version = U32GET(p);
+	p += U32Size;
+	namecp(head->name, (char*)p);
+	p += ANameSize;
+	head->blocksize = U32GET(p);
+	p += U32Size;
+	head->size = U64GET(p);
+	p += U64Size;
+	if(head->version == ArenaVersion5){
+		head->clumpmagic = U32GET(p);
+		p += U32Size;
+	}
+
+	switch(head->version){
+	case ArenaVersion4:
+		sz = ArenaHeadSize4;
+		head->clumpmagic = _ClumpMagic;
+		break;
+	case ArenaVersion5:
+		sz = ArenaHeadSize5;
+		break;
+	default:
+		seterr(ECorrupt, "arena head has unexpected version %d", head->version);
+		return -1;
+	}
+
+	if(buf + sz != p)
+		sysfatal("unpackarenahead unpacked wrong amount");
+
+	return 0;
+}
+
+/*
+ * encode head as an arena header in buf, in the layout read back
+ * by unpackarenahead.
+ * a version 4 header has no room for the clump magic; a warning
+ * is printed if head->clumpmagic is not the default _ClumpMagic.
+ * an unknown version is fatal.  returns 0 otherwise.
+ */
+int
+packarenahead(ArenaHead *head, u8int *buf)
+{
+	u8int *p;
+	int sz;
+	u32int t32;
+
+	switch(head->version){
+	case ArenaVersion4:
+		sz = ArenaHeadSize4;
+		if(head->clumpmagic != _ClumpMagic)
+			fprint(2, "warning: writing old arena header loses clump magic 0x%lux != 0x%lux\n",
+				(ulong)head->clumpmagic, (ulong)_ClumpMagic);
+		break;
+	case ArenaVersion5:
+		sz = ArenaHeadSize5;
+		break;
+	default:
+		sysfatal("packarenahead unknown version %d", head->version);
+		return -1;
+	}
+
+	p = buf;
+
+	U32PUT(p, ArenaHeadMagic);
+	p += U32Size;
+	U32PUT(p, head->version);
+	p += U32Size;
+	namecp((char*)p, head->name);
+	p += ANameSize;
+	U32PUT(p, head->blocksize);
+	p += U32Size;
+	U64PUT(p, head->size, t32);
+	p += U64Size;
+	if(head->version == ArenaVersion5){
+		U32PUT(p, head->clumpmagic);
+		p += U32Size;
+	}
+	if(buf + sz != p)
+		sysfatal("packarenahead packed wrong amount");
+
+	return 0;
+}
+
+/*
+ * sanity check a clump header: an unencoded clump must have
+ * size equal to uncsize, a compressed clump must have size
+ * strictly less than uncsize, and no other encoding is legal.
+ * returns 0 if the header is consistent, -1 with the error set
+ * otherwise.
+ */
+static int
+checkclump(Clump *w)
+{
+	if(w->encoding == ClumpENone){
+		if(w->info.size != w->info.uncsize){
+			seterr(ECorrupt, "uncompressed wad size mismatch");
+			return -1;
+		}
+	}else if(w->encoding == ClumpECompress){
+		if(w->info.size >= w->info.uncsize){
+			seterr(ECorrupt, "compressed lump has inconsistent block sizes %d %d", w->info.size, w->info.uncsize);
+			return -1;
+		}
+	}else{
+		seterr(ECorrupt, "clump has illegal encoding");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * decode a clump header from buf into c.
+ * disk layout: magic, type, size, uncsize, score, encoding,
+ * creator, time; ClumpSize bytes in all.
+ * cmagic is the magic number the caller expects to find.
+ * returns -1 with the error set if the magic does not match or
+ * the decoded header fails checkclump, 0 otherwise.
+ */
+int
+unpackclump(Clump *c, u8int *buf, u32int cmagic)
+{
+	u8int *p;
+	u32int magic;
+
+	p = buf;
+	magic = U32GET(p);
+	if(magic != cmagic){
+		seterr(ECorrupt, "clump has bad magic number=%#8.8ux != %#8.8ux", magic, cmagic);
+		return -1;
+	}
+	p += U32Size;
+
+	c->info.type = vtfromdisktype(U8GET(p));
+	p += U8Size;
+	c->info.size = U16GET(p);
+	p += U16Size;
+	c->info.uncsize = U16GET(p);
+	p += U16Size;
+	scorecp(c->info.score, p);
+	p += VtScoreSize;
+
+	c->encoding = U8GET(p);
+	p += U8Size;
+	c->creator = U32GET(p);
+	p += U32Size;
+	c->time = U32GET(p);
+	p += U32Size;
+
+	if(buf + ClumpSize != p)
+		sysfatal("unpackclump unpacked wrong amount");
+
+	return checkclump(c);
+}
+
+/*
+ * encode c as a clump header in buf with the given magic number,
+ * in the layout read back by unpackclump.
+ * the header is written before it is validated: the return value
+ * is that of checkclump(c), so -1 means buf now holds an
+ * inconsistent header.
+ */
+int
+packclump(Clump *c, u8int *buf, u32int magic)
+{
+	u8int *p;
+
+	p = buf;
+	U32PUT(p, magic);
+	p += U32Size;
+
+	U8PUT(p, vttodisktype(c->info.type));
+	p += U8Size;
+	U16PUT(p, c->info.size);
+	p += U16Size;
+	U16PUT(p, c->info.uncsize);
+	p += U16Size;
+	scorecp(p, c->info.score);
+	p += VtScoreSize;
+
+	U8PUT(p, c->encoding);
+	p += U8Size;
+	U32PUT(p, c->creator);
+	p += U32Size;
+	U32PUT(p, c->time);
+	p += U32Size;
+
+	if(buf + ClumpSize != p)
+		sysfatal("packclump packed wrong amount");
+
+	return checkclump(c);
+}
+
+/*
+ * decode a clump directory entry from buf into ci.
+ * disk layout: type, size, uncsize, score; ClumpInfoSize bytes.
+ * the type byte is translated with vtfromdisktype.
+ */
+void
+unpackclumpinfo(ClumpInfo *ci, u8int *buf)
+{
+	u8int *p;
+
+	p = buf;
+	ci->type = vtfromdisktype(U8GET(p));
+	p += U8Size;
+	ci->size = U16GET(p);
+	p += U16Size;
+	ci->uncsize = U16GET(p);
+	p += U16Size;
+	scorecp(ci->score, p);
+	p += VtScoreSize;
+
+	if(buf + ClumpInfoSize != p)
+		sysfatal("unpackclumpinfo unpacked wrong amount");
+}
+
+/*
+ * encode ci as a clump directory entry in buf: type (translated
+ * with vttodisktype), size, uncsize, score; ClumpInfoSize bytes.
+ */
+void
+packclumpinfo(ClumpInfo *ci, u8int *buf)
+{
+	u8int *p;
+
+	p = buf;
+	U8PUT(p, vttodisktype(ci->type));
+	p += U8Size;
+	U16PUT(p, ci->size);
+	p += U16Size;
+	U16PUT(p, ci->uncsize);
+	p += U16Size;
+	scorecp(p, ci->score);
+	p += VtScoreSize;
+
+	if(buf + ClumpInfoSize != p)
+		sysfatal("packclumpinfo packed wrong amount");
+}
+
+/*
+ * decode an index section header from buf into is.
+ * disk layout: magic, version, name, index name (ANameSize bytes
+ * each), blocksize, blockbase, blocks, start, stop (ISectSize1
+ * bytes), followed in version 2 headers by bucketmagic
+ * (ISectSize2 bytes).  bucketmagic is set to 0 for other versions.
+ * returns -1 with the error set if the magic number is wrong,
+ * 0 otherwise; the version is stored but not checked here.
+ */
+int
+unpackisect(ISect *is, u8int *buf)
+{
+	u8int *p;
+	u32int m;
+	char fbuf[20];
+
+	p = buf;
+
+
+	m = U32GET(p);
+	if(m != ISectMagic){
+		seterr(ECorrupt, "index section has wrong magic number: %s expected ISectMagic (%#lux)",
+			fmtmagic(fbuf, m), ISectMagic);
+		return -1;
+	}
+	p += U32Size;
+	is->version = U32GET(p);
+	p += U32Size;
+	namecp(is->name, (char*)p);
+	p += ANameSize;
+	namecp(is->index, (char*)p);
+	p += ANameSize;
+	is->blocksize = U32GET(p);
+	p += U32Size;
+	is->blockbase = U32GET(p);
+	p += U32Size;
+	is->blocks = U32GET(p);
+	p += U32Size;
+	is->start = U32GET(p);
+	p += U32Size;
+	is->stop = U32GET(p);
+	p += U32Size;
+	if(buf + ISectSize1 != p)
+		sysfatal("unpackisect unpacked wrong amount");
+	is->bucketmagic = 0;
+	if(is->version == ISectVersion2){
+		is->bucketmagic = U32GET(p);
+		p += U32Size;
+		if(buf + ISectSize2 != p)
+			sysfatal("unpackisect unpacked wrong amount");
+	}
+
+	return 0;
+}
+
+/*
+ * encode is as an index section header in buf, in the layout
+ * read back by unpackisect.  bucketmagic is written only for
+ * version 2 headers.  always returns 0.
+ */
+int
+packisect(ISect *is, u8int *buf)
+{
+	u8int *p;
+
+	p = buf;
+
+	U32PUT(p, ISectMagic);
+	p += U32Size;
+	U32PUT(p, is->version);
+	p += U32Size;
+	namecp((char*)p, is->name);
+	p += ANameSize;
+	namecp((char*)p, is->index);
+	p += ANameSize;
+	U32PUT(p, is->blocksize);
+	p += U32Size;
+	U32PUT(p, is->blockbase);
+	p += U32Size;
+	U32PUT(p, is->blocks);
+	p += U32Size;
+	U32PUT(p, is->start);
+	p += U32Size;
+	U32PUT(p, is->stop);
+	p += U32Size;
+	if(buf + ISectSize1 != p)
+		sysfatal("packisect packed wrong amount");
+	if(is->version == ISectVersion2){
+		U32PUT(p, is->bucketmagic);
+		p += U32Size;
+		if(buf + ISectSize2 != p)
+			sysfatal("packisect packed wrong amount");
+	}
+
+	return 0;
+}
+
+/*
+ * decode an index entry from buf into ie.
+ * disk layout: score, wtime, train, address (64 bits), size,
+ * type, blocks; IEntrySize bytes in all.  the address must sit
+ * at offset IEntryAddrOff and the type at IEntryTypeOff; both
+ * are verified as the entry is walked.
+ */
+void
+unpackientry(IEntry *ie, u8int *buf)
+{
+	u8int *p;
+
+	p = buf;
+
+	scorecp(ie->score, p);
+	p += VtScoreSize;
+	ie->wtime = U32GET(p);
+	p += U32Size;
+	ie->train = U16GET(p);
+	p += U16Size;
+	if(p - buf != IEntryAddrOff)
+		sysfatal("unpackientry bad IEntryAddrOff amount");
+	ie->ia.addr = U64GET(p);
+	/*
+	 * NOTE(review): looks like leftover debugging output; it prints
+	 * the raw bytes whenever the top byte of the address is non-zero.
+	 * kept as is because removing it would change what is printed.
+	 */
+	if(ie->ia.addr>>56) print("%.8H => %llux\n", p, ie->ia.addr);
+	p += U64Size;
+	ie->ia.size = U16GET(p);
+	p += U16Size;
+	if(p - buf != IEntryTypeOff)
+		sysfatal("unpackientry bad IEntryTypeOff amount");
+	ie->ia.type = vtfromdisktype(U8GET(p));
+	p += U8Size;
+	ie->ia.blocks = U8GET(p);
+	p += U8Size;
+
+	if(p - buf != IEntrySize)
+		sysfatal("unpackientry unpacked wrong amount");
+}
+
+/*
+ * encode ie as an index entry in buf: score, wtime, train,
+ * address (64 bits), size, type (translated with vttodisktype),
+ * blocks; IEntrySize bytes in all.
+ */
+void
+packientry(IEntry *ie, u8int *buf)
+{
+	u32int t32;
+	u8int *p;
+
+	p = buf;
+
+	scorecp(p, ie->score);
+	p += VtScoreSize;
+	U32PUT(p, ie->wtime);
+	p += U32Size;
+	U16PUT(p, ie->train);
+	p += U16Size;
+	U64PUT(p, ie->ia.addr, t32);
+	p += U64Size;
+	U16PUT(p, ie->ia.size);
+	p += U16Size;
+	U8PUT(p, vttodisktype(ie->ia.type));
+	p += U8Size;
+	U8PUT(p, ie->ia.blocks);
+	p += U8Size;
+
+	if(p - buf != IEntrySize)
+		sysfatal("packientry packed wrong amount");
+}
+
+/*
+ * decode an index bucket header from buf into b.
+ * disk layout: entry count (16 bits), magic (32 bits), then the
+ * entries; b->data is pointed at the entries inside buf, not
+ * copied.
+ * if magic is non-zero and does not match the stored magic, the
+ * bucket is treated as empty (n = 0).  a zero magic skips the check.
+ */
+void
+unpackibucket(IBucket *b, u8int *buf, u32int magic)
+{
+	b->n = U16GET(buf);
+	b->data = buf + IBucketSize;
+	if(magic && magic != U32GET(buf+U16Size))
+		b->n = 0;
+}		
+
+/*
+ * write the index bucket header for b into buf: entry count
+ * (16 bits) followed by magic (32 bits).  the entries themselves
+ * are not written here.
+ */
+void
+packibucket(IBucket *b, u8int *buf, u32int magic)
+{
+	U16PUT(buf, b->n);
+	U32PUT(buf+U16Size, magic);
+}
+
+/*
+ * encode the bloom filter header for b into buf: BloomMagic,
+ * BloomVersion, nhash, size, each 32 bits (16 bytes, which is
+ * BloomHeadSize).
+ */
+void
+packbloomhead(Bloom *b, u8int *buf)
+{
+	u8int *p;
+
+	p = buf;
+	U32PUT(p, BloomMagic);
+	U32PUT(p+4, BloomVersion);
+	U32PUT(p+8, b->nhash);
+	U32PUT(p+12, b->size);
+}
+
+/*
+ * decode a bloom filter header from buf into b.
+ * disk layout: magic, version, nhash, size, each 32 bits;
+ * BloomHeadSize bytes in all.
+ * size must be a power of two between BloomHeadSize and
+ * MaxBloomSize inclusive.
+ * returns -1 with the error set on a wrong magic number, wrong
+ * version or invalid size, 0 otherwise.
+ * NOTE(review): nhash is stored without a range check here --
+ * confirm it is validated elsewhere.
+ */
+int
+unpackbloomhead(Bloom *b, u8int *buf)
+{
+	u8int *p;
+	u32int m;
+	char fbuf[20];
+
+	p = buf;
+
+	m = U32GET(p);
+	if(m != BloomMagic){
+		seterr(ECorrupt, "bloom filter has wrong magic number: %s expected BloomMagic (%#lux)", fmtmagic(fbuf, m), (ulong)BloomMagic);
+		return -1;
+	}
+	p += U32Size;
+	
+	m = U32GET(p);
+	if(m != BloomVersion){
+		seterr(ECorrupt, "bloom filter has wrong version %ud expected %ud", (uint)m, (uint)BloomVersion);
+		return -1;
+	}
+	p += U32Size;
+
+	b->nhash = U32GET(p);
+	p += U32Size;
+
+	b->size = U32GET(p);
+	p += U32Size;
+	/* x&(x-1) is non-zero unless x is a power of two */
+	if(b->size < BloomHeadSize || b->size > MaxBloomSize || (b->size&(b->size-1))){
+		seterr(ECorrupt, "bloom filter has invalid size %#lux", b->size);
+		return -1;
+	}
+
+	if(buf + BloomHeadSize != p)
+		sysfatal("unpackbloomhead unpacked wrong amount");
+
+	return 0;
+}

+ 731 - 0
sys/src/cmd/venti/srv/dat.h

@@ -0,0 +1,731 @@
+typedef struct Config		Config;
+typedef struct AMap		AMap;
+typedef struct AMapN		AMapN;
+typedef struct Arena		Arena;
+typedef struct AState	AState;
+typedef struct ArenaHead	ArenaHead;
+typedef struct ArenaPart	ArenaPart;
+typedef struct ArenaTail	ArenaTail;
+typedef struct ATailStats	ATailStats;
+typedef struct CIBlock		CIBlock;
+typedef struct Clump		Clump;
+typedef struct ClumpInfo	ClumpInfo;
+typedef struct Graph Graph;
+typedef struct IAddr		IAddr;
+typedef struct IBucket		IBucket;
+typedef struct IEStream		IEStream;
+#pragma incomplete IEStream
+typedef struct IEntry		IEntry;
+typedef struct IFile		IFile;
+typedef struct ISect		ISect;
+typedef struct Index		Index;
+typedef struct Lump		Lump;
+typedef struct DBlock		DBlock;
+typedef struct Part		Part;
+typedef struct Statbin Statbin;
+typedef struct Statdesc	Statdesc;
+typedef struct Stats		Stats;
+typedef struct ZBlock		ZBlock;
+typedef struct Round	Round;
+typedef struct Bloom	Bloom;
+
+/* all-ones values of the given width */
+#define TWID32	((u32int)~(u32int)0)
+#define TWID64	((u64int)~(u64int)0)
+#define	TWID8	((u8int)~(u8int)0)
+
+/*
+ * global constants: i/o limits, syncarena return codes, error
+ * severities, on-disk magic numbers and versions, clump
+ * encodings, on-disk structure sizes, and dirty-block ordering.
+ */
+enum
+{
+	ABlockLog		= 9,		/* log2(512), the quantum for reading arenas */
+	ANameSize		= 64,
+	MaxDiskBlock		= 64*1024,	/* max. allowed size for a disk block */
+	MaxIoSize		= 64*1024,	/* max. allowed size for a disk io operation */
+	PartBlank		= 256*1024,	/* untouched section at beginning of partition */
+	HeadSize		= 512,		/* size of a header after PartBlank */
+	MinArenaSize		= 1*1024*1024,	/* smallest reasonable arena size */
+	IndexBase		= 1024*1024,	/* initial address to use in an index */
+	MaxIo			= 64*1024,	/* max size of a single read or write operation */
+	ICacheBits		= 16,		/* default bits for indexing icache */
+	ICacheDepth		= 4,		/* default depth of an icache hash chain */
+	MaxAMap			= 2*1024,	/* max. allowed arenas in an address mapping; must be < 32*1024 */
+
+	/*
+	 * return codes from syncarena
+	 */
+	SyncDataErr	= 1 << 0,		/* problem reading the clump data */
+	SyncCIErr	= 1 << 1,		/* found erroneous clump directory entries */
+	SyncCIZero	= 1 << 2,		/* found unwritten clump directory entries */
+	SyncFixErr	= 1 << 3,		/* error writing fixed data */
+	SyncHeader	= 1 << 4,		/* altered header fields */
+
+	/*
+	 * error severity
+	 */
+	EOk			= 0,		/* error expected in normal operation */
+	EStrange,				/* strange error that should be logged */
+	ECorrupt,				/* corrupted data found in arenas */
+	EICorrupt,				/* corrupted data found in index */
+	EAdmin,					/* should be brought to administrators' attention */
+	ECrash,					/* really bad internal error */
+	EBug,					/* a limitation which should be fixed */
+	EInconsist,				/* inconsistencies between index and arena */
+	EMax,
+
+	/*
+	 * internal disk formats for the venti archival storage system
+	 */
+	/*
+	 * magic numbers on disk
+	 */
+	_ClumpMagic		= 0xd15cb10cU,	/* clump header, deprecated */
+	ClumpFreeMagic		= 0,		/* free clump; terminates active clump log */
+
+	ArenaPartMagic		= 0xa9e4a5e7U,	/* arena partition header */
+	ArenaMagic		= 0xf2a14eadU,	/* arena trailer */
+	ArenaHeadMagic		= 0xd15c4eadU,	/* arena header */
+	
+	BloomMagic		= 0xb1004eadU,	/* bloom filter header */
+	BloomMaxHash	= 32,	/* NOTE(review): same value as MaxBloomHash below; looks like a duplicate */
+
+	ISectMagic		= 0xd15c5ec7U,	/* index header */
+
+	ArenaPartVersion	= 3,
+	ArenaVersion4		= 4,
+	ArenaVersion5		= 5,
+	BloomVersion		= 1,
+	IndexVersion		= 1,
+	ISectVersion1		= 1,
+	ISectVersion2		= 2,
+
+	/*
+	 * encodings of clumps on disk
+	 */
+	ClumpEErr		= 0,		/* can't happen */
+	ClumpENone,				/* plain */
+	ClumpECompress,				/* compressed */
+	ClumpEMax,
+
+	/*
+	 * sizes in bytes on disk
+	 */
+	U8Size			= 1,
+	U16Size			= 2,
+	U32Size			= 4,
+	U64Size			= 8,
+
+	ArenaPartSize		= 4 * U32Size,
+	ArenaSize4		= 2 * U64Size + 6 * U32Size + ANameSize + U8Size,
+	ArenaSize5			= ArenaSize4 + U32Size,
+	ArenaSize5a		= ArenaSize5 + 2 * U8Size + 2 * U32Size + 2 * U64Size,
+	ArenaHeadSize4		= U64Size + 3 * U32Size + ANameSize,
+	ArenaHeadSize5		= ArenaHeadSize4 + U32Size,
+	BloomHeadSize	= 4 * U32Size,
+	ISectSize1		= 7 * U32Size + 2 * ANameSize,
+	ISectSize2		= ISectSize1 + U32Size,
+	ClumpInfoSize		= U8Size + 2 * U16Size + VtScoreSize,
+	ClumpSize		= ClumpInfoSize + U8Size + 3 * U32Size,
+	MaxBloomSize		= 1<<(32-3),	/* 2^32 bits */
+	MaxBloomHash	= 32,		/* bits per score */
+	/*
+	 * BUG - The various block copies that manipulate entry buckets
+	 * would be faster if we bumped IBucketSize up to 8 and IEntrySize up to 40,
+	 * so that everything is word-aligned.  Buildindex is actually cpu-bound
+	 * by the (byte at a time) copying in qsort.
+	 */
+	IBucketSize		= U32Size + U16Size,
+	IEntrySize		= U64Size + U32Size + 2*U16Size + 2*U8Size + VtScoreSize,
+	IEntryTypeOff		= VtScoreSize + U32Size + U16Size + U64Size + U16Size,
+	IEntryAddrOff		= VtScoreSize + U32Size + U16Size,
+
+	MaxClumpBlocks		=  (VtMaxLumpSize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog,
+	
+	IcacheFrac		= 1000000,	/* denominator */
+
+	SleepForever		= 1000000000,	/* magic value for sleep time */
+	/*
+	 * dirty flags - order controls disk write order
+	 */
+	DirtyArena		= 1,
+	DirtyArenaCib,
+	DirtyArenaTrailer,
+	DirtyMax,
+
+	VentiZZZZZZZZ
+};
+
+extern char TraceDisk[];
+extern char TraceLump[];
+extern char TraceBlock[];
+extern char TraceProc[];
+extern char TraceWork[];
+extern char TraceQuiet[];
+extern char TraceRpc[];
+
+/*
+ * results of parsing and initializing a config file
+ */
+struct Config
+{
+	char		*index;			/* name of the index to initialize */
+	int		naparts;		/* arena partitions initialized */
+	ArenaPart	**aparts;
+	int		nsects;			/* index sections initialized */
+	ISect		**sects;
+	Bloom	*bloom;		/* bloom filter */
+	u32int	bcmem;		/* size from the "bcmem" line; 0 if none */
+	u32int	mem;		/* size from the "mem" line; 0xFFFFFFFF if none */
+	u32int	icmem;		/* size from the "icmem" line; 0 if none */
+	int		queuewrites;	/* set to 1 by a "queuewrites" line */
+	char*	haddr;		/* argument of the "httpaddr" line */
+	char*	vaddr;		/* argument of the "addr" line */
+	char*	webroot;	/* argument of the "webroot" line */
+};
+
+/*
+ * a Part is the low level interface to files or disks.
+ * there are two main types of partitions
+ *	arena partitions, which hold some number of arenas, each in a sub-partition.
+ *	index partitions, which only have one subpartition.
+ */
+struct Part
+{
+	int		fd;			/* rock for accessing the disk */
+	int		mode;
+	u64int		offset;
+	u64int		size;			/* size of the partition */
+	u32int		blocksize;		/* block size for reads and writes */
+	u32int		fsblocksize;	/* minimum file system block size */
+	char		*name;
+	char		*filename;
+	Channel		*writechan;		/* chan[dcache.nblock](DBlock*) */
+};
+
+/*
+ * a cached block from the partition
+ * yuck -- most of this is internal structure for the cache
+ * all other routines should only use data
+ */
+struct DBlock
+{
+	u8int	*data;
+
+	Part	*part;			/* partition in which cached */
+	u64int	addr;			/* base address on the partition */
+	u32int	size;			/* amount of data available, not amount allocated; should go away */
+	u32int	mode;
+	u32int	dirty;			/* presumably one of the Dirty* values -- TODO confirm */
+	u32int	dirtying;
+	DBlock	*next;			/* doubly linked hash chains */
+	DBlock	*prev;
+	u32int	heap;			/* index in heap table */
+	u32int	used;			/* last reference times */
+	u32int	used2;
+	u32int	ref;			/* reference count */
+	RWLock	lock;			/* for access to data only */
+	Channel	*writedonechan;	
+	void*	chanbuf[1];		/* buffer for the chan! */
+};
+
+/*
+ * a cached lump: a packet together with its score and type
+ * yuck -- most of this is internal structure for the cache
+ * all other routines should only use data
+ * double yuck -- this is mostly the same as a DBlock
+ */
+struct Lump
+{
+	Packet	*data;
+
+	Part	*part;			/* partition in which cached */
+	u8int	score[VtScoreSize];	/* score of packet */
+	u8int	type;			/* type of packet */
+	u32int	size;			/* amount of data allocated to hold packet */
+	Lump	*next;			/* doubly linked hash chains */
+	Lump	*prev;
+	u32int	heap;			/* index in heap table */
+	u32int	used;			/* last reference times */
+	u32int	used2;
+	u32int	ref;			/* reference count */
+	QLock	lock;			/* for access to data only */
+};
+
+/*
+ * mapping between names and address ranges
+ */
+struct AMap
+{
+	u64int		start;			/* first address of the range */
+	u64int		stop;			/* end of the range; presumably exclusive -- TODO confirm */
+	char		name[ANameSize];
+};
+
+/*
+ * an AMap along with a length
+ */
+struct AMapN
+{
+	int		n;			/* presumably the number of entries in map -- TODO confirm */
+	AMap		*map;
+};
+
+/*
+ * an ArenaPart is a partition made up of Arenas
+ * it exists because most os's don't support many partitions,
+ * and we want to have many different Arenas
+ */
+struct ArenaPart
+{
+	Part		*part;
+	u64int		size;			/* size of underlying partition, rounded down to blocks */
+	Arena		**arenas;
+	u32int		tabbase;		/* base address of arena table on disk */
+	u32int		tabsize;		/* max. bytes in arena table */
+
+	/*
+	 * fields stored on disk
+	 * (version, blocksize and arenabase, in that order, follow
+	 * the magic number in the partition header)
+	 */
+	u32int		version;
+	u32int		blocksize;		/* "optimal" block size for reads and writes */
+	u32int		arenabase;		/* base address of first arena */
+
+	/*
+	 * stored in the arena mapping table on disk
+	 */
+	AMap		*map;
+	int		narenas;
+};
+
+/*
+ * info about one block in the clump info cache
+ */
+struct CIBlock
+{
+	u32int		block;			/* blocks in the directory */
+	int		offset;			/* offsets of one clump in the data */
+	DBlock		*data;			/* presumably the cached disk block holding the entries -- TODO confirm */
+};
+
+/*
+ * Statistics kept in the tail.
+ * An Arena holds two copies: diskstats and memstats.
+ */
+struct ATailStats
+{
+	u32int		clumps;		/* number of clumps */
+	u32int		cclumps;		/* number of compressed clumps */
+	u64int		used;
+	u64int		uncsize;
+	u8int		sealed;
+};
+
+/*
+ * Arena state - represents a point in the data log
+ */
+struct AState
+{
+	Arena		*arena;
+	u64int		aa;			/* index address */
+	ATailStats		stats;		/* presumably the tail statistics as of that point -- TODO confirm */
+};
+
+/*
+ * an Arena is a log of Clumps, preceded by an ArenaHead,
+ * and followed by an Arena trailer, each in one disk block.
+ * struct on disk is not always up to date, but should be self-consistent.
+ * to sync after reboot, follow clumps starting at used until ClumpFreeMagic is found.
+ * <struct name="Arena" type="Arena *">
+ *	<field name="name" val="s->name" type="AName"/>
+ *	<field name="version" val="s->version" type="U32int"/>
+ *	<field name="partition" val="s->part->name" type="AName"/>
+ *	<field name="blocksize" val="s->blocksize" type="U32int"/>
+ *	<field name="start" val="s->base" type="U64int"/>
+ *	<field name="stop" val="s->base+2*s->blocksize" type="U64int"/>
+ *	<field name="created" val="s->ctime" type="U32int"/>
+ *	<field name="modified" val="s->wtime" type="U32int"/>
+ *	<field name="sealed" val="s->sealed" type="Sealed"/>
+ *	<field name="score" val="s->score" type="Score"/>
+ *	<field name="clumps" val="s->clumps" type="U32int"/>
+ *	<field name="compressedclumps" val="s->cclumps" type="U32int"/>
+ *	<field name="data" val="s->uncsize" type="U64int"/>
+ *	<field name="compresseddata" val="s->used - s->clumps * ClumpSize" type="U64int"/>
+ *	<field name="storage" val="s->used + s->clumps * ClumpInfoSize" type="U64int"/>
+ * </struct>
+ */
+struct Arena
+{
+	QLock		lock;			/* lock for arena fields, writing to disk */
+	Part		*part;			/* partition in which arena lives */
+	int		blocksize;		/* size of block to read or write */
+	u64int		base;			/* base address on disk */
+	u64int		size;			/* total space in the arena */
+	u8int		score[VtScoreSize];	/* score of the entire sealed & summed arena */
+
+	int		clumpmax;		/* ClumpInfos per block */
+	AState		mem;
+	int		inqueue;
+
+	/*
+	 * fields stored on disk
+	 */
+	u32int		version;
+	char		name[ANameSize];	/* text label */
+	ATailStats		memstats;	/* what is in the arena; on disk only when it differs from diskstats */
+	ATailStats		diskstats;	/* what is committed to the index */
+	u32int		ctime;			/* first time a block was written */
+	u32int		wtime;			/* last time a block was written */
+	u32int		clumpmagic;		/* on disk for version 5 only; _ClumpMagic assumed for version 4 */
+};
+
+/*
+ * redundant storage of some fields at the beginning of each arena
+ */
+struct ArenaHead
+{
+	u32int		version;
+	char		name[ANameSize];
+	u32int		blocksize;
+	u64int		size;
+	u32int		clumpmagic;		/* on disk for version 5 only; _ClumpMagic assumed for version 4 */
+};
+
+/*
+ * most interesting meta information for a clump.
+ * stored in each clump's header and in the Arena's directory,
+ * stored in reverse order just prior to the arena trailer
+ */
+struct ClumpInfo
+{
+	u8int		type;			/* translated by vtfromdisktype/vttodisktype when read from or written to disk */
+	u16int		size;			/* size of disk data, not including header */
+	u16int		uncsize;		/* size of uncompressed data */
+	u8int		score[VtScoreSize];	/* score of the uncompressed data only */
+};
+
+/*
+ * header for an immutable clump of data
+ */
+struct Clump
+{
+	ClumpInfo	info;
+	u8int		encoding;
+	u32int		creator;		/* initial client which wrote the block */
+	u32int		time;			/* creation at gmt seconds since 1/1/1970 */
+};
+
+/*
+ * index of all clumps according to their score
+ * this is just a wrapper to tie together the index sections
+ * <struct name="Index" type="Index *">
+ *	<field name="name" val="s->name" type="AName"/>
+ *	<field name="version" val="s->version" type="U32int"/>
+ *	<field name="blocksize" val="s->blocksize" type="U32int"/>
+ *	<field name="tabsize" val="s->tabsize" type="U32int"/>
+ *	<field name="buckets" val="s->buckets" type="U32int"/>
+ *	<field name="buckdiv" val="s->div" type="U32int"/>
+ *	<field name="bitblocks" val="s->div" type="U32int"/>
+ *	<field name="maxdepth" val="s->div" type="U32int"/>
+ *	<field name="bitkeylog" val="s->div" type="U32int"/>
+ *	<field name="bitkeymask" val="s->div" type="U32int"/>
+ *	<array name="sect" val="&s->smap[i]" elems="s->nsects" type="Amap"/>
+ *	<array name="amap" val="&s->amap[i]" elems="s->narenas" type="Amap"/>
+ *	<array name="arena" val="s->arenas[i]" elems="s->narenas" type="Arena"/>
+ * </struct>
+ * <struct name="Amap" type="AMap *">
+ *	<field name="name" val="s->name" type="AName"/>
+ *	<field name="start" val="s->start" type="U64int"/>
+ *	<field name="stop" val="s->stop" type="U64int"/>
+ * </struct>
+ */
+struct Index
+{
+	u32int		div;			/* divisor for mapping score to bucket */
+	u32int		buckets;		/* last bucket used in disk hash table */
+	u32int		blocksize;
+	u32int		tabsize;		/* max. bytes in index config */
+
+	int		mapalloc;		/* first arena to check when adding a lump */
+	Arena		**arenas;		/* arenas in the mapping */
+	ISect		**sects;		/* sections which hold the buckets */
+	Bloom		*bloom;	/* bloom filter */
+
+	/*
+	 * fields stored in config file
+	 */
+	u32int		version;
+	char		name[ANameSize];	/* text label */
+	int		nsects;			/* number of entries in smap */
+	AMap		*smap;			/* mapping of buckets to index sections */
+	int		narenas;		/* number of entries in amap and in arenas */
+	AMap		*amap;			/* mapping from index addresses to arenas */
+};
+
+/*
+ * one part of the bucket storage for an index.
+ * the index blocks are sequentially allocated
+ * across all of the sections.
+ */
+struct ISect
+{
+	Part		*part;
+	int		blocklog;		/* log2(blocksize) */
+	int		buckmax;		/* max. entries in a index bucket */
+	u32int		tabbase;		/* base address of index config table on disk */
+	u32int		tabsize;		/* max. bytes in index config */
+	Channel	*writechan;
+	Channel	*writedonechan;
+	void		*ig;		/* used by buildindex only */
+	int		ng;
+
+	/*
+	 * fields stored on disk
+	 */
+	u32int		version;
+	u32int		bucketmagic;
+	char		name[ANameSize];	/* text label */
+	char		index[ANameSize];	/* index owning the section */
+	u32int		blocksize;		/* size of hash buckets in index */
+	u32int		blockbase;		/* address of start of on disk index table */
+	u32int		blocks;			/* total blocks on disk; some may be unused */
+	u32int		start;			/* first bucket in this section */
+	u32int		stop;			/* limit of buckets in this section */
+};
+
+/*
+ * externally interesting part of an IEntry
+ */
+struct IAddr
+{
+	u64int		addr;
+	u16int		size;			/* uncompressed size */
+	u8int		type;			/* type of block */
+	u8int		blocks;			/* arena io quanta for Clump + data */
+};
+
+/*
+ * entries in the index
+ * kept in IBuckets in the disk index table,
+ * cached in the memory ICache.
+ */
+struct IEntry
+{
+	u8int		score[VtScoreSize];
+	IEntry		*next;			/* next in hash chain */
+	IEntry		*nextdirty; 		/* next in dirty chain */
+	u32int		wtime;			/* last write time */
+	u16int		train;			/* relative train containing the most recent ref; 0 if no ref, 1 if in same car */
+	u8int		rac;			/* read ahead count */
+	u8int		dirty;		/* is dirty */
+	IAddr		ia;
+};
+
+/*
+ * buckets in the on disk index table
+ */
+struct IBucket
+{
+	u16int		n;			/* number of active indices */
+	u32int		buck;		/* used by buildindex/checkindex only */
+	u8int		*data;
+};
+
+/*
+ * temporary buffers used by individual threads
+ */
+struct ZBlock
+{
+	u32int		len;
+	u32int		_size;
+	u8int		*data;
+	u8int		*free;
+};
+
+/*
+ * simple input buffer for a '\0' terminated text file
+ */
+struct IFile
+{
+	char		*name;				/* name of the file */
+	ZBlock		*b;				/* entire contents of file */
+	u32int		pos;				/* current position in the file */
+};
+
+struct Statdesc
+{
+	char *name;
+	ulong max;
+};
+
+/* keep in sync with stats.c:/statdesc and httpd.c:/graphname */
+enum
+{
+	StatRpcTotal,
+	StatRpcRead,
+	StatRpcReadOk,
+	StatRpcReadFail,
+	StatRpcReadBytes,
+	StatRpcReadTime,
+	StatRpcReadCached,
+	StatRpcReadCachedTime,
+	StatRpcReadUncached,
+	StatRpcReadUncachedTime,
+	StatRpcWrite,
+	StatRpcWriteNew,
+	StatRpcWriteOld,
+	StatRpcWriteFail,
+	StatRpcWriteBytes,
+	StatRpcWriteTime,
+	StatRpcWriteNewTime,
+	StatRpcWriteOldTime,
+
+	StatLcacheHit,
+	StatLcacheMiss,
+	StatLcacheRead,
+	StatLcacheWrite,
+	StatLcacheSize,
+	StatLcacheStall,
+	StatLcacheReadTime,
+
+	StatDcacheHit,
+	StatDcacheMiss,
+	StatDcacheLookup,
+	StatDcacheRead,
+	StatDcacheWrite,
+	StatDcacheDirty,
+	StatDcacheSize,
+	StatDcacheFlush,
+	StatDcacheStall,
+	StatDcacheLookupTime,
+
+	StatDblockStall,
+	StatLumpStall,
+
+	StatIcacheHit,
+	StatIcacheMiss,
+	StatIcacheRead,
+	StatIcacheWrite,
+	StatIcacheFill,
+	StatIcachePrefetch,
+	StatIcacheDirty,
+	StatIcacheSize,
+	StatIcacheFlush,
+	StatIcacheStall,
+	StatIcacheReadTime,
+
+	StatBloomHit,
+	StatBloomMiss,
+	StatBloomFalseMiss,
+	StatBloomLookup,
+	StatBloomOnes,
+	StatBloomBits,
+	StatBloomLookupTime,
+
+	StatApartRead,
+	StatApartReadBytes,
+	StatApartWrite,
+	StatApartWriteBytes,
+
+	StatIsectRead,
+	StatIsectReadBytes,
+	StatIsectWrite,
+	StatIsectWriteBytes,
+
+	StatSumRead,
+	StatSumReadBytes,
+
+	NStat
+};
+
+extern Statdesc statdesc[NStat];
+
+/*
+ * statistics about the operation of the server
+ * mainly for performance monitoring and profiling.
+ */
+struct Stats
+{
+	ulong		now;
+	ulong		n[NStat];
+};
+
+struct Statbin
+{
+	uint nsamp;
+	uint min;
+	uint max;
+	uint avg;
+};
+
+struct Graph
+{
+	long (*fn)(Stats*, Stats*, void*);
+	void *arg;
+	long t0;
+	long t1;
+	long min;
+	long max;
+	long wid;
+	long ht;
+	int fill;
+};
+
+/*
+ * for kicking background processes that run one round after another after another
+ */
+struct Round
+{
+	QLock	lock;
+	Rendez	start;
+	Rendez	finish;
+	Rendez	delaywait;
+	int		delaytime;
+	int		delaykick;
+	char*	name;
+	int		last;
+	int		current;
+	int		next;
+	int		doanother;
+};
+
+/*
+ * Bloom filter of stored block hashes
+ *
+ * NOTE(review): the lock comments below mention nbits, tab, mb and nb,
+ * none of which are members of this struct as declared here.
+ * Presumably "tab" is what is now called data -- confirm and bring
+ * the comments up to date with the field names.
+ */
+struct Bloom
+{
+	RWLock lk;		/* protects nhash, nbits, tab, mb */
+	QLock mod;		/* one marker at a time, protects nb */
+	int nhash;
+	ulong size;		/* bytes in tab */
+	ulong bitmask;		/* to produce bit index */
+	u8int *data;
+	Part *part;
+	Channel *writechan;
+	Channel *writedonechan;
+};
+
+extern	Index		*mainindex;
+extern	u32int		maxblocksize;		/* max. block size used by any partition */
+extern	int		paranoid;		/* should verify hashes on disk read */
+extern	int		queuewrites;		/* put all lump writes on a queue and finish later */
+extern	int		readonly;		/* only allowed to read the disk data */
+extern	Stats		stats;
+extern	u8int		zeroscore[VtScoreSize];
+extern	int		compressblocks;
+extern	int		writestodevnull;	/* dangerous - for performance debugging */
+extern	int		collectstats;
+extern	QLock	memdrawlock;
+extern	int		icachesleeptime;
+extern	int		minicachesleeptime;
+extern	int		arenasumsleeptime;
+extern	int		manualscheduling;
+extern	int		l0quantum;
+extern	int		l1quantum;
+extern	int		ignorebloom;
+extern	int		icacheprefetch;
+extern	int		syncwrites;
+
+extern	Stats	*stathist;
+extern	int	nstathist;
+extern	ulong	stattime;
+
+#ifndef PLAN9PORT
+#pragma varargck type "V" uchar*
+#define ODIRECT 0
+#endif

+ 862 - 0
sys/src/cmd/venti/srv/dcache.c

@@ -0,0 +1,862 @@
+/*
+ * Disk cache.
+ * 
+ * Caches raw disk blocks.  Getdblock() gets a block, putdblock puts it back.
+ * Getdblock has a mode parameter that determines i/o and access to a block:
+ * if mode is OREAD or ORDWR, it is read from disk if not already in memory.
+ * If mode is ORDWR or OWRITE, it is locked for exclusive use before being returned.
+ * It is *not* marked dirty -- once changes have been made, they should be noted
+ * by using dirtydblock() before putdblock().  
+ *
+ * There is a global cache lock as well as a lock on each block. 
+ * Within a thread, the cache lock can be acquired while holding a block lock,
+ * but not vice versa; and a block cannot be locked if you already hold the lock
+ * on another block.
+ * 
+ * The flush proc writes out dirty blocks in batches, one batch per dirty tag.
+ * For example, the DirtyArena blocks are all written to disk before any of the
+ * DirtyArenaCib blocks.
+ *
+ * This code used to be in charge of flushing the dirty index blocks out to 
+ * disk, but updating the index turned out to benefit from extra care.
+ * Now cached index blocks are never marked dirty.  The index.c code takes
+ * care of updating them behind our back, and uses _getdblock to update any
+ * cached copies of the blocks as it changes them on disk.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+typedef struct DCache	DCache;
+
+enum
+{
+	HashLog		= 9,
+	HashSize	= 1<<HashLog,
+	HashMask	= HashSize - 1,
+};
+
+struct DCache
+{
+	QLock		lock;			/* global cache lock: hash chains, heap, free list, counters */
+	RWLock		dirtylock;		/* must be held to inspect or set b->dirty; NOTE(review): never taken in this file */
+	Rendez		full;			/* slept on, under lock, when no block can be reclaimed */
+	Round		round;			/* kicks flushproc */
+	DBlock		*free;			/* list of unused block descriptors */
+	u32int		now;			/* ticks for usage timestamps */
+	int		size;			/* max. size of any block; allocated to each block */
+	DBlock		**heads;		/* hash table for finding address */
+	int		nheap;			/* number of available victims */
+	DBlock		**heap;			/* heap for locating victims */
+	int		nblocks;		/* number of blocks allocated */
+	DBlock		*blocks;		/* array of block descriptors */
+	DBlock		**write;		/* array of block pointers to be written */
+	u8int		*mem;			/* memory for all block data, plus the read-ahead buffer */
+	int		ndirty;			/* number of dirty blocks */
+	int		maxdirty;		/* max. number of dirty blocks */
+	Channel	*ra;				/* read-ahead requests (Ra) consumed by raproc */
+	u8int		*rabuf;			/* read-ahead buffer, carved out of mem */
+	u32int		ramax;			/* capacity of rabuf in bytes */
+	u32int		rasize;			/* bytes of rabuf currently valid */
+	u64int		raaddr;			/* partition address of rabuf[0] */
+	Part		*rapart;		/* partition rabuf was read from; nil if none */
+
+	AState	diskstate;			/* copy of state taken before the last completed flush */
+	AState	state;				/* most recent value given to setdcachestate */
+};
+
+typedef struct Ra Ra;
+struct Ra
+{
+	Part *part;
+	u64int addr;
+};
+
+static DCache	dcache;
+
+static int	downheap(int i, DBlock *b);
+static int	upheap(int i, DBlock *b);
+static DBlock	*bumpdblock(void);
+static void	delheap(DBlock *db);
+static void	fixheap(int i, DBlock *b);
+static void	flushproc(void*);
+static void	writeproc(void*);
+static void raproc(void*);
+
+/*
+ * Set up the disk cache with as many maxblocksize-byte blocks as fit
+ * in mem bytes, put every block on the free list, and start the
+ * flush, delayed-kick and read-ahead procs.
+ * Calls sysfatal if maxblocksize is unset or mem is too small
+ * for two blocks.
+ */
+void
+initdcache(u32int mem)
+{
+	DBlock *b, *last;
+	u32int nblocks, blocksize;
+	ulong off;
+	int i;
+	u8int *p;
+
+	if(mem < maxblocksize * 2)
+		sysfatal("need at least %d bytes for the disk cache", maxblocksize * 2);
+	if(maxblocksize == 0)
+		sysfatal("no max. block size given for disk cache");
+	blocksize = maxblocksize;
+	nblocks = mem / blocksize;
+	dcache.full.l = &dcache.lock;
+	dcache.nblocks = nblocks;
+	dcache.maxdirty = (nblocks * 2) / 3;
+	trace(TraceProc, "initialize disk cache with %d blocks of %d bytes, maximum %d dirty blocks\n",
+			nblocks, blocksize, dcache.maxdirty);
+	dcache.size = blocksize;
+	dcache.heads = MKNZ(DBlock*, HashSize);
+	dcache.heap = MKNZ(DBlock*, nblocks);
+	dcache.blocks = MKNZ(DBlock, nblocks);
+	dcache.write = MKNZ(DBlock*, nblocks);
+	/* one spare block for alignment slop, 128 more for the read-ahead buffer */
+	dcache.mem = MKNZ(u8int, (nblocks+1+128) * blocksize);
+	dcache.ra = chancreate(sizeof(Ra), 0);
+
+	/*
+	 * round the start of the data area up to a multiple of blocksize
+	 * (the mask assumes blocksize is a power of two).
+	 * only the low bits of the address are examined and the pointer
+	 * itself is never rebuilt from an integer, so this stays correct
+	 * even where ulong is narrower than a pointer.
+	 */
+	p = dcache.mem;
+	off = (ulong)dcache.mem & (ulong)(blocksize-1);
+	if(off != 0)
+		p += blocksize - off;
+
+	last = nil;
+	for(i = 0; i < nblocks; i++){
+		b = &dcache.blocks[i];
+		b->data = &p[i * blocksize];
+		b->heap = TWID32;		/* not in the victim heap */
+		b->writedonechan = chancreate(sizeof(void*), 1);
+		b->next = last;
+		last = b;
+	}
+	/* the read-ahead buffer follows the last block */
+	dcache.rabuf = &p[i*blocksize];
+	dcache.ramax = 128*blocksize;
+	dcache.raaddr = 0;
+	dcache.rapart = nil;
+
+	dcache.free = last;
+	dcache.nheap = 0;
+	setstat(StatDcacheSize, nblocks);
+	initround(&dcache.round, "dcache", 120*1000);
+
+	vtproc(flushproc, nil);
+	vtproc(delaykickroundproc, &dcache.round);
+	vtproc(raproc, nil);
+}
+
+/*
+ * Remember *a as the current arena state of the cache.
+ * flushproc copies it into dcache.diskstate once a flush
+ * that started after this call has completed.
+ */
+void
+setdcachestate(AState *a)
+{
+	char *name;
+
+	name = nil;
+	if(a->arena)
+		name = a->arena->name;
+	trace(TraceBlock, "setdcachestate %s 0x%llux clumps %d", name, a->aa, a->stats.clumps);
+
+	qlock(&dcache.lock);
+	dcache.state = *a;
+	qunlock(&dcache.lock);
+}
+
+AState
+diskstate(void)
+{
+	AState a;
+
+	qlock(&dcache.lock);
+	a = dcache.diskstate;
+	qunlock(&dcache.lock);
+	return a;
+}
+
+/*
+ * Read-ahead proc: for each request received on dcache.ra,
+ * pull the block into the cache (load mode 2) and release it again.
+ * Requests at or beyond the end of the partition are ignored.
+ */
+static void
+raproc(void *v)
+{
+	Ra req;
+	DBlock *blk;
+
+	USED(v);
+	for(;;){
+		if(recv(dcache.ra, &req) != 1)
+			break;
+		if(req.addr >= req.part->size)
+			continue;
+		blk = _getdblock(req.part, req.addr, OREAD, 2);
+		putdblock(blk);		/* putdblock ignores nil */
+	}
+}
+
+/*
+ * We do readahead a whole arena at a time now,
+ * so dreadahead is a no-op.  The original implementation
+ * is in unused_dreadahead below.
+ */
+void
+dreadahead(Part *part, u64int addr, int miss)
+{
+	USED(part);
+	USED(addr);
+	USED(miss);
+}
+/*
+ * The original block-at-a-time read-ahead heuristic; dreadahead no
+ * longer calls it.
+ *
+ * On a miss whose address is exactly dcache.size away from the
+ * previous miss on the same partition, record a direction in lastra
+ * and post a request for the next block on dcache.ra.  On a hit at
+ * the address last requested, step lastra.addr by lastra.dir and post
+ * again.  Requests are posted with nbsend, i.e. without blocking.
+ * Every miss is remembered in lastmiss.
+ *
+ * NOTE(review): in the descending case addr is decremented before
+ * the jump to XRa, so lastra.dir comes out as -2*dcache.size and the
+ * address saved in lastmiss is the adjusted one.  That looks
+ * unintended -- confirm before putting this code back into use.
+ */
+void
+unused_dreadahead(Part *part, u64int addr, int miss)
+{
+	Ra ra;
+	static struct {
+		Part *part;
+		u64int addr;
+	} lastmiss;
+	static struct {
+		Part *part;
+		u64int addr;
+		int dir;
+	} lastra;
+
+	if(miss){
+		if(lastmiss.part==part && lastmiss.addr==addr-dcache.size){
+		XRa:
+			lastra.part = part;
+			lastra.dir = addr-lastmiss.addr;
+			lastra.addr = addr+lastra.dir;
+			ra.part = part;
+			ra.addr = lastra.addr;
+			nbsend(dcache.ra, &ra);
+		}else if(lastmiss.part==part && lastmiss.addr==addr+dcache.size){
+			addr -= dcache.size;
+			goto XRa;
+		}
+	}else{
+		if(lastra.part==part && lastra.addr==addr){
+			lastra.addr += lastra.dir;
+			ra.part = part;
+			ra.addr = lastra.addr;
+			nbsend(dcache.ra, &ra);
+		}
+	}
+
+	if(miss){
+		lastmiss.part = part;
+		lastmiss.addr = addr;
+	}
+}
+
+/*
+ * Read n bytes at addr in part into buf, going through the
+ * read-ahead buffer.
+ *
+ * If the request lies entirely inside the buffered range it is
+ * satisfied from dcache.rabuf.  Otherwise, when load is 2 (the value
+ * raproc passes down), up to dcache.ramax bytes starting at addr are
+ * read into the buffer and the first n are copied out; for any other
+ * load the data is read straight from the partition.
+ *
+ * Returns 0 on success from the buffer, the result of readpart for a
+ * direct read, and -1 if refilling the buffer fails.
+ */
+int
+rareadpart(Part *part, u64int addr, u8int *buf, uint n, int load)
+{
+	uint nn;
+	static RWLock ralock;
+
+	rlock(&ralock);
+	if(dcache.rapart==part && dcache.raaddr <= addr && addr+n <= dcache.raaddr+dcache.rasize){
+		memmove(buf, dcache.rabuf+(addr-dcache.raaddr), n);
+		runlock(&ralock);
+		return 0;
+	}
+
+	/* how much a refill starting at addr could read */
+	nn = dcache.ramax;
+	if(addr < part->size && addr+nn > part->size)
+		nn = part->size - addr;
+
+	/*
+	 * read directly when not asked to fill the buffer, and also
+	 * when the request cannot be satisfied by a refill:
+	 * addr >= part->size, or fewer than n bytes remain in the
+	 * partition.  in both of those cases let readpart do the error,
+	 * rather than copying stale buffer contents and reporting success.
+	 */
+	if(load != 2 || addr >= part->size || nn < n){
+		runlock(&ralock);
+		diskaccess(0);
+		return readpart(part, addr, buf, n);
+	}
+
+	runlock(&ralock);
+	wlock(&ralock);
+	/* NOTE(review): looks like leftover debugging output; kept as is */
+	fprint(2, "raread %s %llx\n", part->name, addr);
+	diskaccess(0);
+	if(readpart(part, addr, dcache.rabuf, nn) < 0){
+		wunlock(&ralock);
+		return -1;
+	}
+	memmove(buf, dcache.rabuf, n);
+	dcache.rapart = part;
+	dcache.rasize = nn;
+	dcache.raaddr = addr;
+	wunlock(&ralock);
+
+	/* the caller accounts for the n bytes it asked for */
+	addstat(StatApartReadBytes, nn-n);
+	return 0;
+}
+
+/*
+ * hash a partition address to an index into dcache.heads.
+ * only the address is hashed, not the partition.
+ */
+static u32int
+pbhash(u64int addr)
+{
+	u32int folded;
+
+#define hashit(c)	((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
+	/* fold the high 32 bits into the low 32 before mixing */
+	folded = addr ^ (addr >> 32);
+	return hashit(folded);
+}
+
+/*
+ * Public entry to the cache: fetch the block at addr in part with
+ * the given mode (load mode 1) and account for the lookup, its
+ * elapsed msec() time, and the read and/or write implied by mode.
+ * The statistics are updated even when the lookup returns nil.
+ */
+DBlock*
+getdblock(Part *part, u64int addr, int mode)
+{
+	DBlock *b;
+	uint t0, elapsed;
+
+	t0 = msec();
+	b = _getdblock(part, addr, mode, 1);
+	if(mode == ORDWR || mode == OREAD)
+		addstat(StatDcacheRead, 1);
+	if(mode == ORDWR || mode == OWRITE)
+		addstat(StatDcacheWrite, 1);
+	elapsed = msec() - t0;
+	addstat2(StatDcacheLookup, 1, StatDcacheLookupTime, elapsed);
+	return b;
+}
+/*
+ * Find the cache block for addr in part and return it locked:
+ * read-locked if mode is OREAD, write-locked otherwise.  The block's
+ * reference count is incremented; release it with putdblock.
+ *
+ * load selects what happens on a miss:
+ *	0	lookup only; return nil if the block is not cached.
+ *	other	reclaim a block (sleeping on dcache.full until one is
+ *		available) and fill it.  The value is passed on to
+ *		rareadpart; 2 is what raproc uses and also suppresses
+ *		the dreadahead calls.
+ *
+ * A block holding fewer than part->blocksize valid bytes is completed
+ * here: zero-filled for OWRITE, read from disk for the other modes.
+ *
+ * Returns nil, with the error string set, if the partition's block
+ * size exceeds the cache block size or the disk read fails.
+ */
+DBlock*
+_getdblock(Part *part, u64int addr, int mode, int load)
+{
+	DBlock *b;
+	u32int h, size;
+
+	trace(TraceBlock, "getdblock enter %s 0x%llux", part->name, addr);
+	size = part->blocksize;
+	if(size > dcache.size){
+		seterr(EAdmin, "block size %d too big for cache with size %d", size, dcache.size);
+		return nil;
+	}
+	h = pbhash(addr);
+
+	/*
+	 * look for the block in the cache
+	 */
+	qlock(&dcache.lock);
+again:
+	for(b = dcache.heads[h]; b != nil; b = b->next){
+		if(b->part == part && b->addr == addr){
+			/*
+			qlock(&stats.lock);
+			stats.pchit++;
+			qunlock(&stats.lock);
+			*/
+			if(load){
+				addstat(StatDcacheHit, 1);
+				if(load != 2 && mode != OWRITE)
+					dreadahead(part, b->addr, 0);
+			}
+			goto found;
+		}
+	}
+
+	/*
+	 * missed: locate the block with the oldest second to last use.
+	 * remove it from the heap, and fix up the heap.
+	 */
+	if(!load){	/* lookup only */
+		qunlock(&dcache.lock);
+		return nil;
+	}
+
+	addstat(StatDcacheMiss, 1);
+
+	b = bumpdblock();
+	if(b == nil){
+		trace(TraceBlock, "all disk cache blocks in use");
+		addstat(StatDcacheStall, 1);
+		rsleep(&dcache.full);
+		addstat(StatDcacheStall, -1);
+		goto again;	/* the block may have been cached while we slept */
+	}
+
+	assert(!b->dirty);
+
+	/*
+	 * the new block has no last use, so assume it happens sometime in the middle
+	 * ZZZ this is not reasonable
+	 */
+	b->used = (b->used2 + dcache.now) / 2;
+
+	/*
+	 * rechain the block on the correct hash chain
+	 */
+	b->next = dcache.heads[h];
+	dcache.heads[h] = b;
+	if(b->next != nil)
+		b->next->prev = b;
+	b->prev = nil;
+
+	b->addr = addr;
+	b->part = part;
+	b->size = 0;	/* no valid data yet; filled in below */
+	if(load != 2 && mode != OWRITE)
+		dreadahead(part, b->addr, 1);
+
+found:
+	b->ref++;
+	b->used2 = b->used;
+	b->used = dcache.now++;
+	if(b->heap != TWID32)
+		fixheap(b->heap, b);
+
+	qunlock(&dcache.lock);
+
+	trace(TraceBlock, "getdblock lock");
+	addstat(StatDblockStall, 1);
+	if(mode == OREAD)
+		rlock(&b->lock);
+	else
+		wlock(&b->lock);
+	addstat(StatDblockStall, -1);
+	trace(TraceBlock, "getdblock locked");
+
+	if(b->size != size){
+		if(mode == OREAD){	/* trade the read lock for a write lock while filling */
+			addstat(StatDblockStall, 1);
+			runlock(&b->lock);
+			wlock(&b->lock);
+			addstat(StatDblockStall, -1);
+		}
+		if(b->size < size){	/* b->size is examined again under the write lock */
+			if(mode == OWRITE)
+				memset(&b->data[b->size], 0, size - b->size);
+			else{
+				trace(TraceBlock, "getdblock readpart %s 0x%llux", part->name, addr);
+				if(rareadpart(part, addr + b->size, &b->data[b->size], size - b->size, load) < 0){
+					b->mode = ORDWR;	/* so putdblock wunlocks */
+					putdblock(b);
+					return nil;
+				}
+				trace(TraceBlock, "getdblock readpartdone");
+				addstat(StatApartRead, 1);
+				addstat(StatApartReadBytes, size-b->size);
+			}
+		}
+		b->size = size;
+		if(mode == OREAD){	/* back to the read lock the caller asked for */
+			addstat(StatDblockStall, 1);
+			wunlock(&b->lock);
+			rlock(&b->lock);
+			addstat(StatDblockStall, -1);
+		}
+	}
+
+	b->mode = mode;
+	trace(TraceBlock, "getdblock exit");
+	return b;
+}
+
+/*
+ * Release a block obtained from getdblock or _getdblock: drop the
+ * lock matching b->mode and then the reference.  When the last
+ * reference goes away and the block is clean it becomes a candidate
+ * for reuse, and anyone sleeping for a free block is woken.
+ * A nil b is ignored.
+ */
+void
+putdblock(DBlock *b)
+{
+	if(b == nil)
+		return;
+
+	trace(TraceBlock, "putdblock %s 0x%llux", b->part->name, b->addr);
+
+	if(b->mode != OREAD)
+		wunlock(&b->lock);
+	else
+		runlock(&b->lock);
+
+	qlock(&dcache.lock);
+	b->ref--;
+	if(b->ref == 0 && !b->dirty){
+		/* put it back in the victim heap unless it is already there */
+		if(b->heap == TWID32){
+			upheap(dcache.nheap, b);
+			dcache.nheap++;
+		}
+		rwakeupall(&dcache.full);
+	}
+	qunlock(&dcache.lock);
+}
+/*
+ * Mark b dirty with tag dirty.  The caller must hold a reference to
+ * b and have obtained it with ORDWR or OWRITE (both are asserted).
+ * If b is already dirty its tag must equal dirty.
+ *
+ * The first time a block of a partition is dirtied, the partition's
+ * write channel and writeproc are created.  When the block was clean
+ * before, the dirty count is bumped and the flush round is kicked:
+ * at once if the count has reached dcache.maxdirty, otherwise via
+ * delaykickround.
+ *
+ * NOTE(review): b->dirty and p->writechan are examined and set
+ * before dcache.lock is taken -- confirm that callers cannot race here.
+ */
+void
+dirtydblock(DBlock *b, int dirty)
+{
+	int odirty;
+	Part *p;
+
+
+	trace(TraceBlock, "dirtydblock enter %s 0x%llux %d from 0x%lux", b->part->name, b->addr, dirty, getcallerpc(&b));
+	assert(b->ref != 0);
+	assert(b->mode==ORDWR || b->mode==OWRITE);
+
+	odirty = b->dirty;
+	if(b->dirty)
+		assert(b->dirty == dirty);
+	else
+		b->dirty = dirty;
+
+	p = b->part;
+	if(p->writechan == nil){
+		trace(TraceBlock, "dirtydblock allocwriteproc %s", p->name);
+		/* XXX hope this doesn't fail! */
+		p->writechan = chancreate(sizeof(DBlock*), dcache.nblocks);
+		vtproc(writeproc, p);
+	}
+	qlock(&dcache.lock);
+	if(!odirty){	/* clean -> dirty transition */
+		dcache.ndirty++;
+		setstat(StatDcacheDirty, dcache.ndirty);
+		if(dcache.ndirty >= dcache.maxdirty)
+			kickround(&dcache.round, 0);
+		else
+			delaykickround(&dcache.round);
+	}
+	qunlock(&dcache.lock);
+}
+
+/*
+ * take b off its hash chain.
+ * a block with no predecessor must be the head of the chain
+ * selected by its address; anything else is fatal.
+ */
+static void
+unchain(DBlock *b)
+{
+	ulong h;
+
+	if(b->prev != nil)
+		b->prev->next = b->next;
+	else{
+		h = pbhash(b->addr);
+		if(dcache.heads[h] != b)
+			sysfatal("bad hash chains in disk cache");
+		dcache.heads[h] = b->next;
+	}
+
+	if(b->next != nil)
+		b->next->prev = b->prev;
+}
+
+/*
+ * remove some block from use and update the free list and counters.
+ * a block from the free list is preferred; otherwise the heap is
+ * searched for a victim, which is taken off its hash chain.
+ * returns nil, after kicking the flush round, if the heap runs
+ * out of candidates.
+ */
+static DBlock*
+bumpdblock(void)
+{
+	DBlock *b;
+
+	trace(TraceBlock, "bumpdblock enter");
+	b = dcache.free;
+	if(b != nil){
+		dcache.free = b->next;
+		return b;
+	}
+
+	if(dcache.ndirty >= dcache.maxdirty)
+		kickdcache();
+
+	/*
+	 * remove blocks until we find one that is unused
+	 * referenced blocks are left in the heap even though
+	 * they can't be scavenged; this is simply a speed optimization
+	 */
+	for(;;){
+		if(dcache.nheap == 0){
+			kickdcache();
+			trace(TraceBlock, "bumpdblock gotnothing");
+			return nil;
+		}
+		b = dcache.heap[0];
+		delheap(b);
+		if(!b->ref && !b->dirty)
+			break;
+	}
+
+	trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->addr);
+
+	unchain(b);
+	return b;
+}
+
+/*
+ * Empty the victim heap.  Each block that is neither referenced nor
+ * dirty is taken off its hash chain and pushed on the free list;
+ * the others are simply dropped from the heap.
+ */
+void
+emptydcache(void)
+{
+	DBlock *b;
+
+	qlock(&dcache.lock);
+	for(;;){
+		if(dcache.nheap <= 0)
+			break;
+		b = dcache.heap[0];
+		delheap(b);
+		if(b->ref || b->dirty)
+			continue;
+		unchain(b);
+		b->next = dcache.free;
+		dcache.free = b;
+	}
+	qunlock(&dcache.lock);
+}
+
+/*
+ * delete an arbitrary block from the heap.
+ * the last heap element is moved into the vacated slot and
+ * sifted into place; db->heap is set to TWID32 to mark db as
+ * no longer in the heap.  does nothing if db is not in the heap.
+ */
+static void
+delheap(DBlock *db)
+{
+	if(db->heap == TWID32)
+		return;
+	fixheap(db->heap, dcache.heap[--dcache.nheap]);
+	db->heap = TWID32;
+}
+
+/*
+ * push an element up or down to its correct new location:
+ * try moving b up from slot i first, and only if it did not
+ * move try moving it down.
+ */
+static void
+fixheap(int i, DBlock *b)
+{
+	if(upheap(i, b) == i)
+		downheap(i, b);
+}
+/*
+ * place b in the heap starting at slot i, moving it toward the
+ * root while its key is smaller than its parent's.  the key is
+ * used2 - dcache.now, computed in u32int arithmetic.
+ * returns the slot b ends up in, which is also stored in b->heap.
+ */
+static int
+upheap(int i, DBlock *b)
+{
+	DBlock *bb;
+	u32int now;
+	int p;
+
+	now = dcache.now;
+	for(; i != 0; i = p){
+		p = (i - 1) >> 1;	/* parent slot */
+		bb = dcache.heap[p];
+		if(b->used2 - now >= bb->used2 - now)
+			break;
+		dcache.heap[i] = bb;	/* pull the parent down */
+		bb->heap = i;
+	}
+
+	dcache.heap[i] = b;
+	b->heap = i;
+	return i;
+}
+/*
+ * place b in the heap starting at slot i, moving it toward the
+ * leaves while its key is larger than that of its smaller child.
+ * the key is used2 - dcache.now, computed in u32int arithmetic.
+ * returns the slot b ends up in, which is also stored in b->heap.
+ */
+static int
+downheap(int i, DBlock *b)
+{
+	DBlock *bb;
+	u32int now;
+	int k;
+
+	now = dcache.now;
+	for(; ; i = k){
+		k = (i << 1) + 1;	/* left child */
+		if(k >= dcache.nheap)
+			break;
+		if(k + 1 < dcache.nheap && dcache.heap[k]->used2 - now > dcache.heap[k + 1]->used2 - now)
+			k++;	/* right child has the smaller key */
+		bb = dcache.heap[k];
+		if(b->used2 - now <= bb->used2 - now)
+			break;
+		dcache.heap[i] = bb;	/* pull the child up */
+		bb->heap = i;
+	}
+
+	dcache.heap[i] = b;
+	b->heap = i;
+	return i;
+}
+/*
+ * consistency check used by checkdcache: walk the hash chain that
+ * bb's address selects, verifying the prev links on the way, and
+ * die with sysfatal unless bb is found on it.
+ */
+static void
+findblock(DBlock *bb)
+{
+	DBlock *b, *last;
+	int h;
+
+	last = nil;
+	h = pbhash(bb->addr);
+	for(b = dcache.heads[h]; b != nil; b = b->next){
+		if(last != b->prev)
+			sysfatal("bad prev link");
+		if(b == bb)
+			return;
+		last = b;
+	}
+	sysfatal("block missing from hash table");
+}
+
+/*
+ * Consistency check of the whole cache; any violation is fatal.
+ * Verifies the heap back-pointers and ordering, the layout of the
+ * block data, the hash chains of blocks with a non-zero address,
+ * the free list, and that heap + free + referenced-outside-heap
+ * blocks add up to the number of blocks.
+ */
+void
+checkdcache(void)
+{
+	DBlock *b;
+	u8int *base;
+	u32int size, now;
+	int i, k, refed, nfree;
+
+	qlock(&dcache.lock);
+	size = dcache.size;
+	now = dcache.now;
+	for(i = 0; i < dcache.nheap; i++){
+		if(dcache.heap[i]->heap != i)
+			sysfatal("dc: mis-heaped at %d: %d", i, dcache.heap[i]->heap);
+		if(i > 0 && dcache.heap[(i - 1) >> 1]->used2 - now > dcache.heap[i]->used2 - now)
+			sysfatal("dc: bad heap ordering");
+		k = (i << 1) + 1;
+		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
+			sysfatal("dc: bad heap ordering");
+		k++;
+		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
+			sysfatal("dc: bad heap ordering");
+	}
+
+	/*
+	 * initdcache lays the block data out contiguously starting at
+	 * dcache.mem rounded up to a block boundary, which is where
+	 * block 0 lives; dcache.mem itself need not be aligned.
+	 */
+	base = dcache.nblocks > 0 ? dcache.blocks[0].data : nil;
+	refed = 0;
+	for(i = 0; i < dcache.nblocks; i++){
+		b = &dcache.blocks[i];
+		if(b->data != &base[i * size])
+			sysfatal("dc: mis-blocked at %d", i);
+		if(b->ref && b->heap == TWID32)
+			refed++;
+		if(b->addr)
+			findblock(b);
+		if(b->heap != TWID32
+		&& dcache.heap[b->heap] != b)
+			sysfatal("dc: spurious heap value");
+	}
+
+	nfree = 0;
+	for(b = dcache.free; b != nil; b = b->next){
+		if(b->addr != 0 || b->heap != TWID32)
+			sysfatal("dc: bad free list");
+		nfree++;
+	}
+
+	if(dcache.nheap + nfree + refed != dcache.nblocks)
+		sysfatal("dc: missing blocks: %d %d %d", dcache.nheap, refed, dcache.nblocks);
+	qunlock(&dcache.lock);
+}
+
+void
+flushdcache(void)
+{
+	trace(TraceProc, "flushdcache enter");
+	kickround(&dcache.round, 1);
+	trace(TraceProc, "flushdcache exit");
+}
+
+void
+kickdcache(void)
+{
+	kickround(&dcache.round, 0);
+}
+/*
+ * Write out the leading run of blocks in [b, eb) whose dirty tag
+ * equals dirty.  Each block is handed to its partition's write
+ * channel, then a reply is collected from every block's
+ * writedonechan, so the writes to different partitions can overlap.
+ * Returns the number of blocks written.
+ */
+static int
+parallelwrites(DBlock **b, DBlock **eb, int dirty)
+{
+	DBlock **p, **q;
+	Part *part;
+
+	for(p=b; p<eb && (*p)->dirty == dirty; p++){
+		assert(b<=p && p<eb);
+		sendp((*p)->part->writechan, *p);
+	}
+	q = p;	/* end of the run just sent */
+	for(p=b; p<q; p++){
+		assert(b<=p && p<eb);
+		recvp((*p)->writedonechan);
+	}
+	
+	/*
+	 * Flush the partitions that have been written to.
+	 * flushpart is called once per run of consecutive blocks
+	 * on the same partition.
+	 */
+	part = nil;
+	for(p=b; p<q; p++){
+		if(part != (*p)->part){
+			part = (*p)->part;
+			flushpart(part);	/* what if it fails? */
+		}
+	}
+
+	return p-b;
+}
+
+/*
+ * qsort comparison for the array of blocks to be written.
+ * Sort first by dirty flag, then by partition, then by address in partition.
+ * Returns 0 for blocks that agree on all three, so that the ordering
+ * stays consistent if qsort compares an element with itself.
+ */
+static int
+writeblockcmp(const void *va, const void *vb)
+{
+	DBlock *a, *b;
+
+	a = *(DBlock**)va;
+	b = *(DBlock**)vb;
+
+	if(a->dirty != b->dirty)
+		return a->dirty - b->dirty;
+	if(a->part != b->part)
+		return a->part < b->part ? -1 : 1;
+	if(a->addr != b->addr)
+		return a->addr < b->addr ? -1 : 1;
+	return 0;
+}
+/*
+ * Flush proc.  For each kick of dcache.round: snapshot dcache.state,
+ * collect every dirty block, sort the collection with writeblockcmp
+ * and write it out one dirty tag at a time (tags 1 up to, but not
+ * including, DirtyMax).  Afterwards the snapshot becomes
+ * dcache.diskstate, the dirty count is reduced by the number of
+ * blocks written, and unreferenced blocks go back in the victim heap.
+ * The dirty blocks are collected without holding dcache.lock.
+ */
+static void
+flushproc(void *v)
+{
+	int i, j, n;
+	ulong t0;
+	DBlock *b, **write;
+	AState as;
+
+	USED(v);
+	threadsetname("flushproc");
+	for(;;){
+		waitforkick(&dcache.round);
+
+		trace(TraceWork, "start");
+		qlock(&dcache.lock);
+		as = dcache.state;
+		qunlock(&dcache.lock);
+
+		t0 = nsec()/1000;
+
+		trace(TraceProc, "build t=%lud", (ulong)(nsec()/1000)-t0);
+		write = dcache.write;
+		n = 0;
+		for(i=0; i<dcache.nblocks; i++){
+			b = &dcache.blocks[i];
+			if(b->dirty)
+				write[n++] = b;
+		}
+
+		qsort(write, n, sizeof(write[0]), writeblockcmp);
+
+		/* Write each stage of blocks out. */
+		trace(TraceProc, "writeblocks t=%lud", (ulong)(nsec()/1000)-t0);
+		i = 0;
+		for(j=1; j<DirtyMax; j++){
+			trace(TraceProc, "writeblocks.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
+			i += parallelwrites(write+i, write+n, j);
+		}
+		if(i != n){	/* some block carried a tag outside 1..DirtyMax-1 */
+			fprint(2, "in flushproc i=%d n=%d\n", i, n);
+			for(i=0; i<n; i++)
+				fprint(2, "\tblock %d: dirty=%d\n", i, write[i]->dirty);
+			abort();
+		}
+
+/* XXX
+* the locking here is suspect.  what if a block is redirtied
+* after the write happens?  we'll still decrement dcache.ndirty here.
+*/
+		trace(TraceProc, "undirty.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
+		qlock(&dcache.lock);
+		dcache.diskstate = as;
+		for(i=0; i<n; i++){
+			b = write[i];
+			--dcache.ndirty;
+			if(b->ref == 0 && b->heap == TWID32){
+				upheap(dcache.nheap++, b);
+				rwakeupall(&dcache.full);
+			}
+		}
+		setstat(StatDcacheDirty, dcache.ndirty);
+		qunlock(&dcache.lock);
+		addstat(StatDcacheFlush, 1);
+		trace(TraceWork, "finish");
+	}
+}
+
+/*
+ * Per-partition write proc, started by dirtydblock.
+ * Receives dirty blocks on the partition's writechan, writes each
+ * one to disk under its write lock, clears its dirty flag and
+ * replies on the block's writedonechan.
+ */
+static void
+writeproc(void *v)
+{
+	DBlock *b;
+	Part *p;
+
+	p = v;
+
+	threadsetname("writeproc:%s", p->name);
+	for(;;){
+		b = recvp(p->writechan);
+		trace(TraceWork, "start");
+		assert(b->part == p);
+		trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr);
+		wlock(&b->lock);
+		trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr);
+		diskaccess(0);
+		/*
+		 * a failed write is only reported, naming the partition and
+		 * address so it can be tracked down; the block is still
+		 * marked clean below.
+		 */
+		if(writepart(p, b->addr, b->data, b->size) < 0)
+			fprint(2, "writeproc: part %s addr 0x%llux: write error: %r\n", p->name, b->addr);
+		addstat(StatApartWrite, 1);
+		addstat(StatApartWriteBytes, b->size);
+		b->dirty = 0;
+		wunlock(&b->lock);
+		trace(TraceProc, "finish %s 0x%llux", p->name, b->addr);
+		trace(TraceWork, "finish");
+		sendp(b->writedonechan, b);
+	}
+}

+ 88 - 0
sys/src/cmd/venti/srv/disksched.c

@@ -0,0 +1,88 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+ulong lasttime[2];
+int manualscheduling;
+int l0quantum = 120;
+int l1quantum = 120;
+ulong lasticachechange;
+/*
+ * Choose icachesleeptime and arenasumsleeptime according to how
+ * recently disk accesses were recorded by diskaccess:
+ *	level 0 within l0quantum: arena summing is put off, and the
+ *		index cache flush rate is adjusted to how full of
+ *		dirty entries the index cache is;
+ *	level 1 within l1quantum: index cache flushes run without
+ *		delay, arena summing is put off;
+ *	otherwise: both run without delay.
+ * With manualscheduling set, only lasticachechange is updated.
+ */
+void
+disksched(void)
+{
+	int p, nwrite, nflush, ndirty, tdirty, toflush;
+	ulong t;
+	vlong cflush;
+	Stats *prev;
+	
+	/*
+	 * no locks because all the data accesses are atomic.
+	 */
+	t = time(0);
+	if(manualscheduling){
+		lasticachechange = t;
+		return;
+	}
+
+	if(t-lasttime[0] < l0quantum){
+		/* level-0 disk access going on */
+		p = icachedirtyfrac();
+		if(p < IcacheFrac*5/10){	/* can wait */
+			icachesleeptime = SleepForever;
+			lasticachechange = t;
+		}else if(p > IcacheFrac*9/10){	/* can't wait */
+			icachesleeptime = 0;
+			lasticachechange = t;
+		}else if(t-lasticachechange > 60){
+			/* have a minute's worth of data for the current rate */
+			prev = &stathist[(stattime-60+nstathist)%nstathist];
+
+			/* # entries written to index cache */
+			nwrite = stats.n[StatIcacheWrite] - prev->n[StatIcacheWrite];
+			
+			/* change in # dirty entries in index cache */
+			ndirty = stats.n[StatIcacheDirty] - prev->n[StatIcacheDirty];
+			
+			/* # entries flushed to disk */
+			nflush = nwrite - ndirty;
+			
+			/* want to stay around 70% dirty */
+			tdirty = (vlong)stats.n[StatIcacheSize]*700/1000;
+			
+			/* assume nflush*icachesleeptime is a constant */
+			cflush = (vlong)nflush*(icachesleeptime+1);
+			
+			/* compute number of entries to write in the next minute */
+			toflush = nwrite + (stats.n[StatIcacheDirty] - tdirty);
+			
+			/* schedule for that many */
+			if(toflush <= 0 || cflush/toflush > 100000)
+				icachesleeptime = SleepForever;
+			else
+				icachesleeptime = cflush/toflush;
+		}
+		arenasumsleeptime = SleepForever;
+		return;
+	}
+	if(t-lasttime[1] < l1quantum){
+		/* level-1 disk access (icache flush) going on */
+		icachesleeptime = 0;
+		arenasumsleeptime = SleepForever;
+		return;
+	}
+	/* no disk access going on - no holds barred */
+	icachesleeptime = 0;
+	arenasumsleeptime = 0;
+}
+
+/*
+ * Note that a disk access of the given level is happening now;
+ * disksched consults the recorded times.  A level outside
+ * lasttime[] is reported on standard error and otherwise ignored.
+ */
+void
+diskaccess(int level)
+{
+	if(level >= 0 && level < nelem(lasttime)){
+		lasttime[level] = time(0);
+		return;
+	}
+	fprint(2, "bad level in diskaccess; caller=%lux\n", getcallerpc(&level));
+}
+

+ 13 - 13
sys/src/cmd/venti/dump.c → sys/src/cmd/venti/srv/dump.c

@@ -3,12 +3,12 @@
 #include "fns.h"
 
 void
-printIndex(int fd, Index *ix)
+printindex(int fd, Index *ix)
 {
 	int i;
 
-	fprint(fd, "index=%s version=%d blockSize=%d tabSize=%d\n",
-		ix->name, ix->version, ix->blockSize, ix->tabSize);
+	fprint(fd, "index=%s version=%d blocksize=%d tabsize=%d\n",
+		ix->name, ix->version, ix->blocksize, ix->tabsize);
 	fprint(fd, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);
 	for(i = 0; i < ix->nsects; i++)
 		fprint(fd, "\tsect=%s for buckets [%lld,%lld)\n", ix->smap[i].name, ix->smap[i].start, ix->smap[i].stop);
@@ -17,31 +17,31 @@ printIndex(int fd, Index *ix)
 }
 
 void
-printArenaPart(int fd, ArenaPart *ap)
+printarenapart(int fd, ArenaPart *ap)
 {
 	int i;
 
-	fprint(fd, "arena partition=%s\n\tversion=%d blockSize=%d arenas=%d\n\tsetBase=%d setSize=%d\n",
-		ap->part->name, ap->version, ap->blockSize, ap->narenas, ap->tabBase, ap->tabSize);
+	fprint(fd, "arena partition=%s\n\tversion=%d blocksize=%d arenas=%d\n\tsetbase=%d setsize=%d\n",
+		ap->part->name, ap->version, ap->blocksize, ap->narenas, ap->tabbase, ap->tabsize);
 	for(i = 0; i < ap->narenas; i++)
 		fprint(fd, "\tarena=%s at [%lld,%lld)\n", ap->map[i].name, ap->map[i].start, ap->map[i].stop);
 }
 
 void
-printArena(int fd, Arena *arena)
+printarena(int fd, Arena *arena)
 {
 	fprint(fd, "arena='%s' [%lld,%lld)\n\tversion=%d created=%d modified=%d",
-		arena->name, arena->base-arena->blockSize, arena->base + arena->size + arena->blockSize,
+		arena->name, arena->base, arena->base + arena->size + 2 * arena->blocksize,
 		arena->version, arena->ctime, arena->wtime);
-	if(arena->sealed)
+	if(arena->memstats.sealed)
 		fprint(2, " sealed\n");
 	else
 		fprint(2, "\n");
-	if(!scoreEq(zeroScore, arena->score))
+	if(scorecmp(zeroscore, arena->score) != 0)
 		fprint(2, "\tscore=%V\n", arena->score);
 
 	fprint(fd, "\tclumps=%,d compressed clumps=%,d data=%,lld compressed data=%,lld disk storage=%,lld\n",
-		arena->clumps, arena->cclumps, arena->uncsize,
-		arena->used - arena->clumps * ClumpSize,
-		arena->used + arena->clumps * ClumpInfoSize);
+		arena->memstats.clumps, arena->memstats.cclumps, arena->memstats.uncsize,
+		arena->memstats.used - arena->memstats.clumps * ClumpSize,
+		arena->memstats.used + arena->memstats.clumps * ClumpInfoSize);
 }

+ 122 - 0
sys/src/cmd/venti/srv/findscore.c

@@ -0,0 +1,122 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	/* number of ClumpInfo entries read from an arena directory per batch */
+	ClumpChunks	= 32*1024
+};
+
+/* incremented once for each -v flag on the command line */
+static int	verbose;
+
+/*
+ * Report whether two clump directory entries describe the same
+ * clump: equal type, size, uncompressed size and score.
+ * Returns 1 when they match and 0 otherwise.
+ */
+int
+clumpinfoeq(ClumpInfo *c, ClumpInfo *d)
+{
+	if(c->type != d->type)
+		return 0;
+	if(c->size != d->size)
+		return 0;
+	if(c->uncsize != d->uncsize)
+		return 0;
+	return scorecmp(c->score, d->score) == 0;
+}
+
+/*
+ * Scan the clump directory of arena for entries whose score equals
+ * score, printing one line to standard error for each match.
+ * The directory is read ClumpChunks entries at a time.
+ * Returns the number of matching entries.  If a directory read fails,
+ * the error is recorded with seterr and the scan stops early, returning
+ * the matches counted so far.
+ */
+int
+findscore(Arena *arena, uchar *score)
+{
+	ClumpInfo *ci, *cis;
+	u64int a;
+	u32int clump;
+	int i, n, found;
+
+//ZZZ remove fprint?
+	if(arena->memstats.clumps)
+		fprint(2, "reading directory for arena=%s with %d entries\n",
+			arena->name, arena->memstats.clumps);
+
+	cis = MKN(ClumpInfo, ClumpChunks);
+	found = 0;
+	a = 0;	/* running position, accumulated from the sizes of the clumps seen so far */
+	for(clump = 0; clump < arena->memstats.clumps; clump += n){
+		/* read a full batch, or whatever is left at the end */
+		n = ClumpChunks;
+		if(n > arena->memstats.clumps - clump)
+			n = arena->memstats.clumps - clump;
+		if(readclumpinfos(arena, clump, cis, n) != n){
+			seterr(EOk, "arena directory read failed: %r");
+			break;
+		}
+
+		for(i = 0; i < n; i++){
+			ci = &cis[i];
+			if(scorecmp(score, ci->score)==0){
+				fprint(2, "found at clump=%d with type=%d size=%d csize=%d position=%lld\n",
+					clump + i, ci->type, ci->uncsize, ci->size, a);
+				found++;
+			}
+			a += ci->size + ClumpSize;
+		}
+	}
+	free(cis);
+	return found;
+}
+
+/*
+ * Print the command synopsis and exit.
+ * A non-nil exit string is used so that callers (e.g. rc scripts)
+ * see the bad invocation as a failure rather than a success.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: findscore [-v] arenafile score\n");
+	threadexitsall("usage");
+}
+
+/*
+ * findscore entry point: open the arena partition named by argv[0]
+ * read-only, then search the directory of every arena in it for the
+ * score given as argv[1] and print the total number of occurrences.
+ * With more than one -v, the partition layout and statistics are
+ * printed as well.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	ArenaPart *ap;
+	Part *part;
+	char *file;
+	u8int score[VtScoreSize];
+	int i, found;
+
+	ventifmtinstall();
+
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	readonly = 1;
+
+	if(argc != 2)
+		usage();
+
+	file = argv[0];
+	/* sysfatal supplies the trailing newline itself */
+	if(strscore(argv[1], score) < 0)
+		sysfatal("bad score %s", argv[1]);
+
+	part = initpart(file, OREAD|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	ap = initarenapart(part);
+	if(ap == nil)
+		sysfatal("can't initialize arena partition in %s: %r", file);
+
+	if(verbose > 1){
+		printarenapart(2, ap);
+		fprint(2, "\n");
+	}
+
+	initdcache(8 * MaxDiskBlock);
+
+	found = 0;
+	for(i = 0; i < ap->narenas; i++)
+		found += findscore(ap->arenas[i], score);
+
+	print("found %d occurrences of %V\n", found, score);
+
+	if(verbose > 1)
+		printstats();
+	threadexitsall(0);
+}

+ 1911 - 0
sys/src/cmd/venti/srv/fixarenas.c

@@ -0,0 +1,1911 @@
+/*
+ * Check and fix an arena partition.
+ *
+ * This is a lot grittier than the rest of Venti because
+ * it can't just give up if a byte here or there is wrong.
+ *
+ * The rule here (hopefully followed!) is that block corruption
+ * only ever has a local effect -- there are no blocks that you
+ * can wipe out that will cause large portions of 
+ * uncorrupted data blocks to be useless.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "whack.h"
+
+#pragma varargck type "z" uvlong
+#pragma varargck type "z" vlong
+#pragma varargck type "t" uint
+
+enum
+{
+	/* byte multipliers; zfmt also uses them to pick a K/M/G suffix */
+	K = 1024,
+	M = 1024*1024,
+	G = 1024*1024*1024,
+	
+	/* length used when paging in a single header block to peek at it */
+	Block = 4096,
+};
+
+int debugsha1;
+
+int verbose;
+Part *part;	/* partition that readdisk and pageout read and write */
+char *file;
+char *basename;	/* if set, arena names are built from it in loadarenabasics */
+char *dumpbase;
+int fix;	/* nonzero: pageout writes modified pages back to the partition */
+int badreads;	/* count of 512-byte reads that failed in readdisk */
+int unseal;
+uchar zero[MaxDiskBlock];	/* compared against to detect all-zero data */
+
+Arena lastarena;
+ArenaPart ap;	/* partition geometry, filled in by guessgeometry */
+uvlong arenasize;	/* spacing between arenas; 0 until guessed or supplied */
+int nbadread;
+int nbad;	/* number of corrupt regions found by guessarena */
+uvlong partend;	/* reads at or beyond this offset are filled with 0xFB */
+void checkarena(vlong, int);
+
+/*
+ * Print the command synopsis and exit.
+ * A non-nil exit string is used so that the bad invocation is
+ * reported to the caller as a failure rather than a success.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * %z: print a vlong using the largest of the G, M, K suffixes
+ * that divides it exactly, or as a plain number if none does.
+ * The output is meant to be acceptable to unittoull.
+ */
+static int
+zfmt(Fmt *fmt)
+{
+	static struct {
+		vlong unit;
+		char *f;
+	} units[] = {
+		{ G, "%lldG" },
+		{ M, "%lldM" },
+		{ K, "%lldK" },
+	};
+	vlong v;
+	int i;
+
+	v = va_arg(fmt->args, vlong);
+	if(v == 0)
+		return fmtstrcpy(fmt, "0");
+	for(i=0; i<nelem(units); i++)
+		if(v%units[i].unit == 0)
+			return fmtprint(fmt, units[i].f, v/units[i].unit);
+	return fmtprint(fmt, "%lld", v);
+}
+
+/*
+ * %t: print a time in seconds the way ctime does,
+ * but without the newline at the end.
+ */
+static int
+tfmt(Fmt *fmt)
+{
+	char stamp[30];
+	uint secs;
+
+	secs = va_arg(fmt->args, uint);
+	strcpy(stamp, ctime(secs));
+	stamp[28] = 0;	/* cut the string short at index 28 */
+	return fmtstrcpy(fmt, stamp);
+}
+
+/*
+ * Coalesce messages about unreadable sectors into larger ranges.
+ * Each call reports the len bytes at o under the message msg;
+ * a call that continues the previous range (o equal to the previous
+ * end, same message) just extends it.  Otherwise the pending range,
+ * if non-empty, is printed and a new one is started at o.
+ * A nil msg reuses the previous message, so bad(nil, 0, 0) is used
+ * to flush the pending range.  Passing o == -1 discards the pending
+ * state without printing.
+ */
+static void
+bad(char *msg, vlong o, int len)
+{
+	/* [lb0, lb1) is the pending range; lmsg is its message */
+	static vlong lb0, lb1;
+	static char *lmsg;
+
+	if(msg == nil)
+		msg = lmsg;
+	if(o == -1){
+		lmsg = nil;
+		lb0 = 0;
+		lb1 = 0;
+		return;
+	}
+	/* not contiguous with the pending range, or a different message: flush */
+	if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
+		if(lb0 != lb1)
+			print("%s %#llux+%#llux (%,lld+%,lld)\n",
+				lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
+		lb0 = o;
+	}
+	lmsg = msg;
+	lb1 = o+len;
+}
+
+/*
+ * Read in the len bytes of data at the offset.  If can't for whatever reason,
+ * fill it with garbage but print an error.
+ *
+ * Bytes at or beyond partend are filled with 0xFB; bytes that cannot
+ * be read from the partition are left as 0xFD.  Always returns buf.
+ */
+static uchar*
+readdisk(uchar *buf, vlong offset, int len)
+{
+	int i, j, k, n;
+
+	/*
+	 * buf is a pointer parameter, so the fill length must be len;
+	 * sizeof buf would only cover a pointer's worth of bytes.
+	 */
+	if(offset >= partend){
+		memset(buf, 0xFB, len);
+		return buf;
+	}
+
+	if(offset+len > partend){
+		/* fill the whole request first, then read only what exists */
+		memset(buf, 0xFB, len);
+		len = partend - offset;
+	}
+
+	if(readpart(part, offset, buf, len) >= 0)
+		return buf;
+
+	/*
+	 * The read failed.  Clear the buffer to nonsense, and
+	 * then try reading in smaller pieces.  If that fails,
+	 * read in even smaller pieces.  And so on down to sectors.
+	 */
+	memset(buf, 0xFD, len);
+	for(i=0; i<len; i+=64*K){
+		n = 64*K;
+		if(i+n > len)
+			n = len-i;
+		if(readpart(part, offset+i, buf+i, n) >= 0)
+			continue;
+		for(j=i; j<len && j<i+64*K; j+=4*K){
+			n = 4*K;
+			if(j+n > len)
+				n = len-j;
+			if(readpart(part, offset+j, buf+j, n) >= 0)
+				continue;
+			for(k=j; k<len && k<j+4*K; k+=512){
+				if(readpart(part, offset+k, buf+k, 512) >= 0)
+					continue;
+				/*
+				 * NOTE(review): k is relative to this request, not
+				 * to the partition -- confirm whether offset+k was meant.
+				 */
+				bad("disk read failed at", k, 512);
+				badreads++;
+			}
+		}
+	}
+	/* flush any coalesced failure report */
+	bad(nil, 0, 0);
+	return buf;
+}
+
+/*
+ * Buffer to support running SHA1 hash of the disk.
+ */
+typedef struct Shabuf Shabuf;
+struct Shabuf
+{
+	int fd;	/* if > 0, hashed data is also written to this file (see sbdebug) */
+	vlong offset;	/* disk offset of the next byte to be hashed */
+	DigestState state;	/* running SHA1 state */
+	int rollback;	/* nonzero: save state periodically so sbrollback can rewind */
+	vlong r0;	/* disk offset recorded on the first sbupdate call */
+	DigestState *hist;	/* states saved at each 4M boundary past r0 */
+	int nhist;	/* number of entries allocated in hist */
+};
+
+/*
+ * Direct a copy of everything hashed through sb into the named
+ * file, closing any file previously attached.  If the file cannot
+ * be created, sb is left with no debug file.
+ */
+void
+sbdebug(Shabuf *sb, char *file)
+{
+	int nfd;
+
+	if(sb->fd > 0){
+		close(sb->fd);
+		sb->fd = 0;
+	}
+	nfd = create(file, OWRITE, 0666);
+	if(nfd < 0)
+		return;
+	if(nfd == 0){
+		/* sb->fd == 0 means no file, so move off descriptor 0 */
+		nfd = dup(nfd, -1);
+		close(0);
+	}
+	sb->fd = nfd;
+}
+
+/*
+ * Feed the len bytes at p, which came from disk offset offset,
+ * into the running hash.  Only the part of the data at or after
+ * sb->offset is hashed; if sb->offset does not fall inside
+ * [offset, offset+len) the call does nothing.  When rollback is
+ * enabled the hash state is saved at every 4M boundary (measured
+ * from r0) so that sbrollback can rewind.
+ */
+void
+sbupdate(Shabuf *sb, uchar *p, vlong offset, int len)
+{
+	int n, x;
+	vlong o;
+
+	/* first call with rollback on: start the history with a zeroed state */
+	if(sb->rollback && !sb->hist){
+		sb->r0 = offset;
+		sb->nhist = 1;
+		sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
+		memset(sb->hist, 0, sizeof sb->hist[0]);
+	}
+	if(sb->r0 == 0)
+		sb->r0 = offset;
+
+	if(sb->offset < offset || sb->offset >= offset+len){
+		if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
+			p, offset, len, sb->offset);
+		return;
+	}
+	/* skip the leading bytes that have already been hashed */
+	x = sb->offset - offset;
+	if(0) print("sbupdate %p %#llux+%d skip %d\n",
+		sb, offset, len, x);
+	if(x){
+		p += x;
+		offset += x;
+		len -= x;
+	}
+	assert(sb->offset == offset);
+	
+	if(sb->fd > 0)
+		pwrite(sb->fd, p, len, offset - sb->r0);
+
+	if(!sb->rollback){
+		sha1(p, len, nil, &sb->state);
+		sb->offset += len;
+		return;
+	}
+	
+	/* save state every 4M so we can roll back quickly */
+	o = offset - sb->r0;
+	while(len > 0){
+		/* hash up to the next 4M boundary, or to the end of the data */
+		n = 4*M - o%(4*M);
+		if(n > len)
+			n = len;
+		sha1(p, n, nil, &sb->state);
+		sb->offset += n;
+		o += n;
+		p += n;
+		len -= n;
+		if(o%(4*M) == 0){
+			x = o/(4*M);
+			if(x >= sb->nhist){
+				if(x != sb->nhist)
+					print("oops! x=%d nhist=%d\n", x, sb->nhist);
+				sb->nhist += 32;
+				sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist);
+			}
+			sb->hist[x] = sb->state;
+		}
+	}		
+}
+
+/*
+ * Advance the hash in sb up to disk offset eoffset by reading
+ * the intervening data from the partition, at most 4M at a time.
+ */
+void
+sbdiskhash(Shabuf *sb, vlong eoffset)
+{
+	static uchar dbuf[4*M];
+	vlong left;
+	int n;
+
+	while((left = eoffset - sb->offset) > 0){
+		n = sizeof dbuf;
+		if(left < n)
+			n = left;
+		readdisk(dbuf, sb->offset, n);
+		sbupdate(sb, dbuf, sb->offset, n);
+	}
+}
+
+/*
+ * Rewind the hash in sb so that it covers no data past offset.
+ * The state is restored from the last 4M checkpoint at or before
+ * offset, so afterwards sb->offset <= offset and the caller must
+ * re-hash from sb->offset.  Does nothing if offset is not behind
+ * the current position; prints a message and does nothing if no
+ * usable checkpoint exists.  If a debug file is attached it is
+ * truncated to match.
+ */
+void
+sbrollback(Shabuf *sb, vlong offset)
+{
+	int x;
+	vlong o;
+	Dir d;
+
+	if(!sb->rollback || !sb->r0){
+		print("cannot rollback sha\n");
+		return;
+	}
+	if(offset >= sb->offset)
+		return;
+	o = offset - sb->r0;
+	x = o/(4*M);
+	if(x >= sb->nhist){
+		print("cannot rollback sha\n");
+		return;
+	}
+	sb->state = sb->hist[x];
+	/* widen before multiplying: x*4*M overflows int once x reaches 512 */
+	sb->offset = sb->r0 + (vlong)x*4*M;
+	assert(sb->offset <= offset);
+
+	if(sb->fd > 0){
+		nulldir(&d);
+		d.length = sb->offset - sb->r0;
+		dirfwstat(sb->fd, &d);
+	}
+}
+
+/*
+ * Finish the hash in sb, storing the digest in score,
+ * and release any saved rollback history.
+ */
+void
+sbscore(Shabuf *sb, uchar *score)
+{
+	/* free(nil) is a no-op, so no test is needed */
+	free(sb->hist);
+	sb->hist = nil;
+	sha1(nil, 0, score, &sb->state);
+}
+
+/*
+ * If we're fixing arenas, then editing this memory edits the disk!
+ * It will be written back out as new data is paged in. 
+ */
+uchar buf[4*M];
+uchar sbuf[4*M];
+vlong bufoffset;
+int buflen;
+
+static void pageout(void);
+
+/*
+ * Make buf hold the len bytes of the partition at offset, after
+ * first paging out whatever buf held before.  A copy is kept in
+ * sbuf so that pageout can tell whether buf was modified.
+ * Requests reaching past partend are trimmed, with buf pre-filled
+ * with 0xFB; a request entirely past partend reads nothing.
+ * Returns buf.
+ */
+static uchar*
+pagein(vlong offset, int len)
+{
+	pageout();
+	if(offset >= partend || offset+len > partend){
+		memset(buf, 0xFB, sizeof buf);
+		if(offset >= partend)
+			return buf;
+		len = partend - offset;
+	}
+	buflen = len;
+	bufoffset = offset;
+	readdisk(buf, offset, len);
+	memmove(sbuf, buf, len);
+	return buf;
+}
+
+/*
+ * Write buf back to the partition if we are fixing and its
+ * contents differ from the copy saved at pagein time.
+ * Either way, buf is marked empty afterwards.
+ */
+static void
+pageout(void)
+{
+	int dirty;
+
+	dirty = buflen != 0 && fix && memcmp(buf, sbuf, buflen) != 0;
+	if(dirty && writepart(part, bufoffset, buf, buflen) < 0)
+		print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
+			bufoffset, buflen, bufoffset, buflen);
+	buflen = 0;
+}
+
+/*
+ * Zero the len bytes of the partition starting at offset by paging
+ * the affected blocks through buf and clearing them there; the zeros
+ * reach the disk when those pages are paged out, which only writes
+ * when fix is set.  The range that was paged in on entry is paged
+ * back in before returning.
+ */
+static void
+zerorange(vlong offset, int len)
+{
+	int i;
+	vlong ooff;
+	int olen;
+	enum { MinBlock = 4*K, MaxBlock = 8*K };
+
+	/* disabled shortcut: clear in place when the range is already in buf */
+	if(0)
+	if(bufoffset <= offset && offset+len <= bufoffset+buflen){
+		memset(buf+(offset-bufoffset), 0, len);
+		return;
+	}
+
+	ooff = bufoffset;
+	olen = buflen;
+
+	/* i is how far offset sits past a MinBlock boundary */
+	i = offset%MinBlock;
+	if(i+len < MaxBlock){
+		/*
+		 * The page starts i bytes before offset, so it must cover
+		 * i+len bytes, not just len; otherwise the tail of the
+		 * zeroed range lies beyond buflen and is never written out.
+		 */
+		pagein(offset-i, (i+len+MinBlock-1)&~(MinBlock-1));
+		memset(buf+i, 0, len);
+	}else{
+		/* leading partial piece, up to the first MaxBlock-sized page end */
+		pagein(offset-i, MaxBlock);
+		memset(buf+i, 0, MaxBlock-i);
+		offset += MaxBlock-i;
+		len -= MaxBlock-i;
+		/* whole pages */
+		while(len >= MaxBlock){
+			pagein(offset, MaxBlock);
+			memset(buf, 0, MaxBlock);
+			offset += MaxBlock;
+			len -= MaxBlock;
+		}
+		/* trailing piece; offset is aligned here, so rounding len suffices */
+		pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
+		memset(buf, 0, len);
+	}
+	pagein(ooff, olen);
+}
+
+/*
+ * read/write integers
+ *
+static void
+p16(uchar *p, u16int u)
+{
+	p[0] = (u>>8) & 0xFF;
+	p[1] = u & 0xFF;
+}
+*/
+
+/* Read a 16-bit big-endian integer from the two bytes at p. */
+static u16int
+u16(uchar *p)
+{
+	u16int v;
+
+	v = p[0];
+	v = (v<<8) | p[1];
+	return v;
+}
+
+/* Store u as a 32-bit big-endian integer in the four bytes at p. */
+static void
+p32(uchar *p, u32int u)
+{
+	int i;
+
+	/* fill from the least significant byte backwards */
+	for(i=3; i>=0; i--){
+		p[i] = u & 0xFF;
+		u >>= 8;
+	}
+}
+
+/*
+ * Read a 32-bit big-endian integer from the four bytes at p.
+ * Each byte is widened to u32int before shifting so that a top
+ * byte >= 0x80 is never shifted into the sign bit of an int.
+ */
+static u32int
+u32(uchar *p)
+{
+	return ((u32int)p[0]<<24)|((u32int)p[1]<<16)|((u32int)p[2]<<8)|(u32int)p[3];
+}
+
+/*
+static void
+p64(uchar *p, u64int u)
+{
+	p32(p, u>>32);
+	p32(p, u);
+}
+*/
+
+/* Read a 64-bit big-endian integer from the eight bytes at p. */
+static u64int
+u64(uchar *p)
+{
+	u64int hi, lo;
+
+	hi = u32(p);
+	lo = u32(p+4);
+	return (hi<<32) | lo;
+}
+
+/*
+ * qsort comparison function for arrays of vlong:
+ * returns -1, 0 or 1 as *va is less than, equal to,
+ * or greater than *vb.
+ */
+static int
+vlongcmp(const void *va, const void *vb)
+{
+	vlong a, b;
+
+	a = *(vlong*)va;
+	b = *(vlong*)vb;
+	if(a < b)
+		return -1;
+	/* was b > a, which repeats the test above and so never fired */
+	if(a > b)
+		return 1;
+	return 0;
+}
+
+/* D and S are in draw.h */
+#define D VD
+#define S VS
+
+/*
+ * Display flags or'ed into Info.len; showdiffs uses them to choose
+ * how to print a field.  An entry with no flag is printed in hex.
+ */
+enum
+{
+	D = 0x10000,	/* print as a decimal number */
+	Z = 0x20000,	/* print as a size with %z */
+	S = 0x30000,	/* print as a string */
+	T = 0x40000,	/* print as a time with %t */
+	N = 0xFFFF	/* mask that extracts the field length in bytes */
+};
+/*
+ * One field of an on-disk structure, as walked by showdiffs.
+ * A table of these is terminated by an entry whose length is 0.
+ */
+typedef struct Info Info;
+struct Info
+{
+	int len;	/* field length in bytes, plus an optional display flag */
+	char *name;	/* field name used in messages */
+};
+
+/*
+ * Field layout handed to showdiffs when checkarenas compares the
+ * arena partition superblock against the expected one.
+ */
+Info partinfo[] = {
+	4,	"magic",
+	D|4,	"version",
+	Z|4,	"blocksize",
+	4,	"arenabase",
+	0
+};
+
+/* presumably the arena head block layout for arena version 4 -- TODO confirm */
+Info headinfo4[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	Z|4,	"blocksize",
+	Z|8,	"size",
+	0
+};
+
+/* as headinfo4, with a trailing clumpmagic field */
+Info headinfo5[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	Z|4,	"blocksize",
+	Z|8,	"size",
+	4,	"clumpmagic",
+	0
+};
+
+/* presumably the arena tail block layout for arena version 4 -- TODO confirm */
+Info tailinfo4[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	D|4,	"clumps",
+	D|4,	"cclumps",
+	T|4,	"ctime",
+	T|4,	"wtime",
+	D|8,	"used",
+	D|8,	"uncsize",
+	1,	"sealed",
+	0
+};
+	
+/* tailinfo4 followed by an extension byte and the mem.* statistics */
+Info tailinfo4a[] = {
+	/* tailinfo 4 */
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	D|4,	"clumps",
+	D|4,	"cclumps",
+	T|4,	"ctime",
+	T|4,	"wtime",
+	D|8,	"used",
+	D|8,	"uncsize",
+	1,	"sealed",
+
+	/* mem stats */
+	1,	"extension",
+	D|4,	"mem.clumps",
+	D|4,	"mem.cclumps",
+	D|8,	"mem.used",
+	D|8,	"mem.uncsize",
+	1,	"mem.sealed",
+	0
+};
+	
+/* as tailinfo4, with a clumpmagic field after wtime */
+Info tailinfo5[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	D|4,	"clumps",
+	D|4,	"cclumps",
+	T|4,	"ctime",
+	T|4,	"wtime",
+	4,	"clumpmagic",
+	D|8,	"used",
+	D|8,	"uncsize",
+	1,	"sealed",
+	0
+};
+
+/* tailinfo5 followed by an extension byte and the mem.* statistics */
+Info tailinfo5a[] = {
+	/* tailinfo 5 */
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	D|4,	"clumps",
+	D|4,	"cclumps",
+	T|4,	"ctime",
+	T|4,	"wtime",
+	4,	"clumpmagic",
+	D|8,	"used",
+	D|8,	"uncsize",
+	1,	"sealed",
+
+	/* mem stats */
+	1,	"extension",
+	D|4,	"mem.clumps",
+	D|4,	"mem.cclumps",
+	D|8,	"mem.used",
+	D|8,	"mem.uncsize",
+	1,	"mem.sealed",
+	0
+};
+	
+/*
+ * Print the differences between the len bytes at want (the correct
+ * structure) and at have (what is on disk), field by field as laid
+ * out in the info table.  Each differing field is printed in the
+ * format selected by its display flag.  Any bytes left over after
+ * the table ends are compared as a whole.
+ */
+void
+showdiffs(uchar *want, uchar *have, int len, Info *info)
+{
+	int n;
+	
+	/* n is the field length with the display flag masked off */
+	while(len > 0 && (n=info->len&N) > 0){
+		if(memcmp(have, want, n) != 0){
+			switch(info->len){
+			case 1:
+				print("\t%s: correct=%d disk=%d\n",
+					info->name, *want, *have);
+				break;
+			case 4:
+				print("\t%s: correct=%#ux disk=%#ux\n",
+					info->name, u32(want), u32(have));
+				break;
+			case D|4:
+				print("\t%s: correct=%,ud disk=%,ud\n",
+					info->name, u32(want), u32(have));
+				break;
+			case T|4:
+				print("\t%s: correct=%t\n\t\tdisk=%t\n",
+					info->name, u32(want), u32(have));
+				break;
+			case Z|4:
+				print("\t%s: correct=%z disk=%z\n",
+					info->name, (uvlong)u32(want), (uvlong)u32(have));
+				break;
+			case D|8:
+				print("\t%s: correct=%,lld disk=%,lld\n",
+					info->name, u64(want), u64(have));
+				break;
+			case Z|8:
+				print("\t%s: correct=%z disk=%z\n",
+					info->name, u64(want), u64(have));
+				break;
+			case S|ANameSize:
+				/* the disk copy may be unterminated, so bound what is printed */
+				print("\t%s: correct=%s disk=%.*s\n",
+					info->name, (char*)want, 
+					utfnlen((char*)have, ANameSize-1),
+					(char*)have);
+				break;
+			default:
+				print("\t%s: correct=%.*H disk=%.*H\n",
+					info->name, n, want, n, have);
+				break;
+			}
+		}
+		have += n;
+		want += n;
+		len -= n;
+		info++;
+	}
+	/* bytes past the last described field: want is expected to be zero there */
+	if(len > 0 && memcmp(have, want, len) != 0){
+		if(memcmp(want, zero, len) != 0)
+			print("!!\textra want data in showdiffs (bug in fixarenas)\n");
+		else
+			print("\tnon-zero data on disk after structure\n");
+		if(verbose > 1){
+			print("want: %.*H\n", len, want);
+			print("have: %.*H\n", len, have);
+		}
+	}
+}
+
+/*
+ * Report whether the partition starts directly with an arena,
+ * i.e. whether its first four bytes are the arena head magic.
+ */
+int
+isonearena(void)
+{
+	uchar *p;
+
+	p = pagein(0, Block);
+	return u32(p) == ArenaHeadMagic;
+}
+
+/* arena table sizes tried in turn when looking for the first arena head */
+static int tabsizes[] = { 16*1024, 64*1024, 512*1024, };
+/*
+ * Poke around on the disk to guess what the ArenaPart numbers are.
+ *
+ * Fills in the global ap (version, blocksize, arenabase, tabbase,
+ * tabsize) and the global arenasize.  The disk scan is skipped when
+ * both arenasize and ap.blocksize are already non-zero; user-supplied
+ * values take precedence over guessed ones.  Calls sysfatal if too
+ * few arena heads and tails are found to infer anything.
+ */
+void
+guessgeometry(void)
+{
+	int i, j, n, bestn, ndiff, nhead, ntail;
+	uchar *p, *ep, *sp;
+	u64int diff[100], head[20], tail[20];
+	u64int offset, bestdiff;
+	
+	ap.version = ArenaPartVersion;
+
+	if(arenasize == 0 || ap.blocksize == 0){
+		/*
+		 * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
+		 * Instead, look for the individual arena headers and tails, which there
+		 * are many of, and once we've seen enough, infer the spacing.
+		 *
+		 * Of course, nothing in the file format requires that arenas be evenly
+		 * spaced, but fmtarenas always does that for us.
+		 */
+		nhead = 0;
+		ntail = 0;
+		/* scan 4M at a time, testing for a magic number every K bytes */
+		for(offset=PartBlank; offset<partend; offset+=4*M){
+			p = pagein(offset, 4*M);
+			for(sp=p, ep=p+4*M; p<ep; p+=K){
+				if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
+					if(verbose)
+						print("arena head at %#llx\n", offset+(p-sp));
+					head[nhead++] = offset+(p-sp);
+				}
+				if(u32(p) == ArenaMagic && ntail < nelem(tail)){
+					tail[ntail++] = offset+(p-sp);
+					if(verbose)
+						print("arena tail at %#llx\n", offset+(p-sp));
+				}
+			}
+			if(nhead == nelem(head) && ntail == nelem(tail))
+				break;
+		}
+		if(nhead < 3 && ntail < 3)
+			sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);
+	
+		/* 
+		 * Arena size is likely the most common
+		 * inter-head or inter-tail spacing.
+		 */
+		ndiff = 0;
+		for(i=1; i<nhead; i++)
+			diff[ndiff++] = head[i] - head[i-1];
+		for(i=1; i<ntail; i++)
+			diff[ndiff++] = tail[i] - tail[i-1];
+		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+		/* find the longest run of equal values in the sorted array */
+		bestn = 0;
+		bestdiff = 0;
+		for(i=1, n=1; i<=ndiff; i++, n++){
+			if(i==ndiff || diff[i] != diff[i-1]){
+				if(n > bestn){
+					bestn = n;
+					bestdiff = diff[i-1];
+				}
+				n = 0;
+			}
+		}
+		print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
+		if(arenasize != 0 && arenasize != bestdiff)
+			print("using user-specified size %z instead\n", arenasize);
+		else
+			arenasize = bestdiff;
+
+		/*
+		 * The arena tail for an arena is arenasize-blocksize from the head.
+		 */
+		ndiff = 0;
+		/* pair each tail with the head of the arena that contains it */
+		for(i=j=0; i<nhead && j<ntail; ){
+			if(tail[j] < head[i]){
+				j++;
+				continue;
+			}
+			if(tail[j] < head[i]+arenasize){
+				diff[ndiff++] = head[i]+arenasize - tail[j];
+				j++;
+				continue;
+			}
+			i++;
+		}
+		if(ndiff < 3)
+			sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
+		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+		/* most common value again */
+		bestn = 0;
+		bestdiff = 0;
+		for(i=1, n=1; i<=ndiff; i++, n++){
+			if(i==ndiff || diff[i] != diff[i-1]){
+				if(n > bestn){
+					bestn = n;
+					bestdiff = diff[i-1];
+				}
+				n = 0;
+			}
+		}
+		print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
+		if(ap.blocksize != 0 && ap.blocksize != bestdiff)
+			print("using user-specified size %z instead\n", (vlong)ap.blocksize);
+		else
+			ap.blocksize = bestdiff;
+		if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
+			sysfatal("block size not a power of two");
+		if(ap.blocksize > MaxDiskBlock)
+			sysfatal("block size too big (max=%d)", MaxDiskBlock);
+
+		/*
+		 * Use head/tail information to deduce arena base.
+		 */
+		ndiff = 0;
+		for(i=0; i<nhead; i++)
+			diff[ndiff++] = head[i]%arenasize;
+		for(i=0; i<ntail; i++)
+			diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
+		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+		/* most common value again */
+		bestn = 0;
+		bestdiff = 0;
+		for(i=1, n=1; i<=ndiff; i++, n++){
+			if(i==ndiff || diff[i] != diff[i-1]){
+				if(n > bestn){
+					bestn = n;
+					bestdiff = diff[i-1];
+				}
+				n = 0;
+			}
+		}
+		ap.arenabase = bestdiff;
+	}
+	
+	/* the table starts at the first block boundary after the partition header */
+	ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
+	/*
+	 * XXX pick up table, check arenabase.
+	 * XXX pick up table, record base name.
+	 */
+
+	/*
+	 * Somewhat standard computation.
+	 * Fmtarenas used to use 64k tab, now uses 512k tab.
+	 */
+	if(ap.arenabase == 0){
+		/* try each known table size until an arena head is found there */
+		for(i=0; i<nelem(tabsizes); i++){
+			ap.arenabase = (PartBlank+HeadSize+tabsizes[i]+ap.blocksize-1)&~(ap.blocksize-1);
+			p = pagein(ap.arenabase, Block);
+			if(u32(p) == ArenaHeadMagic)
+				break;
+		}
+	}
+	p = pagein(ap.arenabase, Block);
+	print("arena base likely %z%s\n", (vlong)ap.arenabase, 
+		u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");
+
+	ap.tabsize = ap.arenabase - ap.tabbase;
+}
+
+/*
+ * Check the arena partition blocks and then the arenas listed in range.
+ *
+ * range is nil to check every arena, "none" to check no arenas, or a
+ * comma-separated list of arena numbers and lo-hi ranges in which
+ * either end may be omitted.  The list is parsed in place: commas in
+ * range are overwritten with NULs.  A malformed element is reported
+ * and skipped.
+ */
+void
+checkarenas(char *range)
+{
+	char *s, *t;
+	int i, lo, hi, narena;
+	uchar dbuf[HeadSize];
+	uchar *p;
+
+	guessgeometry();
+
+	/* ignore any partial block at the end of the partition */
+	partend -= partend%ap.blocksize;
+
+	/* build the expected superblock and compare it with the one on disk */
+	memset(dbuf, 0, sizeof dbuf);
+	packarenapart(&ap, dbuf);
+	p = pagein(PartBlank, Block);
+	if(memcmp(p, dbuf, HeadSize) != 0){
+		print("on-disk arena part superblock incorrect\n");
+		showdiffs(dbuf, p, HeadSize, partinfo);
+	}
+	/* install the expected superblock in the page buffer */
+	memmove(p, dbuf, HeadSize);
+
+	/* round up: the last arena may be smaller than arenasize */
+	narena = (partend-ap.arenabase + arenasize-1)/arenasize;
+	if(range == nil){
+		for(i=0; i<narena; i++)
+			checkarena(ap.arenabase+(vlong)i*arenasize, i);
+	}else if(strcmp(range, "none") == 0){
+		/* nothing */
+	}else{
+		/* parse, e.g., -4,8-9,10- */
+		for(s=range; *s; s=t){
+			/* t is the start of the next element, or the final NUL */
+			t = strchr(s, ',');
+			if(t)
+				*t++ = 0;
+			else
+				t = s+strlen(s);
+			if(*s == '-')
+				lo = 0;
+			else
+				lo = strtol(s, &s, 0);
+			hi = lo;
+			if(*s == '-'){
+				s++;
+				if(*s == 0)
+					hi = narena-1;
+				else
+					hi = strtol(s, &s, 0);
+			}
+			if(*s != 0){
+				print("bad arena range: %s\n", s);
+				continue;
+			}
+			/* NOTE(review): lo and hi are not checked against narena here */
+			for(i=lo; i<=hi; i++)
+				checkarena(ap.arenabase+(vlong)i*arenasize, i);
+		}
+	}
+}
+
+/*
+ * Is there a clump here at p?
+ *
+ * Parses a clump header at p into cl and accepts it only if the
+ * data that follows hashes to the score in the header (after
+ * decompression, for compressed clumps).  On success stores the
+ * clump's magic number in *pmagic and returns the total number of
+ * bytes the clump occupies, header included.  Returns 0 if there
+ * is no valid clump; cl may have been partly filled in by then.
+ * The caller is assumed to guarantee that a whole clump's worth
+ * of bytes is readable at p.
+ */
+static int
+isclump(uchar *p, Clump *cl, u32int *pmagic)
+{
+	int n;
+	u32int magic;
+	uchar score[VtScoreSize], *bp;
+	Unwhack uw;
+	uchar ubuf[70*1024];
+	
+	bp = p;
+	/* a zero magic is never accepted */
+	magic = u32(p);
+	if(magic == 0)
+		return 0;
+	p += U32Size;
+
+	cl->info.type = vtfromdisktype(*p);
+	if(cl->info.type == 0xFF)
+		return 0;
+	p++;
+	cl->info.size = u16(p);
+	p += U16Size;
+	cl->info.uncsize = u16(p);
+	/* stored size can never exceed the uncompressed size */
+	if(cl->info.size > cl->info.uncsize)
+		return 0;
+	p += U16Size;
+	scorecp(cl->info.score, p);
+	p += VtScoreSize;
+	cl->encoding = *p;
+	p++;
+	cl->creator = u32(p);
+	p += U32Size;
+	cl->time = u32(p);
+	p += U32Size;
+
+	/* p now points at the data; verify it against the score */
+	switch(cl->encoding){
+	case ClumpENone:
+		if(cl->info.size != cl->info.uncsize)
+			return 0;
+		scoremem(score, p, cl->info.size);
+		if(scorecmp(score, cl->info.score) != 0)
+			return 0;
+		break;
+	case ClumpECompress:
+		if(cl->info.size >= cl->info.uncsize)
+			return 0;
+		unwhackinit(&uw);
+		n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
+		if(n != cl->info.uncsize)
+			return 0;
+		scoremem(score, ubuf, cl->info.uncsize);
+		if(scorecmp(score, cl->info.score) != 0)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+	p += cl->info.size;
+	
+	/* it all worked out in the end */
+	*pmagic = magic;
+	return p - bp;
+}
+
+/*
+ * All ClumpInfos seen in this arena.
+ * Kept in binary tree so we can look up by score.
+ * The entries live in the array cibuf in the order they were added;
+ * tree links are indices into cibuf, with -1 meaning no child.
+ */
+typedef struct Cit Cit;
+struct Cit
+{
+	int left;	/* index of subtree with scores that compare greater */
+	int right;	/* index of subtree with scores that compare less */
+	vlong corrupt;	/* non-zero: length of a corrupt region, not a clump */
+	ClumpInfo ci;
+};
+Cit *cibuf;	/* the entries */
+int ciroot;	/* index of the tree root, or -1 if the tree is empty */
+int ncibuf, mcibuf;	/* entries in use, entries allocated */
+
+/*
+ * Forget all recorded clump infos: empty the tree and
+ * mark the array as unused (its storage is kept).
+ */
+void
+resetcibuf(void)
+{
+	ciroot = -1;
+	ncibuf = 0;
+}
+
+/*
+ * Walk the score tree starting at the link *p.  Returns the
+ * address of the link that refers to the entry with the given
+ * score, or, if there is none, the address of the empty (-1)
+ * link where such an entry would be attached.
+ */
+int*
+ltreewalk(int *p, uchar *score)
+{
+	int c;
+	Cit *node;
+
+	while(*p != -1){
+		node = &cibuf[*p];
+		c = scorecmp(node->ci.score, score);
+		if(c == 0)
+			break;
+		p = c < 0 ? &node->right : &node->left;
+	}
+	return p;
+}
+
+/*
+ * Append a clump info to cibuf, growing the array as needed.
+ * corrupt is 0 for a real clump, or the length of a corrupt
+ * region, in which case the entry is only a placeholder and is
+ * not entered in the score tree.
+ *
+ * A clump whose score is already in the tree is still appended to
+ * the array but is not linked into the tree: ltreewalk returns the
+ * link to the existing entry in that case, and overwriting it would
+ * cut the existing entry and its whole subtree out of the tree.
+ */
+void
+addcibuf(ClumpInfo *ci, vlong corrupt)
+{
+	Cit *cit;
+	int *link;
+
+	if(ncibuf == mcibuf){
+		mcibuf += 131072;
+		cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
+	}
+	cit = &cibuf[ncibuf];
+	cit->ci = *ci;
+	cit->left = -1;
+	cit->right = -1;
+	cit->corrupt = corrupt;
+	if(!corrupt){
+		link = ltreewalk(&ciroot, ci->score);
+		if(*link == -1)
+			*link = ncibuf;
+	}
+	ncibuf++;
+}
+
+/*
+ * Record a corrupt region of the given length as a
+ * placeholder entry with an all-zero clump info.
+ */
+void
+addcicorrupt(vlong len)
+{
+	static ClumpInfo blank;
+
+	addcibuf(&blank, len);
+}
+
+/*
+ * Report whether a clump with the given score has
+ * been entered in the score tree: 1 if so, 0 if not.
+ */
+int
+haveclump(uchar *score)
+{
+	int c, idx;
+
+	for(idx = ciroot; idx != -1; ){
+		c = scorecmp(cibuf[idx].ci.score, score);
+		if(c == 0)
+			return 1;
+		idx = c < 0 ? cibuf[idx].right : cibuf[idx].left;
+	}
+	return 0;
+}
+
+/*
+ * Report whether the packed clump info at p (type byte,
+ * 16-bit size, 16-bit uncompressed size, score) is the
+ * same as ci: 1 if so, 0 if not.
+ */
+int
+matchci(ClumpInfo *ci, uchar *p)
+{
+	return ci->type == vtfromdisktype(p[0])
+		&& ci->size == u16(p+1)
+		&& ci->uncsize == u16(p+3)
+		&& scorecmp(ci->score, p+5) == 0;
+}
+
+/*
+ * Decide whether the arena tail block at p, blocksize bytes long,
+ * looks like that of a sealed arena: a known version, the sealed
+ * byte set, nothing but zeros between the tail structure and the
+ * score at the end of the block, and a non-zero score.
+ * Returns 1 if so; otherwise returns 0, printing the reason unless
+ * the version was simply not recognized.
+ */
+int
+sealedarena(uchar *p, int blocksize)
+{
+	int v, n;
+	uchar *score;
+
+	v = u32(p+4);
+	if(v == ArenaVersion4)
+		n = ArenaSize4;
+	else if(v == ArenaVersion5)
+		n = ArenaSize5;
+	else
+		return 0;
+
+	if(p[n-1] != 1){
+		print("arena tail says not sealed\n");
+		return 0;
+	}
+	score = p+blocksize-VtScoreSize;
+	if(memcmp(p+n, zero, blocksize-VtScoreSize-n) != 0){
+		print("arena tail followed by non-zero data\n");
+		return 0;
+	}
+	if(memcmp(score, zero, VtScoreSize) == 0){
+		print("arena score zero\n");
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Report whether name is acceptable for arena number n: it must
+ * pass nameok and end with the decimal digits of n.  For n == 0
+ * no particular ending is required.  Returns 1 if okay, else 0.
+ */
+int
+okayname(char *name, int n)
+{
+	char suffix[20];
+	int nlen, slen;
+
+	if(nameok(name) < 0)
+		return 0;
+	sprint(suffix, "%d", n);
+	if(n == 0)
+		suffix[0] = 0;
+	nlen = strlen(name);
+	slen = strlen(suffix);
+	if(nlen < slen)
+		return 0;
+	return strcmp(name+nlen-slen, suffix) == 0;
+}
+
+/*
+ * Order two clump infos by type, then size, then uncompressed
+ * size, then score.  Returns a negative, zero or positive
+ * number in the manner of strcmp.
+ */
+int
+clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
+{
+	int d;
+
+	d = a->type - b->type;
+	if(d != 0)
+		return d;
+	d = a->size - b->size;
+	if(d != 0)
+		return d;
+	d = a->uncsize - b->uncsize;
+	if(d != 0)
+		return d;
+	return scorecmp(a->score, b->score);
+}
+
+/*
+ * Read the first nci clump infos from the directory of the arena
+ * that starts at disk offset offset.  The directory is read one
+ * block at a time, working backwards from the block just before
+ * the arena's last block.  Returns a newly allocated array of nci
+ * entries.
+ */
+ClumpInfo*
+loadci(vlong offset, Arena *arena, int nci)
+{
+	int i, j, per;
+	uchar *p, *sp;
+	ClumpInfo *bci, *ci;
+	
+	per = arena->blocksize/ClumpInfoSize;	/* entries per directory block */
+	bci = vtmalloc(nci*sizeof bci[0]);
+	ci = bci;
+	offset += arena->size - arena->blocksize;
+	p = sp = nil;
+	for(i=0; i<nci; i+=per){
+		/* page exhausted (or first time): page in the 4M ending at offset */
+		if(p == sp){
+			sp = pagein(offset-4*M, 4*M);
+			p = sp+4*M;
+		}
+		p -= arena->blocksize;
+		offset -= arena->blocksize;
+		for(j=0; j<per && i+j<nci; j++)
+			unpackclumpinfo(ci++, p+j*ClumpInfoSize);
+	}
+	return bci;
+}
+
+/*
+ * Write the nci clump infos in ci into the directory of the arena
+ * that starts at disk offset offset, using the same backwards block
+ * order as loadci.  Each directory block is zeroed before its
+ * entries are packed in.  The data goes through the page buffer, so
+ * it only reaches the disk if fix is set.  Returns the disk offset
+ * of the last (lowest) directory block written.
+ */
+vlong
+writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
+{
+	int i, j, per;
+	uchar *p, *sp;
+	
+	per = arena->blocksize/ClumpInfoSize;	/* entries per directory block */
+	offset += arena->size - arena->blocksize;
+	p = sp = nil;
+	for(i=0; i<nci; i+=per){
+		/* page exhausted (or first time): page in the 4M ending at offset */
+		if(p == sp){
+			sp = pagein(offset-4*M, 4*M);
+			p = sp+4*M;
+		}
+		p -= arena->blocksize;
+		offset -= arena->blocksize;
+		memset(p, 0, arena->blocksize);
+		for(j=0; j<per && i+j<nci; j++)
+			packclumpinfo(ci++, p+j*ClumpInfoSize);
+	}
+	pageout();
+	return offset;
+}
+
+/*
+ * Fill in the basic fields of head and arena for arena number anum,
+ * which starts at disk offset offset0: size, block size, version,
+ * clump magic and name.  Size and block size come from the guessed
+ * geometry; the rest is taken from the on-disk head and tail blocks
+ * when they unpack cleanly.  The name comes from, in order of
+ * preference: basename (if set), the base name remembered from the
+ * previous call, the head block, or the tail block; if none is
+ * available, sysfatal is called.
+ */
+void
+loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
+{
+	char dname[ANameSize];
+	static char lastbase[ANameSize];
+	uchar *p;
+	Arena oarena;
+	ArenaHead ohead;
+	int nbase, nnum;
+
+	/*
+	 * Fmtarenas makes all arenas the same size
+	 * except the last, which may be smaller.
+	 * It uses the same block size for arenas as for
+	 * the arena partition blocks.
+	 */
+	arena->size = arenasize;
+	if(offset0+arena->size > partend)
+		arena->size = partend - offset0;
+	head->size = arena->size;
+
+	arena->blocksize = ap.blocksize;
+	head->blocksize = arena->blocksize;
+
+	/*
+	 * Look for clump magic and name in head/tail blocks.
+	 * All the other info we will reconstruct just in case.
+	 */
+	p = pagein(offset0, arena->blocksize);
+	memset(&ohead, 0, sizeof ohead);
+	if(unpackarenahead(&ohead, p) >= 0){
+		head->version = ohead.version;
+		head->clumpmagic = ohead.clumpmagic;
+		if(okayname(ohead.name, anum))
+			strcpy(head->name, ohead.name);
+	}
+
+	p = pagein(offset0+arena->size-arena->blocksize,
+		arena->blocksize);
+	memset(&oarena, 0, sizeof oarena);
+	if(unpackarena(&oarena, p) >= 0){
+		arena->version = oarena.version;
+		arena->clumpmagic = oarena.clumpmagic;
+		if(okayname(oarena.name, anum))
+			strcpy(arena->name, oarena.name);
+		arena->diskstats.clumps = oarena.diskstats.clumps;
+print("old arena: sealed=%d\n", oarena.diskstats.sealed);
+		arena->diskstats.sealed = oarena.diskstats.sealed;
+	}
+
+	/* Head trumps arena. */
+	if(head->version){
+		arena->version = head->version;
+		arena->clumpmagic = head->clumpmagic;
+	}
+	if(arena->version == 0)
+		arena->version = ArenaVersion5;
+	if(basename){
+		if(anum == -1)
+			snprint(arena->name, ANameSize, "%s", basename);
+		else
+			snprint(arena->name, ANameSize, "%s%d", basename, anum);
+	}else if(lastbase[0])
+		snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
+	else if(head->name[0])
+		strcpy(arena->name, head->name);
+	else if(arena->name[0] == 0)
+		sysfatal("cannot determine base name for arena; use -n");
+
+	/*
+	 * Remember the name minus its number for the next arena.
+	 * Only trim when the name is at least as long as the number;
+	 * otherwise the index would fall before the start of lastbase.
+	 */
+	strcpy(lastbase, arena->name);
+	sprint(dname, "%d", anum);
+	nbase = strlen(lastbase);
+	nnum = strlen(dname);
+	if(nbase >= nnum)
+		lastbase[nbase-nnum] = 0;
+
+	/* Was working in arena, now copy to head. */
+	head->version = arena->version;
+	memmove(head->name, arena->name, sizeof head->name);
+	head->blocksize = arena->blocksize;
+	head->size = arena->size;
+}
+
+/*
+ * Start the hash in sb at offset0 and feed it the arena head
+ * block as it would be packed from head, padded with zeros to
+ * head->blocksize bytes.
+ */
+void
+shahead(Shabuf *sb, vlong offset0, ArenaHead *head)
+{
+	uchar blk[MaxDiskBlock];
+
+	memset(blk, 0, sizeof blk);
+	packarenahead(head, blk);
+	sb->offset = offset0;
+	sbupdate(sb, blk, offset0, head->blocksize);
+}
+
+/*
+ * Choose a clump magic number for an arena of the given version.
+ * Version 4 arenas always use _ClumpMagic; any other version gets
+ * a random value that is neither zero nor _ClumpMagic.
+ */
+u32int
+newclumpmagic(int version)
+{
+	u32int m;
+
+	if(version == ArenaVersion4)
+		return _ClumpMagic;
+	for(;;){
+		m = fastrand();
+		if(m != 0 && m != _ClumpMagic)
+			return m;
+	}
+}
+
+/*
+ * Poke around in the arena to find the clump data
+ * and compute the relevant statistics.
+ */
+void
+guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
+	uchar *oldscore, uchar *score)
+{
+	uchar dbuf[MaxDiskBlock];
+	int needtozero, clumps, nb1, nb2, minclumps;
+	int inbad, n, ncib, printed, sealing, smart;
+	u32int magic;
+	uchar *sp, *ep, *p;
+	vlong boffset, eoffset, lastclumpend, leaked;
+	vlong offset, toffset, totalcorrupt, v;
+	Clump cl;
+	ClumpInfo *bci, *ci, *eci, *xci;
+	Cit *bcit, *cit, *ecit;
+	Shabuf oldsha, newsha;
+	
+	/*
+	 * We expect to find an arena, with data, between offset
+	 * and offset+arenasize.  With any luck, the data starts at
+	 * offset+ap.blocksize.  The blocks have variable size and
+	 * aren't padded at all, which doesn't give us any alignment
+	 * constraints.  The blocks are compressed or high entropy,
+	 * but the headers are pretty low entropy (except the score):
+	 *
+	 *	type[1] (range 0 thru 9, 13)
+	 *	size[2]
+	 *	uncsize[2] (<= size)
+	 *
+	 * so we can look for these.  We check the scores as we go,
+	 * so we can't make any wrong turns.  If we find ourselves
+	 * in a dead end, scan forward looking for a new start.
+	 */
+
+	resetcibuf();
+	memset(head, 0, sizeof *head);
+	memset(arena, 0, sizeof *arena);
+	memset(oldscore, 0, VtScoreSize);
+	memset(score, 0, VtScoreSize);
+	memset(&oldsha, 0, sizeof oldsha);
+	memset(&newsha, 0, sizeof newsha);
+	newsha.rollback = 1;
+
+	if(0){
+		sbdebug(&oldsha, "old.sha");
+		sbdebug(&newsha, "new.sha");
+	}
+
+	loadarenabasics(offset0, anum, head, arena);
+
+	/* start the clump hunt */
+	
+	clumps = 0;
+	totalcorrupt = 0;
+	sealing = 1;
+	boffset = offset0 + arena->blocksize;
+	offset = boffset;
+	eoffset = offset0+arena->size - arena->blocksize;
+	toffset = eoffset;
+	sp = pagein(offset0, 4*M);
+
+	if(arena->diskstats.sealed){
+		oldsha.offset = offset0;
+		sbupdate(&oldsha, sp, offset0, 4*M);
+	}
+	ep = sp+4*M;
+	p = sp + (boffset - offset0);
+	ncib = arena->blocksize / ClumpInfoSize;	/* ci per block in index */
+	lastclumpend = offset;
+	nbad = 0;
+	inbad = 0;
+	needtozero = 0;
+	minclumps = 0;
+	while(offset < eoffset){
+		/*
+		 * Shift buffer if we're running out of room.
+		 */
+		if(p+70*K >= ep){
+			/*
+			 * Start the post SHA1 buffer.   By now we should know the
+			 * clumpmagic and arena version, so we can create a
+			 * correct head block to get things going.
+			 */
+			if(sealing && fix && newsha.offset == 0){
+				newsha.offset = offset0;
+				if(arena->clumpmagic == 0){
+					if(arena->version == 0)
+						arena->version = ArenaVersion5;
+					arena->clumpmagic = newclumpmagic(arena->version);
+				}
+				head->clumpmagic = arena->clumpmagic;
+				shahead(&newsha, offset0, head);
+			}
+			n = 4*M-256*K;
+			if(sealing && fix){
+				sbdiskhash(&newsha, bufoffset);
+				sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
+			}
+			pagein(bufoffset+n, 4*M);
+			p -= n;
+			if(arena->diskstats.sealed)
+				sbupdate(&oldsha, buf, bufoffset, 4*M);
+		}
+
+		/*
+		 * Check for a clump at p, which is at offset in the disk.
+		 * Duplicate clumps happen in corrupted disks
+		 * (the same pattern gets written many times in a row)
+		 * and should never happen during regular use.
+		 */
+		magic = 0;
+		if((n = isclump(p, &cl, &magic)) > 0){
+			/*
+			 * If we were in the middle of some corrupted data,
+			 * flush a warning about it and then add any clump
+			 * info blocks as necessary.
+			 */
+			if(inbad){
+				inbad = 0;
+				v = offset-lastclumpend;
+				if(needtozero){
+					zerorange(lastclumpend, v);
+					sbrollback(&newsha, lastclumpend);
+					print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
+						lastclumpend, v, v);
+				}
+				addcicorrupt(v);
+				totalcorrupt += v;
+				nb1 = (minclumps+ncib-1)/ncib;
+				minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
+				nb2 = (minclumps+ncib-1)/ncib;
+				eoffset -= (nb2-nb1)*arena->blocksize;
+			}
+
+			if(haveclump(cl.info.score))
+				print("warning: duplicate clump %d %V at %#llux+%#d\n", cl.info.type, cl.info.score, offset, n);
+
+			/*
+			 * If clumps use different magic numbers, we don't care.
+			 * We'll just use the first one we find and make the others
+			 * follow suit.
+			 */
+			if(arena->clumpmagic == 0){
+				print("clump type %d size %d score %V magic %x\n",
+					cl.info.type, cl.info.size, cl.info.score, magic);
+				arena->clumpmagic = magic;
+				if(magic == _ClumpMagic)
+					arena->version = ArenaVersion4;
+				else
+					arena->version = ArenaVersion5;
+			}
+			if(magic != arena->clumpmagic)
+				p32(p, arena->clumpmagic);
+			if(clumps == 0)
+				arena->ctime = cl.time;
+
+			/*
+			 * Record the clump, update arena stats,
+			 * grow clump info blocks if needed.
+			 */
+			if(verbose > 1)
+				print("\tclump %d: %d %V at %#llux+%#ux (%d)\n", 
+					clumps, cl.info.type, cl.info.score, offset, n, n);
+			addcibuf(&cl.info, 0);
+			if(minclumps%ncib == 0)
+				eoffset -= arena->blocksize;
+			minclumps++;
+			clumps++;
+			if(cl.encoding != ClumpENone)
+				arena->diskstats.cclumps++;
+			arena->diskstats.uncsize += cl.info.uncsize;
+			arena->wtime = cl.time;
+			
+			/*
+			 * Move to next clump.
+			 */
+			offset += n;
+			p += n;
+			lastclumpend = offset;
+		}else{
+			/*
+			 * Overwrite malformed clump data with zeros later.
+			 * For now, just record whether it needs to be overwritten.
+			 * Bad regions must be of size at least ClumpSize.
+			 * Postponing the overwriting keeps us from writing past
+			 * the end of the arena data (which might be directory data)
+			 * with zeros.
+			 */
+			if(!inbad){
+				inbad = 1;
+				needtozero = 0;
+				if(memcmp(p, zero, ClumpSize) != 0)
+					needtozero = 1;
+				p += ClumpSize;
+				offset += ClumpSize;
+				nbad++;
+			}else{
+				if(*p != 0)
+					needtozero = 1;
+				p++;
+				offset++;
+			}
+		}
+	}
+	pageout();
+
+	if(verbose)
+		print("readable clumps: %d; min. directory entries: %d\n", 
+			clumps, minclumps);
+	arena->diskstats.used = lastclumpend - boffset;
+	leaked = eoffset - lastclumpend;
+	if(verbose)
+		print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
+			boffset, lastclumpend, arena->diskstats.used, leaked);
+
+	/*
+	 * Finish the SHA1 of the old data.
+	 */
+	if(arena->diskstats.sealed){
+		sbdiskhash(&oldsha, toffset);
+		readdisk(dbuf, toffset, arena->blocksize);
+		scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
+		sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
+		sbscore(&oldsha, oldscore);
+	}
+	
+	/*
+	 * If we still don't know the clump magic, the arena
+	 * must be empty.  It still needs a value, so make 
+	 * something up.
+	 */
+	if(arena->version == 0)
+		arena->version = ArenaVersion5;
+	if(arena->clumpmagic == 0){
+		if(arena->version == ArenaVersion4)
+			arena->clumpmagic = _ClumpMagic;
+		else{
+			do
+				arena->clumpmagic = fastrand();
+			while(arena->clumpmagic==_ClumpMagic
+				||arena->clumpmagic==0);
+		}
+		head->clumpmagic = arena->clumpmagic;
+	}
+
+	/*
+	 * Guess at number of clumpinfo blocks to load.
+	 * If we guess high, it's no big deal.  If we guess low,
+	 * we'll be forced into rewriting the whole directory.
+	 * Still not such a big deal.
+	 */
+	if(clumps == 0 || arena->diskstats.used == totalcorrupt)
+		goto Nocib;
+	if(clumps < arena->diskstats.clumps)
+		clumps = arena->diskstats.clumps;
+	if(clumps < ncibuf)
+		clumps = ncibuf;
+	clumps += totalcorrupt/
+		((arena->diskstats.used - totalcorrupt)/clumps);
+	clumps += totalcorrupt/2000;
+	if(clumps < minclumps)
+		clumps = minclumps;
+	clumps += ncib-1;
+	clumps -= clumps%ncib;
+
+	/*
+	 * Can't write into the actual data.
+	 */
+	v = offset0 + arena->size - arena->blocksize;
+	v -= (clumps+ncib-1)/ncib * arena->blocksize;
+	if(v < lastclumpend){
+		v = offset0 + arena->size - arena->blocksize;
+		clumps = (v-lastclumpend)/arena->blocksize * ncib;
+	}
+	
+	if(clumps < minclumps)
+		print("cannot happen?\n");
+
+	/*
+	 * Check clumpinfo blocks against directory we created.
+	 * The tricky part is handling the corrupt sections of arena.
+	 * If possible, we remark just the affected directory entries
+	 * rather than slide everything down.
+	 * 
+	 * Allocate clumps+1 blocks and check that we don't need
+	 * the last one at the end.
+	 */
+	bci = loadci(offset0, arena, clumps+1);
+	eci = bci+clumps+1;
+	bcit = cibuf;
+	ecit = cibuf+ncibuf;
+	
+	smart = 1;
+Again:
+	nbad = 0;
+	ci = bci;
+	for(cit=bcit; cit<ecit && ci<eci; cit++){
+		if(cit->corrupt){
+			vlong n, m;
+			if(smart){
+				/*
+				 * If we can, just mark existing entries as corrupt.
+				 */
+				n = cit->corrupt;
+				for(xci=ci; n>0 && xci<eci; xci++)
+					n -= ClumpSize+xci->size;
+				if(n > 0 || xci >= eci)
+					goto Dumb;
+				printed = 0;
+				for(; ci<xci; ci++){
+					if(verbose && ci->type != VtCorruptType){
+						if(!printed){
+							print("marking directory %d-%d as corrupt\n",
+								(int)(ci-bci), (int)(xci-bci));
+							printed = 1;
+						}
+						print("\ttype=%d size=%d uncsize=%d score=%V\n",
+							ci->type, ci->size, ci->uncsize, ci->score);
+					}
+					ci->type = VtCorruptType;
+				}
+			}else{
+			Dumb:
+				print("\trewriting clump directory\n");
+				/*
+				 * Otherwise, blaze a new trail.
+				 */
+				n = cit->corrupt;
+				while(n > 0 && ci < eci){
+					if(n < ClumpSize)
+						sysfatal("bad math in clump corrupt");
+					if(n <= VtMaxLumpSize+ClumpSize)
+						m = n;
+					else{
+						m = VtMaxLumpSize+ClumpSize;
+						if(n-m < ClumpSize)
+							m -= ClumpSize;
+					}
+					ci->type = VtCorruptType;
+					ci->size = m-ClumpSize;
+					ci->uncsize = m-ClumpSize;
+					memset(ci->score, 0, VtScoreSize);
+					ci++;
+					n -= m;
+				}
+			}
+			continue;
+		}
+		if(clumpinfocmp(&cit->ci, ci) != 0){
+			if(verbose && (smart || verbose>1)){
+				print("clumpinfo %d\n", (int)(ci-bci));
+				print("\twant: %d %d %d %V\n", 
+					cit->ci.type, cit->ci.size,
+					cit->ci.uncsize, cit->ci.score);
+				print("\thave: %d %d %d %V\n", 
+					ci->type, ci->size, 
+					ci->uncsize, ci->score);
+			}
+			*ci = cit->ci;
+			nbad++;
+		}
+		ci++;
+	}
+	if(ci >= eci || cit < ecit){
+		print("ran out of space editing existing directory; rewriting\n");
+		print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
+		assert(smart);	/* can't happen second time thru */
+		smart = 0;
+		goto Again;
+	}
+	
+	assert(ci <= eci);
+	arena->diskstats.clumps = ci-bci;
+	eoffset = writeci(offset0, arena, bci, ci-bci);
+	if(sealing && fix)
+		sbrollback(&newsha, v);
+print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal);
+	if(lastclumpend > eoffset)
+		print("arena directory overwrote blocks!  cannot happen!\n");
+	free(bci);
+	if(smart && nbad)
+		print("arena directory has %d bad or missing entries\n", nbad);
+Nocib:
+	if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){
+		if(arena->diskstats.sealed)
+			print("unsealing arena\n");
+		sealing = 0;
+		memset(oldscore, 0, VtScoreSize);
+	}
+
+	/*
+	 * Finish the SHA1 of the new data - only meaningful
+	 * if we've been writing to disk (`fix').
+	 */
+	arena->diskstats.sealed = sealing;
+	arena->memstats = arena->diskstats;
+	if(sealing && fix){
+		uchar tbuf[MaxDiskBlock];
+		
+		sbdiskhash(&newsha, toffset);
+		memset(tbuf, 0, sizeof tbuf);
+		packarena(arena, tbuf);
+		sbupdate(&newsha, tbuf, toffset, arena->blocksize);
+		sbscore(&newsha, score);
+	}
+}
+
+/*
+ * Copy the raw bytes of one arena (arena->size bytes starting at
+ * offset) into the file named dumpbase.anum, 4*M bytes at a time.
+ * A failed create or write is reported on standard error and ends
+ * the dump; nothing is returned to the caller.
+ */
+void
+dumparena(vlong offset, int anum, Arena *arena)
+{
+	char buf[1000];
+	vlong o, e;
+	int fd, n;
+	
+	snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
+	if((fd = create(buf, OWRITE, 0666)) < 0){
+		fprint(2, "create %s: %r\n", buf);
+		return;
+	}
+	e = offset+arena->size;	/* one past the last byte of the arena */
+	for(o=offset; o<e; o+=n){
+		n = 4*M;
+		if(o+n > e)
+			n = e-o;
+		/* file offsets are relative to the start of the arena */
+		if(pwrite(fd, pagein(o, n), n, o-offset) != n){
+			fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
+			break;
+		}
+	}
+	/* one descriptor is created per dumped arena; release it on every path */
+	close(fd);
+}
+
+/*
+ * Check the arena at the given partition offset.  guessarena
+ * reconstructs the head and tail (and seal scores); this routine
+ * packs them, reports every difference from the blocks on disk,
+ * and copies the expected blocks over the paged-in ones.
+ * When dumpbase is set the arena is dumped instead and no
+ * comparison is made.
+ */
+void
+checkarena(vlong offset, int anum)
+{
+	uchar dbuf[MaxDiskBlock];
+	uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
+	Arena arena, oarena;
+	ArenaHead head;
+	Info *fmt, *fmta;
+	int sz;
+	
+	print("# arena %d: offset %#llux\n", anum, offset);
+
+	if(offset >= partend){
+		print("arena offset out of bounds\n");
+		return;
+	}
+
+	guessarena(offset, anum, &head, &arena, oldscore, score);
+
+	if(verbose){
+		print("#\tversion=%d name=%s blocksize=%d size=%z",
+			head.version, head.name, head.blocksize, head.size);
+		if(head.clumpmagic)
+			print(" clumpmagic=%#.8ux", head.clumpmagic);
+		print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
+			arena.diskstats.clumps, arena.diskstats.cclumps,
+			arena.diskstats.used, arena.diskstats.uncsize);
+		print("#\tctime=%t\n", arena.ctime);
+		print("#\twtime=%t\n", arena.wtime);
+		if(arena.diskstats.sealed)
+			print("#\tsealed score=%V\n", score);
+	}
+
+	if(dumpbase){
+		dumparena(offset, anum, &arena);
+		return;
+	}
+
+	/* head block: compare the packed expectation with the disk copy */
+	memset(dbuf, 0, sizeof dbuf);
+	packarenahead(&head, dbuf);
+	p = pagein(offset, arena.blocksize);
+	if(memcmp(dbuf, p, arena.blocksize) != 0){
+		print("on-disk arena header incorrect\n");
+		showdiffs(dbuf, p, arena.blocksize, 
+			arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
+	}
+	memmove(p, dbuf, arena.blocksize);
+	
+	/* tail block: the last blocksize bytes of the arena */
+	memset(dbuf, 0, sizeof dbuf);
+	packarena(&arena, dbuf);
+	if(arena.diskstats.sealed)
+		scorecp(dbuf+arena.blocksize-VtScoreSize, score);
+	p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
+	memset(&oarena, 0, sizeof oarena);
+	unpackarena(&oarena, p);
+	if(arena.version == ArenaVersion4){
+		sz = ArenaSize4;
+		fmt = tailinfo4;
+		fmta = tailinfo4a;
+	}else{
+		sz = ArenaSize5;
+		fmt = tailinfo5;
+		fmta = tailinfo5a;
+	}
+	/* a 1 in the byte just past the base tail selects the alternate field table */
+	if(p[sz] == 1){
+		fmt = fmta;
+		if(oarena.diskstats.sealed){
+			/*
+			 * some arenas were sealed with the extension
+			 * before we adopted the convention that if it didn't
+			 * add new information it gets dropped.
+			 */
+			_packarena(&arena, dbuf, 1);
+		}
+	}
+	/* the score occupying the last VtScoreSize bytes is checked separately below */
+	if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
+		print("on-disk arena tail incorrect\n");
+		showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
+	}
+	if(arena.diskstats.sealed){
+		/* oldscore is compared with the disk only if the disk copy claims to be sealed */
+		if(oarena.diskstats.sealed)
+		if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
+			print("on-disk arena seal score incorrect\n");
+			print("\tcorrect=%V\n", oldscore);
+			print("\t   disk=%V\n", p+arena.blocksize-VtScoreSize);
+		}
+		if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
+			print("%ssealing arena%s: %V\n", 
+				oarena.diskstats.sealed ? "re" : "",
+				scorecmp(oldscore, score) == 0 ? 
+					"" : " after changes", score);
+		}
+	}
+	memmove(p, dbuf, arena.blocksize);
+	
+	pageout();
+}
+
+/*
+ * Build an arena map by probing every arenasize-spaced slot from
+ * ap.arenabase up to partend.  Each slot whose first block unpacks
+ * as an arena head contributes one entry holding the arena's name
+ * and its [start, stop) offsets.  The caller owns the result.
+ */
+AMapN*
+buildamap(void)
+{
+	AMapN *amap;
+	AMap *ent;
+	ArenaHead hd;
+	vlong pos;
+	
+	amap = vtmallocz(sizeof *amap);
+	for(pos=ap.arenabase; pos<partend; pos+=arenasize){
+		/* slots without a recognizable head are simply left out */
+		if(unpackarenahead(&hd, pagein(pos, Block)) < 0)
+			continue;
+		amap->map = vtrealloc(amap->map, (amap->n+1)*sizeof amap->map[0]);
+		ent = amap->map + amap->n;
+		amap->n++;
+		ent->start = pos;
+		ent->stop = pos+hd.size;
+		strcpy(ent->name, hd.name);
+	}
+	return amap;
+}
+
+/*
+ * Regenerate the arena map text from the arena heads found on disk
+ * (buildamap) and compare it with the table stored at ap.tabbase.
+ * On a mismatch the paged-in table is overwritten with the
+ * regenerated text; pageout then decides whether it is written.
+ */
+void
+checkmap(void)
+{
+	char *s;
+	uchar *p;
+	int i, len;
+	AMapN *an;
+	Fmt fmt;
+	
+	an = buildamap();
+	/* text form: entry count, then one "name start stop" line per arena */
+	fmtstrinit(&fmt);
+	fmtprint(&fmt, "%ud\n", an->n);
+	for(i=0; i<an->n; i++)
+		fmtprint(&fmt, "%s\t%lld\t%lld\n",
+			an->map[i].name, an->map[i].start, an->map[i].stop);
+	s = fmtstrflush(&fmt);
+	len = strlen(s);
+	if(len > ap.tabsize){
+		print("arena partition map too long: need %z bytes have %z\n",
+			(vlong)len, (vlong)ap.tabsize);
+		len = ap.tabsize;	/* compare and rewrite only what fits */
+	}
+	
+	if(ap.tabsize >= 4*M){	/* can't happen - max arenas is 2000 */
+		print("arena partition map *way* too long\n");
+		return;
+	}
+
+	p = pagein(ap.tabbase, ap.tabsize);
+	if(memcmp(p, s, len) != 0){
+		print("arena partition map incorrect; rewriting.\n");
+		memmove(p, s, len);
+	}
+	pageout();
+}
+
+/* stack size requested for the main thread */
+int mainstacksize = 512*1024;
+
+/*
+ * Entry point: parse the options, open the partition (read-only
+ * unless -f was given), and check either a single arena or the
+ * whole arena partition followed by its arena map.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	int mode;
+	
+	mode = OREAD;
+	readonly = 1;	
+	ARGBEGIN{
+	case 'U':	/* sets unseal */
+		unseal = 1;
+		break;
+	case 'a':	/* arena size, parsed by unittoull */
+		arenasize = unittoull(EARGF(usage()));
+		break;
+	case 'b':	/* block size, parsed by unittoull */
+		ap.blocksize = unittoull(EARGF(usage()));
+		break;
+	case 'f':	/* fix: open the partition for writing */
+		fix = 1;
+		mode = ORDWR;
+		readonly = 0;
+		break;
+	case 'n':
+		basename = EARGF(usage());
+		break;
+	case 'v':	/* repeatable; each use raises verbosity */
+		verbose++;
+		break;
+	case 'x':	/* dump arenas to files named dumpbase.N instead of checking */
+		dumpbase = EARGF(usage());
+		break;
+	default:
+		usage();
+	}ARGEND
+	
+	/* one required argument (the partition), one optional (arena ranges) */
+	if(argc != 1 && argc != 2)
+		usage();
+
+	file = argv[0];
+	
+	ventifmtinstall();
+	fmtinstall('z', zfmt);
+	fmtinstall('t', tfmt);
+	quotefmtinstall();
+	
+	part = initpart(file, mode|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open %s: %r", file);
+	partend = part->size;
+	
+	/* NOTE(review): isonearena presumably detects a file holding one bare arena - confirm */
+	if(isonearena()){
+		checkarena(0, -1);
+		threadexitsall(nil);
+	}
+	checkarenas(argc > 1 ? argv[1] : nil);
+	checkmap();
+	threadexitsall(nil);
+}
+

+ 1660 - 0
sys/src/cmd/venti/srv/fixarenas0.c

@@ -0,0 +1,1660 @@
+/*
+ * Check and fix an arena partition.
+ *
+ * This is a lot grittier than the rest of Venti because
+ * it can't just give up if a byte here or there is wrong.
+ *
+ * The goal here (hopefully met!) is that block corruption
+ * only ever has a local effect -- there are no blocks that
+ * you can wipe out that will cause large portions of 
+ * uncorrupted data blocks to be useless.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "whack.h"
+#pragma varargck type "z" uvlong
+#pragma varargck type "z" vlong
+#pragma varargck type "t" uint
+
+enum
+{
+	K = 1024,	/* byte-count multipliers */
+	M = 1024*1024,
+	G = 1024*1024*1024,
+	
+	Block = 4096,	/* read size used when only a header block is wanted */
+};
+
+int verbose;	/* nonzero enables extra diagnostic output */
+Part *part;	/* partition that readpart and writepart operate on */
+char *file;
+char *basename;
+int fix;	/* nonzero: pageout writes modified buffers back to disk */
+int badreads;	/* count of 512-byte reads that failed in readdisk */
+uchar zero[MaxDiskBlock];	/* used as all-zero comparison data */
+
+Arena lastarena;
+ArenaPart ap;	/* partition geometry; guessgeometry fills in version, blocksize, arenabase */
+uvlong arenasize;	/* arena spacing; 0 means guessgeometry must infer it */
+int nbadread;
+int nbad;
+void checkarena(vlong, int);
+
+/*
+ * Print the command synopsis on standard error and exit.
+ * A usage error is a failure, so the exit status is the
+ * non-empty string "usage" rather than 0, which means success.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * %z: print a vlong using the largest of the G, M, K suffixes
+ * that divides it exactly, or as a plain number if none does.
+ * The result is meant to be readable back by unittoull.
+ */
+static int
+zfmt(Fmt *fmt)
+{
+	vlong v;
+	
+	v = va_arg(fmt->args, vlong);
+	if(v == 0)
+		return fmtstrcpy(fmt, "0");
+	/* G is a multiple of M, and M of K, so test the smallest unit first */
+	if(v%K != 0)
+		return fmtprint(fmt, "%lld", v);
+	if(v%M != 0)
+		return fmtprint(fmt, "%lldK", v/K);
+	if(v%G != 0)
+		return fmtprint(fmt, "%lldM", v/M);
+	return fmtprint(fmt, "%lldG", v/G);
+}
+
+/*
+ * %t: print a uint time the way ctime formats it,
+ * cut off after 28 characters to drop the trailing newline.
+ */
+static int
+tfmt(Fmt *fmt)
+{
+	char stamp[30];
+	uint when;
+	
+	when = va_arg(fmt->args, uint);
+	strcpy(stamp, ctime(when));
+	stamp[28] = '\0';
+	return fmtstrcpy(fmt, stamp);
+}
+
+/*
+ * Coalesce messages about unreadable sectors into larger ranges.
+ * The pending range is [lb0, lb1) with message lmsg.  A call that
+ * continues it (o == lb1 and the same message) only extends it;
+ * any other call prints the pending range, if it is not empty,
+ * and starts a new one at o.
+ * bad(nil, 0, 0), as readdisk calls it, therefore prints whatever
+ * is pending unless that range happens to end at offset 0.
+ * A call with o == -1 discards the pending range without printing.
+ */
+static void
+bad(char *msg, vlong o, int len)
+{
+	static vlong lb0, lb1;
+	static char *lmsg;
+
+	/* a nil msg means "same message as last time" */
+	if(msg == nil)
+		msg = lmsg;
+	if(o == -1){
+		lmsg = nil;
+		lb0 = 0;
+		lb1 = 0;
+		return;
+	}
+	/* not contiguous with the pending range, or a different message */
+	if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
+		if(lb0 != lb1)
+			fprint(2, "%s %#llux+%#llux (%,lld+%,lld)\n",
+				lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
+		lb0 = o;
+	}
+	lmsg = msg;
+	lb1 = o+len;
+}
+
+/*
+ * Read in the len bytes of data at the offset.  If can't for whatever reason,
+ * fill it with garbage but print an error.
+ *
+ * Bytes past the end of the partition are filled with 0xFB and
+ * bytes of sectors that cannot be read with 0xFD.  Returns buf.
+ */
+static uchar*
+readdisk(uchar *buf, vlong offset, int len)
+{
+	int i, j, k, n;
+
+	/*
+	 * buf is a pointer parameter, so sizeof buf is just the size
+	 * of a pointer; the fill must cover the len bytes requested.
+	 */
+	if(offset >= part->size){
+		memset(buf, 0xFB, len);
+		return buf;
+	}
+	
+	if(offset+len > part->size){
+		memset(buf, 0xFB, len);
+		len = part->size - offset;
+	}
+
+	if(readpart(part, offset, buf, len) >= 0)
+		return buf;
+	
+	/*
+	 * The read failed.  Clear the buffer to nonsense, and
+	 * then try reading in smaller pieces.  If that fails,
+	 * read in even smaller pieces.  And so on down to sectors.
+	 */
+	memset(buf, 0xFD, len);
+	for(i=0; i<len; i+=64*K){
+		n = 64*K;
+		if(i+n > len)
+			n = len-i;
+		if(readpart(part, offset+i, buf+i, n) >= 0)
+			continue;
+		for(j=i; j<len && j<i+64*K; j+=4*K){
+			n = 4*K;
+			if(j+n > len)
+				n = len-j;
+			if(readpart(part, offset+j, buf+j, n) >= 0)
+				continue;
+			for(k=j; k<len && k<j+4*K; k+=512){
+				if(readpart(part, offset+k, buf+k, 512) >= 0)
+					continue;
+				/* report the disk address, not the position within buf */
+				bad("disk read failed at", offset+k, 512);
+				badreads++;
+			}
+		}
+	}
+	/* print whatever failure range is still pending */
+	bad(nil, 0, 0);
+	return buf;
+}
+
+/*
+ * Buffers to support running SHA1 hashes of the
+ * pre-edit and post-edit disk.
+ */
+typedef struct Shabuf Shabuf;
+struct Shabuf
+{
+	vlong offset0;	/* start of the hashed region; 0 means hashing is off (not started or failed) */
+	vlong preoffset;	/* disk offset up to which pre has been fed */
+	vlong postoffset;	/* disk offset up to which post has been fed */
+	DigestState pre;	/* digest of the data as first seen */
+	DigestState snap[10000];	/* enough for 10G arena! */
+	DigestState post;	/* digest that can be rolled back to snap[x], the state at offset0+x*M */
+	int nds;	/* not referenced in the code visible here */
+};
+
+Shabuf shabuf;
+
+/*
+ * Rewind the post-edit digest to the snapshot taken at the last
+ * M boundary at or before offset (measured from offset0), so that
+ * hashing can resume from there.  If the snapshot index is out of
+ * range, print a message and turn hashing off by zeroing offset0.
+ */
+void
+sbrollback(Shabuf *sb, vlong offset)
+{
+	int x;
+	vlong o;
+	
+	/* roll back to M boundary before or at offset */
+	o = offset - sb->offset0;
+	o -= o%M;
+	x = o/M;	/* index of the snapshot to restore */
+	if((vlong)x*M != o){
+		print("bad math in shaupdate1\n");
+		sb->offset0 = 0;
+		return;
+	}
+	if(x >= nelem(sb->snap)){
+		print("arena way too big: >10G ???; no sha1\n");
+		sb->offset0 = 0;
+		return;
+	}
+	sb->post = sb->snap[x];
+	sb->postoffset = sb->offset0 + o;
+	assert(sb->postoffset <= offset);
+	assert((sb->postoffset - sb->offset0)%M == 0);
+}
+
+/*
+ * Feed len bytes at p into the post-edit digest, advancing
+ * postoffset and saving a snapshot of the digest each time
+ * postoffset lands on an M boundary relative to offset0.
+ */
+void
+sbposthash(Shabuf *sb, uchar *p, int len)
+{
+	int o, n, x;
+	
+	o = (sb->postoffset - sb->offset0)%M;
+	for(; len > 0; p += n, len -= n){
+		/* the first piece runs only to the next M boundary; later ones are whole */
+		n = M-o;
+		o = 0;
+		if(n > len)
+			n = len;
+		sha1(p, n, nil, &sb->post);
+		sb->postoffset += n;
+		if((sb->postoffset - sb->offset0)%M == 0){
+			/* (test may not be true in last iteration) */
+			x = (sb->postoffset - sb->offset0)/M;
+			assert(x < nelem(sb->snap));
+			sb->snap[x] = sb->post;
+		}
+	}
+}
+
+/*
+ * Advance the post-edit digest to offset by hashing data read
+ * back from the disk, M bytes at a time.  On entry postoffset
+ * must sit on an M boundary relative to offset0.
+ */
+void
+sbrollforward(Shabuf *sb, vlong offset)
+{
+	int len;
+	static uchar xbuf[M];
+	
+	assert((sb->postoffset - sb->offset0)%M == 0);
+	while(sb->postoffset < offset){
+		readdisk(xbuf, sb->postoffset, M);
+		len = M;
+		/* hash only up to offset, even though a full M was read */
+		if(sb->postoffset+len > offset)
+			len = offset - sb->postoffset;
+		sbposthash(sb, xbuf, len);
+	}
+	assert(sb->postoffset == offset);
+}
+
+/*
+ * Account for the len bytes at p, which hold the data for disk
+ * offset `offset'.  The pre-edit digest is extended with whatever
+ * part of the range it has not yet seen; the post-edit digest is
+ * moved to offset (rolling back, or rolling back and then forward
+ * from disk) and then fed the whole range.
+ * Does nothing while hashing is off (offset0 == 0).
+ */
+void
+shaupdate1(Shabuf *sb, uchar *p, vlong offset, int len)
+{
+	int x;
+
+	if(sb->offset0 == 0)
+		return;
+	/* the caller must not leave a gap in the pre-edit stream */
+	assert(sb->preoffset >= offset);
+	
+	/*
+	 * Update pre-edit digest.
+	 */
+	if(sb->preoffset < offset+len){
+		x = sb->preoffset - offset;	/* bytes of this range already hashed */
+		sha1(p+x, len-x, nil, &sb->pre);
+		sb->preoffset += len-x;
+		assert(sb->preoffset == offset+len);
+	}
+	
+	/*
+	 * Update post-edit digest.
+	 */
+	if(offset < sb->postoffset)
+		sbrollback(sb, offset);
+	if(offset > sb->postoffset){
+		sbrollback(sb, sb->postoffset);
+		sbrollforward(sb, offset);
+	}
+	assert(offset == sb->postoffset);
+	sbposthash(sb, p, len);
+}
+		
+/*
+ * Bring both digests up to at least offset using data read
+ * straight from the disk.  Reading starts at the M-aligned
+ * position at or below preoffset and proceeds in chunks of up to
+ * 4*M bytes; the final chunk is rounded up to a multiple of Block.
+ */
+void
+shafromdisk(Shabuf *sb, vlong offset)
+{
+	vlong o;
+	int off, n;
+	static uchar xbuf[4*M];
+	
+	if(sb->offset0 == 0)
+		return;
+
+	off = sb->preoffset%M;	/* bytes of the first chunk that are skipped */
+	o = sb->preoffset - off;
+	for(; o < offset; o+=n){
+		n = 4*M;
+		if(o+n > offset)
+			n = ((offset-o)+Block-1)&~(Block-1);
+		readdisk(xbuf, o, n);
+		shaupdate1(sb, xbuf+off, o+off, n-off);
+		off = 0;
+	}
+	if(sb->offset0 == 0)	/* error happened */
+		return;
+	assert(sb->preoffset >= offset);
+	assert(sb->postoffset >= offset);
+}
+
+/*
+ * Tell the digests about len bytes of buffer data p belonging at
+ * disk offset `offset'.  If the pre-edit digest has not reached
+ * offset yet, the gap is first hashed from disk (shafromdisk).
+ * A no-op while hashing is off (offset0 == 0).
+ */
+void
+shaupdate(Shabuf *sb, uchar *p, vlong offset, int len)
+{	
+	if(sb->offset0 == 0)	/* not started yet */
+		return;
+
+	if(sb->preoffset < offset)
+		shafromdisk(sb, offset);
+
+	if(sb->offset0 == 0)	/* error happened */
+		return;
+	assert(sb->preoffset >= offset);
+	assert(sb->postoffset >= offset);
+	
+	shaupdate1(sb, p, offset, len);
+	assert(sb->preoffset >= offset+len);
+	assert(sb->postoffset >= offset+len);
+}
+
+/*
+ * If we're fixing arenas, then editing this memory edits the disk!
+ * It will be written back out as new data is paged in.  Also update
+ * the sha buffer as we see data go by.
+ */
+uchar buf[4*M];	/* the current page; callers edit it in place */
+uchar sbuf[4*M];	/* copy of buf as read, so pageout can tell whether it changed */
+vlong bufoffset;	/* disk offset of buf[0] */
+int buflen;	/* valid bytes in buf; 0 when nothing is paged in */
+
+static void pageout(void);
+/*
+ * Make buf hold the len bytes at disk offset `offset', after
+ * first paging out whatever it held.  Returns buf.
+ * A request starting at or past the end of the partition yields
+ * a buffer of 0xFB bytes and leaves no page loaded; a request
+ * running past the end is shortened to fit.
+ */
+static uchar*
+pagein(vlong offset, int len)
+{
+	pageout();
+	if(offset >= part->size){
+		memset(buf, 0xFB, sizeof buf);
+		return buf;
+	}
+	
+	if(offset+len > part->size){
+		memset(buf, 0xFB, sizeof buf);
+		len = part->size - offset;
+	}
+	bufoffset = offset;
+	buflen = len;
+	readdisk(buf, offset, len);
+	memmove(sbuf, buf, len);	/* remember the pristine contents */
+	shaupdate(&shabuf, buf, offset, len);
+	return buf;
+}
+
+/*
+ * Retire the current page.  The (possibly edited) contents are
+ * passed to the hash code, and the page is written to disk only
+ * if we are fixing and it differs from what was read.
+ * Leaves buflen at 0.
+ */
+static void
+pageout(void)
+{
+	shaupdate(&shabuf, buf, bufoffset, buflen);
+	/* nothing loaded, not fixing, or unchanged: no write needed */
+	if(buflen==0 || !fix || memcmp(buf, sbuf, buflen) == 0){
+		buflen = 0;
+		return;
+	}
+	if(writepart(part, bufoffset, buf, buflen) < 0)
+		fprint(2, "disk write failed at %#llux+%#ux (%,lld+%,d)\n",
+			bufoffset, buflen, bufoffset, buflen);
+	buflen = 0;
+}
+
+/*
+ * Zero len bytes of the disk starting at offset by paging the
+ * affected blocks through buf and clearing them there.  The page
+ * that was loaded on entry is paged back in before returning.
+ * Whether the zeros reach the disk is decided by pageout.
+ */
+static void
+zerorange(vlong offset, int len)
+{
+	int i;
+	vlong ooff;
+	int olen;
+	enum { MinBlock = 4*K, MaxBlock = 8*K };
+	
+	/* in-buffer shortcut, deliberately disabled by the if(0) */
+	if(0)
+	if(bufoffset <= offset && offset+len <= bufoffset+buflen){
+		memset(buf+(offset-bufoffset), 0, len);
+		return;
+	}
+	
+	ooff = bufoffset;
+	olen = buflen;
+	
+	i = offset%MinBlock;	/* offset's position within its MinBlock */
+	if(i+len < MaxBlock){
+		/*
+		 * The page starts i bytes before offset, so it has to
+		 * cover i+len bytes.  Rounding up len alone could leave
+		 * the end of the range beyond buflen, where pageout
+		 * would neither compare nor write it.
+		 */
+		pagein(offset-i, (i+len+MinBlock-1)&~(MinBlock-1));
+		memset(buf+i, 0, len);
+	}else{
+		/* leading partial piece, up to the end of the first MaxBlock */
+		pagein(offset-i, MaxBlock);
+		memset(buf+i, 0, MaxBlock-i);
+		offset += MaxBlock-i;
+		len -= MaxBlock-i;
+		/* whole MaxBlock pieces */
+		while(len >= MaxBlock){
+			pagein(offset, MaxBlock);
+			memset(buf, 0, MaxBlock);
+			offset += MaxBlock;
+			len -= MaxBlock;
+		}
+		/* trailing piece; offset is block aligned here */
+		pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
+		memset(buf, 0, len);
+	}
+	/* restore the caller's page */
+	pagein(ooff, olen);
+}
+
+/*
+ * read/write integers
+ * (all stored most significant byte first)
+ */
+static void
+p16(uchar *p, u16int u)
+{
+	/* assignment to uchar keeps just the low eight bits */
+	*p++ = (u>>8) & 0xFF;
+	*p = u & 0xFF;
+}
+
+/* Fetch the 16-bit big-endian integer stored at p. */
+static u16int
+u16(uchar *p)
+{
+	u16int hi, lo;
+
+	hi = p[0];
+	lo = p[1];
+	return (hi<<8)|lo;
+}
+
+/* Store u at p as four bytes, most significant first. */
+static void
+p32(uchar *p, u32int u)
+{
+	int i;
+
+	/* fill from the last byte backwards, peeling off the low byte each time */
+	for(i=3; i>=0; i--){
+		p[i] = u & 0xFF;
+		u >>= 8;
+	}
+}
+
+/*
+ * Fetch the 32-bit big-endian integer stored at p.
+ * Each byte is widened to u32int before shifting so that a top
+ * byte of 0x80 or more is never shifted into the sign bit of int.
+ */
+static u32int
+u32(uchar *p)
+{
+	return ((u32int)p[0]<<24)|((u32int)p[1]<<16)|((u32int)p[2]<<8)|(u32int)p[3];
+}
+
+/*
+ * Store u at p as eight bytes, most significant first:
+ * the high word at p and the low word in the four bytes after
+ * it, which is the layout u64 reads back.
+ */
+static void
+p64(uchar *p, u64int u)
+{
+	p32(p, u>>32);
+	p32(p+4, u);
+}
+
+/* Fetch the 64-bit big-endian integer stored at p. */
+static u64int
+u64(uchar *p)
+{
+	u64int hi, lo;
+
+	hi = u32(p);
+	lo = u32(p+4);
+	return (hi<<32) | lo;
+}
+
+/*
+ * qsort comparison function for arrays of vlong:
+ * returns -1, 0 or 1 as *va is less than, equal to,
+ * or greater than *vb.
+ */
+static int
+vlongcmp(const void *va, const void *vb)
+{
+	vlong a, b;
+	
+	a = *(vlong*)va;
+	b = *(vlong*)vb;
+	if(a < b)
+		return -1;
+	if(a > b)
+		return 1;
+	return 0;
+}
+
+/* D and S are in draw.h */
+#define D VD
+#define S VS
+
+/*
+ * Display-format flags that showdiffs looks for in Info.len,
+ * or'ed with the field's size in bytes.
+ */
+enum
+{
+	D = 0x10000,	/* print as a decimal number */
+	Z = 0x20000,	/* print as a size, with %z */
+	S = 0x30000,	/* print as a string */
+	T = 0x40000,	/* print as a time, with %t */
+	N = 0xFFFF	/* mask that extracts the byte count from len */
+};
+/*
+ * One field of an on-disk structure.  A table of these lists the
+ * fields in order and ends with an entry whose length is 0.
+ */
+typedef struct Info Info;
+struct Info
+{
+	int len;	/* format flag | field length in bytes */
+	char *name;	/* label used when reporting a difference */
+};
+
+/* fields checked in the arena partition superblock (see checkarenas) */
+Info partinfo[] = {
+	4,	"magic",
+	D|4,	"version",
+	Z|4,	"blocksize",
+	4,	"arenabase",
+	0
+};
+
+/* field layout of a version 4 arena head */
+Info headinfo4[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	Z|4,	"blocksize",
+	Z|8,	"size",
+	0
+};
+
+/* field layout of a version 5 arena head: version 4 plus clumpmagic */
+Info headinfo5[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	Z|4,	"blocksize",
+	Z|8,	"size",
+	4,	"clumpmagic",
+	0
+};
+
+/* field layout of a version 4 arena tail */
+Info tailinfo4[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	D|4,	"clumps",
+	D|4,	"cclumps",
+	T|4,	"ctime",
+	T|4,	"wtime",
+	D|8,	"used",
+	D|8,	"uncsize",
+	1,	"sealed",
+	0
+};
+	
+/* field layout of a version 5 arena tail: clumpmagic follows wtime */
+Info tailinfo5[] = {
+	4,	"magic",
+	D|4,	"version",
+	S|ANameSize,	"name",
+	D|4,	"clumps",
+	D|4,	"cclumps",
+	T|4,	"ctime",
+	T|4,	"wtime",
+	4,	"clumpmagic",
+	D|8,	"used",
+	D|8,	"uncsize",
+	1,	"sealed",
+	0
+};
+
+/*
+ * Report, field by field, where the len bytes at have (from disk)
+ * differ from the len bytes at want (the expected image).
+ * info describes the fields in order and ends with a zero length;
+ * each differing field is printed in the format its flag selects.
+ * Bytes beyond the last described field are expected to be zero
+ * and are reported as a group if the two images disagree there.
+ */
+void
+showdiffs(uchar *want, uchar *have, int len, Info *info)
+{
+	int n;
+	
+	while(len > 0 && (n=info->len&N) > 0){
+		if(memcmp(have, want, n) != 0){
+			switch(info->len){
+			case 1:
+				print("\t%s: correct=%d disk=%d\n",
+					info->name, *want, *have);
+				break;
+			case 4:
+				print("\t%s: correct=%#ux disk=%#ux\n",
+					info->name, u32(want), u32(have));
+				break;
+			case D|4:
+				print("\t%s: correct=%,ud disk=%,ud\n",
+					info->name, u32(want), u32(have));
+				break;
+			case T|4:
+				print("\t%s: correct=%t\n\t\tdisk=%t\n",
+					info->name, u32(want), u32(have));
+				break;
+			case Z|4:
+				print("\t%s: correct=%z disk=%z\n",
+					info->name, (uvlong)u32(want), (uvlong)u32(have));
+				break;
+			case D|8:
+				print("\t%s: correct=%,lld disk=%,lld\n",
+					info->name, u64(want), u64(have));
+				break;
+			case Z|8:
+				print("\t%s: correct=%z disk=%z\n",
+					info->name, u64(want), u64(have));
+				break;
+			case S|ANameSize:
+				/* the disk copy may be unterminated, so bound what is printed */
+				print("\t%s: correct=%s disk=%.*s\n",
+					info->name, (char*)want, 
+					utfnlen((char*)have, ANameSize-1),
+					(char*)have);
+				/* without this break the name was printed again as a hex dump */
+				break;
+			default:
+				print("\t%s: correct=%.*H disk=%.*H\n",
+					info->name, n, want, n, have);
+				break;
+			}
+		}
+		have += n;
+		want += n;
+		len -= n;
+		info++;
+	}
+	if(len > 0 && memcmp(have, want, len) != 0){
+		/* the expected image should be all zero past the structure */
+		if(memcmp(want, zero, len) != 0)
+			print("!!\textra want data in showdiffs (bug in fixarenas)\n");
+		else
+			print("\tnon-zero data on disk after structure\n");
+		if(verbose){
+			print("want: %.*H\n", len, want);
+			print("have: %.*H\n", len, have);
+		}
+	}
+}
+
+/*
+ * Poke around on the disk to guess what the ArenaPart numbers are.
+ *
+ * Unless both the arena size and the block size were given, scan
+ * the partition for arena head and tail magic numbers, take the
+ * most common head-to-head or tail-to-tail spacing as the arena
+ * size, and take the most common distance from a tail to the end
+ * of its arena as the block size.  User-specified values win over
+ * the guesses.  Finally compute ap.arenabase from the block size.
+ */
+void
+guessgeometry(void)
+{
+	int i, j, n, bestn, ndiff, nhead, ntail;
+	uchar *p, *ep, *sp;
+	u64int diff[100], head[3], tail[3];
+	u64int offset, bestdiff;
+	
+	ap.version = ArenaPartVersion;
+
+	if(arenasize == 0 || ap.blocksize == 0){
+		/*
+		 * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
+		 * Instead, look for the individual arena headers and tails, which there
+		 * are many of, and once we've seen enough, infer the spacing.
+		 *
+		 * Of course, nothing in the file format requires that arenas be evenly
+		 * spaced, but fmtarenas always does that for us.
+		 */
+		nhead = 0;
+		ntail = 0;
+		for(offset=PartBlank; offset<part->size; offset+=4*M){
+			p = pagein(offset, 4*M);
+			/* magic numbers are looked for at every K boundary */
+			for(sp=p, ep=p+4*M; p<ep; p+=K){
+				if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
+					if(verbose)
+						fprint(2, "arena head at %#llx\n", offset+(p-sp));
+					head[nhead++] = offset+(p-sp);
+				}
+				if(u32(p) == ArenaMagic && ntail < nelem(tail)){
+					tail[ntail++] = offset+(p-sp);
+					if(verbose)
+						fprint(2, "arena tail at %#llx\n", offset+(p-sp));
+				}
+			}
+			if(nhead == nelem(head) && ntail == nelem(tail))
+				break;
+		}
+		if(nhead < 3 && ntail < 3)
+			sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);
+	
+		/* 
+		 * Arena size is likely the most common
+		 * inter-head or inter-tail spacing.
+		 */
+		ndiff = 0;
+		for(i=1; i<nhead; i++)
+			diff[ndiff++] = head[i] - head[i-1];
+		for(i=1; i<ntail; i++)
+			diff[ndiff++] = tail[i] - tail[i-1];
+		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+		/* n counts the length of the current run of equal values */
+		bestn = 0;
+		bestdiff = 0;
+		for(i=1, n=1; i<=ndiff; i++, n++){
+			if(i==ndiff || diff[i] != diff[i-1]){
+				if(n > bestn){
+					bestn = n;
+					bestdiff = diff[i-1];
+				}
+				n = 0;
+			}
+		}
+		fprint(2, "arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
+		if(arenasize != 0 && arenasize != bestdiff)
+			fprint(2, "using user-specified size %z instead\n", arenasize);
+		else
+			arenasize = bestdiff;
+
+		/*
+		 * The arena tail for an arena is arenasize-blocksize from the head.
+		 * Walk the two sorted lists together, pairing each head
+		 * with the tail that falls inside its arena.  Every pass
+		 * must advance i or j, or the loop never terminates.
+		 */
+		ndiff = 0;
+		for(i=j=0; i<nhead && j<ntail; ){
+			if(head[i] > tail[j]){
+				/* this tail belongs to an arena before head[i] */
+				j++;
+				continue;
+			}
+			if(head[i]+arenasize > tail[j]){
+				/* tail[j] lies inside the arena that starts at head[i] */
+				diff[ndiff++] = head[i]+arenasize - tail[j];
+				j++;
+			}
+			/* this head is now paired, or its tail was not found */
+			i++;
+		}
+		if(ndiff < 3)
+			sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
+		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+		bestn = 0;
+		bestdiff = 0;
+		for(i=1, n=1; i<=ndiff; i++, n++){
+			if(i==ndiff || diff[i] != diff[i-1]){
+				if(n > bestn){
+					bestn = n;
+					bestdiff = diff[i-1];
+				}
+				n = 0;
+			}
+		}
+		fprint(2, "block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
+		if(ap.blocksize != 0 && ap.blocksize != bestdiff)
+			fprint(2, "using user-specified size %z instead\n", (vlong)ap.blocksize);
+		else
+			ap.blocksize = bestdiff;
+		if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
+			sysfatal("block size not a power of two");
+		if(ap.blocksize > MaxDiskBlock)
+			sysfatal("block size too big (max=%d)", MaxDiskBlock);
+	}
+	
+	/* standard computation - fmtarenas always uses tabsize==512k */
+	ap.arenabase = (PartBlank+HeadSize+512*K+ap.blocksize-1)&~(ap.blocksize-1);
+	p = pagein(ap.arenabase, Block);
+	fprint(2, "arena base likely %z%s\n", (vlong)ap.arenabase, u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");
+}
+
+/*
+ * Check the arena partition blocks and then the arenas listed in range.
+ *
+ * range is a comma-separated list of arena numbers or lo-hi
+ * spans; a missing lo means 0 and a missing hi means the last
+ * arena.  A nil range checks every arena.  The string is modified
+ * while it is parsed (commas are overwritten with NULs).
+ * NOTE(review): lo and hi are not checked against narena here.
+ */
+void
+checkarenas(char *range)
+{
+	char *s, *t;
+	int i, lo, hi, narena;
+	uchar dbuf[HeadSize];
+	uchar *p;
+
+	guessgeometry();
+
+	/* compare the superblock implied by the guessed geometry with the disk */
+	memset(dbuf, 0, sizeof dbuf);
+	packarenapart(&ap, dbuf);
+	p = pagein(PartBlank, Block);
+	if(memcmp(p, dbuf, HeadSize) != 0){
+		print("on-disk arena part superblock incorrect\n");
+		showdiffs(dbuf, p, HeadSize, partinfo);
+	}
+	memmove(p, dbuf, HeadSize);
+
+	/* rounded up, so a partial arena at the end still counts */
+	narena = (part->size-ap.arenabase + arenasize-1)/arenasize;
+	if(range == nil){
+		for(i=0; i<narena; i++)
+			checkarena(ap.arenabase+(vlong)i*arenasize, i);
+	}else{
+		/* parse, e.g., -4,8-9,10- */
+		for(s=range; *s; s=t){
+			/* cut off this element; t is where the next one starts */
+			t = strchr(s, ',');
+			if(t)
+				*t++ = 0;
+			else
+				t = s+strlen(s);
+			if(*s == '-')
+				lo = 0;
+			else
+				lo = strtol(s, &s, 0);
+			hi = lo;
+			if(*s == '-'){
+				s++;
+				if(*s == 0)
+					hi = narena-1;
+				else
+					hi = strtol(s, &s, 0);
+			}
+			/* anything left over in the element is a syntax error */
+			if(*s != 0){
+				fprint(2, "bad range: %s\n", s);
+				continue;
+			}
+			for(i=lo; i<=hi; i++)
+				checkarena(ap.arenabase+(vlong)i*arenasize, i);
+		}
+	}
+}
+
+/*
+ * Is there a clump here at p?
+ *
+ * Parses a clump header at p into cl and accepts it only if the
+ * data that follows hashes to the score stored in the header
+ * (after decompression for compressed clumps).
+ * Returns the clump's total length in bytes, header included,
+ * and stores its magic number in *pmagic; returns 0, leaving
+ * *pmagic untouched, if no valid clump starts at p.
+ */
+static int
+isclump(uchar *p, Clump *cl, u32int *pmagic)
+{
+	int n;
+	u32int magic;
+	uchar score[VtScoreSize], *bp;
+	Unwhack uw;
+	uchar ubuf[70*1024];
+	
+	bp = p;
+	magic = u32(p);
+	if(magic == 0)
+		return 0;
+	p += U32Size;
+
+	/* NOTE(review): 0xFF is presumably what vtfromdisktype returns for an unknown type - confirm */
+	cl->info.type = vtfromdisktype(*p);
+	if(cl->info.type == 0xFF)
+		return 0;
+	p++;
+	cl->info.size = u16(p);
+	p += U16Size;
+	cl->info.uncsize = u16(p);
+	if(cl->info.size > cl->info.uncsize)
+		return 0;
+	p += U16Size;
+	scorecp(cl->info.score, p);
+	p += VtScoreSize;
+	cl->encoding = *p;
+	p++;
+	cl->creator = u32(p);
+	p += U32Size;
+	cl->time = u32(p);
+	p += U32Size;
+
+	/* p now points at the clump data; verify it against the score */
+	switch(cl->encoding){
+	case ClumpENone:
+		if(cl->info.size != cl->info.uncsize)
+			return 0;
+		scoremem(score, p, cl->info.size);
+		if(scorecmp(score, cl->info.score) != 0)
+			return 0;
+		break;
+	case ClumpECompress:
+		if(cl->info.size >= cl->info.uncsize)
+			return 0;
+		unwhackinit(&uw);
+		n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
+		if(n != cl->info.uncsize)
+			return 0;
+		scoremem(score, ubuf, cl->info.uncsize);
+		if(scorecmp(score, cl->info.score) != 0)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+	p += cl->info.size;
+	
+	/* it all worked out in the end */
+	*pmagic = magic;
+	return p - bp;
+}
+
+/*
+ * All ClumpInfos seen in this arena.
+ * Kept in binary tree so we can look up by score.
+ * The entries also sit in the array cibuf in the order they were
+ * added; the tree links point at elements of that array.
+ */
+typedef struct Cit Cit;
+struct Cit
+{
+	Cit *left;	/* subtree reached when the node's score is >= the one sought */
+	Cit *right;	/* subtree reached when the node's score is < the one sought */
+	vlong corrupt;	/* nonzero: entry stands for this many corrupt bytes and is not in the tree */
+	ClumpInfo ci;
+};
+Cit *cibuf;	/* array of entries, grown by addcibuf */
+Cit *ciroot;	/* root of the score tree; nil when empty */
+int ncibuf, mcibuf;	/* entries in use, entries allocated */
+
+/*
+ * Forget every recorded ClumpInfo: empty the score tree and mark
+ * the array as unused.  The array's storage is kept for reuse.
+ */
+void
+resetcibuf(void)
+{
+	ciroot = nil;
+	ncibuf = 0;
+}
+
+/*
+ * Walk down the tree whose root link is *l and return the address
+ * of the nil link where a node with the given score belongs.
+ * Nodes with smaller scores send the walk right; all others,
+ * equal scores included, send it left.
+ */
+Cit**
+ltreewalk(Cit **l, uchar *score)
+{
+	Cit *node;
+	
+	while((node = *l) != nil){
+		if(scorecmp(node->ci.score, score) < 0)
+			l = &node->right;
+		else
+			l = &node->left;
+	}
+	return l;
+}
+
+/*
+ * Append a ClumpInfo to cibuf.  A nonzero corrupt marks the entry
+ * as a placeholder for that many corrupt bytes; such entries are
+ * kept in the array only.  Other entries are also linked into
+ * the score tree rooted at ciroot.
+ *
+ * The tree links are pointers into cibuf, so the array cannot be
+ * grown with vtrealloc: if the block moved, ciroot and every
+ * left/right link would be left pointing at freed memory.
+ * Instead a new array is allocated and each link is rebased onto
+ * it while the old array is still valid.
+ */
+void
+addcibuf(ClumpInfo *ci, vlong corrupt)
+{
+	Cit *cit, *nbuf;
+	int i;
+	
+	if(ncibuf == mcibuf){
+		mcibuf += 256;
+		nbuf = vtmalloc(mcibuf*sizeof nbuf[0]);
+		for(i=0; i<ncibuf; i++){
+			nbuf[i] = cibuf[i];
+			if(nbuf[i].left != nil)
+				nbuf[i].left = nbuf + (nbuf[i].left - cibuf);
+			if(nbuf[i].right != nil)
+				nbuf[i].right = nbuf + (nbuf[i].right - cibuf);
+		}
+		if(ciroot != nil)
+			ciroot = nbuf + (ciroot - cibuf);
+		free(cibuf);
+		cibuf = nbuf;
+	}
+	cit = &cibuf[ncibuf++];
+	cit->ci = *ci;
+	cit->left = nil;
+	cit->right = nil;
+	cit->corrupt = corrupt;
+	if(!corrupt)
+		*ltreewalk(&ciroot, ci->score) = cit;
+}
+
+/*
+ * Record a run of len corrupt bytes as a placeholder entry
+ * carrying an all-zero ClumpInfo.
+ */
+void
+addcicorrupt(vlong len)
+{
+	static ClumpInfo blank;	/* static storage, so every field is zero */
+	
+	addcibuf(&blank, len);
+}
+
+/*
+ * Report whether a clump with this score has already been
+ * entered in the score tree: 1 if so, 0 if not.
+ */
+int
+haveclump(uchar *score)
+{
+	Cit *node;
+	int cmp;
+	
+	node = ciroot;
+	while(node != nil){
+		cmp = scorecmp(node->ci.score, score);
+		if(cmp == 0)
+			return 1;
+		/* same direction rule that was used to build the tree */
+		node = cmp < 0 ? node->right : node->left;
+	}
+	return 0;
+}
+
+/*
+ * Does the packed clump info at p (type byte, 16-bit size,
+ * 16-bit uncsize, then the score) describe the same clump as ci?
+ * Returns 1 if every field agrees, 0 otherwise.
+ */
+int
+matchci(ClumpInfo *ci, uchar *p)
+{
+	return ci->type == vtfromdisktype(p[0])
+		&& ci->size == u16(p+1)
+		&& ci->uncsize == u16(p+3)
+		&& scorecmp(ci->score, p+5) == 0;
+}
+
+/*
+ * XXX
+ * Decide whether the arena tail block at p (blocksize bytes) looks
+ * sealed: a known version, the last byte of the packed tail equal
+ * to 1, only zeros between the packed tail and the trailing score,
+ * and a trailing score that is not all zeros.  Prints the reason
+ * and returns 0 when a check fails (silently for an unknown
+ * version); returns 1 otherwise.
+ */
+int
+sealedarena(uchar *p, int blocksize)
+{
+	int vers, tailsize;
+	uchar *scorep;
+	
+	vers = u32(p+4);
+	if(vers == ArenaVersion4)
+		tailsize = ArenaSize4;
+	else if(vers == ArenaVersion5)
+		tailsize = ArenaSize5;
+	else
+		return 0;
+
+	scorep = p+blocksize-VtScoreSize;
+	if(p[tailsize-1] != 1){
+		print("arena tail says not sealed\n");
+		return 0;
+	}
+	if(memcmp(p+tailsize, zero, blocksize-VtScoreSize-tailsize) != 0){
+		print("arena tail followed by non-zero data\n");
+		return 0;
+	}
+	if(memcmp(scorep, zero, VtScoreSize) == 0){
+		print("arena score zero\n");
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * An arena name is acceptable when nameok approves of it and it
+ * ends with the decimal digits of n.  Returns 1 or 0.
+ */
+int
+okayname(char *name, int n)
+{
+	char suffix[20];
+	int namelen, suffixlen;
+	
+	if(nameok(name) < 0)
+		return 0;
+	sprint(suffix, "%d", n);
+	namelen = strlen(name);
+	suffixlen = strlen(suffix);
+	if(namelen < suffixlen)
+		return 0;
+	return strcmp(name+namelen-suffixlen, suffix) == 0;
+}
+
+/*
+ * Order two ClumpInfos by type, then size, then uncsize, then score.
+ * Returns zero only when all four agree.
+ */
+int
+clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
+{
+	int d;
+
+	d = a->type - b->type;
+	if(d != 0)
+		return d;
+	d = a->size - b->size;
+	if(d != 0)
+		return d;
+	d = a->uncsize - b->uncsize;
+	if(d != 0)
+		return d;
+	return scorecmp(a->score, b->score);
+}
+
+/*
+ * Unpack nci clump directory entries for the arena starting at
+ * offset.  Directory blocks are read downward, beginning with the
+ * block just below the arena's last block, 4*M bytes paged in at a
+ * time.  Returns a vtmalloc'ed array of nci entries.
+ */
+ClumpInfo*
+loadci(vlong offset, Arena *arena, int nci)
+{
+	int done, k, perblock, want;
+	uchar *cur, *base;
+	ClumpInfo *out, *next;
+	
+	perblock = arena->blocksize/ClumpInfoSize;
+	out = vtmalloc(nci*sizeof out[0]);
+	next = out;
+	offset += arena->size - arena->blocksize;
+	cur = base = nil;
+	for(done=0; done<nci; done+=perblock){
+		/* window used up (or never loaded): page in the 4*M below offset */
+		if(cur == base){
+			base = pagein(offset-4*M, 4*M);
+			cur = base+4*M;
+		}
+		cur -= arena->blocksize;
+		offset -= arena->blocksize;
+		want = nci - done;
+		if(want > perblock)
+			want = perblock;
+		for(k=0; k<want; k++)
+			unpackclumpinfo(next++, cur+k*ClumpInfoSize);
+	}
+	return out;
+}
+
+/*
+ * Pack nci clump directory entries from ci into the paged-in
+ * directory blocks of the arena starting at offset, filling blocks
+ * downward from the one just below the arena's last block.  Each
+ * block is zeroed before its entries are packed.  Returns the
+ * offset of the lowest directory block written.
+ */
+vlong
+writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
+{
+	int done, k, perblock, want;
+	uchar *cur, *base;
+	
+	perblock = arena->blocksize/ClumpInfoSize;
+	offset += arena->size - arena->blocksize;
+	cur = base = nil;
+	for(done=0; done<nci; done+=perblock){
+		/* window used up (or never loaded): page in the 4*M below offset */
+		if(cur == base){
+			base = pagein(offset-4*M, 4*M);
+			cur = base+4*M;
+		}
+		cur -= arena->blocksize;
+		offset -= arena->blocksize;
+		memset(cur, 0, arena->blocksize);
+		want = nci - done;
+		if(want > perblock)
+			want = perblock;
+		for(k=0; k<want; k++)
+			packclumpinfo(ci++, cur+k*ClumpInfoSize);
+	}
+	return offset;
+}
+
+void	/* fill in size, blocksize, version, clumpmagic and name of head and arena for arena anum at offset0 */
+loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
+{
+	char dname[ANameSize];
+	static char lastbase[ANameSize];	/* name of the previous arena minus its number; kept across calls */
+	uchar *p;
+	u32int x;
+
+	/*
+	 * Fmtarenas makes all arenas the same size
+	 * except the last, which may be smaller.
+	 * It uses the same block size for arenas as for
+	 * the arena partition blocks.
+	 */
+	arena->size = arenasize;
+	if(offset0+arena->size > part->size)
+		arena->size = part->size - offset0;
+	head->size = arena->size;
+	
+	arena->blocksize = ap.blocksize;
+	head->blocksize = arena->blocksize;
+	
+	/* 
+	 * Look for clump magic and name in head/tail blocks.
+	 * All the other info we will reconstruct just in case.
+	 * Nothing is taken from a block unless both its magic
+	 * and its version are recognized.
+	 */
+	p = pagein(offset0, arena->blocksize);
+	if(u32(p) == ArenaHeadMagic)
+	if((x=u32(p+4)) == ArenaVersion4 || x == ArenaVersion5){
+		head->version = x;
+		if(x == ArenaVersion4)
+			head->clumpmagic = _ClumpMagic;
+		else
+			head->clumpmagic = 
+				u32(p+2*U32Size+ANameSize+U32Size+U64Size);	/* NOTE(review): offset presumably mirrors packarenahead's layout - confirm */
+		memmove(dname, p+2*U32Size, ANameSize);
+		dname[ANameSize-1] = 0;
+		if(okayname(dname, anum))
+			strcpy(head->name, dname);
+	}
+
+	/* same again for the tail, which is the arena's last block */
+	p = pagein(offset0+arena->size-arena->blocksize, 
+		arena->blocksize);
+
+	if(u32(p)==ArenaMagic)
+	if((x=u32(p+4))==ArenaVersion4 || x==ArenaVersion5){
+		arena->version = x;
+		if(x == ArenaVersion4)
+			arena->clumpmagic = _ClumpMagic;
+		else
+			arena->clumpmagic = 
+				u32(p+2*U32Size+ANameSize+4*U32Size);	/* NOTE(review): offset presumably mirrors packarena's layout - confirm */
+		memmove(dname, p+2*U32Size, ANameSize);
+		dname[ANameSize-1] = 0;
+		if(okayname(dname, anum))
+			strcpy(arena->name, dname);
+		arena->diskstats.clumps = u32(p+2*U32Size+ANameSize);
+	}
+
+	/* Head trumps arena. */
+	if(head->version){
+		arena->version = head->version;
+		arena->clumpmagic = head->clumpmagic;
+	}
+	if(arena->version == 0)
+		arena->version = ArenaVersion5;
+	/*
+	 * Name, in order of preference: basename plus anum, the
+	 * previous arena's base plus anum, the name found in the head,
+	 * the name found in the tail.
+	 */
+	if(basename)
+		snprint(arena->name, ANameSize, "%s%d", basename, anum);
+	else if(lastbase[0])
+		snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
+	else if(head->name[0])
+		strcpy(arena->name, head->name);
+	else if(arena->name[0] == 0)
+		sysfatal("cannot determine base name for arena; use -n");
+	strcpy(lastbase, arena->name);
+	sprint(dname, "%d", anum);
+	lastbase[strlen(lastbase)-strlen(dname)] = 0;	/* chop the arena number off to leave the base */
+	
+	/* Was working in arena, now copy to head. */
+	head->version = arena->version;
+	memmove(head->name, arena->name, sizeof head->name);
+	head->blocksize = arena->blocksize;
+	head->size = arena->size;
+}
+
+
+/*
+ * Poke around in the arena to find the clump data
+ * and compute the relevant statistics.
+ *
+ * offset0 is where arena number anum starts in the partition.
+ * head and arena are cleared and then filled in.  When the arena
+ * is being sealed, oldscore receives the SHA1 of the data as found
+ * and score the SHA1 of the data as reconstructed.
+ */
+void
+guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
+	uchar *oldscore, uchar *score)
+{
+	static char lastbase[ANameSize];	/* not referenced anywhere in this function */
+	uchar headbuf[MaxDiskBlock];
+	int needtozero, clumps, nb1, nb2, minclumps;
+	int diff, inbad, n, ncib, printed, sealing, smart;
+	u32int magic;
+	uchar *sp, *ep, *p;
+	vlong boffset, eoffset, lastclumpend, leaked;
+	vlong offset, oldshathru, shathru, toffset, totalcorrupt, v;
+	Clump cl;
+	ClumpInfo *bci, *ci, *eci, *xci;
+	Cit *bcit, *cit, *ecit;
+	
+	/*
+	 * We expect to find an arena, with data, between offset
+	 * and offset+arenasize.  With any luck, the data starts at
+	 * offset+ap.blocksize.  The blocks have variable size and
+	 * aren't padded at all, which doesn't give us any alignment
+	 * constraints.  The blocks are compressed or high entropy,
+	 * but the headers are pretty low entropy (except the score):
+	 *
+	 *	type[1] (range 0 thru 9, 13)
+	 *	size[2]
+	 *	uncsize[2] (>= size)
+	 *
+	 * so we can look for these.  We check the scores as we go,
+	 * so we can't make any wrong turns.  If we find ourselves
+	 * in a dead end, scan forward looking for a new start.
+	 */
+
+	resetcibuf();
+	memset(head, 0, sizeof *head);
+	memset(arena, 0, sizeof *arena);
+	memset(score, 0, VtScoreSize);
+	memset(&shabuf, 0, sizeof shabuf);
+
+	loadarenabasics(offset0, anum, head, arena);
+	
+	/* start the clump hunt */
+	clumps = 0;
+	totalcorrupt = 0;
+	sealing = 1;
+	shathru = 0;
+	boffset = offset0 + arena->blocksize;	/* first byte after the head block */
+	offset = boffset;
+	eoffset = offset0+arena->size - arena->blocksize;	/* end of clump data; moves down as directory blocks are reserved */
+	toffset = eoffset;	/* start of the tail block; fixed */
+	sp = pagein(offset0, 4*M);
+	oldshathru = 0;
+	if(sealing){
+		sha1(sp, 4*M, nil, &dsold);
+		oldshathru = bufoffset+4*M;
+	}
+	ep = sp+4*M;
+	p = sp + (boffset - offset0);
+	ncib = arena->blocksize / ClumpInfoSize;	/* ci per block in index */
+	lastclumpend = offset;
+	nbad = 0;
+	inbad = 0;
+	needtozero = 0;
+	minclumps = 0;
+	while(offset < eoffset){
+		/*
+		 * Shift buffer if we're running out of room.
+		 */
+		if(p+70*K >= ep){
+			/*
+			 * Start the SHA1 buffer.   By now we should know the
+			 * clumpmagic and arena version, so we can create a
+			 * correct head block to get things going.
+			 */
+			if(sealing){
+				if(shabuf.offset0 == 0){
+					if(arena->clumpmagic == 0){
+print("no clumpmagic; no seal\n");
+						sealing = 0;
+						goto Noseal;
+					}
+					memset(headbuf, 0, arena->blocksize);
+					head->clumpmagic = arena->clumpmagic;
+					packarenahead(head, headbuf);
+					shabuf.offset0 = offset0;
+					sha1(headbuf, arena->blocksize, nil, &dsnew);
+					shathru = offset0 + arena->blocksize;
+				}
+				n = 4*M-256*K;
+				if(bufoffset+n > eoffset)
+					n = eoffset - bufoffset;
+				if(n > 0){
+					if(shathru < bufoffset)
+						fprint(2, "bad sha: shathru=%,lld < bufoffset=%,lld\n", shathru, bufoffset);
+					diff = shathru - bufoffset;
+					sha1(sp+diff, n-diff, nil, &dsnew);
+					shathru += n-diff;
+				}
+			}
+		Noseal:
+
+			/* slide the window forward, keeping the last 256*K already in view */
+			n = 4*M-256*K;
+			pagein(bufoffset+n, 4*M);
+			p -= n;
+			
+			if(bufoffset+256*K+n > toffset)
+				n = toffset - (bufoffset+256*K);
+			if(sealing){
+				sha1(sp+256*K, n, nil, &dsold);
+				oldshathru = bufoffset+256*K+n;
+			}
+		}
+
+		/*
+		 * Check for a clump at p, which is at offset in the disk.
+		 * Duplicate clumps happen in corrupted disks
+		 * (the same pattern gets written many times in a row)
+		 * and should never happen during regular use.
+		 */
+		if((n = isclump(p, &cl, &magic)) > 0 && !haveclump(cl.info.score)){
+			/*
+			 * If we were in the middle of some corrupted data,
+			 * flush a warning about it and then add any clump
+			 * info blocks as necessary.
+			 */
+			if(inbad){
+				inbad = 0;
+				v = offset-lastclumpend;
+				if(needtozero){
+					zerorange(lastclumpend, v);
+					print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
+						lastclumpend, v, v);
+				}
+				addcicorrupt(v);
+				totalcorrupt += v;
+				nb1 = (minclumps+ncib-1)/ncib;
+				minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
+				nb2 = (minclumps+ncib-1)/ncib;
+				eoffset -= (nb2-nb1)*arena->blocksize;
+			}
+
+			/*
+			 * If clumps use different magic numbers, we don't care.
+			 * We'll just use the first one we find and make the others
+			 * follow suit.
+			 */
+			if(arena->clumpmagic == 0){
+				print("clump type %d size %d score %V magic %x\n",
+					cl.info.type, cl.info.size, cl.info.score, magic);
+				arena->clumpmagic = magic;
+				if(magic == _ClumpMagic)
+					arena->version = ArenaVersion4;
+				else
+					arena->version = ArenaVersion5;
+			}
+			if(magic != arena->clumpmagic)
+				p32(p, arena->clumpmagic);
+			if(clumps == 0)
+				arena->ctime = cl.time;
+
+			/*
+			 * Record the clump, update arena stats,
+			 * grow clump info blocks if needed.
+			 */
+			if(verbose > 1)
+				fprint(2, "\tclump %d: %d %V at %#llux+%#ux (%d)\n", 
+					clumps, cl.info.type, cl.info.score, offset, n, n);
+			addcibuf(&cl.info, 0);
+			if(minclumps%ncib == 0)
+				eoffset -= arena->blocksize;	/* this entry starts a new directory block */
+			minclumps++;
+			clumps++;
+			if(cl.encoding != ClumpENone)
+				arena->diskstats.cclumps++;
+			arena->diskstats.uncsize += cl.info.uncsize;
+			arena->wtime = cl.time;
+			
+			/*
+			 * Move to next clump.
+			 */
+			offset += n;
+			p += n;
+			lastclumpend = offset;
+		}else{
+			/*
+			 * Overwrite malformed clump data with zeros later.
+			 * For now, just record whether it needs to be overwritten.
+			 * Bad regions must be of size at least ClumpSize.
+			 * Postponing the overwriting keeps us from writing past
+			 * the end of the arena data (which might be directory data)
+			 * with zeros.
+			 */
+			if(!inbad){
+				inbad = 1;
+				needtozero = 0;
+				if(memcmp(p, zero, ClumpSize) != 0)
+					needtozero = 1;
+				p += ClumpSize;
+				offset += ClumpSize;
+				nbad++;
+			}else{
+				if(*p != 0)
+					needtozero = 1;
+				p++;
+				offset++;
+			}
+		}
+	}
+fprint(2, "good clumps: %d; min clumps: %d\n", clumps, minclumps);
+	arena->diskstats.used = lastclumpend - boffset;
+	leaked = eoffset - lastclumpend;
+	if(verbose)
+		fprint(2, "used from %#llux to %#llux = %,lld (%,lld unused)\n",
+			boffset, lastclumpend, arena->diskstats.used, leaked);
+
+	/*
+	 * Finish the SHA1 of the old data.
+	 * The final VtScoreSize bytes of the tail block are hashed as zeros.
+	 */
+	if(sealing){
+		for(; oldshathru<toffset; oldshathru+=n){
+			n = 4*M;
+			if(oldshathru+n > toffset)
+				n = toffset - oldshathru;
+			p = pagein(oldshathru, n);
+			sha1(p, n, nil, &dsold);
+		}
+		p = pagein(toffset, arena->blocksize);
+		sha1(p, arena->blocksize-VtScoreSize, nil, &dsold);
+		sha1(zero, VtScoreSize, nil, &dsold);
+		sha1(nil, 0, oldscore, &dsold);
+	}
+
+	/*
+	 * If we still don't know the clump magic, the arena
+	 * must be empty.  It still needs a value, so make 
+	 * something up.
+	 */
+	if(arena->version == 0)
+		arena->version = ArenaVersion5;
+	if(arena->clumpmagic == 0){
+		if(arena->version == ArenaVersion4)
+			arena->clumpmagic = _ClumpMagic;
+		else{
+			do
+				arena->clumpmagic = fastrand();
+			while(arena->clumpmagic==_ClumpMagic
+				||arena->clumpmagic==0);
+		}
+		head->clumpmagic = arena->clumpmagic;
+	}
+	
+	/*
+	 * Guess at number of clumpinfo blocks to load.
+	 * If we guess high, it's no big deal.  If we guess low,
+	 * the editing pass below runs out of entries, reports it,
+	 * and retries once with smart=0.
+	 */
+	if(clumps == 0 
+	|| arena->diskstats.used == totalcorrupt)
+		goto Nocib;
+	if(clumps < arena->diskstats.clumps)
+		clumps = arena->diskstats.clumps;
+	if(clumps < ncibuf)
+		clumps = ncibuf;
+	clumps += totalcorrupt/
+		((arena->diskstats.used - totalcorrupt)/clumps);
+	clumps += totalcorrupt/2000;
+	if(clumps < minclumps)
+		clumps = minclumps;
+	clumps += ncib-1;
+	clumps -= clumps%ncib;	/* round to a whole number of directory blocks */
+	/*
+	 * Can't go into the actual data.
+	 */
+	v = offset0 + arena->size - arena->blocksize;
+	v -= (clumps+ncib-1)/ncib * arena->blocksize;
+	if(v < lastclumpend){
+		v = offset0 + arena->size - arena->blocksize;
+		clumps = (v-lastclumpend)/arena->blocksize * ncib;
+	}
+	
+	if(clumps < minclumps)
+		print("cannot happen?\n");
+
+	/*
+	 * Check clumpinfo blocks against directory we created.
+	 * The tricky part is handling the corrupt sections of arena.
+	 * If possible, we remark just the affected directory entries
+	 * rather than slide everything down.
+	 * 
+	 * Allocate clumps+1 blocks and check that we don't need
+	 * the last one at the end.
+	 */
+	bci = loadci(offset0, arena, clumps+1);
+	eci = bci+clumps+1;
+	bcit = cibuf;
+	ecit = cibuf+ncibuf;
+	smart = 1;
+Again:
+	nbad = 0;
+	ci = bci;
+	for(cit=bcit; cit<ecit && ci<eci; cit++){
+		if(cit->corrupt){
+			vlong n, m;	/* this n shadows the function-level int n */
+			if(smart){
+				/*
+				 * If we can, just mark existing entries as corrupt.
+				 */
+				n = cit->corrupt;
+				for(xci=ci; n>0 && xci<eci; xci++)
+					n -= ClumpSize+xci->size;
+				if(n > 0 || xci >= eci)
+					goto Dumb;
+				printed = 0;
+				for(; ci<xci; ci++){
+					if(verbose && ci->type != VtCorruptType){
+						if(!printed){
+							print("marking directory %d-%d as corrupt\n",
+								(int)(ci-bci), (int)(xci-bci));
+							printed = 1;
+						}
+						print("\ttype=%d size=%d uncsize=%d score=%V\n",
+							ci->type, ci->size, ci->uncsize, ci->score);
+					}
+					ci->type = VtCorruptType;
+				}
+			}else{
+			Dumb:
+				/*
+				 * Otherwise, blaze a new trail.
+				 * Each new entry covers at most
+				 * VtMaxLumpSize+ClumpSize bytes and never
+				 * leaves a remainder shorter than ClumpSize.
+				 */
+xci = ci;
+				n = cit->corrupt;
+				while(n > 0 && ci < eci){
+					if(n < ClumpSize)
+						sysfatal("bad math in clump corrupt");
+					if(n <= VtMaxLumpSize+ClumpSize)
+						m = n;
+					else{
+						m = VtMaxLumpSize+ClumpSize;
+						if(n-m < ClumpSize)
+							m -= ClumpSize;
+					}
+					ci->type = VtCorruptType;
+					ci->size = m-ClumpSize;
+					ci->uncsize = m-ClumpSize;
+					memset(ci->score, 0, VtScoreSize);
+					ci++;
+					n -= m;
+				}
+fprint(2, "blaze %d %d: %lld bytes in %ld\n", xci-bci, ci-bci, cit->corrupt, ci-xci);
+			}
+			continue;
+		}
+		if(clumpinfocmp(&cit->ci, ci) != 0){
+			if(verbose && (smart || verbose>1)){
+				print("clumpinfo %d\n", (int)(ci-bci));
+				print("\twant: %d %d %d %V\n", 
+					cit->ci.type, cit->ci.size,
+					cit->ci.uncsize, cit->ci.score);
+				print("\thave: %d %d %d %V\n", 
+					ci->type, ci->size, 
+					ci->uncsize, ci->score);
+			}
+			*ci = cit->ci;
+			nbad++;
+		}
+		ci++;
+	}
+	if(ci >= eci || cit < ecit){
+		print("ran out of space editing existing directory; rewriting\n");
+print("eci %d ci %d ecit %d cit %d\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
+		assert(smart);	/* can't happen second time thru */
+		smart = 0;
+		goto Again;
+	}
+	
+	assert(ci <= eci);
+	arena->diskstats.clumps = ci-bci;
+print("new clumps %d\n", ci-bci);
+	v = writeci(offset0, arena, bci, ci-bci);
+	if(v - lastclumpend > 64*1024)	/* NOTE(review): more than 64K between data and directory presumably means the arena is not full - confirm */
+		sealing = 0;
+	if(lastclumpend > v)
+		print("arena directory overwrote blocks!  cannot happen!\n");
+	free(bci);
+	if(smart && nbad)
+		print("arena directory has %d bad or missing entries\n", nbad);
+Nocib:
+
+	/*
+	 * Finish the SHA1 of the new data.
+	 */
+	arena->diskstats.sealed = sealing;
+	if(sealing){
+		eoffset = offset0 + arena->size - arena->blocksize;
+		for(; shathru<eoffset; shathru+=n){
+			n = 4*M;
+			if(shathru+n > eoffset)
+				n = eoffset - shathru;
+			p = pagein(shathru, n);
+			sha1(p, n, nil, &dsnew);
+		}
+		memset(headbuf, 0, sizeof headbuf);
+		packarena(arena, headbuf);
+		sha1(headbuf, arena->blocksize, nil, &dsnew);
+		sha1(nil, 0, score, &dsnew);
+	}
+	
+	memset(&shabuf, 0, sizeof shabuf);
+}
+
+void	/* reconstruct arena anum at offset, report how the on-disk head and tail differ, and replace them in the page buffer */
+checkarena(vlong offset, int anum)
+{
+	uchar dbuf[MaxDiskBlock];	/* scratch for the freshly packed head, then tail */
+	uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
+	Arena arena;
+	ArenaHead head;
+	
+	print("# arena %d: offset %#llux\n", anum, offset);
+
+	guessarena(offset, anum, &head, &arena, oldscore, score);	/* fills head, arena and both scores */
+
+	if(verbose){
+		print("#\tversion=%d name=%s blocksize=%d size=%z",
+			head.version, head.name, head.blocksize, head.size);
+		if(head.clumpmagic)
+			print(" clumpmagic=%#.8ux", head.clumpmagic);
+		print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
+			arena.diskstats.clumps, arena.diskstats.cclumps,
+			arena.diskstats.used, arena.diskstats.uncsize);
+		print("#\tctime=%t\n", arena.ctime);
+		print("#\twtime=%t\n", arena.wtime);
+		if(arena.diskstats.sealed)
+			print("#\tsealed score=%V\n", score);
+	}
+
+	/* head block: pack what we reconstructed and compare with the disk */
+	memset(dbuf, 0, sizeof dbuf);
+	packarenahead(&head, dbuf);
+	p = pagein(offset, arena.blocksize);
+	if(memcmp(dbuf, p, arena.blocksize) != 0){
+		print("on-disk arena header incorrect\n");
+		showdiffs(dbuf, p, arena.blocksize, 
+			arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
+	}
+	memmove(p, dbuf, arena.blocksize);
+	
+	/* tail block: same, except the trailing score is checked separately */
+	memset(dbuf, 0, sizeof dbuf);
+	packarena(&arena, dbuf);
+	if(arena.diskstats.sealed)
+		scorecp(dbuf+arena.blocksize-VtScoreSize, score);
+	p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
+	if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
+		print("on-disk arena tail incorrect\n");
+		showdiffs(dbuf, p, arena.blocksize-VtScoreSize,
+			arena.version==ArenaVersion4 ? tailinfo4 : tailinfo5);
+	}
+	if(arena.diskstats.sealed){
+		/* the disk's seal is judged against the hash of the data as found (oldscore) */
+		if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
+			print("on-disk arena seal score incorrect\n");
+			print("\tcorrect=%V\n", oldscore);
+			print("\t   disk=%V\n", p+arena.blocksize-VtScoreSize);
+		}
+	}
+	memmove(p, dbuf, arena.blocksize);
+	
+	pageout();	/* presumably flushes the modified page buffer - confirm against pageout's definition */
+}
+
+/*
+ * guessarena and checkarena each keep a MaxDiskBlock-sized buffer on
+ * the stack.  NOTE(review): presumably the thread library sizes the
+ * main thread's stack from this variable - confirm in thread(2).
+ */
+int mainstacksize = 512*1024;
+
+/*
+ * Parse the options, open the partition named by the first argument
+ * (read-only unless -f was given), and run checkarenas with the
+ * optional second argument, or nil when there is none.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	int mode;
+	char *arg;
+	
+	readonly = 1;
+	ARGBEGIN{
+	case 'v':
+		verbose++;
+		break;
+	case 'n':
+		basename = EARGF(usage());
+		break;
+	case 'f':
+		fix = 1;
+		readonly = 0;
+		break;
+	case 'b':
+		ap.blocksize = unittoull(EARGF(usage()));
+		break;
+	case 'a':
+		arenasize = unittoull(EARGF(usage()));
+		break;
+	default:
+		usage();
+	}ARGEND
+	
+	if(argc < 1 || argc > 2)
+		usage();
+
+	file = argv[0];
+	arg = nil;
+	if(argc > 1)
+		arg = argv[1];
+	
+	ventifmtinstall();
+	fmtinstall('z', zfmt);
+	fmtinstall('t', tfmt);
+	quotefmtinstall();
+	
+	/* -f is the only thing that makes the partition writable */
+	if(readonly)
+		mode = OREAD;
+	else
+		mode = ORDWR;
+	part = initpart(file, mode|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open %s: %r", file);
+	
+	checkarenas(arg);
+	threadexitsall(nil);
+}
+

+ 132 - 0
sys/src/cmd/venti/srv/fmtarenas.c

@@ -0,0 +1,132 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/* Print the fmtarenas command synopsis on standard error and exit. */
+void
+usage(void)
+{
+	fprint(2, "usage: fmtarenas [-Z] [-b blocksize] [-a arenasize] name file\n");
+	threadexitsall(nil);
+}
+
+/*
+ * Format file as an arena partition.  Consecutive arenas named
+ * name0, name1, ... of asize bytes each are created and recorded
+ * in the partition's arena map; the final arena runs to the end of
+ * the partition, absorbing any remainder smaller than MinArenaSize.
+ * The partition is zeroed first by default only for version 4
+ * arenas; -Z turns zeroing off.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int vers;
+	ArenaPart *ap;
+	Part *part;
+	Arena *arena;
+	u64int addr, limit, asize, apsize;
+	char *file, *name, aname[ANameSize];
+	int i, n, blocksize, tabsize, zero;
+
+	ventifmtinstall();
+	statsinit();
+
+	blocksize = 8 * 1024;
+	asize = 512 * 1024 *1024;
+	tabsize = 512 * 1024;		/* BUG: should be determine from number of arenas */
+	zero = -1;			/* -1: not chosen yet, decided from vers below */
+	vers = ArenaVersion5;
+	ARGBEGIN{
+	case 'D':
+		settrace(EARGF(usage()));
+		break;
+	case 'a':
+		asize = unittoull(EARGF(usage()));
+		/* a zero arena size would divide by zero when counting arenas below */
+		if(asize == TWID64 || asize == 0)
+			usage();
+		break;
+	case 'b':
+		blocksize = unittoull(EARGF(usage()));
+		if(blocksize == ~0)
+			usage();
+		if(blocksize > MaxDiskBlock){
+			fprint(2, "block size too large, max %d\n", MaxDiskBlock);
+			threadexitsall("usage");
+		}
+		break;
+	case '4':
+		vers = ArenaVersion4;
+		break;
+	case 'Z':
+		zero = 0;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(zero == -1){
+		if(vers == ArenaVersion4)
+			zero = 1;
+		else
+			zero = 0;
+	}
+
+	if(argc != 2)
+		usage();
+
+	name = argv[0];
+	file = argv[1];
+
+	if(nameok(name) < 0)
+		sysfatal("illegal name template %s", name);
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	if(zero)
+		zeropart(part, blocksize);
+
+	maxblocksize = blocksize;
+	initdcache(20*blocksize);
+
+	ap = newarenapart(part, blocksize, tabsize);
+	if(ap == nil)
+		sysfatal("can't initialize arena: %r");
+
+	/* whole arenas that fit, plus one more if the remainder is big enough */
+	apsize = ap->size - ap->arenabase;
+	n = apsize / asize;
+	if(apsize - (n * asize) >= MinArenaSize)
+		n++;
+
+	fprint(2, "fmtarenas %s: %,d arenas, %,lld bytes storage, %,d bytes for index map\n",
+		file, n, apsize, ap->tabsize);
+
+	ap->narenas = n;
+	ap->map = MKNZ(AMap, n);
+	ap->arenas = MKNZ(Arena*, n);
+
+	addr = ap->arenabase;
+	for(i = 0; i < n; i++){
+		limit = addr + asize;
+		/* stretch this arena to the end rather than leave a runt behind it */
+		if(limit >= ap->size || ap->size - limit < MinArenaSize){
+			limit = ap->size;
+			if(limit - addr < MinArenaSize)
+				sysfatal("bad arena set math: runt arena at %lld,%lld %lld\n", addr, limit, ap->size);
+		}
+
+		snprint(aname, ANameSize, "%s%d", name, i);
+
+		if(0) fprint(2, "adding arena %s at [%lld,%lld)\n", aname, addr, limit);
+
+		arena = newarena(part, vers, aname, addr, limit - addr, blocksize);
+		if(!arena)
+			fprint(2, "can't make new arena %s: %r\n", aname);
+		freearena(arena);
+
+		ap->map[i].start = addr;
+		ap->map[i].stop = limit;
+		namecp(ap->map[i].name, aname);
+
+		addr = limit;
+	}
+
+	if(wbarenapart(ap) < 0)
+		fprint(2, "can't write back arena partition header for %s: %r\n", file);
+
+	flushdcache();
+	threadexitsall(0);
+}

+ 116 - 0
sys/src/cmd/venti/srv/fmtbloom.c

@@ -0,0 +1,116 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+Bloom b;	/* the filter being formatted: set up and written out by threadmain */
+
+/* Print the fmtbloom command synopsis on standard error and exit. */
+void
+usage(void)
+{
+	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
+	threadexitsall(nil);
+}
+
+/*
+ * Format file as an empty bloom filter.  The filter size defaults
+ * to the partition size, is capped at MaxBloomSize and rounded down
+ * to a power of two.  The number of hash functions comes from -N,
+ * or is derived from the expected block count given with -n, or
+ * defaults to BloomMaxHash.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	Part *part;
+	char *file;
+	vlong bits, size, size2;
+	int nhash;
+	vlong nblocks;
+	
+	ventifmtinstall();
+	statsinit();
+
+	size = 0;
+	nhash = 0;
+	nblocks = 0;
+	ARGBEGIN{
+	case 'n':
+		/* -n and -N are mutually exclusive and may be given once */
+		if(nhash || nblocks)
+			usage();
+		nblocks = unittoull(EARGF(usage()));
+		break;
+	case 'N':
+		if(nhash || nblocks)
+			usage();
+		nhash = unittoull(EARGF(usage()));
+		if(nhash > BloomMaxHash){
+			fprint(2, "maximum possible is -N %d\n", BloomMaxHash);
+			usage();
+		}
+		break;
+	case 's':
+		size = unittoull(EARGF(usage()));
+		if(size == ~0)
+			usage();
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	file = argv[0];
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	if(size == 0)
+		size = part->size;
+	
+	if(size < 1024*1024)
+		sysfatal("bloom filter too small");
+
+	if(size > MaxBloomSize){
+		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
+			size, (vlong)MaxBloomSize);
+		size = MaxBloomSize;
+	}
+	/* not a power of two: round down to the largest one below size */
+	if(size&(size-1)){
+		for(size2=1; size2<size; size2*=2)
+			;
+		size = size2/2;
+		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
+	}
+
+	if(nblocks){
+		/*
+		 * no use for more than 32 bits per block
+		 * shoot for less than 64 bits per block
+		 */
+		size2 = size;
+		while(size2*8 >= nblocks*64)
+			size2 >>= 1;
+		if(size2 != size){
+			size = size2;
+			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
+				size/1024/1024);
+		}
+
+		/*
+		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.  
+		 */
+		bits = (8*size)/nblocks;
+		nhash = bits*7/10;
+		if(nhash > BloomMaxHash)
+			nhash = BloomMaxHash;
+	}
+	if(!nhash)
+		nhash = BloomMaxHash;
+	if(bloominit(&b, size, nil) < 0)
+		sysfatal("bloominit: %r");
+	b.nhash = nhash;
+	/* invert the 7/10 rule above to report the block count this suits */
+	bits = nhash*10/7;
+	nblocks = (8*size)/bits;
+	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
+	b.data = vtmallocz(size);
+	b.part = part;
+	if(writebloom(&b) < 0)
+		sysfatal("writing %s: %r", file);
+	threadexitsall(0);
+}

+ 31 - 31
sys/src/cmd/venti/fmtindex.c → sys/src/cmd/venti/srv/fmtindex.c

@@ -6,11 +6,11 @@ void
 usage(void)
 {
 	fprint(2, "usage: fmtindex [-a] config\n");
-	exits(0);
+	threadexitsall(0);
 }
 
-int
-main(int argc, char *argv[])
+void
+threadmain(int argc, char *argv[])
 {
 	Config conf;
 	Index *ix;
@@ -22,10 +22,8 @@ main(int argc, char *argv[])
 	u32int i, j, n, narenas;
 	int add;
 
-	fmtinstall('V', vtScoreFmt);
-	fmtinstall('R', vtErrFmt);
-	vtAttach();
-	statsInit();
+	ventifmtinstall();
+	statsinit();
 
 	add = 0;
 	ARGBEGIN{
@@ -42,12 +40,12 @@ main(int argc, char *argv[])
 
 	file = argv[0];
 
-	if(!runConfig(file, &conf))
-		fatal("can't run config %s: %R", file);
+	if(runconfig(file, &conf) < 0)
+		sysfatal("can't initialize config %s: %r", file);
 	if(conf.index == nil)
-		fatal("no index specified in %s", file);
-	if(!nameOk(conf.index))
-		fatal("illegal index name %s", conf.index);
+		sysfatal("no index specified in %s", file);
+	if(nameok(conf.index) < 0)
+		sysfatal("illegal index name %s", conf.index);
 
 	narenas = 0;
 	for(i = 0; i < conf.naparts; i++){
@@ -56,22 +54,19 @@ main(int argc, char *argv[])
 	}
 
 	if(add){
-		ix = initIndex(conf.index, conf.sects, conf.nsects);
+		ix = initindex(conf.index, conf.sects, conf.nsects);
 		if(ix == nil)
-			fatal("can't initialize index %s: %R", conf.index);
+			sysfatal("can't initialize index %s: %r", conf.index);
 	}else{
-		ix = newIndex(conf.index, conf.sects, conf.nsects);
+		ix = newindex(conf.index, conf.sects, conf.nsects);
 		if(ix == nil)
-			fatal("can't create new index %s: %R", conf.index);
+			sysfatal("can't create new index %s: %r", conf.index);
 
 		n = 0;
 		for(i = 0; i < ix->nsects; i++)
 			n += ix->sects[i]->blocks;
 
-		if(ix->div < 100)
-			fatal("index divisor too coarse: use bigger block size");
-
-		fprint(2, "using %ud buckets of %ud; div=%d\n", ix->buckets, n, ix->div);
+		if(0) fprint(2, "using %ud buckets of %ud; div=%d\n", ix->buckets, n, ix->div);
 	}
 	amap = MKNZ(AMap, narenas);
 	arenas = MKNZ(Arena*, narenas);
@@ -82,39 +77,44 @@ main(int argc, char *argv[])
 		ap = conf.aparts[i];
 		for(j = 0; j < ap->narenas; j++){
 			if(n >= narenas)
-				fatal("too few slots in index's arena set");
+				sysfatal("too few slots in index's arena set");
 
 			arenas[n] = ap->arenas[j];
 			if(n < ix->narenas){
 				if(arenas[n] != ix->arenas[n])
-					fatal("mismatched arenas %s and %s at slot %d\n",
+					sysfatal("mismatched arenas %s and %s at slot %d\n",
 						arenas[n]->name, ix->arenas[n]->name, n);
 				amap[n] = ix->amap[n];
 				if(amap[n].start != addr)
-					fatal("mis-located arena %s in index %s\n", arenas[n]->name, ix->name);
+					sysfatal("mis-located arena %s in index %s\n", arenas[n]->name, ix->name);
 				addr = amap[n].stop;
 			}else{
 				amap[n].start = addr;
 				addr += ap->arenas[j]->size;
 				amap[n].stop = addr;
-				nameCp(amap[n].name, ap->arenas[j]->name);
-				fprint(2, "add arena %s at [%lld,%lld)\n",
+				namecp(amap[n].name, ap->arenas[j]->name);
+				if(0) fprint(2, "add arena %s at [%lld,%lld)\n",
 					amap[n].name, amap[n].start, amap[n].stop);
 			}
 
 			n++;
 		}
 	}
-	fprint(2, "configured index=%s with arenas=%d and storage=%lld\n",
-		ix->name, n, addr - IndexBase);
+	if(0){
+		fprint(2, "configured index=%s with arenas=%d and storage=%lld\n",
+			ix->name, n, addr - IndexBase);
+		fprint(2, "\tbuckets=%d\n",
+			ix->buckets);
+	}
+	fprint(2, "fmtindex: %,d arenas, %,d index buckets, %,lld bytes storage\n",
+		n, ix->buckets, addr-IndexBase);
 
 	ix->amap = amap;
 	ix->arenas = arenas;
 	ix->narenas = narenas;
 
-	if(!wbIndex(ix))
-		fprint(2, "can't write back arena partition header for %s: %R\n", file);
+	if(wbindex(ix) < 0)
+		fprint(2, "can't write back arena partition header for %s: %r\n", file);
 
-	exits(0);
-	return 0;	/* shut up stupid compiler */
+	threadexitsall(0);
 }

+ 83 - 0
sys/src/cmd/venti/srv/fmtisect.c

@@ -0,0 +1,83 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/* Print the fmtisect command synopsis on standard error and exit. */
+void
+usage(void)
+{
+	fprint(2, "usage: fmtisect [-Z] [-b blocksize] name file\n");
+	threadexitsall(nil);
+}
+
+/*
+ * Format file as an index section called name.  The partition is
+ * zeroed first by default only for version 1 sections; -Z turns
+ * zeroing off.
+ */
+void
+threadmain(int argc, char *argv[])
+{
+	int vers;
+	ISect *is;
+	Part *part;
+	char *file, *name;
+	int blocksize, setsize, zero;
+
+	ventifmtinstall();
+	statsinit();
+
+	blocksize = 8 * 1024;
+	setsize = 512 * 1024;
+	zero = -1;			/* -1: not chosen yet, decided from vers below */
+	vers = ISectVersion2;
+	ARGBEGIN{
+	case 'b':
+		/* EARGF reports a missing argument itself instead of handing nil on */
+		blocksize = unittoull(EARGF(usage()));
+		if(blocksize == ~0)
+			usage();
+		if(blocksize > MaxDiskBlock){
+			fprint(2, "block size too large, max %d\n", MaxDiskBlock);
+			threadexitsall("usage");
+		}
+		break;
+	case '1':
+		vers = ISectVersion1;
+		break;
+	case 'Z':
+		zero = 0;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND
+
+	if(zero == -1){
+		if(vers == ISectVersion1)
+			zero = 1;
+		else
+			zero = 0;
+	}
+
+	if(argc != 2)
+		usage();
+
+	name = argv[0];
+	file = argv[1];
+
+	if(nameok(name) < 0)
+		sysfatal("illegal name %s", name);
+
+	part = initpart(file, ORDWR|ODIRECT);
+	if(part == nil)
+		sysfatal("can't open partition %s: %r", file);
+
+	if(zero)
+		zeropart(part, blocksize);
+
+	is = newisect(part, vers, name, blocksize, setsize);
+	if(is == nil)
+		sysfatal("can't initialize new index: %r");
+
+	fprint(2, "fmtisect %s: %,d buckets of %,d entries, %,d bytes for index map\n",
+		file, is->blocks, is->buckmax, setsize);
+
+	if(wbisect(is) < 0)
+		fprint(2, "can't write back index section header for %s: %r\n", file);
+
+	threadexitsall(0);
+}

+ 225 - 0
sys/src/cmd/venti/srv/fns.h

@@ -0,0 +1,225 @@
+/*
+ * sorted by 4,/^$/|sort -bd +1
+ */
+int		addarena(Arena *name);
+void		addstat(int, int);
+void		addstat2(int, int, int, int);
+ZBlock		*alloczblock(u32int size, int zeroed, uint alignment);
+Arena		*amapitoa(Index *index, u64int a, u64int *aa);
+u64int		arenadirsize(Arena *arena, u32int clumps);
+void		arenaupdate(Arena *arena, u32int size, u8int *score);
+void		backsumarena(Arena *arena);
+void	binstats(long (*fn)(Stats *s0, Stats *s1, void*), void *arg, long t0, long t1, Statbin *bin, int nbin);
+int		bloominit(Bloom*, vlong, uchar*);
+int		bucklook(u8int*, int, u8int*, int);
+u32int		buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint);
+void		checkdcache(void);
+void		checklumpcache(void);
+int		clumpinfoeq(ClumpInfo *c, ClumpInfo *d);
+u32int		clumpmagic(Arena *arena, u64int aa);
+uint		countbits(uint n);
+int		delarena(Arena *arena);
+void		delaykickicache(void);
+void		delaykickround(Round*);
+void		delaykickroundproc(void*);
+void		dirtydblock(DBlock*, int);
+void		diskaccess(int);
+void		disksched(void);
+AState	diskstate(void);
+void		*emalloc(ulong);
+void		emptydcache(void);
+void		emptyicache(void);
+void		emptylumpcache(void);
+void		*erealloc(void *, ulong);
+char		*estrdup(char*);
+void		*ezmalloc(ulong);
+Arena		*findarena(char *name);
+int		flushciblocks(Arena *arena);
+void		flushdcache(void);
+void		flushicache(void);
+int		flushpart(Part*);
+void		flushqueue(void);
+void		fmtzbinit(Fmt *f, ZBlock *b);
+void		freearena(Arena *arena);
+void		freearenapart(ArenaPart *ap, int freearenas);
+void		freeiestream(IEStream *ies);
+void		freeifile(IFile *f);
+void		freeisect(ISect *is);
+void		freeindex(Index *index);
+void		freepart(Part *part);
+void		freezblock(ZBlock *b);
+DBlock		*_getdblock(Part *part, u64int addr, int mode, int load);
+DBlock		*getdblock(Part *part, u64int addr, int mode);
+u32int		hashbits(u8int *score, int nbits);
+char		*hargstr(HConnect*, char*, char*);
+vlong	hargint(HConnect*, char*, vlong);
+int		hdebug(HConnect*);
+int		hdisk(HConnect*);
+int		hnotfound(HConnect*);
+int		hsethtml(HConnect*);
+int		hsettext(HConnect*);
+int		httpdinit(char *address, char *webroot);
+int		iaddrcmp(IAddr *ia1, IAddr *ia2);
+IEntry*	icachedirty(u32int, u32int, u64int);
+ulong	icachedirtyfrac(void);
+void		icacheclean(IEntry*);
+int		ientrycmp(const void *vie1, const void *vie2);
+char		*ifileline(IFile *f);
+int		ifilename(IFile *f, char *dst);
+int		ifileu32int(IFile *f, u32int *r);
+int		inbloomfilter(Bloom*, u8int*);
+int		indexsect(Index *ix, u8int *score);
+int		indexsect0(Index *ix, u32int buck);
+Arena		*initarena(Part *part, u64int base, u64int size, u32int blocksize);
+ArenaPart	*initarenapart(Part *part);
+int		initarenasum(void);
+void		initbloomfilter(Index*);
+void		initdcache(u32int mem);
+void		initicache(int bits, int depth);
+void		initicachewrite(void);
+IEStream	*initiestream(Part *part, u64int off, u64int clumps, u32int size);
+ISect		*initisect(Part *part);
+Index		*initindex(char *name, ISect **sects, int n);
+void		initlumpcache(u32int size, u32int nblocks);
+int		initlumpqueues(int nq);
+Part*		initpart(char *name, int mode);
+void		initround(Round*, char*, int);
+int		initventi(char *config, Config *conf);
+void		insertlump(Lump *lump, Packet *p);
+int		insertscore(u8int *score, IAddr *ia, int write);
+void		kickdcache(void);
+void		kickicache(void);
+void		kickround(Round*, int wait);
+int		loadbloom(Bloom*);
+ZBlock		*loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify);
+DBlock	*loadibucket(Index *index, u8int *score, ISect **is, u32int *buck, IBucket *ib);
+int		loadientry(Index *index, u8int *score, int type, IEntry *ie);
+void		logerr(int severity, char *fmt, ...);
+Lump		*lookuplump(u8int *score, int type);
+int		_lookupscore(u8int *score, int type, IAddr *ia, int *rac);
+int		lookupscore(u8int *score, int type, IAddr *ia, int *rac);
+int		maparenas(AMap *am, Arena **arenas, int n, char *what);
+void		markbloomfilter(Bloom*, u8int*);
+uint		msec(void);
+int		namecmp(char *s, char *t);
+void		namecp(char *dst, char *src);
+int		nameok(char *name);
+void		needzeroscore(void);
+Arena		*newarena(Part *part, u32int, char *name, u64int base, u64int size, u32int blocksize);
+ArenaPart	*newarenapart(Part *part, u32int blocksize, u32int tabsize);
+ISect		*newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize);
+Index		*newindex(char *name, ISect **sects, int n);
+u32int		now(void);
+int		okamap(AMap *am, int n, u64int start, u64int stop, char *what);
+int		okibucket(IBucket*, ISect*);
+int		outputamap(Fmt *f, AMap *am, int n);
+int		outputindex(Fmt *f, Index *ix);
+int		_packarena(Arena *arena, u8int *buf, int);
+int		packarena(Arena *arena, u8int *buf);
+int		packarenahead(ArenaHead *head, u8int *buf);
+int		packarenapart(ArenaPart *as, u8int *buf);
+void		packbloomhead(Bloom*, u8int*);
+int		packclump(Clump *c, u8int *buf, u32int);
+void		packclumpinfo(ClumpInfo *ci, u8int *buf);
+void		packibucket(IBucket *b, u8int *buf, u32int magic);
+void		packientry(IEntry *i, u8int *buf);
+int		packisect(ISect *is, u8int *buf);
+void		packmagic(u32int magic, u8int *buf);
+ZBlock		*packet2zblock(Packet *p, u32int size);
+int		parseamap(IFile *f, AMapN *amn);
+int		parseindex(IFile *f, Index *ix);
+void		partblocksize(Part *part, u32int blocksize);
+int		partifile(IFile *f, Part *part, u64int start, u32int size);
+void		printarenapart(int fd, ArenaPart *ap);
+void		printarena(int fd, Arena *arena);
+void		printindex(int fd, Index *ix);
+void		printstats(void);
+void		putdblock(DBlock *b);
+void		putlump(Lump *b);
+int		queuewrite(Lump *b, Packet *p, int creator, uint ms);
+u32int		readarena(Arena *arena, u64int aa, u8int *buf, long n);
+int		readarenamap(AMapN *amn, Part *part, u64int base, u32int size);
+Bloom	*readbloom(Part*);
+int		readclumpinfo(Arena *arena, int clump, ClumpInfo *ci);
+int		readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n);
+ZBlock		*readfile(char *name);
+int		readifile(IFile *f, char *name);
+Packet		*readlump(u8int *score, int type, u32int size, int *cached);
+int		readpart(Part *part, u64int addr, u8int *buf, u32int n);
+int		resetbloom(Bloom*);
+int		runconfig(char *config, Config*);
+int		scorecmp(u8int *, u8int *);
+void		scoremem(u8int *score, u8int *buf, int size);
+void		setatailstate(AState*);
+void		setdcachestate(AState*);
+void		seterr(int severity, char *fmt, ...);
+void		setstat(int, long);
+void		settrace(char *type);
+u64int		sortrawientries(Index *ix, Part *tmp, u64int *tmpoff, Bloom *bloom);
+void		startbloomproc(Bloom*);
+Memimage*	statgraph(Graph *g);
+void		statsinit(void);
+int		storeclump(Index *index, ZBlock *b, u8int *score, int type, u32int creator, IAddr *ia);
+int		storeientry(Index *index, IEntry *m);
+int		strscore(char *s, u8int *score);
+int		stru32int(char *s, u32int *r);
+int		stru64int(char *s, u64int *r);
+void		sumarena(Arena *arena);
+int		syncarena(Arena *arena, u64int start, u32int n, int zok, int fix);
+int		syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pflush, int check);
+int		syncindex(Index *ix, int fix, int mustflushicache, int check);
+void		trace(char *type, char*, ...);
+void		traceinit(void);
+int		u64log2(u64int v);
+u64int		unittoull(char *s);
+int		unpackarena(Arena *arena, u8int *buf);
+int		unpackarenahead(ArenaHead *head, u8int *buf);
+int		unpackarenapart(ArenaPart *as, u8int *buf);
+int		unpackbloomhead(Bloom*, u8int*);
+int		unpackclump(Clump *c, u8int *buf, u32int);
+void		unpackclumpinfo(ClumpInfo *ci, u8int *buf);
+void		unpackibucket(IBucket *b, u8int *buf, u32int magic);
+void		unpackientry(IEntry *i, u8int *buf);
+int		unpackisect(ISect *is, u8int *buf);
+u32int		unpackmagic(u8int *buf);
+void		ventifmtinstall(void);
+void		vtloghdump(Hio*, VtLog*);
+void		vtloghlist(Hio*);
+int		vtproc(void(*)(void*), void*);
+int		vttypevalid(int type);
+void		waitforkick(Round*);
+int		wbarena(Arena *arena);
+int		wbarenahead(Arena *arena);
+int		wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size);
+int		wbarenapart(ArenaPart *ap);
+void		wbbloomhead(Bloom*);
+int		wbisect(ISect *is);
+int		wbindex(Index *ix);
+int		whackblock(u8int *dst, u8int *src, int ssize);
+u64int		writeaclump(Arena *a, Clump *c, u8int *clbuf, u64int, u64int*);
+u32int		writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n);
+int		writebloom(Bloom*);
+int		writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci);
+int		writepng(Hio*, Memimage*);
+u64int		writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int*);
+int		writelump(Packet *p, u8int *score, int type, u32int creator, uint ms);
+int		writepart(Part *part, u64int addr, u8int *buf, u32int n);
+int		writeqlump(Lump *u, Packet *p, int creator, uint ms);
+Packet		*zblock2packet(ZBlock *zb, u32int size);
+void		zeropart(Part *part, int blocksize);
+
+/*
+#pragma	varargck	argpos	sysfatal		1
+#pragma	varargck	argpos	logerr		2
+#pragma	varargck	argpos	seterr		2
+*/
+
+#define scorecmp(h1,h2)		memcmp((h1),(h2),VtScoreSize)	/* compare two scores, VtScoreSize bytes each */
+#define scorecp(h1,h2)		memmove((h1),(h2),VtScoreSize)	/* copy score h2 into h1 */
+
+#define MK(t)			((t*)emalloc(sizeof(t)))	/* one t, via emalloc */
+#define MKZ(t)			((t*)ezmalloc(sizeof(t)))	/* one t, via ezmalloc */
+#define MKN(t,n)		((t*)emalloc((n)*sizeof(t)))	/* array of n t's, via emalloc */
+#define MKNZ(t,n)		((t*)ezmalloc((n)*sizeof(t)))	/* array of n t's, via ezmalloc */
+#define MKNA(t,at,n)		((t*)emalloc(sizeof(t) + (n)*sizeof(at)))	/* one t followed by room for n at's */

+ 197 - 0
sys/src/cmd/venti/srv/graph.c

@@ -0,0 +1,197 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	Top = 1,	/* margins between the image edge and the plot rectangle (see statgraph) */
+	Bottom = 1,
+	Left = 40,	/* room on the left for the y-axis labels */
+	Right = 0,
+	MinWidth = Left+Right+2,	/* requests at or below this size fall back to the defaults */
+	MinHeight = Top+Bottom+2,
+	DefaultWidth = Left+Right+500,
+	DefaultHeight = Top+Bottom+40
+};
+
+QLock memdrawlock;	/* held by statgraph around ginit and all drawing */
+static Memsubfont *smallfont;	/* label font, opened once by ginit */
+static Memimage *black;
+static Memimage *blue;
+static Memimage *red;
+static Memimage *lofill[6];	/* ginit fills these from the even entries of fill[] */
+static Memimage *hifill[6];	/* ginit fills these from the odd entries of fill[] */
+static Memimage *grid;
+
+static ulong fill[] = {	/* color pairs: lofill color first, then hifill color */
+	0xFFAAAAFF,	0xBB5D5DFF,	/* peach */
+	DPalegreygreen, DPurpleblue,	/* aqua */
+	DDarkyellow, DYellowgreen,	/* yellow */
+	DMedgreen, DDarkgreen,		/* green */
+	0x00AAFFFF, 0x0088CCFF,	/* blue */
+	0xCCCCCCFF, 0x888888FF,	/* grey */
+};
+
+/*
+ * Allocate a 1x1 RGB24 image filled with color, marked replicated
+ * and given a very large clip rectangle so it can serve as a solid
+ * source for drawing.  Returns nil if the image cannot be allocated.
+ */
+Memimage*
+allocrepl(ulong color)
+{
+	Memimage *m;
+
+	m = allocmemimage(Rect(0,0,1,1), RGB24);
+	if(m == nil)	/* don't fill or flag a missing image */
+		return nil;
+	memfillcolor(m, color);
+	m->flags |= Frepl;
+	m->clipr = Rect(-1000000, -1000000, 1000000, 1000000);
+	return m;
+}
+
+/*
+ * One-time setup of the label font and the solid-color images
+ * used for drawing.  Only the first call does any work.
+ */
+static void
+ginit(void)
+{
+	static int first = 1;
+	int k;
+
+	if(first == 0)
+		return;
+	first = 0;
+
+	memimageinit();
+#ifdef PLAN9PORT
+	smallfont = openmemsubfont(unsharp("#9/font/lucsans/lstr.10"));
+#else
+	smallfont = openmemsubfont("/lib/font/bit/lucidasans/lstr.10");
+#endif
+	black = memblack;
+	blue = allocrepl(DBlue);
+	red = allocrepl(DRed);
+	grid = allocrepl(0x77777777);
+
+	/* fill[] holds (lo, hi) color pairs; stop at whichever table runs out first */
+	k = 0;
+	while(k<nelem(fill)/2 && k<nelem(lofill) && k<nelem(hifill)){
+		lofill[k] = allocrepl(fill[2*k]);
+		hifill[k] = allocrepl(fill[2*k+1]);
+		k++;
+	}
+}
+
+/*
+ * Format v into str as a short axis label: plain digits below 10000,
+ * thousands with a "k" suffix below 10000000, otherwise millions with
+ * an "M" suffix.  Negative values get a leading '-'.
+ */
+static void
+mklabel(char *str, int v)
+{
+	char *out;
+	int mag;
+
+	out = str;
+	mag = v;
+	if(mag < 0){
+		*out++ = '-';
+		mag = -mag;
+	}
+	if(mag >= 10000000){
+		sprint(out, "%dM", mag/1000000);
+		return;
+	}
+	if(mag >= 10000){
+		sprint(out, "%dk", mag/1000);
+		return;
+	}
+	sprint(out, "%d", mag);
+}
+
+/*
+ * Draw the label for n in black so that its right edge sits
+ * 5 pixels to the left of p.x, at height p.y.
+ */
+static void
+drawlabel(Memimage *m, Point p, int n)
+{
+	char text[30];
+	Point size, at;
+
+	mklabel(text, n);
+	size = memsubfontwidth(smallfont, text);
+	at = Pt(p.x-5-size.x, p.y);
+	memimagestring(m, at, memblack, ZP, smallfont, text);
+}
+
+/*
+ * Map val, clamped to [valmin, valmax], linearly onto the
+ * range ptmin..ptmax.  An empty value range is treated as
+ * having width 1 to avoid dividing by zero.
+ */
+static int
+scalept(int val, int valmin, int valmax, int ptmin, int ptmax)
+{
+	int den;
+	vlong num;
+
+	if(val < valmin)
+		val = valmin;
+	if(val > valmax)
+		val = valmax;
+	den = valmax - valmin;
+	if(den == 0)
+		den = 1;
+	num = (vlong)(val-valmin)*(ptmax-ptmin);
+	return ptmin + num/den;
+}
+
+/*
+ * Render the statistics described by g as a new image.
+ *
+ * Out-of-range g->wid, g->ht and g->fill are replaced in g with usable
+ * values.  The samples are binned one bin per pixel column with binstats.
+ * If g->min/g->max do not give a valid range (min < 0 or max <= min),
+ * the range is taken from the non-empty bins.
+ *
+ * Returns the image, or nil if graphics setup or allocation fails.
+ * memdrawlock is held while drawing.
+ */
+Memimage*
+statgraph(Graph *g)
+{
+	int i, nbin, x, lo, hi, min, max, first;
+	Memimage *m, *locolor, *hicolor;
+	Rectangle r;
+	Statbin *b, bin[2000];	/* 32 kB, but whack is worse */
+
+	needstack(8192);	/* double check that bin didn't kill us */
+
+	if(g->wid <= MinWidth)
+		g->wid = DefaultWidth;
+	if(g->ht <= MinHeight)
+		g->ht = DefaultHeight;
+	if(g->wid > nelem(bin))
+		g->wid = nelem(bin);
+	if(g->fill < 0)
+		g->fill = ((uint)(uintptr)g->arg>>8)%nelem(lofill);
+	if(g->fill >= nelem(lofill))	/* keep the index strictly inside the tables */
+		g->fill %= nelem(lofill);
+
+	nbin = g->wid - (Left+Right);
+	binstats(g->fn, g->arg, g->t0, g->t1, bin, nbin);
+
+	/*
+	 * compute bounds
+	 */
+	min = g->min;
+	max = g->max;
+	if(min < 0 || max <= min){
+		min = max = 0;
+		first = 1;
+		for(i=0; i<nbin; i++){
+			b = &bin[i];
+			if(b->nsamp == 0)
+				continue;
+			if(first || b->min < min)
+				min = b->min;
+			if(first || b->max > max)
+				max = b->max;
+			first = 0;
+		}
+	}
+
+	qlock(&memdrawlock);
+	ginit();
+	/*
+	 * check the fill images actually used; the lofill and hifill
+	 * arrays themselves can never be nil.
+	 */
+	locolor = lofill[g->fill];
+	hicolor = hifill[g->fill];
+	if(smallfont==nil || black==nil || blue==nil || red==nil || hicolor==nil || locolor==nil){
+		werrstr("graphics initialization failed: %r");
+		qunlock(&memdrawlock);
+		return nil;
+	}
+
+	/* fresh image */
+	m = allocmemimage(Rect(0,0,g->wid,g->ht), ABGR32);
+	if(m == nil){
+		qunlock(&memdrawlock);
+		return nil;
+	}
+	r = Rect(Left, Top, g->wid-Right, g->ht-Bottom);
+	memfillcolor(m, DTransparent);
+
+	/* x axis */
+	memimagedraw(m, Rect(r.min.x, r.max.y, r.max.x, r.max.y+1), black, ZP, memopaque, ZP, S);
+
+	/* y labels */
+	drawlabel(m, r.min, max);
+	if(min != 0)
+		drawlabel(m, Pt(r.min.x, r.max.y-smallfont->height), min);
+
+	/* actual data: hi color from the bin's max down to its min, lo color below */
+	for(i=0; i<nbin; i++){
+		b = &bin[i];
+		if(b->nsamp == 0)
+			continue;
+		lo = scalept(b->min, min, max, r.max.y, r.min.y);
+		hi = scalept(b->max, min, max, r.max.y, r.min.y);
+		x = r.min.x+i;
+		hi-=2;
+		memimagedraw(m, Rect(x, hi, x+1,lo), hicolor, ZP, memopaque, ZP, S);
+		memimagedraw(m, Rect(x, lo, x+1, r.max.y), locolor, ZP, memopaque, ZP, S);
+	}
+
+	/* label the right edge with the average of the last bin */
+	if(bin[nbin-1].nsamp)
+		drawlabel(m, Pt(r.max.x, r.min.y+(Dy(r)-smallfont->height)/2), bin[nbin-1].avg);
+	qunlock(&memdrawlock);
+	return m;
+}

+ 696 - 0
sys/src/cmd/venti/srv/hdisk.c

@@ -0,0 +1,696 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "whack.h"
+
+static int disksummary(HConnect*);
+static int diskarenapart(HConnect*, char*, Part*);
+static int diskbloom(HConnect*, char*, Part*);
+static int diskisect(HConnect*, char*, Part*);
+
+/*
+ * HTTP handler for the disk pages.  With no "disk" argument it prints
+ * the summary of all disks; otherwise it opens the named partition
+ * read-only and dispatches on the first letter of the "type" argument
+ * (a = arena partition, b = bloom filter, i = index section).
+ * Returns -1 only if the HTML header cannot be set up.
+ */
+int
+hdisk(HConnect *c)
+{
+	char *disk, *type;
+	Part *p;
+	int ret;
+
+	if(hsethtml(c) < 0)
+		return -1;
+
+	disk = hargstr(c, "disk", "");
+	if(!disk[0])
+		return disksummary(c);
+	if((p = initpart(disk, OREAD)) == nil){
+		hprint(&c->hout, "open %s: %r", disk);
+		return 0;
+	}
+
+	type = hargstr(c, "type", "");
+	switch(type[0]){
+	case 'a':
+		ret = diskarenapart(c, disk, p);
+		break;
+	case 'b':
+		ret = diskbloom(c, disk, p);
+		break;
+	case 'i':
+		ret = diskisect(c, disk, p);
+		break;
+	default:
+		hprint(&c->hout, "unknown disk type %s", type);
+		ret = 0;	/* fall out of the switch so the partition is released */
+		break;
+	}
+	freepart(p);
+	return ret;
+}
+
+/*
+ * Print links to every partition used by the main index: arena
+ * partitions, index section partitions and, if present, the bloom
+ * filter.  Consecutive entries on the same partition are listed once.
+ */
+static int
+disksummary(HConnect *c)
+{
+	int i;
+	Index *ix;
+	Part *p;
+
+	hprint(&c->hout, "<h1>venti disks</h1>\n");
+	hprint(&c->hout, "<pre>\n");
+	ix = mainindex;
+	p = nil;
+	for(i=0; i<ix->narenas; i++){
+		if(ix->arenas[i]->part == p)
+			continue;
+		p = ix->arenas[i]->part;
+		hprint(&c->hout, "<a href=\"/disk?disk=%s&type=a\">%s</a> %s\n", p->name, p->name, ix->arenas[i]->name);
+	}
+	hprint(&c->hout, "\n");
+	p = nil;
+	for(i=0; i<ix->nsects; i++){
+		if(ix->sects[i]->part == p)
+			continue;
+		p = ix->sects[i]->part;
+		hprint(&c->hout, "<a href=\"/disk?disk=%s&type=i\">%s</a> %s\n", p->name, p->name, ix->sects[i]->name);
+	}
+	hprint(&c->hout, "\n");
+	if(ix->bloom){
+		p = ix->bloom->part;
+		hprint(&c->hout, "<a href=\"/disk?disk=%s&type=b\">%s</a> %s\n", p->name, p->name, "bloom filter");
+	}
+	hprint(&c->hout, "</pre>\n");	/* close the block opened above */
+	return 0;
+}
+
+/*
+ * Read the arena partition header of p into ap and return the
+ * arena table that follows it as a NUL-terminated string.
+ * The caller frees the returned table.  On failure returns nil
+ * with the error string set; nothing is left allocated.
+ */
+static char*
+readap(Part *p, ArenaPart *ap)
+{
+	uchar *blk;
+	char *table;
+
+	blk = vtmalloc(8192);
+	if(readpart(p, PartBlank, blk, 8192) != 8192){
+		vtfree(blk);
+		return nil;
+	}
+	if(unpackarenapart(ap, blk) < 0){
+		werrstr("corrupt arena part header: %r");
+		vtfree(blk);
+		return nil;
+	}
+	vtfree(blk);
+
+	/* the table starts at the first block boundary after the header */
+	ap->tabbase = (PartBlank+HeadSize+ap->blocksize-1)&~(ap->blocksize-1);
+	if(ap->arenabase < ap->tabbase){
+		/* otherwise tabsize would wrap around to a huge value */
+		werrstr("corrupt arena part header: arena base precedes table");
+		return nil;
+	}
+	ap->tabsize = ap->arenabase - ap->tabbase;
+	table = vtmalloc(ap->tabsize+1);
+	if(readpart(p, ap->tabbase, (uchar*)table, ap->tabsize) != ap->tabsize){
+		werrstr("reading arena part directory: %r");
+		vtfree(table);
+		return nil;
+	}
+	table[ap->tabsize] = 0;
+	return table;
+}
+
+/*
+ * Look up name in the arena table text.  The first line of table
+ * gives the number of entries; each later line is "name start end".
+ * On a match store the bounds in *start and *end and return 0,
+ * otherwise return -1.  The table is modified: newlines that are
+ * scanned are overwritten with NULs.
+ */
+static int
+xfindarena(char *table, char *name, vlong *start, vlong *end)
+{
+	int k, nent;
+	char *cur, *next, *fld[4], copy[256];
+
+	nent = atoi(table);
+	cur = strchr(table, '\n');
+	if(cur != nil)
+		cur++;
+	for(k=0; k<nent && cur!=nil; k++, cur=next){
+		next = strchr(cur, '\n');
+		if(next != nil)
+			*next++ = 0;
+		/* skip lines too long to copy or with fewer than three fields */
+		if(strlen(cur) >= sizeof copy)
+			continue;
+		strcpy(copy, cur);
+		memset(fld, 0, sizeof fld);
+		if(tokenize(copy, fld, nelem(fld)) < 3)
+			continue;
+		if(strcmp(fld[0], name) != 0)
+			continue;
+		*start = strtoull(fld[1], 0, 0);
+		*end = strtoull(fld[2], 0, 0);
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Print the arena table of disk as HTML, turning each well-formed
+ * "name start end" line into a link to that arena's page.  Lines
+ * that are too long or have fewer than three fields are printed
+ * verbatim.  The table is modified: newlines that are scanned are
+ * overwritten with NULs.
+ */
+static void
+diskarenatable(HConnect *c, char *disk, char *table)
+{
+	int k, nent, ok;
+	char *cur, *next;
+	char *fld[4], copy[256], base[256];
+
+	hprint(&c->hout, "<h2>table</h2>\n");
+	hprint(&c->hout, "<pre>\n");
+	nent = atoi(table);
+	snprint(base, sizeof base, "/disk?disk=%s&type=a", disk);
+	cur = strchr(table, '\n');
+	if(cur != nil)
+		cur++;
+	for(k=0; k<nent; k++, cur=next){
+		if(cur == nil){
+			hprint(&c->hout, "<b><i>unexpected end of table</i></b>\n");
+			break;
+		}
+		next = strchr(cur, '\n');
+		if(next != nil)
+			*next++ = 0;
+
+		/* parse a private copy so the raw line can still be echoed */
+		ok = 0;
+		if(strlen(cur) < sizeof copy){
+			strcpy(copy, cur);
+			memset(fld, 0, sizeof fld);
+			ok = tokenize(copy, fld, 3) >= 3;
+		}
+		if(!ok){
+			hprint(&c->hout, "%s\n", cur);
+			continue;
+		}
+		hprint(&c->hout, "<a href=\"%s&arena=%s\">%s</a> %s %s\n",
+			base, fld[0], fld[0], fld[1], fld[2]);
+	}
+	hprint(&c->hout, "</pre>\n");
+}
+
+/*
+ * Copy the ctime text for time into buf, cut it off at
+ * index 28, and return buf.
+ */
+static char*
+fmttime(char *buf, ulong time)
+{
+	char *stamp;
+
+	stamp = ctime(time);
+	strcpy(buf, stamp);
+	buf[28] = '\0';
+	return buf;
+}
+
+
+static int diskarenaclump(HConnect*, Arena*, vlong, char*);
+static int diskarenatoc(HConnect*, Arena*);
+
+/*
+ * Print the page for arena partition p (named disk).
+ *
+ * With no "arena" argument, print the partition header and the arena
+ * table.  With an arena name, also print that arena's head and tail
+ * blocks and then, depending on the "clump" and "score" arguments,
+ * a single clump (diskarenaclump) or the table of contents
+ * (diskarenatoc).  Problems are reported in the page; always returns 0.
+ */
+static int
+diskarenapart(HConnect *c, char *disk, Part *p)
+{
+	char *arenaname;
+	ArenaPart ap;
+	ArenaHead head;
+	Arena arena;
+	char *table;
+	char *score;
+	char *clump;
+	uchar *blk;
+	vlong start, end, off;
+	char tbuf[60];
+
+	hprint(&c->hout, "<h1>arena partition %s</h1>\n", disk);
+
+	if((table = readap(p, &ap)) == nil){
+		hprint(&c->hout, "%r\n");
+		goto out;
+	}
+
+	hprint(&c->hout, "<pre>\n");
+	hprint(&c->hout, "version=%d blocksize=%d base=%d\n",
+		ap.version, ap.blocksize, ap.arenabase);
+	hprint(&c->hout, "</pre>\n");
+
+	arenaname = hargstr(c, "arena", "");
+	if(arenaname[0] == 0){
+		diskarenatable(c, disk, table);
+		goto out;
+	}
+
+	if(xfindarena(table, arenaname, &start, &end) < 0){
+		hprint(&c->hout, "no such arena %s\n", arenaname);
+		goto out;
+	}
+
+	hprint(&c->hout, "<h2>arena %s</h2>\n", arenaname);
+	hprint(&c->hout, "<pre>start=%#llx end=%#llx</pre>\n", start, end);
+	if(end < start || end - start < HeadSize){
+		hprint(&c->hout, "bad size %#llx\n", end - start);
+		goto out;
+	}
+
+	/* read arena header, tail */
+	blk = vtmalloc(HeadSize);
+	if(readpart(p, start, blk, HeadSize) != HeadSize){
+		hprint(&c->hout, "reading header: %r\n");
+		vtfree(blk);
+		goto out;
+	}
+	if(unpackarenahead(&head, blk) < 0){
+		hprint(&c->hout, "corrupt arena header: %r\n");
+		vtfree(blk);
+		goto out;
+	}
+	vtfree(blk);
+
+	hprint(&c->hout, "head:\n<pre>\n");
+	hprint(&c->hout, "version=%d name=%s blocksize=%d size=%#llx clumpmagic=%#ux\n",
+		head.version, head.name, head.blocksize, head.size,
+		head.clumpmagic);
+	hprint(&c->hout, "</pre><br><br>\n");
+
+	if(head.blocksize > MaxIoSize || head.blocksize >= end - start){
+		hprint(&c->hout, "corrupt block size %d\n", head.blocksize);
+		goto out;
+	}
+
+	/* the tail is the last block of the arena */
+	blk = vtmalloc(head.blocksize);
+	if(readpart(p, end - head.blocksize, blk, head.blocksize) != head.blocksize){
+		hprint(&c->hout, "reading tail: %r\n");
+		vtfree(blk);
+		goto out;
+	}
+	memset(&arena, 0, sizeof arena);
+	arena.part = p;
+	arena.blocksize = head.blocksize;
+	arena.clumpmax = head.blocksize / ClumpInfoSize;
+	arena.base = start + head.blocksize;
+	arena.size = end - start - 2 * head.blocksize;
+	if(unpackarena(&arena, blk) < 0){
+		hprint(&c->hout, "corrupt arena tail: %r\n");
+		vtfree(blk);
+		goto out;
+	}
+	/* the arena score is taken from the last VtScoreSize bytes of the tail block */
+	scorecp(arena.score, blk+head.blocksize - VtScoreSize);
+
+	vtfree(blk);
+
+	hprint(&c->hout, "tail:\n<pre>\n");
+	hprint(&c->hout, "version=%d name=%s\n", arena.version, arena.name);
+	hprint(&c->hout, "ctime=%d %s\n", arena.ctime, fmttime(tbuf, arena.ctime));
+	hprint(&c->hout, "wtime=%d %s\n", arena.wtime, fmttime(tbuf, arena.wtime));
+	hprint(&c->hout, "clumpmagic=%#ux\n", arena.clumpmagic);
+	hprint(&c->hout, "score %V\n", arena.score);
+	hprint(&c->hout, "diskstats:\n");
+	hprint(&c->hout, "\tclumps=%,d cclumps=%,d used=%,lld uncsize=%,lld sealed=%d\n",
+		arena.diskstats.clumps, arena.diskstats.cclumps,
+		arena.diskstats.used, arena.diskstats.uncsize,
+		arena.diskstats.sealed);
+	hprint(&c->hout, "memstats:\n");
+	hprint(&c->hout, "\tclumps=%,d cclumps=%,d used=%,lld uncsize=%,lld sealed=%d\n",
+		arena.memstats.clumps, arena.memstats.cclumps,
+		arena.memstats.used, arena.memstats.uncsize,
+		arena.memstats.sealed);
+	hprint(&c->hout, "</pre>\n");
+	if(arena.clumpmax == 0){
+		hprint(&c->hout, "bad clumpmax\n");
+		goto out;
+	}
+
+	score = hargstr(c, "score", "");
+	clump = hargstr(c, "clump", "");
+
+	if(clump[0]){
+		off = strtoull(clump, 0, 0);
+		diskarenaclump(c, &arena, off, score[0] ? score : nil);
+	}else if(score[0]){
+		diskarenaclump(c, &arena, -1, score);
+	}else{
+		diskarenatoc(c, &arena);
+	}
+
+out:
+	free(table);
+	return 0;
+}
+
+/*
+ * Scan the clump info directory of arena for score.  The directory
+ * is read one block at a time, walking backwards from the end of the
+ * arena.  Returns the clump's offset relative to the start of the
+ * arena data, or -1 if it is not found or a directory block cannot
+ * be read.  A read failure is reported on c when c is not nil.
+ */
+static vlong
+findintoc(HConnect *c, Arena *arena, uchar *score)
+{
+	uchar *dirblk;
+	int n, slot;
+	vlong diroff, clumpoff, found;
+	ClumpInfo ci;
+
+	dirblk = vtmalloc(arena->blocksize);
+	diroff = arena->base + arena->size;
+	clumpoff = 0;
+	found = -1;
+	for(n=0; n<arena->memstats.clumps; n++){
+		slot = n%arena->clumpmax;
+		if(slot == 0){
+			/* first entry of a directory block: fetch the next block down */
+			diroff -= arena->blocksize;
+			if(readpart(arena->part, diroff, dirblk, arena->blocksize) != arena->blocksize){
+				if(c != nil)
+					hprint(&c->hout, "<i>clump info directory at %#llx: %r</i>\n<br>\n",
+						diroff);
+				break;
+			}
+		}
+		unpackclumpinfo(&ci, dirblk+slot*ClumpInfoSize);
+		if(scorecmp(ci.score, score) == 0){
+			found = clumpoff;
+			break;
+		}
+		clumpoff += ClumpSize + ci.size;
+	}
+	vtfree(dirblk);
+	return found;
+}
+
+
+/*
+ * Print the table of contents of arena from its clump info
+ * directory.  Only the first entry of each directory block is shown,
+ * with a "more" link, except for the block selected by the "cib"
+ * argument, which is shown in full.  Always returns 0.
+ */
+static int
+diskarenatoc(HConnect *c, Arena *arena)
+{
+	uchar *blk;
+	int i;
+	vlong off;
+	vlong coff;
+	ClumpInfo ci;
+	char base[512];
+	int cib;
+
+	snprint(base, sizeof base, "/disk?disk=%s&type=a&arena=%s",
+		arena->part->name, arena->name);
+
+	blk = vtmalloc(arena->blocksize);
+	off = arena->base + arena->size;
+	hprint(&c->hout, "<h2>table of contents</h2>\n");
+	hprint(&c->hout, "<pre>\n");
+	hprint(&c->hout, "%5s %6s %7s %s\n", "type", "size", "uncsize", "score");
+	coff = 0;
+	cib = hargint(c, "cib", 0);
+
+	for(i=0; i<arena->memstats.clumps; i++){
+		if(i%arena->clumpmax == 0){
+			off -= arena->blocksize;
+			if(readpart(arena->part, off, blk, arena->blocksize) != arena->blocksize){
+				hprint(&c->hout, "<i>clump info directory at %#llx: %r</i>\n<br>\n",
+					off);
+				/*
+				 * skip the rest of this directory block; clump
+				 * offsets can no longer be computed after the gap.
+				 */
+				i += arena->clumpmax-1;
+				coff = -1;
+				continue;
+			}
+		}
+		unpackclumpinfo(&ci, blk+(i%arena->clumpmax)*ClumpInfoSize);
+		if(i/arena->clumpmax == cib || i%arena->clumpmax == 0){
+			hprint(&c->hout, "%5d %6d %7d %V",
+				ci.type, ci.size, ci.uncsize, ci.score);
+			if(coff >= 0)
+				hprint(&c->hout, " at <a href=\"%s&clump=%#llx&score=%V\">%#llx</a>",
+					base, coff, ci.score, coff);
+			if(i/arena->clumpmax != cib)
+				hprint(&c->hout, "  <font size=-1><a href=\"%s&cib=%d\">more</a></font>", base, i/arena->clumpmax);
+			hprint(&c->hout, "\n");
+		}
+		if(coff >= 0)
+			coff += ClumpSize + ci.size;
+	}
+	hprint(&c->hout, "</pre>\n");
+	vtfree(blk);	/* the directory buffer was previously leaked */
+	return 0;
+}
+
+/* big-endian 32-bit load from a byte pointer */
+#define	U32GET(p)	((u32int)(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3]))
+
+/*
+ * Print the clump stored at offset off within arena's data and
+ * verify its score.
+ *
+ * If scorestr is given it is the expected score; when off is negative
+ * the clump is located by searching the table of contents for it.
+ * If unpacking fails with a magic error, it is retried with the magic
+ * found in the clump itself.  Returns 0 if the clump could be read
+ * and scored, -1 otherwise.
+ */
+static int
+diskarenaclump(HConnect *c, Arena *arena, vlong off, char *scorestr)
+{
+	uchar *blk, *blk2;
+	Clump cl;
+	char err[ERRMAX];
+	uchar xscore[VtScoreSize], score[VtScoreSize];
+	Unwhack uw;
+	int n;
+
+	if(scorestr){
+		if(vtparsescore(scorestr, nil, score) < 0){
+			hprint(&c->hout, "bad score %s: %r\n", scorestr);
+			return -1;
+		}
+		if(off < 0){
+			off = findintoc(c, arena, score);
+			if(off < 0){
+				hprint(&c->hout, "score %V not found in arena %s\n", score, arena->name);
+				return -1;
+			}
+			hprint(&c->hout, "score %V at %#llx\n", score, off);
+		}
+	}else
+		memset(score, 0, sizeof score);
+
+	if(off < 0){
+		hprint(&c->hout, "bad offset %#llx\n", off);
+		return -1;
+	}
+
+	off += arena->base;
+
+	blk = vtmalloc(ClumpSize + VtMaxLumpSize);
+	if(readpart(arena->part, off, blk, ClumpSize + VtMaxLumpSize) != ClumpSize + VtMaxLumpSize){
+		hprint(&c->hout, "reading at %#llx: %r\n", off);
+		vtfree(blk);
+		return -1;
+	}
+
+	if(unpackclump(&cl, blk, arena->clumpmagic) < 0){
+		hprint(&c->hout, "unpackclump: %r\n<br>");
+		rerrstr(err, sizeof err);
+		if(strstr(err, "magic")){
+			hprint(&c->hout, "trying again with magic=%#ux<br>\n", U32GET(blk));
+			if(unpackclump(&cl, blk, U32GET(blk)) < 0){
+				hprint(&c->hout, "unpackclump: %r\n<br>\n");
+				goto error;
+			}
+		}else
+			goto error;
+	}
+
+	hprint(&c->hout, "<pre>type=%d size=%d uncsize=%d score=%V\n", cl.info.type, cl.info.size, cl.info.uncsize, cl.info.score);
+	hprint(&c->hout, "encoding=%d creator=%d time=%d %s</pre>\n", cl.encoding, cl.creator, cl.time, fmttime(err, cl.time));
+
+	if(cl.info.type == VtCorruptType)
+		hprint(&c->hout, "clump is marked corrupt<br>\n");
+
+	if(cl.info.size >= VtMaxLumpSize){
+		hprint(&c->hout, "clump too big\n");
+		goto error;
+	}
+
+	switch(cl.encoding){
+	case ClumpECompress:
+		/* blk2 holds VtMaxLumpSize bytes; never let unwhack write past it */
+		if(cl.info.uncsize > VtMaxLumpSize){
+			hprint(&c->hout, "uncompressed size too big\n");
+			goto error;
+		}
+		blk2 = vtmalloc(VtMaxLumpSize);
+		unwhackinit(&uw);
+		n = unwhack(&uw, blk2, cl.info.uncsize, blk+ClumpSize, cl.info.size);
+		if(n < 0){
+			hprint(&c->hout, "decompression failed\n");
+			vtfree(blk2);
+			goto error;
+		}
+		if(n != cl.info.uncsize){
+			hprint(&c->hout, "got wrong amount: %d wanted %d\n", n, cl.info.uncsize);
+			vtfree(blk2);
+			goto error;
+		}
+		scoremem(xscore, blk2, cl.info.uncsize);
+		vtfree(blk2);
+		break;
+	case ClumpENone:
+		scoremem(xscore, blk+ClumpSize, cl.info.size);
+		break;
+	default:
+		/* without this, xscore would be printed uninitialized */
+		hprint(&c->hout, "unknown encoding %d\n", cl.encoding);
+		goto error;
+	}
+
+	hprint(&c->hout, "score=%V<br>\n", xscore);
+	if(scorestr && scorecmp(score, xscore) != 0)
+		hprint(&c->hout, "score does NOT match expected %V\n", score);
+
+	vtfree(blk);
+	return 0;
+
+error:
+	vtfree(blk);
+	return -1;
+}
+
+static int
+diskbloom(HConnect *c, char *disk, Part *p)
+{
+	USED(c);	/* stub for the type=b page: every argument is unused */
+	USED(disk);
+	USED(p);
+	return 0;	/* nothing is printed */
+}
+
+static int
+diskisect(HConnect *c, char *disk, Part *p)
+{
+	USED(c);	/* stub for the type=i page: every argument is unused */
+	USED(disk);
+	USED(p);
+	return 0;	/* nothing is printed */
+}
+
+/*
+ * Print the main index's arena map: one line per arena giving
+ * its name and its start and stop addresses.
+ */
+static void
+debugamap(HConnect *c)
+{
+	int i;
+	AMap *amap;
+
+	hprint(&c->hout, "<h2>arena map</h2>\n");
+	hprint(&c->hout, "<pre>\n");
+
+	amap = mainindex->amap;
+	for(i=0; i<mainindex->narenas; i++)
+		hprint(&c->hout, "%s %#llx %#llx\n",
+			amap[i].name, amap[i].start, amap[i].stop);
+	hprint(&c->hout, "</pre>\n");	/* close the block opened above */
+}
+
+/*
+ * Trace a read of score through each lookup layer and print what
+ * every step finds: the index cache, the on-disk index, and then,
+ * for each block type, the lump cache, lookupscore, the arena map
+ * and the clump itself.  If the "brute" argument starts with 'y',
+ * also search every arena's table of contents for the score.
+ */
+static void
+debugread(HConnect *c, u8int *score)
+{
+	int type;
+	Lump *u;
+	IAddr ia;
+	IEntry ie;
+	int i, rac;
+	Arena *arena;
+	u64int aa;
+	ZBlock *zb;
+	Clump cl;
+	vlong off;
+	u8int sc[VtScoreSize];
+
+	if(scorecmp(score, zeroscore) == 0){
+		hprint(&c->hout, "zero score\n");
+		return;
+	}
+
+	hprint(&c->hout, "<h2>index search %V</h2><pre>\n", score);
+	if(_lookupscore(score, -1, &ia, nil) < 0)
+		hprint(&c->hout, "  icache: not found\n");
+	else
+		hprint(&c->hout, "  icache: addr=%#llx size=%d type=%d blocks=%d\n",
+			ia.addr, ia.size, ia.type, ia.blocks);
+
+	if(loadientry(mainindex, score, -1, &ie) < 0)
+		hprint(&c->hout, "  idisk: not found\n");
+	else
+		hprint(&c->hout, "  idisk: addr=%#llx size=%d type=%d blocks=%d\n",
+			ie.ia.addr, ie.ia.size, ie.ia.type, ie.ia.blocks);
+
+	hprint(&c->hout, "</pre><h2>lookup %V</h2>\n", score);
+	hprint(&c->hout, "<pre>\n");
+
+	for(type=0; type < VtMaxType; type++){
+		hprint(&c->hout, "%V type %d:", score, type);
+		u = lookuplump(score, type);
+		if(u->data != nil)
+			hprint(&c->hout, " +cache");
+		else
+			hprint(&c->hout, " -cache");
+		putlump(u);
+
+		if(lookupscore(score, type, &ia, &rac) < 0){
+			hprint(&c->hout, " -lookup\n");
+			continue;
+		}
+		hprint(&c->hout, "\n  lookupscore: addr=%#llx size=%d blocks=%d rac=%d\n",
+			ia.addr, ia.size, ia.blocks, rac);
+
+		arena = amapitoa(mainindex, ia.addr, &aa);
+		if(arena == nil){
+			hprint(&c->hout, "  amapitoa failed: %r\n");
+			continue;
+		}
+
+		hprint(&c->hout, "  amapitoa: aa=%#llx arena="
+			"<a href=\"/disk?disk=%s&type=a&arena=%s&score=%V\">%s</a>\n",
+			aa, arena->part->name, arena->name, score, arena->name);
+		zb = loadclump(arena, aa, ia.blocks, &cl, sc, 1);
+		if(zb == nil){
+			hprint(&c->hout, "  loadclump failed: %r\n");
+			continue;
+		}
+		/* only cl and sc are examined below; release the data block now */
+		freezblock(zb);
+
+		hprint(&c->hout, "  loadclump: uncsize=%d type=%d score=%V\n",
+			cl.info.uncsize, cl.info.type, sc);
+		if(ia.size != cl.info.uncsize || ia.type != cl.info.type || scorecmp(score, sc) != 0){
+			hprint(&c->hout, "    clump info mismatch\n");
+			continue;
+		}
+	}
+
+	if(hargstr(c, "brute", "")[0] == 'y'){
+		hprint(&c->hout, "</pre>\n");
+		hprint(&c->hout, "<h2>brute force arena search %V</h2>\n", score);
+		hprint(&c->hout, "<pre>\n");
+
+		for(i=0; i<mainindex->narenas; i++){
+			arena = mainindex->arenas[i];
+			hprint(&c->hout, "%s...\n", arena->name);
+			hflush(&c->hout);	/* show progress: each arena scan can be slow */
+			off = findintoc(nil, arena, score);
+			if(off >= 0)
+				hprint(&c->hout, "%s %#llx (%#llx)\n", arena->name, off, mainindex->amap[i].start + off);
+		}
+	}
+
+	hprint(&c->hout, "</pre>\n");
+}
+
+/*
+ * Print the addresses of the main index and of the tables
+ * hanging off it (arenas, section map, arena map, bloom filter,
+ * index sections).
+ */
+static void
+debugmem(HConnect *c)
+{
+	Index *ix;
+
+	ix = mainindex;
+	hprint(&c->hout, "<h2>memory</h2>\n");
+
+	hprint(&c->hout, "<pre>\n");
+	hprint(&c->hout, "ix=%p\n", ix);
+	hprint(&c->hout, "\tarenas=%p\n", ix->arenas);
+	if(ix->narenas > 0)
+		hprint(&c->hout, "\tarenas[...] = %p...%p\n", ix->arenas[0], ix->arenas[ix->narenas-1]);
+	hprint(&c->hout, "\tsmap=%p\n", ix->smap);
+	hprint(&c->hout, "\tamap=%p\n", ix->amap);
+	hprint(&c->hout, "\tbloom=%p\n", ix->bloom);
+	hprint(&c->hout, "\tbloom->data=%p\n", ix->bloom ? ix->bloom->data : nil);
+	hprint(&c->hout, "\tisects=%p\n", ix->sects);
+	if(ix->nsects > 0)
+		hprint(&c->hout, "\tsects[...] = %p...%p\n", ix->sects[0], ix->sects[ix->nsects-1]);
+	hprint(&c->hout, "</pre>\n");	/* close the block opened above */
+}
+
+/*
+ * HTTP handler for the debug page.  The "op" argument selects
+ * the report: "amap", "mem", or "read" (which also needs "score").
+ * Returns -1 only if the HTML header cannot be set up.
+ */
+int
+hdebug(HConnect *c)
+{
+	char *what, *arg;
+	u8int sc[VtScoreSize];
+
+	if(hsethtml(c) < 0)
+		return -1;
+	hprint(&c->hout, "<h1>venti debug</h1>\n");
+
+	what = hargstr(c, "op", "");
+	if(what[0] == 0)
+		hprint(&c->hout, "no op\n");
+	else if(strcmp(what, "amap") == 0)
+		debugamap(c);
+	else if(strcmp(what, "mem") == 0)
+		debugmem(c);
+	else if(strcmp(what, "read") == 0){
+		arg = hargstr(c, "score", "");
+		if(vtparsescore(arg, nil, sc) < 0)
+			hprint(&c->hout, "bad score %s: %r\n", arg);
+		else
+			debugread(c, sc);
+	}else
+		hprint(&c->hout, "unknown op %s", what);
+	return 0;
+}

Some files were not shown because too many files changed in this diff