Browse Source

Plan 9 from Bell Labs 2006-01-26

David du Colombier 18 years ago
parent
commit
84ec658f21

+ 9 - 41
dist/replica/_plan9.db

@@ -231,9 +231,9 @@
 386/bin/fortune - 775 sys sys 1135570810 67113
 386/bin/fossil - 20000000775 sys sys 1042005470 0
 386/bin/fossil/conf - 775 sys sys 1085077052 1506
-386/bin/fossil/flchk - 775 sys sys 1135570810 237600
-386/bin/fossil/flfmt - 775 sys sys 1135570811 245747
-386/bin/fossil/fossil - 775 sys sys 1135570812 360522
+386/bin/fossil/flchk - 775 sys sys 1138211975 237734
+386/bin/fossil/flfmt - 775 sys sys 1138211976 245881
+386/bin/fossil/fossil - 775 sys sys 1138211977 360656
 386/bin/fossil/last - 775 sys sys 1135570812 63280
 386/bin/freq - 775 sys sys 1136397208 61797
 386/bin/fs - 20000000775 sys sys 954380769 0
@@ -567,7 +567,7 @@
 386/lib/libstdio.a - 664 sys sys 1115950159 126206
 386/lib/libsunrpc.a - 664 sys sys 1115950160 355994
 386/lib/libthread.a - 664 sys sys 1135531448 71308
-386/lib/libventi.a - 664 sys sys 1124766772 97708
+386/lib/libventi.a - 664 sys sys 1138211977 98048
 386/mbr - 775 sys sys 1131317338 407
 386/mkfile - 664 sys sys 948141303 46
 386/pbs - 775 sys sys 1131317339 494
@@ -5391,7 +5391,7 @@ power/mkfile - 664 sys sys 948141304 46
 rc - 20000000775 sys sys 944959447 0
 rc/bin - 20000000775 sys sys 1018637942 0
 rc/bin/9fat: - 775 sys sys 1133179689 367
-rc/bin/9fs - 775 sys sys 1079969823 948
+rc/bin/9fs - 775 sys sys 1138240042 1027
 rc/bin/B - 775 sys sys 945617206 645
 rc/bin/C - 775 sys sys 1127395076 855
 rc/bin/Kill - 775 sys sys 1018637942 115
@@ -5591,46 +5591,35 @@ sparc64/lib - 20000000775 sys sys 1114458535 0
 sparc64/mkfile - 664 sys sys 1114458667 46
 sys - 20000000775 sys sys 952648870 0
 sys/doc - 20000000775 sys sys 1018471272 0
-sys/doc/-.2669382.gif - 664 sys sys 1019969850 2078
 sys/doc/8½ - 20000000775 sys sys 945616779 0
-sys/doc/8½/8½.html - 664 sys sys 1020895860 33484
 sys/doc/8½/8½.ms - 664 sys sys 1020895859 31593
 sys/doc/8½/8½.ps - 664 sys sys 1020895860 797150
 sys/doc/8½/fig1.ps - 664 sys sys 1020895859 473747
 sys/doc/8½/mkfile - 664 sys sys 1020895860 215
-sys/doc/9.html - 664 sys sys 1136302690 87619
 sys/doc/9.ms - 664 sys sys 953237044 84632
 sys/doc/9.ps - 664 sys sys 960837924 508340
-sys/doc/acid.html - 664 sys sys 1091459045 71723
 sys/doc/acid.ms - 664 sys sys 1127411211 65062
 sys/doc/acid.ps - 664 sys sys 1015012454 426359
-sys/doc/acidpaper.html - 664 sys sys 1091459045 46880
 sys/doc/acidpaper.ms - 664 sys sys 952880777 44805
 sys/doc/acidpaper.ps - 664 sys sys 960837913 359639
 sys/doc/acme - 20000000775 sys sys 945616779 0
 sys/doc/acme/acme.fig1 - 664 sys sys 944959632 164559
 sys/doc/acme/acme.fig2 - 664 sys sys 944959632 56026
-sys/doc/acme/acme.html - 664 sys sys 1020013936 51769
 sys/doc/acme/acme.ms - 664 sys sys 952880782 49851
 sys/doc/acme/acme.pdf - 664 sys sys 1020384351 117006
 sys/doc/acme/acme.ps - 664 sys sys 960837907 611301
 sys/doc/acme/bs - 664 sys sys 944959634 556
 sys/doc/acme/mkfile - 664 sys sys 961259926 304
-sys/doc/ape.html - 664 sys sys 1091459042 14415
 sys/doc/ape.ms - 664 sys sys 953344517 12595
 sys/doc/ape.ps - 664 sys sys 960837914 258779
-sys/doc/asm.html - 664 sys sys 1091459045 31098
 sys/doc/asm.ms - 664 sys sys 958247686 28542
 sys/doc/asm.ps - 664 sys sys 960837915 322051
-sys/doc/auth.html - 664 sys sys 1091459079 77445
 sys/doc/auth.ms - 664 sys sys 1021579975 66803
 sys/doc/auth.ps - 664 sys sys 1021579976 451672
 sys/doc/cleanps - 775 sys sys 961259933 184
 sys/doc/colophon.ps - 664 sys sys 960837922 214122
-sys/doc/comp.html - 664 sys sys 1091459046 42144
 sys/doc/comp.ms - 664 sys sys 954266992 37792
 sys/doc/comp.ps - 664 sys sys 960837915 345786
-sys/doc/compiler.html - 664 sys sys 1091459044 32960
 sys/doc/compiler.ms - 664 sys sys 1067721142 30279
 sys/doc/compiler.ps - 664 sys sys 1091459054 309735
 sys/doc/contents.ms - 664 sys sys 1019916701 4920
@@ -5641,7 +5630,6 @@ sys/doc/fossil.ms - 664 sys sys 1063856349 31400
 sys/doc/fossil.pdf - 664 sys sys 1042123169 63200
 sys/doc/fossil.ps - 664 sys sys 1135487951 313552
 sys/doc/fs - 20000000775 sys sys 945616779 0
-sys/doc/fs/fs.html - 664 sys sys 1020013937 21345
 sys/doc/fs/fs.pdf - 664 sys sys 1020384351 47177
 sys/doc/fs/fs.ps - 664 sys sys 960837905 276918
 sys/doc/fs/mkfile - 664 sys sys 961259926 282
@@ -5656,7 +5644,6 @@ sys/doc/fs/p7 - 664 sys sys 953844581 958
 sys/doc/fs/p8 - 664 sys sys 953844574 881
 sys/doc/fs/xx - 664 sys sys 944959592 65957
 sys/doc/il - 20000000775 sys sys 945616779 0
-sys/doc/il/il.html - 664 sys sys 1020013937 12278
 sys/doc/il/il.ms - 664 sys sys 952880783 11367
 sys/doc/il/il.pdf - 664 sys sys 1020384351 44630
 sys/doc/il/il.ps - 664 sys sys 960837905 258028
@@ -5666,29 +5653,21 @@ sys/doc/il/transition.fig - 664 sys sys 944959591 15431
 sys/doc/il/transition.pic - 664 sys sys 944959591 11912
 sys/doc/il/xx - 664 sys sys 944959591 48924
 sys/doc/index.htm - 664 sys sys 1019916696 6906
-sys/doc/index.html - 664 sys sys 1020082751 6906
-sys/doc/lexnames.html - 664 sys sys 1091459043 37261
 sys/doc/lexnames.ms - 664 sys sys 954383595 34046
 sys/doc/lexnames.ps - 664 sys sys 960837909 335546
-sys/doc/libmach.html - 664 sys sys 1091459044 26650
 sys/doc/libmach.ms - 664 sys sys 1021579974 24145
 sys/doc/libmach.ps - 664 sys sys 960837916 291283
-sys/doc/lp.html - 664 sys sys 1091459043 21721
 sys/doc/lp.ms - 664 sys sys 954614673 22366
 sys/doc/lp.ps - 664 sys sys 960837917 294399
-sys/doc/mk.html - 664 sys sys 1091459046 40021
 sys/doc/mk.ms - 664 sys sys 952880779 34413
 sys/doc/mk.ps - 664 sys sys 960837917 329779
 sys/doc/mkfile - 664 sys sys 1091459055 4411
-sys/doc/mkfiles.html - 664 sys sys 1091459045 17750
 sys/doc/mkfiles.ms - 664 sys sys 952880779 17888
 sys/doc/mkfiles.ps - 664 sys sys 960837918 269496
-sys/doc/names.html - 664 sys sys 1091459046 23529
 sys/doc/names.ms - 664 sys sys 954269607 22103
 sys/doc/names.ps - 664 sys sys 960837918 288716
 sys/doc/net - 20000000775 sys sys 954033300 0
 sys/doc/net/mkfile - 664 sys sys 961259927 345
-sys/doc/net/net.html - 664 sys sys 1020013937 43449
 sys/doc/net/net.ms - 664 sys sys 952880783 41191
 sys/doc/net/net.pdf - 664 sys sys 1020384351 82603
 sys/doc/net/net.ps - 664 sys sys 960837908 350053
@@ -5696,25 +5675,19 @@ sys/doc/net/tree - 664 sys sys 944959636 866
 sys/doc/net/tree.pout - 664 sys sys 944959636 1755
 sys/doc/network.art - 664 sys sys 944959651 2260
 sys/doc/network.pic - 664 sys sys 944959651 4124
-sys/doc/plumb.html - 664 sys sys 1091459044 55783
 sys/doc/plumb.ms - 664 sys sys 954383596 53250
 sys/doc/plumb.ps - 664 sys sys 960837910 391830
-sys/doc/port.html - 664 sys sys 1091459042 18123
 sys/doc/port.ms - 664 sys sys 1020111393 16694
 sys/doc/port.ps - 664 sys sys 1020111394 268901
 sys/doc/preamble - 664 sys sys 961259497 203475
 sys/doc/prfile - 775 sys sys 944959668 3782
-sys/doc/prog4.html - 664 sys sys 1091459046 18653
 sys/doc/prog4.ms - 664 sys sys 1019932830 16485
 sys/doc/prog4.ps - 664 sys sys 1019932831 271581
 sys/doc/ps - 664 sys sys 944959649 1739
-sys/doc/rc.html - 664 sys sys 1091459044 41702
 sys/doc/rc.ms - 664 sys sys 1063856321 34373
 sys/doc/rc.ps - 664 sys sys 960837920 337955
-sys/doc/release3.html - 664 sys sys 1019922810 6329
 sys/doc/release3.ms - 664 sys sys 961261276 5492
 sys/doc/release3.ps - 664 sys sys 961261277 230682
-sys/doc/release4.html - 664 sys sys 1091459042 6473
 sys/doc/release4.ms - 664 sys sys 1063856338 5160
 sys/doc/release4.ps - 664 sys sys 1091459052 230868
 sys/doc/sam - 20000000775 sys sys 945617037 0
@@ -5731,24 +5704,19 @@ sys/doc/sam/fig6.pic - 464 sys sys 944959644 1105
 sys/doc/sam/fig7.pic - 464 sys sys 944959644 258
 sys/doc/sam/mkfile - 664 sys sys 961259927 574
 sys/doc/sam/refs - 464 sys sys 944959644 2652
-sys/doc/sam/sam.html - 664 sys sys 1020013938 100660
 sys/doc/sam/sam.ms - 464 sys sys 954266468 94536
 sys/doc/sam/sam.pdf - 664 sys sys 1020384352 156123
 sys/doc/sam/sam.ps - 664 sys sys 960837910 707546
 sys/doc/sam/sam.tut - 464 sys sys 944959644 40481
-sys/doc/sleep.html - 664 sys sys 1091459043 16602
 sys/doc/sleep.ms - 664 sys sys 953237030 15206
 sys/doc/sleep.ps - 664 sys sys 960837920 263882
-sys/doc/spin.html - 664 sys sys 1091459042 75873
 sys/doc/spin.ms - 664 sys sys 953344522 67475
 sys/doc/spin.ps - 664 sys sys 960837923 443064
 sys/doc/title - 664 sys sys 1018974170 740
 sys/doc/title.ps - 664 sys sys 1018974170 214289
 sys/doc/trademarks.ps - 664 sys sys 960837912 217896
-sys/doc/troff.html - 664 sys sys 1019922811 110932
 sys/doc/troff.ms - 664 sys sys 953237047 120683
 sys/doc/troff.ps - 664 sys sys 1091459051 782310
-sys/doc/utf.html - 664 sys sys 1091459044 43965
 sys/doc/utf.ms - 664 sys sys 952880781 41659
 sys/doc/utf.ps - 664 sys sys 960837922 363085
 sys/doc/venti - 20000000775 sys sys 1019852318 0
@@ -5763,7 +5731,7 @@ sys/doc/venti/emelie.gif - 664 sys sys 1019852316 5004
 sys/doc/venti/emelie2.gif - 664 sys sys 1019852317 4357
 sys/doc/venti/mkfile - 664 sys sys 1019965454 79
 sys/doc/venti/probablity.gif - 664 sys sys 1019852317 1244
-sys/doc/venti/venti.html - 664 sys sys 1019852317 55272
+sys/doc/venti/venti.html - 664 sys sys 1138233389 55272
 sys/doc/venti/venti.pdf - 664 sys sys 1020384352 139090
 sys/doc/venti/venti.ps - 664 sys sys 1019852320 2012620
 sys/games - 20000000775 sys sys 952648872 0
@@ -7596,7 +7564,7 @@ sys/man/3/segment - 664 sys sys 1017423721 2378
 sys/man/3/srv - 664 sys sys 958419690 1470
 sys/man/3/ssl - 664 sys sys 1018386776 3413
 sys/man/3/tls - 664 sys sys 1045501496 7018
-sys/man/3/uart - 664 sys sys 1102093395 1710
+sys/man/3/uart - 664 sys sys 1138191356 2003
 sys/man/3/usb - 664 sys sys 1126971427 6960
 sys/man/3/vga - 664 sys sys 1131301005 4957
 sys/man/4 - 20000000775 sys sys 1018581459 0
@@ -7682,7 +7650,7 @@ sys/man/6/plot - 664 sys sys 944959679 6739
 sys/man/6/plumb - 664 sys sys 969499892 10918
 sys/man/6/regexp - 664 sys sys 954089523 2050
 sys/man/6/rewrite - 664 sys sys 969499892 3235
-sys/man/6/smtpd - 664 sys sys 971095216 8192
+sys/man/6/smtpd - 664 sys sys 1138191586 8178
 sys/man/6/snap - 664 sys sys 1132452694 2402
 sys/man/6/thumbprint - 664 sys sys 1019866709 1124
 sys/man/6/users - 664 sys sys 1130912014 1392
@@ -15530,7 +15498,7 @@ sys/src/libthread/xincmips.s - 664 sys sys 1014928160 674
 sys/src/libthread/xincport.h - 664 sys sys 1127405405 211
 sys/src/libthread/xincpower.s - 664 sys sys 1048645448 342
 sys/src/libventi - 20000000775 sys sys 947360466 0
-sys/src/libventi/client.c - 664 sys sys 1121977166 5362
+sys/src/libventi/client.c - 664 sys sys 1138191441 5493
 sys/src/libventi/debug.c - 664 sys sys 1045502093 1258
 sys/src/libventi/errfmt.c - 664 sys sys 1019678691 133
 sys/src/libventi/fatal.c - 664 sys sys 1084468118 225

+ 9 - 41
dist/replica/plan9.db

@@ -231,9 +231,9 @@
 386/bin/fortune - 775 sys sys 1135570810 67113
 386/bin/fossil - 20000000775 sys sys 1042005470 0
 386/bin/fossil/conf - 775 sys sys 1085077052 1506
-386/bin/fossil/flchk - 775 sys sys 1135570810 237600
-386/bin/fossil/flfmt - 775 sys sys 1135570811 245747
-386/bin/fossil/fossil - 775 sys sys 1135570812 360522
+386/bin/fossil/flchk - 775 sys sys 1138211975 237734
+386/bin/fossil/flfmt - 775 sys sys 1138211976 245881
+386/bin/fossil/fossil - 775 sys sys 1138211977 360656
 386/bin/fossil/last - 775 sys sys 1135570812 63280
 386/bin/freq - 775 sys sys 1136397208 61797
 386/bin/fs - 20000000775 sys sys 954380769 0
@@ -567,7 +567,7 @@
 386/lib/libstdio.a - 664 sys sys 1115950159 126206
 386/lib/libsunrpc.a - 664 sys sys 1115950160 355994
 386/lib/libthread.a - 664 sys sys 1135531448 71308
-386/lib/libventi.a - 664 sys sys 1124766772 97708
+386/lib/libventi.a - 664 sys sys 1138211977 98048
 386/mbr - 775 sys sys 1131317338 407
 386/mkfile - 664 sys sys 948141303 46
 386/pbs - 775 sys sys 1131317339 494
@@ -5391,7 +5391,7 @@ power/mkfile - 664 sys sys 948141304 46
 rc - 20000000775 sys sys 944959447 0
 rc/bin - 20000000775 sys sys 1018637942 0
 rc/bin/9fat: - 775 sys sys 1133179689 367
-rc/bin/9fs - 775 sys sys 1079969823 948
+rc/bin/9fs - 775 sys sys 1138240042 1027
 rc/bin/B - 775 sys sys 945617206 645
 rc/bin/C - 775 sys sys 1127395076 855
 rc/bin/Kill - 775 sys sys 1018637942 115
@@ -5591,46 +5591,35 @@ sparc64/lib - 20000000775 sys sys 1114458535 0
 sparc64/mkfile - 664 sys sys 1114458667 46
 sys - 20000000775 sys sys 952648870 0
 sys/doc - 20000000775 sys sys 1018471272 0
-sys/doc/-.2669382.gif - 664 sys sys 1019969850 2078
 sys/doc/8½ - 20000000775 sys sys 945616779 0
-sys/doc/8½/8½.html - 664 sys sys 1020895860 33484
 sys/doc/8½/8½.ms - 664 sys sys 1020895859 31593
 sys/doc/8½/8½.ps - 664 sys sys 1020895860 797150
 sys/doc/8½/fig1.ps - 664 sys sys 1020895859 473747
 sys/doc/8½/mkfile - 664 sys sys 1020895860 215
-sys/doc/9.html - 664 sys sys 1136302690 87619
 sys/doc/9.ms - 664 sys sys 953237044 84632
 sys/doc/9.ps - 664 sys sys 960837924 508340
-sys/doc/acid.html - 664 sys sys 1091459045 71723
 sys/doc/acid.ms - 664 sys sys 1127411211 65062
 sys/doc/acid.ps - 664 sys sys 1015012454 426359
-sys/doc/acidpaper.html - 664 sys sys 1091459045 46880
 sys/doc/acidpaper.ms - 664 sys sys 952880777 44805
 sys/doc/acidpaper.ps - 664 sys sys 960837913 359639
 sys/doc/acme - 20000000775 sys sys 945616779 0
 sys/doc/acme/acme.fig1 - 664 sys sys 944959632 164559
 sys/doc/acme/acme.fig2 - 664 sys sys 944959632 56026
-sys/doc/acme/acme.html - 664 sys sys 1020013936 51769
 sys/doc/acme/acme.ms - 664 sys sys 952880782 49851
 sys/doc/acme/acme.pdf - 664 sys sys 1020384351 117006
 sys/doc/acme/acme.ps - 664 sys sys 960837907 611301
 sys/doc/acme/bs - 664 sys sys 944959634 556
 sys/doc/acme/mkfile - 664 sys sys 961259926 304
-sys/doc/ape.html - 664 sys sys 1091459042 14415
 sys/doc/ape.ms - 664 sys sys 953344517 12595
 sys/doc/ape.ps - 664 sys sys 960837914 258779
-sys/doc/asm.html - 664 sys sys 1091459045 31098
 sys/doc/asm.ms - 664 sys sys 958247686 28542
 sys/doc/asm.ps - 664 sys sys 960837915 322051
-sys/doc/auth.html - 664 sys sys 1091459079 77445
 sys/doc/auth.ms - 664 sys sys 1021579975 66803
 sys/doc/auth.ps - 664 sys sys 1021579976 451672
 sys/doc/cleanps - 775 sys sys 961259933 184
 sys/doc/colophon.ps - 664 sys sys 960837922 214122
-sys/doc/comp.html - 664 sys sys 1091459046 42144
 sys/doc/comp.ms - 664 sys sys 954266992 37792
 sys/doc/comp.ps - 664 sys sys 960837915 345786
-sys/doc/compiler.html - 664 sys sys 1091459044 32960
 sys/doc/compiler.ms - 664 sys sys 1067721142 30279
 sys/doc/compiler.ps - 664 sys sys 1091459054 309735
 sys/doc/contents.ms - 664 sys sys 1019916701 4920
@@ -5641,7 +5630,6 @@ sys/doc/fossil.ms - 664 sys sys 1063856349 31400
 sys/doc/fossil.pdf - 664 sys sys 1042123169 63200
 sys/doc/fossil.ps - 664 sys sys 1135487951 313552
 sys/doc/fs - 20000000775 sys sys 945616779 0
-sys/doc/fs/fs.html - 664 sys sys 1020013937 21345
 sys/doc/fs/fs.pdf - 664 sys sys 1020384351 47177
 sys/doc/fs/fs.ps - 664 sys sys 960837905 276918
 sys/doc/fs/mkfile - 664 sys sys 961259926 282
@@ -5656,7 +5644,6 @@ sys/doc/fs/p7 - 664 sys sys 953844581 958
 sys/doc/fs/p8 - 664 sys sys 953844574 881
 sys/doc/fs/xx - 664 sys sys 944959592 65957
 sys/doc/il - 20000000775 sys sys 945616779 0
-sys/doc/il/il.html - 664 sys sys 1020013937 12278
 sys/doc/il/il.ms - 664 sys sys 952880783 11367
 sys/doc/il/il.pdf - 664 sys sys 1020384351 44630
 sys/doc/il/il.ps - 664 sys sys 960837905 258028
@@ -5666,29 +5653,21 @@ sys/doc/il/transition.fig - 664 sys sys 944959591 15431
 sys/doc/il/transition.pic - 664 sys sys 944959591 11912
 sys/doc/il/xx - 664 sys sys 944959591 48924
 sys/doc/index.htm - 664 sys sys 1019916696 6906
-sys/doc/index.html - 664 sys sys 1020082751 6906
-sys/doc/lexnames.html - 664 sys sys 1091459043 37261
 sys/doc/lexnames.ms - 664 sys sys 954383595 34046
 sys/doc/lexnames.ps - 664 sys sys 960837909 335546
-sys/doc/libmach.html - 664 sys sys 1091459044 26650
 sys/doc/libmach.ms - 664 sys sys 1021579974 24145
 sys/doc/libmach.ps - 664 sys sys 960837916 291283
-sys/doc/lp.html - 664 sys sys 1091459043 21721
 sys/doc/lp.ms - 664 sys sys 954614673 22366
 sys/doc/lp.ps - 664 sys sys 960837917 294399
-sys/doc/mk.html - 664 sys sys 1091459046 40021
 sys/doc/mk.ms - 664 sys sys 952880779 34413
 sys/doc/mk.ps - 664 sys sys 960837917 329779
 sys/doc/mkfile - 664 sys sys 1091459055 4411
-sys/doc/mkfiles.html - 664 sys sys 1091459045 17750
 sys/doc/mkfiles.ms - 664 sys sys 952880779 17888
 sys/doc/mkfiles.ps - 664 sys sys 960837918 269496
-sys/doc/names.html - 664 sys sys 1091459046 23529
 sys/doc/names.ms - 664 sys sys 954269607 22103
 sys/doc/names.ps - 664 sys sys 960837918 288716
 sys/doc/net - 20000000775 sys sys 954033300 0
 sys/doc/net/mkfile - 664 sys sys 961259927 345
-sys/doc/net/net.html - 664 sys sys 1020013937 43449
 sys/doc/net/net.ms - 664 sys sys 952880783 41191
 sys/doc/net/net.pdf - 664 sys sys 1020384351 82603
 sys/doc/net/net.ps - 664 sys sys 960837908 350053
@@ -5696,25 +5675,19 @@ sys/doc/net/tree - 664 sys sys 944959636 866
 sys/doc/net/tree.pout - 664 sys sys 944959636 1755
 sys/doc/network.art - 664 sys sys 944959651 2260
 sys/doc/network.pic - 664 sys sys 944959651 4124
-sys/doc/plumb.html - 664 sys sys 1091459044 55783
 sys/doc/plumb.ms - 664 sys sys 954383596 53250
 sys/doc/plumb.ps - 664 sys sys 960837910 391830
-sys/doc/port.html - 664 sys sys 1091459042 18123
 sys/doc/port.ms - 664 sys sys 1020111393 16694
 sys/doc/port.ps - 664 sys sys 1020111394 268901
 sys/doc/preamble - 664 sys sys 961259497 203475
 sys/doc/prfile - 775 sys sys 944959668 3782
-sys/doc/prog4.html - 664 sys sys 1091459046 18653
 sys/doc/prog4.ms - 664 sys sys 1019932830 16485
 sys/doc/prog4.ps - 664 sys sys 1019932831 271581
 sys/doc/ps - 664 sys sys 944959649 1739
-sys/doc/rc.html - 664 sys sys 1091459044 41702
 sys/doc/rc.ms - 664 sys sys 1063856321 34373
 sys/doc/rc.ps - 664 sys sys 960837920 337955
-sys/doc/release3.html - 664 sys sys 1019922810 6329
 sys/doc/release3.ms - 664 sys sys 961261276 5492
 sys/doc/release3.ps - 664 sys sys 961261277 230682
-sys/doc/release4.html - 664 sys sys 1091459042 6473
 sys/doc/release4.ms - 664 sys sys 1063856338 5160
 sys/doc/release4.ps - 664 sys sys 1091459052 230868
 sys/doc/sam - 20000000775 sys sys 945617037 0
@@ -5731,24 +5704,19 @@ sys/doc/sam/fig6.pic - 464 sys sys 944959644 1105
 sys/doc/sam/fig7.pic - 464 sys sys 944959644 258
 sys/doc/sam/mkfile - 664 sys sys 961259927 574
 sys/doc/sam/refs - 464 sys sys 944959644 2652
-sys/doc/sam/sam.html - 664 sys sys 1020013938 100660
 sys/doc/sam/sam.ms - 464 sys sys 954266468 94536
 sys/doc/sam/sam.pdf - 664 sys sys 1020384352 156123
 sys/doc/sam/sam.ps - 664 sys sys 960837910 707546
 sys/doc/sam/sam.tut - 464 sys sys 944959644 40481
-sys/doc/sleep.html - 664 sys sys 1091459043 16602
 sys/doc/sleep.ms - 664 sys sys 953237030 15206
 sys/doc/sleep.ps - 664 sys sys 960837920 263882
-sys/doc/spin.html - 664 sys sys 1091459042 75873
 sys/doc/spin.ms - 664 sys sys 953344522 67475
 sys/doc/spin.ps - 664 sys sys 960837923 443064
 sys/doc/title - 664 sys sys 1018974170 740
 sys/doc/title.ps - 664 sys sys 1018974170 214289
 sys/doc/trademarks.ps - 664 sys sys 960837912 217896
-sys/doc/troff.html - 664 sys sys 1019922811 110932
 sys/doc/troff.ms - 664 sys sys 953237047 120683
 sys/doc/troff.ps - 664 sys sys 1091459051 782310
-sys/doc/utf.html - 664 sys sys 1091459044 43965
 sys/doc/utf.ms - 664 sys sys 952880781 41659
 sys/doc/utf.ps - 664 sys sys 960837922 363085
 sys/doc/venti - 20000000775 sys sys 1019852318 0
@@ -5763,7 +5731,7 @@ sys/doc/venti/emelie.gif - 664 sys sys 1019852316 5004
 sys/doc/venti/emelie2.gif - 664 sys sys 1019852317 4357
 sys/doc/venti/mkfile - 664 sys sys 1019965454 79
 sys/doc/venti/probablity.gif - 664 sys sys 1019852317 1244
-sys/doc/venti/venti.html - 664 sys sys 1019852317 55272
+sys/doc/venti/venti.html - 664 sys sys 1138233389 55272
 sys/doc/venti/venti.pdf - 664 sys sys 1020384352 139090
 sys/doc/venti/venti.ps - 664 sys sys 1019852320 2012620
 sys/games - 20000000775 sys sys 952648872 0
@@ -7596,7 +7564,7 @@ sys/man/3/segment - 664 sys sys 1017423721 2378
 sys/man/3/srv - 664 sys sys 958419690 1470
 sys/man/3/ssl - 664 sys sys 1018386776 3413
 sys/man/3/tls - 664 sys sys 1045501496 7018
-sys/man/3/uart - 664 sys sys 1102093395 1710
+sys/man/3/uart - 664 sys sys 1138191356 2003
 sys/man/3/usb - 664 sys sys 1126971427 6960
 sys/man/3/vga - 664 sys sys 1131301005 4957
 sys/man/4 - 20000000775 sys sys 1018581459 0
@@ -7682,7 +7650,7 @@ sys/man/6/plot - 664 sys sys 944959679 6739
 sys/man/6/plumb - 664 sys sys 969499892 10918
 sys/man/6/regexp - 664 sys sys 954089523 2050
 sys/man/6/rewrite - 664 sys sys 969499892 3235
-sys/man/6/smtpd - 664 sys sys 971095216 8192
+sys/man/6/smtpd - 664 sys sys 1138191586 8178
 sys/man/6/snap - 664 sys sys 1132452694 2402
 sys/man/6/thumbprint - 664 sys sys 1019866709 1124
 sys/man/6/users - 664 sys sys 1130912014 1392
@@ -15530,7 +15498,7 @@ sys/src/libthread/xincmips.s - 664 sys sys 1014928160 674
 sys/src/libthread/xincport.h - 664 sys sys 1127405405 211
 sys/src/libthread/xincpower.s - 664 sys sys 1048645448 342
 sys/src/libventi - 20000000775 sys sys 947360466 0
-sys/src/libventi/client.c - 664 sys sys 1121977166 5362
+sys/src/libventi/client.c - 664 sys sys 1138191441 5493
 sys/src/libventi/debug.c - 664 sys sys 1045502093 1258
 sys/src/libventi/errfmt.c - 664 sys sys 1019678691 133
 sys/src/libventi/fatal.c - 664 sys sys 1084468118 225

+ 46 - 0
dist/replica/plan9.log

@@ -27199,3 +27199,49 @@
 1138120255 0 c sys/src/9/port/devuart.c - 664 sys sys 1138118881 12141
 1138127457 0 c sys/src/9/port/portdat.h - 664 sys sys 1138127259 22540
 1138131057 0 c sys/games/lib/fortunes - 664 sys sys 1138129633 257147
+1138192278 0 c sys/man/3/uart - 664 sys sys 1138191356 2003
+1138192278 1 c sys/man/6/smtpd - 664 sys sys 1138191586 8178
+1138192278 2 c sys/src/libventi/client.c - 664 sys sys 1138191441 5493
+1138212084 0 c 386/bin/fossil/flchk - 775 sys sys 1138211975 237734
+1138212084 1 c 386/bin/fossil/flfmt - 775 sys sys 1138211976 245881
+1138212084 2 c 386/bin/fossil/fossil - 775 sys sys 1138211977 360656
+1138212084 3 c 386/lib/libventi.a - 664 sys sys 1138211977 98048
+1138230089 0 c sys/doc/9.html - 664 sys sys 1138229042 163762
+1138230089 1 c sys/doc/auth.html - 664 sys sys 1138229043 256779
+1138230089 2 a sys/doc/contents.html - 664 sys sys 1138229042 27104
+1138230089 3 c sys/doc/names.html - 664 sys sys 1138229043 49369
+1138233689 0 c sys/doc/venti/venti.html - 664 sys sys 1138233389 55272
+1138233689 1 d sys/doc/utf.html - 664 sys sys 1091459044 0
+1138233689 2 d sys/doc/troff.html - 664 sys sys 1019922811 0
+1138233689 3 d sys/doc/spin.html - 664 sys sys 1091459042 0
+1138233689 4 d sys/doc/sleep.html - 664 sys sys 1091459043 0
+1138233689 5 d sys/doc/sam/sam.html - 664 sys sys 1020013938 0
+1138233689 6 d sys/doc/release4.html - 664 sys sys 1091459042 0
+1138233689 7 d sys/doc/release3.html - 664 sys sys 1019922810 0
+1138233689 8 d sys/doc/rc.html - 664 sys sys 1091459044 0
+1138233689 9 d sys/doc/prog4.html - 664 sys sys 1091459046 0
+1138233689 10 d sys/doc/port.html - 664 sys sys 1091459042 0
+1138233689 11 d sys/doc/plumb.html - 664 sys sys 1091459044 0
+1138233689 12 d sys/doc/net/net.html - 664 sys sys 1020013937 0
+1138233689 13 d sys/doc/names.html - 664 sys sys 1138229043 0
+1138233689 14 d sys/doc/mkfiles.html - 664 sys sys 1091459045 0
+1138233689 15 d sys/doc/mk.html - 664 sys sys 1091459046 0
+1138233689 16 d sys/doc/lp.html - 664 sys sys 1091459043 0
+1138233689 17 d sys/doc/libmach.html - 664 sys sys 1091459044 0
+1138233689 18 d sys/doc/lexnames.html - 664 sys sys 1091459043 0
+1138233689 19 d sys/doc/index.html - 664 sys sys 1020082751 0
+1138233689 20 d sys/doc/il/il.html - 664 sys sys 1020013937 0
+1138233689 21 d sys/doc/fs/fs.html - 664 sys sys 1020013937 0
+1138233689 22 d sys/doc/contents.html - 664 sys sys 1138229042 0
+1138233689 23 d sys/doc/compiler.html - 664 sys sys 1091459044 0
+1138233689 24 d sys/doc/comp.html - 664 sys sys 1091459046 0
+1138233689 25 d sys/doc/auth.html - 664 sys sys 1138229043 0
+1138233689 26 d sys/doc/asm.html - 664 sys sys 1091459045 0
+1138233689 27 d sys/doc/ape.html - 664 sys sys 1091459042 0
+1138233689 28 d sys/doc/acme/acme.html - 664 sys sys 1020013936 0
+1138233689 29 d sys/doc/acidpaper.html - 664 sys sys 1091459045 0
+1138233689 30 d sys/doc/acid.html - 664 sys sys 1091459045 0
+1138233689 31 d sys/doc/9.html - 664 sys sys 1138229042 0
+1138233689 32 d sys/doc/8½/8½.html - 664 sys sys 1020895860 0
+1138233689 33 d sys/doc/-.2669382.gif - 664 sys sys 1019969850 0
+1138240891 0 c rc/bin/9fs - 775 sys sys 1138240042 1027

+ 3 - 1
rc/bin/9fs

@@ -20,7 +20,9 @@ case other
 case juke
 	srv -q il!jukefs && mount /srv/il!jukefs /n/njuke && bind -c /n/njuke/juke /n/juke
 case sources
-	srv -m tcp!sources.cs.bell-labs.com sources /n/sources
+	srv -q tcp!sources.cs.bell-labs.com sources /n/sources &&
+		{ mount /srv/sources /n/sources ||
+		  mount -n /srv/sources /n/sources }
 case sourcesdump
 	9fs sources
 	mount /srv/sources /n/sourcesdump main/archive

BIN
sys/doc/-.2669382.gif


+ 0 - 868
sys/doc/8½/8½.html

@@ -1,868 +0,0 @@
-<html>
-<title>
-8&#189;, the Plan 9 Window System
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>8&#189;, the Plan 9 Window System
-</H1>
-<DL><DD><I>Rob Pike<br>
-rob@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Originally appeared, in a slightly different form, in
-Proc. of the Summer 1991 USENIX Conf.,
-pp. 257-265,
-Nashville.
-Note that
-<TT>8&#189;</TT>
-has been replaced by
-<TT>rio</TT>
-(see
-<A href="/magic/man2html/1/rio"><I>rio</I>(1)).
-</A></I><DT>&#32;<DD></dl>
-<br>
-The Plan 9 window system, 8&#189;, is a modest-sized program of novel design.
-It provides textual I/O and bitmap graphic services to both
-local and remote client programs by offering a multiplexed file service to those clients.
-It serves traditional UNIX files like
-<TT>/dev/tty</TT>
-as well as more unusual ones that provide access to the mouse
-and the raw screen.
-Bitmap graphics operations are provided by serving a file called
-<TT>/dev/bitblt</TT>
-that interprets client messages to perform raster operations.
-The file service that 8&#189; offers its clients is identical to that it uses for
-its own implementation, so it is fundamentally no more than
-a multiplexer.
-This architecture has some rewarding symmetries and can be implemented
-compactly.
-</DL>
-<H4>Introduction
-</H4>
-<P>
-In 1989 I constructed a toy window system from only a few hundred
-lines of source code using a custom language and an unusual architecture
-involving concurrent processes [Pike89].
-Although that system was rudimentary at best, it demonstrated that
-window systems are not inherently complicated.
-The following year, for the new Plan 9 distributed system [Pike92], I applied some of
-the lessons from that toy project to write, in C,
-a production-quality window system
-called 8&#189;.
-8&#189; provides, on black-and-white, grey-scale, or color displays,
-the services required of a modern window system, including
-programmability and support for remote graphics.
-The entire system, including the default program that runs in the 
-window &#173; the equivalent of
-<TT>xterm</TT>
-[Far89] with `cutting and pasting' between windows &#173;
-is well under 90 kilobytes of text on a Motorola 68020 processor, about
-half the size of the
-operating system
-kernel that supports it and a tenth the size of the X server
-[Sche86]
-<I>without</I>
-<TT>xterm</TT>.
-</P>
-<P>
-What makes 8&#189; so compact?  Much of the saving comes from overall simplicity:
-8&#189; has little graphical fanciness, a concise programming interface, and
-a simple, fixed user interface.
-8&#189; also makes some decisions by fiat
-&#173; three-button mouse, overlapping windows, built-in terminal program and
-window manager, etc. &#173;
-rather than trying to appeal to all tastes.
-Although compact, 8&#189; is not ascetic.
-It provides the fundamentals and
-enough extras to make them comfortable to use.
-The most important contributor to its small size, though, is its
-overall design as a file server.
-This structure may be applicable to window systems
-on traditional UNIX-like operating systems.
-</P>
-<P>
-The small size of 8&#189; does not reflect reduced functionality:
-8&#189; provides service roughly equivalent to the X window system.
-8&#189;'s clients may of course be as complex as they choose,
-although the tendency to mimic 8&#189;'s design
-and the clean programming interface means they
-are not nearly as bloated as X applications.
-</P>
-<H4>User's Model
-</H4>
-<P>
-8&#189; turns the single screen, mouse, and keyboard of the terminal
-(in Plan 9 terminology) or workstation (in commercial terminology) into an array
-of independent virtual terminals that may be textual terminals supporting a shell and
-the usual suite of tools
-or graphical applications using the full power of the bitmap screen and mouse.
-Text is represented in UTF, an encoding of the Unicode Standard [Pike93].
-The entire programming interface is provided through
-reading and writing files in
-<TT>/dev</TT>.
-</P>
-<P>
-Primarily for reasons of history and familiarity,
-the general model and appearance of 8&#189; are similar to those of
-<TT>mux</TT>
-[Pike88].
-The right button has a short menu for controlling window creation, destruction,
-and placement.
-When a window is created, it runs the default shell,
-<TT>rc</TT>
-[Duff90], with standard input
-and output directed to the window and accessible through the file
-<TT>/dev/cons</TT>
-(`console'),
-analogous to the
-<TT>/dev/tty</TT>
-of UNIX.
-The name change represents a break with the past: Plan 9 does not provide a
-Teletype-style model of terminals.  8&#189; provides the only way
-most users ever access Plan 9.
-</P>
-<P>
-Graphical applications,
-like ordinary programs,
-may be run by typing their names
-to the shell running in a window.
-This runs the application in the same window;
-to run the application in a new window one may use an external program,
-<TT>window</TT>,
-described below.
-For graphical applications, the virtual terminal model
-is extended somewhat to allow programs to perform graphical operations,
-access the
-mouse, and perform related functions by reading and writing files with
-suggestive names such as
-<TT>/dev/mouse</TT>
-and
-<TT>/dev/window</TT>
-multiplexed per-window
-much like
-<TT>/dev/cons</TT>.
-The implementation and semantics of these files,
-described below, are central to the structure of 8&#189;.
-</P>
-<P>
-The default program that runs in a window is familiar to users of Blit terminals [Pike83].
-It is very similar to that of
-<TT>mux</TT>
-[Pike88], providing mouse-based editing of input and output text,
-the ability to scroll back to see earlier output, and so on.
-It also has a new feature, toggled by typing ESC,
-that enables the user to control when
-typed characters may be read by the shell or application,
-instead of (for example) after each newline.
-This feature makes the window program directly useful for many text-editing
-tasks such as composing mail messages before sending them.
-</P>
-<H4>Plan 9 and 8&#189;
-</H4>
-<P>
-Plan 9 is a distributed system that provides support for UNIX-like applications
-in an environment built from distinct CPU servers, file servers, and terminals
-connected by a variety of networks [Pike90].
-The terminals are comparable to modest workstations that, once connected to a file
-server over a medium-bandwidth network such as Ethernet, are self-sufficient computers
-running a full operating system.
-Unlike workstations, however, their role is just to
-provide an affordable multiplexed user interface to the rest of the system:
-they run the window system and support simple interactive
-tasks such as text editing.
-Thus they lie somewhere between workstations and X terminals in design,
-cost, performance, and function.
-(The terminals can be used
-for general computing, but in practice Plan 9 users do their
-computing on the CPU servers.)
-The Plan 9 terminal software, including 8&#189;,
-was developed on a 68020-based
-machine called a Gnot
-and has been ported to
-the NeXTstation,
-the MIPS Magnum 3000,
-SGI Indigos,
-and Sun SPARCstations&#173;all small workstations that we use as terminals&#173;as
-well as PCs.
-</P>
-<P>
-Heavy computations such as compilation, text processing,
-or scientific calculation are done on the CPU servers, which are connected
-to the file servers by high-bandwidth networks.
-For interactive work,
-these computations can access the terminal that instantiated them.
-The terminal and CPU server being used by a particular user are connected to the
-same file server, although over different networks; Plan 9 provides a view of the
-file server that is independent of location in the network.
-</P>
-<P>
-The components of Plan 9 are connected by a common protocol based on the sharing of files.
-All resources in the network are implemented as file servers; programs that wish to
-access them connect to them over the network and communicate using ordinary file
-operations.
-An unusual aspect of Plan 9 is that the
-name space
-of a process, the set of files that can be accessed by name
-(for example by an
-<TT>open</TT>
-system call) is not global to all processes on a machine; distinct processes
-may have distinct name spaces.  The system provides methods by which processes
-may change their name spaces, such as the ability to
-<I>mount</I>
-a service upon an existing directory, making the files of the service
-visible in the directory.
-(This is a different operation from its
-UNIX
-namesake.)
-Multiple services may be mounted upon the same directory,
-allowing the files from multiple services to be accessed in the same directory.
-Options to the
-<TT>mount</TT>
-system call control the order of searching for files in such a
-union directory.
-</P>
-<P>
-The most obvious example of a network resource is a file server, where permanent
-files reside.  There are a number of unusual services, however, whose design in
-a different environment would likely not be file-based.  Many are described
-elsewhere [Pike92]; some examples are the representation
-of processes for debugging,
-much like Killian's process files for the 8th edition [Kill84],
-and the implementation of the name/value pairs of the
-UNIX
-<TT>exec</TT>
-environment as files.
-User processes may also implement a file service and make it available to clients
-in the network, much like the `mounted streams' in the 9th Edition
-[Pres90].
-A typical example is a program that interprets an externally-defined file system
-such as that on a CD-ROM or a standard
-UNIX
-system and makes the contents available to Plan 9 programs.
-This design is used by all distributed applications in Plan 9, including 8&#189;.
-</P>
-<P>
-8&#189; serves a set of files in the conventional directory
-<TT>/dev</TT>
-with names like
-<TT>cons</TT>,
-<TT>mouse</TT>,
-and
-<TT>screen</TT>.
-Clients of 8&#189; communicate with the window system by reading and writing
-these files.
-For example, a client program, such as a shell,
-can print text by writing its standard output, which is automatically
-connected to
-<TT>/dev/cons</TT>,
-or it may open and write that file explicitly.
-Unlike files served by a traditional file server, however, the instance of
-<TT>/dev/cons</TT>
-served in each window by 8&#189; is a distinct file;
-the per-process name spaces of Plan 9 allow 8&#189; to provide a unique
-<TT>/dev/cons</TT>
-to each client.
-This mechanism is best illustrated by the creation of a new 8&#189; client.
-</P>
-<P>
-When 8&#189; starts, it creates a full-duplex pipe to be the communication
-medium for the messages that implement the file service it will provide.
-One end will be shared by all the clients; the other end is held by
-8&#189; to accept requests for I/O.
-When a user makes a new window using the mouse,
-8&#189; allocates the window data structures and forks a child process.
-The child's name space,
-initially shared with the parent,
-is then duplicated
-so that changes the child makes to its name space will not affect the parent.
-The child then attaches its end of the communication pipe,
-<TT>cfd</TT>,
-to the directory
-<TT>/dev</TT>
-by doing a
-<TT>mount</TT>
-system call:
-<DL><DT><DD><TT><PRE>
-mount(cfd, "/dev", MBEFORE, buf)
-</PRE></TT></DL>
-This call attaches the service associated with the file descriptor
-<TT>cfd</TT>
-&#173; the client end of the pipe &#173; to the beginning of
-<TT>/dev</TT>
-so that the files in the new service take priority over existing files
-in the directory.
-This makes the new files
-<TT>cons</TT>,
-<TT>mouse</TT>,
-and so on,
-available in
-<TT>/dev</TT>
-in a way that hides any files with the same names already in place.
-The argument
-<TT>buf</TT>
-is a character string (null in this case),
-described below.
-</P>
-<P>
-The client process then closes file descriptors 0, 1, and 2 and opens
-<TT>/dev/cons</TT>
-repeatedly to connect the standard
-input, output, and error files to the window's
-<TT>/dev/cons</TT>.
-It then does an
-<TT>exec</TT>
-system call to begin executing the shell in the window.
-This entire sequence, complete with error handling, is 33 lines of C.
-</P>
-<P>
-The view of these events from 8&#189;'s end of the pipe is a sequence
-of file protocol messages from the new client generated by the
-intervening operating
-system in response to the
-<TT>mount</TT>
-and
-<TT>open</TT>
-system calls executed by the client.
-The message generated by the
-<TT>mount</TT>
-informs 8&#189; that a new client has attached to the file service it provides;
-8&#189;'s response is a unique identifier kept by the operating system and
-passed in all messages generated by I/O on the files derived from that
-<TT>mount</TT>.
-This identifier is used by 8&#189; to distinguish the various clients so
-each sees a unique
-<TT>/dev/cons</TT>;
-most servers do not need to make this distinction.
-</P>
-<P>
-A process unrelated to 8&#189; may create windows by a variant of this mechanism.
-When 8&#189; begins, it uses a Plan 9 service to `post' the client end of the
-communication pipe in a public place.
-A process may open that pipe and
-<TT>mount</TT>
-it to attach to the window system,
-much in the way an X client may connect to a
-UNIX
-domain socket to the server bound to the file system.
-The final argument to
-<TT>mount</TT>
-is passed through uninterpreted by the operating
-system.
-It provides a way for the client and server to
-exchange information at the time of the
-<TT>mount</TT>.
-8&#189; interprets it as the dimensions of the window to be
-created for the new client.  (In the case above, the window has been
-created by the time the mount occurs, and
-<TT>buf</TT>
-carries no information.)
-When the
-<TT>mount</TT>
-returns, the process can open the files of the new window and begin I/O to
-use it.
-</P>
-<P>
-Because 8&#189;'s interface is based on files,
-standard system utilities can be used to control its services.
-For example,
-its method of creating windows externally is packaged in a
-16-line shell script, called
-<TT>window</TT>,
-the core of which is just a
-<TT>mount</TT>
-operation that prefixes 8&#189;'s directory to
-<TT>/dev</TT>
-and runs a command passed on the argument line:
-<DL><DT><DD><TT><PRE>
-mount -b $'8&#189;serv' /dev
-$* &#60; /dev/cons &#62; /dev/cons &#62;[2] /dev/cons &amp;
-</PRE></TT></DL>
-The
-<TT>window</TT>
-program is typically employed by users to create their
-initial working environment when they boot the system, although
-it has more general possibilities.
-</P>
-<P>
-Other basic features of the system fall out naturally from the
-file-based model.
-When the user deletes a window, 8&#189; sends the equivalent of a
-UNIX
-signal to the process group &#173; the clients &#173; in the window,
-removes the window from the screen, and poisons the incoming connections
-to the files that drive it.  If a client ignores the signal and
-continues to write to the window, it will get I/O errors.
-If, on the other hand, all the processes in a window exit spontaneously,
-they will automatically close all connections to the window.
-8&#189; counts references to the window's files; when none are left,
-it shuts down the window and removes it from the screen.
-As a different example, when the user hits the DEL key to generate an
-interrupt,
-8&#189; writes a message to a special file, provided by Plan 9's
-process control interface, that interrupts all the processes
-in the window.
-In all these examples, the implementation works seamlessly
-across a network.
-</P>
-<P>
-There are two valuable side effects of implementing
-a window system by multiplexing
-<TT>/dev/cons</TT>
-and other such files.
-First, the problem of giving a meaningful
-interpretation to the file
-<TT>/dev/cons</TT>
-(<TT>/dev/tty</TT>)
-in each window is solved automatically.
-To provide
-<TT>/dev/cons</TT>
-is the fundamental job of the window system, rather than just an awkward burden;
-other systems must often make special and otherwise irrelevant arrangements for
-<TT>/dev/tty</TT>
-to behave as expected in a window.
-Second, any program that can access the server, including a
-process on a remote machine, can access the files using standard
-read and write system calls to communicate with the window system,
-and standard open and close calls to connect to it.
-Again, no special arrangements need to be made for remote processes to
-use all the graphics facilities of 8&#189;.
-</P>
-<H4>Graphical input
-</H4>
-<P>
-Of course 8&#189; offers more than ASCII I/O to its clients.
-The state of the mouse may be discovered by reading the file
-<TT>/dev/mouse</TT>,
-which returns a ten-byte message encoding the state
-of the buttons and the position of the cursor.
-If the mouse has not moved since the last read of
-<TT>/dev/mouse</TT>,
-or if the window associated with the instance of
-<TT>/dev/mouse</TT>
-is not the `input focus', the read blocks.
-</P>
-<P>
-The format of the message is:
-<DL><DT><DD><TT><PRE>
-<TT>'m'</TT>
-1 byte of button state
-4 bytes of x, low byte first
-4 bytes of y, low byte first
-</PRE></TT></DL>
-As in all shared data structures in Plan 9,
-the order of every byte in the message is defined 
-so all clients can execute the same code to unpack the message
-into a local data structure.
-</P>
-<P>
-For keyboard input, clients can read
-<TT>/dev/cons</TT>
-or, if they need character-at-a-time input,
-<TT>/dev/rcons</TT>
-(`raw console').
-There is no explicit event mechanism to help clients that need to read
-from multiple sources.
-Instead, a small (365 line) external
-support library can be used.
-It attaches a process
-to the various blocking input sources &#173; mouse, keyboard, and perhaps
-a third user-provided file descriptor &#173;
-and funnels their input into a single pipe from which may be read
-the various types of
-events in the traditional style.
-This package is a compromise.  As discussed in a previous paper
-[Pike89] I prefer
-to free applications from event-based programming.  Unfortunately, though, I see
-no easy way to achieve this in single-threaded C programs, and am unwilling
-to require all programmers to master concurrent programming.
-It should be noted, though, that even this compromise results in a small
-and easily understood interface.  An example program that uses it is
-given near the end of the paper.
-</P>
-<H4>Graphical output
-</H4>
-<P>
-The file
-<TT>/dev/screen</TT>
-may be read by any client to recover the contents of the entire screen,
-such as for printing (see Figure 1).
-Similarly,
-<TT>/dev/window</TT>
-holds the contents of the current window.
-These are read-only files.
-</P>
-<P>
-To perform graphics operations in their windows, client programs access
-<TT>/dev/bitblt</TT>.
-It implements a protocol that encodes bitmap graphics operations.
-Most of the messages in the protocol (there are 23 messages in all, about
-half to manage the multi-level fonts necessary for efficient handling
-of Unicode characters)
-are transmissions (via a write)
-from the client to the window system to perform a graphical
-operation such as a
-<TT>bitblt</TT>
-[PLR85] or character-drawing operation; a few include return information
-(recovered via a read) to the client.
-As with
-<TT>/dev/mouse</TT>,
-the
-<TT>/dev/bitblt</TT>
-protocol is in a defined byte order.
-Here, for example, is the layout of the
-<TT>bitblt</TT>
-message:
-<DL><DT><DD><TT><PRE>
-<TT>'b'</TT>
-2 bytes of destination id
-2x4 bytes of destination point
-2 bytes of source id
-4x4 bytes of source rectangle
-2 bytes of boolean function code
-</PRE></TT></DL>
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-Figure 1.
-A representative 8&#189; screen, running on a NeXTstation under Plan 9
-(with no NeXT software).  In the upper right, a program announces the
-arrival of mail.  In the top and left are a browser for astronomical
-databases and an image of a galaxy produced by the browser.
-In the lower left there is a screen editor,
-<TT>sam</TT>
-[Pike87],
-editing Japanese text encoded in UTF,
-and in the lower right an 8&#189; running recursively and, inside that instantiation,
-a previewer for
-<TT>troff</TT>
-output.
-Underneath the faces is a small window running the command that
-prints the screen by passing
-<TT>/dev/screen</TT>
-to the bitmap printing utility.
-<br>&#32;<br>
-</dl>
-<P>
-The message is trivially constructed from the
-<TT>bitblt</TT>
-subroutine in the library, defined as
-<DL><DT><DD><TT><PRE>
-void bitblt(Bitmap *dst, Point dp,
-            Bitmap *src, Rectangle sr, Fcode c).
-</PRE></TT></DL>
-</P>
-<P>
-The `id'
-fields in the message indicate another property of 8&#189;:
-the clients do not store the actual data for any of their bitmaps locally.
-Instead, the protocol provides a message to allocate a bitmap, to be
-stored in the server, and returns to the client an integer identifier,
-much like a
-UNIX
-file descriptor, to be used in operations on that bitmap.
-Bitmap number 0 is conventionally the client's window,
-analogous to standard input for file I/O.
-In fact, no bitmap graphics operations are executed in the client at all;
-they are all performed on its behalf by the server.
-Again, using the standard remote file operations in Plan 9,
-this permits remote machines having no graphics capability, such
-as the CPU server,
-to run graphics applications.
-Analogous features of the original Andrew window system [Gos86]
-and of X [Sche86] require more complex mechanisms.
-</P>
-<P>
-Nor does 8&#189; itself operate directly on bitmaps.
-Instead, it calls another server to do its graphics operations for it,
-using an identical protocol.
-The operating system for the Plan 9 terminals contains an internal
-server that implements that protocol, exactly as does 8&#189;, but for a single
-client.  That server stores the actual bytes for the bitmaps
-and implements the fundamental bitmap graphics operations.
-Thus the environment in which 8&#189; runs
-has exactly the structure it provides for its clients;
-8&#189; reproduces the environment for its clients,
-multiplexing the interface to keep the clients separate.
-</P>
-<P>
-This idea of multiplexing by simulation is applicable to more
-than window systems, of course, and has some side effects.
-Since 8&#189; simulates its own environment for its clients, it may run
-in one of its own windows (see Figure 1).
-A useful and common application of this
-technique is to connect a window to a remote machine, such as a CPU
-server, and run the window system there so that each subwindow is automatically
-on the remote machine.
-It is also a handy way to debug a new version of the window system
-or to create an environment with, for example, a different default font.
-</P>
-<H4>Implementation
-</H4>
-<P>
-To provide graphics to its clients, 8&#189; mostly just multiplexes and passes
-through to its own server the clients' requests, occasionally rearranging
-the messages to maintain the fiction that the clients have unique screens
-(windows).
-To manage the overlapping windows it uses the layers model,
-which is handled by a separate library [Pike83a].
-Thus it has little work to do and is a fairly simple program;
-it is dominated by a couple of switch statements to interpret
-the bitmap and file server protocols.
-The built-in window program and its associated menus and text-management
-support are responsible for most of the code.
-</P>
-<P>
-The operating system's server is also compact:
-the version for the 68020 processor, excluding the implementation
-of a half dozen bitmap graphics operations, is 2295 lines of C
-(again, about half dealing with fonts);
-the graphics operations are another 2214 lines.
-</P>
-<P>
-8&#189; is structured as a set of communicating coroutines,
-much as discussed in a 1989 paper [Pike89].
-One coroutine manages the mouse, another the keyboard, and another
-is instantiated to manage the state of each window and associated client.
-When no coroutine wishes to run, 8&#189; reads the next file I/O request from
-its clients, which arrive serially on the full-duplex communication pipe.
-Thus 8&#189; is entirely synchronous.
-</P>
-<P>
-The program source is small and compiles in about 10 seconds
-in our Plan 9 environment.  There are ten source files and
-one
-<TT>makefile</TT>
-totaling 5100 lines.
-This includes the source for the window management process,
-the cut-and-paste terminal program,
-the window/file server itself,
-and a small coroutine library
-(<TT>proc.c</TT>).
-It does not include the layer library
-(another 1031 lines)
-or the library to handle the cutting and pasting of text
-displayed in a window (960 lines),
-or the general graphics support library that manages all the
-non-drawing aspects of graphics &#173; arithmetic on points and rectangles,
-memory management, error handling, clipping, &#173; plus fonts,
-events, and non-primitive drawing operations such as circles and ellipses
-(a final 3051 lines).
-Not all the pieces of these libraries are used by 8&#189; itself;
-a large part of the graphics library in particular is used only by clients.
-Thus it is somewhat unfair to 8&#189; just to sum these numbers, including
-the 4509 lines of support in the kernel, and arrive
-at a total implementation size of 14651 lines of source to implement
-all of 8&#189; from the lowest levels to the highest.
-But that number gives a fair measure of the complexity of the overall system.
-</P>
-<P>
-The implementation is also efficient.
-8&#189;'s performance is competitive with X windows'.
-Compared using Dunwoody's and Linton's
-<TT>gbench</TT>
-benchmarks on the 68020,
-distributed with the ``X Test Suite'',
-circles and arcs are drawn about half as fast in 8&#189; as in
-X11 release 4 compiled with
-<TT>gcc</TT>
-for equivalent hardware,
-probably because they are currently implemented in a user library
-by calls to the
-<TT>point</TT>
-primitive.
-Line drawing speed is about equal between the two systems.
-Unicode text is drawn about the same speed by 8&#189; as ASCII text by
-X, and
-the
-<TT>bitblt</TT>
-test runs four times faster for 8&#189;.
-These numbers vary enough to caution against drawing sweeping
-conclusions, but they
-suggest that 8&#189;'s architecture does not penalize its performance.
-Finally, 8&#189; boots in under a second and creates a new window
-apparently instantaneously.
-</P>
-<H4>An example
-</H4>
-<P>
-Here is a complete program that runs under 8&#189;.
-It prints the string
-<TT>"hello world"</TT>
-wherever the left mouse button is depressed, and exits when the
-right mouse button is depressed.
-It also prints the string in the center of its window, and maintains
-that string when the window is resized.
-<DL><DT><DD><TT><PRE>
-#include &#60;u.h&#62;
-#include &#60;libc.h&#62;
-#include &#60;libg.h&#62;
-
-void
-ereshaped(Rectangle r)
-{
-    Point p;
-
-    screen.r = r;
-    bitblt(&amp;screen, screen.r.min, &amp;screen, r, Zero); /* clear */
-    p.x = screen.r.min.x + Dx(screen.r)/2;
-    p.y = screen.r.min.y + Dy(screen.r)/2;
-    p = sub(p, div(strsize(font, "hello world"), 2));
-    string(&amp;screen, p, font, "hello world", S);
-}
-
-main(void)
-{
-    Mouse m;
-
-    binit(0, 0, 0);	/* initialize graphics library */
-    einit(Emouse);	/* initialize event library */
-    ereshaped(screen.r);
-    for(;;){
-        m = emouse();
-        if(m.buttons &amp; RIGHTB)
-            break;
-        if(m.buttons &amp; LEFTB){
-            string(&amp;screen, m.xy, font, "hello world", S);
-            /* wait for release of button */
-            do; while(emouse().buttons &amp; LEFTB);
-        }
-    }
-}
-</PRE></TT></DL>
-The complete loaded binary is a little over 26K bytes on a 68020.
-This program should be compared to the similar ones in the excellent paper
-by Rosenthal [Rose88].
-(The current program does more: it also employs the mouse.)
-The clumsiest part is
-<TT>ereshaped</TT>,
-a function with a known name that is called from the event library
-whenever the window is
-reshaped or moved, as is discovered inelegantly but adequately
-by a special case of a mouse message.
-(Simple so-called expose events are not events
-at all in 8&#189;; the layer library takes care of them transparently.)
-The lesson of this program, with deference to Rosenthal, is that if
-the window system is cleanly designed a toolkit should be unnecessary
-for simple tasks.
-</P>
-<H4>Status
-</H4>
-<P>
-As of 1992, 8&#189; is in regular daily use by almost all the 60 people in our
-research center.  Some of those people use it to access Plan 9 itself; others
-use it as a front end to remote
-UNIX
-systems, much as one would use an X terminal.
-</P>
-<P>
-Some things about 8&#189; may change.
-It would be nice if its capabilities were more easily accessible
-from the shell.
-A companion to this paper [Pike91] proposes one way to do this,
-but that does not include any graphics functionality.
-Perhaps a textual version of the
-<TT>/dev/bitblt</TT>
-file is a way to proceed; that would allow, for example,
-<TT>awk</TT>
-programs to draw graphs directly.
-</P>
-<P>
-Can this style of window system be built on other operating systems?
-A major part of the design of 8&#189; depends on its structure as a file server.
-In principle this could be done for any system that supports user processes
-that serve files, such as any system running NFS or AFS [Sun89, Kaza87].
-One requirement, however, is 8&#189;'s need
-to respond to its clients' requests out of order:
-if one client reads
-<TT>/dev/cons</TT>
-in a window with no characters to be read,
-other clients should be able to perform I/O in their windows, or even
-the same window.
-Another constraint is that the 8&#189; files are like devices,
-and must not be cached by the client.
-NFS cannot honor these requirements; AFS may be able to.
-Of course, other interprocess communication mechanisms such as sockets
-could be used as a basis for a window system.  One may even argue that
-X's model fits into this overall scheme.  It may prove easy and worthwhile
-to write a small 8&#189;-like system for commercial
-UNIX
-systems to demonstrate that its merits can be won in systems other than
-Plan 9.
-</P>
-<H4>Conclusion
-</H4>
-<P>
-In conclusion, 8&#189; uses an unusual architecture in
-concert with the file-oriented interprocess communication of Plan 9
-to provide network-based interactive graphics to client programs.
-It demonstrates that even production-quality window systems are not
-inherently large or complicated
-and may be simple to use and to program.
-</P>
-<H4>Acknowledgements
-</H4>
-<P>
-Helpful comments on early drafts of this paper were made by
-Doug Blewett,
-Stu Feldman,
-Chris Fraser,
-Brian Kernighan,
-Dennis Ritchie,
-and Phil Winterbottom.
-8&#189;'s support for color was added by Howard Trickey.
-Many of the ideas leading to 8&#189; were tried out in earlier, sometimes less
-successful, programs.  I would like to thank those users who suffered
-through some of my previous 7&#189; window systems.
-</P>
-<H4>References
-</H4>
-<br>&#32;<br>
-[Duff90] Tom Duff, ``Rc - A Shell for Plan 9 and UNIX systems'', Proc. of the Summer 1990 UKUUG Conf., London, July, 1990, pp. 21-33, reprinted, in a different form, in this volume.
-<br>&#32;<br>
-[Far89] Far too many people, XTERM(1), Massachusetts Institute of Technology, 1989.
-<br>&#32;<br>
-[Gos86] James Gosling and David Rosenthal,
-``A window manager for bitmapped displays and UNIX'', in Methodology of Window Management, edited by F.R.A. Hopgood et al., Springer, 1986.
-<br>&#32;<br>
-[Kaza87] Mike Kazar, ``Synchronization and Caching issues in the Andrew File System'', Tech. Rept. CMU-ITC-058, Information Technology Center, Carnegie Mellon University, June, 1987.
-<br>&#32;<br>
-[Kill84] Tom Killian, ``Processes as Files'', USENIX Summer Conf. Proc., Salt Lake City June, 1984.
-<br>&#32;<br>
-[Pike83] Rob Pike, ``The Blit: A Multiplexed Graphics Terminal'', Bell Labs Tech. J., V63, #8, part 2, pp. 1607-1631.
-<br>&#32;<br>
-[Pike83a] Rob Pike, ``Graphics in Overlapping Bitmap Layers'', Trans. on Graph., Vol 2, #2, 135-160, reprinted in Proc. SIGGRAPH '83, pp. 331-356.
-<br>&#32;<br>
-[Pike87] Rob Pike, ``The Text Editor <TT>sam</TT>'', Softw. - Prac. and Exp., Nov 1987, Vol 17 #11, pp. 813-845, reprinted in this volume.
-<br>&#32;<br>
-[Pike88] Rob Pike, ``Window Systems Should Be Transparent'', Comp. Sys., Summer 1988, Vol 1 #3, pp. 279-296.
-<br>&#32;<br>
-[Pike89] Rob Pike, ``A Concurrent Window System'', Comp. Sys., Spring 1989, Vol 2 #2, pp. 133-153.
-<br>&#32;<br>
-[Pike91] Rob Pike, ``A Minimalist Global User Interface'', USENIX Summer Conf. Proc., Nashville, June, 1991.
-<br>&#32;<br>
-[Pike92] Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom,
-``The Use of Name Spaces in Plan 9'',
-Operating Systems Review
-Vol 27, #2, Apr 1993, pp. 72-76
-(reprinted from Proceedings of the 5th ACM SIGOPS European Workshop, Mont Saint-Michel, 1992, Paper n&#186; 34, and reprinted in this volume).
-<br>&#32;<br>
-[Pike93] Rob Pike and Ken Thompson, ``Hello World or &#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949; or &#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'', USENIX Winter Conf. Proc., San Diego, Jan, 1993, reprinted in this volume.
-<br>&#32;<br>
-[PLR85] Rob Pike, Bart Locanthi and John Reiser, ``Hardware/Software Tradeoffs for Bitmap Graphics on the Blit'', Softw. - Prac. and Exp., Feb 1985, Vol 15 #2, pp. 131-152.
-<br>&#32;<br>
-[Pres90] David L. Presotto and Dennis M. Ritchie, ``Interprocess Communication in the Ninth Edition Unix System'', Softw. - Prac. and Exp., June 1990, Vol 20 #S1, pp. S1/3-S1/17.
-<br>&#32;<br>
-[Rose88] David Rosenthal, ``A Simple X11 Client Program -or- How hard can it really be to write ``Hello, World''?'', USENIX Winter Conf. Proc., Dallas, Jan, 1988, pp. 229-242.
-<br>&#32;<br>
-[Sche86] Robert W. Scheifler and Jim Gettys,
-``The X Window System'',
-ACM Trans. on Graph., Vol 5 #2, pp. 79-109.
-<br>&#32;<br>
-[Sun89] Sun Microsystems, NFS: Network file system protocol specification,
-RFC 1094, Network Information Center, SRI International, March, 1989.
-<br>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 2373
sys/doc/9.html

@@ -1,2373 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<title>
-Plan 9 from Bell Labs
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<center><H1>Plan 9 from Bell Labs
-</H1>
-<DL><DD><I>Rob Pike<br>
-Dave Presotto<br>
-Sean Dorward<br>
-Bob Flandrena<br>
-Ken Thompson<br>
-Howard Trickey<br>
-Phil Winterbottom<br>
-Bell Laboratories, Murray Hill, NJ, 07974
-USA<br>
-</I></DL></center>
-<H4>Motivation
-</H4>
-<P>
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Appeared in a slightly different form in
-Computing Systems,
-Vol 8 #3, Summer 1995, pp. 221-254.
-</I><DT>&#32;<DD></dl>
-<br>
-By the mid 1980's, the trend in computing was
-away from large centralized time-shared computers towards
-networks of smaller, personal machines,
-typically UNIX `workstations'.
-People had grown weary of overloaded, bureaucratic timesharing machines
-and were eager to move to small, self-maintained systems, even if that
-meant a net loss in computing power.
-As microcomputers became faster, even that loss was recovered, and
-this style of computing remains popular today.
-<P>
-In the rush to personal workstations, though, some of their weaknesses
-were overlooked.
-First, the operating system they run, UNIX, is itself an old timesharing system and
-has had trouble adapting to ideas
-born after it.  Graphics and networking were added to UNIX well into
-its lifetime and remain poorly integrated and difficult to administer.
-More important, the early focus on having private machines
-made it difficult for networks of machines to serve as seamlessly as the old
-monolithic timesharing systems.
-Timesharing centralized the management
-and amortization of costs and resources;
-personal computing fractured, democratized, and ultimately amplified
-administrative problems.
-The choice of
-an old timesharing operating system to run those personal machines
-made it difficult to bind things together smoothly.
-</P>
-<P>
-Plan 9 began in the late 1980's as an attempt to have it both
-ways: to build a system that was centrally administered and cost-effective
-using cheap modern microcomputers as its computing elements.
-The idea was to build a time-sharing system out of workstations, but in a novel way.
-Different computers would handle
-different tasks: small, cheap machines in people's offices would serve
-as terminals providing access to large, central, shared resources such as computing
-servers and file servers.  For the central machines, the coming wave of
-shared-memory multiprocessors seemed obvious candidates.
-The philosophy is much like that of the Cambridge
-Distributed System [NeHe82].
-The early catch phrase was to build a UNIX out of a lot of little systems,
-not a system out of a lot of little UNIXes.
-</P>
-<P>
-The problems with UNIX were too deep to fix, but some of its ideas could be
-brought along.  The best was its use of the file system to coordinate
-naming of and access to resources, even those, such as devices, not traditionally
-treated as files.
-For Plan 9, we adopted this idea by designing a network-level protocol, called 9P,
-to enable machines to access files on remote systems.
-Above this, we built a naming
-system that lets people and their computing agents build customized views
-of the resources in the network.
-This is where Plan 9 first began to look different:
-a Plan 9 user builds a private computing environment and recreates it wherever
-desired, rather than doing all computing on a private machine.
-It soon became clear that this model was richer
-than we had foreseen, and the ideas of per-process name spaces
-and file-system-like resources were extended throughout
-the system&#x2014;to processes, graphics, even the network itself.
-</P>
-<P>
-By 1989 the system had become solid enough
-that some of us began using it as our exclusive computing environment.
-This meant bringing along many of the services and applications we had
-used on UNIX.  We used this opportunity to revisit many issues, not just
-kernel-resident ones, that we felt UNIX addressed badly.
-Plan 9 has new compilers,
-languages,
-libraries,
-window systems,
-and many new applications.
-Many of the old tools were dropped, while those brought along have
-been polished or rewritten.
-</P>
-<P>
-Why be so all-encompassing?
-The distinction between operating system, library, and application
-is important to the operating system researcher but uninteresting to the
-user.  What matters is clean functionality.
-By building a complete new system,
-we were able to solve problems where we thought they should be solved.
-For example, there is no real `tty driver' in the kernel; that is the job of the window
-system.
-In the modern world, multi-vendor and multi-architecture computing
-are essential, yet the usual compilers and tools assume the program is being
-built to run locally; we needed to rethink these issues.
-Most important, though, the test of a system is the computing
-environment it provides.
-Producing a more efficient way to run the old UNIX warhorses
-is empty engineering;
-we were more interested in whether the new ideas suggested by
-the architecture of the underlying system encourage a more effective way of working.
-Thus, although Plan 9 provides an emulation environment for
-running POSIX commands, it is a backwater of the system.
-The vast majority
-of system software is developed in the `native' Plan 9 environment.
-</P>
-<P>
-There are benefits to having an all-new system.
-First, our laboratory has a history of building experimental peripheral boards.
-To make it easy to write device drivers,
-we want a system that is available in source form
-(no longer guaranteed with UNIX, even
-in the laboratory in which it was born).
-Also, we want to redistribute our work, which means the software
-must be locally produced.  For example, we could have used some vendors'
-C compilers for our system, but even had we overcome the problems with
-cross-compilation, we would have difficulty
-redistributing the result.
-</P>
-<P>
-This paper serves as an overview of the system.  It discusses the architecture
-from the lowest building blocks to the computing environment seen by users.
-It also serves as an introduction to the rest of the Plan 9 Programmer's Manual,
-which it accompanies.  More detail about topics in this paper
-can be found elsewhere in the manual.
-</P>
-<H4>Design
-</H4>
-<P>
-The view of the system is built upon three principles.
-First, resources are named and accessed like files in a hierarchical file system.
-Second, there is a standard protocol, called 9P, for accessing these
-resources.
-Third, the disjoint hierarchies provided by different services are
-joined together into a single private hierarchical file name space.
-The unusual properties of Plan 9 stem from the consistent, aggressive
-application of these principles.
-</P>
-<P>
-A large Plan 9 installation has a number of computers networked
-together, each providing a particular class of service.
-Shared multiprocessor servers provide computing cycles;
-other large machines offer file storage.
-These machines are located in an air-conditioned machine
-room and are connected by high-performance networks.
-Lower bandwidth networks such as Ethernet or ISDN connect these
-servers to office- and home-resident workstations or PCs, called terminals
-in Plan 9 terminology.
-Figure 1 shows the arrangement.
-<DL><DT><DD><TT><PRE>
-<br><img src="network.pic.0.gif"><br>
-</PRE></TT></DL>
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-<I>Figure 1. Structure of a large Plan 9 installation.</I>
-CPU servers and file servers share fast local-area networks,
-while terminals use slower wider-area networks such as Ethernet,
-Datakit, or telephone lines to connect to them.
-Gateway machines, which are just CPU servers connected to multiple
-networks, allow machines on one network to see another.
-<br>&#32;<br>
-</dl>
-<P>
-The modern style of computing offers each user a dedicated workstation or PC.
-Plan 9's approach is different.
-The various machines with screens, keyboards, and mice all provide
-access to the resources of the network, so they are functionally equivalent,
-in the manner of the terminals attached to old timesharing systems.
-When someone uses the system, though,
-the terminal is temporarily personalized by that user.
-Instead of customizing the hardware, Plan 9 offers the ability to customize
-one's view of the system provided by the software.
-That customization is accomplished by giving local, personal names for the
-publicly visible resources in the network.
-Plan 9 provides the mechanism to assemble a personal view of the public
-space with local names for globally accessible resources.
-Since the most important resources of the network are files, the model
-of that view is file-oriented.
-</P>
-<P>
-The client's local name space provides a way to customize the user's
-view of the network.  The services available in the network all export file
-hierarchies.
-Those important to the user are gathered together into
-a custom name space; those of no immediate interest are ignored.
-This is a different style of use from the idea of a `uniform global name space'.
-In Plan 9, there are known names for services and uniform names for
-files exported by those services,
-but the view is entirely local.  As an analogy, consider the difference
-between the phrase `my house' and the precise address of the speaker's
-home.  The latter may be used by anyone but the former is easier to say and
-makes sense when spoken.
-It also changes meaning depending on who says it,
-yet that does not cause confusion.
-Similarly, in Plan 9 the name
-<TT>/dev/cons</TT>
-always refers to the user's terminal and
-<TT>/bin/date</TT>
-the correct version of the date
-command to run,
-but which files those names represent depends on circumstances such as the
-architecture of the machine executing
-<TT>date</TT>.
-Plan 9, then, has local name spaces that obey globally understood
-conventions;
-it is the conventions that guarantee sane behavior in the presence
-of local names.
-</P>
-<P>
-The 9P protocol is structured as a set of transactions that
-send a request from a client to a (local or remote) server and return the result.
-9P controls file systems, not just files:
-it includes procedures to resolve file names and traverse the name
-hierarchy of the file system provided by the server.
-On the other hand,
-the client's name space is held by the client system alone, not on or with the server,
-a distinction from systems such as Sprite [OCDNW88].
-Also, file access is at the level of bytes, not blocks, which distinguishes
-9P from protocols like NFS and RFS.
-A paper by Welch compares Sprite, NFS, and Plan 9's network file system structures [Welc94].
-</P>
-<P>
-This approach was designed with traditional files in mind,
-but can be extended
-to many other resources.
-Plan 9 services that export file hierarchies include I/O devices,
-backup services,
-the window system,
-network interfaces,
-and many others.
-One example is the process file system,
-<TT>/proc</TT>,
-which provides a clean way
-to examine and control running processes.
-Precursor systems had a similar idea [Kill84], but Plan 9 pushes the
-file metaphor much further [PPTTW93].
-The file system model is well-understood, both by system builders and general users,
-so services that present file-like interfaces are easy to build, easy to understand,
-and easy to use.
-Files come with agreed-upon rules for
-protection,
-naming,
-and access both local and remote,
-so services built this way are ready-made for a distributed system.
-(This is a distinction from `object-oriented' models, where these issues
-must be faced anew for every class of object.)
-Examples in the sections that follow illustrate these ideas in action.
-</P>
-<H4>The Command-level View
-</H4>
-<P>
-Plan 9 is meant to be used from a machine with a screen running
-the window system.
-It has no notion of `teletype' in the UNIX sense.  The keyboard handling of
-the bare system is rudimentary, but once the window system, 8&frac12; [Pike91],
-is running,
-text can be edited with `cut and paste' operations from a pop-up menu,
-copied between windows, and so on.
-8&frac12; permits editing text from the past, not just on the current input line.
-The text-editing capabilities of 8&frac12; are strong enough to displace
-special features such as history in the shell,
-paging and scrolling,
-and mail editors.
-8&frac12; windows do not support cursor addressing and,
-except for one terminal emulator to simplify connecting to traditional systems,
-there is no cursor-addressing software in Plan 9.
-</P>
-<P>
-Each window is created in a separate name space.
-Adjustments made to the name space in a window do not affect other windows
-or programs, making it safe to experiment with local modifications to the name
-space, for example
-to substitute files from the dump file system when debugging.
-Once the debugging is done, the window can be deleted and all trace of the
-experimental apparatus is gone.
-Similar arguments apply to the private space each window has for environment
-variables, notes (analogous to UNIX signals), etc.
-</P>
-<P>
-Each window is created running an application, such as the shell, with
-standard input and output connected to the editable text of the window.
-Each window also has a private bitmap and multiplexed access to the
-keyboard, mouse, and other graphical resources through files like
-<TT>/dev/mouse</TT>,
-<TT>/dev/bitblt</TT>,
-and
-<TT>/dev/cons</TT>
-(analogous to UNIX's
-<TT>/dev/tty</TT>).
-These files are provided by 8&frac12;, which is implemented as a file server.
-Unlike X windows, where a new application typically creates a new window
-to run in, an 8&frac12; graphics application usually runs in the window where it starts.
-It is possible and efficient for an application to create a new window, but
-that is not the style of the system.
-Again contrasting to X, in which a remote application makes a network
-call to the X server to start running,
-a remote 8&frac12; application sees the
-<TT>mouse</TT>,
-<TT>bitblt</TT>,
-and
-<TT>cons</TT>
-files for the window as usual in
-<TT>/dev</TT>;
-it does not know whether the files are local.
-It just reads and writes them to control the window;
-the network connection is already there and multiplexed.
-</P>
-<P>
-The intended style of use is to run interactive applications such as the window
-system and text editor on the terminal and to run computation- or file-intensive
-applications on remote servers.
-Different windows may be running programs on different machines over
-different networks, but by making the name space equivalent in all windows,
-this is transparent: the same commands and resources are available, with the same names,
-wherever the computation is performed.
-</P>
-<P>
-The command set of Plan 9 is similar to that of UNIX.
-The commands fall into several broad classes.  Some are new programs for
-old jobs: programs like
-<TT>ls</TT>,
-<TT>cat</TT>,
-and
-<TT>who</TT>
-have familiar names and functions but are new, simpler implementations.
-<TT>Who</TT>,
-for example, is a shell script, while
-<TT>ps</TT>
-is just 95 lines of C code.
-Some commands are essentially the same as their UNIX ancestors:
-<TT>awk</TT>,
-<TT>troff</TT>,
-and others have been converted to ANSI C and extended to handle
-Unicode, but are still the familiar tools.
-Some are entirely new programs for old niches: the shell
-<TT>rc</TT>,
-text editor
-<TT>sam</TT>,
-debugger
-<TT>acid</TT>,
-and others
-displace the better-known UNIX tools with similar jobs.
-Finally, about half the commands are new.
-</P>
-<P>
-Compatibility was not a requirement for the system.
-Where the old commands or notation seemed good enough, we
-kept them.  When they didn't, we replaced them.
-</P>
-<H4>The File Server
-</H4>
-<P>
-A central file server stores permanent files and presents them to the network
-as a file hierarchy exported using 9P.
-The server is a stand-alone system, accessible only over the network,
-designed to do its one job well.
-It runs no user processes, only a fixed set of routines compiled into the
-boot image.
-Rather than a set of disks or separate file systems,
-the main hierarchy exported by the server is a single
-tree, representing files on many disks.
-That hierarchy is
-shared by many users over a wide area on a variety of networks.
-Other file trees exported by
-the server include
-special-purpose systems such as temporary storage and, as explained
-below, a backup service.
-</P>
-<P>
-The file server has three levels of storage.
-The central server in our installation has
-about 100 megabytes of memory buffers,
-27 gigabytes of magnetic disks,
-and 350 gigabytes of
-bulk storage in a write-once-read-many (WORM) jukebox.
-The disk is a cache for the WORM and the memory is a cache for the disk;
-each is much faster, and sees about an order of magnitude more traffic,
-than the level it caches.
-The addressable data in the file system can be larger than the size of the
-magnetic disks, because they are only a cache;
-our main file server has about 40 gigabytes of active storage.
-</P>
-<P>
-The most unusual feature of the file server
-comes from its use of a WORM device for
-stable storage.
-Every morning at 5 o'clock, a
-<I>dump</I>
-of the file system occurs automatically.
-The file system is frozen and
-all blocks modified since the last dump
-are queued to be written to the WORM.
-Once the blocks are queued,
-service is restored and
-the read-only root of the dumped
-file system appears in a
-hierarchy of all dumps ever taken, named by its date.
-For example, the directory
-<TT>/n/dump/1995/0315</TT>
-is the root directory of an image of the file system
-as it appeared in the early morning of March 15, 1995.
-It takes a few minutes to queue the blocks,
-but the process to copy blocks to the WORM, which runs in the background, may take hours.
-</P>
-<P>
-There are two ways the dump file system is used.
-The first is by the users themselves, who can browse the
-dump file system directly or attach pieces of
-it to their name space.
-For example, to track down a bug,
-it is straightforward to try the compiler from three months ago
-or to link a program with yesterday's library.
-With daily snapshots of all files,
-it is easy to find when a particular change was
-made or what changes were made on a particular date.
-People feel free to make large speculative changes
-to files in the knowledge that they can be backed
-out with a single
-copy command.
-There is no backup system as such;
-instead, because the dump
-is in the file name space, 
-backup problems can be solved with
-standard tools
-such as
-<TT>cp</TT>,
-<TT>ls</TT>,
-<TT>grep</TT>,
-and
-<TT>diff</TT>.
-</P>
-<P>
-The other (very rare) use is complete system backup.
-In the event of disaster,
-the active file system can be initialized from any dump by clearing the
-disk cache and setting the root of
-the active file system to be a copy
-of the dumped root.
-Although easy to do, this is not to be taken lightly:
-besides losing any change made after the date of the dump, this recovery method
-results in a very slow system.
-The cache must be reloaded from WORM, which is much
-slower than magnetic disks.
-The file system takes a few days to reload the working
-set and regain its full performance.
-</P>
-<P>
-Access permissions of files in the dump are the same
-as they were when the dump was made.
-Normal utilities have normal
-permissions in the dump without any special arrangement.
-The dump file system is read-only, though,
-which means that files in the dump cannot be written regardless of their permission bits;
-in fact, since directories are part of the read-only structure,
-even the permissions cannot be changed.
-</P>
-<P>
-Once a file is written to WORM, it cannot be removed,
-so our users never see
-``please clean up your files''
-messages and there is no
-<TT>df</TT>
-command.
-We regard the WORM jukebox as an unlimited resource.
-The only issue is how long it will take to fill.
-Our WORM has served a community of about 50 users
-for five years and has absorbed daily dumps, consuming a total of
-65% of the storage in the jukebox.
-In that time, the manufacturer has improved the technology,
-doubling the capacity of the individual disks.
-If we were to upgrade to the new media,
-we would have more free space than in the original empty jukebox.
-Technology has created storage faster than we can use it.
-</P>
-<H4>Unusual file servers
-</H4>
-<P>
-Plan 9 is characterized by a variety of servers that offer
-a file-like interface to unusual services.
-Many of these are implemented by user-level processes, although the distinction
-is unimportant to their clients; whether a service is provided by the kernel,
-a user process, or a remote server is irrelevant to the way it is used.
-There are dozens of such servers; in this section we present three representative ones.
-</P>
-<P>
-Perhaps the most remarkable file server in Plan 9 is 8&frac12;, the window system.
-It is discussed at length elsewhere [Pike91], but deserves a brief explanation here.
-8&frac12; provides two interfaces: to the user seated at the terminal, it offers a traditional
-style of interaction with multiple windows, each running an application, all controlled
-by a mouse and keyboard.
-To the client programs, the view is also fairly traditional:
-programs running in a window see a set of files in
-<TT>/dev</TT>
-with names like
-<TT>mouse</TT>,
-<TT>screen</TT>,
-and
-<TT>cons</TT>.
-Programs that want to print text to their window write to
-<TT>/dev/cons</TT>;
-to read the mouse, they read
-<TT>/dev/mouse</TT>.
-In the Plan 9 style, bitmap graphics is implemented by providing a file
-<TT>/dev/bitblt</TT>
-on which clients write encoded messages to execute graphical operations such as
-<TT>bitblt</TT>
-(RasterOp).
-What is unusual is how this is done:
-8&frac12; is a file server, serving the files in
-<TT>/dev</TT>
-to the clients running in each window.
-Although every window looks the same to its client,
-each window has a distinct set of files in
-<TT>/dev</TT>.
-8&frac12; multiplexes its clients' access to the resources of the terminal
-by serving multiple sets of files.  Each client is given a private name space
-with a
-<I>different</I>
-set of files that behave the same as in all other windows.
-There are many advantages to this structure.
-One is that 8&frac12; serves the same files it needs for its own implementation&#x2014;it
-multiplexes its own interface&#x2014;so it may be run, recursively, as a client of itself.
-Also, consider the implementation of
-<TT>/dev/tty</TT>
-in UNIX, which requires special code in the kernel to redirect
-<TT>open</TT>
-calls to the appropriate device.
-Instead, in 8&frac12; the equivalent service falls out
-automatically: 8&frac12; serves
-<TT>/dev/cons</TT>
-as its basic function; there is nothing extra to do.
-When a program wants to
-read from the keyboard, it opens
-<TT>/dev/cons</TT>,
-but it is a private file, not a shared one with special properties.
-Again, local name spaces make this possible; conventions about the consistency of
-the files within them make it natural.
-</P>
-<P>
-8&frac12; has a unique feature made possible by its design.
-Because it is implemented as a file server,
-it has the power to postpone answering read requests for a particular window.
-This behavior is toggled by a reserved key on the keyboard.
-Toggling once suspends client reads from the window;
-toggling again resumes normal reads, which absorb whatever text has been prepared,
-one line at a time.
-This allows the user to edit multi-line input text on the screen before the application sees it,
-obviating the need to invoke a separate editor to prepare text such as mail
-messages.
-A related property is that reads are answered directly from the
-data structure defining the text on the display: text may be edited until
-its final newline makes the prepared line of text readable by the client.
-Even then, until the line is read, the text the client will read can be changed.
-For example, after typing
-<DL><DT><DD><TT><PRE>
-% make
-rm *
-</PRE></TT></DL>
-to the shell, the user can backspace over the final newline at any time until
-<TT>make</TT>
-finishes, holding off execution of the
-<TT>rm</TT>
-command, or even point with the mouse
-before the
-<TT>rm</TT>
-and type another command to be executed first.
-</P>
-<P>
-There is no
-<TT>ftp</TT>
-command in Plan 9.  Instead, a user-level file server called
-<TT>ftpfs</TT>
-dials the FTP site, logs in on behalf of the user, and uses the FTP protocol
-to examine files in the remote directory.
-To the local user, it offers a file hierarchy, attached to
-<TT>/n/ftp</TT>
-in the local name space, mirroring the contents of the FTP site.
-In other words, it translates the FTP protocol into 9P to offer Plan 9 access to FTP sites.
-The implementation is tricky;
-<TT>ftpfs</TT>
-must do some sophisticated caching for efficiency and
-use heuristics to decode remote directory information.
-But the result is worthwhile:
-all the local file management tools such as
-<TT>cp</TT>,
-<TT>grep</TT>,
-<TT>diff</TT>,
-and of course
-<TT>ls</TT>
-are available to FTP-served files exactly as if they were local files.
-Other systems such as Jade and Prospero
-have exploited the same opportunity [Rao91, Neu92],
-but because of local name spaces and the simplicity of implementing 9P,
-this approach
-fits more naturally into Plan 9 than into other environments.
-</P>
-<P>
-One server,
-<TT>exportfs</TT>,
-is a user process that takes a portion of its own name space and
-makes it available to other processes by
-translating 9P requests into system calls to the Plan 9 kernel.
-The file hierarchy it exports may contain files from multiple servers.
-<TT>Exportfs</TT>
-is usually run as a remote server
-started by a local program,
-either
-<TT>import</TT>
-or
-<TT>cpu</TT>.
-<TT>Import</TT>
-makes a network call to the remote machine, starts
-<TT>exportfs</TT>
-there, and attaches its 9P connection to the local name space.  For example,
-<DL><DT><DD><TT><PRE>
-import helix /net
-</PRE></TT></DL>
-makes Helix's network interfaces visible in the local
-<TT>/net</TT>
-directory.  Helix is a central server and
-has many network interfaces, so this permits a machine with one network to
-access any of Helix's networks.  After such an import, the local
-machine may make calls on any of the networks connected to Helix.
-Another example is
-<DL><DT><DD><TT><PRE>
-import helix /proc
-</PRE></TT></DL>
-which makes Helix's processes visible in the local
-<TT>/proc</TT>,
-permitting local debuggers to examine remote processes.
-</P>
-<P>
-The
-<TT>cpu</TT>
-command connects the local terminal to a remote
-CPU server.
-It works in the opposite direction to
-<TT>import</TT>:
-after calling the server, it starts a
-<I>local</I>
-<TT>exportfs</TT>
-and mounts it in the name space of a process, typically a newly created shell, on the
-server.
-It then rearranges the name space
-to make local device files (such as those served by
-the terminal's window system) visible in the server's
-<TT>/dev</TT>
-directory.
-The effect of running a
-<TT>cpu</TT>
-command is therefore to start a shell on a fast machine, one more tightly
-coupled to the file server,
-with a name space analogous
-to the local one.
-All local device files are visible remotely, so remote applications have full
-access to local services such as bitmap graphics,
-<TT>/dev/cons</TT>,
-and so on.
-This is not the same as
-<TT>rlogin</TT>,
-which does nothing to reproduce the local name space on the remote system,
-nor is it the same as
-file sharing with, say, NFS, which can achieve some name space equivalence but
-not the combination of access to local hardware devices, remote files, and remote
-CPU resources.
-The
-<TT>cpu</TT>
-command is a uniquely transparent mechanism.
-For example, it is reasonable
-to start a window system in a window running a
-<TT>cpu</TT>
-command; all windows created there automatically start processes on the CPU server.
-</P>
-<H4>Configurability and administration
-</H4>
-<P>
-The uniform interconnection of components in Plan 9 makes it possible to configure
-a Plan 9 installation many different ways.
-A single laptop PC can function as a stand-alone Plan 9 system;
-at the other extreme, our setup has central multiprocessor CPU
-servers and file servers and scores of terminals ranging from small PCs to
-high-end graphics workstations.
-It is such large installations that best represent how Plan 9 operates.
-</P>
-<P>
-The system software is portable and the same
-operating system runs on all hardware.
-Except for performance, the appearance of the system on, say,
-an SGI workstation is the same
-as on a laptop.
-Since computing and file services are centralized, and terminals have
-no permanent file storage, all terminals are functionally identical.
-In this way, Plan 9 has one of the good properties of old timesharing systems, where
-a user could sit in front of any machine and see the same system.  In the modern
-workstation community, machines tend to be owned by people who customize them
-by storing private information on local disk.
-We reject this style of use,
-although the system itself can be used this way.
-In our group, we have a laboratory with many public-access machines&#x2014;a terminal
-room&#x2014;and a user may sit down at any one of them and work.
-</P>
-<P>
-Central file servers centralize not just the files, but also their administration
-and maintenance.
-In fact, one server is the main server, holding all system files; other servers provide
-extra storage or are available for debugging and other special uses, but the system
-software resides on one machine.
-This means that each program
-has a single copy of the binary for each architecture, so it is
-trivial to install updates and bug fixes.
-There is also a single user database; there is no need to synchronize distinct
-<TT>/etc/passwd</TT>
-files.
-On the other hand, depending on a single central server does limit the size of an installation.
-</P>
-<P>
-Another example of the power of centralized file service
-is the way Plan 9 administers network information.
-On the central server there is a directory,
-<TT>/lib/ndb</TT>,
-that contains all the information necessary to administer the local Ethernet and
-other networks.
-All the machines use the same database to talk to the network; there is no
-need to manage a distributed naming system or keep parallel files up to date.
-To install a new machine on the local Ethernet, choose a
-name and IP address and add these to a single file in
-<TT>/lib/ndb</TT>;
-all the machines in the installation will be able to talk to it immediately.
-To start running, plug the machine into the network, turn it on, and use BOOTP
-and TFTP to load the kernel.
-All else is automatic.
-</P>
-<P>
-Finally,
-the automated dump file system frees all users from the need to maintain
-their systems, while providing easy access to backup files without
-tapes, special commands, or the involvement of support staff.
-It is difficult to overstate the improvement in lifestyle afforded by this service.
-</P>
-<P>
-Plan 9 runs on a variety of hardware without
-constraining how to configure an installation.
-In our laboratory, we
-chose to use central servers because they amortize costs and administration.
-A sign that this is a good decision is that our cheap
-terminals remain comfortable places
-to work for about five years, much longer than workstations that must provide
-the complete computing environment.
-We do, however, upgrade the central machines, so
-the computation available from even old Plan 9 terminals improves with time.
-The money saved by avoiding regular upgrades of terminals
-is instead spent on the newest, fastest multiprocessor servers.
-We estimate this costs about half the money of networked workstations
-yet provides general access to more powerful machines.
-</center></P>
-<H4>C Programming
-</H4>
-<P>
-Plan 9 utilities are written in several languages.
-Some are scripts for the shell,
-<TT>rc</TT>
-[Duff90]; a handful
-are written in a new C-like concurrent language called Alef [Wint95], described below.
-The great majority, though, are written in a dialect of ANSI C [ANSIC].
-Of these, most are entirely new programs, but some
-originate in pre-ANSI C code
-from our research UNIX system [UNIX85].
-These have been updated to ANSI C
-and reworked for portability and cleanliness.
-</P>
-<P>
-The Plan 9 C dialect has some minor extensions,
-described elsewhere [Pike95], and a few major restrictions.
-The most important restriction is that the compiler demands that
-all function definitions have ANSI prototypes
-and all function calls appear in the scope of a prototyped declaration
-of the function.
-As a stylistic rule,
-the prototyped declaration is placed in a header file
-included by all files that call the function.
-Each system library has an associated header file, declaring all
-functions in that library.
-For example, the standard Plan 9 library is called
-<TT>libc</TT>,
-so all C source files include
-<TT>&lt;libc.h&gt;</TT>.
-These rules guarantee that all functions
-are called with arguments having the expected types &#x2014; something
-that was not true with pre-ANSI C programs.
-</P>
-<P>
-Another restriction is that the C compilers accept only a subset of the
-preprocessor directives required by ANSI.
-The main omission is
-<TT>#if</TT>,
-since we believe it
-is never necessary and often abused.
-Also, its effect is
-better achieved by other means.
-For instance, an
-<TT>#if</TT>
-used to toggle a feature at compile time can be written
-as a regular
-<TT>if</TT>
-statement, relying on compile-time constant folding and
-dead code elimination to discard object code.
-</P>
-<P>
-Conditional compilation, even with
-<TT>#ifdef</TT>,
-is used sparingly in Plan 9.
-The only architecture-dependent
-<TT>#ifdefs</TT>
-in the system are in low-level routines in the graphics library.
-Instead, we avoid such dependencies or, when necessary, isolate
-them in separate source files or libraries.
-Besides making code hard to read,
-<TT>#ifdefs</TT>
-make it impossible to know what source is compiled into the binary
-or whether source protected by them will compile or work properly.
-They make it harder to maintain software.
-</P>
-<P>
-The standard Plan 9 library overlaps much of
-ANSI C and POSIX [POSIX], but diverges
-when appropriate to Plan 9's goals or implementation.
-When the semantics of a function
-change, we also change the name.
-For instance, instead of UNIX's
-<TT>creat</TT>,
-Plan 9 has a
-<TT>create</TT>
-function that takes three arguments,
-the original two plus a third that, like the second
-argument of
-<TT>open</TT>,
-defines whether the returned file descriptor is to be opened for reading,
-writing, or both.
-This design was forced by the way 9P implements creation,
-but it also simplifies the common use of
-<TT>create</TT>
-to initialize a temporary file.
-</P>
-<P>
-Another departure from ANSI C is that Plan 9 uses a 16-bit character set
-called Unicode [ISO10646, Unicode].
-Although we stopped short of full internationalization,
-Plan 9 treats the representation
-of all major languages uniformly throughout all its
-software.
-To simplify the exchange of text between programs, the characters are packed into
-a byte stream by an encoding we designed, called UTF-8,
-which is now
-becoming accepted as a standard [FSSUTF].
-It has several attractive properties,
-including byte-order independence,
-backwards compatibility with ASCII,
-and ease of implementation.
-</P>
-<P>
-There are many problems in adapting existing software to a large
-character set with an encoding that represents characters with
-a variable number of bytes.
-ANSI C addresses some of the issues but
-falls short of
-solving them all.
-It does not pick a character set encoding and does not
-define all the necessary I/O library routines.
-Furthermore, the functions it
-<I>does</I>
-define have engineering problems.
-Since the standard left too many problems unsolved,
-we decided to build our own interface.
-A separate paper has the details [Pike93].
-</P>
-<P>
-A small class of Plan 9 programs do not follow the conventions
-discussed in this section.
-These are programs imported from and maintained by
-the UNIX community;
-<TT>tex</TT>
-is a representative example.
-To avoid reconverting such programs every time a new version
-is released,
-we built a porting environment, called the ANSI C/POSIX Environment, or APE [Tric95].
-APE comprises separate include files, libraries, and commands,
-conforming as much as possible to the strict ANSI C and base-level
-POSIX specifications.
-To port network-based software such as X Windows, it was necessary to add
-some extensions to those
-specifications, such as the BSD networking functions.
-</center></P>
-<H4>Portability and Compilation
-</H4>
-<P>
-Plan 9 is portable across a variety of processor architectures.
-Within a single computing session, it is common to use
-several architectures: perhaps the window system running on
-an Intel processor connected to a MIPS-based CPU server with files
-resident on a SPARC system.
-For this heterogeneity to be transparent, there must be conventions
-about data interchange between programs; for software maintenance
-to be straightforward, there must be conventions about cross-architecture
-compilation.
-</P>
-<P>
-To avoid byte order problems,
-data is communicated between programs as text whenever practical.
-Sometimes, though, the amount of data is high enough that a binary
-format is necessary;
-such data is communicated as a byte stream with a pre-defined encoding
-for multi-byte values.
-In the rare cases where a format
-is complex enough to be defined by a data structure,
-the structure is never
-communicated as a unit; instead, it is decomposed into
-individual fields, encoded as an ordered byte stream, and then
-reassembled by the recipient.
-These conventions affect data
-ranging from kernel or application program state information to object file
-intermediates generated by the compiler.
-</P>
-<P>
-Programs, including the kernel, often present their data
-through a file system interface,
-an access mechanism that is inherently portable.
-For example, the system clock is represented by a decimal number in the file
-<TT>/dev/time</TT>;
-the
-<TT>time</TT>
-library function (there is no
-<TT>time</TT>
-system call) reads the file and converts it to binary.
-Similarly, instead of encoding the state of an application
-process in a series of flags and bits in private memory,
-the kernel
-presents a text string in the file named
-<TT>status</TT>
-in the 
-<TT>/proc</TT>
-file system associated with each process.
-The Plan 9
-<TT>ps</TT>
-command is trivial: it prints the contents of
-the desired status files after some minor reformatting; moreover, after
-<DL><DT><DD><TT><PRE>
-import helix /proc
-</PRE></TT></DL>
-a local
-<TT>ps</TT>
-command reports on the status of Helix's processes.
-</P>
-<P>
-Each supported architecture has its own compilers and loader.
-The C and Alef compilers produce intermediate files that
-are portably encoded; the contents
-are unique to the target architecture but the format of the
-file is independent of compiling processor type.
-When a compiler for a given architecture is compiled on
-another type of processor and then used to compile a program
-there,
-the intermediate produced on
-the new architecture is identical to the intermediate
-produced on the native processor.  From the compiler's
-point of view, every compilation is a cross-compilation.
-</P>
-<P>
-Although each architecture's loader accepts only intermediate files produced
-by compilers for that architecture,
-such files could have been generated by a compiler executing
-on any type of processor.
-For instance, it is possible to run
-the MIPS compiler on a 486, then use the MIPS loader on a
-SPARC to produce a MIPS executable.
-</P>
-<P>
-Since Plan 9 runs on a variety of architectures, even in a single installation,
-distinguishing the compilers and intermediate names
-simplifies multi-architecture
-development from a single source tree.
-The compilers and the loader for each architecture are
-uniquely named; there is no
-<TT>cc</TT>
-command.
-The names are derived by concatenating a code letter
-associated with the target architecture with the name of the
-compiler or loader.  For example, the letter `8' is
-the code letter for Intel
-<I>x</I>86
-processors; the C compiler is named
-<TT>8c</TT>,
-the Alef compiler
-<TT>8al</TT>,
-and the loader is called
-<TT>8l</TT>.
-Similarly, the compiler intermediate files are suffixed
-<TT>.8</TT>,
-not
-<TT>.o</TT>.
-</P>
-<P>
-The Plan 9
-build program
-<TT>mk</TT>,
-a relative of
-<TT>make</TT>,
-reads the names of the current and target
-architectures from environment variables called
-<TT>$cputype</TT>
-and
-<TT>$objtype</TT>.
-By default the current processor is the target, but setting
-<TT>$objtype</TT>
-to the name of another architecture
-before invoking
-<TT>mk</TT>
-results in a cross-build:
-<DL><DT><DD><TT><PRE>
-% objtype=sparc mk
-</PRE></TT></DL>
-builds a program for the SPARC architecture regardless of the executing machine.
-The value of
-<TT>$objtype</TT>
-selects a
-file of architecture-dependent variable definitions
-that configures the build to use the appropriate compilers and loader.
-Although simple-minded, this technique works well in practice:
-all applications in Plan 9 are built from a single source tree
-and it is possible to build the various architectures in parallel without conflict.
-</center></P>
-<H4>Parallel programming
-</H4>
-<P>
-Plan 9's support for parallel programming has two aspects.
-First, the kernel provides
-a simple process model and a few carefully designed system calls for
-synchronization and sharing.
-Second, a new parallel programming language called Alef
-supports concurrent programming.
-Although it is possible to write parallel
-programs in C, Alef is the parallel language of choice.
-</P>
-<P>
-There is a trend in new operating systems to implement two
-classes of processes: normal UNIX-style processes and light-weight
-kernel threads.
-Instead, Plan 9 provides a single class of process but allows fine control of the
-sharing of a process's resources such as memory and file descriptors.
-A single class of process is a
-feasible approach in Plan 9 because the kernel has an efficient system
-call interface and cheap process creation and scheduling.
-</P>
-<P>
-Parallel programs have three basic requirements:
-management of resources shared between processes,
-an interface to the scheduler,
-and fine-grain process synchronization using spin locks.
-On Plan 9,
-new processes are created using the
-<TT>rfork</TT>
-system call.
-<TT>Rfork</TT>
-takes a single argument,
-a bit vector that specifies
-which of the parent process's resources should be shared,
-copied, or created anew
-in the child.
-The resources controlled by
-<TT>rfork</TT>
-include the name space,
-the environment,
-the file descriptor table,
-memory segments,
-and notes (Plan 9's analog of UNIX signals).
-One of the bits controls whether the
-<TT>rfork</TT>
-call will create a new process; if the bit is off, the resulting
-modification to the resources occurs in the process making the call.
-For example, a process calls
-<TT>rfork(RFNAMEG)</TT>
-to disconnect its name space from its parent's.
-Alef uses a
-fine-grained fork in which all the resources, including
-memory, are shared between parent
-and child, analogous to creating a kernel thread in many systems.
-</P>
-<P>
-An indication that
-<TT>rfork</TT>
-is the right model is the variety of ways it is used.
-Other than the canonical use in the library routine
-<TT>fork</TT>,
-it is hard to find two calls to
-<TT>rfork</TT>
-with the same bits set; programs
-use it to create many different forms of sharing and resource allocation.
-A system with just two types of processes&#x2014;regular processes and threads&#x2014;could
-not handle this variety.
-</P>
-<P>
-There are two ways to share memory.
-First, a flag to
-<TT>rfork</TT>
-causes all the memory segments of the parent to be shared with the child
-(except the stack, which is
-forked copy-on-write regardless).
-Alternatively, a new segment of memory may be
-attached using the
-<TT>segattach</TT>
-system call; such a segment
-will always be shared between parent and child.
-</P>
-<P>
-The
-<TT>rendezvous</TT>
-system call provides a way for processes to synchronize.
-Alef uses it to implement communication channels,
-queuing locks,
-multiple reader/writer locks, and
-the sleep and wakeup mechanism.
-<TT>Rendezvous</TT>
-takes two arguments, a tag and a value.
-When a process calls
-<TT>rendezvous</TT>
-with a tag it sleeps until another process
-presents a matching tag.
-When a pair of tags match, the values are exchanged
-between the two processes and both
-<TT>rendezvous</TT>
-calls return.
-This primitive is sufficient to implement the full set of synchronization routines.
-</P>
-<P>
-Finally, spin locks are provided by
-an architecture-dependent library at user level.
-Most processors provide atomic test and set instructions that
-can be used to implement locks.
-A notable exception is the MIPS R3000, so the SGI
-Power series multiprocessors have special lock hardware on the bus.
-User processes gain access to the lock hardware
-by mapping pages of hardware locks
-into their address space using the
-<TT>segattach</TT>
-system call.
-</P>
-<P>
-A Plan 9 process in a system call will block regardless of its `weight'.
-This means that when a program wishes to read from a slow
-device without blocking the entire calculation, it must fork a process to do
-the read for it.  The solution is to start a satellite
-process that does the I/O and delivers the answer to the main program
-through shared memory or perhaps a pipe.
-This sounds onerous but works easily and efficiently in practice; in fact,
-most interactive Plan 9 applications, even relatively ordinary ones written
-in C, such as
-the text editor Sam [Pike87], run as multiprocess programs.
-</P>
-<P>
-The kernel support for parallel programming in Plan 9 is a few hundred lines
-of portable code; a handful of simple primitives enable the problems to be handled
-cleanly at user level.
-Although the primitives work fine from C,
-they are particularly expressive from within Alef.
-The creation
-and management of slave I/O processes can be written in a few lines of Alef,
-providing the foundation for a consistent means of multiplexing
-data flows between arbitrary processes.
-Moreover, implementing it in a language rather than in the kernel
-ensures consistent semantics between all devices
-and provides a more general multiplexing primitive.
-Compare this to the UNIX
-<TT>select</TT>
-system call:
-<TT>select</TT>
-applies only to a restricted set of devices,
-legislates a style of multiprogramming in the kernel,
-does not extend across networks,
-is difficult to implement, and is hard to use.
-</P>
-<P>
-Another reason
-parallel programming is important in Plan 9 is that
-multi-threaded user-level file servers are the preferred way
-to implement services.
-Examples of such servers include the programming environment
-Acme [Pike94],
-the name space exporting tool
-<TT>exportfs</TT>
-[PPTTW93],
-the HTTP daemon,
-and the network name servers
-<TT>cs</TT>
-and
-<TT>dns</TT>
-[PrWi93].
-Complex applications such as Acme prove that
-careful operating system support can reduce the difficulty of writing
-multi-threaded applications without moving threading and
-synchronization primitives into the kernel.
-</center></P>
-<H4>Implementation of Name Spaces
-</H4>
-<P>
-User processes construct name spaces using three system calls:
-<TT>mount</TT>,
-<TT>bind</TT>,
-and
-<TT>unmount</TT>.
-The
-<TT>mount</TT>
-system call attaches a tree served by a file server to
-the current name space.  Before calling
-<TT>mount</TT>,
-the client must (by outside means) acquire a connection to the server in
-the form of a file descriptor that may be written and read to transmit 9P messages.
-That file descriptor represents a pipe or network connection.
-</P>
-<P>
-The
-<TT>mount</TT>
-call attaches a new hierarchy to the existing name space.
-The
-<TT>bind</TT>
-system call, on the other hand, duplicates some piece of existing name space at
-another point in the name space.
-The
-<TT>unmount</TT>
-system call allows components to be removed.
-</P>
-<P>
-Using
-either
-<TT>bind</TT>
-or
-<TT>mount</TT>,
-multiple directories may be stacked at a single point in the name space.
-In Plan 9 terminology, this is a
-<I>union</I>
-directory and behaves like the concatenation of the constituent directories.
-A flag argument to
-<TT>bind</TT>
-and
-<TT>mount</TT>
-specifies the position of a new directory in the union,
-permitting new elements
-to be added either at the front or rear of the union or to replace it entirely.
-When a file lookup is performed in a union directory, each component
-of the union is searched in turn and the first match taken; likewise,
-when a union directory is read, the contents of each of the component directories
-is read in turn.
-Union directories are one of the most widely used organizational features
-of the Plan 9 name space.
-For instance, the directory
-<TT>/bin</TT>
-is built as a union of
-<TT>/$cputype/bin</TT>
-(program binaries),
-<TT>/rc/bin</TT>
-(shell scripts),
-and perhaps more directories provided by the user.
-This construction makes the shell
-<TT>$PATH</TT>
-variable unnecessary.
-</P>
-<P>
-One question raised by union directories
-is which element of the union receives a newly created file.
-After several designs, we decided on the following.
-By default, directories in unions do not accept new files, although the
-<TT>create</TT>
-system call applied to an existing file succeeds normally.
-When a directory is added to the union, a flag to
-<TT>bind</TT>
-or
-<TT>mount</TT>
-enables create permission (a property of the name space) in that directory.
-When a file is being created with a new name in a union, it is created in the
-first directory of the union with create permission; if that creation fails,
-the entire
-<TT>create</TT>
-fails.
-This scheme enables the common use of placing a private directory anywhere
-in a union of public ones,
-while allowing creation only in the private directory.
-</P>
-<P>
-By convention, kernel device file systems
-are bound into the
-<TT>/dev</TT>
-directory, but to bootstrap the name space building process it is
-necessary to have a notation that permits
-direct access to the devices without an existing name space.
-The root directory
-of the tree served by a device driver can be accessed using the syntax
-<TT>#</TT><I>c</I>,
-where
-<I>c</I>
-is a unique character (typically a letter) identifying the
-<I>type</I>
-of the device.
-Simple device drivers serve a single level directory containing a few files.
-As an example,
-each serial port is represented by a data and a control file:
-<DL><DT><DD><TT><PRE>
-% bind -a '#t' /dev
-% cd /dev
-% ls -l eia*
---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia1
---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia1ctl
---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia2
---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia2ctl
-</PRE></TT></DL>
-The
-<TT>bind</TT>
-program is an encapsulation of the
-<TT>bind</TT>
-system call; its
-<TT>-a</TT>
-flag positions the new directory at the end of the union.
-The data files
-<TT>eia1</TT>
-and
-<TT>eia2</TT>
-may be read and written to communicate over the serial line.
-Instead of using special operations on these files to control the devices,
-commands written to the files
-<TT>eia1ctl</TT>
-and
-<TT>eia2ctl</TT>
-control the corresponding device;
-for example,
-writing the text string
-<TT>b1200</TT>
-to
-<TT>/dev/eia1ctl</TT>
-sets the speed of that line to 1200 baud.
-Compare this to the UNIX
-<TT>ioctl</TT>
-system call: in Plan 9, devices are controlled by textual messages,
-free of byte order problems, with clear semantics for reading and writing.
-It is common to configure or debug devices using shell scripts.
-</P>
-<P>
-It is the universal use of the 9P protocol that
-connects Plan 9's components together to form a
-distributed system.
-Rather than inventing a unique protocol for each
-service such as
-<TT>rlogin</TT>,
-FTP, TFTP, and X windows,
-Plan 9 implements services
-in terms of operations on file objects,
-and then uses a single, well-documented protocol to exchange information between
-computers.
-Unlike NFS, 9P treats files as a sequence of bytes rather than blocks.
-Also unlike NFS, 9P is stateful: clients perform
-remote procedure calls to establish pointers to objects in the remote
-file server.
-These pointers are called file identifiers or
-<I>fids</I>.
-All operations on files supply a fid to identify an object in the remote file system.
-</P>
-<P>
-The 9P protocol defines 17 messages, providing
-means to authenticate users, navigate fids around
-a file system hierarchy, copy fids, perform I/O, change file attributes, 
-and create and delete files.
-Its complete specification is in Section 5 of the Programmer's Manual [9man].
-Here is the procedure to gain access to the name hierarchy supplied by a server.
-A file server connection is established via a pipe or network connection.
-An initial
-<TT>session</TT>
-message performs a bilateral authentication between client and server.
-An
-<TT>attach</TT>
-message then connects a fid suggested by the client to the root of the server file
-tree.
-The
-<TT>attach</TT>
-message includes the identity of the user performing the attach; henceforth all
-fids derived from the root fid will have permissions associated with
-that user.
-Multiple users may share the connection, but each must perform an attach to
-establish his or her identity.
-</P>
-<P>
-The
-<TT>walk</TT>
-message moves a fid through a single level of the file system hierarchy.
-The
-<TT>clone</TT>
-message takes an established fid and produces a copy that points
-to the same file as the original.
-Its purpose is to enable walking to a file in a directory without losing the fid
-on the directory.
-The
-<TT>open</TT>
-message locks a fid to a specific file in the hierarchy,
-checks access permissions,
-and prepares the fid
-for I/O.
-The
-<TT>read</TT>
-and
-<TT>write</TT>
-messages allow I/O at arbitrary offsets in the file;
-the maximum size transferred is defined by the protocol.
-The
-<TT>clunk</TT>
-message indicates the client has no further use for a fid.
-The
-<TT>remove</TT>
-message behaves like
-<TT>clunk</TT>
-but causes the file associated with the fid to be removed and any associated
-resources on the server to be deallocated.
-</P>
-<P>
-9P has two forms: RPC messages sent on a pipe or network connection and a procedural
-interface within the kernel.
-Since kernel device drivers are directly addressable,
-there is no need to pass messages to
-communicate with them;
-instead each 9P transaction is implemented by a direct procedure call.
-For each fid,
-the kernel maintains a local representation in a data structure called a
-<I>channel</I>,
-so all operations on files performed by the kernel involve a channel connected
-to that fid.
-The simplest example is a user process's file descriptors, which are
-indexes into an array of channels.
-A table in the kernel provides a list
-of entry points corresponding one to one with the 9P messages for each device.
-A system call such as
-<TT>read</TT>
-from the user translates into one or more procedure calls
-through that table, indexed by the type character stored in the channel:
-<TT>procread</TT>,
-<TT>eiaread</TT>,
-etc.
-Each call takes at least
-one channel as an argument.
-A special kernel driver, called the
-<I>mount</I>
-driver, translates procedure calls to messages, that is,
-it converts local procedure calls to remote ones.
-In effect, this special driver
-becomes a local proxy for the files served by a remote file server.
-The channel pointer in the local call is translated to the associated fid
-in the transmitted message.
-</P>
-<P>
-The mount driver is the sole RPC mechanism employed by the system.
-The semantics of the supplied files, rather than the operations performed upon
-them, create a particular service such as the
-<TT>cpu</TT>
-command.
-The mount driver demultiplexes protocol
-messages between clients sharing a communication channel
-with a file server.
-For each outgoing RPC message,
-the mount driver allocates a buffer labeled by a small unique integer,
-called a
-<I>tag</I>.
-The reply to the RPC is labeled with the same tag, which is used by
-the mount driver to match the reply with the request.
-</P>
-<P>
-The kernel representation of the name space
-is called the
-<I>mount table</I>,
-which stores a list of bindings between channels.
-Each entry in the mount table contains a pair of channels: a
-<I>from</I>
-channel and a
-<I>to</I>
-channel.
-Every time a walk succeeds in moving a channel to a new location in the name space,
-the mount table is consulted to see if a `from' channel matches the new name; if
-so the `to' channel is cloned and substituted for the original.
-Union directories are implemented by converting the `to'
-channel into a list of channels: 
-a successful walk to a union directory returns a `to' channel that forms
-the head of
-a list of channels, each representing a component directory
-of the union.
-If a walk
-fails to find a file in the first directory of the union, the list is followed,
-the next component cloned, and walk tried on that directory.
-</P>
-<P>
-Each file in Plan 9 is uniquely identified by a set of integers:
-the type of the channel (used as the index of the function call table),
-the server or device number
-distinguishing the server from others of the same type (decided locally by the driver),
-and a
-<I>qid</I>
-formed from two 32-bit numbers called
-<I>path</I>
-and
-<I>version</I>.
-The path is a unique file number assigned by a device driver or
-file server when a file is created.
-The version number is updated whenever
-the file is modified; as described in the next section,
-it can be used to maintain cache coherency between
-clients and servers.
-</P>
-<P>
-The type and device number are analogous to UNIX major and minor
-device numbers;
-the qid is analogous to the i-number.
-The device and type
-connect the channel to a device driver and the qid
-identifies the file within that device. 
-If the file recovered from a walk has the same type, device, and qid path
-as an entry in the mount table, they are the same file and the
-corresponding substitution from the mount table is made.
-This is how the name space is implemented.
-</center></P>
-<H4>File Caching
-</H4>
-<P>
-The 9P protocol has no explicit support for caching files on a client.
-The large memory of the central file server acts as a shared cache for all its clients,
-which reduces the total amount of memory needed across all machines in the network.
-Nonetheless, there are sound reasons to cache files on the client, such as a slow
-connection to the file server.
-</P>
-<P>
-The version field of the qid is changed whenever the file is modified,
-which makes it possible to do some weakly coherent forms of caching.
-The most important is client caching of text and data segments of executable files.
-When a process
-<TT>execs</TT>
-a program, the file is re-opened and the qid's version is compared with that in the cache;
-if they match, the local copy is used.
-The same method can be used to build a local caching file server.
-This user-level server interposes on the 9P connection to the remote server and
-monitors the traffic, copying data to a local disk.
-When it sees a read of known data, it answers directly,
-while writes are passed on immediately&#x2014;the cache is write-through&#x2014;to keep
-the central copy up to date.
-This is transparent to processes on the terminal and requires no change to 9P;
-it works well on home machines connected over serial lines.
-A similar method can be applied to build a general client cache in unused local
-memory, but this has not been done in Plan 9.
-</center></P>
-<H4>Networks and Communication Devices
-</H4>
-<P>
-Network interfaces are kernel-resident file systems, analogous to the EIA device
-described earlier.
-Call setup and shutdown are achieved by writing text strings to the control file
-associated with the device;
-information is sent and received by reading and writing the data file.
-The structure and semantics of the devices are common to all networks so,
-other than a file name substitution,
-the same procedure makes a call using TCP over Ethernet as URP over Datakit [Fra80].
-</P>
-<P>
-This example illustrates the structure of the TCP device:
-<DL><DT><DD><TT><PRE>
-% ls -lp /net/tcp
-d-r-xr-xr-x I 0 bootes bootes 0 Feb 23 20:20 0
-d-r-xr-xr-x I 0 bootes bootes 0 Feb 23 20:20 1
---rw-rw-rw- I 0 bootes bootes 0 Feb 23 20:20 clone
-% ls -lp /net/tcp/0
---rw-rw---- I 0 rob    bootes 0 Feb 23 20:20 ctl
---rw-rw---- I 0 rob    bootes 0 Feb 23 20:20 data
---rw-rw---- I 0 rob    bootes 0 Feb 23 20:20 listen
---r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 local
---r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 remote
---r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 status
-%
-</PRE></TT></DL>
-The top directory,
-<TT>/net/tcp</TT>,
-contains a
-<TT>clone</TT>
-file and a directory for each connection, numbered
-<TT>0</TT>
-to
-<I>n</I>.
-Each connection directory corresponds to a TCP/IP connection.
-Opening
-<TT>clone</TT>
-reserves an unused connection and returns its control file.
-Reading the control file returns the textual connection number, so the user
-process can construct the full name of the newly allocated
-connection directory.
-The
-<TT>local</TT>,
-<TT>remote</TT>,
-and
-<TT>status</TT>
-files are diagnostic; for example,
-<TT>remote</TT>
-contains the address (for TCP, the IP address and port number) of the remote side.
-</P>
-<P>
-A call is initiated by writing a connect message with a network-specific address as
-its argument; for example, to open a Telnet session (port 23) to a remote machine
-with IP address 135.104.9.52,
-the string is:
-<DL><DT><DD><TT><PRE>
-connect 135.104.9.52!23
-</PRE></TT></DL>
-The write to the control file blocks until the connection is established;
-if the destination is unreachable, the write returns an error.
-Once the connection is established, the
-<TT>telnet</TT>
-application reads and writes the
-<TT>data</TT>
-file
-to talk to the remote Telnet daemon.
-On the other end, the Telnet daemon would start by writing
-<DL><DT><DD><TT><PRE>
-announce 23
-</PRE></TT></DL>
-to its control file to indicate its willingness to receive calls to this port.
-Such a daemon is called a
-<I>listener</I>
-in Plan 9.
-</P>
-<P>
-A uniform structure for network devices cannot hide all the details
-of addressing and communication for dissimilar networks.
-For example, Datakit uses textual, hierarchical addresses unlike IP's 32-bit addresses, so
-an application given a control file must still know what network it represents.
-Rather than make every application know the addressing of every network,
-Plan 9 hides these details in a
-<I>connection</I>
-<I>server</I>,
-called
-<TT>cs</TT>.
-<TT>Cs</TT>
-is a file system mounted in a known place.
-It supplies a single control file that an application uses to discover how to connect
-to a host.
-The application writes the symbolic address and service name for
-the connection it wishes to make,
-and reads back the name of the
-<TT>clone</TT>
-file to open and the address to present to it.
-If there are multiple networks between the machines,
-<TT>cs</TT>
-presents a list of possible networks and addresses to be tried in sequence;
-it uses heuristics to decide the order.
-For instance, it presents the highest-bandwidth choice first.
-</P>
-<P>
-A single library function called
-<TT>dial</TT>
-talks to
-<TT>cs</TT>
-to establish the connection.
-An application that uses
-<TT>dial</TT>
-needs no changes, not even recompilation, to adapt to new networks;
-the interface to
-<TT>cs</TT>
-hides the details.
-</P>
-<P>
-The uniform structure for networks in Plan 9 makes the
-<TT>import</TT>
-command all that is needed to construct gateways.
-</center></P>
-<H4>Kernel structure for networks
-</H4>
-<P>
-The kernel plumbing used to build Plan 9 communications
-channels is called
-<I>streams</I>
-[Rit84][Presotto].
-A stream is a bidirectional channel connecting a
-physical or pseudo-device to a user process.
-The user process inserts and removes data at one end of the stream;
-a kernel process acting on behalf of a device operates at
-the other end.
-A stream comprises a linear list of
-<I>processing modules</I>.
-Each module has both an upstream (toward the process) and
-downstream (toward the device)
-<I>put routine</I>.
-Calling the put routine of the module on either end of the stream
-inserts data into the stream.
-Each module calls the succeeding one to send data up or down the stream.
-Like UNIX streams [Rit84],
-Plan 9 streams can be dynamically configured.
-</P>
-<H4>The IL Protocol
-</H4>
-<P>
-The 9P protocol must run above a reliable transport protocol with delimited messages.
-9P has no mechanism to recover from transmission errors and
-the system assumes that each read from a communication channel will
-return a single 9P message;
-it does not parse the data stream to discover message boundaries.
-Pipes and some network protocols already have these properties but
-the standard IP protocols do not.
-TCP does not delimit messages, while
-UDP [RFC768] does not provide reliable in-order delivery.
-</P>
-<P>
-We designed a new protocol, called IL (Internet Link), to transmit 9P messages over IP.
-It is a connection-based protocol that provides
-reliable transmission of sequenced messages between machines.
-Since a process can have only a single outstanding 9P request,
-there is no need for flow control in IL.
-Like TCP, IL has adaptive timeouts: it scales acknowledge and retransmission times
-to match the network speed.
-This allows the protocol to perform well on both the Internet and on local Ethernets.
-Also, IL does no blind retransmission,
-to avoid adding to the congestion of busy networks.
-Full details are in another paper [PrWi95].
-</P>
-<P>
-In Plan 9, the implementation of IL is smaller and faster than TCP.
-IL is our main Internet transport protocol.
-</P>
-<H4>Overview of authentication
-</H4>
-<P>
-Authentication establishes the identity of a
-user accessing a resource.
-The user requesting the resource is called the
-<I>client</I>
-and the user granting access to the resource is called the
-<I>server</I>.
-This is usually done under the auspices of a 9P attach message.
-A user may be a client in one authentication exchange and a server in another.
-Servers always act on behalf of some user,
-either a normal client or some administrative entity, so authentication
-is defined to be between users, not machines.
-</P>
-<P>
-Each Plan 9 user has an associated DES [NBS77] authentication key;
-the user's identity is verified by the ability to
-encrypt and decrypt special messages called challenges.
-Since knowledge of a user's key gives access to that user's resources,
-the Plan 9 authentication protocols never transmit a message containing
-a cleartext key.
-</P>
-<P>
-Authentication is bilateral:
-at the end of the authentication exchange,
-each side is convinced of the other's identity.
-Every machine begins the exchange with a DES key in memory.
-In the case of CPU and file servers, the key, user name, and domain name
-for the server are read from permanent storage,
-usually non-volatile RAM.
-In the case of terminals,
-the key is derived from a password typed by the user at boot time.
-A special machine, known as the
-<I>authentication</I>
-<I>server</I>,
-maintains a database of keys for all users in its administrative domain and
-participates in the authentication protocols.
-</P>
-<P>
-The authentication protocol is as follows:
-after exchanging challenges, one party
-contacts the authentication server to create
-permission-granting
-<I>tickets</I>
-encrypted with
-each party's secret key and containing a new conversation key.
-Each
-party decrypts its own ticket and uses the conversation key to
-encrypt the other party's challenge.
-</P>
-<P>
-This structure is somewhat like Kerberos [MBSS87], but avoids
-its reliance on synchronized clocks.
-Also
-unlike Kerberos, Plan 9 authentication supports a `speaks for'
-relation [LABW91] that enables one user to have the authority
-of another;
-this is how a CPU server runs processes on behalf of its clients.
-</P>
-<P>
-Plan 9's authentication structure builds
-secure services rather than depending on firewalls.
-Whereas firewalls require special code for every service penetrating the wall,
-the Plan 9 approach permits authentication to be done in a single place&#x2014;9P&#x2014;for
-all services.
-For example, the
-<TT>cpu</TT>
-command works securely across the Internet.
-</P>
-<H4>Authenticating external connections
-</H4>
-<P>
-The regular Plan 9 authentication protocol is not suitable for text-based services such as
-Telnet
-or FTP.
-In such cases, Plan 9 users authenticate with hand-held DES calculators called
-<I>authenticators</I>.
-The authenticator holds a key for the user, distinct from
-the user's normal authentication key.
-The user `logs on' to the authenticator using a 4-digit PIN.
-A correct PIN enables the authenticator for a challenge/response exchange with the server.
-Since a correct challenge/response exchange is valid only once
-and keys are never sent over the network,
-this procedure is not susceptible to replay attacks, yet
-is compatible with protocols like Telnet and FTP.
-</P>
-<H4>Special users
-</H4>
-<P>
-Plan 9 has no super-user.
-Each server is responsible for maintaining its own security, usually permitting
-access only from the console, which is protected by a password.
-For example, file servers have a unique administrative user called
-<TT>adm</TT>,
-with special privileges that apply only to commands typed at the server's
-physical console.
-These privileges concern the day-to-day maintenance of the server,
-such as adding new users and configuring disks and networks.
-The privileges do
-<I>not</I>
-include the ability to modify, examine, or change the permissions of any files.
-If a file is read-protected by a user, only that user may grant access to others.
-</P>
-<P>
-CPU servers have an equivalent user name that allows administrative access to
-resources on that server such as the control files of user processes.
-Such permission is necessary, for example, to kill rogue processes, but
-does not extend beyond that server.
-On the other hand, by means of a key
-held in protected non-volatile RAM,
-the identity of the administrative user is proven to the
-authentication server.
-This allows the CPU server to authenticate remote users, both
-for access to the server itself and when the CPU server is acting
-as a proxy on their behalf.
-</P>
-<P>
-Finally, a special user called
-<TT>none</TT>
-has no password and is always allowed to connect;
-anyone may claim to be
-<TT>none</TT>.
-<TT>None</TT>
-has restricted permissions; for example, it is not allowed to examine dump files
-and can read only world-readable files.
-</P>
-<P>
-The idea behind
-<TT>none</TT>
-is analogous to the anonymous user in FTP
-services.
-On Plan 9, guest FTP servers are further confined within a special
-restricted name space.
-It disconnects guest users from system programs, such as the contents of
-<TT>/bin</TT>,
-but makes it possible to make local files available to guests
-by binding them explicitly into the space.
-A restricted name space is more secure than the usual technique of exporting
-an ad hoc directory tree; the result is a kind of cage around untrusted users.
-</P>
-<H4>The cpu command and proxied authentication
-</H4>
-<P>
-When a call is made to a CPU server for a user, say Peter,
-the intent is that Peter wishes to run processes with his own authority.
-To implement this property,
-the CPU server does the following when the call is received.
-First, the listener forks off a process to handle the call.
-This process changes to the user
-<TT>none</TT>
-to avoid giving away permissions if it is compromised.
-It then performs the authentication protocol to verify that the
-calling user really is Peter, and to prove to Peter that
-the machine is itself trustworthy.
-Finally, it reattaches to all relevant file servers using the
-authentication protocol to identify itself as Peter.
-In this case, the CPU server is a client of the file server and performs the
-client portion of the authentication exchange on behalf of Peter.
-The authentication server will give the process tickets to 
-accomplish this only if the CPU server's administrative user name is allowed to
-<I>speak for</I>
-Peter.
-</P>
-<P>
-The
-<I>speaks for</I>
-relation [LABW91] is kept in a table on the authentication server.
-To simplify the management of users computing in different authentication domains,
-it also contains mappings between user names in different domains,
-for example saying that user
-<TT>rtm</TT>
-in one domain is the same person as user
-<TT>rtmorris</TT>
-in another.
-</P>
-<H4>File Permissions
-</H4>
-<P>
-One of the advantages of constructing services as file systems
-is that the solutions to ownership and permission problems fall out naturally.
-As in UNIX,
-each file or directory has separate read, write, and execute/search permissions
-for the file's owner, the file's group, and anyone else.
-The idea of group is unusual:
-any user name is potentially a group name.
-A group is just a user with a list of other users in the group.
-Conventions make the distinction: most people have user names without group members,
-while groups have long lists of attached names.  For example, the
-<TT>sys</TT>
-group traditionally has all the system programmers,
-and system files are accessible
-by group
-<TT>sys</TT>.
-Consider the following two lines of a user database stored on a server:
-<DL><DT><DD><TT><PRE>
-pjw:pjw:
-sys::pjw,ken,philw,presotto
-</PRE></TT></DL>
-The first establishes user
-<TT>pjw</TT>
-as a regular user.  The second establishes user
-<TT>sys</TT>
-as a group and lists four users who are
-<I>members</I>
-of that group.
-The empty colon-separated field is space for a user to be named as the
-<I>group</I>
-<I>leader</I>.
-If a group has a leader, that user has special permissions for the group,
-such as freedom to change the group permissions
-of files in that group.
-If no leader is specified, each member of the group is considered equal, as if each were
-the leader.
-In our example, only
-<TT>pjw</TT>
-can add members to his group, but all of
-<TT>sys</TT>'s
-members are equal partners in that group.
-</P>
-<P>
-Regular files are owned by the user that creates them.
-The group name is inherited from the directory holding the new file.
-Device files are treated specially:
-the kernel may arrange the ownership and permissions of
-a file appropriate to the user accessing the file.
-</P>
-<P>
-A good example of the generality this offers is process files,
-which are owned and read-protected by the owner of the process.
-If the owner wants to let someone else access the memory of a process,
-for example to let the author of a program debug a broken image, the standard
-<TT>chmod</TT>
-command applied to the process files does the job.
-</P>
-<P>
-Another unusual application of file permissions
-is the dump file system, which is not only served by the same file
-server as the original data, but represented by the same user database.
-Files in the dump are therefore given the same protection as files in the regular
-file system;
-if a file is owned by
-<TT>pjw</TT>
-and read-protected, once it is in the dump file system it is still owned by
-<TT>pjw</TT>
-and read-protected.
-Also, since the dump file system is immutable, the file cannot be changed;
-it is read-protected forever.
-Drawbacks are that if the file is readable but should have been read-protected,
-it is readable forever, and that user names are hard to re-use.
-</P>
-<H4>Performance
-</H4>
-<P>
-As a simple measure of the performance of the Plan 9 kernel,
-we compared the
-time to do some simple operations on Plan 9 and on SGI's IRIX Release 5.3
-running on an SGI Challenge M with a 100MHz MIPS R4400 and a 1-megabyte
-secondary cache.
-The test program was written in Alef,
-compiled with the same compiler,
-and run on identical hardware,
-so the only variables are the operating system and libraries.
-</P>
-<P>
-The program tests the time to do a context switch
-(<TT>rendezvous</TT>
-on Plan 9,
-<TT>blockproc</TT>
-on IRIX);
-a trivial system call
-(<TT>rfork(0)</TT>
-and
-<TT>nap(0)</TT>);
-and
-lightweight fork
-(<TT>rfork(RFPROC)</TT>
-and
-<TT>sproc(PR_SFDS|PR_SADDR)</TT>).
-It also measures the time to send a byte on a pipe from one process
-to another and the throughput on a pipe between two processes.
-The results appear in Table 1.
-<br><img src="-.1.gif"><br>
-Table 1.  Performance comparison.
-</P>
-<br>&#32;<br>
-Although the Plan 9 times are not spectacular, they show that the kernel is
-competitive with commercial systems.
-<H4>Discussion
-</H4>
-<P>
-Plan 9 has a relatively conventional kernel;
-the system's novelty lies in the pieces outside the kernel and the way they interact.
-When building Plan 9, we considered all aspects
-of the system together, solving problems where the solution fit best.
-Sometimes the solution spanned many components.
-An example is the problem of heterogeneous instruction architectures,
-which is addressed by the compilers (different code characters, portable
-object code),
-the environment
-(<TT>$cputype</TT>
-and
-<TT>$objtype</TT>),
-the name space
-(binding in
-<TT>/bin</TT>),
-and other components.
-Sometimes many issues could be solved in a single place.
-The best example is 9P,
-which centralizes naming, access, and authentication.
-9P is really the core
-of the system;
-it is fair to say that the Plan 9 kernel is primarily a 9P multiplexer.
-</P>
-<P>
-Plan 9's focus on files and naming is central to its expressiveness.
-Particularly in distributed computing, the way things are named has profound
-influence on the system [Nee89].
-The combination of
-local name spaces and global conventions to interconnect networked resources
-avoids the difficulty of maintaining a global uniform name space,
-while naming everything like a file makes the system easy to understand, even for
-novices.
-Consider the dump file system, which is trivial to use for anyone familiar with
-hierarchical file systems.
-At a deeper level, building all the resources above a single uniform interface
-makes interoperability easy.
-Once a resource exports a 9P interface,
-it can combine transparently
-with any other part of the system to build unusual applications;
-the details are hidden.
-This may sound object-oriented, but there are distinctions.
-First, 9P defines a fixed set of `methods'; it is not an extensible protocol.
-More important,
-files are well-defined and well-understood
-and come prepackaged with familiar methods of access, protection, naming, and
-networking.
-Objects, despite their generality, do not come with these attributes defined.
-By reducing `object' to `file', Plan 9 gets some technology for free.
-</P>
-<P>
-Nonetheless, it is possible to push the idea of file-based computing too far.
-Converting every resource in the system into a file system is a kind of metaphor,
-and metaphors can be abused.
-A good example of restraint is
-<TT>/proc</TT>,
-which is only a view of a process, not a representation.
-To run processes, the usual
-<TT>fork</TT>
-and
-<TT>exec</TT>
-calls are still necessary, rather than doing something like
-<DL><DT><DD><TT><PRE>
-cp /bin/date /proc/clone/mem
-</PRE></TT></DL>
-The problem with such examples is that they require the server to do things
-not under its control.
-The ability to assign meaning to a command like this does not
-imply the meaning will fall naturally out of the structure of answering the 9P requests
-it generates.
-As a related example, Plan 9 does not put machines' network names in the file
-name space.
-The network interfaces provide a very different model of naming, because using
-<TT>open</TT>,
-<TT>create</TT>,
-<TT>read</TT>,
-and
-<TT>write</TT>
-on such files would not offer a suitable place to encode all the details of call
-setup for an arbitrary network.
-This does not mean that the network interface cannot be file-like, just that it must
-have a more tightly defined structure.
-</P>
-<P>
-What would we do differently next time?
-Some elements of the implementation are unsatisfactory.
-Using streams to implement network interfaces in the kernel
-allows protocols to be connected together dynamically,
-such as to attach the same TTY driver to TCP, URP, and
-IL connections,
-but Plan 9 makes no use of this configurability.
-(It was exploited, however, in the research UNIX system for which
-streams were invented.)
-Replacing streams by static I/O queues would
-simplify the code and make it faster.
-</P>
-<P>
-Although the main Plan 9 kernel is portable across many machines,
-the file server is implemented separately.
-This has caused several problems:
-drivers that must be written twice,
-bugs that must be fixed twice,
-and weaker portability of the file system code.
-The solution is easy: the file server kernel should be maintained
-as a variant of the regular operating system, with no user processes and
-special compiled-in
-kernel processes to implement file service.
-Another improvement to the file system would be a change of internal structure.
-The WORM jukebox is the least reliable piece of the hardware, but because
-it holds the metadata of the file system, it must be present in order to serve files.
-The system could be restructured so the WORM is a backup device only, with the
-file system proper residing on magnetic disks.
-This would require no change to the external interface.
-</P>
-<P>
-Although Plan 9 has per-process name spaces, it has no mechanism to give the
-description of a process's name space to another process except by direct inheritance.
-The
-<TT>cpu</TT>
-command, for example, cannot in general reproduce the terminal's name space;
-it can only re-interpret the user's login profile and make substitutions for things like
-the name of the binary directory to load.
-This misses any local modifications made before running
-<TT>cpu</TT>.
-It should instead be possible to capture the terminal's name space and transmit
-its description to a remote process.
-</P>
-<P>
-Despite these problems, Plan 9 works well.
-It has matured into the system that supports our research,
-rather than being the subject of the research itself.
-Experimental new work includes developing interfaces to faster networks,
-file caching in the client kernel,
-encapsulating and exporting name spaces,
-and the ability to re-establish the client state after a server crash.
-Attention is now focusing on using the system to build distributed applications.
-</P>
-<P>
-One reason for Plan 9's success is that we use it for our daily work, not just as a research tool.
-Active use forces us to address shortcomings as they arise and to adapt the system
-to solve our problems.
-Through this process, Plan 9 has become a comfortable, productive programming
-environment, as well as a vehicle for further systems research.
-</P>
-<H4>References
-<DL COMPACT>
-<DT>[9man]<DD>
-Plan 9 Programmer's Manual,
-Volume 1,
-AT&amp;T Bell Laboratories,
-Murray Hill, NJ,
-1995.
-<DT>[ANSIC]<DD>
-<I>American National Standard for Information Systems -
-Programming Language C</I>, American National Standards Institute, Inc.,
-New York, 1990.
-<DT>[Duff90]<DD>
-Tom Duff, ``Rc - A Shell for Plan 9 and UNIX systems'',
-Proc. of the Summer 1990 UKUUG Conf.,
-London, July, 1990, pp. 21-33, reprinted, in a different form, in this volume.
-<DT>[Fra80]<DD>
-A.G. Fraser,
-``Datakit - A Modular Network for Synchronous and Asynchronous Traffic'',
-Proc. Int. Conf. on Commun.,
-June 1980, Boston, MA.
-<DT>[FSSUTF]<DD>
-File System Safe UCS Transformation Format (FSS-UTF),
-X/Open Preliminary Specification, 1993.
-ISO designation is
-ISO/IEC JTC1/SC2/WG2 N 1036, dated 1994-08-01.
-<DT>[ISO10646] <DD>
-ISO/IEC DIS 10646-1:1993
-Information technology -
-Universal Multiple-Octet Coded Character Set (UCS) &#x2014;
-Part 1: Architecture and Basic Multilingual Plane.
-<DT>[Kill84]<DD>
-T.J. Killian,
-``Processes as Files'',
-USENIX Summer 1984 Conf. Proc.,
-June 1984, Salt Lake City, UT.
-<DT>[LABW91] <DD>
-Butler Lampson,
-Mart&iacute;n Abadi,
-Michael Burrows, and
-Edward Wobber,
-``Authentication in Distributed Systems: Theory and Practice'',
-Proc. 13th ACM Symp. on Op. Sys. Princ.,
-Asilomar, 1991,
-pp. 165-182.
-<DT>[MBSS87] <DD>
-S. P. Miller,
-B. C. Neuman,
-J. I. Schiller, and
-J. H. Saltzer,
-``Kerberos Authentication and Authorization System'',
-Massachusetts Institute of Technology,
-1987.
-<DT>[NBS77]<DD>
-National Bureau of Standards (U.S.),
-Federal Information Processing Standard 46,
-National Technical Information Service, Springfield, VA, 1977.
-<DT>[Nee89]<DD>
-R. Needham, ``Names'', in
-Distributed systems,
-S. Mullender, ed.,
-Addison Wesley, 1989.
-<DT>[NeHe82] <DD>
-R.M. Needham and A.J. Herbert,
-The Cambridge Distributed Computing System,
-Addison-Wesley, London, 1982.
-<DT>[Neu92]<DD>
-B. Clifford Neuman,
-``The Prospero File System'',
-USENIX File Systems Workshop Proc.,
-Ann Arbor, 1992, pp. 13-28.
-<DT>[OCDNW88] <DD>
-John Ousterhout, Andrew Cherenson, Fred Douglis, Mike Nelson, and Brent Welch,
-``The Sprite Network Operating System'',
-IEEE Computer,
-21(2), 23-38, Feb. 1988.
-<DT>[Pike87]<DD>
-Rob Pike, ``The Text Editor <TT>sam</TT>'',
-Software - Practice and Experience,
-Nov 1987, <B>17</B>(11), pp. 813-845; reprinted in this volume.
-<DT>[Pike91]<DD>
-Rob Pike, ``8&frac12;, the Plan 9 Window System'',
-USENIX Summer Conf. Proc.,
-Nashville, June, 1991, pp. 257-265,
-reprinted in this volume.
-<DT>[Pike93]<DD>
-Rob Pike and Ken Thompson, ``Hello World or &#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949; or
-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'',
-USENIX Winter Conf. Proc.,
-San Diego, 1993, pp. 43-50,
-reprinted in this volume.
-<DT>[Pike94]<DD>
-Rob Pike,
-``Acme: A User Interface for Programmers'',
-USENIX Proc. of the Winter 1994 Conf.,
-San Francisco, CA.
-<DT>[Pike95]<DD>
-Rob Pike,
-``How to Use the Plan 9 C Compiler'',
-Plan 9 Programmer's Manual,
-Volume 2,
-AT&amp;T Bell Laboratories,
-Murray Hill, NJ,
-1995.
-<DT>[POSIX]<DD>
-Information Technology&#x2014;Portable Operating
-System Interface (POSIX) Part 1:
-System Application Program Interface (API)
-[C Language],
-IEEE, New York, 1990.
-<DT>[PPTTW93] <DD>
-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom, ``The Use of Name Spaces in Plan 9'',
-Op. Sys. Rev.,
-Vol. 27, No. 2, April 1993, pp. 72-76,
-reprinted in this volume.
-<DT>[Presotto]<DD>
-Dave Presotto,
-``Multiprocessor Streams for Plan 9'',
-UKUUG Summer 1990 Conf. Proc.,
-July 1990, pp. 11-19.
-<DT>[PrWi93]<DD>
-Dave Presotto and Phil Winterbottom,
-``The Organization of Networks in Plan 9'',
-USENIX Proc. of the Winter 1993 Conf.,
-San Diego, CA,
-pp. 43-50,
-reprinted in this volume.
-<DT>[PrWi95]<DD>
-Dave Presotto and Phil Winterbottom,
-``The IL Protocol'',
-Plan 9 Programmer's Manual,
-Volume 2,
-AT&amp;T Bell Laboratories,
-Murray Hill, NJ,
-1995.
-<DT>[RFC768] <DD>
-J. Postel, RFC768,
-<I>User Datagram Protocol,</I>
-<I>DARPA Internet Program Protocol Specification,</I>
-August 1980.
-<DT>[RFC793] <DD>
-RFC793,
-<I>Transmission Control Protocol,</I>
-<I>DARPA Internet Program Protocol Specification,</I>
-September 1981.
-<DT>[Rao91]<DD>
-Herman Chung-Hwa Rao,
-The Jade File System,
-(Ph. D. Dissertation),
-Dept. of Comp. Sci,
-University of Arizona,
-TR 91-18.
-<DT>[Rit84]<DD>
-D.M. Ritchie,
-``A Stream Input-Output System'',
-AT&amp;T Bell Laboratories Technical Journal,
-<B>63</B>(8), October, 1984.
-<DT>[Tric95]<DD>
-Howard Trickey,
-``APE &#x2014; The ANSI/POSIX Environment'',
-Plan 9 Programmer's Manual,
-Volume 2,
-AT&amp;T Bell Laboratories,
-Murray Hill, NJ,
-1995.
-<DT>[Unicode]<DD>
-The Unicode Standard,
-Worldwide Character Encoding,
-Version 1.0, Volume 1,
-The Unicode Consortium,
-Addison Wesley,
-New York,
-1991.
-<DT>[UNIX85]<DD>
-UNIX Time-Sharing System Programmer's Manual,
-Research Version, Eighth Edition, Volume 1.
-AT&amp;T Bell Laboratories, Murray Hill, NJ, 1985.
-<DT>[Welc94]<DD>
-Brent Welch,
-``A Comparison of Three Distributed File System Architectures: Vnode, Sprite, and Plan 9'',
-Computing Systems,
-7(2), pp. 175-199, Spring, 1994.
-<DT>[Wint95]<DD>
-Phil Winterbottom,
-``Alef Language Reference Manual'',
-Plan 9 Programmer's Manual,
-Volume 2,
-AT&amp;T Bell Laboratories,
-Murray Hill, NJ,
-1995.
-</H4>
-</dl>
-<br>&#32;<br>
-<A href="http://www.lucent.com/copyright.html">
-Copyright</A> &#169; 2006 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 2921
sys/doc/acid.html

@@ -1,2921 +0,0 @@
-<html>
-<title>
-Acid Manual
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Acid Manual
-</H1>
-<DL><DD><I>Phil Winterbottom<br>
-philw@plan9.bell-labs.com<br>
-</I></DL>
-<H4>Introduction
-</H4>
-<P>
-Acid is a general purpose, source level symbolic debugger.
-The debugger is built around a simple command language. 
-The command language, distinct from the language of the program being debugged,
-provides a flexible user interface that allows the debugger
-interface to be customized for a specific application or architecture.
-Moreover, it provides an opportunity to write test and
-verification code independently of a program's source code.
-Acid is able to debug multiple
-processes provided they share a common set of symbols, such as the processes in
-a threaded program.
-</P>
-<P>
-Like other language-based solutions, Acid presents a poor user interface but
-provides a powerful debugging tool.
-Application of Acid to hard problems is best approached by writing functions off-line
-(perhaps loading them with the
-<TT>include</TT>
-function or using the support provided by
-<A href="/magic/man2html/1/acme"><I>acme</I>(1)),
-</A>rather than by trying to type intricate Acid operations
-at the interactive prompt.
-</P>
-<P>
-Acid allows the execution of a program to be controlled by operating on its
-state while it is stopped and by monitoring and controlling its execution
-when it is running. Each program action that causes a change 
-of execution state is reflected by the execution
-of an Acid function, which may be user defined.
-A library of default functions provides the functionality of a normal debugger.
-</P>
-<P>
-A Plan 9 process is controlled by writing messages to a control file in the
-<A href="/magic/man2html/3/proc"><I>proc</I>(3)
-</A>file system. Each control message has a corresponding Acid function, which
-sends the message to the process. These functions take a process id
-(<I>pid</I>)
-as an
-argument. The memory and text file of the program may be manipulated using
-the indirection operators. The symbol table, including source cross reference,
-is available to an Acid program. The combination allows complex operations
-to be performed both in terms of control flow and data manipulation.
-</P>
-<H4>Input format and <TT>whatis</TT>
-</H4>
-<P>
-Comments start with
-<TT>//</TT>
-and continue to the end of the line.
-Input is a series of statements and expressions separated by semicolons.
-At the top level of the interpreter, the builtin function
-<TT>print</TT>
-is called automatically to display the result of all expressions except function calls.
-A unary
-<TT>+</TT>
-may be used as a shorthand to force the result of a function call to be printed.
-</P>
-<P>
-Also at the top level, newlines are treated as semicolons
-by the parser, so semicolons are unnecessary when evaluating expressions.
-</P>
-<P>
-When Acid starts, it loads the default program modules,
-enters interactive mode, and prints a prompt. In this state Acid accepts
-either function definitions or statements to be evaluated.
-In this interactive mode
-statements are evaluated immediately, while function definitions are
-stored for later invocation.
-</P>
-<P>
-The
-<TT>whatis</TT>
-operator can be used to report the state of identifiers known to the interpreter.
-With no argument,
-<TT>whatis</TT>
-reports the name of all defined Acid functions; when supplied with an identifier
-as an argument it reports any variable, function, or type definition
-associated with the identifier.
-Because of the way the interpreter handles semicolons,
-the result of a
-<TT>whatis</TT>
-statement can be returned directly to Acid without adding semicolons.
-A syntax error or interrupt returns Acid to the normal evaluation
-mode; any partially evaluated definitions are lost.
-</P>
-<H4>Using the Library Functions
-</H4>
-<P>
-After loading the program binary, Acid loads the portable and architecture-specific
-library functions  that form the standard debugging environment.
-These files are Acid source code and are human-readable.
-The following example uses the standard debugging library to show how
-language and program interact:
-<DL><DT><DD><TT><PRE>
-% acid /bin/ls
-/bin/ls:mips plan 9 executable
-
-/sys/lib/acid/port
-/sys/lib/acid/mips
-acid: new()
-75721: system call  _main ADD  $-0x14,R29
-75721: breakpoint   main+0x4   MOVW  R31,0x0(R29)
-acid: bpset(ls)
-acid: cont()
-75721: breakpoint   ls    ADD  $-0x16c8,R29
-acid: stk()
-At pc:0x0000141c:ls /sys/src/cmd/ls.c:87
-ls(s=0x0000004d,multi=0x00000000) /sys/src/cmd/ls.c:87
-    called from main+0xf4 /sys/src/cmd/ls.c:79
-main(argc=0x00000000,argv=0x7ffffff0) /sys/src/cmd/ls.c:48
-    called from _main+0x20 /sys/src/libc/mips/main9.s:10
-acid: PC
-0xc0000f60
-acid: *PC
-0x0000141c
-acid: ls
-0x0000141c
-</PRE></TT></DL>
-The function
-<TT>new()</TT>
-creates a new process and stops it at the first instruction.
-This change in state is reported by a call to the
-Acid function
-<TT>stopped</TT>,
-which is called by the interpreter whenever the debugged program stops.
-<TT>Stopped</TT>
-prints the status line giving the pid, the reason the program stopped
-and the address and instruction at the current PC.
-The function
-<TT>bpset</TT>
-makes an entry in the breakpoint table and plants a breakpoint in memory.
-The
-<TT>cont</TT>
-function continues the process, allowing it to run until some condition
-causes it to stop. In this case the program hits the breakpoint placed on
-the function
-<TT>ls</TT>
-in the C program. Once again the
-<TT>stopped</TT>
-routine is called to print the status of the program. The function
-<TT>stk</TT>
-prints a C stack trace of the current process. It is implemented using
-a builtin Acid function that returns the stack trace as a list; the code
-that formats the information is all written in Acid. 
-The Acid variable
-<TT>PC</TT>
-holds the address of the 
-cell where the current value of the processor register
-<TT>PC</TT>
-is stored. By indirecting through
-the value of
-<TT>PC</TT>
-the address where the program is stopped can be found.
-All of the processor registers are available by the same mechanism.
-</P>
-<H4>Types
-</H4>
-<P>
-An Acid variable has one of four types:
-<I>integer</I>,
-<I>float</I>,
-<I>list</I>,
-or
-<I>string</I>.
-The type of a variable is inferred from the type of the right-hand
-side of the assignment expression which last set its value.
-Referencing a variable that has not yet
-been assigned draws a "used but not set" error. Many of the operators may
-be applied to more than
-one type; for these operators the action of the operator is determined by
-the types of its operands. The action of each operator is defined in the
-<I>Expressions</I>
-section of this manual.
-</P>
-<H4>Variables
-</H4>
-<P>
-Acid has three kinds of variables: variables defined by the symbol table
-of the debugged program, variables that are defined and maintained
-by the interpreter as the debugged program changes state, and variables
-defined and used by Acid programs.
-</P>
-<P>
-Some examples of variables maintained by the interpreter are the register
-pointers listed by name in the Acid list variable
-<TT>registers</TT>,
-and the symbol table listed by name and contents in the Acid variable
-<TT>symbols</TT>.
-</P>
-<P>
-The variable
-<TT>pid</TT>
-is updated by the interpreter to select the most recently created process
-or the process selected by the
-<TT>setproc</TT>
-builtin function.
-</P>
-<H4>Formats
-</H4>
-<P>
-In addition to a type, variables have formats. The format is a code
-letter that determines the printing style and the effect of some of the
-operators on that variable. The format codes are derived from the format
-letters used by
-<A href="/magic/man2html/1/db"><I>db</I>(1).
-</A>By default, symbol table variables and numeric constants
-are assigned the format code
-<TT>X</TT>,
-which specifies 32-bit hexadecimal.
-Printing a variable with this code yields the output
-<TT>0x00123456</TT>.
-The format code of a variable may be changed from the default by using the 
-builtin function
-<TT>fmt</TT>.
-This function takes two arguments, an expression and a format code. After
-the expression is evaluated the new format code is attached to the result
-and forms the return value from
-<TT>fmt</TT>.
-The backslash operator is a short form of
-<TT>fmt</TT>.
-The format supplied by the backslash operator must be the format character
-rather than an expression.
-If the result is assigned to a variable the new format code is maintained
-in the variable. For example:
-<DL><DT><DD><TT><PRE>
-acid: x=10
-acid: print(x)
-0x0000000a 
-acid: x = fmt(x, 'D')
-acid: print(x, fmt(x, 'X'))
-10 0x0000000a
-acid: x
-10
-acid: x\o
-12
-</PRE></TT></DL>
-The supported format characters are:
-<DL><DD>
-</P>
-<DL COMPACT>
-<DT><TT>o</TT><DD>
-Print two-byte integer in octal.
-<DT><TT>O</TT><DD>
-Print four-byte integer in octal.
-<DT><TT>q</TT><DD>
-Print two-byte integer in signed octal.
-<DT><TT>Q</TT><DD>
-Print four-byte integer in signed octal.
-<DT><TT>B</TT><DD>
-Print four-byte integer in binary.
-<DT><TT>d</TT><DD>
-Print two-byte integer in signed decimal.
-<DT><TT>D</TT><DD>
-Print four-byte integer in signed decimal.
-<DT><TT>V</TT><DD>
-Print eight-byte integer in signed decimal.
-<DT><TT>Z</TT><DD>
-Print eight-byte integer in unsigned decimal.
-<DT><TT>x</TT><DD>
-Print two-byte integer in hexadecimal.
-<DT><TT>X</TT><DD>
-Print four-byte integer in hexadecimal.
-<DT><TT>Y</TT><DD>
-Print eight-byte integer in hexadecimal.
-<DT><TT>u</TT><DD>
-Print two-byte integer in unsigned decimal.
-<DT><TT>U</TT><DD>
-Print four-byte integer in unsigned decimal.
-<DT><TT>f</TT><DD>
-Print single-precision floating point number.
-<DT><TT>F</TT><DD>
-Print double-precision floating point number.
-<DT><TT>g</TT><DD>
-Print a single precision floating point number in string format.
-<DT><TT>G</TT><DD>
-Print a double precision floating point number in string format.
-<DT><TT>b</TT><DD>
-Print byte in hexadecimal.
-<DT><TT>c</TT><DD>
-Print byte as an ASCII character.
-<DT><TT>C</TT><DD>
-Like
-<TT>c</TT>,
-with
-printable ASCII characters represented normally and
-others printed in the form <TT>\x</TT><I>nn</I>.
-<DT><TT>s</TT><DD>
-Interpret the addressed bytes as UTF characters
-and print successive characters until a zero byte is reached.
-<DT><TT>r</TT><DD>
-Print a two-byte integer as a rune.
-<DT><TT>R</TT><DD>
-Print successive two-byte integers as runes
-until a zero rune is reached.
-<DT><TT>i</TT><DD>
-Print as machine instructions.
-<DT><TT>I</TT><DD>
-As
-<TT>i</TT>
-above, but print the machine instructions in
-an alternate form if possible:
-<TT>sunsparc</TT>
-and
-<TT>mipsco</TT>
-reproduce the manufacturers' syntax.
-<DT><TT>a</TT><DD>
-Print the value in symbolic form.
-</DL>
-</dl>
-<H4>Complex types
-</H4>
-<P>
-Acid permits the definition of the layout of memory.
-The usual method is to use the
-<TT>-a</TT>
-flag of the compilers to produce Acid-language descriptions of data structures (see
-<A href="/magic/man2html/1/2c"><I>2c</I>(1))
-</A>although such definitions can be typed interactively.
-The keywords
-<TT>complex</TT>,
-<TT>adt</TT>,
-<TT>aggr</TT>,
-and
-<TT>union</TT>
-are all equivalent; the compiler uses the synonyms to document the declarations.
-A complex type is described as a set of members, each containing a format letter,
-an offset in the structure, and a name.  For example, the C structure
-<DL><DT><DD><TT><PRE>
-struct List {
-	int         type;
-	struct List *next;
-};
-</PRE></TT></DL>
-is described by the Acid statement
-<DL><DT><DD><TT><PRE>
-complex List {
-	'D'	0	type;
-	'X'	4	next;
-};
-</PRE></TT></DL>
-</P>
-<H4>Scope
-</H4>
-<P>
-Variables are global unless they are either parameters to functions
-or are declared as
-<TT>local</TT>
-in a function body. Parameters and local variables are available only in
-the body of the function in which they are instantiated.
-Variables are dynamically bound: if a function declares a local variable
-with the same name as a global variable, the global variable will be hidden
-whenever the function is executing.
-For example, if a function
-<TT>f</TT>
-has a local called
-<TT>main</TT>,
-any function called below
-<TT>f</TT>
-will see the local version of
-<TT>main</TT>,
-not the external symbol.
-</P>
-<H4>Addressing
-</H4>
-<P>
-Since the symbol table specifies addresses,
-to access the value of program variables
-an extra level of indirection
-is required relative to the source code.
-For consistency, the registers are maintained as pointers as well; Acid variables with the names
-of processor registers point to cells holding the saved registers.
-</P>
-<P>
-The location in a file or memory image associated with
-an address is calculated from a map
-associated with the file.
-Each map contains one or more quadruples (<I>t</I>,
-<I>b</I>,
-<I>e</I>,
-<I>f</I>),
-defining a segment named
-<I>t</I>
-(usually 
-<TT>text</TT>,
-<TT>data</TT>,
-<TT>regs</TT>,
-or
-<TT>fpregs</TT>)
-mapping addresses in the range
-<I>b</I>
-through
-<I>e</I>
-to the part of the file
-beginning at
-offset
-<I>f</I>.
-The memory model of a Plan 9 process assumes
-that segments are disjoint.  There
-can be more than one segment of a given type (e.g., a process
-may have more than one text segment) but segments
-may not overlap.
-An address
-<I>a</I>
-is translated
-to a file address
-by finding a segment
-for which
-<I>b</I>
-&lt;=
-<I>a</I>
-&lt;
-<I>e</I>;
-the location in the file
-is then
-<I>address</I>
-+
-<I>f</I>
--
-<I>b</I>.
-</P>
-<P>
-Usually,
-the text and initialized data of a program
-are mapped by segments called 
-<TT>text</TT>
-and
-<TT>data</TT>.
-Since a program file does not contain bss, stack, or register data,
-these data are
-not mapped by the data segment.
-The text segment is mapped similarly in the memory image of
-a normal (i.e., non-kernel) process.
-However, the segment called 
-<TT>*data</TT>
-maps memory from the beginning to the end of the program's data space.
-This region contains the program's static data, the bss, the
-heap and the stack.  A segment
-called
-<TT>*regs</TT>
-maps the registers;
-<TT>*fpregs</TT>
-maps the floating point registers.
-</P>
-<P>
-Sometimes it is useful to define a map with a single segment
-mapping the region from 0 to 0xFFFFFFFF; such a map
-allows the entire file to be examined
-without address translation.  The builtin function
-<TT>map</TT>
-examines and modifies Acid's map for a process.
-</P>
-<H4>Name Conflicts
-</H4>
-<P>
-Name conflicts between keywords in the Acid language, symbols in the program,
-and previously defined functions are resolved when the interpreter starts up.
-Each name is made unique by prefixing enough
-<TT>$</TT>
-characters to the front of the name to make it unique. Acid reports
-a list of each name change at startup. The report looks like this:
-<DL><DT><DD><TT><PRE>
-/bin/sam: mips plan 9 executable
-/lib/acid/port
-/lib/acid/mips
-Symbol renames:
-	append=$append T/0xa4e40
-acid:
-</PRE></TT></DL>
-The symbol
-<TT>append</TT>
-is both a keyword and a text symbol in the program. The message reports
-that the text symbol is now named
-<TT>$append</TT>.
-</P>
-<H4>Expressions
-</H4>
-<P>
-Operators have the same
-binding and precedence as in C.
-For operators of equal precedence, expressions are evaluated from left to right. 
-</P>
-<H4>Boolean expressions
-</H4>
-<P>
-If an expression is evaluated for a boolean condition the test
-performed depends on the type of the result. If the result is of
-<I>integer</I>
-or
-<I>floating</I>
-type the result is true if the value is non-zero. If the expression is a
-<I>list</I>
-the result is true if there are any members in the list.
-If the expression is a
-<I>string</I>
-the result is true if there are any characters in the string.
-<DL><DT><DD><TT><PRE>
-	primary-expression:
-		identifier
-		identifier <TT>:</TT> identifier
-		constant
-		<TT>(</TT> expression <TT>)</TT>
-		<TT>{</TT> elist <TT>}</TT>
-
-	elist:
-		expression
-		elist , expression
-</PRE></TT></DL>
-An identifier may be any legal Acid variable. The colon operator returns the
-address of parameters or local variables in the current stack of a program.
-For example:
-<DL><DT><DD><TT><PRE>
-*main:argc
-</PRE></TT></DL>
-prints the number of arguments passed into main. Local variables and parameters
-can only be referenced after the frame has been established. It may be necessary to
-step a program over the first few instructions of a breakpointed function to properly set
-the frame.
-</P>
-<P>
-Constants follow the same lexical rules as C.
-A list of expressions delimited by braces forms a list constructor.
-A new list is produced by evaluating each expression when the constructor is executed.
-The empty list is formed from
-<TT>{}</TT>.
-<DL><DT><DD><TT><PRE>
-acid: x = 10
-acid: l = { 1, x, 2\D }
-acid: x = 20
-acid: l
-{0x00000001 , 0x0000000a , 2 }
-</PRE></TT></DL>
-</P>
-<H4>Lists
-</H4>
-<P>
-Several operators manipulate lists.
-<DL><DT><DD><TT><PRE>
-	list-expression:
-		primary-expression
-		<TT>head</TT> primary-expression
-		<TT>tail</TT> primary-expression
-		<TT>append</TT> expression <TT>,</TT> primary-expression
-		<TT>delete</TT> expression <TT>,</TT> primary-expression
-</PRE></TT></DL>
-The
-<I>primary-expression</I>
-for
-<TT>head</TT>
-and
-<TT>tail</TT>
-must yield a value of type
-<I>list</I>.
-If there are no elements in the list the value of
-<TT>head</TT>
-or
-<TT>tail</TT>
-will be the empty list. Otherwise
-<TT>head</TT>
-evaluates to the first element of the list and
-<TT>tail</TT>
-evaluates to the rest.
-<DL><DT><DD><TT><PRE>
-acid: head {}
-{}
-acid: head {1, 2, 3, 4}
-0x00000001 
-acid: tail {1, 2, 3, 4}
-{0x00000002 , 0x00000003 , 0x00000004 }
-</PRE></TT></DL>
-The first operand of
-<TT>append</TT>
-and
-<TT>delete</TT>
-must be an expression that yields a
-<I>list</I>.
-<TT>Append</TT>
-places the result of evaluating
-<I>primary-expression</I>
-at the end of the list.
-The
-<I>primary-expression</I>
-supplied to
-<TT>delete</TT>
-must evaluate to an integer;
-<TT>delete</TT>
-removes the 
-<I>n</I>'th
-item from the list, where
-<I>n</I>
-is the integral value of
-<I>primary-expression.</I>
-List indices are zero-based.
-<DL><DT><DD><TT><PRE>
-	acid: append {1, 2}, 3
-	{0x00000001 , 0x00000002 , 0x00000003 }
-	acid: delete {1, 2, 3}, 1
-	{0x00000001 , 0x00000003 }
-</PRE></TT></DL>
-</P>
-<P>
-Assigning a list to a variable copies a reference to the list; if a list variable
-is copied it still points at the same list.  To copy a list, the elements must
-be copied piecewise using
-<TT>head</TT>
-and
-<TT>append</TT>.
-</P>
-<H4>Operators
-</H4>
-<P>
-<DL><DT><DD><TT><PRE>
-	postfix-expression:
-		list-expression
-		postfix-expression <TT>[</TT> expression <TT>]</TT>
-		postfix-expression <TT>(</TT> argument-list <TT>)</TT>
-		postfix-expression <TT>.</TT> tag
-		postfix-expression <TT>-&gt;</TT> tag 
-		postfix-expression <TT>++</TT>
-		postfix-expression <TT>--</TT>
-
-	argument-list:
-		expression
-		argument-list , expression
-</PRE></TT></DL>
-The
-<TT>[</TT>
-<I>expression</I>
-<TT>]</TT>
-operator performs indexing.
-The indexing expression must result in an expression of
-<I>integer</I>
-type, say
-<I>n</I>.
-The operation depends on the type of
-<I>postfix-expression</I>.
-If the
-<I>postfix-expression</I>
-yields an
-<I>integer</I>
-it is assumed to be the base address of an array in the memory image.
-The index offsets into this array; the size of the array members is
-determined by the format associated with the
-<I>postfix-expression</I>.
-If the 
-<I>postfix-expression</I>
-yields a
-<I>string</I>
-the index operator fetches the
-<I>n</I>'th
-character
-of the string. If the index points beyond the end
-of the string, a zero is returned.
-If the
-<I>postfix-expression</I>
-yields a
-<I>list</I>
-then the indexing operation returns the
-<I>n</I>'th
-item of the list.
-If the list contains fewer than
-<I>n</I>
-items the empty list
-<TT>{}</TT>
-is returned.
-</P>
-<P>
-The
-<TT>++</TT>
-and
-<TT>--</TT>
-operators increment and decrement integer variables.
-The amount of increment or decrement depends on the format code. These postfix
-operators return the value of the variable before the increment or decrement
-has taken place.
-<DL><DT><DD><TT><PRE>
-	unary-expression:
-		postfix-expression
-		<TT>++</TT> unary-expression
-		<TT>--</TT> unary-expression
-
-	unary-operator: one of
-		<TT>*</TT> <TT>@</TT> <TT>+</TT> <TT>-</TT> ~ <TT>!</TT>
-</PRE></TT></DL>
-The operators
-<TT>*</TT>
-and
-<TT>@</TT>
-are the indirection operators.
-<TT>@</TT>
-references a value from the text file of the program being debugged.
-The size of the value depends on the format code. The
-<TT>*</TT>
-operator fetches a value from the memory image of a process. If either
-operator appears on the left-hand side of an assignment statement, either the file
-or memory will be written. The file can only be modified when Acid is invoked
-with the
-<TT>-w</TT>
-option.
-The prefix
-<TT>++</TT>
-and
-<TT>--</TT>
-operators perform the same operation as their postfix counterparts but
-return the value after the increment or decrement has been performed. Since the
-<TT>++</TT>
-and
-<TT>*</TT>
-operators fetch and increment the correct amount for the specified format,
-the following function prints correct machine instructions on a machine with
-variable length instructions, such as the 68020 or 386:
-<DL><DT><DD><TT><PRE>
-	defn asm(addr)
-	{
-		addr = fmt(addr, 'i');
-		loop 1, 10 do
-			print(*addr++, "\n");
-	}
-</PRE></TT></DL>
-The operators
-<TT>~</TT>
-and
-<TT>!</TT>
-perform bitwise and logical negation respectively. Their operands must be of
-<I>integer</I>
-type.
-<DL><DT><DD><TT><PRE>
-	cast-expression:
-		unary-expression
-		unary-expression <TT>\</TT> format-char
-		<TT>(</TT> complex-name <TT>)</TT> unary-expression		
-</PRE></TT></DL>
-A unary expression may be preceded by a cast. The cast has the effect of
-associating the value of 
-<I>unary-expression</I>
-with a complex type structure.
-The result may then be dereferenced using the
-<TT>.</TT>
-and
-<TT>-&gt;</TT>
-operators.
-</P>
-<P>
-An Acid variable may be associated with a complex type
-to enable accessing the type's members:
-<DL><DT><DD><TT><PRE>
-acid: complex List {
-	'D'	0	type;
-	'X'	4	next;
-};
-acid: complex List lhead
-acid: lhead.type
-10
-acid: lhead = ((List)lhead).next
-acid: lhead.type
--46
-</PRE></TT></DL>
-Note that the
-<TT>next</TT>
-field cannot be given a complex type automatically.
-</P>
-<P>
-When entered at the top level of the interpreter,
-an expression of complex type
-is treated specially.
-If the type is called
-<TT>T</TT>
-and an Acid function also called
-<TT>T</TT>
-exists,
-then that function will be called with the expression as its argument.
-The compiler options
-<TT>-a</TT>
-and
-<TT>-aa</TT>
-will generate Acid source code defining such complex types and functions; see
-<A href="/magic/man2html/1/2c"><I>2c</I>(1).
-</A></P>
-<P>
-A
-<I>unary-expression</I>
-may be qualified with a format specifier using the
-<TT>\</TT>
-operator. This has the same effect as passing the expression to the
-<TT>fmt</TT>
-builtin function.
-<DL><DT><DD><TT><PRE>
-	multiplicative-expression:
-		cast-expression
-		multiplicative-expression <TT>*</TT> multiplicative-expression
-		multiplicative-expression <TT>/</TT> multiplicative-expression
-		multiplicative-expression <TT>%</TT> multiplicative-expression
-</PRE></TT></DL>
-These operate on
-<I>integer</I>
-and 
-<I>float</I>
-types and perform the expected operations:
-<TT>*</TT>
-multiplication,
-<TT>/</TT>
-division,
-<TT>%</TT>
-modulus.
-<DL><DT><DD><TT><PRE>
-	additive-expression:
-		multiplicative-expression
-		additive-expression <TT>+</TT> multiplicative-expression
-		additive-expression <TT>-</TT> multiplicative-expression
-</PRE></TT></DL>
-These operators perform as expected for
-<I>integer</I>
-and 
-<I>float</I>
-operands.
-Unlike in C,
-<TT>+</TT>
-and
-<TT>-</TT>
-do not scale the addition based on the format of the expression.
-This means that
-<TT>i=i+1</TT>
-will always add 1 but
-<TT>i++</TT>
-will add the size corresponding to the format stored with
-<TT>i</TT>.
-If both operands are of either
-<I>string</I>
-or
-<I>list</I>
-type  then addition is defined as concatenation. Subtraction is undefined for
-these two types.
-<DL><DT><DD><TT><PRE>
-	shift-expression:
-		additive-expression
-		shift-expression <TT>&lt;&lt;</TT> additive-expression
-		shift-expression <TT>&gt;&gt;</TT> additive-expression
-</PRE></TT></DL>
-The
-<TT>&gt;&gt;</TT>
-and
-<TT>&lt;&lt;</TT>
-operators perform bitwise right and left shifts respectively. Both
-require operands of
-<I>integer</I>
-type.
-<DL><DT><DD><TT><PRE>
-	relational-expression:
-		relational-expression <TT>&lt;</TT> shift-expression
-		relational-expression <TT>&gt;</TT> shift-expression
-		relational-expression <TT>&lt;=</TT> shift-expression
-		relational-expression <TT>&gt;=</TT> shift-expression
-
-	equality-expression:
-		relational-expression
-		relational-expression <TT>==</TT> equality-expression
-		relational-expression <TT>!=</TT> equality-expression
-</PRE></TT></DL>
-The comparison operators are
-<TT>&lt;</TT>
-(less than),
-<TT>&gt;</TT>
-(greater than),
-<TT>&lt;=</TT>
-(less than or equal to),
-<TT>&gt;=</TT>
-(greater than or equal to),
-<TT>==</TT>
-(equal to) and
-<TT>!=</TT>
-(not equal to). The result of a comparison is 0
-if the condition is false, otherwise 1. The relational operators can only be
-applied to operands of
-<I>integer</I>
-and
-<I>float</I>
-type. The equality operators apply to all types.  Comparing mixed types is legal.
-Mixed integer and float compare on the integral value.  Other mixtures are always unequal.
-Two lists are equal if they
-have the same number of members and a pairwise comparison of the members results
-in equality.
-<DL><DT><DD><TT><PRE>
-	AND-expression:
-		equality-expression
-		AND-expression <TT>&amp;</TT> equality-expression
-
-	XOR-expression:
-		AND-expression
-		XOR-expression <TT>^</TT> AND-expression
-
-	OR-expression:
-		XOR-expression
-		OR-expression <TT>|</TT> XOR-expression
-</PRE></TT></DL>
-These operators perform bitwise logical operations and apply only to the
-<I>integer</I>
-type.
-The operators are
-<TT>&amp;</TT>
-(logical and),
-<TT>^</TT>
-(exclusive or) and
-<TT>|</TT>
-(inclusive or).
-<DL><DT><DD><TT><PRE>
-	logical-AND-expression:
-		OR-expression
-		logical-AND-expression <TT>&amp;&amp;</TT> OR-expression
-
-	logical-OR-expression:
-		logical-AND-expression
-		logical-OR-expression <TT>||</TT> logical-AND-expression
-</PRE></TT></DL>
-The
-<TT>&amp;&amp;</TT>
-operator returns 1 if both of its operands evaluate to boolean true, otherwise 0.
-The
-<TT>||</TT>
-operator returns 1 if either of its operands evaluates to boolean true,
-otherwise 0.
-</P>
-<H4>Statements
-</H4>
-<P>
-<DL><DT><DD><TT><PRE>
-	<TT>if</TT> expression <TT>then</TT> statement <TT>else</TT> statement
-	<TT>if</TT> expression <TT>then</TT> statement
-</PRE></TT></DL>
-The
-<I>expression</I>
-is evaluated as a boolean. If its value is true the statement after
-the
-<TT>then</TT>
-is executed, otherwise the statement after the
-<TT>else</TT>
-is executed. The 
-<TT>else</TT>
-portion may be omitted.
-<DL><DT><DD><TT><PRE>
-	<TT>while</TT> expression <TT>do</TT> statement
-</PRE></TT></DL>
-In a while loop, the
-<I>statement</I>
-is executed while the boolean
-<I>expression</I>
-evaluates
-true.
-<DL><DT><DD><TT><PRE>
-	<TT>loop</TT> startexpr, endexpr <TT>do</TT> statement
-</PRE></TT></DL>
-The two expressions
-<I>startexpr</I>
-and
-<I>endexpr</I>
-are evaluated prior to loop entry.
-<I>Statement</I>
-is evaluated while the value of
-<I>startexpr</I>
-is less than or equal to
-<I>endexpr</I>.
-Both expressions must yield
-<I>integer</I>
-values. The value of
-<I>startexpr</I>
-is
-incremented by one for each loop iteration.
-Note that there is no explicit loop variable; the
-<I>expressions</I>
-are just values.
-<DL><DT><DD><TT><PRE>
-	<TT>return</TT> expression
-</PRE></TT></DL>
-<TT>return</TT>
-terminates execution of the current function and returns to its caller.
-The value of the function is given by expression. Since
-<TT>return</TT>
-requires an argument, nil-valued functions should return the empty list
-<TT>{}</TT>.
-<DL><DT><DD><TT><PRE>
-	<TT>local</TT> variable
-</PRE></TT></DL>
-The
-<TT>local</TT>
-statement creates a local instance of
-<I>variable</I>,
-which exists for the duration
-of the instance of the function in which it is declared. Binding is dynamic: the local variable,
-rather than the previous value of
-<I>variable</I>,
-is visible to called functions.
-After a return from the current function the previous value of
-<I>variable</I>
-is
-restored.
-</P>
-<P>
-If Acid is interrupted, the values of all local variables are lost,
-as if the function returned.
-<DL><DT><DD><TT><PRE>
-	<TT>defn</TT> function-name <TT>(</TT> parameter-list <TT>)</TT> body
-
-	parameter-list:
-		variable
-		parameter-list , variable
-
-	body:
-		<TT>{</TT> statement <TT>}</TT>
-</PRE></TT></DL>
-Functions are introduced by the
-<TT>defn</TT>
-statement. The definition of parameter names suppresses any variables
-of the same name until the function returns. The body of a function is a list
-of statements enclosed by braces.
-</P>
-<H4>Code variables
-</H4>
-<P>
-Acid permits the delayed evaluation of a parameter to a function.  The parameter
-may then be evaluated at any time with the
-<TT>eval</TT>
-operator.  Such parameters are called
-<I>code variables</I>
-and are defined by prefixing their name with an asterisk in their declaration.
-</P>
-<P>
-For example, this function wraps up an expression for later evaluation:
-<DL><DT><DD><TT><PRE>
-acid: defn code(*e) { return e; }
-acid: x = code(v+atoi("100")\D)
-acid: print(x)
-(v+atoi("100"))\D;
-acid: eval x
-&lt;stdin&gt;:5: (error) v used but not set
-acid: v=5
-acid: eval x
-105
-</PRE></TT></DL>
-</P>
-<H4>Source Code Management
-</H4>
-<P>
-Acid provides the means to examine source code. Source code is
-represented by lists of strings. Builtin functions provide mapping
-from address to lines and vice-versa. The default debugging environment
-has the means to load and display source files.
-</P>
-<H4>Builtin Functions
-</H4>
-<P>
-The Acid interpreter has a number of builtin functions, which cannot be redefined.
-These functions perform machine- or operating system-specific functions such as
-symbol table and process management.
-The following section presents a description of each builtin function.
-The notation
-<TT>{}</TT>
-is used to denote the empty list, which is the default value of a function that
-does not execute a
-<TT>return</TT>
-statement.
-The type and number of parameters for each function are specified in the
-description; where a parameter can be of any type it is specified as type
-<I>item</I>.
-
-
-
-</P>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>Access</TT>
-returns the integer 1 if the file name in
-<I>string</I>
-can be read by the builtin functions
-<TT>file</TT>,
-<TT>readfile</TT>,
-or
-<TT>include</TT>,
-otherwise 0. A typical use of this function is to follow
-a search path looking for a source file; it is used by
-<TT>findsrc</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-if access("main.c") then
-	return file("main.c");
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>atof</TT>
-converts the string supplied as its argument into a floating point
-number. The function accepts strings in the same format as the C
-function of the same name. The value returned has the format code
-<TT>f</TT>.
-<TT>atof</TT>
-returns the value 0.0 if it is unable to perform the conversion.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: +atof("10.4e6")
-1.04e+07
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>atoi</TT>
-converts the argument
-to an integer value.
-The function accepts strings in the same format as the C function of the
-same name. The value returned has the format code
-<TT>D</TT>.
-<TT>atoi</TT>
-returns the integer 0 if it is unable to perform a conversion.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: +atoi("-1255")
--1255
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>error</TT>
-generates an error message and returns the interpreter to interactive
-mode. If an Acid program is running, it is aborted.
-Processes being debugged are not affected. The values of all local variables are lost.
-<TT>error</TT>
-is commonly used to stop the debugger when some interesting condition arises
-in the debugged program.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-while 1 do {
-	step();
-	if *main != @main then
-		error("memory corrupted");
-}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>file</TT>
-reads the contents of the file specified by
-<I>string</I>
-into a list.
-Each element in the list is a string corresponding to a line in the file.
-<TT>file</TT>
-breaks lines at the newline character, but the newline
-characters are not returned as part of each string.
-<TT>file</TT>
-returns the empty list if it encounters an error opening or reading the data.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: print(file("main.c")[0])
-#include	&lt;u.h&gt;
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>filepc</TT>
-interprets its
-<I>string</I>
-argument as a source file address in the form of a file name and line offset.
-<TT>filepc</TT>
-uses the symbol table to map the source address into a text address
-in the debugged program. The
-<I>integer</I>
-return value has the format
-<TT>X</TT>.
-<TT>filepc</TT>
-returns an address of -1 if the source address is invalid.
-The source file address uses the same format as
-<A href="/magic/man2html/1/acme"><I>acme</I>(1).
-</A>This function is commonly used to set breakpoints from the source text.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: bpset(filepc("main:10"))
-acid: bptab()
-	0x00001020 usage  ADD	-0xc,R29
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>fmt</TT>
-evaluates the expression
-<I>item</I>
-and sets the format of the result to
-<I>fmt</I>.
-The format of a value determines how it will be printed and
-what kind of object will be fetched by the
-<TT>*</TT>
-and
-<TT>@</TT>
-operators. The
-<TT>\</TT>
-operator is a short-hand form of the
-<TT>fmt</TT>
-builtin function. The
-<TT>fmt</TT>
-function leaves the format of the
-<I>item</I>
-unchanged.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: main=fmt(main, 'i') // as instructions
-acid: print(main\X, "\t", *main)
-0x00001020 ADD	$-64,R29
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>fnbound</TT>
-interprets its
-<I>integer</I>
-argument as an address in the text of the debugged program.
-<TT>fnbound</TT>
-returns a list containing two integers corresponding to
-the start and end addresses of the function containing the supplied address.
-If the
-<I>integer</I>
-address is not in the text segment of the program then the empty list is returned.
-<TT>fnbound</TT>
-is used by
-<TT>next</TT>
-to detect stepping into new functions.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: print(fnbound(main))
-{0x00001050, 0x000014b8}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-The follow set is defined as the set of program counter values that could result
-from executing an instruction.
-<TT>follow</TT>
-interprets its
-<I>integer</I>
-argument as a text address, decodes the instruction at
-that address and, with the current register set, builds a list of possible
-next program counter values. If the instruction at the specified address
-cannot be decoded
-<TT>follow</TT>
-raises an error.
-<TT>follow</TT>
-is used to plant breakpoints on
-all potential paths of execution. The following code fragment
-plants breakpoints on top of all potential following instructions.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-lst = follow(*PC);
-while lst do
-{
-	*head lst = bpinst;
-	lst = tail lst;
-}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>include</TT>
-opens the file specified by
-<I>string</I>
-and uses its contents as command input to the interpreter.
-The interpreter restores input to its previous source when it encounters
-either an end of file or an error.
-<TT>include</TT>
-can be used to incrementally load symbol table information without
-leaving the interpreter.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: include("/sys/src/cmd/acme/syms")
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>interpret</TT>
-evaluates the
-<I>string</I>
-expression and uses its result as command input for the interpreter.
-The interpreter restores input to its previous source when it encounters
-either the end of string or an error. The
-<TT>interpret</TT>
-function allows Acid programs to write Acid code for later evaluation.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: interpret("main+10;")
-0x0000102a
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>itoa</TT>
-takes an integer argument and converts it into an ASCII string
-in the
-<TT>D</TT>
-format.
-An alternate format string
-may be provided in the
-<TT>%</TT>
-style of
-<A href="/magic/man2html/2/print"><I>print</I>(2).
-</A>This function is commonly used to build
-<TT>rc</TT>
-command lines.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: rc("cat /proc/"+itoa(pid)+"/segment")
-Stack    7fc00000 80000000    1
-Data     00001000 00009000    1
-Data     00009000 0000a000    1
-Bss      0000a000 0000c000    1
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>kill</TT>
-writes a kill control message into the control file of the process
-specified by the
-<I>integer</I>
-pid.
-If the process was previously installed by
-<TT>setproc</TT>
-it will be removed from the list of active processes.
-If the
-<I>integer</I>
-has the same value as
-<TT>pid</TT>,
-then
-<TT>pid</TT>
-will be set to 0.
-To continue debugging, a new process must be selected using
-<TT>setproc</TT>.
-For example, to kill all the active processes:
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-while proclist do {
-	kill(head proclist);
-	proclist = tail proclist;
-}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>map</TT>
-either retrieves all the mappings associated with a process or sets a single
-map entry to a new value.
-If the
-<I>list</I>
-argument is omitted then
-<TT>map</TT>
-returns a list of lists. Each sublist has four values and describes a
-single region of contiguous addresses in the
-memory or file image of the debugged program. The first entry is the name of the
-mapping. If the name begins with
-<TT>*</TT>
-it denotes a map into the memory of an active process.
-The second and third values specify the base and end
-address of the region and the fourth number specifies the offset in the file
-corresponding to the first location of the region.
-A map entry may be set by supplying a list in the same format as the sublist
-described above. The name of the mapping must match a region already defined
-by the current map.
-Maps are set automatically for Plan 9 processes and some kernels; they may
-need to be set by hand for other kernels and programs that run on bare hardware.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: map({"text", _start, end, 0x30})
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>match</TT>
-compares each item in
-<I>list</I>
-using the equality operator
-<TT>==</TT>
-with
-<I>item</I>.
-The
-<I>item</I>
-can be of any type. If the match succeeds the result is the integer index
-of the matching value, otherwise -1.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: list={8,9,10,11}
-acid: print(list[match(10, list)]\D)
-10
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>newproc</TT>
-starts a new process with an argument vector constructed from
-<I>string</I>.
-The argument vector excludes the name of the program to execute and
-each argument in
-<I>string</I>
-must be space separated. A new process can accept no more
-than 512 arguments. The internal variable
-<TT>pid</TT>
-is set to the pid of the newly created process. The new pid
-is also appended to the list of active processes stored in the variable
-<TT>proclist</TT>.
-The new process is created then halted at the first instruction, causing
-the debugger to call
-<TT>stopped</TT>.
-The library functions
-<TT>new</TT>
-and
-<TT>win</TT>
-should be used to start processes when using the standard debugging
-environment.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: newproc("-l .")
-56720: system call	_main	ADD	-0x14,R29
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>pcfile</TT>
-interprets its
-<I>integer</I>
-argument as a text address in the debugged program. The address and symbol table
-are used to generate a string containing the name of the source file
-corresponding to the text address. If the address does not lie within the
-program the string
-<TT>?file?</TT>
-is returned.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: print("Now at ", pcfile(*PC), ":", pcline(*PC))
-Now at ls.c:46 
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>pcline</TT>
-interprets its
-<I>integer</I>
-argument as a text address in the debugged program. The address and symbol table
-are used to generate an integer containing the line number in the source file
-corresponding to the text address. If the address does not lie within the
-program the integer 0 is returned.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: +file("main.c")[pcline(main)]
-main(int argc, char *argv[])
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>print</TT>
-evaluates each
-<I>item</I>
-supplied in its argument list and prints it to standard output. Each
-argument will be printed according to its associated format character.
-When the interpreter is executing, output is buffered and flushed every
-5000 statements or when the interpreter returns to interactive mode.
-<TT>print</TT>
-accepts a maximum of 512 arguments.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: print(10, "decimal ", 10\D, "octal ", 10\o)
-0x0000000a decimal 10 octal 000000000012 
-acid: print({1, 2, 3})
-{0x00000001 , 0x00000002 , 0x00000003 }
-acid: print(main, main\a, "\t", @main\i)
-0x00001020 main	ADD	$-64,R29
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>printto</TT>
-offers a limited form of output redirection. The first
-<I>string</I>
-argument is used as the path name of a new file to create.
-Each
-<I>item</I>
-is then evaluated and printed to the newly created file. When all items
-have been printed the file is closed.
-<TT>printto</TT>
-accepts a maximum of 512 arguments.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: printto("/env/foo", "hello")
-acid: rc("echo -n $foo")
-hello
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>rc</TT>
-evaluates
-<I>string</I>
-to form a shell command. A new command interpreter is started
-to execute the command. The Acid interpreter blocks until the command
-completes. The return value is the empty string
-if the command succeeds, otherwise the exit status of the failed command.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: rc("B "+itoa(-pcline(addr))+" "+pcfile(addr));
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>readfile</TT>
-takes the contents of the file specified by
-<I>string</I>
-and returns its contents as a new string.
-If
-<TT>readfile</TT>
-encounters a zero byte in the file, it terminates.
-If
-<TT>readfile</TT>
-encounters an error opening or reading the file then the empty list
-is returned.
-<TT>readfile</TT>
-can be used to read the contents of device files whose lines are not
-terminated with newline characters.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: ""+readfile("/dev/label")
-helix
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>reason</TT>
-uses machine-dependent information to generate a string explaining
-why a process has stopped. The
-<I>integer</I>
-argument is the value of an architecture dependent status register,
-for example
-<TT>CAUSE</TT>
-on the MIPS.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: print(reason(*CAUSE))
-system call
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>regexp</TT>
-matches the
-<I>pattern</I>
-string supplied as its first argument with the 
-<I>string</I>
-supplied as its second.
-If the pattern matches the result is the value 1, otherwise 0.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: print(regexp(".*bar", "foobar"))
-1
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>setproc</TT>
-selects the default process used for memory and control operations. It effectively
-shifts the focus of control between processes. The 
-<I>integer</I>
-argument specifies the pid of the process to look at.
-The variable
-<TT>pid</TT>
-is set to the pid of the selected process. If the process is being
-selected for the first time its pid is added to the list of active
-processes
-<TT>proclist</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: setproc(68382)
-acid: procs()
-&gt;68382: Stopped at main+0x4 setproc(68382)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>start</TT>
-writes a
-<TT>start</TT>
-message to the control file of the process specified by the pid
-supplied as its
-<I>integer</I>
-argument.
-<TT>start</TT>
-draws an error if the process is not in the
-<TT>Stopped</TT>
-state.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: start(68382)
-acid: procs()
-&gt;68382: Running at main+0x4 setproc(68382)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>startstop</TT>
-performs the same actions as a call to
-<TT>start</TT>
-followed by a call to
-<TT>stop</TT>.
-The
-<I>integer</I>
-argument specifies the pid of the process to control. The process
-must be in the
-<TT>Stopped</TT>
-state.
-Execution is restarted, the debugger then waits for the process to
-return to the
-<TT>Stopped</TT>
-state. A process will stop if a startstop message has been written to its control
-file and any of the following conditions becomes true: the process executes or returns from
-a system call, the process generates a trap or the process receives a note.
-<TT>startstop</TT>
-is used to implement single stepping.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: startstop(pid)
-75374: breakpoint	ls	ADD	$-0x16c8,R29
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>status</TT>
-uses the pid supplied by its
-<I>integer</I>
-argument to generate a string describing the state of the process.
-The string corresponds to the state returned by the
-sixth column of the
-<A href="/magic/man2html/1/ps"><I>ps</I>(1)
-</A>command.
-A process must be in the
-<TT>Stopped</TT>
-state to modify its memory or registers.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: ""+status(pid)
-Stopped
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>stop</TT>
-writes a
-<TT>stop</TT>
-message to the control file of the process specified by the
-pid supplied as its
-<I>integer</I>
-argument.
-The interpreter blocks until the debugged process enters the
-<TT>Stopped</TT>
-state.
-A process will stop if a stop message has been written to its control
-file and any of the following conditions becomes true: the process executes or returns from
-a system call, the process generates a trap, the process is scheduled or the
-process receives a note.
-<TT>stop</TT>
-is used to wait for a process to halt before planting a breakpoint since Plan 9
-only allows a process's memory to be written while it is in the
-<TT>Stopped</TT>
-state.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-defn bpset(addr) {
-	if (status(pid)!="Stopped") then {
-		print("Waiting...\n");
-		stop(pid);
-	}
-	...
-}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>strace</TT>
-generates a list of lists corresponding to procedures called by the debugged
-program. Each sublist describes a single stack frame in the active process.
-The first element is an
-<I>integer</I>
-of format
-<TT>X</TT>
-specifying the address of the called function. The second element is the value
-of the program counter when the function was called. The third and fourth elements
-contain lists of parameter and automatic variables respectively.
-Each element of these lists
-contains a string with the name of the variable and an
-<I>integer</I>
-value of format
-<TT>X</TT>
-containing the current value of the variable.
-The arguments to
-<TT>strace</TT>
-are the current value of the program counter, the current value of the
-stack pointer, and the address of the link register. All three parameters
-must be integers.
-The setting of 
-<I>linkreg</I>
-is architecture dependent. On the MIPS linkreg is set to the address of saved
-<TT>R31</TT>,
-on the SPARC to the address of saved
-<TT>R15</TT>.
-For the other architectures
-<I>linkreg</I>
-is not used, but must point to valid memory.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: print(strace(*PC, *SP, linkreg))
-{{0x0000141c, 0xc0000f74,
-{{"s", 0x0000004d}, {"multi", 0x00000000}}, 
-{{"db", 0x00000000}, {"fd", 0x000010a4},
-{"n", 0x00000001}, {"i", 0x00009824}}}}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>waitstop</TT>
-writes a waitstop message to the control file of the process specified by the
-pid supplied as its
-<I>integer</I>
-argument.
-The interpreter will remain blocked until the debugged process enters the
-<TT>Stopped</TT>
-state.
-A process will stop if a waitstop message has been written to its control
-file and any of the following conditions becomes true: the process generates a trap
-or receives a note. Unlike
-<TT>stop</TT>,
-the
-<TT>waitstop</TT>
-function is passive; it does not itself cause the program to stop.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: waitstop(pid)
-75374: breakpoint	ls	ADD	-0x16c8,R29
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<H4>Library Functions
-</H4>
-<P>
-A standard debugging environment is provided by modules automatically
-loaded when
-Acid is started.
-These modules are located in the directory
-<TT>/sys/lib/acid</TT>.
-These functions may be overridden, personalized, or added to by code defined in
-<TT>$home/lib/acid</TT>.
-The implementation of these functions can be examined using the
-<TT>whatis</TT>
-operator and then modified during debugging sessions.
-
-
-
-</P>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>Bsrc</TT>
-interprets the
-<I>integer</I>
-argument as a text address. The text address is used to produce a pathname
-and line number suitable for the
-<TT>B</TT>
-command
-to send to the text editor
-<A href="/magic/man2html/1/sam"><I>sam</I>(1)
-</A>or
-<A href="/magic/man2html/1/acme"><I>acme</I>(1).
-</A><TT>Bsrc</TT>
-builds an
-<A href="/magic/man2html/1/rc"><I>rc</I>(1)
-</A>command to invoke
-<TT>B</TT>,
-which either selects an existing source file or loads a new source file into the editor.
-The line of source corresponding to the text address is then selected.
-In the following example
-<TT>stopped</TT>
-is redefined so that the editor
-follows and displays the source line currently being executed.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-defn stopped(pid) {
-	pstop(pid);
-	Bsrc(*PC);
-}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-For machines equipped with floating point,
-<TT>Fpr</TT>
-displays the contents of the floating point registers as double precision
-values.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: Fpr()
-F0   0.	F2   0.
-F4   0.	F6   0.
-F8   0.	F10  0.
-...
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>Ureg</TT>
-interprets the integer passed as its first argument as the address of a
-kernel
-<TT>Ureg</TT>
-structure. Each element of the structure is retrieved and printed.
-The size and contents of the
-<TT>Ureg</TT>
-structure are architecture dependent.
-This function can be used to decode the first argument passed to a
-<A href="/magic/man2html/2/notify"><I>notify</I>(2)
-</A>function after a process has received a note.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: Ureg(*notehandler:ur)
-	status	0x3000f000
-	pc	0x1020
-	sp	0x7ffffe00
-	cause	0x00004002
-...
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>acidinit</TT>
-is called by the interpreter after all
-modules have been loaded at initialization time.
-It is used to set up machine specific variables and the default source path.
-<TT>acidinit</TT>
-should not be called by user code.
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>addsrcdir</TT>
-interprets its string argument as a new directory
-<TT>findsrc</TT>
-should search when looking for source code files.
-<TT>addsrcdir</TT>
-draws an error if the directory is already in the source search path. The search
-path may be examined by looking at the variable
-<TT>srcpath</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: rc("9fs fornax")
-acid: addsrcdir("/n/fornax/sys/src/cmd")
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>asm</TT>
-interprets its integer argument as a text address from which to disassemble
-machine instructions.
-<TT>asm</TT>
-prints the instruction address in symbolic and hexadecimal form, then prints
-the instructions with addressing modes. Up to twenty instructions will
-be disassembled.
-<TT>asm</TT>
-stops disassembling when it reaches the end of the current function.
-Instructions are read from the file image using the
-<TT>@</TT>
-operator.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: asm(main)
-main     0x00001020 ADD    -0x64,R29
-main+0x4 0x00001024 MOVW   R31,0x0(R29)
-main+0x8 0x00001028 MOVW   R1,argc+4(FP)
-main+0xc 0x0000102c MOVW   $bin(SB),R1
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>bpdel</TT>
-removes a previously set breakpoint from memory.
-The
-<I>integer</I>
-supplied as its argument must be the address of a previously set breakpoint.
-The breakpoint address is deleted from the active breakpoint list
-<TT>bplist</TT>,
-then the original instruction is copied from the file image to the memory
-image so that the breakpoint is removed.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: bpdel(main+4)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>bpset</TT>
-places a breakpoint instruction at the address specified
-by its
-<I>integer</I>
-argument, which must be in the text segment.
-<TT>bpset</TT>
-draws an error if a breakpoint has already been set at the specified address.
-A list of current breakpoints is maintained in the variable
-<TT>bplist</TT>.
-Unlike in
-<A href="/magic/man2html/1/db"><I>db</I>(1),
-</A>breakpoints are left in memory even when a process is stopped, and
-the process must exist, perhaps by being
-created by either
-<TT>new</TT>
-or
-<TT>win</TT>,
-in order to place a breakpoint.
-(<TT>Db</TT>
-accepts breakpoint commands before the process is started.)
-On the
-MIPS and SPARC architectures,
-breakpoints at function entry points should be set 4 bytes into the function
-because the
-instruction scheduler may fill
-<TT>JAL</TT>
-branch delay slots with the first instruction of the function.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: bpset(main+4)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>bptab</TT>
-prints a list of currently installed breakpoints. The list contains the
-breakpoint address in symbolic and hexadecimal form as well as the instruction
-the breakpoint replaced. Breakpoints are not maintained across process creation
-using
-<TT>new</TT>
-and
-<TT>win</TT>.
-They are maintained across a fork, but care must be taken to keep control of
-the child process.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: bpset(ls+4)
-acid: bptab()
-	0x00001420 ls+0x4  MOVW	R31,0x0(R29)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>casm</TT>
-continues to disassemble instructions from where the last
-<TT>asm</TT>
-or
-<TT>casm</TT>
-command stopped. Like
-<TT>asm</TT>,
-this command stops disassembling at function boundaries.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: casm()
-main+0x10 0x00001030	MOVW	0x1,R3
-main+0x14 0x00001034	MOVW	R3,0x8(R29)
-main+0x18 0x00001038	MOVW	$0x1,R5
-main+0x1c 0x0000103c	JAL	Binit(SB)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>cont</TT>
-restarts execution of the currently active process.
-If the process is stopped on a breakpoint, the breakpoint is first removed,
-the program is single stepped, the breakpoint is replaced and the program
-is then set executing. This may cause
-<TT>stopped()</TT>
-to be called twice.
-<TT>cont</TT>
-causes the interpreter to block until the process enters the
-<TT>Stopped</TT>
-state.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: cont()
-95197: breakpoint	ls+0x4	MOVW	R31,0x0(R29)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>dump</TT>
-interprets its first argument as an address, its second argument as a
-count and its third as a format string.
-<TT>dump</TT>
-fetches an object from memory at the current address and prints it according
-to the format. The address is incremented by the number of bytes specified by
-the format and the process is repeated count times. The format string is any
-combination of format characters, each preceded by an optional count.
-For each object,
-<TT>dump</TT>
-prints the address in hexadecimal, a colon, the object and then a newline.
-<TT>dump</TT>
-uses
-<TT>mem</TT>
-to fetch each object.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: dump(main+35, 4, "X2bi")
-0x00001043: 0x0c8fa700 108 143 lwc2 r0,0x528f(R4) 
-0x0000104d: 0xa9006811   0   0 swc3 r0,0x0(R24) 
-0x00001057: 0x2724e800   4  37 ADD  -0x51,R23,R31 
-0x00001061: 0xa200688d   6   0 NOOP
-0x0000106b: 0x2710c000   7   0 BREAK
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>findsrc</TT>
-interprets its
-<I>string</I>
-argument as a source file. Each directory in the source path is searched
-in turn for the file. If the file is found, the source text is loaded using
-<TT>file</TT>
-and stored in the list of active source files called
-<TT>srctext</TT>.
-The name of the file is added to the source file name list
-<TT>srcfiles</TT>.
-Users are unlikely to call
-<TT>findsrc</TT>
-from the command line, but may use it from scripts to preload source files
-for a debugging session. This function is used by
-<TT>src</TT>
-and
-<TT>line</TT>
-to locate and load source code. The default search path for the MIPS
-is
-<TT>./</TT>,
-<TT>/sys/src/libc/port</TT>,
-<TT>/sys/src/libc/9sys</TT>,
-<TT>/sys/src/libc/mips</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: findsrc(pcfile(main));
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-For machines equipped with floating point,
-<TT>fpr</TT>
-displays the contents of the floating point registers as single precision
-values. When the interpreter stores or manipulates floating point values
-it converts into double precision values.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: fpr()
-F0   0.	F1   0.
-F2   0.	F3   0.
-F4   0.	F5   0.
-...
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>func</TT>
-single steps the active process until it leaves the current function
-by either calling another function or returning to its caller.
-<TT>func</TT>
-will execute a single instruction after leaving the current function.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: func()
-95197: breakpoint	ls+0x8	MOVW	R1,R8
-95197: breakpoint	ls+0xc	MOVW	R8,R1
-95197: breakpoint	ls+0x10	MOVW	R8,s+4(FP)
-95197: breakpoint	ls+0x14	MOVW	$0x2f,R5
-95197: breakpoint	ls+0x18	JAL	utfrrune(SB)
-95197: breakpoint	utfrrune	ADD	$-0x18,R29
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>gpr</TT>
-prints the values of the general purpose processor registers.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: gpr()
-R1	0x00009562 R2	0x000010a4 R3	0x00005d08
-R4	0x0000000a R5	0x0000002f R6	0x00000008
-...
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>labstk</TT>
-performs a stack trace from a Plan 9
-<I>label.</I>
-The kernel and
-C compilers store continuations in a common format. Since the
-compilers all use caller save conventions a continuation may be saved by
-storing a
-<TT>PC</TT>
-and
-<TT>SP</TT>
-pair. This data structure is called a label and is used by
-the C function
-<TT>longjmp</TT>
-and the kernel to schedule threads and processes.
-<TT>labstk</TT>
-interprets its
-<I>integer</I>
-argument as the address of a label and produces a stack trace for
-the thread of execution. The value of the function
-<TT>ALEF_tid</TT>
-is a suitable argument for
-<TT>labstk</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: labstk(*mousetid)
-At pc:0x00021a70:Rendez_Sleep+0x178 rendez.l:44
-Rendez_Sleep(r=0xcd7d8,bool=0xcd7e0,t=0x0) rendez.l:5
-	called from ALEF_rcvmem+0x198 recvmem.l:45
-ALEF_rcvmem(c=0x000cd764,l=0x00000010) recvmem.l:6
-...
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>lstk</TT>
-produces a long format stack trace.
-The stack trace includes each function in the stack,
-where it was called from, and the value of the parameters and automatic
-variables for each function.
-<TT>lstk</TT>
-displays the value rather than the address of each variable and all
-variables are assumed to be an integer in format
-<TT>X</TT>.
-To print a variable in its correct format use the
-<TT>:</TT>
-operator to find the address and apply the appropriate format before indirection
-with the
-<TT>*</TT>
-operator. It may be necessary to single step a couple of instructions into
-a function to get a correct stack trace because the frame pointer adjustment
-instruction may get scheduled down into the body of the function.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: lstk()
-At pc:0x00001024:main+0x4 ls.c:48
-main(argc=0x00000001,argv=0x7fffefec) ls.c:48
-	called from _main+0x20 main9.s:10
-	_argc=0x00000000
-	_args=0x00000000
-	fd=0x00000000
-	buf=0x00000000
-	i=0x00000000
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>mem</TT>
-interprets its first
-<I>integer</I>
-argument as the address of an object to be printed according to the
-format supplied in its second
-<I>string</I>
-argument.
-The format string can be any combination of format characters, each preceded
-by an optional count.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: mem(bdata+0x326, "2c2Xb")
-P = 0xa94bc464 0x3e5ae44d  19 
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>new</TT>
-starts a new copy of the debugged program. The new program is started
-with the program arguments set by the variable
-<TT>progargs</TT>.
-The new program is stopped in the second instruction of
-<TT>main</TT>.
-The breakpoint list is reinitialized.
-<TT>new</TT>
-may be used several times to instantiate several copies of a program
-simultaneously. The user can rotate between the copies using
-<TT>setproc</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: progargs="-l"
-acid: new()
-60: external interrupt	_main	ADD	$-0x14,R29
-60: breakpoint	main+0x4	MOVW	R31,0x0(R29)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>next</TT>
-steps through a single language level statement without tracing down
-through each statement in a called function. For each statement,
-<TT>next</TT>
-prints the machine instructions executed as part of the statement. After
-the statement has executed, source lines around the current program
-counter are displayed.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: next()
-60: breakpoint	Binit+0x4 MOVW	R31,0x0(R29)
-60: breakpoint	Binit+0x8 MOVW	f+8(FP),R4
-binit.c:93
- 88	
- 89	int
- 90	Binit(Biobuf *bp, int f, int mode)
- 91	{
-&gt;92		return Binits(bp, f, mode, bp-&gt;b, BSIZE);
- 93	}
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>notestk</TT>
-interprets its
-<I>integer</I>
-argument as the address of a
-<TT>Ureg</TT>
-structure passed by the kernel to a
-<A href="/magic/man2html/2/notify"><I>notify</I>(2)
-</A>function during note processing.
-<TT>notestk</TT>
-uses the
-<TT>PC</TT>,
-<TT>SP</TT>,
-and link register from the
-<TT>Ureg</TT>
-to print a stack trace corresponding to the point in the program where the note
-was received.
-To get a valid stack trace on the MIPS and SPARC architectures from a notify
-routine, the program must stop in a new function called from the notify routine
-so that the link register is valid and the notify routine's parameters are
-addressable.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: notestk(*notify:ur)
-Note pc:0x00001024:main+0x4 ls.c:48
-main(argc=0x00000001,argv=0x7fffefec) ls.c:48
-	called from _main+0x20 main9.s:10
-	_argc=0x00000000
-	_args=0x00000000
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>pfl</TT>
-interprets its argument as a text address and uses it to print
-the source file and line number corresponding to the address. The output
-has the same format as file addresses in
-<A href="/magic/man2html/1/acme"><I>acme</I>(1).
-</A><DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: pfl(main)
-ls.c:48
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>procs</TT>
-prints a list of active processes attached to the debugger. Each process
-produces a single line of output giving the pid, process state, the address
-the process is currently executing, and the
-<TT>setproc</TT>
-command required to make that process current.
-The current process is marked in the first column with a
-<TT>&gt;</TT>
-character. The debugger maintains a list of processes in the variable
-<TT>proclist</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: procs()
-&gt;62: Stopped at main+0x4 setproc(62)
- 60: Stopped at Binit+0x8 setproc(60)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>pstop</TT>
-prints the status of the process specified by the
-<I>integer</I>
-pid supplied as its argument.
-<TT>pstop</TT>
-is usually called from
-<TT>stopped</TT>
-every time a process enters the
-<TT>Stopped</TT>
-state.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: pstop(62)
-0x0000003e: breakpoint	main+0x4	MOVW	R31,0x0(R29)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>regs</TT>
-prints the contents of both the general and special purpose registers.
-<TT>regs</TT>
-calls
-<TT>spr</TT>
-then
-<TT>gpr</TT>
-to display the contents of the registers.
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>source</TT>
-prints the directory search path followed by a list of currently loaded
-source files. The source management functions
-<TT>src</TT>
-and
-<TT>findsrc</TT>
-use the search path to locate and load source files. Source files are
-loaded incrementally into a source data base during debugging. A list
-of loaded files is stored in the variable
-<TT>srcfiles</TT>
-and the contents of each source file in the variable
-<TT>srctext</TT>.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: source()
-/n/bootes/sys/src/libbio/
-/sys/src/libc/port/
-/sys/src/libc/9sys/
-/sys/src/libc/mips/
-	binit.c
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>spr</TT>
-prints the contents of the processor control and memory management
-registers. Where possible, the contents of the registers are decoded
-to provide extra information; for example the
-<TT>CAUSE</TT>
-register on the MIPS is
-printed both in hexadecimal and using the
-<TT>reason</TT>
-function.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: spr()
-PC	0x00001024 main+0x4  ls.c:48
-SP	0x7fffef68 LINK	0x00006264 _main+0x28 main9.s:12
-STATUS	0x0000ff33 CAUSE	0x00000024 breakpoint
-TLBVIR	0x000000d3 BADVADR	0x00001020
-HI	0x00000004 LO		0x00001ff7
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>src</TT>
-interprets its
-<I>integer</I>
-argument as a text address and uses this address to print 5 lines
-of source before and after the address. The current line is marked with a
-<TT>&gt;</TT>
-character.
-<TT>src</TT>
-uses the source search path maintained by
-<TT>source</TT>
-and
-<TT>addsrcdir</TT>
-to locate the required source files.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: src(*PC)
-ls.c:47
- 42	Biobuf	bin;
- 43	
- 44	#define		HUNK	50
- 45	
- 46	void
-&gt;47	main(int argc, char *argv[])
- 48	{
- 49		int i, fd;
- 50		char buf[64];
- 51	
- 52		Binit(&amp;bin, 1, OWRITE);
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>step</TT>
-causes the debugged process to execute a single machine level instruction.
-If the program is stopped on a breakpoint set by
-<TT>bpset</TT>
-it is first removed, the single step executed, and the breakpoint replaced.
-<TT>step</TT>
-uses
-<TT>follow</TT>
-to predict the address of the program counter after the current instruction
-has been executed. A breakpoint is placed at each of these predicted addresses
-and the process is started. When the process stops the breakpoints are removed.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: step()
-62: breakpoint	main+0x8	MOVW	R1,argc+4(FP)
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>stk</TT>
-produces a short format stack trace. The stack trace includes each function
-in the stack, where it was called from, and the value of the parameters.
-The short format omits the values of automatic variables.
-Parameters are assumed to be integer values in the format
-<TT>X</TT>;
-to print a parameter in the correct format use the
-<TT>:</TT>
-to obtain its address, apply the correct format, and use the
-<TT>*</TT>
-indirection operator to find its value.
-It may be necessary to single step a couple of instructions into
-a function to get a correct stack trace because the frame pointer adjustment
-instruction may get scheduled down into the body of the function.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: stk()
-At pc:0x00001028:main+0x8 ls.c:48
-main(argc=0x00000002,argv=0x7fffefe4) ls.c:48
-	called from _main+0x20 main9.s:10
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>stmnt</TT>
-executes a single language level statement.
-<TT>stmnt</TT>
-displays each machine level instruction as it is executed. When the executed
-statement is completed the source for the next statement is displayed.
-Unlike
-<TT>next</TT>,
-the
-<TT>stmnt</TT>
-function will trace down through function calls.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: stmnt()
-62: breakpoint	main+0x18 MOVW	R5,0xc(R29)
-62: breakpoint	main+0x1c JAL	Binit(SB)
-62: breakpoint	Binit     ADD	-0x18,R29
-binit.c:91
- 89	int
- 90	Binit(Biobuf *bp, int f, int mode)
-&gt;91	{
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>stopped</TT>
-is called automatically by the interpreter
-every time a process enters the
-<TT>Stopped</TT>
-state, such as when it hits a breakpoint.
-The pid is passed as the
-<I>integer</I>
-argument.  The default implementation just calls
-<TT>pstop</TT>,
-but the function may be changed to provide more information or perform fine control
-of execution.  Note that
-<TT>stopped</TT>
-should return; for example, calling
-<TT>step</TT>
-in
-<TT>stopped</TT>
-will recur until the interpreter runs out of stack space.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: defn stopped(pid) {
-	if *lflag != 0 then error("lflag modified");
-	}
-acid: progargs = "-l"
-acid: new();
-acid: while 1 do step();
-&lt;stdin&gt;:7: (error) lflag modified
-acid: stk()
-At pc:0x00001220:main+0x200 ls.c:54
-main(argc=0x00000001,argv=0x7fffffe8) ls.c:48
-	called from _main+0x20 main9.s:10
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>symbols</TT>
-uses the regular expression supplied by
-<I>string</I>
-to search the symbol table for symbols whose name matches the
-regular expression.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: symbols("main")
-main	T	0x00001020
-_main	T	0x0000623c
-</PRE></TT></DL>
-<br>
-
-
-
-</dl>
-<br>&#32;<br>
-<DL>
-<DT><DT>&#32;<DD>
-<TT>win</TT>
-performs exactly the same function as
-<TT>new</TT>
-but uses the window system to create a new window for the debugged process.
-The variable
-<TT>progargs</TT>
-supplies arguments to the new process.
-The environment variable
-<TT>$8&#189;srv</TT>
-must be set to allow the interpreter to locate the mount channel for the
-window system.
-The window is created in the top left corner of the screen and is
-400x600 pixels in size. The
-<TT>win</TT>
-function may be modified to alter the geometry.
-The window system will not be able to deliver notes in the new window
-since the pid of the created process is not passed when the server is
-mounted to create a new window.
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<br>
-acid: win()
-</PRE></TT></DL>
-<br>
-</dl>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1368
sys/doc/acidpaper.html

@@ -1,1368 +0,0 @@
-<html>
-<title>
-data
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Acid: A Debugger Built From A Language
-</H1>
-<DL><DD><I>Phil Winterbottom<br>
-philw@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Originally appeared in
-Proc. of the Winter 1994 USENIX Conf.,
-pp. 211-222,
-San Francisco, CA
-</I><DT>&#32;<DD></dl>
-<br>
-Acid is an unusual source-level symbolic debugger for Plan 9. It is implemented
-as a language interpreter with specialized primitives that provide
-debugger support.  Programs written in the language manipulate
-one or more target processes; variables in the language represent the
-symbols, state, and resources of those processes. 
-This structure allows complex
-interaction between the debugger and the target program and
-provides a convenient method of parameterizing differences between
-machine architectures.
-Although some effort is required to learn
-the debugging language, the richness and flexibility of the
-debugging environment encourages new ways of reasoning about the way
-programs run and the conditions under which they fail.
-</DL>
-<H4>1 Introduction
-</H4>
-<P>
-The size and complexity
-of programs have increased in proportion to processor speed and memory but
-the interface between debugger and programmer has changed little.
-Graphical user interfaces have eased some of the tedious
-aspects of the interaction. A graphical interface is a convenient
-means for navigating through source and data structures but provides
-little benefit for process control.
-The introduction of a new concurrent language, Alef [Win93], emphasized the
-inadequacies of the existing Plan 9 [Pike90] debugger
-<I>db</I>,
-a distant relative of
-<I>adb</I>,
-and made it clear that a new debugger was required.
-</P>
-<P>
-Current debuggers like
-<I>dbx</I>,
-<I>sdb</I>,
-and
-<I>gdb</I>
-are limited to answering only the questions their authors
-envisage.  As a result, they supply a plethora
-of specialized commands, each attempting to anticipate
-a specific question a user may ask.
-When a debugging situation arises that is beyond the scope
-of the command set, the tool is useless.
-Further,
-it is often tedious or impossible to reproduce an anomalous state
-of the program, especially when
-the state is embedded in the program's data structures.
-</P>
-<P>
-Acid applies some ideas found in CAD software used for
-hardware test and simulation.
-It is based on the notion that the state and resources of a program
-are best represented and manipulated by a language. The state and resources,
-such as memory, registers, variables, type information and source code
-are represented by variables in the language.
-Expressions provide a computation mechanism and control
-statements allow repetitive or selective interpretation based
-on the result of expression evaluation.
-The heart of the Acid debugger is an interpreter for a small typeless
-language whose operators mirror the operations
-of C and Alef, which in turn correspond well to the basic operations of
-the machine. The interpreter itself knows nothing of the underlying
-hardware; it deals with the program state and resources
-in the abstract.
-Fundamental routines to control
-processes, read files, and interface to the system are implemented
-as builtin functions available to the interpreter.
-The actual debugger functionality is coded
-in Acid; commands are implemented as Acid functions.
-</P>
-<P>
-This language-based approach has several advantages.
-Most importantly, programs written in Acid, including most of the
-debugger itself, are inherently portable.
-Furthermore, Acid avoids the limitations other debuggers impose when
-debugging parallel programs.  Instead of embedding a fixed
-process model in the debugger, Acid allows the
-programmer to adapt the debugger to handle an
-arbitrary process partitioning or program structure. 
-The ability to
-interact dynamically with an executing process provides clear advantages
-over debuggers constrained to probe a static image.
-Finally, the Acid language is a powerful vehicle for expressing
-assertions about logic, process state, and the contents of data structures.
-When combined with dynamic interaction it allows a
-limited form of automated program verification without requiring
-modification or recompilation of the source code.
-The language is also an
-excellent vehicle for preserving a test suite for later regression testing.
-</P>
-<P>
-The debugger may be customized by its users; standard
-functions may be modified or extended to suit a particular application
-or preference.
-For example, the kernel developers in our group require a
-command set supporting assembler-level debugging while the application
-programmers prefer source-level functionality.
-Although the default library is biased toward assembler-level debugging,
-it is easily modified to provide a convenient source-level interface.
-The debugger itself does not change; the user combines primitives
-and existing Acid functions in different ways to
-implement the desired interface.
-</P>
-<H4>2 Related Work
-</H4>
-<P>
-DUEL [Gol93], an extension to
-<I>gdb</I>
-[Stal91], proposes using a high level expression evaluator to solve
-some of these problems. The evaluator provides iterators to loop over data
-structures and conditionals to control evaluation of expressions.
-The author shows that complex state queries can be formulated
-by combining concise expressions but this only addresses part of the problem.
-A program is a dynamic entity; questions asked when the program is in
-a static state are meaningful only after the program has been `caught' in
-that state. The framework for manipulating the program is still as
-primitive as the underlying debugger. While DUEL provides a means to
-probe data structures it entirely neglects the most beneficial aspect
-of debugging languages: the ability to control processes. Acid is structured
-around a thread of control that passes between the interpreter and the
-target program.
-</P>
-<P>
-The NeD debugger [May92] is a set of extensions to TCL [Ous90] that provide
-debugging primitives. The resulting language, NeDtcl, is used to implement
-a portable interface between a conventional debugger, pdb [May90], and
-a server that executes NeDtcl programs operating on the target program.
-Execution of the NeDtcl programs implements the debugging primitives
-that pdb expects.
-NeD is targeted at multi-process debugging across a network,
-and proves the flexibility of a language as a means of
-communication between debugging tools. Whereas NeD provides an interface
-between a conventional debugger and the process it debugs, Acid is the
-debugger itself. While NeD has some of the ideas
-found in Acid it is targeted toward a different purpose. Acid seeks to
-integrate the manipulation of a program's resources into the debugger
-while NeD provides a flexible interconnect between components of
-the debugging environment. The choice of TCL is appropriate for its use
-in NeD but is not suitable for Acid. Acid relies on the coupling of the type
-system with expression evaluation, which are the root of its design,
-to provide the debugging primitives.
-</P>
-<P>
-Dalek [Ols90] is an event based language extension to gdb. State transitions
-in the target program cause events to be queued for processing by the
-debugging language.
-</P>
-<P>
-Acid has many of the advantages of same process or
-<I>local</I>
-<I>agent</I>
-debuggers, like Parasight [Aral], without the need for dynamic linking or
-shared memory.
-Acid improves on the ideas of these other systems by completely integrating
-all aspects of the debugging process into the language environment. Of
-particular importance is the relationship between Acid variables,
-program symbols, source code, registers and type information. This
-integration is made possible by the design of the Acid language.
-</P>
-<P>
-Interpreted languages such as Lisp and Smalltalk are able to provide
-richer debugging environments through more complete information than
-their compiled counterparts. Acid is a means to gather and represent
-similar information about compiled programs through cooperation
-with the compilation tools and library implementers.
-</P>
-<H4>3 Acid the Language
-</H4>
-<P>
-Acid is a small interpreted language targeted to its debugging task.
-It focuses on representing program state and addressing data rather than
-expressing complex computations. Program state is
-<I>addressable</I>
-from an Acid program.
-In addition to parsing and executing expressions and providing
-an architecture-independent interface to the target process,
-the interpreter supplies a mark-and-scan garbage collector
-to manage storage.
-</P>
-<P>
-Every Acid session begins with the loading of the Acid libraries.
-These libraries contain functions, written in Acid, that provide
-a standard debugging environment including breakpoint management,
-stepping by instruction or statement, stack tracing, and
-access to variables, memory, and registers.
-The library contains 600 lines of Acid code and provides
-functionality similar to
-<I>dbx</I>.
-Following the loading of the system library, Acid loads
-user-specified libraries; this load sequence allows the
-user to augment or override the standard commands
-to customize the debugging environment.  When all libraries
-are loaded, Acid issues an interactive prompt and begins
-evaluating expressions entered by the user.  The Acid `commands'
-are actually invocations of builtin primitives or previously defined
-Acid functions. Acid evaluates each expression as it is entered and
-prints the result.
-</P>
-<H4>4 Types and Variables
-</H4>
-<P>
-Acid variables are of four basic types:
-<I>integer</I>,
-<I>string</I>,
-<I>float</I>,
-and
-<I>list</I>.
-The type of a variable is inferred by the type of the right-hand side of
-an assignment expression.
-Many of the operators can be applied to more than
-one type; for these operators the action of the operator is determined
-by the type of its operands.
-For example,
-the
-<TT>+</TT>
-operator adds
-<I>integer</I>
-and
-<I>float</I>
-operands, and concatenates
-<I>string</I>
-and
-<I>list</I>
-operands.
-Lists are the only complex type in Acid; there are no arrays, structures
-or pointers. Operators provide
-<TT>head</TT>,
-<TT>tail</TT>,
-<TT>append</TT>
-and
-<TT>delete</TT>
-operations.
-Lists can also be indexed like arrays.
-</P>
-<P>
-Acid has two levels of scope: global and local.
-Function parameters and variables declared in a function body
-using the
-<TT>local</TT>
-keyword are created at entry to the function and
-exist for the lifetime of a function.
-Global variables are created by assignment and need not be declared.
-All variables and functions in the program
-being debugged are entered in the Acid symbol table as global
-variables during Acid initialization.
-Conflicting variable names are resolved by prefixing enough `$' characters
-to make them unique.
-Syntactically, Acid variables and target program
-symbols are referenced identically.
-However, the variables are managed differently in the Acid
-symbol table and the user must be aware of this distinction.
-The value of an Acid variable is stored in the symbol
-table; a reference returns the value.
-The symbol table entry for a variable or function in the target
-program contains the address of that symbol in the image
-of the program.  Thus, the value of a program variable is
-accessed by indirect reference through the Acid
-variable that has the same name; the value of an Acid variable is the
-address of the corresponding program variable.
-</P>
-<H4>5 Control Flow
-</H4>
-<P>
-The
-<TT>while</TT>
-and
-<TT>loop</TT>
-statements implement looping.
-The former
-is similar to the same statement in C.
-The latter evaluates starting and ending expressions yielding
-integers and iterates while an incrementing loop index
-is within the bounds of those expressions.
-<DL><DT><DD><TT><PRE>
-acid: i = 0; loop 1,5 do print(i=i+1)
-0x00000001
-0x00000002
-0x00000003
-0x00000004
-0x00000005
-acid:
-</PRE></TT></DL>
-The traditional
-<TT>if-then-else</TT>
-statement implements conditional execution.
-</P>
-<H4>6 Addressing
-</H4>
-<P>
-Two indirection operators allow Acid to access values in
-the program being debugged.
-The
-<TT>*</TT>
-operator fetches a value from the memory image of an
-executing process;
-the
-<TT>@</TT>
-operator fetches a value from the text file of the process.
-When either operator appears on the left side of an assignment, the value
-is written rather than read.
-</P>
-<P>
-The indirection operator must know the size of the object
-referenced by a variable.
-The Plan 9 compilers neglect to include this
-information in the program symbol table, so Acid cannot
-derive this information implicitly.
-Instead Acid variables have formats.
-The format is a code
-letter specifying the printing style and the effect of some of the
-operators on that variable.
-The indirection operators look at the format code to determine the
-number of bytes to read or write.
-The format codes are derived from the format letters used by
-<I>db</I>.
-By default, symbol table variables and numeric constants
-are assigned the format code
-<TT>'X'</TT>
-which specifies 32-bit hexadecimal.
-Printing such a variable yields output of the form
-<TT>0x00123456</TT>.
-An indirect reference through the variable fetches 32 bits
-of data at the address indicated by the variable.
-Other formats specify various data types, for example
-<TT>i</TT>
-an instruction,
-<TT>D</TT>
-a signed 32-bit decimal,
-<TT>s</TT>
-a null-terminated string.
-The
-<TT>fmt</TT>
-function
-allows the user to change the format code of a variable
-to control the printing format and
-operator side effects.
-This function evaluates the expression supplied as the first
-argument, attaches the format code supplied as the second
-argument to the result and returns that value.
-If the result is assigned to a variable,
-the new format code applies to
-that variable.  For convenience, Acid provides the
-<TT>\</TT>
-operator as a shorthand infix form of
-<TT>fmt</TT>.
-For example:
-<DL><DT><DD><TT><PRE>
-acid: x=10
-acid: x				 // print x in hex
-0x0000000a 
-acid: x = fmt(x, 'D')		 // make x type decimal
-acid: print(x, fmt(x, 'X'), x\X) // print x in decimal &amp; hex
-10 0x0000000a 0x0000000a
-acid: x				 // print x in decimal
-10
-acid: x\o			 // print x in octal
-000000000012
-</PRE></TT></DL>
-The 
-<TT>++</TT>
-and
-<TT>--</TT>
-operators increment or decrement a variable by an amount
-determined by its format code.  Some formats imply a non-fixed size.
-For example, the
-<TT>i</TT>
-format code disassembles an instruction into a string.
-On a 68020, which has variable length instructions:
-<DL><DT><DD><TT><PRE>
-acid: p=main\i                     // p=addr(main), type INST
-acid: loop 1,5 do print(p\X, @p++) // disassemble 5 instr's
-0x0000222e LEA	0xffffe948(A7),A7
-0x00002232 MOVL	s+0x4(A7),A2
-0x00002236 PEA	0x2f(0)
-0x0000223a MOVL	A2,-(A7)
-0x0000223c BSR	utfrrune
-acid:
-</PRE></TT></DL>
-Here,
-<TT>main</TT>
-is the address of the function of the same name in the program under test.
-The loop retrieves the five instructions beginning at that address and
-then prints the address and the assembly language representation of each.
-Notice that the stride of the increment operator varies with the size of
-the instruction: the
-<TT>MOVL</TT>
-at 
-<TT>0x0000223a</TT>
-is a two byte instruction while all others are four bytes long.
-</P>
-<P>
-Registers are treated as normal program variables referenced
-by their symbolic assembler language names.
-When a
-process stops, the register set is saved by the kernel
-at a known virtual address in the process memory map.
-The Acid variables associated with the registers point
-to the saved values and the
-<TT>*</TT>
-indirection operator can then be used to read and write the register set.
-Since the registers are accessed via Acid variables they may
-be used in arbitrary expressions.
-<DL><DT><DD><TT><PRE>
-acid: PC                            // addr of saved PC
-0xc0000f60 
-acid: *PC
-0x0000623c                          // contents of PC
-acid: *PC\a
-main
-acid: *R1=10                        // modify R1
-acid: asm(*PC+4)                    // disassemble @ PC+4
-main+0x4 0x00006240 	MOVW	R31,0x0(R29)
-main+0x8 0x00006244 	MOVW	$setR30(SB),R30
-main+0x10 0x0000624c 	MOVW	R1,_clock(SB)
-</PRE></TT></DL>
-Here, the saved
-<TT>PC</TT>
-is stored at address
-<TT>0xc0000f60</TT>;
-its current content is
-<TT>0x0000623c</TT>.
-The
-`<TT>a</TT>'
-format code converts this value to a string specifying
-the address as an offset beyond the nearest symbol.
-After setting the value of register
-<TT>1</TT>,
-the example uses the
-<TT>asm</TT>
-command to disassemble a short section of code beginning
-at four bytes beyond the current value of the
-<TT>PC</TT>.
-</P>
-<H4>7 Process Interface
-</H4>
-<P>
-A program executing under Acid is monitored through the
-<I>proc</I>
-file system interface provided by Plan 9.
-Textual messages written to the
-<TT>ctl</TT>
-file control the execution of the process.
-For example writing
-<TT>waitstop</TT>
-to the control file causes the write to block until the target
-process enters the kernel and is stopped. When the process is stopped
-the write completes. The
-<TT>startstop</TT>
-message starts the target process and then does a
-<TT>waitstop</TT>
-action.
-Synchronization between the debugger and the target process is determined
-by the actions of the various messages. Some operate asynchronously to the
-target process and always complete immediately, others block until the
-action completes. The asynchronous messages allow Acid to control
-several processes simultaneously.
-</P>
-<P>
-The interpreter has builtin functions named after each of the control
-messages. The functions take a process id as argument.
-Any time a control message causes the program to execute instructions 
-the interpreter performs two actions when the control operation has completed.
-The Acid variables pointing at the register set are fixed up to point
-at the saved registers, and then
-the user defined function
-<TT>stopped</TT>
-is executed.
-The 
-<TT>stopped</TT>
-function may print the current address,
-line of source or instruction and return to interactive mode. Alternatively
-it may traverse a complex data structure, gather statistics and then set
-the program running again.
-</P>
-<P>
-Several Acid variables are maintained by the debugger rather than the
-programmer.
-These variables allow generic Acid code to deal with the current process,
-architecture specifics or the symbol table.
-The variable
-<TT>pid</TT>
-is the process id of the current process Acid is debugging.
-The variable
-<TT>symbols</TT>
-contains a list of lists where each sublist contains the symbol
-name, its type and the value of the symbol.
-The variable
-<TT>registers</TT>
-contains a list of the machine-specific register names. Global symbols in the target program
-can be referenced directly by name from Acid. Local variables
-are referenced using the colon operator as <TT>function:variable</TT>.
-</P>
-<H4>8 Source Level Debugging
-</H4>
-<P>
-Acid provides several builtin functions to manipulate source code.
-The
-<TT>file</TT>
-function reads a text file, inserting each line into a list.
-The
-<TT>pcfile</TT>
-and
-<TT>pcline</TT>
-functions each take an address as an argument.
-The first
-returns a string containing the name of the source file
-and the second returns an integer containing the line number
-of the source line containing the instruction at the address.
-<DL><DT><DD><TT><PRE>
-acid: pcfile(main)		// file containing main
-main.c
-acid: pcline(main)		// line # of main in source
-11
-acid: file(pcfile(main))[pcline(main)]	// print that line
-main(int argc, char *argv[])
-acid: src(*PC)			// print statements nearby
- 9
- 10 void
-&gt;11 main(int argc, char *argv[])
- 12 {
- 13	int a;
-</PRE></TT></DL>
-In this example, the three primitives are combined in an expression to print
-a line of source code associated with an address.
-The
-<TT>src</TT>
-function prints a few lines of source
-around the address supplied as its argument. A companion routine,
-<TT>Bsrc</TT>,
-communicates with the external editor
-<TT>sam</TT>.
-Given an address, it loads the corresponding source file into the editor
-and highlights the line containing the address.  This simple interface
-is easily extended to more complex functions.
-For example, the
-<TT>step</TT>
-function can select the current file and line in the editor
-each time the target program stops, giving the user a visual
-trace of the execution path of the program. A more complete interface
-allowing two way communication between Acid and the
-<TT>acme</TT>
-user interface [Pike93] is under construction. A filter between the debugger
-and the user interface provides interpretation of results from both
-sides of the interface. This allows the programming environment to
-interact with the debugger and vice-versa, a capability missing from the
-<TT>sam</TT>
-interface.
-The
-<TT>src</TT>
-and
-<TT>Bsrc</TT>
-functions are both written in Acid code using the file and line primitives.
-Acid provides library functions to step through source level
-statements and functions. Furthermore, addresses in Acid expressions can be
-specified by source file and line.
-Source code is manipulated in the Acid
-<I>list</I>
-data type.
-</P>
-<H4>9 The Acid Library
-</H4>
-<P>
-The following examples define some useful commands and
-illustrate the interaction of the debugger and the interpreter.
-<DL><DT><DD><TT><PRE>
-defn bpset(addr)                          // set breakpoint
-{
-	if match(addr, bplist) &gt;= 0 then
-		print("bkpoint already set:", addr\a, "\n");
-	else {
-		*fmt(addr, bpfmt) = bpinst;   // plant it
-		bplist = append bplist, addr; // add to list
-	}
-}
-</PRE></TT></DL>
-The
-<TT>bpset</TT>
-function plants a break point in memory. The function starts by
-using the
-<TT>match</TT>
-builtin to
-search the breakpoint list to determine if a breakpoint is already
-set at the address.
-The indirection operator, controlled by the format code returned
-by the
-<TT>fmt</TT>
-primitive, is used to plant the breakpoint in memory.
-The variables
-<TT>bpfmt</TT>
-and
-<TT>bpinst</TT>
-are Acid global variables containing the format code specifying
-the size of the breakpoint instruction and the breakpoint instruction
-itself.
-These
-variables are set by architecture-dependent library code
-when the debugger first attaches to the executing image.
-Finally the address of the breakpoint is
-appended to the breakpoint list,
-<TT>bplist</TT>.
-<DL><DT><DD><TT><PRE>
-defn step()				// single step
-{
-	local lst, lpl, addr, bput;
-
-	bput = 0;			// sitting on bkpoint
-	if match(*PC, bplist) &gt;= 0 then {	
-		bput = fmt(*PC, bpfmt);	// save current addr
-		*bput = @bput;		// replace it
-	}
-
-	lst = follow(*PC);		// get follow set
-
-	lpl = lst;
-	while lpl do {			// place breakpoints
-		*(head lpl) = bpinst;
-		lpl = tail lpl;
-	}
-
-	startstop(pid);			// do the step
-
-	while lst do {			// remove breakpoints
-		addr = fmt(head lst, bpfmt);
-		*addr = @addr;		// replace instr.
-		lst = tail lst;
-	}
-	if bput != 0 then
-		*bput = bpinst;		// restore breakpoint
-}
-</PRE></TT></DL>
-The
-<TT>step</TT>
-function executes a single assembler instruction.
-If the
-<TT>PC</TT>
-is sitting
-on a breakpoint, the address and size of
-the breakpoint are saved.
-The breakpoint instruction
-is then removed using the
-<TT>@</TT>
-operator to fetch
-<TT>bpfmt</TT>
-bytes from the text file and to place it into the memory
-of the executing process using the
-<TT>*</TT>
-operator.
-The
-<TT>follow</TT>
-function is an Acid
-builtin which returns a follow-set: a list of instruction addresses which
-could be executed next.
-If the instruction stored at the
-<TT>PC</TT>
-is a branch instruction, the
-list contains the addresses of the next instruction and
-the branch destination; otherwise, it contains only the
-address of the next instruction.
-The follow-set is then used to replace each possible following
-instruction with a breakpoint instruction.  The original
-instructions need not be saved; they remain
-in their unaltered state in the text file.
-The
-<TT>startstop</TT>
-builtin writes the `startstop' message to the
-<I>proc</I>
-control file for the process named
-<TT>pid</TT>.
-The target process executes until some condition causes it to
-enter the kernel, in this case, the execution of a breakpoint.
-When the process blocks, the debugger regains control and invokes the
-Acid library function
-<TT>stopped</TT>
-which reports the address and cause of the blockage.
-The
-<TT>startstop</TT>
-function completes and returns to the
-<TT>step</TT>
-function where
-the follow-set is used to replace the breakpoints placed earlier.
-Finally, if the address of the original
-<TT>PC</TT>
-contained a breakpoint, it is replaced.
-</P>
-<P>
-Notice that this approach to process control is inherently portable;
-the Acid code is shared by the debuggers for all architectures.
-Acid variables and builtin functions provide a transparent interface
-to architecture-dependent values and functions.  Here the breakpoint
-value and format are referenced through Acid variables and the
-<TT>follow</TT>
-primitive masks the differences in the underlying instruction set.
-</P>
-<P>
-The
-<TT>next</TT>
-function, similar to the
-<I>dbx</I>
-command of the same name,
-is a simpler example.
-This function steps through
-a single source statement but steps over function calls.
-<DL><DT><DD><TT><PRE>
-defn next()
-{
-	local sp, bound;
-
-	sp = *SP;			// save starting SP
-	bound = fnbound(*PC);		// begin &amp; end of fn.
-	stmnt();			// step 1 statement
-	pc = *PC;
-	if pc &gt;= bound[0] &amp;&amp; pc &lt; bound[1] then
-		return {};
-
-	while (pc&lt;bound[0] || pc&gt;bound[1]) &amp;&amp; sp&gt;=*SP do {
-		step();
-		pc = *PC;
-	}
-	src(*PC);
-}
-</PRE></TT></DL>
-The
-<TT>next</TT>
-function
-starts by saving the current stack pointer in a local variable.
-It then uses the Acid library function
-<TT>fnbound</TT>
-to return the addresses of the first and last instructions in
-the current function in a list.
-The
-<TT>stmnt</TT>
-function executes a single source statement and then uses
-<TT>src</TT>
-to print a few lines of source around the new
-<TT>PC</TT>.
-If the new value of the
-<TT>PC</TT>
-remains in the current function,
-<TT>next</TT>
-returns.
-When the executed statement is a function call or a return
-from a function, the new value of the
-<TT>PC</TT>
-is outside the bounds calculated by
-<TT>fnbound</TT>
-and the test of the
-<TT>while</TT>
-loop is evaluated.
-If the statement was a return, the new value of the stack pointer
-is greater than the original value and the loop completes without
-execution.
-Otherwise, the loop is entered and instructions are continually
-executed until the value of the
-<TT>PC</TT>
-is between the bounds calculated earlier.  At that point, execution
-ceases and a few lines of source in the vicinity of the
-<TT>PC</TT>
-are printed.
-</P>
-<P>
-Acid provides concise and elegant expression for control and
-manipulation of target programs. These examples demonstrate how a
-few well-chosen primitives can be combined to create a rich debugging environment.
-</P>
-<H4>10 Dealing With Multiple Architectures
-</H4>
-<P>
-A single binary of Acid may be used to debug a program running on any
-of the five processor architectures supported by Plan 9.  For example,
-Plan 9 allows a user on a MIPS to import the
-<I>proc</I>
-file system from an i486-based PC and remotely debug a program executing
-on that processor.
-</P>
-<P>
-Two levels of abstraction provide this architecture independence.
-On the lowest level, a Plan 9 library supplies functions to
-decode the file header of the program being debugged and
-select a table of system parameters
-and a jump vector of architecture-dependent
-functions based on the magic number.
-Among these functions are byte-order-independent
-access to memory and text files, stack manipulation, disassembly,
-and floating point number interpretation.
-The second level of abstraction is supplied by Acid.
-It consists of primitives and approximately 200 lines
-of architecture-dependent Acid library code that interface the
-interpreter to the architecture-dependent library.
-This layer performs functions such as mapping register names to
-memory locations, supplying breakpoint values and sizes,
-and converting processor specific data to Acid data types.
-An example of the latter is the stack trace function
-<TT>strace</TT>,
-which uses the stack traversal functions in the
-architecture-dependent library to construct a list of lists describing
-the context of a process.  The first level of list selects
-each function in the trace; subordinate lists contain the
-names and values of parameters and local variables of
-the functions.  Acid commands and library functions that
-manipulate and display process state information operate
-on the list representation and are independent of the
-underlying architecture.
-</P>
-<H4>11 Alef Runtime
-</H4>
-<P>
-Alef is a concurrent programming language,
-designed specifically for systems programming, which supports both
-shared variable and message passing paradigms.
-Alef borrows the C expression syntax but implements
-a substantially different type system.
-The language provides a rich set of 
-exception handling, process management, and synchronization
-primitives, which rely on a runtime system.
-Alef program bugs are often deadlocks, synchronization failures,
-or non-termination caused by locks being held incorrectly.
-In such cases, a process stalls deep
-in the runtime code and it is clearly
-unreasonable to expect a programmer using the language
-to understand the detailed
-internal semantics of the runtime support functions.
-</P>
-<P>
-Instead, there is an Alef support library, coded in Acid, that
-allows the programmer to interpret the program state in terms of
-Alef operations.  Consider the example of a multi-process program
-stalling because of improper synchronization.  A stack trace of
-the program indicates that it is waiting for an event in some
-obscure Alef runtime
-synchronization function.
-The function itself is irrelevant to the
-programmer; of greater importance is the identity of the
-unfulfilled event.
-Commands in the Alef support library decode
-the runtime data structures and program state to report the cause
-of the blockage in terms of the high-level operations available to
-the Alef programmer.  
-Here, the Acid language acts
-as a communications medium between Alef implementer and Alef user.
-</P>
-<H4>12 Parallel Debugging
-</H4>
-<P>
-The central issue in parallel debugging is how the debugger is
-multiplexed between the processes comprising
-the program.
-Acid has no intrinsic model of process partitioning; it
-only assumes that parallel programs share a symbol table,
-though they need not share memory.
-The
-<TT>setproc</TT>
-primitive attaches the debugger to a running process
-associated with the process ID supplied as its argument
-and assigns that value to the global variable
-<TT>pid</TT>,
-thereby allowing simple rotation among a group of processes.
-Further, the stack trace primitive is driven by parameters
-specifying a unique process context, so it is possible to
-examine the state of cooperating processes without switching
-the debugger focus from the process of interest.
-Since Acid is inherently extensible and capable of
-dynamic interaction with subordinate processes, the
-programmer can define Acid commands to detect and control
-complex interactions between processes.
-In short, the programmer is free to specify how the debugger reacts
-to events generated in specific threads of the program.
-</P>
-<P>
-The support for parallel debugging in Acid depends on a crucial kernel
-modification: when the text segment of a program is written (usually to
-place a breakpoint), the segment is cloned to prevent other threads
-from encountering the breakpoint.  Although this incurs a slight performance
-penalty, it is of little importance while debugging.
-</P>
-<H4>13 Communication Between Tools
-</H4>
-<P>
-The Plan 9 Alef and C compilers do not
-embed detailed type information in the symbol table of an
-executable file.
-However, they do accept a command line option causing them to
-emit descriptions of complex data types
-(e.g., aggregates and abstract data types)
-to an auxiliary file.
-The vehicle for expressing this information is Acid source code.
-When an Acid debugging session is 
-subsequently started, that file is loaded with the other Acid libraries.
-</P>
-<P>
-For each complex object in the program the compiler generates
-three pieces of Acid code.
-The first is a table describing the size and offset of each
-member of the complex data type.  Following is an Acid function,
-named the same as the object, that formats and prints each member.
-Finally, Acid declarations associate the
-Alef or C program variables of a type with the functions
-to print them.
-The three forms of declaration are shown in the following example:
-<DL><DT><DD><TT><PRE>
-struct Bitmap {
-	Rectangle    0 r;
-	Rectangle   16 clipr;
-	'D'   32 ldepth;
-	'D'   36 id;
-	'X'   40 cache;
-};
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-defn
-Bitmap(addr) {
-	complex Bitmap addr;
-	print("Rectangle r {\n");
-	Rectangle(addr.r);
-	print("}\n");
-	print("Rectangle clipr {\n");
-	Rectangle(addr.clipr);
-	print("}\n");
-	print("	ldepth	", addr.ldepth, "\n");
-	print("	id	", addr.id, "\n");
-	print("	cache	", addr.cache, "\n");
-};
-
-complex Bitmap darkgrey;
-complex Bitmap Window_settag:b;
-</PRE></TT></DL>
-The
-<TT>struct</TT>
-declaration specifies decoding instructions for the complex type named
-<TT>Bitmap</TT>.
-Although the syntax is superficially similar to a C structure declaration,
-the semantics differ markedly: the C declaration specifies a layout, while
-the Acid declaration tells how to decode it.
-The declaration specifies a type, an offset, and a name for each
-member of the complex object. The type is either the name of another
-complex declaration, for example,
-<TT>Rectangle</TT>,
-or a format code.
-The offset is the number of bytes from the start
-of the object to the member
-and the name is the member's name in the Alef or C declaration.
-This type description is a close match for C and Alef, but is simple enough
-to be language independent.
-</P>
-<P>
-The
-<TT>Bitmap</TT>
-function expects the address of a
-<TT>Bitmap</TT>
-as its only argument.
-It uses the decoding information contained in the
-<TT>Bitmap</TT>
-structure declaration to extract, format, and print the
-value of each member of the complex object pointed to by
-the argument.
-The Alef compiler emits code to call other Acid functions
-where a member is another complex type; here,
-<TT>Bitmap</TT>
-calls
-<TT>Rectangle</TT>
-to print its contents.
-</P>
-<P>
-The
-<TT>complex</TT>
-declarations associate Alef variables with complex types.
-In the example,
-<TT>darkgrey</TT>
-is the name of a global variable of type
-<TT>Bitmap</TT>
-in the program being debugged.
-Whenever the name
-<TT>darkgrey</TT>
-is evaluated by Acid, it automatically calls the
-<TT>Bitmap</TT>
-function with the address of
-<TT>darkgrey</TT>
-as the argument.
-The second
-<TT>complex</TT>
-declaration associates a local variable or parameter named
-<TT>b</TT>
-in function
-<TT>Window_settag</TT>
-with the
-<TT>Bitmap</TT>
-complex data type.
-</P>
-<P>
-Acid borrows the C operators
-<TT>.</TT>
-and
-<TT>-&gt;</TT>
-to access the decoding parameters of a member of a complex type.
-Although this representation is sufficiently general for describing
-the decoding of both C and Alef complex data types, it may
-prove too restrictive for target languages with more complicated
-type systems.
-Further, the assumption that the compiler can select the proper
-Acid format code for each basic type in the language is somewhat
-naive.  For example, when a member of a complex type is a pointer,
-it is assigned a hexadecimal type code; integer members are always 
-assigned a decimal type code.
-This heuristic proves inaccurate when an integer field is a
-bit mask or set of bit flags which are more appropriately displayed
-in hexadecimal or octal.
-</P>
-<H4>14 Code Verification
-</H4>
-<P>
-Acid's ability to interact dynamically with
-an executing program allows passive test and
-verification of the target program.  For example,
-a common concern is leak detection in programs using
-<TT>malloc</TT>.
-Of interest are two items: finding memory that was allocated
-but never freed and detecting bad pointers passed to
-<TT>free</TT>.
-An auxiliary Acid library contains Acid functions to
-monitor the execution of a program and detect these
-faults, either as they happen or in the automated
-post-mortem analysis of the memory arena.
-In the following example, the
-<TT>sort</TT>
-command is run under the control of the
-Acid memory leak library.
-<DL><DT><DD><TT><PRE>
-helix% acid -l malloc /bin/sort
-/bin/sort: mips plan 9 executable
-/lib/acid/port
-/lib/acid/mips
-/lib/acid/malloc
-acid: go()
-now
-is
-the
-time
-&lt;ctrl-d&gt;
-is
-now
-the
-time
-27680 : breakpoint	_exits+0x4	MOVW	0x8,R1
-acid: 
-</PRE></TT></DL>
-The
-<TT>go</TT>
-command creates a process and plants
-breakpoints at the entry to
-<TT>malloc</TT>
-and
-<TT>free</TT>.
-The program is then started and continues until it
-exits or stops.  If the reason for stopping is anything
-other than the breakpoints in
-<TT>malloc</TT>
-and
-<TT>free</TT>,
-Acid prints the usual status information and returns to the
-interactive prompt.
-</P>
-<P>
-When the process stops on entering
-<TT>malloc</TT>,
-the debugger must capture and save the address that
-<TT>malloc</TT>
-will return.
-After saving a stack
-trace so the calling routine can be identified, it places
-a breakpoint at the return address and restarts the program.
-When
-<TT>malloc</TT>
-returns, the breakpoint stops the program,
-allowing the debugger
-to grab the address of the new memory block from the return register.
-The address and stack trace are added to the list of outstanding
-memory blocks, the breakpoint is removed from the return point, and
-the process is restarted.
-</P>
-<P>
-When the process stops at the beginning of
-<TT>free</TT>,
-the memory address supplied as the argument is compared to the list
-of outstanding memory blocks.  If it is not found, an error message
-and a stack trace of the call are reported; otherwise, the
-address is deleted from the list.
-</P>
-<P>
-When the program exits, the list of outstanding memory blocks contains
-the addresses of all blocks that were allocated but never freed.
-The
-<TT>leak</TT>
-library function traverses the list producing a report describing
-the allocated blocks.
-<DL><DT><DD><TT><PRE>
-acid: leak()
-Lost a total of 524288 bytes from:
-    malloc() malloc.c:32 called from dofile+0xe8 sort.c:217 
-    dofile() sort.c:190 called from main+0xac sort.c:161 
-    main() sort.c:128 called from _main+0x20 main9.s:10 
-Lost a total of 64 bytes from:
-    malloc() malloc.c:32 called from newline+0xfc sort.c:280 
-    newline() sort.c:248 called from dofile+0x110 sort.c:222 
-    dofile() sort.c:190 called from main+0xac sort.c:161 
-    main() sort.c:128 called from _main+0x20 main9.s:10 
-Lost a total of 64 bytes from:
-    malloc() malloc.c:32 called from realloc+0x14 malloc.c:129 
-    realloc() malloc.c:123 called from bldkey+0x358 sort.c:1388 
-    buildkey() sort.c:1345 called from newline+0x150 sort.c:285 
-    newline() sort.c:248 called from dofile+0x110 sort.c:222 
-    dofile() sort.c:190 called from main+0xac sort.c:161 
-    main() sort.c:128 called from _main+0x20 main9.s:10
-acid: refs()
-data...bss...stack...
-acid: leak()
-acid: 
-</PRE></TT></DL>
-The presence of a block in the allocation list does not imply
-it is there because of a leak; for instance, it may have been
-in use when the program terminated.
-The
-<TT>refs()</TT>
-library function scans the
-<I>data</I>,
-<I>bss</I>,
-and
-<I>stack</I>
-segments of the process looking for pointers
-into the allocated blocks.  When one is found, the block is deleted from
-the outstanding block list.
-The
-<TT>leak</TT>
-function is used again to report the
-blocks remaining allocated and unreferenced.
-This strategy proves effective in detecting
-disconnected (but non-circular) data structures.
-</P>
-<P>
-The leak detection process is entirely passive.
-The program is not
-specially compiled and the source code is not required.
-As with the Acid support functions for the Alef runtime environment,
-the author of the library routines has encapsulated the
-functionality of the library interface
-in Acid code.
-Any programmer may then check a program's use of the
-library routines without knowledge of either implementation.
-The performance impact of running leak detection is great
-(about 10 times slower),
-but it has not prevented interactive programs like
-<TT>sam</TT>
-and the
-<TT>8&#189;</TT>
-window system from being tested.
-</P>
-<H4>15 Code Coverage
-</H4>
-<P>
-Another common component of software test uses 
-<I>coverage</I>
-analysis.
-The purpose of the test is to determine which paths through the code have
-not been executed while running the test suite.
-This is usually
-performed by a combination of compiler support and a reporting tool run
-on the output generated by statements compiled into the program.
-The compiler emits code that
-logs the progress of the program as it executes basic blocks and writes the
-results to a file. The file is then processed by the reporting tool 
-to determine which basic blocks have not been executed.
-</P>
-<P>
-Acid can perform the same function in a language independent manner without
-modifying the source, object or binary of the program. The following example
-shows
-<TT>ls</TT>
-being run under the control of the Acid coverage library.
-<DL><DT><DD><TT><PRE>
-philw-helix% acid -l coverage /bin/ls
-/bin/ls: mips plan 9 executable
-/lib/acid/port
-/lib/acid/mips
-/lib/acid/coverage
-acid: coverage()
-acid
-newstime
-profile
-tel
-wintool
-2: (error) msg: pid=11419 startstop: process exited
-acid: analyse(ls)
-ls.c:102,105
-	102:     return 1;
-	103: }
-	104: if(db[0].qid.path&amp;CHDIR &amp;&amp; dflag==0){
-	105:     output();
-ls.c:122,126
-	122:     memmove(dirbuf+ndir, db, sizeof(Dir));
-	123:     dirbuf[ndir].prefix = 0;
-	124:     p = utfrrune(s, '/');
-	125:     if(p){
-	126:         dirbuf[ndir].prefix = s;
-</PRE></TT></DL>
-The
-<TT>coverage</TT>
-function begins by looping through the text segment placing
-breakpoints at the entry to each basic block. The start of each basic
-block is found using the Acid builtin function
-<TT>follow</TT>.
-If the list generated by
-<TT>follow</TT>
-contains more than one
-element, then the addresses mark the start of basic blocks. A breakpoint
-is placed at each address to detect entry into the block. If the result
-of
-<TT>follow</TT>
-is a single address then no action is taken, and the next address is
-considered. Acid maintains a list of
-breakpoints already in place and avoids placing duplicates (an address may be
-the destination of several branches).
-</P>
-<P>
-After placing the breakpoints the program is set running.
-Each time a breakpoint is encountered
-Acid deletes the address from the breakpoint list, removes the breakpoint
-from memory and then restarts the program.
-At any instant the breakpoint list contains the addresses of basic blocks
-which have not been executed. 
-The
-<TT>analyse</TT>
-function reports the lines of source code bounded by basic blocks
-whose addresses have not been deleted from the breakpoint list.
-These are the basic blocks which have not been executed.
-Program performance is almost unaffected since each breakpoint is executed
-only once and then removed.
-</P>
-<P>
-The library contains a total of 128 lines of Acid code.
-An obvious extension of this algorithm could be used to provide basic block
-profiling.
-</P>
-<H4>16 Conclusion
-</H4>
-<P>
-Acid has two areas of weakness. As with
-other language-based tools like
-<I>awk</I>,
-a programmer must learn yet another language to step beyond the normal
-debugging functions and use the full power of the debugger.
-Second, the command line interface supplied by the
-<I>yacc</I>
-parser is inordinately clumsy.
-Part of the problem relates directly to the use of
-<I>yacc</I>
-and could be circumvented with a custom parser.
-However, structural problems would remain: Acid often requires
-too much typing to execute a simple
-command.
-A debugger should prostitute itself to its users, doing whatever
-is wanted with a minimum of encouragement; commands should be
-concise and obvious. The language interface is more consistent than
-an ad hoc command interface but is clumsy to use.
-Most of these problems are addressed by an Acme interface
-which is under construction. This should provide the best of
-both worlds: graphical debugging and access to the underlying Acid
-language when required.
-</P>
-<P>
-The name space clash between Acid variables, keywords, program variables,
-and functions is unavoidable.
-Although it rarely affects a debugging session, it is annoying
-when it happens and is sometimes difficult to circumvent.
-The current renaming scheme
-is too crude; the new names are too hard to remember.
-</P>
-<P>
-Acid has proved to be a powerful tool whose applications
-have exceeded expectations.
-Of its strengths, portability, extensibility and parallel debugging support
-were by design and provide the expected utility.
-In retrospect,
-its use as a tool for code test and verification and as
-a medium for communicating type information and encapsulating
-interfaces has provided unanticipated benefits and altered our
-view of the debugging process.
-</P>
-<H4>17 Acknowledgments
-</H4>
-<P>
-Bob Flandrena was the first user and helped prepare the paper.
-Rob Pike endured three buggy Alef compilers and a new debugger
-in a single sitting.
-</P>
-<H4>18 References
-</H4>
-<br>&#32;<br>
-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
-``Plan 9 from Bell Labs'',
-UKUUG Proc. of the Summer 1990 Conf.,
-London, England,
-1990,
-reprinted, in a different form, in this volume.
-<br>&#32;<br>
-[Gol93] M. Golan, D. Hanson,
-``DUEL -- A Very High-Level Debugging Language'',
-USENIX Proc. of the Winter 1993 Conf.,
-San Diego, CA,
-1993.
-<br>&#32;<br>
-[Lin90] M. A. Linton,
-``The Evolution of DBX'',
-USENIX Proc. of the Summer 1990 Conf.,
-Anaheim, CA,
-1990.
-<br>&#32;<br>
-[Stal91] R. M. Stallman, R. H. Pesch,
-``Using GDB: A guide to the GNU source level debugger'',
-Technical Report, Free Software Foundation,
-Cambridge, MA,
-1991.
-<br>&#32;<br>
-[Win93] P. Winterbottom,
-``Alef Reference Manual'',
-this volume.
-<br>&#32;<br>
-[Pike93] Rob Pike,
-``Acme: A User Interface for Programmers'',
-USENIX Proc. of the Winter 1994 Conf.,
-San Francisco, CA,
-reprinted in this volume.
-<br>&#32;<br>
-[Ols90] Ronald A. Olsson, Richard H. Crawford, and W. Wilson Ho,
-``Dalek: A GNU, improved programmable debugger'',
-USENIX Proc. of the Summer 1990 Conf.,
-Anaheim, CA.
-<br>&#32;<br>
-[May92] Paul Maybee,
-``NeD: The Network Extensible Debugger'',
-USENIX Proc. of the Summer 1992 Conf.,
-San Antonio, TX.
-<br>&#32;<br>
-[Aral] Ziya Aral, Ilya Gertner, and Greg Schaffer,
-``Efficient debugging primitives for multiprocessors'',
-Proceedings of the Third International Conference on Architectural
-Support for Programming Languages and Operating Systems,
-SIGPLAN notices Nr. 22, May 1989.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1351
sys/doc/acme/acme.html

@@ -1,1351 +0,0 @@
-<html>
-<title>
-Acme: A User Interface for Programmers
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Acme: A User Interface for Programmers
-</H1>
-<DL><DD><I><I>Rob Pike</I>
-<I>rob@plan9.bell-labs.com</I>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> <HR>
-<br>
-Originally appeared in
-Proc. of the Winter 1994 USENIX Conf.,
-pp. 223-234,
-San Francisco, CA
-</I><DT>&#32;<DD></dl>
-<br>
-A hybrid of window system, shell, and editor, Acme gives text-oriented
-applications a clean, expressive, and consistent style of interaction.
-Traditional window systems support interactive client programs and offer libraries of
-pre-defined operations such as pop-up menus
-and buttons to promote a consistent
-user interface among the clients.
-Acme instead provides its clients with a fixed user interface and
-simple conventions to encourage its uniform use.
-Clients access the facilities of Acme through a file system interface;
-Acme is in part a file server that exports device-like files that may be
-manipulated to access and control the contents of its windows.
-Written in a concurrent programming language,
-Acme is structured as a set of communicating processes that neatly subdivide
-the various aspects of its tasks: display management, input, file server, and so on.
-<P>
-Acme attaches distinct functions to the three mouse buttons:
-the left selects text;
-the middle executes textual commands;
-and the right combines context search and file opening
-functions to integrate the various applications and files in
-the system.
-</P>
-<P>
-Acme works well enough to have developed
-a community that uses it exclusively.
-Although Acme discourages the traditional style of interaction
-based on typescript windows&#8212;teletypes&#8212;its
-users find Acme's other services render
-typescripts obsolete. 
-</DL>
-</P>
-<H4>History and motivation
-</H4>
-<P>
-The usual typescript style of interaction with
-Unix and its relatives is an old one.
-The typescript&#8212;an intermingling of textual commands and their
-output&#8212;originates with the scrolls of paper on teletypes.
-The advent of windowed terminals has given each user what
-amounts to an array of teletypes, a limited and unimaginative
-use of the powers of bitmap displays and mice.
-Systems like the Macintosh
-that do involve the mouse as an integral part of the interaction
-are geared towards general users, not experts, and certainly
-not programmers.
-Software developers, at least on time-sharing systems, have been left behind.
-<br><img src="acme.fig1.14050.gif"><br>
-<br>&#32;<br>
-
-Figure 1.  A small Acme screen&#8212;normally it runs on a larger display&#8212;demonstrating
-some of the details discussed in the text.
-The right column contains some guide files,
-a mailbox presented by Acme's mail program,
-the columnated display of files in Acme's own source directory,
-a couple of windows from the OED browser,
-a debugger window,
-and an error window showing diagnostics from a compilation.
-The left column holds a couple of source files
-(<TT>dat.h</TT>
-and
-<TT>acme.l</TT>),
-another debugger window displaying a stack trace,
-and a third source file
-(<TT>time.l</TT>).
-<TT>Time.l</TT>
-was opened from the debugger by clicking the right mouse button
-on a line in the stack window;
-the mouse cursor landed on the offending line of
-<TT>acme.l</TT>
-after a click on the compiler message.
-<br>
-<HR>
-</P>
-<P>
-Some programs have mouse-based editing of
-text files and typescripts;
-ones I have built include
-the window systems
-<TT>mux</TT>
-[Pike88]
-and
-<TT>8&#189;</TT>
-[Pike91]
-and the text editor
-Sam [Pike87].
-These have put the programmer's mouse to some productive work,
-but not wholeheartedly.  Even experienced users of these programs
-often retype text that could be grabbed with the mouse,
-partly because the menu-driven interface is imperfect
-and partly because the various pieces are not well enough integrated.
-</P>
-<P>
-Other programs&#8212;EMACS [Stal93] is the prime example&#8212;offer a high
-degree of integration but with a user interface built around the
-ideas of cursor-addressed terminals that date from the 1970's.
-They are still keyboard-intensive and
-dauntingly complex.
-</P>
-<P>
-The most ambitious attempt to face these issues was the Cedar
-system, developed at Xerox [Swei86].
-It combined a new programming language, compilers,
-window system, even microcode&#8212;a complete system&#8212;to
-construct a productive, highly
-integrated and interactive environment
-for experienced users of compiled languages.
-Although successful internally, the system was so large
-and so tied to specific hardware that it never fledged.
-</P>
-<P>
-Cedar was, however, the major inspiration for Oberon [Wirt89],
-a system of similar scope but much smaller scale.
-Through careful selection of Cedar's ideas, Oberon shows
-that its lessons can be applied to a small, coherent system
-that can run efficiently on modest hardware.
-In fact, Oberon probably
-errs too far towards simplicity: a single-process system
-with weak networking, it seems an architectural throwback.
-</P>
-<P>
-Acme is a new program,
-a combined window system, editor, and shell,
-that applies
-some of the ideas distilled by Oberon.
-Where Oberon uses objects and modules within a programming language (also called Oberon),
-Acme uses files and commands within an existing operating system (Plan 9).
-Unlike Oberon, Acme does not yet have support for graphical output, just text.
-At least for now, the work on Acme has concentrated on
-producing the smoothest user interface possible for a programmer
-at work.
-</P>
-<P>
-The rest of this paper describes Acme's interface,
-explains how programs can access it,
-compares it to existing systems,
-and finally presents some unusual aspects of its implementation.
-</P>
-<H4>User interface
-</H4>
-<P>
-<br><img src="acme.fig2.14051.gif"><br>
-<br>&#32;<br>
-
-Figure 2.  An Acme window showing a section of code.
-The upper line of text is the tag containing the file name,
-relevant commands, and a scratch area (right of the vertical bar);
-the lower portion of the window is the
-body, or contents, of the file.
-Here the scratch area contains a command for the middle button
-(<TT>mk</TT>)
-and a word to search for with the right button
-(<TT>cxfidalloc</TT>).
-The user has just
-clicked the right button on
-<TT>cxfidalloc</TT>
-and Acme has searched for the word, highlighted it,
-and moved the mouse cursor there.  The file has been modified:
-the center of the layout box is black and the command
-<TT>Put</TT>
-appears in the tag.
-<br>
-<HR>
-Acme windows are arrayed in columns (Figure 1) and are used more
-dynamically than in an environment like X Windows or
-<TT>8&#189;</TT>
-[Sche86, Pike91].
-The system frequently creates them automatically and the user
-can order a new one with a single mouse button click.
-The initial placement of a new window is determined
-automatically, but the user may move an existing window anywhere
-by clicking or dragging a
-<I>layout box</I>
-in the upper left corner of
-the window.
-</P>
-<P>
-Acme windows have two parts: a
-<I>tag</I>
-holding a single line of text,
-above a
-<I>body</I>
-holding zero or more lines (Figure 2).
-The body typically contains an image of a file being edited
-or the editable output of a
-program, analogous to an
-EMACS shell
-window.  The tag contains
-the name of the window
-(usually the name of the associated
-file or directory), some built-in commands, and a scratch area to hold arbitrary text.
-If a window represents a directory, the name in the tag ends with
-a slash and the body contains a list of the names of the files
-in the directory.
-Finally, each non-empty body holds a scroll bar at the left of the text.
-</P>
-<P>
-Each column of windows also has a layout box and a tag.
-The tag has no special meaning, although Acme pre-loads it with a few
-built-in commands.
-There is also a tag across the whole display, also loaded with
-helpful commands and a list of active processes started
-by Acme.
-</P>
-<P>
-Typing with the keyboard and selecting with the left button are as in
-many other systems, including the Macintosh,
-<TT>8&#189;</TT>,
-and Sam.
-The middle and right buttons are used, somewhat like the left button,
-to `sweep' text, but the indicated text is treated in a way
-that depends on the text's location&#8212;<I>context</I>&#8212;as well as its content.
-This context, based on the directory of the file containing the text,
-is a central component of Acme's style of interaction.
-</P>
-<P>
-Acme has no single notion of `current directory'.
-Instead, every command, file name,
-action, and so on is interpreted or executed in the directory named by the
-tag of the window containing the command.  For example, the string
-<TT>mammals</TT>
-in a window labeled
-<TT>/lib/</TT>
-or
-<TT>/lib/insects</TT>
-will be interpreted as the file name
-<TT>/lib/mammals</TT>
-if such a file exists.
-</P>
-<P>
-Throughout Acme, the middle mouse button is used to execute commands
-and the right mouse button is used to locate and select files and text.
-Even when there are no true files on which to operate&#8212;for example
-when editing mail messages&#8212;Acme and its applications use
-consistent extensions of these basic functions.
-This idea is as vital to Acme as icons are to the Macintosh.
-</P>
-<P>
-The middle button executes commands: text swept with the button
-pressed is underlined; when the button is released, the underline is
-removed and the indicated text is executed.
-A modest number of commands are recognized as built-ins: words like
-<TT>Cut</TT>,
-<TT>Paste</TT>,
-and
-<TT>New</TT>
-name
-functions performed directly by Acme.
-These words often appear in tags to make them always available,
-but the tags are not menus: any text anywhere in Acme may be a command.
-For example, in the tag or body of any window one may type
-<TT>Cut</TT>,
-select it with the left button, use the middle button to execute it,
-and watch it disappear again.
-</P>
-<P>
-If the middle button indicates a command that is not recognized as a built-in,
-it is executed in the directory
-named by the tag of the window holding the text.
-Also, the file to be executed is searched for first in that directory.
-Standard input is connected to
-<TT>/dev/null</TT>,
-but standard and error outputs are connected to an Acme window,
-created if needed, called
-<I>dir</I><TT>/+Errors</TT> where
-<I>dir</I>
-is the directory of the window.
-(Programs that need interactive input use a different interface, described below.)
-A typical use of this is to type
-<TT>mk</TT>
-(Plan 9's
-<TT>make</TT>)
-in the scratch area in the tag of a C source window, say
-<TT>/sys/src/cmd/sam/regexp.c</TT>,
-and execute it.
-Output, including compiler errors, appears in the window labeled
-<TT>/sys/src/cmd/sam/+Errors</TT>,
-so file names in the output are associated with the windows and directory
-holding the source.
-The
-<TT>mk</TT>
-command remains in the tag, serving as a sort of menu item for the associated
-window.
-</P>
-<P>
-Like the middle button, the right button is used to indicate text by sweeping it out.
-The indicated text is not a command, however, but the argument of a generalized
-search operator.
-If the text, perhaps after appending it to the directory of the window containing it,
-is the name of an existing file, Acme creates a new window to hold the file
-and reads it in.  It then moves the mouse cursor to that window.  If the file is
-already loaded into Acme, the mouse motion happens but no new window is made.
-For example, indicating the string
-<TT>sam.h</TT>
-in
-<DL><DT><DD><TT><PRE>
-#include "sam.h"
-</PRE></TT></DL>
-in a window on the file
-<TT>/sys/src/cmd/sam/regexp.c</TT>
-will open the file
-<TT>/sys/src/cmd/sam/sam.h</TT>.
-</P>
-<P>
-If the file name is followed immediately by a colon and a legal address in
-Sam notation (for example a line number or a regular expression delimited in
-slashes or a comma-separated compound of such addresses), Acme highlights
-the target of that address in the file and places the mouse there.  One may jump to
-line 27 of
-<TT>dat.h</TT>
-by indicating with the right button the text
-<TT>dat.h:27</TT>.
-If the file is not already open, Acme loads it.
-If the file name is null, for example if the indicated string is
-<TT>:/^main/</TT>,
-the file is assumed to be that of the window containing the string.
-Such strings, when typed and evaluated in the tag of a window, amount to
-context searches.
-</P>
-<P>
-If the indicated text is not the name of an existing file, it is taken to be literal
-text and is searched for in the body of the window containing the text, highlighting
-the result as if it were the result of a context search.
-</P>
-<P>
-For the rare occasion when a file name
-<I>is</I>
-just text to search for, it can be selected with the left button and used as the
-argument to a built-in
-<TT>Look</TT>
-command that always searches for literal text.
-</P>
-<H4>Nuances and heuristics
-</H4>
-<P>
-A user interface should not only provide the necessary functions, it should also
-<I>feel</I>
-right.
-In fact, it should almost not be felt at all; when one notices a
-user interface, one is distracted from the job at hand [Pike88].
-To approach this invisibility, some of Acme's properties and features
-are there just to make the others easy to use.
-Many are based on a fundamental principle of good design:
-let the machine do the work.
-</P>
-<P>
-Acme tries to avoid needless clicking and typing.
-There is no `click-to-type', eliminating a button click.
-There are no pop-up or pull-down menus, eliminating the mouse action needed to
-make a menu appear.
-The overall design is intended to make text on the screen useful without
-copying or retyping; the ways in which this happens involve
-the combination of many aspects of the interface.
-</P>
-<P>
-Acme tiles its windows and places them automatically
-to avoid asking the user to place and arrange them.
-For this policy to succeed, the automatic placement must behave well enough
-that the user is usually content with the location of a new window.
-The system will never get it right all the time, but in practice most
-windows are used at least for a while where Acme first places them.
-There have been several complete rewrites of the
-heuristics for placing a new window,
-and with each rewrite the system became
-noticeably more comfortable.  The rules are as follows, although
-they are still subject to improvement.
-The window appears in the `active' column, that most recently used for typing or
-selecting.
-Executing and searching do not affect the choice of active column,
-so windows of commands and such do not draw new windows towards them,
-but rather let them form near the targets of their actions.
-Output (error) windows always appear towards the right, away from
-edited text, which is typically kept towards the left.
-Within the column, several competing desires are balanced to decide where
-and how large the window should be:
-large blank spaces should be consumed;
-existing text should remain visible;
-existing large windows should be divided before small ones;
-and the window should appear near the one containing the action that caused
-its creation.
-</P>
-<P>
-Acme binds some actions to chords of mouse buttons.
-These include
-<TT>Cut</TT>
-and
-<TT>Paste</TT>
-so these common operations can be done without
-moving the mouse.
-Another is a way to apply a command in one window to text (often a file name)
-in another, avoiding the actions needed to assemble the command textually.
-</P>
-<P>
-Another way Acme avoids the need to move the mouse is instead to move the cursor
-to where it is likely to be used next.  When a new window is made, Acme
-moves the cursor to the new window; in fact, to the selected text in that window.
-When the user deletes a newly made window, the cursor is
-returned to the point it was before the window was made,
-reducing the irritation of windows that pop up to report annoying errors.
-</P>
-<P>
-When a window is moved, Acme moves the cursor to the layout box in
-its new place, to permit further adjustment without moving the mouse.
-For example, when a click of the left mouse button on the layout box grows
-the window, the cursor moves to the new location of the box so repeated clicks,
-without moving the mouse, continue to grow it.
-</P>
-<P>
-Another form of assistance the system can offer is to supply precision in
-pointing the mouse.  The best-known form of this is `double-clicking' to
-select a word rather than carefully sweeping out the entire word.
-Acme provides this feature, using context to decide whether to select
-a word, line, quoted string, parenthesized expression, and so on.
-But Acme takes the idea much further by applying it to execution
-and searching.
-A
-<I>single</I>
-click, that is, a null selection, with either the middle or right buttons,
-is expanded automatically to indicate the appropriate text containing
-the click.  What is appropriate depends on the context.
-</P>
-<P>
-For example, to execute a single-word command
-such as
-<TT>Cut</TT>,
-it is not necessary to sweep the entire word; just clicking the button once with
-the mouse pointing at the word is sufficient.  `Word'
-means the largest string of likely file name characters surrounding the location
-of the click: click on a file name, run that program.
-On the right button, the rules are more complicated because
-the target of the click might be a file name, file name with address,
-or just plain text.  Acme examines the text near the click to find
-a likely file name;
-if it finds one, it checks that it names an existing file (in the directory named in the tag, if the name is relative)
-and if so, takes that as the result, after extending it with any address
-that may be present.  If there is no file with that name, Acme
-just takes the largest alphanumeric string under the click.
-The effect is a natural overloading of the button to refer to plain text as
-well as file names.
-</P>
-<P>
-First, though, if the click occurs over the left-button-selected text in the window,
-that text is taken to be what is selected.
-This makes it easy to skip through the occurrences of a string in a file: just click
-the right button
-on some occurrence of the text in the window (perhaps after typing it in the tag)
-and click once for each subsequent occurrence.  It isn't even necessary to move
-the mouse between clicks; Acme does that.
-To turn a complicated command into a sort of menu item, select it:
-thereafter, clicking the middle button on it will execute the full command.
-</P>
-<P>
-As an extra feature, Acme recognizes file names in angle brackets
-<TT>&#60;&#62;</TT>
-as names of files in standard directories of include files,
-making it possible for instance to look at
-<TT>&#60;stdio.h&#62;</TT>
-with a single click.
-</P>
-<P>
-Here's an example to demonstrate how the actions and defaults work together.
-Assume
-<TT>/sys/src/cmd/sam/regexp.c</TT>
-is
-open and has been edited.  We write it (execute
-<TT>Put</TT>
-in the tag; once the file is written, Acme removes the word from the tag)
-and type
-<TT>mk</TT>
-in the tag.  We execute
-<TT>mk</TT>
-and get some errors, which appear in a new window labeled
-<TT>/sys/src/cmd/sam/+Errors</TT>.
-The cursor moves automatically to that window.
-Say the error is
-<DL><DT><DD><TT><PRE>
-main.c:112: incompatible types on assignment to `pattern'
-</PRE></TT></DL>
-We move the mouse slightly and click the right button
-at the left of the error message; Acme
-makes a new window, reads
-<TT>/sys/src/cmd/sam/main.c</TT>
-into it, selects line 112
-and places the mouse there, right on the offending line.
-</P>
-<H4>Coupling to existing programs
-</H4>
-<P>
-Acme's syntax for file names and addresses makes it easy for other programs
-to connect automatically to Acme's capabilities.  For example, the output of
-<DL><DT><DD><TT><PRE>
-grep -n variable *.[ch]
-</PRE></TT></DL>
-can be used to help Acme step through the occurrences of a variable in a program;
-every line of output is potentially a command to open a file.
-The file names need not be absolute, either: the output
-appears in a window labeled with the directory in which
-<TT>grep</TT>
-was run, from which Acme can derive the full path names.
-</P>
-<P>
-When necessary, we have changed the output of some programs,
-such as compiler error messages, to match
-Acme's syntax.
-Some might argue that it shouldn't be necessary to change old programs,
-but sometimes programs need to be updated when systems change,
-and consistent output benefits people as well as programs.
-A historical example is the retrofitting of standard error output to the
-early Unix programs when pipes were invented.
-</P>
-<P>
-Another change was to record full path names in
-the symbol table of executables, so line numbers reported by the debugger
-are absolute names that may be used directly by Acme; it's not necessary
-to run the debugger in the source directory.  (This aids debugging
-even without Acme.)
-</P>
-<P>
-A related change was to add lines of the form
-<DL><DT><DD><TT><PRE>
-#pragma src "/sys/src/libregexp"
-</PRE></TT></DL>
-to header files; coupled with Acme's ability to locate a header file,
-this provides a fast, keyboardless way to get the source associated with a library.
-</P>
-<P>
-Finally, Acme directs the standard output of programs it runs to
-windows labeled by the directory in which the program is run.
-Acme's splitting of the
-output into directory-labeled windows is a small feature that has a major effect:
-local file names printed by programs can be interpreted directly by Acme.
-By indirectly coupling the output of programs to the input,
-it also simplifies the management of software that occupies multiple
-directories.
-</P>
-<H4>Coupling to new programs
-</H4>
-<P>
-Like many Plan 9 programs,
-Acme offers a programmable interface to
-other programs by acting as a file server.
-The best example of such a file server is the window system
-<TT>8&#189;</TT>
-[Pike91],
-which exports files with names such as
-<TT>screen</TT>,
-<TT>cons</TT>,
-and
-<TT>mouse</TT>
-through which applications may access the I/O capabilities of the windows.
-<TT>8&#189;</TT>
-provides a
-<I>distinct</I>
-set of files for each window and builds a private file name space
-for the clients running `in' each window;
-clients in separate windows see distinct files with the same names
-(for example
-<TT>/dev/mouse</TT>).
-Acme, like the process file system [PPTTW93], instead associates each
-window with a directory of files; the files of each window are visible
-to any application.
-This difference reflects a difference in how the systems are used:
-<TT>8&#189;</TT>
-tells a client what keyboard and mouse activity has happened in its window;
-Acme tells a client what changes that activity wrought on any window it asks about.
-Putting it another way,
-<TT>8&#189;</TT>
-enables the construction of interactive applications;
-Acme provides the interaction for applications.
-</P>
-<P>
-The root of
-Acme's file system is mounted using Plan 9 operations on the directory
-<TT>/mnt/acme</TT>.
-In
-that root directory appears a directory for each window, numbered with the window's identifier,
-analogous to a process identifier, for example
-<TT>/mnt/acme/27</TT>.
-The window's directory
-contains 6 files:
-<TT>/mnt/acme/27/addr</TT>,
-<TT>body</TT>,
-<TT>ctl</TT>,
-<TT>data</TT>,
-<TT>event</TT>,
-and
-<TT>tag</TT>.
-The
-<TT>body</TT>
-and
-<TT>tag</TT>
-files contain the text of the respective parts of the window; they may be
-read to recover the contents.  Data written to these files is appended to the text;
-<TT>seeks</TT>
-are ignored.
-The
-<TT>addr</TT>
-and
-<TT>data</TT>
-files provide random access to the contents of the body.
-The
-<TT>addr</TT>
-file is written to set a character position within the body; the
-<TT>data</TT>
-file may then be read to recover the contents at that position,
-or written to change them.
-(The tag is assumed
-small and special-purpose enough not to need special treatment.
-Also,
-<TT>addr</TT>
-indexes by character position, which is not the same as byte offset
-in Plan 9's multi-byte character set [Pike93].)
-The format accepted by the
-<TT>addr</TT>
-file is exactly the syntax of addresses within the user interface,
-permitting regular expressions, line numbers, and compound addresses
-to be specified.  For example, to replace the contents of lines 3 through 7,
-write the text
-<DL><DT><DD><TT><PRE>
-3,7
-</PRE></TT></DL>
-to the
-<TT>addr</TT>
-file, then write the replacement text to the
-<TT>data</TT>
-file.  A zero-length write deletes the addressed text; further writes extend the replacement.
-</P>
-<P>
-The control file,
-<TT>ctl</TT>,
-may be written with commands to effect actions on the window; for example
-the command
-<DL><DT><DD><TT><PRE>
-name /adm/users
-</PRE></TT></DL>
-sets the name in the tag of the window to
-<TT>/adm/users</TT>.
-Other commands allow deleting the window, writing it to a file, and so on.
-Reading the
-<TT>ctl</TT>
-file recovers a fixed-format string containing 5 textual numbers&#8212;the window
-identifier, the number of characters in the tag, the number in the body,
-and some status information&#8212;followed by the text of the tag, up to a newline.
-</P>
-<P>
-The last file,
-<TT>event</TT>,
-is the most unusual.
-A program reading a window's
-<TT>event</TT>
-file is notified of all changes to the text of the window, and
-is asked to interpret all middle- and right-button actions.
-The data passed to the program is fixed-format and reports
-the source of the action (keyboard, mouse, external program, etc.),
-its location (what was pointed at or modified), and its nature (change,
-search, execution, etc.).
-This message, for example,
-<DL><DT><DD><TT><PRE>
-MI15 19 0 4 time
-</PRE></TT></DL>
-reports that actions of the mouse
-(<TT>M</TT>)
-inserted in the body (capital
-<TT>I</TT>)
-the 4 characters of
-<TT>time</TT>
-at character positions 15 through 19; the zero is a flag word.
-Programs may apply their own interpretations of searching and
-execution, or may simply reflect the events back to Acme,
-by writing them back to the
-<TT>event</TT>
-file, to have the default interpretation applied.
-Some examples of these ideas in action are presented below.
-</P>
-<P>
-Notice that changes to the window are reported
-after the fact; the program is told about them but is not required to act
-on them.  Compare this to a more traditional interface in which a program
-is told, for example, that a character has been typed on the keyboard and
-must then display and interpret it.
-Acme's style stems from the basic model of the system, in which any
-number of agents&#8212;the keyboard, mouse, external programs
-writing to
-<TT>data</TT>
-or
-<TT>body</TT>,
-and so on&#8212;may
-change the contents of a window.
-The style is efficient: many programs are content
-to have Acme do most of the work and act only when the editing is completed.
-An example is the Acme mail program, which can ignore the changes
-made to a message being composed
-and just read its body when asked to send it.
-A disadvantage is that some traditional ways of working are impossible.
-For example, there is no way `to turn off echo': characters appear on the
-screen and are read from there; no agent or buffer stands between
-the keyboard and the display.
-</P>
-<P>
-There are a couple of other files made available by Acme in its root directory
-rather than in the directory of each window.
-The text file
-<TT>/mnt/acme/index</TT>
-holds a list of all window names and numerical identifiers,
-somewhat analogous to the output of the
-<TT>ps</TT>
-command for processes.
-The most important, though, is
-<TT>/mnt/acme/new</TT>,
-a directory that makes new windows, similar to the
-<TT>clone</TT>
-directory in the Plan 9 network devices [Pres93].
-The act of opening any file in
-<TT>new</TT>
-creates a new Acme window; thus the shell command
-<DL><DT><DD><TT><PRE>
-grep -n var *.c &#62; /mnt/acme/new/body
-</PRE></TT></DL>
-places its output in the body of a fresh window.
-More sophisticated applications may open
-<TT>new/ctl</TT>,
-read it to discover the new window's identifier, and then
-open the window's other files in the numbered directory.
-</P>
-<H4>Acme-specific programs
-</H4>
-<P>
-Although Acme is in part an attempt to move beyond typescripts,
-they will probably always have utility.
-The first program written for Acme was therefore one
-to run a shell or other traditional interactive application
-in a window, the Acme analog of
-<TT>xterm</TT>.
-This program,
-<TT>win</TT>,
-has a simple structure:
-it acts as a two-way intermediary between Acme and the shell,
-cross-connecting the standard input and output of the shell to the
-text of the window.
-The style of interaction is modeled after
-<TT>mux</TT>
-[Pike88]: standard output is added to the window at the
-<I>output point;</I>
-text typed after the output point
-is made available on standard input when a newline is typed.
-After either of these actions, the output point is advanced.
-This is different from the working of a regular terminal,
-permitting cut-and-paste editing of an input line until the newline is typed.
-Arbitrary editing may be done to any text in the window.
-The implementation of
-<TT>win</TT>,
-using the
-<TT>event</TT>,
-<TT>addr</TT>,
-and
-<TT>data</TT>
-files, is straightforward.
-<TT>Win</TT>
-needs no code for handling the keyboard and mouse; it just monitors the
-contents of the window.  Nonetheless, it allows Acme's full editing to be
-applied to shell commands.
-The division of labor between
-<TT>win</TT>
-and
-<TT>Acme</TT>
-contrasted with
-<TT>xterm</TT>
-and the X server demonstrates how much work Acme handles automatically.
-<TT>Win</TT>
-is implemented by a single source file 560 lines long and has no graphics code.
-</P>
-<P>
-<TT>Win</TT>
-uses the middle and right buttons to connect itself in a consistent way
-with the rest of Acme.
-The middle button still executes commands, but in a style more suited
-to typescripts.  Text selected with the middle button is treated as if
-it had been typed after the output point, much as a similar feature in
-<TT>xterm</TT>
-or
-<TT>8&#189;</TT>,
-and therefore causes it to be `executed' by the application running in the window.
-Right button actions are reflected back to Acme but refer to the appropriate
-files because
-<TT>win</TT>
-places the name of the current directory in the tag of the window.
-If the shell is running, a simple shell function replacing the
-<TT>cd</TT>
-command can maintain the tag as the shell navigates the file system.
-This means, for example, that a right button click on a file mentioned in an
-<TT>ls</TT>
-listing opens the file within Acme.
-</P>
-<P>
-Another Acme-specific program is a mail reader that begins by presenting,
-in a window, a listing of the messages in the user's mailbox, one per line.
-Here the middle and right button actions are modified to refer to
-mail commands
-and messages, but the change feels natural.
-Clicking the right button on a line creates a new window and displays the
-message there, or, if it's already displayed, moves the mouse to that window.
-The metaphor is that the mailbox is a directory whose constituent files are messages.
-The mail program also places some relevant commands in the tag lines of
-the windows; for example, executing the word
-<TT>Reply</TT>
-in a message's tag creates a new window
-in which to compose a message to the sender of the original;
-<TT>Post</TT>
-then dispatches it.
-In such windows, the addressee is just a list of names
-on the first line of the body, which may be edited to add or change recipients.
-The program also monitors the mailbox, updating the `directory' as new messages
-arrive.
-</P>
-<P>
-The mail program is as simple as it sounds; all the work of interaction,
-editing, and management of the display is done by Acme.
-The only
-difficult sections of the 1200
-lines of code concern honoring the external protocols for managing
-the mailbox and connecting to
-<TT>sendmail</TT>.
-</P>
-<P>
-One of the things Acme does not provide directly is a facility like
-Sam's command language to enable actions such as global substitution;
-within Acme, all editing is done manually.
-It is easy, though, to write external programs for such tasks.
-In this, Acme comes closer to the original intent of Oberon:
-a directory,
-<TT>/acme/edit</TT>,
-contains a set of tools for repetitive editing and a template
-or `guide' file that gives examples
-of its use.  
-Acme's editing guide,
-<TT>/acme/edit/guide</TT>,
-looks like this:
-<DL><DT><DD><TT><PRE>
-e file | x '/regexp/' | c 'replacement'
-e file:'0,$' | x '/.*word.*\n/' | p -n
-e file | pipe command args ...
-</PRE></TT></DL>
-The syntax is reminiscent of Sam's command language, but here the individual
-one-letter commands are all stand-alone programs connected by pipes.
-Passed along the pipes are addresses, analogous to structural expressions
-in Sam terminology.
-The
-<TT>e</TT>
-command, unlike that of Sam, starts the process by generating the address
-(default dot, the highlighted selection) in the named files.
-The other commands are as in Sam:
-<TT>p</TT>
-prints the addressed text on standard output (the
-<TT>-n</TT>
-option is analogous to that of
-<TT>grep</TT>,
-useful in combination with the right mouse button);
-<TT>x</TT>
-matches a regular expression to the addressed (incoming) text,
-subdividing the text;
-<TT>c</TT>
-replaces the text; and so on.  Thus, global substitution throughout a file,
-which would be expressed in Sam as
-<DL><DT><DD><TT><PRE>
-0,$ x/regexp/ c/replacement/
-</PRE></TT></DL>
-in Acme's editor becomes
-<DL><DT><DD><TT><PRE>
-e 'file:0,$' | x '/regexp/' | c 'replacement'
-</PRE></TT></DL>
-</P>
-<P>
-To use the Acme editing commands, open
-<TT>/acme/edit/guide</TT>,
-use the mouse and keyboard to edit one of the commands to the right form,
-and execute it with the middle button.
-Acme's context rules find the appropriate binaries in
-<TT>/acme/edit</TT>
-rather than
-<TT>/bin</TT>;
-the effect is to turn
-<TT>/acme/edit</TT>
-into a toolbox containing tools and instructions (the guide file) for their use.
-In fact, the source for these tools is also there, in the directory
-<TT>/acme/edit/src</TT>.
-This setup allows some control of the file name space for binary programs;
-not only does it group related programs, it permits the use of common
-names for uncommon jobs.  For example, the single-letter names would
-be unwise in a directory in everyone's search path; here they are only
-visible when running editing commands.
-</P>
-<P>
-In Oberon,
-such a collection would be called a
-<I>tool</I>
-and would consist
-of a set of entry points in a module and a menu-like piece of text containing
-representative commands that may be edited to suit and executed.
-There is, in fact, a tool called
-<TT>Edit</TT>
-in Oberon.
-To provide related functionality,
-Acme exploits the directory and file structure of the underlying
-system, rather than the module structure of the language;
-this fits well with Plan 9's
-file-oriented philosophy.
-Such tools are central to the working of Oberon but they are
-less used in Acme, at least so far.
-The main reason is probably that Acme's program interface permits
-an external program to remain executing in the background, providing
-its own commands as needed (for example, the
-<TT>Reply</TT>
-command in the mail program); Oberon uses tools to
-implement such services because it must invoke
-a fresh program for each command.
-Also,
-Acme's better integration allows more
-basic functions to be handled internally; the right mouse button
-covers a lot of the basic utility of the editing tools in Oberon.
-Nonetheless, as more applications are written for Acme,
-many are sure to take this Oberon tool-like form.
-</P>
-<H4>Comparison with other systems
-</H4>
-<P>
-Acme's immediate ancestor is Help [Pike92], an experimental system written
-a few years ago as a first try at exploring some of Oberon's ideas
-in an existing operating system.
-Besides much better engineering, Acme's advances over Help
-include the actions of the right button (Help had nothing comparable),
-the ability to connect long-running programs to the user interface
-(Help had no analog of the
-<TT>event</TT>
-file),
-and the small but important change to split command output into
-windows labeled with the directory in which the commands run.
-</P>
-<P>
-Most of Acme's style, however, derives from the user interface and window
-system of Oberon [Wirt89, Reis91].
-Oberon includes a programming language and operating system,
-which Acme instead borrows from an existing system, Plan 9.
-When I first saw Oberon, in 1988, I was struck by the
-simplicity of its user interface, particularly its lack of menus
-and its elegant use of multiple mouse buttons.
-The system seemed restrictive, though&#8212;single process,
-single language, no networking, event-driven programming&#8212;and
-failed to follow through on some of its own ideas.
-For example, the middle mouse button had to be pointed accurately and
-the right button was essentially unused.
-Acme does follow through:
-to the basic idea planted by Oberon, it adds
-the ability to run on different operating systems and hardware,
-connection to existing applications including
-interactive ones such as shells and debuggers,
-support for multiple processes,
-the right mouse button's features,
-the default actions and context-dependent properties
-of execution and searching,
-and a host of little touches such as moving the mouse cursor that make the system 
-more pleasant.
-At the moment, though, Oberon does have one distinct advantage: it incorporates
-graphical programs well into its model, an issue Acme has not yet faced.
-</P>
-<P>
-Acme shares with the Macintosh a desire to use the mouse well and it is
-worth comparing the results.
-The mouse on the Macintosh has a single button, so menus are essential
-and the mouse must frequently move a long way
-to reach the appropriate function.
-An indication that this style has trouble is that applications provide
-keyboard sequences to invoke menu selections and users often prefer them.
-A deeper comparison is that the Macintosh uses pictures where Acme uses text.
-In contrast to pictures, text can be edited quickly, created on demand,
-and fine-tuned to the job at hand; consider adding an option to a command.
-It is also self-referential; Acme doesn't need menus because any text can be
-in effect a menu item.
-The result is that, although a Macintosh screen is certainly prettier and probably
-more attractive, especially to beginners, an Acme screen is more dynamic
-and expressive, at least for programmers and experienced users.
-</P>
-<P>
-For its role in the overall system,
-Acme most resembles EMACS [Stal93].
-It is tricky to compare Acme to EMACS, though, because there are
-many versions of EMACS and, since it is fully programmable, EMACS
-can in principle do anything Acme does.
-Also, Acme is much younger and therefore has not
-had the time to acquire as many features.
-The issue therefore is less what the systems can be programmed to do than
-how they are used.
-The EMACS versions that come closest to Acme's style are those that
-have been extended to provide a programming environment, usually
-for a language such as LISP [Alle92, Lucid92].
-For richness of the existing interface, these EMACS versions are certainly superior to Acme.
-On the other hand, Acme's interface works equally well already for a variety
-of languages; for example, one of its most enthusiastic users works almost
-exclusively in Standard ML, a language nothing like C.
-</P>
-<P>
-Where Acme excels is in the smoothness of its interface.
-Until recently, EMACS did not support the mouse especially well,
-and even with the latest version providing features such as `extents'
-that can be programmed to behave much like Acme commands,
-many users don't bother to upgrade.
-Moreover, in the versions that provide extents, 
-most EMACS packages don't take advantage of them.
-</P>
-<P>
-The most important distinction is just that
-EMACS is fundamentally keyboard-based, while
-Acme is mouse-based.
-</P>
-<P>
-People who try Acme find it hard to go back to their previous environment.
-Acme automates so much that to return to a traditional interface
-is to draw attention to the extra work it requires.
-</P>
-<H4>Concurrency in the implementation
-</H4>
-<P>
-Acme is about 8,000 lines of code in Alef, a concurrent object-oriented language syntactically similar to C [Alef].
-Acme's structure is a set of communicating
-processes in a single address space.
-One subset of the processes drives the display and user interface,
-maintaining the windows; other processes forward mouse and keyboard
-activity and implement the file server interface for external programs.
-The language and design worked out well;
-as explained elsewhere [Pike89, Gans93, Reppy93],
-user interfaces built with concurrent systems
-can avoid the clumsy
-top-level event loop typical of traditional interactive systems.
-</P>
-<P>
-An example of the benefits of the multi-process style
-is the management of the state of open
-files held by clients of the file system interface.
-The problem is that some I/O requests,
-such as reading the
-<TT>event</TT>
-file, may block if no data is available, and the server must
-maintain the state of (possibly many) requests until data appears.
-For example,
-in
-<TT>8&#189;</TT>,
-a single-process window system written in C, pending requests were queued in
-a data structure associated with each window.
-After activity in the window that might complete pending I/O,
-the data structure was scanned for requests that could now finish.
-This structure did not fit well with the rest of the program and, worse,
-required meticulous effort
-to guarantee correct behavior under all conditions
-(consider raw mode, reads of partial lines, deleting a window,
-multibyte characters, etc.).
-</P>
-<P>
-Acme instead creates a new dedicated process
-for each I/O request.
-This process coordinates with the rest of the system
-using Alef's synchronous communication;
-its state implicitly encodes the state of
-the I/O request and obviates the need for queuing.
-The passage of the request through Acme proceeds as follows.
-</P>
-<P>
-Acme contains a file server process, F, that executes a
-<TT>read</TT>
-system call to receive a Plan 9 file protocol (9P) message from the client [AT&amp;T92].
-The client blocks until Acme answers the request.
-F communicates with an allocation process, M,
-to acquire an object of type
-<TT>Xfid</TT>
-(`executing fid'; fid is a 9P term)
-to hold the request.
-M sits in a loop (reproduced in Figure 2) waiting for either a request for
-a new
-<TT>Xfid</TT>
-or notification that an existing one has finished its task.
-When an
-<TT>Xfid</TT>
-is created, an associated process, X,
-is also made.
-M queues idle
-<TT>Xfids</TT>,
-allocating new ones only when the list is empty.
-Thus, there is always a pool of
-<TT>Xfids</TT>,
-some executing, some idle.
-</P>
-<P>
-The
-<TT>Xfid</TT>
-object contains a channel,
-<TT>Xfid.c</TT>,
-for communication with its process;
-the unpacked message; and some associated functions,
-mostly corresponding to 9P messages such as
-<TT>Xfid.write</TT>
-to handle a 9P write request.
-</P>
-<P>
-The file server process F parses the message to see its nature&#8212;open,
-close, read, write, etc.  Many messages, such as directory
-lookups, can be handled immediately; these are responded to directly
-and efficiently
-by F without invoking the
-<TT>Xfid</TT>,
-which is therefore maintained until the next message.
-When a message, such as a write to the display, requires the attention
-of the main display process and interlocked access to its data structures,
-F enables X
-by sending a function pointer on
-<TT>Xfid.c</TT>.
-For example, if the message is a write, F executes
-<DL><DT><DD><TT><PRE>
-x-&#62;c &#60;-= Xfid.write;
-</PRE></TT></DL>
-which sends
-the address of
-<TT>Xfid.write</TT>
-on
-<TT>Xfid.c</TT>,
-waking up X.
-</P>
-<P>
-The
-<TT>Xfid</TT>
-process, X, executes a simple loop:
-<DL><DT><DD><TT><PRE>
-void
-Xfid.ctl(Xfid *x)
-{
-    for(;;){
-        (*&#60;-x-&#62;c)(x);      /* receive and execute message */
-        bflush();          /* synchronize bitmap display */
-        cxfidfree &#60;-= x;   /* return to free list */
-    }
-}
-</PRE></TT></DL>
-Thus X
-will wake up with the address of a function to call (here
-<TT>Xfid.write</TT>)
-and execute it; once that completes, it returns itself to the pool of
-free processes by sending its address back to the allocator.
-</P>
-<P>
-Although this sequence may seem complicated, it is just a few lines
-of code and is in fact far simpler
-than the management of the I/O queues in
-<TT>8&#189;</TT>.
-The hard work of synchronization is done by the Alef run time system.
-Moreover, the code worked the first time, which cannot be said for the code in
-<TT>8&#189;</TT>.
-</P>
-<H4>Undo
-</H4>
-<P>
-Acme provides a general undo facility like that of Sam, permitting
-textual changes to be unwound arbitrarily.
-The implementation is superior to Sam's, though,
-with much higher performance and the ability to `redo' changes.
-</P>
-<P>
-Sam uses
-a multi-pass algorithm that builds
-a transcript of changes to be made simultaneously
-and then executes them atomically.
-This was thought necessary because the elements of a repetitive
-command such as a global substitution should all be applied to the same
-initial file and implemented simultaneously; forming the complete
-transcript before executing any of the changes avoids the
-cumbersome management of addresses in a changing file.
-Acme, however, doesn't have this problem; global substitution
-is controlled externally and may be made incrementally by exploiting
-an observation: if the changes are sorted in address order and
-executed in reverse, changes will not invalidate the addresses of
-pending changes.
-</P>
-<P>
-Acme therefore avoids the initial transcript.  Instead, changes are applied
-directly to the file, with an undo transcript recorded in a separate list.
-For example, when text is added to a window, it is added directly and a record
-of what to delete to restore the state is appended to the undo list.
-Each undo action and the file are marked with a sequence number;
-actions with the same sequence number are considered a unit
-to be undone together.
-The invariant state of the structure
-is that the last action in the undo list applies to the current state of the file,
-even if that action is one of a related set from, for example, a global substitute.
-(In Sam, a related set of actions needed to be undone simultaneously.)
-To undo an action, pop the last item on the undo list, apply it to the file,
-revert it, and append it to a second, redo list.
-To redo an action, do the identical operation with the lists interchanged.
-The expensive operations occur
-only when actually undoing; in normal editing the overhead is minor.
-For example, Acme reads files about seven times faster than Sam, partly
-because of this improvement and partly because of a cleaner implementation.
-</P>
-<P>
-Acme uses a temporary file to hold the text, keeping in memory only the
-visible portion, and therefore can edit large files comfortably
-even on small-memory machines such as laptops.
-</P>
-<H4>Future
-</H4>
-<P>
-Acme is still under development.
-Some things are simply missing.
-For example, Acme should support non-textual graphics, but this is being
-deferred until it can be done using a new graphics model being developed
-for Plan 9.  Also, it is undecided how Acme's style of interaction should best be
-extended to graphical applications.
-On a smaller scale, although the system feels smooth and comfortable,
-work continues to tune the heuristics and
-try new ideas for the user interface.
-</P>
-<P>
-There need to be more programs that use Acme.  Browsers for
-Usenet and AP News articles, the Oxford English Dictionary, and other
-such text sources exist, but more imaginative applications will
-be necessary to prove that Acme's approach is viable.
-One that has recently been started is an interface to the debugger Acid [Wint94],
-although it is still
-unclear what form it will ultimately take.
-</P>
-<P>
-Acme shows that it is possible to make a user interface a stand-alone component
-of an interactive environment.  By absorbing more of the interactive
-functionality than a simple window system, Acme off-loads much of the
-computation from its applications, which helps keep them small and
-consistent in their interface.  Acme can afford to dedicate
-considerable effort to making that interface as good as possible; the result
-will benefit the entire system.
-</P>
-<P>
-Acme is complete and useful enough to attract users.
-Its comfortable user interface,
-the ease with which it handles multiple tasks and
-programs in multiple directories,
-and its high level of integration
-make it addictive.
-Perhaps most telling,
-Acme shows that typescripts may not be the most
-productive interface to a time-sharing system.
-</P>
-<H4>Acknowledgements
-</H4>
-<P>
-Howard Trickey, Acme's first user, suffered buggy versions gracefully and made
-many helpful suggestions.  Chris Fraser provided the necessary insight for the Acme editing
-commands.
-</P>
-<H4>References
-</H4>
-<br>&#32;<br>
-[Alef] P. Winterbottom,
-``Alef Language Reference Manual'',
-Plan 9 Programmer's Manual,
-AT&amp;T Bell Laboratories,
-Murray Hill, NJ,
-1992;
-revised in this volume.
-<br>
-[Alle92]
-Allegro Common Lisp User Guide, Vol 2, 
-Chapter 14, "The Emacs-Lisp Interface". 
-March 1992.
-<br>
-[AT&amp;T92] Plan 9 Programmer's manual, Murray Hill, New Jersey, 1992.
-<br>
-[Far89] Far too many people, XTERM(1), Massachusetts Institute of Technology, 1989.
-<br>
-[Gans93] Emden R. Gansner and John H. Reppy,  ``A Multi-threaded Higher-order User Interface Toolkit'', in
-Software Trends, Volume 1,
-User Interface Software,
-Bass and Dewan (Eds.),
-John Wiley &amp; Sons 1993,
-pp. 61-80.
-<br>
-[Lucid92] Richard Stallman and Lucid, Inc.,
-Lucid GNU EMACS Manual,
-March 1992.
-<br>
-[Pike87] Rob Pike, ``The Text Editor <TT>sam</TT>'', Softw. - Pract. and Exp., Nov 1987, Vol 17 #11, pp. 813-845; reprinted in this volume.
-<br>
-[Pike88] Rob Pike, ``Window Systems Should Be Transparent'', Comp. Sys., Summer 1988, Vol 1 #3, pp. 279-296.
-<br>
-[Pike89] Rob Pike, ``A Concurrent Window System'', Comp. Sys., Spring 1989, Vol 2 #2, pp. 133-153.
-<br>
-[PPTTW93] Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom, ``The Use of Name Spaces in Plan 9'',
-Op. Sys. Rev.,  Vol. 27, No. 2, April 1993, pp. 72-76,
-reprinted in this volume.
-<br>
-[Pike91] Rob Pike, ``8&#189;, the Plan 9 Window System'', USENIX Summer Conf. Proc., Nashville, June, 1991, pp. 257-265,
-reprinted in this volume.
-<br>
-[Pike92] Rob Pike, ``A Minimalist Global User Interface'', Graphics Interface '92 Proc., Vancouver, 1992, pp. 282-293.  An earlier version appeared under the same title in USENIX Summer Conf. Proc., Nashville, June, 1991, pp. 267-279.
-<br>
-[Pike93] Rob Pike and Ken Thompson, ``Hello World or &#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949; or
-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'', USENIX Winter Conf. Proc., San Diego, 1993, pp. 43-50,
-reprinted in this volume.
-<br>
-[Pres93] Dave Presotto and Phil Winterbottom, ``The Organization of Networks in Plan 9'', Proc. Usenix Winter 1993, pp. 271-287, San Diego, CA,
-reprinted in this volume.
-<br>
-[Reis91] Martin Reiser, <I>The Oberon System,</I> Addison Wesley, New York, 1991.
-<br>
-[Reppy93] John H. Reppy,
-``CML: A higher-order concurrent language'', Proc. SIGPLAN'91 Conf. on Programming, Lang. Design and Impl., June, 1991, pp. 293-305.
-<br>
-[Sche86] Robert W. Scheifler and Jim Gettys,
-``The X Window System'',
-ACM Trans. on Graph., Vol 5 #2, pp. 79-109.
-<br>
-[Stal93] Richard Stallman,
-Gnu Emacs Manual, 9th edition, Emacs version 19.19,
-MIT.
-<br>
-[Swei86] Daniel Swinehart, Polle Zellweger, Richard Beach, and Robert Hagmann,
-``A Structural View of the Cedar Programming Environment'',
-ACM Trans. Prog. Lang. and Sys., Vol. 8, No. 4, pp. 419-490, Oct. 1986.
-<br>
-[Wint94] Philip Winterbottom, ``Acid: A Debugger based on a Language'', USENIX Winter Conf. Proc., San Francisco, CA, 1993,
-reprinted in this volume.
-<br>
-[Wirt89] N. Wirth and J. Gutknecht, ``The Oberon System'', Softw. - Prac. and Exp., Sep 1989, Vol 19 #9, pp 857-894.
-
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 532
sys/doc/ape.html

@@ -1,532 +0,0 @@
-<html>
-<title>
-APE &#8212; The ANSI/POSIX Environment
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>APE &#8212; The ANSI/POSIX Environment
-</H1>
-<DL><DD><I>Howard Trickey<br>
-howard@plan9.bell-labs.com<br>
-</I></DL>
-<H4>Introduction
-</H4>
-<P>
-When a large or frequently-updated program must be ported
-to or from Plan 9, the ANSI/POSIX environment known as APE can be useful.
-APE combines the set of headers and object code libraries specified by
-the ANSI C standard (ANSI X3.159-1989) with the POSIX operating system
-interface standard (IEEE 1003.1-1990, ISO 9945-1), the part of POSIX
-defining the basic operating system functions.
-Using APE will cause slower compilation and marginally slower execution speeds,
-so if the importing or exporting happens only infrequently, due consideration
-should be given to using the usual Plan 9 compilation environment instead.
-Another factor to consider is that the Plan 9 header organization is
-much simpler to remember and use.
-</P>
-<P>
-There are some aspects of required POSIX behavior that are impossible or
-very hard to simulate in Plan 9.  They are described below.
-Experience has shown, however, that the simulation is adequate for the
-vast majority of programs.  A much more common problem is that
-many programs use functions or headers not defined by POSIX.
-APE has some extensions to POSIX to help in this regard.
-Extensions must be explicitly enabled with an appropriate
-<TT>#define</TT>,
-in order that the APE environment be a good aid for testing
-ANSI/POSIX compliance of programs.
-</P>
-<H4>Pcc
-</H4>
-<P>
-The
-<TT>pcc</TT>
-command acts as a front end to the Plan 9 C compilers and loaders.
-It runs an ANSI C preprocessor over source files, using the APE
-headers to satisfy
-<TT>#include &lt;</TT><I>file</I><TT>&gt;</TT>
-directives; then it runs a Plan 9 C compiler; finally, it may load
-with APE libraries to produce an executable program.
-The document
-<I>How to Use the Plan 9 C Compiler</I>
-explains how environment variables are used by convention to
-handle compilation for differing architectures.
-The environment variable
-<TT>$objtype</TT>
-controls which Plan 9 compiler and loader are used by
-<TT>pcc</TT>,
-as well as the location of header and library files.
-For example, if
-<TT>$objtype</TT>
-is
-<TT>mips</TT>,
-then
-<TT>pcc</TT>
-has
-<TT>cpp</TT>
-look for headers in
-<TT>/mips/include/ape</TT>
-followed by
-<TT>/sys/include/ape</TT>;
-then
-<TT>pcc</TT>
-uses
-<TT>vc</TT>
-to create
-<TT>.v</TT>
-object files;
-finally,
-<TT>vl</TT>
-is used to create an executable using libraries in
-<TT>/mips/lib/ape</TT>.
-</P>
-<H4>Psh and Cc
-</H4>
-<P>
-The
-<TT>pcc</TT>
-command is intended for uses where the source code is
-ANSI/POSIX, but the programs are built in the usual Plan 9
-manner &#8212; with
-<TT>mk</TT>
-and producing object files with names ending in
-<TT>.v</TT>,
-etc.
-Sometimes it is best to use the standard POSIX
-<TT>make</TT>
-and
-<TT>cc</TT>
-(which produces object files with names ending in
-<TT>.o</TT>,
-and automatically calls the loader unless
-<TT>-c</TT>
-is specified).
-Under these circumstances, execute the command:
-<DL><DT><DD><TT><PRE>
-<TT>ape/psh</TT>
-</PRE></TT></DL>
-This starts a POSIX shell, with an environment that
-includes the POSIX commands
-<TT>ar89</TT>,
-<TT>c89</TT>,
-<TT>cc</TT>,
-<TT>basename</TT>,
-<TT>dirname</TT>,
-<TT>expr</TT>,
-<TT>false</TT>,
-<TT>grep</TT>,
-<TT>kill</TT>,
-<TT>make</TT>,
-<TT>rmdir</TT>,
-<TT>sed</TT>,
-<TT>sh</TT>,
-<TT>stty</TT>,
-<TT>true</TT>,
-<TT>uname</TT>,
-and
-<TT>yacc</TT>.
-There are also a few placeholders for commands that cannot be
-implemented in Plan 9:
-<TT>chown</TT>,
-<TT>ln</TT>,
-and
-<TT>umask</TT>.
-</P>
-<P>
-The
-<TT>cc</TT>
-command accepts the options mandated for
-the POSIX command
-<TT>c89</TT>,
-as specified in the C-Language Development Utilities Option
-annex of the POSIX Shell and Utilities standard.
-It also accepts the following nonstandard options:
-<TT>-v</TT>
-for echoing the commands for each pass to stdout;
-<TT>-A</TT>
-to turn on ANSI prototype warnings;
-<TT>-S</TT>
-to leave assembly language in
-<I>file</I>.s;
-<TT>-Wp,</TT><I>args</I><TT></TT>
-to pass
-<I>args</I>
-to the
-<TT>cpp</TT>;
-<TT>-W0,</TT><I>args</I><TT></TT>
-to pass
-<I>args</I>
-to 2c, etc.;
-and
-<TT>-Wl,</TT><I>args</I><TT></TT>
-to pass
-<I>args</I>
-to 2l, etc.
-</P>
-<P>
-The
-<TT>sh</TT>
-command is pdksh, a mostly POSIX-compliant public domain Korn Shell.
-The Plan 9 implementation does not include
-the emacs and vi editing modes.
-</P>
-<P>
-The
-<TT>stty</TT>
-command only has effect if the
-<TT>ape/ptyfs</TT>
-command has been started to interpose a pseudo-tty interface
-between
-<TT>/dev/cons</TT>
-and the running command.
-None of the distributed commands do this automatically.
-</P>
-<H4>Symbols
-</H4>
-<P>
-The C and POSIX standards require that certain symbols be
-defined in headers.
-They also require that certain other classes of symbols not
-be defined in the headers, and specify certain other
-symbols that may be defined in headers at the discretion
-of the implementation.
-POSIX defines
-<I>feature test macros</I>,
-which are preprocessor symbols beginning with an underscore
-and then a capital letter;  if the program
-<TT>#defines</TT>
-a feature test macro before the inclusion of any headers,
-then it is requesting that certain symbols be visible in the headers.
-The most important feature test macro is
-<TT>_POSIX_SOURCE</TT>:
-when it is defined, exactly the symbols required by POSIX are
-visible in the appropriate headers.
-Consider
-<TT>&lt;signal.h&gt;</TT>
-for example:
-ANSI defines some names that must be defined in
-<TT>&lt;signal.h&gt;</TT>,
-but POSIX defines others, such as
-<TT>sigset_t</TT>,
-which are not allowed according to ANSI.
-The solution is to make the additional symbols visible only when
-<TT>_POSIX_SOURCE</TT>
-is defined.
-</P>
-<P>
-To export a program, it helps to know whether it fits
-in one of the following categories:
-</P>
-<DL COMPACT>
-<DT>1.<DD>
-Strictly conforming ANSI C program. It only uses features of the language,
-libraries, and headers explicitly required by the C standard.  It does not
-depend on unspecified, undefined, or implementation-dependent behavior,
-and does not exceed any minimum implementation limit.
-<DT>2.<DD>
-Strictly conforming POSIX program. Similar, but for the POSIX standard as well.
-<DT>3.<DD>
-Some superset of POSIX, with extensions.  Each extension
-is selected by a feature test macro, so it is clear which extensions
-are being used.
-</dl>
-<P>
-With APE, if headers are always included to declare any library functions
-used, then the set of feature test macros defined by a program will
-show which of the above categories the program is in.
-To accomplish this, no symbol is defined in a header if it is not required
-by the C or POSIX standard, and those required by the POSIX standard
-are protected by
-<TT>#ifdef _POSIX_SOURCE</TT>.
-For example,
-<TT>&lt;errno.h&gt;</TT>
-defines
-<TT>EDOM</TT>,
-<TT>ERANGE</TT>,
-and
-<TT>errno</TT>,
-as required by the C standard.
-The C standard allows more names beginning with
-<TT>E</TT>,
-but our header defines only those unless
-<TT>_POSIX_SOURCE</TT>
-is defined, in which case the symbols required by POSIX are also defined.
-This means that a program that uses
-<TT>ENAMETOOLONG</TT>
-cannot masquerade as a strictly conforming ANSI C program.
-</P>
-<P>
-<TT>Pcc</TT>
-and
-<TT>cc</TT>
-do not predefine any preprocessor symbols except those required by
-the ANSI C standard:
-<TT>__STDC__</TT>,
-<TT>__LINE__</TT>,
-<TT>__FILE__</TT>,
-<TT>__DATE__</TT>,
-and
-<TT>__TIME__</TT>.
-Any others must be defined in the program itself or by using
-<TT>-D</TT>
-on the command line.
-</P>
-<H4>Extensions
-</H4>
-<P>
-The discipline enforced by putting only required
-names in the headers is useful for exporting programs,
-but it gets in the way when importing programs.
-The compromise is to allow additional symbols in headers,
-additional headers, and additional library functions,
-but only under control of extension feature test macros.
-The following extensions are provided; unless otherwise
-specified, the additional library functions are in the
-default APE library.
-</P>
-<DL COMPACT>
-<DT>   -<DD>
-<TT>_LIBG_EXTENSION</TT>.
-This allows the use of the Plan 9 graphics library.
-The functions are as described in the Plan 9 manual (see
-<A href="/magic/man2html/2/graphics"><I>graphics</I>(2)</A>)
-except that
-<TT>div</TT>
-had to be renamed
-<TT>ptdiv</TT>.
-Include the
-<TT>&lt;libg.h&gt;</TT>
-header to declare the needed types and functions.
-<DT>   -<DD>
-<TT>_LIMITS_EXTENSION</TT>.
-POSIX does not require that names such as
-<TT>PATH_MAX</TT>
-and
-<TT>OPEN_MAX</TT>
-be defined in
-<TT>&lt;limits.h&gt;</TT>,
-but many programs assume they are defined there.
-If
-<TT>_LIMITS_EXTENSION</TT>
-is defined, those names will all be defined when
-<TT>&lt;limits.h&gt;</TT>
-is included.
-<DT>   -<DD>
-<TT>_BSD_EXTENSION</TT>.
-This extension includes not only Berkeley Unix routines,
-but also a grab bag of other miscellaneous routines often
-found in Unix implementations.
-The extension allows the inclusion of any of:
-<TT>&lt;bsd.h&gt;</TT>
-for
-<TT>bcopy()</TT>,
-<TT>bcmp()</TT>,
-and similar Berkeley functions;
-<TT>&lt;netdb.h&gt;</TT>
-for
-<TT>gethostbyname()</TT>,
-etc.,
-and associated structures;
-<TT>&lt;select.h&gt;</TT>
-for the Berkeley
-<TT>select</TT>
-function and associated types and macros
-for dealing with multiple input sources;
-<TT>&lt;sys/ioctl.h&gt;</TT>
-for the
-<TT>ioctl</TT>
-function (minimally implemented);
-<TT>&lt;sys/param.h&gt;</TT>
-for
-<TT>NOFILES_MAX</TT>;
-<TT>&lt;sys/pty.h&gt;</TT>
-for pseudo-tty support via the
-<TT>ptsname(int)</TT>
-and
-<TT>ptmname(int)</TT>
-functions;
-<TT>&lt;sys/resource.h&gt;</TT>;
-<TT>&lt;sys/socket.h&gt;</TT>
-for socket structures, constants, and functions;
-<TT>&lt;sys/time.h&gt;</TT>
-for definitions of the
-<TT>timeval</TT>
-and
-<TT>timezone</TT>
-structures;
-and
-<TT>&lt;sys/uio.h&gt;</TT>
-for the
-<TT>iovec</TT>
-structure and the
-<TT>writev</TT>
-and
-<TT>readv</TT>
-functions used for scatter/gather I/O.
-Defining
-<TT>_BSD_EXTENSION</TT>
-also enables various extra definitions in
-<TT>&lt;ctype.h&gt;</TT>,
-<TT>&lt;signal.h&gt;</TT>,
-<TT>&lt;stdio.h&gt;</TT>,
-<TT>&lt;unistd.h&gt;</TT>,
-<TT>&lt;sys/stat.h&gt;</TT>,
-and
-<TT>&lt;sys/times.h&gt;</TT>.
-<DT>   -<DD>
-<TT>_NET_EXTENSION</TT>.
-This extension allows inclusion of
-<TT>&lt;libnet.h&gt;</TT>,
-which defines the networking functions described in the Plan 9 manual page
-<A href="/magic/man2html/2/dial"><I>dial</I>(2)</A>.
-<DT>   -<DD>
-<TT>_REGEXP_EXTENSION</TT>.
-This extension allows inclusion of
-<TT>&lt;regexp.h&gt;</TT>,
-which defines the regular expression matching functions described
-in the Plan 9 manual page
-<A href="/magic/man2html/2/regexp"><I>regexp</I>(2).
-</A><DT>   -<DD>
-<TT>_RESEARCH_SOURCE</TT>.
-This extension enables a small library of functions from the Tenth Edition Unix
-Research System (V10).
-These functions and the types needed to use them are all defined in the
-<TT>&lt;libv.h&gt;</TT>
-header.
-The provided functions are:
-<TT>srand</TT>,
-<TT>rand</TT>,
-<TT>nrand</TT>,
-<TT>lrand</TT>,
-and
-<TT>frand</TT>
-(better random number generators);
-<TT>getpass</TT>,
-<TT>tty_echoon</TT>,
-<TT>tty_echooff</TT>
-(for dealing with the common needs for mucking with terminal
-characteristics);
-<TT>min</TT>
-and
-<TT>max</TT>;
-<TT>nap</TT>;
-and
-<TT>setfields</TT>,
-<TT>getfields</TT>,
-and
-<TT>getmfields</TT>
-(for parsing a line into fields).
-See the Research Unix System Programmer's Manual, Tenth Edition, for a description
-of these functions.
-</dl>
-<H4>Common Problems
-</H4>
-<P>
-Some large systems, including X11, have been ported successfully
-to Plan 9 using APE
-(the X11 port is not included in the distribution, however,
-because supporting it properly is too big a job).
-The problems encountered fall into three categories:
-(1) non-ANSI C/POSIX features used; (2) inadequate simulation of POSIX functions;
-and (3) compiler/loader bugs.
-By far the majority of problems are in the first category.
-</P>
-<P>
-POSIX is just starting to be a target for programmers.
-Most existing code is written to work with one or both of a BSD or a System V Unix.
-System V is fairly close to POSIX, but there are some differences.
-Also, many System V systems have imported some BSD features that are
-not part of POSIX.
-A good strategy for porting external programs is to first try using
-<TT>CFLAGS=-D_POSIX_SOURCE</TT>;
-if that doesn't work, try adding
-<TT>-D_BSD_EXTENSION</TT>
-and perhaps include
-<TT>&lt;bsd.h&gt;</TT>
-in source files.
-Here are some solutions to problems that might remain:
-</P>
-<DL COMPACT>
-<DT>   -<DD>
-Third (environment) argument to
-<TT>main</TT>.
-Use the
-<TT>environ</TT>
-global instead.
-<DT>   -<DD>
-<TT>OPEN_MAX</TT>,
-<TT>PATH_MAX</TT>,
-etc., assumed in
-<TT>&lt;limits.h&gt;</TT>.
-Rewrite to call
-<TT>sysconf</TT>
-or define
-<TT>_LIMITS_EXTENSION</TT>.
-<DT>   -<DD>
-<TT>&lt;varargs.h&gt;</TT>.
-Rewrite to use
-<TT>&lt;stdarg.h&gt;</TT>.
-</dl>
-<P>
-The second class of problems has to do with inadequacies in the Plan 9
-simulation of POSIX functions.
-These shortcomings have rarely gotten in the way
-(except, perhaps, for the
-<TT>link</TT>
-problem).
-</P>
-<DL COMPACT>
-<DT>   -<DD>
-Functions for setting the userid, groupid, effective userid and effective groupid
-do not do anything useful.  The concept is impossible to simulate in Plan 9.
-<TT>Chown</TT>
-also does nothing.
-<DT>   -<DD>
-<TT>execlp</TT>
-and the related functions do not look at the
-<TT>PATH</TT>
-environment variable.  They just try the current directory and
-<TT>/bin</TT>
-if the pathname is not absolute.
-<DT>   -<DD>
-Advisory locking via
-<TT>fcntl</TT>
-is not implemented.
-<DT>   -<DD>
-<TT>isatty</TT>
-is hard to do correctly.
-The approximation used is only sometimes correct.
-<DT>   -<DD>
-<TT>link</TT>
-always fails.
-<DT>   -<DD>
-With
-<TT>open</TT>,
-the
-<TT>O_NOCTTY</TT>
-option has no effect.
-The concept of a controlling tty is foreign to Plan 9.
-<DT>   -<DD>
-<TT>setsid</TT>
-forks the name space and note group,
-which is only approximately the right behavior.
-<DT>   -<DD>
-The functions dealing with stacking signals,
-<TT>sigpending</TT>,
-<TT>sigprocmask</TT>
-and
-<TT>sigsuspend</TT>,
-do not work.
-<DT>   -<DD>
-<TT>umask</TT>
-has no effect, as there is no such concept in Plan 9.
-<DT>   -<DD>
-Code that does
-<TT>getenv("HOME")</TT>
-should be changed to
-<TT>getenv("home")</TT>
-on Plan 9.
-</dl>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1358
sys/doc/asm.html

@@ -1,1358 +0,0 @@
-<html>
-<title>
-A Manual for the Plan 9 assembler
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>A Manual for the Plan 9 assembler
-</H1>
-<DL><DD><I>Rob Pike<br>
-rob@plan9.bell-labs.com<br>
-</I></DL>
-<H4>Machines
-</H4>
-<P>
-There is an assembler for each of the MIPS, SPARC, Intel 386,
-Intel 960, AMD 29000, Motorola 68020 and 68000, Motorola Power PC, DEC Alpha, and Acorn ARM.
-The 68020 assembler,
-<TT>2a</TT>,
-is the oldest and in many ways the prototype.
-The assemblers are really just variations of a single program:
-they share many properties such as left-to-right assignment order for
-instruction operands and the synthesis of macro instructions
-such as
-<TT>MOVE</TT>
-to hide the peculiarities of the load and store structure of the machines.
-To keep things concrete, the first part of this manual is
-specifically about the 68020.
-At the end is a description of the differences among
-the other assemblers.
-</P>
-<P>
-The document, ``How to Use the Plan 9 C Compiler'', by Rob Pike,
-is a prerequisite for this manual.
-</P>
-<H4>Registers
-</H4>
-<P>
-All pre-defined symbols in the assembler are upper-case.
-Data registers are
-<TT>R0</TT>
-through
-<TT>R7</TT>;
-address registers are
-<TT>A0</TT>
-through
-<TT>A7</TT>;
-floating-point registers are
-<TT>F0</TT>
-through
-<TT>F7</TT>.
-</P>
-<P>
-A pointer in
-<TT>A6</TT>
-is used by the C compiler to point to data, enabling short addresses to
-be used more often.
-The value of
-<TT>A6</TT>
-is constant and must be set during C program initialization
-to the address of the externally-defined symbol
-<TT>a6base</TT>.
-</P>
-<P>
-The following hardware registers are defined in the assembler; their
-meaning should be obvious given a 68020 manual:
-<TT>CAAR</TT>,
-<TT>CACR</TT>,
-<TT>CCR</TT>,
-<TT>DFC</TT>,
-<TT>ISP</TT>,
-<TT>MSP</TT>,
-<TT>SFC</TT>,
-<TT>SR</TT>,
-<TT>USP</TT>,
-and
-<TT>VBR</TT>.
-</P>
-<P>
-The assembler also defines several pseudo-registers that
-manipulate the stack:
-<TT>FP</TT>,
-<TT>SP</TT>,
-and
-<TT>TOS</TT>.
-<TT>FP</TT>
-is the frame pointer, so
-<TT>0(FP)</TT>
-is the first argument,
-<TT>4(FP)</TT>
-is the second, and so on.
-<TT>SP</TT>
-is the local stack pointer, where automatic variables are held
-(SP is a pseudo-register only on the 68020);
-<TT>0(SP)</TT>
-is the first automatic, and so on as with
-<TT>FP</TT>.
-Finally,
-<TT>TOS</TT>
-is the top-of-stack register, used for pushing parameters to procedures,
-saving temporary values, and so on.
-</P>
-<P>
-The assembler and loader track these pseudo-registers so
-the above statements are true regardless of what has been
-pushed on the hardware stack, pointed to by
-<TT>A7</TT>.
-The name
-<TT>A7</TT>
-refers to the hardware stack pointer, but beware of mixed use of
-<TT>A7</TT>
-and the above stack-related pseudo-registers, which will cause trouble.
-Note, too, that the
-<TT>PEA</TT>
-instruction is observed by the loader to
-alter SP and thus will insert a corresponding pop before all returns.
-The assembler accepts a label-like name to be attached to
-<TT>FP</TT>
-and
-<TT>SP</TT>
-uses, such as
-<TT>p+0(FP)</TT>,
-to help document that
-<TT>p</TT>
-is the first argument to a routine.
-The name goes in the symbol table but has no significance to the result
-of the program.
-</P>
-<H4>Referring to data
-</H4>
-<P>
-All external references must be made relative to some pseudo-register,
-either
-<TT>PC</TT>
-(the virtual program counter) or
-<TT>SB</TT>
-(the ``static base'' register).
-<TT>PC</TT>
-counts instructions, not bytes of data.
-For example, to branch to the second following instruction, that is,
-to skip one instruction, one may write
-<DL><DT><DD><TT><PRE>
-	BRA	2(PC)
-</PRE></TT></DL>
-Labels are also allowed, as in
-<DL><DT><DD><TT><PRE>
-	BRA	return
-	NOP
-return:
-	RTS
-</PRE></TT></DL>
-When using labels, there is no
-<TT>(PC)</TT>
-annotation.
-</P>
-<P>
-The pseudo-register
-<TT>SB</TT>
-refers to the beginning of the address space of the program.
-Thus, references to global data and procedures are written as
-offsets to
-<TT>SB</TT>,
-as in
-<DL><DT><DD><TT><PRE>
-	MOVL	$array(SB), TOS
-</PRE></TT></DL>
-to push the address of a global array on the stack, or
-<DL><DT><DD><TT><PRE>
-	MOVL	array+4(SB), TOS
-</PRE></TT></DL>
-to push the second (4-byte) element of the array.
-Note the use of an offset; the complete list of addressing modes is given below.
-Similarly, subroutine calls must use
-<TT>SB</TT>:
-<DL><DT><DD><TT><PRE>
-	BSR	exit(SB)
-</PRE></TT></DL>
-File-static variables have syntax
-<DL><DT><DD><TT><PRE>
-	local&lt;&gt;+4(SB)
-</PRE></TT></DL>
-The
-<TT>&lt;&gt;</TT>
-will be filled in at load time by a unique integer.
-</P>
-<P>
-When a program starts, it must execute
-<DL><DT><DD><TT><PRE>
-	MOVL	$a6base(SB), A6
-</PRE></TT></DL>
-before accessing any global data.
-(On machines such as the MIPS and SPARC that cannot load a register
-in a single instruction, constants are loaded through the static base
-register.  The loader recognizes code that initializes the static
-base register and treats it specially.  You must be careful, however,
-not to load large constants on such machines when the static base
-register is not set up, such as early in interrupt routines.)
-</P>
-<H4>Expressions
-</H4>
-<P>
-Expressions are mostly what one might expect.
-Where an offset or a constant is expected,
-a primary expression with unary operators is allowed.
-A general C constant expression is allowed in parentheses.
-</P>
-<P>
-Source files are preprocessed exactly as in the C compiler, so
-<TT>#define</TT>
-and
-<TT>#include</TT>
-work.
-</P>
-<H4>Addressing modes
-</H4>
-<P>
-The simple addressing modes are shared by all the assemblers.
-Here, for completeness, follows a table of all the 68020 addressing modes,
-since that machine has the richest set.
-In the table,
-<TT>o</TT>
-is an offset, which if zero may be elided, and
-<TT>d</TT>
-is a displacement, which is a constant between -128 and 127 inclusive.
-Many of the modes listed have the same name;
-scrutiny of the format will show what default is being applied.
-For instance, indexed mode with no address register supplied operates
-as though a zero-valued register were used.
-For "offset" read "displacement."
-For "<TT>.s</TT>" read one of
-<TT>.L</TT>,
-or
-<TT>.W</TT>
-followed by
-<TT>*1</TT>,
-<TT>*2</TT>,
-<TT>*4</TT>,
-or
-<TT>*8</TT>
-to indicate the size and scaling of the data.
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-<br><img src="data.19116310.gif"><br>
-</dl>
-<H4>Laying down data
-</H4>
-<P>
-Placing data in the instruction stream, say for interrupt vectors, is easy:
-the pseudo-instructions
-<TT>LONG</TT>
-and
-<TT>WORD</TT>
-(but not
-<TT>BYTE</TT>)
-lay down the value of their single argument, of the appropriate size,
-as if it were an instruction:
-<DL><DT><DD><TT><PRE>
-	LONG	$12345
-</PRE></TT></DL>
-places the long 12345 (base 10)
-in the instruction stream.
-(On most machines,
-the only such operator is
-<TT>WORD</TT>
-and it lays down 32-bit quantities.
-The 386 has all three:
-<TT>LONG</TT>,
-<TT>WORD</TT>,
-and
-<TT>BYTE</TT>.
-The 960 has only one,
-<TT>LONG</TT>.)
-</P>
-<P>
-Placing information in the data section is more painful.
-The pseudo-instruction
-<TT>DATA</TT>
-does the work, given two arguments: an address at which to place the item,
-including its size,
-and the value to place there.  For example, to define a character array
-<TT>array</TT>
-containing the characters
-<TT>abc</TT>
-and a terminating null:
-<DL><DT><DD><TT><PRE>
-	DATA    array+0(SB)/1, $'a'
-	DATA    array+1(SB)/1, $'b'
-	DATA    array+2(SB)/1, $'c'
-	GLOBL   array(SB), $4
-</PRE></TT></DL>
-or
-<DL><DT><DD><TT><PRE>
-	DATA    array+0(SB)/4, $"abc\z"
-	GLOBL   array(SB), $4
-</PRE></TT></DL>
-The
-<TT>/1</TT>
-defines the number of bytes to define,
-<TT>GLOBL</TT>
-makes the symbol global, and the
-<TT>$4</TT>
-says how many bytes the symbol occupies.
-Uninitialized data is zeroed automatically.
-The character
-<TT>\z</TT>
-is equivalent to the C
-<TT>\0.</TT>
-The string in a
-<TT>DATA</TT>
-statement may contain a maximum of eight bytes;
-build larger strings piecewise.
-Two pseudo-instructions,
-<TT>DYNT</TT>
-and
-<TT>INIT</TT>,
-allow the (obsolete) Alef compilers to build dynamic type information during the load
-phase.
-The
-<TT>DYNT</TT>
-pseudo-instruction has two forms:
-<DL><DT><DD><TT><PRE>
-	DYNT	, ALEF_SI_5+0(SB)
-	DYNT	ALEF_AS+0(SB), ALEF_SI_5+0(SB)
-</PRE></TT></DL>
-In the first form,
-<TT>DYNT</TT>
-defines the symbol to be a small unique integer constant, chosen by the loader,
-which is some multiple of the word size.  In the second form,
-<TT>DYNT</TT>
-defines the second symbol in the same way,
-places the address of the most recently
-defined text symbol in the array specified by the first symbol at the
-index defined by the value of the second symbol,
-and then adjusts the size of the array accordingly.
-</P>
-<P>
-The
-<TT>INIT</TT>
-pseudo-instruction takes the same parameters as a
-<TT>DATA</TT>
-statement.  Its symbol is used as the base of an array and the
-data item is installed in the array at the offset specified by the most recent
-<TT>DYNT</TT>
-pseudo-instruction.
-The size of the array is adjusted accordingly.
-The
-<TT>DYNT</TT>
-and
-<TT>INIT</TT>
-pseudo-instructions are not implemented on the 68020.
-</P>
-<H4>Defining a procedure
-</H4>
-<P>
-Entry points are defined by the pseudo-operation
-<TT>TEXT</TT>,
-which takes as arguments the name of the procedure (including the ubiquitous
-<TT>(SB)</TT>)
-and the number of bytes of automatic storage to pre-allocate on the stack,
-which will usually be zero when writing assembly language programs.
-On machines with a link register, such as the MIPS and SPARC,
-the special value -4 instructs the loader to generate no PC save
-and restore instructions, even if the function is not a leaf.
-Here is a complete procedure that returns the sum
-of its two arguments:
-<DL><DT><DD><TT><PRE>
-TEXT	sum(SB), $0
-	MOVL	arg1+0(FP), R0
-	ADDL	arg2+4(FP), R0
-	RTS
-</PRE></TT></DL>
-An optional middle argument
-to the
-<TT>TEXT</TT>
-pseudo-op is a bit field of options to the loader.
-Setting the 1 bit suspends profiling the function when profiling is enabled for the rest of
-the program.
-For example,
-<DL><DT><DD><TT><PRE>
-TEXT	sum(SB), 1, $0
-	MOVL	arg1+0(FP), R0
-	ADDL	arg2+4(FP), R0
-	RTS
-</PRE></TT></DL>
-will not be profiled; the first version above would be.
-Subroutines with peculiar state, such as system call routines,
-should not be profiled.
-</P>
-<P>
-Setting the 2 bit allows multiple definitions of the same
-<TT>TEXT</TT>
-symbol in a program; the loader will place only one such function in the image.
-It was emitted only by the Alef compilers.
-</P>
-<P>
-Subroutines to be called from C should place their result in
-<TT>R0</TT>,
-even if it is an address.
-Floating point values are returned in
-<TT>F0</TT>.
-Functions that return a structure to a C program
-receive as their first argument the address of the location to
-store the result;
-<TT>R0</TT>
-is unused in the calling protocol for such procedures.
-A subroutine is responsible for saving its own registers,
-and therefore is free to use any registers without saving them (``caller saves'').
-<TT>A6</TT>
-and
-<TT>A7</TT>
-are the exceptions as described above.
-</P>
-<H4>When in doubt
-</H4>
-<P>
-If you get confused, try using the
-<TT>-S</TT>
-option to
-<TT>2c</TT>
-and compiling a sample program.
-The standard output is valid input to the assembler.
-</P>
-<H4>Instructions
-</H4>
-<P>
-The instruction set of the assembler is not identical to that
-of the machine.
-It is chosen to match what the compiler generates, augmented
-slightly by specific needs of the operating system.
-For example,
-<TT>2a</TT>
-does not distinguish between the various forms of
-<TT>MOVE</TT>
-instruction: move quick, move address, etc.  Instead the context
-does the job.  For example,
-<DL><DT><DD><TT><PRE>
-	MOVL	$1, R1
-	MOVL	A0, R2
-	MOVW	SR, R3
-</PRE></TT></DL>
-generates official
-<TT>MOVEQ</TT>,
-<TT>MOVEA</TT>,
-and
-<TT>MOVESR</TT>
-instructions.
-A number of instructions do not have the syntax necessary to specify
-their entire capabilities.  Notable examples are the bitfield
-instructions, the
-multiply and divide instructions, etc.
-For a complete set of generated instruction names (in
-<TT>2a</TT>
-notation, not Motorola's) see the file
-<TT>/sys/src/cmd/2c/2.out.h</TT>.
-Despite its name, this file contains an enumeration of the
-instructions that appear in the intermediate files generated
-by the compiler, which correspond exactly to lines of assembly language.
-</P>
-<P>
-The MC68000 assembler,
-<TT>1a</TT>,
-is essentially the same, honoring the appropriate subset of the instructions
-and addressing modes.
-The definitions of these are, nonetheless, part of
-<TT>2.out.h</TT>.
-</P>
-<H4>Laying down instructions
-</H4>
-<P>
-The loader modifies the code produced by the assembler and compiler.
-It folds branches,
-copies short sequences of code to eliminate branches,
-and discards unreachable code.
-The first instruction of every function is assumed to be reachable.
-The pseudo-instruction
-<TT>NOP</TT>,
-which you may see in compiler output,
-means no instruction at all, rather than an instruction that does nothing.
-The loader discards all
-<TT>NOP</TT>'s.
-</P>
-<P>
-To generate a true
-<TT>NOP</TT>
-instruction, or any other instruction not known to the assembler, use a
-<TT>WORD</TT>
-pseudo-instruction.
-Such instructions on RISCs are not scheduled by the loader and must have
-their delay slots filled manually.
-</P>
-<H4>MIPS
-</H4>
-<P>
-The registers are only addressed by number:
-<TT>R0</TT>
-through
-<TT>R31</TT>.
-<TT>R29</TT>
-is the stack pointer;
-<TT>R30</TT>
-is used as the static base pointer, the analogue of
-<TT>A6</TT>
-on the 68020.
-Its value is the address of the global symbol
-<TT>setR30(SB)</TT>.
-The register holding returned values from subroutines is
-<TT>R1</TT>.
-When a function is called, space for the first argument
-is reserved at
-<TT>0(FP)</TT>
-but in C (not Alef) the value is passed in
-<TT>R1</TT>
-instead.
-</P>
-<P>
-The loader uses
-<TT>R28</TT>
-as a temporary.  The system uses
-<TT>R26</TT>
-and
-<TT>R27</TT>
-as interrupt-time temporaries.  Therefore none of these registers
-should be used in user code.
-</P>
-<P>
-The control registers are not known to the assembler.
-Instead they are numbered registers
-<TT>M0</TT>,
-<TT>M1</TT>,
-etc.
-Use this trick to access, say,
-<TT>STATUS</TT>:
-<DL><DT><DD><TT><PRE>
-#define	STATUS	12
-	MOVW	M(STATUS), R1
-</PRE></TT></DL>
-</P>
-<P>
-Floating point registers are called
-<TT>F0</TT>
-through
-<TT>F31</TT>.
-By convention,
-<TT>F24</TT>
-must be initialized to the value 0.0,
-<TT>F26</TT>
-to 0.5,
-<TT>F28</TT>
-to 1.0, and
-<TT>F30</TT>
-to 2.0;
-this is done by the operating system.
-</P>
-<P>
-The instructions and their syntax are different from those of the manufacturer's
-manual.
-There are no
-<TT>lui</TT>
-and kin; instead there are
-<TT>MOVW</TT>
-(move word),
-<TT>MOVH</TT>
-(move halfword),
-and
-<TT>MOVB</TT>
-(move byte) pseudo-instructions.  If the operand is unsigned, the instructions
-are
-<TT>MOVHU</TT>
-and
-<TT>MOVBU</TT>.
-The order of operands is from left to right in dataflow order, just as
-on the 68020 but not as in MIPS documentation.
-This means that the
-<TT>Bcond</TT>
-instructions are reversed with respect to the book; for example, a
-<TT>va</TT>
-<TT>BGTZ</TT>
-generates a MIPS
-<TT>bltz</TT>
-instruction.
-</P>
-<P>
-The assembler is for the R2000, R3000, and most of the R4000 and R6000 architectures.
-It understands the 64-bit instructions
-<TT>MOVV</TT>,
-<TT>MOVVL</TT>,
-<TT>ADDV</TT>,
-<TT>ADDVU</TT>,
-<TT>SUBV</TT>,
-<TT>SUBVU</TT>,
-<TT>MULV</TT>,
-<TT>MULVU</TT>,
-<TT>DIVV</TT>,
-<TT>DIVVU</TT>,
-<TT>SLLV</TT>,
-<TT>SRLV</TT>,
-and
-<TT>SRAV</TT>.
-The assembler does not have any cache, load-linked, or store-conditional instructions.
-</P>
-<P>
-Some assembler instructions are expanded into multiple instructions by the loader.
-For example the loader may convert the load of a 32 bit constant into an
-<TT>lui</TT>
-followed by an
-<TT>ori</TT>.
-</P>
-<P>
-Assembler instructions should be laid out as if there
-were no load, branch, or floating point compare delay slots;
-the loader will rearrange&#173;<I>schedule</I>&#173;the instructions
-to guarantee correctness and improve performance.
-The only exception is that the correct scheduling of instructions
-that use control registers varies from model to model of machine
-(and is often undocumented) so you should schedule such instructions
-by hand to guarantee correct behavior.
-The loader generates
-<DL><DT><DD><TT><PRE>
-	NOR	R0, R0, R0
-</PRE></TT></DL>
-when it needs a true no-op instruction.
-Use exactly this instruction when scheduling code manually;
-the loader recognizes it and schedules the code before it and after it independently.  Also,
-<TT>WORD</TT>
-pseudo-ops are scheduled like no-ops.
-</P>
-<P>
-The
-<TT>NOSCHED</TT>
-pseudo-op disables instruction scheduling
-(scheduling is enabled by default);
-<TT>SCHED</TT>
-re-enables it.
-Branch folding, code copying, and dead code elimination are
-disabled for instructions that are not scheduled.
-</P>
-<H4>SPARC
-</H4>
-<P>
-Once you understand the Plan 9 model for the MIPS, the SPARC is familiar.
-Registers have numerical names only:
-<TT>R0</TT>
-through
-<TT>R31</TT>.
-Forget about register windows: Plan 9 doesn't use them at all.
-The machine has 32 global registers, period.
-<TT>R1</TT>
-[sic] is the stack pointer.
-<TT>R2</TT>
-is the static base register, with value the address of
-<TT>setSB(SB)</TT>.
-<TT>R7</TT>
-is the return register and also the register holding the first
-argument to a C (not Alef) function, again with space reserved at
-<TT>0(FP)</TT>.
-<TT>R14</TT>
-is the loader temporary.
-</P>
-<P>
-Floating-point registers are exactly as on the MIPS.
-</P>
-<P>
-The control registers are known by names such as
-<TT>FSR</TT>.
-The instructions to access these registers are
-<TT>MOVW</TT>
-instructions, for example
-<DL><DT><DD><TT><PRE>
-	MOVW	Y, R8
-</PRE></TT></DL>
-for the SPARC instruction
-<DL><DT><DD><TT><PRE>
-	rdy	%r8
-</PRE></TT></DL>
-</P>
-<P>
-Move instructions are similar to those on the MIPS: pseudo-operations
-that turn into appropriate sequences of
-<TT>sethi</TT>
-instructions, adds, etc.
-Instructions read from left to right.  Because the arguments are
-flipped to
-<TT>SUBCC</TT>,
-the condition codes are not inverted as on the MIPS.
-</P>
-<P>
-The syntax for the ASI stuff is, for example to move a word from ASI 2:
-<DL><DT><DD><TT><PRE>
-	MOVW	(R7, 2), R8
-</PRE></TT></DL>
-The syntax for double indexing is
-<DL><DT><DD><TT><PRE>
-	MOVW	(R7+R8), R9
-</PRE></TT></DL>
-</P>
-<P>
-The SPARC's instruction scheduling is similar to the MIPS's.
-The official no-op instruction is:
-<DL><DT><DD><TT><PRE>
-	ORN	R0, R0, R0
-</PRE></TT></DL>
-</P>
-<H4>i960
-</H4>
-<P>
-Registers are numbered
-<TT>R0</TT>
-through
-<TT>R31</TT>.
-Stack pointer is
-<TT>R29</TT>;
-return register is
-<TT>R4</TT>;
-static base is
-<TT>R28</TT>;
-it is initialized to the address of
-<TT>setSB(SB)</TT>.
-<TT>R3</TT>
-must be zero; this should be done manually early in execution by
-<DL><DT><DD><TT><PRE>
-	SUBO	R3, R3
-</PRE></TT></DL>
-<TT>R27</TT>
-is the loader temporary.
-</P>
-<P>
-There is no support for floating point.
-</P>
-<P>
-The Intel calling convention is not supported and cannot be used; use
-<TT>BAL</TT>
-instead.
-Instructions are mostly as in the book.  The major change is that
-<TT>LOAD</TT>
-and
-<TT>STORE</TT>
-are both called
-<TT>MOV</TT>.
-The extension character for
-<TT>MOV</TT>
-is as in the manual:
-<TT>O</TT>
-for ordinal,
-<TT>W</TT>
-for signed, etc.
-</P>
-<H4>i386
-</H4>
-<P>
-The assembler assumes 32-bit protected mode.
-The register names are
-<TT>SP</TT>,
-<TT>AX</TT>,
-<TT>BX</TT>,
-<TT>CX</TT>,
-<TT>DX</TT>,
-<TT>BP</TT>,
-<TT>DI</TT>,
-and
-<TT>SI</TT>.
-The stack pointer (not a pseudo-register) is
-<TT>SP</TT>
-and the return register is
-<TT>AX</TT>.
-There is no physical frame pointer but, as for the MIPS,
-<TT>FP</TT>
-is a pseudo-register that acts as
-a frame pointer.
-</P>
-<P>
-Opcode names are mostly the same as those listed in the Intel manual
-with an
-<TT>L</TT>,
-<TT>W</TT>,
-or
-<TT>B</TT>
-appended to identify 32-bit, 
-16-bit, and 8-bit operations.
-The exceptions are loads, stores, and conditionals.
-All load and store opcodes to and from general registers, special registers
-(such as
-<TT>CR0,</TT>
-<TT>CR3,</TT>
-<TT>GDTR,</TT>
-<TT>IDTR,</TT>
-<TT>SS,</TT>
-<TT>CS,</TT>
-<TT>DS,</TT>
-<TT>ES,</TT>
-<TT>FS,</TT>
-and
-<TT>GS</TT>)
-or memory are written
-as
-<DL><DT><DD><TT><PRE>
-	MOV<I>x</I>	src,dst
-</PRE></TT></DL>
-where
-<I>x</I>
-is
-<TT>L</TT>,
-<TT>W</TT>,
-or
-<TT>B</TT>.
-Thus to get
-<TT>AL</TT>
-use a
-<TT>MOVB</TT>
-instruction.  If you need to access
-<TT>AH</TT>,
-you must mention it explicitly in a
-<TT>MOVB</TT>:
-<DL><DT><DD><TT><PRE>
-	MOVB	AH, BX
-</PRE></TT></DL>
-There are many examples of illegal moves, for example,
-<DL><DT><DD><TT><PRE>
-	MOVB	BP, DI
-</PRE></TT></DL>
-that the loader actually implements as pseudo-operations.
-</P>
-<P>
-The names of conditions in all conditional instructions
-(<TT>J</TT>,
-<TT>SET</TT>)
-follow the conventions of the 68020 instead of those of the Intel
-assembler:
-<TT>JOS</TT>,
-<TT>JOC</TT>,
-<TT>JCS</TT>,
-<TT>JCC</TT>,
-<TT>JEQ</TT>,
-<TT>JNE</TT>,
-<TT>JLS</TT>,
-<TT>JHI</TT>,
-<TT>JMI</TT>,
-<TT>JPL</TT>,
-<TT>JPS</TT>,
-<TT>JPC</TT>,
-<TT>JLT</TT>,
-<TT>JGE</TT>,
-<TT>JLE</TT>,
-and
-<TT>JGT</TT>
-instead of
-<TT>JO</TT>,
-<TT>JNO</TT>,
-<TT>JB</TT>,
-<TT>JNB</TT>,
-<TT>JZ</TT>,
-<TT>JNZ</TT>,
-<TT>JBE</TT>,
-<TT>JNBE</TT>,
-<TT>JS</TT>,
-<TT>JNS</TT>,
-<TT>JP</TT>,
-<TT>JNP</TT>,
-<TT>JL</TT>,
-<TT>JNL</TT>,
-<TT>JLE</TT>,
-and
-<TT>JNLE</TT>.
-</P>
-<P>
-The addressing modes have syntax like
-<TT>AX</TT>,
-<TT>(AX)</TT>,
-<TT>(AX)(BX*4)</TT>,
-<TT>10(AX)</TT>,
-and
-<TT>10(AX)(BX*4)</TT>.
-The offsets from
-<TT>AX</TT>
-can be replaced by offsets from
-<TT>FP</TT>
-or
-<TT>SB</TT>
-to access names, for example
-<TT>extern+5(SB)(AX*2)</TT>.
-</P>
-<P>
-Other notes: Non-relative
-<TT>JMP</TT>
-and
-<TT>CALL</TT>
-have a
-<TT>*</TT>
-added to the syntax.
-Only
-<TT>LOOP</TT>,
-<TT>LOOPEQ</TT>,
-and
-<TT>LOOPNE</TT>
-are legal loop instructions.  Only
-<TT>REP</TT>
-and
-<TT>REPN</TT>
-are recognized repeaters.  These are not prefixes, but rather
-stand-alone opcodes that precede the strings, for example
-<DL><DT><DD><TT><PRE>
-	CLD; REP; MOVSL
-</PRE></TT></DL>
-Segment override prefixes in
-<TT>MOD/RM</TT>
-fields are not supported.
-</P>
-<H4>Alpha
-</H4>
-<P>
-On the Alpha, all registers are 64 bits.  The architecture handles 32-bit values
-by giving them a canonical format (sign extension in the case of integer registers).
-Registers are numbered
-<TT>R0</TT>
-through
-<TT>R31</TT>.
-<TT>R0</TT>
-holds the return value from subroutines, and also the first parameter.
-<TT>R30</TT>
-is the stack pointer,
-<TT>R29</TT>
-is the static base,
-<TT>R26</TT>
-is the link register, and
-<TT>R27</TT>
-and
-<TT>R28</TT>
-are linker temporaries.
-</P>
-<P>
-Floating point registers are numbered
-<TT>F0</TT>
-to
-<TT>F31</TT>.
-<TT>F28</TT>
-contains
-<TT>0.5</TT>,
-<TT>F29</TT>
-contains
-<TT>1.0</TT>,
-and
-<TT>F30</TT>
-contains
-<TT>2.0</TT>.
-<TT>F31</TT>
-is always
-<TT>0.0</TT>
-on the Alpha.
-</P>
-<P>
-The extension character for
-<TT>MOV</TT>
-follows DEC's notation:
-<TT>B</TT>
-for byte (8 bits),
-<TT>W</TT>
-for word (16 bits),
-<TT>L</TT>
-for long (32 bits),
-and
-<TT>Q</TT>
-for quadword (64 bits).
-Byte and ``word'' loads and stores may be made unsigned
-by appending a
-<TT>U</TT>.
-<TT>S</TT>
-and
-<TT>T</TT>
-refer to IEEE floating point single precision (32 bits) and double precision (64 bits), respectively.
-</P>
-<H4>Power PC
-</H4>
-<P>
-The Power PC follows the Plan 9 model set by the MIPS and SPARC,
-not the elaborate ABIs.
-The 32-bit instructions of the 60x and 8xx PowerPC architectures are supported;
-there is no support for the older POWER instructions.
-Registers are
-<TT>R0</TT>
-through
-<TT>R31</TT>.
-<TT>R0</TT>
-is initialized to zero; this is done by C start up code
-and assumed by the compiler and loader.
-<TT>R1</TT>
-is the stack pointer.
-<TT>R2</TT>
-is the static base register, with value the address of
-<TT>setSB(SB)</TT>.
-<TT>R3</TT>
-is the return register and also the register holding the first
-argument to a C function, with space reserved at
-<TT>0(FP)</TT>
-as on the MIPS.
-<TT>R31</TT>
-is the loader temporary.
-The external registers in Plan 9's C are allocated from
-<TT>R30</TT>
-down.
-</P>
-<P>
-Floating point registers are called
-<TT>F0</TT>
-through
-<TT>F31</TT>.
-By convention, several registers are initialized
-to specific values; this is done by the operating system.
-<TT>F27</TT>
-must be initialized to the value
-<TT>0x4330000080000000</TT>
-(used by float-to-int conversion),
-<TT>F28</TT>
-to the value 0.0,
-<TT>F29</TT>
-to 0.5,
-<TT>F30</TT>
-to 1.0, and
-<TT>F31</TT>
-to 2.0.
-</P>
-<P>
-As on the MIPS and SPARC, the assembler accepts arbitrary literals
-as operands to
-<TT>MOVW</TT>,
-and also to
-<TT>ADD</TT>
-and others where `immediate' variants exist,
-and the loader generates sequences
-of
-<TT>addi</TT>,
-<TT>addis</TT>,
-<TT>oris</TT>,
-etc. as required.
-The register indirect addressing modes use the same syntax as the SPARC,
-including double indexing when allowed.
-</P>
-<P>
-The instruction names are generally derived from the Motorola ones,
-subject to slight transformation:
-the
-`<TT>.</TT>'
-marking the setting of condition codes is replaced by
-<TT>CC</TT>,
-and when the letter
-`<TT>o</TT>'
-represents `OE=1' it is replaced by
-<TT>V</TT>.
-Thus
-<TT>add</TT>,
-<TT>addo.</TT>
-and
-<TT>subfzeo.</TT>
-become
-<TT>ADD</TT>,
-<TT>ADDVCC</TT>
-and
-<TT>SUBFZEVCC</TT>.
-As well as the three-operand conditional branch instruction
-<TT>BC</TT>,
-the assembler provides pseudo-instructions for the common cases:
-<TT>BEQ</TT>,
-<TT>BNE</TT>,
-<TT>BGT</TT>,
-<TT>BGE</TT>,
-<TT>BLT</TT>,
-<TT>BLE</TT>,
-<TT>BVC</TT>,
-and
-<TT>BVS</TT>.
-The unconditional branch instruction is
-<TT>BR</TT>.
-Indirect branches use
-<TT>(CTR)</TT>
-or
-<TT>(LR)</TT>
-as target.
-</P>
-<P>
-Load or store operations are replaced by
-<TT>MOV</TT>
-variants in the usual way:
-<TT>MOVW</TT>
-(move word),
-<TT>MOVH</TT>
-(move halfword with sign extension), and
-<TT>MOVB</TT>
-(move byte with sign extension, a pseudo-instruction),
-with unsigned variants
-<TT>MOVHZ</TT>
-and
-<TT>MOVBZ</TT>,
-and byte-reversing
-<TT>MOVWBR</TT>
-and
-<TT>MOVHBR</TT>.
-`Load or store with update' versions are
-<TT>MOVWU</TT>,
-<TT>MOVHU</TT>,
-and
-<TT>MOVBZU</TT>.
-Load or store multiple is
-<TT>MOVMW</TT>.
-The exceptions are the string instructions, which are
-<TT>LSW</TT>
-and
-<TT>STSW</TT>,
-and the reservation instructions
-<TT>lwarx</TT>
-and
-<TT>stwcx.</TT>,
-which are
-<TT>LWAR</TT>
-and
-<TT>STWCCC</TT>,
-all with operands in the usual data-flow order.
-Floating-point load or store instructions are
-<TT>FMOVD</TT>,
-<TT>FMOVDU</TT>,
-<TT>FMOVS</TT>,
-and
-<TT>FMOVSU</TT>.
-The register to register move instructions
-<TT>fmr</TT>
-and
-<TT>fmr.</TT>
-are written
-<TT>FMOVD</TT>
-and
-<TT>FMOVDCC</TT>.
-</P>
-<P>
-The assembler knows the commonly used special purpose registers:
-<TT>CR</TT>,
-<TT>CTR</TT>,
-<TT>DEC</TT>,
-<TT>LR</TT>,
-<TT>MSR</TT>,
-and
-<TT>XER</TT>.
-The rest, which are often architecture-dependent, are referenced as
-<TT>SPR(n)</TT>.
-The segment registers of the 60x series are similarly
-<TT>SEG(n)</TT>,
-but
-<I>n</I>
-can also be a register name, as in
-<TT>SEG(R3)</TT>.
-Moves between special purpose registers and general purpose ones,
-when allowed by the architecture,
-are written as
-<TT>MOVW</TT>,
-replacing
-<TT>mfcr</TT>,
-<TT>mtcr</TT>,
-<TT>mfmsr</TT>,
-<TT>mtmsr</TT>,
-<TT>mtspr</TT>,
-<TT>mfspr</TT>,
-<TT>mftb</TT>,
-and many others.
-</P>
-<P>
-The fields of the condition register
-<TT>CR</TT>
-are referenced as
-<TT>CR(0)</TT>
-through
-<TT>CR(7)</TT>.
-They are used by the
-<TT>MOVFL</TT>
-(move field) pseudo-instruction,
-which produces
-<TT>mcrf</TT>
-or
-<TT>mtcrf</TT>.
-For example:
-<DL><DT><DD><TT><PRE>
-	MOVFL	CR(3), CR(0)
-	MOVFL	R3, CR(1)
-	MOVFL	R3, 7, CR
-</PRE></TT></DL>
-They are also accepted in
-the conditional branch instruction, for example
-<DL><DT><DD><TT><PRE>
-	BEQ	CR(7), label
-</PRE></TT></DL>
-Fields of the
-<TT>FPSCR</TT>
-are accessed using
-<TT>MOVFL</TT>
-in a similar way:
-<DL><DT><DD><TT><PRE>
-	MOVFL	FPSCR, F0
-	MOVFL	F0, FPSCR
-	MOVFL	F0, $7, FPSCR
-	MOVFL	$0, FPSCR(3)
-</PRE></TT></DL>
-producing
-<TT>mffs</TT>,
-<TT>mtfsf</TT>
-or
-<TT>mtfsfi</TT>,
-as appropriate.
-</P>
-<H4>ARM
-</H4>
-<P>
-The assembler provides access to
-<TT>R0</TT>
-through
-<TT>R14</TT>
-and the
-<TT>PC</TT>.
-The stack pointer is
-<TT>R13</TT>,
-the link register is
-<TT>R14</TT>,
-and the static base register is
-<TT>R12</TT>.
-<TT>R0</TT>
-is the return register and also the register holding
-the first argument to a subroutine.
-The assembler supports the
-<TT>CPSR</TT>
-and
-<TT>SPSR</TT>
-registers.
-It also knows about coprocessor registers
-<TT>C0</TT>
-through
-<TT>C15</TT>.
-Floating registers are
-<TT>F0</TT>
-through
-<TT>F7</TT>,
-<TT>FPSR</TT>
-and
-<TT>FPCR</TT>.
-</P>
-<P>
-As with the other architectures, loads and stores are called
-<TT>MOV</TT>,
-e.g.
-<TT>MOVW</TT>
-for load word or store word, and
-<TT>MOVM</TT>
-for
-load or store multiple,
-depending on the operands.
-</P>
-<P>
-Addressing modes are supported by suffixes to the instructions:
-<TT>.IA</TT>
-(increment after),
-<TT>.IB</TT>
-(increment before),
-<TT>.DA</TT>
-(decrement after), and
-<TT>.DB</TT>
-(decrement before).
-These can only be used with the
-<TT>MOV</TT>
-instructions.
-The move multiple instruction,
-<TT>MOVM</TT>,
-defines a range of registers using brackets, e.g.
-<TT>[R0-R12]</TT>.
-The special
-<TT>MOVM</TT>
-addressing mode bits
-<TT>W</TT>,
-<TT>U</TT>,
-and
-<TT>P</TT>
-are written in the same manner, for example,
-<TT>MOVM.DB.W</TT>.
-A
-<TT>.S</TT>
-suffix allows a
-<TT>MOVM</TT>
-instruction to access user
-<TT>R13</TT>
-and
-<TT>R14</TT>
-when in another processor mode.
-Shifts and rotates in addressing modes are supported by binary operators
-<TT>&lt;&lt;</TT>
-(logical left shift),
-<TT>&gt;&gt;</TT>
-(logical right shift),
-<TT>-&gt;</TT>
-(arithmetic right shift), and
-<TT>@&gt;</TT>
-(rotate right); for example
-<TT>R7&gt;&gt;R2</TT> or
-<TT>R2@&gt;2</TT>.
-The assembler does not support indexing by a shifted expression;
-only names can be doubly indexed.
-</P>
-<P>
-Any instruction can be followed by a suffix that makes the instruction conditional:
-<TT>.EQ</TT>,
-<TT>.NE</TT>,
-and so on, as in the ARM manual, with synonyms
-<TT>.HS</TT>
-(for
-<TT>.CS</TT>)
-and
-<TT>.LO</TT>
-(for
-<TT>.CC</TT>),
-for example
-<TT>ADD.NE</TT>.
-Arithmetic
-and logical instructions
-can have a
-<TT>.S</TT>
-suffix, as ARM allows, to set condition codes.
-</P>
-<P>
-The syntax of the
-<TT>MCR</TT>
-and
-<TT>MRC</TT>
-coprocessor instructions is largely as in the manual, with the usual adjustments.
-The assembler directly supports only the ARM floating-point coprocessor
-operations used by the compiler:
-<TT>CMP</TT>,
-<TT>ADD</TT>,
-<TT>SUB</TT>,
-<TT>MUL</TT>,
-and
-<TT>DIV</TT>,
-all with
-<TT>F</TT>
-or
-<TT>D</TT>
-suffix selecting single or double precision.
-Floating-point load or store become
-<TT>MOVF</TT>
-and
-<TT>MOVD</TT>.
-Conversion instructions are also specified by moves:
-<TT>MOVWD</TT>,
-<TT>MOVWF</TT>,
-<TT>MOVDW</TT>,
-<TT>MOVFW</TT>,
-<TT>MOVFD</TT>,
-and
-<TT>MOVDF</TT>.
-</P>
-<H4>AMD 29000
-</H4>
-<P>
-For details about this assembly language, which was built for the AMD 29240,
-look at the sources or examine compiler output.
-</P>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 2096
sys/doc/auth.html

@@ -1,2096 +0,0 @@
-<html>
-<br><img src="-.19111510.gif"><br>
-<title>
-Security in Plan 9
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Security in Plan 9
-</H1>
-<DL><DD><I>Russ Cox, MIT LCS<br>
-<br>
-Eric Grosse, Bell Labs<br>
-<br>
-Rob Pike, Bell Labs<br>
-<br>
-Dave Presotto, Avaya Labs and Bell Labs<br>
-<br>
-Sean Quinlan, Bell Labs<br>
-<br>
-<TT>{rsc,ehg,rob,presotto,seanq}@plan9.bell-labs.com</TT>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-The security architecture of the Plan 9(tm)
-operating system has recently been redesigned
-to address some technical shortcomings.
-This redesign provided an opportunity also to make the system more
-convenient to use securely.
-Plan 9 has thus improved in two ways not usually seen together:
-it has become more secure
-<I>and</I>
-easier to use.
-<br>&#32;<br>
-The central component of the new architecture is a per-user
-self-contained agent called
-<TT>factotum</TT>.
-<TT>Factotum</TT>
-securely holds a
-copy of the user's keys and negotiates authentication protocols, on
-behalf of the user, with secure services around the network.
-Concentrating security code in a single program offers several
-advantages including: ease of update or repair to broken security
-software and protocols; the ability to run secure services at a lower
-privilege level; uniform management of keys for all services; and an
-opportunity to provide single sign on, even to unchanged legacy
-applications.
-<TT>Factotum</TT>
-has an unusual architecture: it is implemented
-as a Plan 9 file server.
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> To appear, in a slightly different form, in
-Proc. of the 2002 Usenix Security Symposium,
-San Francisco.
-</I><DT>&#32;<DD></dl>
-<br>
-</DL>
-<H4>1 Introduction
-</H4>
-<br>&#32;<br>
-Secure computing systems face two challenges:
-first, they must employ sophisticated technology that is difficult to design
-and prove correct; and second,
-they must be easy for regular people to use.
-The question of ease of use is sometimes neglected, but it is essential:
-weak but easy-to-use security can be more effective than strong but
-difficult-to-use security if it is more likely to be used.
-People lock their front doors when they leave the house, knowing
-full well that a burglar is capable of picking the lock (or avoiding
-the door altogether); yet few would accept the cost and
-awkwardness of a bank vault door on the
-house even though that might reduce the probability of a robbery.
-A related point is that users need a clear model of how the security
-operates (if not how it actually provides security) in order to use it
-well; for example, the clarity of a lock icon on a web browser
-is offset by the confusing and typically insecure
-steps for installing X.509 certificates.
-<br>&#32;<br>
-The security architecture of the Plan 9
-operating system
-[Pike95]
-has recently been redesigned to make it both more secure
-and easier to use.
-By
-<I>security</I>
-we mean three things:
-first, the business of authenticating users and services;
-second, the safe handling, deployment, and use of keys
-and other secret information; and
-third, the use of encryption and integrity checks
-to safeguard communications
-from prying eyes.
-<br>&#32;<br>
-The old security architecture of Plan 9
-had several engineering problems in common with other operating systems.
-First, it had an inadequate notion of security domain.
-Once a user provided a password to connect to a local file store,
-the system required that the same password be used to access all the other file
-stores.
-That is, the system treated all network services as
-belonging to the same security domain. 
-<br>&#32;<br>
-Second, the algorithms and protocols used in authentication,
-by nature tricky and difficult to get right, were compiled into the
-various applications, kernel modules, and file servers.
-Changes and fixes to a security protocol
-required that all components using that protocol be recompiled,
-or at least relinked, and restarted.
-<br>&#32;<br>
-Third, the file transport protocol, 9P
-[Pike93],
-that forms the core of
-the Plan 9 system, had its authentication protocol embedded in its design.
-This meant that fixing or changing the authentication used by 9P
-required deep changes to the system.
-If someone were to find a way to break the protocol, the system would
-be wide open and very hard to fix.
-<br>&#32;<br>
-These and a number of lesser problems, combined with a desire
-for more widespread use of encryption in the system, spurred us to
-rethink the entire security architecture of Plan 9.
-<br>&#32;<br>
-The centerpiece of the new architecture is an agent,
-called
-<TT>factotum</TT>,
-that handles the user's keys and negotiates all security
-interactions with system services and applications.
-Like a trusted assistant with a copy of the owner's keys,
-<TT>factotum</TT>
-does all the negotiation for security and authentication.
-Programs no longer need to be compiled with cryptographic
-code; instead they communicate with
-<TT>factotum</TT>
-agents
-that represent distinct entities in the cryptographic exchange,
-such as a user and server of a secure service.
-If a security protocol needs to be added, deleted, or modified,
-only
-<TT>factotum</TT>
-needs to be updated for all system services
-to be kept secure.
-<br>&#32;<br>
-Building on
-<TT>factotum</TT>,
-we modified
-secure services in the system to move
-user authentication code into
-<TT>factotum</TT>;
-made authentication a separable component of the file server protocol;
-deployed new security protocols;
-designed a secure file store,
-called
-<TT>secstore</TT>,
-to protect our keys but make them easy to get when they are needed;
-designed a new kernel module to support transparent use of 
-Transport Layer Security (TLS)
-[RFC2246];
-and began using encryption for all communications within the system.
-The overall architecture is illustrated in Figure 1a.
-<br><img src="-.19111511.gif"><br>
-<DL><DT><DD><TT><PRE>
-<br><img src="-.19111512.gif"><br>
-</PRE></TT></DL>
-<br>&#32;<br>
-Figure 1a.  Components of the security architecture.
-Each box is a (typically) separate machine; each ellipse a process.
-The ellipses labeled <I>F</I><SUB><I>X</I></SUB>
-are
-<TT>factotum</TT>
-processes; those labeled
-<I>P</I><SUB><I>X</I></SUB>
-are the pieces and proxies of a distributed program.
-The authentication server is one of several repositories for users' security information
-that
-<TT>factotum</TT>
-processes consult as required.
-<TT>Secstore</TT>
-is a shared resource for storing private information such as keys;
-<TT>factotum</TT>
-consults it for the user during bootstrap.
-<br>&#32;<br>
-<br><img src="-.19111513.gif"><br>
-<br>&#32;<br>
-Secure protocols and algorithms are well understood
-and are usually not the weakest link in a system's security.
-In practice, most security problems arise from buggy servers,
-confusing software, or administrative oversights.
-It is these practical problems that we are addressing.
-Although this paper describes the algorithms and protocols we are using,
-they are included mainly for concreteness.
-Our main intent is to present a simple security architecture built
-upon a small trusted code base that is easy to verify (whether by manual or
-automatic means), easy to understand, and easy to use.
-<br>&#32;<br>
-Although it is a subjective assessment,
-we believe we have achieved our goal of ease of use.
-That we have achieved
-our goal of improved security is supported by our plan to
-move our currently private computing environment onto the Internet
-outside the corporate firewall.
-The rest of this paper explains the architecture and how it is used,
-to explain why a system that is easy to use securely is also safe
-enough to run in the open network.
-<H4>2 An Agent for Security
-</H4>
-<br>&#32;<br>
-One of the primary reasons for the redesign of the Plan 9
-security infrastructure was to remove the authentication
-method both from the applications and from the kernel.
-Cryptographic code
-is large and intricate, so it should
-be packaged as a separate component that can be repaired or
-modified without altering or even relinking applications
-and services that depend on it.
-If a security protocol is broken, it should be trivial to repair,
-disable, or replace it on the fly.
-Similarly, it should be possible for multiple programs to use
-a common security protocol without embedding it in each program.
-<br>&#32;<br>
-Some systems use dynamically linked libraries (DLLs) to address these configuration issues.
-The problem with this approach is that it leaves
-security code in the same address space as the program using it.
-The interactions between the program and the DLL
-can therefore accidentally or deliberately violate the interface,
-weakening security.
-Also, a program using a library to implement secure services
-must run at a privilege level necessary to provide the service;
-separating the security to a different program makes it possible
-to run the services at a weaker privilege level, isolating the
-privileged code to a single, more trustworthy component.
-<br>&#32;<br>
-Following the lead of the SSH agent
-[Ylon96],
-we give each user
-an agent process responsible
-for holding and using the user's keys.
-The agent program is called
-<TT>factotum</TT>
-because of its similarity to the proverbial servant with the
-power to act on behalf of his master because he holds the
-keys to all the master's possessions.  It is essential that
-<TT>factotum</TT>
-keep the keys secret and use them only in the owner's interest.
-Later we'll discuss some changes to the kernel to reduce the possibility of
-<TT>factotum</TT>
-leaking information inadvertently.
-<br>&#32;<br>
-<TT>Factotum</TT>
-is implemented, like most Plan 9 services, as a file server.
-It is conventionally mounted upon the directory
-<TT>/mnt/factotum</TT>,
-and the files it serves there are analogous to virtual devices that provide access to,
-and control of, the services of the
-<TT>factotum</TT>.
-The next few sections describe the design of
-<TT>factotum</TT>
-and how it operates with the other pieces of Plan 9 to provide
-security services.
-<H4>2.1 Logging in
-</H4>
-<br>&#32;<br>
-To make the discussions that follow more concrete,
-we begin with a couple of examples showing how the
-Plan 9 security architecture appears to the user.
-These examples both involve a user
-<TT>gre</TT>
-logging in after booting a local machine.
-The user may or may not have a secure store in which
-all his keys are kept.
-If he does,
-<TT>factotum</TT>
-will prompt him for the password to the secure store
-and obtain keys from it, prompting only when a key
-isn't found in the store.
-Otherwise,
-<TT>factotum</TT>
-must prompt for each key.
-<br>&#32;<br>
-In the typescripts, \n
-represents a literal newline
-character typed to force a default response.
-User input is in italics, and
-long lines are folded and indented to fit.
-<br>&#32;<br>
-This first example shows a user logging in without
-help from the secure store.
-First,
-<TT>factotum</TT>
-prompts for a user name that the local kernel
-will use:
-<DL><DT><DD><TT><PRE>
-user[none]: gre
-</PRE></TT></DL>
-(Default responses appear in square brackets.)
-The kernel then starts accessing local resources
-and requests, through
-<TT>factotum</TT>,
-a user/password pair to do so:
-<DL><DT><DD><TT><PRE>
-!Adding key: dom=cs.bell-labs.com
-    proto=p9sk1
-user[gre]: \n
-password: ****
-</PRE></TT></DL>
-Now the user is logged in to the local system, and
-the mail client starts up:
-<DL><DT><DD><TT><PRE>
-!Adding key: proto=apop
-    server=plan9.bell-labs.com
-user[gre]: \n
-password: ****
-</PRE></TT></DL>
-<TT>Factotum</TT>
-is doing all the prompting and the applications
-being started are not even touching the keys.
-Note that it's always clear which key is being requested.
-<br>&#32;<br>
-Now consider the same login sequence, but in the case where
-<TT>gre</TT>
-has a secure store account:
-<DL><DT><DD><TT><PRE>
-user[none]: gre
-secstore password: *********
-STA PIN+SecurID: *********
-</PRE></TT></DL>
-That's the last
-<TT>gre</TT>
-will hear from
-<TT>factotum</TT>
-unless an attempt is made to contact
-a system for which no key is kept in the secure store.
-<H4>2.2 The factotum
-</H4>
-<br>&#32;<br>
-Each computer running Plan 9 has one user id that owns all the
-resources on that system &#8212; the scheduler, local disks,
-network interfaces, etc.
-That user, the
-<I>host owner</I>,
-is the closest analogue in Plan 9 to a Unix
-<TT>root</TT>
-account (although it is far weaker;
-rather than having special powers, as its name implies the host owner
-is just a regular user that happens to own the
-resources of the local machine).
-On a single-user system, which we call a terminal,
-the host owner is the id of the terminal's user.
-Shared servers such as CPU servers normally have a pseudo-user
-that initially owns all resources.
-At boot time, the Plan 9 kernel starts a
-<TT>factotum</TT>
-executing as, and therefore with the privileges of,
-the host owner.
-<br>&#32;<br>
-New processes run as
-the same user as the process which created them.
-When a process must take on the identity of a new user,
-such as to provide a login shell
-on a shared CPU server,
-it does so by proving to the host owner's
-<TT>factotum</TT>
-that it is
-authorized to do so.
-This is done by running an
-authentication protocol with
-<TT>factotum</TT>
-to
-prove that the process has access to secret information
-which only the new user should possess.
-For example, consider the setup in Figure 1a.
-If a user on the terminal
-wants to log in to the CPU server using the
-Plan 9
-<TT>cpu</TT>
-service
-[Pike93],
-then
-<I>P</I><SUB><I>T</I></SUB>
-might be the
-<TT>cpu</TT>
-client program and
-<I>P</I><SUB><I>C</I></SUB>
-the
-<TT>cpu</TT>
-server.
-Neither <I>P</I><SUB><I>C</I></SUB> nor <I>P</I><SUB><I>T</I></SUB>
-knows the details of the authentication.
-They
-do need to be able to shuttle messages back and
-forth between the two
-<TT>factotums</TT>,
-but this is
-a generic function easily performed without
-knowing, or being able to extract, secrets in
-the messages.
-<I>P</I><SUB><I>T</I></SUB>
-will make a network connection to <I>P</I><SUB><I>C</I></SUB>.
-<I>P</I><SUB><I>T</I></SUB>
-and
-<I>P</I><SUB><I>C</I></SUB>
-will then relay messages between
-the
-<TT>factotum</TT>
-owned by the user, <I>F</I><SUB><I>T</I></SUB>,
-and the one owned by the CPU server, <I>F</I><SUB><I>C</I></SUB>,
-until mutual authentication has been established.
-Later
-sections describe the RPC between
-<TT>factotum</TT>
-and
-applications and the library functions to support proxy operations.
-<br>&#32;<br>
-The kernel always uses a single local instance of
-<TT>factotum</TT>,
-running as the
-host owner, for
-its authentication purposes, but
-a regular user may start other
-<TT>factotum</TT>
-agents.
-In fact, the
-<TT>factotum</TT>
-representing the user need not be
-running on the same machine as its client.
-For instance, it is easy for a user on a CPU server,
-through standard Plan 9 operations,
-to replace the
-<TT>/mnt/factotum</TT>
-in the user's private file name space on the server
-with a connection to the
-<TT>factotum</TT>
-running on the terminal.
-(The usual file system permissions prevent interlopers
-from doing so maliciously.)
-This permits secure operations on the CPU server to be
-transparently validated by the user's own
-<TT>factotum</TT>,
-so
-secrets need never leave the user's terminal.
-The SSH agent
-[Ylon96]
-does much the
-same with special SSH protocol messages, but
-an advantage to making our agent a file system
-is that we need no new mechanism to access our remote
-agent; remote file access is sufficient.
-<br>&#32;<br>
-Within
-<TT>factotum</TT>,
-each protocol is implemented as a state
-machine with a generic interface, so protocols are in
-essence pluggable modules, easy to add, modify, or drop.
-Writing a message to and reading a message from
-<TT>factotum</TT>
-each require a separate RPC and result in
-a single state transition.
-Therefore
-<TT>factotum</TT>
-always runs to completion on every RPC and never blocks
-waiting for input during any authentication.
-Moreover, the number of simultaneous
-authentications is limited only by the amount of memory we're
-willing to dedicate to representing the state machines.
-<br>&#32;<br>
-Authentication protocols are implemented only
-within
-<TT>factotum</TT>,
-but adding and removing
-protocols does require relinking the binary, so
-<TT>factotum</TT>
-processes (but no others)
-need to be restarted in order to take advantage of
-new or repaired protocols.
-<br>&#32;<br>
-At the time of writing, 
-<TT>factotum</TT>
-contains authentication
-modules for the Plan 9 shared key protocol (p9sk1),
-SSH's RSA authentication, passwords in the clear, APOP, CRAM, PPP's CHAP,
-Microsoft PPP's MSCHAP, and VNC's challenge/response.
-<H4>2.3 Local capabilities
-</H4>
-<br>&#32;<br>
-A capability system, managed by the kernel, is used to empower
-<TT>factotum</TT>
-to grant permission to another process to change its user id.
-A
-kernel device driver
-implements two files,
-<TT>/dev/caphash</TT>
-and
-<TT>/dev/capuse</TT>.
-The write-only file
-<TT>/dev/caphash</TT>
-can be opened only by the host owner, and only once.
-<TT>Factotum</TT>
-opens this file immediately after booting.
-<br>&#32;<br>
-To use the files,
-<TT>factotum</TT>
-creates a string of the form
-<I>userid1</I><TT>@</TT><I>userid2</I><TT>@</TT><I>random-string</I>,
-uses SHA1 HMAC to hash
-<I>userid1</I><TT>@</TT><I>userid2</I>
-with key
-<I>random-string</I>,
-and writes that hash to
-<TT>/dev/caphash</TT>.
-<TT>Factotum</TT>
-then passes the original string to another
-process on the same machine, running
-as user
-<I>userid1</I>,
-which
-writes the string to
-<TT>/dev/capuse</TT>.
-The kernel hashes the string and looks for
-a matching hash in its list.
-If it finds one,
-the writing process's user id changes from
-<I>userid1</I>
-to
-<I>userid2</I>.
-Once used, or if a timeout expires,
-the capability is discarded by the kernel.
-<br>&#32;<br>
-The capabilities are local to the machine on which they are created.
-Hence a
-<TT>factotum</TT>
-running on one machine cannot pass capabilities
-to processes on another and expect them to work.
-<H4>2.4 Keys
-</H4>
-<br>&#32;<br>
-We define the word
-<I>key</I>
-to mean not only a secret, but also a description of the
-context in which that secret is to be used: the protocol,
-server, user, etc. to which it applies.
-That is,
-a key is a combination of secret and descriptive information
-used to authenticate the identities of parties
-transmitting or receiving information.
-The set of keys used
-in any authentication depends both on the protocol and on
-parameters passed by the program requesting the authentication.
-<br>&#32;<br>
-Taking a tip from SDSI
-[RiLa],
-which represents security information as textual S-expressions,
-keys in Plan 9 are represented as plain UTF-8 text.
-Text is easily
-understood and manipulated by users.
-By contrast,
-a binary or other cryptic format
-can actually reduce overall security.
-Binary formats are difficult for users to examine and can only be
-cracked by special tools, themselves poorly understood by most users.
-For example, very few people know or understand what's inside
-their X.509 certificates.
-Most don't even know where in the system to
-find them.
-Therefore, they have no idea what they are trusting, and why, and
-are powerless to change their trust relationships.
-Textual, centrally stored and managed keys are easier to use and safer.
-<br>&#32;<br>
-Plan 9 has historically represented databases as attribute/value pairs,
-since they are a good foundation for selection and projection operations.
-<TT>Factotum</TT>
-therefore represents
-the keys in the format
-<I>attribute</I><TT>=</TT><I>value</I>,
-where
-<I>attribute</I>
-is an identifier, possibly with a single-character prefix, and
-<I>value</I>
-is an arbitrary quoted string.
-The pairs themselves are separated by white space.
-For example, a Plan 9 key and an APOP key
-might be represented like this:
-<DL><DT><DD><TT><PRE>
-dom=bell-labs.com proto=p9sk1 user=gre
-	!password='don''t tell'
-proto=apop server=x.y.com user=gre
-	!password='open sesame'
-</PRE></TT></DL>
-If a value is empty or contains white space or single quotes, it must be quoted;
-quotes are represented by doubled single quotes.
-Attributes that begin with an exclamation mark
-(<TT>!</TT>)
-are considered
-<I>secret</I>.
-<TT>Factotum</TT>
-will never let a secret value escape its address space
-and will suppress keyboard echo when asking the user to type one.
-<br>&#32;<br>
-A program requesting authentication selects a key
-by providing a
-<I>query</I>,
-a list of elements to be matched by the key.
-Each element in the list is either an
-<I>attribute</I><TT>=</TT><I>value</I>
-pair, which is satisfied by keys with
-exactly that pair;
-or an attribute followed by a question mark,
-<I>attribute</I><TT>?</TT>,
-which is satisfied by keys with some pair specifying
-the attribute.
-A key matches a query if every element in the list
-is satisfied.
-For instance, to select the APOP key in the previous example,
-an APOP client process might specify the query
-<DL><DT><DD><TT><PRE>
-server=x.y.com proto=apop
-</PRE></TT></DL>
-Internally,
-<TT>factotum</TT>'s
-APOP module would add the requirements of
-having
-<TT>user</TT>
-and
-<TT>!password</TT>
-attributes, forming the query
-<DL><DT><DD><TT><PRE>
-server=x.y.com proto=apop user? !password?
-</PRE></TT></DL>
-when searching for an appropriate key.
-<br>&#32;<br>
-<TT>Factotum</TT>
-modules expect keys to have some well-known attributes.
-For instance, the
-<TT>proto</TT>
-attribute specifies the protocol module
-responsible for using a particular key,
-and protocol modules may expect other well-known attributes
-(many expect keys to have
-<TT>!password</TT>
-attributes, for example).
-Additional attributes can be used as comments or for
-further discrimination without intervention by 
-<TT>factotum</TT>;
-for example, the APOP and IMAP mail clients conventionally
-include a
-<TT>server</TT>
-attribute to select an appropriate key for authentication.
-<br>&#32;<br>
-Unlike in SDSI,
-keys in Plan 9 have no nested structure.  This design
-keeps the representation simple and straightforward.
-If necessary, we could add a nested attribute
-or, in the manner of relational databases, an attribute that
-selects another tuple, but so far the simple design has been sufficient.
-<br>&#32;<br>
-A simple common structure for all keys makes them easy for users
-to administer,
-but the set of attributes and their interpretation is still
-protocol-specific and can be subtle.
-Users may still
-need to consult a manual to understand all details.
-Many attributes
-(<TT>proto</TT>,
-<TT>user</TT>,
-<TT>password</TT>,
-<TT>server</TT>)
-are self-explanatory and our short experience
-has not uncovered any particular difficulty in handling keys.
-Things
-will likely get messier, however,
-when we grapple with public
-keys and their myriad components.
-<H4>2.5 Protecting keys
-</H4>
-<br>&#32;<br>
-Secrets must be prevented from escaping
-<TT>factotum</TT>.
-There are a number of ways they could leak:
-another process might be able to debug the agent process, the
-agent might swap out to disk, or the process might willingly
-disclose the key.
-The last is the easiest to avoid:
-secret information in a key is marked
-as such, and
-whenever
-<TT>factotum</TT>
-prints keys or queries for new
-ones, it is careful to avoid displaying secret information.
-(The only exception to this is the
-``plaintext password'' protocol, which consists
-of sending the values of the
-<TT>user</TT>
-and
-<TT>!password</TT>
-attributes.
-Only keys tagged with
-<TT>proto=pass</TT>
-can have their passwords disclosed by this mechanism.)
-<br>&#32;<br>
-Preventing the first two forms of leakage
-requires help from the kernel.
-In Plan 9, every process is
-represented by a directory in the
-<TT>/proc</TT>
-file system.
-Using the files in this directory,
-other processes could (with appropriate access permission) examine
-<TT>factotum</TT>'s
-memory and registers.
-<TT>Factotum</TT>
-is protected from processes of other users
-by the default access bits of its
-<TT>/proc</TT>
-directory.
-However, we'd also like to protect the
-agent from other processes owned by the same user,
-both to avoid honest mistakes and to prevent
-an unattended terminal being
-exploited to discover secret passwords.
-To do this, we added a control message to
-<TT>/proc</TT>
-called
-<TT>private</TT>.
-Once the
-<TT>factotum</TT>
-process has written
-<TT>private</TT>
-to its
-<TT>/proc/</TT><I>pid</I><TT>/ctl</TT>
-file, no process can access
-<TT>factotum</TT>'s
-memory
-through
-<TT>/proc</TT>.
-(Plan 9 has no other mechanism, such as
-<TT>/dev/kmem</TT>,
-for accessing a process's memory.)
-<br>&#32;<br>
-Similarly, the agent's address space should not be
-swapped out, to prevent discovering unencrypted
-keys on the swapping media.
-The
-<TT>noswap</TT>
-control message in
-<TT>/proc</TT>
-prevents this scenario.
-Neither
-<TT>private</TT>
-nor
-<TT>noswap</TT>
-is specific to
-<TT>factotum</TT>.
-User-level file servers such as
-<TT>dossrv</TT>,
-which interprets FAT file systems,
-could use
-<TT>noswap</TT>
-to keep their buffer caches from being
-swapped to disk.
-<br>&#32;<br>
-Despite our precautions, attackers might still
-find a way to gain access to a process running as the host
-owner on a machine.
-Although they could not directly
-access the keys, attackers could use the local
-<TT>factotum</TT>
-to perform authentications for them.
-In the case
-of some keys, for example those locking bank
-accounts, we want a way to disable or at least
-detect such access.
-That is the role of the
-<TT>confirm</TT>
-attribute in a key.
-Whenever a key with a
-<TT>confirm</TT>
-attribute is accessed, the local user must
-confirm use of the key via a local GUI.
-The next section describes the actual mechanism.
-<br>&#32;<br>
-We have not addressed leaks possible as a result of
-someone rebooting or resetting a machine running
-<TT>factotum</TT>.
-For example, someone could reset a machine
-and reboot it with a debugger instead of a kernel,
-allowing them to examine the contents of memory
-and find keys.  We have not found a satisfactory
-solution to this problem.
-<H4>2.6 Factotum transactions
-</H4>
-<br>&#32;<br>
-External programs manage
-<TT>factotum</TT>'s
-internal key state
-through its file interface,
-writing textual
-<TT>key</TT>
-and
-<TT>delkey</TT>
-commands to the
-<TT>/mnt/factotum/ctl</TT>
-file.
-Both commands take a list of attributes as an argument.
-<TT>Key</TT>
-creates a key with the given attributes, replacing any
-extant key with an identical set of public attributes.
-<TT>Delkey</TT>
-deletes all keys that match the given set of attributes.
-Reading the 
-<TT>ctl</TT>
-file returns a list of keys, one per line, displaying only public attributes.
-The following example illustrates these interactions.
-<DL><DT><DD><TT><PRE>
-% cd /mnt/factotum
-% ls -l
--lrw------- gre gre 0 Jan 30 22:17 confirm
---rw------- gre gre 0 Jan 30 22:17 ctl
--lr-------- gre gre 0 Jan 30 22:17 log
--lrw------- gre gre 0 Jan 30 22:17 needkey
---r--r--r-- gre gre 0 Jan 30 22:17 proto
---rw-rw-rw- gre gre 0 Jan 30 22:17 rpc
-% cat &gt;ctl
-key dom=bell-labs.com proto=p9sk1 user=gre
-    !password='don''t tell'
-key proto=apop server=x.y.com user=gre
-    !password='bite me'
-^D
-% cat ctl
-key dom=bell-labs.com proto=p9sk1 user=gre
-key proto=apop server=x.y.com user=gre
-% echo 'delkey proto=apop' &gt;ctl
-% cat ctl
-key dom=bell-labs.com proto=p9sk1 user=gre
-% 
-</PRE></TT></DL>
-(A file with the
-<TT>l</TT>
-bit set can be opened by only one process at a time.)
-<br>&#32;<br>
-The heart of the interface is the
-<TT>rpc</TT>
-file.
-Programs authenticate with
-<TT>factotum</TT>
-by writing a request to the
-<TT>rpc</TT>
-file
-and reading back the reply; this sequence is called an RPC
-<I>transaction</I>.
-Requests and replies have the same format:
-a textual verb possibly followed by arguments,
-which may be textual or binary.
-The most common reply verb is
-<TT>ok</TT>,
-indicating success.
-An RPC session begins with a
-<TT>start</TT>
-transaction; the argument is a key query as described
-earlier.
-Once started, an RPC conversation usually consists of 
-a sequence of
-<TT>read</TT>
-and
-<TT>write</TT>
-transactions.
-If the conversation is successful, an
-<TT>authinfo</TT>
-transaction will return information about
-the identities learned during the transaction.
-The
-<TT>attr</TT>
-transaction returns a list of attributes for the current
-conversation; the list includes any attributes given in
-the 
-<TT>start</TT>
-query as well as any public attributes from keys being used.
-<br>&#32;<br>
-As an example of the
-<TT>rpc</TT>
-file in action, consider a mail client
-connecting to a mail server and authenticating using
-the POP3 protocol's APOP challenge-response command.
-There are four programs involved: the mail client <I>P</I><SUB><I>C</I></SUB>, the client
-<TT>factotum</TT>
-<I>F</I><SUB><I>C</I></SUB>, the mail server <I>P</I><SUB><I>S</I></SUB>, and the server
-<TT>factotum</TT>
-<I>F</I><SUB><I>S</I></SUB>.
-All authentication computations are handled by the
-<TT>factotum</TT>
-processes.
-The mail programs' role is just to relay messages.
-<br>&#32;<br>
-At startup, the mail server at
-<TT>x.y.com</TT>
-begins an APOP conversation
-with its
-<TT>factotum</TT>
-to obtain the banner greeting, which
-includes a challenge:
-<DL><DT><DD><TT><PRE>
-<I>P</I><SUB><I>S</I></SUB>-&gt;<I>F</I><SUB><I>S</I></SUB>: start proto=apop role=server
-<I>F</I><SUB><I>S</I></SUB>-&gt;<I>P</I><SUB><I>S</I></SUB>: ok
-<I>P</I><SUB><I>S</I></SUB>-&gt;<I>F</I><SUB><I>S</I></SUB>: read
-<I>F</I><SUB><I>S</I></SUB>-&gt;<I>P</I><SUB><I>S</I></SUB>: ok +OK POP3 <I>challenge</I>
-</PRE></TT></DL>
-Having obtained the challenge, the server greets the client:
-<DL><DT><DD><TT><PRE>
-<I>P</I><SUB><I>S</I></SUB>-&gt;<I>P</I><SUB><I>C</I></SUB>: +OK POP3 <I>challenge</I>
-</PRE></TT></DL>
-The client then uses an APOP conversation with its
-<TT>factotum</TT>
-to obtain a response:
-<DL><DT><DD><TT><PRE>
-<I>P</I><SUB><I>C</I></SUB>-&gt;<I>F</I><SUB><I>C</I></SUB>: start proto=apop role=client
-            server=x.y.com
-<I>F</I><SUB><I>C</I></SUB>-&gt;<I>P</I><SUB><I>C</I></SUB>: ok
-<I>P</I><SUB><I>C</I></SUB>-&gt;<I>F</I><SUB><I>C</I></SUB>: write +OK POP3 <I>challenge</I>
-<I>F</I><SUB><I>C</I></SUB>-&gt;<I>P</I><SUB><I>C</I></SUB>: ok
-<I>P</I><SUB><I>C</I></SUB>-&gt;<I>F</I><SUB><I>C</I></SUB>: read
-<I>F</I><SUB><I>C</I></SUB>-&gt;<I>P</I><SUB><I>C</I></SUB>: ok APOP gre <I>response</I>
-</PRE></TT></DL>
-<TT>Factotum</TT>
-requires that
-<TT>start</TT>
-requests include a 
-<TT>proto</TT>
-attribute, and the APOP module requires an additional
-<TT>role</TT>
-attribute, but the other attributes are optional and only
-restrict the key space.
-Before responding to the
-<TT>start</TT>
-transaction, the client
-<TT>factotum</TT>
-looks for a key to
-use for the rest of the conversation.
-Because of the arguments in the
-<TT>start</TT>
-request, the key must have public attributes
-<TT>proto=apop</TT>
-and
-<TT>server=x.y.com</TT>;
-as mentioned earlier,
-the APOP module additionally requires that the key have
-<TT>user</TT>
-and
-<TT>!password</TT>
-attributes.
-Now that the client has obtained a response
-from its
-<TT>factotum</TT>,
-it echoes that response to the server:
-<DL><DT><DD><TT><PRE>
-<I>P</I><SUB><I>C</I></SUB>-&gt;<I>P</I><SUB><I>S</I></SUB>: APOP gre <I>response</I>
-</PRE></TT></DL>
-Similarly, the server passes this message to
-its
-<TT>factotum</TT>
-and obtains another to send back.
-<DL><DT><DD><TT><PRE>
-<I>P</I><SUB><I>S</I></SUB>-&gt;<I>F</I><SUB><I>S</I></SUB>: write APOP gre <I>response</I>
-<I>F</I><SUB><I>S</I></SUB>-&gt;<I>P</I><SUB><I>S</I></SUB>: ok
-<I>P</I><SUB><I>S</I></SUB>-&gt;<I>F</I><SUB><I>S</I></SUB>: read
-<I>F</I><SUB><I>S</I></SUB>-&gt;<I>P</I><SUB><I>S</I></SUB>: ok +OK welcome

-<I>P</I><SUB><I>S</I></SUB>-&gt;<I>P</I><SUB><I>C</I></SUB>: +OK welcome
-</PRE></TT></DL>
-Now the authentication protocol is done, and
-the server can retrieve information
-about what the protocol established.
-<DL><DT><DD><TT><PRE>
-<I>P</I><SUB><I>S</I></SUB>-&gt;<I>F</I><SUB><I>S</I></SUB>: authinfo
-<I>F</I><SUB><I>S</I></SUB>-&gt;<I>P</I><SUB><I>S</I></SUB>: ok client=gre
-            capability=<I>capability</I>
-</PRE></TT></DL>
-The
-<TT>authinfo</TT>
-data is a list of
-<I>attr</I><TT>=</TT><I>value</I>
-pairs, here a client user name and a capability.
-(Protocols that establish shared secrets or provide
-mutual authentication indicate this by adding
-appropriate
-<I>attr</I><TT>=</TT><I>value</I>
-pairs.)
-The capability can be used by the server to change its
-identity to that of the client, as described earlier.
-Once it has changed its identity, the server can access and serve
-the client's mailbox.
-<br>&#32;<br>
-Two more files provide hooks for a graphical
-<TT>factotum</TT>
-control interface.
-The first, 
-<TT>confirm</TT>,
-allows the user detailed control over the use of certain keys.
-If a key has a
-<TT>confirm=</TT>
-attribute, then the user must approve each use of the key.
-A separate program with a graphical interface reads from the
-<TT>confirm</TT>
-file to see when a confirmation is necessary.
-The read blocks until a key usage needs to be approved, whereupon
-it will return a line of the form
-<DL><DT><DD><TT><PRE>
-confirm tag=1 <I>attributes</I>
-</PRE></TT></DL>
-requesting permission to use the key with those public attributes.
-The graphical interface then prompts the user for approval
-and writes back
-<DL><DT><DD><TT><PRE>
-tag=1 answer=yes
-</PRE></TT></DL>
-(or
-<TT>answer=no</TT>).
-<br>&#32;<br>
-The second file,
-<TT>needkey</TT>,
-diverts key requests.
-In the APOP example, if a suitable key had not been found
-during the
-<TT>start</TT>
-transaction,
-<TT>factotum</TT>
-would have indicated failure by
-returning a response indicating
-what key was needed:
-<DL><DT><DD><TT><PRE>
-<I>F</I><SUB><I>C</I></SUB>-&gt;<I>P</I><SUB><I>C</I></SUB>: needkey proto=apop
-    server=x.y.com user? !password?
-</PRE></TT></DL>
-A typical client would then prompt the user for the desired
-key information, create a new key via the
-<TT>ctl</TT>
-file, and then reissue the 
-<TT>start</TT>
-request.
-If the
-<TT>needkey</TT>
-file is open,
-then instead of failing, the transaction
-will block, and the next read from the
-<TT>/mnt/factotum/needkey</TT>
-file will return a line of the form
-<DL><DT><DD><TT><PRE>
-needkey tag=1 <I>attributes</I>
-</PRE></TT></DL>
-The graphical interface then prompts the user for the needed
-key information, creates the key via the
-<TT>ctl</TT>
-file, and writes back
-<TT>tag=1</TT>
-to resume the transaction.
-<br>&#32;<br>
-The remaining files are informational and used for debugging.
-The
-<TT>proto</TT>
-file contains a list of supported protocols (to see what protocols the
-system supports,
-<TT>cat</TT>
-<TT>/mnt/factotum/proto</TT>),
-and the
-<TT>log</TT>
-file contains a log of operations and debugging output
-enabled by a
-<TT>debug</TT>
-control message.
-<br>&#32;<br>
-The next few sections explain how
-<TT>factotum</TT>
-is used by system services.
-<H4>3 Authentication in 9P
-</H4>
-<br>&#32;<br>
-Plan 9 uses a remote file access protocol, 9P
-[Pike93],
-to connect to resources such as the
-file server and remote processes.
-The original design for 9P included special messages at the start of a conversation
-to authenticate the user.
-Multiple users can share a single connection, such as when a CPU server
-runs processes for many users connected to a single file server,
-but each must authenticate separately.
-The authentication protocol, similar to that of Kerberos
-[Stei88],
-used a sequence of messages passed between client, file server, and authentication
-server to verify the identities of the user, calling machine, and serving machine.
-One major drawback to the design was that the authentication method was defined by 9P
-itself and could not be changed.  
-Moreover, there was no mechanism to relegate
-authentication to an external (trusted) agent,
-so a process implementing 9P needed, besides support for file service,
-a substantial body of cryptographic code to implement a handful of startup messages
-in the protocol.
-<br>&#32;<br>
-A recent redesign of 9P
-addressed a number of file service issues outside the scope of this paper.
-On issues of authentication, there were two goals:
-first, to remove details about authentication from the
-protocol itself; second, to allow an external program to execute the authentication
-part of the protocol.
-In particular, we wanted a way to quickly incorporate
-ideas found in other systems such as SFS
-[Mazi99].
-<br>&#32;<br>
-Since 9P is a file service protocol, the solution involved creating a new type of file
-to be served: an
-<I>authentication</I>
-<I>file</I>.
-Connections to a 9P service begin in a state that
-allows no general file access but permits the client
-to open an authentication file
-by sending a special message, generated by the new
-<TT>fauth</TT>
-system call:
-<DL><DT><DD><TT><PRE>
-afd = fauth(int fd, char *servicename);
-</PRE></TT></DL>
-Here
-<TT>fd</TT>
-is the user's file descriptor for the established network connection to the 9P server
-and
-<TT>servicename</TT>
-is the name of the desired service offered on that server, typically the file subsystem
-to be accessed.
-The returned file descriptor,
-<TT>afd</TT>,
-is a unique handle representing the authentication file
-created for this connection to authenticate to
-this service; it is analogous to a capability.
-The authentication file represented by
-<TT>afd</TT>
-is not otherwise addressable on the server, such as through
-the file name hierarchy.
-In all other respects, it behaves like a regular file;
-most important, it accepts standard read and write operations.
-<br>&#32;<br>
-To prove its identity, the user process (via
-<TT>factotum</TT>)
-executes the authentication protocol,
-described in the next section of this paper,
-over the
-<TT>afd</TT>
-file descriptor with ordinary reads and writes.
-When client and server have successfully negotiated, the authentication file
-changes state so it can be used as evidence of authority in
-<TT>mount</TT>.
-<br>&#32;<br>
-Once identity is established, the process presents the (now verified)
-<TT>afd</TT>
-as proof of identity to the
-<TT>mount</TT>
-system call:
-<DL><DT><DD><TT><PRE>
-mount(int fd, int afd, char *mountpoint,
-      int flag, char *servicename)
-</PRE></TT></DL>
-If the
-<TT>mount</TT>
-succeeds, the user now
-has appropriate permissions for the file hierarchy made
-visible at the mount point.
-<br>&#32;<br>
-This sequence of events has several advantages.
-First, the actual authentication protocol is implemented using regular reads and writes,
-not special 9P messages, so
-they can be processed, forwarded, proxied, and so on by
-any 9P agent without special arrangement.
-Second, the business of negotiating the authentication by reading and writing the
-authentication file can be delegated to an outside agent, in particular
-<TT>factotum</TT>;
-the programs that implement the client and server ends of a 9P conversation need
-no authentication or cryptographic code.
-Third,
-since the authentication protocol is not defined by 9P itself, it is easy to change and
-can even be negotiated dynamically.
-Finally, since
-<TT>afd</TT>
-acts like a capability, it can be treated like one:
-handed to another process to give it special permissions;
-kept around for later use when authentication is again required;
-or closed to make sure no other process can use it.
-<br>&#32;<br>
-All these advantages stem from moving the authentication negotiation into
-reads and writes on a separate file.
-As is often the case in Plan 9,
-making a resource (here authentication) accessible with a file-like interface
-reduces
-<I>a</I>
-<I>priori</I>
-the need for special interfaces.
-<br>&#32;<br>
-<H4>3.1 Plan 9 shared key protocol
-</H4>
-<br>&#32;<br>
-In addition to the various standard protocols supported by
-<TT>factotum</TT>,
-we use a shared key protocol for native
-Plan 9 authentication.
-This protocol provides backward compatibility with
-older versions of the system.  One reason for the new
-architecture is to let us replace such protocols
-in the near future with more cryptographically secure ones.
-<br>&#32;<br>
-<I>P9sk1</I>
-is a shared key protocol that uses tickets much like those
-in the original Kerberos.
-The difference is that we've
-replaced the expiration time in Kerberos tickets with
-a random nonce parameter and a counter.
-We summarize it here:
-<DL><DT><DD><TT><PRE>
-<I>C</I>-&gt;<I>S</I>:  <I>nonce</I><SUB><I>C</I></SUB>
-<I>S</I>-&gt;<I>C</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>

-<I>C</I>-&gt;<I>A</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,
-         <I>factotum</I><SUB><I>C</I></SUB>
-<I>A</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>C</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
-         <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>}

-<I>C</I>-&gt;<I>S</I>:  <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
-         <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>counter</I>}
-<I>S</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>C</I></SUB>,<I>counter</I>}
-</PRE></TT></DL>
-(Here <I>K</I>{<I>x</I>} indicates <I>x</I> encrypted with
-DES key <I>K</I>.)
-The first two messages exchange nonces and server identification.
-After this initial exchange, the client contacts the authentication
-server to obtain a pair of encrypted tickets, one encrypted with
-the client key and one with the server key.
-The client relays the server ticket to the server.
-The server believes that the ticket is new
-because it contains
-<I>nonce</I><SUB><I>S</I></SUB>
-and that the ticket is from the authentication
-server because it is encrypted in the server key <I>K</I><SUB><I>S</I></SUB>.
-The ticket is basically a statement from the authentication
-server that now <I>uid</I><SUB><I>C</I></SUB> and <I>uid</I><SUB><I>S</I></SUB> share a
-secret <I>K</I><SUB><I>n</I></SUB>.
-The authenticator <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>counter</I>}
-convinces the server that the client knows <I>K</I><SUB><I>n</I></SUB> and thus
-must be <I>uid</I><SUB><I>C</I></SUB>.
-Similarly, authenticator <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>C</I></SUB>,<I>counter</I>}
-convinces the client that the server knows <I>K</I><SUB><I>n</I></SUB> and thus
-must be <I>uid</I><SUB><I>S</I></SUB>.
-Tickets can be reused, without contacting the authentication
-server again, by incrementing the counter before each
-authenticator is generated.
-<br>&#32;<br>
-In the future we hope to introduce a public key version of
-p9sk1,
-which would allow authentication even
-when the authentication server is not available.
-<H4>3.2 The authentication server
-</H4>
-<br>&#32;<br>
-Each Plan 9 security domain has an authentication server (AS)
-that all users trust to keep the complete set of shared keys.
-It also offers services for users and administrators to manage the
-keys, create and disable accounts, and so on.
-It typically runs on
-a standalone machine with few other services.
-The AS comprises two services,
-<TT>keyfs</TT>
-and
-<TT>authsrv</TT>.
-<br>&#32;<br>
-<TT>Keyfs</TT>
-is a user-level file system that manages an
-encrypted database of user accounts.
-Each account is represented by a directory containing the
-files
-<TT>key</TT>,
-containing the Plan 9 key for p9sk1;
-<TT>secret</TT>
-for the challenge/response protocols (APOP, VNC, CHAP, MSCHAP,
-CRAM);
-<TT>log</TT>
-for authentication outcomes;
-<TT>expire</TT>
-for an expiration time; and
-<TT>status</TT>.
-If the expiration time passes,
-if the number of successive failed authentications
-exceeds 50, or if
-<TT>disabled</TT>
-is written to the status file,
-any attempt to access the
-<TT>key</TT>
-or
-<TT>secret</TT>
-files will fail.
-<br>&#32;<br>
-<TT>Authsrv</TT>
-is a network service that brokers shared key authentications
-for the protocols p9sk1, APOP, VNC, CHAP, MSCHAP,
-and CRAM.  Remote users can also call
-<TT>authsrv</TT>
-to change their passwords.
-<br>&#32;<br>
-The
-p9sk1
-protocol was described in the previous
-section.
-The challenge/response protocols differ
-in detail but all follow the general structure:
-<DL><DT><DD><TT><PRE>
-<I>C</I>-&gt;<I>S</I>:  <I>nonce</I><SUB><I>C</I></SUB>
-<I>S</I>-&gt;<I>C</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>
-<I>C</I>-&gt;<I>A</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>,
-         <I>hostid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>
-<I>A</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>C</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
-         <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>}
-<I>C</I>-&gt;<I>S</I>:  <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
-         <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>}
-<I>S</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>C</I></SUB>}
-</PRE></TT></DL>
-The password protocol is:
-<DL><DT><DD><TT><PRE>
-<I>C</I>-&gt;<I>A</I>:  <I>uid</I><SUB><I>C</I></SUB>
-<I>A</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>c</I></SUB>{<I>K</I><SUB><I>n</I></SUB>}
-<I>C</I>-&gt;<I>A</I>:  <I>K</I><SUB><I>n</I></SUB>{<I>password</I><SUB><I>old</I></SUB>,<I>password</I><SUB><I>new</I></SUB>}
-<I>A</I>-&gt;<I>C</I>:  <I>OK</I>
-</PRE></TT></DL>
-To avoid replay attacks, the pre-encryption
-clear text for each of the protocols (as well as for p9sk1) includes
-a tag indicating the encryption's role in the
-protocol.  We elided them in these outlines.
-<H4>3.3 Protocol negotiation
-</H4>
-<br>&#32;<br>
-Rather than require particular protocols for particular services,
-we implemented a negotiation metaprotocol,
-<I>p9any</I>,
-which chooses the actual authentication protocol to use.
-P9any
-is used now by all native services on Plan 9.
-<br>&#32;<br>
-The metaprotocol is simple.  The callee sends a
-null-terminated string of the form:
-<DL><DT><DD><TT><PRE>
-v.<I>n</I> <I>proto</I><SUB>1</SUB>@<I>domain</I><SUB>1</SUB> <I>proto</I><SUB>2</SUB>@<I>domain</I><SUB>2</SUB> ...
-</PRE></TT></DL>
-where
-<I>n</I>
-is a decimal version number, <I>proto</I><SUB><I>k</I></SUB>
-is the name of a protocol for which the
-<TT>factotum</TT>
-has a key, and <I>domain</I><SUB><I>k</I></SUB>
-is the name of the domain in which the key is
-valid.
-The caller then responds
-<DL><DT><DD><TT><PRE>
-<I>proto</I>@<I>domain</I>
-</PRE></TT></DL>
-indicating its choice.
-Finally the callee responds
-<DL><DT><DD><TT><PRE>
-OK
-</PRE></TT></DL>
-Any other string indicates failure.
-At this point the chosen protocol commences.
-The final fixed-length reply is used to make it easy to
-delimit the I/O stream should the chosen protocol
-require the caller rather than the callee to send the first message.
-<br>&#32;<br>
-With this negotiation metaprotocol, the underlying
-authentication protocols used for Plan 9 services
-can be changed under any application just
-by changing the keys known by the
-<TT>factotum</TT>
-agents at each end.
-<br>&#32;<br>
-P9any is vulnerable to man in the middle attacks
-to the extent that the attacker may constrain the
-possible choices by changing the stream.  However,
-we believe this is acceptable since the attacker
-cannot force either side to choose algorithms
-that it is unwilling to use.
-<H4>4 Library Interface to Factotum
-</H4>
-<br>&#32;<br>
-Although programs can access
-<TT>factotum</TT>'s
-services through its file system interface,
-it is more common to use a C library that
-packages the interaction.
-There are a number of routines in the library,
-not all of which are relevant here, but a few
-examples should give their flavor.
-<br>&#32;<br>
-First, consider the problem of mounting a remote file server using 9P.
-An earlier discussion showed how the
-<TT>fauth</TT>
-and
-<TT>mount</TT>
-system calls use an authentication file,
-<TT>afd</TT>,
-as a capability,
-but not how
-<TT>factotum</TT>
-manages
-<TT>afd</TT>.
-The library contains a routine,
-<TT>amount</TT>
-(authenticated mount), that is used by most programs in preference to
-the raw
-<TT>fauth</TT>
-and
-<TT>mount</TT>
-calls.
-<TT>Amount</TT>
-engages
-<TT>factotum</TT>
-to validate
-<TT>afd</TT>;
-here is the complete code:
-<DL><DT><DD><TT><PRE>
-int
-amount(int fd, char *mntpt,
-	int flags, char *aname)
-{
-	int afd, ret;
-	AuthInfo *ai;
-
-	afd = fauth(fd, aname);
-	if(afd &gt;= 0){
-		ai = auth_proxy(afd, amount_getkey,
-			"proto=p9any role=client");
-		if(ai != NULL)
-			auth_freeAI(ai);
-	}
-	ret = mount(fd, afd, mntpt,
-		flags, aname);
-	if(afd &gt;= 0)
-		close(afd);
-	return ret;
-}
-</PRE></TT></DL>
-where parameter
-<TT>fd</TT>
-is a file descriptor returned by
-<TT>open</TT>
-or
-<TT>dial</TT>
-for a new connection to a file server.
-The conversation with
-<TT>factotum</TT>
-occurs in the call to
-<TT>auth_proxy</TT>,
-which specifies, as a key query,
-which authentication protocol to use
-(here the metaprotocol
-<TT>p9any</TT>)
-and the role being played
-(<TT>client</TT>).
-<TT>Auth_proxy</TT>
-will read and write the
-<TT>factotum</TT>
-files, and the authentication file descriptor
-<TT>afd</TT>,
-to validate the user's right to access the service.
-If the call is successful, any auxiliary data, held in an
-<TT>AuthInfo</TT>
-structure, is freed.
-In any case, the
-<TT>mount</TT>
-is then called with the (perhaps validated)
-<TT>afd</TT>.
-A 9P server can cause the
-<TT>fauth</TT>
-system call to fail, as an indication that authentication is
-not required to access the service.
-<br>&#32;<br>
-The second argument to
-<TT>auth_proxy</TT>
-is a function, here
-<TT>amount_getkey</TT>,
-to be called if secret information such as a password or
-response to a challenge is required as part of the authentication.
-This function, of course, will provide this data to
-<TT>factotum</TT>
-as a
-<TT>key</TT>
-message on the
-<TT>/mnt/factotum/ctl</TT>
-file.
-<br>&#32;<br>
-Although the final argument to
-<TT>auth_proxy</TT>
-in this example is a simple string, in general
-it can be a formatted-print specifier in the manner of
-<TT>printf</TT>,
-to enable the construction of more elaborate key queries.
-<br>&#32;<br>
-As another example, consider the Plan 9
-<TT>cpu</TT>
-service, which exports local devices to a shell process on
-a remote machine, typically
-to connect the local screen and keyboard to a more powerful computer.
-At heart,
-<TT>cpu</TT>
-is a superset of a service called
-<TT>exportfs</TT>
-[Pike93],
-which allows one machine to see an arbitrary portion of the file name space
-of another machine, such as to
-export the network device to another machine
-for gatewaying.
-However,
-<TT>cpu</TT>
-is not just
-<TT>exportfs</TT>
-because it also delivers signals such as interrupt
-and negotiates the initial environment
-for the remote shell.
-<br>&#32;<br>
-To authenticate an instance of
-<TT>cpu</TT>
-requires
-<TT>factotum</TT>
-processes on both ends: the local, client
-end running as the user on a terminal
-and the remote, server
-end running as the host owner of the server machine.
-Here is schematic code for the two ends:
-<DL><DT><DD><TT><PRE>
-/* client */
-int
-p9auth(int fd)
-{
-	AuthInfo *ai;
-
-	ai = auth_proxy(fd, auth_getkey,
-		"proto=p9any role=client");
-	if(ai == NULL)
-		return -1;
-
-	/* start cpu protocol here */
-}
-
-/* server */
-int
-srvp9auth(int fd, char *user)
-{
-	AuthInfo *ai;
-
-	ai = auth_proxy(fd, NULL,
-		"proto=p9any role=server");
-	if(ai == NULL)
-		return -1;
-	/* set user id for server process */
-	if(auth_chuid(ai, NULL) &lt; 0)
-		return -1;
-
-	/* start cpu protocol here */
-}
-</PRE></TT></DL>
-<TT>Auth_chuid</TT>
-encapsulates the negotiation to change a user id using the
-<TT>caphash</TT>
-and
-<TT>capuse</TT>
-files of the (server) kernel.
-Note that although the client process may ask the user for new keys, using
-<TT>auth_getkey</TT>,
-the server machine, presumably a shared machine with a pseudo-user for
-the host owner, sets the key-getting function to
-<TT>NULL</TT>.
-<H4>5 Secure Store
-</H4>
-<br>&#32;<br>
-<TT>Factotum</TT>
-keeps its keys in volatile memory, which must somehow be
-initialized at boot time.
-Therefore,
-<TT>factotum</TT>
-must be
-supplemented by a persistent store, perhaps
-a floppy disk containing a key file of commands to be copied into
-<TT>/mnt/factotum/ctl</TT>
-during bootstrap.
-But removable media are a nuisance to carry and
-are vulnerable to theft.
-Keys could be stored encrypted on a shared file system, but
-only if those keys are not necessary for authenticating to
-the file system in the first place.
-Even if the keys are encrypted under a user
-password, a thief might well succeed with a dictionary attack.
-Other risks of local storage are loss of the contents
-through mechanical mishap or dead batteries.
-Thus for convenience and
-safety we provide a
-<TT>secstore</TT>
-(secure store) server in the network to hold each user's permanent list of keys, a
-<I>key</I>
-<I>file</I>.
-<br>&#32;<br>
-<TT>Secstore</TT>
-is a file server for encrypted data,
-used only during bootstrapping.
-It must provide strong
-authentication and resistance to passive and active protocol attacks
-while assuming nothing more from the client than a password.
-Once
-<TT>factotum</TT>
-has loaded the key file, further encrypted or authenticated
-file storage can be accomplished by standard mechanisms.
-<br><img src="-.19111514.gif"><br>
-<br>&#32;<br>
-The cryptographic technology that enables
-<TT>secstore</TT>
-is a form of encrypted
-key exchange
-called PAK
-[Boyk00],
-analogous to
-EKE
-[Bell93],
-SRP
-[Wu98],
-or
-SPEKE
-[Jabl].
-PAK was chosen
-because it comes with a proof of equivalence in strength to
-Diffie-Hellman; subtle flaws in some earlier encrypted key exchange
-protocols and implementations have encouraged us to take special care.
-In outline, the PAK protocol is:
-<DL><DT><DD><TT><PRE>
-<I>C</I>-&gt;<I>S</I>: <I>C</I>,<I>g</I><SUP><I>x</I></SUP><I>H</I>
-<I>S</I>-&gt;<I>C</I>: <I>S</I>,<I>g</I><SUP><I>y</I></SUP>,<I>hash</I>(<I>g</I><SUP><I>xy</I></SUP>,<I>C</I>,<I>S</I>)
-<I>C</I>-&gt;<I>S</I>: <I>hash</I>(<I>g</I><SUP><I>xy</I></SUP>,<I>S</I>,<I>C</I>)
-</PRE></TT></DL>
-where <I>H</I> is a preshared secret between client <I>C</I> and server <I>S</I>.
-There are several variants of PAK, all presented in papers
-mainly concerned with proofs of cryptographic properties.
-To aid implementers, we have distilled a description of the specific
-version we use into an Appendix to this paper.
-The Plan 9 open source license provides for use of Lucent's
-encrypted key exchange patents in this context.
-<br>&#32;<br>
-As a further layer of defense against password theft,
-we provide (within the encrypted channel <I>C</I>-&gt;<I>S</I>)
-information that is validated at a RADIUS server,
-such as the digits from a hardware token
-[RFC2138].
-This provides two-factor authentication, which potentially
-requires tricking two independent administrators in any attack by
-social engineering.
-<br>&#32;<br>
-The key file stored on the server is encrypted with AES (Rijndael) using CBC
-with a 10-byte initialization vector and trailing authentication padding.
-All this is invisible to the user of
-<TT>secstore</TT>.
-For that matter, it is invisible to the
-<TT>secstore</TT>
-server as well;
-if the AES Modes of Operation are standardized and a new encryption format
-designed, it can be implemented by a client without change to the server.
-The
-<TT>secstore</TT>
-is deliberately not backed up;  the user is expected to
-use more than one
-<TT>secstore</TT>
-or save the key file on removable media
-and lock it away.
-The user's password is hashed to create the <I>H</I> used
-in the PAK protocol;  a different hash of the password is used as
-the file encryption key.
-Finally, there is a command (inside the authenticated,
-encrypted channel between client and
-<TT>secstore</TT>)
-to change passwords by sending
-a new <I>H</I>;
-for consistency, the client process must at the same time fetch and re-encrypt all files.
-<br>&#32;<br>
-When
-<TT>factotum</TT>
-starts, it dials the local
-<TT>secstore</TT>
-and checks whether the user has an account.
-If so,
-it prompts for the user's
-<TT>secstore</TT>
-password and fetches the key file.
-The PAK protocol
-ensures mutual authentication and prevents dictionary attacks on the password
-by passive wiretappers or active intermediaries.
-Passwords saved in
-the key file can be long random strings suitable for
-simpler challenge/response authentication protocols.
-Thus the user need only remember
-a single, weaker password to enable strong, ``single sign on'' authentication to
-unchanged legacy applications scattered across multiple authentication domains.
-<H4>6 Transport Layer Security
-</H4>
-<br>&#32;<br>
-Since the Plan 9 operating system is designed for use in network elements
-that must withstand direct attack, unguarded by firewall or VPN, we seek
-to ensure that all applications use channels with appropriate mutual
-authentication and encryption.
-A principal tool for this is TLS 1.0
-[RFC2246].
-(TLS 1.0 is nearly the same as SSL 3.0,
-and our software is designed to interoperate
-with implementations of either standard.)
-<br>&#32;<br>
-TLS defines a record layer protocol for message integrity and privacy
-through the use of message digesting and encryption with shared secrets.
-We implement this service as a kernel device, though it could
-be performed at slightly higher cost by invoking a separate program.
-The library interface to the TLS kernel device is:
-<DL><DT><DD><TT><PRE>
-int pushtls(int fd, char *hashalg,
-    char *cryptalg, int isclient,
-    char *secret, char *dir);
-</PRE></TT></DL>
-Given a file descriptor, the names of message digest and
-encryption algorithms, and the shared secret,
-<TT>pushtls</TT>
-returns a new file descriptor for the encrypted connection.
-(The final argument
-<TT>dir</TT>
-receives the name of the directory in the TLS device that
-is associated with the new connection.)
-The function is named by analogy with the ``push'' operation
-supported by the stream I/O system of Research Unix and the
-first two editions of Plan 9.
-Because adding encryption is as simple as replacing one
-file descriptor with another, adding encryption to a particular
-network service is usually trivial.
-<br>&#32;<br>
-The Plan 9 shared key authentication protocols establish a shared 56-bit secret
-as a side effect.
-Native Plan 9 network services such as
-<TT>cpu</TT>
-and
-<TT>exportfs</TT>
-use these protocols for authentication and then invoke 
-<TT>pushtls</TT>
-with the shared secret.
-<br>&#32;<br>
-Above the record layer, TLS specifies a handshake protocol using public keys
-to establish the session secret.
-This protocol is widely used with HTTP and IMAP4
-to provide server authentication, though with client certificates it could provide
-mutual authentication.  The library function
-<DL><DT><DD><TT><PRE>
-int tlsClient(int fd, TLSconn *conn)
-</PRE></TT></DL>
-handles the initial handshake and returns the result of
-<TT>pushtls</TT>.
-On return, it fills the
-<TT>conn</TT>
-structure with the session ID used
-and the X.509 certificate presented by the
-server, but makes no effort to verify the certificate.
-Although the original design intent of X.509 certificates expected
-that they would be used with a Public Key Infrastructure,
-reliable deployment has been so long delayed and problematic
-that we have adopted the simpler policy of just using the
-X.509 certificate as a representation of the public key,
-depending on a locally-administered directory of SHA1 thumbprints
-to allow applications to decide which public keys to trust
-for which purposes.
-<H4>7 Related Work and Discussion
-</H4>
-<br>&#32;<br>
-Kerberos, one of the earliest distributed authentication
-systems, keeps a set of authentication tickets in a temporary file called
-a ticket cache.  The ticket cache is protected by Unix file permissions.
-An environment variable containing the file name of the ticket cache
-allows for different ticket caches in different simultaneous login sessions.
-A user logs in by typing his or her Kerberos password.
-The login program uses the Kerberos password to obtain a temporary
-ticket-granting ticket from the authentication server, initializes the
-ticket cache with the ticket-granting ticket, and then forgets the password.
-Other applications can use the ticket-granting ticket to sign tickets
-for themselves on behalf of the user during the login session.
-The ticket cache is removed when the user logs out
-[Stei88].
-The ticket cache relieves the user from typing a password
-every time authentication is needed.
-<br>&#32;<br>
-The secure shell SSH develops this idea further, replacing the
-temporary file with a named Unix domain socket connected to
-a user-level program, called an agent.
-Once the SSH agent is started and initialized with one or
-more RSA private keys, SSH clients can employ it
-to perform RSA authentications on their behalf.
-In the absence of an agent, SSH typically uses RSA keys
-read from encrypted disk files or uses passphrase-based
-authentication, both of which would require prompting the user
-for a passphrase whenever authentication is needed
-[Ylon96].
-The self-certifying file system SFS uses a similar agent
-[Kami00],
-not only for moderating the use of client authentication keys 
-but also for verifying server public keys
-[Mazi99].
-<br>&#32;<br>
-<TT>Factotum</TT>
-is a logical continuation of this evolution,
-replacing the program-specific SSH or SFS agents with
-a general agent capable of serving a wide variety of programs.
-Having one agent for all programs removes the need
-to have one agent for each program.
-It also allows the programs themselves to be protocol-agnostic,
-so that, for example, one could build an SSH workalike
-capable of using any protocol supported by
-<TT>factotum</TT>,
-without that program knowing anything about the protocols.
-Traditionally each program needs to implement each
-authentication protocol for itself, an <I>O</I>(<I>n</I><sup>2</sup>) coding
-problem that
-<TT>factotum</TT>
-reduces to <I>O</I>(<I>n</I>).
-<br>&#32;<br>
-Previous work on agents has concentrated on their use by clients
-authenticating to servers.
-Looking in the other direction, Sun Microsystems' 
-pluggable authentication module (PAM) is one
-of the earliest attempts to 
-provide a general authentication mechanism for Unix-like 
-operating systems
-[Sama96].
-Without a central authority like PAM, system policy is tied
-up in the various implementations of network services.
-For example, on a typical Unix, if a system administrator
-decides not to allow plaintext passwords for authentication,
-the configuration files for a half dozen different servers &#8212;
-<TT>rlogind</TT>,
-<TT>telnetd</TT>,
-<TT>ftpd</TT>,
-<TT>sshd</TT>,
-and so on &#8212;
-need to be edited.
-PAM solves this problem by hiding the details of a given
-authentication mechanism behind a common library interface.
-Directed by a system-wide configuration file,
-an application selects a particular authentication mechanism
-by dynamically loading the appropriate shared library.
-PAM is widely used on Sun's Solaris and some Linux distributions.
-<br>&#32;<br>
-<TT>Factotum</TT>
-achieves the same goals
-using the agent approach.
-<TT>Factotum</TT>
-is the only process that needs to create
-capabilities, so all the network servers can run as 
-untrusted users (e.g.,
-Plan 9's
-<TT>none</TT>
-or Unix's
-<TT>nobody</TT>),
-which greatly reduces the harm done if a server is buggy
-and is compromised.
-In fact, if
-<TT>factotum</TT>
-were implemented on Unix along with
-an analogue to the Plan 9 capability device, venerable
-programs like
-<TT>su</TT>
-and
-<TT>login</TT>
-would no longer need to be installed ``setuid root.''
-<br>&#32;<br>
-Several other systems, such as Password Safe [Schn],
-store multiple passwords in an encrypted file,
-so that the user only needs to remember one password.
-Our
-<TT>secstore</TT>
-solution differs from these by placing the storage in
-a hardened location in the network, so that the encrypted file is
-less liable to be stolen for offline dictionary attack and so that
-it is available even when a user has several computers.
-In contrast, Microsoft's Passport system
-[Micr]
-keeps credentials in
-the network, but centralized at one extremely-high-value target.
-The important feature of Passport, setting up trust relationships
-with e-merchants, is outside our scope.
-The
-<TT>secstore</TT>
-architecture is almost identical to
-Perlman and Kaufman's
-[Perl99]
-but with newer EKE technology.
-Like them, we chose to defend mainly against outside attacks
-on
-<TT>secstore</TT>;
-if additional defense of the files on the server
-itself is desired, one can use distributed techniques
-[Ford00].
-<br>&#32;<br>
-We made a conscious choice of placing encryption, message integrity,
-and key management at the application layer
-(TLS, just above layer 4) rather than at layer 3, as in IPsec.
-This leads to a simpler structure for the network stack, easier
-integration with applications and, most important, easier network
-administration since we can recognize which applications are misbehaving
-based on TCP port numbers.  TLS does suffer (relative to IPsec) from
-the possibility of forged TCP Reset, but we feel that this is adequately
-dealt with by randomized TCP sequence numbers.
-In contrast with other TLS libraries, Plan 9 does not
-require the application to change
-<TT>write</TT>
-calls to
-<TT>sslwrite</TT>
-but simply to add a few lines of code at startup
-[Resc01].
-<H4>8 Conclusion
-</H4>
-<br>&#32;<br>
-Writing safe code is difficult.
-Stack attacks,
-mistakes in logic, and bugs in compilers and operating systems
-can each make it possible for an attacker
-to subvert the intended execution sequence of a
-service.
-If the server process has the privileges
-of a powerful user, such as
-<TT>root</TT>
-on Unix, then so does the attacker.
-<TT>Factotum</TT>
-allows us
-to constrain the privileged execution to a single
-process whose core is a few thousand lines of code.
-Verifying such a process, both through manual and automatic means,
-is much easier and less error prone
-than requiring it of all servers.
-<br>&#32;<br>
-An implementation of these ideas is in Plan 9 from Bell Labs, Fourth Edition,
-freely available from <TT>http://plan9.bell-labs.com/plan9</TT>.
-<H4>Acknowledgments
-</H4>
-<br>&#32;<br>
-William Josephson contributed to the implementation of password changing in
-<TT>secstore</TT>.
-We thank Phil MacKenzie and Mart&iacute;n Abadi for helpful comments on early parts
-of the design.
-Chuck Blake,
-Peter Bosch,
-Frans Kaashoek,
-Sape Mullender,
-and
-Lakshman Y. N.,
-predominantly Dutchmen, gave helpful comments on the paper.
-Russ Cox is supported by a fellowship from the Fannie and John Hertz Foundation.
-<H4>References
-</H4>
-<br>&#32;<br>
-[Bell93]
-S.M. Bellovin and M. Merritt,
-``Augmented Encrypted Key Exchange,''
-Proceedings of the 1st ACM Conference on Computer and Communications Security, 1993, pp. 244 - 250.
-<br>&#32;<br>
-[Boyk00]
-Victor Boyko, Philip MacKenzie, and Sarvar Patel,
-``Provably Secure Password-Authenticated Key Exchange using Diffie-Hellman,''
-Eurocrypt 2000, 156-171.
-<br>&#32;<br>
-[RFC2246]
-T. Dierks and C. Allen,
-``The TLS Protocol, Version 1.0,''
-RFC 2246.
-<br>&#32;<br>
-[Ford00]
-Warwick Ford and Burton S. Kaliski, Jr.,
-``Server-Assisted Generation of a Strong Secret from a Password,''
-IEEE Fifth International Workshop on Enterprise Security,
-National Institute of Standards and Technology (NIST),
-Gaithersburg MD, June 14 - 16, 2000.
-<br>&#32;<br>
-[Jabl]
-David P. Jablon,
-``Strong Password-Only Authenticated Key Exchange,''
-<TT>http://integritysciences.com/speke97.html</TT>.
-<br>&#32;<br>
-[Kami00]
-Michael Kaminsky,
-``Flexible Key Management with SFS Agents,''
-Master's Thesis, MIT, May 2000.
-<br>&#32;<br>
-[Mack]
-Philip MacKenzie,
-private communication.
-<br>&#32;<br>
-[Mazi99]
-David Mazi&egrave;res, Michael Kaminsky, M. Frans Kaashoek and Emmett Witchel,
-``Separating key management from file system security,''
-Symposium on Operating Systems Principles, 1999, pp. 124-139.
-<br>&#32;<br>
-[Micr]
-Microsoft Passport,
-<TT>http://www.passport.com/</TT>.
-<br>&#32;<br>
-[Perl99]
-Radia Perlman and Charlie Kaufman,
-``Secure Password-Based Protocol for Downloading a Private Key,''
-Proc. 1999 Network and Distributed System Security Symposium,
-Internet Society, January 1999.
-<br>&#32;<br>
-[Pike95]
-Rob Pike, Dave Presotto, Sean Dorward, Bob Flandrena, Ken Thompson, Howard Trickey, and Phil Winterbottom,
-``Plan 9 from Bell Labs,''
-Computing Systems, <B>8</B>, 3, Summer 1995, pp. 221-254.
-<br>&#32;<br>
-[Pike93]
-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, Phil Winterbottom,
-``The Use of Name Spaces in Plan 9,''
-Operating Systems Review, <B>27</B>, 2, April 1993, pp. 72-76
-(reprinted from Proceedings of the 5th ACM SIGOPS European Workshop,
-Mont Saint-Michel, 1992, Paper n&#186; 34).
-<br>&#32;<br>
-[Resc01]
-Eric Rescorla,
-``SSL and TLS: Designing and Building Secure Systems,''
-Addison-Wesley, 2001. ISBN 0-201-61598-3, p. 387.
-<br>&#32;<br>
-[RFC2138]
-C. Rigney, A. Rubens, W. Simpson, S. Willens,
-``Remote Authentication Dial In User Service (RADIUS),''
-RFC2138, April 1997.
-<br>&#32;<br>
-[RiLa]
-Ronald L. Rivest and Butler Lampson,
-``SDSI&#8212;A Simple Distributed Security Infrastructure,''
-<TT>http://theory.lcs.mit.edu/~rivest/sdsi10.ps</TT>.
-<br>&#32;<br>
-[Schn]
-Bruce Schneier, Password Safe,
-<TT>http://www.counterpane.com/passsafe.html</TT>.
-<br>&#32;<br>
-[Sama96]
-Vipin Samar,
-``Unified Login with Pluggable Authentication Modules (PAM),''
-Proceedings of the Third ACM Conference on Computer Communications and Security,
-March 1996, New Delhi, India.
-<br>&#32;<br>
-[Stei88]
-Jennifer G. Steiner, Clifford Neumann, and Jeffrey I. Schiller,
-``<I>Kerberos</I>: An Authentication Service for Open Network Systems,''
-Proceedings of USENIX Winter Conference, Dallas, Texas, February 1988, pp. 191-202.
-<br>&#32;<br>
-[Wu98]
-T. Wu,
-``The Secure Remote Password Protocol,''
-Proceedings of
-the 1998 Internet Society Network and Distributed System Security
-Symposium, San Diego, CA, March 1998, pp. 97-111.
-<br>&#32;<br>
-[Ylon96]
-Ylonen, T.,
-``SSH&#8212;Secure Login Connections Over the Internet,''
-6th USENIX Security Symposium, pp. 37-42. San Jose, CA, July 1996.
-<H4>Appendix: Summary of the PAK protocol
-</H4>
-<br>&#32;<br>
-Let <I>q</I>&gt;2<sup>160</sup> and <I>p</I>&gt;2<sup>1024</sup> be primes
-such that <I>p</I>=<I>rq</I>+1 with <I>r</I> not a multiple of <I>q</I>.
-Take <I>h</I>&isin;<I>Z</I><sub><I>p</I></sub><sup>*</sup> such that <I>g</I>&#8801;<I>h</I><sup><I>r</I></sup> is not 1.
-These parameters may be chosen by the NIST algorithm for DSA,
-and are public, fixed values.
-The client <I>C</I> knows a secret &#960;
-and computes <I>H</I>&#8801;(<I>H</I><sub>1</sub>(<I>C</I>, &#960;))<sup><I>r</I></sup> and <I>H</I><sup>-1</sup>,
-where <I>H</I><sub>1</sub> is a hash function yielding a random element of <I>Z</I><sub><I>p</I></sub><sup>*</sup>,
-and <I>H</I><sup>-1</sup> may be computed by gcd.
-(All arithmetic is modulo <I>p</I>.)
-The client gives <I>H</I><sup>-1</sup> to the server <I>S</I> ahead of time by a private channel.
-To start a new connection, the client generates a random value <I>x</I>,
-computes <I>m</I>&#8801;<I>g</I><sup><I>x</I></sup><I>H</I>,
-then calls the server and sends <I>C</I> and <I>m</I>.
-The server checks <I>m</I>&#8800;0 mod <I>p</I>,
-generates random <I>y</I>,
-computes &#956;&#8801;<I>g</I><sup><I>y</I></sup>,
-&#963;&#8801;(<I>m</I><I>H</I><sup>-1</sup>)<sup><I>y</I></sup>,
-and sends <I>S</I>, &#956;, <I>k</I>&#8801;<I>sha1</I>("server",<I>C</I>,<I>S</I>,<I>m</I>,&#956;,&#963;,<I>H</I><sup>-1</sup>).
-Next the client computes &#963;=&#956;<sup><I>x</I></sup>,
-verifies <I>k</I>,
-and sends <I>k&#180;</I>&#8801;<I>sha1</I>("client",<I>C</I>,<I>S</I>,<I>m</I>,&#956;,&#963;,<I>H</I><sup>-1</sup>).
-The server then verifies <I>k&#180;</I> and both sides begin
-using session key <I>K</I>&#8801;<I>sha1</I>("session",<I>C</I>,<I>S</I>,<I>m</I>,&#956;,&#963;,<I>H</I><sup>-1</sup>).
-In the published version of PAK, the server name <I>S</I>
-is included in the initial
-hash <I>H</I>, but doing so is inconvenient in our application,
-as the server may be known by various equivalent names.
-<br>&#32;<br>
-MacKenzie has shown
-[Mack]
-that the
-equivalence proof [Boyk00]
-can be adapted to cover our version.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1512
sys/doc/comp.html

@@ -1,1512 +0,0 @@
-<html>
-<title>
-How to Use the Plan 9 C Compiler
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>How to Use the Plan 9 C Compiler
-</H1>
-<DL><DD><I>Rob Pike<br>
-rob@plan9.bell-labs.com<br>
-</I></DL>
-<H4>Introduction
-</H4>
-<P>
-The C compiler on Plan 9 is a wholly new program; in fact
-it was the first piece of software written for what would
-eventually become Plan 9 from Bell Labs.
-Programmers familiar with existing C compilers will find
-a number of differences in both the language the Plan 9 compiler
-accepts and in how the compiler is used.
-</P>
-<P>
-The compiler is really a set of compilers, one for each
-architecture &#8212; MIPS, SPARC, Motorola 68020, Intel 386, etc. &#8212;
-that accept a dialect of ANSI C and efficiently produce
-fairly good code for the target machine.
-There is a packaging of the compiler that accepts strict ANSI C for
-a POSIX environment, but this document focuses on the
-native Plan 9 environment, that in which all the system source and
-almost all the utilities are written.
-</P>
-<H4>Source
-</H4>
-<P>
-The language accepted by the compilers is the core ANSI C language
-with some modest extensions,
-a greatly simplified preprocessor,
-a smaller library that includes system calls and related facilities,
-and a completely different structure for include files.
-</P>
-<P>
-Official ANSI C accepts the old (K&amp;R) style of declarations for
-functions; the Plan 9 compilers
-are more demanding.
-Without an explicit run-time flag
-(<TT>-B</TT>)
-whose use is discouraged, the compilers insist
-on new-style function declarations, that is, prototypes for
-function arguments.
-The function declarations in the libraries' include files are
-all in the new style so the interfaces are checked at compile time.
-For C programmers who have not yet switched to function prototypes
-the clumsy syntax may seem repellent but the payoff in stronger typing
-is substantial.
-Those who wish to import existing software to Plan 9 are urged
-to use the opportunity to update their code.
-</P>
-<P>
-The compilers include an integrated preprocessor that accepts the familiar
-<TT>#include</TT>,
-<TT>#define</TT>
-for macros both with and without arguments,
-<TT>#undef</TT>,
-<TT>#line</TT>,
-<TT>#ifdef</TT>,
-<TT>#ifndef</TT>,
-and
-<TT>#endif</TT>.
-It
-supports neither
-<TT>#if</TT>
-nor
-<TT>##</TT>,
-although it does
-honor a few
-<TT>#pragmas</TT>.
-The
-<TT>#if</TT>
-directive was omitted because it greatly complicates the
-preprocessor, is never necessary, and is usually abused.
-Conditional compilation in general makes code hard to understand;
-the Plan 9 source uses it sparingly.
-Also, because the compilers remove dead code, regular
-<TT>if</TT>
-statements with constant conditions are more readable equivalents to many
-<TT>#ifs</TT>.
-To compile imported code ineluctably fouled by
-<TT>#if</TT>
-there is a separate command,
-<TT>/bin/cpp</TT>,
-that implements the complete ANSI C preprocessor specification.
-</P>
-<P>
-Include files fall into two groups: machine-dependent and machine-independent.
-The machine-independent files occupy the directory
-<TT>/sys/include</TT>;
-the others are placed in a directory appropriate to the machine, such as
-<TT>/mips/include</TT>.
-The compiler searches for include files
-first in the machine-dependent directory and then
-in the machine-independent directory.
-At the time of writing there are thirty-one machine-independent include
-files and two (per machine) machine-dependent ones:
-<TT>&lt;ureg.h&gt;</TT>
-and
-<TT>&lt;u.h&gt;</TT>.
-The first describes the layout of registers on the system stack,
-for use by the debugger.
-The second defines some
-architecture-dependent types such as
-<TT>jmp_buf</TT>
-for
-<TT>setjmp</TT>
-and the
-<TT>va_arg</TT>
-and
-<TT>va_list</TT>
-macros for handling arguments to variadic functions,
-as well as a set of
-<TT>typedef</TT>
-abbreviations for
-<TT>unsigned</TT>
-<TT>short</TT>
-and so on.
-</P>
-<P>
-Here is an excerpt from
-<TT>/68020/include/u.h</TT>:
-<DL><DT><DD><TT><PRE>
-#define nil		((void*)0)
-typedef	unsigned short	ushort;
-typedef	unsigned char	uchar;
-typedef unsigned long	ulong;
-typedef unsigned int	uint;
-typedef   signed char	schar;
-typedef	long long       vlong;
-
-typedef long	jmp_buf[2];
-#define	JMPBUFSP	0
-#define	JMPBUFPC	1
-#define	JMPBUFDPC	0
-</PRE></TT></DL>
-Plan 9 programs use
-<TT>nil</TT>
-for the name of the zero-valued pointer.
-The type
-<TT>vlong</TT>
-is the largest integer type available; on most architectures it
-is a 64-bit value.
-A couple of other types in
-<TT>&lt;u.h&gt;</TT>
-are
-<TT>u32int</TT>,
-which is guaranteed to have exactly 32 bits (a possibility on all the supported architectures) and
-<TT>mpdigit</TT>,
-which is used by the multiprecision math package
-<TT>&lt;mp.h&gt;</TT>.
-The
-<TT>#define</TT>
-constants permit an architecture-independent (but compiler-dependent)
-implementation of stack-switching using
-<TT>setjmp</TT>
-and
-<TT>longjmp</TT>.
-</P>
-<P>
-Every Plan 9 C program begins
-<DL><DT><DD><TT><PRE>
-#include &lt;u.h&gt;
-</PRE></TT></DL>
-because all the other installed header files use the
-<TT>typedefs</TT>
-declared in
-<TT>&lt;u.h&gt;</TT>.
-</P>
-<P>
-In strict ANSI C, include files are grouped to collect related functions
-in a single file: one for string functions, one for memory functions,
-one for I/O, and none for system calls.
-Each include file is protected by an
-<TT>#ifdef</TT>
-to guarantee its contents are seen by the compiler only once.
-Plan 9 takes a different approach.  Other than a few include
-files that define external formats such as archives, the files in
-<TT>/sys/include</TT>
-correspond to
-<I>libraries.</I>
-If a program is using a library, it includes the corresponding header.
-The default C library comprises string functions, memory functions, and
-so on, largely as in ANSI C, some formatted I/O routines,
-plus all the system calls and related functions.
-To use these functions, one must
-<TT>#include</TT>
-the file
-<TT>&lt;libc.h&gt;</TT>,
-which in turn must follow
-<TT>&lt;u.h&gt;</TT>,
-to define their prototypes for the compiler.
-Here is the complete source to the traditional first C program:
-<DL><DT><DD><TT><PRE>
-#include &lt;u.h&gt;
-#include &lt;libc.h&gt;
-
-void
-main(void)
-{
-	print("hello world\n");
-	exits(0);
-}
-</PRE></TT></DL>
-The
-<TT>print</TT>
-routine and its relatives
-<TT>fprint</TT>
-and
-<TT>sprint</TT>
-resemble the similarly-named functions in Standard I/O but are not
-attached to a specific I/O library.
-In Plan 9
-<TT>main</TT>
-is not integer-valued; it should call
-<TT>exits</TT>,
-which takes a string argument (or null; here ANSI C promotes the 0 to a
-<TT>char*</TT>).
-All these functions are, of course, documented in the Programmer's Manual.
-</P>
-<P>
-To use
-<TT>printf</TT>,
-<TT>&lt;stdio.h&gt;</TT>
-must be included to define the function prototype for
-<TT>printf</TT>:
-<DL><DT><DD><TT><PRE>
-#include &lt;u.h&gt;
-#include &lt;libc.h&gt;
-#include &lt;stdio.h&gt;
-
-void
-main(int argc, char *argv[])
-{
-	printf("%s: hello world; argc = %d\n", argv[0], argc);
-	exits(0);
-}
-</PRE></TT></DL>
-In practice, Standard I/O is not used much in Plan 9.  I/O libraries are
-discussed in a later section of this document.
-</P>
-<P>
-There are libraries for handling regular expressions, raster graphics,
-windows, and so on, and each has an associated include file.
-The manual for each library states which include files are needed.
-The files are not protected against multiple inclusion and themselves
-contain no nested
-<TT>#includes</TT>.
-Instead the
-programmer is expected to sort out the requirements
-and to
-<TT>#include</TT>
-the necessary files once at the top of each source file.  In practice this is
-trivial: this way of handling include files is so straightforward
-that it is rare for a source file to contain more than half a dozen
-<TT>#includes</TT>.
-</P>
-<P>
-The compilers do their own register allocation so the
-<TT>register</TT>
-keyword is ignored.
-For different reasons,
-<TT>volatile</TT>
-and
-<TT>const</TT>
-are also ignored.
-</P>
-<P>
-To make it easier to share code with other systems, Plan 9 has a version
-of the compiler,
-<TT>pcc</TT>,
-that provides the standard ANSI C preprocessor, headers, and libraries
-with POSIX extensions.
-<TT>Pcc</TT>
-is recommended only
-when broad external portability is mandated.  It compiles slower,
-produces slower code (it takes extra work to simulate POSIX on Plan 9),
-eliminates those parts of the Plan 9 interface
-not related to POSIX, and illustrates the clumsiness of an environment
-designed by committee.
-<TT>Pcc</TT>
-is described in more detail in
-APE&#8212;The ANSI/POSIX Environment,
-by Howard Trickey.
-</P>
-<H4>Process
-</H4>
-<P>
-Each CPU architecture supported by Plan 9 is identified by a single,
-arbitrary, alphanumeric character:
-<TT>k</TT>
-for SPARC,
-<TT>q</TT>
-for Motorola Power PC 630 and 640,
-<TT>v</TT>
-for MIPS,
-<TT>1</TT>
-for Motorola 68000,
-<TT>2</TT>
-for Motorola 68020 and 68040,
-<TT>5</TT>
-for Acorn ARM 7500,
-<TT>6</TT>
-for Intel 960,
-<TT>7</TT>
-for DEC Alpha,
-<TT>8</TT>
-for Intel 386, and
-<TT>9</TT>
-for AMD 29000.
-The character labels the support tools and files for that architecture.
-For instance, for the 68020 the compiler is
-<TT>2c</TT>,
-the assembler is
-<TT>2a</TT>,
-the link editor/loader is
-<TT>2l</TT>,
-the object files are suffixed
-<TT>.2</TT>,
-and the default name for an executable file is
-<TT>2.out</TT>.
-Before we can use the compiler we therefore need to know which
-machine we are compiling for.
-The next section explains how this decision is made; for the moment
-assume we are building 68020 binaries and make the mental substitution for
-<TT>2</TT>
-appropriate to the machine you are actually using.
-</P>
-<P>
-To convert source to an executable binary is a two-step process.
-First run the compiler,
-<TT>2c</TT>,
-on the source, say
-<TT>file.c</TT>,
-to generate an object file
-<TT>file.2</TT>.
-Then run the loader,
-<TT>2l</TT>,
-to generate an executable
-<TT>2.out</TT>
-that may be run (on a 680X0 machine):
-<DL><DT><DD><TT><PRE>
-2c file.c
-2l file.2
-2.out
-</PRE></TT></DL>
-The loader automatically links with whatever libraries the program
-needs, usually including the standard C library as defined by
-<TT>&lt;libc.h&gt;</TT>.
-Of course the compiler and loader have lots of options, both familiar and new;
-see the manual for details.
-The compiler does not generate an executable automatically;
-the output of the compiler must be given to the loader.
-Since most compilation is done under the control of
-<TT>mk</TT>
-(see below), this is rarely an inconvenience.
-</P>
-<P>
-The distribution of work between the compiler and loader is unusual.
-The compiler integrates preprocessing, parsing, register allocation,
-code generation and some assembly.
-Combining these tasks in a single program is part of the reason for
-the compiler's efficiency.
-The loader does instruction selection, branch folding,
-instruction scheduling,
-and writes the final executable.
-There is no separate C preprocessor and no assembler in the usual pipeline.
-Instead the intermediate object file
-(here a
-<TT>.2</TT>
-file) is a type of binary assembly language.
-The instructions in the intermediate format are not exactly those in
-the machine.  For example, on the 68020 the object file may specify
-a MOVE instruction but the loader will decide just which variant of
-the MOVE instruction &#8212; MOVE immediate, MOVE quick, MOVE address,
-etc. &#8212; is most efficient.
-</P>
-<P>
-The assembler,
-<TT>2a</TT>,
-is just a translator between the textual and binary
-representations of the object file format.
-It is not an assembler in the traditional sense.  It has limited
-macro capabilities (the same as the integral C preprocessor in the compiler),
-clumsy syntax, and minimal error checking.  For instance, the assembler
-will accept an instruction (such as memory-to-memory MOVE on the MIPS) that the
-machine does not actually support; only when the output of the assembler
-is passed to the loader will the error be discovered.
-The assembler is intended only for writing things that need access to instructions
-invisible from C,
-such as the machine-dependent
-part of an operating system;
-very little code in Plan 9 is in assembly language.
-</P>
-<P>
-The compilers take an option
-<TT>-S</TT>
-that causes them to print on their standard output the generated code
-in a format acceptable as input to the assemblers.
-This is of course merely a formatting of the
-data in the object file; therefore the assembler is just
-an
-ASCII-to-binary converter for this format.
-Other than the specific instructions, the input to the assemblers
-is largely architecture-independent; see
-``A Manual for the Plan 9 Assembler'',
-by Rob Pike,
-for more information.
-</P>
-<P>
-The loader is an integral part of the compilation process.
-Each library header file contains a
-<TT>#pragma</TT>
-that tells the loader the name of the associated archive; it is
-not necessary to tell the loader which libraries a program uses.
-The C run-time startup is found, by default, in the C library.
-The loader starts with an undefined
-symbol,
-<TT>_main</TT>,
-that is resolved by pulling in the run-time startup code from the library.
-(The loader undefines
-<TT>_mainp</TT>
-when profiling is enabled, to force loading of the profiling start-up
-instead.)
-</P>
-<P>
-Unlike its counterpart on other systems, the Plan 9 loader rearranges
-data to optimize access.  This means the order of variables in the
-loaded program is unrelated to its order in the source.
-Most programs don't care, but some assume that, for example, the
-variables declared by
-<DL><DT><DD><TT><PRE>
-int a;
-int b;
-</PRE></TT></DL>
-will appear at adjacent addresses in memory.  On Plan 9, they won't.
-</P>
-<H4>Heterogeneity
-</H4>
-<P>
-When the system starts or a user logs in the environment is configured
-so the appropriate binaries are available in
-<TT>/bin</TT>.
-The configuration process is controlled by an environment variable,
-<TT>$cputype</TT>,
-with value such as
-<TT>mips</TT>,
-<TT>68020</TT>,
-<TT>386</TT>,
-or
-<TT>sparc</TT>.
-For each architecture there is a directory in the root,
-with the appropriate name,
-that holds the binary and library files for that architecture.
-Thus
-<TT>/mips/lib</TT>
-contains the object code libraries for MIPS programs,
-<TT>/mips/include</TT>
-holds MIPS-specific include files, and
-<TT>/mips/bin</TT>
-has the MIPS binaries.
-These binaries are attached to
-<TT>/bin</TT>
-at boot time by binding
-<TT>/$cputype/bin</TT>
-to
-<TT>/bin</TT>,
-so
-<TT>/bin</TT>
-always contains the correct files.
-</P>
-<P>
-The MIPS compiler,
-<TT>vc</TT>,
-by definition
-produces object files for the MIPS architecture,
-regardless of the architecture of the machine on which the compiler is running.
-There is a version of
-<TT>vc</TT>
-compiled for each architecture:
-<TT>/mips/bin/vc</TT>,
-<TT>/68020/bin/vc</TT>,
-<TT>/sparc/bin/vc</TT>,
-and so on,
-each capable of producing MIPS object files regardless of the native
-instruction set.
-If one is running on a SPARC,
-<TT>/sparc/bin/vc</TT>
-will compile programs for the MIPS;
-if one is running on machine
-<TT>$cputype</TT>,
-<TT>/$cputype/bin/vc</TT>
-will compile programs for the MIPS.
-</P>
-<P>
-Because of the bindings that assemble
-<TT>/bin</TT>,
-the shell always looks for a command, say
-<TT>date</TT>,
-in
-<TT>/bin</TT>
-and automatically finds the file
-<TT>/$cputype/bin/date</TT>.
-Therefore the MIPS compiler is known as just
-<TT>vc</TT>;
-the shell will invoke
-<TT>/bin/vc</TT>
-and that is guaranteed to be the version of the MIPS compiler
-appropriate for the machine running the command.
-Regardless of the architecture of the compiling machine,
-<TT>/bin/vc</TT>
-is
-<I>always</I>
-the MIPS compiler.
-</P>
-<P>
-Also, the output of
-<TT>vc</TT>
-and
-<TT>vl</TT>
-is completely independent of the machine type on which they are executed:
-<TT>.v</TT>
-files compiled (with
-<TT>vc</TT>)
-on a SPARC may be linked (with
-<TT>vl</TT>)
-on a 386.
-(The resulting
-<TT>v.out</TT>
-will run, of course, only on a MIPS.)
-Similarly, the MIPS libraries in
-<TT>/mips/lib</TT>
-are suitable for loading with
-<TT>vl</TT>
-on any machine; there is only one set of MIPS libraries, not one
-set for each architecture that supports the MIPS compiler.
-</P>
-<H4>Heterogeneity and <TT>mk</TT>
-</H4>
-<P>
-Most software on Plan 9 is compiled under the control of
-<TT>mk</TT>,
-a descendant of
-<TT>make</TT>
-that is documented in the Programmer's Manual.
-A convention used throughout the
-<TT>mkfiles</TT>
-makes it easy to compile the source into binary suitable for any architecture.
-</P>
-<P>
-The variable
-<TT>$cputype</TT>
-is advisory: it reports the architecture of the current environment, and should
-not be modified.  A second variable,
-<TT>$objtype</TT>,
-is used to set which architecture is being
-<I>compiled</I>
-for.
-The value of
-<TT>$objtype</TT>
-can be used by a
-<TT>mkfile</TT>
-to configure the compilation environment.
-</P>
-<P>
-In each machine's root directory there is a short
-<TT>mkfile</TT>
-that defines a set of macros for the compiler, loader, etc.
-Here is
-<TT>/mips/mkfile</TT>:
-<DL><DT><DD><TT><PRE>
-&lt;/sys/src/mkfile.proto
-
-CC=vc
-LD=vl
-O=v
-AS=va
-</PRE></TT></DL>
-The line
-<DL><DT><DD><TT><PRE>
-&lt;/sys/src/mkfile.proto
-</PRE></TT></DL>
-causes
-<TT>mk</TT>
-to include the file
-<TT>/sys/src/mkfile.proto</TT>,
-which contains general definitions:
-<DL><DT><DD><TT><PRE>
-#
-# common mkfile parameters shared by all architectures
-#
-
-OS=v486xq7
-CPUS=mips 386 power alpha
-CFLAGS=-FVw
-LEX=lex
-YACC=yacc
-MK=/bin/mk
-</PRE></TT></DL>
-<TT>CC</TT>
-is obviously the compiler,
-<TT>AS</TT>
-the assembler, and
-<TT>LD</TT>
-the loader.
-<TT>O</TT>
-is the suffix for the object files and
-<TT>CPUS</TT>
-and
-<TT>OS</TT>
-are used in special rules described below.
-</P>
-<P>
-Here is a
-<TT>mkfile</TT>
-to build the installed source for
-<TT>sam</TT>:
-<DL><DT><DD><TT><PRE>
-&lt;/$objtype/mkfile
-OBJ=sam.$O address.$O buffer.$O cmd.$O disc.$O error.$O \
-	file.$O io.$O list.$O mesg.$O moveto.$O multi.$O \
-	plan9.$O rasp.$O regexp.$O string.$O sys.$O xec.$O
-
-$O.out:	$OBJ
-	$LD $OBJ
-
-install:	$O.out
-	cp $O.out /$objtype/bin/sam
-
-installall:
-	for(objtype in $CPUS) mk install
-
-%.$O:	%.c
-	$CC $CFLAGS $stem.c
-
-$OBJ:	sam.h errors.h mesg.h
-address.$O cmd.$O parse.$O xec.$O unix.$O:	parse.h
-
-clean:V:
-	rm -f [$OS].out *.[$OS] y.tab.?
-</PRE></TT></DL>
-(The actual
-<TT>mkfile</TT>
-imports most of its rules from other secondary files, but
-this example works and is not misleading.)
-The first line causes
-<TT>mk</TT>
-to include the contents of
-<TT>/$objtype/mkfile</TT>
-in the current
-<TT>mkfile</TT>.
-If
-<TT>$objtype</TT>
-is
-<TT>mips</TT>,
-this inserts the MIPS macro definitions into the
-<TT>mkfile</TT>.
-In this case the rule for
-<TT>$O.out</TT>
-uses the MIPS tools to build
-<TT>v.out</TT>.
-The
-<TT>%.$O</TT>
-rule in the file uses
-<TT>mk</TT>'s
-pattern matching facilities to convert the source files to the object
-files through the compiler.
-(The text of the rules is passed directly to the shell,
-<TT>rc</TT>,
-without further translation.
-See the
-<TT>mk</TT>
-manual if any of this is unfamiliar.)
-Because the default rule builds
-<TT>$O.out</TT>
-rather than
-<TT>sam</TT>,
-it is possible to maintain binaries for multiple machines in the
-same source directory without conflict.
-This is also, of course, why the output files from the various
-compilers and loaders
-have distinct names.
-</P>
-<P>
-The rest of the
-<TT>mkfile</TT>
-should be easy to follow; notice how the rules for
-<TT>clean</TT>
-and
-<TT>installall</TT>
-(that is, install versions for all architectures) use other macros
-defined in
-<TT>/$objtype/mkfile</TT>.
-In Plan 9,
-<TT>mkfiles</TT>
-for commands conventionally contain rules to
-<TT>install</TT>
-(compile and install the version for
-<TT>$objtype</TT>),
-<TT>installall</TT>
-(compile and install for all
-<TT>$objtypes</TT>),
-and
-<TT>clean</TT>
-(remove all object files, binaries, etc.).
-</P>
-<P>
-The
-<TT>mkfile</TT>
-is easy to use.  To build a MIPS binary,
-<TT>v.out</TT>:
-<DL><DT><DD><TT><PRE>
-% objtype=mips
-% mk
-</PRE></TT></DL>
-To build and install a MIPS binary:
-<DL><DT><DD><TT><PRE>
-% objtype=mips
-% mk install
-</PRE></TT></DL>
-To build and install all versions:
-<DL><DT><DD><TT><PRE>
-% mk installall
-</PRE></TT></DL>
-These conventions make cross-compilation as easy to manage
-as traditional native compilation.
-Plan 9 programs compile and run without change on machines from
-large multiprocessors to laptops.  For more information about this process, see
-``Plan 9 Mkfiles'',
-by Bob Flandrena.
-</P>
-<H4>Portability
-</H4>
-<P>
-Within Plan 9, it is painless to write portable programs, programs whose
-source is independent of the machine on which they execute.
-The operating system is fixed and the compiler, headers and libraries
-are constant so most of the stumbling blocks to portability are removed.
-Attention to a few details can avoid those that remain.
-</P>
-<P>
-Plan 9 is a heterogeneous environment, so programs must
-<I>expect</I>
-that external files will be written by programs on machines of different
-architectures.
-The compilers, for instance, must handle without confusion
-object files written by other machines.
-The traditional approach to this problem is to pepper the source with
-<TT>#ifdefs</TT>
-to turn byte-swapping on and off.
-Plan 9 takes a different approach: of the handful of machine-dependent
-<TT>#ifdefs</TT>
-in all the source, almost all are deep in the libraries.
-Instead programs read and write files in a defined format,
-either (for low volume applications) as formatted text, or
-(for high volume applications) as binary in a known byte order.
-If the external data were written with the most significant
-byte first, the following code reads a 4-byte integer correctly
-regardless of the architecture of the executing machine (assuming
-an unsigned long holds 4 bytes):
-<DL><DT><DD><TT><PRE>
-ulong
-getlong(void)
-{
-	ulong l;
-
-	l = (getchar()&amp;0xFF)&lt;&lt;24;
-	l |= (getchar()&amp;0xFF)&lt;&lt;16;
-	l |= (getchar()&amp;0xFF)&lt;&lt;8;
-	l |= (getchar()&amp;0xFF)&lt;&lt;0;
-	return l;
-}
-</PRE></TT></DL>
-Note that this code does not `swap' the bytes; instead it just reads
-them in the correct order.
-Variations of this code will handle any binary format
-and also avoid problems
-involving how structures are padded, how words are aligned,
-and other impediments to portability.
-Be aware, though, that extra care is needed to handle floating point data.
-</P>
-<P>
-Efficiency hounds will argue that this method is unnecessarily slow and clumsy
-when the executing machine has the same byte order (and padding and alignment)
-as the data.
-The CPU cost of I/O processing
-is rarely the bottleneck for an application, however,
-and the gain in simplicity of porting and maintaining the code greatly outweighs
-the minor speed loss from handling data in this general way.
-This method is how the Plan 9 compilers, the window system, and even the file
-servers transmit data between programs.
-</P>
-<P>
-To port programs beyond Plan 9, where the system interface is more variable,
-it is probably necessary to use
-<TT>pcc</TT>
-and hope that the target machine supports ANSI C and POSIX.
-</P>
-<H4>I/O
-</H4>
-<P>
-The default C library, defined by the include file
-<TT>&lt;libc.h&gt;</TT>,
-contains no buffered I/O package.
-It does have several entry points for printing formatted text:
-<TT>print</TT>
-outputs text to the standard output,
-<TT>fprint</TT>
-outputs text to a specified integer file descriptor, and
-<TT>sprint</TT>
-places text in a character array.
-To access library routines for buffered I/O, a program must
-explicitly include the header file associated with an appropriate library.
-</P>
-<P>
-The recommended I/O library, used by most Plan 9 utilities, is
-<TT>bio</TT>
-(buffered I/O), defined by
-<TT>&lt;bio.h&gt;</TT>.
-There also exists an implementation of ANSI Standard I/O,
-<TT>stdio</TT>.
-</P>
-<P>
-<TT>Bio</TT>
-is small and efficient, particularly for buffer-at-a-time or
-line-at-a-time I/O.
-Even for character-at-a-time I/O, however, it is significantly faster than
-the Standard I/O library,
-<TT>stdio</TT>.
-Its interface is compact and regular, although it lacks a few conveniences.
-The most noticeable is that one must explicitly define buffers for standard
-input and output;
-<TT>bio</TT>
-does not predefine them.  Here is a program to copy input to output a byte
-at a time using
-<TT>bio</TT>:
-<DL><DT><DD><TT><PRE>
-#include &lt;u.h&gt;
-#include &lt;libc.h&gt;
-#include &lt;bio.h&gt;
-
-Biobuf	bin;
-Biobuf	bout;
-
-main(void)
-{
-	int c;
-
-	Binit(&amp;bin, 0, OREAD);
-	Binit(&amp;bout, 1, OWRITE);
-
-	while((c=Bgetc(&amp;bin)) != Beof)
-		Bputc(&amp;bout, c);
-	exits(0);
-}
-</PRE></TT></DL>
-For peak performance, we could replace
-<TT>Bgetc</TT>
-and
-<TT>Bputc</TT>
-by their equivalent in-line macros
-<TT>BGETC</TT>
-and
-<TT>BPUTC</TT>
-but 
-the performance gain would be modest.
-For more information on
-<TT>bio</TT>,
-see the Programmer's Manual.
-</P>
-<P>
-Perhaps the most dramatic difference in the I/O interface of Plan 9 from other
-systems' is that text is not ASCII.
-The format for
-text in Plan 9 is a byte-stream encoding of 16-bit characters.
-The character set is based on the Unicode Standard and is backward compatible with
-ASCII:
-characters with value 0 through 127 are the same in both sets.
-The 16-bit characters, called
-<I>runes</I>
-in Plan 9, are encoded using a representation called
-UTF,
-an encoding that is becoming accepted as a standard.
-(ISO calls it UTF-8;
-throughout Plan 9 it's just called
-UTF.)
-UTF
-defines multibyte sequences to
-represent character values from 0 to 65535.
-In
-UTF,
-character values up to 127 decimal, 7F hexadecimal, represent themselves,
-so straight
-ASCII
-files are also valid
-UTF.
-Also,
-UTF
-guarantees that bytes with values 0 to 127 (NUL to DEL, inclusive)
-will appear only when they represent themselves, so programs that read bytes
-looking for plain ASCII characters will continue to work.
-Any program that expects a one-to-one correspondence between bytes and
-characters will, however, need to be modified.
-An example is parsing file names.
-File names, like all text, are in
-UTF,
-so it is incorrect to search for a character in a string by
-<TT>strchr(filename,</TT>
-<TT>c)</TT>
-because the character might have a multi-byte encoding.
-The correct method is to call
-<TT>utfrune(filename,</TT>
-<TT>c)</TT>,
-defined in
-<A href="/magic/man2html/2/rune"><I>rune</I>(2),
-</A>which interprets the file name as a sequence of encoded characters
-rather than bytes.
-In fact, even when you know the character is a single byte
-that can represent only itself,
-it is safer to use
-<TT>utfrune</TT>
-because that assumes nothing about the character set
-and its representation.
-</P>
-<P>
-The library defines several symbols relevant to the representation of characters.
-Any byte with unsigned value less than
-<TT>Runesync</TT>
-will not appear in any multi-byte encoding of a character.
-<TT>Utfrune</TT>
-compares the character being searched against
-<TT>Runesync</TT>
-to see if it is sufficient to call
-<TT>strchr</TT>
-or if the byte stream must be interpreted.
-Any byte with unsigned value less than
-<TT>Runeself</TT>
-is represented by a single byte with the same value.
-Finally, when errors are encountered converting
-to runes from a byte stream, the library returns the rune value
-<TT>Runeerror</TT>
-and advances a single byte.  This permits programs to find runes
-embedded in binary data.
-</P>
-<P>
-<TT>Bio</TT>
-includes routines
-<TT>Bgetrune</TT>
-and
-<TT>Bputrune</TT>
-to transform the external byte stream
-UTF
-format to and from
-internal 16-bit runes.
-Also, the
-<TT>%s</TT>
-format to
-<TT>print</TT>
-accepts
-UTF;
-<TT>%c</TT>
-prints a character after narrowing it to 8 bits.
-The
-<TT>%S</TT>
-format prints a null-terminated sequence of runes;
-<TT>%C</TT>
-prints a character after narrowing it to 16 bits.
-For more information, see the Programmer's Manual, in particular
-<A href="/magic/man2html/6/utf"><I>utf</I>(6)
-</A>and
-<A href="/magic/man2html/2/rune"><I>rune</I>(2),
-</A>and the paper,
-``Hello world, or
-&#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949;, or 
-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'',
-by Rob Pike and
-Ken Thompson;
-there is not room for the full story here.
-</P>
-<P>
-These issues affect the compiler in several ways.
-First, the C source is in
-UTF.
-ANSI says C variables are formed from
-ASCII
-alphanumerics, but comments and literal strings may contain any characters
-encoded in the native encoding, here
-UTF.
-The declaration
-<DL><DT><DD><TT><PRE>
-char *cp = "abc&yuml;";
-</PRE></TT></DL>
-initializes the variable
-<TT>cp</TT>
-to point to an array of bytes holding the
-UTF
-representation of the characters
-<TT>abc&yuml;.</TT>
-The type
-<TT>Rune</TT>
-is defined in
-<TT>&lt;u.h&gt;</TT>
-to be
-<TT>ushort</TT>,
-which is also the  `wide character' type in the compiler.
-Therefore the declaration
-<DL><DT><DD><TT><PRE>
-Rune *rp = L"abc&yuml;";
-</PRE></TT></DL>
-initializes the variable
-<TT>rp</TT>
-to point to an array of unsigned short integers holding the 16-bit
-values of the characters
-<TT>abc&yuml;</TT>.
-Note that in both these declarations the characters in the source
-that represent
-<TT>abc&yuml;</TT>
-are the same; what changes is how those characters are represented
-in memory in the program.
-The following two lines:
-<DL><DT><DD><TT><PRE>
-print("%s\n", "abc&yuml;");
-print("%S\n", L"abc&yuml;");
-</PRE></TT></DL>
-produce the same
-UTF
-string on their output, the first by copying the bytes, the second
-by converting from runes to bytes.
-</P>
-<P>
-In C, character constants are integers but narrowed through the
-<TT>char</TT>
-type.
-The Unicode character
-<TT>&yuml;</TT>
-has value 255, so if the
-<TT>char</TT>
-type is signed,
-the constant
-<TT>'&yuml;'</TT>
-has value -1 (which is equal to EOF).
-On the other hand,
-<TT>L'&yuml;'</TT>
-narrows through the wide character type,
-<TT>ushort</TT>,
-and therefore has value 255.
-</P>
-<P>
-Finally, although it's not ANSI C, the Plan 9 C compilers
-assume any character with value above
-<TT>Runeself</TT>
-is an alphanumeric,
-so &#945; is a legal, if non-portable, variable name.
-</P>
-<H4>Arguments
-</H4>
-<P>
-Some macros are defined
-in
-<TT>&lt;libc.h&gt;</TT>
-for parsing the arguments to
-<TT>main()</TT>.
-They are described in
-<A href="/magic/man2html/2/arg"><I>arg</I>(2)
-</A>but are fairly self-explanatory.
-There are four macros:
-<TT>ARGBEGIN</TT>
-and
-<TT>ARGEND</TT>
-are used to bracket a hidden
-<TT>switch</TT>
-statement within which
-<TT>ARGC</TT>
-returns the current option character (rune) being processed and
-<TT>ARGF</TT>
-returns the argument to the option, as in the loader option
-<TT>-o</TT>
-<TT>file</TT>.
-Here, for example, is the code at the beginning of
-<TT>main()</TT>
-in
-<TT>ramfs.c</TT>
-(see
-<A href="/magic/man2html/1/ramfs"><I>ramfs</I>(1))
-</A>that cracks its arguments:
-<DL><DT><DD><TT><PRE>
-void
-main(int argc, char *argv[])
-{
-	char *defmnt;
-	int p[2];
-	int mfd[2];
-	int stdio = 0;
-
-	defmnt = "/tmp";
-	ARGBEGIN{
-	case 'i':
-		defmnt = 0;
-		stdio = 1;
-		mfd[0] = 0;
-		mfd[1] = 1;
-		break;
-	case 's':
-		defmnt = 0;
-		break;
-	case 'm':
-		defmnt = ARGF();
-		break;
-	default:
-		usage();
-	}ARGEND
-</PRE></TT></DL>
-</P>
-<H4>Extensions
-</H4>
-<P>
-The compiler has several extensions to ANSI C, all of which are used
-extensively in the system source.
-First,
-<I>structure</I>
-<I>displays</I>
-permit 
-<TT>struct</TT>
-expressions to be formed dynamically.
-Given these declarations:
-<DL><DT><DD><TT><PRE>
-typedef struct Point Point;
-typedef struct Rectangle Rectangle;
-
-struct Point
-{
-	int x, y;
-};
-
-struct Rectangle
-{
-	Point min, max;
-};
-
-Point	p, q, add(Point, Point);
-Rectangle r;
-int	x, y;
-</PRE></TT></DL>
-this assignment may appear anywhere an assignment is legal:
-<DL><DT><DD><TT><PRE>
-r = (Rectangle){add(p, q), (Point){x, y+3}};
-</PRE></TT></DL>
-The syntax is the same as for initializing a structure but with
-a leading cast.
-</P>
-<P>
-If an
-<I>anonymous</I>
-<I>structure</I>
-or
-<I>union</I>
-is declared within another structure or union, the members of the internal
-structure or union are addressable without prefix in the outer structure.
-This feature eliminates the clumsy naming of nested structures and,
-particularly, unions.
-For example, after these declarations,
-<DL><DT><DD><TT><PRE>
-struct Lock
-{
-	int	locked;
-};
-
-struct Node
-{
-	int	type;
-	union{
-		double  dval;
-		double  fval;
-		long    lval;
-	};		/* anonymous union */
-	struct Lock;	/* anonymous structure */
-} *node;
-
-void	lock(struct Lock*);
-</PRE></TT></DL>
-one may refer to
-<TT>node-&gt;type</TT>,
-<TT>node-&gt;dval</TT>,
-<TT>node-&gt;fval</TT>,
-<TT>node-&gt;lval</TT>,
-and
-<TT>node-&gt;locked</TT>.
-Moreover, the address of a
-<TT>struct</TT>
-<TT>Node</TT>
-may be used without a cast anywhere that the address of a
-<TT>struct</TT>
-<TT>Lock</TT>
-is used, such as in argument lists.
-The compiler automatically promotes the type and adjusts the address.
-Thus one may invoke
-<TT>lock(node)</TT>.
-</P>
-<P>
-Anonymous structures and unions may be accessed by type name
-if (and only if) they are declared using a
-<TT>typedef</TT>
-name.
-For example, using the above declaration for
-<TT>Point</TT>,
-one may declare
-<DL><DT><DD><TT><PRE>
-struct
-{
-	int	type;
-	Point;
-} p;
-</PRE></TT></DL>
-and refer to
-<TT>p.Point</TT>.
-</P>
-<P>
-In the initialization of arrays, a number in square brackets before an
-element sets the index for the initialization.  For example, to initialize
-some elements in
-a table of function pointers indexed by
-ASCII
-character,
-<DL><DT><DD><TT><PRE>
-void	percent(void), slash(void);
-
-void	(*func[128])(void) =
-{
-	['%']	percent,
-	['/']	slash,
-};
-</PRE></TT></DL>
-</P>
-<br>&#32;<br>
-A similar syntax allows one to initialize structure elements:
-<DL><DT><DD><TT><PRE>
-Point p =
-{
-	.y 100,
-	.x 200
-};
-</PRE></TT></DL>
-These initialization syntaxes were later added to ANSI C, with the addition of an
-equals sign between the index or tag and the value.
-The Plan 9 compiler accepts either form.
-<P>
-Finally, the declaration
-<DL><DT><DD><TT><PRE>
-extern register reg;
-</PRE></TT></DL>
-(<I>this</I>
-appearance of the register keyword is not ignored)
-allocates a global register to hold the variable
-<TT>reg</TT>.
-External registers must be used carefully: they need to be declared in
-<I>all</I>
-source files and libraries in the program to guarantee the register
-is not allocated temporarily for other purposes.
-Especially on machines with few registers, such as the i386,
-it is easy to link accidentally with code that has already usurped
-the global registers and there is no diagnostic when this happens.
-Used wisely, though, external registers are powerful.
-The Plan 9 operating system uses them to access per-process and
-per-machine data structures on a multiprocessor.  The storage class they provide
-is hard to create in other ways.
-</P>
-<H4>The compile-time environment
-</H4>
-<P>
-The code generated by the compilers is `optimized' by default:
-variables are placed in registers and peephole optimizations are
-performed.
-The compiler flag
-<TT>-N</TT>
-disables these optimizations.
-Registerization is done locally rather than throughout a function:
-whether a variable occupies a register or
-the memory location identified in the symbol
-table depends on the activity of the variable and may change
-throughout the life of the variable.
-The
-<TT>-N</TT>
-flag is rarely needed;
-its main use is to simplify debugging.
-There is no information in the symbol table to identify the
-registerization of a variable, so
-<TT>-N</TT>
-guarantees the variable is always where the symbol table says it is.
-</P>
-<P>
-Another flag,
-<TT>-w</TT>,
-turns
-<I>on</I>
-warnings about portability and problems detected in flow analysis.
-Most code in Plan 9 is compiled with warnings enabled;
-these warnings plus the type checking offered by function prototypes
-provide most of the support of the Unix tool
-<TT>lint</TT>
-more accurately and with less chatter.
-Two of the warnings,
-`used and not set' and `set and not used', are almost always accurate but
-may be triggered spuriously by code with invisible control flow,
-such as in routines that call
-<TT>longjmp</TT>.
-The compiler statements
-<DL><DT><DD><TT><PRE>
-SET(v1);
-USED(v2);
-</PRE></TT></DL>
-decorate the flow graph to silence the compiler.
-Either statement accepts a comma-separated list of variables.
-Use them carefully: they may silence real errors.
-For the common case of unused parameters to a function,
-leaving the name off the declaration silences the warnings.
-That is, listing the type of a parameter but giving it no
-associated variable name does the trick.
-</P>
-<H4>Debugging
-</H4>
-<P>
-There are two debuggers available on Plan 9.
-The first, and older, is
-<TT>db</TT>,
-a revision of Unix
-<TT>adb</TT>.
-The other,
-<TT>acid</TT>,
-is a source-level debugger whose commands are statements in
-a true programming language.
-<TT>Acid</TT>
-is the preferred debugger, but since it
-borrows some elements of
-<TT>db</TT>,
-notably the formats for displaying values, it is worth knowing a little bit about
-<TT>db</TT>.
-</P>
-<P>
-Both debuggers support multiple architectures in a single program; that is,
-the programs are
-<TT>db</TT>
-and
-<TT>acid</TT>,
-not for example
-<TT>vdb</TT>
-and
-<TT>vacid</TT>.
-They also support cross-architecture debugging comfortably:
-one may debug a 68020 binary on a MIPS.
-</P>
-<P>
-Imagine a program has crashed mysteriously:
-<DL><DT><DD><TT><PRE>
-% X11/X
-Fatal server bug!
-failed to create default stipple
-X 106: suicide: sys: trap: fault read addr=0x0 pc=0x00105fb8
-% 
-</PRE></TT></DL>
-When a process dies on Plan 9 it hangs in the `broken' state
-for debugging.
-Attach a debugger to the process by naming its process id:
-<DL><DT><DD><TT><PRE>
-% acid 106
-/proc/106/text:mips plan 9 executable
-
-/sys/lib/acid/port
-/sys/lib/acid/mips
-acid: 
-</PRE></TT></DL>
-The
-<TT>acid</TT>
-function
-<TT>stk()</TT>
-reports the stack traceback:
-<DL><DT><DD><TT><PRE>
-acid: stk()
-At pc:0x105fb8:abort+0x24 /sys/src/ape/lib/ap/stdio/abort.c:6
-abort() /sys/src/ape/lib/ap/stdio/abort.c:4
-	called from FatalError+#4e
-		/sys/src/X/mit/server/dix/misc.c:421
-FatalError(s9=#e02, s8=#4901d200, s7=#2, s6=#72701, s5=#1,
-    s4=#7270d, s3=#6, s2=#12, s1=#ff37f1c, s0=#6, f=#7270f)
-    /sys/src/X/mit/server/dix/misc.c:416
-	called from gnotscreeninit+#4ce
-		/sys/src/X/mit/server/ddx/gnot/gnot.c:792
-gnotscreeninit(snum=#0, sc=#80db0)
-    /sys/src/X/mit/server/ddx/gnot/gnot.c:766
-	called from AddScreen+#16e
-		/n/bootes/sys/src/X/mit/server/dix/main.c:610
-AddScreen(pfnInit=0x0000129c,argc=0x00000001,argv=0x7fffffe4)
-    /sys/src/X/mit/server/dix/main.c:530
-	called from InitOutput+0x80
-		/sys/src/X/mit/server/ddx/brazil/brddx.c:522
-InitOutput(argc=0x00000001,argv=0x7fffffe4)
-    /sys/src/X/mit/server/ddx/brazil/brddx.c:511
-	called from main+0x294
-		/sys/src/X/mit/server/dix/main.c:225
-main(argc=0x00000001,argv=0x7fffffe4)
-    /sys/src/X/mit/server/dix/main.c:136
-	called from _main+0x24
-		/sys/src/ape/lib/ap/mips/main9.s:8
-</PRE></TT></DL>
-The function
-<TT>lstk()</TT>
-is similar but
-also reports the values of local variables.
-Note that the traceback includes full file names; this is a boon to debugging,
-although it makes the output much noisier.
-</P>
-<P>
-To use
-<TT>acid</TT>
-well you will need to learn its input language; see the
-``Acid Manual'',
-by Phil Winterbottom,
-for details.  For simple debugging, however, the information in the manual page is
-sufficient.  In particular, it describes the most useful functions
-for examining a process.
-</P>
-<P>
-The compiler does not place
-information describing the types of variables in the executable,
-but a compile-time flag provides crude support for symbolic debugging.
-The
-<TT>-a</TT>
-flag to the compiler suppresses code generation
-and instead emits source text in the
-<TT>acid</TT>
-language to format and display data structure types defined in the program.
-The easiest way to use this feature is to put a rule in the
-<TT>mkfile</TT>:
-<DL><DT><DD><TT><PRE>
-syms:   main.O
-        $CC -a main.c &gt; syms
-</PRE></TT></DL>
-Then from within
-<TT>acid</TT>,
-<DL><DT><DD><TT><PRE>
-acid: include("sourcedirectory/syms")
-</PRE></TT></DL>
-to read in the relevant definitions.
-(For multi-file source, you need to be a little fancier;
-see
-<A href="/magic/man2html/1/2c"><I>2c</I>(1)).
-</A>This text includes, for each defined compound
-type, a function with that name that may be called with the address of a structure
-of that type to display its contents.
-For example, if
-<TT>rect</TT>
-is a global variable of type
-<TT>Rectangle</TT>,
-one may execute
-<DL><DT><DD><TT><PRE>
-Rectangle(*rect)
-</PRE></TT></DL>
-to display it.
-The
-<TT>*</TT>
-(indirection) operator is necessary because
-of the way
-<TT>acid</TT>
-works: each global symbol in the program is defined as a variable by
-<TT>acid</TT>,
-with value equal to the
-<I>address</I>
-of the symbol.
-</P>
-<P>
-Another common technique is to write by hand special
-<TT>acid</TT>
-code to define functions to aid debugging, initialize the debugger, and so on.
-Conventionally, this is placed in a file called
-<TT>acid</TT>
-in the source directory; it has a line
-<DL><DT><DD><TT><PRE>
-include("sourcedirectory/syms");
-</PRE></TT></DL>
-to load the compiler-produced symbols.  One may edit the compiler output directly but
-it is wiser to keep the hand-generated
-<TT>acid</TT>
-separate from the machine-generated.
-</P>
-<P>
-To make things simple, the default rules in the system
-<TT>mkfiles</TT>
-include entries to make
-<TT>foo.acid</TT>
-from
-<TT>foo.c</TT>,
-so one may use
-<TT>mk</TT>
-to automate the production of
-<TT>acid</TT>
-definitions for a given C source file.
-</P>
-<P>
-There is much more to say here.  See the
-<TT>acid</TT>
-manual page, the reference manual, or the paper
-``Acid: A Debugger Built From A Language'',
-also by Phil Winterbottom.
-</P>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1117
sys/doc/compiler.html

@@ -1,1117 +0,0 @@
-<html>
-<title>
-Plan 9 C Compilers
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Plan 9 C Compilers
-</H1>
-<DL><DD><I>Ken Thompson<br>
-ken@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Originally appeared, in a different form, in
-Proceedings of the Summer 1990 UKUUG Conference,
-pp. 41-51,
-London, 1990.
-</I><DT>&#32;<DD></dl>
-<br>
-This paper describes the overall structure and function of the Plan 9 C compilers.
-A more detailed implementation document
-for any one of the compilers
-is yet to be written.
-</DL>
-<H4>1 Introduction
-</H4>
-<br>&#32;<br>
-There are many compilers in the series.
-Six of the compilers (MIPS 3000, SPARC, Intel 386, Power PC, DEC Alpha, and Motorola 68020)
-are considered active and are used to compile
-current versions of Plan 9.
-Several others (Motorola 68000, Intel 960, ARM 7500, AMD 29000) have had only limited use, such as
-to program peripherals or experimental devices.
-<H4>2 Structure
-</H4>
-<br>&#32;<br>
-The compiler is a single program that produces an
-object file.
-Combined in the compiler are the traditional
-roles of preprocessor, lexical analyzer, parser, code generator,
-local optimizer,
-and first half of the assembler.
-The object files are binary forms of assembly
-language,
-similar to what might be passed between
-the first and second passes of an assembler.
-<br>&#32;<br>
-Object files and libraries
-are combined by a loader
-program to produce the executable binary.
-The loader combines the roles of second half
-of the assembler, global optimizer, and loader.
-The names of the compilers, loaders, and assemblers
-are as follows:
-<DL><DT><DD><TT><PRE>
-SPARC	<TT>kc</TT>  <TT>kl</TT>  <TT>ka</TT>
-Power PC	<TT>qc</TT>  <TT>ql</TT>  <TT>qa</TT>
-MIPS	<TT>vc</TT>  <TT>vl</TT>  <TT>va</TT>
-Motorola 68000	<TT>1c</TT>  <TT>1l</TT>  <TT>1a</TT>
-Motorola 68020	<TT>2c</TT>  <TT>2l</TT>  <TT>2a</TT>
-ARM 7500	<TT>5c</TT>  <TT>5l</TT>  <TT>5a</TT>
-Intel 960	<TT>6c</TT>  <TT>6l</TT>  <TT>6a</TT>
-DEC Alpha	<TT>7c</TT>  <TT>7l</TT>  <TT>7a</TT>
-Intel 386	<TT>8c</TT>  <TT>8l</TT>  <TT>8a</TT>
-AMD 29000	<TT>9c</TT>  <TT>9l</TT>  <TT>9a</TT>
-</PRE></TT></DL>
-There is a further breakdown
-in the source of the compilers into
-object-independent and
-object-dependent
-parts.
-All of the object-independent parts
-are combined into source files in the
-directory
-<TT>/sys/src/cmd/cc</TT>.
-The object-dependent parts are collected
-in a separate directory for each compiler,
-for example
-<TT>/sys/src/cmd/vc</TT>.
-All of the code,
-both object-independent and
-object-dependent,
-is machine-independent
-and may be cross-compiled and executed on any
-of the architectures.
-<H4>3 The Language
-</H4>
-<br>&#32;<br>
-The compiler implements ANSI C with some
-restrictions and extensions
-[ANSI90].
-Most of the restrictions are due to
-personal preference, while
-most of the extensions were to help in
-the implementation of Plan 9.
-There are other departures from the standard,
-particularly in the libraries,
-that are beyond the scope of this
-paper.
-<H4>3.1 Register, volatile, const
-</H4>
-<br>&#32;<br>
-The keyword
-<TT>register</TT>
-is recognized syntactically
-but is semantically ignored.
-Thus taking the address of a
-<TT>register</TT>
-variable is not diagnosed.
-The keyword
-<TT>volatile</TT>
-disables all optimizations, in particular registerization, of the corresponding variable.
-The keyword
-<TT>const</TT>
-generates warnings (if warnings are enabled by the compiler's
-<TT>-w</TT>
-option) of non-constant use of the variable,
-but does not affect the generated code.
-<H4>3.2 The preprocessor
-</H4>
-<br>&#32;<br>
-The C preprocessor is probably the
-biggest departure from the ANSI standard.
-<br>&#32;<br>
-The preprocessor built into the Plan 9 compilers does not support
-<TT>#if</TT>,
-although it does handle
-<TT>#ifdef</TT>
-and
-<TT>#include</TT>.
-If it is necessary to be more standard,
-the source text can first be run through the separate ANSI C
-preprocessor,
-<TT>cpp</TT>.
-<H4>3.3 Unnamed substructures
-</H4>
-<br>&#32;<br>
-The most important and most heavily used of the
-extensions is the declaration of an
-unnamed substructure or subunion.
-For example:
-<DL><DT><DD><TT><PRE>
-	typedef
-	struct	lock
-	{
-		int    locked;
-	} Lock;
-
-	typedef
-	struct	node
-	{
-		int	type;
-		union
-		{
-			double dval;
-			float  fval;
-			long   lval;
-		};
-		Lock;
-	} Node;
-
-	Lock*	lock;
-	Node*	node;
-</PRE></TT></DL>
-The declaration of
-<TT>Node</TT>
-has an unnamed substructure of type
-<TT>Lock</TT>
-and an unnamed subunion.
-One use of this feature allows references to elements of the
-subunit to be accessed as if they were in
-the outer structure.
-Thus
-<TT>node-&gt;dval</TT>
-and
-<TT>node-&gt;locked</TT>
-are legitimate references.
-<br>&#32;<br>
-When an outer structure is used
-in a context that is only legal for
-an unnamed substructure,
-the compiler promotes the reference to the
-unnamed substructure.
-This is true for references to structures and
-for references to pointers to structures.
-This happens in assignment statements and
-in argument passing where prototypes have been
-declared.
-Thus, continuing with the example,
-<DL><DT><DD><TT><PRE>
-	lock = node;
-</PRE></TT></DL>
-would assign a pointer to the unnamed
-<TT>Lock</TT>
-in
-the
-<TT>Node</TT>
-to the variable
-<TT>lock</TT>.
-Another example,
-<DL><DT><DD><TT><PRE>
-	extern void lock(Lock*);
-	func(...)
-	{
-		...
-		lock(node);
-		...
-	}
-</PRE></TT></DL>
-will pass a pointer to the
-<TT>Lock</TT>
-substructure.
-<br>&#32;<br>
-Finally, in places where context is insufficient to identify the unnamed structure,
-the type name (it must be a
-<TT>typedef</TT>)
-of the unnamed structure can be used as an identifier.
-In our example,
-<TT>&amp;node-&gt;Lock</TT>
-gives the address of the anonymous
-<TT>Lock</TT>
-structure.
-<H4>3.4 Structure displays
-</H4>
-<br>&#32;<br>
-A structure cast followed by a list of expressions in braces is
-an expression with the type of the structure and elements assigned from
-the corresponding list.
-Structures are now almost first-class citizens of the language.
-It is common to see code like this:
-<DL><DT><DD><TT><PRE>
-	r = (Rectangle){point1, (Point){x,y+2}};
-</PRE></TT></DL>
-<H4>3.5 Initialization indexes
-</H4>
-<br>&#32;<br>
-In initializers of arrays,
-one may place a constant expression
-in square brackets before an initializer.
-This causes the next initializer to assign
-the indicated element.
-For example:
-<DL><DT><DD><TT><PRE>
-	enum	errors
-	{
-		Etoobig,
-		Ealarm,
-		Egreg
-	};
-	char* errstrings[] =
-	{
-		[Ealarm]	"Alarm call",
-		[Egreg]	"Panic: out of mbufs",
-		[Etoobig]	"Arg list too long",
-	};
-</PRE></TT></DL>
-In the same way,
-individual structure members may
-be initialized in any order by preceding the initialization with
-<TT>.tagname</TT>.
-Both forms allow an optional
-<TT>=</TT>,
-to be compatible with a proposed
-extension to ANSI C.
-<H4>3.6 External register
-</H4>
-<br>&#32;<br>
-The declaration
-<TT>extern</TT>
-<TT>register</TT>
-will dedicate a register to
-a variable on a global basis.
-It can be used only under special circumstances.
-External register variables must be identically
-declared in all modules and
-libraries.
-The feature is not intended for efficiency,
-although it can produce efficient code;
-rather it represents a unique storage class that
-would be hard to get any other way.
-On a shared-memory multi-processor,
-an external register is
-one-per-processor and neither one-per-procedure (automatic)
-nor one-per-system (external).
-It is used for two variables in the Plan 9 kernel,
-<TT>u</TT>
-and
-<TT>m</TT>.
-<TT>U</TT>
-is a pointer to the structure representing the currently running process
-and
-<TT>m</TT>
-is a pointer to the per-machine data structure.
-<H4>3.7 Long long
-</H4>
-<br>&#32;<br>
-The compilers accept
-<TT>long</TT>
-<TT>long</TT>
-as a basic type meaning 64-bit integer.
-On all of the machines
-this type is synthesized from 32-bit instructions.
-<H4>3.8 Pragma
-</H4>
-<br>&#32;<br>
-The compilers accept
-<TT>#pragma</TT>
-<TT>lib</TT>
-<I>libname</I>
-and pass the
-library name string uninterpreted
-to the loader.
-The loader uses the library name to
-find libraries to load.
-If the name contains
-<TT>%O</TT>,
-it is replaced with
-the single character object type of the compiler
-(e.g.,
-<TT>v</TT>
-for the MIPS).
-If the name contains
-<TT>%M</TT>,
-it is replaced with
-the architecture type for the compiler
-(e.g.,
-<TT>mips</TT>
-for the MIPS).
-If the name starts with
-<TT>/</TT>
-it is an absolute pathname;
-if it starts with
-<TT>.</TT>
-then it is searched for in the loader's current directory.
-Otherwise, the name is searched from
-<TT>/%M/lib</TT>.
-Such
-<TT>#pragma</TT>
-statements in header files guarantee that the correct
-libraries are always linked with a program without the
-need to specify them explicitly at link time.
-<br>&#32;<br>
-They also accept
-<TT>#pragma</TT>
-<TT>hjdicks</TT>
-<TT>on</TT>
-(or
-<TT>yes</TT>
-or
-<TT>1</TT>)
-to cause subsequently declared data, until
-<TT>#pragma</TT>
-<TT>hjdicks</TT>
-<TT>off</TT>
-(or
-<TT>no</TT>
-or
-<TT>0</TT>),
-to be laid out in memory tightly packed in successive bytes, disregarding
-the usual alignment rules.
-Accessing such data can cause faults.
-<br>&#32;<br>
-Similarly, 
-<TT>#pragma</TT>
-<TT>profile</TT>
-<TT>off</TT>
-(or
-<TT>no</TT>
-or
-<TT>0</TT>)
-causes subsequently declared functions, until
-<TT>#pragma</TT>
-<TT>profile</TT>
-<TT>on</TT>
-(or
-<TT>yes</TT>
-or
-<TT>1</TT>),
-to be marked as unprofiled.
-Such functions will not be profiled when 
-profiling is enabled for the rest of the program.
-<br>&#32;<br>
-Two
-<TT>#pragma</TT>
-statements allow type-checking of
-<TT>print</TT>-like
-functions.
-The first, of the form
-<DL><DT><DD><TT><PRE>
-#pragma varargck argpos error 2
-</PRE></TT></DL>
-tells the compiler that the second argument to
-<TT>error</TT>
-is a
-<TT>print</TT>
-format string (see the manual page
-<A href="/magic/man2html/2/print"><I>print</I>(2))
-</A>that specifies how to format
-<TT>error</TT>'s
-subsequent arguments.
-The second, of the form
-<DL><DT><DD><TT><PRE>
-#pragma varargck type "s" char*
-</PRE></TT></DL>
-says that the
-<TT>print</TT>
-format verb
-<TT>s</TT>
-processes an argument of
-type
-<TT>char*</TT>.
-If the compiler's
-<TT>-F</TT>
-option is enabled, the compiler will use this information
-to report type violations in the arguments to
-<TT>print</TT>,
-<TT>error</TT>,
-and similar routines.
-<H4>4 Object module conventions
-</H4>
-<br>&#32;<br>
-The overall conventions of the runtime environment
-are important
-to runtime efficiency.
-In this section,
-several of these conventions are discussed.
-<H4>4.1 Register saving
-</H4>
-<br>&#32;<br>
-In the Plan 9 compilers,
-the caller of a procedure saves the registers.
-With caller-saves,
-the leaf procedures can use all the
-registers and never save them.
-If you spend a lot of time at the leaves,
-this seems preferable.
-With callee-saves,
-the saving of the registers is done
-in the single point of entry and return.
-If you are interested in space,
-this seems preferable.
-In both,
-there is a degree of uncertainty
-about what registers need to be saved.
-Callee-saved registers make it difficult to
-find variables in registers in debuggers.
-Callee-saved registers also complicate
-the implementation of
-<TT>longjmp</TT>.
-The convincing argument is
-that with caller-saves,
-the decision to registerize a variable
-can include the cost of saving the register
-across calls.
-For a further discussion of caller- vs. callee-saves,
-see the paper by Davidson and Whalley [Dav91].
-<br>&#32;<br>
-In the Plan 9 operating system,
-calls to the kernel look like normal procedure
-calls, which means
-the caller
-has saved the registers and the system
-entry does not have to.
-This makes system calls considerably faster.
-Since this is a potential security hole,
-and can lead to non-determinism,
-the system may eventually save the registers
-on entry,
-or more likely clear the registers on return.
-<H4>4.2 Calling convention
-</H4>
-<br>&#32;<br>
-Older C compilers maintain a frame pointer, which is at a known constant
-offset from the stack pointer within each function.
-For machines where the stack grows towards zero,
-the argument pointer is at a known constant offset
-from the frame pointer.
-Since the stack grows down in Plan 9,
-the Plan 9 compilers
-keep neither an
-explicit frame pointer nor
-an explicit argument pointer;
-instead they generate addresses relative to the stack pointer.
-<br>&#32;<br>
-On some architectures, the first argument to a subroutine is passed in a register.
-<H4>4.3 Functions returning structures
-</H4>
-<br>&#32;<br>
-Structures longer than one word are awkward to implement
-since they do not fit in registers and must
-be passed around in memory.
-Functions that return structures
-are particularly clumsy.
-The Plan 9 compilers pass the return address of
-a structure as the first argument of a
-function that has a structure return value.
-Thus
-<DL><DT><DD><TT><PRE>
-	x = f(...)
-</PRE></TT></DL>
-is rewritten as
-<DL><DT><DD><TT><PRE>
-	f(&amp;x, ...).
-</PRE></TT></DL>
-This saves a copy and makes the compilation
-much less clumsy.
-A disadvantage is that if you call this
-function without an assignment,
-a dummy location must be invented.
-<br>&#32;<br>
-There is also a danger of calling a function
-that returns a structure without declaring
-it as such.
-With ANSI C function prototypes,
-this error need never occur.
-<H4>5 Implementation
-</H4>
-<br>&#32;<br>
-The compiler is divided internally into
-four machine-independent passes,
-four machine-dependent passes,
-and an output pass.
-The next nine sections describe each pass in order.
-<H4>5.1 Parsing
-</H4>
-<br>&#32;<br>
-The first pass is a YACC-based parser
-[Joh79].
-Declarations are interpreted immediately,
-building a block structured symbol table.
-Executable statements are put into a parse tree
-and collected,
-without interpretation.
-At the end of each procedure,
-the parse tree for the function is
-examined by the other passes of the compiler.
-<br>&#32;<br>
-The input stream of the parser is
-a pushdown list of input activations.
-The preprocessor
-expansions of
-macros
-and
-<TT>#include</TT>
-are implemented as pushdowns.
-Thus there is no separate
-pass for preprocessing.
-<H4>5.2 Typing
-</H4>
-<br>&#32;<br>
-The next pass distributes typing information
-to every node of the tree.
-Implicit operations on the tree are added,
-such as type promotions and taking the
-address of arrays and functions.
-<H4>5.3 Machine-independent optimization
-</H4>
-<br>&#32;<br>
-The next pass performs optimizations
-and transformations of the tree, such as converting
-<TT>&amp;*x</TT>
-and
-<TT>*&amp;x</TT>
-into
-<TT>x</TT>.
-Constant expressions are converted to constants in this pass.
-<H4>5.4 Arithmetic rewrites
-</H4>
-<br>&#32;<br>
-This is another machine-independent optimization.
-Subtrees of add, subtract, and multiply of integers are
-rewritten for easier compilation.
-The major transformation is factoring:
-<TT>4+8*a+16*b+5</TT>
-is transformed into
-<TT>9+8*(a+2*b)</TT>.
-Such expressions arise from address
-manipulation and array indexing.
-<H4>5.5 Addressability
-</H4>
-<br>&#32;<br>
-This is the first of the machine-dependent passes.
-The addressability of a processor is defined as the set of
-expressions that is legal in the address field
-of a machine language instruction.
-The addressability of different processors varies widely.
-At one end of the spectrum are the 68020 and VAX,
-which allow a complex mix of incrementing,
-decrementing,
-indexing, and relative addressing.
-At the other end is the MIPS,
-which allows only registers and constant offsets from the
-contents of a register.
-The addressability can be different for different instructions
-within the same processor.
-<br>&#32;<br>
-It is important to the code generator to know when a
-subtree represents an address of a particular type.
-This is done with a bottom-up walk of the tree.
-In this pass, the leaves are labeled with small integers.
-When an internal node is encountered,
-it is labeled by consulting a table indexed by the
-labels on the left and right subtrees.
-For example,
-on the 68020 processor,
-it is possible to address an
-offset from a named location.
-In C, this is represented by the expression
-<TT>*(&amp;name+constant)</TT>.
-This is marked addressable by the following table.
-In the table,
-a node represented by the left column is marked
-with a small integer from the right column.
-Marks of the form
-<TT>A<small><small><sub>i</sub></small></small></TT>
-are addressable while
-marks of the form
-<TT>N<small><small><sub>i</sub></small></small></TT>
-are not addressable.
-<DL><DT><DD><TT><PRE>
-	Node	Marked
-	name	A<small><small><sub>1</sub></small></small>
-	const	A<small><small><sub>2</sub></small></small>
-	&amp;A<small><small><sub>1</sub></small></small>	A<small><small><sub>3</sub></small></small>
-	A<small><small><sub>3</sub></small></small>+A<small><small><sub>1</sub></small></small>	N<small><small><sub>1</sub></small></small> (note that this is not addressable)
-	*N<small><small><sub>1</sub></small></small>	A<small><small><sub>4</sub></small></small>
-</PRE></TT></DL>
-Here there is a distinction between
-a node marked
-<TT>A<small><small><sub>1</sub></small></small></TT>
-and a node marked
-<TT>A<small><small><sub>4</sub></small></small></TT>
-because the address operator of an
-<TT>A<small><small><sub>4</sub></small></small></TT>
-node is not addressable.
-So to extend the table:
-<DL><DT><DD><TT><PRE>
-	Node	Marked
-	&amp;A<small><small><sub>4</sub></small></small>	N<small><small><sub>2</sub></small></small>
-	N<small><small><sub>2</sub></small></small>+N<small><small><sub>1</sub></small></small>	N<small><small><sub>1</sub></small></small>
-</PRE></TT></DL>
-The full addressability of the 68020 is expressed
-in 18 rules like this,
-while the addressability of the MIPS is expressed
-in 11 rules.
-When one ports the compiler,
-this table is usually initialized
-so that leaves are labeled as addressable and nothing else.
-The code produced is poor,
-but porting is easy.
-The table can be extended later.
-<br>&#32;<br>
-This pass also rewrites some complex operators
-into procedure calls.
-Examples include 64-bit multiply and divide.
-<br>&#32;<br>
-In the same bottom-up pass of the tree,
-the nodes are labeled with a Sethi-Ullman complexity
-[Set70].
-This number is roughly the number of registers required
-to compile the tree on an ideal machine.
-An addressable node is marked 0.
-A function call is marked infinite.
-A unary operator is marked as the
-maximum of 1 and the mark of its subtree.
-A binary operator with equal marks on its subtrees is
-marked with a subtree mark plus 1.
-A binary operator with unequal marks on its subtrees is
-marked with the maximum mark of its subtrees.
-The actual values of the marks are not too important,
-but the relative values are.
-The goal is to compile the harder
-(larger mark)
-subtree first.
-<H4>5.6 Code generation
-</H4>
-<br>&#32;<br>
-Code is generated by recursive
-descent.
-The Sethi-Ullman complexity completely guides the
-order.
-The addressability defines the leaves.
-The only difficult part is compiling a tree
-that has two infinite (function call)
-subtrees.
-In this case,
-one subtree is compiled into the return register
-(usually the most convenient place for a function call)
-and then stored on the stack.
-The other subtree is compiled into the return register
-and then the operation is compiled with
-operands from the stack and the return register.
-<br>&#32;<br>
-There is a separate boolean code generator that compiles
-conditional expressions.
-This is fundamentally different from compiling an arithmetic expression.
-The result of the boolean code generator is the
-position of the program counter and not an expression.
-The boolean code generator makes extensive use of De Morgan's rule.
-The boolean code generator is an expanded version of that described
-in chapter 8 of Aho, Sethi, and Ullman
-[Aho87].
-<br>&#32;<br>
-There is a considerable amount of talk in the literature
-about automating this part of a compiler with a machine
-description.
-Since this code generator is so small
-(less than 500 lines of C)
-and easy,
-it hardly seems worth the effort.
-<H4>5.7 Registerization
-</H4>
-<br>&#32;<br>
-Up to now,
-the compiler has operated on syntax trees
-that are roughly equivalent to the original source language.
-The previous pass has produced machine language in an internal
-format.
-The next two passes operate on the internal machine language
-structures.
-The purpose of the next pass is to reintroduce
-registers for heavily used variables.
-<br>&#32;<br>
-All of the variables that can be
-potentially registerized within a procedure are
-placed in a table.
-(Suitable variables are any automatic or external
-scalars that do not have their addresses extracted.
-Some constants that are hard to reference are also
-considered for registerization.)
-Four separate data flow equations are evaluated
-over the procedure on all of these variables.
-Two of the equations are the normal set-behind
-and used-ahead
-bits that define the life of a variable.
-The two new bits tell if a variable life
-crosses a function call ahead or behind.
-By examining a variable over its lifetime,
-it is possible to get a cost
-for registerizing.
-Loops are detected and the costs are multiplied
-by three for every level of loop nesting.
-Costs are sorted and the variables
-are replaced by available registers on a greedy basis.
-<br>&#32;<br>
-The 68020 has two different
-types of registers.
-For the 68020,
-two different costs are calculated for
-each variable life and the register type that
-affords the better cost is used.
-Ties are broken by counting the number of available
-registers of each type.
-<br>&#32;<br>
-Note that externals are registerized together with automatics.
-This is done by evaluating the semantics of a ``call'' instruction
-differently for externals and automatics.
-Since a call goes outside the local procedure,
-it is assumed that a call references all externals.
-Similarly,
-externals are assumed to be set before an ``entry'' instruction
-and assumed to be referenced after a ``return'' instruction.
-This makes sure that externals are in memory across calls.
-<br>&#32;<br>
-The overall results are satisfactory.
-It would be nice to be able to do this processing in
-a machine-independent way,
-but it is impossible to get all of the costs and
-side effects of different choices by examining the parse tree.
-<br>&#32;<br>
-Most of the code in the registerization pass is machine-independent.
-The major machine-dependency is in
-examining a machine instruction to ask if it sets or references
-a variable.
-<H4>5.8 Machine code optimization
-</H4>
-<br>&#32;<br>
-The next pass walks the machine code
-for opportunistic optimizations.
-For the most part,
-this is highly specific to a particular
-processor.
-One optimization that is performed
-on all of the processors is the
-removal of unnecessary ``move''
-instructions.
-Ironically,
-most of these instructions were inserted by
-the previous pass.
-There are two patterns that are repetitively
-matched and replaced until no more matches are
-found.
-The first tries to remove ``move'' instructions
-by relabeling variables.
-<br>&#32;<br>
-When a ``move'' instruction is encountered,
-if the destination variable is set before the
-source variable is referenced,
-then all of the references to the destination
-variable can be renamed to the source and the ``move''
-can be deleted.
-This transformation uses the reverse data flow
-set up in the previous pass.
-<br>&#32;<br>
-An example of this pattern is depicted in the following
-table.
-The pattern is in the left column and the
-replacement action is in the right column.
-<DL><DT><DD><TT><PRE>
-	MOVE	a-&gt;b		(remove)
-	(sequence with no mention of <TT>a</TT>)
-	USE	b		USE	a
-	(sequence with no mention of <TT>a</TT>)
-	SET	b		SET	b
-</PRE></TT></DL>
-<br>&#32;<br>
-Experiments have shown that it is marginally
-worthwhile to rename uses of the destination variable
-with uses of the source variable up to
-the first use of the source variable.
-<br>&#32;<br>
-The second transform will do relabeling
-without deleting instructions.
-When a ``move'' instruction is encountered,
-if the source variable has been set prior
-to the use of the destination variable
-then all of the references to the source
-variable are replaced by the destination and
-the ``move'' is inverted.
-Typically,
-this transformation will alter two ``move''
-instructions and allow the first transformation
-another chance to remove code.
-This transformation uses the forward data flow
-set up in the previous pass.
-<br>&#32;<br>
-Again,
-the following is a depiction of the transformation where
-the pattern is in the left column and the
-rewrite is in the right column.
-<DL><DT><DD><TT><PRE>
-	SET	a		SET	b
-	(sequence with no use of <TT>b</TT>)
-	USE	a		USE	b
-	(sequence with no use of <TT>b</TT>)
-	MOVE	a-&gt;b		MOVE	b-&gt;a
-</PRE></TT></DL>
-Iterating these transformations
-will usually get rid of all redundant ``move'' instructions.
-<br>&#32;<br>
-A problem with this organization is that the costs
-of registerization calculated in the previous pass
-must depend on how well this pass can detect and remove
-redundant instructions.
-Often,
-a fine candidate for registerization is rejected
-because of the cost of instructions that are later
-removed.
-<H4>5.9 Writing the object file
-</H4>
-<br>&#32;<br>
-The last pass walks the internal assembly language
-and writes the object file.
-The object file is reduced in size by about a factor
-of three with simple compression
-techniques.
-The most important aspect of the object file
-format is that it is independent of the compiling machine.
-All integer and floating numbers in the object
-code are converted to known formats and byte
-orders.
-<H4>6 The loader
-</H4>
-<br>&#32;<br>
-The loader is a multiple pass program that
-reads object files and libraries and produces
-an executable binary.
-The loader also does some minimal
-optimizations and code rewriting.
-Many of the operations performed by the
-loader are machine-dependent.
-<br>&#32;<br>
-The first pass of the loader reads the
-object modules into an internal data
-structure that looks like binary assembly language.
-As the instructions are read,
-code is reordered to remove
-unconditional branch instructions.
-Conditional branch instructions are inverted
-to prevent the insertion of unconditional branches.
-The loader will also make a copy of a few instructions
-to remove an unconditional branch.
-<br>&#32;<br>
-The next pass allocates addresses for
-all external data.
-Typical of processors is the MIPS,
-which can reference &#177;32K bytes from a
-register.
-The loader allocates the register
-<TT>R30</TT>
-as the static pointer.
-The value placed in
-<TT>R30</TT>
-is the base of the data segment plus 32K.
-It is then cheap to reference all data in the
-first 64K of the data segment.
-External variables are allocated to
-the data segment
-with the smallest variables allocated first.
-If all of the data cannot fit into the first
-64K of the data segment,
-then usually only a few large arrays
-need more expensive addressing modes.
-<br>&#32;<br>
-For the MIPS processor,
-the loader makes a pass over the internal
-structures,
-exchanging instructions to try
-to fill ``delay slots'' with useful work.
-If a useful instruction cannot be found
-to fill a delay slot,
-the loader will insert
-``noop''
-instructions.
-This pass is very expensive and does not
-do a good job.
-About 40% of all instructions are in
-delay slots.
-About 65% of these are useful instructions and
-35% are ``noops.''
-The vendor-supplied assembler does this job
-more effectively,
-filling about 80%
-of the delay slots with useful instructions.
-<br>&#32;<br>
-On the 68020 processor,
-branch instructions come in a variety of
-sizes depending on the relative distance
-of the branch.
-Thus the size of branch instructions
-can be mutually dependent.
-The loader uses a multiple pass algorithm
-to resolve the branch lengths
-[Szy78].
-Initially, all branches are assumed minimal length.
-On each subsequent pass,
-the branches are reassessed
-and expanded if necessary.
-When no more expansions occur,
-the locations of the instructions in
-the text segment are known.
-<br>&#32;<br>
-On the MIPS processor,
-all instructions are one size.
-A single pass over the instructions will
-determine the locations of all addresses
-in the text segment.
-<br>&#32;<br>
-The last pass of the loader produces the
-executable binary.
-A symbol table and other tables are
-produced to help the debugger to
-interpret the binary symbolically.
-<br>&#32;<br>
-The loader places absolute source line numbers in the symbol table.
-The name and absolute line number of all
-<TT>#include</TT>
-files is also placed in the
-symbol table so that the debuggers can
-associate object code to source files.
-<H4>7 Performance
-</H4>
-<br>&#32;<br>
-The following is a table of the source size of the MIPS
-compiler.
-<DL><DT><DD><TT><PRE>
-	lines	module
-	 509	machine-independent headers
-	1070	machine-independent YACC source
-	6090	machine-independent C source
-
-	 545	machine-dependent headers
-	6532	machine-dependent C source
-
-	 298	loader headers
-	5215	loader C source
-</PRE></TT></DL>
-<br>&#32;<br>
-The following table shows timing
-of a test program
-that plays checkers, running on a MIPS R4000.
-The test program is 26 files totaling 12600 lines of C.
-The execution time does not significantly
-depend on library implementation.
-Since no other compiler runs on Plan 9,
-the Plan 9 tests were done with the Plan 9 operating system;
-the other tests were done on the vendor's operating system.
-The hardware was identical in both cases.
-The optimizer in the vendor's compiler
-is reputed to be extremely good.
-<DL><DT><DD><TT><PRE>
-	  4.49s	Plan 9 <TT>vc</TT> <TT>-N</TT> compile time (opposite of <TT>-O</TT>)
-	  1.72s	Plan 9 <TT>vc</TT> <TT>-N</TT> load time
-	148.69s	Plan 9 <TT>vc</TT> <TT>-N</TT> run time
-
-	 15.07s	Plan 9 <TT>vc</TT> compile time (<TT>-O</TT> implicit)
-	  1.66s	Plan 9 <TT>vc</TT> load time
-	 89.96s	Plan 9 <TT>vc</TT> run time
-
-	 14.83s	vendor <TT>cc</TT> compile time
-	  0.38s	vendor <TT>cc</TT> load time
-	104.75s	vendor <TT>cc</TT> run time
-
-	 43.59s	vendor <TT>cc</TT> <TT>-O</TT> compile time
-	  0.38s	vendor <TT>cc</TT> <TT>-O</TT> load time
-	 76.19s	vendor <TT>cc</TT> <TT>-O</TT> run time
-
-	  8.19s	vendor <TT>cc</TT> <TT>-O3</TT> compile time
-	 35.97s	vendor <TT>cc</TT> <TT>-O3</TT> load time
-	 71.16s	vendor <TT>cc</TT> <TT>-O3</TT> run time
-</PRE></TT></DL>
-<br>&#32;<br>
-To compare the Intel compiler,
-a program that is about 40% bit manipulation and
-about 60% single precision floating point was
-run on the same 33 MHz 486, once under Windows
-compiled with the Watcom compiler, version 10.0,
-in 16-bit mode and once under
-Plan 9 in 32-bit mode.
-The Plan 9 execution time was 27 sec while the Windows
-execution time was 31 sec.
-<H4>8 Conclusions
-</H4>
-<br>&#32;<br>
-The new compilers compile
-quickly,
-load slowly,
-and produce
-medium quality
-object code.
-The compilers are relatively
-portable,
-requiring but a couple of weeks' work to
-produce a compiler for a different computer.
-For Plan 9,
-where we needed several compilers
-with specialized features and
-our own object formats,
-this project was indispensable.
-It is also necessary for us to
-be able to freely distribute our compilers
-with the Plan 9 distribution.
-<br>&#32;<br>
-Two problems have come up in retrospect.
-The first has to do with the
-division of labor between compiler and loader.
-Plan 9 runs on multi-processors and as such
-compilations are often done in parallel.
-Unfortunately,
-all compilations must be complete before loading
-can begin.
-The load is then single-threaded.
-With this model,
-any shift of work from compile to load
-results in a significant increase in real time.
-The same is true of libraries that are compiled
-infrequently and loaded often.
-In the future,
-we may try to put some of the loader work
-back into the compiler.
-<br>&#32;<br>
-The second problem comes from
-the various optimizations performed over several
-passes.
-Often optimizations in different passes depend
-on each other.
-Iterating the passes could compromise efficiency,
-or even loop.
-We see no real solution to this problem.
-<H4>9 References
-</H4>
-<br>&#32;<br>
-[Aho87] A. V. Aho, R. Sethi, and J. D. Ullman,
-Compilers - Principles, Techniques, and Tools,
-Addison Wesley,
-Reading, MA,
-1987.
-<br>&#32;<br>
-[ANSI90] <I>American National Standard for Information Systems -
-Programming Language C</I>, American National Standards Institute, Inc.,
-New York, 1990.
-<br>&#32;<br>
-[Dav91] J. W. Davidson and D. B. Whalley,
-``Methods for Saving and Restoring Register Values across Function Calls'',
-Software-Practice and Experience,
-Vol 21(2), pp. 149-165, February 1991.
-<br>&#32;<br>
-[Joh79] S. C. Johnson,
-``YACC - Yet Another Compiler Compiler'',
-UNIX Programmer's Manual, Seventh Ed., Vol. 2A,
-AT&amp;T Bell Laboratories,
-Murray Hill, NJ,
-1979.
-<br>&#32;<br>
-[Set70] R. Sethi and J. D. Ullman,
-``The Generation of Optimal Code for Arithmetic Expressions'',
-Journal of the ACM,
-Vol 17(4), pp. 715-728, 1970.
-<br>&#32;<br>
-[Szy78] T. G. Szymanski,
-``Assembling Code for Machines with Span-dependent Instructions'',
-Communications of the ACM,
-Vol 21(4), pp. 300-308, 1978.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 837
sys/doc/fs/fs.html

@@ -1,837 +0,0 @@
-<html>
-<title>
-The Plan 9 File Server
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>The Plan 9 File Server
-</H1>
-<DL><DD><I>Ken Thompson<br>
-ken@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-This paper describes the structure
-and the operation of Plan 9 file servers.
-The specifics apply to
-our main Plan 9 file server
-Emelie,
-but
-the code is also the basis for
-the user level file server
-<TT>kfs</TT>.
-</DL>
-<H4>Introduction
-</H4>
-<P>
-The Plan 9 file server
-Emelie
-is the oldest piece of system software
-still in use on Plan 9.
-It evolved from a user-level program that served
-serial lines on a Sequent multi-processor.
-The current implementation is neither clean nor
-portable,
-but it has slowly come to terms with
-its particular set of cranky computers
-and devices.
-</P>
-<H4>Process Structure
-</H4>
-<P>
-The Plan 9 file system server is made from
-an ancient version of the Plan 9 kernel.
-The kernel contains process control,
-synchronization,
-locks,
-and some memory
-allocation.
-The kernel has no user processes or
-virtual memory.
-</P>
-<P>
-The structure of the file system server
-is a set of kernel processes
-synchronizing mostly through message passing.
-In Emelie there are 26 processes of 10 types:
-<DL><DT><DD><TT><PRE>
-number name  function
-  15       <TT>srv</TT>   Main file system server processes
-   1       <TT>rah</TT>   Block read-ahead processes
-   1       <TT>scp</TT>   Sync process
-   1       <TT>wcp</TT>   WORM copy process
-   1       <TT>con</TT>   Console process
-   1       <TT>ilo</TT>   IL protocol process
-   1       <TT>ilt</TT>   IL timer process
-   2       <TT>ethi</TT>   Ethernet input process
-   2       <TT>etho</TT>   Ethernet output process
-   1       <TT>flo</TT>   Floppy disk process
-</PRE></TT></DL>
-</P>
-<H4>The server processes
-</H4>
-<P>
-The main file system algorithm is a set
-of identical processes
-named
-<TT>srv</TT>
-that honor the
-9P protocol.
-Each file system process waits on
-a message queue for an incoming request.
-The request contains a 9P message and
-the address of a reply queue.
-A
-<TT>srv</TT>
-process parses the message,
-performs pseudo-disk I/O
-to the corresponding file system block device,
-formulates a response,
-and sends the
-response back to the reply queue.
-</P>
-<P>
-The unit of storage is a
-block of data on a device:
-<DL><DT><DD><TT><PRE>
-    enum
-    {
-        RBUFSIZE = 16*1024
-    };
-
-    typedef
-    struct
-    {
-        short   pad;
-        short	tag;
-        long	path;
-    } Tag;
-
-    enum
-    {
-        BUFSIZE = RBUFSIZE - sizeof(Tag)
-    };
-
-    typedef
-    struct
-    {
-        uchar   data[BUFSIZE];
-        Tag     tag;
-    } Block;
-</PRE></TT></DL>
-All devices are idealized as a perfect disk
-of contiguously numbered blocks each of size
-<TT>RBUFSIZE</TT>.
-Each block has a tag that identifies what type
-of block it is and a unique id of the file or directory
-where this block resides.
-The remaining data in the block depends on
-what type of block it is.
-</P>
-<P>
-The
-<TT>srv</TT>
-process's main data structure is the directory entry.
-This is the equivalent of a UNIX i-node and
-defines the set of block addresses that comprise a file or directory.
-Unlike the i-node,
-the directory entry also has the name of the
-file or directory in it:
-<DL><DT><DD><TT><PRE>
-    enum
-    {
-        NAMELEN = 28,
-        NDBLOCK = 6
-    };
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-    typedef
-    struct
-    {
-        char    name[NAMELEN];
-        short   uid;
-        short   gid;
-        ushort  mode;
-        short   wuid;
-        Qid     qid;
-        long    size;
-        long    dblock[NDBLOCK];
-        long    iblock;
-        long    diblock;
-        long    atime;
-        long    mtime;
-    } Dentry;
-</PRE></TT></DL>
-Each directory entry holds the file or directory
-name, protection mode, access times, user-id, group-id, and addressing
-information.
-The entry
-<TT>wuid</TT>
-is the user-id of the last writer of the file
-and
-<TT>size</TT>
-is the size of the file in bytes.
-The first 6
-blocks of the file are held in the
-<TT>dblock</TT>
-array.
-If the file is larger than that,
-an indirect block is allocated that holds
-the next
-<TT>BUFSIZE/sizeof(long)</TT>
-blocks of the file.
-The indirect block address is held in the structure member
-<TT>iblock</TT>.
-If the file is larger yet,
-then there is a double indirect block that points
-at indirect blocks.
-The double indirect address is held in
-<TT>diblock</TT>
-and can point at another
-<TT>(BUFSIZE/sizeof(long))<sup>2</sup></TT>
-blocks of data.
-The maximum addressable size of a file is
-therefore 275 Gbytes.
-There is a tighter restriction of
-2<sup>32</sup>
-bytes because the length of a file is maintained in
-a long.
-Even so,
-sloppy use of long arithmetic restricts the length to
-2<sup>31</sup>
-bytes.
-These numbers are based on Emelie
-which has a block size of 16K and
-<TT>sizeof(long)</TT>
-is 4.
-It would be different if the size of a block
-changed.
-</P>
-<P>
-The declarations of the indirect and double indirect blocks
-are as follows.
-<DL><DT><DD><TT><PRE>
-    enum
-    {
-        INDPERBUF = BUFSIZE/sizeof(long),
-    };
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-    typedef
-    struct
-    {
-        long    dblock[INDPERBUF];
-        Tag     ibtag;
-    } Iblock;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-    typedef
-    struct
-    {
-        long    iblock[INDPERBUF];
-        Tag     dibtag;
-    } Diblock;
-</PRE></TT></DL>
-</P>
-<P>
-The root of a file system is a single directory entry
-at a known block address.
-A directory is a file that consists of a list of
-directory entries.
-To make access easier,
-a directory entry cannot cross blocks.
-In Emelie there are 233 directory entries per block.
-</P>
-<P>
-The device on which the blocks reside is implicit
-and ultimately comes from the 9P
-<TT>attach</TT>
-message that specifies the name of the
-device containing the root.
-</P>
-<H4>Buffer Cache
-</H4>
-<P>
-When the file server is
-booted,
-all of the unused memory is allocated to
-a block buffer pool.
-There are two major operations on the buffer
-pool.
-<TT>Getbuf</TT>
-will find the buffer associated with a
-particular block on a particular device.
-The returned buffer is locked so that the
-caller has exclusive use.
-If the requested buffer is not in the pool,
-some other buffer will be relabeled and
-the data will be read from the requested device.
-<TT>Putbuf</TT>
-will unlock a buffer and
-if the contents are marked as modified,
-the buffer will be written to the device before
-the buffer is relabeled.
-If there is some special mapping
-or CPU cache flushing
-that must occur in order for the physical I/O
-device to access the buffers,
-this is done between
-<TT>getbuf</TT>
-and
-<TT>putbuf</TT>.
-The contents of a buffer are never touched
-except while it is locked between
-<TT>getbuf</TT>
-and
-<TT>putbuf</TT>
-calls.
-</P>
-<P>
-The
-file system server processes
-prevent deadlock in the buffers by
-always locking parent and child
-directory entries in that order.
-Since the entire directory structure
-is a hierarchy,
-this makes the locking well-ordered,
-preventing deadlock.
-The major problem in the locking strategy is
-that locks are at a block level and there are many
-directory entries in a single block.
-There are unnecessary lock conflicts
-in the directory blocks.
-When one of these directory blocks is tied up
-accessing the very slow WORM,
-then all I/O to dozens of unrelated directories
-is blocked.
-</P>
-<H4>Block Devices
-</H4>
-<P>
-The block device I/O system is like a
-protocol stack of filters.
-There is a set of pseudo-devices that call
-recursively to other pseudo-devices and real devices.
-The protocol stack is compiled from a configuration
-string that specifies the order of pseudo-devices and devices.
-Each pseudo-device and device has a set of entry points
-that corresponds to the operations that the file system
-requires of a device.
-The most notable operations are
-<TT>read</TT>,
-<TT>write</TT>,
-and
-<TT>size</TT>.
-</P>
-<P>
-The device stack can best be described by
-describing the syntax of the configuration string
-that specifies the stack.
-Configuration strings are used
-during the setup of the file system.
-For a description see
-<A href="/magic/man2html/8/fsconfig"><I>fsconfig</I>(8).
-</A>In the following recursive definition,
-<I>D</I>
-represents a
-string that specifies a block device.
-</P>
-<DL COMPACT>
-<DT><I>D</I> = (<I>DD</I>...)<DD>
-<br>
-This is a set of devices that
-are concatenated to form a single device.
-The size of the catenated device is the
-sum of the sizes of each sub-device.
-<DT><I>D</I> = [<I>DD</I>...]<DD>
-<br>
-This is the interleaving of the
-individual devices.
-If there are N devices in the list,
-then the pseudo-device is the N-way block
-interleaving of the sub-devices.
-The size of the interleaved device is
-N times the size of the smallest sub-device.
-<DT><I>D</I> = <TT>p</TT><I>DN1.N2</I><DD>
-<br>
-This is a partition of a sub-device.
-The sub-device is partitioned into 100 equal pieces.
-If the size of the sub-device is not divisible by 100,
-then there will be some slop thrown away at the top.
-The pseudo-device starts at the N1-th piece and
-continues for N2 pieces. Thus
-<TT>p<I>D</I>67.33</TT>
-will be the
-last third of the device
-<I>D</I>.
-<DT><I>D</I> = <TT>f</TT><I>D</I><DD>
-<br>
-This is a fake write-once-read-many device simulated by a
-second read-write device.
-This second device is partitioned
-into a set of block flags and a set of blocks.
-The flags are used to generate errors if a
-block is ever written twice or read without being written first.
-<DT><I>D</I> = <TT>c</TT><I>DD</I><DD>
-<br>
-This is the cache/WORM device made up of a cache (read-write)
-device and a WORM (write-once-read-many) device.
-More on this later.
-<DT><I>D</I> = <TT>o</TT><DD>
-<br>
-This is the dump file system that is the
-two-level hierarchy of all dumps ever taken on a cache/WORM.
-The read-only root of the cache/WORM file system
-(on the dump taken Feb 18, 1995) can
-be referenced as
-<TT>/1995/0218</TT>
-in this pseudo device.
-The second dump taken that day will be
-<TT>/1995/02181</TT>.
-<DT><I>D</I> = <TT>w</TT><I>N1.N2</I><DD>
-<br>
-This is a SCSI disk on controller N1 and target N2.
-<DT><I>D</I> = <TT>l</TT><I>N1.N2</I><DD>
-<br>
-This is the same as
-<TT>w</TT>,
-but one block from the SCSI disk is removed for labeling.
-<DT><I>D</I> = <TT>j(</TT><I>D<sub>1</sub></I><I>D<sub>2</sub></I><TT>*)</TT><I>D<sub>3</sub></I><DD>
-<br>
-<I>D<sub>1</sub></I>
-is the juke box SCSI interface.
-The
-<I>D<sub>2</sub></I>'s
-are the SCSI drives in the juke box
-and  the
-<I>D<sub>3</sub></I>'s
-are the demountable platters in the juke box.
-<I>D<sub>1</sub></I>
-and
-<I>D<sub>2</sub></I>
-must be
-<TT>w</TT>.
-<I>D<sub>3</sub></I>
-must be pseudo devices of
-<TT>w</TT>
-or
-<TT>l</TT>
-devices.
-</dl>
-<P>
-For both
-<TT>w</TT>
-and
-<TT>r</TT>
-devices any of the configuration numbers
-can be replaced by an iterator of the form
-<TT>&#60;<I>N1-N2</I>&#62;</TT>.
-Thus
-<DL><DT><DD><TT><PRE>
-    [w0.&#60;2-6&#62;]
-</PRE></TT></DL>
-is the interleaved SCSI disks on SCSI targets
-2 through 6 of SCSI controller 0.
-The main file system on
-Emelie
-is defined by the configuration string
-<DL><DT><DD><TT><PRE>
-    c[w1.&#60;0-5&#62;.0]j(w6w5w4w3w2)l(&#60;0-236&#62;l&#60;238-474&#62;)
-</PRE></TT></DL>
-This is a cache/WORM driver.
-The cache is six interleaved disks on SCSI controller 1
-targets 0, 1, 2, 3, 4, and 5.
-The WORM half of the cache/WORM
-is 474 jukebox platters (both sides of 237 disks).
-</P>
-<H4>The read-ahead processes
-</H4>
-<P>
-There is a set of file system processes,
-<TT>rah</TT>,
-that wait for messages consisting of a device and block
-address.
-When a message comes in,
-the process reads the specified block from the device.
-This is done by calling
-<TT>getbuf</TT>
-and
-<TT>putbuf</TT>.
-The purpose of this is the hope that these blocks
-will be used later and that they will reside in the
-buffer cache long enough not to be discarded before
-they are used.
-</P>
-<P>
-The messages to the read-ahead processes are
-generated by the server processes.
-The server processes maintain a relative block mark in every
-open file.
-Whenever an open file reads that relative block,
-the next 110 block addresses of the file are sent
-to the read-ahead processes and
-the relative block mark is advanced by 100.
-The initial relative block is set to 1.
-If the file is opened and
-only a few bytes are read,
-then no anticipating reads are performed
-since the relative block mark is set to 1
-and only block offset 0 is read.
-This is to prevent some
-fairly common action such as
-<DL><DT><DD><TT><PRE>
-    file *
-</PRE></TT></DL>
-from swamping the file system with read-ahead
-requests that will never be used.
-</P>
-<H4>Cache/WORM Driver
-</H4>
-<P>
-The cache/WORM (cw) driver is by far the
-largest and most complicated device driver in the file server.
-There are four devices involved in the cw driver.
-It implements a read/write pseudo-device (the cw-device)
-and a read-only pseudo-device (the dump device)
-by performing operations on its two constituent devices
-the read-write c-device and the write-once-read-many
-w-device.
-The block numbers on the four devices are distinct,
-although the cw addresses,
-dump addresses,
-and the w addresses are
-highly correlated.
-</P>
-<P>
-The cw-driver uses the w-device as the
-stable storage of the file system at the time of the
-last dump.
-All newly written and a large number of recently used
-exact copies of blocks of the w-device are kept on the c-device.
-The c-device is much smaller than the w-device and
-so the subset of w-blocks that are kept on the c-device are
-mapped through a hash table kept on a partition of the c-device.
-</P>
-<P>
-The map portion of the c-device consists of blocks of buckets of entries.
-The declarations follow.
-<DL><DT><DD><TT><PRE>
-    enum
-    {
-        BKPERBLK = 10,
-        CEPERBK  = (BUFSIZE - BKPERBLK*sizeof(long)) /
-                   (sizeof(Centry)*BKPERBLK),
-    };
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-    typedef
-    struct
-    {
-        ushort   age;
-        short    state;
-        long     waddr;
-    } Centry;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-    typedef
-    struct
-    {
-        long     agegen;
-        Centry   entry[CEPERBK];
-    } Bucket;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-    Bucket   bucket[BKPERBLK];
-</PRE></TT></DL>
-There is exactly one entry structure for each block in the
-data partition of the c-device.
-A bucket contains all of the w-addresses that have
-the same hash code.
-There are as many buckets as will fit
-in a block and enough blocks to have the required
-number of entries.
-The entries in the bucket are maintained
-in FIFO order with an age variable and an incrementing age generator.
-When the age generator is about to overflow,
-all of the ages in the bucket are rescaled
-from zero.
-</P>
-<P>
-The following steps go into converting a w-address into a c-address.
-The bucket is found by
-<DL><DT><DD><TT><PRE>
-    bucket_number = w-address % total_buckets
-    getbuf(c-device, bucket_offset + bucket_number/BKPERBLK);
-</PRE></TT></DL>
-After the desired bucket is found,
-the desired entry is found by a linear search within the bucket for the
-entry with the desired
-<TT>waddr</TT>.
-</P>
-<P>
-The state variable in the entry is
-one of the following.
-<DL><DT><DD><TT><PRE>
-    enum
-    {
-        Cnone    = 0,
-        Cdirty,
-        Cdump,
-        Cread,
-        Cwrite,
-        Cdump1,
-    };
-</PRE></TT></DL>
-Every w-address has a state.
-Blocks that are not in the
-c-device have the implied
-state
-<TT>Cnone</TT>.
-The
-<TT>Cread</TT>
-state is for blocks that have the
-same data as the corresponding block in
-the w-device.
-Since the c-device is much faster than the
-w-device,
-<TT>Cread</TT>
-blocks are kept as long as possible and
-used in preference to reading the w-device.
-<TT>Cread</TT>
-blocks may be discarded from the c-device
-when the space is needed for newer data.
-The
-<TT>Cwrite</TT>
-state is when the c-device contains newer data
-than the corresponding block on the w-device.
-This happens when a
-<TT>Cnone</TT>,
-<TT>Cread</TT>,
-or
-<TT>Cwrite</TT>
-block is written.
-The
-<TT>Cdirty</TT>
-state
-is when the c-device contains
-new data and the corresponding block
-on the w-device has never been written.
-This happens when a new block has been
-allocated from the free space on the w-device.
-</P>
-<P>
-The
-<TT>Cwrite</TT>
-and
-<TT>Cdirty</TT>
-blocks are created and never removed.
-Unless something is done to
-convert these blocks,
-the c-device will gradually
-fill up and stop functioning.
-Once a day,
-or by command,
-a
-<I>dump</I>
-of the cw-device
-is taken.
-The purpose of
-a dump is to queue the writes that
-have been shunted to the c-device
-to be written to the w-device.
-Since the w-device is a WORM,
-blocks cannot be rewritten.
-Blocks that have already been written to the WORM must be
-relocated to the unused portion of the w-device.
-These are precisely the
-blocks with
-<TT>Cwrite</TT>
-state.
-</P>
-<P>
-The dump algorithm is as follows:
-a) The tree on the cw-device is walked
-as long as the blocks visited have been
-modified since the last dump.
-These are the blocks with state
-<TT>Cwrite</TT>
-and
-<TT>Cdirty</TT>.
-It is possible to restrict the search
-to within these blocks
-since the directory containing a modified
-file must have been accessed to modify the
-file and accessing a directory will set its
-modified time thus causing the block containing it
-to be written.
-The directory containing that directory must be
-modified for the same reason.
-The tree walk is thus drastically restrained and the
-tree walk does not take much time.
-b) All
-<TT>Cwrite</TT>
-blocks found in the tree search
-are relocated to new blank blocks on the w-device
-and converted to
-<TT>Cdump</TT>
-state.
-All
-<TT>Cdirty</TT>
-blocks are converted to
-<TT>Cdump</TT>
-state without relocation.
-At this point,
-all modified blocks in the cw-device
-have w-addresses that point to unwritten
-WORM blocks.
-These blocks are marked for later
-writing to the w-device
-with the state
-<TT>Cdump</TT>.
-c) All open files that were pointing to modified
-blocks are reopened to point at the corresponding
-reallocated blocks.
-This causes the directories leading to the
-open files to be modified.
-Thus the invariant discussed in a) is maintained.
-d) The background dumping process will slowly
-go through the map of the c-device and write out
-all blocks with
-<TT>Cdump</TT>
-state.
-</P>
-<P>
-The dump takes a few minutes to walk the tree
-and mark the blocks.
-It can take hours to write the marked blocks
-to the WORM.
-If a marked block is rewritten before the old
-copy has been written to the WORM,
-it must be forced to the WORM before it is rewritten.
-There is no problem if another dump is taken before the first one
-is finished.
-The newly marked blocks are just added to the marked blocks
-left from the first dump.
-</P>
-<P>
-If there is an error writing a marked block
-to the WORM
-then the
-<TT>Cdump</TT>
-state is converted to
-<TT>Cdump1</TT>
-and manual intervention is needed.
-(See the
-<TT>cwcmd</TT>
-<TT>mvstate</TT>
-command in
-<A href="/magic/man2html/8/fs"><I>fs</I>(8)).
-</A>These blocks can be disposed of by converting
-their state back to
-<TT>Cdump</TT>
-so that they will be written again.
-They can also be converted to
-<TT>Cwrite</TT>
-state so that they will be allocated new
-addresses at the next dump.
-In most other respects,
-a
-<TT>Cdump1</TT>
-block behaves like a
-<TT>Cwrite</TT>
-block.
-</P>
-<H4>Sync Copy and WORM Copy Processes
-</H4>
-<P>
-The
-<TT>scp</TT>
-process
-wakes up every ten seconds and
-issues writes to blocks in the buffer cache
-that have been modified.
-This is done automatically on important
-console commands such as
-<TT>halt</TT>
-and
-<TT>dump</TT>.
-</P>
-<P>
-The
-<TT>wcp</TT>
-process also wakes up every ten seconds
-and tries to copy a
-<TT>dump</TT>
-block from the cache to the WORM.
-As long as there are
-<TT>dump</TT>
-blocks to copy and there is no competition for
-the WORM device,
-the copy will continue at full speed.
-Whenever there is competition for the WORM
-or there are no more blocks to
-copy,
-then the process will sleep ten seconds
-before looking again.
-</P>
-<P>
-The HP WORM jukebox consists of
-238 disks divided into 476 sides
-or platters.
-Platter 0 is the
-<I>A</I>
-side of disk 0.
-Platter 1 is the
-<I>A</I>
-side of disk 1.
-Platter 238 is the
-<I>B</I>
-side of disk 0.
-On Emelie,
-the main file system is configured
-on both sides of the first 237 disks,
-platters 0-236 and 238-474.
-</P>
-<H4>9P Protocol Drivers
-</H4>
-<P>
-The file server described so far
-waits for 9P protocol messages to
-appear in its input queue.
-It processes each message and
-sends the reply back to the originator.
-There are groups of processes that
-perform protocol I/O on some network or
-device and the resulting messages
-are sent to the file system queue.
-</P>
-<P>
-There are two sets of processes
-<TT>ethi</TT>
-and
-<TT>etho</TT>
-that perform Ethernet input and output on two different networks.
-These processes send Ethernet messages
-to/from two more processes
-<TT>ilo</TT>
-and
-<TT>ilt</TT>
-that do the IL reliable datagram protocol
-on top of IP packets.
-</P>
-<P>
-The last process in Emelie,
-<TT>con</TT>,
-reads the console
-and calls internal subroutines to
-execute the commands typed.
-Since there is only one process,
-only one command can be executing at a
-time.
-See
-<A href="/magic/man2html/8/fs"><I>fs</I>(8)
-</A>for a description of the
-commands available at the console.
-
-</P>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 427
sys/doc/il/il.html

@@ -1,427 +0,0 @@
-<html>
-<title>
-The IL protocol
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>The IL protocol
-</H1>
-<DL><DD><I>Dave Presotto<br>
-Phil Winterbottom<br>
-<br>&#32;<br>
-presotto,philw@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-To transport the remote procedure call messages of the Plan 9 file system
-protocol 9P, we have implemented a new network protocol, called IL.
-It is a connection-based, lightweight transport protocol that carries
-datagrams encapsulated by IP.
-IL provides retransmission of lost messages and in-sequence delivery, but has
-no flow control and no blind retransmission.
-</DL>
-<H4>Introduction
-</H4>
-<P>
-Plan 9 uses a file system protocol, called 9P [PPTTW93], that assumes
-in-sequence guaranteed delivery of delimited messages
-holding remote procedure call
-(RPC) requests and responses.
-None of the standard IP protocols [RFC791] is suitable for transmission of
-9P messages over an Ethernet or the Internet.
-TCP [RFC793] has a high overhead and does not preserve delimiters.
-UDP [RFC768], while cheap and preserving message delimiters, does not provide
-reliable sequenced delivery.
-When we were implementing IP, TCP, and UDP in our system we
-tried to choose a protocol suitable for carrying 9P.
-The properties we desired were:
-</P>
-<DL COMPACT>
-<DT>*<DD>
-Reliable datagram service
-<DT>*<DD>
-In-sequence delivery
-<DT>*<DD>
-Internetworking using IP
-<DT>*<DD>
-Low complexity, high performance
-<DT>*<DD>
-Adaptive timeouts
-</dl>
-<br>&#32;<br>
-No standard protocol met our needs so we designed a new one,
-called IL (Internet Link).
-<P>
-IL is a lightweight protocol encapsulated by IP.
-It is connection-based and
-provides reliable transmission of sequenced messages.
-No provision is made for flow control since the protocol
-is designed to transport RPC
-messages between client and server, a structure with inherent flow limitations.
-A small window for outstanding messages prevents too
-many incoming messages from being buffered;
-messages outside the window are discarded
-and must be retransmitted.
-Connection setup uses a two-way handshake to generate
-initial sequence numbers at each end of the connection;
-subsequent data messages increment the
-sequence numbers to allow
-the receiver to resequence out of order messages. 
-In contrast to other protocols, IL avoids blind retransmission.
-This helps performance in congested networks,
-where blind retransmission could cause further
-congestion.
-Like TCP, IL has adaptive timeouts,
-so the protocol performs well both on the
-Internet and on local Ethernets.
-A round-trip timer is used
-to calculate acknowledge and retransmission times
-that match the network speed.
-</P>
-<H4>Connections
-</H4>
-<P>
-An IL connection carries a stream of data between two end points.
-While the connection persists,
-data entering one side is sent to the other side in the same sequence.
-The functioning of a connection is described by the state machine in Figure 1,
-which shows the states (circles) and transitions between them (arcs).
-Each transition is labeled with the list of events that can cause
-the transition and, separated by a horizontal line,
-the messages sent or received on that transition.
-The remainder of this paper is a discussion of this state machine.
-
-<DL><DT><DD><TT><PRE>
-<br><img src="-.15070.gif"><br>
-
-<DL><DD>
-</PRE></TT></DL>
-</P>
-<DL COMPACT>
-<DT><I>ackok</I><DD>
-any sequence number between id0 and next inclusive
-<DT><I>!x</I><DD>
-any value except x
-<DT>-<DD>
-any value
-</DL>
-<br>&#32;<br>
-<I>Figure 1 - IL State Transitions</I>
-</dl>
-<P>
-The IL state machine has five states:
-<I>Closed</I>,
-<I>Syncer</I>,
-<I>Syncee</I>,
-<I>Established</I>,
-and
-<I>Closing</I>.
-The connection is identified by the IP address and port number used at each end.
-The addresses ride in the IP protocol header, while the ports are part of the
-18-byte IL header.
-The local variables identifying the state of a connection are:
-<DL><DD>
-</P>
-<DL COMPACT>
-<DT>state<DD>
-one of the states
-<DT>laddr<DD>
-32-bit local IP address
-<DT>lport<DD>
-16-bit local IL port
-<DT>raddr<DD>
-32-bit remote IP address
-<DT>rport<DD>
-16-bit remote IL port
-<DT>id0<DD>
-32-bit starting sequence number of the local side
-<DT>rid0<DD>
-32-bit starting sequence number of the remote side
-<DT>next<DD>
-sequence number of the next message to be sent from the local side
-<DT>rcvd<DD>
-the last in-sequence message received from the remote side
-<DT>unacked<DD>
-sequence number of the first unacked message
-</DL>
-</dl>
-<P>
-Unused connections are in the
-<I>Closed</I>
-state with no assigned addresses or ports.
-Two events open a connection: the reception of
-a message whose addresses and ports match no open connection
-or a user explicitly opening a connection.
-In the first case, the message's source address and port become the
-connection's remote address and port and the message's destination address
-and port become the local address and port.
-The connection state is set to
-<I>Syncee</I>
-and the message is processed.
-In the second case, the user specifies both local and remote addresses and ports.
-The connection's state is set to
-<I>Syncer</I>
-and a
-<TT>sync</TT>
-message is sent to the remote side.
-The legal values for the local address are constrained by the IP implementation.
-</P>
-<H4>Sequence Numbers
-</H4>
-<P>
-IL carries data messages.
-Each message corresponds to a single write from
-the operating system and is identified by a 32-bit
-sequence number.
-The starting sequence number for each direction in a
-connection is picked at random and transmitted in the initial
-<TT>sync</TT>
-message.
-The number is incremented for each subsequent data message.
-A retransmitted message contains its original sequence number.
-</P>
-<H4>Transmission/Retransmission
-</H4>
-<P>
-Each message contains two sequence numbers:
-an identifier (ID) and an acknowledgement.
-The acknowledgement is the last in-sequence
-data message received by the transmitter of the message.
-For
-<TT>data</TT>
-and
-<TT>dataquery</TT>
-messages, the ID is its sequence number.
-For the control messages
-<TT>sync</TT>,
-<TT>ack</TT>,
-<TT>query</TT>,
-<TT>state</TT>,
-and
-<TT>close</TT>,
-the ID is one greater than the sequence number of
-the highest sent data message.
-</P>
-<P>
-The sender transmits data messages with type
-<TT>data</TT>.
-Any messages traveling in the opposite direction carry acknowledgements.
-An
-<TT>ack</TT>
-message will be sent within 200 milliseconds of receiving the data message
-unless a returning message has already piggy-backed an
-acknowledgement to the sender.
-</P>
-<P>
-In IP, messages may be delivered out of order or
-may be lost due to congestion or faults.
-To overcome this,
-IL uses a modified ``go back n'' protocol that also attempts
-to avoid aggravating network congestion.
-An average round trip time is maintained by measuring the delay between
-the transmission of a message and the
-receipt of its acknowledgement.
-Until the first acknowledgement is received, the average round trip time
-is assumed to be 100ms.
-If an acknowledgement is not received within four round trip times
-of the first unacknowledged message
-(<I>rexmit timeout</I>
-in Figure 1), IL assumes the message or the acknowledgement
-has been lost.
-The sender then resends only the first unacknowledged message,
-setting the type to
-<TT>dataquery</TT>.
-When the receiver receives a
-<TT>dataquery</TT>,
-it responds with a
-<TT>state</TT>
-message acknowledging the highest received in-sequence data message.
-This may be the retransmitted message or, if the receiver has been
-saving up out-of-sequence messages, some higher numbered message.
-Implementations of the receiver are free to choose whether to save out-of-sequence messages.
-Our implementation saves up to 10 packets ahead.
-When the sender receives the
-<TT>state</TT>
-message, it will immediately resend the next unacknowledged message
-with type
-<TT>dataquery</TT>.
-This continues until all messages are acknowledged.
-</P>
-<P>
-If no acknowledgement is received after the first
-<TT>dataquery</TT>,
-the transmitter continues to timeout and resend the
-<TT>dataquery</TT>
-message.
-The intervals between retransmissions increase exponentially.
-After 300 times the round trip time
-(<I>death timeout</I>
-in Figure 1), the sender gives up and
-assumes the connection is dead.
-</P>
-<P>
-Retransmission also occurs in the states
-<I>Syncer</I>,
-<I>Syncee</I>,
-and
-<I>Closing</I>.
-The retransmission intervals are the same as for data messages.
-</P>
-<H4>Keep Alive
-</H4>
-<P>
-Connections to dead systems must be discovered and torn down
-lest they consume resources.
-If the surviving system does not need to send any data and
-all data it has sent has been acknowledged, the protocol
-described so far will not discover these connections.
-Therefore, in the
-<I>Established</I>
-state, if no other messages are sent for a 6 second period,
-a
-<TT>query</TT>
-is sent.
-The receiver always replies to a
-<TT>query</TT>
-with a
-<TT>state</TT>
-message.
-If no messages are received for 30 seconds, the
-connection is torn down.
-This is not shown in Figure 1.
-</P>
-<H4>Byte Ordering
-</H4>
-<P>
-All 32- and 16-bit quantities are transmitted high-order byte first, as
-is the custom in IP.
-</P>
-<H4>Formats
-</H4>
-<P>
-The following is a C language description of an IP+IL
-header, assuming no IP options:
-<DL><DT><DD><TT><PRE>
-typedef unsigned char byte;
-struct IPIL
-{
-	byte	vihl;       /* Version and header length */
-	byte	tos;        /* Type of service */
-	byte	length[2];  /* packet length */
-	byte	id[2];      /* Identification */
-	byte	frag[2];    /* Fragment information */
-	byte	ttl;        /* Time to live */
-	byte	proto;      /* Protocol */
-	byte	cksum[2];   /* Header checksum */
-	byte	src[4];     /* Ip source */
-	byte	dst[4];     /* Ip destination */
-	byte	ilsum[2];   /* Checksum including header */
-	byte	illen[2];   /* Packet length */
-	byte	iltype;     /* Packet type */
-	byte	ilspec;     /* Special */
-	byte	ilsrc[2];   /* Src port */
-	byte	ildst[2];   /* Dst port */
-	byte	ilid[4];    /* Sequence id */
-	byte	ilack[4];   /* Acked sequence */
-};
-</PRE></TT></DL>
-</P>
-<br>&#32;<br>
-Data is assumed to immediately follow the header in the message.
-<TT>Ilspec</TT>
-is an extension reserved for future protocol changes.
-<P>
-The checksum is calculated with
-<TT>ilsum</TT>
-and
-<TT>ilspec</TT>
-set to zero.
-It is the standard IP checksum, that is, the 16-bit one's complement of the one's
-complement sum of all 16 bit words in the header and text.  If a
-message contains an odd number of header and text bytes to be
-checksummed, the last byte is padded on the right with zeros to
-form a 16-bit word for the checksum.
-The checksum covers from
-<TT>cksum</TT>
-to  the end of the data.
-</P>
-<P>
-The possible
-<I>iltype</I>
-values are:
-<DL><DT><DD><TT><PRE>
-enum {
-	sync=		0,
-	data=		1,
-	dataquery=	2,
-	ack=		3,
-	query=		4,
-	state=		5,
-	close=		6,
-};
-</PRE></TT></DL>
-</P>
-<br>&#32;<br>
-The
-<TT>illen</TT>
-field is the size in bytes of the IL header (18 bytes) plus the size of the data.
-<H4>Numbers
-</H4>
-<P>
-The IP protocol number for IL is 40.
-</P>
-<P>
-The assigned IL port numbers are:
-<DL><DD>
-</P>
-<DL COMPACT>
-<DT>7<DD>
-echo all input to output
-<DT>9<DD>
-discard input
-<DT>19<DD>
-send a standard pattern to output
-<DT>565<DD>
-send IP addresses of caller and callee to output
-<DT>566<DD>
-Plan 9 authentication protocol
-<DT>17005<DD>
-Plan 9 CPU service, data
-<DT>17006<DD>
-Plan 9 CPU service, notes
-<DT>17007<DD>
-Plan 9 exported file systems
-<DT>17008<DD>
-Plan 9 file service
-<DT>17009<DD>
-Plan 9 remote execution
-<DT>17030<DD>
-Alef Name Server
-</DL>
-</dl>
-<H4>References
-</H4>
-<br>&#32;<br>
-[PPTTW93] Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom,
-``The Use of Name Spaces in Plan 9'',
-<I>Op. Sys. Rev.,</I>
-Vol. 27, No. 2, April 1993, pp. 72-76,
-reprinted in this volume.
-<br>
-[RFC791] RFC791,
-<I>Internet Protocol,</I>
-<I>DARPA Internet Program Protocol Specification,</I>
-September 1981.
-<br>
-[RFC793] RFC793,
-<I>Transmission Control Protocol,</I>
-<I>DARPA Internet Program Protocol Specification,</I>
-September 1981.
-<br>
-[RFC768] J. Postel, RFC768,
-<I>User Datagram Protocol,</I>
-<I>DARPA Internet Program Protocol Specification,</I>
-August 1980.
-
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 174
sys/doc/index.html

@@ -1,174 +0,0 @@
-<HEAD>
-<TITLE>Plan 9 Manual - Volume 2</TITLE>
-</HEAD>
-
-<H2>Plan 9 Documents (Volume 2)</H2>
-
-<H3>Introduction</H3>
-<DL>
-
-<DT>Plan 9 From Bell Labs
-[<A HREF="9.html">html</A>, <A HREF="9.ps">ps</A>, <A HREF="9.pdf">pdf</A>]
-<DD><I>Rob Pike, Dave Presotto, Sean Dorward, Bob Flandrena, Ken Thompson, Howard Trickey, and Phil Winterbottom</I>
-<BR>An overview of the system; read at least this paper before you install.
-
-<DT>The Use of Name Spaces in Plan 9
-[<A HREF="names.html">html</A>, <A HREF="names.ps">ps</A>, <A HREF="names.pdf">pdf</A>]
-<DD><I>Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom</I>
-<BR>What's in a name?
-
-<DT>The Organization of Networks in Plan 9
-[<A HREF="net/net.html">html</A>, <A HREF="net/net.ps">ps</A>, <A HREF="net/net.pdf">pdf</A>]
-<DD><I>Dave Presotto and Phil Winterbottom</I>
-<BR>Connecting the pieces.  The details in the paper are outdated but the ideas still apply.
-
-<DT>Security in Plan 9
-[<A HREF="auth.html">html</A>, <A HREF="auth.ps">ps</A>, <A HREF="auth.pdf">pdf</A>]
-<DD><I>Russ Cox, Eric Grosse, Rob Pike, Dave Presotto, and Sean Quinlan</I>
-<BR>An overview of the security architecture.
-</DL>
-
-<H3>Programming</H3>
-<DL>
-
-<DT>How to Use the Plan 9 C Compiler
-[<A HREF="comp.html">html</A>, <A HREF="comp.ps">ps</A>, <A HREF="comp.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR>The starting point for C programming under Plan 9.
-
-<DT>Changes to the Programming Environment in the Fourth Release of Plan 9
-[<A HREF="prog4.html">html</A>, <A HREF="prog4.ps">ps</A>, <A HREF="prog4.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR>An overview of the changes to the C library since the Third Release.
-
-<DT>APE - The ANSI/POSIX Environment
-[<A HREF="ape.html">html</A>, <A HREF="ape.ps">ps</A>, <A HREF="ape.pdf">pdf</A>]
-<DD><I>Howard Trickey</I>
-<BR>Moving C code between UNIX and Plan 9.
-
-<DT>Acid: A Debugger Built From A Language
-[<A HREF="acidpaper.html">html</A>, <A HREF="acidpaper.ps">ps</A>, <A HREF="acidpaper.pdf">pdf</A>]
-<DD><I>Phil Winterbottom</I>
-<BR>An overview paper about the Acid debugger.
-
-<DT>Acid Manual
-[<A HREF="acid.html">html</A>, <A HREF="acid.ps">ps</A>, <A HREF="acid.pdf">pdf</A>]
-<DD><I>Phil Winterbottom</I>
-<BR>The reference manual for the language and its libraries.
-
-<DT>Maintaining Files on Plan 9 with Mk
-[<A HREF="mk.html">html</A>, <A HREF="mk.ps">ps</A>, <A HREF="mk.pdf">pdf</A>]
-<DD><I>Andrew G. Hume and Bob Flandrena</I>
-<BR>An introduction to Plan 9's replacement for <TT>make</TT>.
-
-<DT>Plan 9 Mkfiles
-[<A HREF="mkfiles.html">html</A>, <A HREF="mkfiles.ps">ps</A>, <A HREF="mkfiles.pdf">pdf</A>]
-<DD><I>Bob Flandrena</I>
-<BR>The conventions for using <TT>mk</TT> in Plan 9.
-
-<DT>A Manual for the Plan 9 assembler
-[<A HREF="asm.html">html</A>, <A HREF="asm.ps">ps</A>, <A HREF="asm.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR>Things you'd rather not know.
-</DL>
-
-<H3>User Interfaces</H3>
-<DL>
-
-<DT>8&#189;, the Plan 9 Window System
-[<A HREF="8%bd/8%bd.html">html</A>, <A HREF="8%bd/8%bd.ps">ps</A>, <A HREF="8%bd/8%bd.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR>An introduction to the (previous) window system and its unusual implementation.
-
-<DT>Rc - The Plan 9 Shell
-[<A HREF="rc.html">html</A>, <A HREF="rc.ps">ps</A>, <A HREF="rc.pdf">pdf</A>]
-<DD><I>Tom Duff</I>
-<BR>An introduction to the new shell, complete with examples.
-
-<DT>The Text Editor <TT>sam</TT>
-[<A HREF="sam/sam.html">html</A>, <A HREF="sam/sam.ps">ps</A>, <A HREF="sam/sam.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR><TT>Sam</TT> is the standard editor on Plan 9.
-
-<DT>Acme: A User Interface for Programmers
-[<A HREF="acme/acme.html">html</A>, <A HREF="acme/acme.ps">ps</A>, <A HREF="acme/acme.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR>A system with a more radical approach to programming and editing.
-
-<DT>Plumbing and Other Utilities
-[<A HREF="plumb.html">html</A>, <A HREF="plumb.ps">ps</A>, <A HREF="plumb.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR>Inter-process communication that enlivens the interactive user interface.
-</DL>
-
-<H3>Implementation</H3>
-<DL>
-
-<DT>Hello World
-[<A HREF="utf.html">html</A>, <A HREF="utf.ps">ps</A>, <A HREF="utf.pdf">pdf</A>]
-<DD><I>Rob Pike and Ken Thompson</I>
-<BR>The details about Plan 9's character set: the Unicode Standard plus an ASCII-compatible encoding.
-
-<DT>Plan 9 C Compilers
-[<A HREF="compiler.html">html</A>, <A HREF="compiler.ps">ps</A>, <A HREF="compiler.pdf">pdf</A>]
-<DD><I>Ken Thompson</I>
-<BR>The design and some internals of the compiler suite.
-
-<DT>Adding Application Support for a New Architecture in Plan 9
-[<A HREF="libmach.html">html</A>, <A HREF="libmach.ps">ps</A>, <A HREF="libmach.pdf">pdf</A>]
-<DD><I>Bob Flandrena</I>
-<BR>The procedures necessary to add a new instruction set to Plan 9's programming environment.
-
-<DT>The Plan 9 File Server
-[<A HREF="fs/fs.html">html</A>, <A HREF="fs/fs.ps">ps</A>, <A HREF="fs/fs.pdf">pdf</A>]
-<DD><I>Ken Thompson</I>
-<BR>The design of the central file server and its novel backup system.
-
-<DT>Venti: A new approach to archival storage
-[<A HREF="venti/venti.html">html</A>, <A HREF="venti/venti.ps">ps</A>, <A HREF="venti/venti.pdf">pdf</A>]
-<DD><I>Sean Quinlan and Sean Dorward</I>
-<BR>Archival block-level storage using secure hashes as block identifiers.
-
-<DT>The IL protocol
-[<A HREF="il/il.html">html</A>, <A HREF="il/il.ps">ps</A>, <A HREF="il/il.pdf">pdf</A>]
-<DD><I>Dave Presotto and Phil Winterbottom</I>
-<BR>A description of the Internet protocol Plan 9 uses for internal communication.
-
-<DT>Lexical File Names in Plan 9, or, Getting Dot-Dot Right
-[<A HREF="lexnames.html">html</A>, <A HREF="lexnames.ps">ps</A>, <A HREF="lexnames.pdf">pdf</A>]
-<DD><I>Rob Pike</I>
-<BR>A vexing old problem solved: how to make <TT>pwd</TT> get the right answer in the face of multiply-bound directories.
-
-<DT>Process Sleep and Wakeup on a Shared-memory Multiprocessor
-[<A HREF="sleep.html">html</A>, <A HREF="sleep.ps">ps</A>, <A HREF="sleep.pdf">pdf</A>]
-<DD><I>Rob Pike, Dave Presotto, Ken Thompson, and Gerard Holzmann</I>
-<BR>A detailed study of a central issue in the Plan 9 kernel.
-</DL>
-
-<H3>Miscellany</H3>
-<DL>
-
-<DT>A Guide to the Lp Printer Spooler
-[<A HREF="lp.html">html</A>, <A HREF="lp.ps">ps</A>, <A HREF="lp.pdf">pdf</A>]
-<DD><I>Paul Glick</I>
-<BR>Administering the suite of tools to drive PostScript printers.
-
-<DT>Troff User's Manual
-[<A HREF="troff.html">html</A>, <A HREF="troff.ps">ps</A>, <A HREF="troff.pdf">pdf</A>]
-<DD><I>Joseph F. Ossanna and Brian W. Kernighan</I>
-<BR>The old warhorse, updated for Unicode characters.
-
-
-<DT>Using SPIN
-[<A HREF="spin.html">html</A>, <A HREF="spin.ps">ps</A>, <A HREF="spin.pdf">pdf</A>]
-<DD><I>Gerard Holzmann</I>
-<BR>An introduction to a tool for analyzing parallel and distributed programs.
-</DL>
-
-<H3>Installation</H3>
-<DL>
-
-<DT>The Various Ports
-[<A HREF="port.html">html</A>, <A HREF="port.ps">ps</A>, <A HREF="port.pdf">pdf</A>]
-<DD>
-<BR>The hardware requirements for the Plan 9 compilers and kernels.

+ 0 - 1220
sys/doc/lexnames.html

@@ -1,1220 +0,0 @@
-<html>
-<title>
-Lexical File Names in Plan 9, or, Getting Dot-Dot Right
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Lexical File Names in Plan 9
-<br>
-or
-<br>
-Getting Dot-Dot Right
-</H1>
-<DL><DD><I>Rob Pike<br>
-<TT>rob@plan9.bell-labs.com</TT>
-Bell Laboratories, Murray Hill, NJ, 07974
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<br>&#32;<br>
-Symbolic links make the Unix file system non-hierarchical, resulting in
-multiple valid path names for a given file.
-This ambiguity is a source of confusion, especially since some shells
-work overtime to present a consistent view from programs such as
-<TT>pwd</TT>,
-while other programs and
-the kernel itself do nothing about the problem.
-<br>&#32;<br>
-Plan 9 has no symbolic links but it does have other mechanisms that produce the same difficulty.
-Moreover, Plan 9 is founded on the ability to control a program's environment
-by manipulating its name space.
-Ambiguous names muddle the result of operations such as copying a name space across
-the network.
-<br>&#32;<br>
-To address these problems,
-the Plan 9 kernel has been modified to maintain an accurate path name for every active
-file (open file, working directory, mount table entry) in the system.
-The definition of `accurate' is that the path name for a file is guaranteed to be the rooted,
-absolute name
-the program used to acquire it.
-These names are maintained by an efficient method that combines lexical processing&#8212;such as
-evaluating
-<TT>..</TT>
-by just removing the last path name element of a directory&#8212;with
-local operations within the file system to maintain a consistent, easily understood view
-of the name system.
-Ambiguous situations are resolved by examining the lexically maintained names themselves.
-<br>&#32;<br>
-A new kernel call,
-<TT>fd2path</TT>,
-returns the file name associated with an open file,
-permitting the use of reliable names to improve system
-services ranging from
-<TT>pwd</TT>
-to debugging.
-Although this work was done in Plan 9,
-Unix systems could also benefit from the addition of
-a method to recover the accurate name of an
-open file or the current directory.
-</DL>
-<H4>Motivation
-</H4>
-<br>&#32;<br>
-Consider the following unedited transcript of a session running the Bourne shell on a modern
-Unix system:
-<DL><DT><DD><TT><PRE>
-% echo $HOME
-/home/rob
-% cd $HOME
-% pwd
-/n/bopp/v7/rob
-% cd /home/rob
-% cd /home/ken
-% cd ../rob
-../rob: bad directory
-% 
-</PRE></TT></DL>
-(The same output results from running
-<TT>tcsh</TT>;
-we'll discuss
-<TT>ksh</TT>
-in a moment.)
-To a neophyte being schooled in the delights of a hierarchical file name space,
-this behavior must be baffling.
-It is, of course, the consequence of a series of symbolic links intended to give users
-the illusion they share a disk, when in fact their files are scattered over several devices:
-<DL><DT><DD><TT><PRE>
-% ls -ld /home/rob /home/ken
-lrwxr-xr-x  1 root  sys   14 Dec 26  1998 /home/ken -&gt; /n/bopp/v6/ken
-lrwxr-xr-x  1 root  sys   14 Dec 23  1998 /home/rob -&gt; /n/bopp/v7/rob
-% 
-</PRE></TT></DL>
-The introduction of symbolic links has changed the Unix file system from a true
-hierarchy into a directed graph, rendering
-<TT>..</TT>
-ambiguous and sowing confusion.
-<br>&#32;<br>
-Unix popularized hierarchical naming, but the introduction of symbolic links
-made its naming irregular.
-Worse, the
-<TT>pwd</TT>
-command, through the underlying
-<TT>getwd</TT>
-library routine,
-uses a tricky, expensive algorithm that often delivers the wrong answer.
-Starting from the current directory,
-<TT>getwd</TT>
-opens the parent,
-<TT>..</TT>,
-and searches it for an entry whose i-number matches the current directory;
-the matching entry is the final path element of the ultimate result.
-Applying this process iteratively,
-<TT>getwd</TT>
-works back towards the root.
-Since
-<TT>getwd</TT>
-knows nothing about symbolic links, it will recover surprising names for
-directories reached by them,
-as illustrated by the example;
-the backward paths
-<TT>getwd</TT>
-traverses will not backtrack across the links.
-<br>&#32;<br>
-Partly for efficiency and partly to make
-<TT>cd</TT>
-and
-<TT>pwd</TT>
-more predictable, the Korn shell
-<TT>ksh</TT>
-[Korn94]
-implements
-<TT>pwd</TT>
-as a builtin.
-(The
-<TT>cd</TT>
-command must be a builtin in any shell, since the current directory is unique to each process.)
-<TT>Ksh</TT>
-maintains its own private view of the file system to try to disguise symbolic links;
-in particular,
-<TT>cd</TT>
-and
-<TT>pwd</TT>
-involve some lexical processing (somewhat like the
-<TT>cleanname</TT>
-function discussed later
-in this paper), augmented by heuristics such as examining the environment
-for names like
-<TT>$HOME</TT>
-and
-<TT>$PWD</TT>
-to assist initialization of the state of the private view. [Korn00]
-<br>&#32;<br>
-This transcript begins with a Bourne shell running:
-<DL><DT><DD><TT><PRE>
-% cd /home/rob
-% pwd
-/n/bopp/v7/rob
-% ksh
-$ pwd
-/home/rob
-$ 
-</PRE></TT></DL>
-This result is encouraging.  Another example, again starting from a Bourne shell:
-<DL><DT><DD><TT><PRE>
-% cd /home/rob
-% cd ../ken
-../ken: bad directory
-% ksh
-$ pwd
-/home/rob
-$ cd ../ken
-$ pwd
-/home/ken
-$ 
-</PRE></TT></DL>
-By doing extra work,
-the Korn shell is providing more sensible behavior,
-but it is easy to defeat:
-<DL><DT><DD><TT><PRE>
-% cd /home/rob
-% pwd
-/n/bopp/v7/rob
-% cd bin
-% pwd
-/n/bopp/v7/rob/bin
-% ksh
-$ pwd
-/n/bopp/v7/rob/bin
-$ exit
-% cd /home/ken
-% pwd
-/n/bopp/v6/ken
-% ksh
-$ pwd
-/n/bopp/v6/ken
-$ 
-</PRE></TT></DL>
-In these examples,
-<TT>ksh</TT>'s
-built-in
-<TT>pwd</TT>
-failed to produce the results
-(<TT>/home/rob/bin</TT>
-and
-<TT>/home/ken</TT>)
-that the previous example might have led us to expect.
-The Korn shell is hiding the problem, not solving it, and in fact is not even hiding it very well.
-<br>&#32;<br>
-A deeper question is whether the shell should even be trying to make
-<TT>pwd</TT>
-and
-<TT>cd</TT>
-do a better job.
-If it does, then the
-<TT>getwd</TT>
-library call and every program that uses it will behave differently from the shell,
-a situation that is sure to confuse.
-Moreover, the ability to change directory to
-<TT>../ken</TT>
-with the Korn shell's
-<TT>cd</TT>
-command but not with the
-<TT>chdir</TT>
-system call is a symptom of a diseased system, not a healthy shell.
-<br>&#32;<br>
-The operating system should provide names that work and make sense.
-Symbolic links, though, are here to stay, so we need a way to provide
-sensible, unambiguous names in the face of a non-hierarchical name space.
-This paper shows how the challenge was met on Plan 9, an operating system
-with Unix-like naming.
-<H4>Names in Plan 9
-</H4>
-<br>&#32;<br>
-Except for some details involved with bootstrapping, file names in Plan 9 have the same syntax as in Unix.
-Plan 9 has no symbolic links, but its name space construction operators,
-<TT>bind</TT>
-and
-<TT>mount</TT>,
-make it possible to build the same sort of non-hierarchical structures created
-by symbolically linking directories on Unix.
-<br>&#32;<br>
-Plan 9's
-<TT>mount</TT>
-system call takes a file descriptor
-and attaches to the local name space the file system service it represents:
-<DL><DT><DD><TT><PRE>
-mount(fd, "/dir", flags)
-</PRE></TT></DL>
-Here
-<TT>fd</TT>
-is a file descriptor to a communications port such as a pipe or network connection;
-at the other end of the port is a service, such as a file server, that talks 9P, the Plan 9 file
-system protocol.
-After the call succeeds, the root directory of the service will be visible at the
-<I>mount point</I>
-<TT>/dir</TT>,
-much as with the
-<TT>mount</TT>
-call of Unix.
-The
-<TT>flags</TT>
-argument specifies the nature of the attachment:
-<TT>MREPL</TT>
-says that the contents of the root directory (appear to) replace the current contents of
-<TT>/dir</TT>;
-<TT>MAFTER</TT>
-says that the current contents of
-<TT>/dir</TT>
-remain visible, with the mounted directory's contents appearing
-<I>after</I>
-any existing files;
-and
-<TT>MBEFORE</TT>
-says that the contents remain visible, with
-the mounted directory's contents appearing
-<I>before</I>
-any existing files.
-These multicomponent directories are called
-<I>union directories</I>
-and are somewhat different from union directories in 4.4BSD-Lite [PeMc95], because
-only the top-level directory itself is unioned, not its descendents, recursively.
-(Plan 9's union directories are used differently from 4.4BSD-Lite's, as will become apparent.)
-<br>&#32;<br>
-For example, to bootstrap a diskless computer the system builds a local name space containing
-only the root directory,
-<TT>/</TT>,
-then uses the network to open a connection
-to the main file server.
-It then executes
-<DL><DT><DD><TT><PRE>
-mount(rootfd, "/", MREPL);
-</PRE></TT></DL>
-After this call, the entire file server's tree is visible, starting from the root of the local machine.
-<br>&#32;<br>
-While
-<TT>mount</TT>
-connects a new service to the local name space,
-<TT>bind</TT>
-rearranges the existing name space:
-<DL><DT><DD><TT><PRE>
-bind("tofile", "fromfile", flags)
-</PRE></TT></DL>
-causes subsequent mention of the
-<TT>fromfile</TT>
-(which may be a plain file or a directory)
-to behave as though
-<TT>tofile</TT>
-had been mentioned instead, somewhat like a symbolic link.
-(Note, however, that the arguments are in the opposite order
-compared to
-<TT>ln</TT>
-<TT>-s</TT>).
-The
-<TT>flags</TT>
-argument is the same as with
-<TT>mount</TT>.
-<br>&#32;<br>
-As an example, a sequence something like the following is done at bootstrap time to
-assemble, under the single directory
-<TT>/bin</TT>,
-all of the binaries suitable for this architecture, represented by (say) the string
-<TT>sparc</TT>:
-<DL><DT><DD><TT><PRE>
-bind("/sparc/bin", "/bin", MREPL);
-bind("/usr/rob/sparc/bin", "/bin", MAFTER);
-</PRE></TT></DL>
-This sequence of
-<TT>binds</TT>
-causes
-<TT>/bin</TT>
-to contain first the standard binaries, then the contents of
-<TT>rob</TT>'s
-private SPARC binaries.
-The ability to build such union directories
-obviates the need for a shell
-<TT>$PATH</TT>
-variable
-while providing opportunities for managing heterogeneity.
-If the system were a Power PC, the same sequence would be run with
-<TT>power</TT>
-textually substituted for
-<TT>sparc</TT>
-to place the Power PC binaries in
-<TT>/bin</TT>
-rather than the SPARC binaries.
-<br>&#32;<br>
-Trouble is already brewing.  After these bindings are set up,
-where does
-<DL><DT><DD><TT><PRE>
-% cd /bin
-% cd ..
-</PRE></TT></DL>
-set the current working directory, to
-<TT>/</TT>
-or
-<TT>/sparc</TT>
-or
-<TT>/usr/rob/sparc</TT>?
-We will return to this issue.
-<br>&#32;<br>
-There are some important differences between
-<TT>binds</TT>
-and symbolic links.
-First,
-symbolic links are a static part of the file system, while
-Plan 9 bindings are created at run time, are stored in the kernel,
-and endure only as long as the system maintains them;
-they are temporary.
-Since they are known to the kernel but not the file system, they must
-be set up each time the kernel boots or a user logs in;
-permanent bindings are created by editing system initialization scripts
-and user profiles rather than by building them in the file system itself.
-<br>&#32;<br>
-The Plan 9 kernel records what bindings are active for a process,
-whereas symbolic links, being held on the Unix file server, may strike whenever the process evaluates
-a file name.
-Also, symbolic links apply to all processes that evaluate the affected file, whereas
-<TT>bind</TT>
-has a local scope, applying only to the process that executes it and possibly some of its
-peers, as discussed in the next section.
-Symbolic links cannot construct the sort of
-<TT>/bin</TT>
-directory built here; it is possible to have multiple directories point to
-<TT>/bin</TT>
-but not the other way around.
-<br>&#32;<br>
-Finally,
-symbolic links are symbolic, like macros: they evaluate the associated names each time
-they are accessed.
-Bindings, on the other hand, are evaluated only once, when the bind is executed;
-after the binding is set up, the kernel associates the underlying files, rather than their names.
-In fact, the kernel's representation of a bind is identical to its representation of a mount;
-in effect, a bind is a mount of the
-<TT>tofile</TT>
-upon the
-<TT>fromfile</TT>.
-The binds and mounts coexist in a single
-<I>mount table</I>,
-the subject of the next section.
-<H4>The Mount Table
-</H4>
-<br>&#32;<br>
-Unix has a single global mount table
-for all processes in the system, but Plan 9's mount tables are local to each process.
-By default it is inherited when a process forks, so mounts and binds made by one
-process affect the other, but a process may instead inherit a copy,
-so modifications it makes will be invisible to other processes.
-The convention is that related processes, such
-as processes running in a single window, share a mount table, while sets of processes
-in different windows have distinct mount tables.
-In practice, the name spaces of the two windows will appear largely the same,
-but the possibility for different processes to see different files (hence services) under
-the same name is fundamental to the system,
-affecting the design of key programs such as the
-window system [Pike91].
-<br>&#32;<br>
-The Plan 9 mount table is little more than an ordered list of pairs, mapping the
-<TT>fromfiles</TT>
-to the
-<TT>tofiles</TT>.
-For mounts, the
-<TT>tofile</TT>
-will be an item called a
-<TT>Channel</TT>,
-similar to a Unix
-<TT>vnode</TT>,
-pointing to the root of the file service,
-while for a bind it will be the
-<TT>Channel</TT>
-pointing to the
-<TT>tofile</TT>
-mentioned in the
-<TT>bind</TT>
-call.
-In both cases, the
-<TT>fromfile</TT>
-entry in the table
-will be a
-<TT>Channel</TT>
-pointing to the
-<TT>fromfile</TT>
-itself.
-<br>&#32;<br>
-The evaluation of a file name proceeds as follows.
-If the name begins with a slash, start with the
-<TT>Channel</TT>
-for the root; otherwise start with the
-<TT>Channel</TT>
-for the current directory of the process.
-For each path element in the name,
-such as
-<TT>usr</TT>
-in
-<TT>/usr/rob</TT>,
-try to `walk' the
-<TT>Channel</TT>
-to that element [Pike93].
-If the walk succeeds, look to see if the resulting
-<TT>Channel</TT>
-is the same as any
-<TT>fromfile</TT>
-in the mount table, and if so, replace it by the corresponding
-<TT>tofile</TT>.
-Advance to the next element and continue.
-<br>&#32;<br>
-There are a couple of nuances.  If the directory being walked is a union directory,
-the walk is attempted in the elements of the union, in order, until a walk succeeds.
-If none succeed, the operation fails.
-Also, when the destination of a walk is a directory for a purpose such as the
-<TT>chdir</TT>
-system call or the
-<TT>fromfile</TT>
-in a
-<TT>bind</TT>,
-once the final walk of the sequence has completed the operation stops;
-the final check through the mount table is not done.
-Among other things, this simplifies the management of union directories;
-for example, subsequent
-<TT>bind</TT>
-calls will append to the union associated with the underlying
-<TT>fromfile</TT>
-instead of what is bound upon it.
-<H4>A Definition of Dot-Dot
-</H4>
-<br>&#32;<br>
-The ability to construct union directories and other intricate naming structures
-introduces some thorny problems: as with symbolic links,
-the name space is no longer hierarchical, files and directories can have multiple
-names, and the meaning of
-<TT>..</TT>,
-the parent directory, can be ambiguous.
-<br>&#32;<br>
-The meaning of
-<TT>..</TT>
-is straightforward if the directory is in a locally hierarchical part of the name space,
-but if we ask what
-<TT>..</TT>
-should identify when the current directory is a mount point or union directory or
-multiply symlinked spot (which we will henceforth call just a mount point, for brevity),
-there is no obvious answer.
-Name spaces have been part of Plan 9 from the beginning, but the definition of
-<TT>..</TT>
-has changed several times as we grappled with this issue.
-In fact, several attempts to clarify the meaning of
-<TT>..</TT>
-by clever coding
-resulted in definitions that could charitably be summarized as `what the implementation gives.'
-<br>&#32;<br>
-Frustrated by this situation, and eager to have better-defined names for some of the
-applications described later in this paper, we recently proposed the following definition
-for
-<TT>..</TT>:
-<DL>
-<DT><DT>&#32;<DD>
-The parent of a directory
-<I>X</I>,
-<I>X</I><TT>/..</TT>,
-is the same directory that would obtain if
-we instead accessed the directory named by stripping away the last
-path name element of
-<I>X</I>.
-</dl>
-<br>&#32;<br>
-For example, if we are in the directory
-<TT>/a/b/c</TT>
-and
-<TT>chdir</TT>
-to
-<TT>..</TT>,
-the result is
-<I>exactly</I>
-as if we had executed a
-<TT>chdir</TT>
-to
-<TT>/a/b</TT>.
-<br>&#32;<br>
-This definition is easy to understand and seems natural.
-It is, however, a purely
-<I>lexical</I>
-definition that flatly ignores evaluated file names, mount tables, and
-other kernel-resident data structures.
-Our challenge is to implement it efficiently.
-One obvious (and correct)
-implementation is to rewrite path names lexically to fold out
-<TT>..</TT>,
-and then evaluate the file name forward from the root,
-but this is expensive and unappealing.
-We want to be able to use local operations to evaluate file names,
-but maintain the global, lexical definition of dot-dot.
-It isn't too hard.
-<H4>The Implementation
-</H4>
-<br>&#32;<br>
-To operate lexically on file names, we associate a name with each open file in the kernel, that
-is, with each 
-<TT>Channel</TT>
-data structure.
-The first step is therefore to store a
-<TT>char*</TT>
-with each
-<TT>Channel</TT>
-in the system, called its
-<TT>Cname</TT>,
-that records the
-<I>absolute</I>
-rooted
-file name for the
-<TT>Channel</TT>.
-<TT>Cnames</TT>
-are stored as full text strings, shared copy-on-write for efficiency.
-The task is to maintain each
-<TT>Cname</TT>
-as an accurate absolute name using only local operations.
-<br>&#32;<br>
-When a file is opened, the file name argument in the
-<TT>open</TT>
-(or
-<TT>chdir</TT>
-or
-<TT>bind</TT>
-or ...) call is recorded in the
-<TT>Cname</TT>
-of the resulting
-<TT>Channel</TT>.
-When the file name begins with a slash, the name is stored as is,
-subject to a cleanup pass described in the next section.
-Otherwise, it is a local name, and the file name must be made
-absolute by prefixing it with the
-<TT>Cname</TT>
-of the current directory, followed by a slash.
-For example, if we are in
-<TT>/home/rob</TT>
-and
-<TT>chdir</TT>
-to
-<TT>bin</TT>,
-the
-<TT>Cname</TT>
-of the resulting
-<TT>Channel</TT>
-will be the string
-<TT>/home/rob/bin</TT>.
-<br>&#32;<br>
-This assumes, of course, that the local file name contains no
-<TT>..</TT>
-elements.
-If it does, instead of storing for example
-<TT>/home/rob/..</TT>
-we delete the last element of the existing name and set the
-<TT>Cname</TT>
-to
-<TT>/home</TT>.
-To maintain the lexical naming property we must guarantee that the resulting
-<TT>Cname</TT>,
-if it were to be evaluated, would yield the identical directory to the one
-we actually do get by the local
-<TT>..</TT>
-operation.
-<br>&#32;<br>
-If the current directory is not a mount point, it is easy to maintain the lexical property.
-If it is a mount point, though, it is still possible to maintain it on Plan 9
-because the mount table, a kernel-resident data structure, contains all the
-information about the non-hierarchical connectivity of the name space.
-(On Unix, by contrast, symbolic links are stored on the file server rather than in the kernel.)
-Moreover, the presence of a full file name for each
-<TT>Channel</TT>
-in the mount table provides the information necessary to resolve ambiguities.
-<br>&#32;<br>
-The mount table is examined in the
-<TT>from</TT>-&gt;<TT>to</TT>
-direction when evaluating a name, but
-<TT>..</TT>
-points backwards in the hierarchy, so to evaluate
-<TT>..</TT>
-the table must be examined in the
-<TT>to</TT>-&gt;<TT>from</TT>
-direction.
-(``How did we get here?'')
-<br>&#32;<br>
-The value of
-<TT>..</TT>
-is ambiguous when there are multiple bindings (mount points) that point to
-the directories involved in the evaluation of
-<TT>..</TT>.
-For example, return to our original script with
-<TT>/n/bopp/v6</TT>
-(containing a home directory for
-<TT>ken</TT>)
-and
-<TT>/n/bopp/v7</TT>
-(containing a home directory for
-<TT>rob</TT>)
-unioned into
-<TT>/home</TT>.
-This is represented by two entries in the mount table,
-<TT>from=/home</TT>,
-<TT>to=/n/bopp/v6</TT>
-and
-<TT>from=/home</TT>,
-<TT>to=/n/bopp/v7</TT>.
-If we have set our current directory to
-<TT>/home/rob</TT>
-(which has landed us in the physical location
-<TT>/n/bopp/v7/rob</TT>)
-our current directory is not a mount point but its parent is.
-The value of
-<TT>..</TT>
-is ambiguous: it could be
-<TT>/home</TT>,
-<TT>/n/bopp/v7</TT>,
-or maybe even
-<TT>/n/bopp/v6</TT>,
-and the ambiguity is caused by two
-<TT>tofiles</TT>
-bound to the same
-<TT>fromfile</TT>.
-By our definition, if we now evaluate
-<TT>..</TT>,
-we should acquire the directory
-<TT>/home</TT>;
-otherwise
-<TT>../ken</TT>
-could not possibly result in
-<TT>ken</TT>'s
-home directory, which it should.
-On the other hand, if we had originally gone to
-<TT>/n/bopp/v7/rob</TT>,
-the name
-<TT>../ken</TT>
-should
-<I>not</I>
-evaluate to
-<TT>ken</TT>'s
-home directory because there is no directory
-<TT>/n/bopp/v7/ken</TT>
-(<TT>ken</TT>'s
-home directory is on
-<TT>v6</TT>).
-The problem is that by using local file operations, it is impossible
-to distinguish these cases: regardless of whether we got here using the name
-<TT>/home/rob</TT>
-or
-<TT>/n/bopp/v7/rob</TT>,
-the resulting directory is the same.
-Moreover, the mount table does not itself have enough information
-to disambiguate: when we do a local operation to evaluate
-<TT>..</TT>
-and land in
-<TT>/n/bopp/v7</TT>,
-we discover that the directory is a
-<TT>tofile</TT>
-in the mount table; should we step back through the table to
-<TT>/home</TT>
-or not?
-<br>&#32;<br>
-The solution comes from the
-<TT>Cnames</TT>
-themselves.
-Whether to step back through the mount point
-<TT>from=/home</TT>,
-<TT>to=/n/bopp/v7</TT>
-when evaluating
-<TT>..</TT>
-in
-<TT>rob</TT>'s
-directory is trivially resolved by asking the question,
-Does the
-<TT>Cname</TT>
-for the directory begin
-<TT>/home</TT>?
-If it does, then the path that was evaluated to get us to the current
-directory must have gone through this mount point, and we should
-back up through it to evaluate
-<TT>..</TT>;
-if not, then this mount table entry is irrelevant.
-<br>&#32;<br>
-More precisely,
-both
-<I>before</I>
-and
-<I>after</I>
-each
-<TT>..</TT>
-element in the path name is evaluated,
-if the directory is a
-<TT>tofile</TT>
-in the mount table, the corresponding
-<TT>fromfile</TT>
-is taken instead, provided the
-<TT>Cname</TT>
-of the corresponding
-<TT>fromfile</TT>
-is the prefix of the
-<TT>Cname</TT>
-of the original directory.
-Since we always know the full name of the directory
-we are evaluating, we can always compare it against all the entries in the mount table that point
-to it, thereby resolving ambiguous situations
-and maintaining the
-lexical property of
-<TT>..</TT>.
-This check also guarantees we don't follow a misleading mount point, such as the entry pointing to
-<TT>/home</TT>
-when we are really in
-<TT>/n/bopp/v7/rob</TT>.
-Keeping the full names with the
-<TT>Channels</TT>
-makes it easy to use the mount table to decide how we got here and, therefore,
-how to get back.
-<br>&#32;<br>
-In summary, the algorithm is as follows.
-Use the usual file system operations to walk to
-<TT>..</TT>;
-call the resulting directory
-<I>d</I>.
-Lexically remove
-the last element of the initial file name.
-Examine all entries in the mount table whose
-<TT>tofile</TT>
-is
-<I>d</I>
-and whose
-<TT>fromfile</TT>
-has a
-<TT>Cname</TT>
-identical to the truncated name.
-If one exists, that
-<TT>fromfile</TT>
-is the correct result; by construction, it also has the right
-<TT>Cname</TT>.
-In our example, evaluating
-<TT>..</TT>
-in
-<TT>/home/rob</TT>
-(really
-<TT>/n/bopp/v7/rob</TT>)
-will set
-<I>d</I>
-to
-<TT>/n/bopp/v7</TT>;
-that is a
-<TT>tofile</TT>
-whose
-<TT>fromfile</TT>
-is
-<TT>/home</TT>.
-Removing the
-<TT>/rob</TT>
-from the original
-<TT>Cname</TT>,
-we find the name
-<TT>/home</TT>,
-which matches that of the
-<TT>fromfile</TT>,
-so the result is the
-<TT>fromfile</TT>,
-<TT>/home</TT>.
-<br>&#32;<br>
-Since this implementation uses only local operations to maintain its names,
-it is possible to confuse it by external changes to the file system.
-Deleting or renaming directories and files that are part of a
-<TT>Cname</TT>,
-or modifying the mount table, can introduce errors.
-With more implementation work, such mistakes could probably be caught,
-but in a networked environment, with machines sharing a remote file server, renamings
-and deletions made by one machine may go unnoticed by others.
-These problems, however, are minor, uncommon and, most important, easy to understand.
-The method maintains the lexical property of file names unless an external
-agent changes the name surreptitiously;
-within a stable file system, it is always maintained and
-<TT>pwd</TT>
-is always right.
-<br>&#32;<br>
-To recapitulate, maintaining the
-<TT>Channel</TT>'s
-absolute file names lexically and using the names to disambiguate the
-mount table entries when evaluating
-<TT>..</TT>
-at a mount point
-combine to maintain the lexical definition of
-<TT>..</TT>
-efficiently.
-<H4>Cleaning names
-</H4>
-<br>&#32;<br>
-The lexical processing can generate names that are messy or redundant,
-ones with extra slashes or embedded
-<TT>../</TT>
-or
-<TT>./</TT>
-elements and other extraneous artifacts.
-As part of the kernel's implementation, we wrote a procedure,
-<TT>cleanname</TT>,
-that rewrites a name in place to canonicalize its appearance.
-The procedure is useful enough that it is now part of the Plan 9 C
-library and is employed by many programs to make sure they always
-present clean file names.
-<br>&#32;<br>
-<TT>Cleanname</TT>
-is analogous to the URL-cleaning rules defined in RFC 1808 [Field95], although
-the rules are slightly different.
-<TT>Cleanname</TT>
-iteratively does the following until no further processing can be done:
-<DL>
-<DT><DT>&#32;<DD>
-1. Reduce multiple slashes to a single slash.
-<DT><DT>&#32;<DD>
-2. Eliminate
-<TT>.</TT>
-path name elements
-(the current directory).
-<DT><DT>&#32;<DD>
-3. Eliminate
-<TT>..</TT>
-path name elements (the parent directory) and the
-non-<TT>.</TT>
-non-<TT>..</TT>
-element that precedes them.
-<DT><DT>&#32;<DD>
-4. Eliminate
-<TT>..</TT>
-elements that begin a rooted path, that is, replace
-<TT>/..</TT>
-by
-<TT>/</TT>
-at the beginning of a path.
-<DT><DT>&#32;<DD>
-5. Leave intact
-<TT>..</TT>
-elements that begin a non-rooted path.
-</dl>
-<br>&#32;<br>
-If the result of this process is a null string,
-<TT>cleanname</TT>
-returns the string
-<TT>"."</TT>,
-representing the current directory.
-<H4>The fd2path system call
-</H4>
-<br>&#32;<br>
-Plan 9 has a new system call,
-<TT>fd2path</TT>,
-to enable programs to extract the
-<TT>Cname</TT>
-associated with an open file descriptor.
-It takes three arguments: a file descriptor, a buffer, and the size of the buffer:
-<DL><DT><DD><TT><PRE>
-int fd2path(int fd, char *buf, int nbuf)
-</PRE></TT></DL>
-It returns an error if the file descriptor is invalid; otherwise it fills the buffer with the name
-associated with
-<TT>fd</TT>.
-(If the name is too long, it is truncated; perhaps this condition should also draw an error.)
-The
-<TT>fd2path</TT>
-system call is very cheap, since all it does is copy the
-<TT>Cname</TT>
-string to user space.
-<br>&#32;<br>
-The Plan 9 implementation of
-<TT>getwd</TT>
-uses
-<TT>fd2path</TT>
-rather than the tricky algorithm necessary in Unix:
-<DL><DT><DD><TT><PRE>
-char*
-getwd(char *buf, int nbuf)
-{
-	int n, fd;
-
-	fd = open(".", OREAD);
-	if(fd &lt; 0)
-		return NULL;
-	n = fd2path(fd, buf, nbuf);
-	close(fd);
-	if(n &lt; 0)
-		return NULL;
-	return buf;
-}
-</PRE></TT></DL>
-(The Unix specification of
-<TT>getwd</TT>
-does not include a count argument.)
-This version of
-<TT>getwd</TT>
-is not only straightforward, it is very efficient, reducing the performance
-advantage of a built-in
-<TT>pwd</TT>
-command while guaranteeing that all commands, not just
-<TT>pwd</TT>,
-see sensible directory names.
-<br>&#32;<br>
-Here is a routine that prints the file name associated
-with each of its open file descriptors; it is useful for tracking down file descriptors
-left open by network listeners, text editors that spawn commands, and the like:
-<DL><DT><DD><TT><PRE>
-void
-openfiles(void)
-{
-	int i;
-	char buf[256];
-
-	for(i=0; i&lt;NFD; i++)
-		if(fd2path(i, buf, sizeof buf) &gt;= 0)
-			print("%d: %s\n", i, buf);
-}
-</PRE></TT></DL>
-<H4>Uses of good names
-</H4>
-<br>&#32;<br>
-Although
-<TT>pwd</TT>
-was the motivation for getting names right, good file names are useful in many contexts
-and have become a key part of the Plan 9 programming environment.
-The compilers record in the symbol table the full name of the source file, which makes
-it easy to track down the source of buggy, old software and also permits the
-implementation of a program,
-<TT>src</TT>,
-to automate tracking it down.
-Given the name of a program,
-<TT>src</TT>
-reads its symbol table, extracts the file information,
-and triggers the editor to open a window on the program's
-source for its
-<TT>main</TT>
-routine.
-No guesswork, no heuristics.
-<br>&#32;<br>
-The
-<TT>openfiles</TT>
-routine was the inspiration for a new file in the
-<TT>/proc</TT>
-file system [Kill84].
-For process
-<I>n</I>,
-the file
-<TT>/proc/</TT><I>n</I><TT>/fd</TT>
-is a list of all its open files, including its working directory,
-with associated information including its open status,
-I/O offset, unique id (analogous to i-number)
-and file name.
-Here are the contents of the
-<TT>fd</TT>
-file for a process in the window system on the machine being used to write this paper:
-<DL><DT><DD><TT><PRE>
-% cat /proc/125099/fd 
-/usr/rob
-  0 r  M 5141 00000001.00000000        0 /mnt/term/dev/cons
-  1 w  M 5141 00000001.00000000       51 /mnt/term/dev/cons
-  2 w  M 5141 00000001.00000000       51 /mnt/term/dev/cons
-  3 r  M 5141 0000000b.00000000     1166 /dev/snarf
-  4 rw M 5141 0ffffffc.00000000      288 /dev/draw/new
-  5 rw M 5141 00000036.00000000  4266337 /dev/draw/3/data
-  6 r  M 5141 00000037.00000000        0 /dev/draw/3/refresh
-  7 r  c    0 00000004.00000000  6199848 /dev/bintime
-% 
-</PRE></TT></DL>
-(The Linux implementation of
-<TT>/proc</TT>
-provides a related service by giving a directory in which each file-descriptor-numbered file is
-a symbolic link to the file itself.)
-When debugging errant systems software, such information can be valuable.
-<br>&#32;<br>
-Another motivation for getting names right was the need to extract from the system
-an accurate description of the mount table, so that a process's name space could be
-recreated on another machine, in order to move (or simulate) a computing environment
-across the network.
-One program that does this is Plan 9's
-<TT>cpu</TT>
-command, which recreates the local name space on a remote machine, typically a large
-fast multiprocessor.
-Without accurate names, it was impossible to do the job right; now
-<TT>/proc</TT>
-provides a description of the name space of each process,
-<TT>/proc/</TT><I>n</I><TT>/ns</TT><I>:
-<DL><DT><DD><TT><PRE>
-% cat /proc/125099/ns
-bind  / /
-mount -aC #s/boot / 
-bind  #c /dev
-bind  #d /fd
-bind -c #e /env
-bind  #p /proc
-bind -c #s /srv
-bind  /386/bin /bin
-bind -a /rc/bin /bin
-bind  /net /net
-bind -a #l /net
-mount -a #s/cs /net 
-mount -a #s/dns /net 
-bind -a #D /net
-mount -c #s/boot /n/emelie 
-bind -c /n/emelie/mail /mail
-mount -c /net/il/134/data /mnt/term 
-bind -a /usr/rob/bin/rc /bin
-bind -a /usr/rob/bin/386 /bin
-mount  #s/boot /n/emelieother other
-bind -c /n/emelieother/rob /tmp
-mount  #s/boot /n/dump dump
-bind  /mnt/term/dev/cons /dev/cons
-...
-cd /usr/rob
-% 
-</PRE></TT></DL>
-(The
-</I><TT>#</TT><I>
-notation identifies raw device drivers so they may be attached to the name space.)
-The last line of the file gives the working directory of the process.
-The format of this file is that used by a library routine,
-</I><TT>newns</TT><I>,
-which reads a textual description like this and reconstructs a name space.
-Except for the need to quote
-</I><TT>#</TT><I>
-characters, the output is also a shell script that invokes the user-level commands
-</I><TT>bind</TT><I>
-and
-</I><TT>mount</TT><I>,
-which are just interfaces to the underlying system calls.
-However,
-files like
-</I><TT>/net/il/134/data</TT><I>
-represent network connections; to find out where they point, so that the corresponding
-calls can be reestablished for another process,
-they must be examined in more detail using the network device files [PrWi93].  Another program,
-</I><TT>ns</TT><I>,
-does this; it reads the
-</I><TT>/proc/</TT><I>n</I><TT>/ns</TT><I>
-file, decodes the information, and interprets it, translating the network
-addresses and quoting the names when required:
-<DL><DT><DD><TT><PRE>
-...
-mount -a '#s/dns' /net 
-...
-mount -c il!135.104.3.100!12884 /mnt/term 
-...
-</PRE></TT></DL>
-These tools make it possible to capture an accurate description of a process's
-name space and recreate it elsewhere.
-And like the open file descriptor table,
-they are a boon to debugging; it is always helpful to know
-exactly what resources a program is using.
-</I><H4>Adapting to Unix
-</H4>
-<br>&#32;<br>
-This work was done for the Plan 9 operating system, which has the advantage that
-the non-hierarchical aspects of the name space are all known to the kernel.
-It should be possible, though, to adapt it to a Unix system.
-The problem is that Unix has nothing corresponding precisely to a
-<TT>Channel</TT>,
-which in Plan 9 represents the unique result of evaluating a name.
-The
-<TT>vnode</TT>
-structure is a shared structure that may represent a file
-known by several names, while the
-<TT>file</TT>
-structure refers only to open files, but for example the current working
-directory of a process is not open.
-Possibilities to address this discrepancy include
-introducing a
-<TT>Channel</TT>-like
-structure that connects a name and a
-<TT>vnode</TT>,
-or maintaining a separate per-process table that maps names to
-<TT>vnodes</TT>,
-disambiguating using the techniques described here.
-If it could be done
-the result would be an implementation of
-<TT>..</TT>
-that reduces the need for a built-in
-<TT>pwd</TT>
-in the shell and offers a consistent, sensible interpretation of the `parent directory'.
-<br>&#32;<br>
-We have not done this adaptation, but we recommend that the Unix community try it.
-<H4>Conclusions
-</H4>
-<br>&#32;<br>
-It should be easy to discover a well-defined, absolute path name for every open file and
-directory in the system, even in the face of symbolic links and other non-hierarchical
-elements of the file name space.
-In earlier versions of Plan 9, and all current versions of Unix,
-names can instead be inconsistent and confusing.
-<br>&#32;<br>
-The Plan 9 operating system now maintains an accurate name for each file,
-using inexpensive lexical operations coupled with local file system actions.
-Ambiguities are resolved by examining the names themselves;
-since they reflect the path that was used to reach the file, they also reflect the path back,
-permitting a dependable answer to be recovered even when stepping backwards through
-a multiply-named directory.
-<br>&#32;<br>
-Names make sense again: they are sensible and consistent.
-Now that dependable names are available, system services can depend on them,
-and recent work in Plan 9 is doing just that.
-We&#8212;the community of Unix and Unix-like systems&#8212;should have done this work a long time ago.
-<H4>Acknowledgements
-</H4>
-<br>&#32;<br>
-Phil Winterbottom devised the
-<TT>ns</TT>
-command and the
-<TT>fd</TT>
-and
-<TT>ns</TT>
-files in
-<TT>/proc</TT>,
-based on an earlier implementation of path name management that
-the work in this paper replaces.
-Russ Cox wrote the final version of
-<TT>cleanname</TT>
-and helped debug the code for reversing the mount table.
-Ken Thompson, Dave Presotto, and Jim McKie offered encouragement and consultation.
-<H4>References
-</H4>
-<br>&#32;<br>
-[Field95]
-R. Fielding,
-``Relative Uniform Resource Locators'',
-<I>Network Working Group Request for Comments: 1808</I>,
-June, 1995.
-<br>&#32;<br>
-[Kill84]
-T. J. Killian,
-``Processes as Files'',
-<I>Proceedings of the Summer 1984 USENIX Conference</I>,
-Salt Lake City, 1984, pp. 203-207.
-<br>&#32;<br>
-[Korn94]
-David G. Korn,
-``ksh: An Extensible High Level Language'',
-<I>Proceedings of the USENIX Very High Level Languages Symposium</I>,
-Santa Fe, 1994, pp. 129-146.
-<br>&#32;<br>
-[Korn00]
-David G. Korn,
-personal communication.
-<br>&#32;<br>
-[PeMc95]
-Jan-Simon Pendry and Marshall Kirk McKusick,
-``Union Mounts in 4.4BSD-Lite'',
-<I>Proceedings of the 1995 USENIX Conference</I>,
-New Orleans, 1995.
-<br>&#32;<br>
-[Pike91]
-Rob Pike,
-``8&#189;, the Plan 9 Window System'',
-<I>Proceedings of the Summer 1991 USENIX Conference</I>,
-Nashville, 1991, pp. 257-265.
-<br>&#32;<br>
-[Pike93]
-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom,
-``The Use of Name Spaces in Plan 9'',
-<I>Operating Systems Review</I>,
-<B>27</B>,
-2, April 1993, pp. 72-76.
-<br>&#32;<br>
-[PrWi93]
-Dave Presotto and Phil Winterbottom,
-``The Organization of Networks in Plan 9'',
-<I>Proceedings of the Winter 1993 USENIX Conference</I>,
-San Diego, 1993, pp. 43-50.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 910
sys/doc/libmach.html

@@ -1,910 +0,0 @@
-<html>
-<title>
-Adding Application Support for a New Architecture in Plan 9
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Adding Application Support for a New Architecture in Plan 9
-</H1>
-<DL><DD><I>Bob Flandrena<br>
-bobf@plan9.bell-labs.com<br>
-</I></DL>
-<H4>Introduction
-</H4>
-<br>&#32;<br>
-Plan 9 has five classes of architecture-dependent software:
-headers, kernels, compilers and loaders, the
-<TT>libc</TT>
-system library, and a few application programs.  In general,
-architecture-dependent programs
-consist of a portable part shared by all architectures and a
-processor-specific portion for each supported architecture.
-The portable code is often compiled and stored in a library
-associated with
-each architecture.  A program is built by
-compiling the architecture-specific code and loading it with the
-library.  Support for a new architecture is provided
-by building a compiler for the architecture, using it to
-compile the portable code into libraries,
-writing the architecture-specific code, and
-then loading that code with
-the libraries.
-<br>&#32;<br>
-This document describes the organization of the architecture-dependent
-code and headers on Plan 9.
-The first section briefly discusses the layout of
-the headers and the source code for the kernels, compilers, loaders, and the
-system library, 
-<TT>libc</TT>.
-The second section provides a detailed
-discussion of the structure of
-<TT>libmach</TT>,
-a library containing almost
-all architecture-dependent code
-used by application programs.
-The final section describes the steps required to add
-application program support for a new architecture.
-<H4>Directory Structure
-</H4>
-<P>
-Architecture-dependent information for the new processor
-is stored in the directory tree rooted at <TT>/</TT><I>m</I>
-where
-<I>m</I>
-is the name of the new architecture (e.g.,
-<TT>mips</TT>).
-The new directory should be initialized with several important
-subdirectories, notably
-<TT>bin</TT>,
-<TT>include</TT>,
-and
-<TT>lib</TT>.
-The directory tree of an existing architecture
-serves as a good model for the new tree.
-The architecture-dependent
-<TT>mkfile</TT>
-must be stored in the newly created root directory
-for the architecture.  It is easiest to copy the
-mkfile for an existing architecture and modify
-it for the new architecture.  When the mkfile
-is correct, change the
-<TT>OS</TT>
-and
-<TT>CPUS</TT>
-variables in the
-<TT>/sys/src/mkfile.proto</TT>
-to reflect the addition of the new architecture.
-</P>
-<H4>Headers
-</H4>
-<br>&#32;<br>
-Architecture-dependent headers are stored in directory
-<TT>/</TT><I>m</I><TT>/include</TT>
-where
-<I>m</I>
-is the name of the architecture (e.g.,
-<TT>mips</TT>).
-Two header files are required:
-<TT>u.h</TT>
-and
-<TT>ureg.h</TT>.
-The first defines fundamental data types,
-bit settings for the floating point
-status and control registers, and
-<TT>va_list</TT>
-processing which depends on the stack
-model for the architecture.  This file
-is best built by copying and modifying the
-<TT>u.h</TT>
-file from an architecture
-with a similar stack model.
-The
-<TT>ureg.h</TT>
-file
-contains a structure describing the layout
-of the saved register set for
-the architecture; it is defined by the kernel.
-<br>&#32;<br>
-Header file
-<TT>/sys/include/a.out.h</TT>
-contains the definitions of the magic
-numbers used to identify executables for
-each architecture.  When support for a new
-architecture is added, the magic number
-for the architecture must be added to this file.
-<br>&#32;<br>
-The header format of a bootable executable is defined by
-each manufacturer.  Header file
-<TT>/sys/include/bootexec.h</TT>
-contains structures describing the headers currently
-supported.  If the new architecture uses a common header
-such as COFF,
-the header format is probably already defined,
-but if the bootable header format is non-standard,
-a structure defining the format must be added to this file.
-<br>&#32;<br>
-<H4>Kernel
-</H4>
-<br>&#32;<br>
-Although the kernel depends critically on the properties of the underlying
-hardware, most of the
-higher-level kernel functions, including process
-management, paging, pseudo-devices, and some
-networking code, are independent of processor
-architecture.  The portable kernel code
-is divided into two parts: that implementing kernel
-functions and that devoted to the boot process.
-Code in the first class is stored in directory
-<TT>/sys/src/9/port</TT>
-and the portable boot code is stored in
-<TT>/sys/src/9/boot</TT>.
-Architecture-dependent kernel code is stored in the
-subdirectories of
-<TT>/sys/src/9</TT>
-named for each architecture.
-<br>&#32;<br>
-The relationship between the kernel code and the boot code
-is convoluted and subtle.  The portable boot code
-is compiled into a library for each architecture.  An architecture-specific
-main program is loaded with the appropriate library and the resulting
-executable is compiled into the kernel where it is executed as
-a user process during the final stages of kernel initialization.  The boot process
-performs authentication, attaches the name space root to the appropriate
-file system and starts the
-<TT>init</TT>
-process.
-<br>&#32;<br>
-The organization of the portable kernel source code differs from that
-of most other architecture-specific code.
-Instead of storing the portable code in a library
-and loading it with the architecture-specific
-code, the portable code is compiled directly into
-the directory containing the architecture-specific code
-and linked with the object files built from the source in that directory.
-<br>&#32;<br>
-<H4>Compilers and Loaders
-</H4>
-<br>&#32;<br>
-The compiler source code conforms to the usual
-organization: portable code is compiled into a library
-for each architecture
-and the architecture-dependent code is loaded with
-that library.
-The common compiler code is stored in
-<TT>/sys/src/cmd/cc</TT>.
-The
-<TT>mkfile</TT>
-in this directory compiles the portable source and
-archives the objects in a library for each architecture.
-The architecture-specific compiler source
-is stored in a subdirectory of
-<TT>/sys/src/cmd</TT>
-with the same name as the compiler (e.g.,
-<TT>/sys/src/cmd/vc</TT>).
-<br>&#32;<br>
-There is no portable code shared by the loaders.
-Each directory of loader source
-code is self-contained, except for
-a header file and an instruction name table
-included from the
-directory of the associated
-compiler.
-<br>&#32;<br>
-<H4>Libraries
-</H4>
-<br>&#32;<br>
-Most C library modules are
-portable; the source code is stored in
-directories
-<TT>/sys/src/libc/port</TT>
-and
-<TT>/sys/src/libc/9sys</TT>.
-Architecture-dependent library code
-is stored in the subdirectory of
-<TT>/sys/src/libc</TT>
-named the same as the target processor.
-Non-portable functions not only
-implement architecture-dependent operations
-but also supply assembly language implementations
-of functions where speed is critical.
-Directory
-<TT>/sys/src/libc/9syscall</TT>
-is unusual because it
-contains architecture-dependent information
-for all architectures.
-It holds only a header file defining
-the names and numbers of system calls
-and a
-<TT>mkfile</TT>.
-The
-<TT>mkfile</TT>
-executes an
-<TT>rc</TT>
-script that parses the header file, constructs
-assembler language functions implementing the system
-call for each architecture, assembles the code,
-and archives the object files in
-<TT>libc</TT>.
-The assembler language syntax and the system interface
-differ for each architecture.
-The
-<TT>rc</TT>
-script in this
-<TT>mkfile</TT>
-must be modified to support a new architecture.
-<br>&#32;<br>
-<H4>Applications
-</H4>
-<br>&#32;<br>
-Application programs process two forms of architecture-dependent
-information: executable images and intermediate object files.
-Almost all processing is on executable files.
-System library
-<TT>libmach</TT>
-provides functions that convert
-architecture-specific data
-to a portable format so application programs
-can process this data independent of its
-underlying representation.
-Further, when a new architecture is implemented
-almost all code changes
-are confined to the library;
-most affected application programs need only be reloaded.
-The source code for the library is stored in
-<TT>/sys/src/libmach</TT>.
-<br>&#32;<br>
-An application program running on one type of
-processor must be able to interpret
-architecture-dependent information for all
-supported processors.
-For example, a debugger must be able to debug
-the executables of
-all architectures, not just the
-architecture on which it is executing, since
-<TT>/proc</TT>
-may be imported from a different machine.
-<br>&#32;<br>
-A small part of the application library
-provides functions to
-extract symbol references from object files.
-The remainder provides the following processing
-of executable files or memory images:
-<DL><DD>
-<br>&#32;<br>
-<UL>
-<LI>
-Header interpretation.
-<LI>
-Symbol table interpretation.
-<LI>
-Execution context interpretation, such as stack traces
-and stack frame location.
-<LI>
-Instruction interpretation including disassembly and
-instruction size and follow-set calculations.
-<LI>
-Exception and floating point number interpretation.
-<LI>
-Architecture-independent read and write access through a
-relocation map.
-</UL>
-</DL>
-<br>&#32;<br>
-Header file
-<TT>/sys/include/mach.h</TT>
-defines the interfaces to the
-application library.  Manual pages
-<A href="/magic/man2html/2/mach"><I>mach</I>(2),
-</A><A href="/magic/man2html/2/symbol"><I>symbol</I>(2),
-</A>and
-<A href="/magic/man2html/2/object"><I>object</I>(2)
-</A>describe the details of the
-library functions.
-<br>&#32;<br>
-Two data structures, called
-<TT>Mach</TT>
-and
-<TT>Machdata</TT>,
-contain architecture-dependent  parameters and
-a jump table of functions.
-Global variables
-<TT>mach</TT>
-and
-<TT>machdata</TT>
-point to the
-<TT>Mach</TT>
-and
-<TT>Machdata</TT>
-data structures associated with the target architecture.
-An application determines the target architecture of
-a file or executable image, sets the global pointers
-to the data structures associated with that architecture,
-and subsequently performs all references indirectly through the
-pointers.
-As a result, direct references to the tables for each
-architecture are avoided and the application code intrinsically
-supports all architectures (though only one at a time).
-<br>&#32;<br>
-Object file processing is handled similarly: architecture-dependent
-functions identify and
-decode the intermediate files for the processor.
-The application indirectly
-invokes a classification function to identify
-the architecture of the object code and to select the
-appropriate decoding function.  Subsequent calls
-then use that function to decode each record.  Again,
-the layer of indirection allows the application code
-to support all architectures without modification.
-<br>&#32;<br>
-Splitting the architecture-dependent information
-between the
-<TT>Mach</TT>
-and
-<TT>Machdata</TT>
-data structures
-allows applications to choose
-an appropriate level of service.  Even though an application
-does not directly reference the architecture-specific data structures,
-it must load the
-architecture-dependent tables and code 
-for all architectures it supports.  The size of this data
-can be substantial and many applications do not require
-the full range of architecture-dependent functionality.
-For example, the
-<TT>size</TT>
-command does not require the disassemblers for every architecture;
-it only needs to decode the header.
-The
-<TT>Mach</TT>
-data structure contains a few architecture-specific parameters
-and a description of the processor register set.
-The size of the structure
-varies with the size of the register
-set but is generally small.
-The
-<TT>Machdata</TT>
-data structure contains
-a jump table of architecture-dependent functions;
-the amount of code and data referenced by this table
-is usually large.
-<H4>Libmach Source Code Organization
-</H4>
-<br>&#32;<br>
-The
-<TT>libmach</TT>
-library provides four classes of functionality:
-<br>&#32;<br>
-<DL COMPACT>
-<DT>Header and Symbol Table Decoding - <DD>
-Files
-<TT>executable.c</TT>
-and
-<TT>sym.c</TT>
-contain code to interpret the header and
-symbol tables of
-an executable file or executing image.
-Function
-<TT>crackhdr</TT>
-decodes the header,
-reformats the
-information into an
-<TT>Fhdr</TT>
-data structure, and points
-global variable
-<TT>mach</TT>
-to the
-<TT>Mach</TT>
-data structure of the target architecture.
-The symbol table processing
-uses the data in the
-<TT>Fhdr</TT>
-structure to decode the symbol table.
-A variety of symbol table access functions then support
-queries on the reformatted table.
-<DT>Debugger Support - <DD>
-Files named
-<TT></TT><I>m</I><TT>.c</TT><I>,
-where
-</I><I>m</I><I>
-is the code letter assigned to the architecture,
-contain the initialized
-</I><TT>Mach</TT><I>
-data structure and the definition of the register
-set for each architecture.
-Architecture-specific debugger support functions and
-an initialized
-</I><TT>Machdata</TT><I>
-structure are stored in
-files named
-</I><TT></TT><I>m</I><TT>db.c</TT><I>.
-Files
-</I><TT>machdata.c</TT><I>
-and
-</I><TT>setmach.c</TT><I>
-contain debugger support functions shared
-by multiple architectures.
-<DT>Architecture-Independent Access - <DD>
-Files
-</I><TT>map.c</TT><I>,
-</I><TT>access.c</TT><I>,
-and
-</I><TT>swap.c</TT><I>
-provide accesses through a relocation map
-to data in an executable file or executing image.
-Byte-swapping is performed as needed.  Global variables
-</I><TT>mach</TT><I>
-and
-</I><TT>machdata</TT><I>
-must point to the
-</I><TT>Mach</TT><I>
-and
-</I><TT>Machdata</TT><I>
-data structures of the target architecture.
-<DT>Object File Interpretation - <DD>
-These files contain functions to identify the
-target architecture of an
-intermediate object file
-and extract references to symbols.  File
-</I><TT>obj.c</TT><I>
-contains code common to all architectures;
-file
-</I><TT></TT><I>m</I><TT>obj.c</TT><I>
-contains the architecture-specific source code
-for the machine with code character
-</I><I>m</I><I>.
-</dl>
-</I><br>&#32;<br>
-The
-<TT>Machdata</TT>
-data structure is primarily a jump
-table of architecture-dependent debugger support
-functions. Functions select the
-<TT>Machdata</TT>
-structure for a target architecture based
-on the value of the
-<TT>type</TT>
-code in the
-<TT>Fhdr</TT>
-structure or the name of the architecture.
-The jump table provides functions to swap bytes, interpret
-machine instructions,
-perform stack
-traces, find stack frames, format floating point
-numbers, and decode machine exceptions.  Some functions, such as
-machine exception decoding, are idiosyncratic and must be
-supplied for each architecture.  Others depend
-on the compiler run-time model and several
-architectures may share code common to a model.  For
-example, many architectures share the code to
-process the fixed-frame stack model implemented by
-several of the compilers.
-Finally, some
-functions, such as byte-swapping, provide a general capability and
-the jump table need only select an implementation appropriate
-to the architecture.
-<br>&#32;<br>
-<H4>Adding Application Support for a New Architecture
-</H4>
-<br>&#32;<br>
-This section describes the
-steps required to add application-level
-support for a new architecture.
-We assume
-the kernel, compilers, loaders and system libraries
-for the new architecture are already in place.  This
-implies that a code-character has been assigned and
-that the architecture-specific headers have been
-updated.
-With the exception of two programs,
-application-level changes are confined to header
-files and the source code in
-<TT>/sys/src/libmach</TT>.
-<br>&#32;<br>
-<DL COMPACT>
-<DT>1.<DD>
-Begin by updating the application library
-header file in
-<TT>/sys/include/mach.h</TT>.
-Add the following symbolic codes to the
-<TT>enum</TT>
-statement near the beginning of the file:
-<DL><DD>
-<DT>*<DD>
-The processor type code, e.g., 
-<TT>MSPARC</TT>.
-<DT>*<DD>
-The type of the executable.  There are usually
-two codes needed: one for a bootable
-executable (i.e., a kernel) and one for an
-application executable.
-<DT>*<DD>
-The disassembler type code.  Add one entry for
-each supported disassembler for the architecture.
-<DT>*<DD>
-A symbolic code for the object file.
-</DL>
-</dl>
-<br>&#32;<br>
-<DL COMPACT>
-<DT>2.<DD>
-In a file named
-<TT>/sys/src/libmach/</TT><I>m</I><TT>.c</TT><I>
-(where
-</I><I>m</I><I>
-is the identifier character assigned to the architecture),
-initialize
-</I><TT>Reglist</TT><I>
-and
-</I><TT>Mach</TT><I>
-data structures with values defining
-the register set and various system parameters.
-The source file for a similar architecture
-can serve as a template.
-Most of the fields of the
-</I><TT>Mach</TT><I>
-data structure are obvious
-but a few require further explanation.
-<DL><DD>
-<DT></I><TT>kbase</TT><I> - <DD>
-This field
-contains the address of the kernel 
-</I><TT>ublock</TT><I>.
-The debuggers
-assume the first entry of the kernel
-</I><TT>ublock</TT><I>
-points to the
-</I><TT>Proc</TT><I>
-structure for a kernel thread.
-<DT></I><TT>ktmask</TT><I> - <DD>
-This field
-is a bit mask used to calculate the kernel text address from
-the kernel 
-</I><TT>ublock</TT><I>
-address.
-The first page of the
-kernel text segment is calculated by
-ANDing
-the negation of this mask with
-</I><TT>kbase</TT><I>.
-<DT></I><TT>kspoff</TT><I> - <DD>
-This field
-contains the byte offset in the
-</I><TT>Proc</TT><I>
-data structure to the saved kernel
-stack pointer for a suspended kernel thread.  This
-is the offset to the 
-</I><TT>sched.sp</TT><I>
-field of a
-</I><TT>Proc</TT><I>
-table entry.
-<DT></I><TT>kpcoff</TT><I> - <DD>
-This field contains the byte offset into the
-</I><TT>Proc</TT><I>
-data structure
-of
-the program counter of a suspended kernel thread.
-This is the offset to
-field
-</I><TT>sched.pc</TT><I>
-in that structure.
-<DT></I><TT>kspdelta</TT><I> and </I><TT>kpcdelta</TT><I> - <DD>
-These fields
-contain corrections to be added to
-the stack pointer and program counter, respectively,
-to properly locate the stack and next
-instruction of a kernel thread.  These
-values bias the saved registers retrieved
-from the
-</I><TT>Label</TT><I>
-structure named
-</I><TT>sched</TT><I>
-in the
-</I><TT>Proc</TT><I>
-data structure.
-Most architectures require no bias
-and these fields contain zeros.
-<DT></I><TT>scalloff</TT><I> - <DD>
-This field
-contains the byte offset of the
-</I><TT>scallnr</TT><I>
-field in the
-</I><TT>ublock</TT><I>
-data structure associated with a process.
-The
-</I><TT>scallnr</TT><I>
-field contains the number of the
-last system call executed by the process.
-The location of the field varies depending on
-the size of the floating point register set
-which precedes it in the
-</I><TT>ublock</TT><I>.
-</DL>
-</dl>
-</I><br>&#32;<br>
-<DL COMPACT>
-<DT>3.<DD>
-Add an entry to the initialization of the
-<TT>ExecTable</TT>
-data structure at the beginning of file
-<TT>/sys/src/libmach/executable.c</TT>.
-Most architectures
-require two entries: one for
-a normal executable and
-one for a bootable
-image.  Each table entry contains:
-<DL><DD>
-<DT>*<DD>
-Magic Number - 
-The big-endian magic number assigned to the architecture in
-<TT>/sys/include/a.out.h</TT>.
-<DT>*<DD>
-Name - 
-A string describing the executable.
-<DT>*<DD>
-Executable type code - 
-The executable code assigned in
-<TT>/sys/include/mach.h</TT>.
-<DT>*<DD>
-<TT>Mach</TT> pointer - 
-The address of the initialized
-<TT>Mach</TT>
-data structure constructed in Step 2.
-You must also add the name of this table to the
-list of
-<TT>Mach</TT>
-table definitions immediately preceding the
-<TT>ExecTable</TT>
-initialization.
-<DT>*<DD>
-Header size - 
-The number of bytes in the executable file header.
-The size of a normal executable header is always
-<TT>sizeof(Exec)</TT>.
-The size of a bootable header is
-determined by the size of the structure
-for the architecture defined in
-<TT>/sys/include/bootexec.h</TT>.
-<DT>*<DD>
-Byte-swapping function - 
-The address of
-<TT>beswal</TT>
-or
-<TT>leswal</TT>
-for big-endian and little-endian
-architectures, respectively.
-<DT>*<DD>
-Decoder function - 
-The address of a function to decode the header.
-Function
-<TT>adotout</TT>
-decodes the common header shared by all normal
-(i.e., non-bootable) executable files.
-The header format of bootable
-executable files is defined by the manufacturer and
-a custom function is almost always
-required to decode it.
-Header file
-<TT>/sys/include/bootexec.h</TT>
-contains data structures defining the bootable
-headers for all architectures.  If the new architecture
-uses an existing format, the appropriate
-decoding function should already be in
-<TT>executable.c</TT>.
-If the header format is unique, then
-a new function must be added to this file.
-Usually the decoding function for an existing
-architecture can be adopted with minor modifications.
-</DL>
-</dl>
-<br>&#32;<br>
-<DL COMPACT>
-<DT>4.<DD>
-Write an object file parser and
-store it in file
-<TT>/sys/src/libmach/</TT><I>m</I><TT>obj.c</TT><I>
-where
-</I><I>m</I><I>
-is the identifier character assigned to the architecture.
-Two functions are required: a predicate to identify an
-object file for the architecture and a function to extract
-symbol references from the object code.
-The object code format is obscure but
-it is often possible to adopt the
-code of an existing architecture
-with minor modifications.
-When these
-functions are in hand, insert their addresses
-in the jump table at the beginning of file
-</I><TT>/sys/src/libmach/obj.c</TT><I>.
-</dl>
-</I><br>&#32;<br>
-<DL COMPACT>
-<DT>5.<DD>
-Implement the required debugger support functions and
-initialize the parameters and jump table of the
-<TT>Machdata</TT>
-data structure for the architecture.
-This code is conventionally stored in
-a file named
-<TT>/sys/src/libmach/</TT><I>m</I><TT>db.c</TT><I>
-where
-</I><I>m</I><I>
-is the identifier character assigned to the architecture.
-The fields of the
-</I><TT>Machdata</TT><I>
-structure are:
-<DL><DD>
-<DT></I><TT>bpinst</TT><I> and </I><TT>bpsize</TT><I> - <DD>
-These fields
-contain the breakpoint instruction and the size
-of the instruction, respectively.
-<DT></I><TT>swab</TT><I> - <DD>
-This field
-contains the address of a function to
-byte-swap a 16-bit value.  Choose
-</I><TT>leswab</TT><I>
-or
-</I><TT>beswab</TT><I>
-for little-endian or big-endian architectures, respectively.
-<DT></I><TT>swal</TT><I> - <DD>
-This field
-contains the address of a function to
-byte-swap a 32-bit value.  Choose
-</I><TT>leswal</TT><I>
-or
-</I><TT>beswal</TT><I>
-for little-endian or big-endian architectures, respectively.
-<DT></I><TT>ctrace</TT><I> - <DD>
-This field
-contains the address of a function to perform a
-C-language stack trace.  Two general trace functions,
-</I><TT>risctrace</TT><I>
-and
-</I><TT>cisctrace</TT><I>,
-traverse fixed-frame and relative-frame stacks,
-respectively.  If the compiler for the
-new architecture conforms to one of
-these models, select the appropriate function.  If the
-stack model is unique,
-supply a custom stack trace function.
-<DT></I><TT>findframe</TT><I> - <DD>
-This field
-contains the address of a function to locate the stack
-frame associated with a text address.
-Generic functions
-</I><TT>riscframe</TT><I>
-and
-</I><TT>ciscframe</TT><I>
-process fixed-frame and relative-frame stack
-models.
-<DT></I><TT>ufixup</TT><I> - <DD>
-This field
-contains the address of a function to adjust
-the base address of the register save area.
-Currently, only the
-68020 requires this bias
-to offset over the active
-exception frame.
-<DT></I><TT>excep</TT><I> - <DD>
-This field
-contains the address of a function to produce a
-text
-string describing the
-current exception.
-Each architecture stores exception
-information uniquely, so this code must always be supplied.
-<DT></I><TT>bpfix</TT><I> - <DD>
-This field
-contains the address of a function to adjust an
-address prior to laying down a breakpoint.
-<DT></I><TT>sftos</TT><I> - <DD>
-This field
-contains the address of a function to convert a single
-precision floating point value
-to a string.  Choose
-</I><TT>leieeesftos</TT><I>
-for little-endian
-or
-</I><TT>beieeesftos</TT><I>
-for big-endian architectures.
-<DT></I><TT>dftos</TT><I> - <DD>
-This field
-contains the address of a function to convert a double
-precision floating point value
-to a string.  Choose
-</I><TT>leieeedftos</TT><I>
-for little-endian
-or
-</I><TT>beieeedftos</TT><I>
-for big-endian architectures.
-<DT></I><TT>foll</TT><I>, </I><TT>das</TT><I>, </I><TT>hexinst</TT><I>, and </I><TT>instsize</TT><I> - <DD>
-These fields point to functions that interpret machine
-instructions.
-They rely on disassembly of the instruction
-and are unique to each architecture.
-</I><TT>Foll</TT><I>
-calculates the follow set of an instruction.
-</I><TT>Das</TT><I>
-disassembles a machine instruction to assembly language.
-</I><TT>Hexinst</TT><I>
-formats a machine instruction as a text
-string of
-hexadecimal digits.
-</I><TT>Instsize</TT><I>
-calculates the size, in bytes, of an instruction.
-Once the disassembler is written, the other functions
-can usually be implemented as trivial extensions of it.
-</dl>
-</I><br>&#32;<br>
-It is possible to provide support for a new architecture
-incrementally by filling the jump table entries
-of the
-<TT>Machdata</TT>
-structure as code is written.  In general, if
-a jump table entry contains a zero, application
-programs requiring that function will issue an
-error message instead of attempting to
-call the function.  For example,
-the
-<TT>foll</TT>,
-<TT>das</TT>,
-<TT>hexinst</TT>,
-and
-<TT>instsize</TT>
-jump table slots can be zeroed until a
-disassembler is written.
-Other capabilities, such as
-stack trace or variable inspection,
-can be supplied and will be available to
-the debuggers but attempts to use the
-disassembler will result in an error message.
-</DL>
-<DL COMPACT>
-<DT>6.<DD>
-Update the table named
-<TT>machines</TT>
-near the beginning of
-<TT>/sys/src/libmach/setmach.c</TT>.
-This table binds the
-file type code and machine name to the
-<TT>Mach</TT>
-and
-<TT>Machdata</TT>
-structures of an architecture.
-The names of the initialized
-<TT>Mach</TT>
-and
-<TT>Machdata</TT>
-structures built in steps 2 and 5
-must be added to the list of
-structure definitions immediately
-preceding the table initialization.
-If both Plan 9 and
-native disassembly are supported, add
-an entry for each disassembler to the table.  The
-entry for the default disassembler (usually
-Plan 9) must be first.
-<DT>7.<DD>
-Add an entry describing the architecture to
-the table named
-<TT>trans</TT>
-near the end of
-<TT>/sys/src/cmd/prof.c</TT>.
-</DL>
-<DT>8.<DD>
-Add an entry describing the architecture to
-the table named
-<TT>objtype</TT>
-near the start of
-<TT>/sys/src/cmd/pcc.c</TT>.
-</DL>
-<DT>9.<DD>
-Recompile and install
-all application programs that include header file
-<TT>mach.h</TT>
-and load with
-<TT>libmach.a</TT>.
-</dl>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 746
sys/doc/lp.html

@@ -1,746 +0,0 @@
-<html>
-<title>
-A Guide to the Lp Printer Spooler
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>A Guide to the Lp
-Printer Spooler
-</H1>
-<DL><DD><I>Paul Glick<br>
-pg@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<P>
-<I>Lp</I>
-is a collection of programs used to provide an easy-to-use
-interface for printing a variety of document types on a variety
-of printers.
-<I>Lp</I>
-is the glue that connects various document language
-translators and printer communication programs together so that
-the users may have a consistent view of printers.
-Most of the glue
-is shell script, which can be easily modified.
-The user need not
-specify options to get sensible output in most cases.
-<I>Lp</I>
-is described here
-so that others may make additions and changes.
-</DL>
-
-</P>
-<H4>1 Introduction
-</H4>
-<P>
-<I>Lp</I>
-is used to format and print data on a variety of output devices.
-The need for
-<I>lp</I>
-was rooted in the inability of other printer spoolers to do simple
-tasks without a great deal of user specification of options.
-At the time
-<I>lp</I>
-was written, there were several printer
-languages, such as ImPress and PostScript, and
-an internally developed printer that would accept
-<I>troff</I>
-output.
-Now, all our printers take PostScript,
-but printers that use HPCL and HPGL abound and
-support for those printers may be added easily.
-A great deal of what underlies
-<I>lp</I>
-is taken from BSD's
-<I>lpr</I>
-and System V's
-<I>lp</I>.
-The important features of this system are that most of the programs
-are easily modified shell scripts and the user need not
-learn to use the large amount of underlying software developed by others.
-<I>Lp</I>
-runs under Plan 9 and several flavors of
-UNIX.
-This document deals with
-<I>lp</I>
-as it relates to Plan 9.
-<I>Lp</I>
-was developed using both Datakit and Ethernet to transport data between machines.
-Now only the Ethernet transport mechanism remains.
-</P>
-<P>
-Text, graphics, and formatted text files are appropriately processed and
-placed into a spool directory from which they are taken to be printed by a daemon process.
-Additional functions include checking the status of a printer queue
-and removing jobs from the printer queue.
-</P>
-<P>
-All the shell scripts (see
-<A href="/magic/man2html/1/rc"><I>rc</I>(1))
-</A>associated with
-<I>lp</I>
-reside in the spool directory
-<TT>/sys/lib/lp</TT>
-except for the
-<I>lp</I>
-command itself, which resides in
-<TT>/rc/bin</TT>.
-Commands related to
-<I>lp</I>
-that are not shell scripts can most often be found
-in
-<TT>/$cputype/bin/aux</TT>.
-The directory where all the
-<I>lp</I>
-scripts reside is defined within
-<I>lp</I>
-by the shell variable
-<TT>LPLIB</TT>.
-In the remainder of this document, file names will be specified
-with this shell variable as their root.
-</P>
-<H4>2 Usage
-</H4>
-<P>
-<I>Lp</I>
-requires an output device to be specified
-before it will process input.
-This can be done in any of three ways described here.
-</P>
-<DL COMPACT>
-<DT>1)<DD>
-The file
-<TT>$LPLIB/defdevice</TT>
-may contain the name of a default output device.
-This may not be practical for environments where
-there are many printers.
-<DT>2)<DD>
-The user's environment variable
-<TT>LPDEST</TT>
-may be set to the name of the device to be used.
-This is often a more practical solution when there are several printers
-available.
-This overrides a
-<TT>defdevice</TT>
-specification.
-<DT>3)<DD>
-The
-<TT>-d</TT>
-<I>printer</I>
-option to the
-<I>lp</I>
-command specifies
-<I>printer</I>
-as the device to which output should be directed, overriding the
-previous two specifications.
-</dl>
-<P>
-<br>
-If
-<I>printer</I>
-is
-<TT>?</TT>,
-a list of printers and other information in the
-<TT>devices</TT>
-file is printed, as shown in Figure 1.
-Quote the question mark to prevent it from being
-interpreted by the shell language as a metacharacter.
-
-<DL><DT><DD><TT><PRE>
-% lp -d'?'
-device   location  host             class
-fn       2C-501    helix            post/2+600dpi+duplex
-pcclone  -         -                post+nohead
-peacock  2C-501    cetus            post/2+300dpi+nohead+color
-ps83     st8_fl3   rice             post+300dpi+reverse
-psu      2C-501    cetus            post/2+1200dpi
-     .
-     .
-     .
-%
-</PRE></TT></DL>
-<I>Figure 1.  Sample listing of installed printers</I>
-</P>
-<P>
-Normally,
-<I>lp</I>
-uses the
-<TT>file</TT>
-command to figure out what type of input it is receiving.
-This is done within the
-<TT>generic</TT>
-process which is discussed later in this paper in the
-<B>Process directory</B>
-section.
-To select a specific input processor the
-<TT>-p</TT><I>process</I>
-option is used where
-<I>process</I>
-is one of the shell scripts in the
-<TT>process</TT>
-directory.
-</P>
-<br>&#32;<br>
-Troff
-output can be printed, in this case, on printer
-<I>fn</I>
-with
-<DL><DT><DD><TT><PRE>
-% troff -ms lp.ms | lp -dfn
-</PRE></TT></DL>
-<br>&#32;<br>
-A file can be converted to PostScript using the pseudo-printer
-<TT>stdout</TT>:
-<DL><DT><DD><TT><PRE>
-% troff -ms lp.ms | lp -dstdout &gt; lp.ps
-</PRE></TT></DL>
-LaTeX (and analogously TeX)
-documents are printed in two steps:
-<DL><DT><DD><TT><PRE>
-% latex lp.tex
-     .
-     .
-% lp lp.dvi
-     .
-     .
-%
-</PRE></TT></DL>
-LaTeX
-produces a `.dvi' file and
-does not permit the use of a pipe
-connection to the standard input of
-<I>lp</I>.
-To look at the status and queue of a device, use
-<TT>-q</TT>:
-<DL><DT><DD><TT><PRE>
-% lp -dpsu -q
-daemon status:
-:  67.17% sent
-printer status:
-%%[ status: busy; source: lpd ]%%
-
-queue on cetus:
-job		user	try	size
-rice29436.1	pg	0	17454
-slocum17565.1	ches	1	49995
-%
-</PRE></TT></DL>
-This command can print the status and queue of the local
-and remote hosts.
-Administrators should be advised that, when working in an environment where the
-<I>lp</I>
-spool directory is shared among the local and remote hosts,
-no spooling should be done on the local hosts.
-The format of the status and queue printout is up to the administrator.
-The job started above can be killed with
-<TT>-k</TT>:
-<DL><DT><DD><TT><PRE>
-% lp -dpsu -k rice29436.1
-rice29436.1 removed from psu queue on cetus
-</PRE></TT></DL>
-<H4>3 Options
-</H4>
-<P>
-There are options available to modify the way in which a job is handled.
-It is the job of the
-<I>lp</I>
-programs to convert the option settings so they may be used by each of the
-different translation and interface programs.
-Not all options are applicable to all printer environments.
-Table 1 lists the standard
-<I>lp</I>
-options, the shell variable settings, and description of the options.
-
-<br>&#32;<br>
-<br><img src="data.19118320.gif"><br>
-<br>&#32;<br>
-<I>Table 1. Lp Option List</I>
-<br>&#32;<br>
-
-</P>
-<H4>4 Devices file
-</H4>
-<P>
-The
-<TT>devices</TT>
-file is found in the spool directory.
-Each line in the file is composed of 12 fields, separated
-by tabs or spaces, that describe the attributes
-of the printer and how it should be serviced.
-Within the
-<TT>lp</TT>
-command, a shell variable is set for each attribute;
-the following list describes them:
-</P>
-<DL COMPACT>
-<DT><TT>LPDEST</TT><DD>
-is the name of the device as given to
-<I>lp</I>
-with the
-<TT>-d</TT>
-option
-or as specified by the shell environment variable
-<TT>LPDEST</TT>
-or as specified by
-the file
-<TT>$LPLIB/defdevice</TT>.
-This name is used in creating directories and log files that are associated with
-the printer's operation.
-<DT><TT>LOC</TT><DD>
-just describes where the printer is physically located.
-<DT><TT>DEST_HOST</TT><DD>
-is the host from which the files are printed.
-Files may be spooled on other machines before being transferred to the
-destination host.
-<DT><TT>OUT_DEV</TT><DD>
-is the physical device name or network address needed by the printer daemon
-to connect to the printer.
-This field depends on the requirements of the daemon and may contain a `-'
-if not required.
-<DT><TT>SPEED</TT><DD>
-is the baud rate setting for the port.
-This field depends on the requirements of the daemon and may contain a `-'
-if not required.
-<DT><TT>LPCLASS</TT><DD>
-is used to encode minor printer differences.
-The keyword
-<TT>reverse</TT>
-is used by some of the preprocessors
-to reverse the order the pages are printed to accommodate different output
-trays (either face up or face down).
-The keyword
-<TT>nohead</TT>
-is used to suppress the header page.
-This is used for special and color printers.
-The keyword
-<TT>duplex</TT>
-is used to coax double sided output from duplex printers.
-<DT><TT>LPPROC</TT><DD>
-is the command from the
-<TT>LPLIB/process</TT>
-directory to be used to convert input to a format
-that will be accepted by the device.
-The preprocessor is invoked by the spooler.
-<DT><TT>SPOOLER</TT><DD>
-is the command from the
-<TT>LPLIB/spooler</TT>
-directory which will select files using the
-<TT>SCHED</TT>
-command and invoke the
-<TT>LPPROC</TT>
-command, putting its output
-into the remote spool directory.
-The output is sent directly to the spool directory on the
-destination machine to avoid conflicts when client and
-server machines share spool directories.
-<DT><TT>STAT</TT><DD>
-is the command from the
-<TT>LPLIB/stat</TT>
-directory that prints the status of the device and the list of jobs
-waiting on the queue for the device.
-The status information depends on what is available from the printer
-and interface software.
-The queue information should be changed to show information
-useful in tracking down problems.
-The
-<TT>SCHED</TT>
-command is used to show the jobs in the order
-in which they will be printed.
-<DT><TT>KILL</TT><DD>
-is the command from the
-<TT>LPLIB/kill</TT>
-directory that removes jobs from the queue.
-The jobs to be removed are given as arguments to the
-<I>lp</I>
-command.
-When possible, it should also abort the currently running job
-if it has to be killed.
-<DT><TT>DAEMON</TT><DD>
-is the command from the
-<TT>LPLIB/daemon</TT>
-directory that is meant to run asynchronously to remove
-jobs from the queue.
-Jobs may either be passed on to another host or sent to the
-printing device.
-<I>Lp</I>
-always tries to start a daemon process when one is specified.
-<DT><TT>SCHED</TT><DD>
-is the command from the
-<TT>LPLIB/sched</TT>
-directory that is used to present the job names to the
-daemon and stat programs
-in some order, e.g., first-in-first-out, smallest first.
-</dl>
-<H4>5 Support programs
-</H4>
-<P>
-The following sections describe the basic functions of the programs
-that are found in the subdirectories of
-<TT>$LPLIB</TT>.
-The programs in a specific directory vary with the
-type of output device or networks that have to be used.
-</P>
-<H4>5.1 Process directory
-</H4>
-<P>
-The
-<TT>generic</TT>
-preprocessor
-is the default preprocessor for most printers.
-It uses the
-<A href="/magic/man2html/1/file"><I>file</I>(1)
-</A>command to determine the format of the input file.
-The appropriate preprocessor is then selected to transform the
-file to a format suitable for the printer.
-</P>
-<P>
-Here is a list of some of the preprocessors and
-a description of their function.
-A complete list of preprocessors and their descriptions can be found in the manual page
-<A href="/magic/man2html/8/lp"><I>lp</I>(8).
-</A><br>&#32;<br>
-</P>
-<DL COMPACT>
-<DT><TT>dvipost</TT><DD>
-Converts TeX or LaTeX output (<TT>.dvi</TT> files) to PostScript
-<DT><TT>ppost</TT><DD>
-Converts UTF text to PostScript.
-The default font is Courier with Lucida fonts filling in
-the remainder of the (available) Unicode character space.
-<DT><TT>tr2post</TT><DD>
-Converts (device independent) troff output for the device type
-<TT>utf</TT>.
-See
-<TT>/sys/lib/troff/font/devutf</TT>
-directory for troff font width table descriptions.
-See also the
-<TT>/sys/lib/postscript/troff</TT>
-directory for mappings of
-troff
-<TT>UTF</TT>
-character space to PostScript font space.
-<DT><TT>p9bitpost</TT><DD>
-Converts Plan 9 bitmaps (see
-<I>bitfile</I>(9.6))
-to PostScript.
-<DT><TT>g3post</TT><DD>
-Converts fax (CCITT-G31 format) to PostScript.
-<DT><TT>hpost</TT><DD>
-Does header page processing and page reversal processing, if
-necessary.
-Page reversal is done here so the header page always comes
-out at the beginning of the job.
-Header page processing is very location-dependent.
-</dl>
-<H4>5.2 Spool directory
-</H4>
-<P>
-The
-<TT>generic</TT>
-spooler is responsible for executing the preprocessor
-and directing its output to a file in the printer's queue.
-An additional file is created containing information such as the system name,
-user id, job number, and number of times this job was attempted.
-</P>
-<P>
-Certain printer handling programs do not require separate
-preprocessing and spooling.
-For such circumstances a
-<TT>nospool</TT>
-spooler is available that just executes the preprocessing program.
-The processing and spooling functions are assumed by this program and the output is sent to
-<TT>OUT_DEV</TT>
-or standard output if
-<TT>OUT_DEV</TT>
-is '-'.
-</P>
-<P>
-The
-<TT>pcclone</TT>
-spooler is used to send print jobs directly to a printer connected
-to a 386 compatible printer port (See
-<A href="/magic/man2html/3/lpt"><I>lpt</I>(3)).
-</A></P>
-<H4>5.3 Stat directory
-</H4>
-<P>
-The function of the shell scripts in the
-<TT>stat</TT>
-directory is to present status information about the
-printer and its queue.
-When necessary, the
-<TT>stat</TT>
-scripts may be designed
-to return information about the local queue as well as the remote queue.
-This is not done on Plan 9 because many systems share the same queue directory.
-The scheduler is used to print the queue in the order in which the jobs
-will be executed.
-</P>
-<H4>5.4 Kill directory
-</H4>
-<P>
-The
-<TT>kill</TT>
-scripts receive command line arguments passed to them by
-<I>lp</I>
-and remove the job and id files which match the arguments
-for the particular queue.
-When a job is killed, the generic kill procedure:
-</P>
-<DL COMPACT>
-<DT>1)<DD>
-kills the daemon for this queue if the job being killed
-is first in the queue,
-<DT>2)<DD>
-removes the files associated with the job from the queue,
-<DT>3)<DD>
-attempts to restart the daemon.
-</dl>
-<H4>5.5 Daemon directory
-</H4>
-<P>
-The
-<TT>daemon</TT>
-shell scripts are the last to be invoked by
-<I>lp</I>
-if the
-<TT>-Q</TT>
-option has not been given.
-The daemon process is executed asynchronously
-with its standard output and standard error appended to
-the printer log file.
-The log file is described in a subsequent section.
-Because the daemon runs asynchronously, it must
-catch signals that could cause it to terminate abnormally.
-The daemon first checks to see that it is the only one running
-by using the
-<TT>LOCK</TT>
-program found in the
-<TT>/$cputype/bin/aux</TT>
-directory.
-The
-<TT>LOCK</TT>
-command creates a
-<TT>LOCK</TT>
-file in the printer's queue directory.
-The daemon then executes the scheduler to obtain the name of the
-next job on the queue.
-</P>
-<P>
-The processing of jobs may entail transfer to another host
-or transmission to a printer.
-The details of this are specific to the individual daemons.
-If a job is processed without error, it is removed from the queue.
-If a job does not succeed, the associated files may be
-moved to a printer specific directory in
-<TT>$LPLIB/prob</TT>.
-In either case, the daemon can make an entry in the printer's
-log file.
-Before exiting, the daemon should clean up lock files by calling
-<TT>UNLOCK</TT>.
-</P>
-<P>
-Several non-standard daemon programs have been designed
-to suit various requirements and whims.
-One such program announces job completion and empty paper trays
-by causing icons to appear in people's
-<TT>seemail</TT>
-window.
-Another, using a voice synthesizer, makes verbal announcements.
-Other daemons may be designed to taste.
-</P>
-<H4>5.6 Sched directory
-</H4>
-<P>
-The scheduler must decide which job files should be executed and
-in what order.
-The most commonly used scheduler program is
-<TT>FIFO</TT>,
-which looks like this:
-<DL><DT><DD><TT><PRE>
-ls -tr * | sed -n -e 's/.*  *//' \
-  -e '/^[0-9][0-9]*.[1-9][0-9]*$/p'
-</PRE></TT></DL>
-This lists all the job files in this printer's queue in modification
-time order.
-Jobs entering the queue have a dot (.) prefixed to their name
-to keep the scheduler from selecting them before they are complete.
-</P>
-<H4>6 Where Things Go Wrong
-</H4>
-<P>
-There are four directories where
-<I>lp</I>
-writes files.
-On the Plan 9 release these directories may be found
-in a directory on a scratch filesystem that is not
-backed-up.
-This directory is
-<TT>/n/emelieother/lp</TT>.
-It is built on top of a file system
-<TT>other</TT>
-that is mounted on the file server
-<TT>emelie</TT>.
-The four directories in
-this scratch directory
-are
-<TT>log</TT>,
-<TT>prob</TT>,
-<TT>queue</TT>,
-and
-<TT>tmp</TT>.
-<I>Lp</I>
-binds (see
-<A href="/magic/man2html/1/bind"><I>bind</I>(1))
-</A>the first three into the directory
-<TT>/sys/lib/lp</TT>
-for its processes and their children.
-The
-<TT>tmp</TT>
-directory is bound to the
-<TT>/tmp</TT>
-directory so that the lp daemons, which run as user `none',
-may write into this directory.
-</P>
-<P>
-On any new installation, it is important that these directories
-be set up and that the
-<I>/rc/bin/lp</I>
-command be edited to reflect the change.
-If you do not have a scratch filesystem for these directories,
-create the four directories
-<TT>log</TT>,
-<TT>prob</TT>,
-<TT>queue</TT>,
-and
-<TT>tmp</TT>
-in
-<TT>$LPLIB</TT>
-<TT>(/sys/lib/lp)</TT>
-so that they are writable by anyone.
-</P>
-<H4>6.1 Log directory
-</H4>
-<P>
-The log files for a particular
-<I>printer</I>
-appear in a subdirectory of the spool directory
-<TT>log</TT>/<I>printer</I>.
-There are currently two types of log files.
-One is for the daemon to log errors and successful completions
-of jobs.
-These are named
-<I>printer.day</I>
-where
-<I>day</I>
-is the three letter abbreviation for the day of the week.
-These are overwritten once a week to avoid the need for regular
-cleanup.
-The other type of log file contains the status of the printer and
-is written by the program that communicates with the printer itself.
-These are named
-<I>printer</I>.<TT>st</TT>.
-These are overwritten with each new job and are saved in the
-<TT>$LPLIB/prob</TT>
-directory along with the job under circumstances described below.
-When a printer does not appear to be functioning these files are the
-place to look first.
-</P>
-<H4>6.2 Prob directory
-</H4>
-<P>
-When a job fails to produce output,
-the log files should be checked for any obvious problems.
-If none can be found, a directory with full read and write permissions
-should be created with the name of the printer in the
-<TT>$LPLIB/prob</TT>
-directory.
-Subsequent failure of a job will cause the daemon to leave a
-copy of the job and the printer communication log in the
-<TT>$LPLIB/prob/</TT><I>printer</I>
-directory.
-It is common for a printer to enter states from which
-it cannot be rescued except by manually cycling the power on the printer.
-After this is done the print daemon should recover by itself
-(give it a minute).
-If it does not recover, remove the
-<TT>LOCK</TT>
-file from the printer's spool directory to kill the daemon.
-The daemon will have to be restarted by sending another job
-to the printer.
-For PostScript printers just use:
-<DL><DT><DD><TT><PRE>
-echo '%!PS' | lp
-</PRE></TT></DL>
-</P>
-<H4>6.3 Repairing Stuck Daemons
-</H4>
-<P>
-There are conditions that occur which are not handled
-by the daemons.
-One such problem can only be described as the printer entering a
-comatose state.
-The printer does not respond to any messages sent to it.
-The daemon should recover from the reset and an error message
-will appear in the log files.
-If all else fails, one can kill the first job in the queue
-or remove the
-<TT>LOCK</TT>
-file from the queue directory.
-This will kill the daemon, which will have to be restarted.
-</P>
-<H4>7 Interprocessor Communication
-</H4>
-<P>
-A Plan 9 CPU server can be set up as a printer's spooling host.
-That is, the machine where jobs are spooled and from which those jobs
-are sent directly to the printer.
-To do this, the CPU must listen on TCP port 515 which is the well known
-port for the BSD line printer daemon.
-The file
-<TT>/rc/bin/service/tcp515</TT>
-is executed when a call comes in on that port.
-The Plan 9
-<TT>lpdaemon</TT>
-will accept jobs sent from BSD LPR/LPD systems.
-The
-<TT>/$cputype/bin/aux/lpdaemon</TT>
-command is executed from the service call and it accepts print jobs, requests for status,
-and requests to kill jobs.
-The command
-<TT>/$cputype/bin/aux/lpsend</TT>
-is used to send jobs
-to other Plan 9 machines and is usually called from
-within a spooler or daemon script.
-The command
-<TT>/$cputype/bin/aux/lpdsend</TT>
-is used to send jobs
-to machines and printers that use the BSD LPR/LPD protocol and is also usually called from
-within a spooler or daemon script.
-</P>
-<H4>8 Acknowledgements
-</H4>
-<P>
-Special thanks to Rich Drechsler for supplying and maintaining most of
-the PostScript translation and interface programs,
-without which
-<I>lp</I>
-would be an empty shell.
-Tomas Rokicki provided the
-TeX
-to PostScript
-translation program.
-</P>
-<H4>9 References
-</H4>
-<br>&#32;<br>
-[Camp86] Ralph Campbell,
-``4.3BSD Line Printer Spooler Manual'', UNIX System Manager's Manual,
-May, 1986, Berkeley, CA
-<br>
-[RFC1179] Request for Comments: 1179, Line Printer Daemon Protocol, Aug 1990
-<br>
-[Sys5] System V manual, date unknown
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1594
sys/doc/mk.html

@@ -1,1594 +0,0 @@
-<html>
-<title>
-Maintaining Files on Plan 9 with Mk
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Maintaining Files on Plan 9 with Mk
-</H1>
-<DL><DD><I>Andrew G. Hume<br>
-andrew@research.att.com<br>
-Bob Flandrena<br>
-bobf@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<P>
-<TT>Mk</TT>
-is a tool
-for describing and maintaining dependencies between
-files.
-It is similar to the
-UNIX program
-<TT>make</TT>,
-but provides several extensions.
-<TT>Mk</TT>'<TT>s</TT>
-flexible rule specifications, implied
-dependency derivation, and parallel
-execution of maintenance actions are
-well-suited to the Plan 9 environment.
-Almost all Plan 9 maintenance procedures
-are automated using
-<TT>mk</TT>.
-</P>
-</DL>
-<H4>1 Introduction
-</H4>
-<P>
-This document describes how
-<TT>mk</TT>,
-a program functionally similar to
-<TT>make</TT>
-[Feld79],
-is used to maintain dependencies between
-files in Plan 9.
-<TT>Mk</TT>
-provides several extensions to the
-capabilities of its predecessor that work
-well in Plan 9's distributed, multi-architecture
-environment.  It
-exploits the power of multiprocessors by executing
-maintenance actions in parallel and interacts with
-the Plan 9 command interpreter
-<TT>rc</TT>
-to provide a powerful set of maintenance tools.
-It accepts pattern-based dependency specifications
-that are not limited to describing
-rules for program construction.
-The result is a tool that is flexible enough to
-perform many maintenance tasks including
-database maintenance,
-hardware design, and document production.
-</P>
-<P>
-This document begins by discussing 
-the syntax of the control file,
-the pattern matching capabilities, and
-the special rules for maintaining archives.
-A brief description of
-<TT>mk</TT>'<TT>s</TT>
-algorithm for deriving dependencies
-is followed by a discussion
-of the conventions used to resolve ambiguous
-specifications.  The final sections
-describe parallel execution
-and special features.
-</P>
-<P>
-An earlier paper [Hume87]
-provides a detailed discussion of
-<TT>mk</TT>'<TT>s</TT>
-design and an appendix summarizes
-the differences between
-<TT>mk</TT>
-and
-<TT>make</TT>.
-</P>
-<H4>2 The <TT>Mkfile</TT>
-</H4>
-<P>
-<TT>Mk</TT>
-reads a file describing relationships among files
-and executes commands to bring the files up to date.
-The specification file, called a
-<TT>mkfile</TT>,
-contains three types of statements:
-assignments, includes, and rules.
-Assignment and include statements are similar
-to those in C.
-Rules specify dependencies between a
-<I>target</I>
-and its
-<I>prerequisites</I>.
-When the target and prerequisites are files, their
-modification times determine if they
-are out of date.  Rules often contain a
-<I>recipe</I>,
-an
-<A href="/magic/man2html/1/rc"><I>rc</I>(1)
-</A>script that produces the target from
-the prerequisites.
-</P>
-<P>
-This simple
-<TT>mkfile</TT>
-produces an executable
-from a C source file:
-<DL><DT><DD><TT><PRE>
-CC=pcc
-f1:	f1.c
-	$CC -o f1 f1.c
-</PRE></TT></DL>
-The first line assigns the name of the portable ANSI/POSIX compiler
-to the
-<TT>mk</TT>
-variable
-<TT>CC</TT>;
-subsequent references of the form
-<TT>$CC</TT>
-select this compiler.
-The only rule specifies a dependence between the target file
-<TT>f1</TT>
-and the prerequisite file
-<TT>f1.c</TT>.
-If the target does not exist or if the
-prerequisite has been modified more recently than
-the target,
-<TT>mk</TT>
-passes the recipe to
-<TT>rc</TT>
-for execution.  Here,
-<TT>f1.c</TT>
-is compiled and loaded to produce
-<TT>f1</TT>.
-</P>
-<P>
-The native Plan 9 environment
-requires executables for
-all architectures, not only the current one.
-The Plan 9 version of the same
-<TT>mkfile</TT>
-looks like:
-<DL><DT><DD><TT><PRE>
-&lt;/$objtype/mkfile
-
-f1:	f1.$O
-	$LD $LDFLAGS -o f1 f1.$O
-f1.$O:	f1.c
-	$CC $CFLAGS f1.c
-</PRE></TT></DL>
-The first line is an include statement
-that replaces itself with the contents of the file
-<TT>/$objtype/mkfile</TT>.
-The variable
-<TT>$objtype</TT>
-is inherited from the environment and
-contains the name of the target architecture.
-The prototype
-<TT>mkfile</TT>
-for that architecture defines architecture-specific variables:
-<TT>CC</TT>
-and
-<TT>LD</TT>
-are the names of the compiler and loader,
-<TT>O</TT>
-is the code character of the architecture.
-The rules compile the source file into an object
-file and invoke the loader to produce
-<TT>f1</TT>.
-Invoking
-<TT>mk</TT>
-from the command line as follows
-<DL><DT><DD><TT><PRE>
-% objtype=mips mk
-vc -w f1.c
-vl $LDFLAGS -o f1 f1.k
-%
-</PRE></TT></DL>
-produces the
-<TT>mips</TT>
-executable of program
-<TT>f1</TT>
-regardless of the current architecture type.
-</P>
-<P>
-We can extend the
-<TT>mkfile</TT>
-to build two programs:
-<DL><DT><DD><TT><PRE>
-&lt;/$objtype/mkfile
-ALL=f1 f2
-
-all:V:	$ALL
-
-f1:	f1.$O
-	$LD $LDFLAGS -o f1 f1.$O
-f1.$O:	f1.c
-	$CC $CFLAGS f1.c
-f2:	f2.$O
-	$LD $LDFLAGS -o f2 f2.$O
-f2.$O:	f2.c
-	$CC $CFLAGS f2.c
-</PRE></TT></DL>
-The target
-<TT>all</TT>,
-modified by the
-<I>attribute</I>
-<TT>V</TT>,
-builds both programs.
-The attribute identifies 
-<TT>all</TT>
-as a dummy target that is
-not related to a file of the same name;
-its precise effect is explained later.
-This example describes cascading dependencies:
-the first target depends on another which depends on a third and
-so on.
-Here, individual rules build each
-program; later we'll see how to do this with a
-general rule.
-</P>
-<H4>3 Variables and the environment
-</H4>
-<P>
-<TT>Mk</TT>
-does not distinguish between its
-internal variables and
-<TT>rc</TT>
-variables in the environment.
-When
-<TT>mk</TT>
-starts, it imports each environment variable into a
-<TT>mk</TT>
-variable of the same name.  Before executing a recipe,
-<TT>mk</TT>
-exports all variables, including those
-inherited from the environment,
-to the environment in which
-<TT>rc</TT>
-executes the recipe.
-</P>
-<P>
-There are several ways for a
-variable to take a value.
-It can be set with an assignment statement,
-inherited from the environment, or specified
-on the command line.
-<TT>Mk</TT>
-also maintains several special internal variables
-that are described in
-<A href="/magic/man2html/1/mk"><I>mk</I>(1).
-</A>Assignments have the following decreasing order of precedence:
-</P>
-<br>&#32;<br>
-<DL><DT><DD>
-1)  Command line assignment
-<br>
-2)  Assignment statement
-<br>
-3)  Imported from the environment
-<br>
-4)  Implicitly set by <TT>mk</TT>
-</DL>
-<br>&#32;<br>
-For example, a command line assignment overrides
-a value imported from the environment.
-<P>
-All variable values are strings.  They can be
-used for pattern matching and
-comparison but not for arithmetic.
-A
-<I>list</I>
-is a string containing several values separated by
-white space.  Each member is
-handled individually during pattern matching,
-target selection, and prerequisite evaluation.
-</P>
-<P>
-A
-<I>namelist</I>
-is a list produced by
-transforming the members of an existing list.
-The transform applies a pattern to each member,
-replacing each matched string with a new string,
-much as in the substitute command in
-<A href="/magic/man2html/1/sam"><I>sam</I>(1)
-</A>or
-<A href="/magic/man2html/1/ed"><I>ed</I>(1).
-</A>The syntax is
-<DL><DT><DD><TT><PRE>
-${<I>var</I>:A%B=C%D}
-</PRE></TT></DL>
-where
-<I>var</I>
-is a variable.
-The pattern
-<TT>A%B</TT>
-matches a member beginning with the string
-<I>A</I>
-and ending with the string
-<I>B</I>
-with any string in between;
-it behaves like the regular expression
-<TT>A.*B</TT>.
-When a member of the
-<I>var</I>
-list
-matches this pattern,
-the string
-<I>C</I>
-replaces
-<I>A</I>,
-<I>D</I>
-replaces
-<I>B</I>,
-and the matched string replaces itself.
-Any of
-<I>A</I>,
-<I>B</I>,
-<I>C</I>,
-or
-<I>D</I>
-may be the empty string.  In effect, a namelist is
-generated by applying the
-<A href="/magic/man2html/1/ed"><I>ed</I>(1)
-</A>substitute command
-<DL><DT><DD><TT><PRE>
-	s/<I>A</I>(.*)<I>B</I>/<I>C</I>\1<I>D</I>/
-</PRE></TT></DL>
-to each member of a variable list.
-</P>
-<P>
-Namelists are useful for generating
-a list based on a predictable transformation.
-For example,
-<DL><DT><DD><TT><PRE>
-	SRC=a.c b.c c.c
-	OBJ=${SRC:%.c=%.v}
-</PRE></TT></DL>
-assigns the list <TT>(a.v b.v c.v)</TT> to
-<TT>OBJ</TT>.
-A namelist may be used anywhere a variable is allowed
-except in a recipe.
-</P>
-<P>
-Command output is assigned to a variable
-using the normal
-<TT>rc</TT>
-syntax:
-<DL><DT><DD><TT><PRE>
-	var=`{rc command}
-</PRE></TT></DL>
-The command executes in an environment populated
-with previously assigned variables, including those
-inherited from
-<TT>mk</TT>'<TT>s</TT>
-execution environment.
-The command may
-be arbitrarily complex; for example,
-<DL><DT><DD><TT><PRE>
-	TARG=`{ls -d *.[cy] | sed 's/..$//'}
-</PRE></TT></DL>
-assigns a list of the C and yacc source files in the current
-directory, stripped of their suffix, to the variable
-<TT>TARG</TT>.
-</P>
-<H4>4 The include statement
-</H4>
-<P>
-The include statement
-replaces itself with the contents of a file.
-It is functionally similar to the C
-<TT>#include</TT>
-statement but uses a different syntax:
-<DL><DT><DD><TT><PRE>
-	&lt;<I>filename</I>
-</PRE></TT></DL>
-The contents of the file are evaluated
-as they are read.
-An include statement may be used anywhere except
-in a recipe.
-</P>
-<P>
-Unlike
-<TT>make</TT>,
-<TT>mk</TT>
-has no built-in rules.  Instead,
-the include statement allows generic rules
-to be imported from a prototype
-<TT>mkfile</TT>;
-most Plan 9
-<TT>mkfiles</TT>
-use this approach [Flan95].
-</P>
-<H4>5 Rules
-</H4>
-<P>
-A rule has four elements: targets,
-prerequisites, attributes, and a recipe.
-It has the form:
-<DL><DT><DD><TT><PRE>
-<I>targets</I>:<I>attributes</I>:<I>prerequisites</I>
-	<I>recipe</I>
-</PRE></TT></DL>
-The first line, containing the
-targets, attributes, and prerequisites is
-the
-<I>rule header</I>;
-it
-must begin at the left margin.
-The recipe contains zero or more lines,
-each of which begins with white space.
-One or more targets must be specified but the
-attributes, prerequisites, and recipe are optional.
-A rule specifies
-a dependency between the target(s) and its prerequisite(s),
-the recipe brings the target(s)
-up to date with the prerequisite(s) and
-attributes modify
-<TT>mk</TT>'<TT>s</TT>
-evaluation of the dependency.
-</P>
-<P>
-Normally the target is a file that depends
-on one or more prerequisite files.
-<TT>Mk</TT>
-compares the modification times of each target
-and each prerequisite; a target is considered out of date
-when it does not exist or when a prerequisite has been modified
-more recently.
-When a target is out of date,
-<TT>mk</TT>
-executes the
-recipe to bring it up to date.
-When the recipe completes,
-the modification time of the target is checked and
-used in later dependency evaluations.
-If the recipe does not update the target,
-evaluation continues with the out of date target.
-</P>
-<P>
-A prerequisite of one rule
-may be the target of another.  When
-this happens, the rules cascade
-to define a multi-step procedure.
-For example,
-an executable target depends on prerequisite
-object files, each of which is a target
-in a rule with a C source file as the prerequisite.
-<TT>Mk</TT>
-follows a chain of dependencies until it encounters
-a prerequisite that is not a target of another rule
-or it finds a target that
-is up to date.  It then
-executes the recipes in reverse order to produce
-the desired target.
-</P>
-<P>
-The rule header is evaluated when the rule is read.
-Variables are replaced by their values, namelists are
-generated, and
-commands are replaced by their
-output at this time.
-</P>
-<P>
-Most attributes modify
-<TT>mk</TT>'<TT>s</TT>
-evaluation of a rule.
-An attribute is usually a single letter but some
-are more complicated.
-This paper only discusses commonly used attributes;
-see
-<A href="/magic/man2html/1/mk"><I>mk</I>(1)
-</A>for a complete list.
-</P>
-<P>
-The
-<TT>V</TT>
-attribute identifies a
-<I>virtual</I>
-target;
-that is, a target that is not a file.
-For example,
-<DL><DT><DD><TT><PRE>
-clean:V:
-	rm *.$O $O.out
-</PRE></TT></DL>
-removes executables and compiler intermediate files.
-The target is virtual because it does not refer to a file named
-<TT>clean</TT>.
-Without the attribute, the recipe would not be
-executed if a file named
-<TT>clean</TT>
-existed.
-The
-<TT>Q</TT>
-attribute
-silences the printing of a recipe before
-execution.
-It is useful when the output of a recipe is
-similar to the recipe:
-<DL><DT><DD><TT><PRE>
-default:QV:
-	echo 'No default target; use mk all or mk install'
-</PRE></TT></DL>
-</P>
-<P>
-The recipe is an
-<TT>rc</TT>
-script.  It is optional but when it is
-missing, the rule is handled specially, as described later.
-Unlike
-<TT>make</TT>,
-<TT>mk</TT>
-executes recipes without interpretation.
-After
-stripping the first white space character from each line
-it passes the entire recipe to
-<TT>rc</TT>
-on standard input.
-Since
-<TT>mk</TT>
-does not interpret a recipe,
-escape conventions are exactly those of
-<TT>rc</TT>.
-Scripts for
-<TT>awk</TT>
-and
-<TT>sed</TT>
-commands can be embedded exactly as they would
-be entered from the command line.
-<TT>Mk</TT>
-invokes
-<TT>rc</TT>
-with the
-<TT>-e</TT>
-flag, which causes
-<TT>rc</TT>
-to stop if any command
-in the recipe exits with a non-zero status; the
-<TT>E</TT>
-attribute overrides this behavior and allows
-<TT>rc</TT>
-to continue executing in the face of errors.
-Before a recipe is executed, variables are exported
-to the environment where they are available to
-<TT>rc</TT>.
-Commands in the recipe may not read from
-standard input because
-<TT>mk</TT>
-uses it internally.
-</P>
-<P>
-References to a variable can yield different
-values depending on the location of the
-reference in the
-<TT>mkfile</TT>.
-<TT>Mk</TT>
-resolves variable references
-in assignment statements and rule headers
-when the statement is read.  Variable references
-in recipes are evaluated by
-<TT>rc</TT>
-when the recipe is executed; this
-happens after the entire
-<TT>mkfile</TT>
-has been read.  The value of a variable in a recipe
-is the last value assigned in the file.  For example,
-<DL><DT><DD><TT><PRE>
-STRING=all
-
-all:VQ:
-	echo $STRING
-STRING=none
-</PRE></TT></DL>
-produces the message
-<TT>none</TT>.
-A variable assignment in a recipe
-does not affect the value of the variable in the
-<TT>mkfile</TT>
-for two reasons.
-First,
-<TT>mk</TT>
-does not import values from
-the environment when a recipe completes;
-one recipe cannot pass a value through
-the environment to another recipe.
-Second, no recipe is executed until 
-<TT>mk</TT>
-has completed its evaluation, so even if a variable
-were changed,
-it would not affect the dependency evaluation.
-</P>
-<H4>6 Metarules
-</H4>
-<P>
-A
-<I>metarule</I>
-is a rule based on a pattern.
-The pattern selects a class of target(s) and 
-identifies related prerequisites.
-<TT>Mk</TT>
-metarules may select targets and prerequisites
-based on any criterion that can be described by a pattern, not just
-the suffix transformations associated with program
-construction.
-</P>
-<P>
-Metarule patterns are either
-<I>intrinsic</I>
-or regular expressions conforming to the
-syntax of
-<A href="/magic/man2html/6/regexp"><I>regexp</I>(6).
-</A>The intrinsic patterns are shorthand
-for common regular expressions.
-The intrinsic pattern
-<TT>%</TT>
-matches one or more of anything; it is equivalent to
-the regular expression
-<TT>`.+'</TT>.
-The other intrinsic pattern,
-<TT>&</TT>,
-matches one or more of any characters except <TT>`/'</TT>
-and <TT>`.'</TT>.
-It matches a portion of a path and is
-equivalent to the regular expression
-<TT>`[^./]+'</TT>.
-An intrinsic pattern in a prerequisite references
-the string matched by the same intrinsic pattern in the target.
-For example, the rule
-<DL><DT><DD><TT><PRE>
-	%.v:	%.c
-</PRE></TT></DL>
-says that a file ending in
-<TT>.v</TT>
-depends on a file of the same name with a
-<TT>.c</TT>
-suffix:
-<TT>foo.v</TT>
-depends on
-<TT>foo.c</TT>,
-<TT>bar.v</TT>
-depends on
-<TT>bar.c</TT>,
-and so on.
-The string matched by an intrinsic pattern in the target
-is supplied to the recipe in the variable
-<TT>$stem</TT>.
-Thus the rule
-<DL><DT><DD><TT><PRE>
-%.$O:	%.c
-	$CC $CFLAGS $stem.c
-</PRE></TT></DL>
-creates an object file for the target architecture from
-a similarly named C source file.  If several object
-files are out of date, the rule is applied repeatedly and
-<TT>$stem</TT>
-refers to each file in turn.
-Since there is only one
-<TT>stem</TT>
-variable, there can only be one
-<TT>%</TT>
-or
-<TT>&amp;</TT>
-pattern in a target;
-the pattern
-<TT>%-%.c</TT>
-is illegal.
-</P>
-<P>
-Metarules simplify the
-<TT>mkfile</TT>
-for building programs
-<TT>f1</TT>
-and
-<TT>f2</TT>:
-<DL><DT><DD><TT><PRE>
-&lt;/objtype/mkfile
-
-ALL=f1 f2
-
-all:V:	<I>ALL
-
-%:	%.</I>O
-	<I>LD -o </I>target <I>prereq
-%.</I>O:	%.c
-	<I>CC </I>CFLAGS <I>stem.c
-clean:V:
-	rm -f </I>ALL *.[<I>OS]
-</PRE></TT></DL>
-(The variable
-</I><TT></TT><I>OS</I><TT>
-is a list of code characters for all architectures.)
-Here, metarules specify
-compile and load steps for all C source files.
-The loader rule relies on two internal variables
-set by
-</TT><TT>mk</TT><TT>
-during evaluation of the rule:
-</TT><TT></TT><I>target</I><TT>
-is the name of the target(s) and
-</TT><TT></TT><TT>prereq</TT><TT>
-the name of all prerequisite(s).
-Metarules allow this
-</TT><TT>mkfile</TT><TT>
-to be easily extended; a new program
-is supported by adding its name to the third line.
-</P>
-</TT><P>
-A regular expression metarule must have an
-<TT>R</TT>
-attribute.
-Prerequisites may reference matching substrings in
-the target using the form
-<TT>\</TT><I>n</I><TT></TT><I>
-where
-</I><I>n</I><I>
-is a digit from 1 to 9 specifying the
-</I><I>n</I><I>th
-parenthesized sub-expression.  In a recipe,
-</I><TT></TT><I>stem</I><I>n</I><I></I><I>
-is the equivalent reference.
-For example, a compile rule could be
-specified using regular expressions:
-<DL><DT><DD><TT><PRE>
-(.+)\.</I>O:R:	\1.c
-	<I>CC </I>CFLAGS <I>stem1.c
-</PRE></TT></DL>
-Here,
-</I><TT>\1</TT><I>
-and
-</I><TT></TT><I>stem1</I><TT>
-refer to the name of the target object file without the
-suffix.  The variable
-</TT><TT></TT><I>stem</I><TT>
-associated with an intrinsic pattern is undefined
-in a regular expression metarule.
-</P>
-</TT><H4>7 Archives
-</H4>
-<P>
-<TT>Mk</TT>
-provides a special mechanism for maintaining an archive.
-An archive member is referenced using the form
-<TT></TT><I>lib</I><TT>(</TT><I>file</I><TT>)</TT><I>
-where
-</I><I>lib</I><I>
-is the name of the archive and 
-</I><I>file</I><I>
-is the name of the member.  Two rules define the
-dependency between an object file and its membership
-in an archive:
-<DL><DT><DD><TT><PRE>
-</I>LIB(foo.8):N:	foo.8
-<I>LIB:	</I>LIB(foo.8)
-	ar rv <I>LIB foo.8
-</PRE></TT></DL>
-The first rule establishes a dependency between the
-archive member and the object file.
-Normally,
-</I><TT>mk</TT><I>
-detects an error when a target does not exist and the rule
-contains no recipe; the
-</I><TT>N</TT><I>
-attribute overrides this behavior because the subsequent rule
-updates the member.
-The second
-rule establishes the dependency between the member and
-the archive; its recipe inserts the member
-into the archive.
-This two-step specification allows the modification time
-of the archive
-to represent the state of its members.  Other rules
-can then specify the archive as a prerequisite instead of
-listing each member.
-</P>
-</I><P>
-A metarule generalizes library maintenance:
-<DL><DT><DD><TT><PRE>
-LIB=lib.a
-OBJS=etoa.O atoe.<I>O ebcdic.</I>O
-
-<I>LIB(%):N:	%
-</I>LIB:	<I>{OBJS:%=</I>LIB(%)}
-	ar rv <I>LIB </I>OBJS
-</PRE></TT></DL>
-The namelist prerequisite of the
-<TT></TT><I>LIB</I><TT>
-target generates archive member names for each object file name;
-for example, 
-</TT><TT>etoa.</TT><TT>O</TT><TT>
-becomes
-</TT><TT>lib.a(etoa.</TT><I>O)</I><TT>.
-This formulation always updates all members.
-This is acceptable for a small archive, but may 
-be slow for a big one.
-The rule
-<DL><DT><DD><TT><PRE>
-</TT><I>LIB:	</I><I>{OBJS:%=</I><I>LIB(%)}
-	ar rv </I><I>LIB `{membername </I><I>newprereq}
-</PRE></TT></DL>
-only updates out of date object files.
-The internal variable
-</I><TT></TT><I>newprereq</I><TT>
-contains the names of the out of
-date prerequisites.  The
-</TT><TT>rc</TT><TT>
-script
-</TT><TT>membername</TT><TT>
-transforms an archive member specification into a file name:
-it translates
-</TT><TT>lib.a(etoa.</TT><TT>O)</TT><TT>
-into
-</TT><TT>etoa.</TT><I>O</I><TT>.
-</P>
-</TT><P>
-The
-<TT>mkfile</TT>
-<DL><DT><DD><TT><PRE>
-&lt;/$objtype/mkfile
-LIB=lib.a
-OBJS=etoa.$O atoe.$O ebcdic.$O
-
-prog:	main.$O $LIB
-	$LD -o $target $prereq
-
-$LIB(%):N:	%
-$LIB:	${OBJS:%=$LIB(%)}
-	ar rv $LIB $OBJS
-</PRE></TT></DL>
-builds a program by loading it with a library.
-</P>
-<H4>8 Evaluation algorithm
-</H4>
-<P>
-For each target of interest,
-<TT>mk</TT>
-uses the rules in a
-<TT>mkfile</TT>
-to build a data
-structure called a dependency graph.  The nodes of
-the graph represent targets and prerequisites;
-a directed arc
-from one node to another indicates that
-the file associated with the first node depends
-on the file associated with the second.
-When the
-<TT>mkfile</TT>
-has been completely read, the graph is analyzed.
-In the first step, implied dependencies are resolved by
-computing the
-<I>transitive closure</I>
-of the graph.
-This calculation extends the graph to include all
-targets that are potentially
-derivable from the rules in the
-<TT>mkfile</TT>.
-Next the graph is checked for cycles;
-<TT>make</TT>
-accepts cyclic dependencies, but
-<TT>mk</TT>
-does not allow them.
-Subsequent steps
-prune subgraphs that are irrelevant for producing the
-desired target and verify that there is only one way
-to build it.
-The recipes associated with the
-nodes on the longest path between the
-target and an out of date prerequisite
-are then executed in reverse order.
-</P>
-<P>
-The transitive closure calculation is sensitive to
-metarules; the patterns often select many potential targets
-and cause the graph to grow rapidly.
-Fortunately,
-dependencies associated with the desired target
-usually form a small part of the graph, so, after
-pruning, analysis is tractable.
-For example, the rules
-<DL><DT><DD><TT><PRE>
-%:	x.%
-	recipe1
-x.%:	%.k
-	recipe2
-%.k:	%.f
-	recipe3
-</PRE></TT></DL>
-produce a graph with four nodes for each file in the
-current directory.
-If the desired target is
-<TT>foo</TT>,
-<TT>mk</TT>
-detects the dependency between it
-and the original file
-<TT>foo.f</TT>
-through intermediate dependencies on
-<TT>foo.k</TT>
-and
-<TT>x.foo</TT>.
-Nodes associated with other files are deleted during pruning because
-they are irrelevant to the production of
-<TT>foo</TT>.
-</P>
-<P>
-<TT>Mk</TT>
-avoids infinite cycles by evaluating
-each metarule once.
-Thus, the rule
-<DL><DT><DD><TT><PRE>
-%:	%.z
-	cp $prereq $prereq.z
-</PRE></TT></DL>
-copies the prerequisite file once.
-</P>
-<H4>9 Conventions for evaluating rules
-</H4>
-<P>
-There must be only one
-way to build each target.  However, during evaluation
-metarule patterns often select potential targets that
-conflict with the
-targets of other rules.
-<TT>Mk</TT>
-uses several conventions to resolve ambiguities
-and to select the proper dependencies.
-</P>
-<P>
-When a target selects more than one rule,
-<TT>mk</TT>
-chooses a regular rule
-over a metarule.
-For example, the
-<TT>mkfile</TT>
-<DL><DT><DD><TT><PRE>
-&lt;/$objtype/mkfile
-
-FILES=f1.$O f2.$O f3.$O
-
-prog:	$FILES
-	$LD -o $target $prereq
-
-%.$O:	%.c
-	$CC $CFLAGS $stem.c
-
-f2.$O:	f2.c
-	$CC f2.c
-</PRE></TT></DL>
-contains two rules that could build
-<TT>f2.</TT><I>O</I><TT>.
-</TT><TT>Mk</TT><TT>
-selects the last rule because its target,
-</TT><TT>f2.</TT><TT>O</TT><TT>,
-is explicitly specified, while the 
-</TT><TT>%.</TT><I>O</I><TT>
-rule is a metarule.  In effect,
-the explicit rule for
-</TT><TT>f2.</TT><TT>O</TT><TT>
-overrides the general rule for building object files from
-C source files.
-</P>
-</TT><P>
-When a rule has a target and prerequisites but no recipe,
-those prerequisites are added to all other rules with
-recipes that have the same target.
-All prerequisites, regardless of where they were specified, are
-exported to the recipe in variable
-<TT></TT><I>prereq</I><TT>.
-For example, in
-<DL><DT><DD><TT><PRE>
-&lt;/</TT>objtype/mkfile
-
-FILES=f1.<I>O f2.</I>O f3.<I>O
-
-prog:	</I>FILES
-	<I>LD -o </I>target <I>prereq
-
-%.</I>O:	hdr.h
-
-%.<I>O:	%.c
-	</I>CC <I>CFLAGS </I>stem.c
-</PRE></TT></DL>
-the second rule adds
-<TT>hdr.h</TT>
-as a prerequisite of the compile metarule;
-an object file produced from a C source file
-depends on
-<TT>hdr.h</TT>
-as well as the source file.  Notice that the recipe of 
-the compile rule uses
-<TT></TT><I>stem.c</I><TT>
-instead of
-</TT><TT></TT><TT>prereq</TT><TT>
-because the latter specification would attempt to compile
-</TT><TT>hdr.h</TT><TT>.
-</P>
-</TT><P>
-When a target is virtual and there is no other rule with
-the same target,
-<TT>mk</TT>
-evaluates each prerequisite.
-For example, adding the rule
-<DL><DT><DD><TT><PRE>
-all:V:	prog
-</PRE></TT></DL>
-to the preceding example builds the executable
-when either
-<TT>prog</TT>
-or
-<TT>all</TT>
-is the specified target.  In effect, the
-<TT>all</TT>
-target is an alias for
-<TT>prog</TT>.
-</P>
-<P>
-When two rules have identical rule headers and both have
-recipes, the later rule replaces the former one.
-For example,
-if a file named
-<TT>mkrules</TT>
-contains
-<DL><DT><DD><TT><PRE>
-<I>O.out:	</I>OFILES
-	<I>LD </I>LFLAGS <I>OFILES
-%.</I>O:	%.c
-	<I>CC </I>CFLAGS <I>stem.c
-</PRE></TT></DL>
-the
-</I><TT>mkfile</TT><I>
-<DL><DT><DD><TT><PRE>
-OFILES=f1.</I>O f2.<I>O f3.</I>O
-
-&lt;mkrules
-
-<I>O.out:	</I>OFILES
-	<I>LD </I>LFLAGS -l <I>OFILES -lbio -lc
-</PRE></TT></DL>
-overrides the general loader rule with a special
-rule using a non-standard library search sequence.
-A rule is neutralized by overriding it with a rule
-with a null recipe:
-<DL><DT><DD><TT><PRE>
-&lt;mkrules
-
-</I>O.out:Q:	<I>OFILES
-	;
-</PRE></TT></DL>
-The
-</I><TT>Q</TT><I>
-attribute suppresses the printing of the semicolon.
-</P>
-</I><P>
-When a rule has no prerequisites, the recipe is executed
-only when the target does not exist.  For example,
-<DL><DT><DD><TT><PRE>
-marker:
-	touch $target
-</PRE></TT></DL>
-defines a rule to manage a marker file.
-If the file exists, it is considered up to date
-regardless of its modification time.
-When a virtual target has no prerequisites the
-recipe is always executed.
-The
-<TT>clean</TT>
-rule is of this type:
-<DL><DT><DD><TT><PRE>
-clean:V:
-	rm -f [$OS].out *.[$OS]
-</PRE></TT></DL>
-When a rule without prerequisites has multiple targets, the
-extra targets are aliases for the rule.
-For example, in
-<DL><DT><DD><TT><PRE>
-clean tidy nuke:V:
-	rm -f [$OS].out *.[$OS]
-</PRE></TT></DL>
-the
-rule can be invoked by any of three names.
-The first rule in a
-<TT>mkfile</TT>
-is handled specially:
-when
-<TT>mk</TT>
-is invoked without a command line target
-all targets of the first non-metarule are built.
-If that rule has multiple targets, the recipe
-is executed once for each target; normally, the recipe
-of a rule with multiple targets is only executed once.
-</P>
-<P>
-A rule applies to a target only when its prerequisites
-exist or can be derived.  More than one rule may have the
-same target as long as only one rule with a recipe
-remains applicable after the dependency evaluation completes.
-For example, consider a program built from C
-and assembler source files.  Two rules produce
-object files:
-<DL><DT><DD><TT><PRE>
-%.$O:	%.c
-	$CC $CFLAGS $stem.c
-%.$O:	%.s
-	$AS $AFLAGS $stem.s
-</PRE></TT></DL>
-As long as there are not two source files with names like
-<I>foo</I><TT>.c</TT>
-and
-<I>foo</I><TT>.s</TT>,
-<TT>mk</TT>
-can unambiguously select the proper rule.
-If both files exist,
-the rules are ambiguous
-and
-<TT>mk</TT>
-exits with an error message.
-</P>
-<P>
-In Plan 9, many programs consist of portable code stored
-in one directory and architecture-specific source stored in
-another.
-For example, the
-<TT>mkfile</TT>
-<DL><DT><DD><TT><PRE>
-&lt;/$objtype/mkfile
-
-FILES=f1.$O f2.$O f3.$O f3.$O
-
-prog:	$FILES
-	$LD -o $target $prereq
-
-%.$O:	%.c
-	$CC $CFLAGS $stem.c
-
-%.$O:	../port/%.c
-	$CC $CFLAGS ../port/$stem.c
-</PRE></TT></DL>
-builds the program named
-<TT>prog</TT>
-using portable code in directory
-<TT>../port</TT>
-and architecture-specific code in the current directory.
-As long as the
-names of the C source files in 
-<TT>../port</TT>
-do not conflict with the names of files in the current directory,
-<TT>mk</TT>
-selects the appropriate rule to build the object file.
-If like-named files exist in both directories, the
-specification is ambiguous and an explicit target
-must be specified to resolve the ambiguity.
-For example,
-adding the rule
-<DL><DT><DD><TT><PRE>
-f2.$O:	f2.c
-	$CC $CFLAGS f2.c
-</PRE></TT></DL>
-to the previous
-<TT>mkfile</TT>
-uses the architecture-specific version of
-<TT>f2.c</TT>
-instead of the portable one.
-Here, the explicit rule unambiguously
-documents which of the
-like-named source files is used to build the program.
-</P>
-<P>
-<TT>Mk</TT>'<TT>s</TT>
-heuristics can produce unintended results
-when rules are not carefully specified.
-For example, the rules that build
-object files from C or assembler source files
-<DL><DT><DD><TT><PRE>
-%.$O:	%.c
-	$CC $CFLAGS $stem.c
-%.$O:	%.s
-	$AS $AFLAGS $stem.s
-</PRE></TT></DL>
-illustrate a subtle pratfall.
-Adding a header file dependency to the compile rule
-<DL><DT><DD><TT><PRE>
-%.$O:	%.c hdr.h
-	$CC $CFLAGS $stem.c
-</PRE></TT></DL>
-produces the error message
-<DL><DT><DD><TT><PRE>
-don't know how to make '<I>file</I>.c'
-</PRE></TT></DL>
-when <I>file</I><TT>.s</TT> is an assembler
-source file.
-This occurs because 
-<I>file</I><TT>.s</TT>
-satisfies the assemble rule and
-<TT>hdr.h</TT>
-satisfies the compile rule, so
-either rule can potentially produce the target.
-When a prerequisite exists or can be
-derived,
-all other prerequisites in that
-rule header must exist or be derivable; here,
-the existence of
-<TT>hdr.h</TT>
-forces the evaluation of a C source file.
-Specifying the dependencies in different
-rules avoids this interpretation:
-<DL><DT><DD><TT><PRE>
-%.$O:	hdr.h
-%.$O:	%.c
-	$CC $CFLAGS $stem.c
-</PRE></TT></DL>
-Although
-<TT>hdr.h</TT>
-is an additional prerequisite of the compile rule,
-the two rules are evaluated independently and
-the existence of the C source file is not linked
-to the existence of the header file.
-However, this specification describes a different
-dependency.  Originally, only object
-files derived from C files depended on
-<TT>hdr.h</TT>;
-now all object files, including those built
-from assembler source, depend on the header file.
-</P>
-<P>
-Metarule patterns should be as restrictive as possible to
-prevent conflicts with other rules.
-Consider the
-<TT>mkfile</TT>
-<DL><DT><DD><TT><PRE>
-&lt;/objtype/mkfile
-BIN=/<I>objtype/bin
-PROG=foo
-
-install:V:	</I>BIN/<I>PROG
-
-%:	%.c
-	</I>CC <I>stem.c
-	</I>LD -o <I>target </I>stem.<I>O
-
-</I>BIN/%:	%
-	mv <I>stem </I>target
-</PRE></TT></DL>
-The first target builds an executable
-in the local directory; the second
-installs it in the directory
-of executables for the architecture.
-Invoking
-<TT>mk</TT>
-with the
-<TT>install</TT>
-target produces:
-<DL><DT><DD><TT><PRE>
-mk: ambiguous recipes for /mips/bin/foo:
-/mips/bin/foo &lt;-(mkfile:8)- /mips/bin/foo.c &lt;-(mkfile:12)- foo.c
-/mips/bin/foo &lt;-(mkfile:12)- foo &lt;-(mkfile:8)- foo.c
-</PRE></TT></DL>
-The prerequisite of the
-<TT>install</TT>
-rule,
-<TT></TT><I>BIN/</I><TT>PROG</TT><I>,
-matches both metarules because the
-</I><TT>%</TT><I>
-pattern matches everything.
-The
-</I><TT>&</TT><I>
-pattern restricts the compile rule to files in the
-current directory and avoids the conflict:
-<DL><DT><DD><TT><PRE>
-&amp;:	&amp;.c
-	</I><I>CC </I><I>stem.c
-	</I><I>LD -o </I><I>target </I><I>stem.</I><I>O
-</PRE></TT></DL>
-</P>
-</I><H4>10 Missing intermediates
-</H4>
-<P>
-<TT>Mk</TT>
-does not build a missing intermediate file if a target
-is up to date with the prerequisites of the intermediate.
-For example,
-when an executable is up to date with its source file,
-<TT>mk</TT>
-does not compile the source to create a missing object file.
-The evaluation only applies
-when a target is considered up to date by pretending that the
-intermediate exists.  Thus, it does not apply
-when the intermediate is a command line target
-or when it has no prerequisites.
-</P>
-<P>
-This capability is useful for
-maintaining archives.  We can modify the archive
-update recipe to remove object files after
-they are archived:
-<DL><DT><DD><TT><PRE>
-<I>LIB(%):N:	%
-</I>LIB:	<I>{OBJS:%=</I>LIB(%)}
-	names=`{membername <I>newprereq}
-	ar rv </I>LIB <I>names
-	rm -f </I>names
-</PRE></TT></DL>
-A subsequent
-<TT>mk</TT>
-does not remake the object files as long as the members
-of the archive remain up to date with the source files.
-The
-<TT>-i</TT>
-command line option overrides this behavior
-and causes all intermediates to be built.
-</P>
-<H4>11 Alternative out-of-date determination
-</H4>
-<P>
-Sometimes the modification time is not useful
-for deciding when a target and prerequisite are out of date.
-The
-<TT>P</TT>
-attribute replaces the default mechanism with the result of
-a command.  The command immediately follows the attribute
-and is repeatedly executed with each
-target and each prerequisite as its arguments;
-if its exit status is non-zero, they are considered out of date
-and the recipe is executed.  Consider the
-<TT>mkfile</TT>
-<DL><DT><DD><TT><PRE>
-foo.ref:Pcmp -s:	foo
-	cp <I>prereq </I>target
-</PRE></TT></DL>
-The command
-<DL><DT><DD><TT><PRE>
-cmp -s foo.ref foo
-</PRE></TT></DL>
-is executed and if 
-<TT>foo.ref</TT>
-differs from
-<TT>foo</TT>,
-the latter file is copied to the former.
-</P>
-<H4>12 Parallel processing
-</H4>
-<P>
-When possible,
-<TT>mk</TT>
-executes recipes in parallel.
-The variable
-<TT></TT><I>NPROC</I><TT>
-specifies the maximum number of simultaneously executing
-recipes.
-Normally it is imported from the environment,
-where the system has set it to the number of available processors.
-It can be decreased by assigning a new
-value and can be set to 1 to force single-threaded recipe execution.
-This is necessary when several targets access
-a common resource such as
-a status file or data base.
-When there is no dependency between targets,
-</TT><TT>mk</TT><TT>
-assumes the
-recipes can be
-executed concurrently.
-Normally, this allows
-multiple prerequisites to be built simultaneously;
-for example, the object file prerequisites of
-a load rule can be produced by compiling the source files in parallel.
-</TT><TT>Mk</TT><TT>
-does not define the order of execution of independent recipes.
-When the prerequisites of a rule are not independent,
-the dependencies between them should be specified in a rule or the
-</TT><TT>mkfile</TT><TT>
-should be single-threaded.
-For example, the archive update rules
-<DL><DT><DD><TT><PRE>
-</TT>LIB(%):N:	%
-<I>LIB:	</I>{OBJS:%=<I>LIB(%)}
-	ar rv </I>LIB `{membername <I>newprereq}
-</PRE></TT></DL>
-compile source files in parallel but update
-all members of the archive at once.
-It is a mistake to merge the two rules
-<DL><DT><DD><TT><PRE>
-</I>LIB(%):	%
-	ar rv <I>LIB </I>stem
-</PRE></TT></DL>
-because an
-<TT>ar</TT>
-command is executed for every
-member of the library.  Not only is this
-inefficient, but the archive is updated
-in parallel, making interference likely.
-</P>
-<P>
-The
-<TT></TT><I>nproc</I><TT>
-environment variable contains a number associated
-with the processor executing a recipe.
-It can be used to create unique
-names when the
-recipe may be executing simultaneously on several processors.
-Other maintenance tools provide mechanisms to control recipe
-scheduling explicitly [Cmel86], but
-</TT><TT>mk</TT>'<TT>s</TT>
-general rules are sufficient for all but the most unusual cases.
-</P>
-<H4>13 Deleting target files on errors
-</H4>
-<P>
-The
-<TT>D</TT>
-attribute
-causes
-<TT>mk</TT>
-to remove the target file when a
-recipe terminates prematurely.
-The error message describing the
-termination condition warns
-of the deletion.
-A partially built file is doubly dangerous:
-it is not only wrong, but is also
-considered to be up to date so
-a subsequent
-<TT>mk</TT>
-will not rebuild it.  For example,
-<DL><DT><DD><TT><PRE>
-pic.out:D:	mk.ms
-		pic prereq | tbl | troff -ms &gt; <I>target
-</PRE></TT></DL>
-produces the message
-<DL><DT><DD><TT><PRE>
-</I><TT>mk: pic mk.ms | ...  : exit status=rc 685: deleting 'pic.out'</TT><I>
-</PRE></TT></DL>
-if any program in the recipe exits with an error status.
-</P>
-</I><H4>14 Unspecified dependencies
-</H4>
-<P>
-The
-<TT>-w</TT>
-command line flag forces the
-files following the flag to be treated
-as if they were just modified.
-We can use this flag with a command that selects files
-to force a build based on the selection criterion.
-For example, if the declaration of
-a global variable named
-<I>var</I>
-is changed in a header file,
-all source files that reference
-it can be rebuilt with the command
-<DL><DT><DD><TT><PRE>
- mk -w`{grep -l <I>var</I> *.[cyl]}
-</PRE></TT></DL>
-</P>
-<H4>15 Conclusion
-</H4>
-<P>
-There are many programs related to
-<TT>make</TT>,
-each choosing a different balance between
-specialization and generality.
-<TT>Mk</TT>
-emphasizes generality but allows
-customization through its pattern specifications and
-include facilities.
-</P>
-<P>
-Plan 9 presents a difficult maintenance environment
-with its heterogeneous
-architectures and languages.
-<TT>Mk</TT>'<TT>s</TT>
-flexible specification language and simple
-interaction with
-<TT>rc</TT>
-work well in this environment.
-As a result,
-Plan 9 relies on
-<TT>mk</TT>
-to automate almost all maintenance.
-Tasks as diverse as updating the
-network data base, producing the manual,
-or building a release are expressed as
-<TT>mk</TT>
-procedures.
-</P>
-<H4>16 References
-</H4>
-<br>&#32;<br>
-[Cmel86] R. F. Cmelik,
-``Concurrent Make: A Distributed Program in Concurrent C'',
-AT&amp;T Bell Laboratories Technical Report, 1986.
-<br>&#32;<br>
-[Feld79] S. I. Feldman,
-``Make &#173; a program for maintaining computer programs'',
-Software Practice &amp; Experience,
-1979
-Vol 9 #4,
-pp. 255-266.
-<br>&#32;<br>
-[Flan95] Bob Flandrena,
-``Plan 9 Mkfiles'',
-this volume.
-<br>&#32;<br>
-[Hume87] A. G. Hume,
-``Mk: A Successor to Make'',
-USENIX Summer Conf. Proc.,
-Phoenix, Az.
-<H4>17 Appendix: Differences between
-<TT>make</TT>
-and
-<TT>mk</TT>
-</H4>
-<P>
-The differences between
-<TT>mk</TT>
-and
-<TT>make</TT>
-are:
-</P>
-<UL>
-<LI>
-<TT>Make</TT>
-builds targets when it needs them, allowing systematic use of side effects.
-<TT>Mk</TT>
-constructs the entire dependency graph before building any target.
-<LI>
-<TT>Make</TT>
-supports suffix rules and
-<TT>%</TT>
-metarules.
-<TT>Mk</TT>
-supports
-<TT>%</TT>
-and regular expression metarules.
-(Older versions of
-<TT>make</TT>
-support only suffix rules.)
-<LI>
-<TT>Mk</TT>
-performs transitive closure on metarules,
-<TT>make</TT>
-does not.
-<LI>
-<TT>Make</TT>
-supports cyclic dependencies,
-<TT>mk</TT>
-does not.
-<LI>
-<TT>Make</TT>
-evaluates recipes one line at a time, replacing variables by their values and
-executing some commands internally.
-<TT>Mk</TT>
-passes the entire recipe to the shell without
-interpretation or internal execution.
-<LI>
-<TT>Make</TT>
-supports parallel execution of single-line recipes when building
-the prerequisites for specified targets.
-<TT>Mk</TT>
-supports parallel execution of all recipes.
-(Older versions of
-<TT>make</TT>
-did not support parallel execution.)
-<LI>
-<TT>Make</TT>
-uses special targets (beginning with a period)
-to indicate special processing.
-<TT>Mk</TT>
-uses attributes to modify rule evaluation.
-<LI>
-<TT>Mk</TT>
-supports virtual
-targets that are independent of the file system.
-<LI>
-<TT>Mk</TT>
-allows non-standard out-of-date determination,
-<TT>make</TT>
-does not.
-</ul>
-<P>
-It is usually easy to convert a
-<TT>makefile</TT>
-to or from an equivalent
-<TT>mkfile</TT>.
-</P>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 666
sys/doc/mkfiles.html

@@ -1,666 +0,0 @@
-<html>
-<title>
-data
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Plan 9 Mkfiles
-</H1>
-<DL><DD><I>Bob Flandrena<br>
-bobf@plan9.bell-labs.com<br>
-</I></DL>
-<H4>Introduction
-</H4>
-<br>&#32;<br>
-Every Plan 9 source directory contains a file, called
-<TT>mkfile</TT>,
-specifying the rules for building the executable or
-library that is the product of the directory.
-<A href="/magic/man2html/1/mk"><I>mk</I>(1)
-</A>interprets the rules in the file, calculates
-the dependencies, and executes an
-<A href="/magic/man2html/1/rc"><I>rc</I>(1)
-</A>script to construct the product.
-If necessary components are supplied by
-neighboring directories or sub-directories, the mkfiles in those
-directories are first executed to build the components
-before the local construction proceeds.
-<br>&#32;<br>
-Most application source directories produce one of
-four types of product:
-a single executable, several
-executables, a local library, or
-a system library.
-Four generic
-mkfiles
-define the normal rules
-for building each type of product.  The simplest
-mkfiles need only
-list the components
-and include the appropriate
-generic
-mkfile 
-to do the work.
-More complex 
-mkfiles
-may supply additional rules
-to augment, modify, or override the generic rules.
-<H4>Using a Mkfile
-</H4>
-<br>&#32;<br>
-To build a product, change to the directory containing
-its source and invoke
-<I>mk</I>
-with the appropriate target as an argument.
-All mkfiles provide the following standard targets:
-<br><img src="data.19114400.gif"><br>
-<br>&#32;<br>
-If no target is specified on the
-<TT>mk</TT>
-command line, the
-<TT>all</TT>
-target is built by default.  In a directory
-producing multiple executables, there is
-no default target.
-<br>&#32;<br>
-In addition to the five standard targets,
-additional targets may be supplied by each
-generic mkfile or by the directory's mkfile.
-<br>&#32;<br>
-The environment variable
-<TT>NPROC</TT>
-is set by the system to the number of
-available processors.
-Setting
-this variable, either in the environment or in
-a mkfile, controls the amount of parallelism in
-the build.  For example, the command
-<DL><DT><DD><TT><PRE>
-	NPROC=1 mk
-</PRE></TT></DL>
-restricts a build to a single thread of execution.
-<H4>Creating a Mkfile
-</H4>
-<br>&#32;<br>
-The easiest way to build a new mkfile is to copy and modify
-an existing mkfile of the same type.
-Failing that, it is usually possible to create a new
-mkfile with minimal effort, since the appropriate
-generic mkfile predefines the rules that do all the work.
-In the simplest and most common cases, the new mkfile
-need only define a couple of variables and include the appropriate
-architecture-specific
-and generic mkfiles.
-<H4></H4>
-<br>&#32;<br>
-There are four generic mkfiles containing commonly
-used rules for building a product:
-<TT>mkone</TT>,
-<TT>mkmany</TT>,
-<TT>mklib</TT>,
-and
-<TT>mksyslib</TT>.
-These rules
-perform such actions as compiling C source files,
-loading object files, archiving libraries, and
-installing executables in the
-<TT>bin</TT>
-directory of the appropriate architecture.
-The generic mkfiles are stored in directory
-<TT>/sys/src/cmd</TT>.
-Mkfile
-<TT>mkone</TT>
-builds a single executable,
-<TT>mkmany</TT>
-builds several executables from the source in a single
-directory, and
-<TT>mklib</TT>
-and
-<TT>mksyslib</TT>
-maintain local and system libraries, respectively.
-The rules in the generic mkfiles are driven by
-the values of variables, some of which must be
-set by the product mkfile and some of which are
-supplied by the generic mkfile.  Variables in the
-latter class include:
-<br><img src="data.19114401.gif"><br>
-<br>&#32;<br>
-The following variables are set by the product mkfile
-and used by the generic mkfile.
-Any may be empty depending on the specific product being
-made.
-<br><img src="data.19114402.gif"><br>
-<H4>Mkfile Organization
-</H4>
-<br>&#32;<br>
-All
-mkfiles
-share the following common structure:
-<DL><DT><DD><TT><PRE>
-&lt;/<I>objtype/mkfile	# </I>architecture-dependent definitions<I>
-<br>&#32;<br>
-</I><I>variable definitions</I><I>		# TARG</I>, <I>OFILES</I>, <I>HFILES</I>, etc.<I>
-<br>&#32;<br>
-&lt;/sys/src/cmd/</I><I>generic</I><I>	# mkone</I>, <I>mkmany</I>, <I>mklib</I>, or <I>mksyslib
-<br>&#32;<br>
-</I><I>variable overrides</I><I>		# CFLAGS</I>, <I>objtype</I>, etc.<I>
-<br>&#32;<br>
-</I><I>extra rules</I><I>			# </I>overrides, augmented rules, additional targets<I>
-</PRE></TT></DL>
-Note that the architecture-dependent mkfiles include file
-</I><TT>/sys/src/mkfile.proto</TT><I>
-for system-wide variables that are common to all architectures.
-</I><br>&#32;<br>
-The variables driving the expansion of the generic mkfile
-may be specified in any order as long as they are defined
-before the inclusion of the generic mkfile.  The value
-of a variable may be changed by assigning a new value
-following the inclusion of the generic mkfile, but the
-effects are sometimes counter-intuitive.
-Such variable assignments do not apply to the target and
-prerequisite portions of any previously defined rules;
-the new values only apply to the recipes of rules preceding
-the assignment statement and
-to all parts of any rules following it.
-<br>&#32;<br>
-The rules supplied by the generic mkfile may
-be overridden or augmented.  The new rules must
-be specified after the inclusion of the generic
-mkfile.  If the target and prerequisite portion
-of the rule exactly match the target and prerequisite
-portion of a previously defined rule and the new rule contains
-a recipe, the new rule replaces the old one.
-If the target of a new rule exactly matches the
-target of a previous rule and one or more new
-prerequisites are specified and the new rule contains
-no recipe, the new prerequisites are added to the prerequisites
-of the old rule.
-<br>&#32;<br>
-Following sections discuss
-each generic mkfile in detail.
-<H4>Mkone
-</H4>
-<br>&#32;<br>
-The
-<TT>mkone</TT>
-generic mkfile contains rules for building
-a single executable from one or more files
-in a directory.
-The variable
-<TT>TARG</TT>
-specifies the name of the executable and
-variables
-<TT>OFILES</TT>
-and
-<TT>YFILES</TT>
-specify the object files and
-<TT>yacc</TT>
-source files used to build it.
-<TT>HFILES</TT>
-contains the names of the local header files
-included in all source files.
-<TT>BIN</TT>
-is the name of the directory where the executable
-is installed.
-<TT>LIB</TT>
-contains the names of local libraries used by the
-linker.  This variable is rarely needed
-as libraries referenced by a
-<TT>#pragma</TT>
-directive in an associated header file, including
-all system libraries, are automatically
-searched by the loader.
-<br>&#32;<br>
-If
-<TT>mk</TT>
-is executed without a target, the
-<TT>all</TT>
-target is built; it
-produces an executable in
-<TT></TT>O.out<TT>.
-Variable
-</TT><TT>HFILES</TT><TT>
-identifies the header files that
-are included in all or most of
-the C source files.  Occasionally,
-a program has other header files
-that are only used in some
-source files.  A
-header can be added to the prerequisites for
-those object files by adding a rule of
-the following form following the inclusion of generic mkfile
-</TT><TT>mkone</TT><TT>:
-<DL><DT><DD><TT><PRE>
-file.</TT><I>O:	header.h
-</PRE></TT></DL>
-</I><br>&#32;<br>
-The mkfile for a directory producing a single
-executable using the normal set of rules is
-trivial: a list of some files followed by the
-inclusion of
-<I>mkone.</I>
-For example, 
-<TT>/sys/src/cmd/diff/mkfile</TT>
-contains:
-<DL><DT><DD><TT><PRE>
-&lt; /objtype/mkfile
-
-TARG=diff
-OFILES=\
-	diffdir.<I>O\
-	diffio.</I>O\
-	diffreg.<I>O\
-	main.</I>O\
-
-HFILES=diff.h
-
-BIN=/<I>objtype/bin
-&lt;/sys/src/cmd/mkone
-</PRE></TT></DL>
-The more complex mkfile in
-</I><TT>/sys/src/cmd/awk</TT><I>
-overrides compiler and loader variables to
-select the ANSI/POSIX Computing Environment with appropriately
-defined command line variables.  It also overrides
-the default
-</I><TT>yacc</TT><I>
-rule to place the output source in file
-</I><TT>awkgram.c</TT><I>
-and the
-</I><TT>clean</TT><I>
-and
-</I><TT>nuke</TT><I>
-rules, so it can remove the non-standard intermediate
-files.  Finally, the last three rules build a version of
-</I><TT>maketab</TT><I>
-appropriate for the architecture where the
-</I><TT>mk</TT><I>
-is being
-run and then execute it to create source file
-</I><TT>proctab.c</TT><I>:
-<DL><DT><DD><TT><PRE>
-&lt;/</I>objtype/mkfile
-
-TARG=awk
-OFILES=re.<I>O\
-	lex.</I>O\
-	main.<I>O\
-	parse.</I>O\
-	proctab.<I>O\
-	tran.</I>O\
-	lib.<I>O\
-	run.</I>O\
-	awkgram.<I>O\
-
-HFILES=awk.h\
-	y.tab.h\
-	proto.h\
-
-YFILES=awkgram.y
-
-BIN=/</I>objtype/bin
-&lt;/sys/src/cmd/mkone
-CFLAGS=-c -D_REGEXP_EXTENSION -D_RESEARCH_SOURCE \
-	-D_BSD_EXTENSION -DUTF
-YFLAGS=-S -d -v
-CC=pcc
-LD=pcc
-cpuobjtype=`{sed -n 's/^O=//p' /<I>cputype/mkfile}
-
-y.tab.h awkgram.c:	</I>YFILES
-	<I>YACC -o awkgram.c </I>YFLAGS <I>prereq
-
-clean:V:
-	rm -f *.[</I>OS] [<I>OS].out [</I>OS].maketab y.tab.? y.debug\
-		 y.output <I>TARG
-
-nuke:V:
-	rm -f *.[</I>OS] [<I>OS].out [</I>OS].maketab y.tab.? y.debug\
-		 y.output awkgram.c <I>TARG
-
-proctab.c:	</I>cpuobjtype.maketab
-	./<I>cpuobjtype.maketab &gt;proctab.c
-
-</I>cpuobjtype.maketab:	y.tab.h maketab.c
-	objtype=<I>cputype
-	mk maketab.</I>cputype
-
-maketab.<I>cputype:V:	y.tab.h maketab.</I>O
-	<I>LD -o </I>O.maketab maketab.<I>O
-</PRE></TT></DL>
-</I><H4>Mkmany
-</H4>
-<br>&#32;<br>
-The
-<TT>mkmany</TT>
-generic mkfile builds several
-executables from the files in a
-directory.  It differs from the operation of
-<TT>mkone</TT>
-in three respects:
-<TT>TARG</TT>
-specifies the names of all executables,
-there is no default command-line target,
-and additional rules allow a single executable to
-be built or installed.
-<br>&#32;<br>
-The
-<TT>TARG</TT>
-variable specifies the names of all
-executables produced by the mkfile.  The
-rules assume the name of each executable is also
-the name of the file containing its
-<TT>main</TT>
-function.
-<TT>OFILES</TT>
-specifies files containing
-common subroutines loaded with all executables.
-Consider the mkfile:
-<DL><DT><DD><TT><PRE>
-&lt;/objtype/mkfile
-
-TARG=alpha beta
-OFILES=common.<I>O
-BIN=/</I>objtype/bin
-&lt;/sys/src/cmd/mkmany
-</PRE></TT></DL>
-It assumes the main functions for executables
-<TT>alpha</TT>
-and
-<TT>beta</TT>
-are in files
-<TT>alpha.</TT><I>O</I><TT>
-and
-</TT><TT>beta.</TT><TT>O</TT><TT>
-and that both programs use the subroutines
-in file
-</TT><TT>common.</TT><I>O</I><TT>.
-The
-</TT><TT>all</TT><TT>
-target builds all executables, leaving each in
-a file with a name of the form
-</TT><TT></TT><TT>O.</TT><I>progname</I><TT></TT><I>
-where
-</I><I>progname</I><I>
-is the name of the executable.  In this
-example the
-</I><TT>all</TT><I>
-target produces executables
-</I><TT></TT><I>O.alpha</I><TT>
-and 
-</TT><TT></TT><TT>O.beta</TT><TT>.
-</TT><br>&#32;<br>
-The
-<TT>mkmany</TT>
-rules provide additional
-targets for building a single
-executable:
-<br><img src="data.19114403.gif"><br>
-<H4>Mklib
-</H4>
-<br>&#32;<br>
-The
-<TT>mklib</TT>
-generic mkfile builds a local library.
-Since this form of mkfile constructs no
-executable, the
-<TT>TARG</TT>
-and
-<TT>BIN</TT>
-variables are not needed.  Instead, the
-<TT>LIB</TT>
-variable specifies the library
-to be built or updated.  Variable
-<TT>OFILES</TT>
-contains the names of the object files to be archived
-in the library.  The use of variables
-<TT>YFILES</TT>
-and
-<TT>HFILES</TT>
-does not change.  When possible, only the
-out-of-date members of the library are updated.
-<br>&#32;<br>
-The variable
-<TT>LIBDIR</TT>
-contains the name of the directory where the
-library is installed; by default it selects
-the current directory.  It can be overridden
-by assigning the new directory name after the
-point where
-<TT>mklib</TT>
-is included.
-<br>&#32;<br>
-The
-<TT>clean</TT>
-target removes object files and
-<TT>yacc</TT>
-intermediate files but does not touch the
-library.  The
-<TT>nuke</TT>
-target removes the library as well as the
-files removed by the
-<TT>clean</TT>
-target.  The command
-<DL><DD>
-<TT>mk -s clean all</TT>
-</DL>
-causes the existing library to be updated, or
-created if it doesn't already exist.  The command
-<DL><DD>
-<TT>mk -s nuke all</TT>
-</DL>
-forces the library to be rebuilt from scratch.
-<br>&#32;<br>
-The mkfile from
-<TT>/sys/src/cmd/upas/libString</TT>
-contains the following specifications to
-build the local library
-<TT>libString.a</TT><I>O</I><TT>
-for the object architecture referenced by
-</TT><TT></TT><TT>O</TT>:<TT></TT>
-<DL><DT><DD><TT><PRE>
-&lt;/<I>objtype/mkfile
-
-LIB=libString.a</I>O
-OFILES=	s_alloc.<I>O\
-	s_append.</I>O\
-	s_array.<I>O\
-	s_copy.</I>O\
-	s_getline.<I>O\
-	s_grow.</I>O\
-	s_nappend.<I>O\
-	s_parse.</I>O\
-	s_read.<I>O\
-	s_read_line.</I>O\
-	s_tolower.<I>O\
-
-&lt;/sys/src/cmd/mklib
-
-nuke:V:
-	mk clean
-	rm -f libString.a[</I>OS]
-</PRE></TT></DL>
-The override of the rule for target
-<TT>nuke</TT>
-removes the libraries for all architectures as
-opposed to the default recipe for this target
-which removes the library for the current architecture.
-<H4>Mksyslib
-</H4>
-<br>&#32;<br>
-The
-<TT>mksyslib</TT>
-generic mkfile is similar to the
-<TT>mklib</TT>
-mkfile except that it operates on a system library
-instead of a local library.
-The
-<TT>install</TT>
-and
-<TT>all</TT>
-targets are the same; since there is no local copy of
-the library, all updates are performed on the
-installed library.
-The rule for the
-<TT>nuke</TT>
-target is identical to that of the
-<TT>clean</TT>
-target; unlike the
-<TT>nuke</TT>
-target for local libraries,
-the library is never removed.
-<br>&#32;<br>
-No attempt is made to determine if individual library
-members are up-to-date; all members of a
-library are always updated.
-Special targets support manipulation of a single
-object file; the target
-<TT>objfile</TT>
-updates file
-<TT>objfile</TT><TT>.</TT><I>O</I><TT></TT><I>
-in the library of the current architecture and the target
-</I><TT>objfile.all</TT><I>
-updates
-</I><TT>objfile</TT><TT>.</TT><TT>O</TT><TT></TT><TT>
-in the libraries of all architectures.
-</TT><H4>Overrides
-</H4>
-<br>&#32;<br>
-The rules provided by a generic mkfile or
-the variables used to control the evaluation
-of those rules may be overridden in most
-circumstances.  Overrides
-must be specified in the product mkfile
-after the point where the generic
-mkfile is included; in general, variable
-and rule overrides occupy the end of a
-product mkfile.
-<br>&#32;<br>
-The value of a variable is overridden by
-assigning a new value to the variable.
-Most variable overrides modify the
-values of flags or the names of commands executed
-in recipes.  For example, the default value of
-<TT>CFLAGS</TT>
-is often overridden or augmented and
-the ANSI/POSIX Computing Environment is selected by
-setting the
-<TT>CC</TT>
-and
-<TT>LD</TT>
-variables to
-<TT>pcc.</TT>
-<br>&#32;<br>
-Modifying rules is trickier than modifying
-variables.  Additional constraints can be added
-to a rule by specifying the target and
-the new prerequisite.  For example,
-<DL><DT><DD><TT><PRE>
-%.<I>O:	header.h
-</PRE></TT></DL>
-adds file
-</I><TT>header.h</TT><I>
-to the set of prerequisites for all object files.
-There is no mechanism for adding additional
-commands to an existing recipe; if a
-recipe is unsatisfactory, the rule and its recipe
-must be completely overridden.
-A rule is overridden only when the replacement rule
-matches the target and prerequisite portions
-of the original rule exactly.  The recipe
-associated with the new rule
-then replaces the recipe of the original rule.
-For example,
-</I><TT>/sys/src/cmd/lex/mkfile</TT><I>
-overrides the default
-</I><TT>installall</TT><I>
-rule to perform the normal loop on all
-architectures and then copy a prototype file
-to the system library directory.
-<DL><DT><DD><TT><PRE>
-&lt;/</I>objtype/mkfile
-
-TARG=lex
-OFILES=lmain.<I>O\
-	y.tab.</I>O\
-	sub1.<I>O\
-	sub2.</I>O\
-	header.<I>O\
-
-HFILES=ldefs.h\
-
-YFILES=parser.y\
-
-BIN=/</I>objtype/bin
-&lt;/sys/src/cmd/mkone
-
-installall:V:
-	for(objtype in <I>CPUS)
-		mk install
-	cp ncform /sys/lib/lex
-</PRE></TT></DL>
-Another way to perform the same override is to
-add a dependency to the default
-</I><TT>installall</TT><I>
-rule that executes an additional rule to
-install the prototype file:
-<DL><DT><DD><TT><PRE>
-installall:V:	ncform.install
-
-ncform.install:V:
-	cp ncform /sys/lib/lex
-</PRE></TT></DL>
-</I><H4>Special Tricks
-</H4>
-<br>&#32;<br>
-Two special cases
-require extra deviousness.
-<br>&#32;<br>
-In the first, a file needed to build an
-executable is generated by a program that,
-in turn, is built from a source file that
-is not part of the product.  In this case,
-the
-executable must be built for the
-target architecture, but the intermediate
-executable must be built for the architecture
-<TT>mk</TT>
-is executing on.  The intermediate executable
-is built by recursively invoking
-<TT>mk</TT>
-with the appropriate target and the
-executing architecture as the target
-architecture.  When that
-<TT>mk</TT>
-completes, the intermediate is
-executed to generate the source file to
-complete the build for the target architecture.
-The earlier example of
-<TT>/sys/src/cmd/awk/mkfile</TT>
-illustrates this technique.
-<br>&#32;<br>
-Another awkward situation
-occurs when a directory contains
-source to build an executable as
-well as source for auxiliary executables
-that are not to be installed.  In this case
-the
-<TT>mkmany</TT>
-generic rules are inappropriate, because
-all executables would be built and installed.
-Instead, use the
-<TT>mkone</TT>
-generic file to build the primary executable
-and provide extra targets to
-build the auxiliary files.  This
-approach is also useful when the auxiliary
-files are not executables;
-<TT>/sys/src/cmd/spell/mkfile</TT>
-augments the default rules to build and install the
-<TT>spell</TT>
-executable with
-elaborate rules to generate
-and maintain the auxiliary spelling lists.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 695
sys/doc/names.html

@@ -1,695 +0,0 @@
-<html>
-<title>
-The Use of Name Spaces in Plan 9
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>The Use of Name Spaces in Plan 9
-</H1>
-<DL><DD><I>Rob Pike<br>
-Dave Presotto<br>
-Ken Thompson<br>
-Howard Trickey<br>
-Phil Winterbottom<br>
-Bell Laboratories, Murray Hill, NJ, 07974
-USA<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Appeared in
-Operating Systems Review,
-Vol. 27, #2, April 1993, pp. 72-76
-(reprinted from
-Proceedings of the 5th ACM SIGOPS European Workshop,
-Mont Saint-Michel, 1992, Paper n&#186; 34).
-</I><DT>&#32;<DD></dl>
-<br>
-Plan 9 is a distributed system built at the Computing Sciences Research
-Center of AT&amp;T Bell Laboratories (now Lucent Technologies, Bell Labs) over the last few years.
-Its goal is to provide a production-quality system for software
-development and general computation using heterogeneous hardware
-and minimal software.  A Plan 9 system comprises CPU and file
-servers in a central location connected together by fast networks.
-Slower networks fan out to workstation-class machines that serve as
-user terminals.  Plan 9 argues that given a few carefully
-implemented abstractions
-it is possible to
-produce a small operating system that provides support for the largest systems
-on a variety of architectures and networks. The foundations of the system are
-built on two ideas: a per-process name space and a simple message-oriented 
-file system protocol.
-</DL>
-<P>
-The operating system for the CPU servers and terminals is
-structured as a traditional kernel: a single compiled image
-containing code for resource management, process control,
-user processes,
-virtual memory, and I/O.  Because the file server is a separate
-machine, the file system is not compiled in, although the management
-of the name space, a per-process attribute, is.
-The entire kernel for the multiprocessor SGI Power Series machine
-is 25000 lines of C,
-the largest part of which is code for four networks including the
-Ethernet with the Internet protocol suite.
-Fewer than 1500 lines are machine-specific, and a
-functional kernel with minimal I/O can be put together from
-source files totaling 6000 lines. [Pike90]
-</P>
-<P>
-The system is relatively small for several reasons.
-First, it is all new: it has not had time to accrete as many fixes
-and features as other systems.
-Also, other than the network protocol, it adheres to no
-external interface; in particular, it is not Unix-compatible.
-Economy stems from careful selection of services and interfaces.
-Finally, wherever possible the system is built around
-two simple ideas:
-every resource in the system, either local or remote,
-is represented by a hierarchical file system; and
-a user or process
-assembles a private view of the system by constructing a file
-name space
-that connects these resources. [Needham]
-</P>
-<H4>File Protocol
-</H4>
-<P>
-All resources in Plan 9 look like file systems.
-That does not mean that they are repositories for
-permanent files on disk, but that the interface to them
-is file-oriented: finding files (resources) in a hierarchical
-name tree, attaching to them by name, and accessing their contents
-by read and write calls.
-There are dozens of file system types in Plan 9, but only a few
-represent traditional files.
-At this level of abstraction, files in Plan 9 are similar
-to objects, except that files are already provided with naming,
-access, and protection methods that must be created afresh for
-objects.  Object-oriented readers may approach the rest of this
-paper as a study in how to make objects look like files.
-</P>
-<P>
-The interface to file systems is defined by a protocol, called 9P,
-analogous but not very similar to the NFS protocol.
-The protocol talks about files, not blocks; given a connection to the root
-directory of a file server,
-the 9P messages navigate the file hierarchy, open files for I/O,
-and read or write arbitrary bytes in the files.
-9P contains 17 message types: three for
-initializing and
-authenticating a connection and fourteen for manipulating objects.
-The messages are generated by the kernel in response to user- or
-kernel-level I/O requests.
-Here is a quick tour of the major message types.
-The
-<TT>auth</TT>
-and
-<TT>attach</TT>
-messages authenticate a connection, established by means outside 9P,
-and validate its user.
-The result is an authenticated
-<I>channel</I>
-that points to the root of the
-server.
-The
-<TT>clone</TT>
-message makes a new channel identical to an existing channel,
-which may be moved to a file on the server using a
-<TT>walk</TT>
-message to descend each level in the hierarchy.
-The
-<TT>stat</TT>
-and
-<TT>wstat</TT>
-messages read and write the attributes of the file pointed to by a channel.
-The
-<TT>open</TT>
-message prepares a channel for subsequent
-<TT>read</TT>
-and
-<TT>write</TT>
-messages to access the contents of the file, while
-<TT>create</TT>
-and
-<TT>remove</TT>
-perform, on the files, the actions implied by their names.
-The
-<TT>clunk</TT>
-message discards a channel without affecting the file.
-None of the 9P messages consider caching; file caches are provided,
-when needed, either within the server (centralized caching)
-or by implementing the cache as a transparent file system between the
-client and the 9P connection to the server (client caching).
-</P>
-<P>
-For efficiency, the connection to local
-kernel-resident file systems, misleadingly called
-<I>devices,</I>
-is by regular rather than remote procedure calls.
-The procedures map one-to-one with 9P message types.
-Locally each channel has an associated data structure
-that holds a type field used to index
-a table of procedure calls, one set per file system type,
-analogous to selecting the method set for an object. 
-One kernel-resident file system, the
-mount device,
-translates the local 9P procedure calls into RPC messages to
-remote services over a separately provided transport protocol
-such as TCP or IL, a new reliable datagram protocol, or over a pipe to
-a user process.
-Write and read calls transmit the messages over the transport layer.
-The mount device is the sole bridge between the procedural
-interface seen by user programs and remote and user-level services.
-It does all associated marshaling, buffer
-management, and multiplexing and is
-the only integral RPC mechanism in Plan 9.
-The mount device is in effect a proxy object.
-There is no RPC stub compiler; instead the mount driver and
-all servers just share a library that packs and unpacks 9P messages.
-</P>
-<H4>Examples
-</H4>
-<P>
-One file system type serves
-permanent files from the main file server,
-a stand-alone multiprocessor system with a
-350-gigabyte
-optical WORM jukebox that holds the data, fronted by a two-level
-block cache comprising 7 gigabytes of
-magnetic disk and 128 megabytes of RAM.
-Clients connect to the file server using any of a variety of
-networks and protocols and access files using 9P.
-The file server runs a distinct operating system and has no
-support for user processes; other than a restricted set of commands
-available on the console, all it does is answer 9P messages from clients.
-</P>
-<P>
-Once a day, at 5:00 AM,
-the file server sweeps through the cache blocks and marks dirty blocks
-copy-on-write.
-It creates a copy of the root directory
-and labels it with the current date, for example
-<TT>1995/0314</TT>.
-It then starts a background process to copy the dirty blocks to the WORM.
-The result is that the server retains an image of the file system as it was
-early each morning.
-The set of old root directories is accessible using 9P, so a client
-may examine backup files using ordinary commands.
-Several advantages stem from having the backup service implemented
-as a plain file system.
-Most obviously, ordinary commands can access them.
-For example, to see when a bug was fixed
-<DL><DT><DD><TT><PRE>
-grep 'mouse bug fix' 1995/*/sys/src/cmd/8&#189;/file.c
-</PRE></TT></DL>
-The owner, access times, permissions, and other properties of the
-files are also backed up.
-Because it is a file system, the backup
-still has protections;
-it is not possible to subvert security by looking at the backup.
-</P>
-<P>
-The file server is only one type of file system.
-A number of unusual services are provided within the kernel as
-local file systems.
-These services are not limited to I/O devices such
-as disks.  They include network devices and their associated protocols,
-the bitmap display and mouse,
-a representation of processes similar to
-<TT>/proc</TT>
-[Killian], the name/value pairs that form the `environment'
-passed to a new process, profiling services,
-and other resources.
-Each of these is represented as a file system &#8212;
-directories containing sets of files &#8212;
-but the constituent files do not represent permanent storage on disk.
-Instead, they are closer in properties to UNIX device files.
-</P>
-<P>
-For example, the
-<I>console</I>
-device contains the file
-<TT>/dev/cons</TT>,
-similar to the UNIX file
-<TT>/dev/console</TT>:
-when written,
-<TT>/dev/cons</TT>
-appends to the console typescript; when read,
-it returns characters typed on the keyboard.
-Other files in the console device include
-<TT>/dev/time</TT>,
-the number of seconds since the epoch,
-<TT>/dev/cputime</TT>,
-the computation time used by the process reading the device,
-<TT>/dev/pid</TT>,
-the process id of the process reading the device, and
-<TT>/dev/user</TT>,
-the login name of the user accessing the device.
-All these files contain text, not binary numbers,
-so their use is free of byte-order problems.
-Their contents are synthesized on demand when read; when written,
-they cause modifications to kernel data structures.
-</P>
-<P>
-The
-<I>process</I>
-device contains one directory per live local process, named by its numeric
-process id:
-<TT>/proc/1</TT>,
-<TT>/proc/2</TT>,
-etc.
-Each directory contains a set of files that access the process.
-For example, in each directory the file
-<TT>mem</TT>
-is an image of the virtual memory of the process that may be read or
-written for debugging.
-The
-<TT>text</TT>
-file is a sort of link to the file from which the process was executed;
-it may be opened to read the symbol tables for the process.
-The
-<TT>ctl</TT>
-file may be written textual messages such as
-<TT>stop</TT>
-or
-<TT>kill</TT>
-to control the execution of the process.
-The
-<TT>status</TT>
-file contains a fixed-format line of text containing information about
-the process: its name, owner, state, and so on.
-Text strings written to the
-<TT>note</TT>
-file are delivered to the process as
-<I>notes,</I>
-analogous to UNIX signals.
-By providing these services as textual I/O on files rather
-than as system calls (such as
-<TT>kill</TT>)
-or special-purpose operations (such as
-<TT>ptrace</TT>),
-the Plan 9 process device simplifies the implementation of
-debuggers and related programs.
-For example, the command
-<DL><DT><DD><TT><PRE>
-cat /proc/*/status
-</PRE></TT></DL>
-is a crude form of the
-<TT>ps</TT>
-command; the actual
-<TT>ps</TT>
-merely reformats the data so obtained.
-</P>
-<P>
-The
-<I>bitmap</I>
-device contains three files,
-<TT>/dev/mouse</TT>,
-<TT>/dev/screen</TT>,
-and
-<TT>/dev/bitblt</TT>,
-that provide an interface to the local bitmap display (if any) and pointing device.
-The
-<TT>mouse</TT>
-file returns a fixed-format record containing
-1 byte of button state and 4 bytes each of
-<I>x</I>
-and
-<I>y</I>
-position of the mouse.
-If the mouse has not moved since the file was last read, a subsequent read will
-block.
-The
-<TT>screen</TT>
-file contains a memory image of the contents of the display;
-the
-<TT>bitblt</TT>
-file provides a procedural interface.
-Calls to the graphics library are translated into messages that are written
-to the
-<TT>bitblt</TT>
-file to perform bitmap graphics operations.  (This is essentially a nested
-RPC protocol.)
-</P>
-<P>
-The various services being used by a process are gathered together into the
-process's
-name space,
-a single rooted hierarchy of file names.
-When a process forks, the child process shares the name space with the parent.
-Several system calls manipulate name spaces.
-Given a file descriptor
-<TT>fd</TT>
-that holds an open communications channel to a service,
-the call
-<DL><DT><DD><TT><PRE>
-mount(int fd, char *old, int flags)
-</PRE></TT></DL>
-authenticates the user and attaches the file tree of the service to
-the directory named by
-<TT>old</TT>.
-The
-<TT>flags</TT>
-specify how the tree is to be attached to
-<TT>old</TT>:
-replacing the current contents or appearing before or after the
-current contents of the directory.
-A directory with several services mounted is called a
-<I>union</I>
-directory and is searched in the specified order.
-The call
-<DL><DT><DD><TT><PRE>
-bind(char *new, char *old, int flags)
-</PRE></TT></DL>
-takes the portion of the existing name space visible at
-<TT>new</TT>,
-either a file or a directory, and makes it also visible at
-<TT>old</TT>.
-For example,
-<DL><DT><DD><TT><PRE>
-bind("1995/0301/sys/include", "/sys/include", REPLACE)
-</PRE></TT></DL>
-causes the directory of include files to be overlaid with its
-contents from the dump on March first.
-</P>
-<P>
-A process is created by the
-<TT>rfork</TT>
-system call, which takes as argument a bit vector defining which
-attributes of the process are to be shared between parent
-and child instead of copied.
-One of the attributes is the name space: when shared, changes
-made by either process are visible in the other; when copied,
-changes are independent.
-</P>
-<P>
-Although there is no global name space,
-for a process to function sensibly the local name spaces must adhere
-to global conventions. 
-Nonetheless, the use of local name spaces is critical to the system.
-Both these ideas are illustrated by the use of the name space to
-handle heterogeneity.
-The binaries for a given architecture are contained in a directory
-named by the architecture, for example
-<TT>/mips/bin</TT>;
-in use, that directory is bound to the conventional location
-<TT>/bin</TT>.
-Programs such as shell scripts need not know the CPU type they are
-executing on to find binaries to run.
-A directory of private binaries
-is usually unioned with
-<TT>/bin</TT>.
-(Compare this to the
-ad hoc
-and special-purpose idea of the
-<TT>PATH</TT>
-variable, which is not used in the Plan 9 shell.)
-Local bindings are also helpful for debugging, for example by binding
-an old library to the standard place and linking a program to see
-if recent changes to the library are responsible for a bug in the program.
-</P>
-<P>
-The window system,
-<TT>8&#189;</TT>
-[Pike91], is a server for files such as
-<TT>/dev/cons</TT>
-and
-<TT>/dev/bitblt</TT>.
-Each client sees a distinct copy of these files in its local
-name space: there are many instances of
-<TT>/dev/cons</TT>,
-each served by
-<TT>8&#189;</TT>
-to the local name space of a window.
-Again,
-<TT>8&#189;</TT>
-implements services using
-local name spaces plus the use
-of I/O to conventionally named files.
-Each client just connects its standard input, output, and error files
-to
-<TT>/dev/cons</TT>,
-with analogous operations to access bitmap graphics.
-Compare this to the implementation of
-<TT>/dev/tty</TT>
-on UNIX, which is done by special code in the kernel
-that overloads the file, when opened,
-with the standard input or output of the process.
-Special arrangement must be made by a UNIX window system for
-<TT>/dev/tty</TT>
-to behave as expected;
-<TT>8&#189;</TT>
-instead uses the provision of the corresponding file as its
-central idea, which to succeed depends critically on local name spaces.
-</P>
-<P>
-The environment
-<TT>8&#189;</TT>
-provides its clients is exactly the environment under which it is implemented:
-a conventional set of files in
-<TT>/dev</TT>.
-This permits the window system to be run recursively in one of its own
-windows, which is handy for debugging.
-It also means that if the files are exported to another machine,
-as described below, the window system or client applications may be
-run transparently on remote machines, even ones without graphics hardware.
-This mechanism is used for Plan 9's implementation of the X window
-system: X is run as a client of
-<TT>8&#189;</TT>,
-often on a remote machine with lots of memory.
-In this configuration, using Ethernet to connect
-MIPS machines, we measure only a 10% degradation in graphics
-performance relative to running X on
-a bare Plan 9 machine.
-</P>
-<P>
-An unusual application of these ideas is a statistics-gathering
-file system implemented by a command called
-<TT>iostats</TT>.
-The command encapsulates a process in a local name space, monitoring 9P
-requests from the process to the outside world &#8212; the name space in which
-<TT>iostats</TT>
-is itself running.  When the command completes,
-<TT>iostats</TT>
-reports usage and performance figures for file activity.
-For example
-<DL><DT><DD><TT><PRE>
-iostats 8&#189;
-</PRE></TT></DL>
-can be used to discover how much I/O the window system
-does to the bitmap device, font files, and so on.
-</P>
-<P>
-The
-<TT>import</TT>
-command connects a piece of name space from a remote system
-to the local name space.
-Its implementation is to dial the remote machine and start
-a process there that serves the remote name space using 9P.
-It then calls
-<TT>mount</TT>
-to attach the connection to the name space and finally dies;
-the remote process continues to serve the files.
-One use is to access devices not available
-locally.  For example, to write a floppy one may say
-<DL><DT><DD><TT><PRE>
-import lab.pc /a: /n/dos
-cp foo /n/dos/bar
-</PRE></TT></DL>
-The call to
-<TT>import</TT>
-connects the file tree from
-<TT>/a:</TT>
-on the machine
-<TT>lab.pc</TT>
-(which must support 9P) to the local directory
-<TT>/n/dos</TT>.
-Then the file
-<TT>foo</TT>
-can be written to the floppy just by copying it across.
-</P>
-<P>
-Another application is remote debugging:
-<DL><DT><DD><TT><PRE>
-import helix /proc
-</PRE></TT></DL>
-makes the process file system on machine
-<TT>helix</TT>
-available locally; commands such as
-<TT>ps</TT>
-then see
-<TT>helix</TT>'s
-processes instead of the local ones.
-The debugger may then look at a remote process:
-<DL><DT><DD><TT><PRE>
-db /proc/27/text /proc/27/mem
-</PRE></TT></DL>
-allows breakpoint debugging of the remote process.
-Since
-<TT>db</TT>
-infers the CPU type of the process from the executable header on
-the text file, it supports
-cross-architecture debugging, too.
-Care is taken within
-<TT>db</TT>
-to handle issues of byte order and floating point; it is possible to
-breakpoint debug a big-endian MIPS process from a little-endian i386.
-</P>
-<P>
-Network interfaces are also implemented as file systems [Presotto].
-For example,
-<TT>/net/tcp</TT>
-is a directory somewhat like
-<TT>/proc</TT>:
-it contains a set of numbered directories, one per connection,
-each of which contains files to control and communicate on the connection.
-A process allocates a new connection by accessing
-<TT>/net/tcp/clone</TT>,
-which evaluates to the directory of an unused connection.
-To make a call, the process writes a textual message such as
-<TT>'connect</TT>
-<TT>135.104.53.2!512'</TT>
-to the
-<TT>ctl</TT>
-file and then reads and writes the
-<TT>data</TT>
-file.
-An
-<TT>rlogin</TT>
-service can be implemented in a few lines of shell code.
-</P>
-<P>
-This structure makes network gatewaying easy to provide.
-We have machines with Datakit interfaces but no Internet interface.
-On such a machine one may type
-<DL><DT><DD><TT><PRE>
-import helix /net
-telnet tcp!ai.mit.edu
-</PRE></TT></DL>
-The
-<TT>import</TT>
-uses Datakit to pull in the TCP interface from
-<TT>helix</TT>,
-which can then be used directly; the
-<TT>tcp!</TT>
-notation is necessary because we routinely use multiple networks
-and protocols on Plan 9 &#8212; it identifies the network in which
-<TT>ai.mit.edu</TT>
-is a valid name.
-</P>
-<P>
-In practice we do not use
-<TT>rlogin</TT>
-or
-<TT>telnet</TT>
-between Plan 9 machines.  Instead a command called
-<TT>cpu</TT>
-in effect replaces the CPU in a window with that
-on another machine, typically a fast multiprocessor CPU server.
-The implementation is to recreate the
-name space on the remote machine, using the equivalent of
-<TT>import</TT>
-to connect pieces of the terminal's name space to that of
-the process (shell) on the CPU server, making the terminal
-a file server for the CPU.
-CPU-local devices such as fast file system connections
-are still local; only terminal-resident devices are
-imported.
-The result is unlike UNIX
-<TT>rlogin</TT>,
-which moves into a distinct name space on the remote machine,
-or file sharing with
-<TT>NFS</TT>,
-which keeps the name space the same but forces processes to execute
-locally.
-Bindings in
-<TT>/bin</TT>
-may change because of a change in CPU architecture, and
-the networks involved may be different because of differing hardware,
-but the effect feels like simply speeding up the processor in the
-current name space.
-</P>
-<H4>Position
-</H4>
-<P>
-These examples illustrate how the ideas of representing resources
-as file systems and per-process name spaces can be used to solve
-problems often left to more exotic mechanisms.
-Nonetheless there are some operations in Plan 9 that are not
-mapped into file I/O.
-An example is process creation.
-We could imagine a message to a control file in
-<TT>/proc</TT>
-that creates a process, but the details of
-constructing the environment of the new process &#8212; its open files,
-name space, memory image, etc. &#8212; are too intricate to
-be described easily in a simple I/O operation.
-Therefore new processes on Plan 9 are created by fairly conventional
-<TT>rfork</TT>
-and
-<TT>exec</TT>
-system calls;
-<TT>/proc</TT>
-is used only to represent and control existing processes.
-</P>
-<P>
-Plan 9 does not attempt to map network name spaces into the file
-system name space, for several reasons.
-The different addressing rules for various networks and protocols
-cannot be mapped uniformly into a hierarchical file name space.
-Even if they could be,
-the various mechanisms to authenticate,
-select a service,
-and control the connection would not map consistently into
-operations on a file.
-</P>
-<P>
-Shared memory is another resource not adequately represented by a
-file name space.
-Plan 9 takes care to provide mechanisms
-to allow groups of local processes to share and map memory.
-Memory is controlled
-by system calls rather than special files, however,
-since a representation in the file system would imply that memory could
-be imported from remote machines.
-</P>
-<P>
-Despite these limitations, file systems and name spaces offer an effective
-model around which to build a distributed system.
-Used well, they can provide a uniform, familiar, transparent
-interface to a diverse set of distributed resources.
-They carry well-understood properties of access, protection,
-and naming.
-The integration of devices into the hierarchical file system
-was the best idea in UNIX.
-Plan 9 pushes the concepts much further and shows that
-file systems, when used inventively, have plenty of scope
-for productive research.
-</P>
-<H4>References
-</H4>
-<br>&#32;<br>
-[Killian] T. Killian, ``Processes as Files'', USENIX Summer Conf. Proc., Salt Lake City, 1984
-<br>
-[Needham] R. Needham, ``Names'', in
-Distributed systems,
-S. Mullender, ed.,
-Addison Wesley, 1989
-<br>
-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
-``Plan 9 from Bell Labs'',
-UKUUG Proc. of the Summer 1990 Conf.,
-London, England,
-1990
-<br>
-[Presotto] D. Presotto, ``Multiprocessor Streams for Plan 9'',
-UKUUG Proc. of the Summer 1990 Conf.,
-London, England,
-1990
-<br>
-[Pike91] Pike, R., ``8&#189;, The Plan 9 Window System'', USENIX Summer
-Conf. Proc., Nashville, 1991
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1379
sys/doc/net/net.html

@@ -1,1379 +0,0 @@
-<html>
-<title>
-The Organization of Networks in Plan 9
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>The Organization of Networks in Plan 9
-</H1>
-<DL><DD><I>Dave Presotto<br>
-Phil Winterbottom<br>
-<br>&#32;<br>
-presotto,philw@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Originally appeared in
-Proc. of the Winter 1993 USENIX Conf.,
-pp. 271-280,
-San Diego, CA
-</I><DT>&#32;<DD></dl>
-<br>
-In a distributed system networks are of paramount importance. This
-paper describes the implementation, design philosophy, and organization
-of network support in Plan 9. Topics include network requirements
-for distributed systems, our kernel implementation, network naming, user interfaces,
-and performance. We also observe that much of this organization is relevant to
-current systems.
-</DL>
-<H4>1 Introduction
-</H4>
-<P>
-Plan 9 [Pike90] is a general-purpose, multi-user, portable distributed system
-implemented on a variety of computers and networks.
-What distinguishes Plan 9 is its organization.
-The goals of this organization were to
-reduce administration
-and to promote resource sharing. One of the keys to its success as a distributed
-system is the organization and management of its networks.
-</P>
-<P>
-A Plan 9 system comprises file servers, CPU servers and terminals.
-The file servers and CPU servers are typically centrally
-located multiprocessor machines with large memories and
-high speed interconnects.
-A variety of workstation-class machines
-serve as terminals
-connected to the central servers using several networks and protocols.
-The architecture of the system demands a hierarchy of network
-speeds matching the needs of the components.
-Connections between file servers and CPU servers are high-bandwidth point-to-point
-fiber links.
-Connections from the servers fan out to local terminals
-using medium speed networks
-such as Ethernet [Met80] and Datakit [Fra80].
-Low speed connections via the Internet and
-the AT&amp;T backbone serve users in Oregon and Illinois.
-Basic Rate ISDN data service and 9600 baud serial lines provide slow
-links to users at home.
-</P>
-<P>
-Since CPU servers and terminals use the same kernel,
-users may choose to run programs locally on
-their terminals or remotely on CPU servers.
-The organization of Plan 9 hides the details of system connectivity
-allowing both users and administrators to configure their environment
-to be as distributed or centralized as they wish.
-Simple commands support the
-construction of a locally represented name space
-spanning many machines and networks.
-At work, users tend to use their terminals like workstations,
-running interactive programs locally and
-reserving the CPU servers for data or compute intensive jobs
-such as compiling and computing chess endgames.
-At home or when connected over
-a slow network, users tend to do most work on the CPU server to minimize
-traffic on the slow links.
-The goal of the network organization is to provide the same
-environment to the user wherever resources are used.
-</P>
-<H4>2 Kernel Network Support
-</H4>
-<P>
-Networks play a central role in any distributed system. This is particularly
-true in Plan 9 where most resources are provided by servers external to the kernel.
-The importance of the networking code within the kernel
-is reflected by its size;
-of 25,000 lines of kernel code, 12,500 are network and protocol related.
-Networks are continually being added and the fraction of code
-devoted to communications
-is growing.
-Moreover, the network code is complex.
-Protocol implementations consist almost entirely of
-synchronization and dynamic memory management, areas demanding 
-subtle error recovery
-strategies.
-The kernel currently supports Datakit, point-to-point fiber links,
-an Internet (IP) protocol suite and ISDN data service.
-The variety of networks and machines
-has raised issues not addressed by other systems running on commercial
-hardware supporting only Ethernet or FDDI.
-</P>
-<H4>2.1 The File System protocol
-</H4>
-<P>
-A central idea in Plan 9 is the representation of a resource as a hierarchical
-file system.
-Each process assembles a view of the system by building a
-<I>name space</I>
-[Needham] connecting its resources.
-File systems need not represent disc files; in fact, most Plan 9 file systems have no
-permanent storage.
-A typical file system dynamically represents
-some resource like a set of network connections or the process table.
-Communication between the kernel, device drivers, and local or remote file servers uses a
-protocol called 9P. The protocol consists of 17 messages
-describing operations on files and directories.
-Kernel resident device and protocol drivers use a procedural version
-of the protocol while external file servers use an RPC form.
-Nearly all traffic between Plan 9 systems consists
-of 9P messages.
-9P relies on several properties of the underlying transport protocol.
-It assumes messages arrive reliably and in sequence and
-that delimiters between messages
-are preserved.
-When a protocol does not meet these
-requirements (for example, TCP does not preserve delimiters)
-we provide mechanisms to marshal messages before handing them
-to the system.
-</P>
-<P>
-A kernel data structure, the
-<I>channel</I>,
-is a handle to a file server.
-Operations on a channel generate the following 9P messages.
-The
-<TT>session</TT>
-and
-<TT>attach</TT>
-messages authenticate a connection, established by means external to 9P,
-and validate its user.
-The result is an authenticated
-channel
-referencing the root of the
-server.
-The
-<TT>clone</TT>
-message makes a new channel identical to an existing channel, much like
-the
-<TT>dup</TT>
-system call.
-A
-channel
-may be moved to a file on the server using a
-<TT>walk</TT>
-message to descend each level in the hierarchy.
-The
-<TT>stat</TT>
-and
-<TT>wstat</TT>
-messages read and write the attributes of the file referenced by a channel.
-The
-<TT>open</TT>
-message prepares a channel for subsequent
-<TT>read</TT>
-and
-<TT>write</TT>
-messages to access the contents of the file.
-<TT>Create</TT>
-and
-<TT>remove</TT>
-perform the actions implied by their names on the file
-referenced by the channel.
-The
-<TT>clunk</TT>
-message discards a channel without affecting the file.
-</P>
-<P>
-A kernel resident file server called the
-<I>mount driver</I>
-converts the procedural version of 9P into RPCs.
-The
-<I>mount</I>
-system call provides a file descriptor, which can be
-a pipe to a user process or a network connection to a remote machine, to
-be associated with the mount point.
-After a mount, operations
-on the file tree below the mount point are sent as messages to the file server.
-The
-mount
-driver manages buffers, packs and unpacks parameters from
-messages, and demultiplexes among processes using the file server.
-</P>
-<H4>2.2 Kernel Organization
-</H4>
-<P>
-The network code in the kernel is divided into three layers: hardware interface,
-protocol processing, and program interface.
-A device driver typically uses streams to connect the two interface layers.
-Additional stream modules may be pushed on
-a device to process protocols.
-Each device driver is a kernel-resident file system.
-Simple device drivers serve a single level
-directory containing just a few files;
-for example, we represent each UART
-by a data and a control file.
-<DL><DT><DD><TT><PRE>
-cpu% cd /dev
-cpu% ls -l eia*
---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia1
---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia1ctl
---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia2
---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia2ctl
-cpu%
-</PRE></TT></DL>
-The control file is used to control the device;
-writing the string
-<TT>b1200</TT>
-to
-<TT>/dev/eia1ctl</TT>
-sets the line to 1200 baud.
-</P>
-<P>
-Multiplexed devices present
-a more complex interface structure.
-For example, the LANCE Ethernet driver
-serves a two level file tree (Figure 1)
-providing
-</P>
-<DL COMPACT>
-<DT>*<DD>
-device control and configuration
-<DT>*<DD>
-user-level protocols like ARP
-<DT>*<DD>
-diagnostic interfaces for snooping software.
-</dl>
-<br>&#32;<br>
-The top directory contains a
-<TT>clone</TT>
-file and a directory for each connection, numbered
-<TT>1</TT>
-to
-<TT>n</TT>.
-Each connection directory corresponds to an Ethernet packet type.
-Opening the
-<TT>clone</TT>
-file finds an unused connection directory
-and opens its
-<TT>ctl</TT>
-file.
-Reading the control file returns the ASCII connection number; the user
-process can use this value to construct the name of the proper 
-connection directory.
-In each connection directory files named
-<TT>ctl</TT>,
-<TT>data</TT>,
-<TT>stats</TT>,
-and 
-<TT>type</TT>
-provide access to the connection.
-Writing the string
-<TT>connect 2048</TT>
-to the
-<TT>ctl</TT>
-file sets the packet type to 2048
-and
-configures the connection to receive
-all IP packets sent to the machine.
-Subsequent reads of the file
-<TT>type</TT>
-yield the string
-<TT>2048</TT>.
-The
-<TT>data</TT>
-file accesses the media;
-reading it
-returns the
-next packet of the selected type.
-Writing the file
-queues a packet for transmission after
-appending a packet header containing the source address and packet type.
-The
-<TT>stats</TT>
-file returns ASCII text containing the interface address,
-packet input/output counts, error statistics, and general information
-about the state of the interface.
-<DL><DT><DD><TT><PRE>
-<br><img src="data.7580.gif"><br>
-</PRE></TT></DL>
-If several connections on an interface
-are configured for a particular packet type, each receives a
-copy of the incoming packets.
-The special packet type
-<TT>-1</TT>
-selects all packets.
-Writing the strings
-<TT>promiscuous</TT>
-and
-<TT>connect</TT>
-<TT>-1</TT>
-to the
-<TT>ctl</TT>
-file
-configures a conversation to receive all packets on the Ethernet.
-<P>
-Although the driver interface may seem elaborate,
-the representation of a device as a set of files using ASCII strings for
-communication has several advantages.
-Any mechanism supporting remote access to files immediately
-allows a remote machine to use our interfaces as gateways.
-Using ASCII strings to control the interface avoids byte order problems and
-ensures a uniform representation for
-devices on the same machine and even allows devices to be accessed remotely.
-Representing dissimilar devices by the same set of files allows common tools
-to serve
-several networks or interfaces.
-Programs like
-<TT>stty</TT>
-are replaced by
-<TT>echo</TT>
-and shell redirection.
-</P>
-<H4>2.3 Protocol devices
-</H4>
-<P>
-Network connections are represented as pseudo-devices called protocol devices.
-Protocol device drivers exist for the Datakit URP protocol and for each of the
-Internet IP protocols TCP, UDP, and IL.
-IL, described below, is a new communication protocol used by Plan 9 for
-transmitting file system RPCs.
-All protocol devices look identical so user programs contain no
-network-specific code.
-</P>
-<P>
-Each protocol device driver serves a directory structure
-similar to that of the Ethernet driver.
-The top directory contains a
-<TT>clone</TT>
-file and a directory for each connection numbered
-<TT>0</TT>
-to
-<TT>n</TT>.
-Each connection directory contains files to control one
-connection and to send and receive information.
-A TCP connection directory looks like this:
-<DL><DT><DD><TT><PRE>
-cpu% cd /net/tcp/2
-cpu% ls -l
---rw-rw---- I 0 ehg    bootes 0 Jul 13 21:14 ctl
---rw-rw---- I 0 ehg    bootes 0 Jul 13 21:14 data
---rw-rw---- I 0 ehg    bootes 0 Jul 13 21:14 listen
---r--r--r-- I 0 bootes bootes 0 Jul 13 21:14 local
---r--r--r-- I 0 bootes bootes 0 Jul 13 21:14 remote
---r--r--r-- I 0 bootes bootes 0 Jul 13 21:14 status
-cpu% cat local remote status
-135.104.9.31 5012
-135.104.53.11 564
-tcp/2 1 Established connect
-cpu%
-</PRE></TT></DL>
-The files
-<TT>local</TT>,
-<TT>remote</TT>,
-and
-<TT>status</TT>
-supply information about the state of the connection.
-The
-<TT>data</TT>
-and
-<TT>ctl</TT>
-files
-provide access to the process end of the stream implementing the protocol.
-The
-<TT>listen</TT>
-file is used to accept incoming calls from the network.
-</P>
-<P>
-The following steps establish a connection.
-</P>
-<DL COMPACT>
-<DT>1)<DD>
-The clone device of the
-appropriate protocol directory is opened to reserve an unused connection.
-<DT>2)<DD>
-The file descriptor returned by the open points to the
-<TT>ctl</TT>
-file of the new connection.
-Reading that file descriptor returns an ASCII string containing
-the connection number.
-<DT>3)<DD>
-A protocol/network specific ASCII address string is written to the
-<TT>ctl</TT>
-file.
-<DT>4)<DD>
-The path of the
-<TT>data</TT>
-file is constructed using the connection number.
-When the
-<TT>data</TT>
-file is opened the connection is established.
-</dl>
-<br>&#32;<br>
-A process can read and write this file descriptor
-to send and receive messages from the network.
-If the process opens the
-<TT>listen</TT>
-file it blocks until an incoming call is received.
-An address string written to the
-<TT>ctl</TT>
-file before the listen selects the
-ports or services the process is prepared to accept.
-When an incoming call is received, the open completes
-and returns a file descriptor
-pointing to the
-<TT>ctl</TT>
-file of the new connection.
-Reading the
-<TT>ctl</TT>
-file yields a connection number used to construct the path of the
-<TT>data</TT>
-file.
-A connection remains established while any of the files in the connection directory
-are referenced or until a close is received from the network.
-<H4>2.4 Streams
-</H4>
-<P>
-A
-<I>stream</I>
-[Rit84a][Presotto] is a bidirectional channel connecting a
-physical or pseudo-device to user processes.
-The user processes insert and remove data at one end of the stream.
-Kernel processes acting on behalf of a device insert data at
-the other end.
-Asynchronous communications channels such as pipes,
-TCP conversations, Datakit conversations, and RS232 lines are implemented using
-streams.
-</P>
-<P>
-A stream comprises a linear list of
-<I>processing modules</I>.
-Each module has both an upstream (toward the process) and
-downstream (toward the device)
-<I>put routine</I>.
-Calling the put routine of the module on either end of the stream
-inserts data into the stream.
-Each module calls the succeeding one to send data up or down the stream.
-</P>
-<P>
-An instance of a processing module is represented by a pair of
-<I>queues</I>,
-one for each direction.
-The queues point to the put procedures and can be used
-to queue information traveling along the stream.
-Some put routines queue data locally and send it along the stream at some
-later time, either due to a subsequent call or an asynchronous
-event such as a retransmission timer or a device interrupt.
-Processing modules create helper kernel processes to
-provide a context for handling asynchronous events.
-For example, a helper kernel process awakens periodically
-to perform any necessary TCP retransmissions.
-The use of kernel processes instead of serialized run-to-completion service routines
-differs from the implementation of Unix streams.
-Unix service routines cannot
-use any blocking kernel resource and they lack a local long-lived state.
-Helper kernel processes solve these problems and simplify the stream code.
-</P>
-<P>
-There is no implicit synchronization in our streams.
-Each processing module must ensure that concurrent processes using the stream
-are synchronized.
-This maximizes concurrency but introduces the
-possibility of deadlock.
-However, deadlocks are easily avoided by careful programming; to
-date they have not caused us problems.
-</P>
-<P>
-Information is represented by linked lists of kernel structures called
-<I>blocks</I>.
-Each block contains a type, some state flags, and pointers to
-an optional buffer.
-Block buffers can hold either data or control information, i.e., directives
-to the processing modules.
-Blocks and block buffers are dynamically allocated from kernel memory.
-</P>
-<H4>2.4.1 User Interface
-</H4>
-<P>
-A stream is represented at user level as two files, 
-<TT>ctl</TT>
-and
-<TT>data</TT>.
-The actual names can be changed by the device driver using the stream,
-as we saw earlier in the example of the UART driver.
-The first process to open either file creates the stream automatically.
-The last close destroys it.
-Writing to the
-<TT>data</TT>
-file copies the data into kernel blocks
-and passes them to the downstream put routine of the first processing module.
-A write of less than 32K is guaranteed to be contained by a single block.
-Concurrent writes to the same stream are not synchronized, although the
-32K block size assures atomic writes for most protocols.
-The last block written is flagged with a delimiter
-to alert downstream modules that care about write boundaries.
-In most cases the first put routine calls the second, the second
-calls the third, and so on until the data is output.
-As a consequence, most data is output without context switching.
-</P>
-<P>
-Reading from the
-<TT>data</TT>
-file returns data queued at the top of the stream.
-The read terminates when the read count is reached
-or when the end of a delimited block is encountered.
-A per stream read lock ensures only one process
-can read from a stream at a time and guarantees
-that the bytes read were contiguous bytes from the
-stream.
-</P>
-<P>
-Like UNIX streams [Rit84a],
-Plan 9 streams can be dynamically configured.
-The stream system intercepts and interprets
-the following control blocks:
-</P>
-<DL COMPACT>
-<DT><TT>push</TT> <I>name</I><DD>
-adds an instance of the processing module 
-<I>name</I>
-to the top of the stream.
-<DT><TT>pop</TT><DD>
-removes the top module of the stream.
-<DT><TT>hangup</TT><DD>
-sends a hangup message
-up the stream from the device end.
-</dl>
-<br>&#32;<br>
-Other control blocks are module-specific and are interpreted by each
-processing module
-as they pass.
-<P>
-The convoluted syntax and semantics of the UNIX
-<TT>ioctl</TT>
-system call convinced us to leave it out of Plan 9.
-Instead,
-<TT>ioctl</TT>
-is replaced by the
-<TT>ctl</TT>
-file.
-Writing to the
-<TT>ctl</TT>
-file
-is identical to writing to a
-<TT>data</TT>
-file except the blocks are of type
-<I>control</I>.
-A processing module parses each control block it sees.
-Commands in control blocks are ASCII strings, so
-byte ordering is not an issue when one system
-controls streams in a name space implemented on another processor.
-The time to parse control blocks is not important, since control
-operations are rare.
-</P>
-<H4>2.4.2 Device Interface
-</H4>
-<P>
-The module at the downstream end of the stream is part of a device interface.
-The particulars of the interface vary with the device.
-Most device interfaces consist of an interrupt routine, an output
-put routine, and a kernel process.
-The output put routine stages data for the
-device and starts the device if it is stopped.
-The interrupt routine wakes up the kernel process whenever
-the device has input to be processed or needs more output staged.
-The kernel process puts information up the stream or stages more data for output.
-The division of labor among the different pieces varies depending on
-how much must be done at interrupt level.
-However, the interrupt routine may not allocate blocks or call
-a put routine since both actions require a process context.
-</P>
-<H4>2.4.3 Multiplexing
-</H4>
-<P>
-The conversations using a protocol device must be
-multiplexed onto a single physical wire.
-We push a multiplexer processing module
-onto the physical device stream to group the conversations.
-The device end modules on the conversations add the necessary header
-onto downstream messages and then put them to the module downstream
-of the multiplexer.
-The multiplexing module looks at each message moving up its stream and
-puts it to the correct conversation stream after stripping
-the header controlling the demultiplexing.
-</P>
-<P>
-This is similar to the Unix implementation of multiplexer streams.
-The major difference is that we have no general structure that
-corresponds to a multiplexer.
-Each attempt to produce a generalized multiplexer created a more complicated
-structure and underlined the basic difficulty of generalizing this mechanism.
-We now code each multiplexer from scratch and favor simplicity over
-generality.
-</P>
-<H4>2.4.4 Reflections
-</H4>
-<P>
-Despite five years' experience and the efforts of many programmers,
-we remain dissatisfied with the stream mechanism.
-Performance is not an issue;
-the time to process protocols and drive
-device interfaces continues to dwarf the
-time spent allocating, freeing, and moving blocks
-of data.
-However the mechanism remains inordinately
-complex.
-Much of the complexity results from our efforts
-to make streams dynamically configurable, to
-reuse processing modules on different devices
-and to provide kernel synchronization
-to ensure data structures
-don't disappear under foot.
-This is particularly irritating since we seldom use these properties.
-</P>
-<P>
-Streams remain in our kernel because we are unable to
-devise a better alternative.
-Larry Peterson's X-kernel [Pet89a]
-is the closest contender but
-doesn't offer enough advantage to switch.
-If we were to rewrite the streams code, we would probably statically
-allocate resources for a large fixed number of conversations and burn
-memory in favor of less complexity.
-</P>
-<H4>3 The IL Protocol
-</H4>
-<P>
-None of the standard IP protocols is suitable for transmission of
-9P messages over an Ethernet or the Internet.
-TCP has a high overhead and does not preserve delimiters.
-UDP, while cheap, does not provide reliable sequenced delivery.
-Early versions of the system used a custom protocol that was
-efficient but unsatisfactory for internetwork transmission.
-When we implemented IP, TCP, and UDP we looked around for a suitable
-replacement with the following properties:
-</P>
-<DL COMPACT>
-<DT>*<DD>
-Reliable datagram service with sequenced delivery
-<DT>*<DD>
-Runs over IP
-<DT>*<DD>
-Low complexity, high performance
-<DT>*<DD>
-Adaptive timeouts
-</dl>
-<br>&#32;<br>
-None met our needs so a new protocol was designed.
-IL is a lightweight protocol designed to be encapsulated by IP.
-It is a connection-based protocol
-providing reliable transmission of sequenced messages between machines.
-No provision is made for flow control since the protocol is designed to transport RPC
-messages between client and server.
-A small outstanding message window prevents too
-many incoming messages from being buffered;
-messages outside the window are discarded
-and must be retransmitted.
-Connection setup uses a two way handshake to generate
-initial sequence numbers at each end of the connection;
-subsequent data messages increment the
-sequence numbers allowing
-the receiver to resequence out of order messages. 
-In contrast to other protocols, IL does not do blind retransmission.
-If a message is lost and a timeout occurs, a query message is sent.
-The query message is a small control message containing the current
-sequence numbers as seen by the sender.
-The receiver responds to a query by retransmitting missing messages.
-This allows the protocol to behave well in congested networks,
-where blind retransmission would cause further
-congestion.
-Like TCP, IL has adaptive timeouts.
-A round-trip timer is used
-to calculate acknowledge and retransmission times in terms of the network speed.
-This allows the protocol to perform well on both the Internet and on local Ethernets.
-<P>
-In keeping with the minimalist design of the rest of the kernel, IL is small.
-The entire protocol is 847 lines of code, compared to 2200 lines for TCP.
-IL is our protocol of choice.
-</P>
-<H4>4 Network Addressing
-</H4>
-<P>
-A uniform interface to protocols and devices is not sufficient to
-support the transparency we require.
-Since each network uses a different
-addressing scheme,
-the ASCII strings written to a control file have no common format.
-As a result, every tool must know the specifics of the networks it
-is capable of addressing.
-Moreover, since each machine supplies a subset
-of the available networks, each user must be aware of the networks supported
-by every terminal and server machine.
-This is obviously unacceptable.
-</P>
-<P>
-Several possible solutions were considered and rejected; one deserves
-more discussion.
-We could have used a user-level file server
-to represent the network name space as a Plan 9 file tree. 
-This global naming scheme has been implemented in other distributed systems.
-The file hierarchy provides paths to
-directories representing network domains.
-Each directory contains
-files representing the names of the machines in that domain;
-an example might be the path
-<TT>/net/name/usa/edu/mit/ai</TT>.
-Each machine file contains information like the IP address of the machine.
-We rejected this representation for several reasons.
-First, it is hard to devise a hierarchy encompassing all representations
-of the various network addressing schemes in a uniform manner.
-Datakit and Ethernet address strings have nothing in common.
-Second, the address of a machine is
-often only a small part of the information required to connect to a service on
-the machine.
-For example, the IP protocols require symbolic service names to be mapped into
-numeric port numbers, some of which are privileged and hence special.
-Information of this sort is hard to represent in terms of file operations.
-Finally, the size and number of the networks being represented burdens users with
-an unacceptably large amount of information about the organization of the network
-and its connectivity.
-In this case the Plan 9 representation of a
-resource as a file is not appropriate.
-</P>
-<P>
-If tools are to be network independent, a third-party server must resolve
-network names.
-A server on each machine, with local knowledge, can select the best network
-for any particular destination machine or service.
-Since the network devices present a common interface,
-the only operation which differs between networks is name resolution.
-A symbolic name must be translated to
-the path of the clone file of a protocol
-device and an ASCII address string to write to the
-<TT>ctl</TT>
-file.
-A connection server (CS) provides this service.
-</P>
-<H4>4.1 Network Database
-</H4>
-<P>
-On most systems several
-files such as
-<TT>/etc/hosts</TT>,
-<TT>/etc/networks</TT>,
-<TT>/etc/services</TT>,
-<TT>/etc/hosts.equiv</TT>,
-<TT>/etc/bootptab</TT>,
-and
-<TT>/etc/named.d</TT>
-hold network information.
-Much time and effort is spent
-administering these files and keeping
-them mutually consistent.
-Tools attempt to
-automatically derive one or more of the files from
-information in other files but maintenance continues to be
-difficult and error prone.
-</P>
-<P>
-Since we were writing an entirely new system, we were free to
-try a simpler approach.
-One database on a shared server contains all the information
-needed for network administration.
-Two ASCII files comprise the main database:
-<TT>/lib/ndb/local</TT>
-contains locally administered information and
-<TT>/lib/ndb/global</TT>
-contains information imported from elsewhere.
-The files contain sets of attribute/value pairs of the form
-<I>attr<TT>=</TT>value</I>,
-where
-<I>attr</I>
-and
-<I>value</I>
-are alphanumeric strings.
-Systems are described by multi-line entries;
-a header line at the left margin begins each entry followed by zero or more
-indented attribute/value pairs specifying
-names, addresses, properties, etc.
-For example, the entry for our CPU server
-specifies a domain name, an IP address, an Ethernet address,
-a Datakit address, a boot file, and supported protocols.
-<DL><DT><DD><TT><PRE>
-sys = helix
-	dom=helix.research.bell-labs.com
-	bootf=/mips/9power
-	ip=135.104.9.31 ether=0800690222f0
-	dk=nj/astro/helix
-	proto=il flavor=9cpu
-</PRE></TT></DL>
-If several systems share entries such as
-network mask and gateway, we specify that information
-with the network or subnetwork instead of the system.
-The following entries define a Class B IP network and 
-a few subnets derived from it.
-The entry for the network specifies the IP mask,
-file system, and authentication server for all systems
-on the network.
-Each subnetwork specifies its default IP gateway.
-<DL><DT><DD><TT><PRE>
-ipnet=mh-astro-net ip=135.104.0.0 ipmask=255.255.255.0
-	fs=bootes.research.bell-labs.com
-	auth=1127auth
-ipnet=unix-room ip=135.104.117.0
-	ipgw=135.104.117.1
-ipnet=third-floor ip=135.104.51.0
-	ipgw=135.104.51.1
-ipnet=fourth-floor ip=135.104.52.0
-	ipgw=135.104.52.1
-</PRE></TT></DL>
-Database entries also define the mapping of service names
-to port numbers for TCP, UDP, and IL.
-<DL><DT><DD><TT><PRE>
-tcp=echo	port=7
-tcp=discard	port=9
-tcp=systat	port=11
-tcp=daytime	port=13
-</PRE></TT></DL>
-</P>
-<P>
-All programs read the database directly so
-consistency problems are rare.
-However the database files can become large.
-Our global file, containing all information about
-both Datakit and Internet systems in AT&amp;T, has 43,000
-lines.
-To speed searches, we build hash table files for each
-attribute we expect to search often.
-The hash file entries point to entries
-in the master files.
-Every hash file contains the modification time of its master
-file so we can avoid using an out-of-date hash table.
-Searches for attributes that aren't hashed or whose hash table
-is out-of-date still work, they just take longer.
-</P>
-<H4>4.2 Connection Server
-</H4>
-<P>
-On each system a user level connection server process, CS, translates
-symbolic names to addresses.
-CS uses information about available networks, the network database, and
-other servers (such as DNS) to translate names.
-CS is a file server serving a single file,
-<TT>/net/cs</TT>.
-A client writes a symbolic name to
-<TT>/net/cs</TT>
-then reads one line for each matching destination reachable
-from this system.
-The lines are of the form
-<I>filename message</I>,
-where
-<I>filename</I>
-is the path of the clone file to open for a new connection and
-<I>message</I>
-is the string to write to it to make the connection.
-The following example illustrates this.
-<TT>Ndb/csquery</TT>
-is a program that prompts for strings to write to
-<TT>/net/cs</TT>
-and prints the replies.
-<DL><DT><DD><TT><PRE>
-% ndb/csquery
-&#62; net!helix!9fs
-/net/il/clone 135.104.9.31!17008
-/net/dk/clone nj/astro/helix!9fs
-</PRE></TT></DL>
-</P>
-<P>
-CS provides meta-name translation to perform complicated
-searches.
-The special network name
-<TT>net</TT>
-selects any network in common between source and
-destination supporting the specified service.
-A host name of the form <TT>$</TT><I>attr</I>
-is the name of an attribute in the network database.
-The database search returns the value
-of the matching attribute/value pair
-most closely associated with the source host.
-Most closely associated is defined on a per network basis.
-For example, the symbolic name
-<TT>tcp!$auth!rexauth</TT>
-causes CS to search for the
-<TT>auth</TT>
-attribute in the database entry for the source system, then its
-subnetwork (if there is one) and then its network.
-<DL><DT><DD><TT><PRE>
-% ndb/csquery
-&#62; net!$auth!rexauth
-/net/il/clone 135.104.9.34!17021
-/net/dk/clone nj/astro/p9auth!rexauth
-/net/il/clone 135.104.9.6!17021
-/net/dk/clone nj/astro/musca!rexauth
-</PRE></TT></DL>
-</P>
-<P>
-Normally CS derives naming information from its database files.
-For domain names however, CS first consults another user level
-process, the domain name server (DNS).
-If no DNS is reachable, CS relies on its own tables.
-</P>
-<P>
-Like CS, the domain name server is a user level process providing
-one file,
-<TT>/net/dns</TT>.
-A client writes a request of the form
-<I>domain-name type</I>,
-where
-<I>type</I>
-is a domain name service resource record type.
-DNS performs a recursive query through the
-Internet domain name system producing one line
-per resource record found.  The client reads
-<TT>/net/dns</TT>
-to retrieve the records.
-Like other domain name servers, DNS caches information
-learned from the network.
-DNS is implemented as a multi-process shared memory application
-with separate processes listening for network and local requests.
-</P>
-<H4>5 Library routines
-</H4>
-<P>
-The section on protocol devices described the details
-of making and receiving connections across a network.
-The dance is straightforward but tedious.
-Library routines are provided to relieve
-the programmer of the details.
-</P>
-<H4>5.1 Connecting
-</H4>
-<P>
-The
-<TT>dial</TT>
-library call establishes a connection to a remote destination.
-It
-returns an open file descriptor for the
-<TT>data</TT>
-file in the connection directory.
-<DL><DT><DD><TT><PRE>
-int  dial(char *dest, char *local, char *dir, int *cfdp)
-</PRE></TT></DL>
-</P>
-<DL COMPACT>
-<DT><TT>dest</TT><DD>
-is the symbolic name/address of the destination.
-<DT><TT>local</TT><DD>
-is the local address.
-Since most networks do not support this, it is
-usually zero.
-<DT><TT>dir</TT><DD>
-is a pointer to a buffer to hold the path name of the protocol directory
-representing this connection.
-<TT>Dial</TT>
-fills this buffer if the pointer is non-zero.
-<DT><TT>cfdp</TT><DD>
-is a pointer to a file descriptor for the
-<TT>ctl</TT>
-file of the connection.
-If the pointer is non-zero,
-<TT>dial</TT>
-opens the control file and tucks the file descriptor here.
-</dl>
-<br>&#32;<br>
-Most programs call
-<TT>dial</TT>
-with a destination name and all other arguments zero.
-<TT>Dial</TT>
-uses CS to
-translate the symbolic name to all possible destination addresses
-and attempts to connect to each in turn until one works.
-Specifying the special name
-<TT>net</TT>
-in the network portion of the destination
-allows CS to pick a network/protocol in common
-with the destination for which the requested service is valid.
-For example, assume the system
-<TT>research.bell-labs.com</TT>
-has the Datakit address
-<TT>nj/astro/research</TT>
-and IP addresses
-<TT>135.104.117.5</TT>
-and
-<TT>129.11.4.1</TT>.
-The call
-<DL><DT><DD><TT><PRE>
-fd = dial("net!research.bell-labs.com!login", 0, 0, 0);
-</PRE></TT></DL>
-tries in succession to connect to
-<TT>nj/astro/research!login</TT>
-on the Datakit and both
-<TT>135.104.117.5!513</TT>
-and
-<TT>129.11.4.1!513</TT>
-across the Internet.
-<P>
-<TT>Dial</TT>
-accepts addresses instead of symbolic names.
-For example, the destinations
-<TT>tcp!135.104.117.5!513</TT>
-and
-<TT>tcp!research.bell-labs.com!login</TT>
-are equivalent
-references to the same machine.
-</P>
-<H4>5.2 Listening
-</H4>
-<P>
-A program uses
-four routines to listen for incoming connections.
-It first
-<TT>announce()</TT>s
-its intention to receive connections,
-then
-<TT>listen()</TT>s
-for calls and finally
-<TT>accept()</TT>s
-or
-<TT>reject()</TT>s
-them.
-<TT>Announce</TT>
-returns an open file descriptor for the
-<TT>ctl</TT>
-file of a connection and fills
-<TT>dir</TT>
-with the
-path of the protocol directory
-for the announcement.
-<DL><DT><DD><TT><PRE>
-int  announce(char *addr, char *dir)
-</PRE></TT></DL>
-<TT>Addr</TT>
-is the symbolic name/address announced;
-if it does not contain a service, the announcement is for
-all services not explicitly announced.
-Thus, one can easily write the equivalent of the
-<TT>inetd</TT>
-program without
-having to announce each separate service.
-An announcement remains in force until the control file is
-closed.
-</P>
-<br>&#32;<br>
-<TT>Listen</TT>
-returns an open file descriptor for the
-<TT>ctl</TT>
-file and fills
-<TT>ldir</TT>
-with the path
-of the protocol directory
-for the received connection.
-It is passed
-<TT>dir</TT>
-from the announcement.
-<DL><DT><DD><TT><PRE>
-int  listen(char *dir, char *ldir)
-</PRE></TT></DL>
-<br>&#32;<br>
-<TT>Accept</TT>
-and
-<TT>reject</TT>
-are called with the control file descriptor and
-<TT>ldir</TT>
-returned by
-<TT>listen</TT>.
-Some networks such as Datakit accept a reason for a rejection;
-networks such as IP ignore the third argument.
-<DL><DT><DD><TT><PRE>
-int  accept(int ctl, char *ldir)
-int  reject(int ctl, char *ldir, char *reason)
-</PRE></TT></DL>
-<P>
-The following code implements a typical TCP listener.
-It announces itself, listens for connections, and forks a new
-process for each.
-The new process echoes data on the connection until the
-remote end closes it.
-The "*" in the symbolic name means the announcement is valid for
-any addresses bound to the machine the program is run on.
-<DL><DT><DD><TT><PRE>
-int
-echo_server(void)
-{
-	int afd, dfd, lcfd;
-	char adir[40], ldir[40];
-	int n;
-	char buf[256];
-
-	afd = announce("tcp!*!echo", adir);
-	if(afd &#60; 0)
-		return -1;
-
-	for(;;){
-		/* listen for a call */
-		lcfd = listen(adir, ldir);
-		if(lcfd &#60; 0)
-			return -1;
-
-		/* fork a process to echo */
-		switch(fork()){
-		case 0:
-			/* accept the call and open the data file */
-			dfd = accept(lcfd, ldir);
-			if(dfd &#60; 0)
-				return -1;
-
-			/* echo until EOF */
-			while((n = read(dfd, buf, sizeof(buf))) &#62; 0)
-				write(dfd, buf, n);
-			exits(0);
-		case -1:
-			perror("forking");
-		default:
-			close(lcfd);
-			break;
-		}
-
-	}
-}
-</PRE></TT></DL>
-</P>
-<H4>6 User Level
-</H4>
-<P>
-Communication between Plan 9 machines is done almost exclusively in
-terms of 9P messages. Only the two services
-<TT>cpu</TT>
-and
-<TT>exportfs</TT>
-are used.
-The
-<TT>cpu</TT>
-service is analogous to
-<TT>rlogin</TT>.
-However, rather than emulating a terminal session
-across the network,
-<TT>cpu</TT>
-creates a process on the remote machine whose name space is an analogue of the window
-in which it was invoked.
-<TT>Exportfs</TT>
-is a user level file server which allows a piece of name space to be
-exported from machine to machine across a network. It is used by the
-<TT>cpu</TT>
-command to serve the files in the terminal's name space when they are
-accessed from the
-cpu server.
-</P>
-<P>
-By convention, the protocol and device driver file systems are mounted in a
-directory called
-<TT>/net</TT>.
-Although the per-process name space allows users to configure an
-arbitrary view of the system, in practice their profiles build
-a conventional name space.
-</P>
-<H4>6.1 Exportfs
-</H4>
-<P>
-<TT>Exportfs</TT>
-is invoked by an incoming network call.
-The
-<I>listener</I>
-(the Plan 9 equivalent of
-<TT>inetd</TT>)
-runs the profile of the user
-requesting the service to construct a name space before starting
-<TT>exportfs</TT>.
-After an initial protocol
-establishes the root of the file tree being
-exported,
-the remote process mounts the connection,
-allowing
-<TT>exportfs</TT>
-to act as a relay file server. Operations in the imported file tree
-are executed on the remote server and the results returned.
-As a result
-the name space of the remote machine appears to be exported into a
-local file tree.
-</P>
-<P>
-The
-<TT>import</TT>
-command calls
-<TT>exportfs</TT>
-on a remote machine, mounts the result in the local name space,
-and
-exits.
-No local process is required to serve mounts;
-9P messages are generated by the kernel's mount driver and sent
-directly over the network.
-</P>
-<P>
-<TT>Exportfs</TT>
-must be multithreaded since the system calls
-<TT>open</TT>,
-<TT>read</TT>
-and
-<TT>write</TT>
-may block.
-Plan 9 does not implement the 
-<TT>select</TT>
-system call but does allow processes to share file descriptors,
-memory and other resources.
-<TT>Exportfs</TT>
-and the configurable name space
-provide a means of sharing resources between machines.
-It is a building block for constructing complex name spaces
-served from many machines.
-</P>
-<P>
-The simplicity of the interfaces encourages naive users to exploit the potential
-of a richly connected environment.
-Using these tools it is easy to gateway between networks.
-For example a terminal with only a Datakit connection can import from the server
-<TT>helix</TT>:
-<DL><DT><DD><TT><PRE>
-import -a helix /net
-telnet ai.mit.edu
-</PRE></TT></DL>
-The
-<TT>import</TT>
-command makes a Datakit connection to the machine
-<TT>helix</TT>
-where
-it starts an instance of
-<TT>exportfs</TT>
-to serve
-<TT>/net</TT>.
-The
-<TT>import</TT>
-command mounts the remote
-<TT>/net</TT>
-directory after (the
-<TT>-a</TT>
-option to
-<TT>import</TT>)
-the existing contents
-of the local
-<TT>/net</TT>
-directory.
-The directory contains the union of the local and remote contents of
-<TT>/net</TT>.
-Local entries supersede remote ones of the same name so
-networks on the local machine are chosen in preference
-to those supplied remotely.
-However, unique entries in the remote directory are now visible in the local
-<TT>/net</TT>
-directory.
-All the networks connected to
-<TT>helix</TT>,
-not just Datakit,
-are now available in the terminal. The effect on the name space is shown by the following
-example:
-<DL><DT><DD><TT><PRE>
-philw-gnot% ls /net
-/net/cs
-/net/dk
-philw-gnot% import -a musca /net
-philw-gnot% ls /net
-/net/cs
-/net/cs
-/net/dk
-/net/dk
-/net/dns
-/net/ether
-/net/il
-/net/tcp
-/net/udp
-</PRE></TT></DL>
-</P>
-<H4>6.2 Ftpfs
-</H4>
-<P>
-We decided to make our interface to FTP
-a file system rather than the traditional command.
-Our command,
-<I>ftpfs,</I>
-dials the FTP port of a remote system, prompts for login and password, sets image mode,
-and mounts the remote file system onto
-<TT>/n/ftp</TT>.
-Files and directories are cached to reduce traffic.
-The cache is updated whenever a file is created.
-Ftpfs works with TOPS-20, VMS, and various Unix flavors
-as the remote system.
-</P>
-<H4>7 Cyclone Fiber Links
-</H4>
-<P>
-The file servers and CPU servers are connected by
-high-bandwidth
-point-to-point links.
-A link consists of two VME cards connected by a pair of optical
-fibers.
-The VME cards use 33MHz Intel 960 processors and AMD's TAXI
-fiber transmitter/receivers to drive the lines at 125 Mbit/sec.
-Software in the VME card reduces latency by copying messages from system memory
-to fiber without intermediate buffering.
-</P>
-<H4>8 Performance
-</H4>
-<P>
-We measured both latency and throughput
-of reading and writing bytes between two processes
-for a number of different paths.
-Measurements were made on two- and four-CPU SGI Power Series processors.
-The CPUs are 25 MHz MIPS 3000s.
-The latency is measured as the round trip time
-for a byte sent from one process to another and
-back again.
-Throughput is measured using 16k writes from
-one process to another.
-<DL><DT><DD><TT><PRE>
-<br><img src="data.7581.gif"><br>
-</PRE></TT></DL>
-</P>
-<H4>9 Conclusion
-</H4>
-<P>
-The representation of all resources as file systems
-coupled with an ASCII interface has proved more powerful
-than we had originally imagined.
-Resources can be used by any computer in our networks
-independent of byte ordering or CPU type.
-The connection server provides an elegant means
-of decoupling tools from the networks they use.
-Users successfully use Plan 9 without knowing the
-topology of the system or the networks they use.
-More information about 9P can be found in Section 5 of the Plan 9 Programmer's
-Manual, Volume I.
-</P>
-<H4>10 References
-</H4>
-<br>&#32;<br>
-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
-``Plan 9 from Bell Labs'',
-UKUUG Proc. of the Summer 1990 Conf. ,
-London, England,
-1990.
-<br>&#32;<br>
-[Needham] R. Needham, ``Names'', in
-Distributed systems,
-S. Mullender, ed.,
-Addison Wesley, 1989.
-<br>&#32;<br>
-[Presotto] D. Presotto, ``Multiprocessor Streams for Plan 9'',
-UKUUG Proc. of the Summer 1990 Conf. ,
-London, England, 1990.
-<br>&#32;<br>
-[Met80] R. Metcalfe, D. Boggs, C. Crane, E. Taft and J. Hupp, ``The
-Ethernet Local Network: Three reports'',
-CSL-80-2,
-XEROX Palo Alto Research Center, February 1980.
-<br>&#32;<br>
-[Fra80] A. G. Fraser, ``Datakit - A Modular Network for Synchronous
-and Asynchronous Traffic'', 
-Proc. Int'l Conf. on Communication,
-Boston, June 1980.
-<br>&#32;<br>
-[Pet89a] L. Peterson, ``RPC in the X-Kernel: Evaluating new Design Techniques'',
-Proc. Twelfth Symp. on Op. Sys. Princ.,
-Litchfield Park, AZ, December 1989.
-<br>&#32;<br>
-[Rit84a] D. M. Ritchie, ``A Stream Input-Output System'',
-AT&amp;T Bell Laboratories Technical Journal, 63(8),
-October 1984.
-
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1406
sys/doc/plumb.html

@@ -1,1406 +0,0 @@
-<html>
-<title>
-Plumbing and Other Utilities
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Plumbing and Other Utilities
-</H1>
-<DL><DD><I>Rob Pike<br>
-Bell Laboratories, Murray Hill, NJ, 07974
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<br>&#32;<br>
-Plumbing is a new mechanism for inter-process communication in Plan 9,
-specifically the passing of messages between interactive programs as part of
-the user interface.
-Although plumbing shares some properties with familiar notions
-such as cut and paste,
-it offers a more general data exchange mechanism without imposing
-a particular user interface.
-<br>&#32;<br>
-The core of the plumbing system is a program called the
-<I>plumber</I>,
-which handles all messages and dispatches and reformats them
-according to configuration rules written in a special-purpose language.
-This approach allows the contents and context of a piece of data to define how
-it is handled.
-Unlike with drag and drop or cut and paste,
-the user doesn't need to deliver the data;
-the contents of a plumbing message, as interpreted by the plumbing rules,
-determine its destination.
-<br>&#32;<br>
-The plumber has an unusual architecture: it is a language-driven file server.
-This design has distinct advantages.
-It makes plumbing easy to add to an existing, Unix-like command environment;
-it guarantees uniform handling of inter-application messages;
-it off-loads from those applications most of the work of extracting and dispatching messages;
-and it works transparently across a network.
-</DL>
-<H4>Introduction
-</H4>
-<br>&#32;<br>
-Data moves from program to program in myriad ways.
-Command-line arguments,
-shell pipe lines,
-cut and paste,
-drag and drop, and other user interface techniques all provide some form
-of interprocess communication.
-Then there are tricks associated with special domains,
-such as HTML hyperlinks or the heuristics mail readers
-use to highlight URLs embedded in mail messages.
-Some systems provide implicit ways to automate the attachment of program to data&#173;the
-best known examples are probably the resource forks in MacOS and the
-file name extension `associations' in Microsoft Windows&#173;but in practice
-humans must too often carry their data from program to program.
-<br>&#32;<br>
-Why should a human do the work?
-Usually there is one obvious thing to do with a piece of data,
-and the data itself suggests what this is.
-Resource forks and associations speak to this issue directly, but statically and narrowly and with
-little opportunity to control the behavior.
-Mechanisms with more generality,
-such as cut and paste or drag and drop, demand too much manipulation by
-the user and are (therefore) too error-prone.
-<br>&#32;<br>
-We want a system that, given a piece of data,
-hands it to the appropriate application by default with little or no human intervention,
-while still permitting the user to override the defaults if desired.
-<br>&#32;<br>
-The plumbing system is an attempt to address some of these issues in a single,
-coherent, central way.
-It provides a mechanism for
-formatting and sending arbitrary messages between applications,
-typically interactive programs such as text editors, web browsers, and the window system,
-under the control of a central message-handling server called the
-<I>plumber</I>.
-Interactive programs provide application-specific connections to the plumber,
-triggering with minimal user action the transfer of data or control to other programs.
-The result is similar to a hypertext system in which all the links are implicit,
-extracted automatically by examining the data and the user's actions.
-It obviates
-cut and paste and other such hand-driven interprocess communication mechanisms.
-Plumbing delivers the goods to the right place automatically.
-<H4>Overview
-</H4>
-<br>&#32;<br>
-The plumber is implemented as a Plan 9 file server [Pike93];
-programs send messages by writing them to the plumber's file
-<TT>/mnt/plumb/send</TT>,
-and receive messages by reading them from
-<I>ports</I>,
-which are other plumber files in
-<TT>/mnt/plumb</TT>.
-For example,
-<TT>/mnt/plumb/edit</TT>
-is by convention the file from which a text editor reads messages requesting it to
-open and display a file for editing.
-(See Figure 1.)
-<DL><DT><DD><TT><PRE>
-<br><img src="-.19117390.gif"><br>
-</PRE></TT></DL>
-<DL>
-<DT><DT>&#32;<DD>
-Figure 1. The plumber controls the flow of messages between applications.
-Programs write to the file
-<TT>send</TT>
-and receive on `ports' of various names representing services such as
-<TT>edit</TT>
-or
-<TT>web</TT>.
-Although the figure doesn't illustrate it, some programs may both send and receive messages,
-and some ports are read by multiple applications.
-<br>&#32;<br>
-</dl>
-<br>&#32;<br>
-The plumber takes messages from the
-<TT>send</TT>
-file and interprets their contents using rules defined by
-a special-purpose pattern-action language.
-The language specifies any rewriting of the message that is to be done by the plumber
-and defines how to dispose of a message, such as by sending it to a port or
-starting a new process to handle it.
-<br>&#32;<br>
-The behavior is best described by example.
-Imagine that the user has, in a terminal emulator window,
-just run a compilation that has failed:
-<DL><DT><DD><TT><PRE>
-% make
-cc -c rmstar.c
-rmstar.c:32: syntax error
-...
-</PRE></TT></DL>
-The user points the typing cursor somewhere in the string
-<TT>rmstar.c:32:</TT>
-and executes the
-<TT>plumb</TT>
-menu entry.
-This causes the terminal emulator to format a plumbing message
-containing the entire string surrounding the cursor,
-<TT>rmstar.c:32:</TT>,
-and to write it to
-<TT>/mnt/plumb/send</TT>.
-The plumber receives this message and compares it sequentially to the various
-patterns in its configuration.
-Eventually, it will find one that breaks the string into pieces,
-<TT>rmstar.c</TT>,
-a colon,
-<TT>32</TT>,
-and the final colon.
-Other associated patterns verify that
-<TT>rmstar.c</TT>
-is a file in the current directory of the program generating
-the message, and that
-<TT>32</TT>
-looks like a line number within it.
-The plumber rewrites the message,
-setting the data to the string
-<TT>rmstar.c</TT>
-and attaching an indication that
-<TT>32</TT>
-is a line number to display.
-Finally, it sends the resulting message to the
-<TT>edit</TT>
-port.
-The text editor picks up the message, opens
-<TT>rmstar.c</TT>
-(if it's not already open) and highlights line 32, the location of the syntax error.
-<br>&#32;<br>
-From the user's point of view, this process is simple: the error message appears,
-it is `plumbed', and the editor jumps to the problem.
-<br>&#32;<br>
-Of course, there are many different ways to cause compiler messages to
-pop up the source of an error,
-but the design of the plumber addresses more general issues than the specific
-goal of shortening the compile/debug/edit cycle.
-It facilitates the general exchange of data among programs, interactive or otherwise,
-throughout the environment, and its
-architecture&#173;a central, language-driven file server&#173;although
-unusual, has distinct advantages.
-It makes plumbing easy to add to an existing, Unix-like command environment;
-it guarantees uniform handling of inter-application messages;
-it off-loads from those applications most of the work of extracting and dispatching messages;
-and it works transparently and effortlessly across a network.
-<br>&#32;<br>
-This paper is organized bottom-up, beginning with the format of the messages
-and proceeding through the plumbing language, the handling of messages,
-and the interactive user interface.
-The last sections discuss the implications of the design
-and compare the plumbing system to other environments that
-provide similar services.
-<H4>Format of messages
-</H4>
-<br>&#32;<br>
-Since the language that controls the plumber is defined in terms of the
-contents of plumbing messages, we begin by describing their layout.
-<br>&#32;<br>
-Plumbing messages have a fixed-format textual
-header followed by a free-format data section.
-The header consists of six lines of text, in set order,
-each specifying a property of the message.
-Any line may be blank except the last, which is the length of the data portion of the
-message, as a decimal string.
-The lines are, in order:
-<DL>
-<DT><DT>&#32;<DD>
-The source application, the name of the program generating the message.
-<DT><DT>&#32;<DD>
-The destination port, the name of the port to which the messages should be sent.
-<DT><DT>&#32;<DD>
-The working directory in which the message was generated.
-<DT><DT>&#32;<DD>
-The type of the data, analogous to a MIME type, such as
-<TT>text</TT>
-or
-<TT>image/gif</TT>.
-<DT><DT>&#32;<DD>
-Attributes of the message, given as blank-separated
-<I>name</I><TT>=</TT><I>value</I>
-pairs.
-The values may be quoted to protect
-blanks or quotes; values may not contain newlines.
-<DT><DT>&#32;<DD>
-The length of the data section, in bytes.
-</dl>
-<br>&#32;<br>
-Here is a sample message, one that (conventionally) tells the editor to open the file
-<TT>/usr/rob/src/mem.c</TT>
-and display line
-27 within it:
-<DL><DT><DD><TT><PRE>
-plumbtest
-edit
-/usr/rob/src
-text
-addr=27
-5
-mem.c
-</PRE></TT></DL>
-Because in general it need not be text, the data section of the message has no terminating newline.
-<br>&#32;<br>
-A library interface simplifies the processing of messages by translating them
-to and from a data structure,
-<TT>Plumbmsg</TT>,
-defined like this:
-<DL><DT><DD><TT><PRE>
-typedef struct Plumbattr Plumbattr;
-typedef struct Plumbmsg  Plumbmsg;
-
-struct Plumbmsg
-{
-	char			*src;		/* source application */
-	char			*dst;		/* destination port */
-	char			*wdir;	/* working directory */
-	char			*type;	/* type of data */
-	Plumbattr	*attr;	/* attribute list */
-	int			ndata;	/* #bytes of data */
-	char			*data;
-};
-
-struct Plumbattr
-{
-	char			*name;
-	char			*value;
-	Plumbattr	*next;
-};
-</PRE></TT></DL>
-The library also includes routines to send a message, receive a message,
-manipulate the attribute list, and so on.
-<H4>The Language
-</H4>
-<br>&#32;<br>
-An instance of the plumber runs for each user on each terminal or workstation.
-It
-begins by reading its rules from the file
-<TT>lib/plumbing</TT>
-in the user's home directory,
-which in turn may use
-<TT>include</TT>
-statements to interpolate macro definitions and
-rules from standard plumbing rule libraries stored in
-<TT>/sys/lib/plumb</TT>.
-<br>&#32;<br>
-The rules control the processing of messages.
-They are written in
-a pattern-action language comprising a sequence of blank-line-separated
-<I>rule</I>
-<I>sets</I>,
-each of which contains one or more
-<I>patterns</I>
-followed by one or more
-<I>actions</I>.
-Each incoming message is compared against the rule sets in order.
-If all the patterns within a rule set succeed,
-one of the associated actions is taken and processing completes.
-<br>&#32;<br>
-The syntax of the language is straightforward.
-Each rule (pattern or action) has three components, separated by white space:
-an
-<I>object</I>,
-a
-<I>verb</I>,
-and optional
-<I>arguments</I>.
-The object
-identifies a part of the message, such as
-the source application
-(<TT>src</TT>),
-or the data
-portion of the message
-(<TT>data</TT>),
-or the rule's own arguments
-(<TT>arg</TT>);
-or it is the keyword
-<TT>plumb</TT>,
-which introduces an action.
-The verb specifies an operation to perform on the object, such as the word
-`<TT>is</TT>'
-to require precise equality between the object and the argument, or
-`<TT>isdir</TT>'
-to require that the object be the name of a directory.
-<br>&#32;<br>
-For instance, this rule set sends messages containing the names of files
-ending in
-<TT>.gif</TT>,
-<TT>.jpg</TT>,
-etc. to a program,
-<TT>page</TT>,
-to display them; it is analogous to a Windows association rule:
-<DL><DT><DD><TT><PRE>
-# image files go to page
-type is text
-data matches '[a-zA-Z0-9_\-./]+'
-data matches '([a-zA-Z0-9_\-./]+)\.(jpe?g|gif|bit|tiff|ppm)'
-arg isfile $0
-plumb to image
-plumb client page -wi
-</PRE></TT></DL>
-(Lines beginning with
-<TT>#</TT>
-are commentary.)
-Consider how this rule handles the following message, annotated down the left column for clarity:
-<DL><DT><DD><TT><PRE>
-<I>src</I>	plumbtest
-<I>dst</I>
-<I>wdir</I>	/usr/rob/pics
-<I>type</I>	text
-<I>attr</I>
-<I>ndata</I>	9
-<I>data</I>	horse.gif
-</PRE></TT></DL>
-The
-<TT>is</TT>
-verb specifies a precise match, and the
-<TT>type</TT>
-field of the message is the string
-<TT>text</TT>,
-so the first pattern succeeds.
-The
-<TT>matches</TT>
-verb invokes a regular expression pattern match of the object (here
-<TT>data</TT>)
-against the argument pattern.
-Both
-<TT>matches</TT>
-patterns in this rule set will succeed, and in the process set the variables
-<TT>$0</TT>
-to the matched string,
-<TT>$1</TT>
-to the first parenthesized submatch, and so on (analogous to
-<TT>&amp;</TT>,
-<TT>\1</TT>,
-etc. in
-<TT>ed</TT>'s
-regular expressions).
-The pattern
-<TT>arg</TT>
-<TT>isfile</TT>
-<TT>$0</TT>
-verifies that the named file,
-<TT>horse.gif</TT>,
-is an actual file in the directory
-<TT>/usr/rob/pics</TT>.
-If all the patterns succeed, one of the actions will be executed.
-<br>&#32;<br>
-There are two actions in this rule set.
-The
-<TT>plumb</TT>
-<TT>to</TT>
-rule specifies
-<TT>image</TT>
-as the destination port of the message.
-By convention, the plumber mounts its services in the directory
-<TT>/mnt/plumb</TT>,
-so in this case if the file
-<TT>/mnt/plumb/image</TT>
-has been opened, the message will be made available to the program reading from it.
-Note that the message does not name a port, but the rule set that matches
-the message does, and that is sufficient to dispatch the message.
-If on the other hand a message matches no rule but has an explicit port mentioned,
-that too is sufficient.
-<br>&#32;<br>
-If no client has opened the
-<TT>image</TT>
-port,
-that is, if the program
-<TT>page</TT>
-is not already running, the
-<TT>plumb</TT>
-<TT>client</TT>
-action gives the execution script to start the application
-and send the message on its way; the
-<TT>-wi</TT>
-arguments tell
-<TT>page</TT>
-to create a window and to receive its initial arguments from the plumbing port.
-The process by which the plumber starts a program is described in more detail in the next section.
-<br>&#32;<br>
-It may seem odd that there are two
-<TT>matches</TT>
-rules in this example.
-The reason is related to the way the plumber can use the rules themselves
-to refine the
-<I>data</I>
-in the message, somewhat in the manner of Structural Regular Expressions [Pike87a].
-For example, consider what happens if the cursor is at the last character of
-<DL><DT><DD><TT><PRE>
-% make nightmare&gt;horse.gif
-</PRE></TT></DL>
-and the user asks to plumb what the cursor is pointing at.
-The program creating the plumbing
-message&#173;in this case the terminal emulator running the window&#173;can send the
-entire white-space-delimited string
-<TT>nightmare&gt;horse.gif</TT>
-or even the entire line, and the combination of
-<TT>matches</TT>
-rules can determine that the user was referring to the string
-<TT>horse.gif</TT>.
-The user could of course select the entire string
-<TT>horse.gif</TT>,
-but it's more convenient just to point in the general location and let the machine
-figure out what should be done.
-The process is as follows.
-<br>&#32;<br>
-The application generating the message adds a special attribute to the message, named
-<TT>click</TT>,
-whose numerical value is the offset of the cursor&#173;the selection point&#173;within the data string.
-This attribute tells the plumber two things:
-first, that the regular expressions in
-<TT>matches</TT>
-rules should be used to identify the relevant data;
-and second, approximately where the relevant data lies.
-The plumber 
-will then use the first
-<TT>matches</TT>
-pattern to identify the longest leftmost match that touches the cursor, which will extract the string
-<TT>horse.gif</TT>,
-and the second pattern will then verify that that names a picture file.
-The rule set succeeds and the data is winnowed to the matching substring
-before being sent to its destination.
-<br>&#32;<br>
-Each
-<TT>matches</TT>
-pattern within a given rule set must match the same portion of the string, which
-guarantees that the rule set fails to match a string for which the
-second pattern matches only a portion.
-For instance, our example rule set should not execute if the data is the string
-<TT>horse.gift</TT>,
-and although the first pattern will match
-<TT>horse.gift</TT>,
-the second will match only
-<TT>horse.gif</TT>
-and the rule set will fail.
-<br>&#32;<br>
-The same approach of multiple
-<TT>matches</TT>
-rules can be used to exclude, for instance, a terminal period from
-a file name or URL, so a file name or URL at the end of a sentence is recognized properly.
-<br>&#32;<br>
-If a
-<TT>click</TT>
-attribute is not specified, all patterns must match the entire string,
-so the user has an option:
-he or she may select exactly what data to send,
-or may instead indicate where the data is by clicking the selection button on the mouse
-and letting the machine locate the URL or image file name within the text.
-In other words,
-the user can control the contents of the message precisely when required,
-but the default, simplest action in the user interface does the right thing most of the time.
-<H4>How Messages are Handled in the Plumber
-</H4>
-<br>&#32;<br>
-An application creates a message header, fills in whatever fields it wishes to define,
-attaches the data, and writes the result to the file
-<TT>send</TT>
-in the plumber's service directory,
-<TT>/mnt/plumb</TT>.
-The plumber receives the message and applies the plumbing rules successively to it.
-When a rule set matches, the message is dispatched as indicated by that rule set
-and processing continues with the next message.
-If no rule set matches the message, the plumber indicates this by returning a write
-error to the application, that is, the write to
-<TT>/mnt/plumb/send</TT>
-fails, with the resulting error string
-describing the failure.
-(Plan 9 uses strings rather than pre-defined numbers to describe error conditions.)
-Thus a program can discover whether a plumbing message has been sent successfully.
-<br>&#32;<br>
-After a matching rule set has been identified, the plumber applies a series of rewriting
-steps to the message.  Some rewritings are defined by the rule set; others are implicit.
-For example, if the message does not specify a destination port, the outgoing message
-will be rewritten to identify it.
-If the message does specify the port, the rule set will only match if any
-<TT>plumb</TT>
-<TT>to</TT>
-action in the rule set names the same port.
-(If it matches no rule sets, but mentions a port, it will be sent there unmodified.)
-<br>&#32;<br>
-The rule set may contain actions that explicitly rewrite components of the message.
-These may modify the attribute list or replace the data section of the message.
-Here is a sample rule set that does both.
-It matches strings of the form
-<TT>plumb.h</TT>
-or
-<TT>plumb.h:27</TT>.
-If that string identifies a file in the standard C include directory,
-<TT>/sys/include</TT>,
-perhaps with an optional line number, the outgoing message
-is rewritten to contain the full path name and an attribute,
-<TT>addr</TT>,
-to hold the line number:
-<DL><DT><DD><TT><PRE>
-# .h files are looked up in /sys/include and passed to edit
-type is text
-data matches '([a-zA-Z0-9]+\.h)(:([0-9]+))?'
-arg isfile /sys/include/$1
-data set /sys/include/$1
-attr add addr=$3
-plumb to edit
-</PRE></TT></DL>
-The
-<TT>data</TT>
-<TT>set</TT>
-rule replaces the contents of the data, and the
-<TT>attr</TT>
-<TT>add</TT>
-rule adds a new attribute to the message.
-The intent of this rule is to permit one to plumb an include file name in a C program
-to trigger the opening of that file, perhaps at a specified line, in the text editor.
-A variant of this rule, discussed below,
-tells the editor how to interpret syntax errors from the compiler,
-or the output of
-<TT>grep</TT>
-<TT>-n</TT>,
-both of which use a fixed syntax
-<I>file</I><TT>:</TT><I>line</I>
-to identify a line of source.
-<br>&#32;<br>
-The Plan 9 text editors interpret the
-<TT>addr</TT>
-attribute as the definition of which portion of the file to display.
-In fact, the real rule includes a richer definition of the address syntax,
-so one may plumb strings such as
-<TT>plumb.h:/plumbsend</TT>
-(using a regular expression after the
-<TT>/</TT>)
-to pop up the declaration of a function in a C header file.
-<br>&#32;<br>
-Another form of rewriting is that the plumber may modify the attribute list of
-the message to clarify how to handle the message.
-The primary example of this involves the treatment of the
-<TT>click</TT>
-attribute, described in the previous section.
-If the message contains a
-<TT>click</TT>
-attribute and the matching rule set uses it to extract the matching substring from the data,
-the plumber
-deletes the
-<TT>click</TT>
-attribute and replaces the data with the matching substring.
-<br>&#32;<br>
-Once the message is rewritten, the actions of the matching rule set are examined.
-If the rule set contains a
-<TT>plumb</TT>
-<TT>to</TT>
-action and the corresponding port is open&#173;that is, if a program is already reading
-from that port&#173;the message is delivered to the port.
-The application will receive the message and handle it as it sees fit.
-If the port is not open, a
-<TT>plumb</TT>
-<TT>start</TT>
-or
-<TT>plumb</TT>
-<TT>client</TT>
-action will start a new program to handle the message.
-<br>&#32;<br>
-The
-<TT>plumb</TT>
-<TT>start</TT>
-action is the simpler: its argument specifies a command to run
-instead of passing on the message; the message is discarded.
-Here for instance is a rule that, given the process id (pid) of an existing process,
-starts the
-<TT>acid</TT>
-debugger [Wint94] in a new window to examine that process:
-<DL><DT><DD><TT><PRE>
-# processes go to acid (assuming strlen(pid) &gt;= 2)
-type is text
-data matches '[a-zA-Z0-9.:_\-/]+'
-data matches '[0-9][0-9]+'
-arg isdir /proc/$0
-plumb start window acid $0
-</PRE></TT></DL>
-(Note the use of multiple
-<TT>matches</TT>
-rules to avoid misfires from strings like
-<TT>party.1999</TT>.)
-The
-<TT>arg</TT>
-<TT>isdir</TT>
-rule checks that the pid represents a running process (or broken one; Plan 9 does not create
-<TT>core</TT>
-files but leaves broken processes around for debugging) by checking that the process file
-system has a directory for that pid [Kill84].
-Using this rule, one may plumb the pid string printed by the
-<TT>ps</TT>
-command or by the operating system when the program breaks;
-the debugger will then start automatically.
-<br>&#32;<br>
-The other startup action,
-<TT>plumb</TT>
-<TT>client</TT>,
-is used when a program will read messages from the plumbing port.
-For example,
-text editors can read files specified as command arguments, so one could use a
-<TT>plumb</TT>
-<TT>start</TT>
-rule to begin editing a file.
-If, however, the editor will read messages from the
-<TT>edit</TT>
-plumbing port, letting it read the message
-from the port ensures that it uses other information in the message,
-such as the line number to display.
-The
-<TT>plumb</TT>
-<TT>client</TT>
-action is therefore like
-<TT>plumb</TT>
-<TT>start</TT>,
-but keeps the message around for delivery when the application opens the port.
-Here is the full rule set to pass a regular file to the text editor:
-<DL><DT><DD><TT><PRE>
-# existing files, possibly tagged by address, go to editor
-type is text
-data matches '([.a-zA-Z0-9_/\-]*[a-zA-Z0-9_/\-])('$addr')?'
-arg isfile $1
-data set $1
-attr add addr=$3
-plumb to edit
-plumb client window $editor
-</PRE></TT></DL>
-If the editor is already running, the
-<TT>plumb</TT>
-<TT>to</TT>
-rule causes it to receive the message on the port.
-If not,
-the command
-`<TT>window</TT>
-<TT>$editor</TT>'
-will create a new window (using the Plan 9 program
-<TT>window</TT>)
-to run the editor, and once that starts it will open the
-<TT>edit</TT>
-plumbing port as usual and discover this first message already waiting.
-<br>&#32;<br>
-The variables
-<TT>$editor</TT>
-and
-<TT>$addr</TT>
-in this rule set
-are macros defined in the plumbing rules file; they specify the name of the user's favorite text editor
-and a regular expression
-that matches that editor's address syntax, such as line numbers and patterns.
-This rule set lives in a library of shared plumbing rules that
-users' private rules can build on,
-so the rule set needs to be adaptable to different editors and their address syntax.
-The macro definitions for Acme and Sam [Pike94,Pike87b] look like this:
-<DL><DT><DD><TT><PRE>
-editor=acme
-# or editor=sam
-addrelem='((#?[0-9]+)|(/[A-Za-z0-9_\^]+/?)|[.$])'
-addr=:($addrelem([,;+\-]$addrelem)*)
-</PRE></TT></DL>
-<br>&#32;<br>
-Finally, the application reads the message from the appropriate port, such as
-<TT>/mnt/plumb/edit</TT>,
-unpacks it, and goes to work.
-<H4>Message Delivery
-</H4>
-<br>&#32;<br>
-In summary, a message is delivered by writing it to the
-<TT>send</TT>
-file and having the plumber, perhaps after some rewriting, send it to the destination
-port or start a new application to handle it.
-If no destination can be found by the plumber, the original write to the
-<TT>send</TT>
-file will fail, and the application will know the message could not be delivered.
-<br>&#32;<br>
-If multiple applications are reading from the destination port, each will receive
-an identical copy of the message; that is, the plumber implements fan-out.
-The number of messages delivered is equal to the number of clients that have
-opened the destination port.
-The plumber queues the messages and makes sure that each application that opened
-the port before the message was written gets exactly one copy.
-<br>&#32;<br>
-This design minimizes blocking in the sending applications, since the write to the
-<TT>send</TT>
-file can complete as soon as the message has been queued for the appropriate port.
-If the plumber waited for the message to be read by the recipient, the sender could
-block unnecessarily.
-Unfortunately, this design also means that there is no way for a sender to know when
-the message has been handled; in fact, there are cases when
-the message will not be delivered at all, such as if the recipient exits while there are
-still messages in the queue.
-Since the plumber is part of a user interface, and not
-an autonomous message delivery system,
-the decision was made to give the
-non-blocking property priority over reliability of message delivery.
-In practice, this tradeoff has worked out well:
-applications almost always know when a message has failed to be delivered (the
-<TT>write</TT>
-fails because no destination could be found),
-and those occasions when the sender believes incorrectly that the message has been delivered
-are both extremely rare and easily recognized by the user&#8212;usually because the recipient
-application has exited.
-<H4>The Rules File
-</H4>
-<br>&#32;<br>
-The plumber begins execution by reading the user's startup plumbing rules file,
-<TT>lib/plumbing</TT>.
-Since the plumber is implemented as a file server, it can also present its current rules
-as a dynamic file, a design that provides an easily understood way to maintain the rules.
-<br>&#32;<br>
-The file
-<TT>/mnt/plumb/rules</TT>
-is the text of the rule set the plumber is currently using,
-and it may be edited like a regular file to update those rules.
-To clear the rules, truncate that file;
-to add a new rule set, append to it:
-<DL><DT><DD><TT><PRE>
-% echo 'type is text
-data is self-destruct
-plumb start rm -rf $HOME' &gt;&gt; /mnt/plumb/rules
-</PRE></TT></DL>
-This rule set will take effect immediately.
-If it has a syntax error, the write will fail with an error message from the plumber,
-such as `malformed rule' or `undefined verb'.
-<br>&#32;<br>
-To restore the plumber to its startup configuration,
-<DL><DT><DD><TT><PRE>
-% cp /usr/user/lib/plumbing /mnt/plumb/rules
-</PRE></TT></DL>
-For more sophisticated changes,
-one can of course use a regular text editor to modify
-<TT>/mnt/plumb/rules</TT>.
-<br>&#32;<br>
-This simple way of maintaining an active service could profitably be adopted by other systems.
-It avoids the need to reboot, to update registries with special tools, or to send asynchronous signals
-to critical programs.
-<H4>The User Interface
-</H4>
-<br>&#32;<br>
-One unusual property of the plumbing system is that
-the user interface that programs provide to access it can vary considerably, yet
-the result is nonetheless a unifying force in the environment.
-Shells talk to editors, image viewers, and web browsers; debuggers talk to editors;
-editors talk to themselves; and the window system talks to everybody.
-<br>&#32;<br>
-The plumber grew out of some of the ideas of the Acme editor/window-system/user interface [Pike94],
-in particular its `acquisition' feature.
-With a three-button mouse, clicking the right button in Acme on a piece of text tells Acme to
-get the thing being pointed to.
-If it is a file name, open the file;
-if it is a directory, open a viewer for its contents;
-if a line number, go to that line;
-if a regular expression, search for it.
-This one-click access to anything describable textually was very powerful but had several
-limitations, of which the most important were that Acme's rules for interpreting the
-text (that is, the implicit hyperlinks) were hard-wired and inflexible, and
-that they only applied to and within Acme itself.
-One could not, for example, use Acme's power to open an image file, since Acme is
-a text-only system.
-<br>&#32;<br>
-The plumber addresses these limitations, even with Acme itself:
-Acme now uses the plumber to interpret the right button clicks for it.
-When the right button is clicked on some text,
-Acme constructs a plumbing message much as described above,
-using the
-<TT>click</TT>
-attribute and the white-space-delimited text surrounding the click.
-It then writes the message to the plumber; if the write succeeds, all is well.
-If not, it falls back to its original, internal rules, which will result in a context search
-for the word within the current document.
-<br>&#32;<br>
-If the message is sent successfully, the recipient is likely to be Acme itself, of course:
-the request may be to open a file, for example.
-Thus Acme has turned the plumber into an external component of its own operation,
-while expanding the possibilities; the operation might be to start an image viewer to
-open a picture file, something Acme cannot do itself.
-The plumber expands the power of Acme's original user interface.
-<br>&#32;<br>
-Traditional menu-driven programs such as the text editor Sam [Pike87b] and the default
-shell window of the window
-system
-<TT>8&#189;</TT>
-[Pike91] cannot dedicate a mouse button solely to plumbing, but they can certainly
-dedicate a menu entry.
-The editing menu for such programs now contains an entry,
-<TT>plumb</TT>,
-that creates a plumbing message using the current selection.
-(Acme manages to send a message by clicking on the text with one button;
-other programs require a click with the select button and then a menu operation.)
-For example, after this happens in a shell window:
-<DL><DT><DD><TT><PRE>
-% make
-cc -c shaney.c
-shaney.c:232: i undefined
-...
-</PRE></TT></DL>
-one can click anywhere on the string
-<TT>shaney.c:232</TT>,
-execute the
-<TT>plumb</TT>
-menu entry, and have line 232 appear in the text editor, be it Sam or Acme&#8212;whichever has the
-<TT>edit</TT>
-port open.
-(If this were an Acme shell window, it would be sufficient to right-click on the string.)
-<br>&#32;<br>
-[An interesting side line is how the window system knows what directory the
-shell is running in; in other words, what value to place in the
-<TT>wdir</TT>
-field of the plumb message.
-Recall that
-<TT>8&#189;</TT>
-is, like many Plan 9 programs, a file server.
-It now serves a new file,
-<TT>/dev/wdir</TT>,
-that is private to each window.
-Programs, in particular the
-Plan 9 shell,
-<TT>rc</TT>,
-can write that file to inform the window system of its current directory.
-When a
-<TT>cd</TT>
-command is executed in an interactive shell,
-<TT>rc</TT>
-updates the contents of
-<TT>/dev/wdir</TT>
-and plumbing can proceed with local file names.]
-<br>&#32;<br>
-Of course, users can plumb image file names, process ids, URLs, and other items&#8212;any string
-whose syntax and disposition are defined in the plumbing rules file.
-An example of how the pieces fit together is the way Plan 9 now handles mail, particularly
-MIME-encoded messages.
-<br>&#32;<br>
-When a new mail message arrives, the mail receiver process sends a plumbing message to the
-<TT>newmail</TT>
-port, which notifies any interested process that new mail is here.
-The plumbing message contains information about the mail, including
-its sender, date, and current location in the file system.
-The interested processes include a program,
-<TT>faces</TT>,
-that gives a graphical display of the mail box using
-faces to represent the senders of messages [PiPr85],
-as well as interactive mail programs such as the Acme mail viewer [Pike94].
-The user can then click on the face that appears, and the
-<TT>faces</TT>
-program will send another plumbing message, this time to the
-<TT>showmail</TT>
-port.
-Here is the rule for that port:
-<DL><DT><DD><TT><PRE>
-# faces -&gt; new mail window for message
-type is text
-data matches '[a-zA-Z0-9_\-./]+'
-data matches '/mail/fs/[a-zA-Z0-9/]+/[0-9]+'
-plumb to showmail
-plumb start window edmail -s $0
-</PRE></TT></DL>
-If a program, such as the Acme mail reader, is reading that port, it will open a new window
-in which to display the message.
-If not, the
-<TT>plumb</TT>
-<TT>start</TT>
-rule will create a new window and run
-<TT>edmail</TT>,
-a conventional mail reading process, to examine it.
-Notice how the plumbing connects the components of the interface together the same way
-regardless of which components are actually being used to view mail.
-<br>&#32;<br>
-There is more to the mail story.
-Naturally, mail boxes in Plan 9 are treated as little file systems, which are synthesized
-on demand by a special-purpose file server that takes a flat mail box file and converts
-it into a set of directories, one per message, with component files containing the header,
-body, MIME information, and so on.
-Multi-part MIME messages are unpacked into multi-level directories, like this:
-<DL><DT><DD><TT><PRE>
-% ls -l /mail/fs/mbox/25
-d-r-xr-xr-x M 20 rob rob     0 Nov 21 13:06 /mail/fs/mbox/25/1
-d-r-xr-xr-x M 20 rob rob     0 Nov 21 13:06 /mail/fs/mbox/25/2
---r--r--r-- M 20 rob rob 28678 Nov 21 13:06 /mail/fs/mbox/25/body
---r--r--r-- M 20 rob rob     0 Nov 21 13:06 /mail/fs/mbox/25/cc
-...
-% mail
-25 messages
-: 25
-From: presotto
-Date: Sun Nov 21 13:05:51 EST 1999
-To: rob
-
-Check this out.
-
-===&gt; 2/ (image/jpeg) [inline]
-	/mail/fs/mbox/25/2/fabio.jpg
-:
-</PRE></TT></DL>
-Since the components are all (synthetic) files, the user can plumb the pieces
-to view embedded pictures, URLs, and so on.
-Note that the mail program can plumb the contents of
-<TT>inline</TT>
-attachments automatically, without user interaction;
-in other words, plumbing lets the mailer handle multimedia data
-without itself interpreting it.
-<br>&#32;<br>
-At a more mundane level, a shell command,
-<TT>plumb</TT>,
-can be used to send messages:
-<DL><DT><DD><TT><PRE>
-% cd /usr/rob/src
-% plumb mem.c
-</PRE></TT></DL>
-will send the appropriate message to the
-<TT>edit</TT>
-port.
-A surprising use of the
-<TT>plumb</TT>
-command is in actions within the plumbing rules file.
-In our lab, we commonly receive Microsoft Word documents by mail,
-but we do not run Microsoft operating systems on our machines so we cannot
-view them without at least rebooting.
-Therefore, when a Word document arrives in mail, we could plumb the
-<TT>.doc</TT>
-file but the text editor could not decode it.
-However, we have a program,
-<TT>doc2txt</TT>,
-that decodes the Word file format to extract and format the embedded text.
-The solution is to use
-<TT>plumb</TT>
-in a
-<TT>plumb</TT>
-<TT>start</TT>
-action to invoke
-<TT>doc2txt</TT>
-on
-<TT>.doc</TT>
-files and synthesize a plain text file:
-<DL><DT><DD><TT><PRE>
-# rule set for microsoft word documents
-type is text
-data matches '[a-zA-Z0-9_\-./]+'
-data matches '([a-zA-Z0-9_\-./]+)\.doc'
-arg isfile $0
-plumb start doc2txt $data | \
-    plumb -i -d edit -a action=showdata -a filename=$0
-</PRE></TT></DL>
-The arguments to
-<TT>plumb</TT>
-tell it to take standard input as its data rather than the text of the arguments
-(<TT>-i</TT>),
-define the destination port
-(<TT>-d</TT>
-<TT>edit</TT>),
-and set a conventional attribute so the editor knows to show the message data
-itself rather than interpret it as a file name
-(<TT>-a</TT>
-<TT>action=showdata</TT>)
-and provide the original file name
-(<TT>-a</TT>
-<TT>filename=$0</TT>).
-Now when a user plumbs a
-<TT>.doc</TT>
-file the plumbing rules run a process to extract the text and send it as a
-temporary file to the editor for viewing.
-It's imperfect, but it's easy and it beats rebooting.
-<br>&#32;<br>
-Another simple example is a rule that turns man pages into hypertext.
-Manual page entries of the form
-<TT>plumber(1)</TT>
-can be clicked on to pop up a window containing the formatted `man page'.
-That man page will in turn contain more such citations, which will also be clickable.
-The rule is a little like that for Word documents:
-<DL><DT><DD><TT><PRE>
-# man index entries are synthesized
-type is text
-data matches '([a-zA-Z0-9_\-./]+)\(([0-9])\)'
-plumb start man $2 $1 | \
-    plumb -i -d edit -a action=showdata -a filename=/man/$1($2)
-</PRE></TT></DL>
-<br>&#32;<br>
-There are many other inventive uses of plumbing.
-One more should give some of the flavor.
-We have a shell script,
-<TT>src</TT>,
-that takes as argument the name of an executable binary file.
-It examines the symbol table of the binary to find the source file
-from which it was compiled.
-Since the Plan 9 compilers place full source path names in the symbol table,
-<TT>src</TT>
-can discover the complete file name.
-That is then passed to
-<TT>plumb</TT>,
-complete with the line number to find the
-symbol
-<TT>main</TT>.
-For example,
-<DL><DT><DD><TT><PRE>
-% src plumb
-</PRE></TT></DL>
-is all it takes to pop up an editor window on the
-<TT>main</TT>
-routine of the
-<TT>plumb</TT>
-command, beginning at line 39 of
-<TT>/sys/src/cmd/plumb/plumb.c</TT>.
-Like most uses of plumbing,
-this is not a breakthrough in functionality, but it is a great convenience.
-<H4>Why This Architecture?
-</H4>
-<br>&#32;<br>
-The design of the plumbing system is peculiar:
-a centralized language-based file server does most of the work,
-while compared to other systems the applications themselves
-contribute relatively little.
-This architecture is deliberate, of course.
-<br>&#32;<br>
-That the plumber's behavior is derived from a linguistic description
-gives the system great flexibility and dynamism&#8212;rules can be added
-and changed at will, without rebooting&#8212;but the existence of a central library of rules
-ensures that, for most users, the environment behaves in well-established ways.
-<br>&#32;<br>
-That the plumber is a file server is perhaps the most unusual aspect of its design,
-but is also one of the most important.
-Messages are passed by regular I/O operations on files, so no extra technology
-such as remote procedure call or request brokers needs to be provided;
-messages are transmitted by familiar means.
-Almost every service in Plan 9 is a file server, so services can be exported
-trivially using the system's remote file system operations [Pike93].
-The plumber is no exception;
-plumbing messages pass routinely across the network to remote applications without
-any special provision,
-in contrast to some commercial IPC mechanisms that become
-significantly more complex when they involve multiple machines.
-As I write this, my window system is talking to applications running on three
-different machines, but they all share a single instance of the plumber and so
-can interoperate to integrate my environment.
-Plan 9 uses a shared file name space
-to combine multiple networked machines&#8212;compute servers,
-file servers, and interactive workstations&#8212;into a single
-computing environment; plumbing's design as a file server
-is a natural by-product of, and contributor to, the overall system architecture
-[Pike92].
-<br>&#32;<br>
-The centrality of the plumber is also unusual.
-Other systems tend to let the applications determine where messages will go;
-consider mail readers that recognize and highlight URLs in the messages.
-Why should just the mail readers do this, and why should they just do it for URLs?
-(Acme was guilty of similar crimes.)
-The plumber, by removing such decisions to a central authority,
-guarantees that all applications behave the same and simultaneously
-frees them all from figuring out what's important.
-The ability for the plumber to excerpt useful data from within a message
-is critical to the success of this model.
-<br>&#32;<br>
-The entire system is remarkably small.
-The plumber itself is only about two thousand lines of C code.
-Most applications work fine in a plumbing environment without knowing about it at all;
-some need trivial changes such as to standardize their error output;
-a few need to generate and receive plumbing messages.
-But even to add the ability to send and receive messages in a program such as a text editor is short work,
-involving typically a few dozen lines of code.
-Plumbing fits well into the existing environment.
-<br>&#32;<br>
-But plumbing is new and it hasn't been pushed far enough yet.
-Most of the work so far has been with textual messages, although
-the underlying system is capable of handling general data.
-We plan to reimplement some of the existing data movement operations,
-such as cut and paste or drag and drop, to use plumbing as their exchange mechanism.
-Since the plumber is a central message handler, it is an obvious place to store the `clipboard'.
-The clipboard could be built as a special port that holds onto messages rather than
-deleting them after delivery.
-Since the clipboard would then be holding a plumbing
-message rather than plain text, as in the current Plan 9 environment,
-it would become possible to cut and paste arbitrary data without
-providing new mechanism.
-In effect, we would be providing a new user interface to the existing plumbing facilities.
-<br>&#32;<br>
-Another possible extension is the ability to override plumbing operations interactively.
-Originally, the plan was to provide a mechanism, perhaps a pop-up menu, that one could
-use to direct messages, for example to send a PostScript file to the editor rather than the
-PostScript viewer by naming an explicit destination in the message.
-Although this deficiency should one day be addressed, it should be done without
-complicating the interface for invoking the default behavior.
-Meanwhile, the default behavior seems to work very well in practice&#8212;as it
-must if plumbing is to be successful&#8212;so the lack of
-overrides is not keenly felt.
-<H4>Comparison with Other Systems
-</H4>
-<br>&#32;<br>
-The ideas of the plumbing system grew from an
-attempt to generalize the way Acme acquires files and data.
-Systems further from that lineage also share some properties with plumbing.
-Most, however, require explicit linking or message passing rather than
-plumbing's implicit, context-based pattern matching, and none
-has the plumber's design of a language-based file server.
-<br>&#32;<br>
-Reiss's FIELD system [Reis95] probably comes the closest to providing the facilities of the plumber.
-It has a central message-passing mechanism that connects applications together through
-a combination of a library and a pattern-matching central message dispatcher that handles
-message send and reply.
-The main differences between FIELD's message dispatcher and the plumber are first
-that the plumber is based on a special-purpose language while the FIELD
-system uses an object-oriented library, second that the plumber has no concept
-of a reply to a message, and finally that the FIELD system
-has no concept of port.
-But the key distinction is probably in the level of use.
-In FIELD, the message dispatcher is a critical integrating force of the underlying
-programming environment, handling everything from debugging events to
-changing the working directory of a program.
-Plumbing, by contrast, is intended primarily for integrating the user interface
-of existing tools; it is more modest and very much simpler.
-The central advantage of the plumber is its convenience and dynamism;
-the FIELD system does not share the ease with which
-message dispatch rules can be added or modified.
-<br>&#32;<br>
-The inspiration for Acme was
-the user interface to the object-oriented Oberon system [WiGu92].
-Oberon's user interface interprets mouse clicks on strings such as
-<TT>Obj.meth</TT>
-to invoke calls to the method
-<TT>meth</TT>
-of the object
-<TT>Obj</TT>.
-This was the starting point for Acme's middle-button execution [Pike94],
-but nothing in Oberon is much like Acme's right-button `acquisition',
-which was the starting point for the plumber.
-Oberon's implicit method-based linking is not nearly as general as the pattern-matched
-linking of the plumber, nor does its style of user-triggered method call
-correspond well to the more general idea of inter-application communication
-of plumbing messages.
-<br>&#32;<br>
-Microsoft's OLE interface is another relative.
-It allows one application to
-<I>embed</I>
-its own data within another's,
-for example to place an Excel spreadsheet within a Frame document;
-when Frame needs to format the page, it will start Excel itself, or at least some of its
-DLLs, to format the spreadsheet.
-OLE data can only be understood by the application that created it;
-plumbing messages, by contrast, contain arbitrary data with a rigidly formatted header
-that will be interpreted by the pattern matcher and the destination application.
-The plumber's simplified message format may limit its
-flexibility but makes messages easy and efficient to dispatch and to interpret.
-At least for the cut-and-paste style of exchange OLE encourages,
-plumbing gives up some power in return for simplicity, while avoiding
-the need to invoke a vestigial program (if Excel can be called a vestige) every time
-the pasted data is examined.
-Plumbing is also better suited to
-other styles of data exchange, such as connecting compiler errors to the
-text editor.
-<br>&#32;<br>
-The Hyperbole [Wein] package for Emacs adds hypertext facilities to existing documents.
-It includes explicit links and, like plumbing, a rule-driven way to form implicit links.
-Since Emacs is purely textual, like Acme, Hyperbole does not easily extend to driving
-graphical applications, nor does it provide a general interprocess communication method.
-For instance, although Hyperbole provides some integration for mail applications,
-it cannot provide the glue that allows a click on a face icon in an external program to open a
-mail message within the viewer.
-Moreover, since it is not implemented as a file server,
-Hyperbole does not share the advantages of that architecture.
-<br>&#32;<br>
-Henry's
-<TT>error</TT>
-program in 4BSD echoes a small but common use of plumbing.
-It takes the error messages produced by a compiler and drives a text editor
-through the steps of looking at each one in turn; the notion is to quicken the
-compile/edit/debug cycle.
-Similar results are achieved in EMACS by writing special M-LISP
-macros to parse the error messages from various compilers.
-Although for this particular purpose they may be more convenient than plumbing,
-these are specific solutions to a specific problem and lack plumbing's generality.
-<br>&#32;<br>
-Of course, the resource forks in MacOS and the association rules for
-file name extensions in Windows also provide some of the functionality of
-the plumber, although again without the generality or dynamic nature.
-<br>&#32;<br>
-Closer to home, Ousterhout's Tcl (Tool Command Language) [Oust90]
-was originally designed to embed a little command interpreter
-in each application to control interprocess communication and
-provide a level of integration.
-Plumbing, on the other hand, provides minimal support within
-the application, offloading most of the message handling and all the
-command execution to the central plumber.
-<br>&#32;<br>
-The most obvious relative to plumbing is perhaps the hypertext links of a web browser.
-Plumbing differs by synthesizing
-the links on demand.
-Rather than constructing links within a document as in HTML,
-plumbing uses the context of a button click to derive what it should link to.
-That the rules for this decision can be modified dynamically gives it a more
-fluid feel than a standard web browsing world.
-One possibility for future work is to adapt a web browser to use
-plumbing as its link-following engine, much as Acme used plumbing to offload
-its acquisition rules.
-This would connect the web browser to the existing tools, rather than the
-current trend in most systems of replacing the tools by a browser.
-<br>&#32;<br>
-Each of these prior systems&#8212;and there are others, e.g. [Pasa93, Free93]&#8212;addresses
-a particular need or subset of the
-issues of system integration.
-Plumbing differs because its particular choices were different.
-It focuses on two key issues:
-centralizing and automating the handling of interprocess communication
-among interactive programs,
-and maximizing the convenience (or minimizing the trouble) for the human user
-of its services.
-Moreover, the plumber's implementation as a file server, with messages
-passed over files it controls,
-permits the architecture to work transparently across a network.
-None of the other systems discussed here integrates distributed systems
-as smoothly as local ones without the addition of significant extra technology.
-<H4>Discussion
-</H4>
-<br>&#32;<br>
-There were a few surprises during the development of plumbing.
-The first version of plumbing was done for the Inferno system [Dorw97a,Dorw97b],
-using its file-to-channel mechanism to mediate the IPC.
-Although it was very simple to build, it encountered difficulties because
-the plumber was too disconnected from its clients; in particular, there was
-no way to discover whether a port was in use.
-When plumbing was implemented afresh for Plan 9, it was provided through a true file server.
-Although this was much more work, it paid off handsomely.
-The plumber now knows whether a port is open, which makes it easy to decide whether
-a new program must be started to handle a message,
-and the ability to edit the rules file dynamically is a major advantage.
-Other advantages arise from the file-server design,
-such as
-the ease of exporting plumbing ports across the network to remote machines
-and the implicit security model a file-based interface provides: no one has
-permission to open my private plumbing files.
-<br>&#32;<br>
-On the other hand, Inferno was an all-new environment and the user interface for plumbing was
-able to be made uniform for all applications.
-This was impractical for Plan 9, so more
-<I>ad hoc</I>
-interfaces had to be provided for that environment.
-Yet even in Plan 9 the advantages of efficient,
-convenient, dynamic interprocess communication outweigh the variability of
-the user interface.
-In fact, it is perhaps a telling point that the system works well for a variety of interfaces;
-the provision of a central, convenient message-passing
-service is a good idea regardless of how the programs use it.
-<br>&#32;<br>
-Plumbing's rule language uses only regular expressions and a few special
-rules such as
-<TT>isfile</TT>
-for matching text.
-There is much more that could be done.  For example, in the current system a JPEG
-file can be recognized by a
-<TT>.jpg</TT>
-suffix but not by its contents, since the plumbing language has no facility
-for examining the
-<I>contents</I>
-of files named in its messages.
-To address this issue without adding more special rules requires rethinking
-the language itself.
-Although the current system seems a good balance of complexity
-and functionality,
-perhaps a richer, more general-purpose language would
-permit more exotic applications of the plumbing model.
-<br>&#32;<br>
-In conclusion, plumbing adds an effective, easy-to-use inter-application
-communication mechanism to the Plan 9
-user interface.
-Its unusual design as a language-driven file server makes it easy to add
-context-dependent, dynamically interpreted, general-purpose hyperlinks
-to the desktop, for both existing tools and new ones.
-<H4>Acknowledgements
-</H4>
-<br>&#32;<br>
-Dave Presotto wrote the mail file system and
-<TT>edmail</TT>.
-He, Russ Cox, Sape Mullender, and Cliff Young influenced the design, offered useful suggestions,
-and suffered early versions of the software.
-They also made helpful comments on this paper, as did Dennis Ritchie and Brian Kernighan.
-<H4>References
-</H4>
-<br>&#32;<br>
-[Dorw97a]
-Sean Dorward, Rob Pike, David Leo Presotto, Dennis M. Ritchie,
-Howard W. Trickey, and Philip Winterbottom,
-``Inferno'',
-<I>Proceedings of the IEEE Compcon 97 Conference</I>,
-San Jose, 1997, pp. 241-244.
-<br>&#32;<br>
-[Dorw97b]
-Sean Dorward, Rob Pike, David Leo Presotto, Dennis M. Ritchie,
-Howard W. Trickey, and Philip Winterbottom,
-``The Inferno Operating System'',
-<I>Bell Labs Technical Journal</I>,
-<B>2</B>,
-1, Winter, 1997.
-<br>&#32;<br>
-[Free93]
-FreeBSD,
-Syslog configuration file manual
-<A href="/magic/man2html/0/syslog.conf"><I>syslog.conf</I>(0).
-</A><br>&#32;<br>
-[Kill84]
-T. J. Killian,
-``Processes as Files'',
-<I>Proceedings of the Summer 1984 USENIX Conference</I>,
-Salt Lake City, 1984, pp. 203-207.
-<br>&#32;<br>
-[Oust90]
-John K. Ousterhout,
-``Tcl: An Embeddable Command Language'',
-<I>Proceedings of the Winter 1990 USENIX Conference</I>,
-Washington, 1990, pp. 133-146.
-<br>&#32;<br>
-[Pasa93]
-Vern Paxson and Chris Saltmarsh,
-``Glish: A User-Level Software Bus for Loosely-Coupled Distributed Systems'',
-<I>Proceedings of the Winter 1993 USENIX Conference</I>,
-San Diego, 1993, pp. 141-155.
-<br>&#32;<br>
-[Pike87a]
-Rob Pike,
-``Structural Regular Expressions'',
-<I>EUUG Spring 1987 Conference Proceedings</I>,
-Helsinki, May 1987, pp. 21-28.
-<br>&#32;<br>
-[Pike87b]
-Rob Pike,
-``The Text Editor sam'',
-<I>Software - Practice and Experience</I>,
-<B>17</B>,
-5, Nov. 1987, pp. 813-845.
-<br>&#32;<br>
-[Pike91]
-Rob Pike,
-``8&#189;, the Plan 9 Window System'',
-<I>Proceedings of the Summer 1991 USENIX Conference</I>,
-Nashville, 1991, pp. 257-265.
-<br>&#32;<br>
-[Pike93]
-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom,
-``The Use of Name Spaces in Plan 9'',
-<I>Operating Systems Review</I>,
-<B>27</B>,
-2, April 1993, pp. 72-76.
-<br>&#32;<br>
-[Pike94]
-Rob Pike,
-``Acme: A User Interface for Programmers'',
-<I>Proceedings of the Winter 1994 USENIX Conference</I>,
-San Francisco, 1994, pp. 223-234.
-<br>&#32;<br>
-[PiPr85]
-Rob Pike and Dave Presotto,
-``Face the Nation'',
-<I>Proceedings of the USENIX Summer 1985 Conference</I>,
-Portland, 1985, pg. 81.
-<br>&#32;<br>
-[Reis95]
-Steven P. Reiss,
-<I>The FIELD Programming Environment: A Friendly Integrated Environment for Learning and Development</I>,
-Kluwer, Boston, 1995.
-<br>&#32;<br>
-[Wein]
-Bob Weiner,
-<I>Hyperbole User Manual</I>,
-<TT>http://www.cs.indiana.edu/elisp/hyperbole/hyperbole_1.html</TT>
-<br>&#32;<br>
-[Wint94]
-Philip Winterbottom,
-``ACID: A Debugger based on a Language'',
-<I>Proceedings of the USENIX Winter Conference</I>,
-San Francisco, CA, 1994.
-<br>&#32;<br>
-[WiGu92]
-Niklaus Wirth and Jurg Gutknecht,
-<I>Project Oberon: The Design of an Operating System and Compilers</I>,
-Addison-Wesley, Reading, 1992.
-
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 500
sys/doc/port.html

@@ -1,500 +0,0 @@
-<html>
-<title>
-data
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>The Various Ports
-</H1>
-<P>
-This document collects comments about the various
-architectures supported by Plan 9.
-The system tries to hide most of the differences between machines,
-so the machines as seen by a Plan 9
-user look different from how they are perceived through commercial software.
-Also, because we are a small group, we couldn't do everything:
-exploit every optimization, support every model,
-drive every device.
-This document records what we
-<I>have</I>
-done.
-The first section discusses the compiler/assembler/loader suite for each machine.
-The second talks about
-the operating system implemented on each of the various
-machines.
-</P>
-<H4>The Motorola MC68020 compiler
-</H4>
-<P>
-This is the oldest compiler of the bunch.  Relative to its
-competitors&#8212;commercial compilers for the same machine&#8212;it generates
-quite good code.
-It assumes at least a 68020 architecture: some of the addressing
-modes it generates are not on the 68000 or 68010.
-</P>
-<P>
-We also use this compiler for the 68040.  Except for a few
-instructions and registers available only from assembly language,
-the only user-visible difference between these machines is in
-floating point.  Our 68020s all have 68881 or 68882 floating
-point units attached, so to execute floating point programs we
-depend on there being appropriate hardware.
-Unfortunately, the 68040 is not quite so thorough in its implementation
-of the IEEE 754 standard or in its provision of built-in instructions
-for the
-transcendental functions.  The latter was easy to get around: we
-don't use them on the 68020 either, but we do have a library,
-<TT>-l68881</TT>,
-that you can use if you need the performance (which can be
-substantial:
-<TT>astro</TT>
-runs twice as fast).
-We don't use this library by default because we want to run the same
-binaries on both machines and don't want to emulate
-<TT>FCOSH</TT>
-in the operating system.
-</P>
-<P>
-The problem with IEEE is nastier.  We didn't really want to deal
-with gradual underflow and all that, especially since we had
-half a dozen machines we'd need to do it on, so on the 68040
-we implement non-trapping underflow as truncation to zero and
-do nothing about denormalized numbers and not-a-numbers.
-This means the 68020
-and the 68040 are not precisely compatible.
-</P>
-<H4>The Motorola MC68000 compiler
-</H4>
-<P>
-This compiler is a stripped-down version of the MC68020 compiler
-built for an abortive port to the Dragonball processor on the Palm Pilot.
-It generates position-independent code whose overall quality is much
-poorer than the code for the MC68020.
-</P>
-<H4>The MIPS compiler
-</H4>
-<P>
-This compiler generates code for the R2000, R3000, and R4000 machines configured
-to be big-endian.  The compiler generates no R4000-specific instructions
-although the assembler and loader support the new user-mode instructions.
-There is no support for little-endian machines.
-(A little-endian port exists, but is not included in the distribution.
-Contact us if you need it.)
-Considering its speed, the Plan 9 compiler generates good code,
-but the commercial
-MIPS compiler with all the stops pulled out consistently beats it
-by 20% or so, sometimes more.  Since ours compiles about 10 times
-faster and we spend most of our time compiling anyway,
-we are content with the tradeoff.
-</P>
-<P>
-The compiler is solid: we've used it for several big projects and, of course,
-all our applications run under it.
-The behavior of floating-point programs is much like on the 68040:
-the operating system emulates where necessary to get past non-trapping
-underflow and overflow, but does not handle gradual underflow or
-denormalized numbers or not-a-numbers.
-</P>
-<H4>The SPARC compiler
-</H4>
-<P>
-The SPARC compiler is also solid and fast, although we haven't
-used it for a few years, due to a lack of current hardware.  We have seen it do
-much better than GCC with all the optimizations, but on average
-it is probably about the same.
-</P>
-<P>
-We used to run some old SPARC machines with no multiply or divide instructions,
-so the compiler
-does not produce them by default.
-Instead it calls internal subroutines.
-A loader flag,
-<TT>-M</TT>,
-causes the instructions to be emitted.  The operating system has
-trap code to emulate them if necessary, but the traps are slower than
-emulating them in user mode.
-In any modern lab, in which SPARCs have the instructions, it would be worth enabling the
-<TT>-M</TT>
-flag by default.
-</P>
-<P>
-The floating point story is the same as on the MIPS.
-</P>
-<H4>The Intel i386 compiler
-</H4>
-<P>
-This is really an
-<I>x</I>86
-compiler, for
-<I>x</I>&gt;2.
-It works only
-if the machine is in 32-bit protected mode.
-It is solid and generates tolerable code; it is our main compiler these days.
-</P>
-<P>
-Floating point is well-behaved, but the compiler assumes i387-compatible
-hardware to execute
-the instructions.  With 387 hardware,
-the system does the full IEEE 754 job, just like
-the MC68881.  By default, the libraries don't use the 387 built-ins for
-transcendentals.
-If you want them,
-build the code in
-<TT>/sys/src/libc/386/387</TT>.
-</P>
-<H4>The Intel i960 compiler
-</H4>
-<P>
-This compiler was built as a weekend hack to let us get the Cyclone
-boards running.  It has only been used to run one program&#8212;the on-board
-code in the Cyclone&#8212;and is therefore likely to be buggy.
-There are a number of obvious optimizations to the code that have
-never been attempted.
-For example, the compiler does not support pipelining.
-The code runs in little-endian mode.
-</P>
-<H4>The DEC Alpha compiler
-</H4>
-<P>
-The Alpha compiler is based on a port done by David Hogan while
-studying at the Basser Department of Computer Science, University of Sydney.
-It has been used to build a running version of the operating system, but has
-not been stressed as much as some of the other compilers.
-</P>
-<P>
-Although the Alpha is a 64-bit architecture, this compiler treats
-<TT>int</TT>s,
-<TT>long</TT>s
-and pointers as 32 bits.  Access to the 64-bit operations is available through the
-<TT>vlong</TT>
-type, as with the other architectures.
-</P>
-<P>
-The compiler assumes that the target CPU supports the optional byte and
-word memory operations (the ``BWX'' extension).
-If you have an old system, you can generate code without using the extension
-by passing the loader the
-<TT>-x</TT>
-option.
-</P>
-<P>
-There are a number of optimizations that the Alpha Architecture Handbook
-recommends, but this compiler does not do.  In particular, there is currently
-no support for the code alignment and code scheduling optimizations.
-</P>
-<P>
-The compiler tries to conform to IEEE, but some Alpha CPUs do not implement
-all of the rounding and trapping modes in silicon.  Fixing this problem requires
-some software emulation code in the kernel; to date, this has not been attempted.
-</P>
-<H4>The PowerPC compiler
-</H4>
-<P>
-The PowerPC compiler supports the 32-bit PowerPC architecture only;
-it does not support either the 64-bit extensions or the POWER compatibility instructions.
-It has been used for production operating system work on the 603, 603e, 604e, 821, 823, and 860.
-On the 8xx floating-point instructions must be emulated.
-Instruction scheduling is not implemented; otherwise the code generated
-is similar to that for the other load-store architectures.
-The compiler makes little or no use of unusual PowerPC features such as the
-counter register, several condition code registers, and multiply-accumulate
-instructions, but they are sometimes
-used by assembly language routines in the libraries.
-</P>
-<H4>The Acorn ARM compiler
-</H4>
-<P>
-The ARM compiler is fairly solid; it has been used for some production
-operating system work including Inferno and the Plan 9 kernel
-for the iPAQ, which uses a StrongArm SA1.
-The compiler supports the ARMv4 architecture;
-it does not support the Thumb instruction set.
-It has been used on ARM7500FE processors and the StrongArm SA1 core machines.
-The compiler generates instructions for the ARM floating-point coprocessor.
-</P>
-<H4>The AMD 29000 compiler
-</H4>
-<P>
-This compiler was used to port an operating system to an AMD 29240 processor.
-The project is long abandoned, but the compiler lives on.
-</P>
-<H4>The Carrera operating system
-</H4>
-<P>
-We used to have a number of MIPS R4400 PC-like devices called Carreras,
-with custom-built frame buffers, that we used as terminals.
-They're almost all decommissioned now, but we're including the source as a reference
-in case someone wants to get another MIPS-based system running.
-</P>
-<H4>The IBM PC operating system
-</H4>
-<P>
-The PC version of Plan 9 can boot either from MS-DOS
-or directly from a disk created by the
-<TT>format</TT>
-command; see
-<A href="/magic/man2html/8/prep"><I>prep</I>(8).
-</A>Plan 9 runs in 32-bit mode&#8212;which requires a 386 or later model x86 processor&#8212;and
-has an interrupt-driven I/O system, so it does not
-use the BIOS (except for a small portion of the boot program and floppy boot block).
-This helps performance but limits the set of I/O devices that it can support without
-special code.
-</P>
-<P>
-Plan 9 supports the ISA, EISA, and PCI buses as well as PCMCIA and PC card devices.
-It is infeasible to list all the supported machines, because
-the PC-clone marketplace is too volatile and there is
-no guarantee that the machine you buy today will contain the
-same components as the one you bought yesterday.
-(For our lab, we buy components and assemble the machines
-ourselves in an attempt to lessen this effect.)
-Both IDE/ATA and SCSI disks are supported, and
-there is support for large ATA drives.
-CD-ROMs are supported two ways, either on the SCSI bus, or as ATA(PI) devices.
-The SCSI adapter must be a member of the Mylex Multimaster (old Buslogic BT-*) series
-or the Symbios 53C8XX series.
-Supported Ethernet cards include the
-AMD79C790,
-3COM Etherlink III and 3C589 series,
-Lucent Wavelan and compatibles,
-NE2000,
-WD8003,
-WD8013,
-SMC Elite and Elite Ultra,
-Linksys Combo EthernetCard and EtherFast 10/100,
-and a variety of controllers based on the
-Intel i8255[789] and Digital (now Intel) 21114x chips.
-We mostly use Etherlink III, i8255[789], and 21114x, so those drivers may be more robust.
-There must be an explicit Plan 9 driver for peripherals;
-it cannot use DOS or Windows drivers.
-Also,
-Plan 9 cannot exploit special hardware-related features that fall outside of the
-IBM PC model,
-such as power management,
-unless architecture-dependent code is added to the kernel.
-For more details see
-<A href="/magic/man2html/8/plan9.ini"><I>plan9.ini</I>(8).
-</A></P>
-<P>
-Over the years,
-Plan 9 has run on a number of VGA cards.
-Recent changes to the graphics system have not been
-tested on most of the older cards; some effort may be needed to get them working again.
-In our lab, most of our machines use the ATI Mach64, S3 ViRGE, or S3 Savage chips,
-so such devices are probably
-the most reliable.
-We also use a few Matrox and TNT cards.
-The system requires a hardware cursor.
-For more details see
-<A href="/magic/man2html/6/vgadb"><I>vgadb</I>(6)
-</A>and
-<A href="/magic/man2html/8/vga"><I>vga</I>(8).
-</A>The wiki
-(<TT>http://plan9.bell-labs.com/wiki/plan9</TT>)
-contains the definitive list of cards that are known to work; see the ``supported PC hardware''
-page.
-</P>
-<P>
-For audio, Plan 9 supports the Sound Blaster 16 and compatibles.
-(Note that audio doesn't work under Plan 9 with 8-bit Sound Blasters.)
-There is also user-level support for USB audio devices; see 
-<A href="/magic/man2html/4/usb"><I>usb</I>(4).
-</A></P>
-<P>
-Finally, it's important to have a three-button mouse with Plan 9.
-The system currently works only with mice on the PS/2 port or USB.
-Serial mouse support should return before long.
-</P>
-<P>
-Once you have Plan 9 installed (see the wiki's installation document)
-run the program
-<TT>ld</TT>
-from DOS
-or use a boot disk.  See
-<A href="/magic/man2html/8/booting"><I>booting</I>(8),
-</A><A href="/magic/man2html/8/9load"><I>9load</I>(8),
-</A>and
-<A href="/magic/man2html/8/prep"><I>prep</I>(8)
-</A>for more information.
-</P>
-<H4>The Alpha PC operating system
-</H4>
-<P>
-Plan 9 runs on the Alpha PC 164.
-The Alpha port has not been used as much as the others,
-and should be considered a preliminary release.
-</P>
-<P>
-The port uses the OSF/1 flavor
-of PALcode, and should be booted from the SRM firmware (booting
-from ARC is not supported).
-Supported devices are a subset of the PC ones; currently
-this includes DECchip 2114x-based ethernet cards, S3 VGA cards,
-Sound Blaster 16-compatible audio, floppy drives, and ATA hard disks.
-</P>
-<P>
-The system has to be booted via tftp.
-See
-<A href="/magic/man2html/8/booting"><I>booting</I>(8)
-</A>for details.
-</P>
-<H4>The PowerPC operating system
-</H4>
-<P>
-We have a version of the system that runs on the PowerPC
-on a home-grown machine called Viaduct.
-The Viaduct minibrick is a small (12x9x3 cm) low-cost embedded
-computer consisting of a 50MHz MPC850, 16MB sdram, 2MB flash,
-and two 10Mb Ethernet ports.  It is designed for home/SOHO
-networking applications such as VPN, firewalls, NAT, etc.
-</P>
-<P>
-The kernel has also been ported to the Motorola MTX embedded motherboard;
-that port is included in the distribution.
-The port only works with a 604e processor (the 603e is substantially different)
-and at present only a single CPU is permitted.
-</P>
-<H4>The Compaq iPAQ operating system
-</H4>
-<P>
-Plan 9 was ported to Compaq's iPAQ Pocket PC,
-which uses the StrongArm SA1 processor.
-The model we have is a 3630; neighboring models also work.
-The kernel can drive a PCMCIA sleeve with a WaveLAN card, but no other PCMCIA
-devices have been ported yet.
-</P>
-<P>
-The iPAQ runs
-<TT>rio</TT>
-with a small keyboard application that allows Palm-style handwriting
-input as well as typing with the stylus on a miniature keyboard.
-</P>
-<P>
-Fco. J. Ballesteros
-(<TT>nemo@plan9.escet.urjc.es</TT>)
-added support for hibernation, but we haven't been able to
-get that to work again in the new kernel; the code is there, however,
-for volunteers to play with.
-See the file
-<TT>/sys/src/9/bitsy/Booting101</TT>
-for information about installing Plan 9 on the iPAQ.
-</P>
-<H4>The file server
-</H4>
-<P>
-The file server runs on only a handful of distinct machines.
-It is a stand-alone program, distantly related to the CPU server
-code, that runs no user code: all it does is serve files on
-network connections.
-It supports only SCSI disks, which can be interleaved for
-faster throughput.
-A DOS file on
-an IDE drive can hold the configuration information.
-See
-<A href="/magic/man2html/8/fsconfig"><I>fsconfig</I>(8)
-</A>for an explanation of how
-to configure a file server.
-</P>
-<P>
-To boot a file server, follow the directions for booting a CPU server
-using the file name
-<TT>9</TT><I>machtype</I><TT>fs</TT>
-where
-<I>machtype</I>
-is
-<TT>pc</TT>,
-etc. as appropriate.
-We are releasing only the PC version.
-</P>
-<H4>The IBM PC file server
-</H4>
-<P>
-Except for the restriction to SCSI disks,
-the PC file server has the same hardware requirements as
-the regular PC operating system.
-However, only a subset of the supported SCSI (Adaptec 1542, Mylex Multimaster,
-and Symbios 53C8XX) and Ethernet (Digital 2114x,
-Intel 8255x, and 3Com) controllers
-may be
-used.
-Any of the boot methods described in
-<A href="/magic/man2html/8/9load"><I>9load</I>(8)
-</A>will work.
-</P>
-<P>
-To boot any PC, the file
-<TT>9load</TT>
-must reside on a MS-DOS formatted floppy, IDE disk,
-or SCSI disk.
-However, PCs have no non-volatile RAM in which the
-file server can store its configuration information, so the system
-stores it in a file on an MS-DOS file system instead.
-This file, however, cannot live on a SCSI disk, only a floppy or IDE.
-(This restriction avoids a lot of duplicated interfaces in the
-system.)
-Thus the file server cannot be all-SCSI.
-See
-<A href="/magic/man2html/8/plan9.ini"><I>plan9.ini</I>(8)
-</A>for details about the
-<I>nvr</I>
-variable and specifying the console device.
-</P>
-<H4>Backup
-</H4>
-<P>
-Our main file server is unlikely to be much like yours.
-It is a PC with 128 megabytes
-of cache memory, 56 gigabytes of SCSI magnetic
-disk, and a Hewlett-Packard SureStore Optical 1200ex
-magneto-optical jukebox, with 1.2 terabytes of storage.
-This driver runs the SCSI standard jukebox protocol.
-We also have a driver for a (non-standard)
-SONY WDA-610
-Writable Disk Auto Changer (WORM),
-which stores almost 350 gigabytes of data.
-</P>
-<P>
-The WORM is actually the prime storage; the SCSI disk is just
-a cache to improve performance.
-Early each morning the system constructs on WORM an image of
-the entire system as it appears that day.  Our backup system
-is therefore just a file server that lets
-you look at yesterday's (or last year's) file system.
-</P>
-<P>
-If you don't have a magneto-optical jukebox,
-you might consider attaching a CD-R jukebox or even just
-using a single WORM drive and managing the dumps a little less
-automatically.  This is just a long way of saying that the
-system as distributed has no explicit method of backup other
-than through the WORM jukebox.
-</P>
-<P>
-Not everyone can invest in such expensive hardware, however.
-Although it wouldn't be as luxurious,
-it would be possible to use
-<A href="/magic/man2html/8/mkfs"><I>mkfs</I>(8)
-</A>to build regular file system archives and use
-<A href="/magic/man2html/8/scuzz"><I>scuzz</I>(8)
-</A>to stream them to a SCSI 8mm tape drive.
-<TT>Mkext</TT>
-could then extract them.
-Another alternative is to use
-<I>dump9660</I>
-(see
-<A href="/magic/man2html/8/mk9660"><I>mk9660</I>(8)),
-</A>which stores incremental backups on CD images
-in the form of a dump hierarchy.
-</P>
-<P>
-It is also possible to treat a regular disk, or even a part of a disk,
-as a fake WORM, which can then be streamed to tape when it fills.
-This is a bad idea for a production system but a good way to
-learn about the WORM software.
-Again, see
-<A href="/magic/man2html/8/fsconfig"><I>fsconfig</I>(8)
-</A>for details.
-</P>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 642
sys/doc/prog4.html

@@ -1,642 +0,0 @@
-<html>
-<title>
-data
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Changes to the Programming Environment
-<br>
-in the
-<br>
-Fourth Release of Plan 9
-</H1>
-<DL><DD><I>Rob Pike<br>
-<br>&#32;<br>
-rob@plan9.bell-labs.com<br>
-</I></DL>
-<H4>Introduction
-</H4>
-<P>
-The fourth release of Plan 9 includes changes at many levels of the system,
-with repercussions in the libraries and program interfaces.
-This document summarizes the changes and describes how
-existing programs must be modified to run in the new release.
-It is not exhaustive, of course; for further detail about any of the
-topics refer to the manual pages, as always.
-</P>
-<P>
-Programmers new to Plan 9 may find valuable tidbits here, but the
-real audience for this paper is those with a need to update applications
-and servers written in C for earlier releases of the Plan 9 operating system.
-</P>
-<H4>9P, NAMELEN, and strings
-</H4>
-<P>
-The underlying file service protocol for Plan 9, 9P, retains its basic form
-but has had a number of adjustments to deal with longer file names and error strings,
-new authentication mechanisms, and to make it more efficient at
-evaluating file names.
-The change to file names affects a number of system interfaces;
-because file name elements are no longer of fixed size, they can
-no longer be stored as arrays.
-</P>
-<P>
-9P used to be a fixed-format protocol with
-<TT>NAMELEN</TT>-sized
-byte arrays representing file name elements.
-Now, it is a variable-format protocol, as described in
-<A href="/magic/man2html/5/intro"><I>intro</I>(5),
-</A>in which strings are represented by a count followed by that many bytes.
-Thus, the string
-<TT>ken</TT>
-would previously have occupied 28
-(<TT>NAMELEN</TT>)
-bytes in the message; now it occupies 5: a two-byte count followed by the three bytes of
-<TT>ken</TT>
-and no terminal zero.
-(And of course, a name could now be much longer.)
-A similar format change has been made to
-<TT>stat</TT>
-buffers: they are no longer
-<TT>DIRLEN</TT>
-bytes long but instead have variable size prefixed by a two-byte count.
-And in fact the entire 9P message syntax has changed: every message
-now begins with a message length field that makes it trivial to break the
-string into messages without parsing them, so
-<TT>aux/fcall</TT>
-is gone.
-A new library entry point,
-<TT>read9pmsg</TT>,
-makes it easy for user-level servers to break the client data stream into 9P messages.
-All servers should switch from using
-<TT>read</TT>
-(or the now gone
-<TT>getS</TT>)
-to using
-<TT>read9pmsg</TT>.
-</P>
-<P>
-This change to 9P affects the way strings are handled by the kernel and throughout
-the system.
-The consequences are primarily that fixed-size arrays have been replaced
-by pointers and counts in a variety of system interfaces.
-Most programs will need at least some adjustment to the new style.
-In summary:
-<TT>NAMELEN</TT>
-is gone, except as a vestige in the authentication libraries, where it has been
-rechristened
-<TT>ANAMELEN</TT>.
-<TT>DIRLEN</TT>
-and
-<TT>ERRLEN</TT>
-are also gone.
-All programs that mention
-these constants
-will need to be fixed.
-</P>
-<P>
-The simplest place to see this change is in the
-<TT>errstr</TT>
-system call, which no longer assumes a buffer of length
-<TT>ERRLEN</TT>
-but now requires a byte-count argument:
-<DL><DT><DD><TT><PRE>
-char buf[...];
-
-errstr(buf, sizeof buf);
-</PRE></TT></DL>
-The buffer can be any size you like.
-For convenience, the kernel stores error strings internally as 256-byte arrays,
-so if you like &#8212; but it's not required &#8212; you can use the defined constant
-<TT>ERRMAX=</TT>256
-as a good buffer size.
-Unlike the old
-<TT>ERRLEN</TT>
-(which had value 64),
-<TT>ERRMAX</TT>
-is advisory, not mandatory, and is not part of the 9P specification.
-</P>
-<P>
-With names, stat buffers, and directories, there isn't even an echo of a fixed-size array any more.
-</P>
-<H4>Directories and wait messages
-</H4>
-<P>
-With strings now variable-length, a number of system calls needed to change:
-<TT>errstr</TT>,
-<TT>stat</TT>,
-<TT>fstat</TT>,
-<TT>wstat</TT>,
-<TT>fwstat</TT>,
-and
-<TT>wait</TT>
-are all affected, as is
-<TT>read</TT>
-when applied to directories.
-</P>
-<P>
-As far as directories are concerned, most programs don't use the system calls
-directly anyway, since they operate on the machine-independent form, but
-instead call the machine-dependent
-<TT>Dir</TT>
-routines
-<TT>dirstat</TT>,
-<TT>dirread</TT>,
-etc.
-These used to fill user-provided fixed-size buffers; now they return objects allocated
-by
-<TT>malloc</TT>
-(which must therefore be freed after use).
-To `stat' a file:
-<DL><DT><DD><TT><PRE>
-Dir *d;
-
-d = dirstat(filename);
-if(d == nil){
-	fprint(2, "can't stat %s: %r\n", filename);
-	exits("stat");
-}
-use(d);
-free(d);
-</PRE></TT></DL>
-A common new bug is to forget to free a
-<TT>Dir</TT>
-returned by
-<TT>dirstat</TT>.
-</P>
-<P>
-<TT>Dirfstat</TT>
-and
-<TT>Dirfwstat</TT>
-work pretty much as before, but changes to 9P make
-it possible to exercise finer-grained control on what fields
-of the
-<TT>Dir</TT>
-are to be changed; see
-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
-</A>and
-<A href="/magic/man2html/5/stat"><I>stat</I>(5)
-</A>for details.
-</P>
-<P>
-Reading a directory works in a similar way to
-<TT>dirstat</TT>,
-with
-<TT>dirread</TT>
-allocating and filling in an array of
-<TT>Dir</TT>
-structures.
-The return value is the number of elements of the array.
-The arguments to
-<TT>dirread</TT>
-now include a pointer to a
-<TT>Dir*</TT>
-to be filled in with the address of the allocated array:
-<DL><DT><DD><TT><PRE>
-Dir *d;
-int i, n;
-
-while((n = dirread(fd, &amp;d)) &gt; 0){
-	for(i=0; i&lt;n; i++)
-		use(&amp;d[i]);
-	free(d);
-}
-</PRE></TT></DL>
-A new library function,
-<TT>dirreadall</TT>,
-has the same form as
-<TT>dirread</TT>
-but returns the entire directory in one call:
-<DL><DT><DD><TT><PRE>
-n = dirreadall(fd, &amp;d);
-for(i=0; i&lt;n; i++)
-	use(&amp;d[i]);
-free(d);
-</PRE></TT></DL>
-If your program insists on using the underlying
-<TT>stat</TT>
-system call or its relatives, or wants to operate directly on the
-machine-independent format returned by
-<TT>stat</TT>
-or
-<TT>read</TT>,
-it will need to be modified.
-Such programs are rare enough that we'll not discuss them here beyond referring to
-the man page
-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
-</A>for details.
-Be aware, though, that it used to be possible to regard the buffer returned by
-<TT>stat</TT>
-as a byte array that began with the zero-terminated
-name of the file; this is no longer true.
-With very rare exceptions, programs that call
-<TT>stat</TT>
-would be better recast to use the
-<TT>dir</TT>
-routines or, if their goal is just to test the existence of a file,
-<TT>access</TT>.
-</P>
-<P>
-Similar changes have affected the
-<TT>wait</TT>
-system call.  In fact,
-<TT>wait</TT>
-is no longer a system call but a library routine that calls the new
-<TT>await</TT>
-system call and returns a newly allocated machine-dependent
-<TT>Waitmsg</TT>
-structure:
-<DL><DT><DD><TT><PRE>
-Waitmsg *w;
-
-w = wait();
-if(w == nil)
-	error("wait: %r");
-print("pid is %d; exit string %s\n", w-&gt;pid, w-&gt;msg);
-free(w);
-</PRE></TT></DL>
-The exit string
-<TT>w-&gt;msg</TT>
-may be empty but it will never be a nil pointer.
-Again, don't forget to free the structure returned by
-<TT>wait</TT>.
-If all you need is the pid, you can call
-<TT>waitpid</TT>,
-which reports just the pid and doesn't return an allocated structure:
-<DL><DT><DD><TT><PRE>
-int pid;
-
-pid = waitpid();
-if(pid &lt; 0)
-	error("wait: %r");
-print("pid is %d\n", pid);
-</PRE></TT></DL>
-</P>
-<H4>Quoted strings and tokenize
-</H4>
-<P>
-<TT>Wait</TT>
-gives us a good opportunity to describe how the system copes with all this
-free-format data.
-Consider the text returned by the
-<TT>await</TT>
-system call, which includes a set of integers (pids and times) and a string (the exit status).
-This information is formatted free-form; here is the statement in the kernel that
-generates the message:
-<DL><DT><DD><TT><PRE>
-n = snprint(a, n, "%d %lud %lud %lud %q",
-	wq-&gt;w.pid,
-	wq-&gt;w.time[TUser], wq-&gt;w.time[TSys], wq-&gt;w.time[TReal],
-	wq-&gt;w.msg);
-</PRE></TT></DL>
-Note the use of
-<TT>%q</TT>
-to produce a quoted-string representation of the exit status.
-The
-<TT>%q</TT>
-format is like %s but will wrap
-<TT>rc</TT>-style
-single quotes around the string if it contains white space or is otherwise ambiguous.
-The library routine
-<TT>tokenize</TT>
-can be used to parse data formatted this way: it splits white-space-separated
-fields but understands the
-<TT>%q</TT>
-quoting conventions.
-Here is how the
-<TT>wait</TT>
-library routine builds its
-<TT>Waitmsg</TT>
-from the data returned by
-<TT>await</TT>:
-<DL><DT><DD><TT><PRE>
-Waitmsg*
-wait(void)
-{
-	int n, l;
-	char buf[512], *fld[5];
-	Waitmsg *w;
-
-	n = await(buf, sizeof buf-1);
-	if(n &lt; 0)
-		return nil;
-	buf[n] = '\0';
-	if(tokenize(buf, fld, nelem(fld)) != nelem(fld)){
-		werrstr("couldn't parse wait message");
-		return nil;
-	}
-	l = strlen(fld[4])+1;
-	w = malloc(sizeof(Waitmsg)+l);
-	if(w == nil)
-		return nil;
-	w-&gt;pid = atoi(fld[0]);
-	w-&gt;time[0] = atoi(fld[1]);
-	w-&gt;time[1] = atoi(fld[2]);
-	w-&gt;time[2] = atoi(fld[3]);
-	w-&gt;msg = (char*)&amp;w[1];
-	memmove(w-&gt;msg, fld[4], l);
-	return w;
-}
-</PRE></TT></DL>
-</P>
-<P>
-This style of quoted-string and
-<TT>tokenize</TT>
-is used all through the system now.
-In particular, devices now
-<TT>tokenize</TT>
-the messages written to their
-<TT>ctl</TT>
-files, which means that you can send messages that contain white space, by quoting them,
-and that you no longer need to worry about whether or not the device accepts a newline.
-In other words, you can say
-<DL><DT><DD><TT><PRE>
-echo message &gt; /dev/xx/ctl
-</PRE></TT></DL>
-instead of
-<TT>echo</TT>
-<TT>-n</TT>
-because
-<TT>tokenize</TT>
-treats the newline character as white space and discards it.
-</P>
-<P>
-While we're on the subject of quotes and strings, note that the implementation of
-<TT>await</TT>
-used
-<TT>snprint</TT>
-rather than
-<TT>sprint</TT>.
-We now deprecate
-<TT>sprint</TT>
-because it has no protection against buffer overflow.
-We prefer
-<TT>snprint</TT>
-or
-<TT>seprint</TT>,
-to constrain the output.
-The
-<TT>%q</TT>
-format is cleverer than most in this regard:
-if the string is too long to be represented in full,
-<TT>%q</TT>
-is smart enough to produce a truncated but correctly quoted
-string within the available space.
-</P>
-<H4>Mount
-</H4>
-<P>
-Although strings in 9P are now variable-length and not zero-terminated,
-this has little direct effect in most of the system interfaces.
-File and user names are still zero-terminated strings as always;
-the kernel does the work of translating them as necessary for
-transport.
-And of course, they are now free to be as long as you might want;
-the only hard limit is that their length must be represented in 16 bits.
-</P>
-<P>
-One example where this matters is that the file system specification in the
-<TT>mount</TT>
-system call can now be much longer.
-Programs like
-<TT>rio</TT>
-that used the specification string in creative ways were limited by the
-<TT>NAMELEN</TT>
-restriction; now they can use the string more freely.
-<TT>Rio</TT>
-now accepts a simple but less cryptic specification language for the window
-to be created by the
-<TT>mount</TT>
-call, e.g.:
-<DL><DT><DD><TT><PRE>
-% mount $wsys /mnt/wsys 'new -dx 250 -dy 250 -pid 1234'
-</PRE></TT></DL>
-In the old system, this sort of control was impossible through the
-<TT>mount</TT>
-interface.
-</P>
-<P>
-While we're on the subject of
-<TT>mount</TT>,
-note that with the new security architecture
-(see
-<A href="/magic/man2html/4/factotum"><I>factotum</I>(4)),
-</A>9P has moved its authentication outside the protocol proper.
-(For a full description of this change to 9P, see
-<A href="/magic/man2html/2/fauth"><I>fauth</I>(2),
-</A><A href="/magic/man2html/5/attach"><I>attach</I>(5),
-</A>and the paper
-<I>Security in Plan 9</I>.)
-The most explicit effect of this change is that
-<TT>mount</TT>
-now takes another argument,
-<TT>afd</TT>,
-a file descriptor for the
-authentication file through which the authentication will be made.
-For most user-level file servers, which do not require authentication, it is
-sufficient to provide
-<TT>-1</TT>
-as the value of
-<TT>afd</TT>:
-<DL><DT><DD><TT><PRE>
-if(mount(fd, -1, "/mnt/wsys", MREPL,
-   "new -dx 250 -dy 250 -pid 1234") &lt; 0)
-	error("mount failed: %r");
-</PRE></TT></DL>
-To connect to servers that require authentication, use the new
-<TT>fauth</TT>
-system call or the reimplemented
-<TT>amount</TT>
-(authenticated mount) library call.
-In fact, since
-<TT>amount</TT>
-handles both authenticating and non-authenticating servers, it is often
-easiest just to replace calls to
-<TT>mount</TT>
-by calls to
-<TT>amount</TT>;
-see
-<A href="/magic/man2html/2/auth"><I>auth</I>(2)
-</A>for details.
-</P>
-<H4>Print
-</H4>
-<P>
-The C library has been heavily reworked in places.
-Besides the changes mentioned above, it
-now has a much more complete set of routines for handling
-<TT>Rune</TT>
-strings (that is, zero-terminated arrays of 16-bit character values).
-The most sweeping changes, however, are in the way formatted I/O is performed.
-</P>
-<P>
-The
-<TT>print</TT>
-routine and all its relatives have been reimplemented to offer a number
-of improvements:
-</P>
-<DL COMPACT>
-<DT>(1)<DD>
-Better buffer management, including the provision of an internal flush
-routine, makes it unnecessary to provide large buffers.
-For example,
-<TT>print</TT>
-uses a much smaller buffer now (reducing stack load) while simultaneously
-removing the need to truncate the output string if it doesn't fit in the buffer.
-<DT>(2)<DD>
-Global variables have been eliminated so no locking is necessary.
-<DT>(3)<DD>
-The combination of (1) and (2) means that the standard implementation of
-<TT>print</TT>
-now works fine in threaded programs, and
-<TT>threadprint</TT>
-is gone.
-<DT>(4)<DD>
-The new routine
-<TT>smprint</TT>
-prints into, and returns, storage allocated on demand by
-<TT>malloc</TT>.
-<DT>(5)<DD>
-It is now possible to print into a
-<TT>Rune</TT>
-string; for instance,
-<TT>runesmprint</TT>
-is the
-<TT>Rune</TT>
-analog of
-<TT>smprint</TT>.
-<DT>(6)<DD>
-There is improved support for custom
-print verbs and custom output routines such as error handlers.
-The routine
-<TT>doprint</TT>
-is gone, but
-<TT>vseprint</TT>
-can always be used instead.
-However, the new routines
-<TT>fmtfdinit</TT>,
-<TT>fmtstrinit</TT>,
-<TT>fmtprint</TT>,
-and friends
-are often a better replacement.
-The details are too long for exposition here;
-<A href="/magic/man2html/2/fmtinstall"><I>fmtinstall</I>(2)
-</A>explains the new interface and provides examples.
-<DT>(7)<DD>
-Two new format flags, space and comma, close somewhat the gap between
-Plan 9 and ANSI C.
-</dl>
-<P>
-Despite these changes, most programs will be unaffected;
-<TT>print</TT>
-is still
-<TT>print</TT>.
-Don't forget, though, that
-you should eliminate calls to
-<TT>sprint</TT>
-and use the
-<TT>%q</TT>
-format when appropriate.
-</P>
-<H4>Binary compatibility
-</H4>
-<P>
-The discussion so far has been about changes at the source level.
-Existing binaries will probably run without change in the new
-environment, since the kernel provides backward-compatible
-system calls for
-<TT>errstr</TT>,
-<TT>stat</TT>,
-<TT>wait</TT>,
-etc.
-The only exceptions are programs that do either a
-<TT>mount</TT>
-system call, because of the security changes and because
-the file descriptor in
-<TT>mount</TT>
-must point to a new 9P connection; or a
-<TT>read</TT>
-system call on a directory, since the returned data will
-be in the new format.
-A moment's reflection will discover that this means old
-user-level file servers will need to be fixed to run on the new system.
-</P>
-<H4>File servers
-</H4>
-<P>
-A full description of what user-level servers must do to provide service with
-the new 9P is beyond the scope of this paper.
-Your best source of information is section 5 of the manual,
-combined with study of a few examples.
-<TT>/sys/src/cmd/ramfs.c</TT>
-is a simple example; it has a counterpart
-<TT>/sys/src/lib9p/ramfs.c</TT>
-that implements the same service using the new
-<A href="/magic/man2html/2/9p"><I>9p</I>(2)
-</A>library.
-</P>
-<P>
-That said, it's worth summarizing what to watch for when converting a file server.
-The
-<TT>session</TT>
-message is gone, and there is now a
-<TT>version</TT>
-message that is exchanged at the start of a connection to establish
-the version of the protocol to use (there's only one at the moment, identified by
-the string
-<TT>9P2000</TT>)
-and what the maximum message size will be.
-This negotiation makes it easier to handle 9P encapsulation, such as with
-<TT>exportfs</TT>,
-and also permits larger message sizes when appropriate.
-</P>
-<P>
-If your server wants to authenticate, it will need to implement an authentication file
-and implement the
-<TT>auth</TT>
-message; otherwise it should return a helpful error string to the
-<TT>Tauth</TT>
-request to signal that authentication is not required.
-</P>
-<P>
-The handling of
-<TT>stat</TT>
-and directory reads will require some changes but they should not be fundamental.
-Be aware that seeking on directories is forbidden, so it is fine if you disregard the
-file offset when implementing directory reads; this makes it a little easier to handle
-the variable-length entries.
-You should still never return a partial directory entry; if the I/O count is too small
-to return even one entry, you should return two bytes containing the byte count
-required to represent the next entry in the directory.
-User code can use this value to formulate a retry if it desires.
-See the
-DIAGNOSTICS section of
-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
-</A>for a description of this process.
-</P>
-<P>
-The trickiest part of updating a file server is that the
-<TT>clone</TT>
-and
-<TT>walk</TT>
-messages have been merged into a single message, a sort of `clone-multiwalk'.
-The new message, still called
-<TT>walk</TT>,
-proposes a sequence of file name elements to be evaluated using a possibly
-cloned fid.
-The return message contains the qids of the files reached by
-walking to the sequential elements.
-If all the elements can be walked, the fid will be cloned if requested.
-If a non-zero number of elements are requested, but none
-can be walked, an error should be returned.
-If only some can be walked, the fid is not cloned, the original fid is left
-where it was, and the returned
-<TT>Rwalk</TT>
-message should contain the partial list of successfully reached qids.
-See
-<A href="/magic/man2html/5/walk"><I>walk</I>(5)
-</A>for a full description.
-</P>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1668
sys/doc/rc.html

@@ -1,1668 +0,0 @@
-<html>
-
-<title>
-Rc - The Plan 9 Shell
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Rc &#173; The Plan 9 Shell
-</H1>
-<DL><DD><I>Tom Duff<br>
-td@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<I>Rc</I>
-is a command interpreter for Plan 9 that
-provides similar facilities to UNIX's
-Bourne shell,
-with some small additions and less idiosyncratic syntax.
-This paper uses numerous examples to describe
-<I>rc</I>'s
-features, and contrasts
-<I>rc</I>
-with the Bourne shell, a model that many readers will be familiar with.
-</DL>
-<H4>1 Introduction
-</H4>
-<P>
-<I>Rc</I>
-is similar in spirit but different in detail from UNIX's
-Bourne shell.  This paper describes
-<I>rc</I>'s
-principal features with many small examples and a few larger ones.
-It assumes familiarity with the Bourne shell.
-</P>
-<H4>2 Simple commands
-</H4>
-<P>
-For the simplest uses
-<I>rc</I>
-has syntax familiar to Bourne-shell users.
-All of the following behave as expected:
-<DL><DT><DD><TT><PRE>
-date
-cat /lib/news/build
-who &gt;user.names
-who &gt;&gt;user.names
-wc &lt;file
-echo [a-f]*.c
-who | wc
-who; date
-vc *.c &amp;
-mk &amp;&amp; v.out /*/bin/fb/*
-rm -r junk || echo rm failed!
-</PRE></TT></DL>
-</P>
-<H4>3 Quotation
-</H4>
-<P>
-An argument that contains a space or one of
-<I>rc</I>'s
-other syntax characters must be enclosed in apostrophes
-(<TT>'</TT>):
-<DL><DT><DD><TT><PRE>
-rm 'odd file name'
-</PRE></TT></DL>
-An apostrophe in a quoted argument must be doubled:
-<DL><DT><DD><TT><PRE>
-echo 'How''s your father?'
-</PRE></TT></DL>
-</P>
-<H4>4 Patterns
-</H4>
-<P>
-An unquoted argument that contains any of the characters
-<TT>*</TT>
-<TT>?</TT>
-<TT>[</TT>
-is a pattern to be matched against file names.
-A
-<TT>*</TT>
-character matches any sequence of characters,
-<TT>?</TT>
-matches any single character, and
-<TT>[</TT><I>class</I><TT>]</TT>
-matches any character in the
-<I>class</I>,
-unless the first character of
-<I>class</I>
-is
-<TT>~</TT>,
-in which case the class is complemented.
-The
-<I>class</I>
-may also contain pairs of characters separated by
-<TT>-</TT>,
-standing for all characters lexically between the two.
-The character
-<TT>/</TT>
-must appear explicitly in a pattern, as must the path name components
-<TT>.</TT>
-and
-<TT>..</TT>.
-A pattern is replaced by a list of arguments, one for each path name matched,
-except that a pattern matching no names is not replaced by the empty list;
-rather it stands for itself.
-</P>
-<H4>5 Variables
-</H4>
-<P>
-UNIX's Bourne shell offers string-valued variables.
-<I>Rc</I>
-provides variables whose values are lists of arguments &#173;
-that is, arrays of strings.  This is the principal difference
-between
-<I>rc</I>
-and traditional UNIX command interpreters.
-Variables may be given values by typing, for example:
-<DL><DT><DD><TT><PRE>
-path=(. /bin)
-user=td
-font=/lib/font/bit/pelm/ascii.9.font
-</PRE></TT></DL>
-The parentheses indicate that the value assigned to
-<TT>path</TT>
-is a list of two strings. The variables
-<TT>user</TT>
-and
-<TT>font</TT>
-are assigned lists containing a single string.
-</P>
-<P>
-The value of a variable can be substituted into a command by
-preceding its name with a
-<TT>$</TT>,
-like this:
-<DL><DT><DD><TT><PRE>
-echo $path
-</PRE></TT></DL>
-If
-<TT>path</TT>
-had been set as above, this would be equivalent to
-<DL><DT><DD><TT><PRE>
-echo . /bin
-</PRE></TT></DL>
-Variables may be subscripted by numbers or lists of numbers,
-like this:
-<DL><DT><DD><TT><PRE>
-echo $path(2)
-echo $path(2 1 2)
-</PRE></TT></DL>
-These are equivalent to
-<DL><DT><DD><TT><PRE>
-echo /bin
-echo /bin . /bin
-</PRE></TT></DL>
-There can be no space separating the variable's name from the
-left parenthesis; otherwise, the subscript would be considered
-a separate parenthesized list.
-</P>
-<P>
-The number of strings in a variable can be determined by the
-<TT>$#</TT>
-operator.  For example,
-<DL><DT><DD><TT><PRE>
-echo $#path
-</PRE></TT></DL>
-would print 2 for this example.
-</P>
-<P>
-The following two assignments are subtly different:
-<DL><DT><DD><TT><PRE>
-empty=()
-null=''
-</PRE></TT></DL>
-The first sets
-<TT>empty</TT>
-to a list containing no strings.
-The second sets
-<TT>null</TT>
-to a list containing a single string,
-but the string contains no characters.
-</P>
-<P>
-Although these may seem like more or less
-the same thing (in Bourne's shell, they are
-indistinguishable), they behave differently
-in almost all circumstances.
-Among other things
-<DL><DT><DD><TT><PRE>
-echo $#empty
-</PRE></TT></DL>
-prints 0, whereas
-<DL><DT><DD><TT><PRE>
-echo $#null
-</PRE></TT></DL>
-prints 1.
-</P>
-<P>
-All variables that have never been set have the value
-<TT>()</TT>.
-</P>
-<P>
-Occasionally, it is convenient to treat a variable's value
-as a single string.  The elements of a string are concatenated
-into a single string, with spaces between the elements, by
-the
-<TT>$"</TT>
-operator.
-Thus, if we set
-<DL><DT><DD><TT><PRE>
-list=(How now brown cow)
-string=$"list
-</PRE></TT></DL>
-then both
-<DL><DT><DD><TT><PRE>
-echo $list
-</PRE></TT></DL>
-and
-<DL><DT><DD><TT><PRE>
-echo $string
-</PRE></TT></DL>
-cause the same output, viz:
-<DL><DT><DD><TT><PRE>
-How now brown cow
-</PRE></TT></DL>
-but
-<DL><DT><DD><TT><PRE>
-echo $#list $#string
-</PRE></TT></DL>
-will output
-<DL><DT><DD><TT><PRE>
-4 1
-</PRE></TT></DL>
-because
-<TT>$list</TT>
-has four members, but
-<TT>$string</TT>
-has a single member, with three spaces separating its words.
-</P>
-<H4>6 Arguments
-</H4>
-<P>
-When
-<I>rc</I>
-is reading its input from a file, the file has access
-to the arguments supplied on
-<I>rc</I>'s
-command line.  The variable
-<TT>$*</TT>
-initially has the list of arguments assigned to it.
-The names
-<TT>$1</TT>,
-<TT>$2</TT>,
-etc. are synonyms for
-<TT>$*(1)</TT>,
-<TT>$*(2)</TT>,
-etc.
-In addition,
-<TT>$0</TT>
-is the name of the file from which
-<I>rc</I>'s
-input is being read.
-</P>
-<H4>7 Concatenation
-</H4>
-<P>
-<I>Rc</I>
-has a string concatenation operator, the caret 
-<TT>^</TT>,
-to build arguments out of pieces.
-<DL><DT><DD><TT><PRE>
-echo hully^gully
-</PRE></TT></DL>
-is exactly equivalent to
-<DL><DT><DD><TT><PRE>
-echo hullygully
-</PRE></TT></DL>
-Suppose variable
-<TT>i</TT>
-contains the name of a command.
-Then
-<DL><DT><DD><TT><PRE>
-vc $i^.c
-vl -o $1 $i^.v
-</PRE></TT></DL>
-might compile the command's source code, leaving the
-result in the appropriate file.
-</P>
-<P>
-Concatenation distributes over lists. The following
-<DL><DT><DD><TT><PRE>
-echo (a b c)^(1 2 3)
-src=(main subr io)
-cc $src^.c
-</PRE></TT></DL>
-are equivalent to
-<DL><DT><DD><TT><PRE>
-echo a1 b2 c3
-cc main.c subr.c io.c
-</PRE></TT></DL>
-In detail, the rule is: if both operands of
-<TT>^</TT>
-are lists of the same non-zero number of strings, they are concatenated
-pairwise.  Otherwise, if one of the operands is a single string,
-it is concatenated with each member of the other operand in turn.
-Any other combination of operands is an error.
-</P>
-<H4>8 Free carets
-</H4>
-<P>
-User demand has dictated that
-<I>rc</I>
-insert carets in certain places, to make the syntax
-look more like the Bourne shell.  For example, this:
-<DL><DT><DD><TT><PRE>
-cc -$flags $stems.c
-</PRE></TT></DL>
-is equivalent to
-<DL><DT><DD><TT><PRE>
-cc -^$flags $stems^.c
-</PRE></TT></DL>
-In general,
-<I>rc</I>
-will insert
-<TT>^</TT>
-between two arguments that are not separated by white space.
-Specifically, whenever one of
-<TT>$'`</TT>
-follows a quoted or unquoted word, or an unquoted word follows
-a quoted word with no intervening blanks or tabs, an implicit
-<TT>^</TT>
-is inserted between the two.  If an unquoted word immediately following a
-<TT>$</TT>
-contains a character other than an alphanumeric, underscore or
-<TT>*</TT>,
-a
-<TT>^</TT>
-is inserted before the first such character.
-</P>
-<H4>9 Command substitution
-</H4>
-<P>
-It is often useful to build an argument list from the output of a command.
-<I>Rc</I>
-allows a command, enclosed in braces and preceded by a left quote,
-<TT>`{...}</TT>,
-anywhere that an argument is required.  The command is executed and its
-standard output captured.
-The characters stored in the variable
-<TT>ifs</TT>
-are used to split the output into arguments.
-For example,
-<DL><DT><DD><TT><PRE>
-cat `{ls -tr|sed 10q}
-</PRE></TT></DL>
-will concatenate the ten oldest files in the current directory in temporal order, given the
-default
-<TT>ifs</TT>
-setting of space, tab, and newline.
-</P>
-<H4>10 Pipeline branching
-</H4>
-<P>
-The normal pipeline notation is general enough for almost all cases.
-Very occasionally it is useful to have pipelines that are not linear.
-Pipeline topologies more general than trees can require arbitrarily large pipe buffers,
-or worse, can cause deadlock.
-<I>Rc</I>
-has syntax for some kinds of non-linear but treelike pipelines.
-For example,
-<DL><DT><DD><TT><PRE>
-	cmp &lt;{old} &lt;{new}
-</PRE></TT></DL>
-will regression-test a new version of a command.
-<TT>&lt;</TT>
-or
-<TT>&gt;</TT>
-followed by a command in braces causes the command to be run with
-its standard output or input attached to a pipe.  The parent command
-(<TT>cmp</TT>
-in the example)
-is started with the other end of the pipe attached to some file descriptor
-or other, and with an argument that will connect to the pipe when opened
-(e.g.,
-<TT>/dev/fd/6</TT>).
-Some commands are unprepared to deal with input files that turn out not to be seekable.
-For example
-<TT>diff</TT>
-needs to read its input twice.
-</P>
-<H4>11 Exit status
-</H4>
-<P>
-When a command exits it returns status to the program that executed it.
-On Plan 9 status is a character string describing an error condition.
-On normal termination it is empty.
-</P>
-<P>
-<I>Rc</I>
-captures command exit status in the variable
-<TT>$status</TT>.
-For a simple command the value of
-<TT>$status</TT>
-is just as described above.  For a pipeline
-<TT>$status</TT>
-is set to the concatenation of the statuses of the pipeline components with
-<TT>|</TT>
-characters for separators.
-</P>
-<P>
-<I>Rc</I>
-has several kinds of control flow,
-many of them conditioned by the status returned from previously
-executed commands.  Any
-<TT>$status</TT>
-containing only
-<TT>0</TT>'s
-and
-<TT>|</TT>'s
-has boolean value
-<I>true</I>.
-Any other status is
-<I>false</I>.
-</P>
-<H4>12 Command grouping
-</H4>
-<P>
-A sequence of commands enclosed in
-<TT>{}</TT>
-may be used anywhere a command is required.
-For example:
-<DL><DT><DD><TT><PRE>
-{sleep 3600;echo 'Time''s up!'}&amp;
-</PRE></TT></DL>
-will wait an hour in the background, then print a message.
-Without the braces,
-<DL><DT><DD><TT><PRE>
-sleep 3600;echo 'Time''s up!'&amp;
-</PRE></TT></DL>
-would lock up the terminal for an hour,
-then print the message in the background.
-</P>
-<H4>13 Control flow &#173; <TT>for</TT>
-</H4>
-<P>
-A command may be executed once for each member of a list
-by typing, for example:
-<DL><DT><DD><TT><PRE>
-for(i in printf scanf putchar) look $i /usr/td/lib/dw.dat
-</PRE></TT></DL>
-This looks for each of the words
-<TT>printf</TT>,
-<TT>scanf</TT>
-and
-<TT>putchar</TT>
-in the given file.
-The general form is
-<DL><DT><DD><TT><PRE>
-for(<I>name</I> in <I>list</I>) <I>command</I>
-</PRE></TT></DL>
-or
-<DL><DT><DD><TT><PRE>
-for(<I>name</I>) <I>command</I>
-</PRE></TT></DL>
-In the first case
-<I>command</I>
-is executed once for each member of
-<I>list</I>
-with that member assigned to variable
-<I>name</I>.
-If the clause
-``<TT>in</TT>
-<I>list</I>''
-is missing,
-``<TT>in</TT>
-<TT>$*</TT>''
-is assumed.
-</P>
-<H4>14 Conditional execution &#173; <TT>if</TT>
-</H4>
-<P>
-<I>Rc</I>
-also provides a general if-statement.  For example:
-<DL><DT><DD><TT><PRE>
-for(i in *.c) if(cpp $i &gt;/tmp/$i) vc /tmp/$i
-</PRE></TT></DL>
-runs the C compiler on each C source program that
-cpp processes without error.
-An `if not' statement provides a two-tailed conditional.
-For example:
-<DL><DT><DD><TT><PRE>
-for(i){
-    if(test -f /tmp/$i) echo $i already in /tmp
-    if not cp $i /tmp
-}
-</PRE></TT></DL>
-This loops over each file in
-<TT>$*</TT>,
-copying to
-<TT>/tmp</TT>
-those that do not already appear there, and
-printing a message for those that do.
-</P>
-<H4>15 Control flow &#173; <TT>while</TT>
-</H4>
-<P>
-<I>Rc</I>'s
-while statement looks like this:
-<DL><DT><DD><TT><PRE>
-while(newer subr.v subr.c) sleep 5
-</PRE></TT></DL>
-This waits until
-<TT>subr.v</TT>
-is newer than
-<TT>subr.c</TT>,
-presumably because the C compiler finished with it.
-</P>
-<P>
-If the controlling command is empty, the loop will not terminate.
-Thus,
-<DL><DT><DD><TT><PRE>
-while() echo y
-</PRE></TT></DL>
-emulates the
-<I>yes</I>
-command.
-</P>
-<H4>16 Control flow &#173; <TT>switch</TT>
-</H4>
-<P>
-<I>Rc</I>
-provides a switch statement to do pattern-matching on
-arbitrary strings.  Its general form is
-<DL><DT><DD><TT><PRE>
-switch(<I>word</I>){
-case <I>pattern ...</I>
-    <I>commands</I>
-case <I>pattern ...</I>
-    <I>commands</I>
-...
-}
-</PRE></TT></DL>
-<I>Rc</I>
-attempts to match the word against the patterns in each case statement in turn.
-Patterns are the same as for filename matching, except that
-<TT>/</TT>
-and
-<TT>.</TT>
-and
-<TT>..</TT>
-need not be matched explicitly.
-</P>
-<P>
-If any pattern matches, the
-commands following that case up to
-the next case (or the end of the switch)
-are executed, and execution of the switch
-is complete.  For example,
-<DL><DT><DD><TT><PRE>
-switch($#*){
-case 1
-    cat &gt;&gt;$1
-case 2
-    cat &gt;&gt;$2 &lt;$1
-case *
-    echo 'Usage: append [from] to'
-}
-</PRE></TT></DL>
-is an append command.  Called with one file argument,
-it appends its standard input to the named file.  With two, the
-first is appended to the second.  Any other number
-elicits an error message.
-</P>
-<P>
-The built-in
-<TT>~</TT>
-command also matches patterns, and is often more concise than a switch.
-Its arguments are a string and a list of patterns.  It sets
-<TT>$status</TT>
-to true if and only if any of the patterns matches the string.
-The following example processes option arguments for the
-<A href="/magic/man2html/1/man"><I>man</I>(1)
-</A>command:
-<DL><DT><DD><TT><PRE>
-opt=()
-while(~ $1 -* [1-9] 10){
-    switch($1){
-    case [1-9] 10
-        sec=$1 secn=$1
-    case -f
-        c=f s=f
-    case -[qwnt]
-        cmd=$1
-    case -T*
-        T=$1
-    case -*
-        opt=($opt $1)
-    }
-    shift
-}
-</PRE></TT></DL>
-</P>
-<H4>17 Functions
-</H4>
-<P>
-Functions may be defined by typing
-<DL><DT><DD><TT><PRE>
-fn <I>name</I> { <I>commands</I> }
-</PRE></TT></DL>
-Subsequently, whenever a command named
-<I>name</I>
-is encountered, the remainder of the command's
-argument list will be assigned to
-<TT>$*</TT>
-and
-<I>rc</I>
-will execute the
-<I>commands</I>.
-The value of
-<TT>$*</TT>
-will be restored on completion.
-For example:
-<DL><DT><DD><TT><PRE>
-fn g {
-    grep $1 *.[hcyl]
-}
-</PRE></TT></DL>
-defines
-<TT>g</TT><I> pattern</I>
-to look for occurrences of
-<I>pattern</I>
-in all program source files in the current directory.
-</P>
-<P>
-Function definitions are deleted by writing
-<DL><DT><DD><TT><PRE>
-fn <I>name</I>
-</PRE></TT></DL>
-with no function body.
-</P>
-<H4>18 Command execution
-</H4>
-<P>
-<I>Rc</I>
-does one of several things to execute a simple command.
-If the command name is the name of a function defined using
-<TT>fn</TT>,
-the function is executed.
-Otherwise, if it is the name of a built-in command, the
-built-in is executed directly by
-<I>rc</I>.
-Otherwise, directories mentioned in the variable
-<TT>$path</TT>
-are searched until an executable file is found.
-Extensive use of the
-<TT>$path</TT>
-variable is discouraged in Plan 9.  Instead, use the default
-<TT>(.</TT>
-<TT>/bin)</TT>
-and bind what you need into
-<TT>/bin</TT>.
-</P>
-<H4>19 Built-in commands
-</H4>
-<P>
-Several commands are executed internally by
-<I>rc</I>
-because they are difficult to implement otherwise.
-</P>
-<DL COMPACT>
-<DT><TT>. [-i] </TT><I>file ...</I><DD>
-Execute commands from
-<I>file</I>.
-<TT>$*</TT>
-is set for the duration to the remainder of the argument list following
-<I>file</I>.
-<TT>$path</TT>
-is used to search for
-<I>file</I>.
-Option
-<TT>-i</TT>
-indicates interactive input &#173; a prompt
-(found in
-<TT>$prompt</TT>)
-is printed before each command is read.
-<DT><TT>builtin </TT><I>command ...</I><DD>
-Execute
-<I>command</I>
-as usual except that any function named
-<I>command</I>
-is ignored.
-For example,
-<DL><DT><DD><TT><PRE>
-fn cd{
-    builtin cd $* &amp;&amp; pwd
-}
-</PRE></TT></DL>
-defines a replacement for the
-<TT>cd</TT>
-built-in (see below) that announces the full name of the new directory.
-<DT><TT>cd [</TT><I>dir</I><TT>]</TT><DD>
-Change the current directory to
-<I>dir</I>.
-The default argument is
-<TT>$home</TT>.
-<TT>$cdpath</TT>
-is a list of places in which to search for
-<I>dir</I>.
-<DT><TT>eval [</TT><I>arg ...</I><TT>]</TT><DD>
-The arguments are concatenated (separated by spaces) into a string, read as input to
-<I>rc</I>,
-and executed.  For example,
-<DL><DT><DD><TT><PRE>
-x='$y'
-y=Doody
-eval echo Howdy, $x
-</PRE></TT></DL>
-would echo
-<DL><DT><DD><TT><PRE>
-Howdy, Doody
-</PRE></TT></DL>
-since the arguments of
-<TT>eval</TT>
-would be
-<DL><DT><DD><TT><PRE>
-echo Howdy, $y
-</PRE></TT></DL>
-after substituting for
-<TT>$x</TT>.
-<DT><TT>exec [</TT><I>command ...</I><TT>]</TT><DD>
-<I>Rc</I>
-replaces itself with the given
-<I>command</I>.
-This is like a
-<I>goto</I>
-&#173;
-<I>rc</I>
-does not wait for the command to exit, and does not return to read any more commands.
-<DT><TT>exit [</TT><I>status</I><TT>]</TT><DD>
-<I>Rc</I>
-exits immediately with the given status.  If none is given, the current value of
-<TT>$status</TT>
-is used.
-<DT><TT>flag </TT><I>f</I><TT> [+-]</TT><DD>
-This command manipulates and tests the command line flags (described below).
-<DL><DT><DD><TT><PRE>
-flag <I>f</I> +
-</PRE></TT></DL>
-sets flag
-<I>f</I>.
-<DL><DT><DD><TT><PRE>
-flag <I>f</I> -
-</PRE></TT></DL>
-clears flag
-<I>f</I>.
-<DL><DT><DD><TT><PRE>
-flag <I>f</I>
-</PRE></TT></DL>
-tests flag
-<I>f</I>,
-setting
-<TT>$status</TT>
-appropriately.
-Thus
-<DL><DT><DD><TT><PRE>
-if(flag x) flag v +
-</PRE></TT></DL>
-sets the
-<TT>-v</TT>
-flag if the
-<TT>-x</TT>
-flag is already set.
-<DT><TT>rfork [nNeEsfF]</TT><DD>
-This uses the Plan 9
-<I>rfork</I>
-system entry to put
-<I>rc</I>
-into a new process group with the following attributes:
-<br><img src="data.19116850.gif"><br>
-Section
-<A href="/magic/man2html/2/fork"><I>fork</I>(2)
-</A>of the Programmer's Manual describes these attributes in more detail.
-<DT><TT>shift [</TT><I>n</I><TT>]</TT><DD>
-Delete the first
-<I>n</I>
-(default 1) elements of
-<TT>$*</TT>.
-<DT><TT>wait [</TT><I>pid</I><TT>]</TT><DD>
-Wait for the process with the given
-<I>pid</I>
-to exit.  If no
-<I>pid</I>
-is given, all outstanding processes are waited for.
-<DT><TT>whatis </TT><I>name ...</I><DD>
-Print the value of each
-<I>name</I>
-in a form suitable for input to
-<I>rc</I>.
-The output is an assignment to a variable, the definition of a function,
-a call to
-<TT>builtin</TT>
-for a built-in command, or the path name of a binary program.
-For example,
-<DL><DT><DD><TT><PRE>
-whatis path g cd who
-</PRE></TT></DL>
-might print
-<DL><DT><DD><TT><PRE>
-path=(. /bin)
-fn g {gre -e $1 *.[hycl]}
-builtin cd
-/bin/who
-</PRE></TT></DL>
-<DT><TT>~ </TT><I>subject pattern ...</I><DD>
-The
-<I>subject</I>
-is matched against each
-<I>pattern</I>
-in turn.  On a match,
-<TT>$status</TT>
-is set to true.
-Otherwise, it is set to
-<TT>'no match'</TT>.
-Patterns are the same as for filename matching.
-The
-<I>patterns</I>
-are not subjected to filename replacement before the
-<TT>~</TT>
-command is executed, so they need not be enclosed in
-quotation marks, unless of course, a literal match for
-<TT>*</TT>
-<TT>[</TT>
-or
-<TT>?</TT>
-is required.
-For example
-<DL><DT><DD><TT><PRE>
-~ $1 ?
-</PRE></TT></DL>
-matches any single character, whereas
-<DL><DT><DD><TT><PRE>
-~ $1 '?'
-</PRE></TT></DL>
-only matches a literal question mark.
-</dl>
-<H4>20 Advanced I/O Redirection
-</H4>
-<P>
-<I>Rc</I>
-allows redirection of file descriptors other than 0 and 1
-(standard input and output) by specifying the file descriptor
-in square brackets
-<TT>[ ]</TT>
-after the
-<TT>&lt;</TT>
-or
-<TT>&gt;</TT>.
-For example,
-<DL><DT><DD><TT><PRE>
-vc junk.c &gt;[2]junk.diag
-</PRE></TT></DL>
-saves the compiler's diagnostics from standard error in
-<TT>junk.diag</TT>.
-</P>
-<P>
-File descriptors may be replaced by a copy, in the sense of
-<A href="/magic/man2html/2/dup"><I>dup</I>(2),
-</A>of an already-open file by typing, for example
-<DL><DT><DD><TT><PRE>
-vc junk.c &gt;[2=1]
-</PRE></TT></DL>
-This replaces file descriptor 2 with a copy of file descriptor 1.
-It is more useful in conjunction with other redirections, like this
-<DL><DT><DD><TT><PRE>
-vc junk.c &gt;junk.out &gt;[2=1]
-</PRE></TT></DL>
-Redirections are evaluated from left to right, so this redirects
-file descriptor 1 to
-<TT>junk.out</TT>,
-then points file descriptor 2 at the same file.
-By contrast,
-<DL><DT><DD><TT><PRE>
-vc junk.c &gt;[2=1] &gt;junk.out
-</PRE></TT></DL>
-redirects file descriptor 2 to a copy of file descriptor 1
-(presumably the terminal), and then directs file descriptor 1
-to a file.  In the first case, standard and diagnostic output
-will be intermixed in
-<TT>junk.out</TT>.
-In the second, diagnostic output will appear on the terminal,
-and standard output will be sent to the file.
-</P>
-<P>
-File descriptors may be closed by using the duplication notation
-with an empty right-hand side.
-For example,
-<DL><DT><DD><TT><PRE>
-vc junk.c &gt;[2=]
-</PRE></TT></DL>
-will discard diagnostics from the compilation.
-</P>
-<P>
-Arbitrary file descriptors may be sent through
-a pipe by typing, for example,
-<DL><DT><DD><TT><PRE>
-vc junk.c |[2] grep -v '^$'
-</PRE></TT></DL>
-This deletes blank lines
-from the C compiler's error output.  Note that the output
-of
-<TT>grep</TT>
-still appears on file descriptor 1.
-</P>
-<P>
-Occasionally you may wish to connect the input side of
-a pipe to some file descriptor other than zero.
-The notation
-<DL><DT><DD><TT><PRE>
-cmd1 |[5=19] cmd2
-</PRE></TT></DL>
-creates a pipeline with
-<TT>cmd1</TT>'s
-file descriptor 5 connected through a pipe to
-<TT>cmd2</TT>'s
-file descriptor 19.
-</P>
-<H4>21 Here documents
-</H4>
-<P>
-<I>Rc</I>
-procedures may include data, called ``here documents'',
-to be provided as input to commands, as in this version of the
-<I>tel</I>
-command
-<DL><DT><DD><TT><PRE>
-for(i) grep $i &lt;&lt;!
-...
-tor 2T-402 2912
-kevin 2C-514 2842
-bill 2C-562 7214
-...
-!
-</PRE></TT></DL>
-A here document is introduced by the redirection symbol
-<TT>&lt;&lt;</TT>,
-followed by an arbitrary EOF marker
-(<TT>!</TT>
-in the example).  Lines following the command,
-up to a line containing only the EOF marker are saved
-in a temporary file that is connected to the command's
-standard input when it is run.
-</P>
-<P>
-<I>Rc</I>
-does variable substitution in here documents.  The following command:
-<DL><DT><DD><TT><PRE>
-ed $3 &lt;&lt;EOF
-g/$1/s//$2/g
-w
-EOF
-</PRE></TT></DL>
-changes all occurrences of
-<TT>$1</TT>
-to
-<TT>$2</TT>
-in file
-<TT>$3</TT>.
-To include a literal
-<TT>$</TT>
-in a here document, type
-<TT>$$</TT>.
-If the name of a variable is followed immediately by
-<TT>^</TT>,
-the caret is deleted.
-</P>
-<P>
-Variable substitution can be entirely suppressed by enclosing
-the EOF marker following
-<TT>&lt;&lt;</TT>
-in quotation marks, as in
-<TT>&lt;&lt;'EOF'</TT>.
-</P>
-<P>
-Here documents may be provided on file descriptors other than 0 by typing, for example,
-<DL><DT><DD><TT><PRE>
-cmd &lt;&lt;[4]End
-...
-End
-</PRE></TT></DL>
-</P>
-<P>
-If a here document appears within a compound block, the contents of the document
-must be after the whole block:
-<DL><DT><DD><TT><PRE>
-for(i in *){
-	mail $i &lt;&lt;EOF
-}
-words to live by
-EOF
-</PRE></TT></DL>
-</P>
-<H4>22 Catching Notes
-</H4>
-<P>
-<I>Rc</I>
-scripts normally terminate when an interrupt is received from the terminal.
-A function with the name of a UNIX signal, in lower case, is defined in the usual way,
-but called when
-<I>rc</I>
-receives the corresponding note.
-The
-<A href="/magic/man2html/2/notify"><I>notify</I>(2)
-</A>section of the Programmer's Manual discusses notes in some detail.
-Notes of interest are:
-</P>
-<DL COMPACT>
-<DT><TT>sighup</TT><DD>
-The note was `hangup'.
-Plan 9 sends this when the terminal has disconnected from
-<I>rc</I>.
-<DT><TT>sigint</TT><DD>
-The note was `interrupt', usually sent when
-the interrupt character (ASCII DEL) is typed on the terminal.
-<DT><TT>sigterm</TT><DD>
-The note was `kill', normally sent by
-<A href="/magic/man2html/1/kill"><I>kill</I>(1).
-</A><DT><TT>sigexit</TT><DD>
-An artificial note sent when
-<I>rc</I>
-is about to exit.
-</dl>
-<P>
-As an example,
-<DL><DT><DD><TT><PRE>
-fn sigint{
-    rm /tmp/junk
-    exit
-}
-</PRE></TT></DL>
-sets a trap for the keyboard interrupt that
-removes a temporary file before exiting.
-</P>
-<P>
-Notes will be ignored if the note routine is set to
-<TT>{}</TT>.
-Signals revert to their default behavior when their handlers'
-definitions are deleted.
-</P>
-<H4>23 Environment
-</H4>
-<P>
-The environment is a list of name-value pairs made available to
-executing binaries.
-On Plan 9, the environment is stored in a file system named
-<TT>#e</TT>,
-normally mounted on
-<TT>/env</TT>.
-The value of each variable is stored in a separate file, with components
-terminated by zero bytes.
-(The file system is
-maintained entirely in core, so no disk or network access is involved.)
-The contents of
-<TT>/env</TT>
-are shared on a per-process group basis - when a new process group is
-created it effectively attaches
-<TT>/env</TT>
-to a new file system initialized with a copy of the old one.
-A consequence of this organization is that commands can change environment
-entries and see the changes reflected in
-<I>rc</I>.
-</P>
-<P>
-Functions also appear in the environment, named by prefixing
-<TT>fn#</TT>
-to their names, like
-<TT>/env/fn#roff</TT>.
-</P>
-<H4>24 Local Variables
-</H4>
-<P>
-It is often useful to set a variable for the duration
-of a single command.  An assignment followed by a command
-has this effect.  For example
-<DL><DT><DD><TT><PRE>
-a=global
-a=local echo $a
-echo $a
-</PRE></TT></DL>
-will print
-<DL><DT><DD><TT><PRE>
-local
-global
-</PRE></TT></DL>
-This works even for compound commands, like
-<DL><DT><DD><TT><PRE>
-f=/fairly/long/file/name {
-    { wc $f; spell $f; diff $f.old $f } |
-      pr -h 'Facts about '$f | lp -dfn
-}
-</PRE></TT></DL>
-</P>
-<H4>25 Examples &#173; <I>cd, pwd</I>
-</H4>
-<P>
-Here is a pair of functions that provide
-enhanced versions of the standard
-<TT>cd</TT>
-and
-<TT>pwd</TT>
-commands.  (Thanks to Rob Pike for these.)
-<DL><DT><DD><TT><PRE>
-ps1='% '	# default prompt
-tab='	'	# a tab character
-fn cd{
-  builtin cd $1 &amp;&amp;
-  switch($#*){
-  case 0
-    dir=$home
-    prompt=($ps1 $tab)
-  case *
-    switch($1){
-    case /*
-      dir=$1
-      prompt=(`{basename `{pwd}}^$ps1 $tab)
-    case */* ..*
-      dir=()
-      prompt=(`{basename `{pwd}}^$ps1 $tab)
-    case *
-      dir=()
-      prompt=($1^$ps1 $tab)
-    }
-  }
-}
-fn pwd{
-  if(~ $#dir 0)
-    dir=`{/bin/pwd}
-  echo $dir
-}
-</PRE></TT></DL>
-Function
-<TT>pwd</TT>
-is a version of the standard
-<TT>pwd</TT>
-that caches its value in variable
-<TT>$dir</TT>,
-because the genuine
-<TT>pwd</TT>
-can be quite slow to execute.
-(Recent versions of Plan 9 have very fast implementations of
-<TT>pwd</TT>,
-reducing the advantage of the
-<TT>pwd</TT>
-function.)
-</P>
-<P>
-Function
-<TT>cd</TT>
-calls the
-<TT>cd</TT>
-built-in, and checks that it was successful.
-If so, it sets
-<TT>$dir</TT>
-and
-<TT>$prompt</TT>.
-The prompt will include the last component of the
-current directory (except in the home directory,
-where it will be null), and
-<TT>$dir</TT>
-will be reset either to the correct value or to
-<TT>()</TT>,
-so that the
-<TT>pwd</TT>
-function will work correctly.
-</P>
-<H4>26 Examples &#173; <I>man</I>
-</H4>
-<P>
-The
-<I>man</I>
-command prints pages of the Programmer's Manual.
-It is called, for example, as
-<DL><DT><DD><TT><PRE>
-man 2 sinh
-man rc
-man -t cat
-</PRE></TT></DL>
-In the first case, the page for
-<I>sinh</I>
-in section 2 is printed.
-In the second case, the manual page for
-<I>rc</I>
-is printed.  Since no manual section is specified,
-all sections are searched for the page, and it is found
-in section 1.
-In the third case, the page for
-<I>cat</I>
-is typeset (the
-<TT>-t</TT>
-option).
-<DL><DT><DD><TT><PRE>
-cd /sys/man || {
-  echo $0: No manual! &gt;[1=2]
-  exit 1
-}
-NT=n  # default nroff
-s='*' # section, default try all
-for(i) switch($i){
-case -t
-  NT=t
-case -n
-  NT=n
-case -*
-  echo Usage: $0 '[-nt] [section] page ...' &gt;[1=2]
-  exit 1
-case [1-9] 10
-  s=$i
-case *
-  eval 'pages='$s/$i
-  for(page in $pages){
-    if(test -f $page)
-      $NT^roff -man $page
-    if not
-      echo $0: $i not found &gt;[1=2]
-  }
-}
-</PRE></TT></DL>
-Note the use of
-<TT>eval</TT>
-to make a list of candidate manual pages.
-Without
-<TT>eval</TT>,
-the
-<TT>*</TT>
-stored in
-<TT>$s</TT>
-would not trigger filename matching
-&#173; it's enclosed in quotation marks,
-and even if it weren't, it would be expanded
-when assigned to
-<TT>$s</TT>.
-Eval causes its arguments
-to be re-processed by
-<I>rc</I>'s
-parser and interpreter, effectively delaying
-evaluation of the
-<TT>*</TT>
-until the assignment to
-<TT>$pages</TT>.
-</P>
-<H4>27 Examples &#173; <I>holmdel</I>
-</H4>
-<P>
-The following
-<I>rc</I>
-script plays the deceptively simple game
-<I>holmdel</I>,
-in which the players alternately name Bell Labs locations,
-the winner being the first to mention Holmdel.
-<DL><DT><DD><TT><PRE>
-t=/tmp/holmdel$pid
-fn read{
-	$1=`{awk '{print;exit}'}
-}
-ifs='
-'	# just a newline
-fn sigexit sigint sigquit sighup{
-	rm -f $t
-	exit
-}
-cat &lt;&lt;'!' &gt;$t
-Allentown
-Atlanta
-Cedar Crest
-Chester
-Columbus
-Elmhurst
-Fullerton
-Holmdel
-Indian Hill
-Merrimack Valley
-Morristown
-Neptune
-Piscataway
-Reading
-Short Hills
-South Plainfield
-Summit
-Whippany
-West Long Branch
-!
-while(){
-   lab=`{fortune $t}
-   echo $lab
-   if(~ $lab Holmdel){
-      echo You lose.
-      exit
-   }
-   while(read lab; ! grep -i -s $lab $t) echo No such location.
-   if(~ $lab [hH]olmdel){
-      echo You win.
-      exit
-   }
-}
-</PRE></TT></DL>
-</P>
-<P>
-This script is worth describing in detail
-(rather, it would be if it weren't so silly.)
-</P>
-<P>
-Variable
-<TT>$t</TT>
-is an abbreviation for the name of a temporary file.
-Including
-<TT>$pid</TT>,
-initialized by
-<I>rc</I>
-to its process-id,
-in the names of temporary files ensures that their
-names won't collide, in case more than one instance
-of the script is running at a time.
-</P>
-<P>
-Function
-<TT>read</TT>'s
-argument is the name of a variable into which a
-line gathered from standard input is read.
-<TT>$ifs</TT>
-is set to just a newline.  Thus
-<TT>read</TT>'s
-input is not split apart at spaces, but the terminating
-newline is deleted.
-</P>
-<P>
-A handler is set to catch
-<TT>sigint</TT>,
-<TT>sigquit</TT>,
-and
-<TT>sighup</TT>,
-and the artificial
-<TT>sigexit</TT>
-signal.  It just removes the temporary file and exits.
-</P>
-<P>
-The temporary file is initialized from a here
-document containing a list of Bell Labs locations, and
-the main loop starts.
-</P>
-<P>
-First, the program guesses a location (in
-<TT>$lab</TT>)
-using the
-<TT>fortune</TT>
-program to pick a random line from the location list.
-It prints the location, and if it guessed Holmdel, prints
-a message and exits.
-</P>
-<P>
-Then it uses the
-<TT>read</TT>
-function to get lines from standard input and validity-check
-them until it gets a legal name.
-Note that the condition part of a
-<TT>while</TT>
-can be a compound command.  Only the exit status of the
-last command in the sequence is checked.
-</P>
-<P>
-Again, if the result
-is Holmdel, it prints a message and exits.
-Otherwise it goes back to the top of the loop.
-</P>
-<H4>28 Design Principles
-</H4>
-<P>
-<I>Rc</I>
-draws heavily from Steve Bourne's
-<TT>/bin/sh</TT>.
-Any successor of the Bourne shell is bound to
-suffer in comparison.  I have tried to fix its
-best-acknowledged shortcomings and to simplify things
-wherever possible, usually by omitting inessential features.
-Only when irresistibly tempted have I introduced novel ideas.
-Obviously I have tinkered extensively with Bourne's syntax.
-</P>
-<P>
-The most important principle in
-<I>rc</I>'s
-design is that it's not a macro processor.  Input is never
-scanned more than once by the lexical and syntactic analysis
-code (except, of course, by the
-<TT>eval</TT>
-command, whose
-<I>raison d'&ecirc;tre</I>
-is to break the rule).
-</P>
-<P>
-Bourne shell scripts can often be made
-to run wild by passing them arguments containing spaces.
-These will be split into multiple arguments using
-<TT>IFS</TT>,
-often at inopportune times.
-In
-<I>rc</I>,
-values of variables, including command line arguments, are not re-read
-when substituted into a command.
-Arguments have presumably been scanned in the parent process, and ought
-not to be re-read.
-</P>
-<P>
-Why does Bourne re-scan commands after variable substitution?
-He needs to be able to store lists of arguments in variables whose values are
-character strings.
-If we eliminate re-scanning, we must change the type of variables, so that
-they can explicitly carry lists of strings.
-</P>
-<P>
-This introduces some
-conceptual complications.  We need a notation for lists of words.
-There are two different kinds of concatenation, for strings &#173;
-<TT>$a^$b</TT>,
-and lists &#173;
-<TT>($a $b)</TT>.
-The difference between
-<TT>()</TT>
-and
-<TT>''</TT>
-is confusing to novices,
-although the distinction is arguably sensible &#173;
-a null argument is not the same as no argument.
-</P>
-<P>
-Bourne also rescans input when doing command substitution.
-This is because the text enclosed in back-quotes is not
-a string, but a command.  Properly, it ought to
-be parsed when the enclosing command is, but this makes
-it difficult to
-handle nested command substitutions, like this:
-<DL><DT><DD><TT><PRE>
-size=`wc -l \`ls -t|sed 1q\``
-</PRE></TT></DL>
-The inner back-quotes must be escaped
-to avoid terminating the outer command.
-This can get much worse than the above example;
-the number of
-<TT>\</TT>'s
-required is exponential in the nesting depth.
-<I>Rc</I>
-fixes this by making the backquote a unary operator
-whose argument is a command, like this:
-<DL><DT><DD><TT><PRE>
-size=`{wc -l `{ls -t|sed 1q}}
-</PRE></TT></DL>
-No escapes are ever required, and the whole thing
-is parsed in one pass.
-</P>
-<P>
-For similar reasons
-<I>rc</I>
-defines signal handlers as though they were functions,
-instead of associating a string with each signal, as Bourne does,
-with the attendant possibility of getting a syntax error message
-in response to typing the interrupt character.  Since
-<I>rc</I>
-parses input when typed, it reports errors when you make them.
-</P>
-<P>
-For all this trouble, we gain substantial semantic simplifications.
-There is no need for the distinction between
-<TT>$*</TT>
-and
-<TT>$@</TT>.
-There is no need for four types of quotation, nor the
-extremely complicated rules that govern them.  In
-<I>rc</I>
-you use quotation marks when you want a syntax character
-to appear in an argument, or an argument that is the empty string,
-and at no other time.
-<TT>IFS</TT>
-is no longer used, except in the one case where it was indispensable:
-converting command output into argument lists during command substitution.
-</P>
-<P>
-This also avoids an important UNIX security hole.
-In UNIX, the
-<I>system</I>
-and
-<I>popen</I>
-functions call
-<TT>/bin/sh</TT>
-to execute a command.  It is impossible to use either
-of these routines with any assurance that the specified command will
-be executed, even if the caller of
-<I>system</I>
-or
-<I>popen</I>
-specifies a full path name for the command.  This can be devastating
-if it occurs in a set-userid program.
-The problem is that
-<TT>IFS</TT>
-is used to split the command into words, so an attacker can just
-set
-<TT>IFS=/</TT>
-in his environment and leave a Trojan horse
-named
-<TT>usr</TT>
-or
-<TT>bin</TT>
-in the current working directory before running the privileged program.
-<I>Rc</I>
-fixes this by never rescanning input for any reason.
-</P>
-<P>
-Most of the other differences between
-<I>rc</I>
-and the Bourne shell are not so serious.  I eliminated Bourne's
-peculiar forms of variable substitution, like
-<DL><DT><DD><TT><PRE>
-echo ${a=b} ${c-d} ${e?error}
-</PRE></TT></DL>
-because they are little used, redundant and easily
-expressed in less abstruse terms.
-I deleted the builtins
-<TT>export</TT>,
-<TT>readonly</TT>,
-<TT>break</TT>,
-<TT>continue</TT>,
-<TT>read</TT>,
-<TT>return</TT>,
-<TT>set</TT>,
-<TT>times</TT>
-and
-<TT>unset</TT>
-because they seem redundant or
-only marginally useful.
-</P>
-<P>
-Where Bourne's syntax draws from Algol 68,
-<I>rc</I>'s
-is based on C or Awk.  This is harder to defend.
-I believe that, for example
-<DL><DT><DD><TT><PRE>
-if(test -f junk) rm junk
-</PRE></TT></DL>
-is better syntax than
-<DL><DT><DD><TT><PRE>
-if test -f junk; then rm junk; fi
-</PRE></TT></DL>
-because it is less cluttered with keywords,
-it avoids the semicolons that Bourne requires
-in odd places,
-and the syntax characters better set off the
-active parts of the command.
-</P>
-<P>
-The one bit of large-scale syntax that Bourne
-unquestionably does better than
-<I>rc</I>
-is the
-<TT>if</TT>
-statement with
-<TT>else</TT>
-clause.
-<I>Rc</I>'s
-<TT>if</TT>
-has no terminating
-<TT>fi</TT>-like
-bracket.  As a result, the parser cannot
-tell whether or not to expect an
-<TT>else</TT>
-clause without looking ahead in its input.
-The problem is that after reading, for example
-<DL><DT><DD><TT><PRE>
-if(test -f junk) echo junk found
-</PRE></TT></DL>
-in interactive mode,
-<I>rc</I>
-cannot decide whether to execute it immediately and print
-<TT>$prompt(1)</TT>,
-or to print
-<TT>$prompt(2)</TT>
-and wait for the
-<TT>else</TT>
-to be typed.
-In the Bourne shell, this is not a problem, because the
-<TT>if</TT>
-command must end with
-<TT>fi</TT>,
-regardless of whether it contains an
-<TT>else</TT>
-or not.
-</P>
-<P>
-<I>Rc</I>'s
-admittedly feeble solution is to declare that the
-<TT>else</TT>
-clause is a separate statement, with the semantic
-proviso that it must immediately follow an
-<TT>if</TT>,
-and to call it
-<TT>if not</TT>
-rather than
-<TT>else</TT>,
-as a reminder that something odd is going on.
-The only noticeable consequence of this is that
-the braces are required in the construction
-<DL><DT><DD><TT><PRE>
-for(i){
-    if(test -f $i) echo $i found
-    if not echo $i not found
-}
-</PRE></TT></DL>
-and that
-<I>rc</I>
-resolves the ``dangling else'' ambiguity in opposition
-to most people's expectations.
-</P>
-<P>
-It is remarkable that in the four most recent editions of the UNIX system
-programmer's manual the Bourne shell grammar described in the manual page
-does not admit the command
-<TT>who|wc</TT>.
-This is surely an oversight, but it suggests something darker:
-nobody really knows what the Bourne shell's grammar is.  Even examination
-of the source code is little help.  The parser is implemented by recursive
-descent, but the routines corresponding to the syntactic categories all
-have a flag argument that subtly changes their operation depending on the
-context.
-<I>Rc</I>'s
-parser is implemented using
-<I>yacc</I>,
-so I can say precisely what the grammar is.
-</P>
-<H4>29 Acknowledgements
-</H4>
-<P>
-Rob Pike, Howard Trickey and other Plan 9 users have been insistent, incessant
-sources of good ideas and criticism.  Some examples in this document are plagiarized
-from [Bourne],
-as are most of
-<I>rc</I>'s
-good features.
-</P>
-<H4>30 Reference
-</H4>
-<br>&#32;<br>
-S. R. Bourne,
-UNIX Time-Sharing System: The UNIX Shell,
-Bell System Technical Journal, Volume 57 number 6, July-August 1978
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 214
sys/doc/release3.html

@@ -1,214 +0,0 @@
-<html>
-<title>
-data
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Plan 9 From Bell Labs
-<br>
-Third Release Notes
-<br>
-June 7, 2000
-</H1>
-<br>&#32;<br>
-<DL><DT><DD><TT><PRE>
-Copyright &#169; 2000 Lucent Technologies Inc.
-All Rights Reserved
-<br>&#32;<br>
-</PRE></TT></DL>
-<br>&#32;<br>
-The third release of the Plan 9 operating system from Bell Labs
-is something of a snapshot of the current system.
-This differs from the previous, 1995 release,
-which was a more coordinated, well-defined release of an already-out-of-date
-system.
-Also, the previous releases were distributed on fixed media, while this release
-is being done over the web.
-The other major difference is that the third release is licensed under
-an open source agreement, which we hope will encourage people
-to experiment with it.
-<br>&#32;<br>
-Beyond that, there are innumerable little changes throughout the code.
-Although superficially it is the same environment, there is hardly an aspect
-of the system that has not been redesigned, rewritten, or replaced.
-The following is an incomplete list of changes.
-<br>&#32;<br>
-*
-The list of architectures has changed; more compilers are included
-and the list of kernels has changed.
-There is solid support for Intel x86 multiprocessors.
-Also, although the sources are available for
-other architectures, the binaries and libraries are built only for the
-Intel x86 architectures.
-Kernel source is available for x86, Mips, DEC Alpha, and Power PC architectures.
-Compilers also exist for AMD 29000, Motorola MC68000 and MC68020,
-Intel i960, and SPARC.
-(Unlike the last release, no SPARC kernel exists for the current system.)
-The compilers and related tools
-have been made easier to port to Unix and Windows.
-<br>&#32;<br>
-*
-The kernel now has a file cache to improve I/O performance.
-Other kernel changes include the replacement of the streams interface
-with a simpler, faster, but less flexible I/O queue structure.
-The x86 kernels support PCI and PCMCIA devices.
-<br>&#32;<br>
-*
-Network management has been simplified and generalized.
-DNS supports a resolver mode and the DNS server is now solid.
-DHCP is supported both at the client and server ends.
-The system can handle multiple IP stacks, which are also
-no longer Ethernet-specific.
-<br>&#32;<br>
-*
-The organization of disks in the kernel has been unified, providing
-a consistent interface to all disks and controllers: SCSI or ATAPI,
-magnetic or CD-ROM.
-<br>&#32;<br>
-*
-File offsets, such as in the
-<TT>seek</TT>
-system call, are now 64-bit values.
-The 1995 release defined the type
-<TT>Length</TT>
-for the x86 as
-<DL><DT><DD><TT><PRE>
-typedef union
-{
-	char	clength[8];
-	vlong	vlength;
-	struct
-	{
-		long	hlength;
-		long	length;
-	};
-} Length;
-</PRE></TT></DL>
-which is the wrong byte order.
-Now, for all architectures,
-<TT>Length</TT>
-is well handled by a
-<TT>vlong</TT>
-(<TT>long</TT>
-<TT>long</TT>)
-type, although for compatibility it's still held in a union:
-<DL><DT><DD><TT><PRE>
-typedef union
-{
-	vlong	length;
-} Length;
-</PRE></TT></DL>
-<br>&#32;<br>
-*
-The kernel now maintains a file name associated with each open file or
-directory, which can be cheaply recovered by the
-<TT>fd2path</TT>
-system call.
-Plan 9 now does a much better job with
-<TT>..</TT>
-(dot-dot).
-On a related note, a description of a process's name space may be
-read with the
-<TT>ns</TT>
-file in
-<TT>/proc</TT>,
-or by the
-<TT>ns</TT>
-command.
-<br>&#32;<br>
-*
-The security model is the same, although
-the key format has changed.
-If you have an old key file, use
-<TT>auth/convkeys2</TT>
-(see
-<A href="/magic/man2html/8/auth"><I>auth</I>(8))
-</A>to update it.
-There are new libraries for multiprecision arithmetic and security.
-<br>&#32;<br>
-*
-The graphics model is very different.
-It is based on the Porter-Duff compositing algebra rather than
-<TT>bitblt</TT>,
-and the system supports everything from bitmaps to true-color displays.
-Some of the graphics drivers exploit hardware acceleration.
-<br>&#32;<br>
-*
-Coupled to the graphics changes, the image and font file formats have
-changed.
-They can represent a wider range of pixel formats and compress the data.
-Also the white/black sense of value is reversed (zero is now black; pixels
-represent light, not ink).
-Most of the tools can handle the old format, but they all write the new format only.
-<br>&#32;<br>
-*
-The user interface now incorporates plumbing, a language-driven
-way for applications to communicate.  See
-<A href="/magic/man2html/6/plumb"><I>plumb</I>(6)
-</A>for information.
-<br>&#32;<br>
-*
-Building on plumbing and a program that presents the mail box as a file
-system, Plan 9 now has convenient support for MIME mail messages.
-<br>&#32;<br>
-*
-<TT>8&#189;</TT>
-has been replaced by
-<TT>rio</TT>,
-which has a similar appearance but a different architecture.
-Although still a file server, it is much more efficient: the kernel driver
-multiplexes graphics output so
-<TT>rio</TT>
-is not in the display path.
-<TT>Rio</TT>
-handles input and window control only.
-<br>&#32;<br>
-*
-PC booting is more sophisticated.  PCs can now boot Plan 9 directly from
-the disk without running DOS.
-<br>&#32;<br>
-*
-Alef is gone.
-It was deemed too difficult to maintain two sets of compilers and libraries
-for all architectures.
-Alef programs were translated into C, with the help of a new thread library
-that preserves much of Alef's functionality, but none of its syntax.
-<br>&#32;<br>
-*
-Mothra is gone.  There is no web browser included in this release,
-but something may well appear before long.
-<br>&#32;<br>
-*
-The
-<TT>fb</TT>
-(frame buffer) suite is gone. Most of its tools are
-supplanted by new ones, such as
-<TT>page</TT>,
-<TT>jpg</TT>,
-and
-<TT>togif</TT>.
-<br>&#32;<br>
-*
-Also gone from this release are the games and support for
-international input
-(<TT>ktrans</TT>
-etc.).
-Both may return.
-<br>&#32;<br>
-*
-New things include an implementation of
-<TT>ssh</TT>,
-an IMAP4 server,
-and some spam-filtering software (see
-<A href="/magic/man2html/8/scanmail"><I>scanmail</I>(8)).
-</A><br>&#32;<br>
-There's lots more.
-If you have problems, mail
-<TT>9trouble@plan9.bell-labs.com</TT>.
-Please don't mail us individually.
-<br>&#32;<br>
-Good Luck!
-
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 180
sys/doc/release4.html

@@ -1,180 +0,0 @@
-<html>
-<title>
-data
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Plan 9 From Bell Labs
-<br>
-Fourth Release Notes
-<br>
-April, 2002
-<br>
-updated June, 2003
-</H1>
-<br>&#32;<br>
-<DL><DT><DD><TT><PRE>
-Copyright &#169; 2002-2003 Lucent Technologies Inc.
-All Rights Reserved
-<br>&#32;<br>
-</PRE></TT></DL>
-<br>&#32;<br>
-The fourth release of the Plan 9 operating system from Bell Labs
-packages a major overhaul of the system at every level.
-From the underlying file system protocol, 9P, through the kernel,
-libraries, and applications, almost everything has been modified
-and, in many cases, redesigned or rewritten.
-<br>&#32;<br>
-The most significant change is that 9P has been redesigned to address
-a number of shortcomings, most important, its previous inability to handle long
-file names.
-Unfortunately, squeezing long names onto the disks of existing
-file servers is a messy business that we're still grappling with,
-so at the moment
-<A href="/magic/man2html/4/fs"><I>fs</I>(4)
-</A>and
-<A href="/magic/man2html/4/kfs"><I>kfs</I>(4)
-</A>can't yet handle long names,
-although they do talk the new protocol.
-(In fact, they
-talk both old and new, as required, to ease transition.)
-In the meantime, there is a workaround &#173;
-<A href="/magic/man2html/4/lnfs"><I>lnfs</I>(4)
-</A>&#173;
-and many of the other file servers such as
-<A href="/magic/man2html/4/ramfs"><I>ramfs</I>(4)
-</A>and
-<A href="/magic/man2html/4/u9fs"><I>u9fs</I>(4)
-</A>work just fine with long names.
-It's only the old disk-resident file servers
-that don't.
-The new file server
-<A href="/magic/man2html/4/fossil"><I>fossil</I>(4)
-</A>supports long names and many other features.
-The older servers are now deprecated.
-<br>&#32;<br>
-The following is a partial list of the major changes throughout the system.
-<br>&#32;<br>
-*
-The file system protocol, 9P, has been reworked.
-It now has variable-length names, so it can handle long names
-but also is more compact when handling short ones.
-It uses a different format that is easily parsed, eliminating the need for the old
-<TT>aux/fcall</TT>
-utility,
-and delegates its authentication duties to an external agent,
-<TT>factotum</TT>.
-<br>&#32;<br>
-*
-Security has been a focus of attention.
-A new security agent,
-<A href="/magic/man2html/4/factotum"><I>factotum</I>(4),
-</A>manages passwords and other secrets and, coupled with a new secure file store
-<A href="/magic/man2html/8/secstore"><I>secstore</I>(8),
-</A>enables secure single sign-on.
-<br>&#32;<br>
-*
-<TT>Cpu</TT>,
-<TT>import</TT>,
-and
-<TT>exportfs</TT>
-all encrypt their connections now, and since they use the new 9P they
-also use new network port numbers.
-A new service
-<A href="/magic/man2html/1/aan"><I>aan</I>(1)
-</A>is used by
-<TT>import</TT>
-to make its network connections more reliable in the face of network outages.
-The old ports still work, through the agency of a protocol conversion filter
-<A href="/magic/man2html/4/srvold9p"><I>srvold9p</I>(4).
-</A><br>&#32;<br>
-*
-We are phasing out the IL protocol since it doesn't handle long-distance connections
-well (and long-distance networks don't handle it well, either).
-IL is still used by
-<A href="/magic/man2html/4/fs"><I>fs</I>(4)
-</A>but TCP has become the standard protocol for all other services.
-<br>&#32;<br>
-*
-The software for the new network-resident secure block store,
-<A href="/magic/man2html/8/venti"><I>venti</I>(8),
-</A>is included with this distribution.
-The new
-file server
-<A href="/magic/man2html/4/fossil"><I>fossil</I>(4)
-</A>uses Venti rather than a WORM as its permanent block repository/backup medium.
-It is still being developed, but is mature enough that a handful of users
-throughout the world are using it as their primary file server.
-<br>&#32;<br>
-*
-The need to handle longer file names triggered a rethinking of the way the
-system handles strings in general.
-The kernel is now more explanatory when it gives an error message and
-more consistent in how it handles strings such as commands to devices.
-The interfaces to many of the system calls, such as
-<A href="/magic/man2html/2/errstr"><I>errstr</I>(2)
-</A>and
-<A href="/magic/man2html/2/wait"><I>wait</I>(2)
-</A>all had to change as a result, as did the library interface to read directories,
-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
-</A>and its relatives.
-<br>&#32;<br>
-*
-The formatted I/O package described in
-<A href="/magic/man2html/2/print"><I>print</I>(2)
-</A>and
-<A href="/magic/man2html/2/fmtinstall"><I>fmtinstall</I>(2)
-</A>has been redesigned.
-Although the basic interface is unchanged, it now runs without locks and
-has an internal buffer management mechanism that means
-<TT>print</TT>
-no longer needs a large on-stack buffer.
-The interface for writing custom print verbs and custom formatted I/O routines
-has also been greatly improved.
-<br>&#32;<br>
-*
-The thread library
-<A href="/magic/man2html/2/thread"><I>thread</I>(2)
-</A>has been completely rewritten.
-The main visible change is that, coupled with the changes to printing,
-<TT>threadprint</TT>
-is gone; you can just use
-<TT>print</TT>
-or
-<TT>fprint</TT>
-at will.
-<br>&#32;<br>
-*
-Support for electronic mail has been extended in many ways and now includes
-some new spam filtering tools,
-much better (and more standard) handling of MIME messages,
-the ability to render incoming HTML mail,
-and much more.
-<br>&#32;<br>
-There are so many changes to the programming interfaces of the system
-that they are described in a separate document, entitled
-Changes to the Programming Environment in the Fourth Release of Plan 9.
-Please read it before you start updating your own software to run under the new system.
-<br>&#32;<br>
-The installation method has also changed and we're moving towards a new
-method for maintaining updates.
-The Plan 9 Wiki
-(<TT>http://plan9.bell-labs.com/wiki/plan9</TT>)
-and Usenet group
-(<TT>comp.os.plan9</TT>)
-are the places to visit to learn more and stay current.
-In particular, the installation notes are now maintained in the Wiki;
-the traditional papers on installation and start-up are gone.
-<br>&#32;<br>
-There's lots more new stuff.
-If you have problems, mail
-<TT>9trouble@plan9.bell-labs.com</TT>
-or, better, check the wiki
-<TT>http://plan9.bell-labs.com/wiki/plan9</TT>
-or ask the Usenet newsgroup
-<TT>comp.os.plan9</TT>.
-<br>&#32;<br>
-Good Luck!
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 3291
sys/doc/sam/sam.html

@@ -1,3291 +0,0 @@
-<html>
-<title>
-The Text Editor sam
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>The Text Editor <TT>sam</TT>
-</H1>
-<DL><DD><I>Rob Pike<br>
-rob@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-is an interactive multi-file text editor intended for
-bitmap displays.
-A textual command language
-supplements the mouse-driven, cut-and-paste interface
-to make complex or
-repetitive editing tasks easy to specify.
-The language is characterized by the composition of regular expressions
-to describe the structure of the text being modified.
-The treatment of files as a database, with changes logged
-as atomic transactions, guides the implementation and
-makes a general `undo' mechanism straightforward.
-<P>
-<TT>Sam</TT>
-is implemented as two processes connected by a low-bandwidth stream,
-one process handling the display and the other the editing
-algorithms.  Therefore it can run with the display process
-in a bitmap terminal and the editor on a local host,
-with both processes on a bitmap-equipped host, or with
-the display process in the terminal and the editor in a
-remote host.
-By suppressing the display process,
-it can even run without a bitmap terminal.
-</P>
-<P>
-This paper is reprinted from Software&#8212;Practice and Experience,
-Vol 17, number 11, pp. 813-845, November 1987.
-The paper has not been updated for the Plan 9 manuals.  Although
-<TT>Sam</TT>
-has not changed much since the paper was written, the system around it certainly has.
-Nonetheless, the description here still stands as the best introduction to the editor.
-</P>
-</DL>
-<H4>Introduction
-</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-is an interactive text editor that combines cut-and-paste interactive editing with
-an unusual command language based on the composition of regular expressions.
-It is written as two programs: one, the `host part,' runs on a UNIX system
-and implements the command language and provides file access; the other, the
-`terminal part,' runs asynchronously
-on a machine with a mouse and bitmap display
-and supports the display and interactive editing.
-The host part may even be run in isolation on an ordinary terminal
-to edit text using the command
-language, much like a traditional line editor,
-without assistance from a mouse or display.
-Most often,
-the terminal part runs on a Blit<sup>1</sup> terminal
-(actually on a Teletype DMD 5620, the production version of the Blit), whose
-host connection is an ordinary 9600 bps RS232 link;
-on the SUN computer the host and display processes run on a single machine,
-connected by a pipe.
-<P>
-<TT>Sam</TT>
-edits uninterpreted
-ASCII text.
-It has no facilities for multiple fonts, graphics or tables,
-unlike MacWrite,<sup>2</sup> Bravo,<sup>3</sup> Tioga<sup>4</sup>
-or Lara.<sup>5</sup>
-Also unlike them, it has a rich command language.
-(Throughout this paper, the phrase
-command language
-refers to
-textual commands; commands activated from the mouse form the
-<I>mouse</I>
-<I>language.</I>)
-<TT>Sam</TT>
-developed as an editor for use by programmers, and tries to join
-the styles of the UNIX text editor
-<TT>ed</TT><sup>6,7</sup>
-with that of interactive cut-and-paste editors by
-providing a comfortable mouse-driven interface
-to a program with a solid command language driven by regular expressions.
-The command language developed more than the mouse language, and
-acquired a notation for describing the structure of files
-more richly than as a sequence of lines,
-using a dataflow-like syntax for specifying changes.
-</P>
-<P>
-The interactive style was influenced by
-<TT>jim</TT>,<sup>1</sup>
-an early cut-and-paste editor for the Blit, and by
-<TT>mux</TT>,<sup>8</sup>
-the Blit window system.
-<TT>Mux</TT>
-merges the original Blit window system,
-<TT>mpx</TT>,<sup>1</sup>
-with cut-and-paste editing, forming something like a
-multiplexed version of
-<TT>jim</TT>
-that edits the output of (and input to) command sessions rather than files.
-</P>
-<P>
-The first part of this paper describes the command language, then the mouse
-language, and explains how they interact.
-That is followed by a description of the implementation,
-first of the host part, then of the terminal part.
-A principle that influenced the design of
-<TT>sam</TT>
-is that it should have no explicit limits, such as upper limits on
-file size or line length.
-A secondary consideration is that it be efficient.
-To honor these two goals together requires a method for efficiently
-manipulating
-huge strings (files) without breaking them into lines,
-perhaps while making thousands of changes
-under control of the command language.
-<TT>Sam</TT>'s
-method is to
-treat the file as a transaction database, implementing changes as atomic
-updates.  These updates may be unwound easily to `undo' changes.
-Efficiency is achieved through a collection of caches that minimizes
-disc traffic and data motion, both within the two parts of the program
-and between them.
-</P>
-<P>
-The terminal part of
-<TT>sam</TT>
-is fairly straightforward.
-More interesting is how the two halves of the editor stay
-synchronized when either half may initiate a change.
-This is achieved through a data structure that organizes the
-communications and is maintained in parallel by both halves.
-</P>
-<P>
-The last part of the paper chronicles the writing of
-<TT>sam</TT>
-and discusses the lessons that were learned through its development and use.
-</P>
-<P>
-The paper is long, but is composed largely of two papers of reasonable length:
-a description of the user interface of
-<TT>sam</TT>
-and a discussion of its implementation.
-They are combined because the implementation is strongly influenced by
-the user interface, and vice versa.
-</P>
-<H4>The Interface
-</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-is a text editor for multiple files.
-File names may be provided when it is invoked:
-<DL><DT><DD><TT><PRE>
-sam file1 file2 ...
-</PRE></TT></DL>
-and there are commands
-to add new files and discard unneeded ones.
-Files are not read until necessary
-to complete some command.
-Editing operations apply to an internal copy
-made when the file is read; the UNIX file associated with the copy
-is changed only by an explicit command.
-To simplify the discussion, the internal copy is here called a
-<I>file</I>,
-while the disc-resident original is called a
-disc file.
-<P>
-<TT>Sam</TT>
-is usually connected to a bitmap display that presents a cut-and-paste
-editor driven by the mouse.
-In this mode, the command language is still available:
-text typed in a special window, called the
-<TT>sam</TT>
-<I>window,</I>
-is interpreted
-as commands to be executed in the current file.
-Cut-and-paste editing may be used in any window &#8212; even in the
-<TT>sam</TT>
-window to construct commands.
-The other mode of operation, invoked by starting
-<TT>sam</TT>
-with the option
-<TT>-d</TT>
-(for `no download'),
-does not use the mouse or bitmap display, but still permits
-editing using the textual command language, even on an ordinary terminal,
-interactively or from a script.
-</P>
-<P>
-The following sections describe first the command language (under
-<TT>sam -d</TT>
-and in the
-<TT>sam</TT>
-window), and then the mouse interface.
-These two languages are nearly independent, but connect through the
-<I>current</I>
-<I>text,</I>
-described below.
-</P>
-<H4>The Command Language
-</H4>
-<br>&#32;<br>
-A file consists of its contents, which are an array of characters
-(that is, a string); the
-<I>name</I>
-of the associated disc file; the
-modified bit
-that states whether the contents match those of
-the disc file;
-and a substring of the contents, called the
-current text
-or
-<I>dot</I>
-(see Figures 1 and 2).
-If the current text is a null string, dot falls between characters.
-The
-<I>value</I>
-of dot is the location of the current text; the
-<I>contents</I>
-of dot are the characters it contains.
-<TT>Sam</TT>
-imparts to the text no two-dimensional interpretation such as columns
-or fields; text is always one-dimensional.
-Even the idea of a `line' of text as understood by most UNIX programs
-&#8212; a sequence of characters terminated by a newline character &#8212;
-is only weakly supported.
-<P>
-The
-current file
-is the file to which editing commands refer.
-The current text is therefore dot in the current file.
-If a command doesn't explicitly name a particular file or piece of text,
-the command is assumed to apply to the current text.
-For the moment, ignore the presence of multiple files and consider
-editing a single file.
-<br><img src="fig1.ps.11760.gif"><br>
-<br>
-<I>Figure 1. A typical
-</I><TT>sam</TT><I>
-screen, with the editing menu presented.
-The
-</I><TT>sam</TT><I>
-(command language) window is in the middle, with file windows above and below.
-(The user interface makes it easy to create these abutting windows.)
-The partially obscured window is a third file window.
-The uppermost window is that to which typing and mouse operations apply,
-as indicated by its heavy border.
-Each window has its current text highlighted in reverse video.
-The
-</I><TT>sam</TT><I>
-window's current text is the null string on the last visible line,
-indicated by a vertical bar.
-See also Figure 2.
-<br>
-</I><DL><DT><DD><TT><PRE>
-<br>&#32;<br>
-</PRE></TT></DL>
-</P>
-<P>
-Commands have one-letter names.
-Except for non-editing commands such as writing
-the file to disc, most commands make some change
-to the text in dot and leave dot set to the text resulting from the change.
-For example, the delete command,
-<TT>d</TT>,
-deletes the text in dot, replacing it by the null string and setting dot
-to the result.
-The change command,
-<TT>c</TT>,
-replaces dot by text delimited by an arbitrary punctuation character,
-conventionally
-a slash.  Thus,
-<DL><DT><DD><TT><PRE>
-c/Peter/
-</PRE></TT></DL>
-replaces the text in dot by the string
-<TT>Peter</TT>.
-Similarly,
-<DL><DT><DD><TT><PRE>
-a/Peter/
-</PRE></TT></DL>
-(append) adds the string after dot, and
-<DL><DT><DD><TT><PRE>
-i/Peter/
-</PRE></TT></DL>
-(insert) inserts before dot.
-All three leave dot set to the new text,
-<TT>Peter</TT>.
-</P>
-<P>
-Newlines are part of the syntax of commands:
-the newline character lexically terminates a command.
-Within the inserted text, however, newlines are never implicit.
-But since it is often convenient to insert multiple lines of text,
-<TT>sam</TT>
-has a special
-syntax for that case:
-<DL><DT><DD><TT><PRE>
-a
-some lines of text
-to be inserted in the file,
-terminated by a period
-on a line by itself
-.
-</PRE></TT></DL>
-In the one-line syntax, a newline character may be specified by a C-like
-escape, so
-<DL><DT><DD><TT><PRE>
-c/\n/
-</PRE></TT></DL>
-replaces dot by a single newline character.
-</P>
-<P>
-<TT>Sam</TT>
-also has a substitute command,
-<TT>s</TT>:
-<DL><DT><DD><TT><PRE>
-s/<I>expression</I>/<I>replacement</I>/
-</PRE></TT></DL>
-substitutes the replacement text for the first match, in dot,
-of the regular expression.
-Thus, if dot is the string
-<TT>Peter</TT>,
-the command
-<DL><DT><DD><TT><PRE>
-s/t/st/
-</PRE></TT></DL>
-changes it to
-<TT>Pester</TT>.
-In general,
-<TT>s</TT>
-is unnecessary, but it was inherited from
-<TT>ed</TT>
-and it has some convenient variations.
-For instance, the replacement text may include the matched text,
-specified by
-<TT>&amp;</TT>:
-<DL><DT><DD><TT><PRE>
-s/Peter/Oh, &amp;, &amp;, &amp;, &amp;!/
-</PRE></TT></DL>
-</P>
-<P>
-There are also three commands that apply programs
-to text:
-<DL><DT><DD><TT><PRE>
-&#60; <I>UNIX program</I>
-</PRE></TT></DL>
-replaces dot by the output of the UNIX program.
-Similarly, the
-<TT>&#62;</TT>
-command
-runs the program with dot as its standard input, and
-<TT>|</TT>
-does both.  For example,
-<DL><DT><DD><TT><PRE>
-| sort
-</PRE></TT></DL>
-replaces dot by the result of applying the standard sorting utility to it.
-Again, newlines have no special significance for these
-<TT>sam</TT>
-commands.
-The text acted upon and resulting from these commands is not necessarily
-bounded by newlines, although for connection with UNIX programs,
-newlines may be necessary to obey conventions.
-</P>
-<P>
-One more command:
-<TT>p</TT>
-prints the contents of dot.
-Table I summarizes
-<TT>sam</TT>'s
-commands.
-<br><img src="-.11761.gif"><br>
-<br>&#32;<br>
-</P>
-<P>
-The value of dot may be changed by
-specifying an
-<I>address</I>
-for the command.
-The simplest address is a line number:
-<DL><DT><DD><TT><PRE>
-3
-</PRE></TT></DL>
-refers to the third line of the file, so
-<DL><DT><DD><TT><PRE>
-3d
-</PRE></TT></DL>
-deletes the third line of the file, and implicitly renumbers
-the lines so the old line 4 is now numbered 3.
-(This is one of the few places where
-<TT>sam</TT>
-deals with lines directly.)
-Line
-<TT>0</TT>
-is the null string at the beginning of the file.
-If a command consists of only an address, a
-<TT>p</TT>
-command is assumed, so typing an unadorned
-<TT>3</TT>
-prints line 3 on the terminal.
-There are a couple of other basic addresses:
-a period addresses dot itself; and
-a dollar sign
-(<TT>$</TT>)
-addresses the null string at the end of the file.
-</P>
-<P>
-An address is always a single substring of the file.
-Thus, the address
-<TT>3</TT>
-addresses the characters
-after the second newline of
-the file through the third newline of the file.
-A
-compound address
-is constructed by the comma operator
-<DL><DT><DD><TT><PRE>
-<I>address1</I>,<I>address2</I>
-</PRE></TT></DL>
-and addresses the substring of the file from the beginning of
-<I>address1</I>
-to the end of
-<I>address2</I>.
-For example, the command
-<TT>3,5p</TT>
-prints the third through fifth lines of the file and
-<TT>.,$d</TT>
-deletes the text from the beginning of dot to the end of the file.
-</P>
-<P>
-These addresses are all absolute positions in the file, but
-<TT>sam</TT>
-also has relative addresses, indicated by
-<TT>+</TT>
-or
-<TT>-</TT>.
-For example,
-<DL><DT><DD><TT><PRE>
-$-3
-</PRE></TT></DL>
-is the third line before the end of the file and
-<DL><DT><DD><TT><PRE>
-.+1
-</PRE></TT></DL>
-is the line after dot.
-If no address appears to the left of the
-<TT>+</TT>
-or
-<TT>-</TT>,
-dot is assumed;
-if nothing appears to the right,
-<TT>1</TT>
-is assumed.
-Therefore,
-<TT>.+1</TT>
-may be abbreviated to just a plus sign.
-</P>
-<P>
-The
-<TT>+</TT>
-operator acts relative to the end of its first argument, while the
-<TT>-</TT>
-operator acts relative to the beginning.  Thus
-<TT>.+1</TT>
-addresses the first line after dot,
-<TT>.-</TT>
-addresses the first line before dot, and
-<TT>+-</TT>
-refers to the line containing the end of dot.  (Dot may span multiple lines, and
-<TT>+</TT>
-selects the line after the end of dot, then
-<TT>-</TT>
-backs up one line.)
-</P>
-<P>
-The final type of address is a regular expression, which addresses the
-text matched by the expression.  The expression is enclosed in slashes, as in
-<DL><DT><DD><TT><PRE>
-/<I>expression</I>/
-</PRE></TT></DL>
-The expressions are the same as those in the UNIX program
-<TT>egrep</TT>,<sup>6,7</sup>
-and include closures, alternations, and so on.
-They find the
-leftmost longest
-string that matches the expression, that is,
-the first match after the point where the search is started,
-and if more than one match begins at the same spot, the longest such match.
-(I assume familiarity with the syntax for regular expressions in UNIX programs.<sup>9</sup>)
-For example,
-<DL><DT><DD><TT><PRE>
-/x/
-</PRE></TT></DL>
-matches the next
-<TT>x</TT>
-character in the file,
-<DL><DT><DD><TT><PRE>
-/xx*/
-</PRE></TT></DL>
-matches the next run of one or more
-<TT>x</TT>'s,
-and
-<DL><DT><DD><TT><PRE>
-/x|Peter/
-</PRE></TT></DL>
-matches the next
-<TT>x</TT>
-or
-<TT>Peter</TT>.
-For compatibility with other UNIX programs, the `any character' operator,
-a period,
-does not match a newline, so
-<DL><DT><DD><TT><PRE>
-/.*/
-</PRE></TT></DL>
-matches the text from dot to the end of the line, but excludes the newline
-and so will not match across
-the line boundary.
-</P>
-<P>
-Regular expressions are always relative addresses.
-The direction is forwards by default,
-so
-<TT>/Peter/</TT>
-is really an abbreviation for
-<TT>+/Peter/</TT>.
-The search can be reversed with a minus sign, so
-<DL><DT><DD><TT><PRE>
-<TT>-/Peter/</TT>
-</PRE></TT></DL>
-finds the first
-<TT>Peter</TT>
-before dot.
-Regular expressions may be used with other address forms, so
-<TT>0+/Peter/</TT>
-finds the first
-<TT>Peter</TT>
-in the file and
-<TT>$-/Peter/</TT>
-finds the last.
-Table II summarizes
-<TT>sam</TT>'s
-addresses.
-<br><img src="-.11762.gif"><br>
-<br>&#32;<br>
-</P>
-<P>
-The language discussed so far will not seem novel
-to people who use UNIX text editors
-such as
-<TT>ed</TT>
-or
-<TT>vi</TT>.<sup>9</sup>
-Moreover, the kinds of editing operations these commands allow, with the exception
-of regular expressions and line numbers,
-are clearly more conveniently handled by a mouse-based interface.
-Indeed,
-<TT>sam</TT>'s
-mouse language (discussed at length below) is the means by which
-simple changes are usually made.
-For large or repetitive changes, however, a textual language
-outperforms a manual interface.
-</P>
-<P>
-Imagine that, instead of deleting just one occurrence of the string
-<TT>Peter</TT>,
-we wanted to eliminate every
-<TT>Peter</TT>.
-What's needed is an iterator that runs a command for each occurrence of some
-text.
-<TT>Sam</TT>'s
-iterator is called
-<TT>x</TT>,
-for extract:
-<DL><DT><DD><TT><PRE>
-x/<I>expression</I>/ <I>command</I>
-</PRE></TT></DL>
-finds all matches in dot of the specified expression, and for each
-such match, sets dot to the text matched and runs the command.
-So to delete all the
-<TT>Peters:</TT>
-<DL><DT><DD><TT><PRE>
-0,$ x/Peter/ d
-</PRE></TT></DL>
-(Blanks in these examples are to improve readability;
-<TT>sam</TT>
-neither requires nor interprets them.)
-This searches the entire file
-(<TT>0,$</TT>)
-for occurrences of the string
-<TT>Peter</TT>,
-and runs the
-<TT>d</TT>
-command with dot set to each such occurrence.
-(By contrast, the comparable
-<TT>ed</TT>
-command would delete all
-<I>lines</I>
-containing
-<TT>Peter</TT>;
-<TT>sam</TT>
-deletes only the
-<TT>Peters</TT>.)
-The address
-<TT>0,$</TT>
-is commonly used, and may be abbreviated to just a comma.
-As another example,
-<DL><DT><DD><TT><PRE>
-, x/Peter/ p
-</PRE></TT></DL>
-prints a list of
-<TT>Peters,</TT>
-one for each appearance in the file, with no intervening text (not even newlines
-to separate the instances).
-</P>
-<P>
-Of course, the text extracted by
-<TT>x</TT>
-may be selected by a regular expression,
-which complicates deciding what set of matches is chosen &#8212;
-matches may overlap.  This is resolved by generating the matches
-starting from the beginning of dot using the leftmost-longest rule,
-and searching for each match starting from the end of the previous one.
-Regular expressions may also match null strings, but a null match
-adjacent to a non-null match is never selected; at least one character
-must intervene.
-For example,
-<DL><DT><DD><TT><PRE>
-, c/AAA/
-x/B*/ c/-/
-, p
-</PRE></TT></DL>
-produces as output
-<DL><DT><DD><TT><PRE>
--A-A-A-
-</PRE></TT></DL>
-because the pattern
-<TT>B*</TT>
-matches the null strings separating the
-<TT>A</TT>'s.
-</P>
-<P>
-The
-<TT>x</TT>
-command has a complement,
-<TT>y</TT>,
-with similar syntax, that executes the command with dot set to the text
-<I>between</I>
-the matches of the expression.
-For example,
-<DL><DT><DD><TT><PRE>
-, c/AAA/
-y/A/ c/-/
-, p
-</PRE></TT></DL>
-produces the same result as the example above.
-</P>
-<P>
-The
-<TT>x</TT>
-and
-<TT>y</TT>
-commands are looping constructs, and
-<TT>sam</TT>
-has a pair of conditional commands to go with them.
-They have similar syntax:
-<DL><DT><DD><TT><PRE>
-g/<I>expression</I>/ <I>command</I>
-</PRE></TT></DL>
-(guard)
-runs the command exactly once if dot contains a match of the expression.
-This is different from
-<TT>x</TT>,
-which runs the command for
-<I>each</I>
-match:
-<TT>x</TT>
-loops;
-<TT>g</TT>
-merely tests, without changing the value of dot.
-Thus,
-<DL><DT><DD><TT><PRE>
-, x/Peter/ d
-</PRE></TT></DL>
-deletes all occurrences of
-<TT>Peter</TT>,
-but
-<DL><DT><DD><TT><PRE>
-, g/Peter/ d
-</PRE></TT></DL>
-deletes the whole file (reduces it to a null string) if
-<TT>Peter</TT>
-occurs anywhere in the text.
-The complementary conditional is
-<TT>v</TT>,
-which runs the command if there is
-<I>no</I>
-match of the expression.
-</P>
-<P>
-These control-structure-like commands may be composed to construct more
-involved operations.  For example, to print those lines of text that
-contain the string
-<TT>Peter</TT>:
-<DL><DT><DD><TT><PRE>
-, x/.*\n/ g/Peter/ p
-</PRE></TT></DL>
-The
-<TT>x</TT>
-breaks the file into lines, the
-<TT>g</TT>
-selects those lines containing
-<TT>Peter</TT>,
-and the
-<TT>p</TT>
-prints them.
-This command gives an address for the
-<TT>x</TT>
-command (the whole file), but because
-<TT>g</TT>
-does not have an explicit address, it applies to the value of
-dot produced by the
-<TT>x</TT>
-command, that is, to each line.
-All commands in
-<TT>sam</TT>
-except for the command to write a file to disc use dot for the
-default address.
-</P>
-<P>
-Composition may be continued indefinitely.
-<DL><DT><DD><TT><PRE>
-, x/.*\n/ g/Peter/ v/SaltPeter/ p
-</PRE></TT></DL>
-prints those lines containing
-<TT>Peter</TT>
-but
-<I>not</I>
-those containing
-<TT>SaltPeter</TT>.
-</P>
-<H4>Structural Regular Expressions
-</H4>
-<br>&#32;<br>
-Unlike other UNIX text editors,
-including the non-interactive ones such as
-<TT>sed</TT>
-and
-<TT>awk</TT>,<sup>7</sup>
-<TT>sam</TT>
-is good for manipulating files with multi-line `records.'
-An example is an on-line phone book composed of records,
-separated by blank lines, of the form
-<DL><DT><DD><TT><PRE>
-Herbert Tic
-44 Turnip Ave., Endive, NJ
-201-5555642
-
-Norbert Twinge
-16 Potato St., Cabbagetown, NJ
-201-5553145
-
-...
-</PRE></TT></DL>
-The format may be encoded as a regular expression:
-<DL><DT><DD><TT><PRE>
-(.+\n)+
-</PRE></TT></DL>
-that is, a sequence of one or more non-blank lines.
-The command to print Mr. Tic's entire record is then
-<DL><DT><DD><TT><PRE>
-, x/(.+\n)+/ g/^Herbert Tic$/ p
-</PRE></TT></DL>
-and that to extract just the phone number is
-<DL><DT><DD><TT><PRE>
-, x/(.+\n)+/ g/^Herbert Tic$/ x/^[0-9]*-[0-9]*\n/ p
-</PRE></TT></DL>
-The latter command breaks the file into records,
-chooses Mr. Tic's record,
-extracts the phone number from the record,
-and finally prints the number.
-<P>
-A more involved problem is that of
-renaming a particular variable, say
-<TT>n</TT>,
-to
-<TT>num</TT>
-in a C program.
-The obvious first attempt,
-<DL><DT><DD><TT><PRE>
-, x/n/ c/num/
-</PRE></TT></DL>
-is badly flawed: it changes not only the variable
-<TT>n</TT>
-but any letter
-<TT>n</TT>
-that appears.
-We need to extract all the variables, and select those that match
-<TT>n</TT>
-and only
-<TT>n</TT>:
-<DL><DT><DD><TT><PRE>
-, x/[A-Za-z_][A-Za-z_0-9]*/ g/n/ v/../ c/num/
-</PRE></TT></DL>
-The pattern
-<TT>[A-Za-z_][A-Za-z_0-9]*</TT>
-matches C identifiers.
-Next
-<TT>g/n/</TT>
-selects those containing an
-<TT>n</TT>.
-Then
-<TT>v/../</TT>
-rejects those containing two (or more) characters, and finally
-<TT>c/num/</TT>
-changes the remainder (identifiers
-<TT>n</TT>)
-to
-<TT>num</TT>.
-This version clearly works much better, but there may still be problems.
-For example, in C character and string constants, the sequence
-<TT>\n</TT>
-is interpreted as a newline character, and we don't want to change it to
-<TT>\num</TT>.
-This problem can be forestalled with a
-<TT>y</TT>
-command:
-<DL><DT><DD><TT><PRE>
-, y/\\n/ x/[A-Za-z_][A-Za-z_0-9]*/ g/n/ v/../ c/num/
-</PRE></TT></DL>
-(the second
-<TT>\</TT>
-is necessary because of lexical conventions in regular expressions),
-or we could even reject character constants and strings outright:
-<DL><DT><DD><TT><PRE>
-,y/'[^']*'/ y/"[^"]*"/ x/[A-Za-z_][A-Za-z_0-9]*/ g/n/ v/../ c/num/
-</PRE></TT></DL>
-The
-<TT>y</TT>
-commands in this version exclude from consideration all character constants
-and strings.
-The only remaining problem is to deal with the possible occurrence of
-<TT>\'</TT>
-or
-<TT>\"</TT>
-within these sequences, but it's easy to see how to resolve this difficulty.
-</P>
-<P>
-The point of these composed commands is successive refinement.
-A simple version of the command is tried, and if it's not good enough,
-it can be honed by adding a clause or two.
-(Mistakes can be undone; see below.
-Also, the mouse language makes it unnecessary to retype the command each time.)
-The resulting chains of commands are somewhat reminiscent of
-shell pipelines.<sup>7</sup>
-Unlike pipelines, though, which pass along modified
-<I>data</I>,
-<TT>sam</TT>
-commands pass a
-<I>view</I>
-of the data.
-The text at each step of the command is the same, but which pieces
-are selected is refined step by step until the correct piece is
-available to the final step of the command line, which ultimately makes the change.
-</P>
-<P>
-In other UNIX programs, regular expressions are used only for selection,
-as in the
-<TT>sam</TT>
-<TT>g</TT>
-command, never for extraction as in the
-<TT>x</TT>
-or
-<TT>y</TT>
-command.
-For example, patterns in
-<TT>awk</TT><sup>7</sup>
-are used to select lines to be operated on, but cannot be used
-to describe the format of the input text, or to handle newline-free text.
-The use of regular expressions to describe the structure of a piece
-of text rather than its contents, as in the
-<TT>x</TT>
-command, 
-has been given a name:
-structural regular expressions.
-When they are composed, as in the above example,
-they are pleasantly expressive.
-Their use is discussed at greater length elsewhere.<sup>10</sup>
-</P>
-<P>
-</P>
-<H4>Multiple files
-</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-has a few other commands, mostly relating to input and output.
-<DL><DT><DD><TT><PRE>
-e discfilename
-</PRE></TT></DL>
-replaces the contents and name of the current file with those of the named
-disc file;
-<DL><DT><DD><TT><PRE>
-w discfilename
-</PRE></TT></DL>
-writes the contents to the named disc file; and
-<DL><DT><DD><TT><PRE>
-r discfilename
-</PRE></TT></DL>
-replaces dot with the contents of the named disc file.
-All these commands use the current file's name if none is specified.
-Finally,
-<DL><DT><DD><TT><PRE>
-f discfilename
-</PRE></TT></DL>
-changes the name associated with the file and displays the result:
-<DL><DT><DD><TT><PRE>
-'-. discfilename
-</PRE></TT></DL>
-This output is called the file's
-menu line,
-because it is the contents of the file's line in the button 3 menu (described
-in the
-next section).
-The first three characters are a concise notation for the state of the file.
-The apostrophe signifies that the file is modified.
-The minus sign indicates the number of windows
-open on the file (see the next section):
-<TT>-</TT>
-means none,
-<TT>+</TT>
-means one, and
-<TT>*</TT>
-means more than one.
-Finally, the period indicates that this is the current file.
-These characters are useful for controlling the
-<TT>X</TT>
-command, described shortly.
-<P>
-<TT>Sam</TT>
-may be started with a set of disc files (such as all the source for
-a program) by invoking it with a list of file names as arguments, and
-more may be added or deleted on demand.
-<DL><DT><DD><TT><PRE>
-B discfile1 discfile2 ...
-</PRE></TT></DL>
-adds the named files to
-<TT>sam</TT>'s
-list, and
-<DL><DT><DD><TT><PRE>
-D discfile1 discfile2 ...
-</PRE></TT></DL>
-removes them from
-<TT>sam</TT>'s
-memory (without effect on associated disc files).
-Both these commands have a syntax for using the shell<sup>7</sup>
-(the UNIX command interpreter) to generate the lists:
-<DL><DT><DD><TT><PRE>
-B &#60;echo *.c
-</PRE></TT></DL>
-will add all C source files, and
-<DL><DT><DD><TT><PRE>
-B &#60;grep -l variable *.c
-</PRE></TT></DL>
-will add all C source files referencing a particular variable
-(the UNIX command
-<TT>grep -l</TT>
-lists all files in its arguments that contain matches of
-the specified regular expression).
-Finally,
-<TT>D</TT>
-without arguments deletes the current file.
-</P>
-<P>
-There are two ways to change which file is current:
-<DL><DT><DD><TT><PRE>
-b filename
-</PRE></TT></DL>
-makes the named file current.
-The
-<TT>B</TT>
-command
-does the same, but also adds any new files to
-<TT>sam</TT>'s
-list.
-(In practice, of course, the current file
-is usually chosen by mouse actions, not by textual commands.)
-The other way is to use a form of address that refers to files:
-<DL><DT><DD><TT><PRE>
-"<I>expression</I>" <I>address</I>
-</PRE></TT></DL>
-refers to the address evaluated in the file whose menu line
-matches the expression (there must be exactly one match).
-For example,
-<DL><DT><DD><TT><PRE>
-"peter.c" 3
-</PRE></TT></DL>
-refers to the third line of the file whose name matches
-<TT>peter.c</TT>.
-This is most useful in the move
-(<TT>m</TT>)
-and copy
-(<TT>t</TT>)
-commands:
-<DL><DT><DD><TT><PRE>
-0,$ t "peter.c" 0
-</PRE></TT></DL>
-makes a copy of the current file at the beginning of
-<TT>peter.c</TT>.
-</P>
-<P>
-The
-<TT>X</TT>
-command
-is a looping construct, like
-<TT>x</TT>,
-that refers to files instead of strings:
-<DL><DT><DD><TT><PRE>
-X/<I>expression</I>/ <I>command</I>
-</PRE></TT></DL>
-runs the command in all
-files whose menu lines match the expression.  The best example is
-<DL><DT><DD><TT><PRE>
-X/'/ w
-</PRE></TT></DL>
-which writes to disc all modified files.
-<TT>Y</TT>
-is the complement of
-<TT>X</TT>:
-it runs the command on all files whose menu lines don't match the expression:
-<DL><DT><DD><TT><PRE>
-Y/\.c/ D
-</PRE></TT></DL>
-deletes all files that don't have
-<TT>.c</TT>
-in their names, that is, it keeps all C source files and deletes the rest.
-</P>
-<P>
-Braces allow commands to be grouped, so
-<DL><DT><DD><TT><PRE>
-{
-	<I>command1</I>
-	<I>command2</I>
-}
-</PRE></TT></DL>
-is syntactically a single command that runs two commands.
-Thus,
-<DL><DT><DD><TT><PRE>
-X/\.c/ ,g/variable/ {
-	f
-	, x/.*\n/ g/variable/ p
-}
-</PRE></TT></DL>
-finds all occurrences of
-<TT>variable</TT>
-in C source files, and prints
-out the file names and lines of each match.
-The precise semantics of compound operations is discussed in the implementation
-sections below.
-</P>
-<P>
-Finally,
-the undo command,
-<TT>u</TT>,
-undoes the last command,
-no matter how many files were affected.
-Multiple undo operations move further back in time, so
-<DL><DT><DD><TT><PRE>
-u
-u
-</PRE></TT></DL>
-(which may be abbreviated
-<TT>u2</TT>)
-undoes the last two commands.  An undo may not be undone, however, nor
-may any command that adds or deletes files.
-Everything else is undoable, though, including for example
-<TT>e</TT>
-commands:
-<DL><DT><DD><TT><PRE>
-e filename
-u
-</PRE></TT></DL>
-restores the state of the file completely, including its name, dot,
-and modified bit.  Because of the undo, potentially dangerous commands
-are not guarded by confirmations.  Only
-<TT>D</TT>,
-which destroys the information necessary to restore itself, is protected.
-It will not delete a modified file, but a second
-<TT>D</TT>
-of the same file will succeed regardless.
-The
-<TT>q</TT>
-command, which exits
-<TT>sam</TT>,
-is similarly guarded.
-</P>
-<H4>Mouse Interface
-</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-is most commonly run
-connected to a bitmap display and mouse for interactive editing.
-The only difference in the command language
-between regular, mouse-driven
-<TT>sam</TT>
-and
-<TT>sam -d</TT>
-is that if an address
-is provided without a command,
-<TT>sam -d</TT>
-will print the text referenced by the address, but
-regular
-<TT>sam</TT>
-will highlight it on the screen &#8212; in fact,
-dot is always highlighted (see Figure 2).
-<br><img src="fig3.ps.11763.gif"><br>
-<br>
-<I>Figure 2. A
-</I><TT>sam</TT><I>
-window.  The scroll bar down the left
-represents the file, with the bubble showing the fraction
-visible in the window.
-The scroll bar may be manipulated by the mouse for convenient browsing.
-The current text,
-which is highlighted, need not fit on a line.  Here it consists of one partial
-line, one complete line, and a final partial line.
-<br>
-<DL><DT><DD><TT><PRE>
-</I><br>&#32;<br>
-</PRE></TT></DL>
-<P>
-Each file may have zero or more windows open on the display.
-At any time, only one window in all of
-<TT>sam</TT>
-is the
-current window,
-that is, the window to which typing and mouse actions refer;
-this may be the
-<TT>sam</TT>
-window (that in which commands may be typed)
-or one of the file windows.
-When a file has multiple windows, the image of the file in each window
-is always kept up to date.
-The current file is the last file affected by a command,
-so if the
-<TT>sam</TT>
-window is current,
-the current window is not a window on the current file.
-However, each window on a file has its own value of dot,
-and when switching between windows on a single file,
-the file's value of dot is changed to that of the window.
-Thus, flipping between windows behaves in the obvious, convenient way.
-</P>
-<P>
-The mouse on the Blit has three buttons, numbered left to right.
-Button 3 has a list of commands to manipulate windows,
-followed by a list of `menu lines' exactly as printed by the
-<TT>f</TT>
-command, one per file (not one per window).
-These menu lines are sorted by file name.
-If the list is long, the Blit menu software will make it more manageable
-by generating a scrolling menu instead of an unwieldy long list.
-Using the menu to select a file from the list makes that file the current
-file, and the most recently current window in that file the current window.
-But if that file is already current, selecting it in the menu cycles through
-the windows on the file; this simple trick avoids a special menu to
-choose windows on a file.
-If there is no window open on the file,
-<TT>sam</TT>
-changes the mouse cursor to prompt the user to create one.
-</P>
-<P>
-The commands on the button 3 menu are straightforward (see Figure 3), and
-are like the commands to manipulate windows in
-<TT>mux</TT>,<sup>8</sup>
-the Blit's window system.
-<TT>New</TT>
-makes a new file, and gives it one empty window, whose size is determined
-by a rectangle swept by the mouse.
-<TT>Zerox</TT>
-prompts for a window to be selected, and
-makes a clone of that window; this is how multiple windows are created on one file.
-<TT>Reshape</TT>
-changes the size of the indicated window, and
-<TT>close</TT>
-deletes it.  If that is the last window open on the file,
-<TT>close</TT>
-first does a
-<TT>D</TT>
-command on the file.
-<TT>Write</TT>
-is identical to a
-<TT>w</TT>
-command on the file; it is in the menu purely for convenience.
-Finally,
-<TT>~~sam~~</TT>
-is a menu item that appears between the commands and the file names.
-Selecting it makes the
-<TT>sam</TT>
-window the current window,
-causing subsequent typing to be interpreted as commands.
-<br><img src="fig2.ps.11764.gif"><br>
-<br>
-<I>Figure 3. The menu on button 3.
-The black rectangle on the left is a scroll bar; the menu is limited to
-the length shown to prevent its becoming unwieldy.
-Above the
-</I><TT>~~sam~~</TT><I>
-line is a list of commands;
-beneath it is a list of files, presented exactly as with the
-</I><TT>f</TT><I>
-command.
-<br>
-<DL><DT><DD><TT><PRE>
-</I><br>&#32;<br>
-</PRE></TT></DL>
-</P>
-<P>
-When
-<TT>sam</TT>
-requests that a window be swept, in response to
-<TT>new</TT>,
-<TT>zerox</TT>
-or
-<TT>reshape</TT>,
-it changes the mouse cursor from the usual arrow to a box with
-a small arrow.
-In this state, the mouse may be used to indicate an arbitrary rectangle by
-pressing button 3 at one corner and releasing it at the opposite corner.
-More conveniently,
-button 3 may simply be clicked,
-whereupon
-<TT>sam</TT>
-creates the maximal rectangle that contains the cursor
-and abuts the
-<TT>sam</TT>
-window.
-By placing the
-<TT>sam</TT>
-window in the middle of the screen, the user can define two regions (one above,
-one below) in which stacked fully-overlapping
-windows can be created with minimal fuss (see Figure 1).
-This simple user interface trick makes window creation noticeably easier.
-</P>
-<P>
-The cut-and-paste editor is essentially the same as that in Smalltalk-80.<sup>11</sup>
-The text in dot is always highlighted on the screen.
-When a character is typed it replaces dot, and sets dot to the null
-string after the character.  Thus, ordinary typing inserts text.
-Button 1 is used for selection:
-pressing the button, moving the mouse, and lifting the button
-selects (sets dot to) the text between the points where the
-button was pressed and released.
-Pressing and releasing at the same point selects a null string; this
-is called clicking.  Clicking twice quickly, or
-double clicking,
-selects larger objects;
-for example, double clicking in a word selects the word,
-double clicking just inside an opening bracket selects the text
-contained in the brackets (handling nested brackets correctly),
-and similarly for
-parentheses, quotes, and so on.
-The double-clicking rules reflect a bias toward
-programmers.
-If
-<TT>sam</TT>
-were intended more for word processing, double-clicks would probably
-select linguistic structures such as sentences.
-</P>
-<P>
-If button 1 is pressed outside the current window, it makes the indicated
-window current.
-This is the easiest way to switch between windows and files.
-</P>
-<P>
-Pressing button 2 brings up a menu of editing functions (see Figure 4).
-These mostly apply to the selected text:
-<TT>cut</TT>
-deletes the selected text, and remembers it in a hidden buffer called the
-snarf buffer,
-<TT>paste</TT>
-replaces the selected text by the contents of the snarf buffer,
-<TT>snarf</TT>
-just copies the selected text to the snarf buffer,
-<TT>look</TT>
-searches forward for the next literal occurrence of the selected text, and
-<TT>&#60;mux&#62;</TT>
-exchanges snarf buffers with the window system in which
-<TT>sam</TT>
-is running.
-Finally, the last regular expression used appears as a menu entry
-to search
-forward for the next occurrence of a match for the expression.
-<br><img src="fig4.ps.11765.gif"><br>
-<br>
-<I>Figure 4. The menu on button 2.
-The bottom entry tracks the most recently used regular expression, which may
-be literal text.
-<br>
-<DL><DT><DD><TT><PRE>
-</I><br>&#32;<br>
-</PRE></TT></DL>
-</P>
-<P>
-The relationship between the command language and the mouse language is
-entirely due to the equality of dot and the selected text chosen
-with button 1 on the mouse.
-For example, to make a set of changes in a C subroutine, dot can be
-set by double clicking on the left brace that begins the subroutine,
-which sets dot for the command language.
-An address-free command then typed in the
-<TT>sam</TT>
-window will apply only to the text between the opening and closing
-braces of the function.
-The idea is to select what you want, and then say what you want
-to do with it, whether invoked by a menu selection or by a typed command.
-And of course, the value of dot is highlighted on
-the display after the command completes.
-This relationship between mouse interface and command language
-is clumsy to explain, but comfortable, even natural, in practice.
-</P>
-<H4>The Implementation
-</H4>
-<br>&#32;<br>
-The next few sections describe how
-<TT>sam</TT>
-is put together, first the host part,
-then the inter-component communication,
-then the terminal part.
-After explaining how the command language is implemented,
-the discussion follows (roughly) the path of a character
-from the temporary file on disc to the screen.
-The presentation centers on the data structures,
-because that is how the program was designed and because
-the algorithms are easy to provide, given the right data
-structures.
-<H4>Parsing and execution
-</H4>
-<br>&#32;<br>
-The command language is interpreted by parsing each command with a
-table-driven recursive
-descent parser, and when a complete command is assembled, invoking a top-down
-executor.
-Most editors instead employ a simple character-at-a-time
-lexical scanner.
-Use of a parser makes it
-easy and unambiguous to detect when a command is complete,
-which has two advantages.
-First, escape conventions such as backslashes to quote
-multiple-line commands are unnecessary;  if the command isn't finished,
-the parser keeps reading.  For example, a multiple-line append driven by an
-<TT>x</TT>
-command is straightforward:
-<DL><DT><DD><TT><PRE>
-x/.*\n/ g/Peter/ a
-one line about Peter
-another line about Peter
-.
-</PRE></TT></DL>
-Other UNIX editors would require a backslash after all but the last line.
-<P>
-The other advantage is specific to the two-process structure of
-<TT>sam</TT>.
-The host process must decide when a command is completed so the
-command interpreter can be called.  This problem is easily resolved
-by having the lexical analyzer read the single stream of events from the
-terminal, directly executing all typing and mouse commands,
-but passing to the parser characters typed to the
-<TT>sam</TT>
-command window.
-This scheme is slightly complicated by the availability of cut-and-paste
-editing in the
-<TT>sam</TT>
-window, but that difficulty is resolved by applying the rules
-used in
-<TT>mux</TT>:
-when a newline is typed to the
-<TT>sam</TT>
-window, all text between the newline and the previously typed newline
-is made available to the parser.
-This permits arbitrary editing to be done to a command before
-typing newline and thereby requesting execution.
-</P>
-<P>
-The parser is driven by a table because the syntax of addresses
-and commands is regular enough
-to be encoded compactly.  There are few special cases, such as the
-replacement text in a substitution, so the syntax of almost all commands
-can be encoded with a few flags.
-These include whether the command allows an address (for example,
-<TT>e</TT>
-does not), whether it takes a regular expression (as in
-<TT>x</TT>
-and
-<TT>s</TT>),
-whether it takes replacement text (as in
-<TT>c</TT>
-or
-<TT>i</TT>),
-which may be multi-line, and so on.
-The internal syntax of regular expressions is handled by a separate
-parser; a regular expression is a leaf of the command parse tree.
-Regular expressions are discussed fully in the next section.
-</P>
-<P>
-The parser table also has information about defaults, so the interpreter
-is always called with a complete tree.  For example, the parser fills in
-the implicit
-<TT>0</TT>
-and
-<TT>$</TT>
-in the abbreviated address
-<TT>,</TT>
-(comma),
-inserts a
-<TT>+</TT>
-to the left of an unadorned regular expression in an address,
-and provides the usual default address
-<TT>.</TT>
-(dot) for commands that expect an address but are not given one.
-</P>
-<P>
-Once a complete command is parsed, the evaluation is easy.
-The address is evaluated left-to-right starting from the value of dot,
-with a mostly ordinary expression evaluator.
-Addresses, like many of the data structures in
-<TT>sam</TT>,
-are held in a C structure and passed around by value:
-<DL><DT><DD><TT><PRE>
-typedef long Posn;    /* Position in a file */
-typedef struct Range{
-        Posn    p1, p2;
-}Range;
-typedef struct Address{
-        Range   r;
-        File    *f;
-}Address;
-</PRE></TT></DL>
-An address is encoded as a substring (character positions
-<TT>p1</TT>
-to
-<TT>p2</TT>)
-in a file
-<TT>f</TT>.
-(The data type
-<TT>File</TT>
-is described in detail below.)
-</P>
-<P>
-The address interpreter is an
-<TT>Address</TT>-valued
-function that traverses the parse tree describing an address (the
-parse tree for the address has type
-<TT>Addrtree</TT>):
-<DL><DT><DD><TT><PRE>
-Address
-address(ap, a, sign)
-	Addrtree *ap;
-	Address a;
-	int sign;
-{
-	Address a2;
-	do
-		switch(ap-&#62;type){
-		case '.':
-			a=a.f-&#62;dot;
-			break;
-		case '$':
-			a.r.p1=a.r.p2=a.f-&#62;nbytes;
-			break;
-		case '"':	
-			a=matchfile(a, ap-&#62;aregexp)-&#62;dot; 
-			break;
-		case ',':
-			a2=address(ap-&#62;right, a, 0);
-			a=address(ap-&#62;left, a, 0);
-			if(a.f!=a2.f || a2.r.p2&#60;a.r.p1)
-				error(Eorder);
-			a.r.p2=a2.r.p2;
-			return a;
-		/* and so on */
-		}
-	while((ap=ap-&#62;right)!=0);
-	return a;
-}
-</PRE></TT></DL>
-</P>
-<P>
-Throughout, errors are handled by a non-local
-<TT>goto</TT>
-(a
-<TT>setjmp/longjmp</TT>
-in C terminology)
-hidden in a routine called
-<TT>error</TT>
-that immediately aborts the execution, retracts any
-partially made changes (see the section below on `undoing'), and
-returns to the top level of the parser.
-The argument to
-<TT>error</TT>
-is an enumeration type that
-is translated to a terse but possibly helpful
-message such as `?addresses out of order.'
-Very common messages are kept short; for example the message for
-a failed regular expression search is `?search.'
-</P>
-<P>
-Character addresses such as
-<TT>#3</TT>
-are trivial to implement, as the
-<TT>File</TT>
-data structure is accessible by character number.
-However,
-<TT>sam</TT>
-keeps no information about the position of newlines &#173; it is too
-expensive to track dynamically &#173; so line addresses are computed by reading
-the file, counting newlines.  Except in very large files, this has proven
-acceptable: file access is fast enough to make the technique practical,
-and lines are not central to the structure of the command language.
-</P>
-<P>
-The command interpreter, called
-<TT>cmdexec</TT>,
-is also straightforward.  The parse table includes a
-function to call to interpret a particular command.  That function
-receives as arguments
-the calculated address
-for the command
-and the command tree (of type
-<TT>Cmdtree</TT>),
-which may contain information such as the subtree for compound commands.
-Here, for example, is the function for the
-<TT>g</TT>
-and
-<TT>v</TT>
-commands:
-<DL><DT><DD><TT><PRE>
-int
-g_cmd(a, cp)
-	Address a;
-	Cmdtree *cp;
-{
-	compile(cp-&#62;regexp);
-	if(execute(a.f, a.r.p1, a.r.p2)!=(cp-&#62;cmdchar=='v')){
-		a.f-&#62;dot=a;
-		return cmdexec(a, cp-&#62;subcmd);
-	}
-	return TRUE;	/* cause execution to continue */
-}
-</PRE></TT></DL>
-(<TT>Compile</TT>
-and
-<TT>execute</TT>
-are part of the regular expression code, described in the next section.)
-Because the parser and the
-<TT>File</TT>
-data structure do most of the work, most commands
-are similarly brief.
-</P>
-<H4>Regular expressions
-</H4>
-<br>&#32;<br>
-The regular expression code in
-<TT>sam</TT>
-is an interpreted, rather than compiled on-the-fly, implementation of Thompson's
-non-deterministic finite automaton algorithm.<sup>12</sup>
-The syntax and semantics of the expressions are as in the UNIX program
-<TT>egrep</TT>,
-including alternation, closures, character classes, and so on.
-The only changes in the notation are two additions:
-<TT>\n</TT>
-is translated to, and matches, a newline character, and
-<TT>@</TT>
-matches any character.  In
-<TT>egrep</TT>,
-the character
-<TT>.</TT>
-matches any character except newline, and in
-<TT>sam</TT>
-the same rule seemed safest, to prevent idioms like
-<TT>.*</TT>
-from spanning newlines.
-<TT>Egrep</TT>
-expressions are arguably too complicated for an interactive editor &#173;
-certainly it would make sense if all the special characters were two-character
-sequences, so that most of the punctuation characters wouldn't have
-peculiar meanings &#173; but for an interesting command language, full
-regular expressions are necessary, and
-<TT>egrep</TT>
-defines the full regular expression syntax for UNIX programs.
-Also, it seemed superfluous to define a new syntax, since various UNIX programs
-(<TT>ed</TT>,
-<TT>egrep</TT>
-and
-<TT>vi</TT>)
-define too many already.
-<P>
-The expressions are compiled by a routine,
-<TT>compile</TT>,
-that generates the description of the non-deterministic finite state machine.
-A second routine,
-<TT>execute</TT>,
-interprets the machine to generate the leftmost-longest match of the
-expression in a substring of the file.
-The algorithm is described elsewhere.<sup>12,13</sup>
-<TT>Execute</TT>
-reports
-whether a match was found, and sets a global variable,
-of type
-<TT>Range</TT>,
-to the substring matched.
-</P>
-<P>
-A trick is required to evaluate the expression in reverse, such as when
-searching backwards for an expression.
-For example,
-<DL><DT><DD><TT><PRE>
--/P.*r/
-</PRE></TT></DL>
-looks backwards through the file for a match of the expression.
-The expression, however, is defined for a forward search.
-The solution is to construct a machine identical to the machine
-for a forward search except for a reversal of all the concatenation
-operators (the other operators are symmetric under direction reversal),
-to exchange the meaning of the operators
-<TT>^</TT>
-and
-<TT>$</TT>,
-and then to read the file backwards, looking for the
-usual earliest longest match.
-</P>
-<P>
-<TT>Execute</TT>
-generates only one match each time it is called.
-To interpret looping constructs such as the
-<TT>x</TT>
-command,
-<TT>sam</TT>
-must therefore synchronize between
-calls of
-<TT>execute</TT>
-to avoid
-problems with null matches.
-For example, even given the leftmost-longest rule,
-the expression
-<TT>a*</TT>
-matches three times in the string
-<TT>ab</TT>
-(the character
-<TT>a</TT>,
-the null string between the
-<TT>a</TT>
-and
-<TT>b</TT>,
-and the final null string).
-After returning a match for the
-<TT>a</TT>,
-<TT>sam</TT>
-must not match the null string before the
-<TT>b</TT>.
-The algorithm starts
-<TT>execute</TT>
-at the end of its previous match, and
-if the match it returns
-is null and abuts the previous match, rejects the match and advances
-the initial position one character.
-</P>
-<H4>Memory allocation
-</H4>
-<br>&#32;<br>
-The C language has no memory allocation primitives, although a standard
-library routine,
-<TT>malloc</TT>,
-provides adequate service for simple programs.
-For specific uses, however,
-it can be better to write a custom allocator.
-The allocator (or rather, pair of allocators) described here
-work in both the terminal and host parts of
-<TT>sam</TT>.
-They are designed for efficient manipulation of strings,
-which are allocated and freed frequently and vary in length from essentially
-zero to 32 Kbytes (very large strings are written to disc).
-More important, strings may be large and change size often,
-so to minimize memory usage it is helpful to reclaim and to coalesce the
-unused portions of strings when they are truncated.
-<P>
-Objects to be allocated in
-<TT>sam</TT>
-are of two flavors:
-the first is C
-<TT>structs</TT>,
-which are small and often addressed by pointer variables;
-the second is variable-sized arrays of characters
-or integers whose
-base pointer is always used to access them.
-The memory allocator in
-<TT>sam</TT>
-is therefore in two parts:
-first, a traditional first-fit allocator that provides fixed storage for
-<TT>structs</TT>;
-and second, a garbage-compacting allocator that reduces storage
-overhead for variable-sized objects, at the cost of some bookkeeping.
-The two types of objects are allocated from adjoining arenas, with
-the garbage-compacting allocator controlling the arena with higher addresses.
-Separating into two arenas simplifies compaction and prevents fragmentation due
-to immovable objects.
-The access rules for garbage-compactable objects
-(discussed in the next paragraph) allow them to be relocated, so when
-the first-fit arena needs space, it moves the garbage-compacted arena
-to higher addresses to make room.  Storage is therefore created only
-at successively higher addresses, either when more garbage-compacted
-space is needed or when the first-fit arena pushes up the other arena.
-</P>
-<P>
-Objects that may be compacted declare to the
-allocator a cell that is guaranteed to be the sole repository of the
-address of the object whenever a compaction can occur.
-The compactor can then update the address when the object is moved.
-For example, the implementation of type
-<TT>List</TT>
-(really a variable-length array)
-is:
-<DL><DT><DD><TT><PRE>
-typedef struct List{
-        int     nused;
-        long    *ptr;
-}List;
-</PRE></TT></DL>
-The
-<TT>ptr</TT>
-cell must always be used directly, and never copied.  When a
-<TT>List</TT>
-is to be created the
-<TT>List</TT>
-structure is allocated in the ordinary first-fit arena
-and its
-<TT>ptr</TT>
-is allocated in the garbage-compacted arena.
-A similar data type for strings, called
-<TT>String</TT>,
-stores variable-length character arrays of up to 32767 elements.
-</P>
-<P>
-A related matter of programming style:
-<TT>sam</TT>
-frequently passes structures by value, which
-simplifies the code.
-Traditionally, C programs have
-passed structures by reference, but implicit allocation on
-the stack is easier to use.
-Structure passing is a relatively new feature of C
-(it is not in the 
-standard reference manual for C<sup>14</sup>), and is poorly supported in most
-commercial C compilers.
-It's convenient and expressive, though,
-and simplifies memory management by
-avoiding the allocator altogether
-and eliminating pointer aliases.
-</P>
-<H4>Data structures for manipulating files
-</H4>
-<br>&#32;<br>
-Experience with
-<TT>jim</TT>
-showed that the requirements
-of the file data structure were few, but strict.
-First, files need to be read and written quickly;
-adding a fresh file must be painless.
-Second, the implementation must place no arbitrary upper limit on
-the number or sizes of files.  (It should be practical to edit many files,
-and files up to megabytes in length should be handled gracefully.)
-This implies that files be stored on disc, not in main memory.
-(Aficionados of virtual memory may argue otherwise, but the
-implementation of virtual
-memory in our system is not something to depend on
-for good performance.)
-Third, changes to files need be made by only two primitives:
-deletion and insertion.
-These are inverses of each other,
-which simplifies the implementation of the undo operation.
-Finally,
-it must be easy and efficient to access the file, either
-forwards or backwards, a byte at a time.
-<P>
-The
-<TT>File</TT>
-data type is constructed from three simpler data structures that hold arrays
-of characters.
-Each of these types has an insertion and deletion operator, and the
-insertion and deletion operators of the
-<TT>File</TT>
-type itself are constructed from them.
-</P>
-<P>
-The simplest type is the
-<TT>String</TT>,
-which is used to hold strings in main memory.
-The code that manages
-<TT>Strings</TT>
-guarantees that they will never be longer
-than some moderate size, and in practice they are rarely larger than 8 Kbytes.
-<TT>Strings</TT>
-have two purposes: they hold short strings like file names with little overhead,
-and because they are deliberately small, they are efficient to modify.
-They are therefore used as the data structure for in-memory caches.
-</P>
-<P>
-The disc copy of the file is managed by a data structure called a
-<TT>Disc</TT>,
-which corresponds to a temporary file.  A
-<TT>Disc</TT>
-has no storage in main memory other than bookkeeping information;
-the actual data being held is all on the disc.
-To reduce the number of open files needed,
-<TT>sam</TT>
-opens a dozen temporary UNIX files and multiplexes the
-<TT>Discs</TT>
-upon them.
-This permits many files to
-be edited; the entire
-<TT>sam</TT>
-source (48 files) may be edited comfortably with a single
-instance of
-<TT>sam</TT>.
-Allocating one temporary file per
-<TT>Disc</TT>
-would strain the operating system's limit on the number of open files.
-Also, spreading the traffic among temporary files keeps the files shorter,
-and shorter files are more efficiently implemented by the UNIX
-I/O subsystem.
-</P>
-<P>
-A
-<TT>Disc</TT>
-is an array of fixed-length blocks, each of which contains
-between 1 and 4096 characters of active data.
-(The block size of our UNIX file system is 4096 bytes.)
-The block addresses within the temporary file and the length of each
-block are stored in a
-<TT>List</TT>.
-When changes are made the live part of blocks may change size.
-Blocks are created and coalesced when necessary to try to keep the sizes
-between 2048 and 4096 bytes.
-An actively changing part of the
-<TT>Disc</TT>
-therefore typically has about a kilobyte of slop that can be
-inserted or deleted
-without changing more than one block or affecting the block order.
-When an insertion would overflow a block, the block is split, a new one
-is allocated to receive the overflow, and the memory-resident list of blocks
-is rearranged to reflect the insertion of the new block.
-</P>
-<P>
-Obviously, going to the disc for every modification to the file is
-prohibitively expensive.
-The data type
-<TT>Buffer</TT>
-consists of a
-<TT>Disc</TT>
-to hold the data and a
-<TT>String</TT>
-that acts as a cache.
-This is the first of a series of caches throughout the data structures in
-<TT>sam</TT>.
-The caches not only improve performance, they provide a way to organize
-the flow of data, particularly in the communication between the host
-and terminal.
-This idea is developed below, in the section on communications.
-</P>
-<P>
-To reduce disc traffic, changes to a
-<TT>Buffer</TT>
-are mediated by a variable-length string, in memory, that acts as a cache.
-When an insertion or deletion is made to a
-<TT>Buffer</TT>,
-if the change can be accommodated by the cache, it is done there.
-If the cache becomes bigger than a block because of an insertion,
-some of it is written to the
-<TT>Disc</TT>
-and deleted from the cache.
-If the change does not intersect the cache, the cache is flushed.
-The cache is only loaded at the new position if the change is smaller than a block;
-otherwise, it is sent directly to the
-<TT>Disc</TT>.
-This is because
-large changes are typically sequential,
-whereupon the next change is unlikely to overlap the current one.
-</P>
-<P>
-A
-<TT>File</TT>
-comprises a
-<TT>String</TT>
-to hold the file name and some ancillary data such as dot and the modified bit.
-The most important components, though, are a pair of
-<TT>Buffers</TT>,
-one called the transcript and the other the contents.
-Their use is described in the next section.
-</P>
-<P>
-The overall structure is shown in Figure 5.
-Although it may seem that the data is touched many times on its
-way from the
-<TT>Disc</TT>,
-it is read (by one UNIX system call) directly into the cache of the
-associated
-<TT>Buffer</TT>;
-no extra copy is done.
-Similarly, when flushing the cache, the text is written
-directly from the cache to disc.
-Most operations act directly on the text in the cache.
-A principle applied throughout
-<TT>sam</TT>
-is that the fewer times the data is copied, the faster the program will run
-(see also the paper by Waite<sup>15</sup>).
-<DL><DT><DD><TT><PRE>
-<br><img src="-.11766.gif"><br>
-<br>
-</PRE></TT></DL>
-<I>Figure 5. File data structures.
-The temporary files are stored in the standard repository for such files
-on the host system.
-<br>
-<DL><DT><DD><TT><PRE>
-</I><br>&#32;<br>
-</PRE></TT></DL>
-</P>
-<P>
-The contents of a
-<TT>File</TT>
-are accessed by a routine that
-copies to a buffer a substring of a file starting at a specified offset.
-To read a byte at a time, a
-per-<TT>File</TT>
-array is loaded starting from a specified initial position,
-and bytes may then be read from the array.
-The implementation is done by a macro similar to the C standard I/O
-<TT>getc</TT>
-macro.<sup>14</sup>
-Because the reading may be done at any address, a minor change to the
-macro allows the file to be read backwards.
-This array is read-only; there is no
-<TT>putc</TT>.
-</P>
-<H4>Doing and undoing
-</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-has an unusual method for managing changes to files.
-The command language makes it easy to specify multiple variable-length changes
-to a file millions of bytes long, and such changes
-must be made efficiently if the editor is to be practical.
-The usual techniques for inserting and deleting strings
-are inadequate under these conditions.
-The
-<TT>Buffer</TT>
-and
-<TT>Disc</TT>
-data structures are designed for efficient random access to long strings,
-but care must be taken to avoid super-linear behavior when making
-many changes simultaneously.
-<P>
-<TT>Sam</TT>
-uses a two-pass algorithm for making changes, and treats each file as a database
-against which transactions are registered.
-Changes are not made directly to the contents.
-Instead, when a command is started, a `mark' containing
-a sequence number is placed in the transcript
-<TT>Buffer</TT>,
-and each change made to the file, either an insertion or deletion
-or a change to the file name,
-is appended to the end of the transcript.
-When the command is complete, the transcript is rewound to the
-mark and applied to the contents.
-</P>
-<P>
-One reason for separating evaluation from
-application in this way is to simplify tracking the addresses of changes
-made in the middle of a long sequence.
-The two-pass algorithm also allows all changes to apply to the
-<I>original</I>
-data: no change can affect another change made in the same command.
-This is particularly important when evaluating an
-<TT>x</TT>
-command because it prevents regular expression matches
-from stumbling over changes made earlier in the execution.
-Also, the two-pass
-algorithm is cleaner than the way other UNIX editors allow changes to
-affect each other;
-for example,
-<TT>ed</TT>'s
-idioms to do things like delete every other line
-depend critically on the implementation.
-Instead,
-<TT>sam</TT>'s
-simple model, in which all changes in a command occur effectively
-simultaneously, is easy to explain and to understand.
-</P>
-<P>
-The records in the transcript are of the form ``delete substring from
-locations
-123 to 456'' and ``insert 11 characters `hello there' at location 789.''
-(It is an error if the changes are not at monotonically greater
-positions through the file.)
-While the update is occurring, these numbers must be
-offset by earlier changes, but that is straightforward and
-local to the update routine;
-moreover, all the numbers have been computed
-before the first is examined.
-</P>
-<P>
-Treating the file as a transaction system has another advantage:
-undo is trivial.
-All it takes is to invert the transcript after it has been
-implemented, converting insertions
-into deletions and vice versa, and saving them in a holding
-<TT>Buffer</TT>.
-The `do' transcript can then be deleted from
-the transcript
-<TT>Buffer</TT>
-and replaced by the `undo' transcript.
-If an undo is requested, the transcript is rewound and the undo transcript
-executed.
-Because the transcript
-<TT>Buffer</TT>
-is not truncated after each command, it accumulates
-successive changes.
-A sequence of undo commands
-can therefore back up the file arbitrarily,
-which is more helpful than the more commonly implemented self-inverse form of undo.
-(<TT>Sam</TT>
-provides no way to undo an undo, but if it were desired,
-it would be easy to provide by re-interpreting the `do' transcript.)
-Each mark in the transcript contains a sequence number and the offset into
-the transcript of the previous mark, to aid in unwinding the transcript.
-Marks also contain the value of dot and the modified bit so these can be
-restored easily.
-Undoing multiple files is easy; it merely demands undoing all files whose
-latest change has the same sequence number as the current file.
-</P>
-<P>
-Another benefit of having a transcript is that errors encountered in the middle
-of a complicated command need not leave the files in an intermediate state.
-By rewinding the transcript to the mark beginning the command,
-the partial command can be trivially undone.
-</P>
-<P>
-When the update algorithm was first implemented, it was unacceptably slow,
-so a cache was added to coalesce nearby changes,
-replacing multiple small changes by a single larger one.
-This reduced the number
-of insertions into the transaction
-<TT>Buffer</TT>,
-and made a dramatic improvement in performance,
-but made it impossible
-to handle changes in non-monotonic order in the file; the caching method
-only works if changes don't overlap.
-Before the cache was added, the transaction could in principle be sorted
-if the changes were out of order, although
-this was never done.
-The current status is therefore acceptable performance with a minor
-restriction on global changes, which is sometimes, but rarely, an annoyance.
-</P>
-<P>
-The update algorithm obviously paws the data more than simpler
-algorithms, but it is not prohibitively expensive;
-the caches help.
-(The principle of avoiding copying the data is still honored here,
-although not as piously:
-the data is moved from contents' cache to
-the transcript's all at once and through only one internal buffer.)
-Performance figures confirm the efficiency.
-To read from a dead start a hundred kilobyte file on a VAX-11/750
-takes 1.4 seconds of user time, 2.5 seconds of system time,
-and 5 seconds of real time.
-Reading the same file in
-<TT>ed</TT>
-takes 6.0 seconds of user time, 1.7 seconds of system time,
-and 8 seconds of real time.
-<TT>Sam</TT>
-uses about half the CPU time.
-A more interesting example is the one stated above:
-inserting a character between every pair of characters in the file.
-The
-<TT>sam</TT>
-command is
-<DL><DT><DD><TT><PRE>
-,y/@/ a/x/
-</PRE></TT></DL>
-and takes 3 CPU seconds per kilobyte of input file, of which
-about a third is spent in the regular expression code.
-This translates to about 500 changes per second.
-<TT>Ed</TT>
-takes 1.5 seconds per kilobyte to make a similar change (ignoring newlines),
-but cannot undo it.
-The same example in
-<TT>ex</TT>,<sup>9</sup>
-a variant of
-<TT>ed</TT>
-done at the University of California at Berkeley,
-which allows one level of undoing, again takes 3 seconds.
-In summary,
-<TT>sam</TT>'s
-performance is comparable to that of other UNIX editors, although it solves
-a harder problem.
-</P>
-<H4>Communications
-</H4>
-<br>&#32;<br>
-The discussion so far has described the implementation of the host part of
-<TT>sam</TT>;
-the next few sections explain how a machine with mouse and bitmap display
-can be engaged to improve interaction.
-<TT>Sam</TT>
-is not the first editor to be written as two processes,<sup>16</sup>
-but its implementation
-has some unusual aspects.
-<P>
-There are several ways
-<TT>sam</TT>'s
-host and terminal parts may be connected.
-The first and simplest is to forgo the terminal part and use the host
-part's command language to edit text on an ordinary terminal.
-This mode is invoked by starting
-<TT>sam</TT>
-with the
-<TT>-d</TT>
-option.
-With no options,
-<TT>sam</TT>
-runs separate host and terminal programs,
-communicating with a message protocol over the physical
-connection that joins them.
-Typically, the connection is an RS-232 link between a Blit
-(the prototypical display for
-<TT>sam</TT>)
-and a host running
-the Ninth Edition of the UNIX operating system.<sup>8</sup>
-(This is the version of the system used in the Computing Sciences Research
-Center at AT&amp;T Bell Laboratories [now Lucent Technologies, Bell Labs], where I work.  Its relevant
-aspects are discussed in the Blit paper.<sup>1</sup>)
-The implementation of
-<TT>sam</TT>
-for the SUN computer runs both processes on the same machine and
-connects them by a pipe.
-</P>
-<P>
-The low bandwidth of an RS-232 link
-necessitated the split between
-the two programs.
-The division is a mixed blessing:
-a program in two parts is much harder to write and to debug
-than a self-contained one,
-but the split makes several unusual configurations possible.
-The terminal may be physically separated from the host, allowing the conveniences
-of a mouse and bitmap display to be taken home while leaving the files at work.
-It is also possible to run the host part on a remote machine:
-<DL><DT><DD><TT><PRE>
-sam -r host
-</PRE></TT></DL>
-connects to the terminal in the usual way, and then makes a call
-across the network to establish the host part of
-<TT>sam</TT>
-on the named machine.
-Finally, it cross-connects the I/O to join the two parts.
-This allows
-<TT>sam</TT>
-to be run on machines that do not support bitmap displays;
-for example,
-<TT>sam</TT>
-is the editor of choice on our Cray X-MP/24.
-<TT>Sam</TT>
-<TT>-r</TT>
-involves
-<I>three</I>
-machines: the remote host, the terminal, and the local host.
-The local host's job is simple but vital: it passes the data
-between the remote host and terminal.
-</P>
-<P>
-The host and terminal exchange messages asynchronously
-(rather than, say, as remote procedure calls) but there is no
-error detection or correction
-because, whatever the configuration, the connection is reliable.
-Because the terminal handles mundane interaction tasks such as
-popping up menus and interpreting the responses, the messages are about
-data, not actions.
-For example, the host knows nothing about what is displayed on the screen,
-and when the user types a character, the message sent to the host says
-``insert a one-byte string at location 123 in file 7,'' not ``a character
-was typed at the current position in the current file.''
-In other words, the messages look very much like the transaction records
-in the transcripts.
-</P>
-<P>
-Either the host or terminal part of
-<TT>sam</TT>
-may initiate a change to a file.
-The command language operates on the host, while typing and some
-mouse operations are executed directly in the terminal to optimize response.
-Changes initiated by the host program must be transmitted to the terminal,
-and
-vice versa.
-(A token is exchanged to determine which end is in control,
-which means that characters typed while a time-consuming command runs
-must be buffered and do not appear until the command is complete.)
-To maintain consistent information,
-the host and terminal track changes through a per-file
-data structure that records what portions of the file
-the terminal has received.
-The data structure, called a
-<TT>Rasp</TT>
-(a weak pun: it's a file with holes)
-is held and updated by both the host and terminal.
-A
-<TT>Rasp</TT>
-is a list of
-<TT>Strings</TT>
-holding those parts of the file known to the terminal,
-separated by counts of the number of bytes in the interstices.
-Of course, the host doesn't keep a separate copy of the data (it only needs
-the lengths of the various pieces),
-but the structure is the same on both ends.
-</P>
-<P>
-The
-<TT>Rasp</TT>
-in the terminal doubles as a cache.
-Since the terminal keeps the text for portions of the file it has displayed,
-it need not request data from the host when revisiting old parts of the file
-or redrawing obscured windows, which speeds things up considerably
-over low-speed links.
-</P>
-<P>
-It's trivial for the terminal to maintain its
-<TT>Rasp</TT>,
-because all changes made on the terminal apply to parts of the file
-already loaded there.
-Changes made by the host are compared against the
-<TT>Rasp</TT>
-during the update sequence after each command.
-Small changes to pieces of the file loaded in the terminal
-are sent in their entirety.
-Larger changes, and changes that fall entirely in the holes,
-are transmitted as messages without literal data:
-only the lengths of the deleted and inserted strings are transmitted.
-When a command is completed, the terminal examines its visible
-windows to see if any holes in their
-<TT>Rasps</TT>
-intersect the visible portion of the file.
-It then requests the missing data from the host,
-along with up to 512 bytes of surrounding data, to minimize
-the number of messages when visiting a new portion of the file.
-This technique provides a kind of two-level lazy evaluation for the terminal.
-The first level sends a minimum of information about
-parts of the file not being edited interactively;
-the second level waits until a change is displayed before
-transmitting the new data.
-Of course,
-performance is also helped by having the terminal respond immediately to typing
-and simple mouse requests.
-Except for small changes to active pieces of the file, which are
-transmitted to the terminal without negotiation,
-the terminal is wholly responsible for deciding what is displayed;
-the host uses the
-<TT>Rasp</TT>
-only to tell the terminal what might be relevant.
-</P>
-<P>
-When a change is initiated by the host,
-the messages to the terminal describing the change
-are generated by the routine that applies the transcript of the changes
-to the contents of the
-<TT>File</TT>.
-Since changes are undone by the same update routine,
-undoing requires
-no extra code in the communications;
-the usual messages describing changes to the file are sufficient
-to back up the screen image.
-</P>
-<P>
-The
-<TT>Rasp</TT>
-is a particularly good example of the way caches are used in
-<TT>sam</TT>.
-First, it facilitates access to the active portion of the text by placing
-the busy text in main memory.
-In so doing, it provides efficient access
-to a large data structure that does not fit in memory.
-Since the form of data is to be imposed by the user, not by the program,
-and because characters will frequently be scanned sequentially,
-files are stored as flat objects.
-Caches help keep performance good and linear when working with such
-data.
-</P>
-<P>
-Second, the
-<TT>Rasp</TT>
-and several of the other caches have some
-<I>read-ahead;</I>
-that is, the cache is loaded with more information than is needed for
-the job immediately at hand.
-When manipulating linear structures, the accesses are usually sequential,
-and read-ahead can significantly reduce the average time to access the
-next element of the object.
-Sequential access is a common mode for people as well as programs;
-consider scrolling through a document while looking for something.
-</P>
-<P>
-Finally, like any good data structure,
-the cache guides the algorithm, or at least the implementation.
-The
-<TT>Rasp</TT>
-was actually invented to control the communications between the host and
-terminal parts, but I realized very early that it was also a form of
-cache.  Other caches were more explicitly intended to serve a double
-purpose: for example, the caches in
-<TT>Files</TT>
-that coalesce updates not only reduce traffic to the
-transcript and contents
-<TT>Buffers</TT>,
-they also clump screen updates so that complicated changes to the
-screen are achieved in
-just a few messages to the terminal.
-This saved me considerable work: I did not need to write special
-code to optimize the message traffic to the
-terminal.
-Caches pay off in surprising ways.
-Also, they tend to be independent, so their performance improvements
-are multiplicative.
-</P>
-<H4>Data structures in the terminal
-</H4>
-<br>&#32;<br>
-The terminal's job is to display and to maintain a consistent image of
-pieces of the files being edited.
-Because the text is always in memory, the data structures are
-considerably simpler than those in the host part.
-<P>
-<TT>Sam</TT>
-typically has far more windows than does
-<TT>mux</TT>,
-the window system within which its Blit implementation runs.
-<TT>Mux</TT>
-has a fairly small number of asynchronously updated windows;
-<TT>sam</TT>
-needs a large number of synchronously updated windows that are
-usually static and often fully obscured.
-The different tradeoffs guided
-<TT>sam</TT>
-away from the memory-intensive implementation of windows, called
-<TT>Layers</TT>,<sup>17</sup>
-used in
-<TT>mux</TT>.
-Rather than depending on a complete bitmap image of the display for each window,
-<TT>sam</TT>
-regenerates the image from its in-memory text
-(stored in the
-<TT>Rasp</TT>)
-when necessary, although it will use such an image if it is available.
-Like
-<TT>Layers</TT>,
-though,
-<TT>sam</TT>
-uses the screen bitmap as active storage in which to update the image using
-<TT>bitblt</TT>.<sup>18,19</sup>
-The resulting organization, pictured in Figure 6,
-has a global array of windows, called
-<TT>Flayers</TT>,
-each of which holds an image of a piece of text held in a data structure
-called a
-<TT>Frame</TT>,
-which in turn represents
-a rectangular window full of text displayed in some
-<TT>Bitmap</TT>.
-Each
-<TT>Flayer</TT>
-appears in a global list that orders them all front-to-back
-on the display, and simultaneously as an element of a per-file array
-that holds all the open windows for that file.
-The complement in the terminal of the
-<TT>File</TT>
-on the host is called a
-<TT>Text</TT>;
-each connects its
-<TT>Flayers</TT>
-to the associated
-<TT>Rasp</TT>.
-<DL><DT><DD><TT><PRE>
-<br><img src="-.11767.gif"><br>
-<br>
-</PRE></TT></DL>
-<I>Figure 6. Data structures in the terminal.
-</I><TT>Flayers</TT><I>
-are also linked together into a front-to-back list.
-</I><TT>Boxes</TT><I>
-are discussed in the next section.
-</I><br>
-<DL><DT><DD><TT><PRE>
-<br>&#32;<br>
-</PRE></TT></DL>
-</P>
-<P>
-The
-<TT>Bitmap</TT>
-for a
-<TT>Frame</TT>
-contains the image of the text.
-For a fully visible window, the
-<TT>Bitmap</TT>
-will be the screen (or at least the
-<TT>Layer</TT>
-in which
-<TT>sam</TT>
-is being run),
-while for partially obscured windows the
-<TT>Bitmap</TT>
-will be off-screen.
-If the window is fully obscured, the
-<TT>Bitmap</TT>
-will be null.
-</P>
-<P>
-The
-<TT>Bitmap</TT>
-is a kind of cache.
-When making changes to the display, most of the original image will
-look the same in the final image, and the update algorithms exploit this.
-The
-<TT>Frame</TT>
-software updates the image in the
-<TT>Bitmap</TT>
-incrementally; the
-<TT>Bitmap</TT>
-is not just an image, it is a data structure.<sup>18,19</sup>
-The job of the software that updates the display is therefore
-to use as much as possible of the existing image (converting the
-text from ASCII characters to pixels is expensive) in a sort of two-dimensional
-string insertion algorithm.
-The details of this process are described in the next section.
-</P>
-<P>
-The
-<TT>Frame</TT>
-software has no code to support overlapping windows;
-its job is to keep a single
-<TT>Bitmap</TT>
-up to date.
-It falls to the
-<TT>Flayer</TT>
-software to multiplex the various
-<TT>Bitmaps</TT>
-onto the screen.
-The problem of maintaining overlapping
-<TT>Flayers</TT>
-is easier than for
-<TT>Layers</TT><sup>17</sup>
-because changes are made synchronously and because the contents of the window
-can be reconstructed from the data stored in the
-<TT>Frame</TT>;
-the
-<TT>Layers</TT>
-software
-makes no such assumptions.
-In
-<TT>sam</TT>,
-the window being changed is almost always fully visible, because the current
-window is always fully visible, by construction.
-However, when multi-file changes are being made, or when
-more than one window is open on a file,
-it may be necessary to update partially obscured windows.
-</P>
-<P>
-There are three cases: the window is 
-fully visible, invisible (fully obscured), or partially visible.
-If fully visible, the
-<TT>Bitmap</TT>
-is part of the screen, so when the
-<TT>Flayer</TT>
-update routine calls the
-<TT>Frame</TT>
-update routine, the screen will be updated directly.
-If the window is invisible,
-there is no associated
-<TT>Bitmap</TT>,
-and all that is necessary is to update the
-<TT>Frame</TT>
-data structure, not the image.
-If the window is partially visible, the
-<TT>Frame</TT>
-routine is called to update the image in the off-screen
-<TT>Bitmap</TT>,
-which may require regenerating it from the text of the window.
-The
-<TT>Flayer</TT>
-code then clips this
-<TT>Bitmap</TT>
-against the
-<TT>Bitmaps</TT>
-of all
-<TT>Frames</TT>
-in front of the
-<TT>Frame</TT>
-being modified, and the remainder is copied to the display.
-</P>
-<P>
-This is much faster than recreating the image off-screen
-for every change, or clipping all the changes made to the image
-during its update.
-Unfortunately, these caches can also consume prohibitive amounts of
-memory, so they are freed fairly liberally &#8212; after every change to the
-front-to-back order of the
-<TT>Flayers</TT>.
-The result is that
-the off-screen
-<TT>Bitmaps</TT>
-exist only while multi-window changes are occurring,
-which is the only time the performance improvement they provide is needed.
-Also, the user interface causes fully-obscured windows to be the
-easiest to make &#8212;
-creating a canonically sized and placed window requires only a button click
-&#8212; which reduces the need for caching still further.
-</P>
-<P>
-</P>
-<H4>Screen update
-</H4>
-<br>&#32;<br>
-Only two low-level primitives are needed for incremental update:
-<TT>bitblt</TT>,
-which copies rectangles of pixels, and
-<TT>string</TT>
-(which in turn calls
-<TT>bitblt</TT>),
-which draws a null-terminated character string in a
-<TT>Bitmap</TT>.
-A
-<TT>Frame</TT>
-contains a list of
-<TT>Boxes</TT>,
-each of which defines a horizontal strip of text in the window
-(see Figure 7).
-A
-<TT>Box</TT>
-has a character string
-<TT>str</TT>,
-and a
-<TT>Rectangle</TT>
-<TT>rect</TT>
-that defines the location of the strip in the window.
-(The text in
-<TT>str</TT>
-is stored in the
-<TT>Box</TT>
-separately from the
-<TT>Rasp</TT>
-associated with the window's file, so
-<TT>Boxes</TT>
-are self-contained.)
-The invariant is that
-the image of the
-<TT>Box</TT>
-can be reproduced by calling
-<TT>string</TT>
-with argument
-<TT>str</TT>
-to draw the string in
-<TT>rect</TT>,
-and the resulting picture fits perfectly within
-<TT>rect</TT>.
-In other words, the
-<TT>Boxes</TT>
-define the tiling of the window.
-The tiling may be complicated by long lines of text, which
-are folded onto the next line.
-Some editors use horizontal scrolling to avoid this complication,
-but to be comfortable this technique requires that lines not be
-<I>too</I>
-long;
-<TT>sam</TT>
-has no such restriction.
-Also, and perhaps more importantly, UNIX programs and terminals traditionally fold
-long lines to make their contents fully visible.
-<P>
-Two special kinds of
-<TT>Boxes</TT>
-contain a single
-character: either a newline or a tab.
-Newlines and tabs are white space.
-A newline
-<TT>Box</TT>
-always extends to the right edge of the window,
-forcing the following
-<TT>Box</TT>
-to the next line.
-The width of a tab depends on where it is located:
-it forces the next
-<TT>Box</TT>
-to begin at a tab location.
-Tabs also
-have a minimum width equivalent to a blank (blanks are
-drawn by
-<TT>string</TT>
-and are not treated specially); newlines have a minimum width of zero.
-<DL><DT><DD><TT><PRE>
-<br><img src="-.11768.gif"><br>
-<br>&#32;<br>
-<br>
-</PRE></TT></DL>
-<I>Figure 7. A line of text showing its
-</I><TT>Boxes</TT><I>.
-The first two blank
-</I><TT>Boxes</TT><I>
-contain tabs; the last contains a newline.
-Spaces are handled as ordinary characters.
-</I><br>
-<DL><DT><DD><TT><PRE>
-<br>&#32;<br>
-</PRE></TT></DL>
-</P>
-<P>
-The update algorithms always use the
-<TT>Bitmap</TT>
-image of the text (either the display or cache
-<TT>Bitmap</TT>);
-they never examine the characters within a
-<TT>Box</TT>
-except when the
-<TT>Box</TT>
-needs to be split in two.
-Before a change, the window consists of a tiling of
-<TT>Boxes</TT>;
-after the change the window is tiled differently.
-The update algorithms rearrange the tiles in place, without
-backup storage.
-The algorithms are not strictly optimal &#8212; for example, they can
-clear a pixel that is later going to be written upon &#8212;
-but they never move a tile that doesn't need to be moved,
-and they move each tile at most once.
-<TT>Frinsert</TT>
-on a Blit can absorb over a thousand characters a second if the strings
-being inserted are a few tens of characters long.
-</P>
-<P>
-Consider
-<TT>frdelete</TT>.
-Its job is to delete a substring from a
-<TT>Frame</TT>
-and restore the image of the
-<TT>Frame</TT>.
-The image of a substring has a peculiar shape (see Figure 2) comprising
-possibly a partial line,
-zero or more full lines,
-and possibly a final partial line.
-For reference, call this the
-Z-shape.
-<TT>Frdelete</TT>
-begins by splitting, if necessary, the
-<TT>Boxes</TT>
-containing the ends of
-the substring so the substring begins and ends on
-<TT>Box</TT>
-boundaries.
-Because the substring is being deleted, its image is not needed,
-so the Z-shape is then cleared.
-Then, tiles (that is, the images of
-<TT>Boxes</TT>)
-are copied, using
-<TT>bitblt</TT>,
-from immediately after the Z-shape to
-the beginning of the Z-shape,
-resulting in a new Z-shape.
-(<TT>Boxes</TT>
-whose contents would span two lines in the new position must first be split.)
-</P>
-<P>
-Copying the remainder of the
-<TT>Frame</TT>
-tile by tile
-this way will clearly accomplish the deletion but eventually,
-typically when the copying algorithm encounters a tab or newline,
-the old and new
-<TT>x</TT>
-coordinates of the tile
-to be copied are the same.
-This correspondence implies
-that the Z-shape has its beginning and ending edges aligned
-vertically, and a sequence of at most two
-<TT>bitblts</TT>
-can be used to copy the remaining tiles.
-The last step is to clear out the resulting empty space at the bottom
-of the window;
-the number of lines to be cleared is the number of complete lines in the
-Z-shape closed by the final
-<TT>bitblts</TT>.
-The final step is to merge horizontally adjacent
-<TT>Boxes</TT>
-of plain text.
-The complete source to
-<TT>frdelete</TT>
-is less than 100 lines of C.
-</P>
-<P>
-<TT>Frinsert</TT>
-is more complicated because it must do four passes:
-one to construct the
-<TT>Box</TT>
-list for the inserted string,
-one to reconnoitre,
-one to copy (in opposite order to
-<TT>frdelete</TT>)
-the
-<TT>Boxes</TT>
-to make the hole for the new text,
-and finally one to copy the new text into place.
-Overall, though,
-<TT>frinsert</TT>
-has a similar flavor to
-<TT>frdelete</TT>,
-and needn't be described further.
-<TT>Frinsert</TT>
-and its subsidiary routines comprise 211 lines of C.
-</P>
-<P>
-The terminal source code is 3024 lines of C,
-and the host source is 5797 lines.
-</P>
-<H4>Discussion
-</H4>
-<H4>History
-</H4>
-<br>&#32;<br>
-The immediate ancestor of
-<TT>sam</TT>
-was the original text editor for the Blit, called
-<TT>jim</TT>.
-<TT>Sam</TT>
-inherited
-<TT>jim</TT>'s
-two-process structure and mouse language almost unchanged, but
-<TT>jim</TT>
-suffered from several drawbacks that were addressed in the design of
-<TT>sam</TT>.
-The most important of these was the lack of a command language.
-Although
-<TT>jim</TT>
-was easy to use for simple editing, it provided no direct help with
-large or repetitive editing tasks.  Instead, it provided a command to pass
-selected text through a shell pipeline,
-but this was no more satisfactory than could be expected of a stopgap measure.
-<P>
-<TT>Jim</TT>
-was written primarily as a vehicle for experimenting with a mouse-based
-interface to text, and the experiment was successful.
-<TT>Jim</TT>
-had some spin-offs:
-<TT>mux</TT>,
-the second window system for the Blit, is essentially a multiplexed
-version of the terminal part of
-<TT>jim</TT>;
-and the debugger
-<TT>pi</TT>'s
-user interface<sup>20</sup> was closely modeled on
-<TT>jim</TT>'s.
-But after a couple of years,
-<TT>jim</TT>
-had become difficult to maintain and limiting to use,
-and its replacement was overdue.
-</P>
-<P>
-I began the design of
-<TT>sam</TT>
-by asking
-<TT>jim</TT>
-customers what they wanted.
-This was probably a mistake; the answers were essentially a list of features
-to be found in other editors, which did not provide any of the
-guiding principles I was seeking.
-For instance, one common request was for a ``global substitute,''
-but no one suggested how to provide it within a cut-and-paste editor.
-I was looking for a scheme that would
-support such specialized features comfortably in the context of some
-general command language.
-Ideas were not forthcoming, though, particularly given my insistence
-on removing all limits on file sizes, line lengths and so on.
-Even worse, I recognized that, since the mouse could easily
-indicate a region of the screen that was not an integral number of lines,
-the command language would best forget about newlines altogether,
-and that meant the command language had to treat the file as a single
-string, not an array of lines.
-</P>
-<P>
-Eventually, I decided that thinking was not getting me very far and it was
-time to try building.
-I knew that the terminal part could be built easily &#8212;
-that part of
-<TT>jim</TT>
-behaved acceptably well &#8212; and that most of the hard work was going
-to be in the host part: the file interface, command interpreter and so on.
-Moreover, I had some ideas about how the architecture of
-<TT>jim</TT>
-could be improved without destroying its basic structure, which I liked
-in principle but which hadn't worked out as well as I had hoped.
-So I began by designing the file data structure,
-starting with the way
-<TT>jim</TT>
-worked &#8212; comparable to a single structure merging
-<TT>Disc</TT>
-and
-<TT>Buffer</TT>,
-which I split to make the cache more general
-&#8212; and thinking about how global substitute could be implemented.
-The answer was clearly that it had to be done in two passes,
-and the transcript-oriented implementation fell out naturally.
-</P>
-<P>
-<TT>Sam</TT>
-was written bottom-up,
-starting from the data structures and algorithms for manipulating text,
-through the command language and up to the code for maintaining
-the display.
-In retrospect, it turned out well, but this implementation method is
-not recommended in general.
-There were several times when I had a large body of interesting code
-assembled and no clue how to proceed with it.
-The command language, in particular, took almost a year to figure out,
-but can be implemented (given what was there at the beginning of that year)
-in a day or two.  Similarly, inventing the
-<TT>Rasp</TT>
-data structure delayed the
-connection of the host and terminal pieces by another few months.
-<TT>Sam</TT>
-took about two years to write, although only about four months were
-spent actually working on it.
-</P>
-<P>
-Part of the design process was unusual:
-the subset of the protocol that maintains the
-<TT>Rasp</TT>
-was simulated, debugged
-and verified by an automatic protocol analyzer,<sup>21</sup> and was bug-free
-from the start.
-The rest of the protocol, concerned mostly
-with keeping menus up to date,
-was unfortunately too unwieldy for such analysis,
-and was debugged by more traditional methods, primarily
-by logging in a file all messages in and out of the host.
-</P>
-<H4>Reflections
-</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-is essentially the only interactive editor used by the sixty or so members of
-the computing science research center in which I work.
-The same could not be said of
-<TT>jim</TT>;
-the lack of a command language kept some people from adopting it.
-The union of a user interface as comfortable as
-<TT>jim</TT>'s
-with a command language as powerful as
-<TT>ed</TT>'s&#8224;
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> &#8224;The people who criticize
-<TT>ed</TT>
-as an interactive program often forget that it and its close relative
-<TT>sed</TT><sup>7</sup>
-still thrive as programmable editors.  The strength of these programs is
-independent of their convenience for interactive editing.
-<br>
-</I><DT>&#32;<DD></dl>
-<br>
-is essential to
-<TT>sam</TT>'s
-success.
-When
-<TT>sam</TT>
-was first made available to the
-<TT>jim</TT>
-community,
-almost everyone switched to it within two or three days.
-In the months that followed, even people who had never adopted
-<TT>jim</TT>
-started using
-<TT>sam</TT>
-exclusively.
-<P>
-To be honest,
-<TT>ed</TT>
-still gets occasional use, but usually when
-something quick needs to be done and the overhead of
-downloading the terminal part of
-<TT>sam</TT>
-isn't worth the trouble.
-Also, as a `line' editor,
-<TT>sam</TT>
-<TT>-d</TT>
-is a bit odd;
-when using a good old ASCII terminal, it's comforting to have
-a true line editor.
-But it is fair to say that
-<TT>sam</TT>'s
-command language has displaced
-<TT>ed</TT>'s
-for most of the complicated editing that has kept line editors
-(that is, command-driven editors) with us.
-</P>
-<P>
-<TT>Sam</TT>'s
-command language is even fancier than
-<TT>ed</TT>'s,
-and most
-<TT>sam</TT>
-customers don't come near to using all its capabilities.
-Does it need to be so sophisticated?
-I think the answer is yes, for two reasons.
-</P>
-<P>
-First, the
-<I>model</I>
-for
-<TT>sam</TT>'s
-command language is really relatively simple, and certainly simpler than that of
-<TT>ed</TT>.
-For instance, there is only one kind of textual loop in
-<TT>sam</TT>
-&#8212; the
-<TT>x</TT>
-command &#8212;
-while
-<TT>ed</TT>
-has three (the
-<TT>g</TT>
-command, the global flag on substitutions, and the implicit loop over
-lines in multi-line substitutions).
-Also,
-<TT>ed</TT>'s
-substitute command is necessary to make changes within lines, but in
-<TT>sam</TT>
-the
-<TT>s</TT>
-command is more of a familiar convenience than a necessity;
-<TT>c</TT>
-and
-<TT>t</TT>
-can do all the work.
-</P>
-<P>
-Second,
-given a community that expects an editor to be about as powerful as
-<TT>ed</TT>,
-it's hard to see how
-<TT>sam</TT>
-could really be much simpler and still satisfy that expectation.
-People want to do ``global substitutes,'' and most are content
-to have the recipe for that and a few other fancy changes.
-The sophistication of the command language is really just a veneer
-over a design that makes it possible to do global substitutes
-in a screen editor.
-Some people will always want something more, however, and it's gratifying to
-be able to provide it.
-The real power of
-<TT>sam</TT>'s
-command language comes from composability of the operators, which is by
-nature orthogonal to the underlying model.
-In other words,
-<TT>sam</TT>
-is not itself complex, but it makes complex things possible.
-If you don't want to do anything complex, you can ignore the
-complexity altogether, and many people do so.
-</P>
-<P>
-Sometimes I am asked the opposite question: why didn't I just make
-<TT>sam</TT>
-a real programmable editor, with macros and variables and so on?
-The main reason is a matter of taste: I like the editor
-to be the same every time I use it.
-There is one technical reason, though:
-programmability in editors is largely a workaround for insufficient
-interactivity.
-Programmable editors are used to make particular, usually short-term,
-things easy to do, such as by providing shorthands for common actions.
-If things are generally easy to do in the first place,
-shorthands are not as helpful.
-<TT>Sam</TT>
-makes common editing operations very easy, and the solutions to
-complex editing problems seem commensurate with the problems themselves.
-Also, the ability to edit the
-<TT>sam</TT>
-window makes it easy to repeat commands &#8212; it only takes a mouse button click
-to execute a command again.
-</P>
-<H4>Pros and cons
-</H4>
-<br>&#32;<br>
-<TT>Sam</TT>
-has several other good points,
-and its share of problems.
-Among the good things is the idea of
-structural regular expressions,
-whose usefulness has only begun to be explored.
-They were arrived at serendipitously when I attempted to distill the essence of
-<TT>ed</TT>'s
-way of doing global substitution and recognized that the looping command in
-<TT>ed</TT>
-was implicitly imposing a structure (an array of lines) on the file.
-<P>
-Another of
-<TT>sam</TT>'s
-good things is its undo capability.
-I had never before used an editor with a true undo,
-but I would never go back now.
-Undo
-<I>must</I>
-be done well, but if it is, it can be relied on.
-For example,
-it's safe to experiment if you're not sure how to write some intricate command,
-because if you make a mistake, it can be fixed simply and reliably.
-I learned two things about undo from writing
-<TT>sam</TT>:
-first, it's easy to provide if you design it in from the beginning, and
-second, it's necessary, particularly if the system has some subtle
-properties that may be unfamiliar or error-prone for users.
-</P>
-<P>
-<TT>Sam</TT>'s
-lack of internal limits and sizes is a virtue.
-Because it avoids all fixed-size tables and data structures,
-<TT>sam</TT>
-is able to make global changes to files that some of our other
-tools cannot even read.
-Moreover, the design keeps the performance linear when doing such
-operations, although I must admit
-<TT>sam</TT>
-does get slow when editing a huge file.
-</P>
-<P>
-Now, the problems.
-Externally, the most obvious is that it is poorly integrated into the
-surrounding window system.
-By design, the user interface in
-<TT>sam</TT>
-feels almost identical to that of
-<TT>mux</TT>,
-but a thick wall separates text in
-<TT>sam</TT>
-from the programs running in
-<TT>mux</TT>.
-For instance, the `snarf buffer' in
-<TT>sam</TT>
-must be maintained separately from that in
-<TT>mux</TT>.
-This is regrettable, but probably necessary given the unusual configuration
-of the system, with a programmable terminal on the far end of an RS-232 link.
-</P>
-<P>
-<TT>Sam</TT>
-is reliable; otherwise, people wouldn't use it.
-But it was written over such a long time, and has so many new (to me)
-ideas in it, that I would like to see it done over again to clean
-up the code and remove many of the lingering problems in the implementation.
-The worst part is in the interconnection of the host and terminal parts,
-which might even be able to go away in a redesign for a more
-conventional window system.
-The program must be split in two to use the terminal effectively,
-but the low bandwidth of the connection forces the separation to
-occur in an inconvenient part of the design if performance is to be acceptable.
-A simple remote procedure call
-protocol driven by the host, emitting only graphics
-commands, would be easy to write but wouldn't have nearly the
-necessary responsiveness.  On the other hand, if the terminal were in control
-and requested much simpler file services from the host, regular expression
-searches would require that the terminal read the entire file over its RS-232
-link, which would be unreasonably slow.
-A compromise in which either end can take control is necessary.
-In retrospect, the communications protocol should have been
-designed and verified formally, although I do not know of any tool
-that can adequately relate the protocol to
-its implementation.
-</P>
-<P>
-Not all of
-<TT>sam</TT>'s
-users are comfortable with its command language, and few are adept.
-Some (venerable) people use a sort of
-``<TT>ed</TT>
-subset'' of
-<TT>sam</TT>'s
-command language,
-and even ask why
-<TT>sam</TT>'s
-command language is not exactly
-<TT>ed</TT>'s.
-(The reason, of course, is that
-<TT>sam</TT>'s
-model for text does not include newlines, which are central to
-<TT>ed</TT>.
-Making the text an array of newlines to the command language would
-be too much of a break from the seamless model provided by the mouse.
-Some editors, such as
-<TT>vi</TT>,
-are willing to make this break, though.)
-The difficulty is that
-<TT>sam</TT>'s
-syntax is so close to
-<TT>ed</TT>'s
-that people believe it
-<I>should</I>
-be the same.
-I thought, with some justification in hindsight,
-that making
-<TT>sam</TT>
-similar to
-<TT>ed</TT>
-would make it easier to learn and to accept.
-But I may have overstepped and raised the users'
-expectations too much.
-It's hard to decide which way to resolve this problem.
-</P>
-<P>
-Finally, there is a tradeoff in
-<TT>sam</TT>
-that was decided by the environment in which it runs:
-<TT>sam</TT>
-is a multi-file editor, although in a different system there might instead be
-multiple single-file editors.
-The decision was made primarily because starting a new program in a Blit is
-time-consuming.
-If the choice could be made freely, however, I would
-still choose the multi-file architecture, because it allows
-groups of files to be handled as a unit;
-the usefulness of the multi-file commands is incontrovertible.
-It is delightful to have the source to an entire program
-available at your fingertips.
-</P>
-<H4>Acknowledgements
-</H4>
-<br>&#32;<br>
-Tom Cargill suggested the idea behind the
-<TT>Rasp</TT>
-data structure.
-Norman Wilson and Ken Thompson influenced the command language.
-This paper was improved by comments from
-Al Aho,
-Jon Bentley,
-Chris Fraser,
-Gerard Holzmann,
-Brian Kernighan,
-Ted Kowalski,
-Doug McIlroy
-and
-Dennis Ritchie.
-<H4>REFERENCES
-</H4>
-<P>
-</P>
-<DL COMPACT>
-<DT> 1.<DD>
-R. Pike,
-`The Blit: a multiplexed graphics terminal,'
-AT&amp;T Bell Labs. Tech. J.,
-<B>63</B>,
-(8),
-1607-1631 (1984).
-<DT> 2.<DD>
-L. Johnson,
-<I>MacWrite,</I>
-Apple Computer Inc., Cupertino, Calif. 1983.
-<DT> 3.<DD>
-B. Lampson,
-`Bravo Manual,'
-in
-Alto User's Handbook,
-pp. 31-62,
-Xerox Palo Alto Research Center,
-Palo Alto, Calif.
-1979.
-<DT> 4.<DD>
-W. Teitelman,
-`A tour through Cedar,'
-IEEE Software,
-<B>1</B>
-(2), 44-73 (1984).
-<DT> 5.<DD>
-J. Gutknecht,
-`Concepts of the text editor Lara,'
-Comm. ACM,
-<B>28</B>,
-(9),
-942-960 (1985).
-<DT> 6.<DD>
-Bell Telephone Laboratories,
-UNIX Programmer's Manual,
-Holt, Rinehart and Winston, New York 1983.
-<DT> 7.<DD>
-B. W. Kernighan and R. Pike,
-The Unix Programming Environment,
-Prentice-Hall, Englewood Cliffs, New Jersey 1984.
-<DT> 8.<DD>
-Unix Time-Sharing System Programmer's Manual, Research Version, Ninth Edition,
-Volume 1,
-AT&amp;T Bell Laboratories, Murray Hill, New Jersey 1986.
-<DT> 9.<DD>
-Unix Time-Sharing System Programmer's Manual, 4.1 Berkeley Software Distribution,
-Volumes 1 and 2C,
-University of California, Berkeley, Calif. 1981.
-<DT>10.<DD>
-R. Pike,
-`Structural Regular Expressions,'
-Proc. EUUG Spring Conf., Helsinki 1987,
-Eur. Unix User's Group, Buntingford, Herts, UK 1987.
-<DT>11.<DD>
-A. Goldberg,
-Smalltalk-80 &#8212; The Interactive Programming Environment,
-Addison-Wesley, Reading, Mass. 1984.
-<DT>12.<DD>
-K. Thompson,
-`Regular expression search algorithm,'
-Comm. ACM,
-<B>11</B>,
-(6),
-419-422 (1968).
-<DT>13.<DD>
-A. V. Aho, J. E. Hopcroft and J. D. Ullman,
-The Design and Analysis of Computer Algorithms,
-Addison-Wesley, Reading, Mass. 1974.
-<DT>14.<DD>
-B. W. Kernighan and D. M. Ritchie,
-The C Programming Language,
-Prentice-Hall, Englewood Cliffs, New Jersey 1978.
-<DT>15.<DD>
-W. M. Waite,
-`The cost of lexical analysis,'
-Softw. Pract. Exp.,
-<B>16</B>,
-(5),
-473-488 (1986).
-<DT>16.<DD>
-C. W. Fraser,
-`A generalized text editor,'
-Comm. ACM,
-<B>23</B>,
-(3),
-154-158 (1980).
-<DT>17.<DD>
-R. Pike,
-`Graphics in overlapping bitmap layers,'
-ACM Trans. on Graph.,
-<B>2</B>,
-(2),
-135-160 (1983).
-<DT>18.<DD>
-L. J. Guibas and J. Stolfi,
-`A language for bitmap manipulation,'
-ACM Trans. on Graph.,
-<B>1</B>,
-(3),
-191-214 (1982).
-<DT>19.<DD>
-R. Pike, B. Locanthi and J. Reiser,
-`Hardware/software trade-offs for bitmap graphics on the Blit,'
-Softw. Pract. Exp.,
-<B>15</B>,
-(2),
-131-151 (1985).
-<DT>20.<DD>
-T. A. Cargill,
-`The feel of Pi,'
-Winter USENIX Conference Proceedings,
-Denver 1986,
-62-71,
-USENIX Assoc., El Cerrito, CA.
-<DT>21.<DD>
-G. J. Holzmann,
-`Tracing protocols,'
-AT&amp;T Tech. J.,
-<B>64</B>,
-(10),
-2413-2434 (1985).
-
-</dl>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 547
sys/doc/sleep.html

@@ -1,547 +0,0 @@
-<html>
-<title>
-Process Sleep and Wakeup on a Shared-memory Multiprocessor
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Process Sleep and Wakeup on a Shared-memory Multiprocessor
-</H1>
-<DL><DD><I>Rob Pike<br>
-Dave Presotto<br>
-Ken Thompson<br>
-Gerard Holzmann<br>
-<br>&#32;<br>
-rob,presotto,ken,gerard@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Appeared in a slightly different form in
-Proceedings of the Spring 1991 EurOpen Conference,
-Troms&oslash;, Norway, 1991, pp. 161-166.
-</I><DT>&#32;<DD></dl>
-<br>
-The problem of enabling a `sleeping' process on a shared-memory multiprocessor
-is a difficult one, especially if the process is to be awakened by an interrupt-time
-event.  We present here the code
-for sleep and wakeup primitives that we use in our multiprocessor system.
-The code has been exercised by years of active use and by a verification
-system.
-</DL>
-<br>&#32;<br>
-Our problem is to synchronise processes on a symmetric shared-memory multiprocessor.
-Processes suspend execution, or
-<I>sleep,</I>
-while awaiting an enabling event such as an I/O interrupt.
-When the event occurs, the process is issued a
-<I>wakeup</I>
-to resume its execution.
-During these events, other processes may be running and other interrupts
-occurring on other processors.
-<br>&#32;<br>
-More specifically, we wish to implement subroutines called
-<TT>sleep</TT>,
-callable by a process to relinquish control of its current processor,
-and
-<TT>wakeup</TT>,
-callable by another process or an interrupt to resume the execution
-of a suspended process.
-The calling conventions of these subroutines will remain unspecified
-for the moment.
-<br>&#32;<br>
-We assume the processors have an atomic test-and-set or equivalent
-operation but no other synchronisation method.  Also, we assume interrupts
-can occur on any processor at any time, except on a processor that has
-locally inhibited them.
-<br>&#32;<br>
-The problem is the generalisation to a multiprocessor of a familiar
-and well-understood uniprocessor problem.  It may be reduced to a
-uniprocessor problem by using a global test-and-set to serialise the
-sleeps and wakeups,
-which is equivalent to synchronising through a monitor.
-For performance and cleanliness, however,
-we prefer to allow the interrupt handling and process control to be multiprocessed.
-<br>&#32;<br>
-Our attempts to solve the sleep/wakeup problem in Plan 9
-[Pik90]
-prompted this paper.
-We implemented solutions several times over several months and each
-time convinced ourselves &#173; wrongly &#173; they were correct.
-Multiprocessor algorithms can be
-difficult to prove correct by inspection and formal reasoning about them
-is impractical.  We finally developed an algorithm we trust by
-verifying our code using an
-empirical testing tool.
-We present that code here, along with some comments about the process by
-which it was designed.
-<H4>History
-</H4>
-<br>&#32;<br>
-Since processes in Plan 9 and the UNIX
-system have similar structure and properties, one might ask if
-UNIX
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-[Bac86]
-could not easily be adapted from their standard uniprocessor implementation
-to our multiprocessor needs.
-The short answer is, no.
-<br>&#32;<br>
-The
-UNIX
-routines
-take as argument a single global address
-that serves as a unique
-identifier to connect the wakeup with the appropriate process or processes.
-This has several inherent disadvantages.
-From the point of view of
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>,
-it is difficult to associate a data structure with an arbitrary address;
-the routines are unable to maintain a state variable recording the
-status of the event and processes.
-(The reverse is of course easy &#173; we could
-require the address to point to a special data structure &#173;
-but we are investigating
-UNIX
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>,
-not the code that calls them.)
-Also, multiple processes sleep `on' a given address, so
-<TT>wakeup</TT>
-must enable them all, and let process scheduling determine which process
-actually benefits from the event.
-This is inefficient;
-a queueing mechanism would be preferable
-but, again, it is difficult to associate a queue with a general address.
-Moreover, the lack of state means that
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-cannot know what the corresponding process (or interrupt) is doing;
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-must be executed atomically.
-On a uniprocessor it suffices to disable interrupts during their
-execution.
-On a multiprocessor, however,
-most processors
-can inhibit interrupts only on the current processor,
-so while a process is executing
-<TT>sleep</TT>
-the desired interrupt can come and go on another processor.
-If the wakeup is to be issued by another process, the problem is even harder.
-Some inter-process mutual exclusion mechanism must be used,
-which, yet again, is difficult to do without a way to communicate state.
-<br>&#32;<br>
-In summary, to be useful on a multiprocessor,
-UNIX
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-must either be made to run atomically on a single
-processor (such as by using a monitor)
-or they need a richer model for their communication.
-<H4>The design
-</H4>
-<br>&#32;<br>
-Consider the case of an interrupt waking up a sleeping process.
-(The other case, a process awakening a second process, is easier because
-atomicity can be achieved using an interlock.)
-The sleeping process is waiting for some event to occur, which may be
-modeled by a condition coming true.
-The condition could be just that the event has happened, or something
-more subtle such as a queue draining below some low-water mark.
-We represent the condition by a function of one
-argument of type
-<TT>void*</TT>;
-the code supporting the device generating the interrupts
-provides such a function to be used by
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-to synchronise.  The function returns
-<TT>false</TT>
-if the event has not occurred, and
-<TT>true</TT>
-some time after the event has occurred.
-The
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-routines must, of course, work correctly if the
-event occurs while the process is executing
-<TT>sleep</TT>.
-<br>&#32;<br>
-We assume that a particular call to
-<TT>sleep</TT>
-corresponds to a particular call to
-<TT>wakeup</TT>,
-that is,
-at most one process is asleep waiting for a particular event.
-This can be guaranteed in the code that calls
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-by appropriate interlocks.
-We also assume for the moment that there will be only one interrupt
-and that it may occur at any time, even before
-<TT>sleep</TT>
-has been called.
-<br>&#32;<br>
-For performance,
-we desire that multiple instances of
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-may be running simultaneously on our multiprocessor.
-For example, a process calling
-<TT>sleep</TT>
-to await a character from an input channel need not
-wait for another process to finish executing
-<TT>sleep</TT>
-to await a disk block.
-At a finer level, we would like a process reading from one input channel
-to be able to execute
-<TT>sleep</TT>
-in parallel with a process reading from another input channel.
-A standard approach to synchronisation is to interlock the channel `driver'
-so that only one process may be executing in the channel code at once.
-This method is clearly inadequate for our purposes; we need
-fine-grained synchronisation, and in particular to apply
-interlocks at the level of individual channels rather than at the level
-of the channel driver.
-<br>&#32;<br>
-Our approach is to use an object called a
-<I>rendezvous</I>,
-which is a data structure through which
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-synchronise.
-(The similarly named construct in Ada is a control structure;
-ours is an unrelated data structure.)
-A rendezvous
-is allocated for each active source of events:
-one for each I/O channel,
-one for each end of a pipe, and so on.
-The rendezvous serves as an interlockable structure in which to record
-the state of the sleeping process, so that
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-can communicate if the event happens before or while
-<TT>sleep</TT>
-is executing.
-<br>&#32;<br>
-Our design for
-<TT>sleep</TT>
-is therefore a function
-<DL><DT><DD><TT><PRE>
-void sleep(Rendezvous *r, int (*condition)(void*), void *arg)
-</PRE></TT></DL>
-called by the sleeping process.
-The argument
-<TT>r</TT>
-connects the call to
-<TT>sleep</TT>
-with the call to
-<TT>wakeup</TT>,
-and is part of the data structure for the (say) device.
-The function
-<TT>condition</TT>
-is described above;
-called with argument
-<TT>arg</TT>,
-it is used by
-<TT>sleep</TT>
-to decide whether the event has occurred.
-<TT>Wakeup</TT>
-has a simpler specification:
-<DL><DT><DD><TT><PRE>
-void wakeup(Rendezvous *r).
-</PRE></TT></DL>
-<TT>Wakeup</TT>
-must be called after the condition has become true.
-<H4>An implementation
-</H4>
-<br>&#32;<br>
-The
-<TT>Rendezvous</TT>
-data type is defined as
-<DL><DT><DD><TT><PRE>
-typedef struct{
-	Lock	l;
-	Proc	*p;
-}Rendezvous;
-</PRE></TT></DL>
-Our
-<TT>Locks</TT>
-are test-and-set spin locks.
-The routine
-<TT>lock(Lock *l)</TT>
-returns when the current process holds that lock;
-<TT>unlock(Lock *l)</TT>
-releases the lock.
-<br>&#32;<br>
-Here is our implementation of
-<TT>sleep</TT>.
-Its details are discussed below.
-<TT>Thisp</TT>
-is a pointer to the current process on the current processor.
-(Its value differs on each processor.)
-<DL><DT><DD><TT><PRE>
-void
-sleep(Rendezvous *r, int (*condition)(void*), void *arg)
-{
-	int s;
-
-	s = inhibit();		/* interrupts */
-	lock(&amp;r-&gt;l);
-
-	/*
-	 * if condition happened, never mind
-	 */
-	if((*condition)(arg)){	
-		unlock(&amp;r-&gt;l);
-		allow();	/* interrupts */
-		return;
-	}
-
-	/*
-	 * now we are committed to
-	 * change state and call scheduler
-	 */
-	if(r-&gt;p)
-		error("double sleep %d %d", r-&gt;p-&gt;pid, thisp-&gt;pid);
-	thisp-&gt;state = Wakeme;
-	r-&gt;p = thisp;
-	unlock(&amp;r-&gt;l);
-	allow(s);	/* interrupts */
-	sched();	/* relinquish CPU */
-}
-</PRE></TT></DL>
-Here is
-<TT>wakeup.</TT>
-<DL><DT><DD><TT><PRE>
-void
-wakeup(Rendezvous *r)
-{
-	Proc *p;
-	int s;
-
-	s = inhibit();	/* interrupts; return old state */
-	lock(&amp;r-&gt;l);
-	p = r-&gt;p;
-	if(p){
-		r-&gt;p = 0;
-		if(p-&gt;state != Wakeme)
-			panic("wakeup: not Wakeme");
-		ready(p);
-	}
-	unlock(&amp;r-&gt;l);
-	if(s)
-		allow();
-}
-</PRE></TT></DL>
-<TT>Sleep</TT>
-and
-<TT>wakeup</TT>
-both begin by disabling interrupts
-and then locking the rendezvous structure.
-Because
-<TT>wakeup</TT>
-may be called in an interrupt routine, the lock must be set only
-with interrupts disabled on the current processor,
-so that if the interrupt comes during
-<TT>sleep</TT>
-it will occur only on a different processor;
-if it occurred on the processor executing
-<TT>sleep</TT>,
-the spin lock in
-<TT>wakeup</TT>
-would hang forever.
-At the end of each routine, the lock is released and processor priority
-returned to its previous value.
-(<TT>Wakeup</TT>
-needs to inhibit interrupts in case
-it is being called by a process;
-this is a no-op if called by an interrupt.)
-<br>&#32;<br>
-<TT>Sleep</TT>
-checks to see if the condition has become true, and returns if so.
-Otherwise the process posts its name in the rendezvous structure where
-<TT>wakeup</TT>
-may find it, marks its state as waiting to be awakened
-(this is for error checking only) and goes to sleep by calling
-<TT>sched()</TT>.
-The manipulation of the rendezvous structure is all done under the lock,
-and
-<TT>wakeup</TT>
-only examines it under lock, so atomicity and mutual exclusion
-are guaranteed.
-<br>&#32;<br>
-<TT>Wakeup</TT>
-has a simpler job.  When it is called, the condition has implicitly become true,
-so it locks the rendezvous, sees if a process is waiting, and readies it to run.
-<H4>Discussion
-</H4>
-<br>&#32;<br>
-The synchronisation technique used here
-is similar to known methods, even as far back as Saltzer's thesis
-[Sal66].
-The code looks trivially correct in retrospect: all access to data structures is done
-under lock, and there is no place that things may get out of order.
-Nonetheless, it took us several iterations to arrive at the above
-implementation, because the things that
-<I>can</I>
-go wrong are often hard to see.  We had four earlier implementations
-that were examined at great length and only found faulty when a new,
-different style of device or activity was added to the system.
-<br>&#32;<br>
-Here, for example, is an incorrect implementation of wakeup,
-closely related to one of our versions.
-<DL><DT><DD><TT><PRE>
-void
-wakeup(Rendezvous *r)
-{
-	Proc *p;
-	int s;
-
-	p = r-&gt;p;
-	if(p){
-		s = inhibit();
-		lock(&amp;r-&gt;l);
-		r-&gt;p = 0;
-		if(p-&gt;state != Wakeme)
-			panic("wakeup: not Wakeme");
-		ready(p);
-		unlock(&amp;r-&gt;l);
-		if(s)
-			allow();
-	}
-}
-</PRE></TT></DL>
-The mistake is that the reading of
-<TT>r-&gt;p</TT>
-may occur just as the other process calls
-<TT>sleep</TT>,
-so when the interrupt examines the structure it sees no one to wake up,
-and the sleeping process misses its wakeup.
-We wrote the code this way because we reasoned that the fetch
-<TT>p</TT>
-<TT>=</TT>
-<TT>r-&gt;p</TT>
-was inherently atomic and need not be interlocked.
-The bug was found by examination when a new, very fast device
-was added to the system and sleeps and interrupts were closely overlapped.
-However, it was in the system for a couple of months without causing an error.
-<br>&#32;<br>
-How many errors lurk in our supposedly correct implementation above?
-We would like a way to guarantee correctness; formal proofs are beyond
-our abilities when the subtleties of interrupts and multiprocessors are
-involved.
-With that in mind, the first three authors approached the last to see
-if his automated tool for checking protocols
-[Hol91]
-could be
-used to verify our new
-<TT>sleep</TT>
-and
-<TT>wakeup</TT>
-for correctness.
-The code was translated into the language for that system
-(with, unfortunately, no way of proving that the translation is itself correct)
-and validated by exhaustive simulation.
-<br>&#32;<br>
-The validator found a bug.
-Under our assumption that there is only one interrupt, the bug cannot
-occur, but in the more general case of multiple interrupts synchronising
-through the same condition function and rendezvous,
-the process and interrupt can enter a peculiar state.
-A process may return from
-<TT>sleep</TT>
-with the condition function false
-if there is a delay between
-the condition coming true and
-<TT>wakeup</TT>
-being called,
-with the delay occurring
-just as the receiving process calls
-<TT>sleep</TT>.
-The condition is now true, so that process returns immediately,
-does whatever is appropriate, and then (say) decides to call
-<TT>sleep</TT>
-again.  This time the condition is false, so it goes to sleep.
-The wakeup process then finds a sleeping process,
-and wakes it up, but the condition is now false.
-<br>&#32;<br>
-There is an easy (and verified) solution: at the end of
-<TT>sleep</TT>
-or after
-<TT>sleep</TT>
-returns,
-if the condition is false, execute
-<TT>sleep</TT>
-again.  This re-execution cannot repeat; the second synchronisation is guaranteed
-to function under the external conditions we are supposing.
-<br>&#32;<br>
-Even though the original code is completely
-protected by interlocks and had been examined carefully by all of us
-and believed correct, it still had problems.
-It seems to us that some exhaustive automated analysis is
-required of multiprocessor algorithms to guarantee their safety.
-Our experience has confirmed that it is almost impossible to
-guarantee by inspection or simple testing the correctness
-of a multiprocessor algorithm.  Testing can demonstrate the presence
-of bugs but not their absence
-[Dij72].
-<br>&#32;<br>
-We close by claiming that the code above with
-the suggested modification passes all tests we have for correctness
-under the assumptions used in the validation.
-We would not, however, go so far as to claim that it is universally correct.
-<H4>References
-</H4>
-<br>&#32;<br>
-[Bac86] Maurice J. Bach,
-<I>The Design of the UNIX Operating System,</I>
-Prentice-Hall,
-Englewood Cliffs,
-1986.
-<br>&#32;<br>
-[Dij72] Edsger W. Dijkstra,
-``The Humble Programmer - 1972 Turing Award Lecture'',
-<I>Comm. ACM,</I>
-15(10), pp. 859-866, 
-October 1972.
-<br>&#32;<br>
-[Hol91] Gerard J. Holzmann,
-<I>Design and Validation of Computer Protocols,</I>
-Prentice-Hall,
-Englewood Cliffs,
-1991.
-<br>&#32;<br>
-[Pik90]
-Rob Pike,
-Dave Presotto,
-Ken Thompson,
-Howard Trickey,
-``Plan 9 from Bell Labs'',
-<I>Proceedings of the Summer 1990 UKUUG Conference,</I>
-pp. 1-9,
-London,
-July, 1990.
-<br>&#32;<br>
-[Sal66] Jerome H. Saltzer,
-<I>Traffic Control in a Multiplexed Computer System,</I>
-MIT,
-Cambridge, Mass.,
-1966.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 2490
sys/doc/spin.html

@@ -1,2490 +0,0 @@
-<html>
-
-
-
-
-<br><img src="-.19126690.gif"><br>
-<title>
--
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Using <small>SPIN</small>
-</H1>
-<DL><DD><I>Gerard J. Holzmann<br>
-gerard@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<small>SPIN</small> can be used for proving or disproving logical properties
-of concurrent systems.
-To render the proofs, a concurrent system is first
-modeled in a formal specification language called <small>PROMELA</small>.
-The language allows one to specify the behaviors
-of asynchronously executing
-processes that may interact through synchronous
-or asynchronous message passing, or through direct
-access to shared variables.
-<br>&#32;<br>
-System models specified in this way can be verified
-for both safety and liveness properties. The specification
-of general properties in linear time temporal logic is
-also supported.
-<br>&#32;<br>
-The first part of this manual
-discusses the basic features of the specification language <small>PROMELA</small>.
-The second part describes the verifier <small>SPIN</small>.
-</DL>
-<H4>1 The Language <small>PROMELA</small>
-</H4>
-<br>&#32;<br>
-<small>PROMELA</small> is short for Protocol Meta Language [Ho91].
-<small>PROMELA</small> is a <I>modeling</I> language, not a programming language.
-A formal model differs in two essential ways from an implementation.
-First, a model is meant to be an abstraction of a design
-that contains only those aspects of the design that are
-directly relevant to the properties one is interested in proving.
-Second, a formal model must contain things that are typically not part
-of an implementation, such as worst-case assumptions about
-the behavior of the environment that may interact with the
-system being studied, and a formal statement of relevant correctness
-properties. It is possible to mechanically extract abstract models
-from implementation-level code, as discussed, for instance, in [HS99].
-<br>&#32;<br>
-Verification with <small>SPIN</small> is often performed in a series of steps,
-with the construction of increasingly detailed models.
-Each model can be verified under different types of
-assumptions about the environment and for different
-types of correctness properties.
-If a property is not valid for the given assumptions about
-system behavior, the verifier can produce a counter-example
-that demonstrates how the property may be violated.
-If a property is valid, it may be possible to simplify the
-model based on that fact, and prove still other properties.
-<br>&#32;<br>
-Section 1.1 covers the basic building blocks of the language.
-Section 1.2 introduces the control flow structures.
-Section 1.3 explains how correctness properties are specified.
-Section 1.4 concludes the first part with a discussion of
-special predefined variables and functions that can be used to
-express some correctness properties.
-<br>&#32;<br>
-Up to date manual pages for <small>SPIN</small> can always be found online at:
-http://cm.bell-labs.com/cm/cs/what/spin/Man/
-<H4>1.1 Basics
-</H4>
-<br>&#32;<br>
-A <small>PROMELA</small> model can contain three different types of objects:
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DD>
-* Processes (section 1.1.1),
-<br>
-* Variables (section 1.1.2),
-<br>
-* Message channels (section 1.1.3).
-</DL>
-</dl>
-<br>&#32;<br>
-All processes are global objects.
-For obvious reasons, a <small>PROMELA</small> model must contain at least one
-process to be meaningful.
-Since <small>SPIN</small> is specifically meant to prove properties of
-concurrent systems, a model typically contains more than
-one process.
-<br>&#32;<br>
-Message channels and variables, the two basic types of data objects,
-can be declared with either a global scope or a local scope.
-A data object with global scope can be referred to by all processes.
-A data object with a local scope can be referred to by just a
-single process: the process that declares and instantiates the object.
-As usual, all objects must be declared in the specification
-before they are referenced.
-<H4>1.1.1 Processes
-</H4>
-<br>&#32;<br>
-Here is a simple process that does nothing except print
-a line of text:
-<DL><DT><DD><TT><PRE>
-init {
-	printf("it works\n")
-}
-</PRE></TT></DL>
-There are a few things to note.
-<TT>Init</TT>
-is a predefined keyword from the language.
-It can be used to declare and instantiate
-a single initial process in the model.
-(It is comparable to the
-<TT>main</TT>
-procedure of a C program.)
-The
-<TT>init</TT>
-process does not take arguments, but it can
-start up (instantiate) other processes that do.
-<TT>Printf</TT>
-is one of a few built-in procedures in the language.
-It behaves the same as the C version.
-Note, finally, that no semicolon follows the single
-<TT>printf</TT>
-statement in the above example.
-In <small>PROMELA</small>, semicolons are used as statement separators,
-not statement terminators.  (The <small>SPIN</small> parser, however, is
-lenient on this issue.)
-<br>&#32;<br>
-Any process can start new processes by using another
-built-in procedure called
-<TT>run</TT>.
-For example,
-<DL><DT><DD><TT><PRE>
-proctype you_run(byte x)
-{
-	printf("my x is: %d\n", x)
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-init {
-	run you_run(1);
-	run you_run(2)
-}
-</PRE></TT></DL>
-The word
-<TT>proctype</TT>
-is again a keyword that introduces the declaration
-of a new type of process.
-In this case, we have named that type
-<TT>you_run</TT>
-and declared that all instantiations of processes
-of this type will take one argument:  a data object
-of type
-<TT>byte</TT>,
-that can be referred to within this process by the name
-<TT>x</TT>.
-Instances of a
-<TT>proctype</TT>
-can be created with the predefined procedure
-<TT>run</TT>,
-as shown in the example.
-When the
-<TT>run</TT>
-statement completes, a copy of the process
-has been started, and all its arguments have been
-initialized with the arguments provided.
-The process may, but need not, have performed
-any statement executions at this point.
-It is now part of the concurrent system,
-and its execution can be interleaved arbitrarily with
-those of the other, already executing processes.
-(More about the semantics of execution follows shortly.)
-<br>&#32;<br>
-In many cases, we are only interested in creating a
-single instance of each process type that is declared,
-and the processes require no arguments.
-We can define this by prefixing the keyword
-<TT>proctype</TT>
-from the process declaration with another keyword:
-<TT>active</TT>.
-Instances of all active proctypes are created when the
-system itself is initialized.
-We could, for instance, have avoided the use of
-<TT>init</TT>
-by declaring the corresponding process in the last example
-as follows:
-<DL><DT><DD><TT><PRE>
-active proctype main() {
-	run you_run(1);
-	run you_run(2)
-}
-</PRE></TT></DL>
-Note that there are no parameters to instantiate in this
-case.  Had they been declared, they would default to a
-zero value, just like all other data objects
-that are not explicitly instantiated.
-<br>&#32;<br>
-Multiple copies of a process type can also be created in
-this way.  For example:
-<DL><DT><DD><TT><PRE>
-active [4] proctype try_me() {
-	printf("hi, i am process %d\n", _pid)
-}
-</PRE></TT></DL>
-creates four processes.
-A predefined variable
-<TT>_pid</TT>
-is assigned to each running process, and holds
-its unique process instantiation number.
-In some cases, this number is needed when a reference
-has to be made to a specific process.
-<br>&#32;<br>
-Summarizing:  process behavior is declared in
-<TT>proctype</TT>
-definitions, and it is instantiated with either
-<TT>run</TT>
-statements or with the prefix
-<TT>active</TT>.
-Within a proctype declaration, statements are separated
-(not terminated) by semicolons.
-As we shall see in examples that follow, instead of the
-semicolon, one can also use the alternative separator
-<TT>-&gt;</TT>
-(arrow), wherever that may help to clarify the structure
-of a <small>PROMELA</small> model.
-<H4>Semantics of Execution
-</H4>
-<br>&#32;<br>
-In <small>PROMELA</small> there is no difference between a condition or
-expression and a statement.
-Fundamental to the semantics of the language is the
-notion of the <I>executability</I> of statements.
-Statements are either executable or blocked.
-Executability is the basic means of enforcing
-synchronization between the processes in a distributed system.
-A process can wait for an event to happen by waiting
-for a statement to become executable.
-For instance, instead of writing a busy wait loop:
-<DL><DT><DD><TT><PRE>
-while (a != b)	/* not valid Promela syntax */
-	skip;	/* wait for a==b */
-...
-</PRE></TT></DL>
-we achieve the same effect in <small>PROMELA</small> with the statement
-<DL><DT><DD><TT><PRE>
-(a == b);
-...
-</PRE></TT></DL>
-Often we indicate that the continuation of an execution
-is conditional on the truth of some expression by using
-the alternate statement separator:
-<DL><DT><DD><TT><PRE>
-(a == b) -&gt; ...
-</PRE></TT></DL>
-Assignments and
-<TT>printf</TT>
-statements are always executable in <small>PROMELA</small>.
-A condition, however, can only be executed (passed) when it holds.
-If the condition does not hold, execution blocks until it does.
-There are similar rules for determining the executability
-of all other primitive and compound statements in the
-language.
-The semantics of each statement is defined in terms of
-rules for executability and effect.
-The rules for executability set a precondition on the state
-of the system in which a statement can be executed.
-The effect defines how a statement will alter a
-system state when executed.
-<br>&#32;<br>
-<small>PROMELA</small> assumes that all individual statements are executed
-atomically: that is, they model the smallest meaningful entities
-of execution in the system being studied.
-This means that <small>PROMELA</small> defines the standard asynchronous interleaving
-model of execution, where a supposed scheduler is free at
-each point in the execution to select any one of the processes
-to proceed by executing a single primitive statement.
-Synchronization constraints can be used to influence the
-interleaving patterns.  It is the purpose of a concurrent system's
-design to constrain those patterns in such a way that no
-correctness requirements can be violated, and all service
-requirements are met.  It is the purpose of the verifier
-either to find counter-examples to a designer's claim that this
-goal has been met, or to demonstrate that the claim is indeed valid.
-<H4>1.1.2 Variables
-</H4>
-<br>&#32;<br>
-The table summarizes the five basic data types used in <small>PROMELA</small>.
-<TT>Bit</TT>
-and
-<TT>bool</TT>
-are synonyms for a single bit of information.
-The first three types can store only unsigned quantities.
-The last two can hold either positive or negative values.
-The precise value ranges of variables of types
-<TT>short</TT>
-and
-<TT>int</TT>
-are implementation dependent, and correspond
-to those of the same types in C programs
-that are compiled for the same hardware.
-The values given in the table are most common.
-<br><img src="-.19126691.gif"><br>
-<br>&#32;<br>
-The following example program declares an array of
-two elements of type
-<TT>bool</TT>
-and a scalar variable
-<TT>turn</TT>
-of the same type.
-Note that the example relies on the fact that
-<TT>_pid</TT>
-is either 0 or 1 here.
-<DL><DT><DD><TT><PRE>
-/*
- * Peterson's algorithm for enforcing
- * mutual exclusion between two processes
- * competing for access to a critical section
- */
-bool turn, want[2];
-
-active [2] proctype user()
-{
-again:
-	want[_pid] = 1; turn = _pid;
-
-	/* wait until this condition holds: */
-	(want[1 - _pid] == 0 || turn == 1 - _pid);
-
-	/* enter */
-critical:	skip;
-	/* leave */
-
-	want[_pid] = 0;
-	goto again
-}
-</PRE></TT></DL>
-In the above case, all variables are initialized to zero.
-The general syntax for declaring and instantiating a
-variable, respectively for scalar and array variables, is:
-<DL><DT><DD><TT><PRE>
-type name = expression;
-type name[constant] = expression
-</PRE></TT></DL>
-In the latter case, all elements of the array are initialized
-to the value of the expression.
-A missing initializer field defaults to the value zero.
-As usual, multiple variables of the same type can be grouped
-behind a single type name, as in:
-<DL><DT><DD><TT><PRE>
-byte a, b[3], c = 4
-</PRE></TT></DL>
-In this example, the variable
-<TT>c</TT>
-is initialized to the value 4; variable
-<TT>a</TT>
-and the elements of array
-<TT>b</TT>
-are all initialized to zero.
-<br>&#32;<br>
-Variables can also be declared as structures.
-For example:
-<DL><DT><DD><TT><PRE>
-typedef Field {
-        short f = 3;
-        byte  g
-};
-
-typedef Msg {
-        byte a[3];
-        int fld1;
-        Field fld2;
-        chan p[3];
-        bit b
-};
-
-Msg foo;
-</PRE></TT></DL>
-introduces two user-defined data types, the first named
-<TT>Field</TT>
-and the second named
-<TT>Msg</TT>.
-A single variable named
-<TT>foo</TT>
-of type
-<TT>Msg</TT>
-is declared.
-All fields of
-<TT>foo</TT>
-that are not explicitly initialized (in the example, all fields except
-<TT>foo.fld2.f</TT>)
-are initialized to zero.
-References to the elements of a structure are written as:
-<DL><DT><DD><TT><PRE>
-foo.a[2] = foo.fld2.f + 12
-</PRE></TT></DL>
-A variable of a user-defined type can be passed as a single
-argument to a new process in
-<TT>run</TT>
-statements.
-For instance,
-<DL><DT><DD><TT><PRE>
-proctype me(Msg z) {
-	z.a[2] = 12
-}
-init {
-	Msg foo;
-	run me(foo)
-}
-</PRE></TT></DL>
-<br>&#32;<br>
-Note that even though <small>PROMELA</small> supports only one-dimensional arrays,
-a two-dimensional array can be created indirectly with user-defined
-structures, for instance as follows:
-<DL><DT><DD><TT><PRE>
-typedef Array {
-	byte el[4]
-};
-
-Array a[4];
-</PRE></TT></DL>
-This creates a data structure of 16 elements that can be
-referenced, for instance, as
-<TT>a[i].el[j]</TT>.
-<br>&#32;<br>
-As in C, the indices of an array of
-<TT>N</TT>
-elements range from 0 to
-<TT>N-1</TT>.
-<H4>Expressions
-</H4>
-<br>&#32;<br>
-Expressions must be side-effect free in <small>PROMELA</small>.
-Specifically, this means that an expression cannot
-contain assignments, or send and receive operations (see section 1.1.3).
-<DL><DT><DD><TT><PRE>
-c = c + 1; c = c - 1
-</PRE></TT></DL>
-and
-<DL><DT><DD><TT><PRE>
-c++; c--
-</PRE></TT></DL>
-are assignments in <small>PROMELA</small>, with the same effects.
-But, unlike in C,
-<DL><DT><DD><TT><PRE>
-b = c++
-</PRE></TT></DL>
-is not a valid assignment, because the right-hand side
-operand is not a valid expression in <small>PROMELA</small> (it is not side-effect free).
-<br>&#32;<br>
-It is also possible to write a side-effect free conditional
-expression, with the following syntax:
-<DL><DT><DD><TT><PRE>
-(expr1 -&gt; expr2 : expr3)
-</PRE></TT></DL>
-The parentheses around the conditional expression are required to
-avoid misinterpretation of the arrow.
-The example expression has the value of <TT>expr2</TT> when <TT>expr1</TT>
-evaluates to a non-zero value, and the value of <TT>expr3</TT> otherwise.
-<br>&#32;<br>
-In assignments like
-<DL><DT><DD><TT><PRE>
-variable = expression
-</PRE></TT></DL>
-the values of all operands used inside the expression are first cast to
-signed integers before the operators are applied.
-After the evaluation of the expression completes, the value produced
-is cast to the type of the target variable before the assignment takes place.
-<H4>1.1.3 Message Channels
-</H4>
-<br>&#32;<br>
-Message channels are used to model the transfer of data
-between processes.
-They are declared either locally or globally,
-for instance as follows:
-<DL><DT><DD><TT><PRE>
-chan qname = [16] of { short, byte }
-</PRE></TT></DL>
-The keyword
-<TT>chan</TT>
-introduces a channel declaration.
-In this case, the channel is named
-<TT>qname</TT>,
-and it is declared to be capable of storing up
-to 16 messages.
-Each message stored in the channel is declared here to
-consist of two fields: one of type
-<TT>short</TT>
-and one of type
-<TT>byte</TT>.
-The fields of a message can be any one of the basic types
-<TT>bit</TT>,
-<TT>bool</TT>,
-<TT>byte</TT>,
-<TT>short</TT>,
-<TT>int</TT>,
-and
-<TT>chan</TT>,
-or any user-defined type.
-Message fields cannot be declared as arrays.
-<br>&#32;<br>
-A message field of type
-<TT>chan</TT>
-can be used to pass a channel identifier
-through a channel from one process to another.
-<br>&#32;<br>
-The statement
-<DL><DT><DD><TT><PRE>
-qname!expr1,expr2
-</PRE></TT></DL>
-sends the values of expressions
-<TT>expr1</TT>
-and
-<TT>expr2</TT>
-to the channel that we just created.  It appends
-the message field created from the values of the two
-expressions (and cast to the appropriate types of the
-message fields declared for
-<TT>qname</TT>)
-to the tail of the message buffer of 16 slots that belongs
-to channel
-<TT>qname</TT>.
-By default the send statement is only executable if the target
-channel is non-full.
-(This default semantics can be changed in the verifier into
-one where the send statement is always executable, but the
-message will be lost when an attempt is made to append it to
-a full channel.)
-<br>&#32;<br>
-The statement
-<DL><DT><DD><TT><PRE>
-qname?var1,var2
-</PRE></TT></DL>
-retrieves a message from the head of the same buffer,
-and stores the values of its two fields in variables
-<TT>var1</TT>
-and
-<TT>var2</TT>.
-<br>&#32;<br>
-The receive statement is executable only if the source channel
-is non-empty.
-<br>&#32;<br>
-If more parameters are sent per message than were declared
-for the message channel, the redundant parameters are lost.
-If fewer parameters are sent than declared,
-the value of the remaining parameters is undefined.
-Similarly, if the receive operation tries to retrieve more
-parameters than available, the value of the extra parameters is
-undefined; if it receives fewer than the number of parameters
-sent, the extra information is lost.
-<br>&#32;<br>
-An alternative, and equivalent, notation for the
-send and receive operations is to structure the
-message fields with parentheses, as follows:
-<DL><DT><DD><TT><PRE>
-qname!expr1(expr2,expr3)
-qname?var1(var2,var3)
-</PRE></TT></DL>
-In the above case, we assume that
-<TT>qname</TT>
-was declared to hold messages consisting of three fields.
-<P>
-Some or all of the arguments of the receive operation
-can be given as constants instead of as variables:
-<DL><DT><DD><TT><PRE>
-qname?cons1,var2,cons2
-</PRE></TT></DL>
-In this case, an extra condition on the executability of the
-receive operation is that the value of all message fields
-specified as constants match the value of the corresponding
-fields in the message that is to be received.
-</P>
-<br>&#32;<br>
-Here is an example that uses some of the mechanisms introduced
-so far.
-<DL><DT><DD><TT><PRE>
-proctype A(chan q1)
-{	chan q2;
-	q1?q2;
-	q2!123
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-proctype B(chan qforb)
-{	int x;
-	qforb?x;
-	printf("x = %d\n", x)
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-init {
-	chan qname = [1] of { chan };
-	chan qforb = [1] of { int };
-	run A(qname);
-	run B(qforb);
-	qname!qforb
-}
-</PRE></TT></DL>
-The value printed by the process of type
-<TT>B</TT>
-will be
-<TT>123</TT>.
-<br>&#32;<br>
-A predefined function
-<TT>len(qname)</TT>
-returns the number of messages currently
-stored in channel
-<TT>qname</TT>.
-Two shorthands for the most common uses of this
-function are
-<TT>empty(qname)</TT>
-and
-<TT>full(qname)</TT>,
-with the obvious connotations.
-<br>&#32;<br>
-Since all expressions must be side-effect free,
-it is not valid to say:
-<DL><DT><DD><TT><PRE>
-(qname?var == 0)
-</PRE></TT></DL>
-or
-<DL><DT><DD><TT><PRE>
-(a &gt; b &amp;&amp; qname!123)
-</PRE></TT></DL>
-We could rewrite the second example (using an atomic sequence,
-as explained further in section 1.2.1):
-<DL><DT><DD><TT><PRE>
-atomic { (a &gt; b &amp;&amp; !full(qname)) -&gt; qname!123 }
-</PRE></TT></DL>
-The meaning of the first example is ambiguous.  It could mean
-that we want the condition to be true if the receive operation
-is unexecutable.  In that case, we can rewrite it without
-side-effects as:
-<DL><DT><DD><TT><PRE>
-empty(qname)
-</PRE></TT></DL>
-It could also mean that we want the condition
-to be true when the channel does contain a message with
-value zero.
-We can specify that as follows:
-<DL><DT><DD><TT><PRE>
-atomic { qname?[0] -&gt; qname?var }
-</PRE></TT></DL>
-The first statement of this atomic sequence is
-an expression without side-effects that
-evaluates to a non-zero value only if the
-receive operation
-<DL><DT><DD><TT><PRE>
-qname?0
-</PRE></TT></DL>
-would have been executable at that
-point (i.e., channel
-<TT>qname</TT>
-contains at least one message and the oldest
-message stored consists of one message field
-equal to zero).
-Any receive statement can be turned into
-a side-effect free expression by placing square
-brackets around the list of all message parameters.
-The channel contents remain undisturbed by the
-evaluation of such expressions.
-<br>&#32;<br>
-Note carefully, however, that in non-atomic sequences
-of two statements such as
-<DL><DT><DD><TT><PRE>
-!full(qname) -&gt; qname!msgtype
-</PRE></TT></DL>
-and
-<DL><DT><DD><TT><PRE>
-qname?[msgtype] -&gt; qname?msgtype
-</PRE></TT></DL>
-the second statement is not necessarily executable
-after the first one has been executed.
-There may be race conditions when access to the channels
-is shared between several processes.
-Another process can send a message to the channel
-just after this process determined that it was not full,
-or another process can steal away the
-message just after our process determined its presence.
-<br>&#32;<br>
-Two other types of send and receive statements are used
-less frequently: sorted send and random receive.
-A sorted send operation is written with two, instead of one,
-exclamation marks, as follows:
-<DL><DT><DD><TT><PRE>
-qname!!msg
-</PRE></TT></DL>
-A sorted send operation will insert a message into the channel's buffer
-in numerical order, instead of in FIFO order.
-The channel contents are scanned from the first message towards the
-last, and the message is inserted immediately before the first message
-that follows it in numerical order.
-To determine the numerical order, all message fields are
-taken into account.
-<br>&#32;<br>
-The logical counterpart of the sorted send operation
-is the random receive.
-It is written with two, instead of one, question marks:
-<DL><DT><DD><TT><PRE>
-qname??msg
-</PRE></TT></DL>
-A random receive operation is executable if it is executable for <I>any</I>
-message that is currently buffered in a message channel (instead of
-only for the first message in the channel).
-Normal send and receive operations can freely be combined with
-sorted send and random receive operations.
-<H4>Rendezvous Communication
-</H4>
-<br>&#32;<br>
-So far we have talked about asynchronous communication between processes
-via message channels, declared in statements such as
-<DL><DT><DD><TT><PRE>
-chan qname = [N] of { byte }
-</PRE></TT></DL>
-where
-<TT>N</TT>
-is a positive constant that defines the buffer size.
-A logical extension is to allow for the declaration
-<DL><DT><DD><TT><PRE>
-chan port = [0] of { byte }
-</PRE></TT></DL>
-to define a rendezvous port.
-The channel size is zero, that is, the channel
-<TT>port</TT>
-can pass, but cannot store, messages.
-Message interactions via such rendezvous ports are
-by definition synchronous.
-Consider the following example:
-<DL><DT><DD><TT><PRE>
-#define msgtype 33
-
-chan name = [0] of { byte, byte };
-
-active proctype A()
-{	name!msgtype(124);
-	name!msgtype(121)
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-active proctype B()
-{	byte state;
-	name?msgtype(state)
-}
-</PRE></TT></DL>
-Channel
-<TT>name</TT>
-is a global rendezvous port.
-The two processes will synchronously execute their first statement:
-a handshake on message
-<TT>msgtype</TT>
-and a transfer of the value 124 to local variable
-<TT>state</TT>.
-The second statement in process
-<TT>A</TT>
-will be unexecutable,
-because there is no matching receive operation in process
-<TT>B</TT>.
-<br>&#32;<br>
-If the channel
-<TT>name</TT>
-is defined  with a non-zero buffer capacity,
-the behavior is different.
-If the buffer size is at least 2, the process of type
-<TT>A</TT>
-can complete its execution, before its peer even starts.
-If the buffer size is 1, the sequence of events is as follows.
-The process of type
-<TT>A</TT>
-can complete its first send action, but it blocks on the
-second, because the channel is now filled to capacity.
-The process of type
-<TT>B</TT>
-can then retrieve the first message and complete.
-At this point
-<TT>A</TT>
-becomes executable again and completes,
-leaving its last message as a residual in the channel.
-<br>&#32;<br>
-Rendezvous communication is binary: only two processes,
-a sender and a receiver, can be synchronized in a
-rendezvous handshake.
-<br>&#32;<br>
-As the example shows, symbolic constants can be defined
-with preprocessor macros using
-<TT>#define</TT>.
-The source text of a <small>PROMELA</small> model is translated by the standard
-C preprocessor.
-The disadvantage of defining symbolic names in this way is,
-however, that the <small>PROMELA</small> parser will only see the expanded text,
-and cannot refer to the symbolic names themselves.
-To prevent that, <small>PROMELA</small> also supports another way to define
-symbolic names, which are preserved in error reports.
-For instance, by including the declaration
-<DL><DT><DD><TT><PRE>
-mtype = { ack, msg, error, data };
-</PRE></TT></DL>
-at the top of a <small>PROMELA</small> model, the names provided between the
-curly braces are equivalent to integers of type
-<TT>byte</TT>,
-but known by their symbolic names to the <small>SPIN</small> parser and the
-verifiers it generates.
-The constant values assigned start at 1, and count up.
-There can be only one
-<TT>mtype</TT>
-declaration per model.
-<H4>1.2 Control Flow
-</H4>
-<br>&#32;<br>
-So far, we have seen only some of the basic statements
-of <small>PROMELA</small>, and the way in which they can be combined to
-model process behaviors.
-The five types of statements we have mentioned are:
-<TT>printf</TT>,
-<TT>assignment</TT>,
-<TT>condition</TT>,
-<TT>send</TT>,
-and
-<TT>receive</TT>.
-<br>&#32;<br>
-The pseudo-statement
-<TT>skip</TT>
-is syntactically and semantically equivalent to the
-condition
-<TT>(1)</TT>
-(i.e., to true), and is in fact quietly replaced with this
-expression by the lexical analyzer of <small>SPIN</small>.
-<br>&#32;<br>
-There are also five types of compound statements.
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DD>
-*
-Atomic sequences (section 1.2.1),
-<br>
-*
-Deterministic steps (section 1.2.2),
-<br>
-*
-Selections (section 1.2.3),
-<br>
-*
-Repetitions (section 1.2.4),
-<br>
-*
-Escape sequences (section 1.2.5).
-</DL>
-</dl>
-<br>&#32;<br>
-<H4>1.2.1 Atomic Sequences
-</H4>
-<br>&#32;<br>
-The simplest compound statement is the
-<TT>atomic</TT>
-sequence:
-<DL><DT><DD><TT><PRE>
-atomic {	/* swap the values of a and b */
-	tmp = b;
-	b = a;
-	a = tmp
-}
-</PRE></TT></DL>
-In the example, the values of two variables
-<TT>a</TT>
-and
-<TT>b</TT>
-are swapped in a sequence of statement executions
-that is defined to be uninterruptible.
-That is, in the interleaving of process executions, no
-other process can execute statements from the moment that
-the first statement of this sequence begins to execute until
-the last one has completed.
-<br>&#32;<br>
-It is often useful to use
-<TT>atomic</TT>
-sequences to start a series of processes in such a
-way that none of them can start executing statements
-until all of them have been initialized:
-<DL><DT><DD><TT><PRE>
-init {
-	atomic {
-		run A(1,2);
-		run B(2,3);
-		run C(3,1)
-	}
-}
-</PRE></TT></DL>
-<TT>Atomic</TT>
-sequences may be non-deterministic.
-If any statement inside an
-<TT>atomic</TT>
-sequence is found to be unexecutable, however,
-the atomic chain is broken, and another process can take over
-control.
-When the blocking statement becomes executable later,
-control can non-deterministically return to the process,
-and the atomic execution of the sequence resumes as if
-it had not been interrupted.
-<H4>1.2.2 Deterministic Steps
-</H4>
-<br>&#32;<br>
-Another way to define an indivisible sequence of actions
-is to use the
-<TT>d_step</TT>
-statement.
-In the above case, for instance, we could also have written:
-<DL><DT><DD><TT><PRE>
-d_step {	/* swap the values of a and b */
-	tmp = b;
-	b = a;
-	a = tmp
-}
-</PRE></TT></DL>
-The differences between a
-<TT>d_step</TT>
-sequence
-and an
-<TT>atomic</TT>
-sequence are:
-<UL>
-<LI>
-A
-<TT>d_step</TT>
-sequence must be completely deterministic.
-(If non-determinism is nonetheless encountered,
-it is always resolved in a fixed and deterministic
-way: i.e., the first true guard in selection or
-repetition structures is always selected.)
-<LI>
-No
-<TT>goto</TT>
-jumps into or out of a
-<TT>d_step</TT>
-sequence are permitted.
-<LI>
-The execution of a
-<TT>d_step</TT>
-sequence cannot be interrupted when a
-blocking statement is encountered.
-It is an error if any statement other than
-the first one in a
-<TT>d_step</TT>
-sequence is found to be unexecutable.
-<LI>
-A
-<TT>d_step</TT>
-sequence is executed as one single statement.
-In a way, it is a mechanism for adding new types
-of statements to the language.
-</ul>
-<br>&#32;<br>
-None of the items listed above apply to
-<TT>atomic</TT>
-sequences.
-This means that the keyword
-<TT>d_step</TT>
-can always be replaced with the keyword
-<TT>atomic</TT>,
-but the reverse is not true.
-(The main, perhaps the only, reason for using
-<TT>d_step</TT>
-sequences is to improve the efficiency of
-verifications.)
-<H4>1.2.3 Selection Structures
-</H4>
-<br>&#32;<br>
-A more interesting construct is the selection structure.
-Using the relative values of two variables
-<TT>a</TT>
-and
-<TT>b</TT>
-to choose between two options, for instance, we can write:
-<DL><DT><DD><TT><PRE>
-if
-:: (a != b) -&gt; option1
-:: (a == b) -&gt; option2
-fi
-</PRE></TT></DL>
-The selection structure above contains two execution sequences,
-each preceded by a double colon.
-Only one sequence from the list will be executed.
-A sequence can be selected only if its first statement is executable.
-The first statement is therefore called a <I>guard</I>.
-<br>&#32;<br>
-In the above example the guards are mutually exclusive, but they
-need not be.
-If more than one guard is executable, one of the corresponding sequences
-is selected nondeterministically.
-If all guards are unexecutable the process will block until at least
-one of them can be selected.
-There is no restriction on the type of statements that can be used
-as a guard: it may include sends or receives, assignments,
-<TT>printf</TT>,
-<TT>skip</TT>,
-etc.
-The rules of executability determine in each case what the semantics
-of the complete selection structure will be.
-The following example, for instance, uses receive statements
-as guards in a selection.
-<DL><DT><DD><TT><PRE>
-mtype = { a, b };
-
-chan ch = [1] of { byte };
-
-active proctype A()
-{	ch!a
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-active proctype B()
-{	ch!b
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-active proctype C()
-{	if
-	:: ch?a
-	:: ch?b
-	fi
-}
-</PRE></TT></DL>
-The example defines three processes and one channel.
-The first option in the selection structure of the process
-of type
-<TT>C</TT>
-is executable if the channel contains
-a message named
-<TT>a</TT>,
-where
-<TT>a</TT>
-is a symbolic constant defined in the
-<TT>mtype</TT>
-declaration at the start of the program.
-The second option is executable if it contains a message
-<TT>b</TT>,
-where, similarly,
-<TT>b</TT>
-is a symbolic constant.
-Which message will be available depends on the unknown
-relative speeds of the processes.
-<br>&#32;<br>
-A process of the following type will either increment
-or decrement the value of variable
-<TT>count</TT>
-once.
-<DL><DT><DD><TT><PRE>
-byte count;
-
-active proctype counter()
-{
-	if
-	:: count++
-	:: count--
-	fi
-}
-</PRE></TT></DL>
-Assignments are always executable, so the choice made
-here is truly a non-deterministic one that is independent
-of the initial value of the variable (zero in this case).
-<H4>1.2.4 Repetition Structures
-</H4>
-<br>&#32;<br>
-We can modify the above program as follows, to obtain
-a cyclic program that randomly changes the value of
-the variable up or down, by replacing the selection
-structure with a repetition.
-<DL><DT><DD><TT><PRE>
-byte count;
-
-active proctype counter()
-{
-	do
-	:: count++
-	:: count--
-	:: (count == 0) -&gt; break
-	od
-}
-</PRE></TT></DL>
-Only one option can be selected for execution at a time.
-After the option completes, the execution of the structure
-is repeated.
-The normal way to terminate the repetition structure is
-with a
-<TT>break</TT>
-statement.
-In the example, the loop can be
-broken only when the count reaches zero.
-Note, however, that it need not terminate since the other
-two options remain executable.
-To force termination we could modify the program as follows.
-<DL><DT><DD><TT><PRE>
-active proctype counter()
-{
-	do
-	:: (count != 0) -&gt;
-		if
-		:: count++
-		:: count--
-		fi
-	:: (count == 0) -&gt; break
-	od
-}
-</PRE></TT></DL>
-A special type of statement that is useful in selection
-and repetition structures is the
-<TT>else</TT>
-statement.
-An
-<TT>else</TT>
-statement becomes executable only if no other statement
-within the same process, at the same control-flow point,
-is executable.
-We could try to use it in two places in the above example:
-<DL><DT><DD><TT><PRE>
-active proctype counter()
-{
-	do
-	:: (count != 0) -&gt;
-		if
-		:: count++
-		:: count--
-		:: else
-		fi
-	:: else -&gt; break
-	od
-}
-</PRE></TT></DL>
-The first
-<TT>else</TT>,
-inside the nested selection structure, can never become
-executable though, and is therefore redundant (both alternative
-guards of the selection are assignments, which are always
-executable).
-The second usage of the
-<TT>else</TT>,
-however, becomes executable exactly when
-<TT>!(count != 0)</TT>
-or
-<TT>(count == 0)</TT>,
-and is therefore equivalent to using the latter as the guard that breaks from the loop.
-<br>&#32;<br>
-There is also an alternative way to exit the do-loop, without
-using a
-<TT>break</TT>
-statement:  the infamous
-<TT>goto</TT>.
-This is illustrated in the following implementation of
-Euclid's algorithm for finding the greatest common divisor
-of two non-zero, positive numbers:
-<DL><DT><DD><TT><PRE>
-proctype Euclid(int x, y)
-{
-	do
-	:: (x &gt;  y) -&gt; x = x - y
-	:: (x &lt;  y) -&gt; y = y - x
-	:: (x == y) -&gt; goto done
-	od;
-done:
-	skip
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-init { run Euclid(36, 12) }
-</PRE></TT></DL>
-The
-<TT>goto</TT>
-in this example jumps to a label named
-<TT>done</TT>.
-Since a label can only appear before a statement,
-we have added the dummy statement
-<TT>skip</TT>.
-Like a
-<TT>skip</TT>,
-a
-<TT>goto</TT>
-statement is always executable and has no other
-effect than to change the control-flow point
-of the process that executes it.
-<br>&#32;<br>
-As a final example, consider the following implementation of
-a Dijkstra semaphore, which is implemented with the help of
-a synchronous channel.
-<DL><DT><DD><TT><PRE>
-#define p	0
-#define v	1
-
-chan sema = [0] of { bit };
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-active proctype Dijkstra()
-{	byte count = 1;
-
-	do
-	:: (count == 1) -&gt;
-		sema!p; count = 0
-	:: (count == 0) -&gt;
-		sema?v; count = 1
-	od	
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-active [3] proctype user()
-{	do
-	:: sema?p;
-	   /* critical section */
-	   sema!v;
-	   /* non-critical section */
-	od
-}
-</PRE></TT></DL>
-The semaphore guarantees that only one of the three user processes
-can enter its critical section at a time.
-It does not necessarily prevent the monopolization of
-the access to the critical section by one of the processes.
-<br>&#32;<br>
-<small>PROMELA</small> does not have a mechanism for defining functions or
-procedures.  Where necessary, though, these may be
-modeled with the help of additional processes.
-The return value of a function, for instance, can be passed
-back to the calling process via global variables or messages.
-The following program illustrates this by recursively
-calculating the factorial of a number
-<TT>n</TT>.
-<DL><DT><DD><TT><PRE>
-proctype fact(int n; chan p)
-{	chan child = [1] of { int };
-	int result;
-
-	if
-	:: (n &lt;= 1) -&gt; p!1
-	:: (n &gt;= 2) -&gt;
-		run fact(n-1, child);
-		child?result;
-		p!n*result
-	fi
-}
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-init
-{	chan child = [1] of { int };
-	int result;
-
-	run fact(7, child);
-	child?result;
-	printf("result: %d\n", result)
-}
-</PRE></TT></DL>
-Each process creates a private channel and uses it
-to communicate with its direct descendant.
-There are no input statements in <small>PROMELA</small>.
-The reason is that models must always be complete to
-allow for logical verifications, and input statements
-would leave at least the source of some information unspecified.
-A way to read input
-would presuppose a source of information that is not
-part of the model.
-<br>&#32;<br>
-We have already discussed a few special types of statement:
-<TT>skip</TT>,
-<TT>break</TT>,
-and
-<TT>else</TT>.
-Another statement in this class is the
-<TT>timeout</TT>.
-The
-<TT>timeout</TT>
-is comparable to a system level
-<TT>else</TT>
-statement: it becomes executable if and only if no other
-statement in any of the processes is executable.
-<TT>Timeout</TT>
-is a modeling feature that provides for an escape from a
-potential deadlock state.
-The
-<TT>timeout</TT>
-takes no parameters, because the types of properties we
-would like to prove for <small>PROMELA</small> models must be proven independent
-of all absolute and relative timing considerations.
-In particular, the relative speeds of processes can never be
-known with certainty in an asynchronous system.
-<H4>1.2.5 Escape Sequences
-</H4>
-<br>&#32;<br>
-The last type of compound structure to be discussed is the
-<TT>unless</TT>
-statement.
-It is used as follows:
-<DL><DT><DD><TT><PRE>
-{ P } unless { E }
-</PRE></TT></DL>
-where the letters
-<TT>P</TT>
-and
-<TT>E</TT>
-represent arbitrary <small>PROMELA</small> fragments.
-Execution of the
-<TT>unless</TT>
-statement begins with the execution of statements from
-<TT>P</TT>.
-Before each statement execution in
-<TT>P</TT>
-the executability of the first statement of
-<TT>E</TT>
-is checked, using the normal <small>PROMELA</small> semantics of executability.
-Execution of statements from
-<TT>P</TT>
-proceeds only while the first statement of
-<TT>E</TT>
-remains unexecutable.
-The first time that this `guard of the escape sequence'
-is found to be executable, control changes to it,
-and execution continues as defined for
-<TT>E</TT>.
-Individual statement executions remain indivisible,
-so control can only change from inside
-<TT>P</TT>
-to the start of
-<TT>E</TT>
-in between individual statement executions.
-If the guard of the escape sequence
-does not become executable during the
-execution of
-<TT>P</TT>,
-then it is skipped entirely when
-<TT>P</TT>
-terminates.
-<br>&#32;<br>
-An example of the use of escape sequences is:
-<DL><DT><DD><TT><PRE>
-A;
-do
-:: b1 -&gt; B1
-:: b2 -&gt; B2
-...
-od
-unless { c -&gt; C };
-D
-</PRE></TT></DL>
-As shown in the example, the curly braces around the main sequence
-(or the escape sequence) can be deleted if there can be no confusion
-about which statements belong to those sequences.
-In the example, condition
-<TT>c</TT>
-acts as a watchdog on the repetition construct from the main sequence.
-Note that this is not necessarily equivalent to the construct
-<DL><DT><DD><TT><PRE>
-A;
-do
-:: b1 -&gt; B1
-:: b2 -&gt; B2
-...
-:: c -&gt; break
-od;
-C; D
-</PRE></TT></DL>
-if
-<TT>B1</TT>
-or
-<TT>B2</TT>
-are non-empty.
-In the first version of the example, execution of the iteration can
-be interrupted at <I>any</I> point inside each option sequence.
-In the second version, execution can only be interrupted at the
-start of the option sequences.
-<H4>1.3 Correctness Properties
-</H4>
-<br>&#32;<br>
-There are three ways to express correctness properties in <small>PROMELA</small>,
-using:
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DD>
-<br>
-*
-Assertions (section 1.3.1),
-<br>
-*
-Special labels (section 1.3.2),
-<br>
-*
-<TT>Never</TT>
-claims (section 1.3.3).
-</DL>
-</dl>
-<br>&#32;<br>
-<H4>1.3.1 Assertions
-</H4>
-<br>&#32;<br>
-Statements of the form
-<DL><DT><DD><TT><PRE>
-assert(expression)
-</PRE></TT></DL>
-are always executable.
-If the expression evaluates to a non-zero value (i.e., the
-corresponding condition holds), the statement has no effect
-when executed.
-The correctness property expressed, though, is that it is
-impossible for the expression to evaluate to zero (i.e., for
-the condition to be false).
-A failing assertion will cause execution to be aborted.
-<H4>1.3.2 Special Labels
-</H4>
-<br>&#32;<br>
-Labels in a <small>PROMELA</small> specification ordinarily serve as
-targets for unconditional
-<TT>goto</TT>
-jumps, as usual.
-There are, however, also three types of labels that
-have a special meaning to the verifier.
-We discuss them in the next three subsections.
-<H4>1.3.2.1 End-State Labels
-</H4>
-<br>&#32;<br>
-When a <small>PROMELA</small> model is checked for reachable deadlock states
-by the verifier, it must be able to distinguish valid <I>end state</I>s
-from invalid ones.
-By default, the only valid end states are those in which
-every <small>PROMELA</small> process that was instantiated has reached the end of
-its code.
-Not all <small>PROMELA</small> processes, however, are meant to reach the
-end of their code.
-Some may very well linger in a known wait
-state, or they may sit patiently in a loop
-ready to spring into action when new input arrives.
-<br>&#32;<br>
-To make it clear to the verifier that these alternate end states
-are also valid, we can define special end-state labels.
-We can do so, for instance, in the process type
-<TT>Dijkstra</TT>,
-from an earlier example:
-<DL><DT><DD><TT><PRE>
-proctype Dijkstra()
-{	byte count = 1;
-
-end:	do
-	:: (count == 1) -&gt;
-		sema!p; count = 0
-	:: (count == 0) -&gt;
-		sema?v; count = 1
-	od	
-}
-</PRE></TT></DL>
-The label
-<TT>end</TT>
-defines that it is not an error if, at the end of an
-execution sequence, a process of this type
-has not reached its closing curly brace, but waits at the label.
-Of course, such a state could still be part of a deadlock state, but
-if so, it is not caused by this particular process.
-<br>&#32;<br>
-There may be more than one end-state label per <small>PROMELA</small> model.
-If so, all labels that occur within the same process body must
-be unique.
-The rule is that every label name with the prefix
-<TT>end</TT>
-is taken to be an end-state label.
-<H4>1.3.2.2 Progress-State Labels
-</H4>
-<br>&#32;<br>
-In the same spirit, <small>PROMELA</small> also allows for the definition of
-<TT>progress</TT>
-labels.
-Passing a progress label during an execution is interpreted
-as a good thing:  the process is not just idling while
-waiting for things to happen elsewhere, but is making
-effective progress in its execution.
-The implicit correctness property expressed here is that any
-infinite execution cycle allowed by the model that does not
-pass through at least one of these progress labels is a
-potential starvation loop.
-In the
-<TT>Dijkstra</TT>
-example, for instance, we can label the
-successful passing of a semaphore test as progress and
-ask a verifier to make sure that there is no cycle elsewhere
-in the system.
-<DL><DT><DD><TT><PRE>
-proctype Dijkstra()
-{	byte count = 1;
-
-end:	do
-	:: (count == 1) -&gt;
-progress:	sema!p; count = 0
-	:: (count == 0) -&gt;
-		sema?v; count = 1
-	od	
-}
-</PRE></TT></DL>
-If more than one state carries a progress label,
-variations with a common prefix are again valid.
-<H4>1.3.2.3 Accept-State Labels
-</H4>
-<br>&#32;<br>
-The last type of label, the accept-state label, is used
-primarily in combination with
-<TT>never</TT>
-claims.
-Briefly, by labeling a state with any label starting
-with the prefix
-<TT>accept</TT>
-we can ask the verifier to find all cycles that <I>do</I>
-pass through at least one of those labels.
-The implicit correctness claim is that this cannot happen.
-The primary place where accept labels are used is inside
-<TT>never</TT>
-claims.
-We discuss
-<TT>never</TT>
-claims next.
-<H4>1.3.3 Never Claims
-</H4>
-<br>&#32;<br>
-Up to this point we have talked about the specification
-of correctness criteria with assertions
-and with three special types of labels.
-Powerful types of correctness criteria can already
-be expressed with these tools, yet so far our only option is
-to add them to individual
-<TT>proctype</TT>
-declarations.
-We can, for instance, express the claim ``every system state
-in which property
-<TT>P</TT>
-is true eventually leads to a system state in which property
-<TT>Q</TT>
-is true,'' with an extra monitor process, such as:
-<DL><DT><DD><TT><PRE>
-active proctype monitor()
-{
-progress:
-	do
-	:: P -&gt; Q
-	od
-}
-</PRE></TT></DL>
-If we require that property
-<TT>P</TT>
-must <I>remain</I> true while we are waiting for
-<TT>Q</TT>
-to become true, we can try to change this to:
-<DL><DT><DD><TT><PRE>
-active proctype monitor()
-{
-progress:
-	do
-	:: P -&gt; assert(P || Q)
-	od
-}
-</PRE></TT></DL>
-but this does not quite do the job.
-Note that we cannot make any assumptions about the
-relative execution speeds of processes in a <small>PROMELA</small> model.
-This means that if in the remainder of the system the
-property
-<TT>P</TT>
-becomes true, we can move to the state just before the
-<TT>assert</TT>,
-and wait there for an unknown amount of time (anything between
-a zero delay and an infinite delay is possible here, since
-no other synchronizations apply).
-If
-<TT>Q</TT>
-becomes true, we may pass the assertion, but we need not
-do so.
-Even if
-<TT>P</TT>
-becomes false only <I>after</I>
-<TT>Q</TT>
-has become true, we may still fail the assertion,
-as long as there exists some later state where neither
-<TT>P</TT>
-nor
-<TT>Q</TT>
-is true.
-This is clearly unsatisfactory, and we need another mechanism
-to express these important types of liveness properties.
-<H4>The Connection with Temporal Logic
-</H4>
-<br>&#32;<br>
-A general way to express system properties of the type we
-have just discussed is to use linear time temporal logic (LTL)
-formulae.
-Every <small>PROMELA</small> expression is automatically also a valid LTL formula.
-An LTL formula can also contain the unary temporal operators &#164;
-(pronounced always), &#186; (pronounced eventually), and
-two binary temporal operators
-<TT>U</TT>
-(pronounced weak until) and
-<B><I>U</I></B>
-(pronounced strong until).
-<br>&#32;<br>
-Where the value of a <small>PROMELA</small> expression without temporal operators can be
-defined uniquely for individual system states, without further context,
-the truth value of an LTL formula is defined for sequences of states:
-specifically, it is defined for the first state of a given infinite
-sequence of system states (a trace).
-Given, for instance, the sequence of system states:
-<DL><DT><DD><TT><PRE>
-s0;s1;s2;...
-</PRE></TT></DL>
-the LTL formula
-<TT>pUq</TT>,
-with
-<TT>p</TT>
-and
-<TT>q</TT>
-standard <small>PROMELA</small> expressions, is true for
-<TT>s0</TT>
-either if
-<TT>q</TT>
-is true in
-<TT>s0</TT>,
-or if
-<TT>p</TT>
-is true in
-<TT>s0</TT>
-and
-<TT>pUq</TT>
-holds for the remainder of the sequence after
-<TT>s0</TT>.
-<br>&#32;<br>
-Informally,
-<TT>pUq</TT>
-says that
-<TT>p</TT>
-is required to hold at least until
-<TT>q</TT>
-becomes true.
-If, instead, we would write <TT>p</TT><B><I>U</I></B><TT>q</TT>,
-then we also require that there exists at least
-one state in the sequence where
-<TT>q</TT>
-does indeed become true.
-<br>&#32;<br>
-The temporal operators &#164; and &#186;
-can be defined in terms of the strong until operator
-<B><I>U</I></B>,
-as follows.
-<DL><DT><DD><TT><PRE>
-&#164; p = !&#186; !p = !(true <B><I>U</I></B> !p)
-</PRE></TT></DL>
-Informally, &#164;
-<TT>p</TT>
-says that property
-<TT>p</TT>
-must hold in all states of a trace, and &#186;
-<TT>p</TT>
-says that
-<TT>p</TT>
-holds in at least one state of the trace.
-<br>&#32;<br>
-To express our original example requirement: ``every system state
-in which property
-<TT>P</TT>
-is true eventually leads to a system state in which property
-<TT>Q</TT>
-is true,''
-we can write the LTL formula:
-<DL><DT><DD><TT><PRE>
-&#164; (P -&gt; &#186; Q)
-</PRE></TT></DL>
-where the logical implication symbol
-<TT>-&gt;</TT>
-is defined in the usual way as
-<DL><DT><DD><TT><PRE>
-P -&gt; Q means !P || Q
-</PRE></TT></DL>
-<H4>Mapping LTL Formulae onto Never Claims
-</H4>
-<br>&#32;<br>
-<small>PROMELA</small> does not include syntax for specifying LTL formulae
-directly, but it relies on the fact that every such
-formula can be translated into a special type of
-automaton, known as a B&uuml;chi automaton.
-In the syntax of <small>PROMELA</small> this automaton is called a
-<TT>never</TT>
-claim.
-If you don't care too much about the details of
-<TT>never</TT>
-claims, you can skip the remainder of this section and
-simply remember that <small>SPIN</small> can convert any LTL formula
-automatically into the proper never claim syntax with
-the command:
-<DL><DT><DD><TT><PRE>
-spin -f "...formula..."
-</PRE></TT></DL>
-Here are the details.
-The syntax of a never claim is:
-<DL><DT><DD><TT><PRE>
-never {
-	...
-}
-</PRE></TT></DL>
-where the dots can contain any <small>PROMELA</small> fragment, including
-arbitrary repetition, selection, unless constructs,
-jumps, etc.
-<br>&#32;<br>
-There is an important difference in semantics between a
-<TT>proctype</TT>
-declaration and a
-<TT>never</TT>
-claim.
-Every statement inside a
-<TT>never</TT>
-claim is interpreted as a proposition, i.e., a condition.
-A
-<TT>never</TT>
-claim should therefore only contain expressions and never
-statements that can have side-effects (assignments, sends or
-receives, run-statements, etc.)
-<br>&#32;<br>
-<TT>Never</TT>
-claims are used to express behaviors that are considered
-undesirable or illegal.
-We say that a
-<TT>never</TT>
-claim is `matched' if the undesirable behavior can be realized,
-contrary to the claim, and thus the correctness requirement violated.
-The claims are evaluated over system executions, that is, the
-propositions that are listed in the claim are evaluated over the
-traces from the remainder of the system.
-The claim, therefore, should not alter that behavior: it merely
-monitors it.
-Every time that the system reaches a new state, by asynchronously
-executing statements from the model, the claim will evaluate the
-appropriate propositions to determine if a counter-example can
-be constructed to the implicit LTL formula that is specified.
-<br>&#32;<br>
-Since LTL formulae are only defined for infinite executions,
-the behavior of a
-<TT>never</TT>
-claim can only be matched by an infinite system execution.
-This by itself would restrict us to the use of progress labels
-and accept labels as the only means we have discussed so far
-for expressing properties of infinite behaviors.
-To conform to standard omega automata theory, the behaviors of
-<TT>never</TT>
-claims are expressed exclusively with
-<TT>accept</TT>
-labels (never with
-<TT>progress</TT>
-labels).
-To match a claim, therefore, an infinite sequence of true propositions
-must exist, at least one of which is labeled with an
-<TT>accept</TT>
-label (inside the never claim).
-<br>&#32;<br>
-Since <small>PROMELA</small> models can also express terminating system behaviors,
-we have to define the semantics of the
-<TT>never</TT>
-claims also for those behaviors.
-To facilitate this, it is defined that a
-<TT>never</TT>
-claim can also be matched when it reaches its closing curly brace
-(i.e., when it appears to terminate).
-This semantics is based on what is usually referred to as a `stuttering
-semantics.'
-With stuttering semantics, any terminating execution can be extended
-into an equivalent infinite execution (for the purposes of evaluating
-LTL properties) by repeating (stuttering) the final state infinitely often.
-As a syntactical convenience, the final state of a
-<TT>never</TT>
-claim is defined to be accepting, i.e., it could be replaced with
-the explicit repetition construct:
-<DL><DT><DD><TT><PRE>
-accept: do :: skip od
-</PRE></TT></DL>
-Every process behavior, similarly, is (for the purposes of evaluating the
-<TT>never</TT>
-claims) thought to be extended with a dummy self-loop on all final states:
-<DL><DT><DD><TT><PRE>
-	do :: skip od
-</PRE></TT></DL>
-(Note the
-<TT>accept</TT>
-labels only occur in the
-<TT>never</TT>
-claim, not in the system.)
-<H4>The Semantics of a Never Claim
-</H4>
-<br>&#32;<br>
-<TT>Never</TT>
-claims are probably the hardest part of the language to understand,
-so it is worth spending a few extra words on them.
-On an initial reading, feel free to skip the remainder of this
-section.
-<br>&#32;<br>
-The difference between a
-<TT>never</TT>
-claim and the remainder of a <small>PROMELA</small> system can be explained
-as follows.
-A <small>PROMELA</small> model defines an asynchronous interleaving product of the
-behaviors of individual processes.
-Given an arbitrary system state, its successor states are
-conceptually obtained in two steps.
-In a first step, all the executable statements in the
-individual processes are identified.
-In a second step, each one of these statements is executed,
-each one producing one potential successor for the current state.
-The complete system behavior is thus defined recursively and
-represents all possible interleavings of the individual process behaviors.
-It is this asynchronous product machine that we call the `global
-system behavior'.
-<br>&#32;<br>
-The addition of a
-<TT>never</TT>
-claim defines a <I>synchronous</I> product of the global system behavior
-with the behavior expressed in the claim.
-This synchronous product can be thought of as the construction of a
-new global state machine, in which every state is defined as a pair
-<TT>(s,n)</TT>
-with
-<TT>s</TT>
-a state from the global system (the asynchronous product of processes), and
-<TT>n</TT>
-a state from the claim.
-Every transition in the new global machine is similarly defined by a pair
-of transitions, with the first element a statement from the system, and the
-second a proposition from the claim.
-In other words, every transition in this final synchronous product is
-defined as a joint transition of the system and the claim.
-Of course, that transition can only occur if the proposition from the
-second half of the transition pair evaluates to true in the current state
-of the system (the first half of the state pair).
-<H4>Examples
-</H4>
-<br>&#32;<br>
-To manually translate an LTL formula into a
-<TT>never</TT>
-claim (e.g. forgoing the builtin translation that <small>SPIN</small>
-offers), we must carefully consider whether the
-formula expresses a positive or a negative property.
-A positive property expresses a good behavior that we
-would like our system to have.
-A negative property expresses a bad behavior that we
-claim the system does not have.
-A
-<TT>never</TT>
-claim can express only negative claims, not positive ones.
-Fortunately, the two are exchangeable:  if we want to express
-that a good behavior is unavoidable, we can formalize all
-ways in which the good behavior could be violated, and express
-that in the
-<TT>never</TT>
-claim.
-<br>&#32;<br>
-Suppose that the LTL formula &#186;&#164;
-<TT>p</TT>,
-with
-<TT>p</TT>
-a <small>PROMELA</small> expression, expresses a negative claim
-(i.e., it is considered a correctness violation if
-there exists any execution sequence in which
-<TT>p</TT>
-can eventually remain true infinitely long).
-This can be written in a
-<TT>never</TT>
-claim as:
-<DL><DT><DD><TT><PRE>
-never {	/* &lt;&gt;[]p */
-	do
-	:: skip	/* after an arbitrarily long prefix */
-	:: p -&gt; break	/* p becomes true */
-	od;
-accept:	do
-	:: p	/* and remains true forever after */
-	od
-}
-</PRE></TT></DL>
-Note that in this case the claim does not terminate, and
-also does not necessarily match all system behaviors.
-It is sufficient if it precisely captures all violations
-of our correctness requirement, and no more.
-<br>&#32;<br>
-If the LTL formula expressed a positive property, we first
-have to invert it to the corresponding negative property
-<TT>&#186;!p</TT>
-and translate that into a
-<TT>never</TT>
-claim.
-The requirement now says that it is a violation if
-<TT>p</TT>
-does not hold infinitely long.
-<DL><DT><DD><TT><PRE>
-never {	/* &lt;&gt;!p*/
-	do
-	:: skip
-	:: !p -&gt; break
-	od
-}
-</PRE></TT></DL>
-We have used the implicit match of a claim upon reaching the
-closing terminating brace.
-Since the first violation of the property suffices to disprove
-it, we could also have written:
-<DL><DT><DD><TT><PRE>
-never {	/* &lt;&gt;!p*/
-	do
-	:: p
-	:: !p -&gt; break
-	od
-}
-</PRE></TT></DL>
-or, if we abandon the connection with LTL for a moment,
-even more tersely as:
-<DL><DT><DD><TT><PRE>
-never { do :: assert(p) od }
-</PRE></TT></DL>
-Suppose we wish to express that it is a violation of our
-correctness requirements if there exists any execution in
-the system where
-<TT>&#164; (p -&gt; &#186; q)</TT>
-is violated (i.e., the negation of this formula is satisfied).
-The following
-<TT>never</TT>
-claim expresses that property:
-<DL><DT><DD><TT><PRE>
-never {
-	do
-	:: skip
-	:: p &amp;&amp; !q -&gt; break
-	od;
-accept:
-	do
-	:: !q
-	od
-}
-</PRE></TT></DL>
-Note that using
-<TT>(!p || q)</TT>
-instead of
-<TT>skip</TT>
-in the first repetition construct would imply a check for just
-the first occurrence of proposition
-<TT>p</TT>
-becoming true in the execution sequence, while
-<TT>q</TT>
-is false.
-The above formalization checks for all occurrences, anywhere in a trace.
-<br>&#32;<br>
-Finally, consider a formalization of the LTL property
-<TT>&#164; (p -&gt; (q U r))</TT>.
-The corresponding claim is:
-<DL><DT><DD><TT><PRE>
-never {
-	do
-	:: skip		/* to match any occurrence */
-	:: p &amp;&amp;  q &amp;&amp; !r -&gt; break
-	:: p &amp;&amp; !q &amp;&amp; !r -&gt; goto error
-	od;
-	do
-	::  q &amp;&amp; !r
-	:: !q &amp;&amp; !r -&gt; break
-	od;
-error:	skip
-}
-</PRE></TT></DL>
-Note again the use of
-<TT>skip</TT>
-instead of
-<TT>(!p || r)</TT>
-to avoid matching just the first occurrence of
-<TT>(p &amp;&amp; !r)</TT>
-in a trace.
-<H4>1.4 Predefined Variables and Functions
-</H4>
-<br>&#32;<br>
-The following predefined variables and functions
-can be especially useful in
-<TT>never</TT>
-claims.
-<br>&#32;<br>
-The predefined variables are:
-<TT>_pid</TT>
-and
-<TT>_last</TT>.
-<br>&#32;<br>
-<TT>_pid</TT>
-is a predefined local variable in each process
-that holds the unique instantiation number for
-that process.
-It is always a non-negative number.
-<br>&#32;<br>
-<TT>_last</TT>
-is a predefined global variable that always holds the
-instantiation number of the process that performed the last
-step in the current execution sequence.
-Its value is not part of the system state unless it is
-explicitly used in a specification.
-<DL><DT><DD><TT><PRE>
-never {
-	/* it is not possible for the process with pid=1
-	 * to execute precisely every other step forever
-	 */
-accept:
-	do
-	:: _last != 1 -&gt; _last == 1
-	od
-}
-</PRE></TT></DL>
-The initial value of
-<TT>_last</TT>
-is zero.
-<br>&#32;<br>
-Three predefined functions are specifically intended to be used in
-<TT>never</TT>
-claims, and may not be used elsewhere in a model:
-<TT>pc_value(pid)</TT>,
-<TT>enabled(pid)</TT>,
-<TT>procname[pid]@label</TT>.
-<br>&#32;<br>
-The function
-<TT>pc_value(pid)</TT>
-returns the current control state
-of the process with instantiation number
-<TT>pid</TT>,
-or zero if no such process exists.
-<br>&#32;<br>
-Example:
-<DL><DT><DD><TT><PRE>
-never {
-	/* Whimsical use: claim that it is impossible
-	 * for process 1 to remain in the same control
-	 * state as process 2, or one with smaller value.
-	 */
-accept:	do
-	:: pc_value(1) &lt;= pc_value(2)
-	od
-}
-</PRE></TT></DL>
-The function
-<TT>enabled(pid)</TT>
-tells whether the process with instantiation number
-<TT>pid</TT>
-has an executable statement that it can execute next.
-<br>&#32;<br>
-Example:
-<DL><DT><DD><TT><PRE>
-never {
-	/* it is not possible for the process with pid=1
-	 * to remain enabled without ever executing
-	 */
-accept:
-	do
-	:: _last != 1 &amp;&amp; enabled(1)
-	od
-}
-</PRE></TT></DL>
-The last function
-<TT>procname[pid]@label</TT>
-tells whether the process with instantiation number
-<TT>pid</TT>
-is currently in the state labeled with
-<TT>label</TT>
-in
-<TT>proctype procname</TT>.
-It is an error if the process referred to is not an instantiation
-of that proctype.
-<H4>2 Verifications with <small>SPIN</small>
-</H4>
-<br>&#32;<br>
-The easiest way to use <small>SPIN</small> is probably on a Windows terminal
-with the Tcl/Tk implementation of <small>XSPIN</small>.
-All functionality of <small>SPIN</small>, however, is accessible from
-any plain ASCII terminal, and there is something to be
-said for directly interacting with the tool itself.
-<br>&#32;<br>
-The description in this paper gives a short walk-through of
-a common mode of operation in using the verifier.
-A more tutorial style description of the verification
-process can be found in [Ho93].
-More detail on the verification of large systems with the
-help of <small>SPIN</small>'s supertrace (bitstate) verification algorithm
-can be found in [Ho95].
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DD>
-<br>
-*
-Random and interactive simulations (section 2.1),
-<br>
-*
-Generating a verifier (section 2.2),
-<br>
-*
-Compilation for different types of searches (section 2.3),
-<br>
-*
-Performing the verification (section 2.4),
-<br>
-*
-Inspecting error traces produced by the verifier (section 2.5),
-<br>
-*
-Exploiting partial order reductions (section 2.6).
-</DL>
-</dl>
-<br>&#32;<br>
-<H4>2.1 Random and Interactive Simulations
-</H4>
-<br>&#32;<br>
-Given a model in <small>PROMELA</small>, say stored in a file called
-<TT>spec</TT>,
-the easiest mode of operation is to perform a random simulation.
-For instance,
-<DL><DT><DD><TT><PRE>
-spin -p spec
-</PRE></TT></DL>
-tells <small>SPIN</small> to perform a random simulation, while printing the
-process moves selected for execution at each step (by default
-nothing is printed, other than explicit
-<TT>printf</TT>
-statements that appear in the model itself).
-A range of options exists to make the traces more verbose,
-e.g., by adding printouts of local variables (add option
-<TT>-l</TT>),
-global variables (add option
-<TT>-g</TT>),
-send statements (add option
-<TT>-s</TT>),
-or receive statements (add option
-<TT>-r</TT>).
-Use option
-<TT>-n</TT>N
-(with N any number) to fix the seed on <small>SPIN</small>'s internal
-random number generator, and thus make the simulation runs
-reproducible.
-By default the current time is used to seed the random number
-generator.
-For instance:
-<DL><DT><DD><TT><PRE>
-spin -p -l -g -r -s -n1 spec
-</PRE></TT></DL>
-<br>&#32;<br>
-If you don't like the system randomly resolving non-deterministic
-choices for you, you can select an interactive simulation:
-<DL><DT><DD><TT><PRE>
-spin -i -p spec
-</PRE></TT></DL>
-In this case you will be offered a menu with choices each time
-the execution could proceed in more than one way.
-<br>&#32;<br>
-Simulations, of course, are intended primarily for the
-debugging of a model.  They cannot prove anything about it.
-Assertions will be evaluated during simulation runs, and
-any violations that result will be reported, but none of
-the other correctness requirements can be checked in this way.
-<H4>2.2 Generating the Verifier
-</H4>
-<br>&#32;<br>
-A model-specific verifier is generated as follows:
-<DL><DT><DD><TT><PRE>
-spin -a spec
-</PRE></TT></DL>
-This generates a C program in a number of files (with names
-starting with
-<TT>pan</TT>).
-<H4>2.3 Compiling the Verifier
-</H4>
-<br>&#32;<br>
-At this point it is good to know the physical limitations of
-the computer system that you will run the verification on.
-If you know how much physical (not virtual) memory your system
-has, you can take advantage of that.
-Initially, you can simply compile the verifier for a straight
-exhaustive verification run (constituting the strongest type
-of proof if it can be completed).
-Compile as follows.
-<DL><DT><DD><TT><PRE>
-pcc -o pan pan.c		# standard exhaustive search
-</PRE></TT></DL>
-If you know a memory bound that you want to restrict the run to
-(e.g., to avoid paging), find the nearest power of 2 (e.g., 23
-for the bound 2<sup>23</sup> bytes) and compile as follows.
-<DL><DT><DD><TT><PRE>
-pcc '-DMEMCNT=23' -o pan pan.c
-</PRE></TT></DL>
-or equivalently in terms of MegaBytes:
-<DL><DT><DD><TT><PRE>
-pcc '-DMEMLIM=8' -o pan pan.c
-</PRE></TT></DL>
-If the verifier runs out of memory before completing its task,
-you can decide to increase the bound or to switch to a frugal
-supertrace verification.  In the latter case, compile as follows.
-<DL><DT><DD><TT><PRE>
-pcc -DBITSTATE -o pan pan.c
-</PRE></TT></DL>
-<H4>2.4 Performing the Verification
-</H4>
-<br>&#32;<br>
-There are three specific decisions to make to
-perform verifications optimally: estimating the
-size of the reachable state space (section 2.4.1),
-estimating the maximum length of a unique execution
-sequence (2.4.2), and selecting the type of correctness
-property (2.4.3).
-No great harm is done if the estimates from the first two
-steps are off.  The feedback from the verifier usually provides
-enough clues to determine quickly what the optimal settings
-for peak performance should be.
-<H4>2.4.1 Reachable States
-</H4>
-<br>&#32;<br>
-For a standard exhaustive run, you can override the default choice
-for the size of the hash table (2<sup>18</sup> slots) with option
-<TT>-w</TT>.
-For instance,
-<DL><DT><DD><TT><PRE>
-pan -w23
-</PRE></TT></DL>
-selects 2<sup>23</sup> slots.
-The hash table size should optimally be roughly equal to the number of
-reachable states you expect (within say a factor of two or three).
-Too large a number merely wastes memory, too low a number wastes
-CPU time, but neither can affect the correctness of the result.
-<br>&#32;<br>
-For a supertrace run, the hash table <I>is</I> the memory arena, and
-you can override the default of 2<sup>22</sup> bits with any other number.
-Set it to the maximum size of physical memory you can grab without
-making the system page, again within a factor of say two or three.
-Use, for instance
-<TT>-w23</TT>
-if you expect 8 million reachable states and have access to at least
-8 million (2<sup>23</sup>) bits of memory (i.e., 2<sup>20</sup> bytes or 1 Megabyte of RAM).
-<H4>2.4.2 Search Depth
-</H4>
-<br>&#32;<br>
-By default the analyzers have a search depth restriction of 10,000 steps.
-If this isn't enough, the search will truncate at 9,999 steps (watch for
-it in the printout).
-Define a different search depth with the -m flag.
-<DL><DT><DD><TT><PRE>
-pan -m100000
-</PRE></TT></DL>
-If you exceed also this limit, it is probably good to take some
-time to consider if the model you have specified is indeed finite.
-Check, for instance, if no unbounded number of processes is created.
-If satisfied that the model is finite, increase the search depth at
-least as far as is required to avoid truncation completely.
-<br>&#32;<br>
-If you find a particularly nasty error that takes a large number of steps
-to hit, you may also set lower search depths to find the shortest variant
-of an error sequence.
-<DL><DT><DD><TT><PRE>
-pan -m40
-</PRE></TT></DL>
-Go up or down by powers of two until you find the place where the
-error first appears or disappears and then home in on the first
-depth where the error becomes apparent, and use the error trail of
-that verification run for guided simulation.
-<br>&#32;<br>
-Note that if a run with a given search depth fails to find
-an error, this does not necessarily mean that no violation of a
-correctness requirement is possible within that number of steps.
-The verifier performs its search for errors by using a standard
-depth-first graph search.  If the search is truncated at N steps,
-and a state at level N-1 happens to be reachable also within fewer
-steps from the initial state, the second time it is reached it
-will not be explored again, and thus neither will its successors.
-Those successors may contain error states that are reachable within
-N steps from the initial state.
-Normally, the verification should be run in such a way that no
-execution paths can be truncated, but to force the complete exploration
-of also truncated searches one can override the defaults with a compile-time
-flag
-<TT>-DREACH</TT>.
-When the verifier is compiled with that additional directive, the depth at
-which each state is visited is remembered, and a state is now considered
-unvisited if it is revisited via a shorter path later in the search.
-(This option cannot be used with a supertrace search.)
-<H4>2.4.3 Liveness or Safety Verification
-</H4>
-<br>&#32;<br>
-For the last, and perhaps the most critical, runtime decision:
-it must be decided if the system is to be checked for safety
-violations or for liveness violations.
-<DL><DT><DD><TT><PRE>
-pan -l	# search for non-progress cycles
-pan -a	# search for acceptance cycles
-</PRE></TT></DL>
-(In the first case, though, you must compile pan.c with -DNP as an
-additional directive. If you forget, the executable will remind you.)
-If you don't use either of the above two options, the default types of
-correctness properties are checked (assertion violations,
-completeness, race conditions, etc.).
-Note that the use of a
-<TT>never</TT>
-claim that contains
-<TT>accept</TT>
-labels requires the use of the
-<TT>-a</TT>
-flag for complete verification.
-<br>&#32;<br>
-Adding option
-<TT>-f</TT>
-restricts the search for liveness properties further under
-a standard <I>weak fairness</I> constraint:
-<DL><DT><DD><TT><PRE>
-pan -f -l	# search for weakly fair non-progress cycles
-pan -f -a	# search for weakly fair acceptance cycles
-</PRE></TT></DL>
-With this constraint, each process is required to appear
-infinitely often in the infinite trace that constitutes
-the violation of a liveness property (e.g., a non-progress cycle
-or an acceptance cycle), unless it is permanently blocked
-(i.e., has no executable statements after a certain point in
-the trace is reached).
-Adding the fairness constraint increases the time complexity
-of the verification by a factor that is linear in the number
-of active processes.
-<br>&#32;<br>
-By default, the verifier will report on unreachable code in
-the model only when a verification run is successfully
-completed.
-This default behavior can be turned off with the runtime option
-<TT>-n</TT>,
-as in:
-<DL><DT><DD><TT><PRE>
-pan -n -f -a
-</PRE></TT></DL>
-(The order in which the options such as these are listed is
-always irrelevant.)
-A brief explanation of these and other runtime options can
-be determined by typing:
-<DL><DT><DD><TT><PRE>
-pan --
-</PRE></TT></DL>
-<H4>2.5 Inspecting Error Traces
-</H4>
-<br>&#32;<br>
-If the verification run reports an error,
-any error, <small>SPIN</small> dumps an error trail into a file named
-<TT>spec.trail</TT>,
-where
-<TT>spec</TT>
-is the name of your original <small>PROMELA</small> file.
-To inspect the trail, and determine the cause of the error,
-you must use the guided simulation option.
-For instance:
-<DL><DT><DD><TT><PRE>
-spin -t -c spec
-</PRE></TT></DL>
-gives you a summary of message exchanges in the trail, or
-<DL><DT><DD><TT><PRE>
-spin -t -p spec
-</PRE></TT></DL>
-gives a printout of every single step executed.
-Add as many extra or different options as you need to pin down the error:
-<DL><DT><DD><TT><PRE>
-spin -t -r -s -l -g spec
-</PRE></TT></DL>
-Make sure the file
-<TT>spec</TT>
-didn't change since you generated the analyzer from it.
-<br>&#32;<br>
-If you find non-progress cycles, add or delete progress labels
-and repeat the verification until you are content that you have found what
-you were looking for.
-<br>&#32;<br>
-If you are not interested in the first error reported,
-use pan option
-<TT>-c</TT>
-to report on specific others:
-<DL><DT><DD><TT><PRE>
-pan -c3
-</PRE></TT></DL>
-ignores the first two errors and reports on the third one that
-is discovered.
-If you just want to count all errors and not see them, use
-<DL><DT><DD><TT><PRE>
-pan -c0
-</PRE></TT></DL>
-<H4>State Assignments
-</H4>
-<br>&#32;<br>
-Internally, the verifiers produced by <small>SPIN</small> deal with a formalization of
-a <small>PROMELA</small> model in terms of extended finite state machines.
-<small>SPIN</small> therefore assigns state numbers to all statements in the model.
-The state numbers are listed in all the relevant output to make it
-completely unambiguous (source line references unfortunately do not
-have that property).
-To confirm the precise state assignments, there is a runtime option
-to the analyzer generated:
-<DL><DT><DD><TT><PRE>
-pan -d	# print state machines
-</PRE></TT></DL>
-which will print out a table with all state assignments for each
-<TT>proctype</TT>
-in the model.
-<H4>2.6 Exploiting Partial Order Reductions
-</H4>
-<br>&#32;<br>
-The search algorithm used by <small>SPIN</small> is optimized
-according to the rules of a partial order theory explained in [HoPe94].
-The effect of the reduction, however, can be increased considerably if the verifier
-has extra information about the access of processes to global
-message channels.
-For this purpose, there are two keywords in the language that
-allow one to assert that specific channels are used exclusively
-by specific processes.
-For example, the assertions
-<DL><DT><DD><TT><PRE>
-xr q1;
-xs q2;
-</PRE></TT></DL>
-claim that the process that executes them is the <I>only</I> process
-that will receive messages from channel
-<TT>q1</TT>,
-and the <I>only</I> process that will send messages to channel
-<TT>q2</TT>.
-<br>&#32;<br>
-If an exclusive usage assertion turns out to be invalid, the
-verifier will be able to detect this, and report it as a violation
-of an implicit correctness requirement.
-<br>&#32;<br>
-Every read or write access to a message channel can introduce
-new dependencies that may diminish the maximum effect of the
-partial order reduction strategies.
-If, for instance, a process uses the
-<TT>len</TT>
-function to check the number of messages stored in a channel,
-this counts as a read access, which can in some cases invalidate
-an exclusive access pattern that might otherwise exist.
-There are two special functions that can be used to poll the
-size of a channel in a safe way that is compatible with the
-reduction strategy.
-<br>&#32;<br>
-The expression
-<TT>nfull(qname)</TT>
-returns true if channel
-<TT>qname</TT>
-is not full, and
-<TT>nempty(qname)</TT>
-returns true if channel
-<TT>qname</TT>
-contains at least one message.
-Note that the parser will not recognize the free form expressions
-<TT>!full(qname)</TT>
-and
-<TT>!empty(qname)</TT>
-as equally safe, and it will forbid constructions such as
-<TT>!nfull(qname)</TT>
-or
-<TT>!nempty(qname)</TT>.
-More detail on this aspect of the reduction algorithms can be
-found in [HoPe94].
-<H4>Keywords
-</H4>
-<br>&#32;<br>
-For reference, the following table contains all the keywords,
-predefined functions, predefined variables, and
-special label-prefixes of the language <small>PROMELA</small>,
-and refers to the section of this paper in
-which they were discussed.
-<br><img src="-.19126692.gif"><br>
-<H4>References
-</H4>
-<br>&#32;<br>
-[Ho91]
-G.J. Holzmann,
-Design and Validation of Computer Protocols,
-Prentice Hall, 1991.
-<br>&#32;<br>
-[Ho93]
-G.J. Holzmann, ``Tutorial: Design and Validation of Protocols,''
-Computer Networks and ISDN Systems,
-1993, Vol. 25, No. 9, pp. 981-1017.
-<br>&#32;<br>
-[HoPe94]
-G.J. Holzmann and D.A. Peled, ``An improvement in
-formal verification,''
-Proc. 7th Int. Conf. on Formal Description Techniques,
-FORTE94, Berne, Switzerland. October 1994.
-<br>&#32;<br>
-[Ho95]
-G.J. Holzmann, ``An Analysis of Bitstate Hashing,''
-technical report 2/95, available from author.
-<br>&#32;<br>
-[HS99]
-G.J. Holzmann, ``Software model checking: extracting
-verification models from source code,''
-Proc. Formal Methods in Software Engineering and Distributed
-Systems,
-PSTV/FORTE99, Beijing, China, Oct. 1999, Kluwer, pp. 481-497.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 3348
sys/doc/troff.html

@@ -1,3348 +0,0 @@
-<html>
-
-
-
-
-
-
-
-
-<title>
--
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Troff User's Manual
-</H1>
-<DL><DD><I>Joseph F. Ossanna<br>
-Brian W. Kernighan<br>
-<br>&#32;<br>
-bwk@research.bell-labs.com<br>
-</I></DL>
-<H4>Introduction
-</H4>
-<P>
-<I>Troff</I> and <I>nroff</I> are text processors 
-that format text for typesetter- and
-typewriter-like terminals, respectively.
-They accept lines of text interspersed with lines of
-format control information and
-format the text into a printable, paginated document
-having a user-designed style.
-<I>Troff</I> and <I>nroff</I> offer
-unusual freedom in document styling:
-arbitrary style headers and footers;
-arbitrary style footnotes;
-multiple automatic sequence numbering for paragraphs, sections, etc;
-multiple column output;
-dynamic font and point-size control;
-arbitrary horizontal and vertical local motions at any point;
-and
-a family of automatic overstriking, bracket construction, and
-line-drawing functions.
-
-
-</P>
-<P>
-<I>Troff</I>
-produces its output in a device-independent form,
-although parameterized for a specific device;
-<I>troff</I> output must be processed by a driver for that
-device to produce printed output.
-</P>
-<P>
-<I>Troff</I> and <I>nroff</I> are highly compatible with each other and it is almost always
-possible to prepare input acceptable to both.
-Conditional input is provided to enable
-the user to embed input expressly destined for either program.
-<I>Nroff</I> can prepare output directly for a variety of terminal types and
-is capable of utilizing the full resolution of each terminal.
-<I>Nroff</I> is the same program as <I>troff</I>; in fact, on Plan 9 
-<I>nroff</I> is a shell script that calls <I>troff</I> with the
-<TT>-N</TT> argument.
-</P>
-<H4>Background to the Plan 9 Edition
-</H4>
-<P>
-The primary change to <I>troff</I> and <I>nroff</I> for Plan 9 is
-support of the Unicode Standard, which was added during
-1992 and 1993.  There are two results.  First, there is much
-less need for the myriad of two-character names that are so
-much a part of <I>troff</I> lore; in Plan 9, for example, one naturally uses the
-Unicode character &#189; instead of <I>troff</I>'s <TT>\(12</TT>.
-Second, the output device, though called <TT>utf</TT>,
-is almost always a form of PostScript printer;
-the panoply of special drivers for different typesetters
-has largely disappeared.
-Unfortunately, not all PostScript printers can cope
-with Unicode characters, so there remains a need for
-programs that synthesize PostScript characters from bitmaps;
-this is especially true for Asian languages.
-</P>
-<H4>Background to the Second Edition
-</H4>
-<P>
-<I>Troff</I>
-was originally written by the late Joe Ossanna
-in about 1973, in assembly language for the
-PDP-11,
-to drive the Graphic Systems CAT typesetter.
-It was rewritten in C around 1975,
-and underwent slow but steady evolution until
-Ossanna's death late in 1977.
-</P>
-<P>
-In 1979, Brian Kernighan
-modified
-<I>troff</I>
-so that it would produce output for a variety of typesetters,
-while retaining its input specifications.
-Over the decade from 1979 to 1989,
-the internals
-have been modestly revised,
-though much of the code remains as it was when Ossanna wrote it.
-</P>
-<P>
-<I>Troff</I>
-reads parameter files
-each time it is invoked, to
-set values for machine resolution,
-legal type sizes and fonts, and character names,
-character widths
-and the like.
-<I>Troff</I>
-output is
-ASCII
-characters
-in a simple language
-that describes where each character is to be placed
-and in what size and font.
-A post-processor must be written for each device
-to convert this typesetter-independent language
-into specific instructions for that device.
-</P>
-<P>
-The output language contains information that was not readily
-identifiable in the older output.
-In the newer language, the beginning of each page, line, and word
-is marked,
-so post-processors can do device-specific optimizations
-such as sorting the data vertically or printing it boustrophedonically,
-independent of
-<I>troff</I>.
-</P>
-<P>
-Capabilities for graphics have been added:
-<I>troff</I>
-recognizes commands for drawing diagonal lines,
-circles, ellipses, circular arcs,
-and quadratic B-splines.
-There are also ways to pass arbitrary information to the output,
-unprocessed by
-<I>troff</I>.
-</P>
-<P>
-A number of limitations have been eased or eliminated.
-A document may have an arbitrary number of fonts on any page
-(if the output device permits it, of course).
-Fonts may be accessed merely by naming them;
-``mounting'' is no longer necessary.
-There are no limits on the number of characters.
-Character height and slant may be set
-independently of width.
-</P>
-<P>
-The remainder of this document contains a description of
-usage and command-line options;
-a summary of requests, escape sequences, and pre-defined number registers;
-a reference manual;
-tutorial examples;
-and a list of commonly-available characters.
-</P>
-<H4>Acknowledgements
-</H4>
-<P>
-Joe Ossanna's
-<I>troff</I>
-remains a remarkable accomplishment.
-For more than twenty years, it has proven a robust tool,
-taking unbelievable abuse from a variety of preprocessors
-and being forced into uses that were never conceived of
-in the original design,
-all with considerable grace under fire.
-</P>
-<P>
-Recent versions of <I>troff</I> have profited from
-significant code improvements by
-Jaap Akkerhuis, Dennis Ritchie, Ken Thompson, and Molly Wagner.
-UTF facilities owe much to Jaap Akkerhuis.
-Andrew Hume, Doug McIlroy, Peter Nelson and Ravi Sethi made valuable suggestions on the manual.
-I fear that the remaining bugs are my fault.
-<br>&#32;<br>
-<HR>
-<br>&#32;<br>
-</P>
-<br>&#32;<br>
-<B>Usage
-</B><P>
-<I>Troff</I> or <I>nroff</I> is invoked as
-<DL><DT><DD><TT><PRE>
-troff  <I>options  files</I>
-nroff  <I>options  files</I>
-</PRE></TT></DL>
-where <I>options</I> represents any of a number of option arguments
-and <I>files</I> represents the list of files containing the document
-to be formatted.
-An argument consisting of a single minus
-represents standard input.
-If no filenames are given input is taken from the standard input.
-The options, which may appear in any order so long as they appear
-before the files, are:
-<br><img src="-.16251.gif"><br>
-</P>
-<P>
-Each option is a separate argument;
-for example,
-<DL><DT><DD><TT><PRE>
-troff -Tutf -ms -mpictures -o4,6,8-10 <I>file1 file2</I>
-</PRE></TT></DL>
-requests formatting of pages 4, 6, and 8 through 10 of a document contained in the files
-named <I>file1</I> and <I>file2</I>,
-specifies the output in UTF,
-and invokes the macro packages <TT>-ms</TT>
-and <TT>-mpictures</TT>.
-</P>
-<P>
-Various pre- and post-processors are available for use with <I>nroff</I> and <I>troff</I>.
-These include the equation preprocessor
-<I>eqn</I>
-(for <I>troff</I> only),
-the table-construction preprocessor
-<I>tbl</I>,
-and
-<I>pic</I>
-and
-<I>grap</I>
-for various forms of graphics.
-<br>&#32;<br>
-<HR>
-<br>&#32;<br>
-</P>
-<br>&#32;<br>
-<B>Request Summary
-</B><P>
-In the following table,
-the notation &#177;<I>N</I> in the
-<B><I>column means that the forms </I></B><I>N</I><B><I>, </I></B><I>+N</I><B><I>, or </I></B><I>-N</I><B><I> are permitted,
-to set the parameter to </I></B><I>N</I><B><I>, increment it by </I></B><I>N</I><B><I>, or decrement it by </I></B><I>N</I><B><I>,
-respectively.
-Plain </I></B><I>N</I><B><I> means that the value is used to set the parameter.
-</I></B><B><I>separated by 
-</I></B>are for
-<I>troff</I>
-and
-<I>nroff</I>
-respectively.
-In the 
-<B><I>column,
-<br><img src="-.16252.gif"><br>
-<br>&#32;<br>
-<br><img src="-.16253.gif"><br>
-<br>
-<DL><DT><DD><TT><PRE>
-</I></B><TT>ab</TT><B><I>	20
-</I></B><TT>ad</TT><B><I>	4
-</I></B><TT>af</TT><B><I>	8
-</I></B><TT>am</TT><B><I>	7
-</I></B><TT>as</TT><B><I>	7
-</I></B><TT>bd</TT><B><I>	2
-</I></B><TT>bp</TT><B><I>	3
-</I></B><TT>br</TT><B><I>	4
-</I></B><TT>c2</TT><B><I>	10
-</I></B><TT>cc</TT><B><I>	10
-</I></B><TT>ce</TT><B><I>	4
-</I></B><TT>cf</TT><B><I>	19
-</I></B><TT>ch</TT><B><I>	7
-</I></B><TT>cs</TT><B><I>	2
-</I></B><TT>cu</TT><B><I>	10
-</I></B><TT>da</TT><B><I>	7
-</I></B><TT>de</TT><B><I>	7
-</I></B><TT>di</TT><B><I>	7
-</I></B><TT>ds</TT><B><I>	7
-</I></B><TT>dt</TT><B><I>	7
-</I></B><TT>ec</TT><B><I>	10
-</I></B><TT>el</TT><B><I>	16
-</I></B><TT>em</TT><B><I>	7
-</I></B><TT>eo</TT><B><I>	10
-</I></B><TT>ev</TT><B><I>	17
-</I></B><TT>ex</TT><B><I>	18
-</I></B><TT>fc</TT><B><I>	9
-</I></B><TT>fi</TT><B><I>	4
-</I></B><TT>fl</TT><B><I>	20
-</I></B><TT>fp</TT><B><I>	2
-</I></B><TT>ft</TT><B><I>	2
-</I></B><TT>hc</TT><B><I>	13
-</I></B><TT>hw</TT><B><I>	13
-</I></B><TT>hy</TT><B><I>	13
-</I></B><TT>ie</TT><B><I>	16
-</I></B><TT>if</TT><B><I>	16
-</I></B><TT>ig</TT><B><I>	20
-</I></B><TT>in</TT><B><I>	6
-</I></B><TT>it</TT><B><I>	7
-</I></B><TT>lc</TT><B><I>	9
-</I></B><TT>lg</TT><B><I>	10
-</I></B><TT>lf</TT><B><I>	20
-</I></B><TT>ll</TT><B><I>	6
-</I></B><TT>ls</TT><B><I>	5
-</I></B><TT>lt</TT><B><I>	14
-</I></B><TT>mc</TT><B><I>	20
-</I></B><TT>mk</TT><B><I>	3
-</I></B><TT>na</TT><B><I>	4
-</I></B><TT>ne</TT><B><I>	3
-</I></B><TT>nf</TT><B><I>	4
-</I></B><TT>nh</TT><B><I>	13
-</I></B><TT>nm</TT><B><I>	15
-</I></B><TT>nn</TT><B><I>	15
-</I></B><TT>nr</TT><B><I>	8
-</I></B><TT>ns</TT><B><I>	5
-</I></B><TT>nx</TT><B><I>	19
-</I></B><TT>os</TT><B><I>	5
-</I></B><TT>pc</TT><B><I>	14
-</I></B><TT>pi</TT><B><I>	19
-</I></B><TT>pl</TT><B><I>	3
-</I></B><TT>pm</TT><B><I>	20
-</I></B><TT>pn</TT><B><I>	3
-</I></B><TT>po</TT><B><I>	3
-</I></B><TT>ps</TT><B><I>	2
-</I></B><TT>rd</TT><B><I>	18
-</I></B><TT>rm</TT><B><I>	7
-</I></B><TT>rn</TT><B><I>	7
-</I></B><TT>rr</TT><B><I>	8
-</I></B><TT>rs</TT><B><I>	5
-</I></B><TT>rt</TT><B><I>	3
-</I></B><TT>so</TT><B><I>	19
-</I></B><TT>sp</TT><B><I>	5
-</I></B><TT>ss</TT><B><I>	2
-</I></B><TT>sv</TT><B><I>	5
-</I></B><TT>sy</TT><B><I>	19
-</I></B><TT>ta</TT><B><I>	9
-</I></B><TT>tc</TT><B><I>	9
-</I></B><TT>ti</TT><B><I>	6
-</I></B><TT>tl</TT><B><I>	14
-</I></B><TT>tm</TT><B><I>	20
-</I></B><TT>tr</TT><B><I>	10
-</I></B><TT>uf</TT><B><I>	10
-</I></B><TT>ul</TT><B><I>	10
-</I></B><TT>vs</TT><B><I>	5
-</I></B><TT>wh</TT><B><I>	7
-<br>&#32;<br>
-</PRE></TT></DL>
-</P>
-</I></B><br>&#32;<br>
-<B>Alphabetical Request and Section Number Cross Reference
-</B><br>&#32;<br>
-<br>&#32;<br>
-<DL><DT><DD><TT><PRE>
-<br>&#32;<br>
-<br>&#32;<br>
-<HR>
-<br>&#32;<br>
-</PRE></TT></DL>
-<br>&#32;<br>
-<B>Escape Sequences for Characters, Indicators, and Functions
-</B><br>&#32;<br>
-<br><img src="-.16254.gif"><br>
-<br>&#32;<br>
-The escape sequences
-and
-are interpreted in copy mode (&#167;7.2).
- 
-<br>&#32;<br>
-<HR>
-<br>&#32;<br>
-<br>&#32;<br>
-<B>Predefined Number Registers
-</B><br>&#32;<br>
-<br><img src="-.16255.gif"><br>
-
-
-<br>&#32;<br>
-<B>Predefined Read-Only Number Registers
-</B><br>&#32;<br>
-<br><img src="-.16256.gif"><br>
-<br>&#32;<br>
-<HR>
-<br>&#32;<br>
-<br>&#32;<br>
-<B>Reference Manual
-</B><H4>1 General Explanation
-</H4>
-<br>&#32;<br>
-<B><I>1.1.  Form of input.</I></B> 
-Input consists of <I>text lines</I>, which are destined to be printed,
-interspersed with <I>control lines</I>,
-which set parameters or otherwise control subsequent processing.
-Control lines begin with a <I>control character</I>&#8212;
-normally <TT>.</TT> (period) or <TT>'</TT> (single quote)&#8212;
-followed by a one or two character name that specifies
-a basic <I>request</I> or the substitution of
-a user-defined <I>macro</I> in place of the control line.
-The control character <TT>'</TT> suppresses the <I>break</I> function&#8212;
-the forced output of a partially filled line&#8212;
-caused by certain requests.
-The control character may be separated from the request/macro name by
-white space (spaces and/or tabs) for aesthetic reasons.
-Names should be followed by either
-space or newline.
-Control lines with unrecognized names are ignored.
-<P>
-Various special functions may be introduced anywhere in the input by
-means of an <I>escape</I> character, normally <TT>\</TT>.
-For example, the function <TT>\n</TT><I>R</I>
-causes the interpolation of the contents of the
-<I>number register R</I>
-in place of the function;
-here <I>R</I> is either a single character name
-as in <TT>\n</TT><I>x</I>,
-or a two-character name introduced by
-a left-parenthesis, as in <TT>\n(</TT><I>xx</I>.
-</P>
-<br>&#32;<br>
-<B><I>1.2.  Formatter and device resolution.</I></B> 
-<I>Troff</I> internally stores and processes dimensions in units that correspond to
-the particular device for which output is being prepared;
-values from 300 to 1200/inch are typical.
-See &#167;23.
-<I>Nroff</I> internally uses 240 units/inch,
-corresponding to the least common multiple of the
-horizontal and vertical resolutions of various
-typewriter-like output devices.
-<I>Troff</I> rounds horizontal/vertical numerical parameter input to the actual
-horizontal/vertical resolution of the output device indicated by the <TT>-T</TT> option
-(default <TT>utf</TT>).
-<I>Nroff</I> similarly rounds numerical input to the actual resolution
-of its output device
-(default Model 37 Teletype).
-<br>&#32;<br>
-<B><I>1.3.  Numerical parameter input.</I></B> 
-Both <I>nroff</I> and <I>troff</I>
-accept numerical input with the appended scale
-indicators
-shown in the following table,
-where
-<I>S</I> is the current type size in points and
-<I>V</I> is the current vertical line spacing in
-basic units.
-<br><img src="-.16257.gif"><br>
-In <I>nroff</I>, both the em and the en are taken to be equal to the
-nominal character width,
-which is output-device dependent;
-common values are 1/10 and 1/12 inch.
-Actual character widths in <I>nroff</I> need not be all the same and constructed characters
-such as <TT>-&#62;</TT> (&#8594;) are often extra wide.
-The default scaling is
-for the horizontally-oriented requests
-and functions
-and horizontal coordinates of
-for the vertically-oriented requests and functions
-and vertical coordinates of
-for the
-request;
-and
-for the requests
-and
-<I>All</I> other requests ignore any scale indicators.
-When a number register containing an already appropriately scaled number
-is interpolated to provide numerical input,
-the unit scale indicator
-<TT>u</TT> may need to be appended to prevent
-an additional inappropriate default scaling.
-The number, <I>N</I>, may be specified in decimal-fraction form
-but the parameter finally stored is rounded to an integer number of basic units.
-Internal computations are performed in integer arithmetic.
-<P>
-The <I>absolute position</I> indicator <TT>|</TT> may be prefixed
-to a number <I>N</I>
-to generate the distance to the vertical or horizontal place <I>N</I>.
-For vertically-oriented requests and functions, <TT>|</TT><I>N</I>
-becomes the distance in basic units from the current vertical place on the page or in a <I>diversion</I> (&#167;7.4)
-to the vertical place <I>N</I>.
-For <I>all</I> other requests and functions,
-<TT>|</TT><I>N</I>
-becomes the distance from
-the current horizontal place on the <I>input</I> line to the horizontal place <I>N</I>.
-For example,
-<DL><DT><DD><TT><PRE>
-.sp |3.2c
-</PRE></TT></DL>
-will space in the required direction to 3.2 centimeters from the top of the page.
-</P>
-<br>&#32;<br>
-<B><I>1.4.  Numerical expressions.</I></B> 
-Wherever numerical input is expected,
-an expression involving parentheses,
-the arithmetic operators <TT>+</TT>, <TT>-</TT>, <TT>/</TT>, <TT>*</TT>, <TT>%</TT> (mod),
-and the logical operators
-<TT>&#60;</TT>,
-<TT>&#62;</TT>,
-<TT>&#60;=</TT>,
-<TT>&#62;=</TT>,
-<TT>=</TT> (or <TT>==</TT>),
-<TT>&amp;</TT> (and),
-<TT>:</TT> (or)
-may be used.
-Except where controlled by parentheses, evaluation of expressions is left-to-right;
-there is no operator precedence.
-In the case of certain requests, an initial <TT>+</TT> or <TT>-</TT> is stripped
-and interpreted as an increment or decrement indicator respectively.
-In the presence of default scaling, the desired scale indicator must be
-attached to <I>every</I> number in an expression
-for which the desired and default scaling differ.
-For example,
-if the number register <TT>x</TT> contains 2
-and the current point size is 10,
-then
-<DL><DT><DD><TT><PRE>
-.ll (4.25i+\nxP+3)/2u
-</PRE></TT></DL>
-will set the line length to 1/2 the sum of 4.25 inches + 2 picas + 3 ems.
-<br>&#32;<br>
-<B><I>1.5.  Notation.</I></B> 
-Numerical parameters are indicated in this manual in two ways.
-&#177;<I>N</I> means that the argument may take the forms <I>N</I>, <I>+N</I>, or <I>-N</I> and
-that the corresponding effect is to set the parameter
-to <I>N</I>, to increment it by <I>N</I>, or to decrement it by <I>N</I> respectively.
-Plain <I>N</I> means that an initial algebraic sign is <I>not</I>
-an increment indicator,
-but merely the sign of <I>N</I>.
-Generally, unreasonable numerical input is either ignored
-or truncated to a reasonable value.
-For example,
-most requests expect to set parameters to non-negative
-values;
-exceptions are
-and
-The requests
-and
-restore the previous parameter value in the absence
-of an argument.
-<P>
-Single character arguments are indicated by single lower case letters
-and
-one/two character arguments are indicated by a pair of lower case letters.
-Character string arguments are indicated by multi-character mnemonics.
-</P>
-<H4>2 Font and Character Size Control
-</H4>
-<br>&#32;<br>
-<B><I>2.1.  Character set.</I></B> 
-The <I>troff</I> character set is defined by a description file specific to each output device (&#167;23).
-There are normally several regular fonts and one or more special fonts.
-Characters are input as themselves,
-as <TT>\(</TT><I>xx</I>, as <TT>\C'</TT><I>name</I><TT>'</TT>,
-or as <TT>\N'</TT><I>n</I><TT>'</TT>.
-The form <TT>\C'</TT><I>name</I><TT>'</TT>
-permits a name of any length;
-the form <TT>\N'</TT><I>n</I><TT>'</TT>
-refers to the <I>n</I>-th character on the current font,
-whether named or not.
-<P>
-Normally the input characters
-and
-are printed as `, ', and - respectively;
-and
-produce `, ', and -.
-If the character does not exist in the font, <I>troff</I> assumes the width is 1 em and
-outputs the character with a
-name as defined in Section 22.
-(This is independent of how the device handles characters unknown to it.)
-</P>
-<P>
-<I>Nroff</I> has an analogous, but different, mechanism for defining legal characters
-and how to print them.
-By default all characters are valid.
-There are such
-additional characters as may be available on
-the output device,
-such characters as may be constructed
-by overstriking or other combination,
-and those that can reasonably be mapped
-into other printable characters.
-The exact behavior is determined by a driving
-table prepared for each device.
-</P>
-<br>&#32;<br>
-<B><I>2.2.  Fonts.</I></B> 
-<I>Troff</I>
-begins execution by reading information for a set of default fonts,
-said to be
-<I>mounted</I>;
-conventionally, the first four are
-Times Roman (<TT>R</TT>),
-Times Italic
-(<TT>I</TT>),
-Times Bold
-(<TT>B</TT>),
-and
-Times Bold Italic
-(<TT>BI</TT>),
-and the last is a Special font
-containing miscellaneous characters.
-(This document uses Lucida Sans in place of Times.)
-The set of fonts and positions is determined by the device description file,
-described in &#167;23.
-<P>
-The current font, initially Roman, may be changed
-by the <TT>ft</TT> request,
-or by embedding at any desired point
-<TT>\f</TT><I>x</I>, <TT>\f(</TT><I>xx</I>, or <TT>\f</TT><I>N</I>,
-where
-<I>x</I> and <I>xx</I> are the name of a font
-and <I>N</I> is a numerical font position.
-</P>
-<P>
-It is not necessary to change to the Special font;
-characters on that font are automatically handled
-as if they were physically part of the current font.
-The Special font may actually be several fonts;
-the name <TT>S</TT>
-is reserved and is generally used for one of these.
-All special fonts must be mounted after regular fonts.
-</P>
-<P>
-<I>Troff</I> can be informed that any particular font is mounted
-by use of the <TT>fp</TT> request.
-The list of known fonts is installation dependent.
-In the subsequent discussion of font-related requests,
-<I>F</I> represents either a one/two-character
-font name or the numerical font position.
-The current font is available (as a numerical position) in the read-only number register <TT>.f</TT>.
-</P>
-<P>
-A request for a named but not-mounted font is honored
-if the font description information exists.
-In this way, there is no limit on the number of fonts that may be printed
-in any part of a document.
-Mounted fonts may be handled more efficiently,
-and they may be referred to by their mount positions,
-but there is no other difference.
-Mention of an unmounted font loads it temporarily at font position
-zero, which serves as a one-font cache.
-</P>
-<P>
-The function <TT>\S'</TT>&#177;<I>N</I><TT>'</TT>
-causes the current font to be slanted by
-&#177;<I>N</I>
-degrees.
-Not all devices support slanting.
-</P>
-<P>
-<I>Nroff</I> understands font control
-and normally underlines italic characters (see &#167;10.5).
-</P>
-<br>&#32;<br>
-<B><I>2.3.  Character size.</I></B> 
-Character point sizes available depend on the specific output device;
-a typical (historical) set of values is
-6, 7, 8, 9, 10, 11, 12, 14, 16, 18, 20, 22, 24, 28, and 36.
-This is a range of 1/12 inch to 1/2 inch.
-The <TT>ps</TT> request is used to change or restore the point size.
-Alternatively the point size may be changed between any two characters
-by embedding a <TT>\s</TT><I>N</I>
-at the desired point
-to set the size to <I>N</I>,
-or a <TT>\s</TT>&#177;<I>N</I>
-(1&#60;=<I>N</I>&#60;=9)
-to increment/decrement the size by <I>N</I>;
-<TT>\s0</TT> restores the previous size.
-Requested point size values that are between two valid
-sizes yield the larger of the two.
-<P>
-Note that through an accident of history, a construction like <TT>\s39</TT>
-is parsed as size 39, and thus converted to size 36 (given the sizes above),
-while <TT>\s40</TT>
-is parsed as size 4 followed by <TT>0</TT>.
-The forms
-<TT>\s(</TT><I>nn</I> and <TT>\s</TT>&#177;<TT>(</TT><I>nn</I>
-permit specification of sizes that would otherwise be ambiguous.
-</P>
-<P>
-The current size is available in the <TT>.s</TT> register.
-<I>Nroff</I> ignores type size requests.
-</P>
-<P>
-The function <TT>\H'</TT>&#177;<I>N</I><TT>'</TT>
-sets the height of the current font to
-<I>N</I>, or increments it by <I>+N</I>, or decrements it by <I>-N</I>;
-if <I>N=</I>0, the height is restored to the current point size.
-In each case, the width is unchanged.
-Not all devices support independent height and width for characters.
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> *The fields have the same meaning as described earlier in the Request Summary.
-</I><DT>&#32;<DD></dl>
-<br>
-<br>&#32;<br>
-<I>Request</I>	<I>Initial</I>	<I>If No</I>
-<br>
-<I>Form</I>	<I>Value</I>	<I>Argument</I>	<I>Notes</I>
-<br>&#32;<br>
-<TT>.ps</TT><I> &#177;N</I>*	10point	previous	E
-<DL COMPACT>
-<DT><DD>
-Point size
-set to &#177;<I>N</I>.
-Alternatively, embed <TT>\s</TT><I>N</I>
-or <TT>\s</TT>&#177;<I>N</I>.
-Any positive size value may be requested;
-if invalid, the next larger valid size will result, with a
-maximum of 36.
-A paired sequence
-<I>+N</I>, <I>-N</I>
-will work because the previous requested value is also remembered.
-Ignored in <I>nroff</I>.
-</dl>
-<br>&#32;<br>
-<TT>.ss</TT><I> N</I>	12/36em	ignored	E
-<DL COMPACT>
-<DT><DD>
-Space-character size
-(i.e., inter-word gap)
-is set to <I>N</I>/36 ems.
-This size is the minimum word spacing in adjusted text.
-Ignored in <I>nroff</I>.
-</dl>
-<br>&#32;<br>
-<TT>.cs</TT><I> F N M</I>	off	-	P
-<DL COMPACT>
-<DT><DD>
-Constant character space
-(width) mode is
-set on for font <I>F</I> (if mounted); the width of every character will be
-taken to be <I>N</I>/36 ems.
-If <I>M</I> is absent,
-the em is that of the character's point size;
-if <I>M</I> is given,
-the em is <I>M</I> points.
-All affected characters
-are centered in this space, including those with an actual width
-larger than this space.
-Special Font characters occurring while the current font
-is <I>F</I> are also so treated.
-If <I>N</I> is absent, the mode is turned off.
-The mode must be in effect when the characters are physically printed.
-Ignored in <I>nroff</I>.
-</dl>
-<br>&#32;<br>
-<TT>.bd</TT><I> F N</I>	off	-	P
-<DL COMPACT>
-<DT><DD>
-The characters in font <I>F</I> will be artificially
-emboldened by printing each one twice, separated by <I>N-</I>1 basic units.
-A reasonable value for <I>N</I> is 3 when the character size is near 10 points.
-If <I>N</I> is missing the embolden mode is turned off.
-The emboldening value <I>N</I> is in the <TT>.b</TT> register.
-<DT><DT>&#32;<DD>
-This paragraph is printed with <TT>.bd R 3</TT>.
-The mode must be in effect when the characters are physically printed.
-Ignored in <I>nroff</I>.
-<br>
-</dl>
-<br>&#32;<br>
-<TT>.bd S </TT><I>F N</I>	off	-	P
-<DL COMPACT>
-<DT><DD>
-The characters in the Special font
-will be emboldened whenever the current font is <I>F</I>.
-The mode must be in effect when the characters are physically printed.
-Ignored in <I>nroff</I>.
-</dl>
-<br>&#32;<br>
-<TT>.ft</TT> <I>F</I>	Roman	previous	E
-<DL COMPACT>
-<DT><DD>
-Font changed to
-<I>F</I>.
-Alternatively, embed <TT>\f</TT><I>F</I>.
-The font name <TT>P</TT> is reserved to mean the previous font,
-and the name <TT>S</TT>
-for the special font.
-</dl>
-<br>&#32;<br>
-<TT>.fp </TT><I>N F L</I>	R,I,B,...,S	ignored	-
-<DL COMPACT>
-<DT><DD>
-Font position.
-This is a statement
-that a font named <I>F</I> is associated with position <I>N</I>.
-It is a fatal error if <I>F</I> is not known.
-For fonts with names longer than two characters,
-<I>L</I>
-refers to the long name,
-and
-<I>F</I>
-becomes a synonym.
-There is generally a limit of about 10 mounted fonts.
-</dl>
-<H4>3 Page control
-</H4>
-<P>
-Top and bottom margins are not automatically provided;
-it is conventional to define two <I>macros</I> and to set <I>traps</I>
-for them at vertical positions 0 (top) and <I>-N</I> (distance <I>N</I> up from the bottom).
-See &#167;7 and Tutorial Examples &#167;T2.
-A pseudo-page transition onto the first page occurs
-either when the first <I>break</I> occurs or
-when the first <I>non-diverted</I> text processing occurs.
-Arrangements
-for a trap to occur at the top of the first page
-must be completed before this transition.
-In the following, references to the <I>current diversion</I> (&#167;7.4)
-mean that the mechanism being described works during both
-ordinary and diverted output (the former considered as the top diversion level).
-</P>
-<P>
-The limitations on <I>troff</I> and <I>nroff</I> output dimensions
-are device dependent.
-</P>
-<br>&#32;<br>
-<TT>.pl</TT><I> &#177;N</I>	11in	11in	<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Page length set to &#177;<I>N</I>.
-The current page length is available in the <TT>.p</TT> register.
-</dl>
-<br>&#32;<br>
-<TT>.bp</TT><I> &#177;N</I>	<I>N=</I>1	-	B,<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Begin page.
-The current page is ejected and a new page is begun.
-If &#177;<I>N</I> is given, the new page number will be &#177;<I>N</I>.
-Also see request <TT>ns</TT>.
-</dl>
-<br>&#32;<br>
-<TT>.pn</TT><I> &#177;N</I>	<I>N</I>=1	ignored	-
-<DL COMPACT>
-<DT><DD>
-Page number.
-The next page (when it occurs) will have the page number &#177;<I>N</I>.
-A <TT>pn</TT> must occur before the initial pseudo-page transition
-to affect the page number of the first page.
-The current page number is in the <TT>%</TT> register.
-</dl>
-<br>&#32;<br>
-<TT>.po</TT><I> &#177;N</I>	1in; 0	previous	<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Page offset.
-The current <I>left margin</I> is set to &#177;<I>N</I>.
-The <I>troff</I> initial value provides 1 inch of paper margin
-on a typical device.
-The current page offset is available in the <TT>.o</TT> register.
-</dl>
-<br>&#32;<br>
-<TT>.ne</TT><I> N</I>	-	<I>N=</I>1<I>V</I>	D,<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Need <I>N</I> vertical space.
-If the distance <I>D</I> to the next trap position (see &#167;7.5) is less than <I>N</I>,
-a forward vertical space of size <I>D</I> occurs,
-which will spring the trap.
-If there are no remaining
-traps on the page,
-<I>D</I> is the distance to the bottom of the page.
-If <I>D&#60;V</I>, another line could still be output
-and spring the trap.
-In a diversion, <I>D</I> is the distance to the <I>diversion trap</I>, if any,
-or is very large.
-</dl>
-<br>&#32;<br>
-<TT>.mk</TT><I> R</I>	none	internal	D
-<DL COMPACT>
-<DT><DD>
-Mark the current vertical place
-in an internal register (both associated with the current diversion level),
-or in register <I>R</I>, if given.
-See <TT>rt</TT> request.
-</dl>
-<br>&#32;<br>
-<TT>.rt</TT><I> &#177;N</I>	none	internal	D,<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Return <I>upward only</I> to a marked vertical place
-in the current diversion.
-If &#177;<I>N</I> (with respect to current place) is given,
-the place is &#177;<I>N</I> from the top of the page or diversion
-or, if <I>N</I> is absent, to a
-place marked by a previous <TT>mk</TT>.
-The <TT>sp</TT> request (&#167;5.3) may be used
-instead of <TT>rt</TT>
-by spacing to the absolute place stored in an explicit register,
-e.g., using
-<I>R</I> ...
-this also works when the motion is downwards.
-</dl>
-<H4>4 Text Filling, Adjusting, and Centering
-</H4>
-<br>&#32;<br>
-<B><I>4.1.  Filling and adjusting.</I></B> 
-Normally,
-words are collected from input text lines
-and assembled into an output text line
-until some word does not fit.
-An attempt is then made
-to hyphenate the word to put part
-of it into the output line.
-The spaces between the words on the output line
-are then increased to spread out the line
-to the current <I>line length</I>
-minus any current <I>indent</I>.
-A <I>word</I> is any string of characters delimited by
-the <I>space</I> character or the beginning/end of the input line.
-Any adjacent pair of words that must be kept together
-(neither split across output lines nor spread apart
-in the adjustment process)
-can be tied together by separating them with the
-<I>unpaddable space</I> character
-``<TT>\ </TT>'' (backslash-space).
-The adjusted word spacings are uniform in <I>troff</I>
-and the minimum interword spacing can be controlled
-with the <TT>ss</TT> request (&#167;2).
-In <I>nroff</I>, they are normally nonuniform because of
-quantization to character-size spaces;
-however,
-the command line option <TT>-e</TT> causes uniform
-spacing with full output device resolution.
-Filling, adjustment, and hyphenation (&#167;13) can all be
-prevented or controlled.
-The text length on the last line output is available in the <TT>.n</TT> register,
-and text baseline position on the page for this line is in the <TT>nl</TT> register.
-The text baseline high-water mark (lowest place) on the current page is in
-the <TT>.h</TT> register.
-The current horizontal output position is in the <TT>.k</TT> register.
-<P>
-An input text line
-<I>ending</I>
-with <TT>.</TT>, <TT>?</TT>, or <TT>!</TT>,
-optionally followed by any number of
-<TT>"</TT>, <TT>'</TT>, <TT>)</TT>, <TT>]</TT>, <TT>*</TT>, or
-&#8224;,
-is taken
-to be the end of a sentence, and an additional space character is
-automatically provided during filling.
-To prevent this, add <TT>\&amp;</TT>
-to the end of the input line.
-Multiple inter-word space characters found in the input are retained,
-except for trailing spaces;
-initial spaces also cause a break.
-</P>
-<P>
-When filling is in effect, a <TT>\p</TT> may be embedded or attached to a word to
-cause a break at the end of the word and have the resulting output
-line spread out to fill the current line length.
-</P>
-<P>
-A text input line that happens to begin
-with a control character can
-be made not to look like a control line
-by prefixing it with
-the non-printing, zero-width filler character <TT>\&amp;</TT>.
-Still another way is to specify output translation of some
-convenient character into the control character
-using <TT>tr</TT> (&#167;10.5).
-</P>
-<br>&#32;<br>
-<B><I>4.2.  Interrupted text.</I></B> 
-The copying of an input line in <I>nofill</I> (non-fill) mode can be interrupted
-by terminating
-the partial line with a <TT>\c</TT>.
-The next encountered input text line will be considered to be a continuation
-of the same line of input text.
-Similarly,
-a word within <I>filled</I> text may be interrupted by terminating the
-word (and line) with <TT>\c</TT>;
-the next encountered text will be taken as a continuation of the
-interrupted word.
-If the intervening control lines cause a break,
-any partial line will be forced out along with any partial word.
-<br>&#32;<br>
-<TT>.br</TT>	-	-	B
-<DL COMPACT>
-<DT><DD>
-Break.
-The filling of the line currently
-being collected is stopped and
-the line is output without adjustment.
-Text lines beginning with space characters
-(but not tabs)
-and empty text lines (blank lines) also cause a break.
-</dl>
-<br>&#32;<br>
-<TT>.fi</TT>	fill on	-	B,E
-<DL COMPACT>
-<DT><DD>
-Fill subsequent output lines.
-The register <TT>.u</TT> is 1 in fill mode and 0 in nofill mode.
-</dl>
-<br>&#32;<br>
-<TT>.nf</TT>	fill on	-	B,E
-<DL COMPACT>
-<DT><DD>
-Nofill.
-Subsequent output lines are neither filled nor adjusted.
-Input text lines are copied directly to output lines
-without regard for the current line length.
-</dl>
-<br>&#32;<br>
-<TT>.ad</TT><I> c</I>	adj, both	adjust	E
-<DL COMPACT>
-<DT><DD>
-Line adjustment is begun.
-If fill mode is not on, adjustment will be deferred until
-fill mode is back on.
-If the type indicator <I>c</I> is present,
-the adjustment type is changed as shown in the following table.
-<br><img src="-.16258.gif"><br>
-The number register <TT>.j</TT>
-contains the current value of the <TT>ad</TT>
-setting;
-its value can be recorded and used subsequently to set adjustment.
-</dl>
-<br>&#32;<br>
-<TT>.na</TT>	adjust	-	E
-<DL COMPACT>
-<DT><DD>
-Noadjust.
-Adjustment is turned off;
-the right margin will be ragged.
-The adjustment type for <TT>ad</TT> is not changed.
-Output line filling still occurs if fill mode is on.
-</dl>
-<br>&#32;<br>
-<TT>.ce</TT><I> N</I>	off	<I>N=</I>1	B,E
-<DL COMPACT>
-<DT><DD>
-Center the next <I>N</I> input text lines
-within the current available horizontal space (line-length minus indent).
-If <I>N=</I>0, any residual count is cleared.
-A break occurs after each of the <I>N</I> input lines.
-If the input line is too long,
-it will be left adjusted.
-</dl>
-<H4>5 Vertical Spacing
-</H4>
-<br>&#32;<br>
-<B><I>5.1.  Baseline spacing.</I></B> 
-The vertical spacing (<I>V</I>) between the baselines of successive
-output lines can be set
-using the <TT>vs</TT> request.
-<I>V</I> should be large enough to accommodate the character sizes
-on the affected output lines.
-For the common type sizes (9-12 points),
-usual typesetting practice is to set <I>V</I> to 2 points greater than the
-point size;
-<I>troff</I> default is 10-point type on a 12-point spacing
-(as in this document).
-The current <I>V</I> is available in the <TT>.v</TT> register.
-Multiple-<I>V</I> line separation (e.g., double spacing) may be requested
-with <TT>ls</TT>,
-but it is better to use a large <TT>vs</TT>
-instead;
-certain preprocessors assume single spacing.
-The current line spacing is available in the <TT>.L</TT> register.
-<br>&#32;<br>
-<B><I>5.2.  Extra line-space.</I></B> 
-If a word contains a tall construct requiring
-the output line containing it to have extra vertical space
-before and/or after it,
-the <I>extra-line-space</I> function <TT>\x'</TT><I>N</I><TT>'</TT>
-can be embedded in or attached to that word.
-If <I>N</I> is negative,
-the output line containing the word will
-be preceded by <I>N</I> extra vertical space;
-if <I>N</I> is positive,
-the output line containing the word
-will be followed by <I>N</I> extra vertical space.
-If successive requests for extra space apply to the same line,
-the maximum values are used.
-The most recently utilized post-line extra line-space is available in the <TT>.a</TT> register.
-<P>
-In <TT>\x'</TT>...<TT>'</TT>
-and other functions having a pair of delimiters around
-their parameter,
-the delimiter choice (here <TT>'</TT>)
-is arbitrary,
-except that it can not look like the continuation of a number expression for <I>N</I>.
-</P>
-<br>&#32;<br>
-<B><I>5.3.  Blocks of vertical space.</I></B> 
-A block of vertical space is ordinarily requested using <TT>sp</TT>,
-which honors the <I>no-space</I> mode and which does
-not space past a trap.
-A contiguous block of vertical space may be reserved using <TT>sv</TT>.
-<br>&#32;<br>
-<TT>.vs </TT><I>N</I>	12pts; 1/6in	previous	E,<B>p</B>
-<DL COMPACT>
-<DT><DD>
-Set vertical baseline spacing size <I>V</I>.
-Transient extra vertical space is available with <TT>\x</TT><I>'N'</I> (see above).
-</dl>
-<br>&#32;<br>
-<TT>.ls </TT><I>N</I>	<I>N=</I>1	previous	E
-<DL COMPACT>
-<DT><DD>
-<I>Line</I> spacing
-set to &#177;<I>N</I>.
-<I>N-</I>1 <I>V</I>s (blank lines) are
-appended to each output text line.
-Appended blank lines are omitted, if the text or previous appended blank line reached a trap position.
-</dl>
-<br>&#32;<br>
-<TT>.sp </TT><I>N</I>	-	<I>N=</I>1 <I>V</I>	B,<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Space vertically in either direction.
-If <I>N</I> is negative, the motion is backward (upward)
-and is limited to the distance to the top of the page.
-Forward (downward) motion is truncated to the distance to the
-nearest trap.
-(Recall the use of <TT>|</TT>
-from &#167;1.3.)
-If the no-space mode is on,
-no spacing occurs (see <TT>ns</TT> and <TT>rs</TT> below).
-</dl>
-<br>&#32;<br>
-<TT>.sv</TT><I> N</I>	-	<I>N=</I>1 <I>V</I>	<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Save a contiguous vertical block of size <I>N</I>.
-If the distance to the next trap is greater
-than <I>N</I>, <I>N</I> vertical space is output.
-No-space mode has no effect.
-If this distance is less than <I>N</I>,
-no vertical space is immediately output,
-but <I>N</I> is remembered for later output (see <TT>os</TT>).
-Subsequent <TT>sv</TT> requests will overwrite any still remembered <I>N</I>.
-</dl>
-<br>&#32;<br>
-<TT>.os</TT>	-	-	-
-<DL COMPACT>
-<DT><DD>
-Output saved vertical space.
-No-space mode has no effect.
-Used to finally output a block of vertical space requested
-by an earlier <TT>sv</TT> request.
-</dl>
-<br>&#32;<br>
-<TT>.ns</TT>	space	-	D
-<DL COMPACT>
-<DT><DD>
-No-space mode turned on.
-When on, no-space mode inhibits <TT>sp</TT> requests and
-<TT>bp</TT> requests <I>without</I> a next page number.
-No-space mode is turned off when a line of
-output occurs, or with <TT>rs</TT>.
-</dl>
-<br>&#32;<br>
-<TT>.rs</TT>	space	-	D
-<DL COMPACT>
-<DT><DD>
-Restore spacing.
-The no-space mode is turned off.
-</dl>
-<br>&#32;<br>
-Blank text line.		-	B
-<DL COMPACT>
-<DT><DD>
-Causes a break and
-output of a blank line exactly like <TT>sp 1</TT>.
-</dl>
-<H4>6 Line Length and Indenting
-</H4>
-<P>
-The maximum line length for fill mode may be set with <TT>ll</TT>.
-The indent may be set with <TT>in</TT>;
-an indent applicable to only the next output line may be set with <TT>ti</TT>.
-The line length includes indent space but not
-page offset space.
-The line length minus the indent is the basis for centering with <TT>ce</TT>.
-The effect of <TT>ll</TT>, <TT>in</TT>, or <TT>ti</TT>
-is delayed, if a partially collected line exists,
-until after that line is output.
-In fill mode the length of text on an output line is less than or equal to
-the line length minus the indent.
-The current line length and indent are available in registers <TT>.l</TT> and <TT>.i</TT> respectively.
-The length of <I>three-part titles</I> produced by <TT>tl</TT>
-(see &#167;14) is independently set by <TT>lt</TT>.
-</P>
-<br>&#32;<br>
-<TT>.ll</TT><I> &#177;N</I>	6.5in	previous	E,<B>m</B>
-<DL COMPACT>
-<DT><DD>
-Line length is set to &#177;<I>N</I>.
-</dl>
-<br>&#32;<br>
-<TT>.in</TT><I> &#177;N</I>	<I>N=</I>0	previous	B,E,<B>m</B>
-<DL COMPACT>
-<DT><DD>
-Indent is set to &#177;<I>N</I>.
-The indent is prefixed to each output line.
-</dl>
-<br>&#32;<br>
-<TT>.ti</TT><I> &#177;N</I>	-	ignored	B,E,<B>m</B>
-<DL COMPACT>
-<DT><DD>
-Temporary indent.
-The next output text line will be indented a distance &#177;<I>N</I>
-with respect to the current indent.
-The resulting total indent may not be negative.
-The current indent is not changed.
-</dl>
-<H4>7 Macros, Strings, Diversion, and Position Traps
-</H4>
-<br>&#32;<br>
-<B><I>7.1.  Macros and strings.</I></B> 
-A <I>macro</I> is a named set of arbitrary <I>lines</I> that may be invoked by name or
-with a <I>trap</I>.
-A <I>string</I> is a named string of <I>characters</I>,
-not including a newline character,
-that may be interpolated by name at any point.
-Request, macro, and string names share the same name list.
-Macro and string names
-may be one or two characters long and may usurp previously defined
-request, macro, or string names;
-this implies that built-in operations may be (irrevocably) redefined.
-Any of these entities may be renamed with <TT>rn</TT>
-or removed with <TT>rm</TT>.
-<P>
-Macros are created by <TT>de</TT> and <TT>di</TT>, and appended to by <TT>am</TT> and <TT>da</TT>;
-<TT>di</TT> and <TT>da</TT> cause normal output to be stored in a macro.
-A macro is invoked in the same way as a request;
-a control line beginning <TT>.</TT><I>xx</I> will interpolate the contents of macro <I>xx</I>.
-The remainder of the line may contain up to nine <I>arguments</I>.
-</P>
-<P>
-Strings are created by <TT>ds</TT> and appended to by <TT>as</TT>.
-The strings <I>x</I> and <I>xx</I> are interpolated at any desired point with
-<TT>\*</TT><I>x</I> and <TT>\*(</TT><I>xx</I> respectively.
-String references and macro invocations may be nested.
-</P>
-<br>&#32;<br>
-<B><I>7.2.  Copy mode input interpretation.</I></B> 
-During the definition and extension
-of strings and macros (not by diversion)
-the input is read in <I>copy mode</I>.
-In copy mode, input is copied without interpretation
-except that:
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-^* The contents of number registers indicated by <TT>\n</TT> are interpolated.
-^* Strings indicated by <TT>\*</TT> are interpolated.
-^* Arguments indicated by <TT>\$</TT> are interpolated.
-^* Concealed newlines indicated by <TT>\</TT><I>newline</I> are eliminated.
-^* Comments indicated by <TT>\"</TT> are eliminated.
-^* <TT>\t</TT> and <TT>\a</TT> are interpreted as ASCII horizontal tab and SOH respectively (&#167;9).
-^* <TT>\\</TT> is interpreted as <TT>\</TT>.
-^* <TT>\.</TT> is interpreted as ``<TT>.</TT>''.
-</PRE></TT></DL>
-</dl>
-<br>&#32;<br>
-These interpretations can be suppressed by
-prefixing
-a <TT>\</TT>.
-For example, since <TT>\\</TT> maps into a <TT>\</TT>, <TT>\\n</TT> will copy as <TT>\n</TT>, which
-will be interpreted as a number register indicator when the
-macro or string is reread.
-<br>&#32;<br>
-<B><I>7.3.  Arguments.</I></B> 
-When a macro is invoked by name, the remainder of the line is
-taken to contain up to nine arguments.
-The argument separator is the space character (not tab), and arguments
-may be surrounded by double quotes to permit embedded space characters.
-Pairs of double quotes may be embedded in double-quoted arguments to
-represent a single double-quote character.
-The argument <TT>""</TT>
-is explicitly null.
-If the desired arguments won't fit on a line,
-a concealed newline may be used to continue on the next line.
-A trailing double quote may be omitted.
-<P>
-When a macro is invoked the <I>input level</I> is <I>pushed down</I> and
-any arguments available at the previous level become unavailable
-until the macro is completely read and the previous level is restored.
-A macro's own arguments can be interpolated at any point
-within the macro with <TT>\$</TT><I>N</I>,
-which interpolates the <I>N</I>th
-argument
-(1&#60;=<I>N</I>&#60;=9).
-If an invoked argument does not exist,
-a null string results.
-For example, the macro <I>xx</I> may be defined by
-<DL><DT><DD><TT><PRE>
-.de xx	\" begin definition
-Today is \\$1 the \\$2.
-..	\" end definition
-</PRE></TT></DL>
-and called by
-<DL><DT><DD><TT><PRE>
-.xx Monday 14th
-</PRE></TT></DL>
-to produce the text
-<DL><DT><DD><TT><PRE>
-Today is Monday the 14th.
-</PRE></TT></DL>
-Note that each <TT>\$</TT>
-was concealed in the definition with a prefixed <TT>\</TT>.
-The number of
-arguments is in the <TT>.$</TT> register.
-</P>
-<P>
-No arguments are available at the top (non-macro) level,
-within a string, or within a trap-invoked macro.
-</P>
-<P>
-Arguments are copied in copy mode onto a stack
-where they are available for reference.
-It is advisable to
-conceal string references (with an extra <TT>\</TT>)
-to delay interpolation until argument reference time.
-</P>
-<br>&#32;<br>
-<B><I>7.4.  Diversions.</I></B> 
-Processed output may be diverted into a macro for purposes
-such as footnote processing (see Tutorial &#167;T5)
-or determining the horizontal and vertical size of some text for
-conditional changing of pages or columns.
-A single diversion trap may be set at a specified vertical position.
-The number registers <TT>dn</TT> and <TT>dl</TT> respectively contain the
-vertical and horizontal size of the most
-recently ended diversion.
-Processed text that is diverted into a macro
-retains the vertical size of each of its lines when reread
-in <I>nofill</I> mode
-regardless of the current <I>V</I>.
-Constant-spaced (<TT>cs</TT>) or emboldened (<TT>bd</TT>) text that is diverted
-can be reread correctly only if these modes are again or still in effect
-at reread time.
-One way to do this is to embed in the diversion the appropriate
-<TT>cs</TT> or <TT>bd</TT> requests with the <I>transparent</I>
-mechanism described in &#167;10.6.
-<P>
-Diversions may be nested
-and certain parameters and registers
-are associated
-with the current diversion level
-(the top non-diversion level may be thought of as the
-0th diversion level).
-These are the diversion trap and associated macro,
-no-space mode,
-the internally-saved marked place (see <TT>mk</TT> and <TT>rt</TT>),
-the current vertical place (<TT>.d</TT> register),
-the current high-water text baseline (<TT>.h</TT> register),
-and the current diversion name (<TT>.z</TT> register).
-</P>
-<br>&#32;<br>
-<B><I>7.5.  Traps.</I></B> 
-Three types of trap mechanisms are available&#8212;page traps, a diversion trap, and
-an input-line-count trap.
-Macro-invocation traps may be planted using <TT>wh</TT> at any page position including the top.
-This trap position may be changed using <TT>ch</TT>.
-Trap positions at or below the bottom of the page
-have no effect unless or until
-moved to within the page or rendered effective by an increase in page length.
-Two traps may be planted at the same position only by first planting them at different
-positions and then moving one of the traps;
-the first planted trap will conceal the second unless and until the first one is moved
-(see Tutorial Examples).
-If the first one is moved back, it again conceals the second trap.
-The macro associated with a page trap is automatically
-invoked when a line of text is output whose vertical size reaches
-or sweeps past the trap position.
-Reaching the bottom of a page springs the top-of-page trap, if any,
-provided there is a next page.
-The distance to the next trap position is available in the <TT>.t</TT> register;
-if there are no traps between the current position and the bottom of the page,
-the distance returned is the distance to the page bottom.
-<P>
-A macro-invocation trap effective in the current diversion may be planted using <TT>dt</TT>.
-The <TT>.t</TT> register works in a diversion; if there is no subsequent trap a large
-distance is returned.
-For a description of input-line-count traps, see <TT>it</TT> below.
-</P>
-<br>&#32;<br>
-<TT>.de</TT><I> xx yy</I>	-	<I>.yy=</I><TT>..</TT>	-
-<DL COMPACT>
-<DT><DD>
-Define or redefine the macro <I>xx</I>.
-The contents of the macro begin on the next input line.
-Input lines are copied in <I>copy mode</I> until the definition is terminated by a
-line beginning with <TT>.</TT><I>yy</I>,
-whereupon the macro <I>yy</I> is called.
-In the absence of <I>yy</I>, the definition
-is terminated by a
-line beginning with ``<TT>..</TT>''.
-A macro may contain <TT>de</TT> requests
-provided the terminating macros differ
-or the contained definition terminator is concealed.
-``<TT>..</TT>'' can be concealed as
-<TT>\\..</TT> which will copy as <TT>\..</TT> and be reread as ``<TT>..</TT>''.
-</dl>
-<br>&#32;<br>
-<TT>.am</TT><I> xx yy</I>	-	<I>.yy=</I><TT>..</TT>	-
-<DL COMPACT>
-<DT><DD>
-Append to macro
-<I>xx</I>
-(append version of <TT>de</TT>).
-</dl>
-<br>&#32;<br>
-<TT>.ds</TT><I> xx string</I>	-	ignored	-
-<DL COMPACT>
-<DT><DD>
-Define a string
-<I>xx</I> containing <I>string</I>.
-Any initial double quote in <I>string</I> is stripped off to permit
-initial blanks.
-</dl>
-<br>&#32;<br>
-<TT>.as</TT><I> xx string</I>	-	ignored	-
-<DL COMPACT>
-<DT><DD>
-Append
-<I>string</I> to string <I>xx</I>
-(append version of <TT>ds</TT>).
-</dl>
-<br>&#32;<br>
-<TT>.rm</TT><I> xx</I>	-	ignored	-
-<DL COMPACT>
-<DT><DD>
-Remove
-request, macro, or string.
-The name <I>xx</I> is removed from the name list and
-any related storage space is freed.
-Subsequent references will have no effect.
-If many macros and strings are being created dynamically, it
-may become necessary to remove unused ones
-to recapture internal storage space for newer registers.
-</dl>
-<br>&#32;<br>
-<TT>.rn</TT><I> xx yy</I>	-	ignored	-
-<DL COMPACT>
-<DT><DD>
-Rename request, macro, or string
-<I>xx</I> to <I>yy</I>.
-If <I>yy</I> exists, it is first removed.
-</dl>
-<br>&#32;<br>
-<TT>.di</TT><I> xx</I>	-	end	D
-<DL COMPACT>
-<DT><DD>
-Divert output to macro <I>xx</I>.
-Normal text processing occurs during diversion
-except that page offsetting is not done.
-The diversion ends when the request <TT>di</TT> or <TT>da</TT> is encountered without an argument;
-extraneous
-requests of this type should not appear when nested diversions are being used.
-</dl>
-<br>&#32;<br>
-<TT>&da </TT><I>xx</I>	-	end	D
-<DL COMPACT>
-<DT><DD>
-Divert, appending to macro <I>xx</I>
-(append version of <TT>di</TT>).
-</dl>
-<br>&#32;<br>
-<TT>&wh</TT><I> N xx</I>	-	-	<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Install
-a trap to invoke <I>xx</I> at page position <I>N</I>;
-a negative <I>N</I> will be interpreted as a distance from the
-page bottom.
-Any macro previously planted at <I>N</I> is replaced by <I>xx</I>.
-A zero <I>N</I> refers to the top of a page.
-In the absence of <I>xx</I>, the first trap found at <I>N</I>, if any, is removed.
-</dl>
-<br>&#32;<br>
-<TT>&ch</TT><I> xx N</I>	-	-	<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Change
-the trap position for macro <I>xx</I> to be <I>N</I>.
-In the absence of <I>N</I>, the trap, if any, is removed.
-</dl>
-<br>&#32;<br>
-<TT>&dt</TT><I> N xx</I>	-	off	D,<B>v</B>
-<DL COMPACT>
-<DT><DD>
-Install a diversion trap
-at position <I>N</I> in the <I>current</I> diversion to invoke
-macro <I>xx</I>.
-Another <TT>dt</TT> will redefine the diversion trap.
-If no arguments are given, the diversion trap is removed.
-</dl>
-<br>&#32;<br>
-<TT>&it</TT><I> N xx</I>	-	off	E
-<DL COMPACT>
-<DT><DD>
-Set an input-line-count trap
-to invoke the macro <I>xx</I> after <I>N</I> lines of <I>text</I> input
-have been read
-(control or request lines do not count).
-The text may be inline text or
-text interpolated by inline or trap-invoked macros.
-</dl>
-<br>&#32;<br>
-<TT>&em</TT><I> xx</I>	none	none	-
-<DL COMPACT>
-<DT><DD>
-The
-macro <I>xx</I> will be invoked
-when all input has ended.
-The effect is almost as if the contents of <I>xx</I> had been at the end
-of the last file processed,
-but all processing ceases at the next page eject.
-</dl>
-<H4>8 Number Registers
-</H4>
-<P>
-A variety of parameters are available to the user as
-predefined <I>number registers</I> (see the Summary).
-In addition, users may define their own registers.
-Register names are one or two characters long and do not conflict
-with request, macro, or string names.
-Except for certain predefined read-only registers,
-a number register can be read, written, automatically
-incremented or decremented, and interpolated
-into the input in a variety of formats.
-One common use of user-defined registers is to
-automatically number sections, paragraphs, lines, etc.
-A number register may be used any time numerical input is expected or desired
-and may be used in numerical <I>expressions</I> (&#167;1.4).
-</P>
-<P>
-Number registers are created and modified using <TT>nr</TT>, which
-specifies the name, numerical value, and the auto-increment size.
-Registers are also modified, if accessed
-with an auto-incrementing sequence.
-If the registers <I>x</I> and <I>xx</I> both contain
-<I>N</I> and have the auto-increment size <I>M</I>,
-the following access sequences have the effect shown:
-<br><img src="-.16259.gif"><br>
-When interpolated, a number register is converted to
-decimal (default),
-decimal with leading zeros,
-lower-case Roman,
-upper-case Roman,
-lower-case sequential alphabetic,
-or
-upper-case sequential alphabetic
-according to the format specified by <TT>af</TT>.
-</P>
-<br>&#32;<br>
-<TT>&nr</TT><I> R &#177;N M</I>		-	<B>u</B>
-<DL COMPACT>
-<DT><DD>
-The number register
-<I>R</I> is assigned the value &#177;<I>N</I>
-with respect to the previous value, if any.
-The increment for auto-incrementing is set to <I>M</I>.
-</dl>
-<br>&#32;<br>
-<TT>&af</TT><I> R c</I>	arabic	-	-
-<DL COMPACT>
-<DT><DD>
-Assign
-format <I>c</I> to register <I>R</I>.
-The available formats are:
-<br><img src="-.162510.gif"><br>
-An arabic format having <I>N</I> digits
-specifies a field width of <I>N</I> digits (example 2 above).
-The read-only registers and the width function
-(&#167;11.2)
-are always arabic.
-Warning: the value of a number register in a non-Arabic format
-is not numeric, and will not produce the expected results in expressions.
-<DT><DT>&#32;<DD>
-The function <TT>\g</TT><I>x</I>
-or <TT>\g(</TT><I>xx</I>
-returns the format of a number register in a form suitable for <TT>af</TT>;
-it returns nothing if the register has not been used.
-</dl>
-<br>&#32;<br>
-<TT>&rr</TT><I> R</I>	-	ignored	-
-<DL COMPACT>
-<DT><DD>
-Remove number register <I>R</I>.
-If many registers are being created dynamically, it
-may become necessary to remove unused registers
-to recapture internal storage space for newer registers.
-The register <TT>.R</TT>
-contains the number of number registers still available.
-</dl>
-<H4>9 Tabs, Leaders, and Fields
-</H4>
-<br>&#32;<br>
-<B><I>9.1.  Tabs and leaders.</I></B> 
-The ASCII horizontal tab character and the ASCII
-SOH (control-A, hereafter called the <I>leader</I> character)
-can both be used to generate either horizontal motion or
-a string of repeated characters.
-The length of the generated entity is governed
-by internal <I>tab stops</I> specifiable
-with <TT>ta</TT>.
-The default difference is that tabs generate motion and leaders generate
-a string of periods;
-<TT>tc</TT> and <TT>lc</TT>
-offer the choice of repeated character or motion.
-There are three types of internal tab stops&#8212;
-<I>left</I> adjusting, <I>right</I> adjusting,
-and <I>centering</I>.
-In the following table,
-<I>D</I> is the distance from the current position on the <I>input</I> line
-(where a tab or leader was found)
-to the next tab stop,
-<I>next-string</I> consists
-of the input characters following the tab (or leader) up to the next tab (or leader) or end of line,
-and
-<I>W</I> is the width of <I>next-string</I>.
-<br><img src="-.162511.gif"><br>
-The length of generated motion is allowed to be negative, but
-that of a repeated character string cannot be.
-Repeated character strings contain an integer number of characters, and
-any residual distance is prepended as motion.
-Tabs or leaders found after the last tab stop are ignored, but may be used
-as <I>next-string</I> terminators.
-<P>
-Tabs and leaders are not interpreted in copy mode.
-<TT>\t</TT> and <TT>\a</TT> always generate a non-interpreted
-tab and leader respectively, and
-are equivalent to actual tabs and leaders in copy mode.
-</P>
-<br>&#32;<br>
-<B><I>9.2.  Fields.</I></B> 
-A <I>field</I> is contained between
-a pair of <I>field delimiter</I> characters,
-and consists of substrings
-separated by <I>padding</I> indicator characters.
-The field length is the distance on the
-<I>input</I> line from the position where the field begins to the next tab stop.
-The difference between the total length of all the substrings
-and the field length is incorporated as horizontal
-padding space that is divided among the indicated
-padding places.
-The incorporated padding is allowed to be negative.
-For example,
-if the field delimiter is <TT>#</TT> and the padding indicator is <TT>^</TT>,
-<TT>#^</TT><I>xxx</I><TT>^</TT><I>right</I><TT>#</TT>
-specifies a right-adjusted string with the string <I>xxx</I> centered
-in the remaining space.
-<br>&#32;<br>
-<TT>&ta</TT><I> Nt ...</I>	0.8; 0.5in	none	E,<B>m</B>
-<DL COMPACT>
-<DT><DD>
-Set tab stops and types.
-<I>t=</I><TT>R</TT>, right adjusting;
-<I>t=</I><TT>C</TT>, centering;
-<I>t</I> absent, left adjusting.
-<I>Troff</I> tab stops are preset every 0.5in.,
-<I>nroff</I> every 0.8in.
-The stop values are separated by spaces, and
-a value preceded by <TT>+</TT>
-is treated as an increment to the previous stop value.
-</dl>
-<br>&#32;<br>
-<TT>&tc</TT><I> c</I>	none	none	E
-<DL COMPACT>
-<DT><DD>
-The tab repetition character
-becomes <I>c</I>,
-or is removed, thus specifying motion.
-</dl>
-<br>&#32;<br>
-<TT>&lc</TT><I> c</I>	<TT>.</TT>	none	E
-<DL COMPACT>
-<DT><DD>
-The leader repetition character
-becomes <I>c</I>,
-or is removed, thus specifying motion.
-</dl>
-<br>&#32;<br>
-<TT>&fc</TT><I> a b</I>	off	off	-
-<DL COMPACT>
-<DT><DD>
-The field delimiter
-is set to <I>a</I>;
-the padding indicator is set to the space character or to
-<I>b</I>, if given.
-In the absence of arguments the field mechanism is turned off.
-</dl>
-<H4>10 Input and Output Conventions and Character Translations
-</H4>
-<br>&#32;<br>
-<B><I>10.1.  Input character translations.</I></B> 
-Ways of inputting the valid character set were
-discussed in &#167;2.1.
-The ASCII control characters horizontal tab (&#167;9.1),
-SOH (&#167;9.1), and backspace (&#167;10.3) are discussed elsewhere.
-The newline delimits input lines.
-In addition,
-STX, ETX, ENQ, ACK, and BEL
-are accepted,
-and may be used as delimiters or translated into a graphic with <TT>tr</TT> (&#167;10.5).
-All others are ignored.
-<P>
-The <I>escape</I> character <TT>\</TT>
-introduces <I>escape sequences</I>,
-which cause the following character to mean
-another character, or to indicate
-some function.
-A complete list of such sequences is given in the Summary.
-The escape character <TT>\</TT>
-should not be confused with the ASCII control character ESC.
-The escape character <TT>\</TT> can be input with the sequence <TT>\\</TT>.
-The escape character can be changed with <TT>ec</TT>,
-and all that has been said about the default <TT>\</TT> becomes true
-for the new escape character.
-<TT>\e</TT> can be used to print whatever the current escape character is.
-The escape mechanism may be turned off with <TT>eo</TT>,
-and restored with <TT>ec</TT>.
-</P>
-<br>&#32;<br>
-<TT>&ec</TT><I> c</I>	<TT>\</TT>	<TT>\</TT>	-
-<DL COMPACT>
-<DT><DD>
-Set escape character
-to <TT>\</TT>, or to <I>c</I>, if given.
-</dl>
-<br>&#32;<br>
-<TT>&eo</TT>	on	-	-
-<DL COMPACT>
-<DT><DD>
-Turn escape mechanism off.
-</dl>
-<br>&#32;<br>
-<B><I>10.2.  Ligatures.</I></B> 
-The set of available ligatures is device and font dependent,
-but is often a subset of
-<B>fi</B>, <B>fl</B>, <B>ff</B>, <B>ffi</B>, and <B>ffl</B>.
-They may be input by
-<TT>\(fi</TT>, <TT>\(fl</TT>, <TT>\(ff</TT>, <TT>\(Fi</TT>, and <TT>\(Fl</TT> respectively.
-The ligature mode is normally on in <I>troff</I>, and automatically invokes 
-ligatures during input.
-<br>&#32;<br>
-<TT>&lg</TT><I> N</I>	on; off	on	-
-<DL COMPACT>
-<DT><DD>
-Ligature mode
-is turned on if <I>N</I> is absent or non-zero,
-and turned off if <I>N=</I>0.
-If <I>N=</I>2, only the two-character ligatures are automatically invoked.
-Ligature mode is inhibited for
-request, macro, string, register, or file names,
-and in copy mode.
-No effect in <I>nroff</I>.
-</dl>
-<br>&#32;<br>
-<B><I>10.3.  Backspacing, underlining, overstriking, etc.</I></B> 
-Unless in copy mode, the ASCII backspace character is replaced
-by a backward horizontal motion having the width of the
-space character.
-Underlining as a form of line-drawing is discussed in &#167;12.4.
-A generalized overstriking function is described in &#167;12.1.
-<P>
-<I>Nroff</I> automatically underlines
-characters in the <I>underline</I> font,
-specifiable with <TT>uf</TT>,
-normally that on font position 2.
-In addition to <TT>ft</TT> and <TT>\f</TT><I>F</I>,
-the underline font may be selected by <TT>ul</TT> and <TT>cu</TT>.
-Underlining is restricted to an output-device-dependent
-subset of reasonable characters.
-</P>
-<br>&#32;<br>
-<TT>&ul</TT><I> N</I>	off	<I>N=</I>1	E
-<DL COMPACT>
-<DT><DD>
-Italicize in <I>troff</I>
-(underline in <I>nroff</I>) the next <I>N</I>
-input text lines.
-Actually, switch to underline font, saving the
-current font for later restoration;
-other font changes within the span of a <TT>ul</TT>
-will take effect,
-but the restoration will undo the last change.
-Output generated by <TT>tl</TT> (&#167;14) is affected by the
-font change, but does not decrement <I>N</I>.
-If <I>N&#62;</I>1, there is the risk that
-a trap interpolated macro may provide text
-lines within the span;
-environment switching can prevent this.
-</dl>
-<br>&#32;<br>
-<TT>&cu</TT><I> N</I>	off	<I>N=</I>1	E
-<DL COMPACT>
-<DT><DD>
-Continuous underline.
-A variant
-of <TT>ul</TT> that causes <I>every</I> character to be underlined in <I>nroff</I>.
-Identical to <TT>ul</TT> in <I>troff</I>.
-</dl>
-<br>&#32;<br>
-<TT>&uf</TT><I> F</I>	Italic	Italic	-
-<DL COMPACT>
-<DT><DD>
-Underline font set to <I>F</I>.
-In <I>nroff</I>,
-<I>F</I> may not be on position 1.
-</dl>
-<br>&#32;<br>
-<B><I>10.4.  Control characters.</I></B> 
-Both the control character <TT>.</TT> and the <I>no-break</I>
-control character <TT>'</TT> may be changed.
-Such a change must be compatible with the design
-of any macros used in the span of the change,
-and
-particularly of any trap-invoked macros.
-<br>&#32;<br>
-<TT>&cc</TT><I> c</I>	<TT>.</TT>	<TT>.</TT>	E
-<DL COMPACT>
-<DT><DD>
-The basic control character
-is set to <I>c</I>,
-or reset to ``<TT>.</TT>''.
-</dl>
-<br>&#32;<br>
-<TT>&c2</TT><I> c</I>	<TT>'	'</TT>	E
-<DL COMPACT>
-<DT><DD>
-The <I>no-break</I> control character is set
-to <I>c</I>, or reset to ``<TT>'</TT>''.
-</dl>
-<br>&#32;<br>
-<B><I>10.5.  Output translation.</I></B> 
-One character can be made a stand-in for another character using <TT>tr</TT>.
-All text processing (e.g., character comparisons) takes place
-with the input (stand-in) character, which appears to have the width of the final
-character.
-The graphic translation occurs at the moment of output
-(including diversion).
-<br>&#32;<br>
-<TT>&tr</TT><I> abcd....</I>	none	-	O
-<DL COMPACT>
-<DT><DD>
-Translate
-<I>a</I> into <I>b</I>, <I>c</I> into <I>d</I>, etc.
-If an odd number of characters is given,
-the last one will be mapped into the space character.
-To be consistent, a particular translation
-must stay in effect from <I>input</I> to <I>output</I> time.
-</dl>
-<br>&#32;<br>
-<B><I>10.6.  Transparent throughput.</I></B> 
-An input line beginning with a <TT>\!</TT> is read in copy mode and <I>transparently</I> output
-(without the initial <TT>\!</TT>);
-the text processor is otherwise unaware of the line's presence.
-This mechanism may be used to pass control information to a post-processor
-or to embed control lines in a macro created by a diversion.
-<br>&#32;<br>
-<B><I>10.7.  Transparent output.</I></B> 
-The sequence <TT>\X'</TT><I>anything</I><TT>'</TT>
-copies
-<I>anything</I>
-to the output, as a device control function of the form
-<TT>x X </TT><I>anything</I>
-(&#167;22).
-Escape sequences in
-<I>anything</I>
-are processed.
-<br>&#32;<br>
-<B><I>10.8.  Comments and concealed newlines.</I></B> 
-An uncomfortably long input line that must stay
-one line (e.g., a string definition, or nofilled text)
-can be split into several physical lines by ending all but
-the last one with the escape <TT>\</TT>.
-The sequence <TT>\</TT><I>newline</I> is always ignored,
-except in a comment.
-Comments may be embedded at the end of any line by
-prefacing them with <TT>\"</TT>.
-The newline at the end of a comment cannot be concealed.
-A line beginning with <TT>\"</TT> will appear as a blank line and
-behave like <TT>.sp 1</TT>;
-a comment can be on a line by itself by beginning the line with <TT>.\"</TT>.
-<H4>11 Local Horizontal and Vertical Motions, and the Width Function
-</H4>
-<br>&#32;<br>
-<B><I>11.1.  Local Motions.</I></B> 
-The functions <TT>\v'</TT><I>N</I><TT>'</TT> and
-<TT>\h'</TT><I>N</I><TT>'</TT>
-can be used for <I>local</I> vertical and horizontal motion respectively.
-The distance <I>N</I> may be negative; the positive directions
-are rightward and downward.
-A local motion is one contained within a line.
-To avoid unexpected vertical dislocations, it is necessary that
-the net vertical local motion within a word in filled text
-and otherwise within a line balance to zero.
-The escape sequences providing local motion are
-summarized in the following table.
-<br><img src="-.162512.gif"><br>
-As an example,
-<TT>E^2</TT>
-could be generated by a sequence of size changes and motions:
-<TT>E\s-2\v'-0.4m'2\v'0.4m'\s+2</TT>;
-note that
-the 0.4 em vertical motions are at the smaller size.
-<br>&#32;<br>
-<B><I>11.2.  Width Function.</I></B> 
-The <I>width</I> function <TT>\w'</TT><I>string</I><TT>'</TT>
-generates the numerical width of <I>string</I> (in basic units).
-Size and font changes may be embedded in <I>string</I>,
-and will not affect the current environment.
-For example,
-<TT>.ti -\w'\fB1. 'u</TT> could be used to
-temporarily indent leftward a distance equal to the
-size of the string ``<TT>1. </TT>'' in font <B>B</B>.
-<P>
-The width function also sets three number registers.
-The registers <TT>st</TT> and <TT>sb</TT> are set respectively to the highest and
-lowest extent of <I>string</I> relative to the baseline;
-then, for example,
-the total height of the string is <TT>\n(stu-\n(sbu</TT>.
-In <I>troff</I> the number register <TT>ct</TT> is set to a value
-between 0 and 3.
-The value
-0 means that all of the characters in <I>string</I> were short lower
-case characters without descenders (like <TT>e</TT>);
-1 means that at least one character has a descender (like <TT>y</TT>);
-2 means that at least one character is tall (like <TT>H</TT>);
-and 3 means that both tall characters and characters with
-descenders are present.
-</P>
-<br>&#32;<br>
-<B><I>11.3.  Mark horizontal place.</I></B> 
-The function <TT>\k</TT><I>x</I> causes the current horizontal
-position in the <I>input line</I> to be stored in register <I>x</I>.
-For example,
-the construction <TT>\kx</TT><I>word</I><TT>\h'|\nxu+3u'</TT><I>word</I><TT></TT>
-will embolden <I>word</I> by backing up to almost its beginning and overprinting it,
-resulting in <B><I>word</I></B>.
-<H4>12 Overstrike, Bracket, Line-drawing, Graphics, and Zero-width Functions
-</H4>
-<br>&#32;<br>
-<B><I>12.1.  Overstriking.</I></B> 
-Automatically centered overstriking of up to nine characters
-is provided by the <I>overstrike</I> function
-<TT>\o'</TT><I>string</I><TT>'</TT>.
-The characters in <I>string</I> are overprinted with centers aligned; the total width
-is that of the widest character.
-<I>string</I> may not contain local vertical motion.
-As examples,
-<TT>\o'e\''</TT> produces &#233;, and
-<TT>\o'\(mo\(sl'</TT> produces &#8713;.
-<br>&#32;<br>
-<B><I>12.2.  Zero-width characters.</I></B> 
-The function <TT>\z</TT><I>c</I>
-will output <I>c</I> without spacing over
-it, and can be used to produce left-aligned overstruck
-combinations.
-As examples,
-<TT>\z&#164;+</TT> will produce z&#164;+, and
-<TT>\(br\z\(rn\(ul\(br</TT> will produce a small
-badly constructed box |z _|.
-<br>&#32;<br>
-<B><I>12.3.  Large Brackets.</I></B> 
-The Special Font usually contains a number of bracket construction pieces
-(())|||||||
-that can be combined into various bracket styles.
-The function <TT>\b'</TT><I>string</I><TT>'</TT> may be used to pile
-up vertically the characters in <I>string</I>
-(the first character on top and the last at the bottom);
-the characters are vertically separated by 1 em and the total
-pile is centered 1/2 em above the current baseline
-(&#189; line in <I>nroff</I>).
-For example,
-<DL><DT><DD><TT><PRE>
-\b'\(lc\(lf'E\b'\(rc\(rf'\x'-0.5m'\x'0.5m'
-</PRE></TT></DL>
-produces
-x'-.5m'x'.5m'b'||'Eb'||'.
-<br>&#32;<br>
-<B><I>12.4.  Line drawing.</I></B> 
-The function <TT>\l'</TT><I>Nc</I><TT>'</TT> (backslash-ell) draws a string of repeated <I>c</I>'s towards the right for a distance <I>N</I>.
-If <I>c</I> looks like a continuation of
-an expression for <I>N</I>, it may be insulated from <I>N</I> with <TT>\&amp;</TT>.
-If <I>c</I> is not specified, the <TT>_</TT> (baseline rule) is used
-(underline character in <I>nroff</I>).
-If <I>N</I> is negative, a backward horizontal motion
-of size <I>N</I> is made before drawing the string.
-Any space resulting from <I>N</I>/(size of <I>c</I>) having a remainder is put at the beginning (left end)
-of the string.
-If <I>N</I> is less than the width of <I>c</I>,
-a single <I>c</I> is centered on a distance <I>N</I>.
-In the case of characters
-that are designed to be connected, such as
-baseline-rule <TT>_</TT>,
-under-rule <TT>_</TT>,
-and
-root-en <TT> </TT>,
-the remainder space is covered by overlapping.
-As an example, a macro to underscore a string can be written
-<DL><DT><DD><TT><PRE>
-.de us
-\\$1\l'|0\(ul'
-..
-</PRE></TT></DL>
-or one to draw a box around a string
-<DL><DT><DD><TT><PRE>
-.de bx
-\(br\|\\$1\|\(br\l'|0\(rn'\l'|0\(ul'
-..
-</PRE></TT></DL>
-such that
-<DL><DT><DD><TT><PRE>
-.us "underlined words"
-</PRE></TT></DL>
-and
-<DL><DT><DD><TT><PRE>
-.bx "words in a box"
-</PRE></TT></DL>
-yield
-<U>underlined words</U>
-and
-<SPAN STYLE="border: 1px solid">words in a box</SPAN>
-respectively.
-<P>
-The function <TT>\L'</TT><I>Nc</I><TT>'</TT> draws a vertical line consisting
-of the (optional) character <I>c</I> stacked vertically apart 1em
-(1 line in <I>nroff</I>),
-with the first two characters overlapped,
-if necessary, to form a continuous line.
-The default character is the <I>box rule</I> | (<TT>\(br</TT>);
-the other suitable character is the <I>bold vertical</I> | (<TT>\(bv</TT>).
-The line is begun without any initial motion relative to the
-current baseline.
-A positive <I>N</I> specifies a line drawn downward and
-a negative <I>N</I> specifies a line drawn upward.
-After the line is drawn no compensating
-motions are made;
-the instantaneous baseline is at the end of the line.
-</P>
-<P>
-The horizontal and vertical line drawing functions may be used
-in combination to produce large boxes.
-The zero-width <I>box-rule</I> and the &#189;-em wide <I>under-rule</I>
-were designed to form corners when using 1-em vertical
-spacings.
-For example the macro
-<DL><DT><DD><TT><PRE>
-.de eb
-.sp -1	\"compensate for next automatic baseline spacing
-.nf	\"avoid possibly overflowing word buffer
-\h'-.5n'\L'|\\nau-1'\l'\\n(.lu+1n\(ul'\L'-|\\nau+1'\l'|0u-.5n\(ul'
-.fi
-..
-</PRE></TT></DL>
-will draw a box around some text whose beginning vertical place was
-saved in number register <I>a</I>
-(e.g., using <TT>.mk a</TT>)
-as was done for this paragraph.
-<br>&#32;<br>
-<DL><DT><DD><TT><PRE>
-h'-.5n'L'|0+1u-1'<HR>L'-|0+1u+1'<HR>
-</PRE></TT></DL>
-</P>
-<br>&#32;<br>
-<B><I>12.5.  Graphics.</I></B> 
-The function <TT>\D'</TT><I>c...</I><TT>'</TT>
-draws a graphic object of type <I>c</I>
-according to a sequence of parameters,
-which are generally pairs of numbers.
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<TT>D'l </TT><I>dh</I><TT> </TT><I>dv</I><TT>'	</TT>draw line from current position by <I>dh</I>, <I>dv</I><TT>
-</TT><TT>D'c </TT><I>d</I><TT>'	</TT>draw circle of diameter <I>d</I> with left side at current position<TT>
-</TT><TT>D'e </TT><I>d</I><TT></TT>1<TT></TT><I>d</I><TT></TT>2<TT>'	</TT>draw ellipse of diameters <I>d</I>1 and <I>d</I>2<TT>
-</TT><TT>D'a </TT><I>dh</I><TT></TT>1<TT> </TT><I>dv</I><TT></TT>1<TT> </TT><I>dh</I><TT></TT>2<TT> </TT><I>dv</I><TT></TT>2<TT>'</TT><TT>	</TT>draw arc from current position to <I>dh</I>1<I>+dh</I>2, <I>dv</I>1<I>+dv</I>2,<TT>
-	   </TT>with center at <I>dh</I>1, <I>dv</I>1 from current position<TT>
-</TT><TT>D'~ </TT><I>dh</I><TT></TT>1<TT></TT><I>dv</I><TT></TT>1<TT></TT><I>dh</I><TT></TT>2<TT></TT><I>dv</I><TT></TT>2<TT></TT><I>...</I><TT>'</TT><TT>	</TT>draw B-spline from current position by <I>dh</I>1<I></I>,<I>dv</I>1,<TT>
-	   </TT>then by <I>dh</I>2,<I>dv</I>2, then by <I>dh</I>2,<I>dv</I>2, then ...<TT>
-</PRE></TT></DL>
-</dl>
-</TT><br>&#32;<br>
-For example,
-<TT>\D'e.2i .1i'</TT>
-draws an ellipse,
-and
-<TT>\D'l.2i -.1i'\D'l.1i .1i'</TT>
-draws a line of two connected segments.
-A <TT>\D</TT>
-with an unknown <I>c</I> is processed and copied through to the output
-for unspecified interpretation;
-coordinates are interpreted alternately as horizontal and vertical
-values.
-<P>
-Numbers taken as horizontal (first, third, etc.) have default scaling of ems;
-vertical numbers (second, fourth, etc.) have default scaling of <I>V</I>s (&#167;1.3).
-The position after a graphical object has been drawn is
-at its end; for circles and ellipses, the ``end''
-is at the right side.
-</P>
-<H4>13 Hyphenation.
-</H4>
-<P>
-Automatic hyphenation may be switched off and on.
-When switched on with <TT>hy</TT>,
-several variants may be set.
-A <I>hyphenation indicator</I> character may be embedded in a word to
-specify desired hyphenation points,
-or may be prefixed to suppress hyphenation.
-In addition,
-the user may specify a small list of exception words.
-</P>
-<P>
-Only words that consist of a central alphabetic string
-surrounded by (usually null) non-alphabetic strings
-are candidates for automatic hyphenation.
-Words that contain hyphens
-(minus),
-em-dashes (<TT>\(em</TT>),
-or hyphenation indicator characters
-are always subject to splitting after those characters,
-whether automatic hyphenation is on or off.
-</P>
-<br>&#32;<br>
-<TT>&nh</TT>	hyphenate	-	E
-<DL COMPACT>
-<DT><DD>
-Automatic hyphenation is turned off.
-</dl>
-<br>&#32;<br>
-<TT>&hy</TT> <I>N</I>	on, <I>N=</I>1	on, <I>N=</I>1	E
-<DL COMPACT>
-<DT><DD>
-Automatic hyphenation is turned on
-for <I>N</I>&#8805;1, or off for <I>N=</I>0.
-If <I>N=</I>2, last lines (ones that will cause a trap)
-are not hyphenated.
-For <I>N=</I>4 and 8, the last and first two characters
-respectively of a word are not split off.
-These values are additive;
-i.e., <I>N=</I>14 will invoke all three restrictions.
-</dl>
-<br>&#32;<br>
-<TT>&hc</TT><I> c</I>	<TT>\%</TT>	<TT>\%</TT>	E
-<DL COMPACT>
-<DT><DD>
-Hyphenation indicator character is set
-to <I>c</I> or to the default <TT>\%</TT>.
-The indicator does not appear in the output.
-</dl>
-<br>&#32;<br>
-<TT>&hw</TT><I> word ...</I>		ignored	-
-<DL COMPACT>
-<DT><DD>
-Specify
-hyphenation points in words
-with embedded minus signs.
-Versions of a word with terminal <I>s</I> are implied;
-i.e., <TT>dig-it</TT>
-implies <TT>dig-its</TT>.
-This list is examined initially and after
-each suffix stripping.
-The space available is small.
-</dl>
-<H4>14 Three-Part Titles.
-</H4>
-<P>
-The titling function <TT>tl</TT> provides for automatic placement
-of three fields at the left, center, and right of a line
-with a title length
-specifiable with <TT>lt</TT>.
-<TT>tl</TT> may be used anywhere, and is independent of the
-normal text collecting process.
-A common use is in header and footer macros.
-</P>
-<br>&#32;<br>
-<TT>&tl '</TT><I>left</I><TT>'</TT><I>center</I><TT>'</TT><I>right</I><TT>'</TT>	-	-	
-<DL COMPACT>
-<DT><DD>
-The strings
-<I>left</I>, <I>center</I>, and <I>right</I> are
-respectively left-adjusted, centered, and right-adjusted
-in the current title length.
-Any of the strings may be empty,
-and overlapping is permitted.
-If the page-number character (initially <TT>%</TT>) is found within any of the fields it is replaced
-by the current page number in the format assigned to register <TT>%</TT>.
-Any character may be used in place of <TT>'</TT>
-as the string delimiter.
-</dl>
-<br>&#32;<br>
-<TT>&pc</TT><I> c</I>	<TT>%</TT>	off	-
-<DL COMPACT>
-<DT><DD>
-The page number character is set to <I>c</I>,
-or removed.
-The page number register remains <TT>%</TT>.
-</dl>
-<br>&#32;<br>
-<TT>.lt</TT><I> &#177;N</I>	6.5in	previous	E,<B>m</B>
-<DL COMPACT>
-<DT><DD>
-Length of title
-is set to &#177;<I>N</I>.
-The line length and the title length are independent.
-Indents do not apply to titles; page offsets do.
-</dl>
-<H4>15 Output Line Numbering.
-</H4>
-<P>
-Automatic sequence numbering of output lines may be
-requested with <TT>nm</TT>.
-When in effect,
-a three-digit, arabic number plus a digit-space
-is prefixed to output text lines.
-The text lines are thus offset by four digit-spaces,
-and otherwise retain their line length;
-a reduction in line length may be desired to keep the right margin
-aligned with an earlier margin.
-Blank lines, other vertical spaces, and lines generated by <TT>tl</TT>
-are not numbered.
-Numbering can be temporarily suspended with <TT>nn</TT>,
-or with an <TT>.nm</TT> followed by a later <TT>.nm +0</TT>.
-In addition,
-a line number indent <I>I</I>, and the number-text separation <I>S</I>
-may be specified in digit-spaces.
-Further, it can be specified that only those line numbers that are
-multiples of some number <I>M</I> are to be printed (the others will appear
-as blank number fields).
-<br>
-</P>
-<br>&#32;<br>
-<TT>&nm</TT><I> &#177;N M S I</I>		off	E
-<DL COMPACT>
-<DT><DD>
-Line number mode.
-If &#177;<I>N</I> is given,
-line numbering is turned on,
-and the next output line numbered is numbered &#177;<I>N</I>.
-Default values are <I>M=</I>1, <I>S=</I>1, and <I>I=</I>0.
-Parameters corresponding to missing arguments are unaffected;
-a non-numeric argument is considered missing.
-In the absence of all arguments, numbering is turned off;
-the next line number is preserved for possible further use
-in number register <TT>ln</TT>.
-</dl>
-<br>&#32;<br>
-<TT>&nn</TT><I> N</I>	-	<I>N=</I>1	E
-<DL COMPACT>
-<DT><DD>
-The next <I>N</I> text output lines are not
-numbered.
-</dl>
-<P>
-As an example, the paragraph portions of this section
-are numbered with <I>M=</I>3:
-<TT>.nm 1 3</TT> was placed at the beginning;
-<TT>.nm</TT> was placed at the end of the first paragraph;
-and <TT>.nm +0</TT> was placed in front of this paragraph;
-and <TT>.nm</TT> finally placed at the end.
-Line lengths were also changed (by <TT>\w'0000'u</TT>) to keep the right side aligned.
-Another example is <TT>.nm +5 5 x 3</TT>,
-which turns on numbering with the line number of the next
-line to be 5 greater than the last numbered line,
-with <I>M=</I>5, with spacing <I>S</I> untouched, and with the indent <I>I</I> set to 3.
-<br>
-</P>
-<H4>16 Conditional Acceptance of Input
-</H4>
-<P>
-In the following,
-<I>c</I> is a one-character built-in <I>condition</I> name,
-<TT>!</TT> signifies <I>not</I>,
-<I>N</I> is a numerical expression,
-<I>string1</I> and <I>string2</I> are strings delimited by any non-blank, non-numeric character not in the strings,
-and
-<I>anything</I> represents what is conditionally accepted.
-</P>
-<br>&#32;<br>
-<TT>&if</TT><I> c anything</I>	-	-	
-<DL COMPACT>
-<DT><DD>
-If condition
-<I>c</I> true, accept <I>anything</I> as input;
-in multi-line case use \{<I>anything</I>\}.
-</dl>
-<br>&#32;<br>
-<TT>&if !</TT><I>c anything</I>	-	-	
-<DL COMPACT>
-<DT><DD>
-If condition <I>c</I> false, accept <I>anything</I>.
-</dl>
-<br>&#32;<br>
-<TT>&if</TT><I> N anything</I>		-	<B>u</B>
-<DL COMPACT>
-<DT><DD>
-If expression <I>N</I> &#62; 0, accept <I>anything</I>.
-</dl>
-<br>&#32;<br>
-<TT>&if !</TT><I>N anything</I>		-	<B>u</B>
-<DL COMPACT>
-<DT><DD>
-If expression <I>N</I> &#8804; 0 [sic], accept <I>anything</I>.
-</dl>
-<br>&#32;<br>
-<TT>&if '</TT><I>string1</I><TT>'</TT><I>string2</I><TT>'</TT><I> anything</I>	-		
-<DL COMPACT>
-<DT><DD>
-If <I>string1</I> identical to <I>string2</I>,
-accept <I>anything</I>.
-</dl>
-<br>&#32;<br>
-<TT>&if !'</TT><I>string1</I><TT>'</TT><I>string2</I><TT>'</TT><I> anything</I>	-		
-<DL COMPACT>
-<DT><DD>
-If <I>string1</I> not identical to <I>string2</I>,
-accept <I>anything</I>.
-</dl>
-<br>&#32;<br>
-<TT>&ie</TT><I> c anything</I>		-	<B>u</B>
-<DL COMPACT>
-<DT><DD>
-If portion of if-else;
-all of the forms for <TT>if</TT> above are valid.
-</dl>
-<br>&#32;<br>
-<TT>&el</TT><I> anything</I>	-	-	
-<DL COMPACT>
-<DT><DD>
-Else portion of if-else.
-</dl>
-<P>
-The built-in condition names are:
-<br><img src="-.162513.gif"><br>
-If the condition <I>c</I> is true, or if the number <I>N</I> is greater than zero,
-or if the strings compare identically (including motions and character size and font),
-<I>anything</I> is accepted as input.
-If a <TT>!</TT> precedes the condition, number, or string comparison,
-the sense of the acceptance is reversed.
-</P>
-<P>
-Any spaces between the condition and the beginning of <I>anything</I> are skipped over.
-The <I>anything</I> can be either a single input line (text, macro, or whatever)
-or a number of input lines.
-In the multi-line case,
-the first line must begin with a left delimiter <TT>\{</TT> and
-the last line must end with a right delimiter <TT>\}</TT>.
-</P>
-<P>
-The request <TT>ie</TT> (if-else) is identical to <TT>if</TT>
-except that the acceptance state is remembered.
-A subsequent and matching <TT>el</TT> (else) request then uses the reverse sense of that state.
-<TT>ie</TT>-<TT>el</TT> pairs may be nested.
-</P>
-<P>
-Some examples are:
-<DL><DT><DD><TT><PRE>
-.if e .tl 'Even Page %'''
-</PRE></TT></DL>
-which outputs a title if the page number is even; and
-<DL><DT><DD><TT><PRE>
-.ie \n%&#62;1 \{\
-'	sp 0.5i
-.	tl 'Page %'''
-'	sp |1.2i \}
-.el .sp |2.5i
-</PRE></TT></DL>
-which treats page 1 differently from other pages.
-</P>
-<H4>17 Environment Switching.
-</H4>
-<P>
-A number of the parameters that
-control the text processing are gathered together into an
-<I>environment</I>, which can be switched by the user.
-The environment parameters are those associated
-with requests noting E in their <I>Notes</I> column;
-in addition, partially collected lines and words are in the environment.
-Everything else is global; examples are page-oriented parameters,
-diversion-oriented parameters, number registers, and macro and string definitions.
-All environments are initialized with default parameter values.
-</P>
-<br>&#32;<br>
-<TT>.ev</TT><I> N</I>	<I>N=</I>0	previous	-
-<DL COMPACT>
-<DT><DD>
-Environment switched to
-environment 0&lt;=<I>N</I>&lt;=2.
-Switching is done in push-down fashion so that
-restoring a previous environment <I>must</I> be done with <TT>.ev</TT>
-rather than specific reference.
-Note that what is pushed down and restored is the environment
-<I>number,</I>
-not its contents.
-</dl>
-<H4>18 Insertions from the Standard Input
-</H4>
-<P>
-The input can be temporarily switched to the system standard input
-with <TT>rd</TT>,
-which will switch back when two consecutive newlines
-are found (the extra blank line is not used).
-This mechanism is intended for insertions in form-letter-like documentation.
-The standard input can be the user's keyboard,
-a pipe, or a file.
-</P>
-<br>&#32;<br>
-<TT>.rd</TT><I> prompt</I>	-	<I>prompt=</I>BEL	-
-<DL COMPACT>
-<DT><DD>
-Read insertion
-from the standard input until two newlines in a row are found.
-If the standard input is the user's keyboard, <I>prompt</I> (or a BEL)
-is written onto the standard output.
-<TT>rd</TT> behaves like a macro,
-and arguments may be placed after <I>prompt</I>.
-</dl>
-<br>&#32;<br>
-<TT>.ex</TT>	-	-	-
-<DL COMPACT>
-<DT><DD>
-Exit from <I>nroff</I>/<I>troff</I>.
-Text processing is terminated exactly as if all input had ended.
-</dl>
-<P>
-If insertions are to be
-taken from the terminal keyboard while output is being printed
-on the terminal, the command line option <TT>-q</TT> will turn off the echoing
-of keyboard input and prompt only with BEL.
-The regular input and insertion input cannot
-simultaneously come from the standard input.
-</P>
-<P>
-As an example,
-multiple copies of a form letter may be prepared by entering the insertions
-for all the copies in one file to be used as the standard input,
-and causing the file containing the letter to reinvoke itself with <TT>nx</TT> (&#167;19);
-the process would ultimately be ended by an <TT>ex</TT> in the insertion file.
-</P>
-<H4>19 Input/Output File Switching
-</H4>
-<br>&#32;<br>
-<TT>.so</TT><I> filename</I>		-	-
-<DL COMPACT>
-<DT><DD>
-Switch source file.
-The top input (file reading) level is switched to <I>filename</I>.
-When the new file ends,
-input is again taken from the original file.
-<TT>so</TT>'s may be nested.
-</dl>
-<br>&#32;<br>
-<TT>.nx</TT><I> filename</I>		end-of-file	-
-<DL COMPACT>
-<DT><DD>
-Next file is <I>filename</I>.
-The current file is considered ended, and the input is immediately switched
-to <I>filename</I>.
-</dl>
-<br>&#32;<br>
-<TT>.sy</TT><I> string</I>		-	-
-<DL COMPACT>
-<DT><DD>
-Execute program from <I>string</I>,
-which is the rest of the input line.
-The output is not collected automatically.
-The number register <TT>$$</TT>,
-which contains the process id of the <I>troff</I> process,
-may be useful in generating unique filenames for output.
-</dl>
-<br>&#32;<br>
-<TT>.pi</TT><I> string</I>		-	-
-<DL COMPACT>
-<DT><DD>
-Pipe output to <I>string</I>,
-which is the rest of the input line.
-This request must occur before any printing occurs;
-typically it is the first line of input.
-</dl>
-<br>&#32;<br>
-<TT>.cf</TT><I> filename</I>		-	-
-<DL COMPACT>
-<DT><DD>
-Copy
-contents of file
-<I>filename</I>
-to output, completely unprocessed.
-The file is assumed to contain something meaningful
-to subsequent processes.
-</dl>
-<H4>20 Miscellaneous
-</H4>
-<P>
-<TT>.mc</TT><I> c N</I> specifies that a <I>margin</I> character <I>c</I> appear a distance
-<I>N</I> to the right of the right margin
-after each non-empty text line (except those produced by <TT>tl</TT>).
-If the output line is too long (as can happen in nofill mode)
-the character will be appended to the line.
-If <I>N</I> is not given, the previous <I>N</I> is used; the initial <I>N</I> is
-0.2 inches in <I>nroff</I> and 1 em in <I>troff</I>.
-The margin character used with this paragraph was a 12-point box-rule.
-</P>
-<br>&#32;<br>
-<TT>.tm</TT><I> string</I>	-	newline	-
-<DL COMPACT>
-<DT><DD>
-After skipping initial blanks,
-<I>string</I> (rest of the line) is read in copy mode
-and written on the standard error.
-</dl>
-<br>&#32;<br>
-<TT>.ab</TT><I> string</I>	-	newline	-
-<DL COMPACT>
-<DT><DD>
-After skipping initial blanks,
-<I>string</I> (rest of the line) is read in copy mode
-and written on the standard error.
-<I>Troff</I> or <I>nroff</I> then exit.
-</dl>
-<br>&#32;<br>
-<TT>.ig</TT><I> yy</I>	-	<I>.yy=</I><TT>..</TT>	-
-<DL COMPACT>
-<DT><DD>
-Ignore
-input lines.
-<TT>ig</TT> behaves exactly like <TT>de</TT> (&#167;7) except that the
-input is discarded.
-The input is read in copy mode, and any auto-incremented
-registers will be affected.
-</dl>
-<br>&#32;<br>
-<TT>.lf</TT><I> N filename</I>		-	-
-<DL COMPACT>
-<DT><DD>
-Set
-line number to <I>N</I> and filename to <I>filename</I>
-for purposes of subsequent error messages, etc.
-The number register [sic] <TT>.F</TT>
-contains the name of the current input file,
-as set by command line argument,
-<TT>so</TT>, <TT>nx</TT>, or <TT>lf</TT>.
-The number register <TT>.c</TT>
-contains the number of input lines read from the current file,
-again perhaps as modified by <TT>lf</TT>.
-</dl>
-<br>&#32;<br>
-<TT>.pm</TT><I> t</I>	-	all	-
-<DL COMPACT>
-<DT><DD>
-Print macros.
-The names and sizes of all of the defined macros and strings are printed
-on the standard error;
-if <I>t</I> is given, only the total of the sizes is printed.
-The sizes are given in blocks
-of 128 characters.
-</dl>
-<br>&#32;<br>
-<TT>.fl</TT>	-	-	B
-<DL COMPACT>
-<DT><DD>
-Flush output buffer.
-Force output, including any pending position information.
-</dl>
-<H4>21 Output and Error Messages.
-</H4>
-<P>
-The output from <TT>tm</TT>, <TT>pm</TT>, and the prompt from <TT>rd</TT>,
-as well as various error messages, are written onto
-the standard error.
-The latter is different from the standard output,
-where formatted text goes.
-By default, both are written onto the user's terminal,
-but they can be independently redirected.
-</P>
-<P>
-Various error conditions may occur during
-the operation of <I>nroff</I> and <I>troff</I>.
-Certain less serious errors having only local impact do not
-cause processing to terminate.
-Two examples are <I>word overflow</I>, caused by a word that is too large
-to fit into the word buffer (in fill mode), and
-<I>line overflow</I>, caused by an output line that grew too large
-to fit in the line buffer.
-In both cases, a message is printed, the offending excess
-is discarded,
-and the affected word or line is marked at the point of truncation
-with a * in <I>nroff</I> and a &lt;= in <I>troff</I>.
-Processing continues if possible,
-on the grounds that output useful for debugging may be produced.
-If a serious error occurs, processing terminates,
-and a message is printed, along with a list of the macro names currently active.
-Examples of serious errors include the inability to create, read, or write files,
-and the exceeding of certain internal limits that
-make future output unlikely to be useful.
-</P>
-<H4>22 Output Language
-</H4>
-<P>
-<I>Troff</I>
-produces its output in a language that is independent of any
-specific output device,
-except that the numbers in it have been computed on the basis
-of the resolution of the device,
-and the sizes, fonts, and characters that that device can print.
-Nevertheless it is quite possible to interpret that output
-on a different device, within the latter's capabilities.
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<I></I><TT>s</TT><I>n</I>	set point size to <I>n</I>
-<I></I><TT>f</TT><I>n</I>	set font to <I>n</I>
-<I></I><TT>c</TT><I>c</I>	print character <I>c</I>
-<I></I><TT>C</TT><I>name</I>	print the character called <I>name</I>; terminate <I>name</I> by white space
-<I></I><TT>N</TT><I>n</I>	print character <I>n</I> on current font
-<I></I><TT>H</TT><I>n</I>	go to absolute horizontal position <I>n</I> (<I>n</I>>=0)
-<I></I><TT>V</TT><I>n</I>	go to absolute vertical position <I>n</I> (<I>n</I>>=0, down is positive)
-<I></I><TT>h</TT><I>n</I>	go <I>n</I> units horizontally; <I>n</I><I>&lt;</I>0 is to the left
-<I></I><TT>v</TT><I>n</I>	go <I>n</I> units vertically; <I>n</I><I>&lt;</I>0 is up
-<I>nnc</I>	move right <I>nn</I>, then print UTF character <I>c</I>;  <I>nn</I> must be exactly 2 digits
-<I></I><TT>p</TT><I>n</I>	new page <I>n</I> begins&#173;set vertical position to 0
-<I></I><TT>n</TT><I>b</I> <I>a</I>	end of line (information only&#173;no action);  <I>b</I> = space before line, <I>a</I> = after
-<I></I><TT>w</TT>	paddable word space (information only&#173;no action)
-<I></I><TT>D</TT><I>c</I> ...\n	graphics function <I>c</I>; see below
-<I></I><TT>x</TT> ...\n	device control functions; see below
-<I></I><TT>#</TT> ...\n	comment
-</PRE></TT></DL>
-</dl>
-<br>&#32;<br>
-All position values are in units.
-Sequences that end in digits must be followed by a non-digit.
-Blanks, tabs and newlines may occur as separators
-in the input, and are mandatory to separate constructions
-that would otherwise be confused.
-Graphics functions, device control functions, and comments extend to the
-end of the line they occur on.
-<P>
-The device control and graphics commands are intended as open-ended
-families, to be expanded as needed.
-The graphics functions coincide directly with the
-sequences:
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<I></I><TT>Dl</TT> <I>dh dv</I>	draw line from current position by <I>dh</I>, <I>dv</I>
-<I></I><TT>Dc</TT> <I>d</I>	draw circle of diameter <I>d</I> with left side here
-<I></I><TT>De</TT> <I>dh</I>1 <I>dv</I>2	draw ellipse of diameters <I>dh</I>1 and <I>dv</I>2
-<I></I><TT>Da</TT> <I>dh</I>1 <I>dv</I>1 <I>dh</I>2 <I>dv</I>2	draw arc from current position to <I>dh</I>1<I>+dh</I>2, <I>dv</I>1<I>+dv</I>2,
-		   center at <I>dh</I>1, <I>dv</I>1 from current position
-<I></I><TT>D~</TT> <I>dh</I>1 <I>dv</I>1 <I>dh</I>2 <I>dv</I>2 ...	draw B-spline from current position to <I>dh</I>1, <I>dv</I>1,
-		   then to <I>dh</I>2, <I>dv</I>2, then to ...
-<I></I><TT>D</TT><I>z</I> <I>dh</I>1 <I>dv</I>1 <I>dh</I>2 <I>dv</I>2 ...	for any other <I>z</I> is uninterpreted
-</PRE></TT></DL>
-</dl>
-<br>&#32;<br>
-In all of these, <I>dh</I>, <I>dv</I> is an increment on the current horizontal and
-vertical position,
-with down and right positive.
-All distances and dimensions are in units.
-<P>
-The device control functions begin with <TT>x</TT>,
-then a command, then other parameters.
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-x T <I>s</I>	name of typesetter is <I>s</I><TT>
-x r </TT><I>n h v</I><TT>	</TT>resolution is <I>n</I> units/inch;<TT>
-		</TT><I>h</I> = minimum horizontal motion, <I>v</I> = minimum vertical<TT>
-x i	</TT>initialize<TT>
-x f </TT><I>n s</I><TT>	</TT>mount font <I>s</I> on font position <I>n</I><TT>
-x p	</TT>pause&#173;can restart<TT>
-x s	</TT>stop&#173;done forever<TT>
-x t	</TT>generate trailer information, if any<TT>
-x H </TT><I>n</I><TT>	</TT>set character height to <I>n</I><TT>
-x S </TT><I>n</I><TT>	</TT>set slant to <I>n</I><TT>
-x X </TT><I>any</I><TT>	</TT>generated by the <TT>\X</TT> function<TT>
-x </TT><I>any</I><TT>	</TT>to be ignored if not recognized<TT>
-</PRE></TT></DL>
-</dl>
-</TT><br>&#32;<br>
-Subcommands like <TT>i</TT>
-may be spelled out like <TT>init</TT>.
-<P>
-The commands <TT>x T</TT>
-and <TT>x res</TT>
-must occur first;
-fonts must be mounted before they can be used;
-<TT>x stop</TT> comes last.
-There are no other order requirements.
-</P>
-<P>
-The following is the output from
-for a typical printer,
-as described in &#167;23:
-<DL><DT><DD><TT><PRE>
-x T utf
-x res 720 1 1
-x init
-V0
-p1
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-x font 1 R
-x font 2 I
-x font 3 B
-x font 4 BI
-x font 5 CW
-x font 6 H
-x font 7 HB
-x font 8 HX
-x font 9 S1
-x font 10 S
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-s10
-f1
-H0
-s10
-f1
-V0
-H720
-V120
-ch
-50e44l28l28o50,w58w72o50r33l28dn120 0
-x trailer
-V7920
-x stop
-</PRE></TT></DL>
-</P>
-<P>
-<I>Troff</I> output is normally not redundant;
-size and font changes and position information are not included
-unless needed.
-Nevertheless, each page is self-contained, for the benefit of postprocessors
-that re-order pages or process only a subset.
-</P>
-<H4>23 Device and Font Description Files
-</H4>
-<P>
-The parameters that describe an output device
-<I>name</I>
-are read
-from the directory
-each time
-<I>troff</I>
-is invoked.
-The device name is provided by default,
-by the environment variable
-or by a command-line argument
-The default device name is
-for UTF-encoded Unicode characters.
-The pre-defined string
-contains the name of the device.
-The
-command-line option may be used to change the default directory.
-</P>
-<br>&#32;<br>
-<B><I>0.0s.  Device description file.</I></B> 
-General parameters of the device are stored, one per line, in
-the file 
-as a sequence of names and values.
-<I>Troff</I> recognizes these parameters, and ignores any
-others that may be present for specific drivers:
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<I></I><TT>fonts</TT> <I>n</I> <I>F</I>1 <I>F</I>2 <I>.</I><I>.</I><I>.</I> <I>F</I><I>n</I>
-<I></I><TT>sizes</TT> <I>s</I>1 <I>s</I>2 <I>.</I><I>.</I><I>.</I><I></I><TT>0</TT>
-<I></I><TT>res</TT> <I>n</I>
-<I></I><TT>hor</TT> <I>n</I>
-<I></I><TT>vert</TT> <I>n</I>
-<I></I><TT>unitwidth</TT> <I>n</I>
-<I></I><TT>charset</TT>
-<I>list of multi-character character names (optional)</I>
-</PRE></TT></DL>
-</dl>
-<br>&#32;<br>
-The <I>F</I><I>i</I> are font names
-to be initially mounted.
-The list of sizes is a set of integers representing
-some or all of the legal sizes the device can produce,
-terminated by a zero.
-The 
-parameter gives the resolution of the machine in units per inch;
-and
-give the minimum number of units that can be moved
-horizontally and vertically.
-<P>
-Character widths for each font are assumed to be given in machine units
-at point size
-(In other words, a character with a width of
-<I>n</I> is <I>n</I> units wide at size
-All widths are integers at all sizes.
-</P>
-<P>
-A list of valid character names may be introduced by
-the list of names is optional.
-</P>
-<P>
-A line whose first non-blank character is
-is a comment.
-Except that
-must occur last, parameters may appear in any order.
-</P>
-<P>
-Here is a subset of the
-file for a typical Postscript printer:
-<DL><DT><DD><TT><PRE>
-# Description file for Postscript printers.
-
-fonts 10 R I B BI CW H HB HX S1 S
-sizes 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
-   24 25 26 27 28 29 30 31 32 33 34 35 36 38 40 44 48 54 60 72 0
-res 720
-hor 1
-vert 1
-unitwidth 10
-charset
-hy ct fi fl ff Fi Fl dg em 14 34 12 en aa
-ga ru sc dd -&#62; br Sl ps cs cy as os =. ld
-rd le ge pp -+ ob vr
-sq bx ci fa te ** pl mi eq ~= *A *B *X *D
-*E *F *G *Y *I *K *L *M *N *O *P *R *H *S *T *U *W
-*C *Q *Z ul rn *a *b *x *d *e *f *g *y *i *k
-*l *m *n *o *p *h *r *s *t *u *w *c *q *z
-</PRE></TT></DL>
-</P>
-<br>&#32;<br>
-<B><I>0.0s.  Font description files.</I></B> 
-Each font is described by an analogous description file,
-which begins with parameters of the font, one per line, followed by a
-list of characters and widths.
-The file for font
-<I>f</I>
-is
-<DL>
-<DT><DT>&#32;<DD>
-<DL><DT><DD><TT><PRE>
-<I></I><TT>name</TT> <I>str</I>	name of font is <I>str</I>
-<I></I><TT>ligatures</TT> <I>. . .</I> <I></I><TT>0</TT>	list of ligatures
-<I></I><TT>spacewidth</TT> <I>n</I>	width of a space on this font
-<I></I><TT>special</TT>	this is a special font
-<I></I><TT>charset</TT>
-<I>list of character name, width, ascender/descender, code</I>, tab separated
-</PRE></TT></DL>
-</dl>
-<br>&#32;<br>
-The
-and
-fields are mandatory;
-must be last.
-Comments are permitted,
-as are other unrecognized parameters.
-<P>
-Each line following
-describes one character: its name, its width in units as described above,
-ascender/descender information, and a decimal, octal or hexadecimal value
-by which the output device knows it
-(the
-``number'' of the character).
-The character name is arbitrary, except that
-signifies an unnamed character.
-If the width field contains
-the name is a synonym for the previous character.
-The ascender/descender field is 1 if
-the character has a descender (hangs below the baseline, like
-is 2 if it has an ascender (is tall, like
-is 3 if both,
-and is 0 if neither.
-The value is returned
-in the 
-register, as computed by the
-function (&#167;11.2).
-</P>
-<P>
-Here are excerpts from a typical font description file
-for the same Postscript printer.
-<DL><DT><DD><TT><PRE>
-hy	33	0	45	hyphen \(hy
--	"			- is a synonym for \(hy
-<br>&#32;<br>
-Q	72	3	81
-<br>&#32;<br>
-a	44	0	97
-b	50	2	98
-c	44	0	99
-d	50	2	100
-y	50	1	121
-<br>&#32;<br>
-em	100	0	208
----	44	2	220	Pound symbol &#163;, \N'220'
----	36	0	221	centered dot \N'221'
-</PRE></TT></DL>
-This says, for example, that the width of the letter
-is 44 units at point size 10,
-the value of 
-Point sizes are scaled linearly and rounded, so the width of
-will be 44 at size 10, 40 at size 9, 35 at size 8,
-and so on.
-<br>&#32;<br>
-<HR>
-<br>&#32;<br>
-</P>
-<br>&#32;<br>
-<B>Tutorial Examples
-<br>&#32;<br>
-<br>&#32;<br>
-<br>&#32;<br>
-<br>&#32;<br>
-</B><H4>Introduction
-</H4>
-<P>
-It is almost always necessary to
-prepare at least a small set of macro definitions
-to describe a document.
-Such common formatting needs
-as page margins and footnotes
-are deliberately not built into <I>nroff</I> and <I>troff</I>.
-Instead,
-the macro and string definition, number register, diversion,
-environment switching, page-position trap, and conditional input mechanisms
-provide the basis for user-defined implementations.
-</P>
-<P>
-For most uses, a standard package like
-or
-is the right choice.
-The next stage is to augment that,
-or to selectively replace macros from the standard package.
-The last stage, much harder,
-is to write one's own from scratch.
-This is not a task for the novice.
-</P>
-<P>
-The examples discussed here are intended to be useful and somewhat realistic,
-but will not necessarily cover all relevant contingencies.
-Explicit numerical parameters are used
-in the examples
-to make them easier to read and to
-illustrate typical values.
-In many cases, number registers would be used
-to reduce the number of places where numerical
-information is kept,
-and to concentrate conditional parameter initialization
-like that which depends on whether <I>troff</I> or <I>nroff</I> is being used.
-</P>
-<H4>Page Margins
-</H4>
-<P>
-As discussed in &#167;3,
-header and footer macros are usually defined
-to describe the top and bottom page margin areas respectively.
-A trap is planted at page position 0 for the header, and at
-<I>-N</I> (<I>N</I> from the page bottom) for the footer.
-The simplest such definitions might be
-<DL><DT><DD><TT><PRE>
-&amp;de hd	\"define header
-'sp 1i
-&amp;&amp;	\"end definition
-&amp;de fo	\"define footer
-'bp
-&amp;&amp;	\"end definition
-&amp;wh 0 hd
-&amp;wh -1i fo
-</PRE></TT></DL>
-which provide blank 1 inch top and bottom margins.
-The header will occur on the <I>first</I> page
-only if the definition and trap exist prior to
-the initial pseudo-page transition (&#167;3).
-In fill mode, the output line that springs the footer trap
-was typically forced out because some part or whole word didn't fit on it.
-If anything in the footer and header that follows causes a break,
-that word or part word will be forced out.
-In this and other examples,
-requests like <TT>bp</TT> and <TT>sp</TT> that normally cause breaks are invoked using
-the no-break control character <TT>'</TT>
-to avoid this.
-When the header/footer design contains material
-requiring independent text processing, the
-environment may be switched, avoiding
-most interaction with the running text.
-</P>
-<P>
-A more realistic example would be
-<DL><DT><DD><TT><PRE>
-&amp;de hd	\"header
-&amp;if \\n%&#62;1 \{\
-'sp ~0.5i-1	\"tl base at 0.5i
-&amp;tl ''- % -''	\"centered page number
-&amp;ps	\"restore size
-&amp;ft	\"restore font
-&amp;vs  \}	\"restore vs
-'sp ~1.0i  	\"space to 1.0i
-&amp;ns	\"turn on no-space mode
-&amp;&amp;
-&amp;de fo	\"footer
-&amp;ps 10	\"set footer/header size
-&amp;ft R	\"set font
-&amp;vs 12p	\"set baseline spacing
-&amp;if \\n%=1 \{\
-'sp ~\\n(.pu-0.5i-1  \"tl base 0.5i up
-&amp;tl ''- % -'' \}  \"first page number
-'bp
-&amp;&amp;
-&amp;wh 0 hd
-&amp;wh -1i fo
-</PRE></TT></DL>
-which sets the size, font, and baseline spacing for the
-header/footer material, and ultimately restores them.
-The material in this case is a page number at the bottom of the
-first page and at the top of the remaining pages.
-The <TT>sp</TT>'s refer to absolute positions to avoid
-dependence on the baseline spacing.
-Another reason for doing this in the footer
-is that the footer is invoked by printing a line whose
-vertical spacing swept past the trap position by possibly
-as much as the baseline spacing.
-No-space mode is turned on at the end of <TT>hd</TT>
-to render ineffective
-accidental occurrences of <TT>sp</TT> at the top of the running text.
-</P>
-<P>
-This method of restoring size, font, etc., presupposes
-that such requests (that set <I>previous</I> value) are <I>not</I>
-used in the running text.
-A better scheme is to save and restore both the current <I>and</I>
-previous values as shown for size in the following:
-<DL><DT><DD><TT><PRE>
-&amp;de fo
-&amp;nr s1 \\n(.s	\"current size
-&amp;ps
-&amp;nr s2 \\n(.s	\"previous size
-&amp;  ---	\"rest of footer
-&amp;&amp;
-&amp;de hd
-&amp;  ---	\"header stuff
-&amp;ps \\n(s2  \"restore previous size
-&amp;ps \\n(s1  \"restore current size
-&amp;&amp;
-</PRE></TT></DL>
-Page numbers may be printed in the bottom margin
-by a separate macro triggered during the footer's
-page ejection:
-<DL><DT><DD><TT><PRE>
-&amp;de bn	\"bottom number
-&amp;tl ''- % -''	\"centered page number
-&amp;&amp;
-&amp;wh -0.5i-1v bn	 \"tl base 0.5i up
-</PRE></TT></DL>
-</P>
-<H4>Paragraphs and Headings
-</H4>
-<P>
-The housekeeping
-associated with starting a new paragraph should be collected
-in a paragraph macro
-that, for example,
-does the desired preparagraph spacing,
-forces the correct font, size, baseline spacing, and indent,
-checks that enough space remains for <I>more than one</I> line,
-and
-requests a temporary indent.
-<DL><DT><DD><TT><PRE>
-&amp;de pg    \"paragraph
-&amp;br       \"break
-&amp;ft R     \"force font,
-&amp;ps 10    \"size,
-&amp;vs 12p   \"spacing,
-&amp;in 0     \"and indent
-&amp;sp 0.4   \"prespace
-&amp;ne 1+\\n(.Vu  \"want more than 1 line
-&amp;ti 0.2i         \"temp indent
-&amp;&amp;
-</PRE></TT></DL>
-The first break in <TT>pg</TT>
-will force out any previous partial lines,
-and must occur before the <TT>vs</TT>.
-The forcing of font, etc., is
-partly a defense against prior error and
-partly to permit
-things like section heading macros to
-set parameters only once.
-The prespacing parameter is suitable for <I>troff</I>;
-a larger space, at least as big as the output device vertical resolution, would be
-more suitable in <I>nroff</I>.
-The choice of remaining space to test for in the <TT>ne</TT>
-is the smallest amount greater than one line
-(the <TT>.V</TT> is the available vertical resolution).
-</P>
-<P>
-A macro to automatically number section headings
-might look like:
-<DL><DT><DD><TT><PRE>
-&amp;de sc	\"section
-&amp;  ---	\"force font, etc.
-&amp;sp 0.4	\"prespace
-&amp;ne 2.4+\\n(.Vu \"want 2.4+ lines
-&amp;fi
-\\n+S.
-&amp;&amp;
-&amp;nr S 0 1	\"init S
-</PRE></TT></DL>
-The usage is <TT>.sc</TT>,
-followed by the section heading text,
-followed by <TT>.pg</TT>.
-The <TT>ne</TT> test value includes one line of heading,
-0.4 line in the following <TT>pg</TT>, and
-one line of the paragraph text.
-A word consisting of the next section number and a period is
-produced to begin the heading line.
-The format of the number may be set by <TT>af</TT> (&#167;8).
-</P>
-<P>
-Another common form is the labeled, indented paragraph,
-where the label protrudes left into the indent space.
-<DL><DT><DD><TT><PRE>
-&amp;de lp	\"labeled paragraph
-&amp;pg
-&amp;in 0.5i	\"paragraph indent
-&amp;ta 0.2i 0.5i	\"label, paragraph
-&amp;ti 0
-\t\\$1\t\c	\"flow into paragraph
-&amp;&amp;
-</PRE></TT></DL>
-The intended usage is ``<TT>.lp</TT> <I>label</I>'';
-<I>label</I> will begin at 0.2 inch, and
-cannot exceed a length of 0.3 inch without intruding into
-the paragraph.
-The label could be right adjusted against 0.4 inch by
-setting the tabs instead with <TT>.ta|0.4iR|0.5i</TT>.
-The last line of <TT>lp</TT> ends with <TT>\c</TT> so that
-it will become a part of the first line of the text
-that follows.
-</P>
-<H4>Multiple Column Output
-</H4>
-<P>
-The production of multiple column pages requires
-the footer macro to decide whether it was
-invoked by other than the last column,
-so that it will begin a new column rather than
-produce the bottom margin.
-The header can initialize a column register that
-the footer will increment and test.
-The following is arranged for two columns, but
-is easily modified for more.
-<DL><DT><DD><TT><PRE>
-&amp;de hd	\"header
-&amp;  ---
-&amp;nr cl 0 1	\"init column count
-&amp;mk	\"mark top of text
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;de fo	\"footer
-&amp;ie \\n+(cl&#60;2 \{\
-&amp;po +3.4i	\"next column; 3.1+0.3
-&amp;rt	\"back to mark
-&amp;ns \}	\"no-space mode
-&amp;el \{\
-&amp;po \\nMu	\"restore left margin
-&amp;  ---
-'bp \}
-&amp;&amp;
-&amp;ll 3.1i	\"column width
-&amp;nr M \\n(.o	\"save left margin
-</PRE></TT></DL>
-Typically a portion of the top of the first page
-contains full width text;
-the request for the narrower line length,
-as well as another <TT>.mk</TT> would
-be made where the two column output was to begin.
-</P>
-<H4>Footnotes
-</H4>
-<P>
-The footnote mechanism to be described is used by
-embedding the footnotes in the input text at the
-point of reference,
-demarcated by an initial <TT>.fn</TT> and a terminal <TT>.ef</TT>:
-<DL><DT><DD><TT><PRE>
-&amp;fn
-<I>Footnote text and control lines...</I>
-&amp;ef
-</PRE></TT></DL>
-In the following,
-footnotes are processed in a separate environment and diverted
-for later printing in the space immediately prior to the bottom
-margin.
-There is provision for the case where the last collected
-footnote doesn't completely fit in the available space.
-<DL><DT><DD><TT><PRE>
-&amp;de hd	\"header
-&amp;  ---
-&amp;nr x 0 1	\"init footnote count
-&amp;nr y 0-\\nb	\"current footer place
-&amp;ch fo -\\nbu	\"reset footer trap
-&amp;if \\n(dn .fz	\"leftover footnote
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;de fo	\"footer
-&amp;nr dn 0  \"zero last diversion size
-&amp;if \\nx \{\
-&amp;ev 1	\"expand footnotes in ev1
-&amp;nf	\"retain vertical size
-&amp;FN	\"footnotes
-&amp;rm FN	\"delete it
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;if "\\n(.z"fy" .di  \"end overflow di
-&amp;nr x 0	\"disable fx
-&amp;ev  \}	\"pop environment
-&amp;  ---
-'bp
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;de fx	\"process footnote overflow
-&amp;if \\nx .di fy	\"divert overflow
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;de fn	\"start footnote
-&amp;da FN	\"divert (append) footnote
-&amp;ev 1	\"in environment 1
-&amp;if \\n+x=1 .fs   \"if 1st, separator
-&amp;fi	\"fill mode
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;de ef	\"end footnote
-&amp;br	\"finish output
-&amp;nr z \\n(.v	\"save spacing
-&amp;ev	\"pop ev
-&amp;di	\"end diversion
-&amp;nr y -\\n(dn	\"new footer position,
-&amp;if \\nx=1 .nr y -(\\n(.v-\\nz) \
-	\"uncertainty correction
-&amp;ch fo \\nyu	\"y is negative
-&amp;if (\\n(nl+1v)&#62;(\\n(.p+\\ny) \
-&amp;ch fo \\n(nlu+1v	 \"didn't fit
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;de fs	\"separator
-\l'1i'	\"1 inch rule
-&amp;br
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;de fz	\"get leftover footnote
-&amp;fn
-&amp;nf	\"retain vertical size
-&amp;fy	\"where fx put it
-&amp;ef
-&amp;&amp;
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-&amp;nr b 1.0i  \"bottom margin size
-&amp;wh 0 hd    \"header trap
-&amp;wh 12i fo  \"footer trap-&#62;temp pos
-&amp;wh -\\nbu fx	\"fx at footer position
-&amp;ch fo -\\nbu	\"conceal fx with fo
-</PRE></TT></DL>
-</P>
-<P>
-The header <TT>hd</TT> initializes a footnote count register <TT>x</TT>,
-and sets both the current footer trap position register <TT>y</TT> and
-the footer trap itself to a nominal position specified in
-register <TT>b</TT>.
-In addition, if the register <TT>dn</TT> indicates a leftover footnote,
-<TT>fz</TT> is invoked to reprocess it.
-The footnote start macro <TT>fn</TT> begins a diversion (append) in environment 1,
-and increments the count <TT>x</TT>; if the count is one, the footnote separator <TT>fs</TT>
-is interpolated.
-The separator is kept in a separate macro to permit user redefinition.
-</P>
-<P>
-The footnote end macro <TT>ef</TT> restores
-the previous environment and ends the diversion after saving the spacing size in register <TT>z</TT>.
-<TT>y</TT> is then decremented by the size of the footnote, available in <TT>dn</TT>;
-then on the first footnote, <TT>y</TT> is further decremented by the difference
-in vertical baseline spacings of the two environments, to
-prevent the late triggering of the footer trap from causing the last
-line of the combined footnotes to overflow.
-The footer trap is then set to the lower (on the page) of <TT>y</TT> or the current page position (<TT>nl</TT>)
-plus one line, to allow for printing the reference line.
-</P>
-<P>
-If indicated by <TT>x</TT>, the footer <TT>fo</TT> rereads the footnotes from <TT>FN</TT> in nofill mode
-in environment 1,
-and deletes <TT>FN</TT>.
-If the footnotes were too large to fit, the macro <TT>fx</TT> will be trap-invoked to redivert
-the overflow into <TT>fy</TT>,
-and the register <TT>dn</TT> will later indicate to the header whether <TT>fy</TT> is empty.
-</P>
-<P>
-Both <TT>fo</TT> and <TT>fx</TT> are planted in the nominal footer trap position in an order
-that causes <TT>fx</TT> to be concealed unless the <TT>fo</TT> trap is moved.
-The footer then terminates the overflow diversion, if necessary, and
-zeros <TT>x</TT> to disable <TT>fx</TT>,
-because the uncertainty correction
-together with a not-too-late triggering of the footer can result
-in the footnote rereading finishing before reaching the <TT>fx</TT> trap.
-</P>
-<P>
-A good exercise for the student is to combine the multiple-column and footnote mechanisms.
-</P>
-<H4>The Last Page
-</H4>
-<P>
-After the last input file has ended, <I>nroff</I> and <I>troff</I>
-invoke the <I>end macro</I> (&#167;7), if any,
-and when it finishes, eject the remainder of the page.
-During the eject, any traps encountered are processed normally.
-At the end of this last page, processing terminates
-unless a partial line, word, or partial word remains.
-If it is desired that another page be started, the end-macro
-<DL><DT><DD><TT><PRE>
-&amp;de en	\"end-macro
-\c
-'bp
-&amp;&amp;
-&amp;em en
-</PRE></TT></DL>
-will deposit a null partial word,
-and produce another last page.
-<br>&#32;<br>
-<HR>
-<br>&#32;<br>
-</P>
-<br>&#32;<br>
-<B>Special Character Names
-</B><P>
-The following table lists names for a set of characters,
-most of which have traditionally been provided by <I>troff</I> using
-the `special' or `symbol' font.
-Many of these sequences are old ways to get what are now Unicode
-characters;
-Lucida Sans, for example, has glyphs corresponding to many of these
-but does not have the special sequences.
-Therefore
-the <I>troff</I> sequence
-gives the character &#191; from the Times font instead of the
-character &#191; from the current font, in this case Lucida Sans.
-Not all sequences print on any particular device, including this one; Peter
-faces appear in their place.
-<br><img src="-.162514.gif"><br>
-</P>
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 0 - 1323
sys/doc/utf.html

@@ -1,1323 +0,0 @@
-<html>
-<title>
-data
-</title>
-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
-<H1>Hello World
-<br>
-or
-<br>
-&#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949;
-<br>
-or
-<br>
-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;
-</H1>
-<DL><DD><I>Rob Pike<br>
-Ken Thompson<br>
-<br>&#32;<br>
-rob,ken@plan9.bell-labs.com<br>
-</I></DL>
-<DL><DD><H4>ABSTRACT</H4>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> Originally appeared, in a slightly different form, in
-Proc. of the Winter 1993 USENIX Conf.,
-pp. 43-50,
-San Diego
-</I><DT>&#32;<DD></dl>
-<br>
-Plan 9 from Bell Labs has recently been converted from ASCII
-to an ASCII-compatible variant of the Unicode Standard, a 16-bit character set.
-In this paper we explain the reasons for the change,
-describe the character set and representation we chose,
-and present the programming models and software changes
-that support the new text format.
-Although we stopped short of full internationalization&#8212;for
-example, system error messages are in Unixese, not Japanese&#8212;we
-believe Plan 9 is the first system to treat the representation
-of all major languages on a uniform, equal footing throughout all its
-software.
-</DL>
-<H4>Introduction
-</H4>
-<P>
-The world is multilingual but most computer systems
-are based on English and ASCII.
-The first release of Plan 9 [Pike90], a new distributed operating
-system from Bell Laboratories, seemed a good occasion
-to correct this chauvinism.
-It is easier to make such deep changes when building new systems than
-by refitting old ones.
-</P>
-<P>
-The ANSI C standard [ANSIC] contains some guidance on the matter of
-`wide' and `multi-byte' characters but falls far short of
-solving the myriad associated problems.
-We could find no literature on how to convert a
-<I>system</I>
-to larger character sets, although some individual
-<I>programs</I>
-had been converted.
-This paper reports what we discovered as we
-explored the problem of representing multilingual
-text at all levels of an operating system,
-from the file system and kernel through
-the applications and up to the window system
-and display.
-</P>
-<P>
-Plan 9 has not been `internationalized':
-its manuals are in English,
-its error messages are in English,
-and it can display text that goes from left to right only.
-But before we can address these other problems,
-we need to handle, uniformly and comfortably,
-the textual representation of all the major written languages.
-That subproblem is richer than we had anticipated.
-</P>
-<H4>Standards
-</H4>
-<P>
-Our first step was to select a standard.
-At the time (January 1992),
-there were only two viable options:
-ISO 10646 [ISO10646] and Unicode [Unicode].
-The documents describing both proposals were still in the draft stage.
-</P>
-<P>
-The draft of ISO 10646 was not
-very attractive to us.
-It defined a sparse set of 32-bit characters,
-which would be
-hard to implement
-and have punitive storage requirements.
-Also, the draft attempted to
-mollify national interests by allocating
-16-bit subspaces to national committees
-to partition individually.
-The suggested mode of use was to
-``flip'' between separate national
-standards to implement the international standard.
-This did not strike us as a sound basis for a character set.
-As well, transmitting 32-bit values in a byte stream,
-such as in pipes, would be expensive and hard to implement.
-Since the standard does not define a byte order for such
-transmission, the byte stream would also have to carry
-state to enable the values to be recovered.
-</P>
-<P>
-The Unicode Standard is a proposal by a consortium of mostly American
-computer companies formed
-to protest the technical
-failings of ISO 10646.
-It defines a uniform 16-bit code based on the
-principle of unification:
-two characters are the same if they look the
-same even though they are from different
-languages.
-This principle, called Han unification,
-allows the large Japanese, Chinese, and Korean
-character sets to be packed comfortably into a 16-bit representation.
-</P>
-<P>
-We chose the Unicode Standard for its technical merits and because its
-code space was better defined.
-Moreover,
-the Unicode Consortium was derailing the
-ISO 10646 standard.
-(Now, in 1995,
-ISO 10646 is a standard
-with one 16-bit group defined,
-which is almost exactly the Unicode Standard.
-As most people expected, the two standards bodies
-reached a d&eacute;tente and
-ISO 10646 and Unicode represent the same character set.)
-</P>
-<P>
-The Unicode Standard defines an adequate character set
-but an unreasonable representation.
-It states that all characters
-are 16 bits wide and are communicated and stored in
-16-bit units.
-It also reserves a pair of characters
-(hexadecimal FFFE and FEFF) to detect byte order
-in transmitted text, requiring state in the byte stream.
-(The Unicode Consortium was thinking of files, not pipes.)
-To adopt this encoding,
-we would have had to convert all text going
-into and out of Plan 9 between ASCII and Unicode, which cannot be done.
-Within a single program, in command of all its input and output,
-it is possible to define characters as 16-bit quantities;
-in the context of a networked system with
-hundreds of applications on diverse machines
-by different manufacturers,
-it is impossible.
-</P>
-<P>
-We needed a way to adapt the Unicode Standard to the tools-and-pipes
-model of text processing embodied by the Unix system.
-To do that, we
-needed an ASCII-compatible textual
-representation of Unicode characters for transmission
-and storage.
-In the draft ISO standard there was an informative
-(non-required)
-Annex
-called UTF
-that provided a byte stream encoding
-of the 32-bit ISO code.
-The encoding uses multibyte sequences composed
-from the 190 printable characters of Latin-1
-to represent character values larger
-than 159.
-</P>
-<P>
-The UTF encoding has several good properties.
-By far the most important is that
-a byte in the ASCII range 0-127 represents
-itself in UTF.
-Thus UTF is backward compatible with ASCII.
-</P>
-<P>
-UTF has other advantages.
-It is a byte encoding and is
-therefore byte-order independent.
-ASCII control characters appear in the byte stream
-only as themselves, never as an element of a sequence
-encoding another character,
-so newline bytes separate lines of UTF text.
-Finally, ANSI C's
-<TT>strcmp</TT>
-function applied to UTF strings preserves the ordering of Unicode characters.
-</P>
-<P>
-To encode and decode UTF is expensive (involving multiplication,
-division, and modulo operations) but workable.
-UTF's major disadvantage is that the encoding
-is not self-synchronizing.
-It is in general impossible to find the character
-boundaries in a UTF string without reading from
-the beginning of the string, although in practice
-control characters such as newlines,
-tabs, and blanks provide synchronization points.
-</P>
-<P>
-In August 1992,
-X-Open circulated a proposal for another UTF-like
-byte encoding of Unicode characters.
-Their major concern was that an embedded character
-in a file name
-(in particular a slash)
-could be part of an escape sequence in UTF and
-therefore confuse a traditional file system.
-Their proposal would allow all 7-bit ASCII characters
-to represent themselves
-<I>and only themselves</I>
-in text.
-Multibyte sequences would contain only characters
-with the high bit set.
-We proposed a modification to the new UTF that
-would address our synchronization problem.
-Our proposal, which was originally known informally as UTF-2 and FSS-UTF,
-is now referred to as UTF-8 and has been approved by ISO to become
-Annex P to ISO 10646.
-</P>
-<P>
-The model for text in Plan 9 is chosen from these
-three standards*:
-</P>
-<DL>
-<DT><DT>&#32;<DD>
-NOTE:<I> * ``That's the nice thing about standards&#8212;there's so many to choose from.'' - Andy Tannenbaum (no, the other one)
-</I><DT>&#32;<DD></dl>
-<br>
-the Unicode character set encoded as a byte stream by
-UTF-8, from
-(soon to be) Annex P of ISO 10646.
-Although this mixture may seem like a precarious position for us to adopt,
-it is not as bad as it sounds.
-ISO 10646 and the Unicode Standard have converged,
-other systems such as Linux have adopted the same character set and encoding,
-and the general feeling seems to be that Unicode and UTF-8 will be accepted as the way
-to exchange text between systems.
-The prognosis for wide acceptance is good.
-<P>
-There are a couple of aspects of the Unicode Standard we have not faced.
-One is the issue of right-to-left text such as Hebrew or Arabic.
-Since that is an issue of display, not representation, we believe
-we can defer that problem for the moment without affecting our
-ability to solve it later.
-Another issue is diacriticals and `combining characters',
-which cause overstriking of multiple Unicode characters.
-Although necessary for some scripts, such as Thai, Arabic, and Hebrew,
-such characters confuse the issues for Latin languages because they
-generate multiple representations for accented characters.
-ISO 10646 describes three levels of implementation;
-in Plan 9 we decided not to address the issue.
-Again, this can be labeled as a display issue and its finer points are still being debated,
-so we felt comfortable deferring.  Ma&ntilde;ana.
-</P>
-<P>
-Although we converted Plan 9 in the altruistic interests of
-serving foreign languages, we have found the large character
-set attractive for other reasons.  The Unicode Standard includes many
-characters&#8212;mathematical symbols, scientific notation,
-more general punctuation, and more&#8212;that we now use
-daily in our work.  We no longer test our imaginations
-to find ways to include non-ASCII symbols in our text;
-why type
-<TT>:-)</TT>
-when you can use the character &#9786;?
-Most compelling is the ability to absorb documents
-and data that contain non-ASCII characters; our browser for the
-Oxford English Dictionary
-lets us see the dictionary as it really is, with pronunciation
-in the IPA font, foreign phrases properly rendered, and so on,
-<I>in plain text.</I>
-</P>
-<P>
-In the rest of this paper, except when
-stated otherwise, the term `UTF' refers to the UTF-8 encoding
-of Unicode characters as adopted by Plan 9.
-</P>
-<H4>C Compiler
-</H4>
-<P>
-The first program to be converted to UTF
-was the C Compiler.
-There are two levels of conversion.
-On the syntactic level,
-input to the C compiler
-is UTF; on the semantic level,
-the C language needs to define
-how compiled programs manipulate
-the UTF set.
-</P>
-<P>
-The syntactic part is simple.
-The ANSI C language standard defines the
-source character set to be ASCII.
-Since UTF is backward compatible with ASCII,
-the compiler needs little change.
-The only places where a larger character set
-is allowed are in character constants, strings, and comments.
-Since 7-bit ASCII characters can represent only
-themselves in UTF,
-the compiler does not have to be careful while looking
-for the termination of a string or comment.
-</P>
-<P>
-The Plan 9 compiler extends ANSI C to treat any Unicode
-character with a value outside of the ASCII range as
-an alphabetic.
-To a Greek programmer or an English mathematician,
-&#945; is a sensible and now valid variable name.
-</P>
-<P>
-On the semantic level, ANSI C allows,
-but does not tie down,
-the notion of a
-<I>wide character</I>
-and admits string and character constants
-of this type.
-We chose the wide character type to be
-<TT>unsigned</TT>
-<TT>short</TT>.
-In the libraries, the word
-<TT>Rune</TT>
-is defined by a
-<TT>typedef</TT>
-to be equivalent to
-<TT>unsigned</TT>
-<TT>short</TT>
-and is
-used to signify a Unicode character.
-</P>
-<P>
-There are surprises; for example:
-<DL><DT><DD><TT><PRE>
-L'x'	is 120
-'x'	is 120
-L'&yuml;'	is 255
-'&yuml;'	is -1, stdio EOF (if char is signed)
-L'&#945;'	is 945
-'&#945;'	is illegal
-</PRE></TT></DL>
-In the string constants,
-<DL><DT><DD><TT><PRE>
-"&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;"
-L"&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;",
-</PRE></TT></DL>
-the former is an array of
-<TT>chars</TT>
-with 22 elements
-and a null byte,
-while the latter is an array of
-<TT>unsigned</TT>
-<TT>shorts</TT>
-(<TT>Runes</TT>)
-with 8 elements and a null
-<TT>Rune</TT>.
-</P>
-<P>
-The Plan 9 library provides an output conversion function,
-<TT>print</TT>
-(analogous to
-<TT>printf</TT>),
-with formats
-<TT>%c</TT>,
-<TT>%C</TT>,
-<TT>%s</TT>,
-and
-<TT>%S</TT>.
-Since
-<TT>print</TT>
-produces text, its output is always UTF.
-The character conversion
-<TT>%c</TT>
-(lower case) masks its argument
-to 8 bits before converting to UTF.
-Thus
-<TT>L'&yuml;'</TT>
-and
-<TT>'&yuml;'</TT>
-printed under
-<TT>%c</TT>
-will be identical,
-but
-<TT>L'</TT>&#945;<TT>'</TT>
-will print as the Unicode
-character with decimal value 177.
-The character conversion
-<TT>%C</TT>
-(upper case) masks its argument
-to 16 bits before converting to UTF.
-Thus
-<TT>L'&yuml;'</TT>
-and
-<TT>L'</TT>&#945;<TT>'</TT>
-will print correctly under
-<TT>%C</TT>,
-but
-<TT>'&yuml;'</TT>
-will not.
-The conversion
-<TT>%s</TT>
-(lower case)
-expects a pointer to
-<TT>char</TT>
-and copies UTF sequences up to a null byte.
-The conversion
-<TT>%S</TT>
-(upper case) expects a pointer to
-<TT>Rune</TT>
-and
-performs sequential
-<TT>%C</TT>
-conversions until a null
-<TT>Rune</TT>
-is encountered.
-</P>
-<P>
-Another problem in format conversion
-is the definition of
-<TT>%10s</TT>:
-does the number refer to bytes or characters?
-We decided that such formats were most
-often used to align output columns and
-so made the number count characters.
-Some programs, however, use the count
-to place blank-padded strings
-in fixed-sized arrays.
-These programs must be found and corrected.
-</P>
-<P>
-Here is a complete example:
-<DL><DT><DD><TT><PRE>
-#include &lt;u.h&gt;
-
-char c[] = "&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;";
-Rune s[] = L"&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;";
-
-main(void)
-{
-	print("%d, %d\n", sizeof(c), sizeof(s));
-	print("%s\n", c);
-	print("%S\n", s);
-}
-</PRE></TT></DL>
-</P>
-<P>
-This program prints
-<TT>23,</TT>
-<TT>18</TT>
-and then two identical lines of
-UTF text.
-In practice,
-<TT>%S</TT>
-and
-<TT>L"..."</TT>
-are rare in programs; one reason is
-that most formatted I/O is done in unconverted UTF.
-</P>
-<H4>Ramifications
-</H4>
-<P>
-All programs in Plan 9 now read and write text as UTF, not ASCII.
-This change breaks two deep-rooted symmetries implicit in most C programs:
-</P>
-<DL COMPACT>
-<DT>1.<DD>
-A character is no longer a
-<TT>char</TT>.
-<DT>2.<DD>
-The internal representation (Rune) of a character now differs from its
-external representation (UTF).
-</dl>
-<P>
-In the sections that follow,
-we show how these issues were faced in the layers of
-system software from the operating system up to the applications.
-The effects are wide-reaching and often surprising.
-</P>
-<H4>Operating system
-</H4>
-<P>
-Since UTF is the only format for text in Plan 9,
-the interface to the operating system had to be converted to UTF.
-Text strings cross the interface in several places:
-command arguments,
-file names,
-user names (people can log in using their native name),
-error messages,
-and miscellaneous minor places such as commands to the I/O system.
-Little change was required: null-terminated UTF strings
-are equivalent to null-terminated ASCII strings for most purposes
-of the operating system.
-The library routines described in the next section made that
-change straightforward.
-</P>
-<P>
-The window system, once called
-<TT>8.5</TT>,
-is now rightfully called
-<TT>8&#189;</TT>.
-</P>
-<H4>Libraries
-</H4>
-<P>
-A header file included by all programs (see [Pike92]) declares
-the
-<TT>Rune</TT>
-type to hold 16-bit character values:
-<DL><DT><DD><TT><PRE>
-typedef unsigned short Rune;
-</PRE></TT></DL>
-Also defined are several constants relevant to UTF:
-<DL><DT><DD><TT><PRE>
-enum
-{
-    UTFmax    = 3,    /* maximum bytes per rune */
-    Runesync  = 0x80, /* can't appear in UTF sequence (&lt;) */
-    Runeself  = 0x80, /* rune==UTF sequence (&lt;) */
-    Runeerror = 0x80, /* decoding error in UTF */
-};
-</PRE></TT></DL>
-(With the original UTF,
-<TT>Runesync</TT>
-was hexadecimal 21 and
-<TT>Runeself</TT>
-was A0.)
-<TT>UTFmax</TT>
-bytes are sufficient
-to hold the UTF encoding of any Unicode character.
-Characters of value less than
-<TT>Runesync</TT>
-only appear in a UTF string as
-themselves, never as part of a sequence encoding another character.
-Characters of value less than
-<TT>Runeself</TT>
-encode into single bytes
-of the same value.
-Finally, when the library detects errors in UTF input&#8212;byte sequences
-that are not valid UTF sequences&#8212;it converts the first byte of the
-error sequence to the character
-<TT>Runeerror</TT>.
-There is little a rune-oriented program can do when given bad data
-except exit, which is unreasonable, or carry on.
-Originally the conversion routines, described below,
-returned errors when given invalid UTF,
-but we found ourselves repeatedly checking for errors and ignoring them.
-We therefore decided to convert a bad sequence to a valid rune
-and continue processing.
-(The ANSI C routines, on the other hand, return errors.)
-</P>
-<P>
-This technique does have the unfortunate property that converting
-invalid UTF byte strings in and out of runes does not preserve the input,
-but this circumstance only occurs when non-textual input is
-given to a textual program.
-The Unicode Standard defines an error character, value FFFD, to stand for
-characters from other sets that it does not represent.
-The
-<TT>Runeerror</TT>
-character is a different concept, related to the encoding rather than the character set, so we
-chose a different character for it.
-</P>
-<P>
-The Plan 9 C library contains a number of routines for
-manipulating runes.
-The first set converts between runes and UTF strings:
-<DL><DT><DD><TT><PRE>
-extern	int	runetochar(char*, Rune*);
-extern	int	chartorune(Rune*, char*);
-extern	int	runelen(long);
-extern	int	fullrune(char*, int);
-</PRE></TT></DL>
-<TT>Runetochar</TT>
-translates a single
-<TT>Rune</TT>
-to a UTF sequence and returns the number of bytes produced.
-<TT>Chartorune</TT>
-goes the other way, reporting how many bytes were consumed.
-<TT>Runelen</TT>
-returns the number of bytes in the UTF encoding of a rune.
-<TT>Fullrune</TT>
-examines a UTF string up to a specified number of bytes
-and reports whether the string begins with a complete UTF encoding.
-All these routines use the
-<TT>Runeerror</TT>
-character to work around encoding problems.
-</P>
-<P>
-There is also a set of routines for examining null-terminated UTF strings,
-based on the model of the ANSI standard
-<TT>str</TT>
-routines, but with
-<TT>utf</TT>
-substituted for
-<TT>str</TT>
-and
-<TT>rune</TT>
-for
-<TT>chr</TT>:
-<DL><DT><DD><TT><PRE>
-extern	int	utflen(char*);
-extern	char*	utfrune(char*, long);
-extern	char*	utfrrune(char*, long);
-extern	char*	utfutf(char*, char*);
-</PRE></TT></DL>
-<TT>Utflen</TT>
-returns the number of runes in a UTF string;
-<TT>utfrune</TT>
-returns a pointer to the first occurrence of a rune in a UTF string;
-and
-<TT>utfrrune</TT>
-a pointer to the last.
-<TT>Utfutf</TT>
-searches for the first occurrence of a UTF string in another UTF string.
-Given the synchronizing property of UTF-8,
-<TT>utfutf</TT>
-is the same as
-<TT>strstr</TT>
-if the arguments point to valid UTF strings.
-</P>
-<P>
-It is a mistake to use
-<TT>strchr</TT>
-or
-<TT>strrchr</TT>
-unless searching for a 7-bit ASCII character, that is, a character
-less than
-<TT>Runeself</TT>.
-</P>
-<P>
-We have no routines for manipulating null-terminated arrays of
-<TT>Runes</TT>.
-Although they should probably exist for completeness, we have
-found no need for them, for the same reason that
-<TT>%S</TT>
-and
-<TT>L"..."</TT>
-are rarely used.
-</P>
-<P>
-Most Plan 9 programs use a new buffered I/O library, BIO, in place of
-Standard I/O.
-BIO contains routines to read and write UTF streams, converting to and from
-runes.
-<TT>Bgetrune</TT>
-returns, as a
-<TT>Rune</TT>
-within a
-<TT>long</TT>,
-the next character in the UTF input stream;
-<TT>Bputrune</TT>
-takes a rune and writes its UTF representation.
-<TT>Bungetrune</TT>
-puts a rune back into the input stream for rereading.
-</P>
-<P>
-Plan 9 programs use a simple set of macros to process command line arguments.
-Converting these macros to UTF automatically updated the
-argument processing of most programs.
-In general,
-argument flag names can no longer be held in bytes and
-arrays of 256 bytes cannot be used to hold a set of flags.
-</P>
-<P>
-We have done nothing analogous to ANSI C's locales, partly because
-we do not feel qualified to define locales and partly because we remain
-unconvinced of that model for dealing with the problems.
-That is really more an issue of internationalization than conversion
-to a larger character set; on the other hand,
-because we have chosen a single character set that encompasses
-most languages, some of the need for
-locales is eliminated.
-(We have a utility,
-<TT>tcs</TT>,
-that translates between UTF and other character sets.)
-</P>
-<P>
-There are several reasons why our library does not follow the ANSI design
-for wide and multi-byte characters.
-The ANSI model was designed by a committee, untried, almost
-as an afterthought, whereas
-we wanted to design as we built.
-(We made several major changes to the interface
-as we became familiar with the problems involved.)
-We disagree with ANSI C's handling of invalid multi-byte sequences.
-Also, the ANSI C library is incomplete:
-although it contains some crucial routines for handling
-wide and multi-byte characters, there are some serious omissions.
-For example, our software can exploit
-the fact that UTF preserves ASCII characters in the byte stream.
-We could remove that assumption by replacing all
-calls to
-<TT>strchr</TT>
-with
-<TT>utfrune</TT>
-and so on.
-(Because of the weaker properties of the original UTF,
-we have actually done so.)
-ANSI C cannot:
-the standard says nothing about the representation, so portable code should
-<I>never</I>
-call
-<TT>strchr</TT>,
-yet there is no ANSI equivalent to
-<TT>utfrune</TT>.
-ANSI C simultaneously invalidates
-<TT>strchr</TT>
-and offers no replacement.
-</P>
-<P>
-Finally, ANSI did nothing to integrate wide characters
-into the I/O system: it gives no method for printing
-wide characters.
-We therefore needed to invent some things and decided to invent
-everything.
-In the end, some of our entry points do correspond closely to
-ANSI routines&#8212;for example
-<TT>chartorune</TT>
-and
-<TT>runetochar</TT>
-are similar to
-<TT>mbtowc</TT>
-and
-<TT>wctomb</TT>&#8212;but
-Plan 9's library defines more functionality, enough
-to write real applications comfortably.
-</P>
-<H4>Converting the tools
-</H4>
-<P>
-The source for our tools and applications had already been converted to
-work with Latin-1, so it was `8-bit safe', but the conversion to the Unicode
-Standard and UTF is more involved.
-Some programs needed no change at all:
-<TT>cat</TT>,
-for instance,
-interprets its argument strings, delivered in UTF,
-as file names that it passes uninterpreted to the
-<TT>open</TT>
-system call,
-and then just copies bytes from its input to its output;
-it never makes decisions based on the values of the bytes.
-(Plan 9
-<TT>cat</TT>
-has no options such as
-<TT>-v</TT>
-to complicate matters.)
-Most programs, however, needed modest change.
-</P>
-<P>
-It is difficult to
-find automatically the places that need attention,
-but
-<TT>grep</TT>
-helps.
-Software that uses the libraries conscientiously can be searched
-for calls to library routines that examine bytes as characters:
-<TT>strchr</TT>,
-<TT>strrchr</TT>,
-<TT>strstr</TT>,
-etc.
-Replacing these by calls to
-<TT>utfrune</TT>,
-<TT>utfrrune</TT>,
-and
-<TT>utfutf</TT>
-is enough to fix many programs.
-Few tools actually need to operate on runes internally;
-more typically they need only to look for the final slash in a file
-name and similar trivial tasks.
-Of the 170 C source programs in the top levels of
-<TT>/sys/src/cmd</TT>,
-only 23 now contain the word
-<TT>Rune</TT>.
-</P>
-<P>
-The programs that
-<I>do</I>
-store runes internally
-are mostly those whose
-<I>raison</I>
-<I>d'&ecirc;tre</I>
-is character manipulation:
-<TT>sam</TT>
-(the text editor),
-<TT>sed</TT>,
-<TT>sort</TT>,
-<TT>tr</TT>,
-<TT>troff</TT>,
-<TT>8&#189;</TT>
-(the window system and terminal emulator),
-and so on.
-To decide whether to compute using runes
-or UTF-encoded byte strings requires balancing the cost of converting
-the data when read and written
-against the cost of converting relevant text on demand.
-For programs such as editors that run a long time with a relatively
-constant dataset, runes are the better choice.
-There are space considerations too, but they are more complicated:
-plain ASCII text grows when converted to runes; UTF-encoded Japanese
-shrinks.
-</P>
-<P>
-Again, it is hard to automate the conversion of a program from
-<TT>chars</TT>
-to
-<TT>Runes</TT>.
-It is not enough just to change the type of variables; the assumption
-that bytes and characters are equivalent can be insidious.
-For instance, to clear a character array by
-<DL><DT><DD><TT><PRE>
-memset(buf, 0, BUFSIZE)
-</PRE></TT></DL>
-becomes wrong if
-<TT>buf</TT>
-is changed from an array of
-<TT>chars</TT>
-to an array of
-<TT>Runes</TT>.
-Any program that indexes tables based on character values needs
-rethinking.
-Consider
-<TT>tr</TT>,
-which originally used multiple 256-byte arrays for the mapping.
-The na&iuml;ve conversion would yield multiple 65536-rune arrays.
-Instead Plan 9
-<TT>tr</TT>
-saves space by building in effect
-a run-encoded version of the map.
-</P>
-<P>
-<TT>Sort</TT>
-has related problems.
-The cooperation of UTF and
-<TT>strcmp</TT>
-means that a simple sort&#8212;one with no options&#8212;can be done
-on the original UTF strings using
-<TT>strcmp</TT>.
-With sorting options enabled, however,
-<TT>sort</TT>
-may need to convert its input to runes: for example,
-option
-<TT>-t</TT>&#945;
-requires searching for alphas in the input text to
-crack the input into fields.
-The field specifier
-<TT>+3.2</TT>
-refers to 2 runes beyond the third field.
-Some of the other options are hopelessly provincial:
-consider the case-folding and dictionary order options
-(Japanese doesn't even have an official dictionary order) or
-<TT>-M</TT>
-which compares by case-insensitive English month name.
-Handling these options involves the
-larger issues of internationalization and is beyond the scope
-of this paper and our expertise.
-Plan 9
-<TT>sort</TT>
-works sensibly with options that make sense relative to the input.
-The simple and most important options are, however, usually meaningful.
-In particular,
-<TT>sort</TT>
-sorts UTF into the same order that
-<TT>look</TT>
-expects.
-</P>
-<P>
-Regular expression-matching algorithms need rethinking to
-be applied to UTF text.
-Deterministic automata are usually applied to bytes;
-converting them to operate on variable-sized byte sequences is awkward.
-On the other hand, converting the input stream to runes adds measurable
-expense
-and the state tables expand
-from size 256 to 65536; it can be expensive just to generate them.
-For simple string searching,
-the Boyer-Moore algorithm works with UTF provided the input is
-guaranteed to be only valid UTF strings; however, it does not work
-with the old UTF encoding.
-At a more mundane level, even character classes are harder:
-the usual bit-vector representation within a non-deterministic automaton
-is unwieldy with 65536 characters in the alphabet.
-</P>
-<P>
-We compromised.
-An existing library for compiling and executing regular expressions
-was adapted to work on runes, with two entry points for searching
-in arrays of runes and arrays of chars (the pattern is always UTF text).
-Character classes are represented internally as runs of runes;
-the reserved value
-<TT>FFFF</TT>
-marks the end of the class.
-Then
-<I>all</I>
-utilities that use regular expressions&#8212;editors,
-<TT>grep</TT>,
-<TT>awk</TT>,
-etc.&#8212;except the shell, whose notation
-was grandfathered, were converted to use the library.
-For some programs, there was a concomitant loss of performance,
-but there was also a strong advantage.
-To our knowledge, Plan 9 is the only Unix-like system
-that has a single definition and implementation of
-regular expressions; patterns are written and interpreted
-identically by all the programs in the system.
-</P>
-<P>
-A handful of programs have the notion of character built into them
-so strongly as to confuse the issue of what they should do with UTF input.
-Such programs were treated as individual special cases.
-For example,
-<TT>wc</TT>
-is, by default, unchanged in behavior and output; a new option,
-<TT>-r</TT>,
-counts the number of correctly encoded runes&#8212;valid UTF sequences&#8212;in
-its input;
-<TT>-b</TT>
-the number of invalid sequences.
-</P>
-<P>
-It took us several months to convert all the software in the system
-to the Unicode Standard and the old UTF.
-When we decided to convert from that to the new UTF,
-only three things needed to be done.
-First, we rewrote the library routines to encode and decode the
-new UTF.  This took an evening.
-Next, we converted all the files containing UTF
-to the new encoding.
-We wrote a trivial program to look for non-ASCII bytes in
-text files and used a Plan 9 program called
-<TT>tcs</TT>
-(translate character set) to change encodings.
-Finally, we recompiled all the system software;
-the library interface was unchanged, so recompilation was sufficient
-to effect the transformation.
-The second two steps were done concurrently and took an afternoon.
-We concluded that the actual encoding is relatively unimportant to the
-software; the adoption of large characters and a byte-stream encoding
-<I>per</I>
-<I>se</I>
-are much deeper issues.
-</P>
-<H4>Graphics and fonts
-</H4>
-<P>
-Plan 9 provides only minimal support for plain text terminals.
-It is instead designed to be used with all character input and
-output mediated by a window system such as
-<TT>8&#189;</TT>.
-The window system and related software are responsible for the
-display of UTF text as Unicode character images.
-For plain text, the window system must provide a user-settable
-<I>font</I>
-that provides a (possibly empty) picture for each Unicode character.
-Fancier applications that use bold and Italic characters
-need multiple fonts storing multiple pictures for each
-Unicode value.
-All the issues are apparent, though,
-in just the problem of
-displaying a single image for each character, that is, the
-Unicode equivalent of a plain text terminal.
-With 128 or even 256 characters, a font can be just
-an array of bitmaps.  With 65536 characters,
-a more sophisticated design is necessary.  To store the ideographs
-for just Japanese as 16&#215;16&#215;1 bit images,
-the smallest they can reasonably be, takes over a quarter of a
-megabyte.  Make the images a little larger, store more bits per
-pixel, and hold a copy in every running application, and the
-memory cost becomes unreasonable.
-</P>
-<P>
-The structure of the bitmap graphics services is described at length elsewhere
-[Pike91].
-In summary, the memory holding the bitmaps is stored in the same machine that has
-the display, mouse, and keyboard: the terminal in Plan 9 terminology,
-the workstation in others'.
-Access to that memory and associated services is provided
-by device files served by system
-software on the terminal.  One of those files,
-<TT>/dev/bitblt</TT>,
-interprets messages written upon it as requests for actions
-corresponding to entry points in the graphics library:
-allocate a bitmap, execute a raster operation, draw a text string, etc.
-The window system
-acts as a multiplexer that mediates access to the services
-and resources of the terminal by simulating in each client window
-a set of files mirroring those provided by the system.
-That is, each window has a distinct
-<TT>/dev/mouse</TT>,
-<TT>/dev/bitblt</TT>,
-and so on through which applications drive graphical
-input and output.
-</P>
-<P>
-One of the resources managed by
-<TT>8&#189;</TT>
-and the terminal is the set of active
-<I>subfonts.</I>
-Each subfont holds the
-bitmaps and associated data structures for a sequential set of Unicode
-characters.
-Subfonts are stored in files and loaded into the terminal by
-<TT>8&#189;</TT>
-or an application.
-For example, one subfont
-might hold the images of the first 256 characters of the Unicode space,
-corresponding to the Latin-1 character set;
-another might hold the standard phonetic character set, Unicode characters
-with value 0250 to 02E9.
-These files are collected in directories corresponding to typefaces:
-<TT>/lib/font/bit/pelm</TT>
-contains the Pellucida Monospace character set, with subfonts holding
-the Latin-1, Greek, Cyrillic and other components of the typeface.
-A suffix on subfont files encodes (in a subfont-specific
-way) the size of the images:
-<TT>/lib/font/bit/pelm/latin1.9</TT>
-contains the Latin-1 Pellucida Monospace characters with lower
-case letters 9 pixels high;
-<TT>/lib/font/bit/jis/jis5400.16</TT>
-contains 16-pixel high
-ideographs starting at Unicode value 5400.
-</P>
-<P>
-The subfonts do not identify which portion of the Unicode space
-they cover.  Instead, a
-font file, in plain text,
-describes how to assemble subfonts into a complete
-character set.
-The font file is presented as an argument to the window system
-to determine how plain text is displayed in text windows and
-applications.
-Here is the beginning of the font file
-<TT>/lib/font/bit/pelm/jis.9.font</TT>,
-which describes the layout of a font covering that portion of
-the Unicode Standard for which we have characters of typical
-display size, using Japanese characters
-to cover the Han space:
-<DL><DT><DD><TT><PRE>
-18	14
-0x0000	0x00FF	latin1.9
-0x0100	0x017E	latineur.9
-0x0250	0x02E9	ipa.9
-0x0386	0x03F5	greek.9
-0x0400	0x0475	cyrillic.9
-0x2000	0x2044	../misc/genpunc.9
-0x2070	0x208E	supsub.9
-0x20A0	0x20AA	currency.9
-0x2100	0x2138	../misc/letterlike.9
-0x2190	0x21EA	../misc/arrows
-0x2200	0x227F	../misc/math1
-0x2280	0x22F1	../misc/math2
-0x2300	0x232C	../misc/tech
-0x2500	0x257F	../misc/chart
-0x2600	0x266F	../misc/ding
-</PRE></TT></DL>
-<DL><DT><DD><TT><PRE>
-0x3000	0x303f	../jis/jis3000.16
-0x30a1	0x30fe	../jis/katakana.16
-0x3041	0x309e	../jis/hiragana.16
-0x4e00	0x4fff	../jis/jis4e00.16
-0x5000	0x51ff	../jis/jis5000.16
-...
-</PRE></TT></DL>
-The first two numbers set the interline spacing of the font (18
-pixels) and the distance from the baseline to the top of the
-line (14 pixels).
-When characters are displayed, they are placed so as best
-to fit within those constraints; characters
-too large to fit will be truncated.
-The rest of the file associates subfont files
-with portions of Unicode space.
-The first four such files are in the Pellucida Monospace typeface
-and directory; others reside in other directories.  The file names
-are relative to the font file's own location.
-</P>
-<P>
-There are several advantages to this two-level structure.
-First, it simultaneously breaks the huge Unicode space into manageable
-components and provides a unifying architecture for
-assembling fonts from disjoint pieces.
-Second, the structure promotes sharing.
-For example, we have only one set of Japanese
-characters but dozens of typefaces for the Latin-1 characters,
-and this structure permits us to store only one copy of the
-Japanese set but use it with any Roman typeface.
-Also, customization is easy.
-English-speaking users who don't need Japanese characters
-but may want to read an on-line Oxford English Dictionary can
-assemble a custom font with the
-Latin-1 (or even just ASCII) characters and the International
-Phonetic Alphabet (IPA).
-Moreover, to do so requires just editing a plain text file,
-not using a special font editing tool.
-Finally, the structure guides the design of
-caching protocols to improve performance and memory usage.
-</P>
-<P>
-To load a complete Unicode character set into each application
-would consume too
-much memory and, particularly on slow terminal lines, would take
-unreasonably long.
-Instead, Plan 9 assembles a multi-level cache structure for
-each font.
-An application opens a font file, reads and parses it,
-and allocates a data structure.
-A message written to
-<TT>/dev/bitblt</TT>
-allocates an associated structure held in the terminal, in particular,
-a bitmap to act as a cache
-for recently used character images.
-Other messages copy these images to bitmaps such as the screen
-by loading characters from subfonts into the cache on demand and
-from there to the destination bitmap.
-The protocol to draw characters is in terms of cache indices,
-not Unicode character number or UTF sequences.
-These details are hidden from the application, which instead
-sees only a subroutine to draw a string in a bitmap from a
-given font, functions to discover character size information,
-and routines to allocate and to free fonts.
-</P>
-<P>
-As needed, whole
-subfonts are opened by the graphics library, read, and then downloaded
-to the terminal.
-They are held open by the library in an LRU-replacement list.
-Even when the program closes a subfont, it is retained
-in the terminal for later use.
-When the application opens the subfont, it asks the terminal
-if it already has a copy to avoid reading it from the file
-server if possible.
-This level of cache has the property that the bitmaps for, say,
-all the Japanese characters are stored only once, in the terminal;
-the applications read only size and width information from the terminal
-and share the images.
-</P>
-<P>
-The sizes of the character and subfont caches held by the
-application are adaptive.
-A simple algorithm monitors the cache miss rate to enlarge and
-shrink the caches as required.
-The size of the character cache is limited to 2048 images maximum,
-which in practice seems enough even for Japanese text.
-For plain ASCII-like text it naturally stays around 128 images.
-</P>
-<P>
-This mechanism sounds complicated but is implemented by only about
-500 lines in the library and considerably less in each of the
-terminal's graphics driver and
-<TT>8&#189;</TT>.
-It has the advantage that only characters that are
-being used are loaded into memory.
-It is also efficient: if the characters being drawn
-are in the cache the extra overhead is negligible.
-It works particularly well for alphabetic character sets,
-but also adapts on demand for ideographic sets.
-When a user first looks at Japanese text, it takes a few
-seconds to read all the font data, but thereafter the
-text is drawn almost as fast as regular text (the images
-are larger, so draw a little slower).
-Also, because the bitmaps are remembered by the terminal,
-if a second application then looks at Japanese text
-it starts faster than the first.
-</P>
-<P>
-We considered
-building a `font server'
-to cache character images and associated data
-for the applications, the window system, and the terminal.
-We rejected this design because, although isolating
-many of the problems of font management into a separate program,
-it didn't simplify the applications.
-Moreover, in a distributed system such as Plan 9 it is easy
-to have too many special purpose servers.
-Making the management of the fonts the concern of only
-the essential components simplifies the system and makes
-bootstrapping less intricate.
-</P>
-<H4>Input
-</H4>
-<P>
-A completely different problem is how to type Unicode characters
-as input to the system.
-We selected an unused key on our ASCII keyboards
-to serve as a prefix for multi-keystroke
-sequences that generate Unicode characters.
-For example, the character
-<TT>&uuml;</TT>
-is generated by the prefix key
-(typically
-<TT>ALT</TT>
-or
-<TT>Compose</TT>)
-followed by a double quote and a lower-case
-<TT>u</TT>.
-When that character is read by the application, from the file
-<TT>/dev/cons</TT>,
-it is of course presented as its UTF encoding.
-Such sequences generate characters from an arbitrary set that
-includes all of Latin-1 plus a selection of mathematical
-and technical characters.
-An arbitrary Unicode character may be generated by typing the prefix,
-an upper case X, and four hexadecimal digits that identify
-the Unicode value.
-</P>
-<P>
-These simple mechanisms are adequate for most of our day-to-day needs:
-it's easy to remember to type `ALT 1 2' for &#189; or `ALT accent letter'
-for accented Latin letters.
-For the occasional unusual character, the cut and paste features of
-<TT>8&#189;</TT>
-serve well.  A program called (perhaps misleadingly)
-<TT>unicode</TT>
-takes as argument a hexadecimal value, and prints the UTF representation of that character,
-which may then be picked up with the mouse and used as input.
-</P>
-<P>
-These methods
-are clearly unsatisfactory when working in a non-English language.
-In the native country of such a language
-the appropriate keyboard is likely to be at hand.
-But it's also reasonable&#8212;especially now that the system handles Unicode characters&#8212;to
-work in a language foreign to the keyboard.
-</P>
-<P>
-For alphabetic languages such as Greek or Russian, it is
-straightforward to construct a program that does phonetic substitution,
-so that, for example, typing a Latin `a' yields the Greek `&#945;'.
-Within Plan 9, such a program can be inserted transparently
-between the real keyboard and a program such as the window system,
-providing a manageable input device for such languages.
-</P>
-<P>
-For ideographic languages such as Chinese or Japanese the problem is harder.
-Native users of such languages have adopted methods for dealing with
-Latin keyboards that involve a hybrid technique based on phonetics
-to generate a list of possible symbols followed by menu selection to
-choose the desired one.
-Such methods can be
-effective, but their design must be rooted in information about
-the language unknown to non-native speakers.
-(<TT>Cxterm</TT>,
-a Chinese terminal emulator built by and for
-Chinese programmers,
-employs such a technique
-[Pong and Zhang].)
-Although the technical problem of implementing such a device
-is easy in Plan 9&#8212;it is just an elaboration of the technique for
-alphabetic languages&#8212;our lack of familiarity with such languages
-has restrained our enthusiasm for building one.
-</P>
-<P>
-The input problem is technically the least interesting but perhaps
-emotionally the most important of the problems of converting a system
-to an international character set.
-Beyond that remain the deeper problems of internationalization
-such as multi-lingual error messages and command names,
-problems we are not qualified to solve.
-With the ability to treat text of most languages on an equal
-footing, though, we can begin down that path.
-Perhaps people in non-English speaking countries will
-consider adopting Plan 9, solving the input problem locally&#8212;perhaps
-just by plugging in their local terminals&#8212;and begin to use
-a system with at least the capacity to be international.
-</P>
-<H4>Acknowledgements
-</H4>
-<P>
-Dennis Ritchie provided consultation and encouragement.
-Bob Flandrena converted most of the standard tools to UTF.
-Brian Kernighan suffered cheerfully with several
-inadequate implementations and converted
-<TT>troff</TT>
-to UTF.
-Rich Drechsler converted his Postscript driver to UTF.
-John Hobby built the Postscript &#9786;.
-We thank them all.
-</P>
-<H4>References
-</H4>
-<br>&#32;<br>
-[ANSIC] <I>American National Standard for Information Systems -
-Programming Language C</I>, American National Standards Institute, Inc.,
-New York, 1990.
-<br>&#32;<br>
-[ISO10646]
-ISO/IEC DIS 10646-1:1993
-<I>Information technology -
-Universal Multiple-Octet Coded Character Set (UCS) &#8212;
-Part 1: Architecture and Basic Multilingual Plane</I>.
-<br>&#32;<br>
-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
-``Plan 9 from Bell Labs'',
-UKUUG Proc. of the Summer 1990 Conf.,
-London, England,
-1990.
-<br>&#32;<br>
-[Pike91] R. Pike, ``8&#189;, The Plan 9 Window System'', USENIX Summer
-Conf. Proc., Nashville, 1991, reprinted in this volume.
-<br>&#32;<br>
-[Pike92] R. Pike, ``How to Use the Plan 9 C Compiler'', this volume.
-<br>&#32;<br>
-[Pong and Zhang] Man-Chi Pong and Yongguang Zhang, ``cxterm:
-A Chinese Terminal Emulator for the X Window System'',
-Software&#8212;Practice and Experience,
-Vol 22(1), 809-926, October 1992.
-<br>&#32;<br>
-[Unicode]
-<I>The Unicode Standard,
-Worldwide Character Encoding,
-Version 1.0, Volume 1</I>,
-The Unicode Consortium,
-Addison Wesley,
-New York,
-1991.
-<br>&#32;<br>
-<A href=http://www.lucent.com/copyright.html>
-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
-</body></html>

+ 41 - 18
sys/man/3/uart

@@ -36,28 +36,27 @@ It accepts the following commands:
 Set the baud rate to
 .IR n .
 .TP
+.BI c n
+Set hangup on DCD if
+.I n
+is non-zero; else clear it.
+.TP
 .BI d n
 Set DTR if
 .I n
 is non-zero;
 else clear it.
 .TP
-.BI k n
-Send a break lasting
+.BI e n
+Set hangup on DSR if
 .I n
-milliseconds.
+is non-zero; else clear it.
 .TP
-.BI r n
-Set RTS if
-.I n
-is non-zero;
-else clear it.
+.B f
+Flush output queue.
 .TP
-.BI m n
-Obey modem CTS signal if
-.I n
-is non-zero;
-else clear it.
+.B h
+Close input and output queues.
 .TP
 .BI i n
 Enable/disable the FIFOs.
@@ -75,6 +74,25 @@ value of
 .I n
 causes the maximum-supported trigger level to be set.
 .TP
+.BI k n
+Send a break lasting
+.I n
+milliseconds.
+.TP
+.BI l n
+Set number of bits per byte to
+.IR n .
+Legal values are 5, 6, 7, or 8.
+.TP
+.BI m n
+Obey modem CTS signal if
+.I n
+is non-zero;
+else clear it.
+.TP
+.B n
+Make writes non-blocking.
+.TP
 .BI p c
 Set parity to odd if
 .I c
@@ -86,16 +104,21 @@ is
 .BR e ;
 else set no parity.
 .TP
+.BI q n
+Set input and output queue limits to
+.IR n .
+.TP
+.BI r n
+Set RTS if
+.I n
+is non-zero;
+else clear it.
+.TP
 .BI s n
 Set number of stop bits to
 .IR n .
 Legal values are 1 or 2.
 .TP
-.BI l n
-Set number of bits per byte to
-.IR n .
-Legal values are 5, 6, 7, or 8.
-.TP
 .BI w n
 Set the uart clock timer to
 n times 100us.

+ 11 - 8
sys/man/6/smtpd

@@ -204,18 +204,20 @@ relaying is allowed only if the source IP address is in
 or the destination domain is specified in
 .BR ourdomains .
 .SS Blocked Addresses
-When
-.B /mail/lib/blocked
-exists and is readable,
-.I smtpd
-reads a list of banned addresses from it.
+.I Smtpd
+consults
+.B /mail/ratify
+(see
+.IR ratfs (4))
+for a list of banned addresses.
 Messages received from these addresses are
 rejected with a 5\fIxx\fP-series SMTP error code.
 There is no option
-to turn blocking on or off; if the file is accessible,
-blocking is enabled on all
+to turn blocking on or off; if 
+.B /mail/ratify
+is mounted,
 .I smtpd
-sessions, including those from trusted networks.
+will use it, even for connections from trusted networks.
 .PP
 The command line format and address specifications
 conform to the notation described above.  If the parameters
@@ -303,4 +305,5 @@ command line arguments applicable
 to exposed systems.
 .SH "SEE ALSO"
 .IR mail (1),
+.IR ratfs (4),
 .IR scanmail (8)

+ 10 - 3
sys/src/libventi/client.c

@@ -29,12 +29,19 @@ vtDial(char *host, int canfail)
 	if(host == nil)
 		host = "$venti";
 
-	na = netmkaddr(host, 0, "venti");
-	fd = dial(na, 0, 0, 0);
+	if (host == nil) {
+		if (!canfail)
+			werrstr("no venti host set");
+		na = "";
+		fd = -1;
+	} else {
+		na = netmkaddr(host, 0, "venti");
+		fd = dial(na, 0, 0, 0);
+	}
 	if(fd < 0){
 		rerrstr(e, sizeof e);
 		if(!canfail){
-			vtSetError("%s", e);
+			vtSetError("venti dialstring %s: %s", na, e);
 			return nil;
 		}
 	}