18 years ago · 84ec658f21
--- a/dist/replica/_plan9.db
+++ b/dist/replica/_plan9.db
@@ -231,9 +231,9 @@
 
				 386/bin/fortune - 775 sys sys 1135570810 67113
			
 
				 386/bin/fossil - 20000000775 sys sys 1042005470 0
			
 
				 386/bin/fossil/conf - 775 sys sys 1085077052 1506
			
 
				-386/bin/fossil/flchk - 775 sys sys 1135570810 237600
			
 
				-386/bin/fossil/flfmt - 775 sys sys 1135570811 245747
			
 
				-386/bin/fossil/fossil - 775 sys sys 1135570812 360522
			
 
				+386/bin/fossil/flchk - 775 sys sys 1138211975 237734
			
 
				+386/bin/fossil/flfmt - 775 sys sys 1138211976 245881
			
 
				+386/bin/fossil/fossil - 775 sys sys 1138211977 360656
			
 
				 386/bin/fossil/last - 775 sys sys 1135570812 63280
			
 
				 386/bin/freq - 775 sys sys 1136397208 61797
			
 
				 386/bin/fs - 20000000775 sys sys 954380769 0
			
@@ -567,7 +567,7 @@
 
				 386/lib/libstdio.a - 664 sys sys 1115950159 126206
			
 
				 386/lib/libsunrpc.a - 664 sys sys 1115950160 355994
			
 
				 386/lib/libthread.a - 664 sys sys 1135531448 71308
			
 
				-386/lib/libventi.a - 664 sys sys 1124766772 97708
			
 
				+386/lib/libventi.a - 664 sys sys 1138211977 98048
			
 
				 386/mbr - 775 sys sys 1131317338 407
			
 
				 386/mkfile - 664 sys sys 948141303 46
			
 
				 386/pbs - 775 sys sys 1131317339 494
			
@@ -5391,7 +5391,7 @@ power/mkfile - 664 sys sys 948141304 46
 
				 rc - 20000000775 sys sys 944959447 0
			
 
				 rc/bin - 20000000775 sys sys 1018637942 0
			
 
				 rc/bin/9fat: - 775 sys sys 1133179689 367
			
 
				-rc/bin/9fs - 775 sys sys 1079969823 948
			
 
				+rc/bin/9fs - 775 sys sys 1138240042 1027
			
 
				 rc/bin/B - 775 sys sys 945617206 645
			
 
				 rc/bin/C - 775 sys sys 1127395076 855
			
 
				 rc/bin/Kill - 775 sys sys 1018637942 115
			
@@ -5591,46 +5591,35 @@ sparc64/lib - 20000000775 sys sys 1114458535 0
 
				 sparc64/mkfile - 664 sys sys 1114458667 46
			
 
				 sys - 20000000775 sys sys 952648870 0
			
 
				 sys/doc - 20000000775 sys sys 1018471272 0
			
 
				-sys/doc/-.2669382.gif - 664 sys sys 1019969850 2078
			
 
				 sys/doc/8½ - 20000000775 sys sys 945616779 0
			
 
				-sys/doc/8½/8½.html - 664 sys sys 1020895860 33484
			
 
				 sys/doc/8½/8½.ms - 664 sys sys 1020895859 31593
			
 
				 sys/doc/8½/8½.ps - 664 sys sys 1020895860 797150
			
 
				 sys/doc/8½/fig1.ps - 664 sys sys 1020895859 473747
			
 
				 sys/doc/8½/mkfile - 664 sys sys 1020895860 215
			
 
				-sys/doc/9.html - 664 sys sys 1136302690 87619
			
 
				 sys/doc/9.ms - 664 sys sys 953237044 84632
			
 
				 sys/doc/9.ps - 664 sys sys 960837924 508340
			
 
				-sys/doc/acid.html - 664 sys sys 1091459045 71723
			
 
				 sys/doc/acid.ms - 664 sys sys 1127411211 65062
			
 
				 sys/doc/acid.ps - 664 sys sys 1015012454 426359
			
 
				-sys/doc/acidpaper.html - 664 sys sys 1091459045 46880
			
 
				 sys/doc/acidpaper.ms - 664 sys sys 952880777 44805
			
 
				 sys/doc/acidpaper.ps - 664 sys sys 960837913 359639
			
 
				 sys/doc/acme - 20000000775 sys sys 945616779 0
			
 
				 sys/doc/acme/acme.fig1 - 664 sys sys 944959632 164559
			
 
				 sys/doc/acme/acme.fig2 - 664 sys sys 944959632 56026
			
 
				-sys/doc/acme/acme.html - 664 sys sys 1020013936 51769
			
 
				 sys/doc/acme/acme.ms - 664 sys sys 952880782 49851
			
 
				 sys/doc/acme/acme.pdf - 664 sys sys 1020384351 117006
			
 
				 sys/doc/acme/acme.ps - 664 sys sys 960837907 611301
			
 
				 sys/doc/acme/bs - 664 sys sys 944959634 556
			
 
				 sys/doc/acme/mkfile - 664 sys sys 961259926 304
			
 
				-sys/doc/ape.html - 664 sys sys 1091459042 14415
			
 
				 sys/doc/ape.ms - 664 sys sys 953344517 12595
			
 
				 sys/doc/ape.ps - 664 sys sys 960837914 258779
			
 
				-sys/doc/asm.html - 664 sys sys 1091459045 31098
			
 
				 sys/doc/asm.ms - 664 sys sys 958247686 28542
			
 
				 sys/doc/asm.ps - 664 sys sys 960837915 322051
			
 
				-sys/doc/auth.html - 664 sys sys 1091459079 77445
			
 
				 sys/doc/auth.ms - 664 sys sys 1021579975 66803
			
 
				 sys/doc/auth.ps - 664 sys sys 1021579976 451672
			
 
				 sys/doc/cleanps - 775 sys sys 961259933 184
			
 
				 sys/doc/colophon.ps - 664 sys sys 960837922 214122
			
 
				-sys/doc/comp.html - 664 sys sys 1091459046 42144
			
 
				 sys/doc/comp.ms - 664 sys sys 954266992 37792
			
 
				 sys/doc/comp.ps - 664 sys sys 960837915 345786
			
 
				-sys/doc/compiler.html - 664 sys sys 1091459044 32960
			
 
				 sys/doc/compiler.ms - 664 sys sys 1067721142 30279
			
 
				 sys/doc/compiler.ps - 664 sys sys 1091459054 309735
			
 
				 sys/doc/contents.ms - 664 sys sys 1019916701 4920
			
@@ -5641,7 +5630,6 @@ sys/doc/fossil.ms - 664 sys sys 1063856349 31400
 
				 sys/doc/fossil.pdf - 664 sys sys 1042123169 63200
			
 
				 sys/doc/fossil.ps - 664 sys sys 1135487951 313552
			
 
				 sys/doc/fs - 20000000775 sys sys 945616779 0
			
 
				-sys/doc/fs/fs.html - 664 sys sys 1020013937 21345
			
 
				 sys/doc/fs/fs.pdf - 664 sys sys 1020384351 47177
			
 
				 sys/doc/fs/fs.ps - 664 sys sys 960837905 276918
			
 
				 sys/doc/fs/mkfile - 664 sys sys 961259926 282
			
@@ -5656,7 +5644,6 @@ sys/doc/fs/p7 - 664 sys sys 953844581 958
 
				 sys/doc/fs/p8 - 664 sys sys 953844574 881
			
 
				 sys/doc/fs/xx - 664 sys sys 944959592 65957
			
 
				 sys/doc/il - 20000000775 sys sys 945616779 0
			
 
				-sys/doc/il/il.html - 664 sys sys 1020013937 12278
			
 
				 sys/doc/il/il.ms - 664 sys sys 952880783 11367
			
 
				 sys/doc/il/il.pdf - 664 sys sys 1020384351 44630
			
 
				 sys/doc/il/il.ps - 664 sys sys 960837905 258028
			
@@ -5666,29 +5653,21 @@ sys/doc/il/transition.fig - 664 sys sys 944959591 15431
 
				 sys/doc/il/transition.pic - 664 sys sys 944959591 11912
			
 
				 sys/doc/il/xx - 664 sys sys 944959591 48924
			
 
				 sys/doc/index.htm - 664 sys sys 1019916696 6906
			
 
				-sys/doc/index.html - 664 sys sys 1020082751 6906
			
 
				-sys/doc/lexnames.html - 664 sys sys 1091459043 37261
			
 
				 sys/doc/lexnames.ms - 664 sys sys 954383595 34046
			
 
				 sys/doc/lexnames.ps - 664 sys sys 960837909 335546
			
 
				-sys/doc/libmach.html - 664 sys sys 1091459044 26650
			
 
				 sys/doc/libmach.ms - 664 sys sys 1021579974 24145
			
 
				 sys/doc/libmach.ps - 664 sys sys 960837916 291283
			
 
				-sys/doc/lp.html - 664 sys sys 1091459043 21721
			
 
				 sys/doc/lp.ms - 664 sys sys 954614673 22366
			
 
				 sys/doc/lp.ps - 664 sys sys 960837917 294399
			
 
				-sys/doc/mk.html - 664 sys sys 1091459046 40021
			
 
				 sys/doc/mk.ms - 664 sys sys 952880779 34413
			
 
				 sys/doc/mk.ps - 664 sys sys 960837917 329779
			
 
				 sys/doc/mkfile - 664 sys sys 1091459055 4411
			
 
				-sys/doc/mkfiles.html - 664 sys sys 1091459045 17750
			
 
				 sys/doc/mkfiles.ms - 664 sys sys 952880779 17888
			
 
				 sys/doc/mkfiles.ps - 664 sys sys 960837918 269496
			
 
				-sys/doc/names.html - 664 sys sys 1091459046 23529
			
 
				 sys/doc/names.ms - 664 sys sys 954269607 22103
			
 
				 sys/doc/names.ps - 664 sys sys 960837918 288716
			
 
				 sys/doc/net - 20000000775 sys sys 954033300 0
			
 
				 sys/doc/net/mkfile - 664 sys sys 961259927 345
			
 
				-sys/doc/net/net.html - 664 sys sys 1020013937 43449
			
 
				 sys/doc/net/net.ms - 664 sys sys 952880783 41191
			
 
				 sys/doc/net/net.pdf - 664 sys sys 1020384351 82603
			
 
				 sys/doc/net/net.ps - 664 sys sys 960837908 350053
			
@@ -5696,25 +5675,19 @@ sys/doc/net/tree - 664 sys sys 944959636 866
 
				 sys/doc/net/tree.pout - 664 sys sys 944959636 1755
			
 
				 sys/doc/network.art - 664 sys sys 944959651 2260
			
 
				 sys/doc/network.pic - 664 sys sys 944959651 4124
			
 
				-sys/doc/plumb.html - 664 sys sys 1091459044 55783
			
 
				 sys/doc/plumb.ms - 664 sys sys 954383596 53250
			
 
				 sys/doc/plumb.ps - 664 sys sys 960837910 391830
			
 
				-sys/doc/port.html - 664 sys sys 1091459042 18123
			
 
				 sys/doc/port.ms - 664 sys sys 1020111393 16694
			
 
				 sys/doc/port.ps - 664 sys sys 1020111394 268901
			
 
				 sys/doc/preamble - 664 sys sys 961259497 203475
			
 
				 sys/doc/prfile - 775 sys sys 944959668 3782
			
 
				-sys/doc/prog4.html - 664 sys sys 1091459046 18653
			
 
				 sys/doc/prog4.ms - 664 sys sys 1019932830 16485
			
 
				 sys/doc/prog4.ps - 664 sys sys 1019932831 271581
			
 
				 sys/doc/ps - 664 sys sys 944959649 1739
			
 
				-sys/doc/rc.html - 664 sys sys 1091459044 41702
			
 
				 sys/doc/rc.ms - 664 sys sys 1063856321 34373
			
 
				 sys/doc/rc.ps - 664 sys sys 960837920 337955
			
 
				-sys/doc/release3.html - 664 sys sys 1019922810 6329
			
 
				 sys/doc/release3.ms - 664 sys sys 961261276 5492
			
 
				 sys/doc/release3.ps - 664 sys sys 961261277 230682
			
 
				-sys/doc/release4.html - 664 sys sys 1091459042 6473
			
 
				 sys/doc/release4.ms - 664 sys sys 1063856338 5160
			
 
				 sys/doc/release4.ps - 664 sys sys 1091459052 230868
			
 
				 sys/doc/sam - 20000000775 sys sys 945617037 0
			
@@ -5731,24 +5704,19 @@ sys/doc/sam/fig6.pic - 464 sys sys 944959644 1105
 
				 sys/doc/sam/fig7.pic - 464 sys sys 944959644 258
			
 
				 sys/doc/sam/mkfile - 664 sys sys 961259927 574
			
 
				 sys/doc/sam/refs - 464 sys sys 944959644 2652
			
 
				-sys/doc/sam/sam.html - 664 sys sys 1020013938 100660
			
 
				 sys/doc/sam/sam.ms - 464 sys sys 954266468 94536
			
 
				 sys/doc/sam/sam.pdf - 664 sys sys 1020384352 156123
			
 
				 sys/doc/sam/sam.ps - 664 sys sys 960837910 707546
			
 
				 sys/doc/sam/sam.tut - 464 sys sys 944959644 40481
			
 
				-sys/doc/sleep.html - 664 sys sys 1091459043 16602
			
 
				 sys/doc/sleep.ms - 664 sys sys 953237030 15206
			
 
				 sys/doc/sleep.ps - 664 sys sys 960837920 263882
			
 
				-sys/doc/spin.html - 664 sys sys 1091459042 75873
			
 
				 sys/doc/spin.ms - 664 sys sys 953344522 67475
			
 
				 sys/doc/spin.ps - 664 sys sys 960837923 443064
			
 
				 sys/doc/title - 664 sys sys 1018974170 740
			
 
				 sys/doc/title.ps - 664 sys sys 1018974170 214289
			
 
				 sys/doc/trademarks.ps - 664 sys sys 960837912 217896
			
 
				-sys/doc/troff.html - 664 sys sys 1019922811 110932
			
 
				 sys/doc/troff.ms - 664 sys sys 953237047 120683
			
 
				 sys/doc/troff.ps - 664 sys sys 1091459051 782310
			
 
				-sys/doc/utf.html - 664 sys sys 1091459044 43965
			
 
				 sys/doc/utf.ms - 664 sys sys 952880781 41659
			
 
				 sys/doc/utf.ps - 664 sys sys 960837922 363085
			
 
				 sys/doc/venti - 20000000775 sys sys 1019852318 0
			
@@ -5763,7 +5731,7 @@ sys/doc/venti/emelie.gif - 664 sys sys 1019852316 5004
 
				 sys/doc/venti/emelie2.gif - 664 sys sys 1019852317 4357
			
 
				 sys/doc/venti/mkfile - 664 sys sys 1019965454 79
			
 
				 sys/doc/venti/probablity.gif - 664 sys sys 1019852317 1244
			
 
				-sys/doc/venti/venti.html - 664 sys sys 1019852317 55272
			
 
				+sys/doc/venti/venti.html - 664 sys sys 1138233389 55272
			
 
				 sys/doc/venti/venti.pdf - 664 sys sys 1020384352 139090
			
 
				 sys/doc/venti/venti.ps - 664 sys sys 1019852320 2012620
			
 
				 sys/games - 20000000775 sys sys 952648872 0
			
@@ -7596,7 +7564,7 @@ sys/man/3/segment - 664 sys sys 1017423721 2378
 
				 sys/man/3/srv - 664 sys sys 958419690 1470
			
 
				 sys/man/3/ssl - 664 sys sys 1018386776 3413
			
 
				 sys/man/3/tls - 664 sys sys 1045501496 7018
			
 
				-sys/man/3/uart - 664 sys sys 1102093395 1710
			
 
				+sys/man/3/uart - 664 sys sys 1138191356 2003
			
 
				 sys/man/3/usb - 664 sys sys 1126971427 6960
			
 
				 sys/man/3/vga - 664 sys sys 1131301005 4957
			
 
				 sys/man/4 - 20000000775 sys sys 1018581459 0
			
@@ -7682,7 +7650,7 @@ sys/man/6/plot - 664 sys sys 944959679 6739
 
				 sys/man/6/plumb - 664 sys sys 969499892 10918
			
 
				 sys/man/6/regexp - 664 sys sys 954089523 2050
			
 
				 sys/man/6/rewrite - 664 sys sys 969499892 3235
			
 
				-sys/man/6/smtpd - 664 sys sys 971095216 8192
			
 
				+sys/man/6/smtpd - 664 sys sys 1138191586 8178
			
 
				 sys/man/6/snap - 664 sys sys 1132452694 2402
			
 
				 sys/man/6/thumbprint - 664 sys sys 1019866709 1124
			
 
				 sys/man/6/users - 664 sys sys 1130912014 1392
			
@@ -15530,7 +15498,7 @@ sys/src/libthread/xincmips.s - 664 sys sys 1014928160 674
 
				 sys/src/libthread/xincport.h - 664 sys sys 1127405405 211
			
 
				 sys/src/libthread/xincpower.s - 664 sys sys 1048645448 342
			
 
				 sys/src/libventi - 20000000775 sys sys 947360466 0
			
 
				-sys/src/libventi/client.c - 664 sys sys 1121977166 5362
			
 
				+sys/src/libventi/client.c - 664 sys sys 1138191441 5493
			
 
				 sys/src/libventi/debug.c - 664 sys sys 1045502093 1258
			
 
				 sys/src/libventi/errfmt.c - 664 sys sys 1019678691 133
			
 
				 sys/src/libventi/fatal.c - 664 sys sys 1084468118 225
			
--- a/dist/replica/plan9.db
+++ b/dist/replica/plan9.db
@@ -231,9 +231,9 @@
 
				 386/bin/fortune - 775 sys sys 1135570810 67113
			
 
				 386/bin/fossil - 20000000775 sys sys 1042005470 0
			
 
				 386/bin/fossil/conf - 775 sys sys 1085077052 1506
			
 
				-386/bin/fossil/flchk - 775 sys sys 1135570810 237600
			
 
				-386/bin/fossil/flfmt - 775 sys sys 1135570811 245747
			
 
				-386/bin/fossil/fossil - 775 sys sys 1135570812 360522
			
 
				+386/bin/fossil/flchk - 775 sys sys 1138211975 237734
			
 
				+386/bin/fossil/flfmt - 775 sys sys 1138211976 245881
			
 
				+386/bin/fossil/fossil - 775 sys sys 1138211977 360656
			
 
				 386/bin/fossil/last - 775 sys sys 1135570812 63280
			
 
				 386/bin/freq - 775 sys sys 1136397208 61797
			
 
				 386/bin/fs - 20000000775 sys sys 954380769 0
			
@@ -567,7 +567,7 @@
 
				 386/lib/libstdio.a - 664 sys sys 1115950159 126206
			
 
				 386/lib/libsunrpc.a - 664 sys sys 1115950160 355994
			
 
				 386/lib/libthread.a - 664 sys sys 1135531448 71308
			
 
				-386/lib/libventi.a - 664 sys sys 1124766772 97708
			
 
				+386/lib/libventi.a - 664 sys sys 1138211977 98048
			
 
				 386/mbr - 775 sys sys 1131317338 407
			
 
				 386/mkfile - 664 sys sys 948141303 46
			
 
				 386/pbs - 775 sys sys 1131317339 494
			
@@ -5391,7 +5391,7 @@ power/mkfile - 664 sys sys 948141304 46
 
				 rc - 20000000775 sys sys 944959447 0
			
 
				 rc/bin - 20000000775 sys sys 1018637942 0
			
 
				 rc/bin/9fat: - 775 sys sys 1133179689 367
			
 
				-rc/bin/9fs - 775 sys sys 1079969823 948
			
 
				+rc/bin/9fs - 775 sys sys 1138240042 1027
			
 
				 rc/bin/B - 775 sys sys 945617206 645
			
 
				 rc/bin/C - 775 sys sys 1127395076 855
			
 
				 rc/bin/Kill - 775 sys sys 1018637942 115
			
@@ -5591,46 +5591,35 @@ sparc64/lib - 20000000775 sys sys 1114458535 0
 
				 sparc64/mkfile - 664 sys sys 1114458667 46
			
 
				 sys - 20000000775 sys sys 952648870 0
			
 
				 sys/doc - 20000000775 sys sys 1018471272 0
			
 
				-sys/doc/-.2669382.gif - 664 sys sys 1019969850 2078
			
 
				 sys/doc/8½ - 20000000775 sys sys 945616779 0
			
 
				-sys/doc/8½/8½.html - 664 sys sys 1020895860 33484
			
 
				 sys/doc/8½/8½.ms - 664 sys sys 1020895859 31593
			
 
				 sys/doc/8½/8½.ps - 664 sys sys 1020895860 797150
			
 
				 sys/doc/8½/fig1.ps - 664 sys sys 1020895859 473747
			
 
				 sys/doc/8½/mkfile - 664 sys sys 1020895860 215
			
 
				-sys/doc/9.html - 664 sys sys 1136302690 87619
			
 
				 sys/doc/9.ms - 664 sys sys 953237044 84632
			
 
				 sys/doc/9.ps - 664 sys sys 960837924 508340
			
 
				-sys/doc/acid.html - 664 sys sys 1091459045 71723
			
 
				 sys/doc/acid.ms - 664 sys sys 1127411211 65062
			
 
				 sys/doc/acid.ps - 664 sys sys 1015012454 426359
			
 
				-sys/doc/acidpaper.html - 664 sys sys 1091459045 46880
			
 
				 sys/doc/acidpaper.ms - 664 sys sys 952880777 44805
			
 
				 sys/doc/acidpaper.ps - 664 sys sys 960837913 359639
			
 
				 sys/doc/acme - 20000000775 sys sys 945616779 0
			
 
				 sys/doc/acme/acme.fig1 - 664 sys sys 944959632 164559
			
 
				 sys/doc/acme/acme.fig2 - 664 sys sys 944959632 56026
			
 
				-sys/doc/acme/acme.html - 664 sys sys 1020013936 51769
			
 
				 sys/doc/acme/acme.ms - 664 sys sys 952880782 49851
			
 
				 sys/doc/acme/acme.pdf - 664 sys sys 1020384351 117006
			
 
				 sys/doc/acme/acme.ps - 664 sys sys 960837907 611301
			
 
				 sys/doc/acme/bs - 664 sys sys 944959634 556
			
 
				 sys/doc/acme/mkfile - 664 sys sys 961259926 304
			
 
				-sys/doc/ape.html - 664 sys sys 1091459042 14415
			
 
				 sys/doc/ape.ms - 664 sys sys 953344517 12595
			
 
				 sys/doc/ape.ps - 664 sys sys 960837914 258779
			
 
				-sys/doc/asm.html - 664 sys sys 1091459045 31098
			
 
				 sys/doc/asm.ms - 664 sys sys 958247686 28542
			
 
				 sys/doc/asm.ps - 664 sys sys 960837915 322051
			
 
				-sys/doc/auth.html - 664 sys sys 1091459079 77445
			
 
				 sys/doc/auth.ms - 664 sys sys 1021579975 66803
			
 
				 sys/doc/auth.ps - 664 sys sys 1021579976 451672
			
 
				 sys/doc/cleanps - 775 sys sys 961259933 184
			
 
				 sys/doc/colophon.ps - 664 sys sys 960837922 214122
			
 
				-sys/doc/comp.html - 664 sys sys 1091459046 42144
			
 
				 sys/doc/comp.ms - 664 sys sys 954266992 37792
			
 
				 sys/doc/comp.ps - 664 sys sys 960837915 345786
			
 
				-sys/doc/compiler.html - 664 sys sys 1091459044 32960
			
 
				 sys/doc/compiler.ms - 664 sys sys 1067721142 30279
			
 
				 sys/doc/compiler.ps - 664 sys sys 1091459054 309735
			
 
				 sys/doc/contents.ms - 664 sys sys 1019916701 4920
			
@@ -5641,7 +5630,6 @@ sys/doc/fossil.ms - 664 sys sys 1063856349 31400
 
				 sys/doc/fossil.pdf - 664 sys sys 1042123169 63200
			
 
				 sys/doc/fossil.ps - 664 sys sys 1135487951 313552
			
 
				 sys/doc/fs - 20000000775 sys sys 945616779 0
			
 
				-sys/doc/fs/fs.html - 664 sys sys 1020013937 21345
			
 
				 sys/doc/fs/fs.pdf - 664 sys sys 1020384351 47177
			
 
				 sys/doc/fs/fs.ps - 664 sys sys 960837905 276918
			
 
				 sys/doc/fs/mkfile - 664 sys sys 961259926 282
			
@@ -5656,7 +5644,6 @@ sys/doc/fs/p7 - 664 sys sys 953844581 958
 
				 sys/doc/fs/p8 - 664 sys sys 953844574 881
			
 
				 sys/doc/fs/xx - 664 sys sys 944959592 65957
			
 
				 sys/doc/il - 20000000775 sys sys 945616779 0
			
 
				-sys/doc/il/il.html - 664 sys sys 1020013937 12278
			
 
				 sys/doc/il/il.ms - 664 sys sys 952880783 11367
			
 
				 sys/doc/il/il.pdf - 664 sys sys 1020384351 44630
			
 
				 sys/doc/il/il.ps - 664 sys sys 960837905 258028
			
@@ -5666,29 +5653,21 @@ sys/doc/il/transition.fig - 664 sys sys 944959591 15431
 
				 sys/doc/il/transition.pic - 664 sys sys 944959591 11912
			
 
				 sys/doc/il/xx - 664 sys sys 944959591 48924
			
 
				 sys/doc/index.htm - 664 sys sys 1019916696 6906
			
 
				-sys/doc/index.html - 664 sys sys 1020082751 6906
			
 
				-sys/doc/lexnames.html - 664 sys sys 1091459043 37261
			
 
				 sys/doc/lexnames.ms - 664 sys sys 954383595 34046
			
 
				 sys/doc/lexnames.ps - 664 sys sys 960837909 335546
			
 
				-sys/doc/libmach.html - 664 sys sys 1091459044 26650
			
 
				 sys/doc/libmach.ms - 664 sys sys 1021579974 24145
			
 
				 sys/doc/libmach.ps - 664 sys sys 960837916 291283
			
 
				-sys/doc/lp.html - 664 sys sys 1091459043 21721
			
 
				 sys/doc/lp.ms - 664 sys sys 954614673 22366
			
 
				 sys/doc/lp.ps - 664 sys sys 960837917 294399
			
 
				-sys/doc/mk.html - 664 sys sys 1091459046 40021
			
 
				 sys/doc/mk.ms - 664 sys sys 952880779 34413
			
 
				 sys/doc/mk.ps - 664 sys sys 960837917 329779
			
 
				 sys/doc/mkfile - 664 sys sys 1091459055 4411
			
 
				-sys/doc/mkfiles.html - 664 sys sys 1091459045 17750
			
 
				 sys/doc/mkfiles.ms - 664 sys sys 952880779 17888
			
 
				 sys/doc/mkfiles.ps - 664 sys sys 960837918 269496
			
 
				-sys/doc/names.html - 664 sys sys 1091459046 23529
			
 
				 sys/doc/names.ms - 664 sys sys 954269607 22103
			
 
				 sys/doc/names.ps - 664 sys sys 960837918 288716
			
 
				 sys/doc/net - 20000000775 sys sys 954033300 0
			
 
				 sys/doc/net/mkfile - 664 sys sys 961259927 345
			
 
				-sys/doc/net/net.html - 664 sys sys 1020013937 43449
			
 
				 sys/doc/net/net.ms - 664 sys sys 952880783 41191
			
 
				 sys/doc/net/net.pdf - 664 sys sys 1020384351 82603
			
 
				 sys/doc/net/net.ps - 664 sys sys 960837908 350053
			
@@ -5696,25 +5675,19 @@ sys/doc/net/tree - 664 sys sys 944959636 866
 
				 sys/doc/net/tree.pout - 664 sys sys 944959636 1755
			
 
				 sys/doc/network.art - 664 sys sys 944959651 2260
			
 
				 sys/doc/network.pic - 664 sys sys 944959651 4124
			
 
				-sys/doc/plumb.html - 664 sys sys 1091459044 55783
			
 
				 sys/doc/plumb.ms - 664 sys sys 954383596 53250
			
 
				 sys/doc/plumb.ps - 664 sys sys 960837910 391830
			
 
				-sys/doc/port.html - 664 sys sys 1091459042 18123
			
 
				 sys/doc/port.ms - 664 sys sys 1020111393 16694
			
 
				 sys/doc/port.ps - 664 sys sys 1020111394 268901
			
 
				 sys/doc/preamble - 664 sys sys 961259497 203475
			
 
				 sys/doc/prfile - 775 sys sys 944959668 3782
			
 
				-sys/doc/prog4.html - 664 sys sys 1091459046 18653
			
 
				 sys/doc/prog4.ms - 664 sys sys 1019932830 16485
			
 
				 sys/doc/prog4.ps - 664 sys sys 1019932831 271581
			
 
				 sys/doc/ps - 664 sys sys 944959649 1739
			
 
				-sys/doc/rc.html - 664 sys sys 1091459044 41702
			
 
				 sys/doc/rc.ms - 664 sys sys 1063856321 34373
			
 
				 sys/doc/rc.ps - 664 sys sys 960837920 337955
			
 
				-sys/doc/release3.html - 664 sys sys 1019922810 6329
			
 
				 sys/doc/release3.ms - 664 sys sys 961261276 5492
			
 
				 sys/doc/release3.ps - 664 sys sys 961261277 230682
			
 
				-sys/doc/release4.html - 664 sys sys 1091459042 6473
			
 
				 sys/doc/release4.ms - 664 sys sys 1063856338 5160
			
 
				 sys/doc/release4.ps - 664 sys sys 1091459052 230868
			
 
				 sys/doc/sam - 20000000775 sys sys 945617037 0
			
@@ -5731,24 +5704,19 @@ sys/doc/sam/fig6.pic - 464 sys sys 944959644 1105
 
				 sys/doc/sam/fig7.pic - 464 sys sys 944959644 258
			
 
				 sys/doc/sam/mkfile - 664 sys sys 961259927 574
			
 
				 sys/doc/sam/refs - 464 sys sys 944959644 2652
			
 
				-sys/doc/sam/sam.html - 664 sys sys 1020013938 100660
			
 
				 sys/doc/sam/sam.ms - 464 sys sys 954266468 94536
			
 
				 sys/doc/sam/sam.pdf - 664 sys sys 1020384352 156123
			
 
				 sys/doc/sam/sam.ps - 664 sys sys 960837910 707546
			
 
				 sys/doc/sam/sam.tut - 464 sys sys 944959644 40481
			
 
				-sys/doc/sleep.html - 664 sys sys 1091459043 16602
			
 
				 sys/doc/sleep.ms - 664 sys sys 953237030 15206
			
 
				 sys/doc/sleep.ps - 664 sys sys 960837920 263882
			
 
				-sys/doc/spin.html - 664 sys sys 1091459042 75873
			
 
				 sys/doc/spin.ms - 664 sys sys 953344522 67475
			
 
				 sys/doc/spin.ps - 664 sys sys 960837923 443064
			
 
				 sys/doc/title - 664 sys sys 1018974170 740
			
 
				 sys/doc/title.ps - 664 sys sys 1018974170 214289
			
 
				 sys/doc/trademarks.ps - 664 sys sys 960837912 217896
			
 
				-sys/doc/troff.html - 664 sys sys 1019922811 110932
			
 
				 sys/doc/troff.ms - 664 sys sys 953237047 120683
			
 
				 sys/doc/troff.ps - 664 sys sys 1091459051 782310
			
 
				-sys/doc/utf.html - 664 sys sys 1091459044 43965
			
 
				 sys/doc/utf.ms - 664 sys sys 952880781 41659
			
 
				 sys/doc/utf.ps - 664 sys sys 960837922 363085
			
 
				 sys/doc/venti - 20000000775 sys sys 1019852318 0
			
@@ -5763,7 +5731,7 @@ sys/doc/venti/emelie.gif - 664 sys sys 1019852316 5004
 
				 sys/doc/venti/emelie2.gif - 664 sys sys 1019852317 4357
			
 
				 sys/doc/venti/mkfile - 664 sys sys 1019965454 79
			
 
				 sys/doc/venti/probablity.gif - 664 sys sys 1019852317 1244
			
 
				-sys/doc/venti/venti.html - 664 sys sys 1019852317 55272
			
 
				+sys/doc/venti/venti.html - 664 sys sys 1138233389 55272
			
 
				 sys/doc/venti/venti.pdf - 664 sys sys 1020384352 139090
			
 
				 sys/doc/venti/venti.ps - 664 sys sys 1019852320 2012620
			
 
				 sys/games - 20000000775 sys sys 952648872 0
			
@@ -7596,7 +7564,7 @@ sys/man/3/segment - 664 sys sys 1017423721 2378
 
				 sys/man/3/srv - 664 sys sys 958419690 1470
			
 
				 sys/man/3/ssl - 664 sys sys 1018386776 3413
			
 
				 sys/man/3/tls - 664 sys sys 1045501496 7018
			
 
				-sys/man/3/uart - 664 sys sys 1102093395 1710
			
 
				+sys/man/3/uart - 664 sys sys 1138191356 2003
			
 
				 sys/man/3/usb - 664 sys sys 1126971427 6960
			
 
				 sys/man/3/vga - 664 sys sys 1131301005 4957
			
 
				 sys/man/4 - 20000000775 sys sys 1018581459 0
			
@@ -7682,7 +7650,7 @@ sys/man/6/plot - 664 sys sys 944959679 6739
 
				 sys/man/6/plumb - 664 sys sys 969499892 10918
			
 
				 sys/man/6/regexp - 664 sys sys 954089523 2050
			
 
				 sys/man/6/rewrite - 664 sys sys 969499892 3235
			
 
				-sys/man/6/smtpd - 664 sys sys 971095216 8192
			
 
				+sys/man/6/smtpd - 664 sys sys 1138191586 8178
			
 
				 sys/man/6/snap - 664 sys sys 1132452694 2402
			
 
				 sys/man/6/thumbprint - 664 sys sys 1019866709 1124
			
 
				 sys/man/6/users - 664 sys sys 1130912014 1392
			
@@ -15530,7 +15498,7 @@ sys/src/libthread/xincmips.s - 664 sys sys 1014928160 674
 
				 sys/src/libthread/xincport.h - 664 sys sys 1127405405 211
			
 
				 sys/src/libthread/xincpower.s - 664 sys sys 1048645448 342
			
 
				 sys/src/libventi - 20000000775 sys sys 947360466 0
			
 
				-sys/src/libventi/client.c - 664 sys sys 1121977166 5362
			
 
				+sys/src/libventi/client.c - 664 sys sys 1138191441 5493
			
 
				 sys/src/libventi/debug.c - 664 sys sys 1045502093 1258
			
 
				 sys/src/libventi/errfmt.c - 664 sys sys 1019678691 133
			
 
				 sys/src/libventi/fatal.c - 664 sys sys 1084468118 225
			
--- a/dist/replica/plan9.log
+++ b/dist/replica/plan9.log
@@ -27199,3 +27199,49 @@
 
				 1138120255 0 c sys/src/9/port/devuart.c - 664 sys sys 1138118881 12141
			
 
				 1138127457 0 c sys/src/9/port/portdat.h - 664 sys sys 1138127259 22540
			
 
				 1138131057 0 c sys/games/lib/fortunes - 664 sys sys 1138129633 257147
			
 
				+1138192278 0 c sys/man/3/uart - 664 sys sys 1138191356 2003
			
 
				+1138192278 1 c sys/man/6/smtpd - 664 sys sys 1138191586 8178
			
 
				+1138192278 2 c sys/src/libventi/client.c - 664 sys sys 1138191441 5493
			
 
				+1138212084 0 c 386/bin/fossil/flchk - 775 sys sys 1138211975 237734
			
 
				+1138212084 1 c 386/bin/fossil/flfmt - 775 sys sys 1138211976 245881
			
 
				+1138212084 2 c 386/bin/fossil/fossil - 775 sys sys 1138211977 360656
			
 
				+1138212084 3 c 386/lib/libventi.a - 664 sys sys 1138211977 98048
			
 
				+1138230089 0 c sys/doc/9.html - 664 sys sys 1138229042 163762
			
 
				+1138230089 1 c sys/doc/auth.html - 664 sys sys 1138229043 256779
			
 
				+1138230089 2 a sys/doc/contents.html - 664 sys sys 1138229042 27104
			
 
				+1138230089 3 c sys/doc/names.html - 664 sys sys 1138229043 49369
			
 
				+1138233689 0 c sys/doc/venti/venti.html - 664 sys sys 1138233389 55272
			
 
				+1138233689 1 d sys/doc/utf.html - 664 sys sys 1091459044 0
			
 
				+1138233689 2 d sys/doc/troff.html - 664 sys sys 1019922811 0
			
 
				+1138233689 3 d sys/doc/spin.html - 664 sys sys 1091459042 0
			
 
				+1138233689 4 d sys/doc/sleep.html - 664 sys sys 1091459043 0
			
 
				+1138233689 5 d sys/doc/sam/sam.html - 664 sys sys 1020013938 0
			
 
				+1138233689 6 d sys/doc/release4.html - 664 sys sys 1091459042 0
			
 
				+1138233689 7 d sys/doc/release3.html - 664 sys sys 1019922810 0
			
 
				+1138233689 8 d sys/doc/rc.html - 664 sys sys 1091459044 0
			
 
				+1138233689 9 d sys/doc/prog4.html - 664 sys sys 1091459046 0
			
 
				+1138233689 10 d sys/doc/port.html - 664 sys sys 1091459042 0
			
 
				+1138233689 11 d sys/doc/plumb.html - 664 sys sys 1091459044 0
			
 
				+1138233689 12 d sys/doc/net/net.html - 664 sys sys 1020013937 0
			
 
				+1138233689 13 d sys/doc/names.html - 664 sys sys 1138229043 0
			
 
				+1138233689 14 d sys/doc/mkfiles.html - 664 sys sys 1091459045 0
			
 
				+1138233689 15 d sys/doc/mk.html - 664 sys sys 1091459046 0
			
 
				+1138233689 16 d sys/doc/lp.html - 664 sys sys 1091459043 0
			
 
				+1138233689 17 d sys/doc/libmach.html - 664 sys sys 1091459044 0
			
 
				+1138233689 18 d sys/doc/lexnames.html - 664 sys sys 1091459043 0
			
 
				+1138233689 19 d sys/doc/index.html - 664 sys sys 1020082751 0
			
 
				+1138233689 20 d sys/doc/il/il.html - 664 sys sys 1020013937 0
			
 
				+1138233689 21 d sys/doc/fs/fs.html - 664 sys sys 1020013937 0
			
 
				+1138233689 22 d sys/doc/contents.html - 664 sys sys 1138229042 0
			
 
				+1138233689 23 d sys/doc/compiler.html - 664 sys sys 1091459044 0
			
 
				+1138233689 24 d sys/doc/comp.html - 664 sys sys 1091459046 0
			
 
				+1138233689 25 d sys/doc/auth.html - 664 sys sys 1138229043 0
			
 
				+1138233689 26 d sys/doc/asm.html - 664 sys sys 1091459045 0
			
 
				+1138233689 27 d sys/doc/ape.html - 664 sys sys 1091459042 0
			
 
				+1138233689 28 d sys/doc/acme/acme.html - 664 sys sys 1020013936 0
			
 
				+1138233689 29 d sys/doc/acidpaper.html - 664 sys sys 1091459045 0
			
 
				+1138233689 30 d sys/doc/acid.html - 664 sys sys 1091459045 0
			
 
				+1138233689 31 d sys/doc/9.html - 664 sys sys 1138229042 0
			
 
				+1138233689 32 d sys/doc/8½/8½.html - 664 sys sys 1020895860 0
			
 
				+1138233689 33 d sys/doc/-.2669382.gif - 664 sys sys 1019969850 0
			
 
				+1138240891 0 c rc/bin/9fs - 775 sys sys 1138240042 1027
			
--- a/rc/bin/9fs
+++ b/rc/bin/9fs
@@ -20,7 +20,9 @@ case other
 
				 case juke
			
 
				 	srv -q il!jukefs && mount /srv/il!jukefs /n/njuke && bind -c /n/njuke/juke /n/juke
			
 
				 case sources
			
 
				-	srv -m tcp!sources.cs.bell-labs.com sources /n/sources
			
 
				+	srv -q tcp!sources.cs.bell-labs.com sources /n/sources &&
			
 
				+		{ mount /srv/sources /n/sources ||
			
 
				+		  mount -n /srv/sources /n/sources }
			
 
				 case sourcesdump
			
 
				 	9fs sources
			
 
				 	mount /srv/sources /n/sourcesdump main/archive
			
--- a/sys/doc/-.2669382.gif
+++ b/sys/doc/-.2669382.gif
--- a/sys/doc/8½/8½.html
+++ b/sys/doc/8½/8½.html
@@ -1,868 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>8&#189;, the Plan 9 Window System
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-rob@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Originally appeared, in a slightly different form, in
			
 
				-Proc. of the Summer 1991 USENIX Conf.,
			
 
				-pp. 257-265,
			
 
				-Nashville.
			
 
				-Note that
			
 
				-<TT>8&#189;</TT>
			
 
				-has been replaced by
			
 
				-<TT>rio</TT>
			
 
				-(see
			
 
				-<A href="/magic/man2html/1/rio"><I>rio</I>(1)).
			
 
				-</A></I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-The Plan 9 window system, 8&#189;, is a modest-sized program of novel design.
			
 
				-It provides textual I/O and bitmap graphic services to both
			
 
				-local and remote client programs by offering a multiplexed file service to those clients.
			
 
				-It serves traditional UNIX files like
			
 
				-<TT>/dev/tty</TT>
			
 
				-as well as more unusual ones that provide access to the mouse
			
 
				-and the raw screen.
			
 
				-Bitmap graphics operations are provided by serving a file called
			
 
				-<TT>/dev/bitblt</TT>
			
 
				-that interprets client messages to perform raster operations.
			
 
				-The file service that 8&#189; offers its clients is identical to that it uses for
			
 
				-its own implementation, so it is fundamentally no more than
			
 
				-a multiplexer.
			
 
				-This architecture has some rewarding symmetries and can be implemented
			
 
				-compactly.
			
 
				-</DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-In 1989 I constructed a toy window system from only a few hundred
			
 
				-lines of source code using a custom language and an unusual architecture
			
 
				-involving concurrent processes [Pike89].
			
 
				-Although that system was rudimentary at best, it demonstrated that
			
 
				-window systems are not inherently complicated.
			
 
				-The following year, for the new Plan 9 distributed system [Pike92], I applied some of
			
 
				-the lessons from that toy project to write, in C,
			
 
				-a production-quality window system
			
 
				-called 8&#189;.
			
 
				-8&#189; provides, on black-and-white, grey-scale, or color displays,
			
 
				-the services required of a modern window system, including
			
 
				-programmability and support for remote graphics.
			
 
				-The entire system, including the default program that runs in the 
			
 
				-window &#173; the equivalent of
			
 
				-<TT>xterm</TT>
			
 
				-[Far89] with `cutting and pasting' between windows &#173;
			
 
				-is well under 90 kilobytes of text on a Motorola 68020 processor, about
			
 
				-half the size of the
			
 
				-operating system
			
 
				-kernel that supports it and a tenth the size of the X server
			
 
				-[Sche86]
			
 
				-<I>without</I>
			
 
				-<TT>xterm</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-What makes 8&#189; so compact?  Much of the saving comes from overall simplicity:
			
 
				-8&#189; has little graphical fanciness, a concise programming interface, and
			
 
				-a simple, fixed user interface.
			
 
				-8&#189; also makes some decisions by fiat
			
 
				-&#173; three-button mouse, overlapping windows, built-in terminal program and
			
 
				-window manager, etc. &#173;
			
 
				-rather than trying to appeal to all tastes.
			
 
				-Although compact, 8&#189; is not ascetic.
			
 
				-It provides the fundamentals and
			
 
				-enough extras to make them comfortable to use.
			
 
				-The most important contributor to its small size, though, is its
			
 
				-overall design as a file server.
			
 
				-This structure may be applicable to window systems
			
 
				-on traditional UNIX-like operating systems.
			
 
				-</P>
			
 
				-<P>
			
 
				-The small size of 8&#189; does not reflect reduced functionality:
			
 
				-8&#189; provides service roughly equivalent to the X window system.
			
 
				-8&#189;'s clients may of course be as complex as they choose,
			
 
				-although the tendency to mimic 8&#189;'s design
			
 
				-and the clean programming interface means they
			
 
				-are not nearly as bloated as X applications.
			
 
				-</P>
			
 
				-<H4>User's Model
			
 
				-</H4>
			
 
				-<P>
			
 
				-8&#189; turns the single screen, mouse, and keyboard of the terminal
			
 
				-(in Plan 9 terminology) or workstation (in commercial terminology) into an array
			
 
				-of independent virtual terminals that may be textual terminals supporting a shell and
			
 
				-the usual suite of tools
			
 
				-or graphical applications using the full power of the bitmap screen and mouse.
			
 
				-Text is represented in UTF, an encoding of the Unicode Standard [Pike93].
			
 
				-The entire programming interface is provided through
			
 
				-reading and writing files in
			
 
				-<TT>/dev</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Primarily for reasons of history and familiarity,
			
 
				-the general model and appearance of 8&#189; are similar to those of
			
 
				-<TT>mux</TT>
			
 
				-[Pike88].
			
 
				-The right button has a short menu for controlling window creation, destruction,
			
 
				-and placement.
			
 
				-When a window is created, it runs the default shell,
			
 
				-<TT>rc</TT>
			
 
				-[Duff90], with standard input
			
 
				-and output directed to the window and accessible through the file
			
 
				-<TT>/dev/cons</TT>
			
 
				-(`console'),
			
 
				-analogous to the
			
 
				-<TT>/dev/tty</TT>
			
 
				-of UNIX.
			
 
				-The name change represents a break with the past: Plan 9 does not provide a
			
 
				-Teletype-style model of terminals.  8&#189; provides the only way
			
 
				-most users ever access Plan 9.
			
 
				-</P>
			
 
				-<P>
			
 
				-Graphical applications,
			
 
				-like ordinary programs,
			
 
				-may be run by typing their names
			
 
				-to the shell running in a window.
			
 
				-This runs the application in the same window;
			
 
				-to run the application in a new window one may use an external program,
			
 
				-<TT>window</TT>,
			
 
				-described below.
			
 
				-For graphical applications, the virtual terminal model
			
 
				-is extended somewhat to allow programs to perform graphical operations,
			
 
				-access the
			
 
				-mouse, and perform related functions by reading and writing files with
			
 
				-suggestive names such as
			
 
				-<TT>/dev/mouse</TT>
			
 
				-and
			
 
				-<TT>/dev/window</TT>
			
 
				-multiplexed per-window
			
 
				-much like
			
 
				-<TT>/dev/cons</TT>.
			
 
				-The implementation and semantics of these files,
			
 
				-described below, is central to the structure of 8&#189;.
			
 
				-</P>
			
 
				-<P>
			
 
				-The default program that runs in a window is familiar to users of Blit terminals [Pike83].
			
 
				-It is very similar to that of
			
 
				-<TT>mux</TT>
			
 
				-[Pike88], providing mouse-based editing of input and output text,
			
 
				-the ability to scroll back to see earlier output, and so on.
			
 
				-It also has a new feature, toggled by typing ESC,
			
 
				-that enables the user to control when
			
 
				-typed characters may be read by the shell or application,
			
 
				-instead of (for example) after each newline.
			
 
				-This feature makes the window program directly useful for many text-editing
			
 
				-tasks such as composing mail messages before sending them.
			
 
				-</P>
			
 
				-<H4>Plan 9 and 8&#189;
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 is a distributed system that provides support for UNIX-like applications
			
 
				-in an environment built from distinct CPU servers, file servers, and terminals
			
 
				-connected by a variety of networks [Pike90].
			
 
				-The terminals are comparable to modest workstations that, once connected to a file
			
 
				-server over a medium-bandwidth network such as Ethernet, are self-sufficient computers
			
 
				-running a full operating system.
			
 
				-Unlike workstations, however, their role is just to
			
 
				-provide an affordable multiplexed user interface to the rest of the system:
			
 
				-they run the window system and support simple interactive
			
 
				-tasks such as text editing.
			
 
				-Thus they lie somewhere between workstations and X terminals in design,
			
 
				-cost, performance, and function.
			
 
				-(The terminals can be used
			
 
				-for general computing, but in practice Plan 9 users do their
			
 
				-computing on the CPU servers.)
			
 
				-The Plan 9 terminal software, including 8&#189;,
			
 
				-was developed on a 68020-based
			
 
				-machine called a Gnot
			
 
				-and has been ported to
			
 
				-the NeXTstation,
			
 
				-the MIPS Magnum 3000,
			
 
				-SGI Indigos,
			
 
				-and Sun SPARCstations&#173;all small workstations that we use as terminals&#173;as
			
 
				-well as PCs.
			
 
				-</P>
			
 
				-<P>
			
 
				-Heavy computations such as compilation, text processing,
			
 
				-or scientific calculation are done on the CPU servers, which are connected
			
 
				-to the file servers by high-bandwidth networks.
			
 
				-For interactive work,
			
 
				-these computations can access the terminal that instantiated them.
			
 
				-The terminal and CPU server being used by a particular user are connected to the
			
 
				-same file server, although over different networks; Plan 9 provides a view of the
			
 
				-file server that is independent of location in the network.
			
 
				-</P>
			
 
				-<P>
			
 
				-The components of Plan 9 are connected by a common protocol based on the sharing of files.
			
 
				-All resources in the network are implemented as file servers; programs that wish to
			
 
				-access them connect to them over the network and communicate using ordinary file
			
 
				-operations.
			
 
				-An unusual aspect of Plan 9 is that the
			
 
				-name space
			
 
				-of a process, the set of files that can be accessed by name
			
 
				-(for example by an
			
 
				-<TT>open</TT>
			
 
				-system call) is not global to all processes on a machine; distinct processes
			
 
				-may have distinct name spaces.  The system provides methods by which processes
			
 
				-may change their name spaces, such as the ability to
			
 
				-<I>mount</I>
			
 
				-a service upon an existing directory, making the files of the service
			
 
				-visible in the directory.
			
 
				-(This is a different operation from its
			
 
				-UNIX
			
 
				-namesake.)
			
 
				-Multiple services may be mounted upon the same directory,
			
 
				-allowing the files from multiple services to be accessed in the same directory.
			
 
				-Options to the
			
 
				-<TT>mount</TT>
			
 
				-system call control the order of searching for files in such a
			
 
				-union directory.
			
 
				-</P>
			
 
				-<P>
			
 
				-The most obvious example of a network resource is a file server, where permanent
			
 
				-files reside.  There are a number of unusual services, however, whose design in
			
 
				-a different environment would likely not be file-based.  Many are described
			
 
				-elsewhere [Pike92]; some examples are the representation
			
 
				-of processes for debugging,
			
 
				-much like Killian's process files for the 8th edition [Kill84],
			
 
				-and the implementation of the name/value pairs of the
			
 
				-UNIX
			
 
				-<TT>exec</TT>
			
 
				-environment as files.
			
 
				-User processes may also implement a file service and make it available to clients
			
 
				-in the network, much like the `mounted streams' in the 9th Edition
			
 
				-[Pres90].
			
 
				-A typical example is a program that interprets an externally-defined file system
			
 
				-such as that on a CD-ROM or a standard
			
 
				-UNIX
			
 
				-system and makes the contents available to Plan 9 programs.
			
 
				-This design is used by all distributed applications in Plan 9, including 8&#189;.
			
 
				-</P>
			
 
				-<P>
			
 
				-8&#189; serves a set of files in the conventional directory
			
 
				-<TT>/dev</TT>
			
 
				-with names like
			
 
				-<TT>cons</TT>,
			
 
				-<TT>mouse</TT>,
			
 
				-and
			
 
				-<TT>screen</TT>.
			
 
				-Clients of 8&#189; communicate with the window system by reading and writing
			
 
				-these files.
			
 
				-For example, a client program, such as a shell,
			
 
				-can print text by writing its standard output, which is automatically
			
 
				-connected to
			
 
				-<TT>/dev/cons</TT>,
			
 
				-or it may open and write that file explicitly.
			
 
				-Unlike files served by a traditional file server, however, the instance of
			
 
				-<TT>/dev/cons</TT>
			
 
				-served in each window by 8&#189; is a distinct file;
			
 
				-the per-process name spaces of Plan 9 allow 8&#189; to provide a unique
			
 
				-<TT>/dev/cons</TT>
			
 
				-to each client.
			
 
				-This mechanism is best illustrated by the creation of a new 8&#189; client.
			
 
				-</P>
			
 
				-<P>
			
 
				-When 8&#189; starts, it creates a full-duplex pipe to be the communication
			
 
				-medium for the messages that implement the file service it will provide.
			
 
				-One end will be shared by all the clients; the other end is held by
			
 
				-8&#189; to accept requests for I/O.
			
 
				-When a user makes a new window using the mouse,
			
 
				-8&#189; allocates the window data structures and forks a child process.
			
 
				-The child's name space,
			
 
				-initially shared with the parent,
			
 
				-is then duplicated
			
 
				-so that changes the child makes to its name space will not affect the parent.
			
 
				-The child then attaches its end of the communication pipe,
			
 
				-<TT>cfd</TT>,
			
 
				-to the directory
			
 
				-<TT>/dev</TT>
			
 
				-by doing a
			
 
				-<TT>mount</TT>
			
 
				-system call:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mount(cfd, "/dev", MBEFORE, buf)
			
 
				-</PRE></TT></DL>
			
 
				-This call attaches the service associated with the file descriptor
			
 
				-<TT>cfd</TT>
			
 
				-&#173; the client end of the pipe &#173; to the beginning of
			
 
				-<TT>/dev</TT>
			
 
				-so that the files in the new service take priority over existing files
			
 
				-in the directory.
			
 
				-This makes the new files
			
 
				-<TT>cons</TT>,
			
 
				-<TT>mouse</TT>,
			
 
				-and so on,
			
 
				-available in
			
 
				-<TT>/dev</TT>
			
 
				-in a way that hides any files with the same names already in place.
			
 
				-The argument
			
 
				-<TT>buf</TT>
			
 
				-is a character string (null in this case),
			
 
				-described below.
			
 
				-</P>
			
 
				-<P>
			
 
				-The client process then closes file descriptors 0, 1, and 2 and opens
			
 
				-<TT>/dev/cons</TT>
			
 
				-repeatedly to connect the standard
			
 
				-input, output, and error files to the window's
			
 
				-<TT>/dev/cons</TT>.
			
 
				-It then does an
			
 
				-<TT>exec</TT>
			
 
				-system call to begin executing the shell in the window.
			
 
				-This entire sequence, complete with error handling, is 33 lines of C.
			
 
				-</P>
			
 
				-<P>
			
 
				-The view of these events from 8&#189;'s end of the pipe is a sequence
			
 
				-of file protocol messages from the new client generated by the
			
 
				-intervening operating
			
 
				-system in response to the
			
 
				-<TT>mount</TT>
			
 
				-and
			
 
				-<TT>open</TT>
			
 
				-system calls executed by the client.
			
 
				-The message generated by the
			
 
				-<TT>mount</TT>
			
 
				-informs 8&#189; that a new client has attached to the file service it provides;
			
 
				-8&#189;'s response is a unique identifier kept by the operating system and
			
 
				-passed in all messages generated by I/O on the files derived from that
			
 
				-<TT>mount</TT>.
			
 
				-This identifier is used by 8&#189; to distinguish the various clients so
			
 
				-each sees a unique
			
 
				-<TT>/dev/cons</TT>;
			
 
				-most servers do not need to make this distinction.
			
 
				-</P>
			
 
				-<P>
			
 
				-A process unrelated to 8&#189; may create windows by a variant of this mechanism.
			
 
				-When 8&#189; begins, it uses a Plan 9 service to `post' the client end of the
			
 
				-communication pipe in a public place.
			
 
				-A process may open that pipe and
			
 
				-<TT>mount</TT>
			
 
				-it to attach to the window system,
			
 
				-much in the way an X client may connect to a
			
 
				-UNIX
			
 
				-domain socket to the server bound to the file system.
			
 
				-The final argument to
			
 
				-<TT>mount</TT>
			
 
				-is passed through uninterpreted by the operating
			
 
				-system.
			
 
				-It provides a way for the client and server to
			
 
				-exchange information at the time of the
			
 
				-<TT>mount</TT>.
			
 
				-8&#189; interprets it as the dimensions of the window to be
			
 
				-created for the new client.  (In the case above, the window has been
			
 
				-created by the time the mount occurs, and
			
 
				-<TT>buf</TT>
			
 
				-carries no information.)
			
 
				-When the
			
 
				-<TT>mount</TT>
			
 
				-returns, the process can open the files of the new window and begin I/O to
			
 
				-use it.
			
 
				-</P>
			
 
				-<P>
			
 
				-Because 8&#189;'s interface is based on files,
			
 
				-standard system utilities can be used to control its services.
			
 
				-For example,
			
 
				-its method of creating windows externally is packaged in a
			
 
				-16-line shell script, called
			
 
				-<TT>window</TT>,
			
 
				-the core of which is just a
			
 
				-<TT>mount</TT>
			
 
				-operation that prefixes 8&#189;'s directory to
			
 
				-<TT>/dev</TT>
			
 
				-and runs a command passed on the argument line:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mount -b $'8&#189;serv' /dev
			
 
				-$* &#60; /dev/cons &#62; /dev/cons &#62;[2] /dev/cons &amp;
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>window</TT>
			
 
				-program is typically employed by users to create their
			
 
				-initial working environment when they boot the system, although
			
 
				-it has more general possibilities.
			
 
				-</P>
			
 
				-<P>
			
 
				-Other basic features of the system fall out naturally from the
			
 
				-file-based model.
			
 
				-When the user deletes a window, 8&#189; sends the equivalent of a
			
 
				-UNIX
			
 
				-signal to the process group &#173; the clients &#173; in the window,
			
 
				-removes the window from the screen, and poisons the incoming connections
			
 
				-to the files that drive it.  If a client ignores the signal and
			
 
				-continues to write to the window, it will get I/O errors.
			
 
				-If, on the other hand, all the processes in a window exit spontaneously,
			
 
				-they will automatically close all connections to the window.
			
 
				-8&#189; counts references to the window's files; when none are left,
			
 
				-it shuts down the window and removes it from the screen.
			
 
				-As a different example, when the user hits the DEL key to generate an
			
 
				-interrupt,
			
 
				-8&#189; writes a message to a special file, provided by Plan 9's
			
 
				-process control interface, that interrupts all the processes
			
 
				-in the window.
			
 
				-In all these examples, the implementation works seamlessly
			
 
				-across a network.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are two valuable side effects of implementing
			
 
				-a window system by multiplexing
			
 
				-<TT>/dev/cons</TT>
			
 
				-and other such files.
			
 
				-First, the problem of giving a meaningful
			
 
				-interpretation to the file
			
 
				-<TT>/dev/cons</TT>
			
 
				-(<TT>/dev/tty</TT>)
			
 
				-in each window is solved automatically.
			
 
				-To provide
			
 
				-<TT>/dev/cons</TT>
			
 
				-is the fundamental job of the window system, rather than just an awkward burden;
			
 
				-other systems must often make special and otherwise irrelevant arrangements for
			
 
				-<TT>/dev/tty</TT>
			
 
				-to behave as expected in a window.
			
 
				-Second, any program that can access the server, including a
			
 
				-process on a remote machine, can access the files using standard
			
 
				-read and write system calls to communicate with the window system,
			
 
				-and standard open and close calls to connect to it.
			
 
				-Again, no special arrangements need to be made for remote processes to
			
 
				-use all the graphics facilities of 8&#189;.
			
 
				-</P>
			
 
				-<H4>Graphical input
			
 
				-</H4>
			
 
				-<P>
			
 
				-Of course 8&#189; offers more than ASCII I/O to its clients.
			
 
				-The state of the mouse may be discovered by reading the file
			
 
				-<TT>/dev/mouse</TT>,
			
 
				-which returns a ten-byte message encoding the state
			
 
				-of the buttons and the position of the cursor.
			
 
				-If the mouse has not moved since the last read of
			
 
				-<TT>/dev/mouse</TT>,
			
 
				-or if the window associated with the instance of
			
 
				-<TT>/dev/mouse</TT>
			
 
				-is not the `input focus', the read blocks.
			
 
				-</P>
			
 
				-<P>
			
 
				-The format of the message is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<TT>'m'</TT>
			
 
				-1 byte of button state
			
 
				-4 bytes of x, low byte first
			
 
				-4 bytes of y, low byte first
			
 
				-</PRE></TT></DL>
			
 
				-As in all shared data structures in Plan 9,
			
 
				-the order of every byte in the message is defined 
			
 
				-so all clients can execute the same code to unpack the message
			
 
				-into a local data structure.
			
 
				-</P>
			
 
				-<P>
			
 
				-For keyboard input, clients can read
			
 
				-<TT>/dev/cons</TT>
			
 
				-or, if they need character-at-a-time input,
			
 
				-<TT>/dev/rcons</TT>
			
 
				-(`raw console').
			
 
				-There is no explicit event mechanism to help clients that need to read
			
 
				-from multiple sources.
			
 
				-Instead, a small (365 line) external
			
 
				-support library can be used.
			
 
				-It attaches a process
			
 
				-to the various blocking input sources &#173; mouse, keyboard, and perhaps
			
 
				-a third user-provided file descriptor &#173;
			
 
				-and funnels their input into a single pipe from which may be read
			
 
				-the various types of
			
 
				-events in the traditional style.
			
 
				-This package is a compromise.  As discussed in a previous paper
			
 
				-[Pike89] I prefer
			
 
				-to free applications from event-based programming.  Unfortunately, though, I see
			
 
				-no easy way to achieve this in single-threaded C programs, and am unwilling
			
 
				-to require all programmers to master concurrent programming.
			
 
				-It should be noted, though, that even this compromise results in a small
			
 
				-and easily understood interface.  An example program that uses it is
			
 
				-given near the end of the paper.
			
 
				-</P>
			
 
				-<H4>Graphical output
			
 
				-</H4>
			
 
				-<P>
			
 
				-The file
			
 
				-<TT>/dev/screen</TT>
			
 
				-may be read by any client to recover the contents of the entire screen,
			
 
				-such as for printing (see Figure 1).
			
 
				-Similarly,
			
 
				-<TT>/dev/window</TT>
			
 
				-holds the contents of the current window.
			
 
				-These are read-only files.
			
 
				-</P>
			
 
				-<P>
			
 
				-To perform graphics operations in their windows, client programs access
			
 
				-<TT>/dev/bitblt</TT>.
			
 
				-It implements a protocol that encodes bitmap graphics operations.
			
 
				-Most of the messages in the protocol (there are 23 messages in all, about
			
 
				-half to manage the multi-level fonts necessary for efficient handling
			
 
				-of Unicode characters)
			
 
				-are transmissions (via a write)
			
 
				-from the client to the window system to perform a graphical
			
 
				-operation such as a
			
 
				-<TT>bitblt</TT>
			
 
				-[PLR85] or character-drawing operation; a few include return information
			
 
				-(recovered via a read) to the client.
			
 
				-As with
			
 
				-<TT>/dev/mouse</TT>,
			
 
				-the
			
 
				-<TT>/dev/bitblt</TT>
			
 
				-protocol is in a defined byte order.
			
 
				-Here, for example, is the layout of the
			
 
				-<TT>bitblt</TT>
			
 
				-message:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<TT>'b'</TT>
			
 
				-2 bytes of destination id
			
 
				-2x4 bytes of destination point
			
 
				-2 bytes of source id
			
 
				-4x4 bytes of source rectangle
			
 
				-2 bytes of boolean function code
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-Figure 1.
			
 
				-A representative 8&#189; screen, running on a NeXTstation under Plan 9
			
 
				-(with no NeXT software).  In the upper right, a program announces the
			
 
				-arrival of mail.  In the top and left are a broswer for astronomical
			
 
				-databases and an image of a galaxy produced by the browser.
			
 
				-In the lower left there is a screen editor,
			
 
				-<TT>sam</TT>
			
 
				-[Pike87],
			
 
				-editing Japanese text encoded in UTF,
			
 
				-and in the lower right an 8&#189; running recursively and, inside that instantiation,
			
 
				-a previewer for
			
 
				-<TT>troff</TT>
			
 
				-output.
			
 
				-Underneath the faces is a small window running the command that
			
 
				-prints the screen by passing
			
 
				-<TT>/dev/screen</TT>
			
 
				-to the bitmap printing utility.
			
 
				-<br>&#32;<br>
			
 
				-</dl>
			
 
				-<P>
			
 
				-The message is trivially constructed from the
			
 
				-<TT>bitblt</TT>
			
 
				-subroutine in the library, defined as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void bitblt(Bitmap *dst, Point dp,
			
 
				-            Bitmap *src, Rectangle sr, Fcode c).
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The `id'
			
 
				-fields in the message indicate another property of 8&#189;:
			
 
				-the clients do not store the actual data for any of their bitmaps locally.
			
 
				-Instead, the protocol provides a message to allocate a bitmap, to be
			
 
				-stored in the server, and returns to the client an integer identifier,
			
 
				-much like a
			
 
				-UNIX
			
 
				-file descriptor, to be used in operations on that bitmap.
			
 
				-Bitmap number 0 is conventionally the client's window,
			
 
				-analogous to standard input for file I/O.
			
 
				-In fact, no bitmap graphics operations are executed in the client at all;
			
 
				-they are all performed on its behalf by the server.
			
 
				-Again, using the standard remote file operations in Plan 9,
			
 
				-this permits remote machines having no graphics capability, such
			
 
				-as the CPU server,
			
 
				-to run graphics applications.
			
 
				-Analogous features of the original Andrew window system [Gos86]
			
 
				-and of X [Sche86] require more complex mechanisms.
			
 
				-</P>
			
 
				-<P>
			
 
				-Nor does 8&#189; itself operate directly on bitmaps.
			
 
				-Instead, it calls another server to do its graphics operations for it,
			
 
				-using an identical protocol.
			
 
				-The operating system for the Plan 9 terminals contains an internal
			
 
				-server that implements that protocol, exactly as does 8&#189;, but for a single
			
 
				-client.  That server stores the actual bytes for the bitmaps
			
 
				-and implements the fundamental bitmap graphics operations.
			
 
				-Thus the environment in which 8&#189; runs
			
 
				-has exactly the structure it provides for its clients;
			
 
				-8&#189; reproduces the environment for its clients,
			
 
				-multiplexing the interface to keep the clients separate.
			
 
				-</P>
			
 
				-<P>
			
 
				-This idea of multiplexing by simulation is applicable to more
			
 
				-than window systems, of course, and has some side effects.
			
 
				-Since 8&#189; simulates its own environment for its clients, it may run
			
 
				-in one of its own windows (see Figure 1).
			
 
				-A useful and common application of this
			
 
				-technique is to connect a window to a remote machine, such as a CPU
			
 
				-server, and run the window system there so that each subwindow is automatically
			
 
				-on the remote machine.
			
 
				-It is also a handy way to debug a new version of the window system
			
 
				-or to create an environment with, for example, a different default font.
			
 
				-</P>
			
 
				-<H4>Implementation
			
 
				-</H4>
			
 
				-<P>
			
 
				-To provide graphics to its clients, 8&#189; mostly just multiplexes and passes
			
 
				-through to its own server the clients' requests, occasionally rearranging
			
 
				-the messages to maintain the fiction that the clients have unique screens
			
 
				-(windows).
			
 
				-To manage the overlapping windows it uses the layers model,
			
 
				-which is handled by a separate library [Pike83a].
			
 
				-Thus it has little work to do and is a fairly simple program;
			
 
				-it is dominated by a couple of switch statements to interpret
			
 
				-the bitmap and file server protocols.
			
 
				-The built-in window program and its associated menus and text-management
			
 
				-support are responsible for most of the code.
			
 
				-</P>
			
 
				-<P>
			
 
				-The operating system's server is also compact:
			
 
				-the version for the 68020 processor, excluding the implementation
			
 
				-of a half dozen bitmap graphics operations, is 2295 lines of C
			
 
				-(again, about half dealing with fonts);
			
 
				-the graphics operations are another 2214 lines.
			
 
				-</P>
			
 
				-<P>
			
 
				-8&#189; is structured as a set of communicating coroutines,
			
 
				-much as discussed in a 1989 paper [Pike89].
			
 
				-One coroutine manages the mouse, another the keyboard, and another
			
 
				-is instantiated to manage the state of each window and associated client.
			
 
				-When no coroutine wishes to run, 8&#189; reads the next file I/O request from
			
 
				-its clients, which arrive serially on the full-duplex communication pipe.
			
 
				-Thus 8&#189; is entirely synchronous.
			
 
				-</P>
			
 
				-<P>
			
 
				-The program source is small and compiles in about 10 seconds
			
 
				-in our Plan 9 environment.  There are ten source files and
			
 
				-one
			
 
				-<TT>makefile</TT>
			
 
				-totaling 5100 lines.
			
 
				-This includes the source for the window management process,
			
 
				-the cut-and-paste terminal program,
			
 
				-the window/file server itself,
			
 
				-and a small coroutine library
			
 
				-(<TT>proc.c</TT>).
			
 
				-It does not include the layer library
			
 
				-(another 1031 lines)
			
 
				-or the library to handle the cutting and pasting of text
			
 
				-displayed in a window (960 lines),
			
 
				-or the general graphics support library that manages all the
			
 
				-non-drawing aspects of graphics &#173; arithmetic on points and rectangles,
			
 
				-memory management, error handling, clipping, &#173; plus fonts,
			
 
				-events, and non-primitive drawing operations such as circles and ellipses
			
 
				-(a final 3051 lines).
			
 
				-Not all the pieces of these libraries are used by 8&#189; itself;
			
 
				-a large part of the graphics library in particular is used only by clients.
			
 
				-Thus it is somewhat unfair to 8&#189; just to sum these numbers, including
			
 
				-the 4509 lines of support in the kernel, and arrive
			
 
				-at a total implementation size of 14651 lines of source to implement
			
 
				-all of 8&#189; from the lowest levels to the highest.
			
 
				-But that number gives a fair measure of the complexity of the overall system.
			
 
				-</P>
			
 
				-<P>
			
 
				-The implementation is also efficient.
			
 
				-8&#189;'s performance is competitive with X windows'.
			
 
				-Compared using Dunwoody's and Linton's
			
 
				-<TT>gbench</TT>
			
 
				-benchmarks on the 68020,
			
 
				-distributed with the ``X Test Suite'',
			
 
				-circles and arcs are drawn about half as fast in 8&#189; as in
			
 
				-X11 release 4 compiled with
			
 
				-<TT>gcc</TT>
			
 
				-for equivalent hardware,
			
 
				-probably because they are currently implemented in a user library
			
 
				-by calls to the
			
 
				-<TT>point</TT>
			
 
				-primitive.
			
 
				-Line drawing speed is about equal between the two systems.
			
 
				-Unicode text is drawn at about the same speed by 8&#189; as ASCII text by
			
 
				-X, and
			
 
				-the
			
 
				-<TT>bitblt</TT>
			
 
				-test runs four times faster for 8&#189;.
			
 
				-These numbers vary enough to caution against drawing sweeping
			
 
				-conclusions, but they
			
 
				-suggest that 8&#189;'s architecture does not penalize its performance.
			
 
				-Finally, 8&#189; boots in under a second and creates a new window
			
 
				-apparently instantaneously.
			
 
				-</P>
			
 
				-<H4>An example
			
 
				-</H4>
			
 
				-<P>
			
 
				-Here is a complete program that runs under 8&#189;.
			
 
				-It prints the string
			
 
				-<TT>"hello world"</TT>
			
 
				-wherever the left mouse button is depressed, and exits when the
			
 
				-right mouse button is depressed.
			
 
				-It also prints the string in the center of its window, and maintains
			
 
				-that string when the window is resized.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#include &#60;u.h&#62;
			
 
				-#include &#60;libc.h&#62;
			
 
				-#include &#60;libg.h&#62;
			
 
				-
			
 
				-void
			
 
				-ereshaped(Rectangle r)
			
 
				-{
			
 
				-    Point p;
			
 
				-
			
 
				-    screen.r = r;
			
 
				-    bitblt(&amp;screen, screen.r.min, &amp;screen, r, Zero); /* clear */
			
 
				-    p.x = screen.r.min.x + Dx(screen.r)/2;
			
 
				-    p.y = screen.r.min.y + Dy(screen.r)/2;
			
 
				-    p = sub(p, div(strsize(font, "hello world"), 2));
			
 
				-    string(&amp;screen, p, font, "hello world", S);
			
 
				-}
			
 
				-
			
 
				-main(void)
			
 
				-{
			
 
				-    Mouse m;
			
 
				-
			
 
				-    binit(0, 0, 0);	/* initialize graphics library */
			
 
				-    einit(Emouse);	/* initialize event library */
			
 
				-    ereshaped(screen.r);
			
 
				-    for(;;){
			
 
				-        m = emouse();
			
 
				-        if(m.buttons &amp; RIGHTB)
			
 
				-            break;
			
 
				-        if(m.buttons &amp; LEFTB){
			
 
				-            string(&amp;screen, m.xy, font, "hello world", S);
			
 
				-            /* wait for release of button */
			
 
				-            do; while(emouse().buttons &amp; LEFTB);
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The complete loaded binary is a little over 26K bytes on a 68020.
			
 
				-This program should be compared to the similar ones in the excellent paper
			
 
				-by Rosenthal [Rose88].
			
 
				-(The current program does more: it also employs the mouse.)
			
 
				-The clumsiest part is
			
 
				-<TT>ereshaped</TT>,
			
 
				-a function with a known name that is called from the event library
			
 
				-whenever the window is
			
 
				-reshaped or moved, as is discovered inelegantly but adequately
			
 
				-by a special case of a mouse message.
			
 
				-(Simple so-called expose events are not events
			
 
				-at all in 8&#189;; the layer library takes care of them transparently.)
			
 
				-The lesson of this program, with deference to Rosenthal, is that if
			
 
				-the window system is cleanly designed a toolkit should be unnecessary
			
 
				-for simple tasks.
			
 
				-</P>
			
 
				-<H4>Status
			
 
				-</H4>
			
 
				-<P>
			
 
				-As of 1992, 8&#189; is in regular daily use by almost all the 60 people in our
			
 
				-research center.  Some of those people use it to access Plan 9 itself; others
			
 
				-use it as a front end to remote
			
 
				-UNIX
			
 
				-systems, much as one would use an X terminal.
			
 
				-</P>
			
 
				-<P>
			
 
				-Some things about 8&#189; may change.
			
 
				-It would be nice if its capabilities were more easily accessible
			
 
				-from the shell.
			
 
				-A companion to this paper [Pike91] proposes one way to do this,
			
 
				-but that does not include any graphics functionality.
			
 
				-Perhaps a textual version of the
			
 
				-<TT>/dev/bitblt</TT>
			
 
				-file is a way to proceed; that would allow, for example,
			
 
				-<TT>awk</TT>
			
 
				-programs to draw graphs directly.
			
 
				-</P>
			
 
				-<P>
			
 
				-Can this style of window system be built on other operating systems?
			
 
				-A major part of the design of 8&#189; depends on its structure as a file server.
			
 
				-In principle this could be done for any system that supports user processes
			
 
				-that serve files, such as any system running NFS or AFS [Sun89, Kaza87].
			
 
				-One requirement, however, is 8&#189;'s need
			
 
				-to respond to its clients' requests out of order:
			
 
				-if one client reads
			
 
				-<TT>/dev/cons</TT>
			
 
				-in a window with no characters to be read,
			
 
				-other clients should be able to perform I/O in their windows, or even
			
 
				-the same window.
			
 
				-Another constraint is that the 8&#189; files are like devices,
			
 
				-and must not be cached by the client.
			
 
				-NFS cannot honor these requirements; AFS may be able to.
			
 
				-Of course, other interprocess communication mechanisms such as sockets
			
 
				-could be used as a basis for a window system.  One may even argue that
			
 
				-X's model fits into this overall scheme.  It may prove easy and worthwhile
			
 
				-to write a small 8&#189;-like system for commercial
			
 
				-UNIX
			
 
				-systems to demonstrate that its merits can be won in systems other than
			
 
				-Plan 9.
			
 
				-</P>
			
 
				-<H4>Conclusion
			
 
				-</H4>
			
 
				-<P>
			
 
				-In conclusion, 8&#189; uses an unusual architecture in
			
 
				-concert with the file-oriented interprocess communication of Plan 9
			
 
				-to provide network-based interactive graphics to client programs.
			
 
				-It demonstrates that even production-quality window systems are not
			
 
				-inherently large or complicated
			
 
				-and may be simple to use and to program.
			
 
				-</P>
			
 
				-<H4>Acknowledgements
			
 
				-</H4>
			
 
				-<P>
			
 
				-Helpful comments on early drafts of this paper were made by
			
 
				-Doug Blewett,
			
 
				-Stu Feldman,
			
 
				-Chris Fraser,
			
 
				-Brian Kernighan,
			
 
				-Dennis Ritchie,
			
 
				-and Phil Winterbottom.
			
 
				-8&#189;'s support for color was added by Howard Trickey.
			
 
				-Many of the ideas leading to 8&#189; were tried out in earlier, sometimes less
			
 
				-successful, programs.  I would like to thank those users who suffered
			
 
				-through some of my previous 7&#189; window systems.
			
 
				-</P>
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Duff90] Tom Duff, ``Rc - A Shell for Plan 9 and UNIX systems'', Proc. of the Summer 1990 UKUUG Conf., London, July, 1990, pp. 21-33, reprinted, in a different form, in this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Far89] Far too many people, XTERM(1), Massachusetts Institute of Technology, 1989.
			
 
				-<br>&#32;<br>
			
 
				-[Gos86] James Gosling and David Rosenthal,
			
 
				-``A window manager for bitmapped displays and UNIX'', in Methodology of Window Management, edited by F.R.A. Hopgood et al., Springer, 1986.
			
 
				-<br>&#32;<br>
			
 
				-[Kaza87] Mike Kazar, ``Synchronization and Caching issues in the Andrew File System'', Tech. Rept. CMU-ITC-058, Information Technology Center, Carnegie Mellon University, June, 1987.
			
 
				-<br>&#32;<br>
			
 
				-[Kill84] Tom Killian, ``Processes as Files'', USENIX Summer Conf. Proc., Salt Lake City June, 1984.
			
 
				-<br>&#32;<br>
			
 
				-[Pike83] Rob Pike, ``The Blit: A Multiplexed Graphics Terminal'', Bell Labs Tech. J., V63, #8, part 2, pp. 1607-1631.
			
 
				-<br>&#32;<br>
			
 
				-[Pike83a] Rob Pike, ``Graphics in Overlapping Bitmap Layers'', Trans. on Graph., Vol 2, #2, 135-160, reprinted in Proc. SIGGRAPH '83, pp. 331-356.
			
 
				-<br>&#32;<br>
			
 
				-[Pike87] Rob Pike, ``The Text Editor <TT>sam</TT>'', Softw. - Prac. and Exp., Nov 1987, Vol 17 #11, pp. 813-845, reprinted in this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Pike88] Rob Pike, ``Window Systems Should Be Transparent'', Comp. Sys., Summer 1988, Vol 1 #3, pp. 279-296.
			
 
				-<br>&#32;<br>
			
 
				-[Pike89] Rob Pike, ``A Concurrent Window System'', Comp. Sys., Spring 1989, Vol 2 #2, pp. 133-153.
			
 
				-<br>&#32;<br>
			
 
				-[Pike91] Rob Pike, ``A Minimalist Global User Interface'', USENIX Summer Conf. Proc., Nashville, June, 1991.
			
 
				-<br>&#32;<br>
			
 
				-[Pike92] Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom, ``The Use of Name Spaces in Plan 9'',
			
 
				-Operating Systems Review
			
 
				-Vol 27, #2, Apr 1993, pp. 72-76
			
 
				-(reprinted from Proceedings of the 5th ACM SIGOPS European Workshop, Mont Saint-Michel, 1992, Paper n&#186; 34, and reprinted in this volume).
			
 
				-<br>&#32;<br>
			
 
				-[Pike94] Rob Pike and Ken Thompson, ``Hello World or &#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949; or &#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'', USENIX Winter Conf. Proc., San Diego, Jan, 1993, reprinted in this volume.
			
 
				-<br>&#32;<br>
			
 
				-[PLR85] Rob Pike, Bart Locanthi and John Reiser, ``Hardware/Software Tradeoffs for Bitmap Graphics on the Blit'', Softw. - Prac. and Exp., Feb 1985, Vol 15 #2, pp. 131-152.
			
 
				-<br>&#32;<br>
			
 
				-[Pres90] David L. Presotto and Dennis M. Ritchie, ``Interprocess Communication in the Ninth Edition Unix System'', Softw. - Prac. and Exp., June 1990, Vol 20 #S1, pp. S1/3-S1/17.
			
 
				-<br>&#32;<br>
			
 
				-[Rose88] David Rosenthal, ``A Simple X11 Client Program -or- How hard can it really be to write ``Hello, World''?'', USENIX Winter Conf. Proc., Dallas, Jan, 1988, pp. 229-242.
			
 
				-<br>&#32;<br>
			
 
				-[Sche86] Robert W. Scheifler and Jim Gettys,
			
 
				-``The X Window System'',
			
 
				-ACM Trans. on Graph., Vol 5 #2, pp. 79-109.
			
 
				-<br>&#32;<br>
			
 
				-[Sun89] Sun Microsystems, NFS: Network file system protocol specification,
			
 
				-RFC 1094, Network Information Center, SRI International, March, 1989.
			
 
				-<br>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/9.html
+++ b/sys/doc/9.html
@@ -1,2373 +0,0 @@
 
				-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
			
 
				-<html>
			
 
				-<title>
			
 
				--
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<center><H1>Plan 9 from Bell Labs
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-Dave Presotto<br>
			
 
				-Sean Dorward<br>
			
 
				-Bob Flandrena<br>
			
 
				-Ken Thompson<br>
			
 
				-Howard Trickey<br>
			
 
				-Phil Winterbottom<br>
			
 
				-Bell Laboratories, Murray Hill, NJ, 07974
			
 
				-USA<br>
			
 
				-</center></I></DL>
			
 
				-<H4>Motivation
			
 
				-</H4>
			
 
				-<P>
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Appeared in a slightly different form in
			
 
				-Computing Systems,
			
 
				-Vol 8 #3, Summer 1995, pp. 221-254.
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-By the mid 1980's, the trend in computing was
			
 
				-away from large centralized time-shared computers towards
			
 
				-networks of smaller, personal machines,
			
 
				-typically UNIX `workstations'.
			
 
				-People had grown weary of overloaded, bureaucratic timesharing machines
			
 
				-and were eager to move to small, self-maintained systems, even if that
			
 
				-meant a net loss in computing power.
			
 
				-As microcomputers became faster, even that loss was recovered, and
			
 
				-this style of computing remains popular today.
			
 
				-<P>
			
 
				-In the rush to personal workstations, though, some of their weaknesses
			
 
				-were overlooked.
			
 
				-First, the operating system they run, UNIX, is itself an old timesharing system and
			
 
				-has had trouble adapting to ideas
			
 
				-born after it.  Graphics and networking were added to UNIX well into
			
 
				-its lifetime and remain poorly integrated and difficult to administer.
			
 
				-More important, the early focus on having private machines
			
 
				-made it difficult for networks of machines to serve as seamlessly as the old
			
 
				-monolithic timesharing systems.
			
 
				-Timesharing centralized the management
			
 
				-and amortization of costs and resources;
			
 
				-personal computing fractured, democratized, and ultimately amplified
			
 
				-administrative problems.
			
 
				-The choice of
			
 
				-an old timesharing operating system to run those personal machines
			
 
				-made it difficult to bind things together smoothly.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 began in the late 1980's as an attempt to have it both
			
 
				-ways: to build a system that was centrally administered and cost-effective
			
 
				-using cheap modern microcomputers as its computing elements.
			
 
				-The idea was to build a time-sharing system out of workstations, but in a novel way.
			
 
				-Different computers would handle
			
 
				-different tasks: small, cheap machines in people's offices would serve
			
 
				-as terminals providing access to large, central, shared resources such as computing
			
 
				-servers and file servers.  For the central machines, the coming wave of
			
 
				-shared-memory multiprocessors seemed obvious candidates.
			
 
				-The philosophy is much like that of the Cambridge
			
 
				-Distributed System [NeHe82].
			
 
				-The early catch phrase was to build a UNIX out of a lot of little systems,
			
 
				-not a system out of a lot of little UNIXes.
			
 
				-</P>
			
 
				-<P>
			
 
				-The problems with UNIX were too deep to fix, but some of its ideas could be
			
 
				-brought along.  The best was its use of the file system to coordinate
			
 
				-naming of and access to resources, even those, such as devices, not traditionally
			
 
				-treated as files.
			
 
				-For Plan 9, we adopted this idea by designing a network-level protocol, called 9P,
			
 
				-to enable machines to access files on remote systems.
			
 
				-Above this, we built a naming
			
 
				-system that lets people and their computing agents build customized views
			
 
				-of the resources in the network.
			
 
				-This is where Plan 9 first began to look different:
			
 
				-a Plan 9 user builds a private computing environment and recreates it wherever
			
 
				-desired, rather than doing all computing on a private machine.
			
 
				-It soon became clear that this model was richer
			
 
				-than we had foreseen, and the ideas of per-process name spaces
			
 
				-and file-system-like resources were extended throughout
			
 
				-the system&#x2014;to processes, graphics, even the network itself.
			
 
				-</P>
			
 
				-<P>
			
 
				-By 1989 the system had become solid enough
			
 
				-that some of us began using it as our exclusive computing environment.
			
 
				-This meant bringing along many of the services and applications we had
			
 
				-used on UNIX.  We used this opportunity to revisit many issues, not just
			
 
				-kernel-resident ones, that we felt UNIX addressed badly.
			
 
				-Plan 9 has new compilers,
			
 
				-languages,
			
 
				-libraries,
			
 
				-window systems,
			
 
				-and many new applications.
			
 
				-Many of the old tools were dropped, while those brought along have
			
 
				-been polished or rewritten.
			
 
				-</P>
			
 
				-<P>
			
 
				-Why be so all-encompassing?
			
 
				-The distinction between operating system, library, and application
			
 
				-is important to the operating system researcher but uninteresting to the
			
 
				-user.  What matters is clean functionality.
			
 
				-By building a complete new system,
			
 
				-we were able to solve problems where we thought they should be solved.
			
 
				-For example, there is no real `tty driver' in the kernel; that is the job of the window
			
 
				-system.
			
 
				-In the modern world, multi-vendor and multi-architecture computing
			
 
				-are essential, yet the usual compilers and tools assume the program is being
			
 
				-built to run locally; we needed to rethink these issues.
			
 
				-Most important, though, the test of a system is the computing
			
 
				-environment it provides.
			
 
				-Producing a more efficient way to run the old UNIX warhorses
			
 
				-is empty engineering;
			
 
				-we were more interested in whether the new ideas suggested by
			
 
				-the architecture of the underlying system encourage a more effective way of working.
			
 
				-Thus, although Plan 9 provides an emulation environment for
			
 
				-running POSIX commands, it is a backwater of the system.
			
 
				-The vast majority
			
 
				-of system software is developed in the `native' Plan 9 environment.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are benefits to having an all-new system.
			
 
				-First, our laboratory has a history of building experimental peripheral boards.
			
 
				-To make it easy to write device drivers,
			
 
				-we want a system that is available in source form
			
 
				-(no longer guaranteed with UNIX, even
			
 
				-in the laboratory in which it was born).
			
 
				-Also, we want to redistribute our work, which means the software
			
 
				-must be locally produced.  For example, we could have used some vendors'
			
 
				-C compilers for our system, but even had we overcome the problems with
			
 
				-cross-compilation, we would have difficulty
			
 
				-redistributing the result.
			
 
				-</P>
			
 
				-<P>
			
 
				-This paper serves as an overview of the system.  It discusses the architecture
			
 
				-from the lowest building blocks to the computing environment seen by users.
			
 
				-It also serves as an introduction to the rest of the Plan 9 Programmer's Manual,
			
 
				-which it accompanies.  More detail about topics in this paper
			
 
				-can be found elsewhere in the manual.
			
 
				-</center></P>
			
 
				-<H4>Design
			
 
				-</H4>
			
 
				-<P>
			
 
				-The view of the system is built upon three principles.
			
 
				-First, resources are named and accessed like files in a hierarchical file system.
			
 
				-Second, there is a standard protocol, called 9P, for accessing these
			
 
				-resources.
			
 
				-Third, the disjoint hierarchies provided by different services are
			
 
				-joined together into a single private hierarchical file name space.
			
 
				-The unusual properties of Plan 9 stem from the consistent, aggressive
			
 
				-application of these principles.
			
 
				-</P>
			
 
				-<P>
			
 
				-A large Plan 9 installation has a number of computers networked
			
 
				-together, each providing a particular class of service.
			
 
				-Shared multiprocessor servers provide computing cycles;
			
 
				-other large machines offer file storage.
			
 
				-These machines are located in an air-conditioned machine
			
 
				-room and are connected by high-performance networks.
			
 
				-Lower bandwidth networks such as Ethernet or ISDN connect these
			
 
				-servers to office- and home-resident workstations or PCs, called terminals
			
 
				-in Plan 9 terminology.
			
 
				-Figure 1 shows the arrangement.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="network.pic.0.gif"><br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<I>Figure 1. Structure of a large Plan 9 installation.</I>
			
 
				-CPU servers and file servers share fast local-area networks,
			
 
				-while terminals use slower wider-area networks such as Ethernet,
			
 
				-Datakit, or telephone lines to connect to them.
			
 
				-Gateway machines, which are just CPU servers connected to multiple
			
 
				-networks, allow machines on one network to see another.
			
 
				-<br>&#32;<br>
			
 
				-</dl>
			
 
				-<P>
			
 
				-The modern style of computing offers each user a dedicated workstation or PC.
			
 
				-Plan 9's approach is different.
			
 
				-The various machines with screens, keyboards, and mice all provide
			
 
				-access to the resources of the network, so they are functionally equivalent,
			
 
				-in the manner of the terminals attached to old timesharing systems.
			
 
				-When someone uses the system, though,
			
 
				-the terminal is temporarily personalized by that user.
			
 
				-Instead of customizing the hardware, Plan 9 offers the ability to customize
			
 
				-one's view of the system provided by the software.
			
 
				-That customization is accomplished by giving local, personal names for the
			
 
				-publicly visible resources in the network.
			
 
				-Plan 9 provides the mechanism to assemble a personal view of the public
			
 
				-space with local names for globally accessible resources.
			
 
				-Since the most important resources of the network are files, the model
			
 
				-of that view is file-oriented.
			
 
				-</P>
			
 
				-<P>
			
 
				-The client's local name space provides a way to customize the user's
			
 
				-view of the network.  The services available in the network all export file
			
 
				-hierarchies.
			
 
				-Those important to the user are gathered together into
			
 
				-a custom name space; those of no immediate interest are ignored.
			
 
				-This is a different style of use from the idea of a `uniform global name space'.
			
 
				-In Plan 9, there are known names for services and uniform names for
			
 
				-files exported by those services,
			
 
				-but the view is entirely local.  As an analogy, consider the difference
			
 
				-between the phrase `my house' and the precise address of the speaker's
			
 
				-home.  The latter may be used by anyone but the former is easier to say and
			
 
				-makes sense when spoken.
			
 
				-It also changes meaning depending on who says it,
			
 
				-yet that does not cause confusion.
			
 
				-Similarly, in Plan 9 the name
			
 
				-<TT>/dev/cons</TT>
			
 
				-always refers to the user's terminal and
			
 
				-<TT>/bin/date</TT>
			
 
				-the correct version of the date
			
 
				-command to run,
			
 
				-but which files those names represent depends on circumstances such as the
			
 
				-architecture of the machine executing
			
 
				-<TT>date</TT>.
			
 
				-Plan 9, then, has local name spaces that obey globally understood
			
 
				-conventions;
			
 
				-it is the conventions that guarantee sane behavior in the presence
			
 
				-of local names.
			
 
				-</P>
			
 
				-<P>
			
 
				-The 9P protocol is structured as a set of transactions that
			
 
				-send a request from a client to a (local or remote) server and return the result.
			
 
				-9P controls file systems, not just files:
			
 
				-it includes procedures to resolve file names and traverse the name
			
 
				-hierarchy of the file system provided by the server.
			
 
				-On the other hand,
			
 
				-the client's name space is held by the client system alone, not on or with the server,
			
 
				-a distinction from systems such as Sprite [OCDNW88].
			
 
				-Also, file access is at the level of bytes, not blocks, which distinguishes
			
 
				-9P from protocols like NFS and RFS.
			
 
				-A paper by Welch compares Sprite, NFS, and Plan 9's network file system structures [Welc94].
			
 
				-</P>
			
 
				-<P>
			
 
				-This approach was designed with traditional files in mind,
			
 
				-but can be extended
			
 
				-to many other resources.
			
 
				-Plan 9 services that export file hierarchies include I/O devices,
			
 
				-backup services,
			
 
				-the window system,
			
 
				-network interfaces,
			
 
				-and many others.
			
 
				-One example is the process file system,
			
 
				-<TT>/proc</TT>,
			
 
				-which provides a clean way
			
 
				-to examine and control running processes.
			
 
				-Precursor systems had a similar idea [Kill84], but Plan 9 pushes the
			
 
				-file metaphor much further [PPTTW93].
			
 
				-The file system model is well-understood, both by system builders and general users,
			
 
				-so services that present file-like interfaces are easy to build, easy to understand,
			
 
				-and easy to use.
			
 
				-Files come with agreed-upon rules for
			
 
				-protection,
			
 
				-naming,
			
 
				-and access both local and remote,
			
 
				-so services built this way are ready-made for a distributed system.
			
 
				-(This is a distinction from `object-oriented' models, where these issues
			
 
				-must be faced anew for every class of object.)
			
 
				-Examples in the sections that follow illustrate these ideas in action.
			
 
				-</center></P>
			
 
				-<H4>The Command-level View
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 is meant to be used from a machine with a screen running
			
 
				-the window system.
			
 
				-It has no notion of `teletype' in the UNIX sense.  The keyboard handling of
			
 
				-the bare system is rudimentary, but once the window system, 8&frac12; [Pike91],
			
 
				-is running,
			
 
				-text can be edited with `cut and paste' operations from a pop-up menu,
			
 
				-copied between windows, and so on.
			
 
				-8&frac12; permits editing text from the past, not just on the current input line.
			
 
				-The text-editing capabilities of 8&frac12; are strong enough to displace
			
 
				-special features such as history in the shell,
			
 
				-paging and scrolling,
			
 
				-and mail editors.
			
 
				-8&frac12; windows do not support cursor addressing and,
			
 
				-except for one terminal emulator to simplify connecting to traditional systems,
			
 
				-there is no cursor-addressing software in Plan 9.
			
 
				-</P>
			
 
				-<P>
			
 
				-Each window is created in a separate name space.
			
 
				-Adjustments made to the name space in a window do not affect other windows
			
 
				-or programs, making it safe to experiment with local modifications to the name
			
 
				-space, for example
			
 
				-to substitute files from the dump file system when debugging.
			
 
				-Once the debugging is done, the window can be deleted and all trace of the
			
 
				-experimental apparatus is gone.
			
 
				-Similar arguments apply to the private space each window has for environment
			
 
				-variables, notes (analogous to UNIX signals), etc.
			
 
				-</P>
			
 
				-<P>
			
 
				-Each window is created running an application, such as the shell, with
			
 
				-standard input and output connected to the editable text of the window.
			
 
				-Each window also has a private bitmap and multiplexed access to the
			
 
				-keyboard, mouse, and other graphical resources through files like
			
 
				-<TT>/dev/mouse</TT>,
			
 
				-<TT>/dev/bitblt</TT>,
			
 
				-and
			
 
				-<TT>/dev/cons</TT>
			
 
				-(analogous to UNIX's
			
 
				-<TT>/dev/tty</TT>).
			
 
				-These files are provided by 8&frac12;, which is implemented as a file server.
			
 
				-Unlike X windows, where a new application typically creates a new window
			
 
				-to run in, an 8&frac12; graphics application usually runs in the window where it starts.
			
 
				-It is possible and efficient for an application to create a new window, but
			
 
				-that is not the style of the system.
			
 
				-Again contrasting to X, in which a remote application makes a network
			
 
				-call to the X server to start running,
			
 
				-a remote 8&frac12; application sees the
			
 
				-<TT>mouse</TT>,
			
 
				-<TT>bitblt</TT>,
			
 
				-and
			
 
				-<TT>cons</TT>
			
 
				-files for the window as usual in
			
 
				-<TT>/dev</TT>;
			
 
				-it does not know whether the files are local.
			
 
				-It just reads and writes them to control the window;
			
 
				-the network connection is already there and multiplexed.
			
 
				-</P>
			
 
				-<P>
			
 
				-The intended style of use is to run interactive applications such as the window
			
 
				-system and text editor on the terminal and to run computation- or file-intensive
			
 
				-applications on remote servers.
			
 
				-Different windows may be running programs on different machines over
			
 
				-different networks, but by making the name space equivalent in all windows,
			
 
				-this is transparent: the same commands and resources are available, with the same names,
			
 
				-wherever the computation is performed.
			
 
				-</P>
			
 
				-<P>
			
 
				-The command set of Plan 9 is similar to that of UNIX.
			
 
				-The commands fall into several broad classes.  Some are new programs for
			
 
				-old jobs: programs like
			
 
				-<TT>ls</TT>,
			
 
				-<TT>cat</TT>,
			
 
				-and
			
 
				-<TT>who</TT>
			
 
				-have familiar names and functions but are new, simpler implementations.
			
 
				-<TT>Who</TT>,
			
 
				-for example, is a shell script, while
			
 
				-<TT>ps</TT>
			
 
				-is just 95 lines of C code.
			
 
				-Some commands are essentially the same as their UNIX ancestors:
			
 
				-<TT>awk</TT>,
			
 
				-<TT>troff</TT>,
			
 
				-and others have been converted to ANSI C and extended to handle
			
 
				-Unicode, but are still the familiar tools.
			
 
				-Some are entirely new programs for old niches: the shell
			
 
				-<TT>rc</TT>,
			
 
				-text editor
			
 
				-<TT>sam</TT>,
			
 
				-debugger
			
 
				-<TT>acid</TT>,
			
 
				-and others
			
 
				-displace the better-known UNIX tools with similar jobs.
			
 
				-Finally, about half the commands are new.
			
 
				-</P>
			
 
				-<P>
			
 
				-Compatibility was not a requirement for the system.
			
 
				-Where the old commands or notation seemed good enough, we
			
 
				-kept them.  When they didn't, we replaced them.
			
 
				-</center></P>
			
 
				-<H4>The File Server
			
 
				-</H4>
			
 
				-<P>
			
 
				-A central file server stores permanent files and presents them to the network
			
 
				-as a file hierarchy exported using 9P.
			
 
				-The server is a stand-alone system, accessible only over the network,
			
 
				-designed to do its one job well.
			
 
				-It runs no user processes, only a fixed set of routines compiled into the
			
 
				-boot image.
			
 
				-Rather than a set of disks or separate file systems,
			
 
				-the main hierarchy exported by the server is a single
			
 
				-tree, representing files on many disks.
			
 
				-That hierarchy is
			
 
				-shared by many users over a wide area on a variety of networks.
			
 
				-Other file trees exported by
			
 
				-the server include
			
 
				-special-purpose systems such as temporary storage and, as explained
			
 
				-below, a backup service.
			
 
				-</P>
			
 
				-<P>
			
 
				-The file server has three levels of storage.
			
 
				-The central server in our installation has
			
 
				-about 100 megabytes of memory buffers,
			
 
				-27 gigabytes of magnetic disks,
			
 
				-and 350 gigabytes of
			
 
				-bulk storage in a write-once-read-many (WORM) jukebox.
			
 
				-The disk is a cache for the WORM and the memory is a cache for the disk;
			
 
				-each is much faster, and sees about an order of magnitude more traffic,
			
 
				-than the level it caches.
			
 
				-The addressable data in the file system can be larger than the size of the
			
 
				-magnetic disks, because they are only a cache;
			
 
				-our main file server has about 40 gigabytes of active storage.
			
 
				-</P>
			
 
				-<P>
			
 
				-The most unusual feature of the file server
			
 
				-comes from its use of a WORM device for
			
 
				-stable storage.
			
 
				-Every morning at 5 o'clock, a
			
 
				-<I>dump</I>
			
 
				-of the file system occurs automatically.
			
 
				-The file system is frozen and
			
 
				-all blocks modified since the last dump
			
 
				-are queued to be written to the WORM.
			
 
				-Once the blocks are queued,
			
 
				-service is restored and
			
 
				-the read-only root of the dumped
			
 
				-file system appears in a
			
 
				-hierarchy of all dumps ever taken, named by its date.
			
 
				-For example, the directory
			
 
				-<TT>/n/dump/1995/0315</TT>
			
 
				-is the root directory of an image of the file system
			
 
				-as it appeared in the early morning of March 15, 1995.
			
 
				-It takes a few minutes to queue the blocks,
			
 
				-but the process to copy blocks to the WORM, which runs in the background, may take hours.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are two ways the dump file system is used.
			
 
				-The first is by the users themselves, who can browse the
			
 
				-dump file system directly or attach pieces of
			
 
				-it to their name space.
			
 
				-For example, to track down a bug,
			
 
				-it is straightforward to try the compiler from three months ago
			
 
				-or to link a program with yesterday's library.
			
 
				-With daily snapshots of all files,
			
 
				-it is easy to find when a particular change was
			
 
				-made or what changes were made on a particular date.
			
 
				-People feel free to make large speculative changes
			
 
				-to files in the knowledge that they can be backed
			
 
				-out with a single
			
 
				-copy command.
			
 
				-There is no backup system as such;
			
 
				-instead, because the dump
			
 
				-is in the file name space, 
			
 
				-backup problems can be solved with
			
 
				-standard tools
			
 
				-such as
			
 
				-<TT>cp</TT>,
			
 
				-<TT>ls</TT>,
			
 
				-<TT>grep</TT>,
			
 
				-and
			
 
				-<TT>diff</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The other (very rare) use is complete system backup.
			
 
				-In the event of disaster,
			
 
				-the active file system can be initialized from any dump by clearing the
			
 
				-disk cache and setting the root of
			
 
				-the active file system to be a copy
			
 
				-of the dumped root.
			
 
				-Although easy to do, this is not to be taken lightly:
			
 
				-besides losing any change made after the date of the dump, this recovery method
			
 
				-results in a very slow system.
			
 
				-The cache must be reloaded from WORM, which is much
			
 
				-slower than magnetic disks.
			
 
				-The file system takes a few days to reload the working
			
 
				-set and regain its full performance.
			
 
				-</P>
			
 
				-<P>
			
 
				-Access permissions of files in the dump are the same
			
 
				-as they were when the dump was made.
			
 
				-Normal utilities have normal
			
 
				-permissions in the dump without any special arrangement.
			
 
				-The dump file system is read-only, though,
			
 
				-which means that files in the dump cannot be written regardless of their permission bits;
			
 
				-in fact, since directories are part of the read-only structure,
			
 
				-even the permissions cannot be changed.
			
 
				-</P>
			
 
				-<P>
			
 
				-Once a file is written to WORM, it cannot be removed,
			
 
				-so our users never see
			
 
				-``please clean up your files''
			
 
				-messages and there is no
			
 
				-<TT>df</TT>
			
 
				-command.
			
 
				-We regard the WORM jukebox as an unlimited resource.
			
 
				-The only issue is how long it will take to fill.
			
 
				-Our WORM has served a community of about 50 users
			
 
				-for five years and has absorbed daily dumps, consuming a total of
			
 
				-65% of the storage in the jukebox.
			
 
				-In that time, the manufacturer has improved the technology,
			
 
				-doubling the capacity of the individual disks.
			
 
				-If we were to upgrade to the new media,
			
 
				-we would have more free space than in the original empty jukebox.
			
 
				-Technology has created storage faster than we can use it.
			
 
				-</P>
			
 
				-<H4>Unusual file servers
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 is characterized by a variety of servers that offer
			
 
				-a file-like interface to unusual services.
			
 
				-Many of these are implemented by user-level processes, although the distinction
			
 
				-is unimportant to their clients; whether a service is provided by the kernel,
			
 
				-a user process, or a remote server is irrelevant to the way it is used.
			
 
				-There are dozens of such servers; in this section we present three representative ones.
			
 
				-</P>
			
 
				-<P>
			
 
				-Perhaps the most remarkable file server in Plan 9 is 8&frac12;, the window system.
			
 
				-It is discussed at length elsewhere [Pike91], but deserves a brief explanation here.
			
 
				-8&frac12; provides two interfaces: to the user seated at the terminal, it offers a traditional
			
 
				-style of interaction with multiple windows, each running an application, all controlled
			
 
				-by a mouse and keyboard.
			
 
				-To the client programs, the view is also fairly traditional:
			
 
				-programs running in a window see a set of files in
			
 
				-<TT>/dev</TT>
			
 
				-with names like
			
 
				-<TT>mouse</TT>,
			
 
				-<TT>screen</TT>,
			
 
				-and
			
 
				-<TT>cons</TT>.
			
 
				-Programs that want to print text to their window write to
			
 
				-<TT>/dev/cons</TT>;
			
 
				-to read the mouse, they read
			
 
				-<TT>/dev/mouse</TT>.
			
 
				-In the Plan 9 style, bitmap graphics is implemented by providing a file
			
 
				-<TT>/dev/bitblt</TT>
			
 
				-on which clients write encoded messages to execute graphical operations such as
			
 
				-<TT>bitblt</TT>
			
 
				-(RasterOp).
			
 
				-What is unusual is how this is done:
			
 
				-8&frac12; is a file server, serving the files in
			
 
				-<TT>/dev</TT>
			
 
				-to the clients running in each window.
			
 
				-Although every window looks the same to its client,
			
 
				-each window has a distinct set of files in
			
 
				-<TT>/dev</TT>.
			
 
				-8&frac12; multiplexes its clients' access to the resources of the terminal
			
 
				-by serving multiple sets of files.  Each client is given a private name space
			
 
				-with a
			
 
				-<I>different</I>
			
 
				-set of files that behave the same as in all other windows.
			
 
				-There are many advantages to this structure.
			
 
				-One is that 8&frac12; serves the same files it needs for its own implementation&#x2014;it
			
 
				-multiplexes its own interface&#x2014;so it may be run, recursively, as a client of itself.
			
 
				-Also, consider the implementation of
			
 
				-<TT>/dev/tty</TT>
			
 
				-in UNIX, which requires special code in the kernel to redirect
			
 
				-<TT>open</TT>
			
 
				-calls to the appropriate device.
			
 
				-Instead, in 8&frac12; the equivalent service falls out
			
 
				-automatically: 8&frac12; serves
			
 
				-<TT>/dev/cons</TT>
			
 
				-as its basic function; there is nothing extra to do.
			
 
				-When a program wants to
			
 
				-read from the keyboard, it opens
			
 
				-<TT>/dev/cons</TT>,
			
 
				-but it is a private file, not a shared one with special properties.
			
 
				-Again, local name spaces make this possible; conventions about the consistency of
			
 
				-the files within them make it natural.
			
 
				-</P>
			
 
				-<P>
			
 
				-8&frac12; has a unique feature made possible by its design.
			
 
				-Because it is implemented as a file server,
			
 
				-it has the power to postpone answering read requests for a particular window.
			
 
				-This behavior is toggled by a reserved key on the keyboard.
			
 
				-Toggling once suspends client reads from the window;
			
 
				-toggling again resumes normal reads, which absorb whatever text has been prepared,
			
 
				-one line at a time.
			
 
				-This allows the user to edit multi-line input text on the screen before the application sees it,
			
 
				-obviating the need to invoke a separate editor to prepare text such as mail
			
 
				-messages.
			
 
				-A related property is that reads are answered directly from the
			
 
				-data structure defining the text on the display: text may be edited until
			
 
				-its final newline makes the prepared line of text readable by the client.
			
 
				-Even then, until the line is read, the text the client will read can be changed.
			
 
				-For example, after typing
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% make
			
 
				-rm *
			
 
				-</PRE></TT></DL>
			
 
				-to the shell, the user can backspace over the final newline at any time until
			
 
				-<TT>make</TT>
			
 
				-finishes, holding off execution of the
			
 
				-<TT>rm</TT>
			
 
				-command, or even point with the mouse
			
 
				-before the
			
 
				-<TT>rm</TT>
			
 
				-and type another command to be executed first.
			
 
				-</P>
			
 
				-<P>
			
 
				-There is no
			
 
				-<TT>ftp</TT>
			
 
				-command in Plan 9.  Instead, a user-level file server called
			
 
				-<TT>ftpfs</TT>
			
 
				-dials the FTP site, logs in on behalf of the user, and uses the FTP protocol
			
 
				-to examine files in the remote directory.
			
 
				-To the local user, it offers a file hierarchy, attached to
			
 
				-<TT>/n/ftp</TT>
			
 
				-in the local name space, mirroring the contents of the FTP site.
			
 
				-In other words, it translates the FTP protocol into 9P to offer Plan 9 access to FTP sites.
			
 
				-The implementation is tricky;
			
 
				-<TT>ftpfs</TT>
			
 
				-must do some sophisticated caching for efficiency and
			
 
				-use heuristics to decode remote directory information.
			
 
				-But the result is worthwhile:
			
 
				-all the local file management tools such as
			
 
				-<TT>cp</TT>,
			
 
				-<TT>grep</TT>,
			
 
				-<TT>diff</TT>,
			
 
				-and of course
			
 
				-<TT>ls</TT>
			
 
				-are available to FTP-served files exactly as if they were local files.
			
 
				-Other systems such as Jade and Prospero
			
 
				-have exploited the same opportunity [Rao81, Neu92],
			
 
				-but because of local name spaces and the simplicity of implementing 9P,
			
 
				-this approach
			
 
				-fits more naturally into Plan 9 than into other environments.
			
 
				-</P>
			
 
				-<P>
			
 
				-One server,
			
 
				-<TT>exportfs</TT>,
			
 
				-is a user process that takes a portion of its own name space and
			
 
				-makes it available to other processes by
			
 
				-translating 9P requests into system calls to the Plan 9 kernel.
			
 
				-The file hierarchy it exports may contain files from multiple servers.
			
 
				-<TT>Exportfs</TT>
			
 
				-is usually run as a remote server
			
 
				-started by a local program,
			
 
				-either
			
 
				-<TT>import</TT>
			
 
				-or
			
 
				-<TT>cpu</TT>.
			
 
				-<TT>Import</TT>
			
 
				-makes a network call to the remote machine, starts
			
 
				-<TT>exportfs</TT>
			
 
				-there, and attaches its 9P connection to the local name space.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-import helix /net
			
 
				-</PRE></TT></DL>
			
 
				-makes Helix's network interfaces visible in the local
			
 
				-<TT>/net</TT>
			
 
				-directory.  Helix is a central server and
			
 
				-has many network interfaces, so this permits a machine with one network to
			
 
				-access any of Helix's networks.  After such an import, the local
			
 
				-machine may make calls on any of the networks connected to Helix.
			
 
				-Another example is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-import helix /proc
			
 
				-</PRE></TT></DL>
			
 
				-which makes Helix's processes visible in the local
			
 
				-<TT>/proc</TT>,
			
 
				-permitting local debuggers to examine remote processes.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>cpu</TT>
			
 
				-command connects the local terminal to a remote
			
 
				-CPU server.
			
 
				-It works in the opposite direction to
			
 
				-<TT>import</TT>:
			
 
				-after calling the server, it starts a
			
 
				-<I>local</I>
			
 
				-<TT>exportfs</TT>
			
 
				-and mounts it in the name space of a process, typically a newly created shell, on the
			
 
				-server.
			
 
				-It then rearranges the name space
			
 
				-to make local device files (such as those served by
			
 
				-the terminal's window system) visible in the server's
			
 
				-<TT>/dev</TT>
			
 
				-directory.
			
 
				-The effect of running a
			
 
				-<TT>cpu</TT>
			
 
				-command is therefore to start a shell on a fast machine, one more tightly
			
 
				-coupled to the file server,
			
 
				-with a name space analogous
			
 
				-to the local one.
			
 
				-All local device files are visible remotely, so remote applications have full
			
 
				-access to local services such as bitmap graphics,
			
 
				-<TT>/dev/cons</TT>,
			
 
				-and so on.
			
 
				-This is not the same as
			
 
				-<TT>rlogin</TT>,
			
 
				-which does nothing to reproduce the local name space on the remote system,
			
 
				-nor is it the same as
			
 
				-file sharing with, say, NFS, which can achieve some name space equivalence but
			
 
				-not the combination of access to local hardware devices, remote files, and remote
			
 
				-CPU resources.
			
 
				-The
			
 
				-<TT>cpu</TT>
			
 
				-command is a uniquely transparent mechanism.
			
 
				-For example, it is reasonable
			
 
				-to start a window system in a window running a
			
 
				-<TT>cpu</TT>
			
 
				-command; all windows created there automatically start processes on the CPU server.
			
 
				-</P>
			
 
				-<H4>Configurability and administration
			
 
				-</H4>
			
 
				-<P>
			
 
				-The uniform interconnection of components in Plan 9 makes it possible to configure
			
 
				-a Plan 9 installation many different ways.
			
 
				-A single laptop PC can function as a stand-alone Plan 9 system;
			
 
				-at the other extreme, our setup has central multiprocessor CPU
			
 
				-servers and file servers and scores of terminals ranging from small PCs to
			
 
				-high-end graphics workstations.
			
 
				-It is such large installations that best represent how Plan 9 operates.
			
 
				-</P>
			
 
				-<P>
			
 
				-The system software is portable and the same
			
 
				-operating system runs on all hardware.
			
 
				-Except for performance, the appearance of the system on, say,
			
 
				-an SGI workstation is the same
			
 
				-as on a laptop.
			
 
				-Since computing and file services are centralized, and terminals have
			
 
				-no permanent file storage, all terminals are functionally identical.
			
 
				-In this way, Plan 9 has one of the good properties of old timesharing systems, where
			
 
				-a user could sit in front of any machine and see the same system.  In the modern
			
 
				-workstation community, machines tend to be owned by people who customize them
			
 
				-by storing private information on local disk.
			
 
				-We reject this style of use,
			
 
				-although the system itself can be used this way.
			
 
				-In our group, we have a laboratory with many public-access machines&#x2014;a terminal
			
 
				-room&#x2014;and a user may sit down at any one of them and work.
			
 
				-</P>
			
 
				-<P>
			
 
				-Central file servers centralize not just the files, but also their administration
			
 
				-and maintenance.
			
 
				-In fact, one server is the main server, holding all system files; other servers provide
			
 
				-extra storage or are available for debugging and other special uses, but the system
			
 
				-software resides on one machine.
			
 
				-This means that each program
			
 
				-has a single copy of the binary for each architecture, so it is
			
 
				-trivial to install updates and bug fixes.
			
 
				-There is also a single user database; there is no need to synchronize distinct
			
 
				-<TT>/etc/passwd</TT>
			
 
				-files.
			
 
				-On the other hand, depending on a single central server does limit the size of an installation.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another example of the power of centralized file service
			
 
				-is the way Plan 9 administers network information.
			
 
				-On the central server there is a directory,
			
 
				-<TT>/lib/ndb</TT>,
			
 
				-that contains all the information necessary to administer the local Ethernet and
			
 
				-other networks.
			
 
				-All the machines use the same database to talk to the network; there is no
			
 
				-need to manage a distributed naming system or keep parallel files up to date.
			
 
				-To install a new machine on the local Ethernet, choose a
			
 
				-name and IP address and add these to a single file in
			
 
				-<TT>/lib/ndb</TT>;
			
 
				-all the machines in the installation will be able to talk to it immediately.
			
 
				-To start running, plug the machine into the network, turn it on, and use BOOTP
			
 
				-and TFTP to load the kernel.
			
 
				-All else is automatic.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally,
			
 
				-the automated dump file system frees all users from the need to maintain
			
 
				-their systems, while providing easy access to backup files without
			
 
				-tapes, special commands, or the involvement of support staff.
			
 
				-It is difficult to overstate the improvement in lifestyle afforded by this service.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 runs on a variety of hardware without
			
 
				-constraining how to configure an installation.
			
 
				-In our laboratory, we
			
 
				-chose to use central servers because they amortize costs and administration.
			
 
				-A sign that this is a good decision is that our cheap
			
 
				-terminals remain comfortable places
			
 
				-to work for about five years, much longer than workstations that must provide
			
 
				-the complete computing environment.
			
 
				-We do, however, upgrade the central machines, so
			
 
				-the computation available from even old Plan 9 terminals improves with time.
			
 
				-The money saved by avoiding regular upgrades of terminals
			
 
				-is instead spent on the newest, fastest multiprocessor servers.
			
 
				-We estimate this costs about half the money of networked workstations
			
 
				-yet provides general access to more powerful machines.
			
 
				-</P>
			
 
				-<H4>C Programming
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 utilities are written in several languages.
			
 
				-Some are scripts for the shell,
			
 
				-<TT>rc</TT>
			
 
				-[Duff90]; a handful
			
 
				-are written in a new C-like concurrent language called Alef [Wint95], described below.
			
 
				-The great majority, though, are written in a dialect of ANSI C [ANSIC].
			
 
				-Of these, most are entirely new programs, but some
			
 
				-originate in pre-ANSI C code
			
 
				-from our research UNIX system [UNIX85].
			
 
				-These have been updated to ANSI C
			
 
				-and reworked for portability and cleanliness.
			
 
				-</P>
			
 
				-<P>
			
 
				-The Plan 9 C dialect has some minor extensions,
			
 
				-described elsewhere [Pike95], and a few major restrictions.
			
 
				-The most important restriction is that the compiler demands that
			
 
				-all function definitions have ANSI prototypes
			
 
				-and all function calls appear in the scope of a prototyped declaration
			
 
				-of the function.
			
 
				-As a stylistic rule,
			
 
				-the prototyped declaration is placed in a header file
			
 
				-included by all files that call the function.
			
 
				-Each system library has an associated header file, declaring all
			
 
				-functions in that library.
			
 
				-For example, the standard Plan 9 library is called
			
 
				-<TT>libc</TT>,
			
 
				-so all C source files include
			
 
				-<TT>&lt;libc.h&gt;</TT>.
			
 
				-These rules guarantee that all functions
			
 
				-are called with arguments having the expected types &#x2014; something
			
 
				-that was not true with pre-ANSI C programs.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another restriction is that the C compilers accept only a subset of the
			
 
				-preprocessor directives required by ANSI.
			
 
				-The main omission is
			
 
				-<TT>#if</TT>,
			
 
				-since we believe it
			
 
				-is never necessary and often abused.
			
 
				-Also, its effect is
			
 
				-better achieved by other means.
			
 
				-For instance, an
			
 
				-<TT>#if</TT>
			
 
				-used to toggle a feature at compile time can be written
			
 
				-as a regular
			
 
				-<TT>if</TT>
			
 
				-statement, relying on compile-time constant folding and
			
 
				-dead code elimination to discard object code.
			
 
				-</P>
			
 
				-<P>
			
 
				-Conditional compilation, even with
			
 
				-<TT>#ifdef</TT>,
			
 
				-is used sparingly in Plan 9.
			
 
				-The only architecture-dependent
			
 
				-<TT>#ifdefs</TT>
			
 
				-in the system are in low-level routines in the graphics library.
			
 
				-Instead, we avoid such dependencies or, when necessary, isolate
			
 
				-them in separate source files or libraries.
			
 
				-Besides making code hard to read,
			
 
				-<TT>#ifdefs</TT>
			
 
				-make it impossible to know what source is compiled into the binary
			
 
				-or whether source protected by them will compile or work properly.
			
 
				-They make it harder to maintain software.
			
 
				-</P>
			
 
				-<P>
			
 
				-The standard Plan 9 library overlaps much of
			
 
				-ANSI C and POSIX [POSIX], but diverges
			
 
				-when appropriate to Plan 9's goals or implementation.
			
 
				-When the semantics of a function
			
 
				-change, we also change the name.
			
 
				-For instance, instead of UNIX's
			
 
				-<TT>creat</TT>,
			
 
				-Plan 9 has a
			
 
				-<TT>create</TT>
			
 
				-function that takes three arguments,
			
 
				-the original two plus a third that, like the second
			
 
				-argument of
			
 
				-<TT>open</TT>,
			
 
				-defines whether the returned file descriptor is to be opened for reading,
			
 
				-writing, or both.
			
 
				-This design was forced by the way 9P implements creation,
			
 
				-but it also simplifies the common use of
			
 
				-<TT>create</TT>
			
 
				-to initialize a temporary file.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another departure from ANSI C is that Plan 9 uses a 16-bit character set
			
 
				-called Unicode [ISO10646, Unicode].
			
 
				-Although we stopped short of full internationalization,
			
 
				-Plan 9 treats the representation
			
 
				-of all major languages uniformly throughout all its
			
 
				-software.
			
 
				-To simplify the exchange of text between programs, the characters are packed into
			
 
				-a byte stream by an encoding we designed, called UTF-8,
			
 
				-which is now
			
 
				-becoming accepted as a standard [FSSUTF].
			
 
				-It has several attractive properties,
			
 
				-including byte-order independence,
			
 
				-backwards compatibility with ASCII,
			
 
				-and ease of implementation.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are many problems in adapting existing software to a large
			
 
				-character set with an encoding that represents characters with
			
 
				-a variable number of bytes.
			
 
				-ANSI C addresses some of the issues but
			
 
				-falls short of
			
 
				-solving them all.
			
 
				-It does not pick a character set encoding and does not
			
 
				-define all the necessary I/O library routines.
			
 
				-Furthermore, the functions it
			
 
				-<I>does</I>
			
 
				-define have engineering problems.
			
 
				-Since the standard left too many problems unsolved,
			
 
				-we decided to build our own interface.
			
 
				-A separate paper has the details [Pike93].
			
 
				-</P>
			
 
				-<P>
			
 
				-A small class of Plan 9 programs do not follow the conventions
			
 
				-discussed in this section.
			
 
				-These are programs imported from and maintained by
			
 
				-the UNIX community;
			
 
				-<TT>tex</TT>
			
 
				-is a representative example.
			
 
				-To avoid reconverting such programs every time a new version
			
 
				-is released,
			
 
				-we built a porting environment, called the ANSI C/POSIX Environment, or APE [Tric95].
			
 
				-APE comprises separate include files, libraries, and commands,
			
 
				-conforming as much as possible to the strict ANSI C and base-level
			
 
				-POSIX specifications.
			
 
				-To port network-based software such as X Windows, it was necessary to add
			
 
				-some extensions to those
			
 
				-specifications, such as the BSD networking functions.
			
 
				-</P>
			
 
				-<H4>Portability and Compilation
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 is portable across a variety of processor architectures.
			
 
				-Within a single computing session, it is common to use
			
 
				-several architectures: perhaps the window system running on
			
 
				-an Intel processor connected to a MIPS-based CPU server with files
			
 
				-resident on a SPARC system.
			
 
				-For this heterogeneity to be transparent, there must be conventions
			
 
				-about data interchange between programs; for software maintenance
			
 
				-to be straightforward, there must be conventions about cross-architecture
			
 
				-compilation.
			
 
				-</P>
			
 
				-<P>
			
 
				-To avoid byte order problems,
			
 
				-data is communicated between programs as text whenever practical.
			
 
				-Sometimes, though, the amount of data is high enough that a binary
			
 
				-format is necessary;
			
 
				-such data is communicated as a byte stream with a pre-defined encoding
			
 
				-for multi-byte values.
			
 
				-In the rare cases where a format
			
 
				-is complex enough to be defined by a data structure,
			
 
				-the structure is never
			
 
				-communicated as a unit; instead, it is decomposed into
			
 
				-individual fields, encoded as an ordered byte stream, and then
			
 
				-reassembled by the recipient.
			
 
				-These conventions affect data
			
 
				-ranging from kernel or application program state information to object file
			
 
				-intermediates generated by the compiler.
			
 
				-</P>
			
 
				-<P>
			
 
				-Programs, including the kernel, often present their data
			
 
				-through a file system interface,
			
 
				-an access mechanism that is inherently portable.
			
 
				-For example, the system clock is represented by a decimal number in the file
			
 
				-<TT>/dev/time</TT>;
			
 
				-the
			
 
				-<TT>time</TT>
			
 
				-library function (there is no
			
 
				-<TT>time</TT>
			
 
				-system call) reads the file and converts it to binary.
			
 
				-Similarly, instead of encoding the state of an application
			
 
				-process in a series of flags and bits in private memory,
			
 
				-the kernel
			
 
				-presents a text string in the file named
			
 
				-<TT>status</TT>
			
 
				-in the 
			
 
				-<TT>/proc</TT>
			
 
				-file system associated with each process.
			
 
				-The Plan 9
			
 
				-<TT>ps</TT>
			
 
				-command is trivial: it prints the contents of
			
 
				-the desired status files after some minor reformatting; moreover, after
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-import helix /proc
			
 
				-</PRE></TT></DL>
			
 
				-a local
			
 
				-<TT>ps</TT>
			
 
				-command reports on the status of Helix's processes.
			
 
				-</P>
			
 
				-<P>
			
 
				-Each supported architecture has its own compilers and loader.
			
 
				-The C and Alef compilers produce intermediate files that
			
 
				-are portably encoded; the contents
			
 
				-are unique to the target architecture but the format of the
			
 
				-file is independent of compiling processor type.
			
 
				-When a compiler for a given architecture is compiled on
			
 
				-another type of processor and then used to compile a program
			
 
				-there,
			
 
				-the intermediate produced on
			
 
				-the new architecture is identical to the intermediate
			
 
				-produced on the native processor.  From the compiler's
			
 
				-point of view, every compilation is a cross-compilation.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although each architecture's loader accepts only intermediate files produced
			
 
				-by compilers for that architecture,
			
 
				-such files could have been generated by a compiler executing
			
 
				-on any type of processor.
			
 
				-For instance, it is possible to run
			
 
				-the MIPS compiler on a 486, then use the MIPS loader on a
			
 
				-SPARC to produce a MIPS executable.
			
 
				-</P>
			
 
				-<P>
			
 
				-Since Plan 9 runs on a variety of architectures, even in a single installation,
			
 
				-distinguishing the compilers and intermediate names
			
 
				-simplifies multi-architecture
			
 
				-development from a single source tree.
			
 
				-The compilers and the loader for each architecture are
			
 
				-uniquely named; there is no
			
 
				-<TT>cc</TT>
			
 
				-command.
			
 
				-The names are derived by concatenating a code letter
			
 
				-associated with the target architecture with the name of the
			
 
				-compiler or loader.  For example, the letter `8' is
			
 
				-the code letter for Intel
			
 
				-<I>x</I>86
			
 
				-processors; the C compiler is named
			
 
				-<TT>8c</TT>,
			
 
				-the Alef compiler
			
 
				-<TT>8al</TT>,
			
 
				-and the loader is called
			
 
				-<TT>8l</TT>.
			
 
				-Similarly, the compiler intermediate files are suffixed
			
 
				-<TT>.8</TT>,
			
 
				-not
			
 
				-<TT>.o</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The Plan 9
			
 
				-build program
			
 
				-<TT>mk</TT>,
			
 
				-a relative of
			
 
				-<TT>make</TT>,
			
 
				-reads the names of the current and target
			
 
				-architectures from environment variables called
			
 
				-<TT>$cputype</TT>
			
 
				-and
			
 
				-<TT>$objtype</TT>.
			
 
				-By default the current processor is the target, but setting
			
 
				-<TT>$objtype</TT>
			
 
				-to the name of another architecture
			
 
				-before invoking
			
 
				-<TT>mk</TT>
			
 
				-results in a cross-build:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% objtype=sparc mk
			
 
				-</PRE></TT></DL>
			
 
				-builds a program for the SPARC architecture regardless of the executing machine.
			
 
				-The value of
			
 
				-<TT>$objtype</TT>
			
 
				-selects a
			
 
				-file of architecture-dependent variable definitions
			
 
				-that configures the build to use the appropriate compilers and loader.
			
 
				-Although simple-minded, this technique works well in practice:
			
 
				-all applications in Plan 9 are built from a single source tree
			
 
				-and it is possible to build the various architectures in parallel without conflict.
			
 
				-</P>
			
 
				-<H4>Parallel programming
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9's support for parallel programming has two aspects.
			
 
				-First, the kernel provides
			
 
				-a simple process model and a few carefully designed system calls for
			
 
				-synchronization and sharing.
			
 
				-Second, a new parallel programming language called Alef
			
 
				-supports concurrent programming.
			
 
				-Although it is possible to write parallel
			
 
				-programs in C, Alef is the parallel language of choice.
			
 
				-</P>
			
 
				-<P>
			
 
				-There is a trend in new operating systems to implement two
			
 
				-classes of processes: normal UNIX-style processes and light-weight
			
 
				-kernel threads.
			
 
				-Instead, Plan 9 provides a single class of process but allows fine control of the
			
 
				-sharing of a process's resources such as memory and file descriptors.
			
 
				-A single class of process is a
			
 
				-feasible approach in Plan 9 because the kernel has an efficient system
			
 
				-call interface and cheap process creation and scheduling.
			
 
				-</P>
			
 
				-<P>
			
 
				-Parallel programs have three basic requirements:
			
 
				-management of resources shared between processes,
			
 
				-an interface to the scheduler,
			
 
				-and fine-grain process synchronization using spin locks.
			
 
				-On Plan 9,
			
 
				-new processes are created using the
			
 
				-<TT>rfork</TT>
			
 
				-system call.
			
 
				-<TT>Rfork</TT>
			
 
				-takes a single argument,
			
 
				-a bit vector that specifies
			
 
				-which of the parent process's resources should be shared,
			
 
				-copied, or created anew
			
 
				-in the child.
			
 
				-The resources controlled by
			
 
				-<TT>rfork</TT>
			
 
				-include the name space,
			
 
				-the environment,
			
 
				-the file descriptor table,
			
 
				-memory segments,
			
 
				-and notes (Plan 9's analog of UNIX signals).
			
 
				-One of the bits controls whether the
			
 
				-<TT>rfork</TT>
			
 
				-call will create a new process; if the bit is off, the resulting
			
 
				-modification to the resources occurs in the process making the call.
			
 
				-For example, a process calls
			
 
				-<TT>rfork(RFNAMEG)</TT>
			
 
				-to disconnect its name space from its parent's.
			
 
				-Alef uses a
			
 
				-fine-grained fork in which all the resources, including
			
 
				-memory, are shared between parent
			
 
				-and child, analogous to creating a kernel thread in many systems.
			
 
				-</P>
			
 
				-<P>
			
 
				-An indication that
			
 
				-<TT>rfork</TT>
			
 
				-is the right model is the variety of ways it is used.
			
 
				-Other than the canonical use in the library routine
			
 
				-<TT>fork</TT>,
			
 
				-it is hard to find two calls to
			
 
				-<TT>rfork</TT>
			
 
				-with the same bits set; programs
			
 
				-use it to create many different forms of sharing and resource allocation.
			
 
				-A system with just two types of processes&#x2014;regular processes and threads&#x2014;could
			
 
				-not handle this variety.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are two ways to share memory.
			
 
				-First, a flag to
			
 
				-<TT>rfork</TT>
			
 
				-causes all the memory segments of the parent to be shared with the child
			
 
				-(except the stack, which is
			
 
				-forked copy-on-write regardless).
			
 
				-Alternatively, a new segment of memory may be
			
 
				-attached using the
			
 
				-<TT>segattach</TT>
			
 
				-system call; such a segment
			
 
				-will always be shared between parent and child.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>rendezvous</TT>
			
 
				-system call provides a way for processes to synchronize.
			
 
				-Alef uses it to implement communication channels,
			
 
				-queuing locks,
			
 
				-multiple reader/writer locks, and
			
 
				-the sleep and wakeup mechanism.
			
 
				-<TT>Rendezvous</TT>
			
 
				-takes two arguments, a tag and a value.
			
 
				-When a process calls
			
 
				-<TT>rendezvous</TT>
			
 
				-with a tag it sleeps until another process
			
 
				-presents a matching tag.
			
 
				-When a pair of tags match, the values are exchanged
			
 
				-between the two processes and both
			
 
				-<TT>rendezvous</TT>
			
 
				-calls return.
			
 
				-This primitive is sufficient to implement the full set of synchronization routines.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally, spin locks are provided by
			
 
				-an architecture-dependent library at user level.
			
 
				-Most processors provide atomic test and set instructions that
			
 
				-can be used to implement locks.
			
 
				-A notable exception is the MIPS R3000, so the SGI
			
 
				-Power series multiprocessors have special lock hardware on the bus.
			
 
				-User processes gain access to the lock hardware
			
 
				-by mapping pages of hardware locks
			
 
				-into their address space using the
			
 
				-<TT>segattach</TT>
			
 
				-system call.
			
 
				-</P>
			
 
				-<P>
			
 
				-A Plan 9 process in a system call will block regardless of its `weight'.
			
 
				-This means that when a program wishes to read from a slow
			
 
				-device without blocking the entire calculation, it must fork a process to do
			
 
				-the read for it.  The solution is to start a satellite
			
 
				-process that does the I/O and delivers the answer to the main program
			
 
				-through shared memory or perhaps a pipe.
			
 
				-This sounds onerous but works easily and efficiently in practice; in fact,
			
 
				-most interactive Plan 9 applications, even relatively ordinary ones written
			
 
				-in C, such as
			
 
				-the text editor Sam [Pike87], run as multiprocess programs.
			
 
				-</P>
			
 
				-<P>
			
 
				-The kernel support for parallel programming in Plan 9 is a few hundred lines
			
 
				-of portable code; a handful of simple primitives enable the problems to be handled
			
 
				-cleanly at user level.
			
 
				-Although the primitives work fine from C,
			
 
				-they are particularly expressive from within Alef.
			
 
				-The creation
			
 
				-and management of slave I/O processes can be written in a few lines of Alef,
			
 
				-providing the foundation for a consistent means of multiplexing
			
 
				-data flows between arbitrary processes.
			
 
				-Moreover, implementing it in a language rather than in the kernel
			
 
				-ensures consistent semantics between all devices
			
 
				-and provides a more general multiplexing primitive.
			
 
				-Compare this to the UNIX
			
 
				-<TT>select</TT>
			
 
				-system call:
			
 
				-<TT>select</TT>
			
 
				-applies only to a restricted set of devices,
			
 
				-legislates a style of multiprogramming in the kernel,
			
 
				-does not extend across networks,
			
 
				-is difficult to implement, and is hard to use.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another reason
			
 
				-parallel programming is important in Plan 9 is that
			
 
				-multi-threaded user-level file servers are the preferred way
			
 
				-to implement services.
			
 
				-Examples of such servers include the programming environment
			
 
				-Acme [Pike94],
			
 
				-the name space exporting tool
			
 
				-<TT>exportfs</TT>
			
 
				-[PPTTW93],
			
 
				-the HTTP daemon,
			
 
				-and the network name servers
			
 
				-<TT>cs</TT>
			
 
				-and
			
 
				-<TT>dns</TT>
			
 
				-[PrWi93].
			
 
				-Complex applications such as Acme prove that
			
 
				-careful operating system support can reduce the difficulty of writing
			
 
				-multi-threaded applications without moving threading and
			
 
				-synchronization primitives into the kernel.
			
 
				-</center></P>
			
 
				-<H4>Implementation of Name Spaces
			
 
				-</H4>
			
 
				-<P>
			
 
				-User processes construct name spaces using three system calls:
			
 
				-<TT>mount</TT>,
			
 
				-<TT>bind</TT>,
			
 
				-and
			
 
				-<TT>unmount</TT>.
			
 
				-The
			
 
				-<TT>mount</TT>
			
 
				-system call attaches a tree served by a file server to
			
 
				-the current name space.  Before calling
			
 
				-<TT>mount</TT>,
			
 
				-the client must (by outside means) acquire a connection to the server in
			
 
				-the form of a file descriptor that may be written and read to transmit 9P messages.
			
 
				-That file descriptor represents a pipe or network connection.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>mount</TT>
			
 
				-call attaches a new hierarchy to the existing name space.
			
 
				-The
			
 
				-<TT>bind</TT>
			
 
				-system call, on the other hand, duplicates some piece of existing name space at
			
 
				-another point in the name space.
			
 
				-The
			
 
				-<TT>unmount</TT>
			
 
				-system call allows components to be removed.
			
 
				-</P>
			
 
				-<P>
			
 
				-Using
			
 
				-either
			
 
				-<TT>bind</TT>
			
 
				-or
			
 
				-<TT>mount</TT>,
			
 
				-multiple directories may be stacked at a single point in the name space.
			
 
				-In Plan 9 terminology, this is a
			
 
				-<I>union</I>
			
 
				-directory and behaves like the concatenation of the constituent directories.
			
 
				-A flag argument to
			
 
				-<TT>bind</TT>
			
 
				-and
			
 
				-<TT>mount</TT>
			
 
				-specifies the position of a new directory in the union,
			
 
				-permitting new elements
			
 
				-to be added either at the front or rear of the union or to replace it entirely.
			
 
				-When a file lookup is performed in a union directory, each component
			
 
				-of the union is searched in turn and the first match taken; likewise,
			
 
				-when a union directory is read, the contents of each of the component directories
			
 
				-are read in turn.
			
 
				-Union directories are one of the most widely used organizational features
			
 
				-of the Plan 9 name space.
			
 
				-For instance, the directory
			
 
				-<TT>/bin</TT>
			
 
				-is built as a union of
			
 
				-<TT>/$cputype/bin</TT>
			
 
				-(program binaries),
			
 
				-<TT>/rc/bin</TT>
			
 
				-(shell scripts),
			
 
				-and perhaps more directories provided by the user.
			
 
				-This construction makes the shell
			
 
				-<TT>$PATH</TT>
			
 
				-variable unnecessary.
			
 
				-</P>
			
 
				-<P>
			
 
				-One question raised by union directories
			
 
				-is which element of the union receives a newly created file.
			
 
				-After several designs, we decided on the following.
			
 
				-By default, directories in unions do not accept new files, although the
			
 
				-<TT>create</TT>
			
 
				-system call applied to an existing file succeeds normally.
			
 
				-When a directory is added to the union, a flag to
			
 
				-<TT>bind</TT>
			
 
				-or
			
 
				-<TT>mount</TT>
			
 
				-enables create permission (a property of the name space) in that directory.
			
 
				-When a file is being created with a new name in a union, it is created in the
			
 
				-first directory of the union with create permission; if that creation fails,
			
 
				-the entire
			
 
				-<TT>create</TT>
			
 
				-fails.
			
 
				-This scheme enables the common use of placing a private directory anywhere
			
 
				-in a union of public ones,
			
 
				-while allowing creation only in the private directory.
			
 
				-</P>
			
 
				-<P>
			
 
				-By convention, kernel device file systems
			
 
				-are bound into the
			
 
				-<TT>/dev</TT>
			
 
				-directory, but to bootstrap the name space building process it is
			
 
				-necessary to have a notation that permits
			
 
				-direct access to the devices without an existing name space.
			
 
				-The root directory
			
 
				-of the tree served by a device driver can be accessed using the syntax
			
 
				-<TT>#</TT><I>c</I>,
			
 
				-where
			
 
				-<I>c</I>
			
 
				-is a unique character (typically a letter) identifying the
			
 
				-<I>type</I>
			
 
				-of the device.
			
 
				-Simple device drivers serve a single level directory containing a few files.
			
 
				-As an example,
			
 
				-each serial port is represented by a data and a control file:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% bind -a '#t' /dev
			
 
				-% cd /dev
			
 
				-% ls -l eia*
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia1
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia1ctl
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia2
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia2ctl
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>bind</TT>
			
 
				-program is an encapsulation of the
			
 
				-<TT>bind</TT>
			
 
				-system call; its
			
 
				-<TT>-a</TT>
			
 
				-flag positions the new directory at the end of the union.
			
 
				-The data files
			
 
				-<TT>eia1</TT>
			
 
				-and
			
 
				-<TT>eia2</TT>
			
 
				-may be read and written to communicate over the serial line.
			
 
				-Instead of using special operations on these files to control the devices,
			
 
				-commands written to the files
			
 
				-<TT>eia1ctl</TT>
			
 
				-and
			
 
				-<TT>eia2ctl</TT>
			
 
				-control the corresponding device;
			
 
				-for example,
			
 
				-writing the text string
			
 
				-<TT>b1200</TT>
			
 
				-to
			
 
				-<TT>/dev/eia1ctl</TT>
			
 
				-sets the speed of that line to 1200 baud.
			
 
				-Compare this to the UNIX
			
 
				-<TT>ioctl</TT>
			
 
				-system call: in Plan 9, devices are controlled by textual messages,
			
 
				-free of byte order problems, with clear semantics for reading and writing.
			
 
				-It is common to configure or debug devices using shell scripts.
			
 
				-</P>
			
 
				-<P>
			
 
				-It is the universal use of the 9P protocol that
			
 
				-connects Plan 9's components together to form a
			
 
				-distributed system.
			
 
				-Rather than inventing a unique protocol for each
			
 
				-service such as
			
 
				-<TT>rlogin</TT>,
			
 
				-FTP, TFTP, and X windows,
			
 
				-Plan 9 implements services
			
 
				-in terms of operations on file objects,
			
 
				-and then uses a single, well-documented protocol to exchange information between
			
 
				-computers.
			
 
				-Unlike NFS, 9P treats files as a sequence of bytes rather than blocks.
			
 
				-Also unlike NFS, 9P is stateful: clients perform
			
 
				-remote procedure calls to establish pointers to objects in the remote
			
 
				-file server.
			
 
				-These pointers are called file identifiers or
			
 
				-<I>fids</I>.
			
 
				-All operations on files supply a fid to identify an object in the remote file system.
			
 
				-</P>
			
 
				-<P>
			
 
				-The 9P protocol defines 17 messages, providing
			
 
				-means to authenticate users, navigate fids around
			
 
				-a file system hierarchy, copy fids, perform I/O, change file attributes, 
			
 
				-and create and delete files.
			
 
				-Its complete specification is in Section 5 of the Programmer's Manual [9man].
			
 
				-Here is the procedure to gain access to the name hierarchy supplied by a server.
			
 
				-A file server connection is established via a pipe or network connection.
			
 
				-An initial
			
 
				-<TT>session</TT>
			
 
				-message performs a bilateral authentication between client and server.
			
 
				-An
			
 
				-<TT>attach</TT>
			
 
				-message then connects a fid suggested by the client to the root of the server file
			
 
				-tree.
			
 
				-The
			
 
				-<TT>attach</TT>
			
 
				-message includes the identity of the user performing the attach; henceforth all
			
 
				-fids derived from the root fid will have permissions associated with
			
 
				-that user.
			
 
				-Multiple users may share the connection, but each must perform an attach to
			
 
				-establish his or her identity.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>walk</TT>
			
 
				-message moves a fid through a single level of the file system hierarchy.
			
 
				-The
			
 
				-<TT>clone</TT>
			
 
				-message takes an established fid and produces a copy that points
			
 
				-to the same file as the original.
			
 
				-Its purpose is to enable walking to a file in a directory without losing the fid
			
 
				-on the directory.
			
 
				-The
			
 
				-<TT>open</TT>
			
 
				-message locks a fid to a specific file in the hierarchy,
			
 
				-checks access permissions,
			
 
				-and prepares the fid
			
 
				-for I/O.
			
 
				-The
			
 
				-<TT>read</TT>
			
 
				-and
			
 
				-<TT>write</TT>
			
 
				-messages allow I/O at arbitrary offsets in the file;
			
 
				-the maximum size transferred is defined by the protocol.
			
 
				-The
			
 
				-<TT>clunk</TT>
			
 
				-message indicates the client has no further use for a fid.
			
 
				-The
			
 
				-<TT>remove</TT>
			
 
				-message behaves like
			
 
				-<TT>clunk</TT>
			
 
				-but causes the file associated with the fid to be removed and any associated
			
 
				-resources on the server to be deallocated.
			
 
				-</P>
			
 
				-<P>
			
 
				-9P has two forms: RPC messages sent on a pipe or network connection and a procedural
			
 
				-interface within the kernel.
			
 
				-Since kernel device drivers are directly addressable,
			
 
				-there is no need to pass messages to
			
 
				-communicate with them;
			
 
				-instead each 9P transaction is implemented by a direct procedure call.
			
 
				-For each fid,
			
 
				-the kernel maintains a local representation in a data structure called a
			
 
				-<I>channel</I>,
			
 
				-so all operations on files performed by the kernel involve a channel connected
			
 
				-to that fid.
			
 
				-The simplest example is a user process's file descriptors, which are
			
 
				-indexes into an array of channels.
			
 
				-A table in the kernel provides a list
			
 
				-of entry points corresponding one to one with the 9P messages for each device.
			
 
				-A system call such as
			
 
				-<TT>read</TT>
			
 
				-from the user translates into one or more procedure calls
			
 
				-through that table, indexed by the type character stored in the channel:
			
 
				-<TT>procread</TT>,
			
 
				-<TT>eiaread</TT>,
			
 
				-etc.
			
 
				-Each call takes at least
			
 
				-one channel as an argument.
			
 
				-A special kernel driver, called the
			
 
				-<I>mount</I>
			
 
				-driver, translates procedure calls to messages, that is,
			
 
				-it converts local procedure calls to remote ones.
			
 
				-In effect, this special driver
			
 
				-becomes a local proxy for the files served by a remote file server.
			
 
				-The channel pointer in the local call is translated to the associated fid
			
 
				-in the transmitted message.
			
 
				-</P>
			
 
				-<P>
			
 
				-The mount driver is the sole RPC mechanism employed by the system.
			
 
				-The semantics of the supplied files, rather than the operations performed upon
			
 
				-them, create a particular service such as the
			
 
				-<TT>cpu</TT>
			
 
				-command.
			
 
				-The mount driver demultiplexes protocol
			
 
				-messages between clients sharing a communication channel
			
 
				-with a file server.
			
 
				-For each outgoing RPC message,
			
 
				-the mount driver allocates a buffer labeled by a small unique integer,
			
 
				-called a
			
 
				-<I>tag</I>.
			
 
				-The reply to the RPC is labeled with the same tag, which is used by
			
 
				-the mount driver to match the reply with the request.
			
 
				-</P>
			
 
				-<P>
			
 
				-The kernel representation of the name space
			
 
				-is called the
			
 
				-<I>mount table</I>,
			
 
				-which stores a list of bindings between channels.
			
 
				-Each entry in the mount table contains a pair of channels: a
			
 
				-<I>from</I>
			
 
				-channel and a
			
 
				-<I>to</I>
			
 
				-channel.
			
 
				-Every time a walk succeeds in moving a channel to a new location in the name space,
			
 
				-the mount table is consulted to see if a `from' channel matches the new name; if
			
 
				-so the `to' channel is cloned and substituted for the original.
			
 
				-Union directories are implemented by converting the `to'
			
 
				-channel into a list of channels: 
			
 
				-a successful walk to a union directory returns a `to' channel that forms
			
 
				-the head of
			
 
				-a list of channels, each representing a component directory
			
 
				-of the union.
			
 
				-If a walk
			
 
				-fails to find a file in the first directory of the union, the list is followed,
			
 
				-the next component cloned, and walk tried on that directory.
			
 
				-</P>
			
 
				-<P>
			
 
				-Each file in Plan 9 is uniquely identified by a set of integers:
			
 
				-the type of the channel (used as the index of the function call table),
			
 
				-the server or device number
			
 
				-distinguishing the server from others of the same type (decided locally by the driver),
			
 
				-and a
			
 
				-<I>qid</I>
			
 
				-formed from two 32-bit numbers called
			
 
				-<I>path</I>
			
 
				-and
			
 
				-<I>version</I>.
			
 
				-The path is a unique file number assigned by a device driver or
			
 
				-file server when a file is created.
			
 
				-The version number is updated whenever
			
 
				-the file is modified; as described in the next section,
			
 
				-it can be used to maintain cache coherency between
			
 
				-clients and servers.
			
 
				-</P>
			
 
				-<P>
			
 
				-The type and device number are analogous to UNIX major and minor
			
 
				-device numbers;
			
 
				-the qid is analogous to the i-number.
			
 
				-The device and type
			
 
				-connect the channel to a device driver and the qid
			
 
				-identifies the file within that device. 
			
 
				-If the file recovered from a walk has the same type, device, and qid path
			
 
				-as an entry in the mount table, they are the same file and the
			
 
				-corresponding substitution from the mount table is made.
			
 
				-This is how the name space is implemented.
			
 
				-</center></P>
			
 
				-<H4>File Caching
			
 
				-</H4>
			
 
				-<P>
			
 
				-The 9P protocol has no explicit support for caching files on a client.
			
 
				-The large memory of the central file server acts as a shared cache for all its clients,
			
 
				-which reduces the total amount of memory needed across all machines in the network.
			
 
				-Nonetheless, there are sound reasons to cache files on the client, such as a slow
			
 
				-connection to the file server.
			
 
				-</P>
			
 
				-<P>
			
 
				-The version field of the qid is changed whenever the file is modified,
			
 
				-which makes it possible to do some weakly coherent forms of caching.
			
 
				-The most important is client caching of text and data segments of executable files.
			
 
				-When a process
			
 
				-<TT>execs</TT>
			
 
				-a program, the file is re-opened and the qid's version is compared with that in the cache;
			
 
				-if they match, the local copy is used.
			
 
				-The same method can be used to build a local caching file server.
			
 
				-This user-level server interposes on the 9P connection to the remote server and
			
 
				-monitors the traffic, copying data to a local disk.
			
 
				-When it sees a read of known data, it answers directly,
			
 
				-while writes are passed on immediately&#x2014;the cache is write-through&#x2014;to keep
			
 
				-the central copy up to date.
			
 
				-This is transparent to processes on the terminal and requires no change to 9P;
			
 
				-it works well on home machines connected over serial lines.
			
 
				-A similar method can be applied to build a general client cache in unused local
			
 
				-memory, but this has not been done in Plan 9.
			
 
				-</center></P>
			
 
				-<H4>Networks and Communication Devices
			
 
				-</H4>
			
 
				-<P>
			
 
				-Network interfaces are kernel-resident file systems, analogous to the EIA device
			
 
				-described earlier.
			
 
				-Call setup and shutdown are achieved by writing text strings to the control file
			
 
				-associated with the device;
			
 
				-information is sent and received by reading and writing the data file.
			
 
				-The structure and semantics of the devices are common to all networks so,
			
 
				-other than a file name substitution,
			
 
				-the same procedure makes a call using TCP over Ethernet as URP over Datakit [Fra80].
			
 
				-</P>
			
 
				-<P>
			
 
				-This example illustrates the structure of the TCP device:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% ls -lp /net/tcp
			
 
				-d-r-xr-xr-x I 0 bootes bootes 0 Feb 23 20:20 0
			
 
				-d-r-xr-xr-x I 0 bootes bootes 0 Feb 23 20:20 1
			
 
				---rw-rw-rw- I 0 bootes bootes 0 Feb 23 20:20 clone
			
 
				-% ls -lp /net/tcp/0
			
 
				---rw-rw---- I 0 rob    bootes 0 Feb 23 20:20 ctl
			
 
				---rw-rw---- I 0 rob    bootes 0 Feb 23 20:20 data
			
 
				---rw-rw---- I 0 rob    bootes 0 Feb 23 20:20 listen
			
 
				---r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 local
			
 
				---r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 remote
			
 
				---r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 status
			
 
				-%
			
 
				-</PRE></TT></DL>
			
 
				-The top directory,
			
 
				-<TT>/net/tcp</TT>,
			
 
				-contains a
			
 
				-<TT>clone</TT>
			
 
				-file and a directory for each connection, numbered
			
 
				-<TT>0</TT>
			
 
				-to
			
 
				-<I>n</I>.
			
 
				-Each connection directory corresponds to a TCP/IP connection.
			
 
				-Opening
			
 
				-<TT>clone</TT>
			
 
				-reserves an unused connection and returns its control file.
			
 
				-Reading the control file returns the textual connection number, so the user
			
 
				-process can construct the full name of the newly allocated
			
 
				-connection directory.
			
 
				-The
			
 
				-<TT>local</TT>,
			
 
				-<TT>remote</TT>,
			
 
				-and
			
 
				-<TT>status</TT>
			
 
				-files are diagnostic; for example,
			
 
				-<TT>remote</TT>
			
 
				-contains the address (for TCP, the IP address and port number) of the remote side.
			
 
				-</P>
			
 
				-<P>
			
 
				-A call is initiated by writing a connect message with a network-specific address as
			
 
				-its argument; for example, to open a Telnet session (port 23) to a remote machine
			
 
				-with IP address 135.104.9.52,
			
 
				-the string is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-connect 135.104.9.52!23
			
 
				-</PRE></TT></DL>
			
 
				-The write to the control file blocks until the connection is established;
			
 
				-if the destination is unreachable, the write returns an error.
			
 
				-Once the connection is established, the
			
 
				-<TT>telnet</TT>
			
 
				-application reads and writes the
			
 
				-<TT>data</TT>
			
 
				-file
			
 
				-to talk to the remote Telnet daemon.
			
 
				-On the other end, the Telnet daemon would start by writing
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-announce 23
			
 
				-</PRE></TT></DL>
			
 
				-to its control file to indicate its willingness to receive calls to this port.
			
 
				-Such a daemon is called a
			
 
				-<I>listener</I>
			
 
				-in Plan 9.
			
 
				-</P>
			
 
				-<P>
			
 
				-A uniform structure for network devices cannot hide all the details
			
 
				-of addressing and communication for dissimilar networks.
			
 
				-For example, Datakit uses textual, hierarchical addresses unlike IP's 32-bit addresses, so
			
 
				-an application given a control file must still know what network it represents.
			
 
				-Rather than make every application know the addressing of every network,
			
 
				-Plan 9 hides these details in a
			
 
				-<I>connection</I>
			
 
				-<I>server</I>,
			
 
				-called
			
 
				-<TT>cs</TT>.
			
 
				-<TT>Cs</TT>
			
 
				-is a file system mounted in a known place.
			
 
				-It supplies a single control file that an application uses to discover how to connect
			
 
				-to a host.
			
 
				-The application writes the symbolic address and service name for
			
 
				-the connection it wishes to make,
			
 
				-and reads back the name of the
			
 
				-<TT>clone</TT>
			
 
				-file to open and the address to present to it.
			
 
				-If there are multiple networks between the machines,
			
 
				-<TT>cs</TT>
			
 
				-presents a list of possible networks and addresses to be tried in sequence;
			
 
				-it uses heuristics to decide the order.
			
 
				-For instance, it presents the highest-bandwidth choice first.
			
 
				-</P>
			
 
				-<P>
			
 
				-A single library function called
			
 
				-<TT>dial</TT>
			
 
				-talks to
			
 
				-<TT>cs</TT>
			
 
				-to establish the connection.
			
 
				-An application that uses
			
 
				-<TT>dial</TT>
			
 
				-needs no changes, not even recompilation, to adapt to new networks;
			
 
				-the interface to
			
 
				-<TT>cs</TT>
			
 
				-hides the details.
			
 
				-</P>
			
 
				-<P>
			
 
				-The uniform structure for networks in Plan 9 makes the
			
 
				-<TT>import</TT>
			
 
				-command all that is needed to construct gateways.
			
 
				-</center></P>
			
 
				-<H4>Kernel structure for networks
			
 
				-</H4>
			
 
				-<P>
			
 
				-The kernel plumbing used to build Plan 9 communications
			
 
				-channels is called
			
 
				-<I>streams</I>
			
 
				-[Rit84][Presotto].
			
 
				-A stream is a bidirectional channel connecting a
			
 
				-physical or pseudo-device to a user process.
			
 
				-The user process inserts and removes data at one end of the stream;
			
 
				-a kernel process acting on behalf of a device operates at
			
 
				-the other end.
			
 
				-A stream comprises a linear list of
			
 
				-<I>processing modules</I>.
			
 
				-Each module has both an upstream (toward the process) and
			
 
				-downstream (toward the device)
			
 
				-<I>put routine</I>.
			
 
				-Calling the put routine of the module on either end of the stream
			
 
				-inserts data into the stream.
			
 
				-Each module calls the succeeding one to send data up or down the stream.
			
 
				-Like UNIX streams [Rit84],
			
 
				-Plan 9 streams can be dynamically configured.
			
 
				-</center></P>
			
 
				-<H4>The IL Protocol
			
 
				-</H4>
			
 
				-<P>
			
 
				-The 9P protocol must run above a reliable transport protocol with delimited messages.
			
 
				-9P has no mechanism to recover from transmission errors and
			
 
				-the system assumes that each read from a communication channel will
			
 
				-return a single 9P message;
			
 
				-it does not parse the data stream to discover message boundaries.
			
 
				-Pipes and some network protocols already have these properties but
			
 
				-the standard IP protocols do not.
			
 
				-TCP does not delimit messages, while
			
 
				-UDP [RFC768] does not provide reliable in-order delivery.
			
 
				-</P>
			
 
				-<P>
			
 
				-We designed a new protocol, called IL (Internet Link), to transmit 9P messages over IP.
			
 
				-It is a connection-based protocol that provides
			
 
				-reliable transmission of sequenced messages between machines.
			
 
				-Since a process can have only a single outstanding 9P request,
			
 
				-there is no need for flow control in IL.
			
 
				-Like TCP, IL has adaptive timeouts: it scales acknowledge and retransmission times
			
 
				-to match the network speed.
			
 
				-This allows the protocol to perform well on both the Internet and on local Ethernets.
			
 
				-Also, IL does no blind retransmission,
			
 
				-to avoid adding to the congestion of busy networks.
			
 
				-Full details are in another paper [PrWi95].
			
 
				-</P>
			
 
				-<P>
			
 
				-In Plan 9, the implementation of IL is smaller and faster than TCP.
			
 
				-IL is our main Internet transport protocol.
			
 
				-</center></P>
			
 
				-<H4>Overview of authentication
			
 
				-</H4>
			
 
				-<P>
			
 
				-Authentication establishes the identity of a
			
 
				-user accessing a resource.
			
 
				-The user requesting the resource is called the
			
 
				-<I>client</I>
			
 
				-and the user granting access to the resource is called the
			
 
				-<I>server</I>.
			
 
				-This is usually done under the auspices of a 9P attach message.
			
 
				-A user may be a client in one authentication exchange and a server in another.
			
 
				-Servers always act on behalf of some user,
			
 
				-either a normal client or some administrative entity, so authentication
			
 
				-is defined to be between users, not machines.
			
 
				-</P>
			
 
				-<P>
			
 
				-Each Plan 9 user has an associated DES [NBS77] authentication key;
			
 
				-the user's identity is verified by the ability to
			
 
				-encrypt and decrypt special messages called challenges.
			
 
				-Since knowledge of a user's key gives access to that user's resources,
			
 
				-the Plan 9 authentication protocols never transmit a message containing
			
 
				-a cleartext key.
			
 
				-</P>
			
 
				-<P>
			
 
				-Authentication is bilateral:
			
 
				-at the end of the authentication exchange,
			
 
				-each side is convinced of the other's identity.
			
 
				-Every machine begins the exchange with a DES key in memory.
			
 
				-In the case of CPU and file servers, the key, user name, and domain name
			
 
				-for the server are read from permanent storage,
			
 
				-usually non-volatile RAM.
			
 
				-In the case of terminals,
			
 
				-the key is derived from a password typed by the user at boot time.
			
 
				-A special machine, known as the
			
 
				-<I>authentication</I>
			
 
				-<I>server</I>,
			
 
				-maintains a database of keys for all users in its administrative domain and
			
 
				-participates in the authentication protocols.
			
 
				-</P>
			
 
				-<P>
			
 
				-The authentication protocol is as follows:
			
 
				-after exchanging challenges, one party
			
 
				-contacts the authentication server to create
			
 
				-permission-granting
			
 
				-<I>tickets</I>
			
 
				-encrypted with
			
 
				-each party's secret key and containing a new conversation key.
			
 
				-Each
			
 
				-party decrypts its own ticket and uses the conversation key to
			
 
				-encrypt the other party's challenge.
			
 
				-</P>
			
 
				-<P>
			
 
				-This structure is somewhat like Kerberos [MBSS87], but avoids
			
 
				-its reliance on synchronized clocks.
			
 
				-Also
			
 
				-unlike Kerberos, Plan 9 authentication supports a `speaks for'
			
 
				-relation [LABW91] that enables one user to have the authority
			
 
				-of another;
			
 
				-this is how a CPU server runs processes on behalf of its clients.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9's authentication structure builds
			
 
				-secure services rather than depending on firewalls.
			
 
				-Whereas firewalls require special code for every service penetrating the wall,
			
 
				-the Plan 9 approach permits authentication to be done in a single place&#x2014;9P&#x2014;for
			
 
				-all services.
			
 
				-For example, the
			
 
				-<TT>cpu</TT>
			
 
				-command works securely across the Internet.
			
 
				-</P>
			
 
				-<H4>Authenticating external connections
			
 
				-</H4>
			
 
				-<P>
			
 
				-The regular Plan 9 authentication protocol is not suitable for text-based services such as
			
 
				-Telnet
			
 
				-or FTP.
			
 
				-In such cases, Plan 9 users authenticate with hand-held DES calculators called
			
 
				-<I>authenticators</I>.
			
 
				-The authenticator holds a key for the user, distinct from
			
 
				-the user's normal authentication key.
			
 
				-The user `logs on' to the authenticator using a 4-digit PIN.
			
 
				-A correct PIN enables the authenticator for a challenge/response exchange with the server.
			
 
				-Since a correct challenge/response exchange is valid only once
			
 
				-and keys are never sent over the network,
			
 
				-this procedure is not susceptible to replay attacks, yet
			
 
				-is compatible with protocols like Telnet and FTP.
			
 
				-</P>
			
 
				-<H4>Special users
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 has no super-user.
			
 
				-Each server is responsible for maintaining its own security, usually permitting
			
 
				-access only from the console, which is protected by a password.
			
 
				-For example, file servers have a unique administrative user called
			
 
				-<TT>adm</TT>,
			
 
				-with special privileges that apply only to commands typed at the server's
			
 
				-physical console.
			
 
				-These privileges concern the day-to-day maintenance of the server,
			
 
				-such as adding new users and configuring disks and networks.
			
 
				-The privileges do
			
 
				-<I>not</I>
			
 
				-include the ability to modify, examine, or change the permissions of any files.
			
 
				-If a file is read-protected by a user, only that user may grant access to others.
			
 
				-</P>
			
 
				-<P>
			
 
				-CPU servers have an equivalent user name that allows administrative access to
			
 
				-resources on that server such as the control files of user processes.
			
 
				-Such permission is necessary, for example, to kill rogue processes, but
			
 
				-does not extend beyond that server.
			
 
				-On the other hand, by means of a key
			
 
				-held in protected non-volatile RAM,
			
 
				-the identity of the administrative user is proven to the
			
 
				-authentication server.
			
 
				-This allows the CPU server to authenticate remote users, both
			
 
				-for access to the server itself and when the CPU server is acting
			
 
				-as a proxy on their behalf.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally, a special user called
			
 
				-<TT>none</TT>
			
 
				-has no password and is always allowed to connect;
			
 
				-anyone may claim to be
			
 
				-<TT>none</TT>.
			
 
				-<TT>None</TT>
			
 
				-has restricted permissions; for example, it is not allowed to examine dump files
			
 
				-and can read only world-readable files.
			
 
				-</P>
			
 
				-<P>
			
 
				-The idea behind
			
 
				-<TT>none</TT>
			
 
				-is analogous to the anonymous user in FTP
			
 
				-services.
			
 
				-On Plan 9, guest FTP servers are further confined within a special
			
 
				-restricted name space.
			
 
				-It disconnects guest users from system programs, such as the contents of
			
 
				-<TT>/bin</TT>,
			
 
				-but makes it possible to make local files available to guests
			
 
				-by binding them explicitly into the space.
			
 
				-A restricted name space is more secure than the usual technique of exporting
			
 
				-an ad hoc directory tree; the result is a kind of cage around untrusted users.
			
 
				-</P>
			
 
				-<H4>The cpu command and proxied authentication
			
 
				-</H4>
			
 
				-<P>
			
 
				-When a call is made to a CPU server for a user, say Peter,
			
 
				-the intent is that Peter wishes to run processes with his own authority.
			
 
				-To implement this property,
			
 
				-the CPU server does the following when the call is received.
			
 
				-First, the listener forks off a process to handle the call.
			
 
				-This process changes to the user
			
 
				-<TT>none</TT>
			
 
				-to avoid giving away permissions if it is compromised.
			
 
				-It then performs the authentication protocol to verify that the
			
 
				-calling user really is Peter, and to prove to Peter that
			
 
				-the machine is itself trustworthy.
			
 
				-Finally, it reattaches to all relevant file servers using the
			
 
				-authentication protocol to identify itself as Peter.
			
 
				-In this case, the CPU server is a client of the file server and performs the
			
 
				-client portion of the authentication exchange on behalf of Peter.
			
 
				-The authentication server will give the process tickets to 
			
 
				-accomplish this only if the CPU server's administrative user name is allowed to
			
 
				-<I>speak for</I>
			
 
				-Peter.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<I>speaks for</I>
			
 
				-relation [LABW91] is kept in a table on the authentication server.
			
 
				-To simplify the management of users computing in different authentication domains,
			
 
				-it also contains mappings between user names in different domains,
			
 
				-for example saying that user
			
 
				-<TT>rtm</TT>
			
 
				-in one domain is the same person as user
			
 
				-<TT>rtmorris</TT>
			
 
				-in another.
			
 
				-</P>
			
 
				-<H4>File Permissions
			
 
				-</H4>
			
 
				-<P>
			
 
				-One of the advantages of constructing services as file systems
			
 
				-is that the solutions to ownership and permission problems fall out naturally.
			
 
				-As in UNIX,
			
 
				-each file or directory has separate read, write, and execute/search permissions
			
 
				-for the file's owner, the file's group, and anyone else.
			
 
				-The idea of group is unusual:
			
 
				-any user name is potentially a group name.
			
 
				-A group is just a user with a list of other users in the group.
			
 
				-Conventions make the distinction: most people have user names without group members,
			
 
				-while groups have long lists of attached names.  For example, the
			
 
				-<TT>sys</TT>
			
 
				-group traditionally has all the system programmers,
			
 
				-and system files are accessible
			
 
				-by group
			
 
				-<TT>sys</TT>.
			
 
				-Consider the following two lines of a user database stored on a server:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pjw:pjw:
			
 
				-sys::pjw,ken,philw,presotto
			
 
				-</PRE></TT></DL>
			
 
				-The first establishes user
			
 
				-<TT>pjw</TT>
			
 
				-as a regular user.  The second establishes user
			
 
				-<TT>sys</TT>
			
 
				-as a group and lists four users who are
			
 
				-<I>members</I>
			
 
				-of that group.
			
 
				-The empty colon-separated field is space for a user to be named as the
			
 
				-<I>group</I>
			
 
				-<I>leader</I>.
			
 
				-If a group has a leader, that user has special permissions for the group,
			
 
				-such as freedom to change the group permissions
			
 
				-of files in that group.
			
 
				-If no leader is specified, each member of the group is considered equal, as if each were
			
 
				-the leader.
			
 
				-In our example, only
			
 
				-<TT>pjw</TT>
			
 
				-can add members to his group, but all of
			
 
				-<TT>sys</TT>'s
			
 
				-members are equal partners in that group.
			
 
				-</P>
			
 
				-<P>
			
 
				-Regular files are owned by the user that creates them.
			
 
				-The group name is inherited from the directory holding the new file.
			
 
				-Device files are treated specially:
			
 
				-the kernel may arrange the ownership and permissions of
			
 
				-a file appropriate to the user accessing the file.
			
 
				-</P>
			
 
				-<P>
			
 
				-A good example of the generality this offers is process files,
			
 
				-which are owned and read-protected by the owner of the process.
			
 
				-If the owner wants to let someone else access the memory of a process,
			
 
				-for example to let the author of a program debug a broken image, the standard
			
 
				-<TT>chmod</TT>
			
 
				-command applied to the process files does the job.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another unusual application of file permissions
			
 
				-is the dump file system, which is not only served by the same file
			
 
				-server as the original data, but represented by the same user database.
			
 
				-Files in the dump are therefore given the same protection as files in the regular
			
 
				-file system;
			
 
				-if a file is owned by
			
 
				-<TT>pjw</TT>
			
 
				-and read-protected, once it is in the dump file system it is still owned by
			
 
				-<TT>pjw</TT>
			
 
				-and read-protected.
			
 
				-Also, since the dump file system is immutable, the file cannot be changed;
			
 
				-it is read-protected forever.
			
 
				-Drawbacks are that if the file is readable but should have been read-protected,
			
 
				-it is readable forever, and that user names are hard to re-use.
			
 
				-</P>
			
 
				-<H4>Performance
			
 
				-</H4>
			
 
				-<P>
			
 
				-As a simple measure of the performance of the Plan 9 kernel,
			
 
				-we compared the
			
 
				-time to do some simple operations on Plan 9 and on SGI's IRIX Release 5.3
			
 
				-running on an SGI Challenge M with a 100MHz MIPS R4400 and a 1-megabyte
			
 
				-secondary cache.
			
 
				-The test program was written in Alef,
			
 
				-compiled with the same compiler,
			
 
				-and run on identical hardware,
			
 
				-so the only variables are the operating system and libraries.
			
 
				-</P>
			
 
				-<P>
			
 
				-The program tests the time to do a context switch
			
 
				-(<TT>rendezvous</TT>
			
 
				-on Plan 9,
			
 
				-<TT>blockproc</TT>
			
 
				-on IRIX);
			
 
				-a trivial system call
			
 
				-(<TT>rfork(0)</TT>
			
 
				-and
			
 
				-<TT>nap(0)</TT>);
			
 
				-and
			
 
				-lightweight fork
			
 
				-(<TT>rfork(RFPROC)</TT>
			
 
				-and
			
 
				-<TT>sproc(PR_SFDS|PR_SADDR)</TT>).
			
 
				-It also measures the time to send a byte on a pipe from one process
			
 
				-to another and the throughput on a pipe between two processes.
			
 
				-The results appear in Table 1.
			
 
				-<br><img src="-.1.gif"><br>
			
 
				-Table 1.  Performance comparison.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-Although the Plan 9 times are not spectacular, they show that the kernel is
			
 
				-competitive with commercial systems.
			
 
				-<H4>Discussion
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 has a relatively conventional kernel;
			
 
				-the system's novelty lies in the pieces outside the kernel and the way they interact.
			
 
				-When building Plan 9, we considered all aspects
			
 
				-of the system together, solving problems where the solution fit best.
			
 
				-Sometimes the solution spanned many components.
			
 
				-An example is the problem of heterogeneous instruction architectures,
			
 
				-which is addressed by the compilers (different code characters, portable
			
 
				-object code),
			
 
				-the environment
			
 
				-(<TT>$cputype</TT>
			
 
				-and
			
 
				-<TT>$objtype</TT>),
			
 
				-the name space
			
 
				-(binding in
			
 
				-<TT>/bin</TT>),
			
 
				-and other components.
			
 
				-Sometimes many issues could be solved in a single place.
			
 
				-The best example is 9P,
			
 
				-which centralizes naming, access, and authentication.
			
 
				-9P is really the core
			
 
				-of the system;
			
 
				-it is fair to say that the Plan 9 kernel is primarily a 9P multiplexer.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9's focus on files and naming is central to its expressiveness.
			
 
				-Particularly in distributed computing, the way things are named has profound
			
 
				-influence on the system [Nee89].
			
 
				-The combination of
			
 
				-local name spaces and global conventions to interconnect networked resources
			
 
				-avoids the difficulty of maintaining a global uniform name space,
			
 
				-while naming everything like a file makes the system easy to understand, even for
			
 
				-novices.
			
 
				-Consider the dump file system, which is trivial to use for anyone familiar with
			
 
				-hierarchical file systems.
			
 
				-At a deeper level, building all the resources above a single uniform interface
			
 
				-makes interoperability easy.
			
 
				-Once a resource exports a 9P interface,
			
 
				-it can combine transparently
			
 
				-with any other part of the system to build unusual applications;
			
 
				-the details are hidden.
			
 
				-This may sound object-oriented, but there are distinctions.
			
 
				-First, 9P defines a fixed set of `methods'; it is not an extensible protocol.
			
 
				-More important,
			
 
				-files are well-defined and well-understood
			
 
				-and come prepackaged with familiar methods of access, protection, naming, and
			
 
				-networking.
			
 
				-Objects, despite their generality, do not come with these attributes defined.
			
 
				-By reducing `object' to `file', Plan 9 gets some technology for free.
			
 
				-</P>
			
 
				-<P>
			
 
				-Nonetheless, it is possible to push the idea of file-based computing too far.
			
 
				-Converting every resource in the system into a file system is a kind of metaphor,
			
 
				-and metaphors can be abused.
			
 
				-A good example of restraint is
			
 
				-<TT>/proc</TT>,
			
 
				-which is only a view of a process, not a representation.
			
 
				-To run processes, the usual
			
 
				-<TT>fork</TT>
			
 
				-and
			
 
				-<TT>exec</TT>
			
 
				-calls are still necessary, rather than doing something like
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cp /bin/date /proc/clone/mem
			
 
				-</PRE></TT></DL>
			
 
				-The problem with such examples is that they require the server to do things
			
 
				-not under its control.
			
 
				-The ability to assign meaning to a command like this does not
			
 
				-imply the meaning will fall naturally out of the structure of answering the 9P requests
			
 
				-it generates.
			
 
				-As a related example, Plan 9 does not put machines' network names in the file
			
 
				-name space.
			
 
				-The network interfaces provide a very different model of naming, because using
			
 
				-<TT>open</TT>,
			
 
				-<TT>create</TT>,
			
 
				-<TT>read</TT>,
			
 
				-and
			
 
				-<TT>write</TT>
			
 
				-on such files would not offer a suitable place to encode all the details of call
			
 
				-setup for an arbitrary network.
			
 
				-This does not mean that the network interface cannot be file-like, just that it must
			
 
				-have a more tightly defined structure.
			
 
				-</P>
			
 
				-<P>
			
 
				-What would we do differently next time?
			
 
				-Some elements of the implementation are unsatisfactory.
			
 
				-Using streams to implement network interfaces in the kernel
			
 
				-allows protocols to be connected together dynamically,
			
 
				-such as to attach the same TTY driver to TCP, URP, and
			
 
				-IL connections,
			
 
				-but Plan 9 makes no use of this configurability.
			
 
				-(It was exploited, however, in the research UNIX system for which
			
 
				-streams were invented.)
			
 
				-Replacing streams by static I/O queues would
			
 
				-simplify the code and make it faster.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although the main Plan 9 kernel is portable across many machines,
			
 
				-the file server is implemented separately.
			
 
				-This has caused several problems:
			
 
				-drivers that must be written twice,
			
 
				-bugs that must be fixed twice,
			
 
				-and weaker portability of the file system code.
			
 
				-The solution is easy: the file server kernel should be maintained
			
 
				-as a variant of the regular operating system, with no user processes and
			
 
				-special compiled-in
			
 
				-kernel processes to implement file service.
			
 
				-Another improvement to the file system would be a change of internal structure.
			
 
				-The WORM jukebox is the least reliable piece of the hardware, but because
			
 
				-it holds the metadata of the file system, it must be present in order to serve files.
			
 
				-The system could be restructured so the WORM is a backup device only, with the
			
 
				-file system proper residing on magnetic disks.
			
 
				-This would require no change to the external interface.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although Plan 9 has per-process name spaces, it has no mechanism to give the
			
 
				-description of a process's name space to another process except by direct inheritance.
			
 
				-The
			
 
				-<TT>cpu</TT>
			
 
				-command, for example, cannot in general reproduce the terminal's name space;
			
 
				-it can only re-interpret the user's login profile and make substitutions for things like
			
 
				-the name of the binary directory to load.
			
 
				-This misses any local modifications made before running
			
 
				-<TT>cpu</TT>.
			
 
				-It should instead be possible to capture the terminal's name space and transmit
			
 
				-its description to a remote process.
			
 
				-</P>
			
 
				-<P>
			
 
				-Despite these problems, Plan 9 works well.
			
 
				-It has matured into the system that supports our research,
			
 
				-rather than being the subject of the research itself.
			
 
				-Experimental new work includes developing interfaces to faster networks,
			
 
				-file caching in the client kernel,
			
 
				-encapsulating and exporting name spaces,
			
 
				-and the ability to re-establish the client state after a server crash.
			
 
				-Attention is now focusing on using the system to build distributed applications.
			
 
				-</P>
			
 
				-<P>
			
 
				-One reason for Plan 9's success is that we use it for our daily work, not just as a research tool.
			
 
				-Active use forces us to address shortcomings as they arise and to adapt the system
			
 
				-to solve our problems.
			
 
				-Through this process, Plan 9 has become a comfortable, productive programming
			
 
				-environment, as well as a vehicle for further systems research.
			
 
				-</P>
			
 
				-<H4>References
			
 
				-<DL COMPACT>
			
 
				-<DT>[9man]<DD>
			
 
				-Plan 9 Programmer's Manual,
			
 
				-Volume 1,
			
 
				-AT&amp;T Bell Laboratories,
			
 
				-Murray Hill, NJ,
			
 
				-1995.
			
 
				-<DT>[ANSIC]<DD>
			
 
				-<I>American National Standard for Information Systems -
			
 
				-Programming Language C</I>, American National Standards Institute, Inc.,
			
 
				-New York, 1990.
			
 
				-<DT>[Duff90]<DD>
			
 
				-Tom Duff, ``Rc - A Shell for Plan 9 and UNIX systems'',
			
 
				-Proc. of the Summer 1990 UKUUG Conf.,
			
 
				-London, July, 1990, pp. 21-33, reprinted, in a different form, in this volume.
			
 
				-<DT>[Fra80]<DD>
			
 
				-A.G. Fraser,
			
 
				-``Datakit - A Modular Network for Synchronous and Asynchronous Traffic'',
			
 
				-Proc. Int. Conf. on Commun.,
			
 
				-June 1980, Boston, MA.
			
 
				-<DT>[FSSUTF]<DD>
			
 
				-File System Safe UCS Transformation Format (FSS-UTF),
			
 
				-X/Open Preliminary Specification, 1993.
			
 
				-ISO designation is
			
 
				-ISO/IEC JTC1/SC2/WG2 N 1036, dated 1994-08-01.
			
 
				-<DT>[ISO10646] <DD>
			
 
				-ISO/IEC DIS 10646-1:1993
			
 
				-Information technology -
			
 
				-Universal Multiple-Octet Coded Character Set (UCS) &#x2014;
			
 
				-Part 1: Architecture and Basic Multilingual Plane.
			
 
				-<DT>[Kill84]<DD>
			
 
				-T.J. Killian,
			
 
				-``Processes as Files'',
			
 
				-USENIX Summer 1984 Conf. Proc.,
			
 
				-June 1984, Salt Lake City, UT.
			
 
				-<DT>[LABW91] <DD>
			
 
				-Butler Lampson,
			
 
				-Mart&iacute;n Abadi,
			
 
				-Michael Burrows, and
			
 
				-Edward Wobber,
			
 
				-``Authentication in Distributed Systems: Theory and Practice'',
			
 
				-Proc. 13th ACM Symp. on Op. Sys. Princ.,
			
 
				-Asilomar, 1991,
			
 
				-pp. 165-182.
			
 
				-<DT>[MBSS87] <DD>
			
 
				-S. P. Miller,
			
 
				-B. C. Neuman,
			
 
				-J. I. Schiller, and
			
 
				-J. H. Saltzer,
			
 
				-``Kerberos Authentication and Authorization System'',
			
 
				-Massachusetts Institute of Technology,
			
 
				-1987.
			
 
				-<DT>[NBS77]<DD>
			
 
				-National Bureau of Standards (U.S.),
			
 
				-Federal Information Processing Standard 46,
			
 
				-National Technical Information Service, Springfield, VA, 1977.
			
 
				-<DT>[Nee89]<DD>
			
 
				-R. Needham, ``Names'', in
			
 
				-Distributed systems,
			
 
				-S. Mullender, ed.,
			
 
				-Addison Wesley, 1989.
			
 
				-<DT>[NeHe82] <DD>
			
 
				-R.M. Needham and A.J. Herbert,
			
 
				-The Cambridge Distributed Computing System,
			
 
				-Addison-Wesley, London, 1982.
			
 
				-<DT>[Neu92]<DD>
			
 
				-B. Clifford Neuman,
			
 
				-``The Prospero File System'',
			
 
				-USENIX File Systems Workshop Proc.,
			
 
				-Ann Arbor, 1992, pp. 13-28.
			
 
				-<DT>[OCDNW88] <DD>
			
 
				-John Ousterhout, Andrew Cherenson, Fred Douglis, Mike Nelson, and Brent Welch,
			
 
				-``The Sprite Network Operating System'',
			
 
				-IEEE Computer,
			
 
				-21(2), 23-38, Feb. 1988.
			
 
				-<DT>[Pike87]<DD>
			
 
				-Rob Pike, ``The Text Editor <TT>sam</TT>'',
			
 
				-Software - Practice and Experience,
			
 
				-Nov 1987, <B>17</B>(11), pp. 813-845; reprinted in this volume.
			
 
				-<DT>[Pike91]<DD>
			
 
				-Rob Pike, ``8&frac12;, the Plan 9 Window System'',
			
 
				-USENIX Summer Conf. Proc.,
			
 
				-Nashville, June, 1991, pp. 257-265,
			
 
				-reprinted in this volume.
			
 
				-<DT>[Pike93]<DD>
			
 
				-Rob Pike and Ken Thompson, ``Hello World or &#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949; or
			
 
				-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'',
			
 
				-USENIX Winter Conf. Proc.,
			
 
				-San Diego, 1993, pp. 43-50,
			
 
				-reprinted in this volume.
			
 
				-<DT>[Pike94]<DD>
			
 
				-Rob Pike,
			
 
				-``Acme: A User Interface for Programmers'',
			
 
				-USENIX Proc. of the Winter 1994 Conf.,
			
 
				-San Francisco, CA, 1994.
			
 
				-<DT>[Pike95]<DD>
			
 
				-Rob Pike,
			
 
				-``How to Use the Plan 9 C Compiler'',
			
 
				-Plan 9 Programmer's Manual,
			
 
				-Volume 2,
			
 
				-AT&amp;T Bell Laboratories,
			
 
				-Murray Hill, NJ,
			
 
				-1995.
			
 
				-<DT>[POSIX]<DD>
			
 
				-Information Technology&#x2014;Portable Operating
			
 
				-System Interface (POSIX) Part 1:
			
 
				-System Application Program Interface (API)
			
 
				-[C Language],
			
 
				-IEEE, New York, 1990.
			
 
				-<DT>[PPTTW93] <DD>
			
 
				-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom, ``The Use of Name Spaces in Plan 9'',
			
 
				-Op. Sys. Rev.,
			
 
				-Vol. 27, No. 2, April 1993, pp. 72-76,
			
 
				-reprinted in this volume.
			
 
				-<DT>[Presotto]<DD>
			
 
				-Dave Presotto,
			
 
				-``Multiprocessor Streams for Plan 9'',
			
 
				-UKUUG Summer 1990 Conf. Proc.,
			
 
				-July 1990, pp. 11-19.
			
 
				-<DT>[PrWi93]<DD>
			
 
				-Dave Presotto and Phil Winterbottom,
			
 
				-``The Organization of Networks in Plan 9'',
			
 
				-USENIX Proc. of the Winter 1993 Conf.,
			
 
				-San Diego, CA,
			
 
				-pp. 43-50,
			
 
				-reprinted in this volume.
			
 
				-<DT>[PrWi95]<DD>
			
 
				-Dave Presotto and Phil Winterbottom,
			
 
				-``The IL Protocol'',
			
 
				-Plan 9 Programmer's Manual,
			
 
				-Volume 2,
			
 
				-AT&amp;T Bell Laboratories,
			
 
				-Murray Hill, NJ,
			
 
				-1995.
			
 
				-<DT>[RFC768] <DD>
			
 
				-J. Postel, RFC768,
			
 
				-<I>User Datagram Protocol,</I>
			
 
				-<I>DARPA Internet Program Protocol Specification,</I>
			
 
				-August 1980.
			
 
				-<DT>[RFC793] <DD>
			
 
				-RFC793,
			
 
				-<I>Transmission Control Protocol,</I>
			
 
				-<I>DARPA Internet Program Protocol Specification,</I>
			
 
				-September 1981.
			
 
				-<DT>[Rao91]<DD>
			
 
				-Herman Chung-Hwa Rao,
			
 
				-The Jade File System,
			
 
				-(Ph. D. Dissertation),
			
 
				-Dept. of Comp. Sci,
			
 
				-University of Arizona,
			
 
				-TR 91-18.
			
 
				-<DT>[Rit84]<DD>
			
 
				-D.M. Ritchie,
			
 
				-``A Stream Input-Output System'',
			
 
				-AT&amp;T Bell Laboratories Technical Journal,
			
 
				-<B>63</B>(8), October, 1984.
			
 
				-<DT>[Tric95]<DD>
			
 
				-Howard Trickey,
			
 
				-``APE &#x2014; The ANSI/POSIX Environment'',
			
 
				-Plan 9 Programmer's Manual,
			
 
				-Volume 2,
			
 
				-AT&amp;T Bell Laboratories,
			
 
				-Murray Hill, NJ,
			
 
				-1995.
			
 
				-<DT>[Unicode]<DD>
			
 
				-The Unicode Standard,
			
 
				-Worldwide Character Encoding,
			
 
				-Version 1.0, Volume 1,
			
 
				-The Unicode Consortium,
			
 
				-Addison Wesley,
			
 
				-New York,
			
 
				-1991.
			
 
				-<DT>[UNIX85]<DD>
			
 
				-UNIX Time-Sharing System Programmer's Manual,
			
 
				-Research Version, Eighth Edition, Volume 1.
			
 
				-AT&amp;T Bell Laboratories, Murray Hill, NJ, 1985.
			
 
				-<DT>[Welc94]<DD>
			
 
				-Brent Welch,
			
 
				-``A Comparison of Three Distributed File System Architectures: Vnode, Sprite, and Plan 9'',
			
 
				-Computing Systems,
			
 
				-7(2), pp. 175-199, Spring, 1994.
			
 
				-<DT>[Wint95]<DD>
			
 
				-Phil Winterbottom,
			
 
				-``Alef Language Reference Manual'',
			
 
				-Plan 9 Programmer's Manual,
			
 
				-Volume 2,
			
 
				-AT&amp;T Bell Laboratories,
			
 
				-Murray Hill, NJ,
			
 
				-1995.
			
 
				-</H4>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2006 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/acid.html
+++ b/sys/doc/acid.html
@@ -1,2921 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Acid Manual
			
 
				-</H1>
			
 
				-<DL><DD><I>Phil Winterbottom<br>
			
 
				-philw@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid is a general purpose, source level symbolic debugger.
			
 
				-The debugger is built around a simple command language. 
			
 
				-The command language, distinct from the language of the program being debugged,
			
 
				-provides a flexible user interface that allows the debugger
			
 
				-interface to be customized for a specific application or architecture.
			
 
				-Moreover, it provides an opportunity to write test and
			
 
				-verification code independently of a program's source code.
			
 
				-Acid is able to debug multiple
			
 
				-processes provided they share a common set of symbols, such as the processes in
			
 
				-a threaded program.
			
 
				-</P>
			
 
				-<P>
			
 
				-Like other language-based solutions, Acid presents a poor user interface but
			
 
				-provides a powerful debugging tool.
			
 
				-Application of Acid to hard problems is best approached by writing functions off-line
			
 
				-(perhaps loading them with the
			
 
				-<TT>include</TT>
			
 
				-function or using the support provided by
			
 
				-<A href="/magic/man2html/1/acme"><I>acme</I>(1)),
			
 
				-</A>rather than by trying to type intricate Acid operations
			
 
				-at the interactive prompt.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid allows the execution of a program to be controlled by operating on its
			
 
				-state while it is stopped and by monitoring and controlling its execution
			
 
				-when it is running. Each program action that causes a change 
			
 
				-of execution state is reflected by the execution
			
 
				-of an Acid function, which may be user defined.
			
 
				-A library of default functions provides the functionality of a normal debugger.
			
 
				-</P>
			
 
				-<P>
			
 
				-A Plan 9 process is controlled by writing messages to a control file in the
			
 
				-<A href="/magic/man2html/3/proc"><I>proc</I>(3)
			
 
				-</A>file system. Each control message has a corresponding Acid function, which
			
 
				-sends the message to the process. These functions take a process id
			
 
				-(<I>pid</I>)
			
 
				-as an
			
 
				-argument. The memory and text file of the program may be manipulated using
			
 
				-the indirection operators. The symbol table, including source cross reference,
			
 
				-is available to an Acid program. The combination allows complex operations
			
 
				-to be performed both in terms of control flow and data manipulation.
			
 
				-</P>
			
 
				-<H4>Input format and <TT>whatis</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-Comments start with
			
 
				-<TT>//</TT>
			
 
				-and continue to the end of the line.
			
 
				-Input is a series of statements and expressions separated by semicolons.
			
 
				-At the top level of the interpreter, the builtin function
			
 
				-<TT>print</TT>
			
 
				-is called automatically to display the result of all expressions except function calls.
			
 
				-A unary
			
 
				-<TT>+</TT>
			
 
				-may be used as a shorthand to force the result of a function call to be printed.
			
 
				-</P>
			
 
				-<P>
			
 
				-Also at the top level, newlines are treated as semicolons
			
 
				-by the parser, so semicolons are unnecessary when evaluating expressions.
			
 
				-</P>
			
 
				-<P>
			
 
				-When Acid starts, it loads the default program modules,
			
 
				-enters interactive mode, and prints a prompt. In this state Acid accepts
			
 
				-either function definitions or statements to be evaluated.
			
 
				-In this interactive mode
			
 
				-statements are evaluated immediately, while function definitions are
			
 
				-stored for later invocation.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>whatis</TT>
			
 
				-operator can be used to report the state of identifiers known to the interpreter.
			
 
				-With no argument,
			
 
				-<TT>whatis</TT>
			
 
				-reports the name of all defined Acid functions; when supplied with an identifier
			
 
				-as an argument it reports any variable, function, or type definition
			
 
				-associated with the identifier.
			
 
				-Because of the way the interpreter handles semicolons,
			
 
				-the result of a
			
 
				-<TT>whatis</TT>
			
 
				-statement can be returned directly to Acid without adding semicolons.
			
 
				-A syntax error or interrupt returns Acid to the normal evaluation
			
 
				-mode; any partially evaluated definitions are lost.
			
 
				-</P>
			
 
				-<H4>Using the Library Functions
			
 
				-</H4>
			
 
				-<P>
			
 
				-After loading the program binary, Acid loads the portable and architecture-specific
			
 
				-library functions  that form the standard debugging environment.
			
 
				-These files are Acid source code and are human-readable.
			
 
				-The following example uses the standard debugging library to show how
			
 
				-language and program interact:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% acid /bin/ls
			
 
				-/bin/ls:mips plan 9 executable
			
 
				-
			
 
				-/sys/lib/acid/port
			
 
				-/sys/lib/acid/mips
			
 
				-acid: new()
			
 
				-75721: system call  _main ADD  $-0x14,R29
			
 
				-75721: breakpoint   main+0x4   MOVW  R31,0x0(R29)
			
 
				-acid: bpset(ls)
			
 
				-acid: cont()
			
 
				-75721: breakpoint   ls    ADD  $-0x16c8,R29
			
 
				-acid: stk()
			
 
				-At pc:0x0000141c:ls /sys/src/cmd/ls.c:87
			
 
				-ls(s=0x0000004d,multi=0x00000000) /sys/src/cmd/ls.c:87
			
 
				-    called from main+0xf4 /sys/src/cmd/ls.c:79
			
 
				-main(argc=0x00000000,argv=0x7ffffff0) /sys/src/cmd/ls.c:48
			
 
				-    called from _main+0x20 /sys/src/libc/mips/main9.s:10
			
 
				-acid: PC
			
 
				-0xc0000f60
			
 
				-acid: *PC
			
 
				-0x0000141c
			
 
				-acid: ls
			
 
				-0x0000141c
			
 
				-</PRE></TT></DL>
			
 
				-The function
			
 
				-<TT>new()</TT>
			
 
				-creates a new process and stops it at the first instruction.
			
 
				-This change in state is reported by a call to the
			
 
				-Acid function
			
 
				-<TT>stopped</TT>,
			
 
				-which is called by the interpreter whenever the debugged program stops.
			
 
				-<TT>Stopped</TT>
			
 
				-prints the status line giving the pid, the reason the program stopped
			
 
				-and the address and instruction at the current PC.
			
 
				-The function
			
 
				-<TT>bpset</TT>
			
 
				-makes an entry in the breakpoint table and plants a breakpoint in memory.
			
 
				-The
			
 
				-<TT>cont</TT>
			
 
				-function continues the process, allowing it to run until some condition
			
 
				-causes it to stop. In this case the program hits the breakpoint placed on
			
 
				-the function
			
 
				-<TT>ls</TT>
			
 
				-in the C program. Once again the
			
 
				-<TT>stopped</TT>
			
 
				-routine is called to print the status of the program. The function
			
 
				-<TT>stk</TT>
			
 
				-prints a C stack trace of the current process. It is implemented using
			
 
				-a builtin Acid function that returns the stack trace as a list; the code
			
 
				-that formats the information is all written in Acid. 
			
 
				-The Acid variable
			
 
				-<TT>PC</TT>
			
 
				-holds the address of the 
			
 
				-cell where the current value of the processor register
			
 
				-<TT>PC</TT>
			
 
				-is stored. By indirecting through
			
 
				-the value of
			
 
				-<TT>PC</TT>
			
 
				-the address where the program is stopped can be found.
			
 
				-All of the processor registers are available by the same mechanism.
			
 
				-</P>
			
 
				-<H4>Types
			
 
				-</H4>
			
 
				-<P>
			
 
				-An Acid variable has one of four types:
			
 
				-<I>integer</I>,
			
 
				-<I>float</I>,
			
 
				-<I>list</I>,
			
 
				-or
			
 
				-<I>string</I>.
			
 
				-The type of a variable is inferred from the type of the right-hand
			
 
				-side of the assignment expression which last set its value.
			
 
				-Referencing a variable that has not yet
			
 
				-been assigned draws a "used but not set" error. Many of the operators may
			
 
				-be applied to more than
			
 
				-one type; for these operators the action of the operator is determined by
			
 
				-the types of its operands. The action of each operator is defined in the
			
 
				-<I>Expressions</I>
			
 
				-section of this manual.
			
 
				-</P>
			
 
				-<H4>Variables
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid has three kinds of variables: variables defined by the symbol table
			
 
				-of the debugged program, variables that are defined and maintained
			
 
				-by the interpreter as the debugged program changes state, and variables
			
 
				-defined and used by Acid programs.
			
 
				-</P>
			
 
				-<P>
			
 
				-Some examples of variables maintained by the interpreter are the register
			
 
				-pointers listed by name in the Acid list variable
			
 
				-<TT>registers</TT>,
			
 
				-and the symbol table listed by name and contents in the Acid variable
			
 
				-<TT>symbols</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The variable
			
 
				-<TT>pid</TT>
			
 
				-is updated by the interpreter to select the most recently created process
			
 
				-or the process selected by the
			
 
				-<TT>setproc</TT>
			
 
				-builtin function.
			
 
				-</P>
			
 
				-<H4>Formats
			
 
				-</H4>
			
 
				-<P>
			
 
				-In addition to a type, variables have formats. The format is a code
			
 
				-letter that determines the printing style and the effect of some of the
			
 
				-operators on that variable. The format codes are derived from the format
			
 
				-letters used by
			
 
				-<A href="/magic/man2html/1/db"><I>db</I>(1).
			
 
				-</A>By default, symbol table variables and numeric constants
			
 
				-are assigned the format code
			
 
				-<TT>X</TT>,
			
 
				-which specifies 32-bit hexadecimal.
			
 
				-Printing a variable with this code yields the output
			
 
				-<TT>0x00123456</TT>.
			
 
				-The format code of a variable may be changed from the default by using the 
			
 
				-builtin function
			
 
				-<TT>fmt</TT>.
			
 
				-This function takes two arguments, an expression and a format code. After
			
 
				-the expression is evaluated the new format code is attached to the result
			
 
				-and forms the return value from
			
 
				-<TT>fmt</TT>.
			
 
				-The backslash operator is a short form of
			
 
				-<TT>fmt</TT>.
			
 
				-The format supplied by the backslash operator must be the format character
			
 
				-rather than an expression.
			
 
				-If the result is assigned to a variable the new format code is maintained
			
 
				-in the variable. For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: x=10
			
 
				-acid: print(x)
			
 
				-0x0000000a 
			
 
				-acid: x = fmt(x, 'D')
			
 
				-acid: print(x, fmt(x, 'X'))
			
 
				-10 0x0000000a
			
 
				-acid: x
			
 
				-10
			
 
				-acid: x\o
			
 
				-12
			
 
				-</PRE></TT></DL>
			
 
				-The supported format characters are:
			
 
				-<DL><DD>
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><TT>o</TT><DD>
			
 
				-Print two-byte integer in octal.
			
 
				-<DT><TT>O</TT><DD>
			
 
				-Print four-byte integer in octal.
			
 
				-<DT><TT>q</TT><DD>
			
 
				-Print two-byte integer in signed octal.
			
 
				-<DT><TT>Q</TT><DD>
			
 
				-Print four-byte integer in signed octal.
			
 
				-<DT><TT>B</TT><DD>
			
 
				-Print four-byte integer in binary.
			
 
				-<DT><TT>d</TT><DD>
			
 
				-Print two-byte integer in signed decimal.
			
 
				-<DT><TT>D</TT><DD>
			
 
				-Print four-byte integer in signed decimal.
			
 
				-<DT><TT>V</TT><DD>
			
 
				-Print eight-byte integer in signed decimal.
			
 
				-<DT><TT>Z</TT><DD>
			
 
				-Print eight-byte integer in unsigned decimal.
			
 
				-<DT><TT>x</TT><DD>
			
 
				-Print two-byte integer in hexadecimal.
			
 
				-<DT><TT>X</TT><DD>
			
 
				-Print four-byte integer in hexadecimal.
			
 
				-<DT><TT>Y</TT><DD>
			
 
				-Print eight-byte integer in hexadecimal.
			
 
				-<DT><TT>u</TT><DD>
			
 
				-Print two-byte integer in unsigned decimal.
			
 
				-<DT><TT>U</TT><DD>
			
 
				-Print four-byte integer in unsigned decimal.
			
 
				-<DT><TT>f</TT><DD>
			
 
				-Print single-precision floating point number.
			
 
				-<DT><TT>F</TT><DD>
			
 
				-Print double-precision floating point number.
			
 
				-<DT><TT>g</TT><DD>
			
 
				-Print a single precision floating point number in string format.
			
 
				-<DT><TT>G</TT><DD>
			
 
				-Print a double precision floating point number in string format.
			
 
				-<DT><TT>b</TT><DD>
			
 
				-Print byte in hexadecimal.
			
 
				-<DT><TT>c</TT><DD>
			
 
				-Print byte as an ASCII character.
			
 
				-<DT><TT>C</TT><DD>
			
 
				-Like
			
 
				-<TT>c</TT>,
			
 
				-with
			
 
				-printable ASCII characters represented normally and
			
 
				-others printed in the form <TT>\x</TT><I>nn</I>.
			
 
				-<DT><TT>s</TT><DD>
			
 
				-Interpret the addressed bytes as UTF characters
			
 
				-and print successive characters until a zero byte is reached.
			
 
				-<DT><TT>r</TT><DD>
			
 
				-Print a two-byte integer as a rune.
			
 
				-<DT><TT>R</TT><DD>
			
 
				-Print successive two-byte integers as runes
			
 
				-until a zero rune is reached.
			
 
				-<DT><TT>i</TT><DD>
			
 
				-Print as machine instructions.
			
 
				-<DT><TT>I</TT><DD>
			
 
				-As
			
 
				-<TT>i</TT>
			
 
				-above, but print the machine instructions in
			
 
				-an alternate form if possible:
			
 
				-<TT>sunsparc</TT>
			
 
				-and
			
 
				-<TT>mipsco</TT>
			
 
				-reproduce the manufacturers' syntax.
			
 
				-<DT><TT>a</TT><DD>
			
 
				-Print the value in symbolic form.
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<H4>Complex types
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid permits the definition of the layout of memory.
			
 
				-The usual method is to use the
			
 
				-<TT>-a</TT>
			
 
				-flag of the compilers to produce Acid-language descriptions of data structures (see
			
 
				-<A href="/magic/man2html/1/2c"><I>2c</I>(1))
			
 
				-</A>although such definitions can be typed interactively.
			
 
				-The keywords
			
 
				-<TT>complex</TT>,
			
 
				-<TT>adt</TT>,
			
 
				-<TT>aggr</TT>,
			
 
				-and
			
 
				-<TT>union</TT>
			
 
				-are all equivalent; the compiler uses the synonyms to document the declarations.
			
 
				-A complex type is described as a set of members, each containing a format letter,
			
 
				-an offset in the structure, and a name.  For example, the C structure
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-struct List {
			
 
				-	int         type;
			
 
				-	struct List *next;
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-is described by the Acid statement
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-complex List {
			
 
				-	'D'	0	type;
			
 
				-	'X'	4	next;
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>Scope
			
 
				-</H4>
			
 
				-<P>
			
 
				-Variables are global unless they are either parameters to functions
			
 
				-or are declared as
			
 
				-<TT>local</TT>
			
 
				-in a function body. Parameters and local variables are available only in
			
 
				-the body of the function in which they are instantiated.
			
 
				-Variables are dynamically bound: if a function declares a local variable
			
 
				-with the same name as a global variable, the global variable will be hidden
			
 
				-whenever the function is executing.
			
 
				-For example, if a function
			
 
				-<TT>f</TT>
			
 
				-has a local called
			
 
				-<TT>main</TT>,
			
 
				-any function called below
			
 
				-<TT>f</TT>
			
 
				-will see the local version of
			
 
				-<TT>main</TT>,
			
 
				-not the external symbol.
			
 
				-</P>
			
 
				-<H4>Addressing
			
 
				-</H4>
			
 
				-<P>
			
 
				-Since the symbol table specifies addresses,
			
 
				-to access the value of program variables
			
 
				-an extra level of indirection
			
 
				-is required relative to the source code.
			
 
				-For consistency, the registers are maintained as pointers as well; Acid variables with the names
			
 
				-of processor registers point to cells holding the saved registers.
			
 
				-</P>
			
 
				-<P>
			
 
				-The location in a file or memory image associated with
			
 
				-an address is calculated from a map
			
 
				-associated with the file.
			
 
				-Each map contains one or more quadruples (<I>t</I>,
			
 
				-<I>b</I>,
			
 
				-<I>e</I>,
			
 
				-<I>f</I>),
			
 
				-defining a segment named
			
 
				-<I>t</I>
			
 
				-(usually 
			
 
				-<TT>text</TT>,
			
 
				-<TT>data</TT>,
			
 
				-<TT>regs</TT>,
			
 
				-or
			
 
				-<TT>fpregs</TT>)
			
 
				-mapping addresses in the range
			
 
				-<I>b</I>
			
 
				-through
			
 
				-<I>e</I>
			
 
				-to the part of the file
			
 
				-beginning at
			
 
				-offset
			
 
				-<I>f</I>.
			
 
				-The memory model of a Plan 9 process assumes
			
 
				-that segments are disjoint.  There
			
 
				-can be more than one segment of a given type (e.g., a process
			
 
				-may have more than one text segment) but segments
			
 
				-may not overlap.
			
 
				-An address
			
 
				-<I>a</I>
			
 
				-is translated
			
 
				-to a file address
			
 
				-by finding a segment
			
 
				-for which
			
 
				-<I>b</I>
			
 
				-&#8804;
			
 
				-<I>a</I>
			
 
				-&lt;
			
 
				-<I>e</I>;
			
 
				-the location in the file
			
 
				-is then
			
 
				-<I>address</I>
			
 
				-+
			
 
				-<I>f</I>
			
 
				--
			
 
				-<I>b</I>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Usually,
			
 
				-the text and initialized data of a program
			
 
				-are mapped by segments called 
			
 
				-<TT>text</TT>
			
 
				-and
			
 
				-<TT>data</TT>.
			
 
				-Since a program file does not contain bss, stack, or register data,
			
 
				-these data are
			
 
				-not mapped by the data segment.
			
 
				-The text segment is mapped similarly in the memory image of
			
 
				-a normal (i.e., non-kernel) process.
			
 
				-However, the segment called 
			
 
				-<TT>*data</TT>
			
 
				-maps memory from the beginning to the end of the program's data space.
			
 
				-This region contains the program's static data, the bss, the
			
 
				-heap and the stack.  A segment
			
 
				-called
			
 
				-<TT>*regs</TT>
			
 
				-maps the registers;
			
 
				-<TT>*fpregs</TT>
			
 
				-maps the floating point registers.
			
 
				-</P>
			
 
				-<P>
			
 
				-Sometimes it is useful to define a map with a single segment
			
 
				-mapping the region from 0 to 0xFFFFFFFF; such a map
			
 
				-allows the entire file to be examined
			
 
				-without address translation.  The builtin function
			
 
				-<TT>map</TT>
			
 
				-examines and modifies Acid's map for a process.
			
 
				-</P>
			
 
				-<H4>Name Conflicts
			
 
				-</H4>
			
 
				-<P>
			
 
				-Name conflicts between keywords in the Acid language, symbols in the program,
			
 
				-and previously defined functions are resolved when the interpreter starts up.
			
 
				-Each name is made unique by prefixing enough
			
 
				-<TT>$</TT>
			
 
				-characters to the front of the name to make it unique. Acid reports
			
 
				-a list of each name change at startup. The report looks like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/bin/sam: mips plan 9 executable
			
 
				-/lib/acid/port
			
 
				-/lib/acid/mips
			
 
				-Symbol renames:
			
 
				-	append=$append T/0xa4e40
			
 
				-acid:
			
 
				-</PRE></TT></DL>
			
 
				-The symbol
			
 
				-<TT>append</TT>
			
 
				-is both a keyword and a text symbol in the program. The message reports
			
 
				-that the text symbol is now named
			
 
				-<TT>$append</TT>.
			
 
				-</P>
			
 
				-<H4>Expressions
			
 
				-</H4>
			
 
				-<P>
			
 
				-Operators have the same
			
 
				-binding and precedence as in C.
			
 
				-For operators of equal precedence, expressions are evaluated from left to right. 
			
 
				-</P>
			
 
				-<H4>Boolean expressions
			
 
				-</H4>
			
 
				-<P>
			
 
				-If an expression is evaluated for a boolean condition the test
			
 
				-performed depends on the type of the result. If the result is of
			
 
				-<I>integer</I>
			
 
				-or
			
 
				-<I>floating</I>
			
 
				-type the result is true if the value is non-zero. If the expression is a
			
 
				-<I>list</I>
			
 
				-the result is true if there are any members in the list.
			
 
				-If the expression is a
			
 
				-<I>string</I>
			
 
				-the result is true if there are any characters in the string.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	primary-expression:
			
 
				-		identifier
			
 
				-		identifier <TT>:</TT> identifier
			
 
				-		constant
			
 
				-		<TT>(</TT> expression <TT>)</TT>
			
 
				-		<TT>{</TT> elist <TT>}</TT>
			
 
				-
			
 
				-	elist:
			
 
				-		expression
			
 
				-		elist , expression
			
 
				-</PRE></TT></DL>
			
 
				-An identifier may be any legal Acid variable. The colon operator returns the
			
 
				-address of parameters or local variables in the current stack of a program.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-*main:argc
			
 
				-</PRE></TT></DL>
			
 
				-prints the number of arguments passed into main. Local variables and parameters
			
 
				-can only be referenced after the frame has been established. It may be necessary to
			
 
				-step a program over the first few instructions of a breakpointed function to properly set
			
 
				-the frame.
			
 
				-</P>
			
 
				-<P>
			
 
				-Constants follow the same lexical rules as C.
			
 
				-A list of expressions delimited by braces forms a list constructor.
			
 
				-A new list is produced by evaluating each expression when the constructor is executed.
			
 
				-The empty list is formed from
			
 
				-<TT>{}</TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: x = 10
			
 
				-acid: l = { 1, x, 2\D }
			
 
				-acid: x = 20
			
 
				-acid: l
			
 
				-{0x00000001 , 0x0000000a , 2 }
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>Lists
			
 
				-</H4>
			
 
				-<P>
			
 
				-Several operators manipulate lists.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	list-expression:
			
 
				-		primary-expression
			
 
				-		<TT>head</TT> primary-expression
			
 
				-		<TT>tail</TT> primary-expression
			
 
				-		<TT>append</TT> expression <TT>,</TT> primary-expression
			
 
				-		<TT>delete</TT> expression <TT>,</TT> primary-expression
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<I>primary-expression</I>
			
 
				-for
			
 
				-<TT>head</TT>
			
 
				-and
			
 
				-<TT>tail</TT>
			
 
				-must yield a value of type
			
 
				-<I>list</I>.
			
 
				-If there are no elements in the list the value of
			
 
				-<TT>head</TT>
			
 
				-or
			
 
				-<TT>tail</TT>
			
 
				-will be the empty list. Otherwise
			
 
				-<TT>head</TT>
			
 
				-evaluates to the first element of the list and
			
 
				-<TT>tail</TT>
			
 
				-evaluates to the rest.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: head {}
			
 
				-{}
			
 
				-acid: head {1, 2, 3, 4}
			
 
				-0x00000001 
			
 
				-acid: tail {1, 2, 3, 4}
			
 
				-{0x00000002 , 0x00000003 , 0x00000004 }
			
 
				-</PRE></TT></DL>
			
 
				-The first operand of
			
 
				-<TT>append</TT>
			
 
				-and
			
 
				-<TT>delete</TT>
			
 
				-must be an expression that yields a
			
 
				-<I>list</I>.
			
 
				-<TT>Append</TT>
			
 
				-places the result of evaluating
			
 
				-<I>primary-expression</I>
			
 
				-at the end of the list.
			
 
				-The
			
 
				-<I>primary-expression</I>
			
 
				-supplied to
			
 
				-<TT>delete</TT>
			
 
				-must evaluate to an integer;
			
 
				-<TT>delete</TT>
			
 
				-removes the 
			
 
				-<I>n</I>'th
			
 
				-item from the list, where
			
 
				-<I>n</I>
			
 
				-is the integral value of
			
 
				-<I>primary-expression.</I>
			
 
				-List indices are zero-based.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	acid: append {1, 2}, 3
			
 
				-	{0x00000001 , 0x00000002 , 0x00000003 }
			
 
				-	acid: delete {1, 2, 3}, 1
			
 
				-	{0x00000001 , 0x00000003 }
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-Assigning a list to a variable copies a reference to the list; if a list variable
			
 
				-is copied it still points at the same list.  To copy a list, the elements must
			
 
				-be copied piecewise using
			
 
				-<TT>head</TT>
			
 
				-and
			
 
				-<TT>append</TT>.
			
 
				-</P>
			
 
				-<H4>Operators
			
 
				-</H4>
			
 
				-<P>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	postfix-expression:
			
 
				-		list-expression
			
 
				-		postfix-expression <TT>[</TT> expression <TT>]</TT>
			
 
				-		postfix-expression <TT>(</TT> argument-list <TT>)</TT>
			
 
				-		postfix-expression <TT>.</TT> tag
			
 
				-		postfix-expression <TT>-&gt;</TT> tag 
			
 
				-		postfix-expression <TT>++</TT>
			
 
				-		postfix-expression <TT>--</TT>
			
 
				-
			
 
				-	argument-list:
			
 
				-		expression
			
 
				-		argument-list , expression
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>[</TT>
			
 
				-<I>expression</I>
			
 
				-<TT>]</TT>
			
 
				-operator performs indexing.
			
 
				-The indexing expression must result in an expression of
			
 
				-<I>integer</I>
			
 
				-type, say
			
 
				-<I>n</I>.
			
 
				-The operation depends on the type of
			
 
				-<I>postfix-expression</I>.
			
 
				-If the
			
 
				-<I>postfix-expression</I>
			
 
				-yields an
			
 
				-<I>integer</I>
			
 
				-it is assumed to be the base address of an array in the memory image.
			
 
				-The index offsets into this array; the size of the array members is
			
 
				-determined by the format associated with the
			
 
				-<I>postfix-expression</I>.
			
 
				-If the 
			
 
				-<I>postfix-expression</I>
			
 
				-yields a
			
 
				-<I>string</I>
			
 
				-the index operator fetches the
			
 
				-<I>n</I>'th
			
 
				-character
			
 
				-of the string. If the index points beyond the end
			
 
				-of the string, a zero is returned.
			
 
				-If the
			
 
				-<I>postfix-expression</I>
			
 
				-yields a
			
 
				-<I>list</I>
			
 
				-then the indexing operation returns the
			
 
				-<I>n</I>'th
			
 
				-item of the list.
			
 
				-If the list contains fewer than
			
 
				-<I>n</I>
			
 
				-items the empty list
			
 
				-<TT>{}</TT>
			
 
				-is returned.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>++</TT>
			
 
				-and
			
 
				-<TT>--</TT>
			
 
				-operators increment and decrement integer variables.
			
 
				-The amount of increment or decrement depends on the format code. These postfix
			
 
				-operators return the value of the variable before the increment or decrement
			
 
				-has taken place.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	unary-expression:
			
 
				-		postfix-expression
			
 
				-		<TT>++</TT> unary-expression
			
 
				-		<TT>--</TT> unary-expression
			
 
				-
			
 
				-	unary-operator: one of
			
 
				-		<TT>*</TT> <TT>@</TT> <TT>+</TT> <TT>-</TT> ~ <TT>!</TT>
			
 
				-</PRE></TT></DL>
			
 
				-The operators
			
 
				-<TT>*</TT>
			
 
				-and
			
 
				-<TT>@</TT>
			
 
				-are the indirection operators.
			
 
				-<TT>@</TT>
			
 
				-references a value from the text file of the program being debugged.
			
 
				-The size of the value depends on the format code. The
			
 
				-<TT>*</TT>
			
 
				-operator fetches a value from the memory image of a process. If either
			
 
				-operator appears on the left-hand side of an assignment statement, either the file
			
 
				-or memory will be written. The file can only be modified when Acid is invoked
			
 
				-with the
			
 
				-<TT>-w</TT>
			
 
				-option.
			
 
				-The prefix
			
 
				-<TT>++</TT>
			
 
				-and
			
 
				-<TT>--</TT>
			
 
				-operators perform the same operation as their postfix counterparts but
			
 
				-return the value after the increment or decrement has been performed. Since the
			
 
				-<TT>++</TT>
			
 
				-and
			
 
				-<TT>*</TT>
			
 
				-operators fetch and increment the correct amount for the specified format,
			
 
				-the following function prints correct machine instructions on a machine with
			
 
				-variable length instructions, such as the 68020 or 386:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	defn asm(addr)
			
 
				-	{
			
 
				-		addr = fmt(addr, 'i');
			
 
				-		loop 1, 10 do
			
 
				-			print(*addr++, "\n");
			
 
				-	}
			
 
				-</PRE></TT></DL>
			
 
				-The operators
			
 
				-<TT>~</TT>
			
 
				-and
			
 
				-<TT>!</TT>
			
 
				-perform bitwise and logical negation respectively. Their operands must be of
			
 
				-<I>integer</I>
			
 
				-type.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	cast-expression:
			
 
				-		unary-expression
			
 
				-		unary-expression <TT>\</TT> format-char
			
 
				-		<TT>(</TT> complex-name <TT>)</TT> unary-expression		
			
 
				-</PRE></TT></DL>
			
 
				-A unary expression may be preceded by a cast. The cast has the effect of
			
 
				-associating the value of 
			
 
				-<I>unary-expression</I>
			
 
				-with a complex type structure.
			
 
				-The result may then be dereferenced using the
			
 
				-<TT>.</TT>
			
 
				-and
			
 
				-<TT>-&gt;</TT>
			
 
				-operators.
			
 
				-</P>
			
 
				-<P>
			
 
				-An Acid variable may be associated with a complex type
			
 
				-to enable accessing the type's members:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: complex List {
			
 
				-	'D'	0	type;
			
 
				-	'X'	4	next;
			
 
				-};
			
 
				-acid: complex List lhead
			
 
				-acid: lhead.type
			
 
				-10
			
 
				-acid: lhead = ((List)lhead).next
			
 
				-acid: lhead.type
			
 
				--46
			
 
				-</PRE></TT></DL>
			
 
				-Note that the
			
 
				-<TT>next</TT>
			
 
				-field cannot be given a complex type automatically.
			
 
				-</P>
			
 
				-<P>
			
 
				-When entered at the top level of the interpreter,
			
 
				-an expression of complex type
			
 
				-is treated specially.
			
 
				-If the type is called
			
 
				-<TT>T</TT>
			
 
				-and an Acid function also called
			
 
				-<TT>T</TT>
			
 
				-exists,
			
 
				-then that function will be called with the expression as its argument.
			
 
				-The compiler options
			
 
				-<TT>-a</TT>
			
 
				-and
			
 
				-<TT>-aa</TT>
			
 
				-will generate Acid source code defining such complex types and functions; see
			
 
				-<A href="/magic/man2html/1/2c"><I>2c</I>(1).
			
 
				-</A></P>
			
 
				-<P>
			
 
				-A
			
 
				-<I>unary-expression</I>
			
 
				-may be qualified with a format specifier using the
			
 
				-<TT>\</TT>
			
 
				-operator. This has the same effect as passing the expression to the
			
 
				-<TT>fmt</TT>
			
 
				-builtin function.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	multiplicative-expression:
			
 
				-		cast-expression
			
 
				-		multiplicative-expression <TT>*</TT> multiplicative-expression
			
 
				-		multiplicative-expression <TT>/</TT> multiplicative-expression
			
 
				-		multiplicative-expression <TT>%</TT> multiplicative-expression
			
 
				-</PRE></TT></DL>
			
 
				-These operate on
			
 
				-<I>integer</I>
			
 
				-and 
			
 
				-<I>float</I>
			
 
				-types and perform the expected operations:
			
 
				-<TT>*</TT>
			
 
				-multiplication,
			
 
				-<TT>/</TT>
			
 
				-division,
			
 
				-<TT>%</TT>
			
 
				-modulus.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	additive-expression:
			
 
				-		multiplicative-expression
			
 
				-		additive-expression <TT>+</TT> multiplicative-expression
			
 
				-		additive-expression <TT>-</TT> multiplicative-expression
			
 
				-</PRE></TT></DL>
			
 
				-These operators perform as expected for
			
 
				-<I>integer</I>
			
 
				-and 
			
 
				-<I>float</I>
			
 
				-operands.
			
 
				-Unlike in C,
			
 
				-<TT>+</TT>
			
 
				-and
			
 
				-<TT>-</TT>
			
 
				-do not scale the addition based on the format of the expression.
			
 
				-This means that
			
 
				-<TT>i=i+1</TT>
			
 
				-will always add 1 but
			
 
				-<TT>i++</TT>
			
 
				-will add the size corresponding to the format stored with
			
 
				-<TT>i</TT>.
			
 
				-If both operands are of either
			
 
				-<I>string</I>
			
 
				-or
			
 
				-<I>list</I>
			
 
				-type, then addition is defined as concatenation. Subtraction is undefined for
			
 
				-these two types.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	shift-expression:
			
 
				-		additive-expression
			
 
				-		shift-expression <TT>&lt;&lt;</TT> additive-expression
			
 
				-		shift-expression <TT>&gt;&gt;</TT> additive-expression
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>&gt;&gt;</TT>
			
 
				-and
			
 
				-<TT>&lt;&lt;</TT>
			
 
				-operators perform bitwise right and left shifts respectively. Both
			
 
				-require operands of
			
 
				-<I>integer</I>
			
 
				-type.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	relational-expression:
			
 
				-		relational-expression <TT>&lt;</TT> shift-expression
			
 
				-		relational-expression <TT>&gt;</TT> shift-expression
			
 
				-		relational-expression <TT>&lt;=</TT> shift-expression
			
 
				-		relational-expression <TT>&gt;=</TT> shift-expression
			
 
				-
			
 
				-	equality-expression:
			
 
				-		relational-expression
			
 
				-		relational-expression <TT>==</TT> equality-expression
			
 
				-		relational-expression <TT>!=</TT> equality-expression
			
 
				-</PRE></TT></DL>
			
 
				-The comparison operators are
			
 
				-<TT>&lt;</TT>
			
 
				-(less than),
			
 
				-<TT>&gt;</TT>
			
 
				-(greater than),
			
 
				-<TT>&lt;=</TT>
			
 
				-(less than or equal to),
			
 
				-<TT>&gt;=</TT>
			
 
				-(greater than or equal to),
			
 
				-<TT>==</TT>
			
 
				-(equal to) and
			
 
				-<TT>!=</TT>
			
 
				-(not equal to). The result of a comparison is 0
			
 
				-if the condition is false, otherwise 1. The relational operators can only be
			
 
				-applied to operands of
			
 
				-<I>integer</I>
			
 
				-and
			
 
				-<I>float</I>
			
 
				-type. The equality operators apply to all types.  Comparing mixed types is legal.
			
 
				-Mixed integer and float compare on the integral value.  Other mixtures are always unequal.
			
 
				-Two lists are equal if they
			
 
				-have the same number of members and a pairwise comparison of the members results
			
 
				-in equality.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	AND-expression:
			
 
				-		equality-expression
			
 
				-		AND-expression <TT>&amp;</TT> equality-expression
			
 
				-
			
 
				-	XOR-expression:
			
 
				-		AND-expression
			
 
				-		XOR-expression <TT>^</TT> AND-expression
			
 
				-
			
 
				-	OR-expression:
			
 
				-		XOR-expression
			
 
				-		OR-expression <TT>|</TT> XOR-expression
			
 
				-</PRE></TT></DL>
			
 
				-These operators perform bitwise logical operations and apply only to the
			
 
				-<I>integer</I>
			
 
				-type.
			
 
				-The operators are
			
 
				-<TT>&amp;</TT>
			
 
				-(logical and),
			
 
				-<TT>^</TT>
			
 
				-(exclusive or) and
			
 
				-<TT>|</TT>
			
 
				-(inclusive or).
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	logical-AND-expression:
			
 
				-		OR-expression
			
 
				-		logical-AND-expression <TT>&amp;&amp;</TT> OR-expression
			
 
				-
			
 
				-	logical-OR-expression:
			
 
				-		logical-AND-expression
			
 
				-		logical-OR-expression <TT>||</TT> logical-AND-expression
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>&amp;&amp;</TT>
			
 
				-operator returns 1 if both of its operands evaluate to boolean true, otherwise 0.
			
 
				-The
			
 
				-<TT>||</TT>
			
 
				-operator returns 1 if either of its operands evaluates to boolean true,
			
 
				-otherwise 0.
			
 
				-</P>
			
 
				-<H4>Statements
			
 
				-</H4>
			
 
				-<P>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	<TT>if</TT> expression <TT>then</TT> statement <TT>else</TT> statement
			
 
				-	<TT>if</TT> expression <TT>then</TT> statement
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<I>expression</I>
			
 
				-is evaluated as a boolean. If its value is true the statement after
			
 
				-the
			
 
				-<TT>then</TT>
			
 
				-is executed, otherwise the statement after the
			
 
				-<TT>else</TT>
			
 
				-is executed. The 
			
 
				-<TT>else</TT>
			
 
				-portion may be omitted.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	<TT>while</TT> expression <TT>do</TT> statement
			
 
				-</PRE></TT></DL>
			
 
				-In a while loop, the
			
 
				-<I>statement</I>
			
 
				-is executed while the boolean
			
 
				-<I>expression</I>
			
 
				-evaluates
			
 
				-true.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	<TT>loop</TT> startexpr, endexpr <TT>do</TT> statement
			
 
				-</PRE></TT></DL>
			
 
				-The two expressions
			
 
				-<I>startexpr</I>
			
 
				-and
			
 
				-<I>endexpr</I>
			
 
				-are evaluated prior to loop entry.
			
 
				-<I>Statement</I>
			
 
				-is evaluated while the value of
			
 
				-<I>startexpr</I>
			
 
				-is less than or equal to
			
 
				-<I>endexpr</I>.
			
 
				-Both expressions must yield
			
 
				-<I>integer</I>
			
 
				-values. The value of
			
 
				-<I>startexpr</I>
			
 
				-is
			
 
				-incremented by one for each loop iteration.
			
 
				-Note that there is no explicit loop variable; the
			
 
				-<I>expressions</I>
			
 
				-are just values.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	<TT>return</TT> expression
			
 
				-</PRE></TT></DL>
			
 
				-<TT>return</TT>
			
 
				-terminates execution of the current function and returns to its caller.
			
 
				-The value of the function is given by expression. Since
			
 
				-<TT>return</TT>
			
 
				-requires an argument, nil-valued functions should return the empty list
			
 
				-<TT>{}</TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	<TT>local</TT> variable
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>local</TT>
			
 
				-statement creates a local instance of
			
 
				-<I>variable</I>,
			
 
				-which exists for the duration
			
 
				-of the instance of the function in which it is declared. Binding is dynamic: the local variable,
			
 
				-rather than the previous value of
			
 
				-<I>variable</I>,
			
 
				-is visible to called functions.
			
 
				-After a return from the current function the previous value of
			
 
				-<I>variable</I>
			
 
				-is
			
 
				-restored.
			
 
				-</P>
			
 
				-<P>
			
 
				-If Acid is interrupted, the values of all local variables are lost,
			
 
				-as if the function returned.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	<TT>defn</TT> function-name <TT>(</TT> parameter-list <TT>)</TT> body
			
 
				-
			
 
				-	parameter-list:
			
 
				-		variable
			
 
				-		parameter-list , variable
			
 
				-
			
 
				-	body:
			
 
				-		<TT>{</TT> statement <TT>}</TT>
			
 
				-</PRE></TT></DL>
			
 
				-Functions are introduced by the
			
 
				-<TT>defn</TT>
			
 
				-statement. The definition of parameter names suppresses any variables
			
 
				-of the same name until the function returns. The body of a function is a list
			
 
				-of statements enclosed by braces.
			
 
				-</P>
			
 
				-<H4>Code variables
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid permits the delayed evaluation of a parameter to a function.  The parameter
			
 
				-may then be evaluated at any time with the
			
 
				-<TT>eval</TT>
			
 
				-operator.  Such parameters are called
			
 
				-<I>code variables</I>
			
 
				-and are defined by prefixing their name with an asterisk in their declaration.
			
 
				-</P>
			
 
				-<P>
			
 
				-For example, this function wraps up an expression for later evaluation:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: defn code(*e) { return e; }
			
 
				-acid: x = code(v+atoi("100")\D)
			
 
				-acid: print(x)
			
 
				-(v+atoi("100"))\D;
			
 
				-acid: eval x
			
 
				-&lt;stdin&gt;:5: (error) v used but not set
			
 
				-acid: v=5
			
 
				-acid: eval x
			
 
				-105
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>Source Code Management
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid provides the means to examine source code. Source code is
			
 
				-represented by lists of strings. Builtin functions provide mapping
			
 
				-from address to lines and vice-versa. The default debugging environment
			
 
				-has the means to load and display source files.
			
 
				-</P>
			
 
				-<H4>Builtin Functions
			
 
				-</H4>
			
 
				-<P>
			
 
				-The Acid interpreter has a number of builtin functions, which cannot be redefined.
			
 
				-These functions perform machine- or operating system-specific functions such as
			
 
				-symbol table and process management.
			
 
				-The following section presents a description of each builtin function.
			
 
				-The notation
			
 
				-<TT>{}</TT>
			
 
				-is used to denote the empty list, which is the default value of a function that
			
 
				-does not execute a
			
 
				-<TT>return</TT>
			
 
				-statement.
			
 
				-The type and number of parameters for each function are specified in the
			
 
				-description; where a parameter can be of any type it is specified as type
			
 
				-<I>item</I>.
			
 
				-
			
 
				-
			
 
				-
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>Access</TT>
			
 
				-returns the integer 1 if the file name in
			
 
				-<I>string</I>
			
 
				-can be read by the builtin functions
			
 
				-<TT>file</TT>,
			
 
				-<TT>readfile</TT>,
			
 
				-or
			
 
				-<TT>include</TT>,
			
 
				-otherwise 0. A typical use of this function is to follow
			
 
				-a search path looking for a source file; it is used by
			
 
				-<TT>findsrc</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-if access("main.c") then
			
 
				-	return file("main.c");
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>atof</TT>
			
 
				-converts the string supplied as its argument into a floating point
			
 
				-number. The function accepts strings in the same format as the C
			
 
				-function of the same name. The value returned has the format code
			
 
				-<TT>f</TT>.
			
 
				-<TT>atof</TT>
			
 
				-returns the value 0.0 if it is unable to perform the conversion.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: +atof("10.4e6")
			
 
				-1.04e+07
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>atoi</TT>
			
 
				-converts the argument
			
 
				-to an integer value.
			
 
				-The function accepts strings in the same format as the C function of the
			
 
				-same name. The value returned has the format code
			
 
				-<TT>D</TT>.
			
 
				-<TT>atoi</TT>
			
 
				-returns the integer 0 if it is unable to perform a conversion.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: +atoi("-1255")
			
 
				--1255
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>error</TT>
			
 
				-generates an error message and returns the interpreter to interactive
			
 
				-mode. If an Acid program is running, it is aborted.
			
 
				-Processes being debugged are not affected. The values of all local variables are lost.
			
 
				-<TT>error</TT>
			
 
				-is commonly used to stop the debugger when some interesting condition arises
			
 
				-in the debugged program.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-while 1 do {
			
 
				-	step();
			
 
				-	if *main != @main then
			
 
				-		error("memory corrupted");
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>file</TT>
			
 
				-reads the contents of the file specified by
			
 
				-<I>string</I>
			
 
				-into a list.
			
 
				-Each element in the list is a string corresponding to a line in the file.
			
 
				-<TT>file</TT>
			
 
				-breaks lines at the newline character, but the newline
			
 
				-characters are not returned as part of each string.
			
 
				-<TT>file</TT>
			
 
				-returns the empty list if it encounters an error opening or reading the data.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: print(file("main.c")[0])
			
 
				-#include	&lt;u.h&gt;
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>filepc</TT>
			
 
				-interprets its
			
 
				-<I>string</I>
			
 
				-argument as a source file address in the form of a file name and line offset.
			
 
				-<TT>filepc</TT>
			
 
				-uses the symbol table to map the source address into a text address
			
 
				-in the debugged program. The
			
 
				-<I>integer</I>
			
 
				-return value has the format
			
 
				-<TT>X</TT>.
			
 
				-<TT>filepc</TT>
			
 
				-returns an address of -1 if the source address is invalid.
			
 
				-The source file address uses the same format as
			
 
				-<A href="/magic/man2html/1/acme"><I>acme</I>(1).
			
 
				-</A>This function is commonly used to set breakpoints from the source text.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: bpset(filepc("main:10"))
			
 
				-acid: bptab()
			
 
				-	0x00001020 usage  ADD	-0xc,R29
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>fmt</TT>
			
 
				-evaluates the expression
			
 
				-<I>item</I>
			
 
				-and sets the format of the result to
			
 
				-<I>fmt</I>.
			
 
				-The format of a value determines how it will be printed and
			
 
				-what kind of object will be fetched by the
			
 
				-<TT>*</TT>
			
 
				-and
			
 
				-<TT>@</TT>
			
 
				-operators. The
			
 
				-<TT>\</TT>
			
 
				-operator is a short-hand form of the
			
 
				-<TT>fmt</TT>
			
 
				-builtin function. The
			
 
				-<TT>fmt</TT>
			
 
				-function leaves the format of the
			
 
				-<I>item</I>
			
 
				-unchanged.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: main=fmt(main, 'i') // as instructions
			
 
				-acid: print(main\X, "\t", *main)
			
 
				-0x00001020 ADD	<I>-64,R29
			
 
				-</PRE></TT></DL>
			
 
				-</I><br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>fnbound</TT>
			
 
				-interprets its
			
 
				-<I>integer</I>
			
 
				-argument as an address in the text of the debugged program.
			
 
				-<TT>fnbound</TT>
			
 
				-returns a list containing two integers corresponding to
			
 
				-the start and end addresses of the function containing the supplied address.
			
 
				-If the
			
 
				-<I>integer</I>
			
 
				-address is not in the text segment of the program then the empty list is returned.
			
 
				-<TT>fnbound</TT>
			
 
				-is used by
			
 
				-<TT>next</TT>
			
 
				-to detect stepping into new functions.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: print(fnbound(main))
			
 
				-{0x00001050, 0x000014b8}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The follow set is defined as the set of program counter values that could result
			
 
				-from executing an instruction.
			
 
				-<TT>follow</TT>
			
 
				-interprets its
			
 
				-<I>integer</I>
			
 
				-argument as a text address, decodes the instruction at
			
 
				-that address and, with the current register set, builds a list of possible
			
 
				-next program counter values. If the instruction at the specified address
			
 
				-cannot be decoded
			
 
				-<TT>follow</TT>
			
 
				-raises an error.
			
 
				-<TT>follow</TT>
			
 
				-is used to plant breakpoints on
			
 
				-all potential paths of execution. The following code fragment
			
 
				-plants breakpoints on top of all potential following instructions.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-lst = follow(*PC);
			
 
				-while lst do
			
 
				-{
			
 
				-	*head lst = bpinst;
			
 
				-	lst = tail lst;
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>include</TT>
			
 
				-opens the file specified by
			
 
				-<I>string</I>
			
 
				-and uses its contents as command input to the interpreter.
			
 
				-The interpreter restores input to its previous source when it encounters
			
 
				-either an end of file or an error.
			
 
				-<TT>include</TT>
			
 
				-can be used to incrementally load symbol table information without
			
 
				-leaving the interpreter.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: include("/sys/src/cmd/acme/syms")
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>interpret</TT>
			
 
				-evaluates the
			
 
				-<I>string</I>
			
 
				-expression and uses its result as command input for the interpreter.
			
 
				-The interpreter restores input to its previous source when it encounters
			
 
				-either the end of string or an error. The
			
 
				-<TT>interpret</TT>
			
 
				-function allows Acid programs to write Acid code for later evaluation.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: interpret("main+10;")
			
 
				-0x0000102a
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>itoa</TT>
			
 
				-takes an integer argument and converts it into an ASCII string
			
 
				-in the
			
 
				-<TT>D</TT>
			
 
				-format.
			
 
				-An alternate format string
			
 
				-may be provided in the
			
 
				-<TT>%</TT>
			
 
				-style of
			
 
				-<A href="/magic/man2html/2/print"><I>print</I>(2).
			
 
				-</A>This function is commonly used to build
			
 
				-<TT>rc</TT>
			
 
				-command lines.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: rc("cat /proc/"+itoa(pid)+"/segment")
			
 
				-Stack    7fc00000 80000000    1
			
 
				-Data     00001000 00009000    1
			
 
				-Data     00009000 0000a000    1
			
 
				-Bss      0000a000 0000c000    1
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>kill</TT>
			
 
				-writes a kill control message into the control file of the process
			
 
				-specified by the
			
 
				-<I>integer</I>
			
 
				-pid.
			
 
				-If the process was previously installed by
			
 
				-<TT>setproc</TT>
			
 
				-it will be removed from the list of active processes.
			
 
				-If the
			
 
				-<I>integer</I>
			
 
				-has the same value as
			
 
				-<TT>pid</TT>,
			
 
				-then
			
 
				-<TT>pid</TT>
			
 
				-will be set to 0.
			
 
				-To continue debugging, a new process must be selected using
			
 
				-<TT>setproc</TT>.
			
 
				-For example, to kill all the active processes:
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-while proclist do {
			
 
				-	kill(head proclist);
			
 
				-	proclist = tail proclist;
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>map</TT>
			
 
				-either retrieves all the mappings associated with a process or sets a single
			
 
				-map entry to a new value.
			
 
				-If the
			
 
				-<I>list</I>
			
 
				-argument is omitted then
			
 
				-<TT>map</TT>
			
 
				-returns a list of lists. Each sublist has four values and describes a
			
 
				-single region of contiguous addresses in the
			
 
				-memory or file image of the debugged program. The first entry is the name of the
			
 
				-mapping. If the name begins with
			
 
				-<TT>*</TT>
			
 
				-it denotes a map into the memory of an active process.
			
 
				-The second and third values specify the base and end
			
 
				-address of the region and the fourth number specifies the offset in the file
			
 
				-corresponding to the first location of the region.
			
 
				-A map entry may be set by supplying a list in the same format as the sublist
			
 
				-described above. The name of the mapping must match a region already defined
			
 
				-by the current map.
			
 
				-Maps are set automatically for Plan 9 processes and some kernels; they may
			
 
				-need to be set by hand for other kernels and programs that run on bare hardware.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: map({"text", _start, end, 0x30})
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>match</TT>
			
 
				-compares each item in
			
 
				-<I>list</I>
			
 
				-using the equality operator
			
 
				-<TT>==</TT>
			
 
				-with
			
 
				-<I>item</I>.
			
 
				-The
			
 
				-<I>item</I>
			
 
				-can be of any type. If the match succeeds the result is the integer index
			
 
				-of the matching value, otherwise -1.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: list={8,9,10,11}
			
 
				-acid: print(list[match(10, list)]\D)
			
 
				-10
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>newproc</TT>
			
 
				-starts a new process with an argument vector constructed from
			
 
				-<I>string</I>.
			
 
				-The argument vector excludes the name of the program to execute and
			
 
				-each argument in
			
 
				-<I>string</I>
			
 
				-must be space separated. A new process can accept no more
			
 
				-than 512 arguments. The internal variable
			
 
				-<TT>pid</TT>
			
 
				-is set to the pid of the newly created process. The new pid
			
 
				-is also appended to the list of active processes stored in the variable
			
 
				-<TT>proclist</TT>.
			
 
				-The new process is created then halted at the first instruction, causing
			
 
				-the debugger to call
			
 
				-<TT>stopped</TT>.
			
 
				-The library functions
			
 
				-<TT>new</TT>
			
 
				-and
			
 
				-<TT>win</TT>
			
 
				-should be used to start processes when using the standard debugging
			
 
				-environment.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: newproc("-l .")
			
 
				-56720: system call	_main	ADD	-0x14,R29
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>pcfile</TT>
			
 
				-interprets its
			
 
				-<I>integer</I>
			
 
				-argument as a text address in the debugged program. The address and symbol table
			
 
				-are used to generate a string containing the name of the source file
			
 
				-corresponding to the text address. If the address does not lie within the
			
 
				-program the string
			
 
				-<TT>?file?</TT>
			
 
				-is returned.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: print("Now at ", pcfile(*PC), ":", pcline(*PC))
			
 
				-Now at ls.c:46 
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>pcline</TT>
			
 
				-interprets its
			
 
				-<I>integer</I>
			
 
				-argument as a text address in the debugged program. The address and symbol table
			
 
				-are used to generate an integer containing the line number in the source file
			
 
				-corresponding to the text address. If the address does not lie within the
			
 
				-program the integer 0 is returned.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: +file("main.c")[pcline(main)]
			
 
				-main(int argc, char *argv[])
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>print</TT>
			
 
				-evaluates each
			
 
				-<I>item</I>
			
 
				-supplied in its argument list and prints it to standard output. Each
			
 
				-argument will be printed according to its associated format character.
			
 
				-When the interpreter is executing, output is buffered and flushed every
			
 
				-5000 statements or when the interpreter returns to interactive mode.
			
 
				-<TT>print</TT>
			
 
				-accepts a maximum of 512 arguments.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: print(10, "decimal ", 10\D, "octal ", 10\o)
			
 
				-0x0000000a decimal 10 octal 000000000012 
			
 
				-acid: print({1, 2, 3})
			
 
				-{0x00000001 , 0x00000002 , 0x00000003 }
			
 
				-acid: print(main, main\a, "\t", @main\i)
			
 
				-0x00001020 main	ADD	<I>-64,R29
			
 
				-</PRE></TT></DL>
			
 
				-</I><br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>printto</TT>
			
 
				-offers a limited form of output redirection. The first
			
 
				-<I>string</I>
			
 
				-argument is used as the path name of a new file to create.
			
 
				-Each
			
 
				-<I>item</I>
			
 
				-is then evaluated and printed to the newly created file. When all items
			
 
				-have been printed the file is closed.
			
 
				-<TT>printto</TT>
			
 
				-accepts a maximum of 512 arguments.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: printto("/env/foo", "hello")
			
 
				-acid: rc("echo -n $foo")
			
 
				-hello
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>rc</TT>
			
 
				-evaluates
			
 
				-<I>string</I>
			
 
				-to form a shell command. A new command interpreter is started
			
 
				-to execute the command. The Acid interpreter blocks until the command
			
 
				-completes. The return value is the empty string
			
 
				-if the command succeeds, otherwise the exit status of the failed command.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: rc("B "+itoa(-pcline(addr))+" "+pcfile(addr));
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>readfile</TT>
			
 
				-takes the contents of the file specified by
			
 
				-<I>string</I>
			
 
				-and returns its contents as a new string.
			
 
				-If
			
 
				-<TT>readfile</TT>
			
 
				-encounters a zero byte in the file, it terminates.
			
 
				-If
			
 
				-<TT>readfile</TT>
			
 
				-encounters an error opening or reading the file then the empty list
			
 
				-is returned.
			
 
				-<TT>readfile</TT>
			
 
				-can be used to read the contents of device files whose lines are not
			
 
				-terminated with newline characters.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: ""+readfile("/dev/label")
			
 
				-helix
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>reason</TT>
			
 
				-uses machine-dependent information to generate a string explaining
			
 
				-why a process has stopped. The
			
 
				-<I>integer</I>
			
 
				-argument is the value of an architecture dependent status register,
			
 
				-for example
			
 
				-<TT>CAUSE</TT>
			
 
				-on the MIPS.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: print(reason(*CAUSE))
			
 
				-system call
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>regexp</TT>
			
 
				-matches the
			
 
				-<I>pattern</I>
			
 
				-string supplied as its first argument with the 
			
 
				-<I>string</I>
			
 
				-supplied as its second.
			
 
				-If the pattern matches the result is the value 1, otherwise 0.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: print(regexp(".*bar", "foobar"))
			
 
				-1
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>setproc</TT>
			
 
				-selects the default process used for memory and control operations. It effectively
			
 
				-shifts the focus of control between processes. The 
			
 
				-<I>integer</I>
			
 
				-argument specifies the pid of the process to look at.
			
 
				-The variable
			
 
				-<TT>pid</TT>
			
 
				-is set to the pid of the selected process. If the process is being
			
 
				-selected for the first time its pid is added to the list of active
			
 
				-processes
			
 
				-<TT>proclist</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: setproc(68382)
			
 
				-acid: procs()
			
 
				-&gt;68382: Stopped at main+0x4 setproc(68382)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>start</TT>
			
 
				-writes a
			
 
				-<TT>start</TT>
			
 
				-message to the control file of the process specified by the pid
			
 
				-supplied as its
			
 
				-<I>integer</I>
			
 
				-argument.
			
 
				-<TT>start</TT>
			
 
				-draws an error if the process is not in the
			
 
				-<TT>Stopped</TT>
			
 
				-state.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: start(68382)
			
 
				-acid: procs()
			
 
				-&gt;68382: Running at main+0x4 setproc(68382)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>startstop</TT>
			
 
				-performs the same actions as a call to
			
 
				-<TT>start</TT>
			
 
				-followed by a call to
			
 
				-<TT>stop</TT>.
			
 
				-The
			
 
				-<I>integer</I>
			
 
				-argument specifies the pid of the process to control. The process
			
 
				-must be in the
			
 
				-<TT>Stopped</TT>
			
 
				-state.
			
 
				-Execution is restarted, the debugger then waits for the process to
			
 
				-return to the
			
 
				-<TT>Stopped</TT>
			
 
				-state. A process will stop if a startstop message has been written to its control
			
 
				-file and any of the following conditions becomes true: the process executes or returns from
			
 
				-a system call, the process generates a trap or the process receives a note.
			
 
				-<TT>startstop</TT>
			
 
				-is used to implement single stepping.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: startstop(pid)
			
 
				-75374: breakpoint	ls	ADD	$-0x16c8,R29
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>status</TT>
			
 
				-uses the pid supplied by its
			
 
				-<I>integer</I>
			
 
				-argument to generate a string describing the state of the process.
			
 
				-The string corresponds to the state returned by the
			
 
				-sixth column of the
			
 
				-<A href="/magic/man2html/1/ps"><I>ps</I>(1)
			
 
				-</A>command.
			
 
				-A process must be in the
			
 
				-<TT>Stopped</TT>
			
 
				-state to modify its memory or registers.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: ""+status(pid)
			
 
				-Stopped
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>stop</TT>
			
 
				-writes a
			
 
				-<TT>stop</TT>
			
 
				-message to the control file of the process specified by the
			
 
				-pid supplied as its
			
 
				-<I>integer</I>
			
 
				-argument.
			
 
				-The interpreter blocks until the debugged process enters the
			
 
				-<TT>Stopped</TT>
			
 
				-state.
			
 
				-A process will stop if a stop message has been written to its control
			
 
				-file and any of the following conditions becomes true: the process executes or returns from
			
 
				-a system call, the process generates a trap, the process is scheduled or the
			
 
				-process receives a note.
			
 
				-<TT>stop</TT>
			
 
				-is used to wait for a process to halt before planting a breakpoint since Plan 9
			
 
				-only allows a process's memory to be written while it is in the
			
 
				-<TT>Stopped</TT>
			
 
				-state.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-defn bpset(addr) {
			
 
				-	if (status(pid)!="Stopped") then {
			
 
				-		print("Waiting...\n");
			
 
				-		stop(pid);
			
 
				-	}
			
 
				-	...
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>strace</TT>
			
 
				-generates a list of lists corresponding to procedures called by the debugged
			
 
				-program. Each sublist describes a single stack frame in the active process.
			
 
				-The first element is an
			
 
				-<I>integer</I>
			
 
				-of format
			
 
				-<TT>X</TT>
			
 
				-specifying the address of the called function. The second element is the value
			
 
				-of the program counter when the function was called. The third and fourth elements
			
 
				-contain lists of parameter and automatic variables respectively.
			
 
				-Each element of these lists
			
 
				-contains a string with the name of the variable and an
			
 
				-<I>integer</I>
			
 
				-value of format
			
 
				-<TT>X</TT>
			
 
				-containing the current value of the variable.
			
 
				-The arguments to
			
 
				-<TT>strace</TT>
			
 
				-are the current value of the program counter, the current value of the
			
 
				-stack pointer, and the address of the link register. All three parameters
			
 
				-must be integers.
			
 
				-The setting of 
			
 
				-<I>linkreg</I>
			
 
				-is architecture dependent. On the MIPS linkreg is set to the address of saved
			
 
				-<TT>R31</TT>,
			
 
				-on the SPARC to the address of saved
			
 
				-<TT>R15</TT>.
			
 
				-For the other architectures
			
 
				-<I>linkreg</I>
			
 
				-is not used, but must point to valid memory.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: print(strace(*PC, *SP, linkreg))
			
 
				-{{0x0000141c, 0xc0000f74,
			
 
				-{{"s", 0x0000004d}, {"multi", 0x00000000}}, 
			
 
				-{{"db", 0x00000000}, {"fd", 0x000010a4},
			
 
				-{"n", 0x00000001}, {"i", 0x00009824}}}}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>waitstop</TT>
			
 
				-writes a waitstop message to the control file of the process specified by the
			
 
				-pid supplied as its
			
 
				-<I>integer</I>
			
 
				-argument.
			
 
				-The interpreter will remain blocked until the debugged process enters the
			
 
				-<TT>Stopped</TT>
			
 
				-state.
			
 
				-A process will stop if a waitstop message has been written to its control
			
 
				-file and any of the following conditions becomes true: the process generates a trap
			
 
				-or receives a note. Unlike
			
 
				-<TT>stop</TT>,
			
 
				-the
			
 
				-<TT>waitstop</TT>
			
 
				-function is passive; it does not itself cause the program to stop.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: waitstop(pid)
			
 
				-75374: breakpoint	ls	ADD	-0x16c8,R29
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<H4>Library Functions
			
 
				-</H4>
			
 
				-<P>
			
 
				-A standard debugging environment is provided by modules automatically
			
 
				-loaded when
			
 
				-Acid is started.
			
 
				-These modules are located in the directory
			
 
				-<TT>/sys/lib/acid</TT>.
			
 
				-These functions may be overridden, personalized, or added to by code defined in
			
 
				-<TT>$home/lib/acid</TT>.
			
 
				-The implementation of these functions can be examined using the
			
 
				-<TT>whatis</TT>
			
 
				-operator and then modified during debugging sessions.
			
 
				-
			
 
				-
			
 
				-
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>Bsrc</TT>
			
 
				-interprets the
			
 
				-<I>integer</I>
			
 
				-argument as a text address. The text address is used to produce a pathname
			
 
				-and line number suitable for the
			
 
				-<TT>B</TT>
			
 
				-command
			
 
				-to send to the text editor
			
 
				-<A href="/magic/man2html/1/sam"><I>sam</I>(1)
			
 
				-</A>or
			
 
				-<A href="/magic/man2html/1/acme"><I>acme</I>(1).
			
 
				-</A><TT>Bsrc</TT>
			
 
				-builds an
			
 
				-<A href="/magic/man2html/1/rc"><I>rc</I>(1)
			
 
				-</A>command to invoke
			
 
				-<TT>B</TT>,
			
 
				-which either selects an existing source file or loads a new source file into the editor.
			
 
				-The line of source corresponding to the text address is then selected.
			
 
				-In the following example
			
 
				-<TT>stopped</TT>
			
 
				-is redefined so that the editor
			
 
				-follows and displays the source line currently being executed.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-defn stopped(pid) {
			
 
				-	pstop(pid);
			
 
				-	Bsrc(*PC);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-For machines equipped with floating point,
			
 
				-<TT>Fpr</TT>
			
 
				-displays the contents of the floating point registers as double precision
			
 
				-values.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: Fpr()
			
 
				-F0   0.	F2   0.
			
 
				-F4   0.	F6   0.
			
 
				-F8   0.	F10  0.
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>Ureg</TT>
			
 
				-interprets the integer passed as its first argument as the address of a
			
 
				-kernel
			
 
				-<TT>Ureg</TT>
			
 
				-structure. Each element of the structure is retrieved and printed.
			
 
				-The size and contents of the
			
 
				-<TT>Ureg</TT>
			
 
				-structure are architecture dependent.
			
 
				-This function can be used to decode the first argument passed to a
			
 
				-<A href="/magic/man2html/2/notify"><I>notify</I>(2)
			
 
				-</A>function after a process has received a note.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: Ureg(*notehandler:ur)
			
 
				-	status	0x3000f000
			
 
				-	pc	0x1020
			
 
				-	sp	0x7ffffe00
			
 
				-	cause	0x00004002
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>acidinit</TT>
			
 
				-is called by the interpreter after all
			
 
				-modules have been loaded at initialization time.
			
 
				-It is used to set up machine specific variables and the default source path.
			
 
				-<TT>acidinit</TT>
			
 
				-should not be called by user code.
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>addsrcdir</TT>
			
 
				-interprets its string argument as a new directory
			
 
				-<TT>findsrc</TT>
			
 
				-should search when looking for source code files.
			
 
				-<TT>addsrcdir</TT>
			
 
				-draws an error if the directory is already in the source search path. The search
			
 
				-path may be examined by looking at the variable
			
 
				-<TT>srcpath</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: rc("9fs fornax")
			
 
				-acid: addsrcdir("/n/fornax/sys/src/cmd")
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>asm</TT>
			
 
				-interprets its integer argument as a text address from which to disassemble
			
 
				-machine instructions.
			
 
				-<TT>asm</TT>
			
 
				-prints the instruction address in symbolic and hexadecimal form, then prints
			
 
				-the instructions with addressing modes. Up to twenty instructions will
			
 
				-be disassembled.
			
 
				-<TT>asm</TT>
			
 
				-stops disassembling when it reaches the end of the current function.
			
 
				-Instructions are read from the file image using the
			
 
				-<TT>@</TT>
			
 
				-operator.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: asm(main)
			
 
				-main     0x00001020 ADD    -0x64,R29
			
 
				-main+0x4 0x00001024 MOVW   R31,0x0(R29)
			
 
				-main+0x8 0x00001028 MOVW   R1,argc+4(FP)
			
 
				-main+0xc 0x0000102c MOVW   $bin(SB),R1
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>bpdel</TT>
			
 
				-removes a previously set breakpoint from memory.
			
 
				-The
			
 
				-<I>integer</I>
			
 
				-supplied as its argument must be the address of a previously set breakpoint.
			
 
				-The breakpoint address is deleted from the active breakpoint list
			
 
				-<TT>bplist</TT>,
			
 
				-then the original instruction is copied from the file image to the memory
			
 
				-image so that the breakpoint is removed.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: bpdel(main+4)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>bpset</TT>
			
 
				-places a breakpoint instruction at the address specified
			
 
				-by its
			
 
				-<I>integer</I>
			
 
				-argument, which must be in the text segment.
			
 
				-<TT>bpset</TT>
			
 
				-draws an error if a breakpoint has already been set at the specified address.
			
 
				-A list of current breakpoints is maintained in the variable
			
 
				-<TT>bplist</TT>.
			
 
				-Unlike in
			
 
				-<A href="/magic/man2html/1/db"><I>db</I>(1),
			
 
				-</A>breakpoints are left in memory even when a process is stopped, and
			
 
				-the process must exist, perhaps by being
			
 
				-created by either
			
 
				-<TT>new</TT>
			
 
				-or
			
 
				-<TT>win</TT>,
			
 
				-in order to place a breakpoint.
			
 
				-(<TT>Db</TT>
			
 
				-accepts breakpoint commands before the process is started.)
			
 
				-On the
			
 
				-MIPS and SPARC architectures,
			
 
				-breakpoints at function entry points should be set 4 bytes into the function
			
 
				-because the
			
 
				-instruction scheduler may fill
			
 
				-<TT>JAL</TT>
			
 
				-branch delay slots with the first instruction of the function.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: bpset(main+4)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>bptab</TT>
			
 
				-prints a list of currently installed breakpoints. The list contains the
			
 
				-breakpoint address in symbolic and hexadecimal form as well as the instruction
			
 
				-the breakpoint replaced. Breakpoints are not maintained across process creation
			
 
				-using
			
 
				-<TT>new</TT>
			
 
				-and
			
 
				-<TT>win</TT>.
			
 
				-They are maintained across a fork, but care must be taken to keep control of
			
 
				-the child process.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: bpset(ls+4)
			
 
				-acid: bptab()
			
 
				-	0x00001420 ls+0x4  MOVW	R31,0x0(R29)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>casm</TT>
			
 
				-continues to disassemble instructions from where the last
			
 
				-<TT>asm</TT>
			
 
				-or
			
 
				-<TT>casm</TT>
			
 
				-command stopped. Like
			
 
				-<TT>asm</TT>,
			
 
				-this command stops disassembling at function boundaries.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: casm()
			
 
				-main+0x10 0x00001030	MOVW	0x1,R3
			
 
				-main+0x14 0x00001034	MOVW	R3,0x8(R29)
			
 
				-main+0x18 0x00001038	MOVW	$0x1,R5
			
 
				-main+0x1c 0x0000103c	JAL	Binit(SB)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>cont</TT>
			
 
				-restarts execution of the currently active process.
			
 
				-If the process is stopped on a breakpoint, the breakpoint is first removed,
			
 
				-the program is single stepped, the breakpoint is replaced and the program
			
 
				-is then set executing. This may cause
			
 
				-<TT>stopped()</TT>
			
 
				-to be called twice.
			
 
				-<TT>cont</TT>
			
 
				-causes the interpreter to block until the process enters the
			
 
				-<TT>Stopped</TT>
			
 
				-state.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: cont()
			
 
				-95197: breakpoint	ls+0x4	MOVW	R31,0x0(R29)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>dump</TT>
			
 
				-interprets its first argument as an address, its second argument as a
			
 
				-count and its third as a format string.
			
 
				-<TT>dump</TT>
			
 
				-fetches an object from memory at the current address and prints it according
			
 
				-to the format. The address is incremented by the number of bytes specified by
			
 
				-the format and the process is repeated count times. The format string is any
			
 
				-combination of format characters, each preceded by an optional count.
			
 
				-For each object,
			
 
				-<TT>dump</TT>
			
 
				-prints the address in hexadecimal, a colon, the object and then a newline.
			
 
				-<TT>dump</TT>
			
 
				-uses
			
 
				-<TT>mem</TT>
			
 
				-to fetch each object.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: dump(main+35, 4, "X2bi")
			
 
				-0x00001043: 0x0c8fa700 108 143 lwc2 r0,0x528f(R4) 
			
 
				-0x0000104d: 0xa9006811   0   0 swc3 r0,0x0(R24) 
			
 
				-0x00001057: 0x2724e800   4  37 ADD  -0x51,R23,R31 
			
 
				-0x00001061: 0xa200688d   6   0 NOOP
			
 
				-0x0000106b: 0x2710c000   7   0 BREAK
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>findsrc</TT>
			
 
				-interprets its
			
 
				-<I>string</I>
			
 
				-argument as a source file. Each directory in the source path is searched
			
 
				-in turn for the file. If the file is found, the source text is loaded using
			
 
				-<TT>file</TT>
			
 
				-and stored in the list of active source files called
			
 
				-<TT>srctext</TT>.
			
 
				-The name of the file is added to the source file name list
			
 
				-<TT>srcfiles</TT>.
			
 
				-Users are unlikely to call
			
 
				-<TT>findsrc</TT>
			
 
				-from the command line, but may use it from scripts to preload source files
			
 
				-for a debugging session. This function is used by
			
 
				-<TT>src</TT>
			
 
				-and
			
 
				-<TT>line</TT>
			
 
				-to locate and load source code. The default search path for the MIPS
			
 
				-is
			
 
				-<TT>./</TT>,
			
 
				-<TT>/sys/src/libc/port</TT>,
			
 
				-<TT>/sys/src/libc/9sys</TT>,
			
 
				-<TT>/sys/src/libc/mips</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: findsrc(pcfile(main));
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-For machines equipped with floating point,
			
 
				-<TT>fpr</TT>
			
 
				-displays the contents of the floating point registers as single precision
			
 
				-values. When the interpreter stores or manipulates floating point values
			
 
				-it converts into double precision values.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: fpr()
			
 
				-F0   0.	F1   0.
			
 
				-F2   0.	F3   0.
			
 
				-F4   0.	F5   0.
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>func</TT>
			
 
				-single steps the active process until it leaves the current function
			
 
				-by either calling another function or returning to its caller.
			
 
				-<TT>func</TT>
			
 
				-will execute a single instruction after leaving the current function.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: func()
			
 
				-95197: breakpoint	ls+0x8	MOVW	R1,R8
			
 
				-95197: breakpoint	ls+0xc	MOVW	R8,R1
			
 
				-95197: breakpoint	ls+0x10	MOVW	R8,s+4(FP)
			
 
				-95197: breakpoint	ls+0x14	MOVW	$0x2f,R5
			
 
				-95197: breakpoint	ls+0x18	JAL	utfrrune(SB)
			
 
				-95197: breakpoint	utfrrune	ADD	$-0x18,R29
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>gpr</TT>
			
 
				-prints the values of the general purpose processor registers.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: gpr()
			
 
				-R1	0x00009562 R2	0x000010a4 R3	0x00005d08
			
 
				-R4	0x0000000a R5	0x0000002f R6	0x00000008
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>labstk</TT>
			
 
				-performs a stack trace from a Plan 9
			
 
				-<I>label.</I>
			
 
				-The kernel and
			
 
				-C compilers store continuations in a common format. Since the
			
 
				-compilers all use caller save conventions a continuation may be saved by
			
 
				-storing a
			
 
				-<TT>PC</TT>
			
 
				-and
			
 
				-<TT>SP</TT>
			
 
				-pair. This data structure is called a label and is used by the
			
 
				-C function
			
 
				-<TT>longjmp</TT>
			
 
				-and the kernel to schedule threads and processes.
			
 
				-<TT>labstk</TT>
			
 
				-interprets its
			
 
				-<I>integer</I>
			
 
				-argument as the address of a label and produces a stack trace for
			
 
				-the thread of execution. The value of the function
			
 
				-<TT>ALEF_tid</TT>
			
 
				-is a suitable argument for
			
 
				-<TT>labstk</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: labstk(*mousetid)
			
 
				-At pc:0x00021a70:Rendez_Sleep+0x178 rendez.l:44
			
 
				-Rendez_Sleep(r=0xcd7d8,bool=0xcd7e0,t=0x0) rendez.l:5
			
 
				-	called from ALEF_rcvmem+0x198 recvmem.l:45
			
 
				-ALEF_rcvmem(c=0x000cd764,l=0x00000010) recvmem.l:6
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>lstk</TT>
			
 
				-produces a long format stack trace.
			
 
				-The stack trace includes each function in the stack,
			
 
				-where it was called from, and the value of the parameters and automatic
			
 
				-variables for each function.
			
 
				-<TT>lstk</TT>
			
 
				-displays the value rather than the address of each variable and all
			
 
				-variables are assumed to be an integer in format
			
 
				-<TT>X</TT>.
			
 
				-To print a variable in its correct format use the
			
 
				-<TT>:</TT>
			
 
				-operator to find the address and apply the appropriate format before indirection
			
 
				-with the
			
 
				-<TT>*</TT>
			
 
				-operator. It may be necessary to single step a couple of instructions into
			
 
				-a function to get a correct stack trace because the frame pointer adjustment
			
 
				-instruction may get scheduled down into the body of the function.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: lstk()
			
 
				-At pc:0x00001024:main+0x4 ls.c:48
			
 
				-main(argc=0x00000001,argv=0x7fffefec) ls.c:48
			
 
				-	called from _main+0x20 main9.s:10
			
 
				-	_argc=0x00000000
			
 
				-	_args=0x00000000
			
 
				-	fd=0x00000000
			
 
				-	buf=0x00000000
			
 
				-	i=0x00000000
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>mem</TT>
			
 
				-interprets its first
			
 
				-<I>integer</I>
			
 
				-argument as the address of an object to be printed according to the
			
 
				-format supplied in its second
			
 
				-<I>string</I>
			
 
				-argument.
			
 
				-The format string can be any combination of format characters, each preceded
			
 
				-by an optional count.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: mem(bdata+0x326, "2c2Xb")
			
 
				-P = 0xa94bc464 0x3e5ae44d  19 
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>new</TT>
			
 
				-starts a new copy of the debugged program. The new program is started
			
 
				-with the program arguments set by the variable
			
 
				-<TT>progargs</TT>.
			
 
				-The new program is stopped in the second instruction of
			
 
				-<TT>main</TT>.
			
 
				-The breakpoint list is reinitialized.
			
 
				-<TT>new</TT>
			
 
				-may be used several times to instantiate several copies of a program
			
 
				-simultaneously. The user can rotate between the copies using
			
 
				-<TT>setproc</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: progargs="-l"
			
 
				-acid: new()
			
 
				-60: external interrupt	_main	ADD	$-0x14,R29
			
 
				-60: breakpoint	main+0x4	MOVW	R31,0x0(R29)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>next</TT>
			
 
				-steps through a single language level statement without tracing down
			
 
				-through each statement in a called function. For each statement,
			
 
				-<TT>next</TT>
			
 
				-prints the machine instructions executed as part of the statement. After
			
 
				-the statement has executed, source lines around the current program
			
 
				-counter are displayed.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: next()
			
 
				-60: breakpoint	Binit+0x4 MOVW	R31,0x0(R29)
			
 
				-60: breakpoint	Binit+0x8 MOVW	f+8(FP),R4
			
 
				-binit.c:93
			
 
				- 88	
			
 
				- 89	int
			
 
				- 90	Binit(Biobuf *bp, int f, int mode)
			
 
				- 91	{
			
 
				-&gt;92		return Binits(bp, f, mode, bp-&gt;b, BSIZE);
			
 
				- 93	}
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>notestk</TT>
			
 
				-interprets its
			
 
				-<I>integer</I>
			
 
				-argument as the address of a
			
 
				-<TT>Ureg</TT>
			
 
				-structure passed by the kernel to a
			
 
				-<A href="/magic/man2html/2/notify"><I>notify</I>(2)
			
 
				-</A>function during note processing.
			
 
				-<TT>notestk</TT>
			
 
				-uses the
			
 
				-<TT>PC</TT>,
			
 
				-<TT>SP</TT>,
			
 
				-and link register from the
			
 
				-<TT>Ureg</TT>
			
 
				-to print a stack trace corresponding to the point in the program where the note
			
 
				-was received.
			
 
				-To get a valid stack trace on the MIPS and SPARC architectures from a notify
			
 
				-routine, the program must stop in a new function called from the notify routine
			
 
				-so that the link register is valid and the notify routine's parameters are
			
 
				-addressable.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: notestk(*notify:ur)
			
 
				-Note pc:0x00001024:main+0x4 ls.c:48
			
 
				-main(argc=0x00000001,argv=0x7fffefec) ls.c:48
			
 
				-	called from _main+0x20 main9.s:10
			
 
				-	_argc=0x00000000
			
 
				-	_args=0x00000000
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>pfl</TT>
			
 
				-interprets its argument as a text address and uses it to print
			
 
				-the source file and line number corresponding to the address. The output
			
 
				-has the same format as file addresses in
			
 
				-<A href="/magic/man2html/1/acme"><I>acme</I>(1).
			
 
				-</A><DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: pfl(main)
			
 
				-ls.c:48
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>procs</TT>
			
 
				-prints a list of active processes attached to the debugger. Each process
			
 
				-produces a single line of output giving the pid, process state, the address
			
 
				-the process is currently executing, and the
			
 
				-<TT>setproc</TT>
			
 
				-command required to make that process current.
			
 
				-The current process is marked in the first column with a
			
 
				-<TT>&gt;</TT>
			
 
				-character. The debugger maintains a list of processes in the variable
			
 
				-<TT>proclist</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: procs()
			
 
				-&gt;62: Stopped at main+0x4 setproc(62)
			
 
				- 60: Stopped at Binit+0x8 setproc(60)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>pstop</TT>
			
 
				-prints the status of the process specified by the
			
 
				-<I>integer</I>
			
 
				-pid supplied as its argument.
			
 
				-<TT>pstop</TT>
			
 
				-is usually called from
			
 
				-<TT>stopped</TT>
			
 
				-every time a process enters the
			
 
				-<TT>Stopped</TT>
			
 
				-state.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: pstop(62)
			
 
				-0x0000003e: breakpoint	main+0x4	MOVW	R31,0x0(R29)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>regs</TT>
			
 
				-prints the contents of both the general and special purpose registers.
			
 
				-<TT>regs</TT>
			
 
				-calls
			
 
				-<TT>spr</TT>
			
 
				-then
			
 
				-<TT>gpr</TT>
			
 
				-to display the contents of the registers.
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>source</TT>
			
 
				-prints the directory search path followed by a list of currently loaded
			
 
				-source files. The source management functions
			
 
				-<TT>src</TT>
			
 
				-and
			
 
				-<TT>findsrc</TT>
			
 
				-use the search path to locate and load source files. Source files are
			
 
				-loaded incrementally into a source data base during debugging. A list
			
 
				-of loaded files is stored in the variable
			
 
				-<TT>srcfiles</TT>
			
 
				-and the contents of each source file in the variable
			
 
				-<TT>srctext</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: source()
			
 
				-/n/bootes/sys/src/libbio/
			
 
				-/sys/src/libc/port/
			
 
				-/sys/src/libc/9sys/
			
 
				-/sys/src/libc/mips/
			
 
				-	binit.c
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>spr</TT>
			
 
				-prints the contents of the processor control and memory management
			
 
				-registers. Where possible, the contents of the registers are decoded
			
 
				-to provide extra information; for example the
			
 
				-<TT>CAUSE</TT>
			
 
				-register on the MIPS is
			
 
				-printed both in hexadecimal and using the
			
 
				-<TT>reason</TT>
			
 
				-function.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: spr()
			
 
				-PC	0x00001024 main+0x4  ls.c:48
			
 
				-SP	0x7fffef68 LINK	0x00006264 _main+0x28 main9.s:12
			
 
				-STATUS	0x0000ff33 CAUSE	0x00000024 breakpoint
			
 
				-TLBVIR	0x000000d3 BADVADR	0x00001020
			
 
				-HI	0x00000004 LO		0x00001ff7
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>src</TT>
			
 
				-interprets its
			
 
				-<I>integer</I>
			
 
				-argument as a text address and uses this address to print 5 lines
			
 
				-of source before and after the address. The current line is marked with a
			
 
				-<TT>&gt;</TT>
			
 
				-character.
			
 
				-<TT>src</TT>
			
 
				-uses the source search path maintained by
			
 
				-<TT>source</TT>
			
 
				-and
			
 
				-<TT>addsrcdir</TT>
			
 
				-to locate the required source files.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: src(*PC)
			
 
				-ls.c:47
			
 
				- 42	Biobuf	bin;
			
 
				- 43	
			
 
				- 44	#define		HUNK	50
			
 
				- 45	
			
 
				- 46	void
			
 
				-&gt;47	main(int argc, char *argv[])
			
 
				- 48	{
			
 
				- 49		int i, fd;
			
 
				- 50		char buf[64];
			
 
				- 51	
			
 
				- 52		Binit(&amp;bin, 1, OWRITE);
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>step</TT>
			
 
				-causes the debugged process to execute a single machine level instruction.
			
 
				-If the program is stopped on a breakpoint set by
			
 
				-<TT>bpset</TT>
			
 
				-it is first removed, the single step executed, and the breakpoint replaced.
			
 
				-<TT>step</TT>
			
 
				-uses
			
 
				-<TT>follow</TT>
			
 
				-to predict the address of the program counter after the current instruction
			
 
				-has been executed. A breakpoint is placed at each of these predicted addresses
			
 
				-and the process is started. When the process stops the breakpoints are removed.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: step()
			
 
				-62: breakpoint	main+0x8	MOVW	R1,argc+4(FP)
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>stk</TT>
			
 
				-produces a short format stack trace. The stack trace includes each function
			
 
				-in the stack, where it was called from, and the value of the parameters.
			
 
				-The short format omits the values of automatic variables.
			
 
				-Parameters are assumed to be integer values in the format
			
 
				-<TT>X</TT>;
			
 
				-to print a parameter in the correct format use the
			
 
				-<TT>:</TT>
			
 
				-operator to obtain its address, apply the correct format, and use the
			
 
				-<TT>*</TT>
			
 
				-indirection operator to find its value.
			
 
				-It may be necessary to single step a couple of instructions into
			
 
				-a function to get a correct stack trace because the frame pointer adjustment
			
 
				-instruction may get scheduled down into the body of the function.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: stk()
			
 
				-At pc:0x00001028:main+0x8 ls.c:48
			
 
				-main(argc=0x00000002,argv=0x7fffefe4) ls.c:48
			
 
				-	called from _main+0x20 main9.s:10
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>stmnt</TT>
			
 
				-executes a single language level statement.
			
 
				-<TT>stmnt</TT>
			
 
				-displays each machine level instruction as it is executed. When the executed
			
 
				-statement is completed the source for the next statement is displayed.
			
 
				-Unlike
			
 
				-<TT>next</TT>,
			
 
				-the
			
 
				-<TT>stmnt</TT>
			
 
				-function will trace down through function calls.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: stmnt()
			
 
				-62: breakpoint	main+0x18 MOVW	R5,0xc(R29)
			
 
				-62: breakpoint	main+0x1c JAL	Binit(SB)
			
 
				-62: breakpoint	Binit     ADD	-0x18,R29
			
 
				-binit.c:91
			
 
				- 89	int
			
 
				- 90	Binit(Biobuf *bp, int f, int mode)
			
 
				-&gt;91	{
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>stopped</TT>
			
 
				-is called automatically by the interpreter
			
 
				-every time a process enters the
			
 
				-<TT>Stopped</TT>
			
 
				-state, such as when it hits a breakpoint.
			
 
				-The pid is passed as the
			
 
				-<I>integer</I>
			
 
				-argument.  The default implementation just calls
			
 
				-<TT>pstop</TT>,
			
 
				-but the function may be changed to provide more information or perform fine control
			
 
				-of execution.  Note that
			
 
				-<TT>stopped</TT>
			
 
				-should return; for example, calling
			
 
				-<TT>step</TT>
			
 
				-in
			
 
				-<TT>stopped</TT>
			
 
				-will recur until the interpreter runs out of stack space.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: defn stopped(pid) {
			
 
				-	if *lflag != 0 then error("lflag modified");
			
 
				-	}
			
 
				-acid: progargs = "-l"
			
 
				-acid: new();
			
 
				-acid: while 1 do step();
			
 
				-&lt;stdin&gt;:7: (error) lflag modified
			
 
				-acid: stk()
			
 
				-At pc:0x00001220:main+0x200 ls.c:54
			
 
				-main(argc=0x00000001,argv=0x7fffffe8) ls.c:48
			
 
				-	called from _main+0x20 main9.s:10
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>symbols</TT>
			
 
				-uses the regular expression supplied by
			
 
				-<I>string</I>
			
 
				-to search the symbol table for symbols whose name matches the
			
 
				-regular expression.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: symbols("main")
			
 
				-main	T	0x00001020
			
 
				-_main	T	0x0000623c
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-
			
 
				-
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<TT>win</TT>
			
 
				-performs exactly the same function as
			
 
				-<TT>new</TT>
			
 
				-but uses the window system to create a new window for the debugged process.
			
 
				-The variable
			
 
				-<TT>progargs</TT>
			
 
				-supplies arguments to the new process.
			
 
				-The environment variable
			
 
				-<TT>$8&#189;srv</TT>
			
 
				-must be set to allow the interpreter to locate the mount channel for the
			
 
				-window system.
			
 
				-The window is created in the top left corner of the screen and is
			
 
				-400x600 pixels in size. The
			
 
				-<TT>win</TT>
			
 
				-function may be modified to alter the geometry.
			
 
				-The window system will not be able to deliver notes in the new window
			
 
				-since the pid of the created process is not passed when the server is
			
 
				-mounted to create a new window.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>
			
 
				-acid: win()
			
 
				-</PRE></TT></DL>
			
 
				-<br>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/acidpaper.html
+++ b/sys/doc/acidpaper.html
@@ -1,1368 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-Acid: A Debugger Built From A Language
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Acid: A Debugger Built From A Language
			
 
				-</H1>
			
 
				-<DL><DD><I>Phil Winterbottom<br>
			
 
				-philw@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Originally appeared in
			
 
				-Proc. of the Winter 1994 USENIX Conf.,
			
 
				-pp. 211-222,
			
 
				-San Francisco, CA
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-Acid is an unusual source-level symbolic debugger for Plan 9. It is implemented
			
 
				-as a language interpreter with specialized primitives that provide
			
 
				-debugger support.  Programs written in the language manipulate
			
 
				-one or more target processes; variables in the language represent the
			
 
				-symbols, state, and resources of those processes. 
			
 
				-This structure allows complex
			
 
				-interaction between the debugger and the target program and
			
 
				-provides a convenient method of parameterizing differences between
			
 
				-machine architectures.
			
 
				-Although some effort is required to learn
			
 
				-the debugging language, the richness and flexibility of the
			
 
				-debugging environment encourage new ways of reasoning about the way
			
 
				-programs run and the conditions under which they fail.
			
 
				-</DL>
			
 
				-<H4>1 Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-The size and complexity
			
 
				-of programs have increased in proportion to processor speed and memory but
			
 
				-the interface between debugger and programmer has changed little.
			
 
				-Graphical user interfaces have eased some of the tedious
			
 
				-aspects of the interaction. A graphical interface is a convenient
			
 
				-means for navigating through source and data structures but provides
			
 
				-little benefit for process control.
			
 
				-The introduction of a new concurrent language, Alef [Win93], emphasized the
			
 
				-inadequacies of the existing Plan 9 [Pike90] debugger
			
 
				-<I>db</I>,
			
 
				-a distant relative of
			
 
				-<I>adb</I>,
			
 
				-and made it clear that a new debugger was required.
			
 
				-</P>
			
 
				-<P>
			
 
				-Current debuggers like
			
 
				-<I>dbx</I>,
			
 
				-<I>sdb</I>,
			
 
				-and
			
 
				-<I>gdb</I>
			
 
				-are limited to answering only the questions their authors
			
 
				-envisage.  As a result, they supply a plethora
			
 
				-of specialized commands, each attempting to anticipate
			
 
				-a specific question a user may ask.
			
 
				-When a debugging situation arises that is beyond the scope
			
 
				-of the command set, the tool is useless.
			
 
				-Further,
			
 
				-it is often tedious or impossible to reproduce an anomalous state
			
 
				-of the program, especially when
			
 
				-the state is embedded in the program's data structures.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid applies some ideas found in CAD software used for
			
 
				-hardware test and simulation.
			
 
				-It is based on the notion that the state and resources of a program
			
 
				-are best represented and manipulated by a language. The state and resources,
			
 
				-such as memory, registers, variables, type information and source code
			
 
				-are represented by variables in the language.
			
 
				-Expressions provide a computation mechanism and control
			
 
				-statements allow repetitive or selective interpretation based
			
 
				-on the result of expression evaluation.
			
 
				-The heart of the Acid debugger is an interpreter for a small typeless
			
 
				-language whose operators mirror the operations
			
 
				-of C and Alef, which in turn correspond well to the basic operations of
			
 
				-the machine. The interpreter itself knows nothing of the underlying
			
 
				-hardware; it deals with the program state and resources
			
 
				-in the abstract.
			
 
				-Fundamental routines to control
			
 
				-processes, read files, and interface to the system are implemented
			
 
				-as builtin functions available to the interpreter.
			
 
				-The actual debugger functionality is coded
			
 
				-in Acid; commands are implemented as Acid functions.
			
 
				-</P>
			
 
				-<P>
			
 
				-This language-based approach has several advantages.
			
 
				-Most importantly, programs written in Acid, including most of the
			
 
				-debugger itself, are inherently portable.
			
 
				-Furthermore, Acid avoids the limitations other debuggers impose when
			
 
				-debugging parallel programs.  Instead of embedding a fixed
			
 
				-process model in the debugger, Acid allows the
			
 
				-programmer to adapt the debugger to handle an
			
 
				-arbitrary process partitioning or program structure. 
			
 
				-The ability to
			
 
				-interact dynamically with an executing process provides clear advantages
			
 
				-over debuggers constrained to probe a static image.
			
 
				-Finally, the Acid language is a powerful vehicle for expressing
			
 
				-assertions about logic, process state, and the contents of data structures.
			
 
				-When combined with dynamic interaction it allows a
			
 
				-limited form of automated program verification without requiring
			
 
				-modification or recompilation of the source code.
			
 
				-The language is also an
			
 
				-excellent vehicle for preserving a test suite for later regression testing.
			
 
				-</P>
			
 
				-<P>
			
 
				-The debugger may be customized by its users; standard
			
 
				-functions may be modified or extended to suit a particular application
			
 
				-or preference.
			
 
				-For example, the kernel developers in our group require a
			
 
				-command set supporting assembler-level debugging while the application
			
 
				-programmers prefer source-level functionality.
			
 
				-Although the default library is biased toward assembler-level debugging,
			
 
				-it is easily modified to provide a convenient source-level interface.
			
 
				-The debugger itself does not change; the user combines primitives
			
 
				-and existing Acid functions in different ways to
			
 
				-implement the desired interface.
			
 
				-</P>
			
 
				-<H4>2 Related Work
			
 
				-</H4>
			
 
				-<P>
			
 
				-DUEL [Gol93], an extension to
			
 
				-<I>gdb</I>
			
 
				-[Stal91], proposes using a high level expression evaluator to solve
			
 
				-some of these problems. The evaluator provides iterators to loop over data
			
 
				-structures and conditionals to control evaluation of expressions.
			
 
				-The author shows that complex state queries can be formulated
			
 
				-by combining concise expressions but this only addresses part of the problem.
			
 
				-A program is a dynamic entity; questions asked when the program is in
			
 
				-a static state are meaningful only after the program has been `caught' in
			
 
				-that state. The framework for manipulating the program is still as
			
 
				-primitive as the underlying debugger. While DUEL provides a means to
			
 
				-probe data structures it entirely neglects the most beneficial aspect
			
 
				-of debugging languages: the ability to control processes. Acid is structured
			
 
				-around a thread of control that passes between the interpreter and the
			
 
				-target program.
			
 
				-</P>
			
 
				-<P>
			
 
				-The NeD debugger [May92] is a set of extensions to TCL [Ous90] that provide
			
 
				-debugging primitives. The resulting language, NeDtcl, is used to implement
			
 
				-a portable interface between a conventional debugger, pdb [May90], and
			
 
				-a server that executes NeDtcl programs operating on the target program.
			
 
				-Execution of the NeDtcl programs implements the debugging primitives
			
 
				-that pdb expects.
			
 
				-NeD is targeted at multi-process debugging across a network,
			
 
				-and proves the flexibility of a language as a means of
			
 
				-communication between debugging tools. Whereas NeD provides an interface
			
 
				-between a conventional debugger and the process it debugs, Acid is the
			
 
				-debugger itself. While NeD has some of the ideas
			
 
				-found in Acid it is targeted toward a different purpose. Acid seeks to
			
 
				-integrate the manipulation of a program's resources into the debugger
			
 
				-while NeD provides a flexible interconnect between components of
			
 
				-the debugging environment. The choice of TCL is appropriate for its use
			
 
				-in NeD but is not suitable for Acid. Acid relies on the coupling of the type
			
 
				-system with expression evaluation, which are the root of its design,
			
 
				-to provide the debugging primitives.
			
 
				-</P>
			
 
				-<P>
			
 
				-Dalek [Ols90] is an event based language extension to gdb. State transitions
			
 
				-in the target program cause events to be queued for processing by the
			
 
				-debugging language.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid has many of the advantages of same process or
			
 
				-<I>local</I>
			
 
				-<I>agent</I>
			
 
				-debuggers, like Parasight [Aral], without the need for dynamic linking or
			
 
				-shared memory.
			
 
				-Acid improves on the ideas of these other systems by completely integrating
			
 
				-all aspects of the debugging process into the language environment. Of
			
 
				-particular importance is the relationship between Acid variables,
			
 
				-program symbols, source code, registers and type information. This
			
 
				-integration is made possible by the design of the Acid language.
			
 
				-</P>
			
 
				-<P>
			
 
				-Interpreted languages such as Lisp and Smalltalk are able to provide
			
 
				-richer debugging environments through more complete information than
			
 
				-their compiled counterparts. Acid is a means to gather and represent
			
 
				-similar information about compiled programs through cooperation
			
 
				-with the compilation tools and library implementers.
			
 
				-</P>
			
 
				-<H4>3 Acid the Language
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid is a small interpreted language targeted to its debugging task.
			
 
				-It focuses on representing program state and addressing data rather than
			
 
				-expressing complex computations. Program state is
			
 
				-<I>addressable</I>
			
 
				-from an Acid program.
			
 
				-In addition to parsing and executing expressions and providing
			
 
				-an architecture-independent interface to the target process,
			
 
				-the interpreter supplies a mark-and-scan garbage collector
			
 
				-to manage storage.
			
 
				-</P>
			
 
				-<P>
			
 
				-Every Acid session begins with the loading of the Acid libraries.
			
 
				-These libraries contain functions, written in Acid, that provide
			
 
				-a standard debugging environment including breakpoint management,
			
 
				-stepping by instruction or statement, stack tracing, and
			
 
				-access to variables, memory, and registers.
			
 
				-The library contains 600 lines of Acid code and provides
			
 
				-functionality similar to
			
 
				-<I>dbx</I>.
			
 
				-Following the loading of the system library, Acid loads
			
 
				-user-specified libraries; this load sequence allows the
			
 
				-user to augment or override the standard commands
			
 
				-to customize the debugging environment.  When all libraries
			
 
				-are loaded, Acid issues an interactive prompt and begins
			
 
				-evaluating expressions entered by the user.  The Acid `commands'
			
 
				-are actually invocations of builtin primitives or previously defined
			
 
				-Acid functions. Acid evaluates each expression as it is entered and
			
 
				-prints the result.
			
 
				-</P>
			
 
				-<H4>4 Types and Variables
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid variables are of four basic types:
			
 
				-<I>integer</I>,
			
 
				-<I>string</I>,
			
 
				-<I>float</I>,
			
 
				-and
			
 
				-<I>list</I>.
			
 
				-The type of a variable is inferred by the type of the right-hand side of
			
 
				-an assignment expression.
			
 
				-Many of the operators can be applied to more than
			
 
				-one type; for these operators the action of the operator is determined
			
 
				-by the type of its operands.
			
 
				-For example,
			
 
				-the
			
 
				-<TT>+</TT>
			
 
				-operator adds
			
 
				-<I>integer</I>
			
 
				-and
			
 
				-<I>float</I>
			
 
				-operands, and concatenates
			
 
				-<I>string</I>
			
 
				-and
			
 
				-<I>list</I>
			
 
				-operands.
			
 
				-Lists are the only complex type in Acid; there are no arrays, structures
			
 
				-or pointers. Operators provide
			
 
				-<TT>head</TT>,
			
 
				-<TT>tail</TT>,
			
 
				-<TT>append</TT>
			
 
				-and
			
 
				-<TT>delete</TT>
			
 
				-operations.
			
 
				-Lists can also be indexed like arrays.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid has two levels of scope: global and local.
			
 
				-Function parameters and variables declared in a function body
			
 
				-using the
			
 
				-<TT>local</TT>
			
 
				-keyword are created at entry to the function and
			
 
				-exist for the lifetime of a function.
			
 
				-Global variables are created by assignment and need not be declared.
			
 
				-All variables and functions in the program
			
 
				-being debugged are entered in the Acid symbol table as global
			
 
				-variables during Acid initialization.
			
 
				-Conflicting variable names are resolved by prefixing enough `$' characters
			
 
				-to make them unique.
			
 
				-Syntactically, Acid variables and target program
			
 
				-symbols are referenced identically.
			
 
				-However, the variables are managed differently in the Acid
			
 
				-symbol table and the user must be aware of this distinction.
			
 
				-The value of an Acid variable is stored in the symbol
			
 
				-table; a reference returns the value.
			
 
				-The symbol table entry for a variable or function in the target
			
 
				-program contains the address of that symbol in the image
			
 
				-of the program.  Thus, the value of a program variable is
			
 
				-accessed by indirect reference through the Acid
			
 
				-variable that has the same name; the value of an Acid variable is the
			
 
				-address of the corresponding program variable.
			
 
				-</P>
			
 
				-</I><H4>5 Control Flow
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>while</TT>
			
 
				-and
			
 
				-<TT>loop</TT>
			
 
				-statements implement looping.
			
 
				-The former
			
 
				-is similar to the same statement in C.
			
 
				-The latter evaluates starting and ending expressions yielding
			
 
				-integers and iterates while an incrementing loop index
			
 
				-is within the bounds of those expressions.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: i = 0; loop 1,5 do print(i=i+1)
			
 
				-0x00000001
			
 
				-0x00000002
			
 
				-0x00000003
			
 
				-0x00000004
			
 
				-0x00000005
			
 
				-acid:
			
 
				-</PRE></TT></DL>
			
 
				-The traditional
			
 
				-<TT>if-then-else</TT>
			
 
				-statement implements conditional execution.
			
 
				-</P>
			
 
				-<H4>6 Addressing
			
 
				-</H4>
			
 
				-<P>
			
 
				-Two indirection operators allow Acid to access values in
			
 
				-the program being debugged.
			
 
				-The
			
 
				-<TT>*</TT>
			
 
				-operator fetches a value from the memory image of an
			
 
				-executing process;
			
 
				-the
			
 
				-<TT>@</TT>
			
 
				-operator fetches a value from the text file of the process.
			
 
				-When either operator appears on the left side of an assignment, the value
			
 
				-is written rather than read.
			
 
				-</P>
			
 
				-<P>
			
 
				-The indirection operator must know the size of the object
			
 
				-referenced by a variable.
			
 
				-The Plan 9 compilers neglect to include this
			
 
				-information in the program symbol table, so Acid cannot
			
 
				-derive this information implicitly.
			
 
				-Instead Acid variables have formats.
			
 
				-The format is a code
			
 
				-letter specifying the printing style and the effect of some of the
			
 
				-operators on that variable.
			
 
				-The indirection operators look at the format code to determine the
			
 
				-number of bytes to read or write.
			
 
				-The format codes are derived from the format letters used by
			
 
				-<I>db</I>.
			
 
				-By default, symbol table variables and numeric constants
			
 
				-are assigned the format code
			
 
				-<TT>'X'</TT>
			
 
				-which specifies 32-bit hexadecimal.
			
 
				-Printing such a variable yields output of the form
			
 
				-<TT>0x00123456</TT>.
			
 
				-An indirect reference through the variable fetches 32 bits
			
 
				-of data at the address indicated by the variable.
			
 
				-Other formats specify various data types, for example
			
 
				-<TT>i</TT>
			
 
				-an instruction,
			
 
				-<TT>D</TT>
			
 
				-a signed 32 bit decimal,
			
 
				-<TT>s</TT>
			
 
				-a null-terminated string.
			
 
				-The
			
 
				-<TT>fmt</TT>
			
 
				-function
			
 
				-allows the user to change the format code of a variable
			
 
				-to control the printing format and
			
 
				-operator side effects.
			
 
				-This function evaluates the expression supplied as the first
			
 
				-argument, attaches the format code supplied as the second
			
 
				-argument to the result and returns that value.
			
 
				-If the result is assigned to a variable,
			
 
				-the new format code applies to
			
 
				-that variable.  For convenience, Acid provides the
			
 
				-<TT>\</TT>
			
 
				-operator as a shorthand infix form of
			
 
				-<TT>fmt</TT>.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: x=10
			
 
				-acid: x				 // print x in hex
			
 
				-0x0000000a 
			
 
				-acid: x = fmt(x, 'D')		 // make x type decimal
			
 
				-acid: print(x, fmt(x, 'X'), x\X) // print x in decimal &amp; hex
			
 
				-10 0x0000000a 0x0000000a
			
 
				-acid: x				 // print x in decimal
			
 
				-10
			
 
				-acid: x\o			 // print x in octal
			
 
				-000000000012
			
 
				-</PRE></TT></DL>
			
 
				-The 
			
 
				-<TT>++</TT>
			
 
				-and
			
 
				-<TT>--</TT>
			
 
				-operators increment or decrement a variable by an amount
			
 
				-determined by its format code.  Some formats imply a non-fixed size.
			
 
				-For example, the
			
 
				-<TT>i</TT>
			
 
				-format code disassembles an instruction into a string.
			
 
				-On a 68020, which has variable length instructions:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: p=main\i                     // p=addr(main), type INST
			
 
				-acid: loop 1,5 do print(p\X, @p++) // disassemble 5 instr's
			
 
				-0x0000222e LEA	0xffffe948(A7),A7
			
 
				-0x00002232 MOVL	s+0x4(A7),A2
			
 
				-0x00002236 PEA	0x2f(0)
			
 
				-0x0000223a MOVL	A2,-(A7)
			
 
				-0x0000223c BSR	utfrrune
			
 
				-acid:
			
 
				-</PRE></TT></DL>
			
 
				-Here,
			
 
				-<TT>main</TT>
			
 
				-is the address of the function of the same name in the program under test.
			
 
				-The loop retrieves the five instructions beginning at that address and
			
 
				-then prints the address and the assembly language representation of each.
			
 
				-Notice that the stride of the increment operator varies with the size of
			
 
				-the instruction: the
			
 
				-<TT>MOVL</TT>
			
 
				-at 
			
 
				-<TT>0x0000223a</TT>
			
 
				-is a two byte instruction while all others are four bytes long.
			
 
				-</P>
			
 
				-<P>
			
 
				-Registers are treated as normal program variables referenced
			
 
				-by their symbolic assembler language names.
			
 
				-When a
			
 
				-process stops, the register set is saved by the kernel
			
 
				-at a known virtual address in the process memory map.
			
 
				-The Acid variables associated with the registers point
			
 
				-to the saved values and the
			
 
				-<TT>*</TT>
			
 
				-indirection operator can then be used to read and write the register set.
			
 
				-Since the registers are accessed via Acid variables they may
			
 
				-be used in arbitrary expressions.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: PC                            // addr of saved PC
			
 
				-0xc0000f60 
			
 
				-acid: *PC
			
 
				-0x0000623c                          // contents of PC
			
 
				-acid: *PC\a
			
 
				-main
			
 
				-acid: *R1=10                        // modify R1
			
 
				-acid: asm(*PC+4)                    // disassemble @ PC+4
			
 
				-main+0x4 0x00006240 	MOVW	R31,0x0(R29)
			
 
				-main+0x8 0x00006244 	MOVW	$setR30(SB),R30
			
 
				-main+0x10 0x0000624c 	MOVW	R1,_clock(SB)
			
 
				-</PRE></TT></DL>
			
 
				-Here, the saved
			
 
				-<TT>PC</TT>
			
 
				-is stored at address
			
 
				-<TT>0xc0000f60</TT>;
			
 
				-its current content is
			
 
				-<TT>0x0000623c</TT>.
			
 
				-The
			
 
				-`<TT>a</TT>'
			
 
				-format code converts this value to a string specifying
			
 
				-the address as an offset beyond the nearest symbol.
			
 
				-After setting the value of register
			
 
				-<TT>1</TT>,
			
 
				-the example uses the
			
 
				-<TT>asm</TT>
			
 
				-command to disassemble a short section of code beginning
			
 
				-at four bytes beyond the current value of the
			
 
				-<TT>PC</TT>.
			
 
				-</P>
			
 
				-<H4>7 Process Interface
			
 
				-</H4>
			
 
				-<P>
			
 
				-A program executing under Acid is monitored through the
			
 
				-<I>proc</I>
			
 
				-file system interface provided by Plan 9.
			
 
				-Textual messages written to the
			
 
				-<TT>ctl</TT>
			
 
				-file control the execution of the process.
			
 
				-For example writing
			
 
				-<TT>waitstop</TT>
			
 
				-to the control file causes the write to block until the target
			
 
				-process enters the kernel and is stopped. When the process is stopped
			
 
				-the write completes. The
			
 
				-<TT>startstop</TT>
			
 
				-message starts the target process and then does a
			
 
				-<TT>waitstop</TT>
			
 
				-action.
			
 
				-Synchronization between the debugger and the target process is determined
			
 
				-by the actions of the various messages. Some operate asynchronously to the
			
 
				-target process and always complete immediately, others block until the
			
 
				-action completes. The asynchronous messages allow Acid to control
			
 
				-several processes simultaneously.
			
 
				-</P>
			
 
				-<P>
			
 
				-The interpreter has builtin functions named after each of the control
			
 
				-messages. The functions take a process id as argument.
			
 
				-Any time a control message causes the program to execute instructions 
			
 
				-the interpreter performs two actions when the control operation has completed.
			
 
				-The Acid variables pointing at the register set are fixed up to point
			
 
				-at the saved registers, and then
			
 
				-the user defined function
			
 
				-<TT>stopped</TT>
			
 
				-is executed.
			
 
				-The 
			
 
				-<TT>stopped</TT>
			
 
				-function may print the current address,
			
 
				-line of source or instruction and return to interactive mode. Alternatively
			
 
				-it may traverse a complex data structure, gather statistics and then set
			
 
				-the program running again.
			
 
				-</P>
			
 
				-<P>
			
 
				-Several Acid variables are maintained by the debugger rather than the
			
 
				-programmer.
			
 
				-These variables allow generic Acid code to deal with the current process,
			
 
				-architecture specifics or the symbol table.
			
 
				-The variable
			
 
				-<TT>pid</TT>
			
 
				-is the process id of the current process Acid is debugging.
			
 
				-The variable
			
 
				-<TT>symbols</TT>
			
 
				-contains a list of lists where each sublist contains the symbol
			
 
				-name, its type and the value of the symbol.
			
 
				-The variable
			
 
				-<TT>registers</TT>
			
 
				-contains a list of the machine-specific register names. Global symbols in the target program
			
 
				-can be referenced directly by name from Acid. Local variables
			
 
				-are referenced using the colon operator as <TT>function:variable</TT>.
			
 
				-</P>
			
 
				-<H4>8 Source Level Debugging
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid provides several builtin functions to manipulate source code.
			
 
				-The
			
 
				-<TT>file</TT>
			
 
				-function reads a text file, inserting each line into a list.
			
 
				-The
			
 
				-<TT>pcfile</TT>
			
 
				-and
			
 
				-<TT>pcline</TT>
			
 
				-functions each take an address as an argument.
			
 
				-The first
			
 
				-returns a string containing the name of the source file
			
 
				-and the second returns an integer containing the line number
			
 
				-of the source line containing the instruction at the address.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: pcfile(main)		// file containing main
			
 
				-main.c
			
 
				-acid: pcline(main)		// line # of main in source
			
 
				-11
			
 
				-acid: file(pcfile(main))[pcline(main)]	// print that line
			
 
				-main(int argc, char *argv[])
			
 
				-acid: src(*PC)			// print statements nearby
			
 
				- 9
			
 
				- 10 void
			
 
				-&gt;11 main(int argc, char *argv[])
			
 
				- 12 {
			
 
				- 13	int a;
			
 
				-</PRE></TT></DL>
			
 
				-In this example, the three primitives are combined in an expression to print
			
 
				-a line of source code associated with an address.
			
 
				-The
			
 
				-<TT>src</TT>
			
 
				-function prints a few lines of source
			
 
				-around the address supplied as its argument. A companion routine,
			
 
				-<TT>Bsrc</TT>,
			
 
				-communicates with the external editor
			
 
				-<TT>sam</TT>.
			
 
				-Given an address, it loads the corresponding source file into the editor
			
 
				-and highlights the line containing the address.  This simple interface
			
 
				-is easily extended to more complex functions.
			
 
				-For example, the
			
 
				-<TT>step</TT>
			
 
				-function can select the current file and line in the editor
			
 
				-each time the target program stops, giving the user a visual
			
 
				-trace of the execution path of the program. A more complete interface
			
 
				-allowing two way communication between Acid and the
			
 
				-<TT>acme</TT>
			
 
				-user interface [Pike93] is under construction. A filter between the debugger
			
 
				-and the user interface provides interpretation of results from both
			
 
				-sides of the interface. This allows the programming environment to
			
 
				-interact with the debugger and vice-versa, a capability missing from the
			
 
				-<TT>sam</TT>
			
 
				-interface.
			
 
				-The
			
 
				-<TT>src</TT>
			
 
				-and
			
 
				-<TT>Bsrc</TT>
			
 
				-functions are both written in Acid code using the file and line primitives.
			
 
				-Acid provides library functions to step through source level
			
 
				-statements and functions. Furthermore, addresses in Acid expressions can be
			
 
				-specified by source file and line.
			
 
				-Source code is manipulated in the Acid
			
 
				-<I>list</I>
			
 
				-data type.
			
 
				-</P>
			
 
				-<H4>9 The Acid Library
			
 
				-</H4>
			
 
				-<P>
			
 
				-The following examples define some useful commands and
			
 
				-illustrate the interaction of the debugger and the interpreter.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-defn bpset(addr)                          // set breakpoint
			
 
				-{
			
 
				-	if match(addr, bplist) &gt;= 0 then
			
 
				-		print("bkpoint already set:", addr\a, "\n");
			
 
				-	else {
			
 
				-		*fmt(addr, bpfmt) = bpinst;   // plant it
			
 
				-		bplist = append bplist, addr; // add to list
			
 
				-	}
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>bpset</TT>
			
 
				-function plants a break point in memory. The function starts by
			
 
				-using the
			
 
				-<TT>match</TT>
			
 
				-builtin to
			
 
				-search the breakpoint list to determine if a breakpoint is already
			
 
				-set at the address.
			
 
				-The indirection operator, controlled by the format code returned
			
 
				-by the
			
 
				-<TT>fmt</TT>
			
 
				-primitive, is used to plant the breakpoint in memory.
			
 
				-The variables
			
 
				-<TT>bpfmt</TT>
			
 
				-and
			
 
				-<TT>bpinst</TT>
			
 
				-are Acid global variables containing the format code specifying
			
 
				-the size of the breakpoint instruction and the breakpoint instruction
			
 
				-itself.
			
 
				-These
			
 
				-variables are set by architecture-dependent library code
			
 
				-when the debugger first attaches to the executing image.
			
 
				-Finally the address of the breakpoint is
			
 
				-appended to the breakpoint list,
			
 
				-<TT>bplist</TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-defn step()				// single step
			
 
				-{
			
 
				-	local lst, lpl, addr, bput;
			
 
				-
			
 
				-	bput = 0;			// sitting on bkpoint
			
 
				-	if match(*PC, bplist) &gt;= 0 then {	
			
 
				-		bput = fmt(*PC, bpfmt);	// save current addr
			
 
				-		*bput = @bput;		// replace it
			
 
				-	}
			
 
				-
			
 
				-	lst = follow(*PC);		// get follow set
			
 
				-
			
 
				-	lpl = lst;
			
 
				-	while lpl do {			// place breakpoints
			
 
				-		*(head lpl) = bpinst;
			
 
				-		lpl = tail lpl;
			
 
				-	}
			
 
				-
			
 
				-	startstop(pid);			// do the step
			
 
				-
			
 
				-	while lst do {			// remove breakpoints
			
 
				-		addr = fmt(head lst, bpfmt);
			
 
				-		*addr = @addr;		// replace instr.
			
 
				-		lst = tail lst;
			
 
				-	}
			
 
				-	if bput != 0 then
			
 
				-		*bput = bpinst;		// restore breakpoint
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>step</TT>
			
 
				-function executes a single assembler instruction.
			
 
				-If the
			
 
				-<TT>PC</TT>
			
 
				-is sitting
			
 
				-on a breakpoint, the address and size of
			
 
				-the breakpoint are saved.
			
 
				-The breakpoint instruction
			
 
				-is then removed using the
			
 
				-<TT>@</TT>
			
 
				-operator to fetch
			
 
				-<TT>bpfmt</TT>
			
 
				-bytes from the text file and to place it into the memory
			
 
				-of the executing process using the
			
 
				-<TT>*</TT>
			
 
				-operator.
			
 
				-The
			
 
				-<TT>follow</TT>
			
 
				-function is an Acid
			
 
				-builtin which returns a follow-set: a list of instruction addresses which
			
 
				-could be executed next.
			
 
				-If the instruction stored at the
			
 
				-<TT>PC</TT>
			
 
				-is a branch instruction, the
			
 
				-list contains the addresses of the next instruction and
			
 
				-the branch destination; otherwise, it contains only the
			
 
				-address of the next instruction.
			
 
				-The follow-set is then used to replace each possible following
			
 
				-instruction with a breakpoint instruction.  The original
			
 
				-instructions need not be saved; they remain
			
 
				-in their unaltered state in the text file.
			
 
				-The
			
 
				-<TT>startstop</TT>
			
 
				-builtin writes the `startstop' message to the
			
 
				-<I>proc</I>
			
 
				-control file for the process named
			
 
				-<TT>pid</TT>.
			
 
				-The target process executes until some condition causes it to
			
 
				-enter the kernel, in this case, the execution of a breakpoint.
			
 
				-When the process blocks, the debugger regains control and invokes the
			
 
				-Acid library function
			
 
				-<TT>stopped</TT>
			
 
				-which reports the address and cause of the blockage.
			
 
				-The
			
 
				-<TT>startstop</TT>
			
 
				-function completes and returns to the
			
 
				-<TT>step</TT>
			
 
				-function where
			
 
				-the follow-set is used to replace the breakpoints placed earlier.
			
 
				-Finally, if the address of the original
			
 
				-<TT>PC</TT>
			
 
				-contained a breakpoint, it is replaced.
			
 
				-</P>
			
 
				-<P>
			
 
				-Notice that this approach to process control is inherently portable;
			
 
				-the Acid code is shared by the debuggers for all architectures.
			
 
				-Acid variables and builtin functions provide a transparent interface
			
 
				-to architecture-dependent values and functions.  Here the breakpoint
			
 
				-value and format are referenced through Acid variables and the
			
 
				-<TT>follow</TT>
			
 
				-primitive masks the differences in the underlying instruction set.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>next</TT>
			
 
				-function, similar to the
			
 
				-<I>dbx</I>
			
 
				-command of the same name,
			
 
				-is a simpler example.
			
 
				-This function steps through
			
 
				-a single source statement but steps over function calls.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-defn next()
			
 
				-{
			
 
				-	local sp, bound;
			
 
				-
			
 
				-	sp = *SP;			// save starting SP
			
 
				-	bound = fnbound(*PC);		// begin &amp; end of fn.
			
 
				-	stmnt();			// step 1 statement
			
 
				-	pc = *PC;
			
 
				-	if pc &gt;= bound[0] &amp;&amp; pc &lt; bound[1] then
			
 
				-		return {};
			
 
				-
			
 
				-	while (pc&lt;bound[0] || pc&gt;bound[1]) &amp;&amp; sp&gt;=*SP do {
			
 
				-		step();
			
 
				-		pc = *PC;
			
 
				-	}
			
 
				-	src(*PC);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>next</TT>
			
 
				-function
			
 
				-starts by saving the current stack pointer in a local variable.
			
 
				-It then uses the Acid library function
			
 
				-<TT>fnbound</TT>
			
 
				-to return the addresses of the first and last instructions in
			
 
				-the current function in a list.
			
 
				-The
			
 
				-<TT>stmnt</TT>
			
 
				-function executes a single source statement and then uses
			
 
				-<TT>src</TT>
			
 
				-to print a few lines of source around the new
			
 
				-<TT>PC</TT>.
			
 
				-If the new value of the
			
 
				-<TT>PC</TT>
			
 
				-remains in the current function,
			
 
				-<TT>next</TT>
			
 
				-returns.
			
 
				-When the executed statement is a function call or a return
			
 
				-from a function, the new value of the
			
 
				-<TT>PC</TT>
			
 
				-is outside the bounds calculated by
			
 
				-<TT>fnbound</TT>
			
 
				-and the test of the
			
 
				-<TT>while</TT>
			
 
				-loop is evaluated.
			
 
				-If the statement was a return, the new value of the stack pointer
			
 
				-is greater than the original value and the loop completes without
			
 
				-execution.
			
 
				-Otherwise, the loop is entered and instructions are continually
			
 
				-executed until the value of the
			
 
				-<TT>PC</TT>
			
 
				-is between the bounds calculated earlier.  At that point, execution
			
 
				-ceases and a few lines of source in the vicinity of the
			
 
				-<TT>PC</TT>
			
 
				-are printed.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid provides concise and elegant expression for control and
			
 
				-manipulation of target programs. These examples demonstrate how a
			
 
				-few well-chosen primitives can be combined to create a rich debugging environment.
			
 
				-</P>
			
 
				-<H4>10 Dealing With Multiple Architectures
			
 
				-</H4>
			
 
				-<P>
			
 
				-A single binary of Acid may be used to debug a program running on any
			
 
				-of the five processor architectures supported by Plan 9.  For example,
			
 
				-Plan 9 allows a user on a MIPS to import the
			
 
				-<I>proc</I>
			
 
				-file system from an i486-based PC and remotely debug a program executing
			
 
				-on that processor.
			
 
				-</P>
			
 
				-<P>
			
 
				-Two levels of abstraction provide this architecture independence.
			
 
				-On the lowest level, a Plan 9 library supplies functions to
			
 
				-decode the file header of the program being debugged and
			
 
				-select a table of system parameters
			
 
				-and a jump vector of architecture-dependent
			
 
				-functions based on the magic number.
			
 
				-Among these functions are byte-order-independent
			
 
				-access to memory and text files, stack manipulation, disassembly,
			
 
				-and floating point number interpretation.
			
 
				-The second level of abstraction is supplied by Acid.
			
 
				-It consists of primitives and approximately 200 lines
			
 
				-of architecture-dependent Acid library code that interface the
			
 
				-interpreter to the architecture-dependent library.
			
 
				-This layer performs functions such as mapping register names to
			
 
				-memory locations, supplying breakpoint values and sizes,
			
 
				-and converting processor specific data to Acid data types.
			
 
				-An example of the latter is the stack trace function
			
 
				-<TT>strace</TT>,
			
 
				-which uses the stack traversal functions in the
			
 
				-architecture-dependent library to construct a list of lists describing
			
 
				-the context of a process.  The first level of list selects
			
 
				-each function in the trace; subordinate lists contain the
			
 
				-names and values of parameters and local variables of
			
 
				-the functions.  Acid commands and library functions that
			
 
				-manipulate and display process state information operate
			
 
				-on the list representation and are independent of the
			
 
				-underlying architecture.
			
 
				-</P>
			
 
				-<H4>11 Alef Runtime
			
 
				-</H4>
			
 
				-<P>
			
 
				-Alef is a concurrent programming language,
			
 
				-designed specifically for systems programming, which supports both
			
 
				-shared variable and message passing paradigms.
			
 
				-Alef borrows the C expression syntax but implements
			
 
				-a substantially different type system.
			
 
				-The language provides a rich set of 
			
 
				-exception handling, process management, and synchronization
			
 
				-primitives, which rely on a runtime system.
			
 
				-Alef program bugs are often deadlocks, synchronization failures,
			
 
				-or non-termination caused by locks being held incorrectly.
			
 
				-In such cases, a process stalls deep
			
 
				-in the runtime code and it is clearly
			
 
				-unreasonable to expect a programmer using the language
			
 
				-to understand the detailed
			
 
				-internal semantics of the runtime support functions.
			
 
				-</P>
			
 
				-<P>
			
 
				-Instead, there is an Alef support library, coded in Acid, that
			
 
				-allows the programmer to interpret the program state in terms of
			
 
				-Alef operations.  Consider the example of a multi-process program
			
 
				-stalling because of improper synchronization.  A stack trace of
			
 
				-the program indicates that it is waiting for an event in some
			
 
				-obscure Alef runtime
			
 
				-synchronization function.
			
 
				-The function itself is irrelevant to the
			
 
				-programmer; of greater importance is the identity of the
			
 
				-unfulfilled event.
			
 
				-Commands in the Alef support library decode
			
 
				-the runtime data structures and program state to report the cause
			
 
				-of the blockage in terms of the high-level operations available to
			
 
				-the Alef programmer.  
			
 
				-Here, the Acid language acts
			
 
				-as a communications medium between Alef implementer and Alef user.
			
 
				-</P>
			
 
				-<H4>12 Parallel Debugging
			
 
				-</H4>
			
 
				-<P>
			
 
				-The central issue in parallel debugging is how the debugger is
			
 
				-multiplexed between the processes comprising
			
 
				-the program.
			
 
				-Acid has no intrinsic model of process partitioning; it
			
 
				-only assumes that parallel programs share a symbol table,
			
 
				-though they need not share memory.
			
 
				-The
			
 
				-<TT>setproc</TT>
			
 
				-primitive attaches the debugger to a running process
			
 
				-associated with the process ID supplied as its argument
			
 
				-and assigns that value to the global variable
			
 
				-<TT>pid</TT>,
			
 
				-thereby allowing simple rotation among a group of processes.
			
 
				-Further, the stack trace primitive is driven by parameters
			
 
				-specifying a unique process context, so it is possible to
			
 
				-examine the state of cooperating processes without switching
			
 
				-the debugger focus from the process of interest.
			
 
				-Since Acid is inherently extensible and capable of
			
 
				-dynamic interaction with subordinate processes, the
			
 
				-programmer can define Acid commands to detect and control
			
 
				-complex interactions between processes.
			
 
				-In short, the programmer is free to specify how the debugger reacts
			
 
				-to events generated in specific threads of the program.
			
 
				-</P>
			
 
				-<P>
			
 
				-The support for parallel debugging in Acid depends on a crucial kernel
			
 
				-modification: when the text segment of a program is written (usually to
			
 
				-place a breakpoint), the segment is cloned to prevent other threads
			
 
				-from encountering the breakpoint.  Although this incurs a slight performance
			
 
				-penalty, it is of little importance while debugging.
			
 
				-</P>
			
 
				-<H4>13 Communication Between Tools
			
 
				-</H4>
			
 
				-<P>
			
 
				-The Plan 9 Alef and C compilers do not
			
 
				-embed detailed type information in the symbol table of an
			
 
				-executable file.
			
 
				-However, they do accept a command line option causing them to
			
 
				-emit descriptions of complex data types
			
 
				-(e.g., aggregates and abstract data types)
			
 
				-to an auxiliary file.
			
 
				-The vehicle for expressing this information is Acid source code.
			
 
				-When an Acid debugging session is 
			
 
				-subsequently started, that file is loaded with the other Acid libraries.
			
 
				-</P>
			
 
				-<P>
			
 
				-For each complex object in the program the compiler generates
			
 
				-three pieces of Acid code.
			
 
				-The first is a table describing the size and offset of each
			
 
				-member of the complex data type.  Following is an Acid function,
			
 
				-named the same as the object, that formats and prints each member.
			
 
				-Finally, Acid declarations associate the
			
 
				-Alef or C program variables of a type with the functions
			
 
				-to print them.
			
 
				-The three forms of declaration are shown in the following example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-struct Bitmap {
			
 
				-	Rectangle    0 r;
			
 
				-	Rectangle   16 clipr;
			
 
				-	'D'   32 ldepth;
			
 
				-	'D'   36 id;
			
 
				-	'X'   40 cache;
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-defn
			
 
				-Bitmap(addr) {
			
 
				-	complex Bitmap addr;
			
 
				-	print("Rectangle r {\n");
			
 
				-	Rectangle(addr.r);
			
 
				-	print("}\n");
			
 
				-	print("Rectangle clipr {\n");
			
 
				-	Rectangle(addr.clipr);
			
 
				-	print("}\n");
			
 
				-	print("	ldepth	", addr.ldepth, "\n");
			
 
				-	print("	id	", addr.id, "\n");
			
 
				-	print("	cache	", addr.cache, "\n");
			
 
				-};
			
 
				-
			
 
				-complex Bitmap darkgrey;
			
 
				-complex Bitmap Window_settag:b;
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>struct</TT>
			
 
				-declaration specifies decoding instructions for the complex type named
			
 
				-<TT>Bitmap</TT>.
			
 
				-Although the syntax is superficially similar to a C structure declaration,
			
 
				-the semantics differ markedly: the C declaration specifies a layout, while
			
 
				-the Acid declaration tells how to decode it.
			
 
				-The declaration specifies a type, an offset, and name for each
			
 
				-member of the complex object. The type is either the name of another
			
 
				-complex declaration, for example,
			
 
				-<TT>Rectangle</TT>,
			
 
				-or a format code.
			
 
				-The offset is the number of bytes from the start
			
 
				-of the object to the member
			
 
				-and the name is the member's name in the Alef or C declaration.
			
 
				-This type description is a close match for C and Alef, but is simple enough
			
 
				-to be language independent.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Bitmap</TT>
			
 
				-function expects the address of a
			
 
				-<TT>Bitmap</TT>
			
 
				-as its only argument.
			
 
				-It uses the decoding information contained in the
			
 
				-<TT>Bitmap</TT>
			
 
				-structure declaration to extract, format, and print the
			
 
				-value of each member of the complex object pointed to by
			
 
				-the argument.
			
 
				-The Alef compiler emits code to call other Acid functions
			
 
				-where a member is another complex type; here,
			
 
				-<TT>Bitmap</TT>
			
 
				-calls
			
 
				-<TT>Rectangle</TT>
			
 
				-to print its contents.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>complex</TT>
			
 
				-declarations associate Alef variables with complex types.
			
 
				-In the example,
			
 
				-<TT>darkgrey</TT>
			
 
				-is the name of a global variable of type
			
 
				-<TT>Bitmap</TT>
			
 
				-in the program being debugged.
			
 
				-Whenever the name
			
 
				-<TT>darkgrey</TT>
			
 
				-is evaluated by Acid, it automatically calls the
			
 
				-<TT>Bitmap</TT>
			
 
				-function with the address of
			
 
				-<TT>darkgrey</TT>
			
 
				-as the argument.
			
 
				-The second
			
 
				-<TT>complex</TT>
			
 
				-declaration associates a local variable or parameter named
			
 
				-<TT>b</TT>
			
 
				-in function
			
 
				-<TT>Window_settag</TT>
			
 
				-with the
			
 
				-<TT>Bitmap</TT>
			
 
				-complex data type.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid borrows the C operators
			
 
				-<TT>.</TT>
			
 
				-and
			
 
				-<TT>-&gt;</TT>
			
 
				-to access the decoding parameters of a member of a complex type.
			
 
				-Although this representation is sufficiently general for describing
			
 
				-the decoding of both C and Alef complex data types, it may
			
 
				-prove too restrictive for target languages with more complicated
			
 
				-type systems.
			
 
				-Further, the assumption that the compiler can select the proper
			
 
				-Acid format code for each basic type in the language is somewhat
			
 
				-naive.  For example, when a member of a complex type is a pointer,
			
 
				-it is assigned a hexadecimal type code; integer members are always 
			
 
				-assigned a decimal type code.
			
 
				-This heuristic proves inaccurate when an integer field is a
			
 
				-bit mask or set of bit flags which are more appropriately displayed
			
 
				-in hexadecimal or octal.
			
 
				-</P>
			
 
				-<H4>14 Code Verification
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid's ability to interact dynamically with
			
 
				-an executing program allows passive test and
			
 
				-verification of the target program.  For example,
			
 
				-a common concern is leak detection in programs using
			
 
				-<TT>malloc</TT>.
			
 
				-Of interest are two items: finding memory that was allocated
			
 
				-but never freed and detecting bad pointers passed to
			
 
				-<TT>free</TT>.
			
 
				-An auxiliary Acid library contains Acid functions to
			
 
				-monitor the execution of a program and detect these
			
 
				-faults, either as they happen or in the automated
			
 
				-post-mortem analysis of the memory arena.
			
 
				-In the following example, the
			
 
				-<TT>sort</TT>
			
 
				-command is run under the control of the
			
 
				-Acid memory leak library.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-helix% acid -l malloc /bin/sort
			
 
				-/bin/sort: mips plan 9 executable
			
 
				-/lib/acid/port
			
 
				-/lib/acid/mips
			
 
				-/lib/acid/malloc
			
 
				-acid: go()
			
 
				-now
			
 
				-is
			
 
				-the
			
 
				-time
			
 
				-&lt;ctrl-d&gt;
			
 
				-is
			
 
				-now
			
 
				-the
			
 
				-time
			
 
				-27680 : breakpoint	_exits+0x4	MOVW	0x8,R1
			
 
				-acid: 
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>go</TT>
			
 
				-command creates a process and plants
			
 
				-breakpoints at the entry to
			
 
				-<TT>malloc</TT>
			
 
				-and
			
 
				-<TT>free</TT>.
			
 
				-The program is then started and continues until it
			
 
				-exits or stops.  If the reason for stopping is anything
			
 
				-other than the breakpoints in
			
 
				-<TT>malloc</TT>
			
 
				-and
			
 
				-<TT>free</TT>,
			
 
				-Acid prints the usual status information and returns to the
			
 
				-interactive prompt.
			
 
				-</P>
			
 
				-<P>
			
 
				-When the process stops on entering
			
 
				-<TT>malloc</TT>,
			
 
				-the debugger must capture and save the address that
			
 
				-<TT>malloc</TT>
			
 
				-will return.
			
 
				-After saving a stack
			
 
				-trace so the calling routine can be identified, it places
			
 
				-a breakpoint at the return address and restarts the program.
			
 
				-When
			
 
				-<TT>malloc</TT>
			
 
				-returns, the breakpoint stops the program,
			
 
				-allowing the debugger
			
 
				-to grab the address of the new memory block from the return register.
			
 
				-The address and stack trace are added to the list of outstanding
			
 
				-memory blocks, the breakpoint is removed from the return point, and
			
 
				-the process is restarted.
			
 
				-</P>
			
 
				-<P>
			
 
				-When the process stops at the beginning of
			
 
				-<TT>free</TT>,
			
 
				-the memory address supplied as the argument is compared to the list
			
 
				-of outstanding memory blocks.  If it is not found, an error message
			
 
				-and a stack trace of the call are reported; otherwise, the
			
 
				-address is deleted from the list.
			
 
				-</P>
			
 
				-<P>
			
 
				-When the program exits, the list of outstanding memory blocks contains
			
 
				-the addresses of all blocks that were allocated but never freed.
			
 
				-The
			
 
				-<TT>leak</TT>
			
 
				-library function traverses the list producing a report describing
			
 
				-the allocated blocks.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: leak()
			
 
				-Lost a total of 524288 bytes from:
			
 
				-    malloc() malloc.c:32 called from dofile+0xe8 sort.c:217 
			
 
				-    dofile() sort.c:190 called from main+0xac sort.c:161 
			
 
				-    main() sort.c:128 called from _main+0x20 main9.s:10 
			
 
				-Lost a total of 64 bytes from:
			
 
				-    malloc() malloc.c:32 called from newline+0xfc sort.c:280 
			
 
				-    newline() sort.c:248 called from dofile+0x110 sort.c:222 
			
 
				-    dofile() sort.c:190 called from main+0xac sort.c:161 
			
 
				-    main() sort.c:128 called from _main+0x20 main9.s:10 
			
 
				-Lost a total of 64 bytes from:
			
 
				-    malloc() malloc.c:32 called from realloc+0x14 malloc.c:129 
			
 
				-    realloc() malloc.c:123 called from bldkey+0x358 sort.c:1388 
			
 
				-    buildkey() sort.c:1345 called from newline+0x150 sort.c:285 
			
 
				-    newline() sort.c:248 called from dofile+0x110 sort.c:222 
			
 
				-    dofile() sort.c:190 called from main+0xac sort.c:161 
			
 
				-    main() sort.c:128 called from _main+0x20 main9.s:10
			
 
				-acid: refs()
			
 
				-data...bss...stack...
			
 
				-acid: leak()
			
 
				-acid: 
			
 
				-</PRE></TT></DL>
			
 
				-The presence of a block in the allocation list does not imply
			
 
				-it is there because of a leak; for instance, it may have been
			
 
				-in use when the program terminated.
			
 
				-The
			
 
				-<TT>refs()</TT>
			
 
				-library function scans the
			
 
				-<I>data</I>,
			
 
				-<I>bss</I>,
			
 
				-and
			
 
				-<I>stack</I>
			
 
				-segments of the process looking for pointers
			
 
				-into the allocated blocks.  When one is found, the block is deleted from
			
 
				-the outstanding block list.
			
 
				-The
			
 
				-<TT>leak</TT>
			
 
				-function is used again to report the
			
 
				-blocks remaining allocated and unreferenced.
			
 
				-This strategy proves effective in detecting
			
 
				-disconnected (but non-circular) data structures.
			
 
				-</P>
			
 
				-<P>
			
 
				-The leak detection process is entirely passive.
			
 
				-The program is not
			
 
				-specially compiled and the source code is not required.
			
 
				-As with the Acid support functions for the Alef runtime environment,
			
 
				-the author of the library routines has encapsulated the
			
 
				-functionality of the library interface
			
 
				-in Acid code.
			
 
				-Any programmer may then check a program's use of the
			
 
				-library routines without knowledge of either implementation.
			
 
				-The performance impact of running leak detection is great
			
 
				-(about 10 times slower),
			
 
				-but it has not prevented interactive programs like
			
 
				-<TT>sam</TT>
			
 
				-and the
			
 
				-<TT>8&#189;</TT>
			
 
				-window system from being tested.
			
 
				-</P>
			
 
				-<H4>15 Code Coverage
			
 
				-</H4>
			
 
				-<P>
			
 
				-Another common component of software testing is 
			
 
				-<I>coverage</I>
			
 
				-analysis.
			
 
				-The purpose of the test is to determine which paths through the code have
			
 
				-not been executed while running the test suite.
			
 
				-This is usually
			
 
				-performed by a combination of compiler support and a reporting tool run
			
 
				-on the output generated by statements compiled into the program.
			
 
				-The compiler emits code that
			
 
				-logs the progress of the program as it executes basic blocks and writes the
			
 
				-results to a file. The file is then processed by the reporting tool 
			
 
				-to determine which basic blocks have not been executed.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid can perform the same function in a language-independent manner without
			
 
				-modifying the source, object or binary of the program. The following example
			
 
				-shows
			
 
				-<TT>ls</TT>
			
 
				-being run under the control of the Acid coverage library.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-philw-helix% acid -l coverage /bin/ls
			
 
				-/bin/ls: mips plan 9 executable
			
 
				-/lib/acid/port
			
 
				-/lib/acid/mips
			
 
				-/lib/acid/coverage
			
 
				-acid: coverage()
			
 
				-acid
			
 
				-newstime
			
 
				-profile
			
 
				-tel
			
 
				-wintool
			
 
				-2: (error) msg: pid=11419 startstop: process exited
			
 
				-acid: analyse(ls)
			
 
				-ls.c:102,105
			
 
				-	102:     return 1;
			
 
				-	103: }
			
 
				-	104: if(db[0].qid.path&amp;CHDIR &amp;&amp; dflag==0){
			
 
				-	105:     output();
			
 
				-ls.c:122,126
			
 
				-	122:     memmove(dirbuf+ndir, db, sizeof(Dir));
			
 
				-	123:     dirbuf[ndir].prefix = 0;
			
 
				-	124:     p = utfrrune(s, '/');
			
 
				-	125:     if(p){
			
 
				-	126:         dirbuf[ndir].prefix = s;
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>coverage</TT>
			
 
				-function begins by looping through the text segment placing
			
 
				-breakpoints at the entry to each basic block. The start of each basic
			
 
				-block is found using the Acid builtin function
			
 
				-<TT>follow</TT>.
			
 
				-If the list generated by
			
 
				-<TT>follow</TT>
			
 
				-contains more than one
			
 
				-element, then the addresses mark the start of basic blocks. A breakpoint
			
 
				-is placed at each address to detect entry into the block. If the result
			
 
				-of
			
 
				-<TT>follow</TT>
			
 
				-is a single address then no action is taken, and the next address is
			
 
				-considered. Acid maintains a list of
			
 
				-breakpoints already in place and avoids placing duplicates (an address may be
			
 
				-the destination of several branches).
			
 
				-</P>
			
 
				-<P>
			
 
				-After placing the breakpoints the program is set running.
			
 
				-Each time a breakpoint is encountered
			
 
				-Acid deletes the address from the breakpoint list, removes the breakpoint
			
 
				-from memory and then restarts the program.
			
 
				-At any instant the breakpoint list contains the addresses of basic blocks
			
 
				-which have not been executed. 
			
 
				-The
			
 
				-<TT>analyse</TT>
			
 
				-function reports the lines of source code bounded by basic blocks
			
 
				-whose addresses have not been deleted from the breakpoint list.
			
 
				-These are the basic blocks which have not been executed.
			
 
				-Program performance is almost unaffected since each breakpoint is executed
			
 
				-only once and then removed.
			
 
				-</P>
			
 
				-<P>
			
 
				-The library contains a total of 128 lines of Acid code.
			
 
				-An obvious extension of this algorithm could be used to provide basic block
			
 
				-profiling.
			
 
				-</P>
			
 
				-<H4>16 Conclusion
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acid has two areas of weakness. As with
			
 
				-other language-based tools like
			
 
				-<I>awk</I>,
			
 
				-a programmer must learn yet another language to step beyond the normal
			
 
				-debugging functions and use the full power of the debugger.
			
 
				-Second, the command line interface supplied by the
			
 
				-<I>yacc</I>
			
 
				-parser is inordinately clumsy.
			
 
				-Part of the problem relates directly to the use of
			
 
				-<I>yacc</I>
			
 
				-and could be circumvented with a custom parser.
			
 
				-However, structural problems would remain: Acid often requires
			
 
				-too much typing to execute a simple
			
 
				-command.
			
 
				-A debugger should prostitute itself to its users, doing whatever
			
 
				-is wanted with a minimum of encouragement; commands should be
			
 
				-concise and obvious. The language interface is more consistent than
			
 
				-an ad hoc command interface but is clumsy to use.
			
 
				-Most of these problems are addressed by an Acme interface
			
 
				-which is under construction. This should provide the best of
			
 
				-both worlds: graphical debugging and access to the underlying acid
			
 
				-language when required.
			
 
				-</P>
			
 
				-<P>
			
 
				-The name space clash between Acid variables, keywords, program variables,
			
 
				-and functions is unavoidable.
			
 
				-Although it rarely affects a debugging session, it is annoying
			
 
				-when it happens and is sometimes difficult to circumvent.
			
 
				-The current renaming scheme
			
 
				-is too crude; the new names are too hard to remember.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acid has proved to be a powerful tool whose applications
			
 
				-have exceeded expectations.
			
 
				-Of its strengths, portability, extensibility and parallel debugging support
			
 
				-were by design and provide the expected utility.
			
 
				-In retrospect,
			
 
				-its use as a tool for code test and verification and as
			
 
				-a medium for communicating type information and encapsulating
			
 
				-interfaces has provided unanticipated benefits and altered our
			
 
				-view of the debugging process.
			
 
				-</P>
			
 
				-<H4>17 Acknowledgments
			
 
				-</H4>
			
 
				-<P>
			
 
				-Bob Flandrena was the first user and helped prepare the paper.
			
 
				-Rob Pike endured three buggy Alef compilers and a new debugger
			
 
				-in a single sitting.
			
 
				-</P>
			
 
				-<H4>18 References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
			
 
				-``Plan 9 from Bell Labs'',
			
 
				-UKUUG Proc. of the Summer 1990 Conf.,
			
 
				-London, England,
			
 
				-1990,
			
 
				-reprinted, in a different form, in this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Gol93] M. Golan, D. Hanson,
			
 
				-``DUEL -- A Very High-Level Debugging Language'',
			
 
				-USENIX Proc. of the Winter 1993 Conf.,
			
 
				-San Diego, CA,
			
 
				-1993.
			
 
				-<br>&#32;<br>
			
 
				-[Lin90] M. A. Linton,
			
 
				-``The Evolution of DBX'',
			
 
				-USENIX Proc. of the Summer 1990 Conf.,
			
 
				-Anaheim, CA,
			
 
				-1990.
			
 
				-<br>&#32;<br>
			
 
				-[Stal91] R. M. Stallman, R. H. Pesch,
			
 
				-``Using GDB: A guide to the GNU source level debugger'',
			
 
				-Technical Report, Free Software Foundation,
			
 
				-Cambridge, MA,
			
 
				-1991.
			
 
				-<br>&#32;<br>
			
 
				-[Win93] P. Winterbottom,
			
 
				-``Alef Reference Manual'',
			
 
				-this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Pike93] Rob Pike,
			
 
				-``Acme: A User Interface for Programmers'',
			
 
				-USENIX Proc. of the Winter 1994 Conf.,
			
 
				-San Francisco, CA,
			
 
				-reprinted in this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Ols90] Ronald A. Olsson, Richard H. Crawford, and W. Wilson Ho,
			
 
				-``Dalek: A GNU, improved programmable debugger'',
			
 
				-USENIX Proc. of the Summer 1990 Conf.,
			
 
				-Anaheim, CA.
			
 
				-<br>&#32;<br>
			
 
				-[May92] Paul Maybee,
			
 
				-``NeD: The Network Extensible Debugger'',
			
 
				-USENIX Proc. of the Summer 1992 Conf.,
			
 
				-San Antonio, TX.
			
 
				-<br>&#32;<br>
			
 
				-[Aral] Ziya Aral, Ilya Gertner, and Greg Schaffer,
			
 
				-``Efficient debugging primitives for multiprocessors'',
			
 
				-Proceedings of the Third International Conference on Architectural
			
 
				-Support for Programming Languages and Operating Systems,
			
 
				-SIGPLAN notices Nr. 22, May 1989.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/acme/acme.html
+++ b/sys/doc/acme/acme.html
@@ -1,1351 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Acme: A User Interface for Programmers
			
 
				-</H1>
			
 
				-<DL><DD><I><I>Rob Pike</I>
			
 
				-<I>rob@plan9.bell-labs.com</I>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> <HR>
			
 
				-<br>
			
 
				-Originally appeared in
			
 
				-Proc. of the Winter 1994 USENIX Conf.,
			
 
				-pp. 223-234,
			
 
				-San Francisco, CA
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-A hybrid of window system, shell, and editor, Acme gives text-oriented
			
 
				-applications a clean, expressive, and consistent style of interaction.
			
 
				-Traditional window systems support interactive client programs and offer libraries of
			
 
				-pre-defined operations such as pop-up menus
			
 
				-and buttons to promote a consistent
			
 
				-user interface among the clients.
			
 
				-Acme instead provides its clients with a fixed user interface and
			
 
				-simple conventions to encourage its uniform use.
			
 
				-Clients access the facilities of Acme through a file system interface;
			
 
				-Acme is in part a file server that exports device-like files that may be
			
 
				-manipulated to access and control the contents of its windows.
			
 
				-Written in a concurrent programming language,
			
 
				-Acme is structured as a set of communicating processes that neatly subdivide
			
 
				-the various aspects of its tasks: display management, input, file server, and so on.
			
 
				-<P>
			
 
				-Acme attaches distinct functions to the three mouse buttons:
			
 
				-the left selects text;
			
 
				-the middle executes textual commands;
			
 
				-and the right combines context search and file opening
			
 
				-functions to integrate the various applications and files in
			
 
				-the system.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme works well enough to have developed
			
 
				-a community that uses it exclusively.
			
 
				-Although Acme discourages the traditional style of interaction
			
 
				-based on typescript windows&#8212;teletypes&#8212;its
			
 
				-users find Acme's other services render
			
 
				-typescripts obsolete. 
			
 
				-</DL>
			
 
				-</P>
			
 
				-<H4>History and motivation
			
 
				-</H4>
			
 
				-<P>
			
 
				-The usual typescript style of interaction with
			
 
				-Unix and its relatives is an old one.
			
 
				-The typescript&#8212;an intermingling of textual commands and their
			
 
				-output&#8212;originates with the scrolls of paper on teletypes.
			
 
				-The advent of windowed terminals has given each user what
			
 
				-amounts to an array of teletypes, a limited and unimaginative
			
 
				-use of the powers of bitmap displays and mice.
			
 
				-Systems like the Macintosh
			
 
				-that do involve the mouse as an integral part of the interaction
			
 
				-are geared towards general users, not experts, and certainly
			
 
				-not programmers.
			
 
				-Software developers, at least on time-sharing systems, have been left behind.
			
 
				-<br><img src="acme.fig1.14050.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-
			
 
				-Figure 1.  A small Acme screen&#8212;normally it runs on a larger display&#8212;demonstrating
			
 
				-some of the details discussed in the text.
			
 
				-The right column contains some guide files,
			
 
				-a mailbox presented by Acme's mail program,
			
 
				-the columnated display of files in Acme's own source directory,
			
 
				-a couple of windows from the OED browser,
			
 
				-a debugger window,
			
 
				-and an error window showing diagnostics from a compilation.
			
 
				-The left column holds a couple of source files
			
 
				-(<TT>dat.h</TT>
			
 
				-and
			
 
				-<TT>acme.l</TT>),
			
 
				-another debugger window displaying a stack trace,
			
 
				-and a third source file
			
 
				-(<TT>time.l</TT>).
			
 
				-<TT>Time.l</TT>
			
 
				-was opened from the debugger by clicking the right mouse button
			
 
				-on a line in the stack window;
			
 
				-the mouse cursor landed on the offending line of
			
 
				-<TT>acme.l</TT>
			
 
				-after a click on the compiler message.
			
 
				-<br>
			
 
				-<HR>
			
 
				-</P>
			
 
				-<P>
			
 
				-Some programs have mouse-based editing of
			
 
				-text files and typescripts;
			
 
				-ones I have built include
			
 
				-the window systems
			
 
				-<TT>mux</TT>
			
 
				-[Pike88]
			
 
				-and
			
 
				-<TT>8&#189;</TT>
			
 
				-[Pike91]
			
 
				-and the text editor
			
 
				-Sam [Pike87].
			
 
				-These have put the programmer's mouse to some productive work,
			
 
				-but not wholeheartedly.  Even experienced users of these programs
			
 
				-often retype text that could be grabbed with the mouse,
			
 
				-partly because the menu-driven interface is imperfect
			
 
				-and partly because the various pieces are not well enough integrated.
			
 
				-</P>
			
 
				-<P>
			
 
				-Other programs&#8212;EMACS [Stal93] is the prime example&#8212;offer a high
			
 
				-degree of integration but with a user interface built around the
			
 
				-ideas of cursor-addressed terminals that date from the 1970's.
			
 
				-They are still keyboard-intensive and
			
 
				-dauntingly complex.
			
 
				-</P>
			
 
				-<P>
			
 
				-The most ambitious attempt to face these issues was the Cedar
			
 
				-system, developed at Xerox [Swei86].
			
 
				-It combined a new programming language, compilers,
			
 
				-window system, even microcode&#8212;a complete system&#8212;to
			
 
				-construct a productive, highly
			
 
				-integrated and interactive environment
			
 
				-for experienced users of compiled languages.
			
 
				-Although successful internally, the system was so large
			
 
				-and so tied to specific hardware that it never fledged.
			
 
				-</P>
			
 
				-<P>
			
 
				-Cedar was, however, the major inspiration for Oberon [Wirt89],
			
 
				-a system of similar scope but much smaller scale.
			
 
				-Through careful selection of Cedar's ideas, Oberon shows
			
 
				-that its lessons can be applied to a small, coherent system
			
 
				-that can run efficiently on modest hardware.
			
 
				-In fact, Oberon probably
			
 
				-errs too far towards simplicity: a single-process system
			
 
				-with weak networking, it seems an architectural throwback.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme is a new program,
			
 
				-a combined window system, editor, and shell,
			
 
				-that applies
			
 
				-some of the ideas distilled by Oberon.
			
 
				-Where Oberon uses objects and modules within a programming language (also called Oberon),
			
 
				-Acme uses files and commands within an existing operating system (Plan 9).
			
 
				-Unlike Oberon, Acme does not yet have support for graphical output, just text.
			
 
				-At least for now, the work on Acme has concentrated on
			
 
				-producing the smoothest user interface possible for a programmer
			
 
				-at work.
			
 
				-</P>
			
 
				-<P>
			
 
				-The rest of this paper describes Acme's interface,
			
 
				-explains how programs can access it,
			
 
				-compares it to existing systems,
			
 
				-and finally presents some unusual aspects of its implementation.
			
 
				-</P>
			
 
				-<H4>User interface
			
 
				-</H4>
			
 
				-<P>
			
 
				-<br><img src="acme.fig2.14051.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-
			
 
				-Figure 2.  An Acme window showing a section of code.
			
 
				-The upper line of text is the tag containing the file name,
			
 
				-relevant commands, and a scratch area (right of the vertical bar);
			
 
				-the lower portion of the window is the
			
 
				-body, or contents, of the file.
			
 
				-Here the scratch area contains a command for the middle button
			
 
				-(<TT>mk</TT>)
			
 
				-and a word to search for with the right button
			
 
				-(<TT>cxfidalloc</TT>).
			
 
				-The user has just
			
 
				-clicked the right button on
			
 
				-<TT>cxfidalloc</TT>
			
 
				-and Acme has searched for the word, highlighted it,
			
 
				-and moved the mouse cursor there.  The file has been modified:
			
 
				-the center of the layout box is black and the command
			
 
				-<TT>Put</TT>
			
 
				-appears in the tag.
			
 
				-<br>
			
 
				-<HR>
			
 
				-Acme windows are arrayed in columns (Figure 1) and are used more
			
 
				-dynamically than in an environment like X Windows or
			
 
				-<TT>8&#189;</TT>
			
 
				-[Sche86, Pike91].
			
 
				-The system frequently creates them automatically and the user
			
 
				-can order a new one with a single mouse button click.
			
 
				-The initial placement of a new window is determined
			
 
				-automatically, but the user may move an existing window anywhere
			
 
				-by clicking or dragging a
			
 
				-<I>layout box</I>
			
 
				-in the upper left corner of
			
 
				-the window.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme windows have two parts: a
			
 
				-<I>tag</I>
			
 
				-holding a single line of text,
			
 
				-above a
			
 
				-<I>body</I>
			
 
				-holding zero or more lines (Figure 2).
			
 
				-The body typically contains an image of a file being edited
			
 
				-or the editable output of a
			
 
				-program, analogous to an
			
 
				-EMACS shell
			
 
				-window.  The tag contains
			
 
				-the name of the window
			
 
				-(usually the name of the associated
			
 
				-file or directory), some built-in commands, and a scratch area to hold arbitrary text.
			
 
				-If a window represents a directory, the name in the tag ends with
			
 
				-a slash and the body contains a list of the names of the files
			
 
				-in the directory.
			
 
				-Finally, each non-empty body holds a scroll bar at the left of the text.
			
 
				-</P>
			
 
				-<P>
			
 
				-Each column of windows also has a layout box and a tag.
			
 
				-The tag has no special meaning, although Acme pre-loads it with a few
			
 
				-built-in commands.
			
 
				-There is also a tag across the whole display, also loaded with
			
 
				-helpful commands and a list of active processes started
			
 
				-by Acme.
			
 
				-</P>
			
 
				-<P>
			
 
				-Typing with the keyboard and selecting with the left button are as in
			
 
				-many other systems, including the Macintosh,
			
 
				-<TT>8&#189;</TT>,
			
 
				-and Sam.
			
 
				-The middle and right buttons are used, somewhat like the left button,
			
 
				-to `sweep' text, but the indicated text is treated in a way
			
 
				-that depends on the text's location&#8212;<I>context</I>&#8212;as well as its content.
			
 
				-This context, based on the directory of the file containing the text,
			
 
				-is a central component of Acme's style of interaction.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme has no single notion of `current directory'.
			
 
				-Instead, every command, file name,
			
 
				-action, and so on is interpreted or executed in the directory named by the
			
 
				-tag of the window containing the command.  For example, the string
			
 
				-<TT>mammals</TT>
			
 
				-in a window labeled
			
 
				-<TT>/lib/</TT>
			
 
				-or
			
 
				-<TT>/lib/insects</TT>
			
 
				-will be interpreted as the file name
			
 
				-<TT>/lib/mammals</TT>
			
 
				-if such a file exists.
			
 
				-</P>
			
 
				-<P>
			
 
				-Throughout Acme, the middle mouse button is used to execute commands
			
 
				-and the right mouse button is used to locate and select files and text.
			
 
				-Even when there are no true files on which to operate&#8212;for example
			
 
				-when editing mail messages&#8212;Acme and its applications use
			
 
				-consistent extensions of these basic functions.
			
 
				-This idea is as vital to Acme as icons are to the Macintosh.
			
 
				-</P>
			
 
				-<P>
			
 
				-The middle button executes commands: text swept with the button
			
 
				-pressed is underlined; when the button is released, the underline is
			
 
				-removed and the indicated text is executed.
			
 
				-A modest number of commands are recognized as built-ins: words like
			
 
				-<TT>Cut</TT>,
			
 
				-<TT>Paste</TT>,
			
 
				-and
			
 
				-<TT>New</TT>
			
 
				-name
			
 
				-functions performed directly by Acme.
			
 
				-These words often appear in tags to make them always available,
			
 
				-but the tags are not menus: any text anywhere in Acme may be a command.
			
 
				-For example, in the tag or body of any window one may type
			
 
				-<TT>Cut</TT>,
			
 
				-select it with the left button, use the middle button to execute it,
			
 
				-and watch it disappear again.
			
 
				-</P>
			
 
				-<P>
			
 
				-If the middle button indicates a command that is not recognized as a built-in,
			
 
				-it is executed in the directory
			
 
				-named by the tag of the window holding the text.
			
 
				-Also, the file to be executed is searched for first in that directory.
			
 
				-Standard input is connected to
			
 
				-<TT>/dev/null</TT>,
			
 
				-but standard and error outputs are connected to an Acme window,
			
 
				-created if needed, called
			
 
				-<I>dir</I><TT>/+Errors</TT> where
			
 
				-<I>dir</I>
			
 
				-is the directory of the window.
			
 
				-(Programs that need interactive input use a different interface, described below.)
			
 
				-A typical use of this is to type
			
 
				-<TT>mk</TT>
			
 
				-(Plan 9's
			
 
				-<TT>make</TT>)
			
 
				-in the scratch area in the tag of a C source window, say
			
 
				-<TT>/sys/src/cmd/sam/regexp.c</TT>,
			
 
				-and execute it.
			
 
				-Output, including compiler errors, appears in the window labeled
			
 
				-<TT>/sys/src/cmd/sam/+Errors</TT>,
			
 
				-so file names in the output are associated with the windows and directory
			
 
				-holding the source.
			
 
				-The
			
 
				-<TT>mk</TT>
			
 
				-command remains in the tag, serving as a sort of menu item for the associated
			
 
				-window.
			
 
				-</P>
			
 
				-<P>
			
 
				-Like the middle button, the right button is used to indicate text by sweeping it out.
			
 
				-The indicated text is not a command, however, but the argument of a generalized
			
 
				-search operator.
			
 
				-If the text, perhaps after appending it to the directory of the window containing it,
			
 
				-is the name of an existing file, Acme creates a new window to hold the file
			
 
				-and reads it in.  It then moves the mouse cursor to that window.  If the file is
			
 
				-already loaded into Acme, the mouse motion happens but no new window is made.
			
 
				-For example, indicating the string
			
 
				-<TT>sam.h</TT>
			
 
				-in
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#include "sam.h"
			
 
				-</PRE></TT></DL>
			
 
				-in a window on the file
			
 
				-<TT>/sys/src/cmd/sam/regexp.c</TT>
			
 
				-will open the file
			
 
				-<TT>/sys/src/cmd/sam/sam.h</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-If the file name is followed immediately by a colon and a legal address in
			
 
				-Sam notation (for example a line number or a regular expression delimited in
			
 
				-slashes or a comma-separated compound of such addresses), Acme highlights
			
 
				-the target of that address in the file and places the mouse there.  One may jump to
			
 
				-line 27 of
			
 
				-<TT>dat.h</TT>
			
 
				-by indicating with the right button the text
			
 
				-<TT>dat.h:27</TT>.
			
 
				-If the file is not already open, Acme loads it.
			
 
				-If the file name is null, for example if the indicated string is
			
 
				-<TT>:/^main/</TT>,
			
 
				-the file is assumed to be that of the window containing the string.
			
 
				-Such strings, when typed and evaluated in the tag of a window, amount to
			
 
				-context searches.
			
 
				-</P>
			
 
				-<P>
			
 
				-If the indicated text is not the name of an existing file, it is taken to be literal
			
 
				-text and is searched for in the body of the window containing the text, highlighting
			
 
				-the result as if it were the result of a context search.
			
 
				-</P>
			
 
				-<P>
			
 
				-For the rare occasion when a file name
			
 
				-<I>is</I>
			
 
				-just text to search for, it can be selected with the left button and used as the
			
 
				-argument to a built-in
			
 
				-<TT>Look</TT>
			
 
				-command that always searches for literal text.
			
 
				-</P>
			
 
				-<H4>Nuances and heuristics
			
 
				-</H4>
			
 
				-<P>
			
 
				-A user interface should not only provide the necessary functions, it should also
			
 
				-<I>feel</I>
			
 
				-right.
			
 
				-In fact, it should almost not be felt at all; when one notices a
			
 
				-user interface, one is distracted from the job at hand [Pike88].
			
 
				-To approach this invisibility, some of Acme's properties and features
			
 
				-are there just to make the others easy to use.
			
 
				-Many are based on a fundamental principle of good design:
			
 
				-let the machine do the work.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme tries to avoid needless clicking and typing.
			
 
				-There is no `click-to-type', eliminating a button click.
			
 
				-There are no pop-up or pull-down menus, eliminating the mouse action needed to
			
 
				-make a menu appear.
			
 
				-The overall design is intended to make text on the screen useful without
			
 
				-copying or retyping; the ways in which this happens involve
			
 
				-the combination of many aspects of the interface.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme tiles its windows and places them automatically
			
 
				-to avoid asking the user to place and arrange them.
			
 
				-For this policy to succeed, the automatic placement must behave well enough
			
 
				-that the user is usually content with the location of a new window.
			
 
				-The system will never get it right all the time, but in practice most
			
 
				-windows are used at least for a while where Acme first places them.
			
 
				-There have been several complete rewrites of the
			
 
				-heuristics for placing a new window,
			
 
				-and with each rewrite the system became
			
 
				-noticeably more comfortable.  The rules are as follows, although
			
 
				-they are still subject to improvement.
			
 
				-The window appears in the `active' column, that most recently used for typing or
			
 
				-selecting.
			
 
				-Executing and searching do not affect the choice of active column,
			
 
				-so windows of commands and such do not draw new windows towards them,
			
 
				-but rather let them form near the targets of their actions.
			
 
				-Output (error) windows always appear towards the right, away from
			
 
				-edited text, which is typically kept towards the left.
			
 
				-Within the column, several competing desires are balanced to decide where
			
 
				-and how large the window should be:
			
 
				-large blank spaces should be consumed;
			
 
				-existing text should remain visible;
			
 
				-existing large windows should be divided before small ones;
			
 
				-and the window should appear near the one containing the action that caused
			
 
				-its creation.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme binds some actions to chords of mouse buttons.
			
 
				-These include
			
 
				-<TT>Cut</TT>
			
 
				-and
			
 
				-<TT>Paste</TT>
			
 
				-so these common operations can be done without
			
 
				-moving the mouse.
			
 
				-Another is a way to apply a command in one window to text (often a file name)
			
 
				-in another, avoiding the actions needed to assemble the command textually.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another way Acme avoids the need to move the mouse is instead to move the cursor
			
 
				-to where it is likely to be used next.  When a new window is made, Acme
			
 
				-moves the cursor to the new window; in fact, to the selected text in that window.
			
 
				-When the user deletes a newly made window, the cursor is
			
 
				-returned to the point it was before the window was made,
			
 
				-reducing the irritation of windows that pop up to report annoying errors.
			
 
				-</P>
			
 
				-<P>
			
 
				-When a window is moved, Acme moves the cursor to the layout box in
			
 
				-its new place, to permit further adjustment without moving the mouse.
			
 
				-For example, when a click of the left mouse button on the layout box grows
			
 
				-the window, the cursor moves to the new location of the box so repeated clicks,
			
 
				-without moving the mouse, continue to grow it.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another form of assistance the system can offer is to supply precision in
			
 
				-pointing the mouse.  The best-known form of this is `double-clicking' to
			
 
				-select a word rather than carefully sweeping out the entire word.
			
 
				-Acme provides this feature, using context to decide whether to select
			
 
				-a word, line, quoted string, parenthesized expression, and so on.
			
 
				-But Acme takes the idea much further by applying it to execution
			
 
				-and searching.
			
 
				-A
			
 
				-<I>single</I>
			
 
				-click, that is, a null selection, with either the middle or right buttons,
			
 
				-is expanded automatically to indicate the appropriate text containing
			
 
				-the click.  What is appropriate depends on the context.
			
 
				-</P>
			
 
				-<P>
			
 
				-For example, to execute a single-word command
			
 
				-such as
			
 
				-<TT>Cut</TT>,
			
 
				-it is not necessary to sweep the entire word; just clicking the button once with
			
 
				-the mouse pointing at the word is sufficient.  `Word'
			
 
				-means the largest string of likely file name characters surrounding the location
			
 
				-of the click: click on a file name, run that program.
			
 
				-On the right button, the rules are more complicated because
			
 
				-the target of the click might be a file name, file name with address,
			
 
				-or just plain text.  Acme examines the text near the click to find
			
 
				-a likely file name;
			
 
				-if it finds one, it checks that it names an existing file (in the directory named in the tag, if the name is relative)
			
 
				-and if so, takes that as the result, after extending it with any address
			
 
				-that may be present.  If there is no file with that name, Acme
			
 
				-just takes the largest alphanumeric string under the click.
			
 
				-The effect is a natural overloading of the button to refer to plain text as
			
 
				-well as file names.
			
 
				-</P>
			
 
				-<P>
			
 
				-First, though, if the click occurs over the left-button-selected text in the window,
			
 
				-that text is taken to be what is selected.
			
 
				-This makes it easy to skip through the occurrences of a string in a file: just click
			
 
				-the right button
			
 
				-on some occurrence of the text in the window (perhaps after typing it in the tag)
			
 
				-and click once for each subsequent occurrence.  It isn't even necessary to move
			
 
				-the mouse between clicks; Acme does that.
			
 
				-To turn a complicated command into a sort of menu item, select it:
			
 
				-thereafter, clicking the middle button on it will execute the full command.
			
 
				-</P>
			
 
				-<P>
			
 
				-As an extra feature, Acme recognizes file names in angle brackets
			
 
				-<TT>&lt;&gt;</TT>
			
 
				-as names of files in standard directories of include files,
			
 
				-making it possible for instance to look at
			
 
				-<TT>&lt;stdio.h&gt;</TT>
			
 
				-with a single click.
			
 
				-</P>
			
 
				-<P>
			
 
				-Here's an example to demonstrate how the actions and defaults work together.
			
 
				-Assume
			
 
				-<TT>/sys/src/cmd/sam/regexp.c</TT>
			
 
				-is
			
 
				-open and has been edited.  We write it (execute
			
 
				-<TT>Put</TT>
			
 
				-in the tag; once the file is written, Acme removes the word from the tag)
			
 
				-and type
			
 
				-<TT>mk</TT>
			
 
				-in the tag.  We execute
			
 
				-<TT>mk</TT>
			
 
				-and get some errors, which appear in a new window labeled
			
 
				-<TT>/sys/src/cmd/sam/+Errors</TT>.
			
 
				-The cursor moves automatically to that window.
			
 
				-Say the error is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-main.c:112: incompatible types on assignment to `pattern'
			
 
				-</PRE></TT></DL>
			
 
				-We move the mouse slightly and click the right button
			
 
				-at the left of the error message; Acme
			
 
				-makes a new window, reads
			
 
				-<TT>/sys/src/cmd/sam/main.c</TT>
			
 
				-into it, selects line 112
			
 
				-and places the mouse there, right on the offending line.
			
 
				-</P>
			
 
				-<H4>Coupling to existing programs
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acme's syntax for file names and addresses makes it easy for other programs
			
 
				-to connect automatically to Acme's capabilities.  For example, the output of
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-grep -n variable *.[ch]
			
 
				-</PRE></TT></DL>
			
 
				-can be used to help Acme step through the occurrences of a variable in a program;
			
 
				-every line of output is potentially a command to open a file.
			
 
				-The file names need not be absolute, either: the output
			
 
				-appears in a window labeled with the directory in which
			
 
				-<TT>grep</TT>
			
 
				-was run, from which Acme can derive the full path names.
			
 
				-</P>
			
 
				-<P>
			
 
				-When necessary, we have changed the output of some programs,
			
 
				-such as compiler error messages, to match
			
 
				-Acme's syntax.
			
 
				-Some might argue that it shouldn't be necessary to change old programs,
			
 
				-but sometimes programs need to be updated when systems change,
			
 
				-and consistent output benefits people as well as programs.
			
 
				-A historical example is the retrofitting of standard error output to the
			
 
				-early Unix programs when pipes were invented.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another change was to record full path names in
			
 
				-the symbol table of executables, so line numbers reported by the debugger
			
 
				-are absolute names that may be used directly by Acme; it's not necessary
			
 
				-to run the debugger in the source directory.  (This aids debugging
			
 
				-even without Acme.)
			
 
				-</P>
			
 
				-<P>
			
 
				-A related change was to add lines of the form
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#pragma src "/sys/src/libregexp"
			
 
				-</PRE></TT></DL>
			
 
				-to header files; coupled with Acme's ability to locate a header file,
			
 
				-this provides a fast, keyboardless way to get the source associated with a library.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally, Acme directs the standard output of programs it runs to
			
 
				-windows labeled by the directory in which the program is run.
			
 
				-Acme's splitting of the
			
 
				-output into directory-labeled windows is a small feature that has a major effect:
			
 
				-local file names printed by programs can be interpreted directly by Acme.
			
 
				-By indirectly coupling the output of programs to the input,
			
 
				-it also simplifies the management of software that occupies multiple
			
 
				-directories.
			
 
				-</P>
			
 
				-<H4>Coupling to new programs
			
 
				-</H4>
			
 
				-<P>
			
 
				-Like many Plan 9 programs,
			
 
				-Acme offers a programmable interface to
			
 
				-other programs by acting as a file server.
			
 
				-The best example of such a file server is the window system
			
 
				-<TT>8&#189;</TT>
			
 
				-[Pike91],
			
 
				-which exports files with names such as
			
 
				-<TT>screen</TT>,
			
 
				-<TT>cons</TT>,
			
 
				-and
			
 
				-<TT>mouse</TT>
			
 
				-through which applications may access the I/O capabilities of the windows.
			
 
				-<TT>8&#189;</TT>
			
 
				-provides a
			
 
				-<I>distinct</I>
			
 
				-set of files for each window and builds a private file name space
			
 
				-for the clients running `in' each window;
			
 
				-clients in separate windows see distinct files with the same names
			
 
				-(for example
			
 
				-<TT>/dev/mouse</TT>).
			
 
				-Acme, like the process file system [PPTTW93], instead associates each
			
 
				-window with a directory of files; the files of each window are visible
			
 
				-to any application.
			
 
				-This difference reflects a difference in how the systems are used:
			
 
				-<TT>8&#189;</TT>
			
 
				-tells a client what keyboard and mouse activity has happened in its window;
			
 
				-Acme tells a client what changes that activity wrought on any window it asks about.
			
 
				-Putting it another way,
			
 
				-<TT>8&#189;</TT>
			
 
				-enables the construction of interactive applications;
			
 
				-Acme provides the interaction for applications.
			
 
				-</P>
			
 
				-<P>
			
 
				-The root of
			
 
				-Acme's file system is mounted using Plan 9 operations on the directory
			
 
				-<TT>/mnt/acme</TT>.
			
 
				-In
			
 
				-that root directory appears a directory for each window, numbered with the window's identifier,
			
 
				-analogous to a process identifier, for example
			
 
				-<TT>/mnt/acme/27</TT>.
			
 
				-The window's directory
			
 
				-contains 6 files:
			
 
				-<TT>/mnt/acme/27/addr</TT>,
			
 
				-<TT>body</TT>,
			
 
				-<TT>ctl</TT>,
			
 
				-<TT>data</TT>,
			
 
				-<TT>event</TT>,
			
 
				-and
			
 
				-<TT>tag</TT>.
			
 
				-The
			
 
				-<TT>body</TT>
			
 
				-and
			
 
				-<TT>tag</TT>
			
 
				-files contain the text of the respective parts of the window; they may be
			
 
				-read to recover the contents.  Data written to these files is appended to the text;
			
 
				-<TT>seeks</TT>
			
 
				-are ignored.
			
 
				-The
			
 
				-<TT>addr</TT>
			
 
				-and
			
 
				-<TT>data</TT>
			
 
				-files provide random access to the contents of the body.
			
 
				-The
			
 
				-<TT>addr</TT>
			
 
				-file is written to set a character position within the body; the
			
 
				-<TT>data</TT>
			
 
				-file may then be read to recover the contents at that position,
			
 
				-or written to change them.
			
 
				-(The tag is assumed
			
 
				-small and special-purpose enough not to need special treatment.
			
 
				-Also,
			
 
				-<TT>addr</TT>
			
 
				-indexes by character position, which is not the same as byte offset
			
 
				-in Plan 9's multi-byte character set [Pike93]).
			
 
				-The format accepted by the
			
 
				-<TT>addr</TT>
			
 
				-file is exactly the syntax of addresses within the user interface,
			
 
				-permitting regular expressions, line numbers, and compound addresses
			
 
				-to be specified.  For example, to replace the contents of lines 3 through 7,
			
 
				-write the text
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-3,7
			
 
				-</PRE></TT></DL>
			
 
				-to the
			
 
				-<TT>addr</TT>
			
 
				-file, then write the replacement text to the
			
 
				-<TT>data</TT>
			
 
				-file.  A zero-length write deletes the addressed text; further writes extend the replacement.
			
 
				-</P>
			
 
				-<P>
			
 
				-The control file,
			
 
				-<TT>ctl</TT>,
			
 
				-may be written with commands to effect actions on the window; for example
			
 
				-the command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-name /adm/users
			
 
				-</PRE></TT></DL>
			
 
				-sets the name in the tag of the window to
			
 
				-<TT>/adm/users</TT>.
			
 
				-Other commands allow deleting the window, writing it to a file, and so on.
			
 
				-Reading the
			
 
				-<TT>ctl</TT>
			
 
				-file recovers a fixed-format string containing 5 textual numbers&#8212;the window
			
 
				-identifier, the number of characters in the tag, the number in the body,
			
 
				-and some status information&#8212;followed by the text of the tag, up to a newline.
			
 
				-</P>
			
 
				-<P>
			
 
				-The last file,
			
 
				-<TT>event</TT>,
			
 
				-is the most unusual.
			
 
				-A program reading a window's
			
 
				-<TT>event</TT>
			
 
				-file is notified of all changes to the text of the window, and
			
 
				-is asked to interpret all middle- and right-button actions.
			
 
				-The data passed to the program is fixed-format and reports
			
 
				-the source of the action (keyboard, mouse, external program, etc.),
			
 
				-its location (what was pointed at or modified), and its nature (change,
			
 
				-search, execution, etc.).
			
 
				-This message, for example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-MI15 19 0 4 time
			
 
				-</PRE></TT></DL>
			
 
				-reports that actions of the mouse
			
 
				-(<TT>M</TT>)
			
 
				-inserted in the body (capital
			
 
				-<TT>I</TT>)
			
 
				-the 4 characters of
			
 
				-<TT>time</TT>
			
 
				-at character positions 15 through 19; the zero is a flag word.
			
 
				-Programs may apply their own interpretations of searching and
			
 
				-execution, or may simply reflect the events back to Acme,
			
 
				-by writing them back to the
			
 
				-<TT>event</TT>
			
 
				-file, to have the default interpretation applied.
			
 
				-Some examples of these ideas in action are presented below.
			
 
				-</P>
			
 
				-<P>
			
 
				-Notice that changes to the window are reported
			
 
				-after the fact; the program is told about them but is not required to act
			
 
				-on them.  Compare this to a more traditional interface in which a program
			
 
				-is told, for example, that a character has been typed on the keyboard and
			
 
				-must then display and interpret it.
			
 
				-Acme's style stems from the basic model of the system, in which any
			
 
				-number of agents&#8212;the keyboard, mouse, external programs
			
 
				-writing to
			
 
				-<TT>data</TT>
			
 
				-or
			
 
				-<TT>body</TT>,
			
 
				-and so on&#8212;may
			
 
				-change the contents of a window.
			
 
				-The style is efficient: many programs are content
			
 
				-to have Acme do most of the work and act only when the editing is completed.
			
 
				-An example is the Acme mail program, which can ignore the changes
			
 
				-made to a message being composed
			
 
				-and just read its body when asked to send it.
			
 
				-A disadvantage is that some traditional ways of working are impossible.
			
 
				-For example, there is no way `to turn off echo': characters appear on the
			
 
				-screen and are read from there; no agent or buffer stands between
			
 
				-the keyboard and the display.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are a couple of other files made available by Acme in its root directory
			
 
				-rather than in the directory of each window.
			
 
				-The text file
			
 
				-<TT>/mnt/acme/index</TT>
			
 
				-holds a list of all window names and numerical identifiers,
			
 
				-somewhat analogous to the output of the
			
 
				-<TT>ps</TT>
			
 
				-command for processes.
			
 
				-The most important, though, is
			
 
				-<TT>/mnt/acme/new</TT>,
			
 
				-a directory that makes new windows, similar to the
			
 
				-<TT>clone</TT>
			
 
				-directory in the Plan 9 network devices [Pres93].
			
 
				-The act of opening any file in
			
 
				-<TT>new</TT>
			
 
				-creates a new Acme window; thus the shell command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-grep -n var *.c &#62; /mnt/acme/new/body
			
 
				-</PRE></TT></DL>
			
 
				-places its output in the body of a fresh window.
			
 
				-More sophisticated applications may open
			
 
				-<TT>new/ctl</TT>,
			
 
				-read it to discover the new window's identifier, and then
			
 
				-open the window's other files in the numbered directory.
			
 
				-</P>
			
 
				-<H4>Acme-specific programs
			
 
				-</H4>
			
 
				-<P>
			
 
				-Although Acme is in part an attempt to move beyond typescripts,
			
 
				-they will probably always have utility.
			
 
				-The first program written for Acme was therefore one
			
 
				-to run a shell or other traditional interactive application
			
 
				-in a window, the Acme analog of
			
 
				-<TT>xterm</TT>.
			
 
				-This program,
			
 
				-<TT>win</TT>,
			
 
				-has a simple structure:
			
 
				-it acts as a two-way intermediary between Acme and the shell,
			
 
				-cross-connecting the standard input and output of the shell to the
			
 
				-text of the window.
			
 
				-The style of interaction is modeled after
			
 
				-<TT>mux</TT>
			
 
				-[Pike88]: standard output is added to the window at the
			
 
				-<I>output point;</I>
			
 
				-text typed after the output point
			
 
				-is made available on standard input when a newline is typed.
			
 
				-After either of these actions, the output point is advanced.
			
 
				-This is different from the working of a regular terminal,
			
 
				-permitting cut-and-paste editing of an input line until the newline is typed.
			
 
				-Arbitrary editing may be done to any text in the window.
			
 
				-The implementation of
			
 
				-<TT>win</TT>,
			
 
				-using the
			
 
				-<TT>event</TT>,
			
 
				-<TT>addr</TT>,
			
 
				-and
			
 
				-<TT>data</TT>
			
 
				-files, is straightforward.
			
 
				-<TT>Win</TT>
			
 
				-needs no code for handling the keyboard and mouse; it just monitors the
			
 
				-contents of the window.  Nonetheless, it allows Acme's full editing to be
			
 
				-applied to shell commands.
			
 
				-The division of labor between
			
 
				-<TT>win</TT>
			
 
				-and
			
 
				-<TT>Acme</TT>
			
 
				-contrasted with
			
 
				-<TT>xterm</TT>
			
 
				-and the X server demonstrates how much work Acme handles automatically.
			
 
				-<TT>Win</TT>
			
 
				-is implemented by a single source file 560 lines long and has no graphics code.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Win</TT>
			
 
				-uses the middle and right buttons to connect itself in a consistent way
			
 
				-with the rest of Acme.
			
 
				-The middle button still executes commands, but in a style more suited
			
 
				-to typescripts.  Text selected with the middle button is treated as if
			
 
				-it had been typed after the output point, much as a similar feature in
			
 
				-<TT>xterm</TT>
			
 
				-or
			
 
				-<TT>8&#189;</TT>,
			
 
				-and therefore causes it to be `executed' by the application running in the window.
			
 
				-Right button actions are reflected back to Acme but refer to the appropriate
			
 
				-files because
			
 
				-<TT>win</TT>
			
 
				-places the name of the current directory in the tag of the window.
			
 
				-If the shell is running, a simple shell function replacing the
			
 
				-<TT>cd</TT>
			
 
				-command can maintain the tag as the shell navigates the file system.
			
 
				-This means, for example, that a right button click on a file mentioned in an
			
 
				-<TT>ls</TT>
			
 
				-listing opens the file within Acme.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another Acme-specific program is a mail reader that begins by presenting,
			
 
				-in a window, a listing of the messages in the user's mailbox, one per line.
			
 
				-Here the middle and right button actions are modified to refer to
			
 
				-mail commands
			
 
				-and messages, but the change feels natural.
			
 
				-Clicking the right button on a line creates a new window and displays the
			
 
				-message there, or, if it's already displayed, moves the mouse to that window.
			
 
				-The metaphor is that the mailbox is a directory whose constituent files are messages.
			
 
				-The mail program also places some relevant commands in the tag lines of
			
 
				-the windows; for example, executing the word
			
 
				-<TT>Reply</TT>
			
 
				-in a message's tag creates a new window
			
 
				-in which to compose a message to the sender of the original;
			
 
				-<TT>Post</TT>
			
 
				-then dispatches it.
			
 
				-In such windows, the addressee is just a list of names
			
 
				-on the first line of the body, which may be edited to add or change recipients.
			
 
				-The program also monitors the mailbox, updating the `directory' as new messages
			
 
				-arrive.
			
 
				-</P>
			
 
				-<P>
			
 
				-The mail program is as simple as it sounds; all the work of interaction,
			
 
				-editing, and management of the display is done by Acme.
			
 
				-The only
			
 
				-difficult sections of the 1200
			
 
				-lines of code concern honoring the external protocols for managing
			
 
				-the mailbox and connecting to
			
 
				-<TT>sendmail</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-One of the things Acme does not provide directly is a facility like
			
 
				-Sam's command language to enable actions such as global substitution;
			
 
				-within Acme, all editing is done manually.
			
 
				-It is easy, though, to write external programs for such tasks.
			
 
				-In this, Acme comes closer to the original intent of Oberon:
			
 
				-a directory,
			
 
				-<TT>/acme/edit</TT>,
			
 
				-contains a set of tools for repetitive editing and a template
			
 
				-or `guide' file that gives examples
			
 
				-of its use.  
			
 
				-Acme's editing guide,
			
 
				-<TT>/acme/edit/guide</TT>,
			
 
				-looks like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-e file | x '/regexp/' | c 'replacement'
			
 
				-e file:'0,$' | x '/.*word.*\n/' | p -n
			
 
				-e file | pipe command args ...
			
 
				-</PRE></TT></DL>
			
 
				-The syntax is reminiscent of Sam's command language, but here the individual
			
 
				-one-letter commands are all stand-alone programs connected by pipes.
			
 
				-Passed along the pipes are addresses, analogous to structural expressions
			
 
				-in Sam terminology.
			
 
				-The
			
 
				-<TT>e</TT>
			
 
				-command, unlike that of Sam, starts the process by generating the address
			
 
				-(default dot, the highlighted selection) in the named files.
			
 
				-The other commands are as in Sam:
			
 
				-<TT>p</TT>
			
 
				-prints the addressed text on standard output (the
			
 
				-<TT>-n</TT>
			
 
				-option is analogous to that of
			
 
				-<TT>grep</TT>,
			
 
				-useful in combination with the right mouse button);
			
 
				-<TT>x</TT>
			
 
				-matches a regular expression to the addressed (incoming) text,
			
 
				-subdividing the text;
			
 
				-<TT>c</TT>
			
 
				-replaces the text; and so on.  Thus, global substitution throughout a file,
			
 
				-which would be expressed in Sam as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-0,$ x/regexp/ c/replacement/
			
 
				-</PRE></TT></DL>
			
 
				-in Acme's editor becomes
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-e 'file:0,$' | x '/regexp/' | c 'replacement'
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-To use the Acme editing commands, open
			
 
				-<TT>/acme/edit/guide</TT>,
			
 
				-use the mouse and keyboard to edit one of the commands to the right form,
			
 
				-and execute it with the middle button.
			
 
				-Acme's context rules find the appropriate binaries in
			
 
				-<TT>/acme/edit</TT>
			
 
				-rather than
			
 
				-<TT>/bin</TT>;
			
 
				-the effect is to turn
			
 
				-<TT>/acme/edit</TT>
			
 
				-into a toolbox containing tools and instructions (the guide file) for their use.
			
 
				-In fact, the source for these tools is also there, in the directory
			
 
				-<TT>/acme/edit/src</TT>.
			
 
				-This setup allows some control of the file name space for binary programs;
			
 
				-not only does it group related programs, it permits the use of common
			
 
				-names for uncommon jobs.  For example, the single-letter names would
			
 
				-be unwise in a directory in everyone's search path; here they are only
			
 
				-visible when running editing commands.
			
 
				-</P>
			
 
				-<P>
			
 
				-In Oberon,
			
 
				-such a collection would be called a
			
 
				-<I>tool</I>
			
 
				-and would consist
			
 
				-of a set of entry points in a module and a menu-like piece of text containing
			
 
				-representative commands that may be edited to suit and executed.
			
 
				-There is, in fact, a tool called
			
 
				-<TT>Edit</TT>
			
 
				-in Oberon.
			
 
				-To provide related functionality,
			
 
				-Acme exploits the directory and file structure of the underlying
			
 
				-system, rather than the module structure of the language;
			
 
				-this fits well with Plan 9's
			
 
				-file-oriented philosophy.
			
 
				-Such tools are central to the working of Oberon but they are
			
 
				-less used in Acme, at least so far.
			
 
				-The main reason is probably that Acme's program interface permits
			
 
				-an external program to remain executing in the background, providing
			
 
				-its own commands as needed (for example, the
			
 
				-<TT>Reply</TT>
			
 
				-command in the mail program); Oberon uses tools to
			
 
				-implement such services because it must invoke
			
 
				-a fresh program for each command.
			
 
				-Also,
			
 
				-Acme's better integration allows more
			
 
				-basic functions to be handled internally; the right mouse button
			
 
				-covers a lot of the basic utility of the editing tools in Oberon.
			
 
				-Nonetheless, as more applications are written for Acme,
			
 
				-many are sure to take this Oberon tool-like form.
			
 
				-</P>
			
 
				-<H4>Comparison with other systems
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acme's immediate ancestor is Help [Pike92], an experimental system written
			
 
				-a few years ago as a first try at exploring some of Oberon's ideas
			
 
				-in an existing operating system.
			
 
				-Besides much better engineering, Acme's advances over Help
			
 
				-include the actions of the right button (Help had nothing comparable),
			
 
				-the ability to connect long-running programs to the user interface
			
 
				-(Help had no analog of the
			
 
				-<TT>event</TT>
			
 
				-file),
			
 
				-and the small but important change to split command output into
			
 
				-windows labeled with the directory in which the commands run.
			
 
				-</P>
			
 
				-<P>
			
 
				-Most of Acme's style, however, derives from the user interface and window
			
 
				-system of Oberon [Wirt89, Reis91].
			
 
				-Oberon includes a programming language and operating system,
			
 
				-which Acme instead borrows from an existing system, Plan 9.
			
 
				-When I first saw Oberon, in 1988, I was struck by the
			
 
				-simplicity of its user interface, particularly its lack of menus
			
 
				-and its elegant use of multiple mouse buttons.
			
 
				-The system seemed restrictive, though&#8212;single process,
			
 
				-single language, no networking, event-driven programming&#8212;and
			
 
				-failed to follow through on some of its own ideas.
			
 
				-For example, the middle mouse button had to be pointed accurately and
			
 
				-the right button was essentially unused.
			
 
				-Acme does follow through:
			
 
				-to the basic idea planted by Oberon, it adds
			
 
				-the ability to run on different operating systems and hardware,
			
 
				-connection to existing applications including
			
 
				-interactive ones such as shells and debuggers,
			
 
				-support for multiple processes,
			
 
				-the right mouse button's features,
			
 
				-the default actions and context-dependent properties
			
 
				-of execution and searching,
			
 
				-and a host of little touches such as moving the mouse cursor that make the system 
			
 
				-more pleasant.
			
 
				-At the moment, though, Oberon does have one distinct advantage: it incorporates
			
 
				-graphical programs well into its model, an issue Acme has not yet faced.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme shares with the Macintosh a desire to use the mouse well and it is
			
 
				-worth comparing the results.
			
 
				-The mouse on the Macintosh has a single button, so menus are essential
			
 
				-and the mouse must frequently move a long way
			
 
				-to reach the appropriate function.
			
 
				-An indication that this style has trouble is that applications provide
			
 
				-keyboard sequences to invoke menu selections and users often prefer them.
			
 
				-A deeper comparison is that the Macintosh uses pictures where Acme uses text.
			
 
				-In contrast to pictures, text can be edited quickly, created on demand,
			
 
				-and fine-tuned to the job at hand; consider adding an option to a command.
			
 
				-It is also self-referential; Acme doesn't need menus because any text can be
			
 
				-in effect a menu item.
			
 
				-The result is that, although a Macintosh screen is certainly prettier and probably
			
 
				-more attractive, especially to beginners, an Acme screen is more dynamic
			
 
				-and expressive, at least for programmers and experienced users.
			
 
				-</P>
			
 
				-<P>
			
 
				-For its role in the overall system,
			
 
				-Acme most resembles EMACS [Stal93].
			
 
				-It is tricky to compare Acme to EMACS, though, because there are
			
 
				-many versions of EMACS and, since it is fully programmable, EMACS
			
 
				-can in principle do anything Acme does.
			
 
				-Also, Acme is much younger and therefore has not
			
 
				-had the time to acquire as many features.
			
 
				-The issue therefore is less what the systems can be programmed to do than
			
 
				-how they are used.
			
 
				-The EMACS versions that come closest to Acme's style are those that
			
 
				-have been extended to provide a programming environment, usually
			
 
				-for a language such as LISP [Alle92, Lucid92].
			
 
				-For richness of the existing interface, these EMACS versions are certainly superior to Acme.
			
 
				-On the other hand, Acme's interface works equally well already for a variety
			
 
				-of languages; for example, one of its most enthusiastic users works almost
			
 
				-exclusively in Standard ML, a language nothing like C.
			
 
				-</P>
			
 
				-<P>
			
 
				-Where Acme excels is in the smoothness of its interface.
			
 
				-Until recently, EMACS did not support the mouse especially well,
			
 
				-and even with the latest version providing features such as `extents'
			
 
				-that can be programmed to behave much like Acme commands,
			
 
				-many users don't bother to upgrade.
			
 
				-Moreover, in the versions that provide extents, 
			
 
				-most EMACS packages don't take advantage of them.
			
 
				-</P>
			
 
				-<P>
			
 
				-The most important distinction is just that
			
 
				-EMACS is fundamentally keyboard-based, while
			
 
				-Acme is mouse-based.
			
 
				-</P>
			
 
				-<P>
			
 
				-People who try Acme find it hard to go back to their previous environment.
			
 
				-Acme automates so much that to return to a traditional interface
			
 
				-is to draw attention to the extra work it requires.
			
 
				-</P>
			
 
				-<H4>Concurrency in the implementation
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acme is about 8,000 lines of code in Alef, a concurrent object-oriented language syntactically similar to C [Alef].
			
 
				-Acme's structure is a set of communicating
			
 
				-processes in a single address space.
			
 
				-One subset of the processes drives the display and user interface,
			
 
				-maintaining the windows; other processes forward mouse and keyboard
			
 
				-activity and implement the file server interface for external programs.
			
 
				-The language and design worked out well;
			
 
				-as explained elsewhere [Pike89, Gans93, Reppy93],
			
 
				-user interfaces built with concurrent systems
			
 
				-can avoid the clumsy
			
 
				-top-level event loop typical of traditional interactive systems.
			
 
				-</P>
			
 
				-<P>
			
 
				-An example of the benefits of the multi-process style
			
 
				-is the management of the state of open
			
 
				-files held by clients of the file system interface.
			
 
				-The problem is that some I/O requests,
			
 
				-such as reading the
			
 
				-<TT>event</TT>
			
 
				-file, may block if no data is available, and the server must
			
 
				-maintain the state of (possibly many) requests until data appears.
			
 
				-For example,
			
 
				-in
			
 
				-<TT>8&#189;</TT>,
			
 
				-a single-process window system written in C, pending requests were queued in
			
 
				-a data structure associated with each window.
			
 
				-After activity in the window that might complete pending I/O,
			
 
				-the data structure was scanned for requests that could now finish.
			
 
				-This structure did not fit well with the rest of the program and, worse,
			
 
				-required meticulous effort
			
 
				-to guarantee correct behavior under all conditions
			
 
				-(consider raw mode, reads of partial lines, deleting a window,
			
 
				-multibyte characters, etc.).
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme instead creates a new dedicated process
			
 
				-for each I/O request.
			
 
				-This process coordinates with the rest of the system
			
 
				-using Alef's synchronous communication;
			
 
				-its state implicitly encodes the state of
			
 
				-the I/O request and obviates the need for queuing.
			
 
				-The passage of the request through Acme proceeds as follows.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme contains a file server process, F, that executes a
			
 
				-<TT>read</TT>
			
 
				-system call to receive a Plan 9 file protocol (9P) message from the client [AT&amp;T92].
			
 
				-The client blocks until Acme answers the request.
			
 
				-F communicates with an allocation process, M,
			
 
				-to acquire an object of type
			
 
				-<TT>Xfid</TT>
			
 
				-(`executing fid'; fid is a 9P term)
			
 
				-to hold the request.
			
 
				-M sits in a loop (reproduced in Figure 2) waiting for either a request for
			
 
				-a new
			
 
				-<TT>Xfid</TT>
			
 
				-or notification that an existing one has finished its task.
			
 
				-When an
			
 
				-<TT>Xfid</TT>
			
 
				-is created, an associated process, X,
			
 
				-is also made.
			
 
				-M queues idle
			
 
				-<TT>Xfids</TT>,
			
 
				-allocating new ones only when the list is empty.
			
 
				-Thus, there is always a pool of
			
 
				-<TT>Xfids</TT>,
			
 
				-some executing, some idle.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Xfid</TT>
			
 
				-object contains a channel,
			
 
				-<TT>Xfid.c</TT>,
			
 
				-for communication with its process;
			
 
				-the unpacked message; and some associated functions,
			
 
				-mostly corresponding to 9P messages such as
			
 
				-<TT>Xfid.write</TT>
			
 
				-to handle a 9P write request.
			
 
				-</P>
			
 
				-<P>
			
 
				-The file server process F parses the message to see its nature&#8212;open,
			
 
				-close, read, write, etc.  Many messages, such as directory
			
 
				-lookups, can be handled immediately; these are responded to directly
			
 
				-and efficiently
			
 
				-by F without invoking the
			
 
				-<TT>Xfid</TT>,
			
 
				-which is therefore maintained until the next message.
			
 
				-When a message, such as a write to the display, requires the attention
			
 
				-of the main display process and interlocked access to its data structures,
			
 
				-F enables X
			
 
				-by sending a function pointer on
			
 
				-<TT>Xfid.c</TT>.
			
 
				-For example, if the message is a write, F executes
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-x-&#62;c &#60;-= Xfid.write;
			
 
				-</PRE></TT></DL>
			
 
				-which sends
			
 
				-the address of
			
 
				-<TT>Xfid.write</TT>
			
 
				-on
			
 
				-<TT>Xfid.c</TT>,
			
 
				-waking up X.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Xfid</TT>
			
 
				-process, X, executes a simple loop:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void
			
 
				-Xfid.ctl(Xfid *x)
			
 
				-{
			
 
				-    for(;;){
			
 
				-        (*&#60;-x-&#62;c)(x);      /* receive and execute message */
			
 
				-        bflush();          /* synchronize bitmap display */
			
 
				-        cxfidfree &#60;-= x;   /* return to free list */
			
 
				-    }
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Thus X
			
 
				-will wake up with the address of a function to call (here
			
 
				-<TT>Xfid.write</TT>)
			
 
				-and execute it; once that completes, it returns itself to the pool of
			
 
				-free processes by sending its address back to the allocator.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although this sequence may seem complicated, it is just a few lines
			
 
				-of code and is in fact far simpler
			
 
				-than the management of the I/O queues in
			
 
				-<TT>8&#189;</TT>.
			
 
				-The hard work of synchronization is done by the Alef run time system.
			
 
				-Moreover, the code worked the first time, which cannot be said for the code in
			
 
				-<TT>8&#189;</TT>.
			
 
				-</P>
			
 
				-<H4>Undo
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acme provides a general undo facility like that of Sam, permitting
			
 
				-textual changes to be unwound arbitrarily.
			
 
				-The implementation is superior to Sam's, though,
			
 
				-with much higher performance and the ability to `redo' changes.
			
 
				-</P>
			
 
				-<P>
			
 
				-Sam uses
			
 
				-a multi-pass algorithm that builds
			
 
				-a transcript of changes to be made simultaneously
			
 
				-and then executes them atomically.
			
 
				-This was thought necessary because the elements of a repetitive
			
 
				-command such as a global substitution should all be applied to the same
			
 
				-initial file and implemented simultaneously; forming the complete
			
 
				-transcript before executing any of the changes avoids the
			
 
				-cumbersome management of addresses in a changing file.
			
 
				-Acme, however, doesn't have this problem; global substitution
			
 
				-is controlled externally and may be made incrementally by exploiting
			
 
				-an observation: if the changes are sorted in address order and
			
 
				-executed in reverse, changes will not invalidate the addresses of
			
 
				-pending changes.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme therefore avoids the initial transcript.  Instead, changes are applied
			
 
				-directly to the file, with an undo transcript recorded in a separate list.
			
 
				-For example, when text is added to a window, it is added directly and a record
			
 
				-of what to delete to restore the state is appended to the undo list.
			
 
				-Each undo action and the file are marked with a sequence number;
			
 
				-actions with the same sequence number are considered a unit
			
 
				-to be undone together.
			
 
				-The invariant state of the structure
			
 
				-is that the last action in the undo list applies to the current state of the file,
			
 
				-even if that action is one of a related set from, for example, a global substitute.
			
 
				-(In Sam, a related set of actions needed to be undone simultaneously.)
			
 
				-To undo an action, pop the last item on the undo list, apply it to the file,
			
 
				-revert it, and append it to a second, redo list.
			
 
				-To redo an action, do the identical operation with the lists interchanged.
			
 
				-The expensive operations occur
			
 
				-only when actually undoing; in normal editing the overhead is minor.
			
 
				-For example, Acme reads files about seven times faster than Sam, partly
			
 
				-because of this improvement and partly because of a cleaner implementation.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme uses a temporary file to hold the text, keeping in memory only the
			
 
				-visible portion, and therefore can edit large files comfortably
			
 
				-even on small-memory machines such as laptops.
			
 
				-</P>
			
 
				-<H4>Future
			
 
				-</H4>
			
 
				-<P>
			
 
				-Acme is still under development.
			
 
				-Some things are simply missing.
			
 
				-For example, Acme should support non-textual graphics, but this is being
			
 
				-deferred until it can be done using a new graphics model being developed
			
 
				-for Plan 9.  Also, it is undecided how Acme's style of interaction should best be
			
 
				-extended to graphical applications.
			
 
				-On a smaller scale, although the system feels smooth and comfortable,
			
 
				-work continues to tune the heuristics and
			
 
				-try new ideas for the user interface.
			
 
				-</P>
			
 
				-<P>
			
 
				-There need to be more programs that use Acme.  Browsers for
			
 
				-Usenet and AP News articles, the Oxford English Dictionary, and other
			
 
				-such text sources exist, but more imaginative applications will
			
 
				-be necessary to prove that Acme's approach is viable.
			
 
				-One that has recently been started is an interface to the debugger Acid [Wint94],
			
 
				-although it is still
			
 
				-unclear what form it will ultimately take.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme shows that it is possible to make a user interface a stand-alone component
			
 
				-of an interactive environment.  By absorbing more of the interactive
			
 
				-functionality than a simple window system, Acme off-loads much of the
			
 
				-computation from its applications, which helps keep them small and
			
 
				-consistent in their interface.  Acme can afford to dedicate
			
 
				-considerable effort to making that interface as good as possible; the result
			
 
				-will benefit the entire system.
			
 
				-</P>
			
 
				-<P>
			
 
				-Acme is complete and useful enough to attract users.
			
 
				-Its comfortable user interface,
			
 
				-the ease with which it handles multiple tasks and
			
 
				-programs in multiple directories,
			
 
				-and its high level of integration
			
 
				-make it addictive.
			
 
				-Perhaps most telling,
			
 
				-Acme shows that typescripts may not be the most
			
 
				-productive interface to a time-sharing system.
			
 
				-</P>
			
 
				-<H4>Acknowledgements
			
 
				-</H4>
			
 
				-<P>
			
 
				-Howard Trickey, Acme's first user, suffered buggy versions gracefully and made
			
 
				-many helpful suggestions.  Chris Fraser provided the necessary insight for the Acme editing
			
 
				-commands.
			
 
				-</P>
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Alef] P. Winterbottom,
			
 
				-``Alef Language Reference Manual'',
			
 
				-Plan 9 Programmer's Manual,
			
 
				-AT&amp;T Bell Laboratories,
			
 
				-Murray Hill, NJ,
			
 
				-1992;
			
 
				-revised in this volume.
			
 
				-<br>
			
 
				-[Alle92]
			
 
				-Allegro Common Lisp User Guide, Vol 2, 
			
 
				-Chapter 14, "The Emacs-Lisp Interface". 
			
 
				-March 1992.
			
 
				-<br>
			
 
				-[AT&amp;T92] Plan 9 Programmer's manual, Murray Hill, New Jersey, 1992.
			
 
				-<br>
			
 
				-[Far89] Far too many people, XTERM(1), Massachusetts Institute of Technology, 1989.
			
 
				-<br>
			
 
				-[Gans93] Emden R. Gansner and John H. Reppy,  ``A Multi-threaded Higher-order User Interface Toolkit'', in
			
 
				-Software Trends, Volume 1,
			
 
				-User Interface Software,
			
 
				-Bass and Dewan (Eds.),
			
 
				-John Wiley &amp; Sons 1993,
			
 
				-pp. 61-80.
			
 
				-<br>
			
 
				-[Lucid92] Richard Stallman and Lucid, Inc.,
			
 
				-Lucid GNU EMACS Manual,
			
 
				-March 1992.
			
 
				-<br>
			
 
				-[Pike87] Rob Pike, ``The Text Editor <TT>sam</TT>'', Softw. - Pract. and Exp., Nov 1987, Vol 17 #11, pp. 813-845; reprinted in this volume.
			
 
				-<br>
			
 
				-[Pike88] Rob Pike, ``Window Systems Should Be Transparent'', Comp. Sys., Summer 1988, Vol 1 #3, pp. 279-296.
			
 
				-<br>
			
 
				-[Pike89] Rob Pike, ``A Concurrent Window System'', Comp. Sys., Spring 1989, Vol 2 #2, pp. 133-153.
			
 
				-<br>
			
 
				-[PPTTW93] Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom, ``The Use of Name Spaces in Plan 9'',
			
 
				-Op. Sys. Rev.,  Vol. 27, No. 2, April 1993, pp. 72-76,
			
 
				-reprinted in this volume.
			
 
				-<br>
			
 
				-[Pike91] Rob Pike, ``8&#189;, the Plan 9 Window System'', USENIX Summer Conf. Proc., Nashville, June, 1991, pp. 257-265,
			
 
				-reprinted in this volume.
			
 
				-<br>
			
 
				-[Pike92] Rob Pike, ``A Minimalist Global User Interface'', Graphics Interface '92 Proc., Vancouver, 1992, pp. 282-293.  An earlier version appeared under the same title in USENIX Summer Conf. Proc., Nashville, June, 1991, pp. 267-279.
			
 
				-<br>
			
 
				-[Pike93] Rob Pike and Ken Thompson, ``Hello World or &#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949; or
			
 
				-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'', USENIX Winter Conf. Proc., San Diego, 1993, pp. 43-50,
			
 
				-reprinted in this volume.
			
 
				-<br>
			
 
				-[Pres93] Dave Presotto and Phil Winterbottom, ``The Organization of Networks in Plan 9'', Proc. Usenix Winter 1993, pp. 271-287, San Diego, CA,
			
 
				-reprinted in this volume.
			
 
				-<br>
			
 
				-[Reis91] Martin Reiser, <I>The Oberon System,</I> Addison Wesley, New York, 1991.
			
 
				-<br>
			
 
				-[Reppy93] John H. Reppy,
			
 
				-``CML: A higher-order concurrent language'', Proc. SIGPLAN'91 Conf. on Programming, Lang. Design and Impl., June, 1991, pp. 293-305.
			
 
				-<br>
			
 
				-[Sche86] Robert W. Scheifler and Jim Gettys,
			
 
				-``The X Window System'',
			
 
				-ACM Trans. on Graph., Vol 5 #2, pp. 79-109.
			
 
				-<br>
			
 
				-[Stal93] Richard Stallman,
			
 
				-Gnu Emacs Manual, 9th edition, Emacs version 19.19,
			
 
				-MIT.
			
 
				-<br>
			
 
				-[Swei86] Daniel Swinehart, Polle Zellweger, Richard Beach, and Robert Hagmann,
			
 
				-``A Structural View of the Cedar Programming Environment'',
			
 
				-ACM Trans. Prog. Lang. and Sys., Vol. 8, No. 4, pp. 419-490, Oct. 1986.
			
 
				-<br>
			
 
				-[Wint94] Philip Winterbottom, ``Acid: A Debugger based on a Language'', USENIX Winter Conf. Proc., San Francisco, CA, 1994,
			
 
				-reprinted in this volume.
			
 
				-<br>
			
 
				-[Wirt89] N. Wirth and J. Gutknecht, ``The Oberon System'', Softw. - Prac. and Exp., Sep 1989, Vol 19 #9, pp 857-894.
			
 
				-
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/ape.html
+++ b/sys/doc/ape.html
@@ -1,532 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>APE &#173; The ANSI/POSIX Environment
			
 
				-</H1>
			
 
				-<DL><DD><I>Howard Trickey<br>
			
 
				-howard@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-When a large or frequently-updated program must be ported
			
 
				-to or from Plan 9, the ANSI/POSIX environment known as APE can be useful.
			
 
				-APE combines the set of headers and object code libraries specified by
			
 
				-the ANSI C standard (ANSI X3.159-1989) with the POSIX operating system
			
 
				-interface standard (IEEE 1003.1-1990, ISO 9945-1), the part of POSIX
			
 
				-defining the basic operating system functions.
			
 
				-Using APE will cause slower compilation and marginally slower execution speeds,
			
 
				-so if the importing or exporting happens only infrequently, due consideration
			
 
				-should be given to using the usual Plan 9 compilation environment instead.
			
 
				-Another factor to consider is that the Plan 9 header organization is
			
 
				-much simpler to remember and use.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are some aspects of required POSIX behavior that are impossible or
			
 
				-very hard to simulate in Plan 9.  They are described below.
			
 
				-Experience has shown, however, that the simulation is adequate for the
			
 
				-vast majority of programs.  A much more common problem is that
			
 
				-many programs use functions or headers not defined by POSIX.
			
 
				-APE has some extensions to POSIX to help in this regard.
			
 
				-Extensions must be explicitly enabled with an appropriate
			
 
				-<TT>#define</TT>,
			
 
				-in order that the APE environment be a good aid for testing
			
 
				-ANSI/POSIX compliance of programs.
			
 
				-</P>
			
 
				-<H4>Pcc
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>pcc</TT>
			
 
				-command acts as a front end to the Plan 9 C compilers and loaders.
			
 
				-It runs an ANSI C preprocessor over source files, using the APE
			
 
				-headers to satisfy
			
 
				-<TT>#include &lt;</TT><I>file</I><TT>&gt;</TT>
			
 
				-directives; then it runs a Plan 9 C compiler; finally, it may load
			
 
				-with APE libraries to produce an executable program.
			
 
				-The document
			
 
				-<I>How to Use the Plan 9 C Compiler</I>
			
 
				-explains how environment variables are used by convention to
			
 
				-handle compilation for differing architectures.
			
 
				-The environment variable
			
 
				-<TT></TT><I>objtype</I>
			
 
				-controls which Plan 9 compiler and loader are used by
			
 
				-<TT>pcc</TT>,
			
 
				-as well as the location of header and library files.
			
 
				-For example, if
			
 
				-<TT>objtype</TT>
			
 
				-is
			
 
				-<TT>mips</TT>,
			
 
				-then
			
 
				-<TT>pcc</TT>
			
 
				-has
			
 
				-<TT>cpp</TT>
			
 
				-look for headers in
			
 
				-<TT>/mips/include/ape</TT>
			
 
				-followed by
			
 
				-<TT>/sys/include/ape</TT>;
			
 
				-then
			
 
				-<TT>pcc</TT>
			
 
				-uses
			
 
				-<TT>vc</TT>
			
 
				-to create
			
 
				-<TT>.v</TT>
			
 
				-object files;
			
 
				-finally,
			
 
				-<TT>vl</TT>
			
 
				-is used to create an executable using libraries in
			
 
				-<TT>/mips/lib/ape</TT>.
			
 
				-</P>
			
 
				-<H4>Psh and Cc
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>pcc</TT>
			
 
				-command is intended for uses where the source code is
			
 
				-ANSI/POSIX, but the programs are built in the usual Plan 9
			
 
				-manner &#173; with
			
 
				-<TT>mk</TT>
			
 
				-and producing object files with names ending in
			
 
				-<TT>.v</TT>,
			
 
				-etc.
			
 
				-Sometimes it is best to use the standard POSIX
			
 
				-<TT>make</TT>
			
 
				-and
			
 
				-<TT>cc</TT>
			
 
				-(which produces object files with names ending in
			
 
				-<TT>.o</TT>,
			
 
				-and automatically calls the loader unless
			
 
				-<TT>-c</TT>
			
 
				-is specified).
			
 
				-Under these circumstances, execute the command:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<TT>ape/psh</TT>
			
 
				-</PRE></TT></DL>
			
 
				-This starts a POSIX shell, with an environment that
			
 
				-includes the POSIX commands
			
 
				-<TT>ar89</TT>,
			
 
				-<TT>c89</TT>,
			
 
				-<TT>cc</TT>,
			
 
				-<TT>basename</TT>,
			
 
				-<TT>dirname</TT>,
			
 
				-<TT>expr</TT>,
			
 
				-<TT>false</TT>,
			
 
				-<TT>grep</TT>,
			
 
				-<TT>kill</TT>,
			
 
				-<TT>make</TT>,
			
 
				-<TT>rmdir</TT>,
			
 
				-<TT>sed</TT>,
			
 
				-<TT>sh</TT>,
			
 
				-<TT>stty</TT>,
			
 
				-<TT>true</TT>,
			
 
				-<TT>uname</TT>,
			
 
				-and
			
 
				-<TT>yacc</TT>.
			
 
				-There are also a few placeholders for commands that cannot be
			
 
				-implemented in Plan 9:
			
 
				-<TT>chown</TT>,
			
 
				-<TT>ln</TT>,
			
 
				-and
			
 
				-<TT>umask</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>cc</TT>
			
 
				-command accepts the options mandated for
			
 
				-the POSIX command
			
 
				-<TT>c89</TT>,
			
 
				-as specified in the C-Language Development Utilities Option
			
 
				-annex of the POSIX Shell and Utilities standard.
			
 
				-It also accepts the following nonstandard options:
			
 
				-<TT>-v</TT>
			
 
				-for echoing the commands for each pass to stdout;
			
 
				-<TT>-A</TT>
			
 
				-to turn on ANSI prototype warnings;
			
 
				-<TT>-S</TT>
			
 
				-to leave assembly language in
			
 
				-<I>file</I>.s;
			
 
				-<TT>-Wp,</TT><I>args</I><TT></TT>
			
 
				-to pass
			
 
				-<I>args</I>
			
 
				-to the
			
 
				-<TT>cpp</TT>;
			
 
				-<TT>-W0,</TT><I>args</I><TT></TT>
			
 
				-to pass
			
 
				-<I>args</I>
			
 
				-to 2c, etc.;
			
 
				-and
			
 
				-<TT>-Wl,</TT><I>args</I><TT></TT>
			
 
				-to pass
			
 
				-<I>args</I>
			
 
				-to 2l, etc.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>sh</TT>
			
 
				-command is pdksh, a mostly POSIX-compliant public domain Korn Shell.
			
 
				-The Plan 9 implementation does not include
			
 
				-the emacs and vi editing modes.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>stty</TT>
			
 
				-command only has effect if the
			
 
				-<TT>ape/ptyfs</TT>
			
 
				-command has been started to interpose a pseudo-tty interface
			
 
				-between
			
 
				-<TT>/dev/cons</TT>
			
 
				-and the running command.
			
 
				-None of the distributed commands do this automatically.
			
 
				-</P>
			
 
				-<H4>Symbols
			
 
				-</H4>
			
 
				-<P>
			
 
				-The C and POSIX standards require that certain symbols be
			
 
				-defined in headers.
			
 
				-They also require that certain other classes of symbols not
			
 
				-be defined in the headers, and specify certain other
			
 
				-symbols that may be defined in headers at the discretion
			
 
				-of the implementation.
			
 
				-POSIX defines
			
 
				-<I>feature test macros</I>,
			
 
				-which are preprocessor symbols beginning with an underscore
			
 
				-and then a capital letter;  if the program
			
 
				-<TT>#defines</TT>
			
 
				-a feature test macro before the inclusion of any headers,
			
 
				-then it is requesting that certain symbols be visible in the headers.
			
 
				-The most important feature test macro is
			
 
				-<TT>_POSIX_SOURCE</TT>:
			
 
				-when it is defined, exactly the symbols required by POSIX are
			
 
				-visible in the appropriate headers.
			
 
				-Consider
			
 
				-<TT>&lt;signal.h&gt;</TT>
			
 
				-for example:
			
 
				-ANSI defines some names that must be defined in
			
 
				-<TT>&lt;signal.h&gt;</TT>,
			
 
				-but POSIX defines others, such as
			
 
				-<TT>sigset_t</TT>,
			
 
				-which are not allowed according to ANSI.
			
 
				-The solution is to make the additional symbols visible only when
			
 
				-<TT>_POSIX_SOURCE</TT>
			
 
				-is defined.
			
 
				-</P>
			
 
				-<P>
			
 
				-To export a program, it helps to know whether it fits
			
 
				-in one of the following categories:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>1.<DD>
			
 
				-Strictly conforming ANSI C program. It only uses features of the language,
			
 
				-libraries, and headers explicitly required by the C standard.  It does not
			
 
				-depend on unspecified, undefined, or implementation-dependent behavior,
			
 
				-and does not exceed any minimum implementation limit.
			
 
				-<DT>2.<DD>
			
 
				-Strictly conforming POSIX program. Similar, but for the POSIX standard as well.
			
 
				-<DT>3.<DD>
			
 
				-Some superset of POSIX, with extensions.  Each extension
			
 
				-is selected by a feature test macro, so it is clear which extensions
			
 
				-are being used.
			
 
				-</dl>
			
 
				-<P>
			
 
				-With APE, if headers are always included to declare any library functions
			
 
				-used, then the set of feature test macros defined by a program will
			
 
				-show which of the above categories the program is in.
			
 
				-To accomplish this, no symbol is defined in a header if it is not required
			
 
				-by the C or POSIX standard, and those required by the POSIX standard
			
 
				-are protected by
			
 
				-<TT>#ifdef _POSIX_SOURCE</TT>.
			
 
				-For example,
			
 
				-<TT>&lt;errno.h&gt;</TT>
			
 
				-defines
			
 
				-<TT>EDOM</TT>,
			
 
				-<TT>ERANGE</TT>,
			
 
				-and
			
 
				-<TT>errno</TT>,
			
 
				-as required by the C standard.
			
 
				-The C standard allows more names beginning with
			
 
				-<TT>E</TT>,
			
 
				-but our header defines only those unless
			
 
				-<TT>_POSIX_SOURCE</TT>
			
 
				-is defined, in which case the symbols required by POSIX are also defined.
			
 
				-This means that a program that uses
			
 
				-<TT>ENAMETOOLONG</TT>
			
 
				-cannot masquerade as a strictly conforming ANSI C program.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Pcc</TT>
			
 
				-and
			
 
				-<TT>cc</TT>
			
 
				-do not predefine any preprocessor symbols except those required by
			
 
				-the ANSI C standard:
			
 
				-<TT>__STDC__</TT>,
			
 
				-<TT>__LINE__</TT>,
			
 
				-<TT>__FILE__</TT>,
			
 
				-<TT>__DATE__</TT>,
			
 
				-and
			
 
				-<TT>__TIME__</TT>.
			
 
				-Any others must be defined in the program itself or by using
			
 
				-<TT>-D</TT>
			
 
				-on the command line.
			
 
				-</P>
			
 
				-<H4>Extensions
			
 
				-</H4>
			
 
				-<P>
			
 
				-The discipline enforced by putting only required
			
 
				-names in the headers is useful for exporting programs,
			
 
				-but it gets in the way when importing programs.
			
 
				-The compromise is to allow additional symbols in headers,
			
 
				-additional headers, and additional library functions,
			
 
				-but only under control of extension feature test macros.
			
 
				-The following extensions are provided; unless otherwise
			
 
				-specified, the additional library functions are in the
			
 
				-default APE library.
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>   -<DD>
			
 
				-<TT>_LIBG_EXTENSION</TT>.
			
 
				-This allows the use of the Plan 9 graphics library.
			
 
				-The functions are as described in the Plan 9 manual (see
			
 
				-<A href="/magic/man2html/2/graphics"><I>graphics</I>(2))
			
 
				-</A>except that
			
 
				-<TT>div</TT>
			
 
				-had to be renamed
			
 
				-<TT>ptdiv</TT>.
			
 
				-Include the
			
 
				-<TT>&lt;libg.h&gt;</TT>
			
 
				-header to declare the needed types and functions.
			
 
				-<DT>   -<DD>
			
 
				-<TT>_LIMITS_EXTENSION</TT>.
			
 
				-POSIX does not require that names such as
			
 
				-<TT>PATH_MAX</TT>
			
 
				-and
			
 
				-<TT>OPEN_MAX</TT>
			
 
				-be defined in
			
 
				-<TT>&lt;limits.h&gt;</TT>,
			
 
				-but many programs assume they are defined there.
			
 
				-If
			
 
				-<TT>_LIMITS_EXTENSION</TT>
			
 
				-is defined, those names will all be defined when
			
 
				-<TT>&lt;limits.h&gt;</TT>
			
 
				-is included.
			
 
				-<DT>   -<DD>
			
 
				-<TT>_BSD_EXTENSION</TT>.
			
 
				-This extension includes not only Berkeley Unix routines,
			
 
				-but also a grab bag of other miscellaneous routines often
			
 
				-found in Unix implementations.
			
 
				-The extension allows the inclusion of any of:
			
 
				-<TT>&lt;bsd.h&gt;</TT>
			
 
				-for
			
 
				-<TT>bcopy()</TT>,
			
 
				-<TT>bcmp()</TT>,
			
 
				-and similar Berkeley functions;
			
 
				-<TT>&lt;netdb.h&gt;</TT>
			
 
				-for
			
 
				-<TT>gethostbyname()</TT>,
			
 
				-etc.,
			
 
				-and associated structures;
			
 
				-<TT>&lt;select.h&gt;</TT>
			
 
				-for the Berkeley
			
 
				-<TT>select</TT>
			
 
				-function and associated types and macros
			
 
				-for dealing with multiple input sources;
			
 
				-<TT>&lt;sys/ioctl.h&gt;</TT>
			
 
				-for the
			
 
				-<TT>ioctl</TT>
			
 
				-function (minimally implemented);
			
 
				-<TT>&lt;sys/param.h&gt;</TT>
			
 
				-for
			
 
				-<TT>NOFILES_MAX</TT>;
			
 
				-<TT>&lt;sys/pty.h&gt;</TT>
			
 
				-for pseudo-tty support via the
			
 
				-<TT>ptsname(int)</TT>
			
 
				-and
			
 
				-<TT>ptmname(int)</TT>
			
 
				-functions;
			
 
				-<TT>&lt;sys/resource.h&gt;</TT>;
			
 
				-<TT>&lt;sys/socket.h&gt;</TT>
			
 
				-for socket structures, constants, and functions;
			
 
				-<TT>&lt;sys/time.h&gt;</TT>
			
 
				-for definitions of the
			
 
				-<TT>timeval</TT>
			
 
				-and
			
 
				-<TT>timezone</TT>
			
 
				-structures;
			
 
				-and
			
 
				-<TT>&lt;sys/uio.h&gt;</TT>
			
 
				-for the
			
 
				-<TT>iovec</TT>
			
 
				-structure and the
			
 
				-<TT>writev</TT>
			
 
				-and
			
 
				-<TT>readv</TT>
			
 
				-functions used for scatter/gather I/O.
			
 
				-Defining
			
 
				-<TT>_BSD_EXTENSION</TT>
			
 
				-also enables various extra definitions in
			
 
				-<TT>&lt;ctype.h&gt;</TT>,
			
 
				-<TT>&lt;signal.h&gt;</TT>,
			
 
				-<TT>&lt;stdio.h&gt;</TT>,
			
 
				-<TT>&lt;unistd.h&gt;</TT>,
			
 
				-<TT>&lt;sys/stat.h&gt;</TT>,
			
 
				-and
			
 
				-<TT>&lt;sys/times.h&gt;</TT>.
			
 
				-<DT>   -<DD>
			
 
				-<TT>_NET_EXTENSION</TT>.
			
 
				-This extension allows inclusion of
			
 
				-<TT>&lt;libnet.h&gt;</TT>,
			
 
				-which defines the networking functions described in the Plan 9 manual page
			
 
				-<A href="/magic/man2html/2/dial"><I>dial</I>(2).
			
 
				-</A><DT>   -<DD>
			
 
				-<TT>_REGEXP_EXTENSION</TT>.
			
 
				-This extension allows inclusion of
			
 
				-<TT>&lt;regexp.h&gt;</TT>,
			
 
				-which defines the regular expression matching functions described
			
 
				-in the Plan 9 manual page
			
 
				-<A href="/magic/man2html/2/regexp"><I>regexp</I>(2).
			
 
				-</A><DT>   -<DD>
			
 
				-<TT>_RESEARCH_SOURCE</TT>.
			
 
				-This extension enables a small library of functions from the Tenth Edition Unix
			
 
				-Research System (V10).
			
 
				-These functions and the types needed to use them are all defined in the
			
 
				-<TT>&lt;libv.h&gt;</TT>
			
 
				-header.
			
 
				-The provided functions are:
			
 
				-<TT>srand</TT>,
			
 
				-<TT>rand</TT>,
			
 
				-<TT>nrand</TT>,
			
 
				-<TT>lrand</TT>,
			
 
				-and
			
 
				-<TT>frand</TT>
			
 
				-(better random number generators);
			
 
				-<TT>getpass</TT>,
			
 
				-<TT>tty_echoon</TT>,
			
 
				-<TT>tty_echooff</TT>
			
 
				-(for dealing with the common needs for mucking with terminal
			
 
				-characteristics);
			
 
				-<TT>min</TT>
			
 
				-and
			
 
				-<TT>max</TT>;
			
 
				-<TT>nap</TT>;
			
 
				-and
			
 
				-<TT>setfields</TT>,
			
 
				-<TT>getfields</TT>,
			
 
				-and
			
 
				-<TT>getmfields</TT>
			
 
				-(for parsing a line into fields).
			
 
				-See the Research Unix System Programmer's Manual, Tenth Edition, for a description
			
 
				-of these functions.
			
 
				-</dl>
			
 
				-<H4>Common Problems
			
 
				-</H4>
			
 
				-<P>
			
 
				-Some large systems, including X11, have been ported successfully
			
 
				-to Plan 9 using APE
			
 
				-(the X11 port is not included in the distribution, however,
			
 
				-because supporting it properly is too big a job).
			
 
				-The problems encountered fall into three categories:
			
 
				-(1) non-ANSI C/POSIX features used; (2) inadequate simulation of POSIX functions;
			
 
				-and (3) compiler/loader bugs.
			
 
				-By far the majority of problems are in the first category.
			
 
				-</P>
			
 
				-<P>
			
 
				-POSIX is just starting to be a target for programmers.
			
 
				-Most existing code is written to work with one or both of a BSD or a System V Unix.
			
 
				-System V is fairly close to POSIX, but there are some differences.
			
 
				-Also, many System V systems have imported some BSD features that are
			
 
				-not part of POSIX.
			
 
				-A good strategy for porting external programs is to first try using
			
 
				-<TT>CFLAGS=-D_POSIX_SOURCE</TT>;
			
 
				-if that doesn't work, try adding
			
 
				-<TT>-D_BSD_EXTENSION</TT>
			
 
				-and perhaps include
			
 
				-<TT>&lt;bsd.h&gt;</TT>
			
 
				-in source files.
			
 
				-Here are some solutions to problems that might remain:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>   -<DD>
			
 
				-Third (environment) argument to
			
 
				-<TT>main</TT>.
			
 
				-Use the
			
 
				-<TT>environ</TT>
			
 
				-global instead.
			
 
				-<DT>   -<DD>
			
 
				-<TT>OPEN_MAX</TT>,
			
 
				-<TT>PATH_MAX</TT>,
			
 
				-etc., assumed in
			
 
				-<TT>&lt;limits.h&gt;</TT>.
			
 
				-Rewrite to call
			
 
				-<TT>sysconf</TT>
			
 
				-or define
			
 
				-<TT>_LIMITS_EXTENSION</TT>.
			
 
				-<DT>   -<DD>
			
 
				-<TT>&lt;varargs.h&gt;</TT>.
			
 
				-Rewrite to use
			
 
				-<TT>&lt;stdarg.h&gt;</TT>.
			
 
				-</dl>
			
 
				-<P>
			
 
				-The second class of problems has to do with inadequacies in the Plan 9
			
 
				-simulation of POSIX functions.
			
 
				-These shortcomings have rarely gotten in the way
			
 
				-(except, perhaps, for the
			
 
				-<TT>link</TT>
			
 
				-problem).
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>   -<DD>
			
 
				-Functions for setting the userid, groupid, effective userid and effective groupid
			
 
				-do not do anything useful.  The concept is impossible to simulate in Plan 9.
			
 
				-<TT>Chown</TT>
			
 
				-also does nothing.
			
 
				-<DT>   -<DD>
			
 
				-<TT>execlp</TT>
			
 
				-and the related functions do not look at the
			
 
				-<TT>PATH</TT>
			
 
				-environment variable.  They just try the current directory and
			
 
				-<TT>/bin</TT>
			
 
				-if the pathname is not absolute.
			
 
				-<DT>   -<DD>
			
 
				-Advisory locking via
			
 
				-<TT>fcntl</TT>
			
 
				-is not implemented.
			
 
				-<DT>   -<DD>
			
 
				-<TT>isatty</TT>
			
 
				-is hard to do correctly.
			
 
				-The approximation used is only sometimes correct.
			
 
				-<DT>   -<DD>
			
 
				-<TT>link</TT>
			
 
				-always fails.
			
 
				-<DT>   -<DD>
			
 
				-With
			
 
				-<TT>open</TT>,
			
 
				-the
			
 
				-<TT>O_NOCTTY</TT>
			
 
				-option has no effect.
			
 
				-The concept of a controlling tty is foreign to Plan 9.
			
 
				-<DT>   -<DD>
			
 
				-<TT>setsid</TT>
			
 
				-forks the name space and note group,
			
 
				-which is only approximately the right behavior.
			
 
				-<DT>   -<DD>
			
 
				-The functions dealing with stacking signals,
			
 
				-<TT>sigpending</TT>,
			
 
				-<TT>sigprocmask</TT>
			
 
				-and
			
 
				-<TT>sigsuspend</TT>,
			
 
				-do not work.
			
 
				-<DT>   -<DD>
			
 
				-<TT>umask</TT>
			
 
				-has no effect, as there is no such concept in Plan 9.
			
 
				-<DT>   -<DD>
			
 
				-code that does
			
 
				-<TT>getenv("HOME")</TT>
			
 
				-should be changed to
			
 
				-<TT>getenv("home")</TT>
			
 
				-on Plan 9.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/asm.html
+++ b/sys/doc/asm.html
@@ -1,1358 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>A Manual for the Plan 9 assembler
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-rob@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Machines
			
 
				-</H4>
			
 
				-<P>
			
 
				-There is an assembler for each of the MIPS, SPARC, Intel 386,
			
 
				-Intel 960, AMD 29000, Motorola 68020 and 68000, Motorola Power PC, DEC Alpha, and Acorn ARM.
			
 
				-The 68020 assembler,
			
 
				-<TT>2a</TT>,
			
 
				-is the oldest and in many ways the prototype.
			
 
				-The assemblers are really just variations of a single program:
			
 
				-they share many properties such as left-to-right assignment order for
			
 
				-instruction operands and the synthesis of macro instructions
			
 
				-such as
			
 
				-<TT>MOVE</TT>
			
 
				-to hide the peculiarities of the load and store structure of the machines.
			
 
				-To keep things concrete, the first part of this manual is
			
 
				-specifically about the 68020.
			
 
				-At the end is a description of the differences among
			
 
				-the other assemblers.
			
 
				-</P>
			
 
				-<P>
			
 
				-The document, ``How to Use the Plan 9 C Compiler'', by Rob Pike,
			
 
				-is a prerequisite for this manual.
			
 
				-</P>
			
 
				-<H4>Registers
			
 
				-</H4>
			
 
				-<P>
			
 
				-All pre-defined symbols in the assembler are upper-case.
			
 
				-Data registers are
			
 
				-<TT>R0</TT>
			
 
				-through
			
 
				-<TT>R7</TT>;
			
 
				-address registers are
			
 
				-<TT>A0</TT>
			
 
				-through
			
 
				-<TT>A7</TT>;
			
 
				-floating-point registers are
			
 
				-<TT>F0</TT>
			
 
				-through
			
 
				-<TT>F7</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-A pointer in
			
 
				-<TT>A6</TT>
			
 
				-is used by the C compiler to point to data, enabling short addresses to
			
 
				-be used more often.
			
 
				-The value of
			
 
				-<TT>A6</TT>
			
 
				-is constant and must be set during C program initialization
			
 
				-to the address of the externally-defined symbol
			
 
				-<TT>a6base</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The following hardware registers are defined in the assembler; their
			
 
				-meaning should be obvious given a 68020 manual:
			
 
				-<TT>CAAR</TT>,
			
 
				-<TT>CACR</TT>,
			
 
				-<TT>CCR</TT>,
			
 
				-<TT>DFC</TT>,
			
 
				-<TT>ISP</TT>,
			
 
				-<TT>MSP</TT>,
			
 
				-<TT>SFC</TT>,
			
 
				-<TT>SR</TT>,
			
 
				-<TT>USP</TT>,
			
 
				-and
			
 
				-<TT>VBR</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The assembler also defines several pseudo-registers that
			
 
				-manipulate the stack:
			
 
				-<TT>FP</TT>,
			
 
				-<TT>SP</TT>,
			
 
				-and
			
 
				-<TT>TOS</TT>.
			
 
				-<TT>FP</TT>
			
 
				-is the frame pointer, so
			
 
				-<TT>0(FP)</TT>
			
 
				-is the first argument,
			
 
				-<TT>4(FP)</TT>
			
 
				-is the second, and so on.
			
 
				-<TT>SP</TT>
			
 
				-is the local stack pointer, where automatic variables are held
			
 
				-(SP is a pseudo-register only on the 68020);
			
 
				-<TT>0(SP)</TT>
			
 
				-is the first automatic, and so on as with
			
 
				-<TT>FP</TT>.
			
 
				-Finally,
			
 
				-<TT>TOS</TT>
			
 
				-is the top-of-stack register, used for pushing parameters to procedures,
			
 
				-saving temporary values, and so on.
			
 
				-</P>
			
 
				-<P>
			
 
				-The assembler and loader track these pseudo-registers so
			
 
				-the above statements are true regardless of what has been
			
 
				-pushed on the hardware stack, pointed to by
			
 
				-<TT>A7</TT>.
			
 
				-The name
			
 
				-<TT>A7</TT>
			
 
				-refers to the hardware stack pointer, but beware of mixed use of
			
 
				-<TT>A7</TT>
			
 
				-and the above stack-related pseudo-registers, which will cause trouble.
			
 
				-Note, too, that the
			
 
				-<TT>PEA</TT>
			
 
				-instruction is observed by the loader to
			
 
				-alter SP and thus will insert a corresponding pop before all returns.
			
 
				-The assembler accepts a label-like name to be attached to
			
 
				-<TT>FP</TT>
			
 
				-and
			
 
				-<TT>SP</TT>
			
 
				-uses, such as
			
 
				-<TT>p+0(FP)</TT>,
			
 
				-to help document that
			
 
				-<TT>p</TT>
			
 
				-is the first argument to a routine.
			
 
				-The name goes in the symbol table but has no significance to the result
			
 
				-of the program.
			
 
				-</P>
			
 
				-<H4>Referring to data
			
 
				-</H4>
			
 
				-<P>
			
 
				-All external references must be made relative to some pseudo-register,
			
 
				-either
			
 
				-<TT>PC</TT>
			
 
				-(the virtual program counter) or
			
 
				-<TT>SB</TT>
			
 
				-(the ``static base'' register).
			
 
				-<TT>PC</TT>
			
 
				-counts instructions, not bytes of data.
			
 
				-For example, to branch to the second following instruction, that is,
			
 
				-to skip one instruction, one may write
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	BRA	2(PC)
			
 
				-</PRE></TT></DL>
			
 
				-Labels are also allowed, as in
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	BRA	return
			
 
				-	NOP
			
 
				-return:
			
 
				-	RTS
			
 
				-</PRE></TT></DL>
			
 
				-When using labels, there is no
			
 
				-<TT>(PC)</TT>
			
 
				-annotation.
			
 
				-</P>
			
 
				-<P>
			
 
				-The pseudo-register
			
 
				-<TT>SB</TT>
			
 
				-refers to the beginning of the address space of the program.
			
 
				-Thus, references to global data and procedures are written as
			
 
				-offsets to
			
 
				-<TT>SB</TT>,
			
 
				-as in
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVL	$array(SB), TOS
			
 
				-</PRE></TT></DL>
			
 
				-to push the address of a global array on the stack, or
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVL	array+4(SB), TOS
			
 
				-</PRE></TT></DL>
			
 
				-to push the second (4-byte) element of the array.
			
 
				-Note the use of an offset; the complete list of addressing modes is given below.
			
 
				-Similarly, subroutine calls must use
			
 
				-<TT>SB</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	BSR	exit(SB)
			
 
				-</PRE></TT></DL>
			
 
				-File-static variables have syntax
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	local&lt;&gt;+4(SB)
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>&lt;&gt;</TT>
			
 
				-will be filled in at load time by a unique integer.
			
 
				-</P>
			
 
				-<P>
			
 
				-When a program starts, it must execute
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVL	$a6base(SB), A6
			
 
				-</PRE></TT></DL>
			
 
				-before accessing any global data.
			
 
				-(On machines such as the MIPS and SPARC that cannot load a register
			
 
				-in a single instruction, constants are loaded through the static base
			
 
				-register.  The loader recognizes code that initializes the static
			
 
				-base register and treats it specially.  You must be careful, however,
			
 
				-not to load large constants on such machines when the static base
			
 
				-register is not set up, such as early in interrupt routines.)
			
 
				-</P>
			
 
				-<H4>Expressions
			
 
				-</H4>
			
 
				-<P>
			
 
				-Expressions are mostly what one might expect.
			
 
				-Where an offset or a constant is expected,
			
 
				-a primary expression with unary operators is allowed.
			
 
				-A general C constant expression is allowed in parentheses.
			
 
				-</P>
			
 
				-<P>
			
 
				-Source files are preprocessed exactly as in the C compiler, so
			
 
				-<TT>#define</TT>
			
 
				-and
			
 
				-<TT>#include</TT>
			
 
				-work.
			
 
				-</P>
			
 
				-<H4>Addressing modes
			
 
				-</H4>
			
 
				-<P>
			
 
				-The simple addressing modes are shared by all the assemblers.
			
 
				-Here, for completeness, follows a table of all the 68020 addressing modes,
			
 
				-since that machine has the richest set.
			
 
				-In the table,
			
 
				-<TT>o</TT>
			
 
				-is an offset, which if zero may be elided, and
			
 
				-<TT>d</TT>
			
 
				-is a displacement, which is a constant between -128 and 127 inclusive.
			
 
				-Many of the modes listed have the same name;
			
 
				-scrutiny of the format will show what default is being applied.
			
 
				-For instance, indexed mode with no address register supplied operates
			
 
				-as though a zero-valued register were used.
			
 
				-For "offset" read "displacement."
			
 
				-For "<TT>.s</TT>" read one of
			
 
				-<TT>.L</TT>
			
 
				-or
			
 
				-<TT>.W</TT>
			
 
				-followed by
			
 
				-<TT>*1</TT>,
			
 
				-<TT>*2</TT>,
			
 
				-<TT>*4</TT>,
			
 
				-or
			
 
				-<TT>*8</TT>
			
 
				-to indicate the size and scaling of the data.
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<br><img src="data.19116310.gif"><br>
			
 
				-</dl>
			
 
				-<H4>Laying down data
			
 
				-</H4>
			
 
				-<P>
			
 
				-Placing data in the instruction stream, say for interrupt vectors, is easy:
			
 
				-the pseudo-instructions
			
 
				-<TT>LONG</TT>
			
 
				-and
			
 
				-<TT>WORD</TT>
			
 
				-(but not
			
 
				-<TT>BYTE</TT>)
			
 
				-lay down the value of their single argument, of the appropriate size,
			
 
				-as if it were an instruction:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	LONG	$12345
			
 
				-</PRE></TT></DL>
			
 
				-places the long 12345 (base 10)
			
 
				-in the instruction stream.
			
 
				-(On most machines,
			
 
				-the only such operator is
			
 
				-<TT>WORD</TT>
			
 
				-and it lays down 32-bit quantities.
			
 
				-The 386 has all three:
			
 
				-<TT>LONG</TT>,
			
 
				-<TT>WORD</TT>,
			
 
				-and
			
 
				-<TT>BYTE</TT>.
			
 
				-The 960 has only one,
			
 
				-<TT>LONG</TT>.)
			
 
				-</P>
			
 
				-<P>
			
 
				-Placing information in the data section is more painful.
			
 
				-The pseudo-instruction
			
 
				-<TT>DATA</TT>
			
 
				-does the work, given two arguments: an address at which to place the item,
			
 
				-including its size,
			
 
				-and the value to place there.  For example, to define a character array
			
 
				-<TT>array</TT>
			
 
				-containing the characters
			
 
				-<TT>abc</TT>
			
 
				-and a terminating null:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	DATA    array+0(SB)/1, $'a'
			
 
				-	DATA    array+1(SB)/1, $'b'
			
 
				-	DATA    array+2(SB)/1, $'c'
			
 
				-	GLOBL   array(SB), $4
			
 
				-</PRE></TT></DL>
			
 
				-or
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	DATA    array+0(SB)/4, $"abc\z"
			
 
				-	GLOBL   array(SB), $4
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>/1</TT>
			
 
				-defines the number of bytes to define,
			
 
				-<TT>GLOBL</TT>
			
 
				-makes the symbol global, and the
			
 
				-<TT>$4</TT>
			
 
				-says how many bytes the symbol occupies.
			
 
				-Uninitialized data is zeroed automatically.
			
 
				-The character
			
 
				-<TT>\z</TT>
			
 
				-is equivalent to the C
			
 
				-<TT>\0</TT>.
			
 
				-The string in a
			
 
				-<TT>DATA</TT>
			
 
				-statement may contain a maximum of eight bytes;
			
 
				-build larger strings piecewise.
			
 
				-Two pseudo-instructions,
			
 
				-<TT>DYNT</TT>
			
 
				-and
			
 
				-<TT>INIT</TT>,
			
 
				-allow the (obsolete) Alef compilers to build dynamic type information during the load
			
 
				-phase.
			
 
				-The
			
 
				-<TT>DYNT</TT>
			
 
				-pseudo-instruction has two forms:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	DYNT	, ALEF_SI_5+0(SB)
			
 
				-	DYNT	ALEF_AS+0(SB), ALEF_SI_5+0(SB)
			
 
				-</PRE></TT></DL>
			
 
				-In the first form,
			
 
				-<TT>DYNT</TT>
			
 
				-defines the symbol to be a small unique integer constant, chosen by the loader,
			
 
				-which is some multiple of the word size.  In the second form,
			
 
				-<TT>DYNT</TT>
			
 
				-defines the second symbol in the same way,
			
 
				-places the address of the most recently
			
 
				-defined text symbol in the array specified by the first symbol at the
			
 
				-index defined by the value of the second symbol,
			
 
				-and then adjusts the size of the array accordingly.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>INIT</TT>
			
 
				-pseudo-instruction takes the same parameters as a
			
 
				-<TT>DATA</TT>
			
 
				-statement.  Its symbol is used as the base of an array and the
			
 
				-data item is installed in the array at the offset specified by the most recent
			
 
				-<TT>DYNT</TT>
			
 
				-pseudo-instruction.
			
 
				-The size of the array is adjusted accordingly.
			
 
				-The
			
 
				-<TT>DYNT</TT>
			
 
				-and
			
 
				-<TT>INIT</TT>
			
 
				-pseudo-instructions are not implemented on the 68020.
			
 
				-</P>
			
 
				-<H4>Defining a procedure
			
 
				-</H4>
			
 
				-<P>
			
 
				-Entry points are defined by the pseudo-operation
			
 
				-<TT>TEXT</TT>,
			
 
				-which takes as arguments the name of the procedure (including the ubiquitous
			
 
				-<TT>(SB)</TT>)
			
 
				-and the number of bytes of automatic storage to pre-allocate on the stack,
			
 
				-which will usually be zero when writing assembly language programs.
			
 
				-On machines with a link register, such as the MIPS and SPARC,
			
 
				-the special value -4 instructs the loader to generate no PC save
			
 
				-and restore instructions, even if the function is not a leaf.
			
 
				-Here is a complete procedure that returns the sum
			
 
				-of its two arguments:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-TEXT	sum(SB), $0
			
 
				-	MOVL	arg1+0(FP), R0
			
 
				-	ADDL	arg2+4(FP), R0
			
 
				-	RTS
			
 
				-</PRE></TT></DL>
			
 
				-An optional middle argument
			
 
				-to the
			
 
				-<TT>TEXT</TT>
			
 
				-pseudo-op is a bit field of options to the loader.
			
 
				-Setting the 1 bit suspends profiling the function when profiling is enabled for the rest of
			
 
				-the program.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-TEXT	sum(SB), 1, $0
			
 
				-	MOVL	arg1+0(FP), R0
			
 
				-	ADDL	arg2+4(FP), R0
			
 
				-	RTS
			
 
				-</PRE></TT></DL>
			
 
				-will not be profiled; the first version above would be.
			
 
				-Subroutines with peculiar state, such as system call routines,
			
 
				-should not be profiled.
			
 
				-</P>
			
 
				-<P>
			
 
				-Setting the 2 bit allows multiple definitions of the same
			
 
				-<TT>TEXT</TT>
			
 
				-symbol in a program; the loader will place only one such function in the image.
			
 
				-It was emitted only by the Alef compilers.
			
 
				-</P>
			
 
				-<P>
			
 
				-Subroutines to be called from C should place their result in
			
 
				-<TT>R0</TT>,
			
 
				-even if it is an address.
			
 
				-Floating point values are returned in
			
 
				-<TT>F0</TT>.
			
 
				-Functions that return a structure to a C program
			
 
				-receive as their first argument the address of the location to
			
 
				-store the result;
			
 
				-<TT>R0</TT>
			
 
				-is unused in the calling protocol for such procedures.
			
 
				-Each routine is responsible for saving its own live registers around the calls it makes,
			
 
				-and a called subroutine is therefore free to use any registers without saving them (``caller saves'').
			
 
				-<TT>A6</TT>
			
 
				-and
			
 
				-<TT>A7</TT>
			
 
				-are the exceptions as described above.
			
 
				-</P>
			
 
				-<H4>When in doubt
			
 
				-</H4>
			
 
				-<P>
			
 
				-If you get confused, try using the
			
 
				-<TT>-S</TT>
			
 
				-option to
			
 
				-<TT>2c</TT>
			
 
				-and compiling a sample program.
			
 
				-The standard output is valid input to the assembler.
			
 
				-</P>
			
 
				-<H4>Instructions
			
 
				-</H4>
			
 
				-<P>
			
 
				-The instruction set of the assembler is not identical to that
			
 
				-of the machine.
			
 
				-It is chosen to match what the compiler generates, augmented
			
 
				-slightly by specific needs of the operating system.
			
 
				-For example,
			
 
				-<TT>2a</TT>
			
 
				-does not distinguish between the various forms of
			
 
				-<TT>MOVE</TT>
			
 
				-instruction: move quick, move address, etc.  Instead the context
			
 
				-does the job.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVL	$1, R1
			
 
				-	MOVL	A0, R2
			
 
				-	MOVW	SR, R3
			
 
				-</PRE></TT></DL>
			
 
				-generates official
			
 
				-<TT>MOVEQ</TT>,
			
 
				-<TT>MOVEA</TT>,
			
 
				-and
			
 
				-<TT>MOVESR</TT>
			
 
				-instructions.
			
 
				-A number of instructions do not have the syntax necessary to specify
			
 
				-their entire capabilities.  Notable examples are the bitfield
			
 
				-instructions, the
			
 
				-multiply and divide instructions, etc.
			
 
				-For a complete set of generated instruction names (in
			
 
				-<TT>2a</TT>
			
 
				-notation, not Motorola's) see the file
			
 
				-<TT>/sys/src/cmd/2c/2.out.h</TT>.
			
 
				-Despite its name, this file contains an enumeration of the
			
 
				-instructions that appear in the intermediate files generated
			
 
				-by the compiler, which correspond exactly to lines of assembly language.
			
 
				-</P>
			
 
				-<P>
			
 
				-The MC68000 assembler,
			
 
				-<TT>1a</TT>,
			
 
				-is essentially the same, honoring the appropriate subset of the instructions
			
 
				-and addressing modes.
			
 
				-The definitions of these are, nonetheless, part of
			
 
				-<TT>2.out.h</TT>.
			
 
				-</P>
			
 
				-<H4>Laying down instructions
			
 
				-</H4>
			
 
				-<P>
			
 
				-The loader modifies the code produced by the assembler and compiler.
			
 
				-It folds branches,
			
 
				-copies short sequences of code to eliminate branches,
			
 
				-and discards unreachable code.
			
 
				-The first instruction of every function is assumed to be reachable.
			
 
				-The pseudo-instruction
			
 
				-<TT>NOP</TT>,
			
 
				-which you may see in compiler output,
			
 
				-means no instruction at all, rather than an instruction that does nothing.
			
 
				-The loader discards all
			
 
				-<TT>NOP</TT>'s.
			
 
				-</P>
			
 
				-<P>
			
 
				-To generate a true
			
 
				-<TT>NOP</TT>
			
 
				-instruction, or any other instruction not known to the assembler, use a
			
 
				-<TT>WORD</TT>
			
 
				-pseudo-instruction.
			
 
				-Such instructions on RISCs are not scheduled by the loader and must have
			
 
				-their delay slots filled manually.
			
 
				-</P>
			
 
				-<H4>MIPS
			
 
				-</H4>
			
 
				-<P>
			
 
				-The registers are only addressed by number:
			
 
				-<TT>R0</TT>
			
 
				-through
			
 
				-<TT>R31</TT>.
			
 
				-<TT>R29</TT>
			
 
				-is the stack pointer;
			
 
				-<TT>R30</TT>
			
 
				-is used as the static base pointer, the analogue of
			
 
				-<TT>A6</TT>
			
 
				-on the 68020.
			
 
				-Its value is the address of the global symbol
			
 
				-<TT>setR30(SB)</TT>.
			
 
				-The register holding returned values from subroutines is
			
 
				-<TT>R1</TT>.
			
 
				-When a function is called, space for the first argument
			
 
				-is reserved at
			
 
				-<TT>0(FP)</TT>
			
 
				-but in C (not Alef) the value is passed in
			
 
				-<TT>R1</TT>
			
 
				-instead.
			
 
				-</P>
			
 
				-<P>
			
 
				-The loader uses
			
 
				-<TT>R28</TT>
			
 
				-as a temporary.  The system uses
			
 
				-<TT>R26</TT>
			
 
				-and
			
 
				-<TT>R27</TT>
			
 
				-as interrupt-time temporaries.  Therefore none of these registers
			
 
				-should be used in user code.
			
 
				-</P>
			
 
				-<P>
			
 
				-The control registers are not known to the assembler.
			
 
				-Instead they are numbered registers
			
 
				-<TT>M0</TT>,
			
 
				-<TT>M1</TT>,
			
 
				-etc.
			
 
				-Use this trick to access, say,
			
 
				-<TT>STATUS</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#define	STATUS	12
			
 
				-	MOVW	M(STATUS), R1
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-Floating point registers are called
			
 
				-<TT>F0</TT>
			
 
				-through
			
 
				-<TT>F31</TT>.
			
 
				-By convention,
			
 
				-<TT>F24</TT>
			
 
				-must be initialized to the value 0.0,
			
 
				-<TT>F26</TT>
			
 
				-to 0.5,
			
 
				-<TT>F28</TT>
			
 
				-to 1.0, and
			
 
				-<TT>F30</TT>
			
 
				-to 2.0;
			
 
				-this is done by the operating system.
			
 
				-</P>
			
 
				-<P>
			
 
				-The instructions and their syntax are different from those of the manufacturer's
			
 
				-manual.
			
 
				-There are no
			
 
				-<TT>lui</TT>
			
 
				-and kin; instead there are
			
 
				-<TT>MOVW</TT>
			
 
				-(move word),
			
 
				-<TT>MOVH</TT>
			
 
				-(move halfword),
			
 
				-and
			
 
				-<TT>MOVB</TT>
			
 
				-(move byte) pseudo-instructions.  If the operand is unsigned, the instructions
			
 
				-are
			
 
				-<TT>MOVHU</TT>
			
 
				-and
			
 
				-<TT>MOVBU</TT>.
			
 
				-The order of operands is from left to right in dataflow order, just as
			
 
				-on the 68020 but not as in MIPS documentation.
			
 
				-This means that the
			
 
				-<TT>Bcond</TT>
			
 
				-instructions are reversed with respect to the book; for example, a
			
 
				-<TT>va</TT>
			
 
				-<TT>BGTZ</TT>
			
 
				-generates a MIPS
			
 
				-<TT>bltz</TT>
			
 
				-instruction.
			
 
				-</P>
			
 
				-<P>
			
 
				-The assembler is for the R2000, R3000, and most of the R4000 and R6000 architectures.
			
 
				-It understands the 64-bit instructions
			
 
				-<TT>MOVV</TT>,
			
 
				-<TT>MOVVL</TT>,
			
 
				-<TT>ADDV</TT>,
			
 
				-<TT>ADDVU</TT>,
			
 
				-<TT>SUBV</TT>,
			
 
				-<TT>SUBVU</TT>,
			
 
				-<TT>MULV</TT>,
			
 
				-<TT>MULVU</TT>,
			
 
				-<TT>DIVV</TT>,
			
 
				-<TT>DIVVU</TT>,
			
 
				-<TT>SLLV</TT>,
			
 
				-<TT>SRLV</TT>,
			
 
				-and
			
 
				-<TT>SRAV</TT>.
			
 
				-The assembler does not have any cache, load-linked, or store-conditional instructions.
			
 
				-</P>
			
 
				-<P>
			
 
				-Some assembler instructions are expanded into multiple instructions by the loader.
			
 
				-For example the loader may convert the load of a 32 bit constant into an
			
 
				-<TT>lui</TT>
			
 
				-followed by an
			
 
				-<TT>ori</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Assembler instructions should be laid out as if there
			
 
				-were no load, branch, or floating point compare delay slots;
			
 
				-the loader will rearrange&#8212;<I>schedule</I>&#8212;the instructions
			
 
				-to guarantee correctness and improve performance.
			
 
				-The only exception is that the correct scheduling of instructions
			
 
				-that use control registers varies from model to model of machine
			
 
				-(and is often undocumented) so you should schedule such instructions
			
 
				-by hand to guarantee correct behavior.
			
 
				-The loader generates
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	NOR	R0, R0, R0
			
 
				-</PRE></TT></DL>
			
 
				-when it needs a true no-op instruction.
			
 
				-Use exactly this instruction when scheduling code manually;
			
 
				-the loader recognizes it and schedules the code before it and after it independently.  Also,
			
 
				-<TT>WORD</TT>
			
 
				-pseudo-ops are scheduled like no-ops.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>NOSCHED</TT>
			
 
				-pseudo-op disables instruction scheduling
			
 
				-(scheduling is enabled by default);
			
 
				-<TT>SCHED</TT>
			
 
				-re-enables it.
			
 
				-Branch folding, code copying, and dead code elimination are
			
 
				-disabled for instructions that are not scheduled.
			
 
				-</P>
			
 
				-<H4>SPARC
			
 
				-</H4>
			
 
				-<P>
			
 
				-Once you understand the Plan 9 model for the MIPS, the SPARC is familiar.
			
 
				-Registers have numerical names only:
			
 
				-<TT>R0</TT>
			
 
				-through
			
 
				-<TT>R31</TT>.
			
 
				-Forget about register windows: Plan 9 doesn't use them at all.
			
 
				-The machine has 32 global registers, period.
			
 
				-<TT>R1</TT>
			
 
				-[sic] is the stack pointer.
			
 
				-<TT>R2</TT>
			
 
				-is the static base register, with value the address of
			
 
				-<TT>setSB(SB)</TT>.
			
 
				-<TT>R7</TT>
			
 
				-is the return register and also the register holding the first
			
 
				-argument to a C (not Alef) function, again with space reserved at
			
 
				-<TT>0(FP)</TT>.
			
 
				-<TT>R14</TT>
			
 
				-is the loader temporary.
			
 
				-</P>
			
 
				-<P>
			
 
				-Floating-point registers are exactly as on the MIPS.
			
 
				-</P>
			
 
				-<P>
			
 
				-The control registers are known by names such as
			
 
				-<TT>FSR</TT>.
			
 
				-The instructions to access these registers are
			
 
				-<TT>MOVW</TT>
			
 
				-instructions, for example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVW	Y, R8
			
 
				-</PRE></TT></DL>
			
 
				-for the SPARC instruction
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	rdy	%r8
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-Move instructions are similar to those on the MIPS: pseudo-operations
			
 
				-that turn into appropriate sequences of
			
 
				-<TT>sethi</TT>
			
 
				-instructions, adds, etc.
			
 
				-Instructions read from left to right.  Because the arguments are
			
 
				-flipped to
			
 
				-<TT>SUBCC</TT>,
			
 
				-the condition codes are not inverted as on the MIPS.
			
 
				-</P>
			
 
				-<P>
			
 
				-The syntax for the ASI stuff is, for example to move a word from ASI 2:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVW	(R7, 2), R8
			
 
				-</PRE></TT></DL>
			
 
				-The syntax for double indexing is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVW	(R7+R8), R9
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The SPARC's instruction scheduling is similar to the MIPS's.
			
 
				-The official no-op instruction is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	ORN	R0, R0, R0
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>i960
			
 
				-</H4>
			
 
				-<P>
			
 
				-Registers are numbered
			
 
				-<TT>R0</TT>
			
 
				-through
			
 
				-<TT>R31</TT>.
			
 
				-Stack pointer is
			
 
				-<TT>R29</TT>;
			
 
				-return register is
			
 
				-<TT>R4</TT>;
			
 
				-static base is
			
 
				-<TT>R28</TT>;
			
 
				-it is initialized to the address of
			
 
				-<TT>setSB(SB)</TT>.
			
 
				-<TT>R3</TT>
			
 
				-must be zero; this should be done manually early in execution by
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	SUBO	R3, R3
			
 
				-</PRE></TT></DL>
			
 
				-<TT>R27</TT>
			
 
				-is the loader temporary.
			
 
				-</P>
			
 
				-<P>
			
 
				-There is no support for floating point.
			
 
				-</P>
			
 
				-<P>
			
 
				-The Intel calling convention is not supported and cannot be used; use
			
 
				-<TT>BAL</TT>
			
 
				-instead.
			
 
				-Instructions are mostly as in the book.  The major change is that
			
 
				-<TT>LOAD</TT>
			
 
				-and
			
 
				-<TT>STORE</TT>
			
 
				-are both called
			
 
				-<TT>MOV</TT>.
			
 
				-The extension character for
			
 
				-<TT>MOV</TT>
			
 
				-is as in the manual:
			
 
				-<TT>O</TT>
			
 
				-for ordinal,
			
 
				-<TT>W</TT>
			
 
				-for signed, etc.
			
 
				-</P>
			
 
				-<H4>i386
			
 
				-</H4>
			
 
				-<P>
			
 
				-The assembler assumes 32-bit protected mode.
			
 
				-The register names are
			
 
				-<TT>SP</TT>,
			
 
				-<TT>AX</TT>,
			
 
				-<TT>BX</TT>,
			
 
				-<TT>CX</TT>,
			
 
				-<TT>DX</TT>,
			
 
				-<TT>BP</TT>,
			
 
				-<TT>DI</TT>,
			
 
				-and
			
 
				-<TT>SI</TT>.
			
 
				-The stack pointer (not a pseudo-register) is
			
 
				-<TT>SP</TT>
			
 
				-and the return register is
			
 
				-<TT>AX</TT>.
			
 
				-There is no physical frame pointer but, as for the MIPS,
			
 
				-<TT>FP</TT>
			
 
				-is a pseudo-register that acts as
			
 
				-a frame pointer.
			
 
				-</P>
			
 
				-<P>
			
 
				-Opcode names are mostly the same as those listed in the Intel manual
			
 
				-with an
			
 
				-<TT>L</TT>,
			
 
				-<TT>W</TT>,
			
 
				-or
			
 
				-<TT>B</TT>
			
 
				-appended to identify 32-bit, 
			
 
				-16-bit, and 8-bit operations.
			
 
				-The exceptions are loads, stores, and conditionals.
			
 
				-All load and store opcodes to and from general registers, special registers
			
 
				-(such as
			
 
				-<TT>CR0,</TT>
			
 
				-<TT>CR3,</TT>
			
 
				-<TT>GDTR,</TT>
			
 
				-<TT>IDTR,</TT>
			
 
				-<TT>SS,</TT>
			
 
				-<TT>CS,</TT>
			
 
				-<TT>DS,</TT>
			
 
				-<TT>ES,</TT>
			
 
				-<TT>FS,</TT>
			
 
				-and
			
 
				-<TT>GS</TT>)
			
 
				-or memory are written
			
 
				-as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOV<I>x</I>	src,dst
			
 
				-</PRE></TT></DL>
			
 
				-where
			
 
				-<I>x</I>
			
 
				-is
			
 
				-<TT>L</TT>,
			
 
				-<TT>W</TT>,
			
 
				-or
			
 
				-<TT>B</TT>.
			
 
				-Thus to get
			
 
				-<TT>AL</TT>
			
 
				-use a
			
 
				-<TT>MOVB</TT>
			
 
				-instruction.  If you need to access
			
 
				-<TT>AH</TT>,
			
 
				-you must mention it explicitly in a
			
 
				-<TT>MOVB</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVB	AH, BX
			
 
				-</PRE></TT></DL>
			
 
				-There are many examples of illegal moves, for example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVB	BP, DI
			
 
				-</PRE></TT></DL>
			
 
				-that the loader actually implements as pseudo-operations.
			
 
				-</P>
			
 
				-<P>
			
 
				-The names of conditions in all conditional instructions
			
 
				-(<TT>J</TT>,
			
 
				-<TT>SET</TT>)
			
 
				-follow the conventions of the 68020 instead of those of the Intel
			
 
				-assembler:
			
 
				-<TT>JOS</TT>,
			
 
				-<TT>JOC</TT>,
			
 
				-<TT>JCS</TT>,
			
 
				-<TT>JCC</TT>,
			
 
				-<TT>JEQ</TT>,
			
 
				-<TT>JNE</TT>,
			
 
				-<TT>JLS</TT>,
			
 
				-<TT>JHI</TT>,
			
 
				-<TT>JMI</TT>,
			
 
				-<TT>JPL</TT>,
			
 
				-<TT>JPS</TT>,
			
 
				-<TT>JPC</TT>,
			
 
				-<TT>JLT</TT>,
			
 
				-<TT>JGE</TT>,
			
 
				-<TT>JLE</TT>,
			
 
				-and
			
 
				-<TT>JGT</TT>
			
 
				-instead of
			
 
				-<TT>JO</TT>,
			
 
				-<TT>JNO</TT>,
			
 
				-<TT>JB</TT>,
			
 
				-<TT>JNB</TT>,
			
 
				-<TT>JZ</TT>,
			
 
				-<TT>JNZ</TT>,
			
 
				-<TT>JBE</TT>,
			
 
				-<TT>JNBE</TT>,
			
 
				-<TT>JS</TT>,
			
 
				-<TT>JNS</TT>,
			
 
				-<TT>JP</TT>,
			
 
				-<TT>JNP</TT>,
			
 
				-<TT>JL</TT>,
			
 
				-<TT>JNL</TT>,
			
 
				-<TT>JLE</TT>,
			
 
				-and
			
 
				-<TT>JNLE</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The addressing modes have syntax like
			
 
				-<TT>AX</TT>,
			
 
				-<TT>(AX)</TT>,
			
 
				-<TT>(AX)(BX*4)</TT>,
			
 
				-<TT>10(AX)</TT>,
			
 
				-and
			
 
				-<TT>10(AX)(BX*4)</TT>.
			
 
				-The offsets from
			
 
				-<TT>AX</TT>
			
 
				-can be replaced by offsets from
			
 
				-<TT>FP</TT>
			
 
				-or
			
 
				-<TT>SB</TT>
			
 
				-to access names, for example
			
 
				-<TT>extern+5(SB)(AX*2)</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Other notes: Non-relative
			
 
				-<TT>JMP</TT>
			
 
				-and
			
 
				-<TT>CALL</TT>
			
 
				-have a
			
 
				-<TT>*</TT>
			
 
				-added to the syntax.
			
 
				-Only
			
 
				-<TT>LOOP</TT>,
			
 
				-<TT>LOOPEQ</TT>,
			
 
				-and
			
 
				-<TT>LOOPNE</TT>
			
 
				-are legal loop instructions.  Only
			
 
				-<TT>REP</TT>
			
 
				-and
			
 
				-<TT>REPN</TT>
			
 
				-are recognized repeaters.  These are not prefixes, but rather
			
 
				-stand-alone opcodes that precede the strings, for example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	CLD; REP; MOVSL
			
 
				-</PRE></TT></DL>
			
 
				-Segment override prefixes in
			
 
				-<TT>MOD/RM</TT>
			
 
				-fields are not supported.
			
 
				-</P>
			
 
				-<H4>Alpha
			
 
				-</H4>
			
 
				-<P>
			
 
				-On the Alpha, all registers are 64 bits.  The architecture handles 32-bit values
			
 
				-by giving them a canonical format (sign extension in the case of integer registers).
			
 
				-Registers are numbered
			
 
				-<TT>R0</TT>
			
 
				-through
			
 
				-<TT>R31</TT>.
			
 
				-<TT>R0</TT>
			
 
				-holds the return value from subroutines, and also the first parameter.
			
 
				-<TT>R30</TT>
			
 
				-is the stack pointer,
			
 
				-<TT>R29</TT>
			
 
				-is the static base,
			
 
				-<TT>R26</TT>
			
 
				-is the link register, and
			
 
				-<TT>R27</TT>
			
 
				-and
			
 
				-<TT>R28</TT>
			
 
				-are linker temporaries.
			
 
				-</P>
			
 
				-<P>
			
 
				-Floating point registers are numbered
			
 
				-<TT>F0</TT>
			
 
				-to
			
 
				-<TT>F31</TT>.
			
 
				-<TT>F28</TT>
			
 
				-contains
			
 
				-<TT>0.5</TT>,
			
 
				-<TT>F29</TT>
			
 
				-contains
			
 
				-<TT>1.0</TT>,
			
 
				-and
			
 
				-<TT>F30</TT>
			
 
				-contains
			
 
				-<TT>2.0</TT>.
			
 
				-<TT>F31</TT>
			
 
				-is always
			
 
				-<TT>0.0</TT>
			
 
				-on the Alpha.
			
 
				-</P>
			
 
				-<P>
			
 
				-The extension character for
			
 
				-<TT>MOV</TT>
			
 
				-follows DEC's notation:
			
 
				-<TT>B</TT>
			
 
				-for byte (8 bits),
			
 
				-<TT>W</TT>
			
 
				-for word (16 bits),
			
 
				-<TT>L</TT>
			
 
				-for long (32 bits),
			
 
				-and
			
 
				-<TT>Q</TT>
			
 
				-for quadword (64 bits).
			
 
				-Byte and ``word'' loads and stores may be made unsigned
			
 
				-by appending a
			
 
				-<TT>U</TT>.
			
 
				-<TT>S</TT>
			
 
				-and
			
 
				-<TT>T</TT>
			
 
				-refer to IEEE floating point single precision (32 bits) and double precision (64 bits), respectively.
			
 
				-</P>
			
 
				-<H4>Power PC
			
 
				-</H4>
			
 
				-<P>
			
 
				-The Power PC follows the Plan 9 model set by the MIPS and SPARC,
			
 
				-not the elaborate ABIs.
			
 
				-The 32-bit instructions of the 60x and 8xx PowerPC architectures are supported;
			
 
				-there is no support for the older POWER instructions.
			
 
				-Registers are
			
 
				-<TT>R0</TT>
			
 
				-through
			
 
				-<TT>R31</TT>.
			
 
				-<TT>R0</TT>
			
 
				-is initialized to zero; this is done by C start up code
			
 
				-and assumed by the compiler and loader.
			
 
				-<TT>R1</TT>
			
 
				-is the stack pointer.
			
 
				-<TT>R2</TT>
			
 
				-is the static base register, with value the address of
			
 
				-<TT>setSB(SB)</TT>.
			
 
				-<TT>R3</TT>
			
 
				-is the return register and also the register holding the first
			
 
				-argument to a C function, with space reserved at
			
 
				-<TT>0(FP)</TT>
			
 
				-as on the MIPS.
			
 
				-<TT>R31</TT>
			
 
				-is the loader temporary.
			
 
				-The external registers in Plan 9's C are allocated from
			
 
				-<TT>R30</TT>
			
 
				-down.
			
 
				-</P>
			
 
				-<P>
			
 
				-Floating point registers are called
			
 
				-<TT>F0</TT>
			
 
				-through
			
 
				-<TT>F31</TT>.
			
 
				-By convention, several registers are initialized
			
 
				-to specific values; this is done by the operating system.
			
 
				-<TT>F27</TT>
			
 
				-must be initialized to the value
			
 
				-<TT>0x4330000080000000</TT>
			
 
				-(used by float-to-int conversion),
			
 
				-<TT>F28</TT>
			
 
				-to the value 0.0,
			
 
				-<TT>F29</TT>
			
 
				-to 0.5,
			
 
				-<TT>F30</TT>
			
 
				-to 1.0, and
			
 
				-<TT>F31</TT>
			
 
				-to 2.0.
			
 
				-</P>
			
 
				-<P>
			
 
				-As on the MIPS and SPARC, the assembler accepts arbitrary literals
			
 
				-as operands to
			
 
				-<TT>MOVW</TT>,
			
 
				-and also to
			
 
				-<TT>ADD</TT>
			
 
				-and others where `immediate' variants exist,
			
 
				-and the loader generates sequences
			
 
				-of
			
 
				-<TT>addi</TT>,
			
 
				-<TT>addis</TT>,
			
 
				-<TT>oris</TT>,
			
 
				-etc. as required.
			
 
				-The register indirect addressing modes use the same syntax as the SPARC,
			
 
				-including double indexing when allowed.
			
 
				-</P>
			
 
				-<P>
			
 
				-The instruction names are generally derived from the Motorola ones,
			
 
				-subject to slight transformation:
			
 
				-the
			
 
				-`<TT>.</TT>'
			
 
				-marking the setting of condition codes is replaced by
			
 
				-<TT>CC</TT>,
			
 
				-and when the letter
			
 
				-`<TT>o</TT>'
			
 
				-represents `OE=1' it is replaced by
			
 
				-<TT>V</TT>.
			
 
				-Thus
			
 
				-<TT>add</TT>,
			
 
				-<TT>addo.</TT>
			
 
				-and
			
 
				-<TT>subfzeo.</TT>
			
 
				-become
			
 
				-<TT>ADD</TT>,
			
 
				-<TT>ADDVCC</TT>
			
 
				-and
			
 
				-<TT>SUBFZEVCC</TT>.
			
 
				-As well as the three-operand conditional branch instruction
			
 
				-<TT>BC</TT>,
			
 
				-the assembler provides pseudo-instructions for the common cases:
			
 
				-<TT>BEQ</TT>,
			
 
				-<TT>BNE</TT>,
			
 
				-<TT>BGT</TT>,
			
 
				-<TT>BGE</TT>,
			
 
				-<TT>BLT</TT>,
			
 
				-<TT>BLE</TT>,
			
 
				-<TT>BVC</TT>,
			
 
				-and
			
 
				-<TT>BVS</TT>.
			
 
				-The unconditional branch instruction is
			
 
				-<TT>BR</TT>.
			
 
				-Indirect branches use
			
 
				-<TT>(CTR)</TT>
			
 
				-or
			
 
				-<TT>(LR)</TT>
			
 
				-as target.
			
 
				-</P>
			
 
				-<P>
			
 
				-Load or store operations are replaced by
			
 
				-<TT>MOV</TT>
			
 
				-variants in the usual way:
			
 
				-<TT>MOVW</TT>
			
 
				-(move word),
			
 
				-<TT>MOVH</TT>
			
 
				-(move halfword with sign extension), and
			
 
				-<TT>MOVB</TT>
			
 
				-(move byte with sign extension, a pseudo-instruction),
			
 
				-with unsigned variants
			
 
				-<TT>MOVHZ</TT>
			
 
				-and
			
 
				-<TT>MOVBZ</TT>,
			
 
				-and byte-reversing
			
 
				-<TT>MOVWBR</TT>
			
 
				-and
			
 
				-<TT>MOVHBR</TT>.
			
 
				-`Load or store with update' versions are
			
 
				-<TT>MOVWU</TT>,
			
 
				-<TT>MOVHU</TT>,
			
 
				-and
			
 
				-<TT>MOVBZU</TT>.
			
 
				-Load or store multiple is
			
 
				-<TT>MOVMW</TT>.
			
 
				-The exceptions are the string instructions, which are
			
 
				-<TT>LSW</TT>
			
 
				-and
			
 
				-<TT>STSW</TT>,
			
 
				-and the reservation instructions
			
 
				-<TT>lwarx</TT>
			
 
				-and
			
 
				-<TT>stwcx.</TT>,
			
 
				-which are
			
 
				-<TT>LWAR</TT>
			
 
				-and
			
 
				-<TT>STWCCC</TT>,
			
 
				-all with operands in the usual data-flow order.
			
 
				-Floating-point load or store instructions are
			
 
				-<TT>FMOVD</TT>,
			
 
				-<TT>FMOVDU</TT>,
			
 
				-<TT>FMOVS</TT>,
			
 
				-and
			
 
				-<TT>FMOVSU</TT>.
			
 
				-The register to register move instructions
			
 
				-<TT>fmr</TT>
			
 
				-and
			
 
				-<TT>fmr.</TT>
			
 
				-are written
			
 
				-<TT>FMOVD</TT>
			
 
				-and
			
 
				-<TT>FMOVDCC</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The assembler knows the commonly used special purpose registers:
			
 
				-<TT>CR</TT>,
			
 
				-<TT>CTR</TT>,
			
 
				-<TT>DEC</TT>,
			
 
				-<TT>LR</TT>,
			
 
				-<TT>MSR</TT>,
			
 
				-and
			
 
				-<TT>XER</TT>.
			
 
				-The rest, which are often architecture-dependent, are referenced as
			
 
				-<TT>SPR(n)</TT>.
			
 
				-The segment registers of the 60x series are similarly
			
 
				-<TT>SEG(n)</TT>,
			
 
				-but
			
 
				-<I>n</I>
			
 
				-can also be a register name, as in
			
 
				-<TT>SEG(R3)</TT>.
			
 
				-Moves between special purpose registers and general purpose ones,
			
 
				-when allowed by the architecture,
			
 
				-are written as
			
 
				-<TT>MOVW</TT>,
			
 
				-replacing
			
 
				-<TT>mfcr</TT>,
			
 
				-<TT>mtcr</TT>,
			
 
				-<TT>mfmsr</TT>,
			
 
				-<TT>mtmsr</TT>,
			
 
				-<TT>mtspr</TT>,
			
 
				-<TT>mfspr</TT>,
			
 
				-<TT>mftb</TT>,
			
 
				-and many others.
			
 
				-</P>
			
 
				-<P>
			
 
				-The fields of the condition register
			
 
				-<TT>CR</TT>
			
 
				-are referenced as
			
 
				-<TT>CR(0)</TT>
			
 
				-through
			
 
				-<TT>CR(7)</TT>.
			
 
				-They are used by the
			
 
				-<TT>MOVFL</TT>
			
 
				-(move field) pseudo-instruction,
			
 
				-which produces
			
 
				-<TT>mcrf</TT>
			
 
				-or
			
 
				-<TT>mtcrf</TT>.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVFL	CR(3), CR(0)
			
 
				-	MOVFL	R3, CR(1)
			
 
				-	MOVFL	R3, 7, CR
			
 
				-</PRE></TT></DL>
			
 
				-They are also accepted in
			
 
				-the conditional branch instruction, for example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	BEQ	CR(7), label
			
 
				-</PRE></TT></DL>
			
 
				-Fields of the
			
 
				-<TT>FPSCR</TT>
			
 
				-are accessed using
			
 
				-<TT>MOVFL</TT>
			
 
				-in a similar way:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVFL	FPSCR, F0
			
 
				-	MOVFL	F0, FPSCR
			
 
				-	MOVFL	F0, $7, FPSCR
			
 
				-	MOVFL	$0, FPSCR(3)
			
 
				-</PRE></TT></DL>
			
 
				-producing
			
 
				-<TT>mffs</TT>,
			
 
				-<TT>mtfsf</TT>
			
 
				-or
			
 
				-<TT>mtfsfi</TT>,
			
 
				-as appropriate.
			
 
				-</P>
			
 
				-<H4>ARM
			
 
				-</H4>
			
 
				-<P>
			
 
				-The assembler provides access to
			
 
				-<TT>R0</TT>
			
 
				-through
			
 
				-<TT>R14</TT>
			
 
				-and the
			
 
				-<TT>PC</TT>.
			
 
				-The stack pointer is
			
 
				-<TT>R13</TT>,
			
 
				-the link register is
			
 
				-<TT>R14</TT>,
			
 
				-and the static base register is
			
 
				-<TT>R12</TT>.
			
 
				-<TT>R0</TT>
			
 
				-is the return register and also the register holding
			
 
				-the first argument to a subroutine.
			
 
				-The assembler supports the
			
 
				-<TT>CPSR</TT>
			
 
				-and
			
 
				-<TT>SPSR</TT>
			
 
				-registers.
			
 
				-It also knows about coprocessor registers
			
 
				-<TT>C0</TT>
			
 
				-through
			
 
				-<TT>C15</TT>.
			
 
				-Floating registers are
			
 
				-<TT>F0</TT>
			
 
				-through
			
 
				-<TT>F7</TT>,
			
 
				-<TT>FPSR</TT>
			
 
				-and
			
 
				-<TT>FPCR</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-As with the other architectures, loads and stores are called
			
 
				-<TT>MOV</TT>,
			
 
				-e.g.
			
 
				-<TT>MOVW</TT>
			
 
				-for load word or store word, and
			
 
				-<TT>MOVM</TT>
			
 
				-for
			
 
				-load or store multiple,
			
 
				-depending on the operands.
			
 
				-</P>
			
 
				-<P>
			
 
				-Addressing modes are supported by suffixes to the instructions:
			
 
				-<TT>.IA</TT>
			
 
				-(increment after),
			
 
				-<TT>.IB</TT>
			
 
				-(increment before),
			
 
				-<TT>.DA</TT>
			
 
				-(decrement after), and
			
 
				-<TT>.DB</TT>
			
 
				-(decrement before).
			
 
				-These can only be used with the
			
 
				-<TT>MOV</TT>
			
 
				-instructions.
			
 
				-The move multiple instruction,
			
 
				-<TT>MOVM</TT>,
			
 
				-defines a range of registers using brackets, e.g.
			
 
				-<TT>[R0-R12]</TT>.
			
 
				-The special
			
 
				-<TT>MOVM</TT>
			
 
				-addressing mode bits
			
 
				-<TT>W</TT>,
			
 
				-<TT>U</TT>,
			
 
				-and
			
 
				-<TT>P</TT>
			
 
				-are written in the same manner, for example,
			
 
				-<TT>MOVM.DB.W</TT>.
			
 
				-A
			
 
				-<TT>.S</TT>
			
 
				-suffix allows a
			
 
				-<TT>MOVM</TT>
			
 
				-instruction to access user
			
 
				-<TT>R13</TT>
			
 
				-and
			
 
				-<TT>R14</TT>
			
 
				-when in another processor mode.
			
 
				-Shifts and rotates in addressing modes are supported by binary operators
			
 
				-<TT>&lt;&lt;</TT>
			
 
				-(logical left shift),
			
 
				-<TT>&gt;&gt;</TT>
			
 
				-(logical right shift),
			
 
				-<TT>-&gt;</TT>
			
 
				-(arithmetic right shift), and
			
 
				-<TT>@&gt;</TT>
			
 
				-(rotate right); for example
			
 
				-<TT>R7&gt;&gt;R2</TT> or
			
 
				-<TT>R2@&gt;2</TT>.
			
 
				-The assembler does not support indexing by a shifted expression;
			
 
				-only names can be doubly indexed.
			
 
				-</P>
			
 
				-<P>
			
 
				-Any instruction can be followed by a suffix that makes the instruction conditional:
			
 
				-<TT>.EQ</TT>,
			
 
				-<TT>.NE</TT>,
			
 
				-and so on, as in the ARM manual, with synonyms
			
 
				-<TT>.HS</TT>
			
 
				-(for
			
 
				-<TT>.CS</TT>)
			
 
				-and
			
 
				-<TT>.LO</TT>
			
 
				-(for
			
 
				-<TT>.CC</TT>), for example
			
 
				-<TT>ADD.NE</TT>.
			
 
				-Arithmetic
			
 
				-and logical instructions
			
 
				-can have a
			
 
				-<TT>.S</TT>
			
 
				-suffix, as ARM allows, to set condition codes.
			
 
				-</P>
			
 
				-<P>
			
 
				-The syntax of the
			
 
				-<TT>MCR</TT>
			
 
				-and
			
 
				-<TT>MRC</TT>
			
 
				-coprocessor instructions is largely as in the manual, with the usual adjustments.
			
 
				-The assembler directly supports only the ARM floating-point coprocessor
			
 
				-operations used by the compiler:
			
 
				-<TT>CMP</TT>,
			
 
				-<TT>ADD</TT>,
			
 
				-<TT>SUB</TT>,
			
 
				-<TT>MUL</TT>,
			
 
				-and
			
 
				-<TT>DIV</TT>,
			
 
				-all with
			
 
				-<TT>F</TT>
			
 
				-or
			
 
				-<TT>D</TT>
			
 
				-suffix selecting single or double precision.
			
 
				-Floating-point load or store become
			
 
				-<TT>MOVF</TT>
			
 
				-and
			
 
				-<TT>MOVD</TT>.
			
 
				-Conversion instructions are also specified by moves:
			
 
				-<TT>MOVWD</TT>,
			
 
				-<TT>MOVWF</TT>,
			
 
				-<TT>MOVDW</TT>,
			
 
				-<TT>MOVFW</TT>,
			
 
				-<TT>MOVFD</TT>,
			
 
				-and
			
 
				-<TT>MOVDF</TT>.
			
 
				-</P>
			
 
				-<H4>AMD 29000
			
 
				-</H4>
			
 
				-<P>
			
 
				-For details about this assembly language, which was built for the AMD 29240,
			
 
				-look at the sources or examine compiler output.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/auth.html
+++ b/sys/doc/auth.html
@@ -1,2096 +0,0 @@
 
				-<html>
			
 
				-<br><img src="-.19111510.gif"><br>
			
 
				-<title>
			
 
				--
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Security in Plan 9
			
 
				-</H1>
			
 
				-<DL><DD><I>Russ Cox, MIT LCS<br>
			
 
				-<br>
			
 
				-Eric Grosse, Bell Labs<br>
			
 
				-<br>
			
 
				-Rob Pike, Bell Labs<br>
			
 
				-<br>
			
 
				-Dave Presotto, Avaya Labs and Bell Labs<br>
			
 
				-<br>
			
 
				-Sean Quinlan, Bell Labs<br>
			
 
				-<br>
			
 
				-<TT>{rsc,ehg,rob,presotto,seanq}@plan9.bell-labs.com</TT>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-The security architecture of the Plan 9(tm)
			
 
				-operating system has recently been redesigned
			
 
				-to address some technical shortcomings.
			
 
				-This redesign provided an opportunity also to make the system more
			
 
				-convenient to use securely.
			
 
				-Plan 9 has thus improved in two ways not usually seen together:
			
 
				-it has become more secure
			
 
				-<I>and</I>
			
 
				-easier to use.
			
 
				-<br>&#32;<br>
			
 
				-The central component of the new architecture is a per-user
			
 
				-self-contained agent called
			
 
				-<TT>factotum</TT>.
			
 
				-<TT>Factotum</TT>
			
 
				-securely holds a
			
 
				-copy of the user's keys and negotiates authentication protocols, on
			
 
				-behalf of the user, with secure services around the network.
			
 
				-Concentrating security code in a single program offers several
			
 
				-advantages including: ease of update or repair to broken security
			
 
				-software and protocols; the ability to run secure services at a lower
			
 
				-privilege level; uniform management of keys for all services; and an
			
 
				-opportunity to provide single sign on, even to unchanged legacy
			
 
				-applications.
			
 
				-<TT>Factotum</TT>
			
 
				-has an unusual architecture: it is implemented
			
 
				-as a Plan 9 file server.
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> To appear, in a slightly different form, in
			
 
				-Proc. of the 2002 Usenix Security Symposium,
			
 
				-San Francisco.
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-</DL>
			
 
				-<H4>1 Introduction
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Secure computing systems face two challenges:
			
 
				-first, they must employ sophisticated technology that is difficult to design
			
 
				-and prove correct; and second,
			
 
				-they must be easy for regular people to use.
			
 
				-The question of ease of use is sometimes neglected, but it is essential:
			
 
				-weak but easy-to-use security can be more effective than strong but
			
 
				-difficult-to-use security if it is more likely to be used.
			
 
				-People lock their front doors when they leave the house, knowing
			
 
				-full well that a burglar is capable of picking the lock (or avoiding
			
 
				-the door altogether); yet few would accept the cost and
			
 
				-awkwardness of a bank vault door on the
			
 
				-house even though that might reduce the probability of a robbery.
			
 
				-A related point is that users need a clear model of how the security
			
 
				-operates (if not how it actually provides security) in order to use it
			
 
				-well; for example, the clarity of a lock icon on a web browser
			
 
				-is offset by the confusing and typically insecure
			
 
				-steps for installing X.509 certificates.
			
 
				-<br>&#32;<br>
			
 
				-The security architecture of the Plan 9
			
 
				-operating system
			
 
				-[Pike95]
			
 
				-has recently been redesigned to make it both more secure
			
 
				-and easier to use.
			
 
				-By
			
 
				-<I>security</I>
			
 
				-we mean three things:
			
 
				-first, the business of authenticating users and services;
			
 
				-second, the safe handling, deployment, and use of keys
			
 
				-and other secret information; and
			
 
				-third, the use of encryption and integrity checks
			
 
				-to safeguard communications
			
 
				-from prying eyes.
			
 
				-<br>&#32;<br>
			
 
				-The old security architecture of Plan 9
			
 
				-had several engineering problems in common with other operating systems.
			
 
				-First, it had an inadequate notion of security domain.
			
 
				-Once a user provided a password to connect to a local file store,
			
 
				-the system required that the same password be used to access all the other file
			
 
				-stores.
			
 
				-That is, the system treated all network services as
			
 
				-belonging to the same security domain. 
			
 
				-<br>&#32;<br>
			
 
				-Second, the algorithms and protocols used in authentication,
			
 
				-by nature tricky and difficult to get right, were compiled into the
			
 
				-various applications, kernel modules, and file servers.
			
 
				-Changes and fixes to a security protocol
			
 
				-required that all components using that protocol needed to be recompiled,
			
 
				-or at least relinked, and restarted.
			
 
				-<br>&#32;<br>
			
 
				-Third, the file transport protocol, 9P
			
 
				-[Pike93],
			
 
				-that forms the core of
			
 
				-the Plan 9 system, had its authentication protocol embedded in its design.
			
 
				-This meant that fixing or changing the authentication used by 9P
			
 
				-required deep changes to the system.
			
 
				-If someone were to find a way to break the protocol, the system would
			
 
				-be wide open and very hard to fix.
			
 
				-<br>&#32;<br>
			
 
				-These and a number of lesser problems, combined with a desire
			
 
				-for more widespread use of encryption in the system, spurred us to
			
 
				-rethink the entire security architecture of Plan 9.
			
 
				-<br>&#32;<br>
			
 
				-The centerpiece of the new architecture is an agent,
			
 
				-called
			
 
				-<TT>factotum</TT>,
			
 
				-that handles the user's keys and negotiates all security
			
 
				-interactions with system services and applications.
			
 
				-Like a trusted assistant with a copy of the owner's keys,
			
 
				-<TT>factotum</TT>
			
 
				-does all the negotiation for security and authentication.
			
 
				-Programs no longer need to be compiled with cryptographic
			
 
				-code; instead they communicate with
			
 
				-<TT>factotum</TT>
			
 
				-agents
			
 
				-that represent distinct entities in the cryptographic exchange,
			
 
				-such as a user and server of a secure service.
			
 
				-If a security protocol needs to be added, deleted, or modified,
			
 
				-only
			
 
				-<TT>factotum</TT>
			
 
				-needs to be updated for all system services
			
 
				-to be kept secure.
			
 
				-<br>&#32;<br>
			
 
				-Building on
			
 
				-<TT>factotum</TT>,
			
 
				-we modified
			
 
				-secure services in the system to move
			
 
				-user authentication code into
			
 
				-<TT>factotum</TT>;
			
 
				-made authentication a separable component of the file server protocol;
			
 
				-deployed new security protocols;
			
 
				-designed a secure file store,
			
 
				-called
			
 
				-<TT>secstore</TT>,
			
 
				-to protect our keys but make them easy to get when they are needed;
			
 
				-designed a new kernel module to support transparent use of 
			
 
				-Transport Layer Security (TLS)
			
 
				-[RFC2246];
			
 
				-and began using encryption for all communications within the system.
			
 
				-The overall architecture is illustrated in Figure 1a.
			
 
				-<br><img src="-.19111511.gif"><br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="-.19111512.gif"><br>
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-Figure 1a.  Components of the security architecture.
			
 
				-Each box is a (typically) separate machine; each ellipse a process.
			
 
				-The ellipses labeled <I>F</I><SUB><I>X</I></SUB>
			
 
				-are
			
 
				-<TT>factotum</TT>
			
 
				-processes; those labeled
			
 
				-<I>P</I><SUB><I>X</I></SUB>
			
 
				-are the pieces and proxies of a distributed program.
			
 
				-The authentication server is one of several repositories for users' security information
			
 
				-that
			
 
				-<TT>factotum</TT>
			
 
				-processes consult as required.
			
 
				-<TT>Secstore</TT>
			
 
				-is a shared resource for storing private information such as keys;
			
 
				-<TT>factotum</TT>
			
 
				-consults it for the user during bootstrap.
			
 
				-<br>&#32;<br>
			
 
				-<br><img src="-.19111513.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-Secure protocols and algorithms are well understood
			
 
				-and are usually not the weakest link in a system's security.
			
 
				-In practice, most security problems arise from buggy servers,
			
 
				-confusing software, or administrative oversights.
			
 
				-It is these practical problems that we are addressing.
			
 
				-Although this paper describes the algorithms and protocols we are using,
			
 
				-they are included mainly for concreteness.
			
 
				-Our main intent is to present a simple security architecture built
			
 
				-upon a small trusted code base that is easy to verify (whether by manual or
			
 
				-automatic means), easy to understand, and easy to use.
			
 
				-<br>&#32;<br>
			
 
				-Although it is a subjective assessment,
			
 
				-we believe we have achieved our goal of ease of use.
			
 
				-That we have achieved
			
 
				-our goal of improved security is supported by our plan to
			
 
				-move our currently private computing environment onto the Internet
			
 
				-outside the corporate firewall.
			
 
				-The rest of this paper explains the architecture and how it is used,
			
 
				-to explain why a system that is easy to use securely is also safe
			
 
				-enough to run in the open network.
			
 
				-<H4>2 An Agent for Security
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-One of the primary reasons for the redesign of the Plan 9
			
 
				-security infrastructure was to remove the authentication
			
 
				-method both from the applications and from the kernel.
			
 
				-Cryptographic code
			
 
				-is large and intricate, so it should
			
 
				-be packaged as a separate component that can be repaired or
			
 
				-modified without altering or even relinking applications
			
 
				-and services that depend on it.
			
 
				-If a security protocol is broken, it should be trivial to repair,
			
 
				-disable, or replace it on the fly.
			
 
				-Similarly, it should be possible for multiple programs to use
			
 
				-a common security protocol without embedding it in each program.
			
 
				-<br>&#32;<br>
			
 
				-Some systems use dynamically linked libraries (DLLs) to address these configuration issues.
			
 
				-The problem with this approach is that it leaves
			
 
				-security code in the same address space as the program using it.
			
 
				-The interactions between the program and the DLL
			
 
				-can therefore accidentally or deliberately violate the interface,
			
 
				-weakening security.
			
 
				-Also, a program using a library to implement secure services
			
 
				-must run at a privilege level necessary to provide the service;
			
 
				-separating the security to a different program makes it possible
			
 
				-to run the services at a weaker privilege level, isolating the
			
 
				-privileged code to a single, more trustworthy component.
			
 
				-<br>&#32;<br>
			
 
				-Following the lead of the SSH agent
			
 
				-[Ylon96],
			
 
				-we give each user
			
 
				-an agent process responsible
			
 
				-for holding and using the user's keys.
			
 
				-The agent program is called
			
 
				-<TT>factotum</TT>
			
 
				-because of its similarity to the proverbial servant with the
			
 
				-power to act on behalf of his master because he holds the
			
 
				-keys to all the master's possessions.  It is essential that
			
 
				-<TT>factotum</TT>
			
 
				-keep the keys secret and use them only in the owner's interest.
			
 
				-Later we'll discuss some changes to the kernel to reduce the possibility of
			
 
				-<TT>factotum</TT>
			
 
				-leaking information inadvertently.
			
 
				-<br>&#32;<br>
			
 
				-<TT>Factotum</TT>
			
 
				-is implemented, like most Plan 9 services, as a file server.
			
 
				-It is conventionally mounted upon the directory
			
 
				-<TT>/mnt/factotum</TT>,
			
 
				-and the files it serves there are analogous to virtual devices that provide access to,
			
 
				-and control of, the services of the
			
 
				-<TT>factotum</TT>.
			
 
				-The next few sections describe the design of
			
 
				-<TT>factotum</TT>
			
 
				-and how it operates with the other pieces of Plan 9 to provide
			
 
				-security services.
			
 
				-<H4>2.1 Logging in
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-To make the discussions that follow more concrete,
			
 
				-we begin with a couple of examples showing how the
			
 
				-Plan 9 security architecture appears to the user.
			
 
				-These examples both involve a user
			
 
				-<TT>gre</TT>
			
 
				-logging in after booting a local machine.
			
 
				-The user may or may not have a secure store in which
			
 
				-all his keys are kept.
			
 
				-If he does,
			
 
				-<TT>factotum</TT>
			
 
				-will prompt him for the password to the secure store
			
 
				-and obtain keys from it, prompting only when a key
			
 
				-isn't found in the store.
			
 
				-Otherwise,
			
 
				-<TT>factotum</TT>
			
 
				-must prompt for each key.
			
 
				-<br>&#32;<br>
			
 
				-In the typescripts, \n
			
 
				-represents a literal newline
			
 
				-character typed to force a default response.
			
 
				-User input is in italics, and
			
 
				-long lines are folded and indented to fit.
			
 
				-<br>&#32;<br>
			
 
				-This first example shows a user logging in without
			
 
				-help from the secure store.
			
 
				-First,
			
 
				-<TT>factotum</TT>
			
 
				-prompts for a user name that the local kernel
			
 
				-will use:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-user[none]: gre
			
 
				-</PRE></TT></DL>
			
 
				-(Default responses appear in square brackets.)
			
 
				-The kernel then starts accessing local resources
			
 
				-and requests, through
			
 
				-<TT>factotum</TT>,
			
 
				-a user/password pair to do so:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-!Adding key: dom=cs.bell-labs.com
			
 
				-    proto=p9sk1
			
 
				-user[gre]: \n
			
 
				-password: ****
			
 
				-</PRE></TT></DL>
			
 
				-Now the user is logged in to the local system, and
			
 
				-the mail client starts up:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-!Adding key: proto=apop
			
 
				-    server=plan9.bell-labs.com
			
 
				-user[gre]: \n
			
 
				-password: ****
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Factotum</TT>
			
 
				-is doing all the prompting and the applications
			
 
				-being started are not even touching the keys.
			
 
				-Note that it's always clear which key is being requested.
			
 
				-<br>&#32;<br>
			
 
				-Now consider the same login sequence, but in the case where
			
 
				-<TT>gre</TT>
			
 
				-has a secure store account:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-user[none]: gre
			
 
				-secstore password: *********
			
 
				-STA PIN+SecurID: *********
			
 
				-</PRE></TT></DL>
			
 
				-That's the last
			
 
				-<TT>gre</TT>
			
 
				-will hear from
			
 
				-<TT>factotum</TT>
			
 
				-unless an attempt is made to contact
			
 
				-a system for which no key is kept in the secure store.
			
 
				-<H4>2.2 The factotum
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Each computer running Plan 9 has one user id that owns all the
			
 
				-resources on that system &#8212; the scheduler, local disks,
			
 
				-network interfaces, etc.
			
 
				-That user, the
			
 
				-<I>host owner</I>,
			
 
				-is the closest analogue in Plan 9 to a Unix
			
 
				-<TT>root</TT>
			
 
				-account (although it is far weaker;
			
 
				-rather than having special powers, as its name implies the host owner
			
 
				-is just a regular user that happens to own the
			
 
				-resources of the local machine).
			
 
				-On a single-user system, which we call a terminal,
			
 
				-the host owner is the id of the terminal's user.
			
 
				-Shared servers such as CPU servers normally have a pseudo-user
			
 
				-that initially owns all resources.
			
 
				-At boot time, the Plan 9 kernel starts a
			
 
				-<TT>factotum</TT>
			
 
				-executing as, and therefore with the privileges of,
			
 
				-the host owner.
			
 
				-<br>&#32;<br>
			
 
				-New processes run as
			
 
				-the same user as the process which created them.
			
 
				-When a process must take on the identity of a new user,
			
 
				-such as to provide a login shell
			
 
				-on a shared CPU server,
			
 
				-it does so by proving to the host owner's
			
 
				-<TT>factotum</TT>
			
 
				-that it is
			
 
				-authorized to do so.
			
 
				-This is done by running an
			
 
				-authentication protocol with
			
 
				-<TT>factotum</TT>
			
 
				-to
			
 
				-prove that the process has access to secret information
			
 
				-which only the new user should possess.
			
 
				-For example, consider the setup in Figure 1a.
			
 
				-If a user on the terminal
			
 
				-wants to log in to the CPU server using the
			
 
				-Plan 9
			
 
				-<TT>cpu</TT>
			
 
				-service
			
 
				-[Pike93],
			
 
				-then
			
 
				-<I>P</I><SUB><I>T</I></SUB>
			
 
				-might be the
			
 
				-<TT>cpu</TT>
			
 
				-client program and
			
 
				-<I>P</I><SUB><I>C</I></SUB>
			
 
				-the
			
 
				-<TT>cpu</TT>
			
 
				-server.
			
 
				-Neither <I>P</I><SUB><I>C</I></SUB> nor <I>P</I><SUB><I>T</I></SUB>
			
 
				-knows the details of the authentication.
			
 
				-They
			
 
				-do need to be able to shuttle messages back and
			
 
				-forth between the two
			
 
				-<TT>factotums</TT>,
			
 
				-but this is
			
 
				-a generic function easily performed without
			
 
				-knowing, or being able to extract, secrets in
			
 
				-the messages.
			
 
				-<I>P</I><SUB><I>T</I></SUB>
			
 
				-will make a network connection to <I>P</I><SUB><I>C</I></SUB>.
			
 
				-<I>P</I><SUB><I>T</I></SUB>
			
 
				-and
			
 
				-<I>P</I><SUB><I>C</I></SUB>
			
 
				-will then relay messages between
			
 
				-the
			
 
				-<TT>factotum</TT>
			
 
				-owned by the user, <I>F</I><SUB><I>T</I></SUB>,
			
 
				-and the one owned by the CPU server, <I>F</I><SUB><I>C</I></SUB>,
			
 
				-until mutual authentication has been established.
			
 
				-Later
			
 
				-sections describe the RPC between
			
 
				-<TT>factotum</TT>
			
 
				-and
			
 
				-applications and the library functions to support proxy operations.
			
 
				-<br>&#32;<br>
			
 
				-The kernel always uses a single local instance of
			
 
				-<TT>factotum</TT>,
			
 
				-running as the
			
 
				-host owner, for
			
 
				-its authentication purposes, but
			
 
				-a regular user may start other
			
 
				-<TT>factotum</TT>
			
 
				-agents.
			
 
				-In fact, the
			
 
				-<TT>factotum</TT>
			
 
				-representing the user need not be
			
 
				-running on the same machine as its client.
			
 
				-For instance, it is easy for a user on a CPU server,
			
 
				-through standard Plan 9 operations,
			
 
				-to replace the
			
 
				-<TT>/mnt/factotum</TT>
			
 
				-in the user's private file name space on the server
			
 
				-with a connection to the
			
 
				-<TT>factotum</TT>
			
 
				-running on the terminal.
			
 
				-(The usual file system permissions prevent interlopers
			
 
				-from doing so maliciously.)
			
 
				-This permits secure operations on the CPU server to be
			
 
				-transparently validated by the user's own
			
 
				-<TT>factotum</TT>,
			
 
				-so
			
 
				-secrets need never leave the user's terminal.
			
 
				-The SSH agent
			
 
				-[Ylon96]
			
 
				-does much the
			
 
				-same with special SSH protocol messages, but
			
 
				-an advantage to making our agent a file system
			
 
				-is that we need no new mechanism to access our remote
			
 
				-agent; remote file access is sufficient.
			
 
				-<br>&#32;<br>
			
 
				-Within
			
 
				-<TT>factotum</TT>,
			
 
				-each protocol is implemented as a state
			
 
				-machine with a generic interface, so protocols are in
			
 
				-essence pluggable modules, easy to add, modify, or drop.
			
 
				-Writing a message to and reading a message from
			
 
				-<TT>factotum</TT>
			
 
				-each require a separate RPC and result in
			
 
				-a single state transition.
			
 
				-Therefore
			
 
				-<TT>factotum</TT>
			
 
				-always runs to completion on every RPC and never blocks
			
 
				-waiting for input during any authentication.
			
 
				-Moreover, the number of simultaneous
			
 
				-authentications is limited only by the amount of memory we're
			
 
				-willing to dedicate to representing the state machines.
			
 
				-<br>&#32;<br>
			
 
				-Authentication protocols are implemented only
			
 
				-within
			
 
				-<TT>factotum</TT>,
			
 
				-but adding and removing
			
 
				-protocols does require relinking the binary, so
			
 
				-<TT>factotum</TT>
			
 
				-processes (but no others)
			
 
				-need to be restarted in order to take advantage of
			
 
				-new or repaired protocols.
			
 
				-<br>&#32;<br>
			
 
				-At the time of writing, 
			
 
				-<TT>factotum</TT>
			
 
				-contains authentication
			
 
				-modules for the Plan 9 shared key protocol (p9sk1),
			
 
				-SSH's RSA authentication, passwords in the clear, APOP, CRAM, PPP's CHAP,
			
 
				-Microsoft PPP's MSCHAP, and VNC's challenge/response.
			
 
				-<H4>2.3 Local capabilities
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-A capability system, managed by the kernel, is used to empower
			
 
				-<TT>factotum</TT>
			
 
				-to grant permission to another process to change its user id.
			
 
				-A
			
 
				-kernel device driver
			
 
				-implements two files,
			
 
				-<TT>/dev/caphash</TT>
			
 
				-and
			
 
				-<TT>/dev/capuse</TT>.
			
 
				-The write-only file
			
 
				-<TT>/dev/caphash</TT>
			
 
				-can be opened only by the host owner, and only once.
			
 
				-<TT>Factotum</TT>
			
 
				-opens this file immediately after booting.
			
 
				-<br>&#32;<br>
			
 
				-To use the files,
			
 
				-<TT>factotum</TT>
			
 
				-creates a string of the form
			
 
				-<I>userid1</I><TT>@</TT><I>userid2</I><TT>@</TT><I>random-string</I><TT>,
			
 
				-uses SHA1 HMAC to hash
			
 
				-</TT><I>userid1</I><TT>@</TT><I>userid2</I><TT>
			
 
				-with key
			
 
				-</TT><I>random-string</I><TT>,
			
 
				-and writes that hash to
			
 
				-</TT><TT>/dev/caphash</TT><TT>.
			
 
				-</TT><TT>Factotum</TT><TT>
			
 
				-then passes the original string to another
			
 
				-process on the same machine, running
			
 
				-as user
			
 
				-</TT><I>userid1</I><TT>,
			
 
				-which
			
 
				-writes the string to
			
 
				-</TT><TT>/dev/capuse</TT><TT>.
			
 
				-The kernel hashes the string and looks for
			
 
				-a matching hash in its list.
			
 
				-If it finds one,
			
 
				-the writing process's user id changes from
			
 
				-</TT><I>userid1</I><TT>
			
 
				-to
			
 
				-</TT><I>userid2</I><TT>.
			
 
				-Once used, or if a timeout expires,
			
 
				-the capability is discarded by the kernel.
			
 
				-</TT><br>&#32;<br>
			
 
				-The capabilities are local to the machine on which they are created.
			
 
				-Hence a
			
 
				-<TT>factotum</TT>
			
 
				-running on one machine cannot pass capabilities
			
 
				-to processes on another and expect them to work.
			
 
				-<H4>2.4 Keys
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-We define the word
			
 
				-<I>key</I>
			
 
				-to mean not only a secret, but also a description of the
			
 
				-context in which that secret is to be used: the protocol,
			
 
				-server, user, etc. to which it applies.
			
 
				-That is,
			
 
				-a key is a combination of secret and descriptive information
			
 
				-used to authenticate the identities of parties
			
 
				-transmitting or receiving information.
			
 
				-The set of keys used
			
 
				-in any authentication depends both on the protocol and on
			
 
				-parameters passed by the program requesting the authentication.
			
 
				-<br>&#32;<br>
			
 
				-Taking a tip from SDSI
			
 
				-[RiLa],
			
 
				-which represents security information as textual S-expressions,
			
 
				-keys in Plan 9 are represented as plain UTF-8 text.
			
 
				-Text is easily
			
 
				-understood and manipulated by users.
			
 
				-By contrast,
			
 
				-a binary or other cryptic format
			
 
				-can actually reduce overall security.
			
 
				-Binary formats are difficult for users to examine and can only be
			
 
				-cracked by special tools, themselves poorly understood by most users.
			
 
				-For example, very few people know or understand what's inside
			
 
				-their X.509 certificates.
			
 
				-Most don't even know where in the system to
			
 
				-find them.
			
 
				-Therefore, they have no idea what they are trusting, and why, and
			
 
				-are powerless to change their trust relationships.
			
 
				-Textual, centrally stored and managed keys are easier to use and safer.
			
 
				-<br>&#32;<br>
			
 
				-Plan 9 has historically represented databases as attribute/value pairs,
			
 
				-since they are a good foundation for selection and projection operations.
			
 
				-<TT>Factotum</TT>
			
 
				-therefore represents
			
 
				-the keys in the format
			
 
				-<I>attribute</I><TT>=</TT><I>value</I><TT>,
			
 
				-where
			
 
				-</TT><I>attribute</I><TT>
			
 
				-is an identifier, possibly with a single-character prefix, and
			
 
				-</TT><I>value</I><TT>
			
 
				-is an arbitrary quoted string.
			
 
				-The pairs themselves are separated by white space.
			
 
				-For example, a Plan 9 key and an APOP key
			
 
				-might be represented like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-dom=bell-labs.com proto=p9sk1 user=gre
			
 
				-	!password='don''t tell'
			
 
				-proto=apop server=x.y.com user=gre
			
 
				-	!password='open sesame'
			
 
				-</PRE></TT></DL>
			
 
				-If a value is empty or contains white space or single quotes, it must be quoted;
			
 
				-quotes are represented by doubled single quotes.
			
 
				-Attributes that begin with an exclamation mark
			
 
				-(</TT><TT>!</TT><TT>)
			
 
				-are considered
			
 
				-</TT><I>secret</I><TT>.
			
 
				-</TT><TT>Factotum</TT><TT>
			
 
				-will never let a secret value escape its address space
			
 
				-and will suppress keyboard echo when asking the user to type one.
			
 
				-</TT><br>&#32;<br>
			
 
				-A program requesting authentication selects a key
			
 
				-by providing a
			
 
				-<I>query</I>,
			
 
				-a list of elements to be matched by the key.
			
 
				-Each element in the list is either an
			
 
				-<I>attribute</I><TT>=</TT><I>value</I><TT>
			
 
				-pair, which is satisfied by keys with
			
 
				-exactly that pair;
			
 
				-or an attribute followed by a question mark,
			
 
				-</TT><I>attribute</I><TT>?</TT><I>,
			
 
				-which is satisfied by keys with some pair specifying
			
 
				-the attribute.
			
 
				-A key matches a query if every element in the list
			
 
				-is satisfied.
			
 
				-For instance, to select the APOP key in the previous example,
			
 
				-an APOP client process might specify the query
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-server=x.y.com proto=apop
			
 
				-</PRE></TT></DL>
			
 
				-Internally,
			
 
				-</I><TT>factotum</TT><I>'s
			
 
				-APOP module would add the requirements of
			
 
				-having
			
 
				-</I><TT>user</TT><I>
			
 
				-and
			
 
				-</I><TT>!password</TT><I>
			
 
				-attributes, forming the query
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-server=x.y.com proto=apop user? !password?
			
 
				-</PRE></TT></DL>
			
 
				-when searching for an appropriate key.
			
 
				-</I><br>&#32;<br>
			
 
				-<TT>Factotum</TT>
			
 
				-modules expect keys to have some well-known attributes.
			
 
				-For instance, the
			
 
				-<TT>proto</TT>
			
 
				-attribute specifies the protocol module
			
 
				-responsible for using a particular key,
			
 
				-and protocol modules may expect other well-known attributes
			
 
				-(many expect keys to have
			
 
				-<TT>!password</TT>
			
 
				-attributes, for example).
			
 
				-Additional attributes can be used as comments or for
			
 
				-further discrimination without intervention by 
			
 
				-<TT>factotum</TT>;
			
 
				-for example, the APOP and IMAP mail clients conventionally
			
 
				-include a
			
 
				-<TT>server</TT>
			
 
				-attribute to select an appropriate key for authentication.
			
 
				-<br>&#32;<br>
			
 
				-Unlike in SDSI,
			
 
				-keys in Plan 9 have no nested structure.  This design
			
 
				-keeps the representation simple and straightforward.
			
 
				-If necessary, we could add a nested attribute
			
 
				-or, in the manner of relational databases, an attribute that
			
 
				-selects another tuple, but so far the simple design has been sufficient.
			
 
				-<br>&#32;<br>
			
 
				-A simple common structure for all keys makes them easy for users
			
 
				-to administer,
			
 
				-but the set of attributes and their interpretation is still
			
 
				-protocol-specific and can be subtle.
			
 
				-Users may still
			
 
				-need to consult a manual to understand all details.
			
 
				-Many attributes
			
 
				-(<TT>proto</TT>,
			
 
				-<TT>user</TT>,
			
 
				-<TT>password</TT>,
			
 
				-<TT>server</TT>)
			
 
				-are self-explanatory and our short experience
			
 
				-has not uncovered any particular difficulty in handling keys.
			
 
				-Things
			
 
				-will likely get messier, however,
			
 
				-when we grapple with public
			
 
				-keys and their myriad components.
			
 
				-<H4>2.5 Protecting keys
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Secrets must be prevented from escaping
			
 
				-<TT>factotum</TT>.
			
 
				-There are a number of ways they could leak:
			
 
				-another process might be able to debug the agent process, the
			
 
				-agent might swap out to disk, or the process might willingly
			
 
				-disclose the key.
			
 
				-The last is the easiest to avoid:
			
 
				-secret information in a key is marked
			
 
				-as such, and
			
 
				-whenever
			
 
				-<TT>factotum</TT>
			
 
				-prints keys or queries for new
			
 
				-ones, it is careful to avoid displaying secret information.
			
 
				-(The only exception to this is the
			
 
				-``plaintext password'' protocol, which consists
			
 
				-of sending the values of the
			
 
				-<TT>user</TT>
			
 
				-and
			
 
				-<TT>!password</TT>
			
 
				-attributes.
			
 
				-Only keys tagged with
			
 
				-<TT>proto=pass</TT>
			
 
				-can have their passwords disclosed by this mechanism.)
			
 
				-<br>&#32;<br>
			
 
				-Preventing the first two forms of leakage
			
 
				-requires help from the kernel.
			
 
				-In Plan 9, every process is
			
 
				-represented by a directory in the
			
 
				-<TT>/proc</TT>
			
 
				-file system.
			
 
				-Using the files in this directory,
			
 
				-other processes could (with appropriate access permission) examine
			
 
				-<TT>factotum</TT>'s
			
 
				-memory and registers.
			
 
				-<TT>Factotum</TT>
			
 
				-is protected from processes of other users
			
 
				-by the default access bits of its
			
 
				-<TT>/proc</TT>
			
 
				-directory.
			
 
				-However, we'd also like to protect the
			
 
				-agent from other processes owned by the same user,
			
 
				-both to avoid honest mistakes and to prevent
			
 
				-an unattended terminal being
			
 
				-exploited to discover secret passwords.
			
 
				-To do this, we added a control message to
			
 
				-<TT>/proc</TT>
			
 
				-called
			
 
				-<TT>private</TT>.
			
 
				-Once the
			
 
				-<TT>factotum</TT>
			
 
				-process has written
			
 
				-<TT>private</TT>
			
 
				-to its
			
 
				-<TT>/proc/</TT><I>pid</I><TT>/ctl</TT><I>
			
 
				-file, no process can access
			
 
				-</I><TT>factotum</TT><I>'s
			
 
				-memory
			
 
				-through
			
 
				-</I><TT>/proc</TT><I>.
			
 
				-(Plan 9 has no other mechanism, such as
			
 
				-</I><TT>/dev/kmem</TT><I>,
			
 
				-for accessing a process's memory.)
			
 
				-</I><br>&#32;<br>
			
 
				-Similarly, the agent's address space should not be
			
 
				-swapped out, to prevent discovering unencrypted
			
 
				-keys on the swapping media.
			
 
				-The
			
 
				-<TT>noswap</TT>
			
 
				-control message in
			
 
				-<TT>/proc</TT>
			
 
				-prevents this scenario.
			
 
				-Neither
			
 
				-<TT>private</TT>
			
 
				-nor
			
 
				-<TT>noswap</TT>
			
 
				-is specific to
			
 
				-<TT>factotum</TT>.
			
 
				-User-level file servers such as
			
 
				-<TT>dossrv</TT>,
			
 
				-which interprets FAT file systems,
			
 
				-could use
			
 
				-<TT>noswap</TT>
			
 
				-to keep their buffer caches from being
			
 
				-swapped to disk.
			
 
				-<br>&#32;<br>
			
 
				-Despite our precautions, attackers might still
			
 
				-find a way to gain access to a process running as the host
			
 
				-owner on a machine.
			
 
				-Although they could not directly
			
 
				-access the keys, attackers could use the local
			
 
				-<TT>factotum</TT>
			
 
				-to perform authentications for them.
			
 
				-In the case
			
 
				-of some keys, for example those locking bank
			
 
				-accounts, we want a way to disable or at least
			
 
				-detect such access.
			
 
				-That is the role of the
			
 
				-<TT>confirm</TT>
			
 
				-attribute in a key.
			
 
				-Whenever a key with a
			
 
				-<TT>confirm</TT>
			
 
				-attribute is accessed, the local user must
			
 
				-confirm use of the key via a local GUI.
			
 
				-The next section describes the actual mechanism.
			
 
				-<br>&#32;<br>
			
 
				-We have not addressed leaks possible as a result of
			
 
				-someone rebooting or resetting a machine running
			
 
				-<TT>factotum</TT>.
			
 
				-For example, someone could reset a machine
			
 
				-and reboot it with a debugger instead of a kernel,
			
 
				-allowing them to examine the contents of memory
			
 
				-and find keys.  We have not found a satisfactory
			
 
				-solution to this problem.
			
 
				-<H4>2.6 Factotum transactions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-External programs manage
			
 
				-<TT>factotum</TT>'s
			
 
				-internal key state
			
 
				-through its file interface,
			
 
				-writing textual
			
 
				-<TT>key</TT>
			
 
				-and
			
 
				-<TT>delkey</TT>
			
 
				-commands to the
			
 
				-<TT>/mnt/factotum/ctl</TT>
			
 
				-file.
			
 
				-Both commands take a list of attributes as an argument.
			
 
				-<TT>Key</TT>
			
 
				-creates a key with the given attributes, replacing any
			
 
				-extant key with an identical set of public attributes.
			
 
				-<TT>Delkey</TT>
			
 
				-deletes all keys that match the given set of attributes.
			
 
				-Reading the 
			
 
				-<TT>ctl</TT>
			
 
				-file returns a list of keys, one per line, displaying only public attributes.
			
 
				-The following example illustrates these interactions.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cd /mnt/factotum
			
 
				-% ls -l
			
 
				--lrw------- gre gre 0 Jan 30 22:17 confirm
			
 
				---rw------- gre gre 0 Jan 30 22:17 ctl
			
 
				--lr-------- gre gre 0 Jan 30 22:17 log
			
 
				--lrw------- gre gre 0 Jan 30 22:17 needkey
			
 
				---r--r--r-- gre gre 0 Jan 30 22:17 proto
			
 
				---rw-rw-rw- gre gre 0 Jan 30 22:17 rpc
			
 
				-% cat &gt;ctl
			
 
				-key dom=bell-labs.com proto=p9sk1 user=gre
			
 
				-    !password='don''t tell'
			
 
				-key proto=apop server=x.y.com user=gre
			
 
				-    !password='bite me'
			
 
				-^D
			
 
				-% cat ctl
			
 
				-key dom=bell-labs.com proto=p9sk1 user=gre
			
 
				-key proto=apop server=x.y.com user=gre
			
 
				-% echo 'delkey proto=apop' &gt;ctl
			
 
				-% cat ctl
			
 
				-key dom=bell-labs.com proto=p9sk1 user=gre
			
 
				-% 
			
 
				-</PRE></TT></DL>
			
 
				-(A file with the
			
 
				-<TT>l</TT>
			
 
				-bit set can be opened by only one process at a time.)
			
 
				-<br>&#32;<br>
			
 
				-The heart of the interface is the
			
 
				-<TT>rpc</TT>
			
 
				-file.
			
 
				-Programs authenticate with
			
 
				-<TT>factotum</TT>
			
 
				-by writing a request to the
			
 
				-<TT>rpc</TT>
			
 
				-file
			
 
				-and reading back the reply; this sequence is called an RPC
			
 
				-<I>transaction</I>.
			
 
				-Requests and replies have the same format:
			
 
				-a textual verb possibly followed by arguments,
			
 
				-which may be textual or binary.
			
 
				-The most common reply verb is
			
 
				-<TT>ok</TT>,
			
 
				-indicating success.
			
 
				-An RPC session begins with a
			
 
				-<TT>start</TT>
			
 
				-transaction; the argument is a key query as described
			
 
				-earlier.
			
 
				-Once started, an RPC conversation usually consists of 
			
 
				-a sequence of
			
 
				-<TT>read</TT>
			
 
				-and
			
 
				-<TT>write</TT>
			
 
				-transactions.
			
 
				-If the conversation is successful, an
			
 
				-<TT>authinfo</TT>
			
 
				-transaction will return information about
			
 
				-the identities learned during the transaction.
			
 
				-The
			
 
				-<TT>attr</TT>
			
 
				-transaction returns a list of attributes for the current
			
 
				-conversation; the list includes any attributes given in
			
 
				-the 
			
 
				-<TT>start</TT>
			
 
				-query as well as any public attributes from keys being used.
			
 
				-<br>&#32;<br>
			
 
				-As an example of the
			
 
				-<TT>rpc</TT>
			
 
				-file in action, consider a mail client
			
 
				-connecting to a mail server and authenticating using
			
 
				-the POP3 protocol's APOP challenge-response command.
			
 
				-There are four programs involved: the mail client <I>P</I><SUB><I>C</I></SUB>, the client
			
 
				-<TT>factotum</TT>
			
 
				-<I>F</I><SUB><I>C</I></SUB>, the mail server <I>P</I><SUB><I>S</I></SUB>, and the server
			
 
				-<TT>factotum</TT>
			
 
				-<I>F</I><SUB><I>S</I></SUB>.
			
 
				-All authentication computations are handled by the
			
 
				-<TT>factotum</TT>
			
 
				-processes.
			
 
				-The mail programs' role is just to relay messages.
			
 
				-<br>&#32;<br>
			
 
				-At startup, the mail server at
			
 
				-<TT>x.y.com</TT>
			
 
				-begins an APOP conversation
			
 
				-with its
			
 
				-<TT>factotum</TT>
			
 
				-to obtain the banner greeting, which
			
 
				-includes a challenge:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>P</I><SUB><I>S</I></SUB>&rarr;<I>F</I><SUB><I>S</I></SUB>: start proto=apop role=server
			
 
				-<I>F</I><SUB><I>S</I></SUB>&rarr;<I>P</I><SUB><I>S</I></SUB>: ok
			
 
				-<I>P</I><SUB><I>S</I></SUB>&rarr;<I>F</I><SUB><I>S</I></SUB>: read
			
 
				-<I>F</I><SUB><I>S</I></SUB>&rarr;<I>P</I><SUB><I>S</I></SUB>: ok +OK POP3 <I>challenge</I>
			
 
				-</PRE></TT></DL>
			
 
				-Having obtained the challenge, the server greets the client:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>P</I><SUB><I>S</I></SUB>&rarr;<I>P</I><SUB><I>C</I></SUB>: +OK POP3 <I>challenge</I>
			
 
				-</PRE></TT></DL>
			
 
				-The client then uses an APOP conversation with its
			
 
				-<TT>factotum</TT>
			
 
				-to obtain a response:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>P</I><SUB><I>C</I></SUB>&rarr;<I>F</I><SUB><I>C</I></SUB>: start proto=apop role=client
			
 
				-            server=x.y.com
			
 
				-<I>F</I><SUB><I>C</I></SUB>&rarr;<I>P</I><SUB><I>C</I></SUB>: ok
			
 
				-<I>P</I><SUB><I>C</I></SUB>&rarr;<I>F</I><SUB><I>C</I></SUB>: write +OK POP3 <I>challenge</I>
			
 
				-<I>F</I><SUB><I>C</I></SUB>&rarr;<I>P</I><SUB><I>C</I></SUB>: ok
			
 
				-<I>P</I><SUB><I>C</I></SUB>&rarr;<I>F</I><SUB><I>C</I></SUB>: read
			
 
				-<I>F</I><SUB><I>C</I></SUB>&rarr;<I>P</I><SUB><I>C</I></SUB>: ok APOP gre <I>response</I>
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Factotum</TT>
			
 
				-requires that
			
 
				-<TT>start</TT>
			
 
				-requests include a 
			
 
				-<TT>proto</TT>
			
 
				-attribute, and the APOP module requires an additional
			
 
				-<TT>role</TT>
			
 
				-attribute, but the other attributes are optional and only
			
 
				-restrict the key space.
			
 
				-Before responding to the
			
 
				-<TT>start</TT>
			
 
				-transaction, the client
			
 
				-<TT>factotum</TT>
			
 
				-looks for a key to
			
 
				-use for the rest of the conversation.
			
 
				-Because of the arguments in the
			
 
				-<TT>start</TT>
			
 
				-request, the key must have public attributes
			
 
				-<TT>proto=apop</TT>
			
 
				-and
			
 
				-<TT>server=x.y.com</TT>;
			
 
				-as mentioned earlier,
			
 
				-the APOP module additionally requires that the key have
			
 
				-<TT>user</TT>
			
 
				-and
			
 
				-<TT>!password</TT>
			
 
				-attributes.
			
 
				-Now that the client has obtained a response
			
 
				-from its
			
 
				-<TT>factotum</TT>,
			
 
				-it echoes that response to the server:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>P</I><SUB><I>C</I></SUB>&rarr;<I>P</I><SUB><I>S</I></SUB>: APOP gre <I>response</I>
			
 
				-</PRE></TT></DL>
			
 
				-Similarly, the server passes this message to
			
 
				-its
			
 
				-<TT>factotum</TT>
			
 
				-and obtains another to send back.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>P</I><SUB><I>S</I></SUB>&rarr;<I>F</I><SUB><I>S</I></SUB>: write APOP gre <I>response</I>
			
 
				-<I>F</I><SUB><I>S</I></SUB>&rarr;<I>P</I><SUB><I>S</I></SUB>: ok
			
 
				-<I>P</I><SUB><I>S</I></SUB>&rarr;<I>F</I><SUB><I>S</I></SUB>: read
			
 
				-<I>F</I><SUB><I>S</I></SUB>&rarr;<I>P</I><SUB><I>S</I></SUB>: ok +OK welcome
			
 
				-
			
 
				-<I>P</I><SUB><I>S</I></SUB>&rarr;<I>P</I><SUB><I>C</I></SUB>: +OK welcome
			
 
				-</PRE></TT></DL>
			
 
				-Now the authentication protocol is done, and
			
 
				-the server can retrieve information
			
 
				-about what the protocol established.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>P</I><SUB><I>S</I></SUB>&rarr;<I>F</I><SUB><I>S</I></SUB>: authinfo
			
 
				-<I>F</I><SUB><I>S</I></SUB>&rarr;<I>P</I><SUB><I>S</I></SUB>: ok client=gre
			
 
				-            capability=<I>capability</I>
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>authinfo</TT>
			
 
				-data is a list of
			
 
				-<I>attr</I><TT>=</TT><I>value</I><TT>
			
 
				-pairs, here a client user name and a capability.
			
 
				-(Protocols that establish shared secrets or provide
			
 
				-mutual authentication indicate this by adding
			
 
				-appropriate
			
 
				-</TT><I>attr</I><TT>=</TT><I>value</I><TT>
			
 
				-pairs.)
			
 
				-The capability can be used by the server to change its
			
 
				-identity to that of the client, as described earlier.
			
 
				-Once it has changed its identity, the server can access and serve
			
 
				-the client's mailbox.
			
 
				-</TT><br>&#32;<br>
			
 
				-Two more files provide hooks for a graphical
			
 
				-<TT>factotum</TT>
			
 
				-control interface.
			
 
				-The first, 
			
 
				-<TT>confirm</TT>,
			
 
				-allows the user detailed control over the use of certain keys.
			
 
				-If a key has a
			
 
				-<TT>confirm=</TT>
			
 
				-attribute, then the user must approve each use of the key.
			
 
				-A separate program with a graphical interface reads from the
			
 
				-<TT>confirm</TT>
			
 
				-file to see when a confirmation is necessary.
			
 
				-The read blocks until a key usage needs to be approved, whereupon
			
 
				-it will return a line of the form
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-confirm tag=1 <I>attributes</I>
			
 
				-</PRE></TT></DL>
			
 
				-requesting permission to use the key with those public attributes.
			
 
				-The graphical interface then prompts the user for approval
			
 
				-and writes back
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-tag=1 answer=yes
			
 
				-</PRE></TT></DL>
			
 
				-(or
			
 
				-<TT>answer=no</TT>).
			
 
				-<br>&#32;<br>
			
 
				-The second file,
			
 
				-<TT>needkey</TT>,
			
 
				-diverts key requests.
			
 
				-In the APOP example, if a suitable key had not been found
			
 
				-during the
			
 
				-<TT>start</TT>
			
 
				-transaction,
			
 
				-<TT>factotum</TT>
			
 
				-would have indicated failure by
			
 
				-returning a response indicating
			
 
				-what key was needed:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>F</I><SUB><I>C</I></SUB>&rarr;<I>P</I><SUB><I>C</I></SUB>: needkey proto=apop
			
 
				-    server=x.y.com user? !password?
			
 
				-</PRE></TT></DL>
			
 
				-A typical client would then prompt the user for the desired
			
 
				-key information, create a new key via the
			
 
				-<TT>ctl</TT>
			
 
				-file, and then reissue the 
			
 
				-<TT>start</TT>
			
 
				-request.
			
 
				-If the
			
 
				-<TT>needkey</TT>
			
 
				-file is open,
			
 
				-then instead of failing, the transaction
			
 
				-will block, and the next read from the
			
 
				-<TT>/mnt/factotum/needkey</TT>
			
 
				-file will return a line of the form
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-needkey tag=1 <I>attributes</I><I>
			
 
				-</PRE></TT></DL>
			
 
				-The graphical interface then prompts the user for the needed
			
 
				-key information, creates the key via the
			
 
				-</I><TT>ctl</TT><I>
			
 
				-file, and writes back
			
 
				-</I><TT>tag=1</TT><I>
			
 
				-to resume the transaction.
			
 
				-</I><br>&#32;<br>
			
 
				-The remaining files are informational and used for debugging.
			
 
				-The
			
 
				-<TT>proto</TT>
			
 
				-file contains a list of supported protocols (to see what protocols the
			
 
				-system supports,
			
 
				-<TT>cat</TT>
			
 
				-<TT>/mnt/factotum/proto</TT>),
			
 
				-and the
			
 
				-<TT>log</TT>
			
 
				-file contains a log of operations and debugging output
			
 
				-enabled by a
			
 
				-<TT>debug</TT>
			
 
				-control message.
			
 
				-<br>&#32;<br>
			
 
				-The next few sections explain how
			
 
				-<TT>factotum</TT>
			
 
				-is used by system services.
			
 
				-<H4>3 Authentication in 9P
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Plan 9 uses a remote file access protocol, 9P
			
 
				-[Pike93],
			
 
				-to connect to resources such as the
			
 
				-file server and remote processes.
			
 
				-The original design for 9P included special messages at the start of a conversation
			
 
				-to authenticate the user.
			
 
				-Multiple users can share a single connection, such as when a CPU server
			
 
				-runs processes for many users connected to a single file server,
			
 
				-but each must authenticate separately.
			
 
				-The authentication protocol, similar to that of Kerberos
			
 
				-[Stei88],
			
 
				-used a sequence of messages passed between client, file server, and authentication
			
 
				-server to verify the identities of the user, calling machine, and serving machine.
			
 
				-One major drawback to the design was that the authentication method was defined by 9P
			
 
				-itself and could not be changed.  
			
 
				-Moreover, there was no mechanism to relegate
			
 
				-authentication to an external (trusted) agent,
			
 
				-so a process implementing 9P needed, besides support for file service,
			
 
				-a substantial body of cryptographic code to implement a handful of startup messages
			
 
				-in the protocol.
			
 
				-<br>&#32;<br>
			
 
				-A recent redesign of 9P
			
 
				-addressed a number of file service issues outside the scope of this paper.
			
 
				-On issues of authentication, there were two goals:
			
 
				-first, to remove details about authentication from the
			
 
				-protocol itself; second, to allow an external program to execute the authentication
			
 
				-part of the protocol.
			
 
				-In particular, we wanted a way to quickly incorporate
			
 
				-ideas found in other systems such as SFS
			
 
				-[Mazi99].
			
 
				-<br>&#32;<br>
			
 
				-Since 9P is a file service protocol, the solution involved creating a new type of file
			
 
				-to be served: an
			
 
				-<I>authentication</I>
			
 
				-<I>file</I>.
			
 
				-Connections to a 9P service begin in a state that
			
 
				-allows no general file access but permits the client
			
 
				-to open an authentication file
			
 
				-by sending a special message, generated by the new
			
 
				-<TT>fauth</TT>
			
 
				-system call:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-afd = fauth(int fd, char *servicename);
			
 
				-</PRE></TT></DL>
			
 
				-Here
			
 
				-<TT>fd</TT>
			
 
				-is the user's file descriptor for the established network connection to the 9P server
			
 
				-and
			
 
				-<TT>servicename</TT>
			
 
				-is the name of the desired service offered on that server, typically the file subsystem
			
 
				-to be accessed.
			
 
				-The returned file descriptor,
			
 
				-<TT>afd</TT>,
			
 
				-is a unique handle representing the authentication file
			
 
				-created for this connection to authenticate to
			
 
				-this service; it is analogous to a capability.
			
 
				-The authentication file represented by
			
 
				-<TT>afd</TT>
			
 
				-is not otherwise addressable on the server, such as through
			
 
				-the file name hierarchy.
			
 
				-In all other respects, it behaves like a regular file;
			
 
				-most important, it accepts standard read and write operations.
			
 
				-<br>&#32;<br>
			
 
				-To prove its identity, the user process (via
			
 
				-<TT>factotum</TT>)
			
 
				-executes the authentication protocol,
			
 
				-described in the next section of this paper,
			
 
				-over the
			
 
				-<TT>afd</TT>
			
 
				-file descriptor with ordinary reads and writes.
			
 
				-When client and server have successfully negotiated, the authentication file
			
 
				-changes state so it can be used as evidence of authority in
			
 
				-<TT>mount</TT>.
			
 
				-<br>&#32;<br>
			
 
				-Once identity is established, the process presents the (now verified)
			
 
				-<TT>afd</TT>
			
 
				-as proof of identity to the
			
 
				-<TT>mount</TT>
			
 
				-system call:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mount(int fd, int afd, char *mountpoint,
			
 
				-      int flag, char *servicename)
			
 
				-</PRE></TT></DL>
			
 
				-If the
			
 
				-<TT>mount</TT>
			
 
				-succeeds, the user now
			
 
				-has appropriate permissions for the file hierarchy made
			
 
				-visible at the mount point.
			
 
				-<br>&#32;<br>
			
 
				-This sequence of events has several advantages.
			
 
				-First, the actual authentication protocol is implemented using regular reads and writes,
			
 
				-not special 9P messages, so
			
 
				-they can be processed, forwarded, proxied, and so on by
			
 
				-any 9P agent without special arrangement.
			
 
				-Second, the business of negotiating the authentication by reading and writing the
			
 
				-authentication file can be delegated to an outside agent, in particular
			
 
				-<TT>factotum</TT>;
			
 
				-the programs that implement the client and server ends of a 9P conversation need
			
 
				-no authentication or cryptographic code.
			
 
				-Third,
			
 
				-since the authentication protocol is not defined by 9P itself, it is easy to change and
			
 
				-can even be negotiated dynamically.
			
 
				-Finally, since
			
 
				-<TT>afd</TT>
			
 
				-acts like a capability, it can be treated like one:
			
 
				-handed to another process to give it special permissions;
			
 
				-kept around for later use when authentication is again required;
			
 
				-or closed to make sure no other process can use it.
			
 
				-<br>&#32;<br>
			
 
				-All these advantages stem from moving the authentication negotiation into
			
 
				-reads and writes on a separate file.
			
 
				-As is often the case in Plan 9,
			
 
				-making a resource (here authentication) accessible with a file-like interface
			
 
				-reduces
			
 
				-<I>a</I>
			
 
				-<I>priori</I>
			
 
				-the need for special interfaces.
			
 
				-<br>&#32;<br>
			
 
				-<H4>3.1 Plan 9 shared key protocol
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-In addition to the various standard protocols supported by
			
 
				-<TT>factotum</TT>,
			
 
				-we use a shared key protocol for native
			
 
				-Plan 9 authentication.
			
 
				-This protocol provides backward compatibility with
			
 
				-older versions of the system.  One reason for the new
			
 
				-architecture is to let us replace such protocols
			
 
				-in the near future with more cryptographically secure ones.
			
 
				-<br>&#32;<br>
			
 
				-<I>P9sk1</I>
			
 
				-is a shared key protocol that uses tickets much like those
			
 
				-in the original Kerberos.
			
 
				-The difference is that we've
			
 
				-replaced the expiration time in Kerberos tickets with
			
 
				-a random nonce parameter and a counter.
			
 
				-We summarize it here:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>C</I>-&gt;<I>S</I>:  <I>nonce</I><SUB><I>C</I></SUB>
			
 
				-<I>S</I>-&gt;<I>C</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>
			
 
				-
			
 
				-<I>C</I>-&gt;<I>A</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,
			
 
				-         <I>factotum</I><SUB><I>C</I></SUB>
			
 
				-<I>A</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>C</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
			
 
				-         <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>}
			
 
				-
			
 
				-<I>C</I>-&gt;<I>S</I>:  <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
			
 
				-         <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>counter</I>}
			
 
				-<I>S</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>C</I></SUB>,<I>counter</I>}
			
 
				-</PRE></TT></DL>
			
 
				-(Here <I>K</I>{<I>x</I>} indicates <I>x</I> encrypted with
			
 
				-DES key <I>K</I>.)
			
 
				-The first two messages exchange nonces and server identification.
			
 
				-After this initial exchange, the client contacts the authentication
			
 
				-server to obtain a pair of encrypted tickets, one encrypted with
			
 
				-the client key and one with the server key.
			
 
				-The client relays the server ticket to the server.
			
 
				-The server believes that the ticket is new
			
 
				-because it contains
			
 
				-<I>nonce</I><SUB><I>S</I></SUB>
			
 
				-and that the ticket is from the authentication
			
 
				-server because it is encrypted in the server key <I>K</I><SUB><I>S</I></SUB>.
			
 
				-The ticket is basically a statement from the authentication
			
 
				-server that now <I>uid</I><SUB><I>C</I></SUB> and <I>uid</I><SUB><I>S</I></SUB> share a
			
 
				-secret <I>K</I><SUB><I>n</I></SUB>.
			
 
				-The authenticator <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>counter</I>}
			
 
				-convinces the server that the client knows <I>K</I><SUB><I>n</I></SUB> and thus
			
 
				-must be <I>uid</I><SUB><I>C</I></SUB>.
			
 
				-Similarly, authenticator <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>C</I></SUB>,<I>counter</I>}
			
 
				-convinces the client that the server knows <I>K</I><SUB><I>n</I></SUB> and thus
			
 
				-must be <I>uid</I><SUB><I>S</I></SUB>.
			
 
				-Tickets can be reused, without contacting the authentication
			
 
				-server again, by incrementing the counter before each
			
 
				-authenticator is generated.
			
 
				-<br>&#32;<br>
			
 
				-In the future we hope to introduce a public key version of
			
 
				-p9sk1,
			
 
				-which would allow authentication even
			
 
				-when the authentication server is not available.
			
 
				-<H4>3.2 The authentication server
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Each Plan 9 security domain has an authentication server (AS)
			
 
				-that all users trust to keep the complete set of shared keys.
			
 
				-It also offers services for users and administrators to manage the
			
 
				-keys, create and disable accounts, and so on.
			
 
				-It typically runs on
			
 
				-a standalone machine with few other services.
			
 
				-The AS comprises two services,
			
 
				-<TT>keyfs</TT>
			
 
				-and
			
 
				-<TT>authsrv</TT>.
			
 
				-<br>&#32;<br>
			
 
				-<TT>Keyfs</TT>
			
 
				-is a user-level file system that manages an
			
 
				-encrypted database of user accounts.
			
 
				-Each account is represented by a directory containing the
			
 
				-files
			
 
				-<TT>key</TT>,
			
 
				-containing the Plan 9 key for p9sk1;
			
 
				-<TT>secret</TT>
			
 
				-for the challenge/response protocols (APOP, VNC, CHAP, MSCHAP,
			
 
				-CRAM);
			
 
				-<TT>log</TT>
			
 
				-for authentication outcomes;
			
 
				-<TT>expire</TT>
			
 
				-for an expiration time; and
			
 
				-<TT>status</TT>.
			
 
				-If the expiration time passes,
			
 
				-if the number of successive failed authentications
			
 
				-exceeds 50, or if
			
 
				-<TT>disabled</TT>
			
 
				-is written to the status file,
			
 
				-any attempt to access the
			
 
				-<TT>key</TT>
			
 
				-or
			
 
				-<TT>secret</TT>
			
 
				-files will fail.
			
 
				-<br>&#32;<br>
			
 
				-<TT>Authsrv</TT>
			
 
				-is a network service that brokers shared key authentications
			
 
				-for the protocols p9sk1, APOP, VNC, CHAP, MSCHAP,
			
 
				-and CRAM.  Remote users can also call
			
 
				-<TT>authsrv</TT>
			
 
				-to change their passwords.
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-p9sk1
			
 
				-protocol was described in the previous
			
 
				-section.
			
 
				-The challenge/response protocols differ
			
 
				-in detail but all follow the general structure:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>C</I>-&gt;<I>S</I>:  <I>nonce</I><SUB><I>C</I></SUB>
			
 
				-<I>S</I>-&gt;<I>C</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>
			
 
				-<I>C</I>-&gt;<I>A</I>:  <I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>domain</I><SUB><I>S</I></SUB>,
			
 
				-         <I>hostid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>
			
 
				-<I>A</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>C</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
			
 
				-         <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>}
			
 
				-<I>C</I>-&gt;<I>S</I>:  <I>K</I><SUB><I>S</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>,<I>uid</I><SUB><I>C</I></SUB>,<I>uid</I><SUB><I>S</I></SUB>,<I>K</I><SUB><I>n</I></SUB>},
			
 
				-         <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>S</I></SUB>}
			
 
				-<I>S</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>n</I></SUB>{<I>nonce</I><SUB><I>C</I></SUB>}
			
 
				-</PRE></TT></DL>
			
 
				-The password protocol is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>C</I>-&gt;<I>A</I>:  <I>uid</I><SUB><I>C</I></SUB>
			
 
				-<I>A</I>-&gt;<I>C</I>:  <I>K</I><SUB><I>c</I></SUB>{<I>K</I><SUB><I>n</I></SUB>}
			
 
				-<I>C</I>-&gt;<I>A</I>:  <I>K</I><SUB><I>n</I></SUB>{<I>password</I><SUB><I>old</I></SUB>,<I>password</I><SUB><I>new</I></SUB>}
			
 
				-<I>A</I>-&gt;<I>C</I>:  <I>OK</I>
			
 
				-</PRE></TT></DL>
			
 
				-To avoid replay attacks, the pre-encryption
			
 
				-clear text for each of the protocols (as well as for p9sk1) includes
			
 
				-a tag indicating the encryption's role in the
			
 
				-protocol.  We elided them in these outlines.
			
 
				-<H4>3.3 Protocol negotiation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Rather than require particular protocols for particular services,
			
 
				-we implemented a negotiation metaprotocol,
			
 
				-<I>p9any</I>,
			
 
				-which chooses the actual authentication protocol to use.
			
 
				-P9any
			
 
				-is used now by all native services on Plan 9.
			
 
				-<br>&#32;<br>
			
 
				-The metaprotocol is simple.  The callee sends a
			
 
				-null-terminated string of the form:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-v.<I>n</I> <I>proto</I><SUB>1</SUB>@<I>domain</I><SUB>1</SUB> <I>proto</I><SUB>2</SUB>@<I>domain</I><SUB>2</SUB> ...
			
 
				-</PRE></TT></DL>
			
 
				-where
			
 
				-<I>n</I>
			
 
				-is a decimal version number, <I>proto</I><SUB><I>k</I></SUB>
			
 
				-is the name of a protocol for which the
			
 
				-<TT>factotum</TT>
			
 
				-has a key, and <I>domain</I><SUB><I>k</I></SUB>
			
 
				-is the name of the domain in which the key is
			
 
				-valid.
			
 
				-The caller then responds
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>proto</I>@<I>domain</I>
			
 
				-</PRE></TT></DL>
			
 
				-indicating its choice.
			
 
				-Finally the callee responds
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-OK
			
 
				-</PRE></TT></DL>
			
 
				-Any other string indicates failure.
			
 
				-At this point the chosen protocol commences.
			
 
				-The final fixed-length reply is used to make it easy to
			
 
				-delimit the I/O stream should the chosen protocol
			
 
				-require the caller rather than the callee to send the first message.
			
 
				-<br>&#32;<br>
			
 
				-With this negotiation metaprotocol, the underlying
			
 
				-authentication protocols used for Plan 9 services
			
 
				-can be changed under any application just
			
 
				-by changing the keys known by the
			
 
				-<TT>factotum</TT>
			
 
				-agents at each end.
			
 
				-<br>&#32;<br>
			
 
				-P9any is vulnerable to man-in-the-middle attacks
			
 
				-to the extent that the attacker may constrain the
			
 
				-possible choices by changing the stream.  However,
			
 
				-we believe this is acceptable since the attacker
			
 
				-cannot force either side to choose algorithms
			
 
				-that it is unwilling to use.
			
 
				-<H4>4 Library Interface to Factotum
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Although programs can access
			
 
				-<TT>factotum</TT>'s
			
 
				-services through its file system interface,
			
 
				-it is more common to use a C library that
			
 
				-packages the interaction.
			
 
				-There are a number of routines in the library,
			
 
				-not all of which are relevant here, but a few
			
 
				-examples should give their flavor.
			
 
				-<br>&#32;<br>
			
 
				-First, consider the problem of mounting a remote file server using 9P.
			
 
				-An earlier discussion showed how the
			
 
				-<TT>fauth</TT>
			
 
				-and
			
 
				-<TT>mount</TT>
			
 
				-system calls use an authentication file,
			
 
				-<TT>afd</TT>,
			
 
				-as a capability,
			
 
				-but not how
			
 
				-<TT>factotum</TT>
			
 
				-manages
			
 
				-<TT>afd</TT>.
			
 
				-The library contains a routine,
			
 
				-<TT>amount</TT>
			
 
				-(authenticated mount), that is used by most programs in preference to
			
 
				-the raw
			
 
				-<TT>fauth</TT>
			
 
				-and
			
 
				-<TT>mount</TT>
			
 
				-calls.
			
 
				-<TT>Amount</TT>
			
 
				-engages
			
 
				-<TT>factotum</TT>
			
 
				-to validate
			
 
				-<TT>afd</TT>;
			
 
				-here is the complete code:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int
			
 
				-amount(int fd, char *mntpt,
			
 
				-	int flags, char *aname)
			
 
				-{
			
 
				-	int afd, ret;
			
 
				-	AuthInfo *ai;
			
 
				-
			
 
				-	afd = fauth(fd, aname);
			
 
				-	if(afd &gt;= 0){
			
 
				-		ai = auth_proxy(afd, amount_getkey,
			
 
				-			"proto=p9any role=client");
			
 
				-		if(ai != NULL)
			
 
				-			auth_freeAI(ai);
			
 
				-	}
			
 
				-	ret = mount(fd, afd, mntpt,
			
 
				-		flags, aname);
			
 
				-	if(afd &gt;= 0)
			
 
				-		close(afd);
			
 
				-	return ret;
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-where parameter
			
 
				-<TT>fd</TT>
			
 
				-is a file descriptor returned by
			
 
				-<TT>open</TT>
			
 
				-or
			
 
				-<TT>dial</TT>
			
 
				-for a new connection to a file server.
			
 
				-The conversation with
			
 
				-<TT>factotum</TT>
			
 
				-occurs in the call to
			
 
				-<TT>auth_proxy</TT>,
			
 
				-which specifies, as a key query,
			
 
				-which authentication protocol to use
			
 
				-(here the metaprotocol
			
 
				-<TT>p9any</TT>)
			
 
				-and the role being played
			
 
				-(<TT>client</TT>).
			
 
				-<TT>Auth_proxy</TT>
			
 
				-will read and write the
			
 
				-<TT>factotum</TT>
			
 
				-files, and the authentication file descriptor
			
 
				-<TT>afd</TT>,
			
 
				-to validate the user's right to access the service.
			
 
				-If the call is successful, any auxiliary data, held in an
			
 
				-<TT>AuthInfo</TT>
			
 
				-structure, is freed.
			
 
				-In any case, the
			
 
				-<TT>mount</TT>
			
 
				-is then called with the (perhaps validated)
			
 
				-<TT>afd</TT>.
			
 
				-A 9P server can cause the
			
 
				-<TT>fauth</TT>
			
 
				-system call to fail, as an indication that authentication is
			
 
				-not required to access the service.
			
 
				-<br>&#32;<br>
			
 
				-The second argument to
			
 
				-<TT>auth_proxy</TT>
			
 
				-is a function, here
			
 
				-<TT>amount_getkey</TT>,
			
 
				-to be called if secret information such as a password or
			
 
				-response to a challenge is required as part of the authentication.
			
 
				-This function, of course, will provide this data to
			
 
				-<TT>factotum</TT>
			
 
				-as a
			
 
				-<TT>key</TT>
			
 
				-message on the
			
 
				-<TT>/mnt/factotum/ctl</TT>
			
 
				-file.
			
 
				-<br>&#32;<br>
			
 
				-Although the final argument to
			
 
				-<TT>auth_proxy</TT>
			
 
				-in this example is a simple string, in general
			
 
				-it can be a formatted-print specifier in the manner of
			
 
				-<TT>printf</TT>,
			
 
				-to enable the construction of more elaborate key queries.
			
 
				-<br>&#32;<br>
			
 
				-As another example, consider the Plan 9
			
 
				-<TT>cpu</TT>
			
 
				-service, which exports local devices to a shell process on
			
 
				-a remote machine, typically
			
 
				-to connect the local screen and keyboard to a more powerful computer.
			
 
				-At heart,
			
 
				-<TT>cpu</TT>
			
 
				-is a superset of a service called
			
 
				-<TT>exportfs</TT>
			
 
				-[Pike93],
			
 
				-which allows one machine to see an arbitrary portion of the file name space
			
 
				-of another machine, such as to
			
 
				-export the network device to another machine
			
 
				-for gatewaying.
			
 
				-However,
			
 
				-<TT>cpu</TT>
			
 
				-is not just
			
 
				-<TT>exportfs</TT>
			
 
				-because it also delivers signals such as interrupt
			
 
				-and negotiates the initial environment
			
 
				-for the remote shell.
			
 
				-<br>&#32;<br>
			
 
				-To authenticate an instance of
			
 
				-<TT>cpu</TT>
			
 
				-requires
			
 
				-<TT>factotum</TT>
			
 
				-processes on both ends: the local, client
			
 
				-end running as the user on a terminal
			
 
				-and the remote, server
			
 
				-end running as the host owner of the server machine.
			
 
				-Here is schematic code for the two ends:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/* client */
			
 
				-int
			
 
				-p9auth(int fd)
			
 
				-{
			
 
				-	AuthInfo *ai;
			
 
				-
			
 
				-	ai = auth_proxy(fd, auth_getkey,
			
 
				-		"proto=p9any role=client");
			
 
				-	if(ai == NULL)
			
 
				-		return -1;
			
 
				-
			
 
				-	/* start cpu protocol here */
			
 
				-}
			
 
				-
			
 
				-/* server */
			
 
				-int
			
 
				-srvp9auth(int fd, char *user)
			
 
				-{
			
 
				-	AuthInfo *ai;
			
 
				-
			
 
				-	ai = auth_proxy(fd, NULL,
			
 
				-		"proto=p9any role=server");
			
 
				-	if(ai == NULL)
			
 
				-		return -1;
			
 
				-	/* set user id for server process */
			
 
				-	if(auth_chuid(ai, NULL) &lt; 0)
			
 
				-		return -1;
			
 
				-
			
 
				-	/* start cpu protocol here */
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Auth_chuid</TT>
			
 
				-encapsulates the negotiation to change a user id using the
			
 
				-<TT>caphash</TT>
			
 
				-and
			
 
				-<TT>capuse</TT>
			
 
				-files of the (server) kernel.
			
 
				-Note that although the client process may ask the user for new keys, using
			
 
				-<TT>auth_getkey</TT>,
			
 
				-the server machine, presumably a shared machine with a pseudo-user for
			
 
				-the host owner, sets the key-getting function to
			
 
				-<TT>NULL</TT>.
			
 
				-<H4>5 Secure Store
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Factotum</TT>
			
 
				-keeps its keys in volatile memory, which must somehow be
			
 
				-initialized at boot time.
			
 
				-Therefore,
			
 
				-<TT>factotum</TT>
			
 
				-must be
			
 
				-supplemented by a persistent store, perhaps
			
 
				-a floppy disk containing a key file of commands to be copied into
			
 
				-<TT>/mnt/factotum/ctl</TT>
			
 
				-during bootstrap.
			
 
				-But removable media are a nuisance to carry and
			
 
				-are vulnerable to theft.
			
 
				-Keys could be stored encrypted on a shared file system, but
			
 
				-only if those keys are not necessary for authenticating to
			
 
				-the file system in the first place.
			
 
				-Even if the keys are encrypted under a user
			
 
				-password, a thief might well succeed with a dictionary attack.
			
 
				-Other risks of local storage are loss of the contents
			
 
				-through mechanical mishap or dead batteries.
			
 
				-Thus for convenience and
			
 
				-safety we provide a
			
 
				-<TT>secstore</TT>
			
 
				-(secure store) server in the network to hold each user's permanent list of keys, a
			
 
				-<I>key</I>
			
 
				-<I>file</I>.
			
 
				-<br>&#32;<br>
			
 
				-<TT>Secstore</TT>
			
 
				-is a file server for encrypted data,
			
 
				-used only during bootstrapping.
			
 
				-It must provide strong
			
 
				-authentication and resistance to passive and active protocol attacks
			
 
				-while assuming nothing more from the client than a password.
			
 
				-Once
			
 
				-<TT>factotum</TT>
			
 
				-has loaded the key file, further encrypted or authenticated
			
 
				-file storage can be accomplished by standard mechanisms.
			
 
				-<br><img src="-.19111514.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-The cryptographic technology that enables
			
 
				-<TT>secstore</TT>
			
 
				-is a form of encrypted
			
 
				-key exchange
			
 
				-called PAK
			
 
				-[Boyk00],
			
 
				-analogous to
			
 
				-EKE
			
 
				-[Bell93],
			
 
				-SRP
			
 
				-[Wu98],
			
 
				-or
			
 
				-SPEKE
			
 
				-[Jabl].
			
 
				-PAK was chosen
			
 
				-because it comes with a proof of equivalence in strength to
			
 
				-Diffie-Hellman; subtle flaws in some earlier encrypted key exchange
			
 
				-protocols and implementations have encouraged us to take special care.
			
 
				-In outline, the PAK protocol is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>C</I>-&gt;<I>S</I>: <I>C</I>,<I>g</I><SUP><I>x</I></SUP><I>H</I>
			
 
				-<I>S</I>-&gt;<I>C</I>: <I>S</I>,<I>g</I><SUP><I>y</I></SUP>,<I>hash</I>(<I>g</I><SUP><I>xy</I></SUP>,<I>C</I>,<I>S</I>)
			
 
				-<I>C</I>-&gt;<I>S</I>: <I>hash</I>(<I>g</I><SUP><I>xy</I></SUP>,<I>S</I>,<I>C</I>)
			
 
				-</PRE></TT></DL>
			
 
				-where <I>H</I> is a preshared secret between client <I>C</I> and server <I>S</I>.
			
 
				-There are several variants of PAK, all presented in papers
			
 
				-mainly concerned with proofs of cryptographic properties.
			
 
				-To aid implementers, we have distilled a description of the specific
			
 
				-version we use into an Appendix to this paper.
			
 
				-The Plan 9 open source license provides for use of Lucent's
			
 
				-encrypted key exchange patents in this context.
			
 
				-<br>&#32;<br>
			
 
				-As a further layer of defense against password theft,
			
 
				-we provide (within the encrypted channel <I>C</I>-&gt;<I>S</I>)
			
 
				-information that is validated at a RADIUS server,
			
 
				-such as the digits from a hardware token
			
 
				-[RFC2138].
			
 
				-This provides two-factor authentication, which potentially
			
 
				-requires tricking two independent administrators in any attack by
			
 
				-social engineering.
			
 
				-<br>&#32;<br>
			
 
				-The key file stored on the server is encrypted with AES (Rijndael) using CBC
			
 
				-with a 10-byte initialization vector and trailing authentication padding.
			
 
				-All this is invisible to the user of
			
 
				-<TT>secstore</TT>.
			
 
				-For that matter, it is invisible to the
			
 
				-<TT>secstore</TT>
			
 
				-server as well;
			
 
				-if the AES Modes of Operation are standardized and a new encryption format
			
 
				-designed, it can be implemented by a client without change to the server.
			
 
				-The
			
 
				-<TT>secstore</TT>
			
 
				-is deliberately not backed up;  the user is expected to
			
 
				-use more than one
			
 
				-<TT>secstore</TT>
			
 
				-or save the key file on removable media
			
 
				-and lock it away.
			
 
				-The user's password is hashed to create the <I>H</I> used
			
 
				-in the PAK protocol;  a different hash of the password is used as
			
 
				-the file encryption key.
			
 
				-Finally, there is a command (inside the authenticated,
			
 
				-encrypted channel between client and
			
 
				-<TT>secstore</TT>)
			
 
				-to change passwords by sending
			
 
				-a new <I>H</I>; 
			
 
				-for consistency, the client process must at the same time fetch and re-encrypt all files.
			
 
				-<br>&#32;<br>
			
 
				-When
			
 
				-<TT>factotum</TT>
			
 
				-starts, it dials the local
			
 
				-<TT>secstore</TT>
			
 
				-and checks whether the user has an account.
			
 
				-If so,
			
 
				-it prompts for the user's
			
 
				-<TT>secstore</TT>
			
 
				-password and fetches the key file.
			
 
				-The PAK protocol
			
 
				-ensures mutual authentication and prevents dictionary attacks on the password
			
 
				-by passive wiretappers or active intermediaries.
			
 
				-Passwords saved in
			
 
				-the key file can be long random strings suitable for
			
 
				-simpler challenge/response authentication protocols.
			
 
				-Thus the user need only remember
			
 
				-a single, weaker password to enable strong, &ldquo;single sign-on&rdquo; authentication to
			
 
				-unchanged legacy applications scattered across multiple authentication domains.
			
 
				-<H4>6 Transport Layer Security
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Since the Plan 9 operating system is designed for use in network elements
			
 
				-that must withstand direct attack, unguarded by firewall or VPN, we seek
			
 
				-to ensure that all applications use channels with appropriate mutual
			
 
				-authentication and encryption.
			
 
				-A principal tool for this is TLS 1.0
			
 
				-[RFC2246].
			
 
				-(TLS 1.0 is nearly the same as SSL 3.0,
			
 
				-and our software is designed to interoperate
			
 
				-with implementations of either standard.)
			
 
				-<br>&#32;<br>
			
 
				-TLS defines a record layer protocol for message integrity and privacy
			
 
				-through the use of message digesting and encryption with shared secrets.
			
 
				-We implement this service as a kernel device, though it could
			
 
				-be performed at slightly higher cost by invoking a separate program.
			
 
				-The library interface to the TLS kernel device is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int pushtls(int fd, char *hashalg,
			
 
				-    char *cryptalg, int isclient,
			
 
				-    char *secret, char *dir);
			
 
				-</PRE></TT></DL>
			
 
				-Given a file descriptor, the names of message digest and
			
 
				-encryption algorithms, and the shared secret,
			
 
				-<TT>pushtls</TT>
			
 
				-returns a new file descriptor for the encrypted connection.
			
 
				-(The final argument
			
 
				-<TT>dir</TT>
			
 
				-receives the name of the directory in the TLS device that
			
 
				-is associated with the new connection.)
			
 
				-The function is named by analogy with the &ldquo;push&rdquo; operation
			
 
				-supported by the stream I/O system of Research Unix and the
			
 
				-first two editions of Plan 9.
			
 
				-Because adding encryption is as simple as replacing one
			
 
				-file descriptor with another, adding encryption to a particular
			
 
				-network service is usually trivial.
			
 
				-<br>&#32;<br>
			
 
				-The Plan 9 shared key authentication protocols establish a shared 56-bit secret
			
 
				-as a side effect.
			
 
				-Native Plan 9 network services such as
			
 
				-<TT>cpu</TT>
			
 
				-and
			
 
				-<TT>exportfs</TT>
			
 
				-use these protocols for authentication and then invoke 
			
 
				-<TT>pushtls</TT>
			
 
				-with the shared secret.
			
 
				-<br>&#32;<br>
			
 
				-Above the record layer, TLS specifies a handshake protocol using public keys
			
 
				-to establish the session secret.
			
 
				-This protocol is widely used with HTTP and IMAP4
			
 
				-to provide server authentication, though with client certificates it could provide
			
 
				-mutual authentication.  The library function
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int tlsClient(int fd, TLSconn *conn)
			
 
				-</PRE></TT></DL>
			
 
				-handles the initial handshake and returns the result of
			
 
				-<TT>pushtls</TT>.
			
 
				-On return, it fills the
			
 
				-<TT>conn</TT>
			
 
				-structure with the session ID used
			
 
				-and the X.509 certificate presented by the
			
 
				-server, but makes no effort to verify the certificate.
			
 
				-Although the original design intent of X.509 certificates expected
			
 
				-that they would be used with a Public Key Infrastructure,
			
 
				-reliable deployment has been so long delayed and problematic
			
 
				-that we have adopted the simpler policy of just using the
			
 
				-X.509 certificate as a representation of the public key,
			
 
				-depending on a locally-administered directory of SHA1 thumbprints
			
 
				-to allow applications to decide which public keys to trust
			
 
				-for which purposes.
			
 
				-<H4>7 Related Work and Discussion
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Kerberos, one of the earliest distributed authentication
			
 
				-systems, keeps a set of authentication tickets in a temporary file called
			
 
				-a ticket cache.  The ticket cache is protected by Unix file permissions.
			
 
				-An environment variable containing the file name of the ticket cache
			
 
				-allows for different ticket caches in different simultaneous login sessions.
			
 
				-A user logs in by typing his or her Kerberos password.
			
 
				-The login program uses the Kerberos password to obtain a temporary
			
 
				-ticket-granting ticket from the authentication server, initializes the
			
 
				-ticket cache with the ticket-granting ticket, and then forgets the password.
			
 
				-Other applications can use the ticket-granting ticket to sign tickets
			
 
				-for themselves on behalf of the user during the login session.
			
 
				-The ticket cache is removed when the user logs out
			
 
				-[Stei88].
			
 
				-The ticket cache relieves the user from typing a password
			
 
				-every time authentication is needed.
			
 
				-<br>&#32;<br>
			
 
				-The secure shell SSH develops this idea further, replacing the
			
 
				-temporary file with a named Unix domain socket connected to
			
 
				-a user-level program, called an agent.
			
 
				-Once the SSH agent is started and initialized with one or
			
 
				-more RSA private keys, SSH clients can employ it
			
 
				-to perform RSA authentications on their behalf.
			
 
				-In the absence of an agent, SSH typically uses RSA keys
			
 
				-read from encrypted disk files or uses passphrase-based
			
 
				-authentication, both of which would require prompting the user
			
 
				-for a passphrase whenever authentication is needed
			
 
				-[Ylon96].
			
 
				-The self-certifying file system SFS uses a similar agent
			
 
				-[Kami00],
			
 
				-not only for moderating the use of client authentication keys 
			
 
				-but also for verifying server public keys
			
 
				-[Mazi99].
			
 
				-<br>&#32;<br>
			
 
				-<TT>Factotum</TT>
			
 
				-is a logical continuation of this evolution,
			
 
				-replacing the program-specific SSH or SFS agents with
			
 
				-a general agent capable of serving a wide variety of programs.
			
 
				-Having one agent for all programs removes the need
			
 
				-to have one agent for each program.
			
 
				-It also allows the programs themselves to be protocol-agnostic,
			
 
				-so that, for example, one could build an SSH workalike
			
 
				-capable of using any protocol supported by
			
 
				-<TT>factotum</TT>,
			
 
				-without that program knowing anything about the protocols.
			
 
				-Traditionally each program needs to implement each
			
 
				-authentication protocol for itself, an <I>O</I>(<I>n</I><sup>2</sup>) coding
			
 
				-problem that
			
 
				-<TT>factotum</TT>
			
 
				-reduces to <I>O</I>(<I>n</I>).
			
 
				-<br>&#32;<br>
			
 
				-Previous work on agents has concentrated on their use by clients
			
 
				-authenticating to servers.
			
 
				-Looking in the other direction, Sun Microsystems' 
			
 
				-pluggable authentication module (PAM) is one
			
 
				-of the earliest attempts to 
			
 
				-provide a general authentication mechanism for Unix-like 
			
 
				-operating systems
			
 
				-[Sama96].
			
 
				-Without a central authority like PAM, system policy is tied
			
 
				-up in the various implementations of network services.
			
 
				-For example, on a typical Unix, if a system administrator
			
 
				-decides not to allow plaintext passwords for authentication,
			
 
				-the configuration files for a half dozen different servers &#8212;
			
 
				-<TT>rlogind</TT>,
			
 
				-<TT>telnetd</TT>,
			
 
				-<TT>ftpd</TT>,
			
 
				-<TT>sshd</TT>,
			
 
				-and so on &#8212;
			
 
				-need to be edited.
			
 
				-PAM solves this problem by hiding the details of a given
			
 
				-authentication mechanism behind a common library interface.
			
 
				-Directed by a system-wide configuration file,
			
 
				-an application selects a particular authentication mechanism
			
 
				-by dynamically loading the appropriate shared library.
			
 
				-PAM is widely used on Sun's Solaris and some Linux distributions.
			
 
				-<br>&#32;<br>
			
 
				-<TT>Factotum</TT>
			
 
				-achieves the same goals
			
 
				-using the agent approach.
			
 
				-<TT>Factotum</TT>
			
 
				-is the only process that needs to create
			
 
				-capabilities, so all the network servers can run as 
			
 
				-untrusted users (e.g.,
			
 
				-Plan 9's
			
 
				-<TT>none</TT>
			
 
				-or Unix's
			
 
				-<TT>nobody</TT>),
			
 
				-which greatly reduces the harm done if a server is buggy
			
 
				-and is compromised.
			
 
				-In fact, if
			
 
				-<TT>factotum</TT>
			
 
				-were implemented on Unix along with
			
 
				-an analogue to the Plan 9 capability device, venerable
			
 
				-programs like
			
 
				-<TT>su</TT>
			
 
				-and
			
 
				-<TT>login</TT>
			
 
				-would no longer need to be installed ``setuid root.''
			
 
				-<br>&#32;<br>
			
 
				-Several other systems, such as Password Safe [Schn],
			
 
				-store multiple passwords in an encrypted file,
			
 
				-so that the user only needs to remember one password.
			
 
				-Our
			
 
				-<TT>secstore</TT>
			
 
				-solution differs from these by placing the storage in
			
 
				-a hardened location in the network, so that the encrypted file is
			
 
				-less liable to be stolen for offline dictionary attack and so that
			
 
				-it is available even when a user has several computers.
			
 
				-In contrast, Microsoft's Passport system
			
 
				-[Micr]
			
 
				-keeps credentials in
			
 
				-the network, but centralized at one extremely-high-value target.
			
 
				-The important feature of Passport, setting up trust relationships
			
 
				-with e-merchants, is outside our scope.
			
 
				-The
			
 
				-<TT>secstore</TT>
			
 
				-architecture is almost identical to
			
 
				-Perlman and Kaufman's
			
 
				-[Perl99]
			
 
				-but with newer EKE technology.
			
 
				-Like them, we chose to defend mainly against outside attacks
			
 
				-on
			
 
				-<TT>secstore</TT>;
			
 
				-if additional defense of the files on the server
			
 
				-itself is desired, one can use distributed techniques
			
 
				-[Ford00].
			
 
				-<br>&#32;<br>
			
 
				-We made a conscious choice of placing encryption, message integrity,
			
 
				-and key management at the application layer
			
 
				-(TLS, just above layer 4) rather than at layer 3, as in IPsec.
			
 
				-This leads to a simpler structure for the network stack, easier
			
 
				-integration with applications and, most important, easier network
			
 
				-administration since we can recognize which applications are misbehaving
			
 
				-based on TCP port numbers.  TLS does suffer (relative to IPsec) from
			
 
				-the possibility of forged TCP Reset, but we feel that this is adequately
			
 
				-dealt with by randomized TCP sequence numbers.
			
 
				-In contrast with other TLS libraries, Plan 9 does not
			
 
				-require the application to change
			
 
				-<TT>write</TT>
			
 
				-calls to
			
 
				-<TT>sslwrite</TT>
			
 
				-but simply to add a few lines of code at startup
			
 
				-[Resc01].
			
 
				-<H4>8 Conclusion
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Writing safe code is difficult.
			
 
				-Stack attacks,
			
 
				-mistakes in logic, and bugs in compilers and operating systems
			
 
				-can each make it possible for an attacker
			
 
				-to subvert the intended execution sequence of a
			
 
				-service.
			
 
				-If the server process has the privileges
			
 
				-of a powerful user, such as
			
 
				-<TT>root</TT>
			
 
				-on Unix, then so does the attacker.
			
 
				-<TT>Factotum</TT>
			
 
				-allows us
			
 
				-to constrain the privileged execution to a single
			
 
				-process whose core is a few thousand lines of code.
			
 
				-Verifying such a process, both through manual and automatic means,
			
 
				-is much easier and less error prone
			
 
				-than requiring it of all servers.
			
 
				-<br>&#32;<br>
			
 
				-An implementation of these ideas is in Plan 9 from Bell Labs, Fourth Edition,
			
 
				-freely available from <TT>http://plan9.bell-labs.com/plan9</TT>.
			
 
				-<H4>Acknowledgments
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-William Josephson contributed to the implementation of password changing in
			
 
				-<TT>secstore</TT>.
			
 
				-We thank Phil MacKenzie and Mart&iacute;n Abadi for helpful comments on early parts
			
 
				-of the design.
			
 
				-Chuck Blake,
			
 
				-Peter Bosch,
			
 
				-Frans Kaashoek,
			
 
				-Sape Mullender,
			
 
				-and
			
 
				-Lakshman Y. N.,
			
 
				-predominantly Dutchmen, gave helpful comments on the paper.
			
 
				-Russ Cox is supported by a fellowship from the Fannie and John Hertz Foundation.
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Bell93]
			
 
				-S.M. Bellovin and M. Merritt,
			
 
				-``Augmented Encrypted Key Exchange,''
			
 
				-Proceedings of the 1st ACM Conference on Computer and Communications Security, 1993, pp. 244 - 250.
			
 
				-<br>&#32;<br>
			
 
				-[Boyk00]
			
 
				-Victor Boyko, Philip MacKenzie, and Sarvar Patel,
			
 
				-``Provably Secure Password-Authenticated Key Exchange using Diffie-Hellman,''
			
 
				-Eurocrypt 2000, 156-171.
			
 
				-<br>&#32;<br>
			
 
				-[RFC2246]
			
 
				-T. Dierks and C. Allen,
			
 
				-``The TLS Protocol, Version 1.0,''
			
 
				-RFC 2246.
			
 
				-<br>&#32;<br>
			
 
				-[Ford00]
			
 
				-Warwick Ford and Burton S. Kaliski, Jr.,
			
 
				-``Server-Assisted Generation of a Strong Secret from a Password,''
			
 
				-IEEE Fifth International Workshop on Enterprise Security,
			
 
				-National Institute of Standards and Technology (NIST),
			
 
				-Gaithersburg MD, June 14 - 16, 2000.
			
 
				-<br>&#32;<br>
			
 
				-[Jabl]
			
 
				-David P. Jablon,
			
 
				-``Strong Password-Only Authenticated Key Exchange,''
			
 
				-<TT>http://integritysciences.com/speke97.html</TT>.
			
 
				-<br>&#32;<br>
			
 
				-[Kami00]
			
 
				-Michael Kaminsky.
			
 
				-``Flexible Key Management with SFS Agents,''
			
 
				-Master's Thesis, MIT, May 2000.
			
 
				-<br>&#32;<br>
			
 
				-[Mack]
			
 
				-Philip MacKenzie,
			
 
				-private communication.
			
 
				-<br>&#32;<br>
			
 
				-[Mazi99]
			
 
				-David Mazi&egrave;res, Michael Kaminsky, M. Frans Kaashoek and Emmett Witchel,
			
 
				-``Separating key management from file system security,''
			
 
				-Symposium on Operating Systems Principles, 1999, pp. 124-139.
			
 
				-<br>&#32;<br>
			
 
				-[Micr]
			
 
				-Microsoft Passport,
			
 
				-<TT>http://www.passport.com/</TT>.
			
 
				-<br>&#32;<br>
			
 
				-[Perl99]
			
 
				-Radia Perlman and Charlie Kaufman,
			
 
				-``Secure Password-Based Protocol for Downloading a Private Key,''
			
 
				-Proc. 1999 Network and Distributed System Security Symposium,
			
 
				-Internet Society, January 1999.
			
 
				-<br>&#32;<br>
			
 
				-[Pike95]
			
 
				-Rob Pike, Dave Presotto, Sean Dorward, Bob Flandrena, Ken Thompson, Howard Trickey, and Phil Winterbottom,
			
 
				-``Plan 9 from Bell Labs,''
			
 
				-Computing Systems, <B>8</B>, 3, Summer 1995, pp. 221-254.
			
 
				-<br>&#32;<br>
			
 
				-[Pike93]
			
 
				-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, Phil Winterbottom,
			
 
				-``The Use of Name Spaces in Plan 9,''
			
 
				-Operating Systems Review, <B>27</B>, 2, April 1993, pp. 72-76
			
 
				-(reprinted from Proceedings of the 5th ACM SIGOPS European Workshop,
			
 
				-Mont Saint-Michel, 1992, Paper n&#186; 34).
			
 
				-<br>&#32;<br>
			
 
				-[Resc01]
			
 
				-Eric Rescorla,
			
 
				-``SSL and TLS: Designing and Building Secure Systems,''
			
 
				-Addison-Wesley, 2001. ISBN 0-201-61598-3, p. 387.
			
 
				-<br>&#32;<br>
			
 
				-[RFC2138]
			
 
				-C. Rigney, A. Rubens, W. Simpson, S. Willens,
			
 
				-``Remote Authentication Dial In User Service (RADIUS),''
			
 
				-RFC2138, April 1997.
			
 
				-<br>&#32;<br>
			
 
				-[RiLa]
			
 
				-Ronald L. Rivest and Butler Lampson,
			
 
				-``SDSI &#8212; A Simple Distributed Security Infrastructure,''
			
 
				-<TT>http://theory.lcs.mit.edu/~rivest/sdsi10.ps</TT>.
			
 
				-<br>&#32;<br>
			
 
				-[Schn]
			
 
				-Bruce Schneier, Password Safe,
			
 
				-<TT>http://www.counterpane.com/passsafe.html</TT>.
			
 
				-<br>&#32;<br>
			
 
				-[Sama96]
			
 
				-Vipin Samar,
			
 
				-``Unified Login with Pluggable Authentication Modules (PAM),''
			
 
				-Proceedings of the Third ACM Conference on Computer Communications and Security,
			
 
				-March 1996, New Delhi, India.
			
 
				-<br>&#32;<br>
			
 
				-[Stei88]
			
 
				-Jennifer G. Steiner, Clifford Neuman, and Jeffrey I. Schiller,
			
 
				-``<I>Kerberos</I>: An Authentication Service for Open Network Systems,''
			
 
				-Proceedings of USENIX Winter Conference, Dallas, Texas, February 1988, pp. 191-202.
			
 
				-<br>&#32;<br>
			
 
				-[Wu98]
			
 
				-T. Wu,
			
 
				-``The Secure Remote Password Protocol,''
			
 
				-Proceedings of
			
 
				-the 1998 Internet Society Network and Distributed System Security
			
 
				-Symposium, San Diego, CA, March 1998, pp. 97-111.
			
 
				-<br>&#32;<br>
			
 
				-[Ylon96]
			
 
				-Ylonen, T.,
			
 
				-``SSH &#8212; Secure Login Connections Over the Internet,''
			
 
				-6th USENIX Security Symposium, pp. 37-42. San Jose, CA, July 1996.
			
 
				-<H4>Appendix: Summary of the PAK protocol
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Let <I>q</I> &gt; 2<sup>160</sup> and <I>p</I> &gt; 2<sup>1024</sup> be primes
			
 
				-such that <I>p</I> = <I>rq</I> + 1 with <I>r</I> not a multiple of <I>q</I>.
			
 
				-Take <I>h</I> &isin; <I>Z</I><sub><I>p</I></sub><sup>*</sup> such that <I>g</I> &#8801; <I>h</I><sup><I>r</I></sup> is not 1.
			
 
				-These parameters may be chosen by the NIST algorithm for DSA,
			
 
				-and are public, fixed values.
			
 
				-The client <I>C</I> knows a secret &#960;
			
 
				-and computes <I>H</I> &#8801; (<I>H</I><sub>1</sub>(<I>C</I>, &#960;))<sup><I>r</I></sup> and <I>H</I><sup>-1</sup>,
			
 
				-where <I>H</I><sub>1</sub> is a hash function yielding a random element of <I>Z</I><sub><I>p</I></sub><sup>*</sup>,
			
 
				-and <I>H</I><sup>-1</sup> may be computed by gcd.
			
 
				-(All arithmetic is modulo <I>p</I>.)
			
 
				-The client gives <I>H</I><sup>-1</sup> to the server <I>S</I> ahead of time by a private channel.
			
 
				-To start a new connection, the client generates a random value <I>x</I>,
			
 
				-computes <I>m</I> &#8801; <I>g</I><sup><I>x</I></sup><I>H</I>,
			
 
				-then calls the server and sends <I>C</I> and <I>m</I>.
			
 
				-The server checks <I>m</I> &#8800; 0 mod <I>p</I>,
			
 
				-generates random <I>y</I>,
			
 
				-computes &#956; &#8801; <I>g</I><sup><I>y</I></sup>,
			
 
				-&#963; &#8801; (<I>m</I><I>H</I><sup>-1</sup>)<sup><I>y</I></sup>,
			
 
				-and sends <I>S</I>, &#956;, <I>k</I> &#8801; <I>sha1</I>("server",<I>C</I>,<I>S</I>,<I>m</I>,&#956;,&#963;,<I>H</I><sup>-1</sup>).
			
 
				-Next the client computes &#963; = &#956;<sup><I>x</I></sup>,
			
 
				-verifies <I>k</I>,
			
 
				-and sends <I>k&#180;</I> &#8801; <I>sha1</I>("client",<I>C</I>,<I>S</I>,<I>m</I>,&#956;,&#963;,<I>H</I><sup>-1</sup>).
			
 
				-The server then verifies <I>k&#180;</I> and both sides begin
			
 
				-using session key <I>K</I> &#8801; <I>sha1</I>("session",<I>C</I>,<I>S</I>,<I>m</I>,&#956;,&#963;,<I>H</I><sup>-1</sup>).
			
 
				-In the published version of PAK, the server name <I>S</I>
			
 
				-is included in the initial
			
 
				-hash <I>H</I>, but doing so is inconvenient in our application,
			
 
				-as the server may be known by various equivalent names.
			
 
				-<br>&#32;<br>
			
 
				-MacKenzie has shown
			
 
				-[Mack]
			
 
				-that the
			
 
				-equivalence proof [Boyk00]
			
 
				-can be adapted to cover our version.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/comp.html
+++ b/sys/doc/comp.html
@@ -1,1512 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>How to Use the Plan 9 C Compiler
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-rob@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-The C compiler on Plan 9 is a wholly new program; in fact
			
 
				-it was the first piece of software written for what would
			
 
				-eventually become Plan 9 from Bell Labs.
			
 
				-Programmers familiar with existing C compilers will find
			
 
				-a number of differences in both the language the Plan 9 compiler
			
 
				-accepts and in how the compiler is used.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compiler is really a set of compilers, one for each
			
 
				-architecture &#8212; MIPS, SPARC, Motorola 68020, Intel 386, etc. &#8212;
			
 
				-that accept a dialect of ANSI C and efficiently produce
			
 
				-fairly good code for the target machine.
			
 
				-There is a packaging of the compiler that accepts strict ANSI C for
			
 
				-a POSIX environment, but this document focuses on the
			
 
				-native Plan 9 environment, that in which all the system source and
			
 
				-almost all the utilities are written.
			
 
				-</P>
			
 
				-<H4>Source
			
 
				-</H4>
			
 
				-<P>
			
 
				-The language accepted by the compilers is the core ANSI C language
			
 
				-with some modest extensions,
			
 
				-a greatly simplified preprocessor,
			
 
				-a smaller library that includes system calls and related facilities,
			
 
				-and a completely different structure for include files.
			
 
				-</P>
			
 
				-<P>
			
 
				-Official ANSI C accepts the old (K&amp;R) style of declarations for
			
 
				-functions; the Plan 9 compilers
			
 
				-are more demanding.
			
 
				-Without an explicit run-time flag
			
 
				-(<TT>-B</TT>)
			
 
				-whose use is discouraged, the compilers insist
			
 
				-on new-style function declarations, that is, prototypes for
			
 
				-function arguments.
			
 
				-The function declarations in the libraries' include files are
			
 
				-all in the new style so the interfaces are checked at compile time.
			
 
				-For C programmers who have not yet switched to function prototypes
			
 
				-the clumsy syntax may seem repellent but the payoff in stronger typing
			
 
				-is substantial.
			
 
				-Those who wish to import existing software to Plan 9 are urged
			
 
				-to use the opportunity to update their code.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compilers include an integrated preprocessor that accepts the familiar
			
 
				-<TT>#include</TT>,
			
 
				-<TT>#define</TT>
			
 
				-for macros both with and without arguments,
			
 
				-<TT>#undef</TT>,
			
 
				-<TT>#line</TT>,
			
 
				-<TT>#ifdef</TT>,
			
 
				-<TT>#ifndef</TT>,
			
 
				-and
			
 
				-<TT>#endif</TT>.
			
 
				-It
			
 
				-supports neither
			
 
				-<TT>#if</TT>
			
 
				-nor
			
 
				-<TT>##</TT>,
			
 
				-although it does
			
 
				-honor a few
			
 
				-<TT>#pragmas</TT>.
			
 
				-The
			
 
				-<TT>#if</TT>
			
 
				-directive was omitted because it greatly complicates the
			
 
				-preprocessor, is never necessary, and is usually abused.
			
 
				-Conditional compilation in general makes code hard to understand;
			
 
				-the Plan 9 source uses it sparingly.
			
 
				-Also, because the compilers remove dead code, regular
			
 
				-<TT>if</TT>
			
 
				-statements with constant conditions are more readable equivalents to many
			
 
				-<TT>#ifs</TT>.
			
 
				-To compile imported code ineluctably fouled by
			
 
				-<TT>#if</TT>
			
 
				-there is a separate command,
			
 
				-<TT>/bin/cpp</TT>,
			
 
				-that implements the complete ANSI C preprocessor specification.
			
 
				-</P>
			
 
				-<P>
			
 
				-Include files fall into two groups: machine-dependent and machine-independent.
			
 
				-The machine-independent files occupy the directory
			
 
				-<TT>/sys/include</TT>;
			
 
				-the others are placed in a directory appropriate to the machine, such as
			
 
				-<TT>/mips/include</TT>.
			
 
				-The compiler searches for include files
			
 
				-first in the machine-dependent directory and then
			
 
				-in the machine-independent directory.
			
 
				-At the time of writing there are thirty-one machine-independent include
			
 
				-files and two (per machine) machine-dependent ones:
			
 
				-<TT>&lt;ureg.h&gt;</TT>
			
 
				-and
			
 
				-<TT>&lt;u.h&gt;</TT>.
			
 
				-The first describes the layout of registers on the system stack,
			
 
				-for use by the debugger.
			
 
				-The second defines some
			
 
				-architecture-dependent types such as
			
 
				-<TT>jmp_buf</TT>
			
 
				-for
			
 
				-<TT>setjmp</TT>
			
 
				-and the
			
 
				-<TT>va_arg</TT>
			
 
				-and
			
 
				-<TT>va_list</TT>
			
 
				-macros for handling arguments to variadic functions,
			
 
				-as well as a set of
			
 
				-<TT>typedef</TT>
			
 
				-abbreviations for
			
 
				-<TT>unsigned</TT>
			
 
				-<TT>short</TT>
			
 
				-and so on.
			
 
				-</P>
			
 
				-<P>
			
 
				-Here is an excerpt from
			
 
				-<TT>/68020/include/u.h</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#define nil		((void*)0)
			
 
				-typedef	unsigned short	ushort;
			
 
				-typedef	unsigned char	uchar;
			
 
				-typedef unsigned long	ulong;
			
 
				-typedef unsigned int	uint;
			
 
				-typedef   signed char	schar;
			
 
				-typedef	long long       vlong;
			
 
				-
			
 
				-typedef long	jmp_buf[2];
			
 
				-#define	JMPBUFSP	0
			
 
				-#define	JMPBUFPC	1
			
 
				-#define	JMPBUFDPC	0
			
 
				-</PRE></TT></DL>
			
 
				-Plan 9 programs use
			
 
				-<TT>nil</TT>
			
 
				-for the name of the zero-valued pointer.
			
 
				-The type
			
 
				-<TT>vlong</TT>
			
 
				-is the largest integer type available; on most architectures it
			
 
				-is a 64-bit value.
			
 
				-A couple of other types in
			
 
				-<TT>&lt;u.h&gt;</TT>
			
 
				-are
			
 
				-<TT>u32int</TT>,
			
 
				-which is guaranteed to have exactly 32 bits (a possibility on all the supported architectures) and
			
 
				-<TT>mpdigit</TT>,
			
 
				-which is used by the multiprecision math package
			
 
				-<TT>&lt;mp.h&gt;</TT>.
			
 
				-The
			
 
				-<TT>#define</TT>
			
 
				-constants permit an architecture-independent (but compiler-dependent)
			
 
				-implementation of stack-switching using
			
 
				-<TT>setjmp</TT>
			
 
				-and
			
 
				-<TT>longjmp</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Every Plan 9 C program begins
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#include &lt;u.h&gt;
			
 
				-</PRE></TT></DL>
			
 
				-because all the other installed header files use the
			
 
				-<TT>typedefs</TT>
			
 
				-declared in
			
 
				-<TT>&lt;u.h&gt;</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-In strict ANSI C, include files are grouped to collect related functions
			
 
				-in a single file: one for string functions, one for memory functions,
			
 
				-one for I/O, and none for system calls.
			
 
				-Each include file is protected by an
			
 
				-<TT>#ifdef</TT>
			
 
				-to guarantee its contents are seen by the compiler only once.
			
 
				-Plan 9 takes a different approach.  Other than a few include
			
 
				-files that define external formats such as archives, the files in
			
 
				-<TT>/sys/include</TT>
			
 
				-correspond to
			
 
				-<I>libraries.</I>
			
 
				-If a program is using a library, it includes the corresponding header.
			
 
				-The default C library comprises string functions, memory functions, and
			
 
				-so on, largely as in ANSI C, some formatted I/O routines,
			
 
				-plus all the system calls and related functions.
			
 
				-To use these functions, one must
			
 
				-<TT>#include</TT>
			
 
				-the file
			
 
				-<TT>&lt;libc.h&gt;</TT>,
			
 
				-which in turn must follow
			
 
				-<TT>&lt;u.h&gt;</TT>,
			
 
				-to define their prototypes for the compiler.
			
 
				-Here is the complete source to the traditional first C program:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#include &lt;u.h&gt;
			
 
				-#include &lt;libc.h&gt;
			
 
				-
			
 
				-void
			
 
				-main(void)
			
 
				-{
			
 
				-	print("hello world\n");
			
 
				-	exits(0);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>print</TT>
			
 
				-routine and its relatives
			
 
				-<TT>fprint</TT>
			
 
				-and
			
 
				-<TT>sprint</TT>
			
 
				-resemble the similarly-named functions in Standard I/O but are not
			
 
				-attached to a specific I/O library.
			
 
				-In Plan 9
			
 
				-<TT>main</TT>
			
 
				-is not integer-valued; it should call
			
 
				-<TT>exits</TT>,
			
 
				-which takes a string argument (or null; here ANSI C promotes the 0 to a
			
 
				-<TT>char*</TT>).
			
 
				-All these functions are, of course, documented in the Programmer's Manual.
			
 
				-</P>
			
 
				-<P>
			
 
				-To use
			
 
				-<TT>printf</TT>,
			
 
				-<TT>&lt;stdio.h&gt;</TT>
			
 
				-must be included to define the function prototype for
			
 
				-<TT>printf</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#include &lt;u.h&gt;
			
 
				-#include &lt;libc.h&gt;
			
 
				-#include &lt;stdio.h&gt;
			
 
				-
			
 
				-void
			
 
				-main(int argc, char *argv[])
			
 
				-{
			
 
				-	printf("%s: hello world; argc = %d\n", argv[0], argc);
			
 
				-	exits(0);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-In practice, Standard I/O is not used much in Plan 9.  I/O libraries are
			
 
				-discussed in a later section of this document.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are libraries for handling regular expressions, raster graphics,
			
 
				-windows, and so on, and each has an associated include file.
			
 
				-The manual for each library states which include files are needed.
			
 
				-The files are not protected against multiple inclusion and themselves
			
 
				-contain no nested
			
 
				-<TT>#includes</TT>.
			
 
				-Instead the
			
 
				-programmer is expected to sort out the requirements
			
 
				-and to
			
 
				-<TT>#include</TT>
			
 
				-the necessary files once at the top of each source file.  In practice this is
			
 
				-trivial: this way of handling include files is so straightforward
			
 
				-that it is rare for a source file to contain more than half a dozen
			
 
				-<TT>#includes</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compilers do their own register allocation so the
			
 
				-<TT>register</TT>
			
 
				-keyword is ignored.
			
 
				-For different reasons,
			
 
				-<TT>volatile</TT>
			
 
				-and
			
 
				-<TT>const</TT>
			
 
				-are also ignored.
			
 
				-</P>
			
 
				-<P>
			
 
				-To make it easier to share code with other systems, Plan 9 has a version
			
 
				-of the compiler,
			
 
				-<TT>pcc</TT>,
			
 
				-that provides the standard ANSI C preprocessor, headers, and libraries
			
 
				-with POSIX extensions.
			
 
				-<TT>Pcc</TT>
			
 
				-is recommended only
			
 
				-when broad external portability is mandated.  It compiles slower,
			
 
				-produces slower code (it takes extra work to simulate POSIX on Plan 9),
			
 
				-eliminates those parts of the Plan 9 interface
			
 
				-not related to POSIX, and illustrates the clumsiness of an environment
			
 
				-designed by committee.
			
 
				-<TT>Pcc</TT>
			
 
				-is described in more detail in
			
 
				-APE &#8212; The ANSI/POSIX Environment,
			
 
				-by Howard Trickey.
			
 
				-</P>
			
 
				-<H4>Process
			
 
				-</H4>
			
 
				-<P>
			
 
				-Each CPU architecture supported by Plan 9 is identified by a single,
			
 
				-arbitrary, alphanumeric character:
			
 
				-<TT>k</TT>
			
 
				-for SPARC,
			
 
				-<TT>q</TT>
			
 
				-for Motorola Power PC 630 and 640,
			
 
				-<TT>v</TT>
			
 
				-for MIPS,
			
 
				-<TT>1</TT>
			
 
				-for Motorola 68000,
			
 
				-<TT>2</TT>
			
 
				-for Motorola 68020 and 68040,
			
 
				-<TT>5</TT>
			
 
				-for Acorn ARM 7500,
			
 
				-<TT>6</TT>
			
 
				-for Intel 960,
			
 
				-<TT>7</TT>
			
 
				-for DEC Alpha,
			
 
				-<TT>8</TT>
			
 
				-for Intel 386, and
			
 
				-<TT>9</TT>
			
 
				-for AMD 29000.
			
 
				-The character labels the support tools and files for that architecture.
			
 
				-For instance, for the 68020 the compiler is
			
 
				-<TT>2c</TT>,
			
 
				-the assembler is
			
 
				-<TT>2a</TT>,
			
 
				-the link editor/loader is
			
 
				-<TT>2l</TT>,
			
 
				-the object files are suffixed
			
 
				-<TT>.2</TT>,
			
 
				-and the default name for an executable file is
			
 
				-<TT>2.out</TT>.
			
 
				-Before we can use the compiler we therefore need to know which
			
 
				-machine we are compiling for.
			
 
				-The next section explains how this decision is made; for the moment
			
 
				-assume we are building 68020 binaries and make the mental substitution for
			
 
				-<TT>2</TT>
			
 
				-appropriate to the machine you are actually using.
			
 
				-</P>
			
 
				-<P>
			
 
				-To convert source to an executable binary is a two-step process.
			
 
				-First run the compiler,
			
 
				-<TT>2c</TT>,
			
 
				-on the source, say
			
 
				-<TT>file.c</TT>,
			
 
				-to generate an object file
			
 
				-<TT>file.2</TT>.
			
 
				-Then run the loader,
			
 
				-<TT>2l</TT>,
			
 
				-to generate an executable
			
 
				-<TT>2.out</TT>
			
 
				-that may be run (on a 680X0 machine):
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-2c file.c
			
 
				-2l file.2
			
 
				-2.out
			
 
				-</PRE></TT></DL>
			
 
				-The loader automatically links with whatever libraries the program
			
 
				-needs, usually including the standard C library as defined by
			
 
				-<TT>&lt;libc.h&gt;</TT>.
			
 
				-Of course the compiler and loader have lots of options, both familiar and new;
			
 
				-see the manual for details.
			
 
				-The compiler does not generate an executable automatically;
			
 
				-the output of the compiler must be given to the loader.
			
 
				-Since most compilation is done under the control of
			
 
				-<TT>mk</TT>
			
 
				-(see below), this is rarely an inconvenience.
			
 
				-</P>
			
 
				-<P>
			
 
				-The distribution of work between the compiler and loader is unusual.
			
 
				-The compiler integrates preprocessing, parsing, register allocation,
			
 
				-code generation and some assembly.
			
 
				-Combining these tasks in a single program is part of the reason for
			
 
				-the compiler's efficiency.
			
 
				-The loader does instruction selection, branch folding,
			
 
				-instruction scheduling,
			
 
				-and writes the final executable.
			
 
				-There is no separate C preprocessor and no assembler in the usual pipeline.
			
 
				-Instead the intermediate object file
			
 
				-(here a
			
 
				-<TT>.2</TT>
			
 
				-file) is a type of binary assembly language.
			
 
				-The instructions in the intermediate format are not exactly those in
			
 
				-the machine.  For example, on the 68020 the object file may specify
			
 
				-a MOVE instruction but the loader will decide just which variant of
			
 
				-the MOVE instruction &#8212; MOVE immediate, MOVE quick, MOVE address,
			
 
				-etc. &#8212; is most efficient.
			
 
				-</P>
			
 
				-<P>
			
 
				-The assembler,
			
 
				-<TT>2a</TT>,
			
 
				-is just a translator between the textual and binary
			
 
				-representations of the object file format.
			
 
				-It is not an assembler in the traditional sense.  It has limited
			
 
				-macro capabilities (the same as the integral C preprocessor in the compiler),
			
 
				-clumsy syntax, and minimal error checking.  For instance, the assembler
			
 
				-will accept an instruction (such as memory-to-memory MOVE on the MIPS) that the
			
 
				-machine does not actually support; only when the output of the assembler
			
 
				-is passed to the loader will the error be discovered.
			
 
				-The assembler is intended only for writing things that need access to instructions
			
 
				-invisible from C,
			
 
				-such as the machine-dependent
			
 
				-part of an operating system;
			
 
				-very little code in Plan 9 is in assembly language.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compilers take an option
			
 
				-<TT>-S</TT>
			
 
				-that causes them to print on their standard output the generated code
			
 
				-in a format acceptable as input to the assemblers.
			
 
				-This is of course merely a formatting of the
			
 
				-data in the object file; therefore the assembler is just
			
 
				-an
			
 
				-ASCII-to-binary converter for this format.
			
 
				-Other than the specific instructions, the input to the assemblers
			
 
				-is largely architecture-independent; see
			
 
				-``A Manual for the Plan 9 Assembler'',
			
 
				-by Rob Pike,
			
 
				-for more information.
			
 
				-</P>
			
 
				-<P>
			
 
				-The loader is an integral part of the compilation process.
			
 
				-Each library header file contains a
			
 
				-<TT>#pragma</TT>
			
 
				-that tells the loader the name of the associated archive; it is
			
 
				-not necessary to tell the loader which libraries a program uses.
			
 
				-The C run-time startup is found, by default, in the C library.
			
 
				-The loader starts with an undefined
			
 
				-symbol,
			
 
				-<TT>_main</TT>,
			
 
				-that is resolved by pulling in the run-time startup code from the library.
			
 
				-(The loader undefines
			
 
				-<TT>_mainp</TT>
			
 
				-when profiling is enabled, to force loading of the profiling start-up
			
 
				-instead.)
			
 
				-</P>
			
 
				-<P>
			
 
				-Unlike its counterpart on other systems, the Plan 9 loader rearranges
			
 
				-data to optimize access.  This means the order of variables in the
			
 
				-loaded program is unrelated to its order in the source.
			
 
				-Most programs don't care, but some assume that, for example, the
			
 
				-variables declared by
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int a;
			
 
				-int b;
			
 
				-</PRE></TT></DL>
			
 
				-will appear at adjacent addresses in memory.  On Plan 9, they won't.
			
 
				-</P>
			
 
				-<H4>Heterogeneity
			
 
				-</H4>
			
 
				-<P>
			
 
				-When the system starts or a user logs in the environment is configured
			
 
				-so the appropriate binaries are available in
			
 
				-<TT>/bin</TT>.
			
 
				-The configuration process is controlled by an environment variable,
			
 
				-<TT>$cputype</TT>,
			
 
				-with value such as
			
 
				-<TT>mips</TT>,
			
 
				-<TT>68020</TT>,
			
 
				-<TT>386</TT>,
			
 
				-or
			
 
				-<TT>sparc</TT>.
			
 
				-For each architecture there is a directory in the root,
			
 
				-with the appropriate name,
			
 
				-that holds the binary and library files for that architecture.
			
 
				-Thus
			
 
				-<TT>/mips/lib</TT>
			
 
				-contains the object code libraries for MIPS programs,
			
 
				-<TT>/mips/include</TT>
			
 
				-holds MIPS-specific include files, and
			
 
				-<TT>/mips/bin</TT>
			
 
				-has the MIPS binaries.
			
 
				-These binaries are attached to
			
 
				-<TT>/bin</TT>
			
 
				-at boot time by binding
			
 
				-<TT>/$cputype/bin</TT>
			
 
				-to
			
 
				-<TT>/bin</TT>,
			
 
				-so
			
 
				-<TT>/bin</TT>
			
 
				-always contains the correct files.
			
 
				-</P>
			
 
				-<P>
			
 
				-The MIPS compiler,
			
 
				-<TT>vc</TT>,
			
 
				-by definition
			
 
				-produces object files for the MIPS architecture,
			
 
				-regardless of the architecture of the machine on which the compiler is running.
			
 
				-There is a version of
			
 
				-<TT>vc</TT>
			
 
				-compiled for each architecture:
			
 
				-<TT>/mips/bin/vc</TT>,
			
 
				-<TT>/68020/bin/vc</TT>,
			
 
				-<TT>/sparc/bin/vc</TT>,
			
 
				-and so on,
			
 
				-each capable of producing MIPS object files regardless of the native
			
 
				-instruction set.
			
 
				-If one is running on a SPARC,
			
 
				-<TT>/sparc/bin/vc</TT>
			
 
				-will compile programs for the MIPS;
			
 
				-if one is running on machine
			
 
				-<TT>$cputype</TT>,
			
 
				-<TT>/$cputype/bin/vc</TT>
			
 
				-will compile programs for the MIPS.
			
 
				-</P>
			
 
				-<P>
			
 
				-Because of the bindings that assemble
			
 
				-<TT>/bin</TT>,
			
 
				-the shell always looks for a command, say
			
 
				-<TT>date</TT>,
			
 
				-in
			
 
				-<TT>/bin</TT>
			
 
				-and automatically finds the file
			
 
				-<TT>/$cputype/bin/date</TT>.
			
 
				-Therefore the MIPS compiler is known as just
			
 
				-<TT>vc</TT>;
			
 
				-the shell will invoke
			
 
				-<TT>/bin/vc</TT>
			
 
				-and that is guaranteed to be the version of the MIPS compiler
			
 
				-appropriate for the machine running the command.
			
 
				-Regardless of the architecture of the compiling machine,
			
 
				-<TT>/bin/vc</TT>
			
 
				-is
			
 
				-<I>always</I>
			
 
				-the MIPS compiler.
			
 
				-</P>
			
 
				-<P>
			
 
				-Also, the output of
			
 
				-<TT>vc</TT>
			
 
				-and
			
 
				-<TT>vl</TT>
			
 
				-is completely independent of the machine type on which they are executed:
			
 
				-<TT>.v</TT>
			
 
				-files compiled (with
			
 
				-<TT>vc</TT>)
			
 
				-on a SPARC may be linked (with
			
 
				-<TT>vl</TT>)
			
 
				-on a 386.
			
 
				-(The resulting
			
 
				-<TT>v.out</TT>
			
 
				-will run, of course, only on a MIPS.)
			
 
				-Similarly, the MIPS libraries in
			
 
				-<TT>/mips/lib</TT>
			
 
				-are suitable for loading with
			
 
				-<TT>vl</TT>
			
 
				-on any machine; there is only one set of MIPS libraries, not one
			
 
				-set for each architecture that supports the MIPS compiler.
			
 
				-</P>
			
 
				-<H4>Heterogeneity and <TT>mk</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-Most software on Plan 9 is compiled under the control of
			
 
				-<TT>mk</TT>,
			
 
				-a descendant of
			
 
				-<TT>make</TT>
			
 
				-that is documented in the Programmer's Manual.
			
 
				-A convention used throughout the
			
 
				-<TT>mkfiles</TT>
			
 
				-makes it easy to compile the source into binary suitable for any architecture.
			
 
				-</P>
			
 
				-<P>
			
 
				-The variable
			
 
				-<TT>$cputype</TT>
			
 
				-is advisory: it reports the architecture of the current environment, and should
			
 
				-not be modified.  A second variable,
			
 
				-<TT>$objtype</TT>,
			
 
				-is used to set which architecture is being
			
 
				-<I>compiled</I>
			
 
				-for.
			
 
				-The value of
			
 
				-<TT>$objtype</TT>
			
 
				-can be used by a
			
 
				-<TT>mkfile</TT>
			
 
				-to configure the compilation environment.
			
 
				-</P>
			
 
				-<P>
			
 
				-In each machine's root directory there is a short
			
 
				-<TT>mkfile</TT>
			
 
				-that defines a set of macros for the compiler, loader, etc.
			
 
				-Here is
			
 
				-<TT>/mips/mkfile</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/sys/src/mkfile.proto
			
 
				-
			
 
				-CC=vc
			
 
				-LD=vl
			
 
				-O=v
			
 
				-AS=va
			
 
				-</PRE></TT></DL>
			
 
				-The line
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/sys/src/mkfile.proto
			
 
				-</PRE></TT></DL>
			
 
				-causes
			
 
				-<TT>mk</TT>
			
 
				-to include the file
			
 
				-<TT>/sys/src/mkfile.proto</TT>,
			
 
				-which contains general definitions:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#
			
 
				-# common mkfile parameters shared by all architectures
			
 
				-#
			
 
				-
			
 
				-OS=v486xq7
			
 
				-CPUS=mips 386 power alpha
			
 
				-CFLAGS=-FVw
			
 
				-LEX=lex
			
 
				-YACC=yacc
			
 
				-MK=/bin/mk
			
 
				-</PRE></TT></DL>
			
 
				-<TT>CC</TT>
			
 
				-is obviously the compiler,
			
 
				-<TT>AS</TT>
			
 
				-the assembler, and
			
 
				-<TT>LD</TT>
			
 
				-the loader.
			
 
				-<TT>O</TT>
			
 
				-is the suffix for the object files and
			
 
				-<TT>CPUS</TT>
			
 
				-and
			
 
				-<TT>OS</TT>
			
 
				-are used in special rules described below.
			
 
				-</P>
			
 
				-<P>
			
 
				-Here is a
			
 
				-<TT>mkfile</TT>
			
 
				-to build the installed source for
			
 
				-<TT>sam</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/$objtype/mkfile
			
 
				-OBJ=sam.$O address.$O buffer.$O cmd.$O disc.$O error.$O \
			
 
				-	file.$O io.$O list.$O mesg.$O moveto.$O multi.$O \
			
 
				-	plan9.$O rasp.$O regexp.$O string.$O sys.$O xec.$O
			
 
				-
			
 
				-$O.out:	$OBJ
			
 
				-	$LD $OBJ
			
 
				-
			
 
				-install:	$O.out
			
 
				-	cp $O.out /$objtype/bin/sam
			
 
				-
			
 
				-installall:
			
 
				-	for(objtype in $CPUS) mk install
			
 
				-
			
 
				-%.$O:	%.c
			
 
				-	$CC $CFLAGS $stem.c
			
 
				-
			
 
				-$OBJ:	sam.h errors.h mesg.h
			
 
				-address.$O cmd.$O parse.$O xec.$O unix.$O:	parse.h
			
 
				-
			
 
				-clean:V:
			
 
				-	rm -f [$OS].out *.[$OS] y.tab.?
			
 
				-</PRE></TT></DL>
			
 
				-(The actual
			
 
				-<TT>mkfile</TT>
			
 
				-imports most of its rules from other secondary files, but
			
 
				-this example works and is not misleading.)
			
 
				-The first line causes
			
 
				-<TT>mk</TT>
			
 
				-to include the contents of
			
 
				-<TT>/$objtype/mkfile</TT>
			
 
				-in the current
			
 
				-<TT>mkfile</TT>.
			
 
				-If
			
 
				-<TT>$objtype</TT>
			
 
				-is
			
 
				-<TT>mips</TT>,
			
 
				-this inserts the MIPS macro definitions into the
			
 
				-<TT>mkfile</TT>.
			
 
				-In this case the rule for
			
 
				-<TT>$O.out</TT>
			
 
				-uses the MIPS tools to build
			
 
				-<TT>v.out</TT>.
			
 
				-The
			
 
				-<TT>%.$O</TT>
			
 
				-rule in the file uses
			
 
				-<TT>mk</TT>'s
			
 
				-pattern matching facilities to convert the source files to the object
			
 
				-files through the compiler.
			
 
				-(The text of the rules is passed directly to the shell,
			
 
				-<TT>rc</TT>,
			
 
				-without further translation.
			
 
				-See the
			
 
				-<TT>mk</TT>
			
 
				-manual if any of this is unfamiliar.)
			
 
				-Because the default rule builds
			
 
				-<TT>$O.out</TT>
			
 
				-rather than
			
 
				-<TT>sam</TT>,
			
 
				-it is possible to maintain binaries for multiple machines in the
			
 
				-same source directory without conflict.
			
 
				-This is also, of course, why the output files from the various
			
 
				-compilers and loaders
			
 
				-have distinct names.
			
 
				-</P>
			
 
				-<P>
			
 
				-The rest of the
			
 
				-<TT>mkfile</TT>
			
 
				-should be easy to follow; notice how the rules for
			
 
				-<TT>clean</TT>
			
 
				-and
			
 
				-<TT>installall</TT>
			
 
				-(that is, install versions for all architectures) use other macros
			
 
				-defined in
			
 
				-<TT>/$objtype/mkfile</TT>.
			
 
				-In Plan 9,
			
 
				-<TT>mkfiles</TT>
			
 
				-for commands conventionally contain rules to
			
 
				-<TT>install</TT>
			
 
				-(compile and install the version for
			
 
				-<TT>$objtype</TT>),
			
 
				-<TT>installall</TT>
			
 
				-(compile and install for all
			
 
				-<TT>$objtypes</TT>),
			
 
				-and
			
 
				-<TT>clean</TT>
			
 
				-(remove all object files, binaries, etc.).
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>mkfile</TT>
			
 
				-is easy to use.  To build a MIPS binary,
			
 
				-<TT>v.out</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% objtype=mips
			
 
				-% mk
			
 
				-</PRE></TT></DL>
			
 
				-To build and install a MIPS binary:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% objtype=mips
			
 
				-% mk install
			
 
				-</PRE></TT></DL>
			
 
				-To build and install all versions:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% mk installall
			
 
				-</PRE></TT></DL>
			
 
				-These conventions make cross-compilation as easy to manage
			
 
				-as traditional native compilation.
			
 
				-Plan 9 programs compile and run without change on machines from
			
 
				-large multiprocessors to laptops.  For more information about this process, see
			
 
				-``Plan 9 Mkfiles'',
			
 
				-by Bob Flandrena.
			
 
				-</P>
			
 
				-<H4>Portability
			
 
				-</H4>
			
 
				-<P>
			
 
				-Within Plan 9, it is painless to write portable programs, programs whose
			
 
				-source is independent of the machine on which they execute.
			
 
				-The operating system is fixed and the compiler, headers and libraries
			
 
				-are constant so most of the stumbling blocks to portability are removed.
			
 
				-Attention to a few details can avoid those that remain.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 is a heterogeneous environment, so programs must
			
 
				-<I>expect</I>
			
 
				-that external files will be written by programs on machines of different
			
 
				-architectures.
			
 
				-The compilers, for instance, must handle without confusion
			
 
				-object files written by other machines.
			
 
				-The traditional approach to this problem is to pepper the source with
			
 
				-<TT>#ifdefs</TT>
			
 
				-to turn byte-swapping on and off.
			
 
				-Plan 9 takes a different approach: of the handful of machine-dependent
			
 
				-<TT>#ifdefs</TT>
			
 
				-in all the source, almost all are deep in the libraries.
			
 
				-Instead programs read and write files in a defined format,
			
 
				-either (for low volume applications) as formatted text, or
			
 
				-(for high volume applications) as binary in a known byte order.
			
 
				-If the external data were written with the most significant
			
 
				-byte first, the following code reads a 4-byte integer correctly
			
 
				-regardless of the architecture of the executing machine (assuming
			
 
				-an unsigned long holds 4 bytes):
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-ulong
			
 
				-getlong(void)
			
 
				-{
			
 
				-	ulong l;
			
 
				-
			
 
				-	l = (getchar()&amp;0xFF)&lt;&lt;24;
			
 
				-	l |= (getchar()&amp;0xFF)&lt;&lt;16;
			
 
				-	l |= (getchar()&amp;0xFF)&lt;&lt;8;
			
 
				-	l |= (getchar()&amp;0xFF)&lt;&lt;0;
			
 
				-	return l;
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Note that this code does not `swap' the bytes; instead it just reads
			
 
				-them in the correct order.
			
 
				-Variations of this code will handle any binary format
			
 
				-and also avoid problems
			
 
				-involving how structures are padded, how words are aligned,
			
 
				-and other impediments to portability.
			
 
				-Be aware, though, that extra care is needed to handle floating point data.
			
 
				-</P>
			
 
				-<P>
			
 
				-Efficiency hounds will argue that this method is unnecessarily slow and clumsy
			
 
				-when the executing machine has the same byte order (and padding and alignment)
			
 
				-as the data.
			
 
				-The CPU cost of I/O processing
			
 
				-is rarely the bottleneck for an application, however,
			
 
				-and the gain in simplicity of porting and maintaining the code greatly outweighs
			
 
				-the minor speed loss from handling data in this general way.
			
 
				-This method is how the Plan 9 compilers, the window system, and even the file
			
 
				-servers transmit data between programs.
			
 
				-</P>
			
 
				-<P>
			
 
				-To port programs beyond Plan 9, where the system interface is more variable,
			
 
				-it is probably necessary to use
			
 
				-<TT>pcc</TT>
			
 
				-and hope that the target machine supports ANSI C and POSIX.
			
 
				-</P>
			
 
				-<H4>I/O
			
 
				-</H4>
			
 
				-<P>
			
 
				-The default C library, defined by the include file
			
 
				-<TT>&lt;libc.h&gt;</TT>,
			
 
				-contains no buffered I/O package.
			
 
				-It does have several entry points for printing formatted text:
			
 
				-<TT>print</TT>
			
 
				-outputs text to the standard output,
			
 
				-<TT>fprint</TT>
			
 
				-outputs text to a specified integer file descriptor, and
			
 
				-<TT>sprint</TT>
			
 
				-places text in a character array.
			
 
				-To access library routines for buffered I/O, a program must
			
 
				-explicitly include the header file associated with an appropriate library.
			
 
				-</P>
			
 
				-<P>
			
 
				-The recommended I/O library, used by most Plan 9 utilities, is
			
 
				-<TT>bio</TT>
			
 
				-(buffered I/O), defined by
			
 
				-<TT>&lt;bio.h&gt;</TT>.
			
 
				-There also exists an implementation of ANSI Standard I/O,
			
 
				-<TT>stdio</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Bio</TT>
			
 
				-is small and efficient, particularly for buffer-at-a-time or
			
 
				-line-at-a-time I/O.
			
 
				-Even for character-at-a-time I/O, however, it is significantly faster than
			
 
				-the Standard I/O library,
			
 
				-<TT>stdio</TT>.
			
 
				-Its interface is compact and regular, although it lacks a few conveniences.
			
 
				-The most noticeable is that one must explicitly define buffers for standard
			
 
				-input and output;
			
 
				-<TT>bio</TT>
			
 
				-does not predefine them.  Here is a program to copy input to output a byte
			
 
				-at a time using
			
 
				-<TT>bio</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#include &lt;u.h&gt;
			
 
				-#include &lt;libc.h&gt;
			
 
				-#include &lt;bio.h&gt;
			
 
				-
			
 
				-Biobuf	bin;
			
 
				-Biobuf	bout;
			
 
				-
			
 
				-main(void)
			
 
				-{
			
 
				-	int c;
			
 
				-
			
 
				-	Binit(&amp;bin, 0, OREAD);
			
 
				-	Binit(&amp;bout, 1, OWRITE);
			
 
				-
			
 
				-	while((c=Bgetc(&amp;bin)) != Beof)
			
 
				-		Bputc(&amp;bout, c);
			
 
				-	exits(0);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-For peak performance, we could replace
			
 
				-<TT>Bgetc</TT>
			
 
				-and
			
 
				-<TT>Bputc</TT>
			
 
				-by their equivalent in-line macros
			
 
				-<TT>BGETC</TT>
			
 
				-and
			
 
				-<TT>BPUTC</TT>
			
 
				-but 
			
 
				-the performance gain would be modest.
			
 
				-For more information on
			
 
				-<TT>bio</TT>,
			
 
				-see the Programmer's Manual.
			
 
				-</P>
			
 
				-<P>
			
 
				-Perhaps the most dramatic difference in the I/O interface of Plan 9 from other
			
 
				-systems' is that text is not ASCII.
			
 
				-The format for
			
 
				-text in Plan 9 is a byte-stream encoding of 16-bit characters.
			
 
				-The character set is based on the Unicode Standard and is backward compatible with
			
 
				-ASCII:
			
 
				-characters with value 0 through 127 are the same in both sets.
			
 
				-The 16-bit characters, called
			
 
				-<I>runes</I>
			
 
				-in Plan 9, are encoded using a representation called
			
 
				-UTF,
			
 
				-an encoding that is becoming accepted as a standard.
			
 
				-(ISO calls it UTF-8;
			
 
				-throughout Plan 9 it's just called
			
 
				-UTF.)
			
 
				-UTF
			
 
				-defines multibyte sequences to
			
 
				-represent character values from 0 to 65535.
			
 
				-In
			
 
				-UTF,
			
 
				-character values up to 127 decimal, 7F hexadecimal, represent themselves,
			
 
				-so straight
			
 
				-ASCII
			
 
				-files are also valid
			
 
				-UTF.
			
 
				-Also,
			
 
				-UTF
			
 
				-guarantees that bytes with values 0 to 127 (NUL to DEL, inclusive)
			
 
				-will appear only when they represent themselves, so programs that read bytes
			
 
				-looking for plain ASCII characters will continue to work.
			
 
				-Any program that expects a one-to-one correspondence between bytes and
			
 
				-characters will, however, need to be modified.
			
 
				-An example is parsing file names.
			
 
				-File names, like all text, are in
			
 
				-UTF,
			
 
				-so it is incorrect to search for a character in a string by
			
 
				-<TT>strchr(filename,</TT>
			
 
				-<TT>c)</TT>
			
 
				-because the character might have a multi-byte encoding.
			
 
				-The correct method is to call
			
 
				-<TT>utfrune(filename,</TT>
			
 
				-<TT>c)</TT>,
			
 
				-defined in
			
 
				-<A href="/magic/man2html/2/rune"><I>rune</I>(2),
			
 
				-</A>which interprets the file name as a sequence of encoded characters
			
 
				-rather than bytes.
			
 
				-In fact, even when you know the character is a single byte
			
 
				-that can represent only itself,
			
 
				-it is safer to use
			
 
				-<TT>utfrune</TT>
			
 
				-because that assumes nothing about the character set
			
 
				-and its representation.
			
 
				-</P>
			
 
				-<P>
			
 
				-The library defines several symbols relevant to the representation of characters.
			
 
				-Any byte with unsigned value less than
			
 
				-<TT>Runesync</TT>
			
 
				-will not appear in any multi-byte encoding of a character.
			
 
				-<TT>Utfrune</TT>
			
 
				-compares the character being searched against
			
 
				-<TT>Runesync</TT>
			
 
				-to see if it is sufficient to call
			
 
				-<TT>strchr</TT>
			
 
				-or if the byte stream must be interpreted.
			
 
				-Any byte with unsigned value less than
			
 
				-<TT>Runeself</TT>
			
 
				-is represented by a single byte with the same value.
			
 
				-Finally, when errors are encountered converting
			
 
				-to runes from a byte stream, the library returns the rune value
			
 
				-<TT>Runeerror</TT>
			
 
				-and advances a single byte.  This permits programs to find runes
			
 
				-embedded in binary data.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Bio</TT>
			
 
				-includes routines
			
 
				-<TT>Bgetrune</TT>
			
 
				-and
			
 
				-<TT>Bputrune</TT>
			
 
				-to transform the external byte stream
			
 
				-UTF
			
 
				-format to and from
			
 
				-internal 16-bit runes.
			
 
				-Also, the
			
 
				-<TT>%s</TT>
			
 
				-format to
			
 
				-<TT>print</TT>
			
 
				-accepts
			
 
				-UTF;
			
 
				-<TT>%c</TT>
			
 
				-prints a character after narrowing it to 8 bits.
			
 
				-The
			
 
				-<TT>%S</TT>
			
 
				-format prints a null-terminated sequence of runes;
			
 
				-<TT>%C</TT>
			
 
				-prints a character after narrowing it to 16 bits.
			
 
				-For more information, see the Programmer's Manual, in particular
			
 
				-<A href="/magic/man2html/6/utf"><I>utf</I>(6)
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/2/rune"><I>rune</I>(2),
			
 
				-</A>and the paper,
			
 
				-``Hello world, or
			
 
				-&#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949;, or 
			
 
				-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;'',
			
 
				-by Rob Pike and
			
 
				-Ken Thompson;
			
 
				-there is not room for the full story here.
			
 
				-</P>
			
 
				-<P>
			
 
				-These issues affect the compiler in several ways.
			
 
				-First, the C source is in
			
 
				-UTF.
			
 
				-ANSI says C variables are formed from
			
 
				-ASCII
			
 
				-alphanumerics, but comments and literal strings may contain any characters
			
 
				-encoded in the native encoding, here
			
 
				-UTF.
			
 
				-The declaration
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-char *cp = "abc&yuml;";
			
 
				-</PRE></TT></DL>
			
 
				-initializes the variable
			
 
				-<TT>cp</TT>
			
 
				-to point to an array of bytes holding the
			
 
				-UTF
			
 
				-representation of the characters
			
 
				-<TT>abc&yuml;</TT>.
			
 
				-The type
			
 
				-<TT>Rune</TT>
			
 
				-is defined in
			
 
				-<TT>&lt;u.h&gt;</TT>
			
 
				-to be
			
 
				-<TT>ushort</TT>,
			
 
				-which is also the  `wide character' type in the compiler.
			
 
				-Therefore the declaration
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Rune *rp = L"abc&yuml;";
			
 
				-</PRE></TT></DL>
			
 
				-initializes the variable
			
 
				-<TT>rp</TT>
			
 
				-to point to an array of unsigned short integers holding the 16-bit
			
 
				-values of the characters
			
 
				-<TT>abc&yuml;</TT>.
			
 
				-Note that in both these declarations the characters in the source
			
 
				-that represent
			
 
				-<TT>abc&yuml;</TT>
			
 
				-are the same; what changes is how those characters are represented
			
 
				-in memory in the program.
			
 
				-The following two lines:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-print("%s\n", "abc&yuml;");
			
 
				-print("%S\n", L"abc&yuml;");
			
 
				-</PRE></TT></DL>
			
 
				-produce the same
			
 
				-UTF
			
 
				-string on their output, the first by copying the bytes, the second
			
 
				-by converting from runes to bytes.
			
 
				-</P>
			
 
				-<P>
			
 
				-In C, character constants are integers but narrowed through the
			
 
				-<TT>char</TT>
			
 
				-type.
			
 
				-The Unicode character
			
 
				-<TT>&yuml;</TT>
			
 
				-has value 255, so if the
			
 
				-<TT>char</TT>
			
 
				-type is signed,
			
 
				-the constant
			
 
				-<TT>'&yuml;'</TT>
			
 
				-has value -1 (which is equal to EOF).
			
 
				-On the other hand,
			
 
				-<TT>L'&yuml;'</TT>
			
 
				-narrows through the wide character type,
			
 
				-<TT>ushort</TT>,
			
 
				-and therefore has value 255.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally, although it's not ANSI C, the Plan 9 C compilers
			
 
				-assume any character with value above
			
 
				-<TT>Runeself</TT>
			
 
				-is an alphanumeric,
			
 
				-so &#945; is a legal, if non-portable, variable name.
			
 
				-</P>
			
 
				-<H4>Arguments
			
 
				-</H4>
			
 
				-<P>
			
 
				-Some macros are defined
			
 
				-in
			
 
				-<TT>&lt;libc.h&gt;</TT>
			
 
				-for parsing the arguments to
			
 
				-<TT>main()</TT>.
			
 
				-They are described in
			
 
				-<A href="/magic/man2html/2/arg"><I>arg</I>(2)
			
 
				-</A>but are fairly self-explanatory.
			
 
				-There are four macros:
			
 
				-<TT>ARGBEGIN</TT>
			
 
				-and
			
 
				-<TT>ARGEND</TT>
			
 
				-are used to bracket a hidden
			
 
				-<TT>switch</TT>
			
 
				-statement within which
			
 
				-<TT>ARGC</TT>
			
 
				-returns the current option character (rune) being processed and
			
 
				-<TT>ARGF</TT>
			
 
				-returns the argument to the option, as in the loader option
			
 
				-<TT>-o</TT>
			
 
				-<TT>file</TT>.
			
 
				-Here, for example, is the code at the beginning of
			
 
				-<TT>main()</TT>
			
 
				-in
			
 
				-<TT>ramfs.c</TT>
			
 
				-(see
			
 
				-<A href="/magic/man2html/1/ramfs"><I>ramfs</I>(1))
			
 
				-</A>that cracks its arguments:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void
			
 
				-main(int argc, char *argv[])
			
 
				-{
			
 
				-	char *defmnt;
			
 
				-	int p[2];
			
 
				-	int mfd[2];
			
 
				-	int stdio = 0;
			
 
				-
			
 
				-	defmnt = "/tmp";
			
 
				-	ARGBEGIN{
			
 
				-	case 'i':
			
 
				-		defmnt = 0;
			
 
				-		stdio = 1;
			
 
				-		mfd[0] = 0;
			
 
				-		mfd[1] = 1;
			
 
				-		break;
			
 
				-	case 's':
			
 
				-		defmnt = 0;
			
 
				-		break;
			
 
				-	case 'm':
			
 
				-		defmnt = ARGF();
			
 
				-		break;
			
 
				-	default:
			
 
				-		usage();
			
 
				-	}ARGEND
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>Extensions
			
 
				-</H4>
			
 
				-<P>
			
 
				-The compiler has several extensions to ANSI C, all of which are used
			
 
				-extensively in the system source.
			
 
				-First,
			
 
				-<I>structure</I>
			
 
				-<I>displays</I>
			
 
				-permit 
			
 
				-<TT>struct</TT>
			
 
				-expressions to be formed dynamically.
			
 
				-Given these declarations:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef struct Point Point;
			
 
				-typedef struct Rectangle Rectangle;
			
 
				-
			
 
				-struct Point
			
 
				-{
			
 
				-	int x, y;
			
 
				-};
			
 
				-
			
 
				-struct Rectangle
			
 
				-{
			
 
				-	Point min, max;
			
 
				-};
			
 
				-
			
 
				-Point	p, q, add(Point, Point);
			
 
				-Rectangle r;
			
 
				-int	x, y;
			
 
				-</PRE></TT></DL>
			
 
				-this assignment may appear anywhere an assignment is legal:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-r = (Rectangle){add(p, q), (Point){x, y+3}};
			
 
				-</PRE></TT></DL>
			
 
				-The syntax is the same as for initializing a structure but with
			
 
				-a leading cast.
			
 
				-</P>
			
 
				-<P>
			
 
				-If an
			
 
				-<I>anonymous</I>
			
 
				-<I>structure</I>
			
 
				-or
			
 
				-<I>union</I>
			
 
				-is declared within another structure or union, the members of the internal
			
 
				-structure or union are addressable without prefix in the outer structure.
			
 
				-This feature eliminates the clumsy naming of nested structures and,
			
 
				-particularly, unions.
			
 
				-For example, after these declarations,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-struct Lock
			
 
				-{
			
 
				-	int	locked;
			
 
				-};
			
 
				-
			
 
				-struct Node
			
 
				-{
			
 
				-	int	type;
			
 
				-	union{
			
 
				-		double  dval;
			
 
				-		double  fval;
			
 
				-		long    lval;
			
 
				-	};		/* anonymous union */
			
 
				-	struct Lock;	/* anonymous structure */
			
 
				-} *node;
			
 
				-
			
 
				-void	lock(struct Lock*);
			
 
				-</PRE></TT></DL>
			
 
				-one may refer to
			
 
				-<TT>node-&gt;type</TT>,
			
 
				-<TT>node-&gt;dval</TT>,
			
 
				-<TT>node-&gt;fval</TT>,
			
 
				-<TT>node-&gt;lval</TT>,
			
 
				-and
			
 
				-<TT>node-&gt;locked</TT>.
			
 
				-Moreover, the address of a
			
 
				-<TT>struct</TT>
			
 
				-<TT>Node</TT>
			
 
				-may be used without a cast anywhere that the address of a
			
 
				-<TT>struct</TT>
			
 
				-<TT>Lock</TT>
			
 
				-is used, such as in argument lists.
			
 
				-The compiler automatically promotes the type and adjusts the address.
			
 
				-Thus one may invoke
			
 
				-<TT>lock(node)</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Anonymous structures and unions may be accessed by type name
			
 
				-if (and only if) they are declared using a
			
 
				-<TT>typedef</TT>
			
 
				-name.
			
 
				-For example, using the above declaration for
			
 
				-<TT>Point</TT>,
			
 
				-one may declare
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-struct
			
 
				-{
			
 
				-	int	type;
			
 
				-	Point;
			
 
				-} p;
			
 
				-</PRE></TT></DL>
			
 
				-and refer to
			
 
				-<TT>p.Point</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-In the initialization of arrays, a number in square brackets before an
			
 
				-element sets the index for the initialization.  For example, to initialize
			
 
				-some elements in
			
 
				-a table of function pointers indexed by
			
 
				-ASCII
			
 
				-character,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void	percent(void), slash(void);
			
 
				-
			
 
				-void	(*func[128])(void) =
			
 
				-{
			
 
				-	['%']	percent,
			
 
				-	['/']	slash,
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-A similar syntax allows one to initialize structure elements:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Point p =
			
 
				-{
			
 
				-	.y 100,
			
 
				-	.x 200
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-These initialization syntaxes were later added to ANSI C, with the addition of an
			
 
				-equals sign between the index or tag and the value.
			
 
				-The Plan 9 compiler accepts either form.
			
 
				-<P>
			
 
				-Finally, the declaration
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-extern register reg;
			
 
				-</PRE></TT></DL>
			
 
				-(<I>this</I>
			
 
				-appearance of the register keyword is not ignored)
			
 
				-allocates a global register to hold the variable
			
 
				-<TT>reg</TT>.
			
 
				-External registers must be used carefully: they need to be declared in
			
 
				-<I>all</I>
			
 
				-source files and libraries in the program to guarantee the register
			
 
				-is not allocated temporarily for other purposes.
			
 
				-Especially on machines with few registers, such as the i386,
			
 
				-it is easy to link accidentally with code that has already usurped
			
 
				-the global registers and there is no diagnostic when this happens.
			
 
				-Used wisely, though, external registers are powerful.
			
 
				-The Plan 9 operating system uses them to access per-process and
			
 
				-per-machine data structures on a multiprocessor.  The storage class they provide
			
 
				-is hard to create in other ways.
			
 
				-</P>
			
 
				-<H4>The compile-time environment
			
 
				-</H4>
			
 
				-<P>
			
 
				-The code generated by the compilers is `optimized' by default:
			
 
				-variables are placed in registers and peephole optimizations are
			
 
				-performed.
			
 
				-The compiler flag
			
 
				-<TT>-N</TT>
			
 
				-disables these optimizations.
			
 
				-Registerization is done locally rather than throughout a function:
			
 
				-whether a variable occupies a register or
			
 
				-the memory location identified in the symbol
			
 
				-table depends on the activity of the variable and may change
			
 
				-throughout the life of the variable.
			
 
				-The
			
 
				-<TT>-N</TT>
			
 
				-flag is rarely needed;
			
 
				-its main use is to simplify debugging.
			
 
				-There is no information in the symbol table to identify the
			
 
				-registerization of a variable, so
			
 
				-<TT>-N</TT>
			
 
				-guarantees the variable is always where the symbol table says it is.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another flag,
			
 
				-<TT>-w</TT>,
			
 
				-turns
			
 
				-<I>on</I>
			
 
				-warnings about portability and problems detected in flow analysis.
			
 
				-Most code in Plan 9 is compiled with warnings enabled;
			
 
				-these warnings plus the type checking offered by function prototypes
			
 
				-provide most of the support of the Unix tool
			
 
				-<TT>lint</TT>
			
 
				-more accurately and with less chatter.
			
 
				-Two of the warnings,
			
 
				-`used and not set' and `set and not used', are almost always accurate but
			
 
				-may be triggered spuriously by code with invisible control flow,
			
 
				-such as in routines that call
			
 
				-<TT>longjmp</TT>.
			
 
				-The compiler statements
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-SET(v1);
			
 
				-USED(v2);
			
 
				-</PRE></TT></DL>
			
 
				-decorate the flow graph to silence the compiler.
			
 
				-Either statement accepts a comma-separated list of variables.
			
 
				-Use them carefully: they may silence real errors.
			
 
				-For the common case of unused parameters to a function,
			
 
				-leaving the name off the declaration silences the warnings.
			
 
				-That is, listing the type of a parameter but giving it no
			
 
				-associated variable name does the trick.
			
 
				-</P>
			
 
				-<H4>Debugging
			
 
				-</H4>
			
 
				-<P>
			
 
				-There are two debuggers available on Plan 9.
			
 
				-The first, and older, is
			
 
				-<TT>db</TT>,
			
 
				-a revision of Unix
			
 
				-<TT>adb</TT>.
			
 
				-The other,
			
 
				-<TT>acid</TT>,
			
 
				-is a source-level debugger whose commands are statements in
			
 
				-a true programming language.
			
 
				-<TT>Acid</TT>
			
 
				-is the preferred debugger, but since it
			
 
				-borrows some elements of
			
 
				-<TT>db</TT>,
			
 
				-notably the formats for displaying values, it is worth knowing a little bit about
			
 
				-<TT>db</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Both debuggers support multiple architectures in a single program; that is,
			
 
				-the programs are
			
 
				-<TT>db</TT>
			
 
				-and
			
 
				-<TT>acid</TT>,
			
 
				-not for example
			
 
				-<TT>vdb</TT>
			
 
				-and
			
 
				-<TT>vacid</TT>.
			
 
				-They also support cross-architecture debugging comfortably:
			
 
				-one may debug a 68020 binary on a MIPS.
			
 
				-</P>
			
 
				-<P>
			
 
				-Imagine a program has crashed mysteriously:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% X11/X
			
 
				-Fatal server bug!
			
 
				-failed to create default stipple
			
 
				-X 106: suicide: sys: trap: fault read addr=0x0 pc=0x00105fb8
			
 
				-% 
			
 
				-</PRE></TT></DL>
			
 
				-When a process dies on Plan 9 it hangs in the `broken' state
			
 
				-for debugging.
			
 
				-Attach a debugger to the process by naming its process id:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% acid 106
			
 
				-/proc/106/text:mips plan 9 executable
			
 
				-
			
 
				-/sys/lib/acid/port
			
 
				-/sys/lib/acid/mips
			
 
				-acid: 
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>acid</TT>
			
 
				-function
			
 
				-<TT>stk()</TT>
			
 
				-reports the stack traceback:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: stk()
			
 
				-At pc:0x105fb8:abort+0x24 /sys/src/ape/lib/ap/stdio/abort.c:6
			
 
				-abort() /sys/src/ape/lib/ap/stdio/abort.c:4
			
 
				-	called from FatalError+#4e
			
 
				-		/sys/src/X/mit/server/dix/misc.c:421
			
 
				-FatalError(s9=#e02, s8=#4901d200, s7=#2, s6=#72701, s5=#1,
			
 
				-    s4=#7270d, s3=#6, s2=#12, s1=#ff37f1c, s0=#6, f=#7270f)
			
 
				-    /sys/src/X/mit/server/dix/misc.c:416
			
 
				-	called from gnotscreeninit+#4ce
			
 
				-		/sys/src/X/mit/server/ddx/gnot/gnot.c:792
			
 
				-gnotscreeninit(snum=#0, sc=#80db0)
			
 
				-    /sys/src/X/mit/server/ddx/gnot/gnot.c:766
			
 
				-	called from AddScreen+#16e
			
 
				-		/n/bootes/sys/src/X/mit/server/dix/main.c:610
			
 
				-AddScreen(pfnInit=0x0000129c,argc=0x00000001,argv=0x7fffffe4)
			
 
				-    /sys/src/X/mit/server/dix/main.c:530
			
 
				-	called from InitOutput+0x80
			
 
				-		/sys/src/X/mit/server/ddx/brazil/brddx.c:522
			
 
				-InitOutput(argc=0x00000001,argv=0x7fffffe4)
			
 
				-    /sys/src/X/mit/server/ddx/brazil/brddx.c:511
			
 
				-	called from main+0x294
			
 
				-		/sys/src/X/mit/server/dix/main.c:225
			
 
				-main(argc=0x00000001,argv=0x7fffffe4)
			
 
				-    /sys/src/X/mit/server/dix/main.c:136
			
 
				-	called from _main+0x24
			
 
				-		/sys/src/ape/lib/ap/mips/main9.s:8
			
 
				-</PRE></TT></DL>
			
 
				-The function
			
 
				-<TT>lstk()</TT>
			
 
				-is similar but
			
 
				-also reports the values of local variables.
			
 
				-Note that the traceback includes full file names; this is a boon to debugging,
			
 
				-although it makes the output much noisier.
			
 
				-</P>
			
 
				-<P>
			
 
				-To use
			
 
				-<TT>acid</TT>
			
 
				-well you will need to learn its input language; see the
			
 
				-``Acid Manual'',
			
 
				-by Phil Winterbottom,
			
 
				-for details.  For simple debugging, however, the information in the manual page is
			
 
				-sufficient.  In particular, it describes the most useful functions
			
 
				-for examining a process.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compiler does not place
			
 
				-information describing the types of variables in the executable,
			
 
				-but a compile-time flag provides crude support for symbolic debugging.
			
 
				-The
			
 
				-<TT>-a</TT>
			
 
				-flag to the compiler suppresses code generation
			
 
				-and instead emits source text in the
			
 
				-<TT>acid</TT>
			
 
				-language to format and display data structure types defined in the program.
			
 
				-The easiest way to use this feature is to put a rule in the
			
 
				-<TT>mkfile</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-syms:   main.O
			
 
				-        <I>CC -a main.c &gt; syms
			
 
				-</PRE></TT></DL>
			
 
				-Then from within
			
 
				-</I><TT>acid</TT><I>,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-acid: include("sourcedirectory/syms")
			
 
				-</PRE></TT></DL>
			
 
				-to read in the relevant definitions.
			
 
				-(For multi-file source, you need to be a little fancier;
			
 
				-see
			
 
				-<A href="/magic/man2html/1/2c"></I><I>2c</I><I>(1)).
			
 
				-</A>This text includes, for each defined compound
			
 
				-type, a function with that name that may be called with the address of a structure
			
 
				-of that type to display its contents.
			
 
				-For example, if
			
 
				-</I><TT>rect</TT><I>
			
 
				-is a global variable of type
			
 
				-</I><TT>Rectangle</TT><I>,
			
 
				-one may execute
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Rectangle(*rect)
			
 
				-</PRE></TT></DL>
			
 
				-to display it.
			
 
				-The
			
 
				-</I><TT>*</TT><I>
			
 
				-(indirection) operator is necessary because
			
 
				-of the way
			
 
				-</I><TT>acid</TT><I>
			
 
				-works: each global symbol in the program is defined as a variable by
			
 
				-</I><TT>acid</TT><I>,
			
 
				-with value equal to the
			
 
				-</I><I>address</I><I>
			
 
				-of the symbol.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-Another common technique is to write by hand special
			
 
				-<TT>acid</TT>
			
 
				-code to define functions to aid debugging, initialize the debugger, and so on.
			
 
				-Conventionally, this is placed in a file called
			
 
				-<TT>acid</TT>
			
 
				-in the source directory; it has a line
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-include("sourcedirectory/syms");
			
 
				-</PRE></TT></DL>
			
 
				-to load the compiler-produced symbols.  One may edit the compiler output directly but
			
 
				-it is wiser to keep the hand-generated
			
 
				-<TT>acid</TT>
			
 
				-separate from the machine-generated.
			
 
				-</P>
			
 
				-<P>
			
 
				-To make things simple, the default rules in the system
			
 
				-<TT>mkfiles</TT>
			
 
				-include entries to make
			
 
				-<TT>foo.acid</TT>
			
 
				-from
			
 
				-<TT>foo.c</TT>,
			
 
				-so one may use
			
 
				-<TT>mk</TT>
			
 
				-to automate the production of
			
 
				-<TT>acid</TT>
			
 
				-definitions for a given C source file.
			
 
				-</P>
			
 
				-<P>
			
 
				-There is much more to say here.  See the
			
 
				-<TT>acid</TT>
			
 
				-manual page, the reference manual, or the paper
			
 
				-``Acid: A Debugger Built From A Language'',
			
 
				-also by Phil Winterbottom.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/compiler.html
+++ b/sys/doc/compiler.html
@@ -1,1117 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Plan 9 C Compilers
			
 
				-</H1>
			
 
				-<DL><DD><I>Ken Thompson<br>
			
 
				-ken@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Originally appeared, in a different form, in
			
 
				-Proceedings of the Summer 1990 UKUUG Conference,
			
 
				-pp. 41-51,
			
 
				-London, 1990.
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-This paper describes the overall structure and function of the Plan 9 C compilers.
			
 
				-A more detailed implementation document
			
 
				-for any one of the compilers
			
 
				-is yet to be written.
			
 
				-</DL>
			
 
				-<H4>1 Introduction
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-There are many compilers in the series.
			
 
				-Six of the compilers (MIPS 3000, SPARC, Intel 386, Power PC, DEC Alpha, and Motorola 68020)
			
 
				-are considered active and are used to compile
			
 
				-current versions of Plan 9.
			
 
				-Several others (Motorola 68000, Intel 960, ARM 7500, AMD 29000) have had only limited use, such as
			
 
				-to program peripherals or experimental devices.
			
 
				-<H4>2 Structure
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The compiler is a single program that produces an
			
 
				-object file.
			
 
				-Combined in the compiler are the traditional
			
 
				-roles of preprocessor, lexical analyzer, parser, code generator,
			
 
				-local optimizer,
			
 
				-and first half of the assembler.
			
 
				-The object files are binary forms of assembly
			
 
				-language,
			
 
				-similar to what might be passed between
			
 
				-the first and second passes of an assembler.
			
 
				-<br>&#32;<br>
			
 
				-Object files and libraries
			
 
				-are combined by a loader
			
 
				-program to produce the executable binary.
			
 
				-The loader combines the roles of second half
			
 
				-of the assembler, global optimizer, and loader.
			
 
				-The names of the compilers, loaders, and assemblers
			
 
				-are as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-SPARC	<TT>kc</TT>  <TT>kl</TT>  <TT>ka</TT>
			
 
				-Power PC	<TT>qc</TT>  <TT>ql</TT>  <TT>qa</TT>
			
 
				-MIPS	<TT>vc</TT>  <TT>vl</TT>  <TT>va</TT>
			
 
				-Motorola 68000	<TT>1c</TT>  <TT>1l</TT>  <TT>1a</TT>
			
 
				-Motorola 68020	<TT>2c</TT>  <TT>2l</TT>  <TT>2a</TT>
			
 
				-ARM 7500	<TT>5c</TT>  <TT>5l</TT>  <TT>5a</TT>
			
 
				-Intel 960	<TT>6c</TT>  <TT>6l</TT>  <TT>6a</TT>
			
 
				-DEC Alpha	<TT>7c</TT>  <TT>7l</TT>  <TT>7a</TT>
			
 
				-Intel 386	<TT>8c</TT>  <TT>8l</TT>  <TT>8a</TT>
			
 
				-AMD 29000	<TT>9c</TT>  <TT>9l</TT>  <TT>9a</TT>
			
 
				-</PRE></TT></DL>
			
 
				-There is a further breakdown
			
 
				-in the source of the compilers into
			
 
				-object-independent and
			
 
				-object-dependent
			
 
				-parts.
			
 
				-All of the object-independent parts
			
 
				-are combined into source files in the
			
 
				-directory
			
 
				-<TT>/sys/src/cmd/cc</TT>.
			
 
				-The object-dependent parts are collected
			
 
				-in a separate directory for each compiler,
			
 
				-for example
			
 
				-<TT>/sys/src/cmd/vc</TT>.
			
 
				-All of the code,
			
 
				-both object-independent and
			
 
				-object-dependent,
			
 
				-is machine-independent
			
 
				-and may be cross-compiled and executed on any
			
 
				-of the architectures.
			
 
				-<H4>3 The Language
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The compiler implements ANSI C with some
			
 
				-restrictions and extensions
			
 
				-[ANSI90].
			
 
				-Most of the restrictions are due to
			
 
				-personal preference, while
			
 
				-most of the extensions were to help in
			
 
				-the implementation of Plan 9.
			
 
				-There are other departures from the standard,
			
 
				-particularly in the libraries,
			
 
				-that are beyond the scope of this
			
 
				-paper.
			
 
				-<H4>3.1 Register, volatile, const
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The keyword
			
 
				-<TT>register</TT>
			
 
				-is recognized syntactically
			
 
				-but is semantically ignored.
			
 
				-Thus taking the address of a
			
 
				-<TT>register</TT>
			
 
				-variable is not diagnosed.
			
 
				-The keyword
			
 
				-<TT>volatile</TT>
			
 
				-disables all optimizations, in particular registerization, of the corresponding variable.
			
 
				-The keyword
			
 
				-<TT>const</TT>
			
 
				-generates warnings (if warnings are enabled by the compiler's
			
 
				-<TT>-w</TT>
			
 
				-option) of non-constant use of the variable,
			
 
				-but does not affect the generated code.
			
 
				-<H4>3.2 The preprocessor
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The C preprocessor is probably the
			
 
				-biggest departure from the ANSI standard.
			
 
				-<br>&#32;<br>
			
 
				-The preprocessor built into the Plan 9 compilers does not support
			
 
				-<TT>#if</TT>,
			
 
				-although it does handle
			
 
				-<TT>#ifdef</TT>
			
 
				-and
			
 
				-<TT>#include</TT>.
			
 
				-If it is necessary to be more standard,
			
 
				-the source text can first be run through the separate ANSI C
			
 
				-preprocessor,
			
 
				-<TT>cpp</TT>.
			
 
				-<H4>3.3 Unnamed substructures
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The most important and most heavily used of the
			
 
				-extensions is the declaration of an
			
 
				-unnamed substructure or subunion.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	typedef
			
 
				-	struct	lock
			
 
				-	{
			
 
				-		int    locked;
			
 
				-	} Lock;
			
 
				-
			
 
				-	typedef
			
 
				-	struct	node
			
 
				-	{
			
 
				-		int	type;
			
 
				-		union
			
 
				-		{
			
 
				-			double dval;
			
 
				-			float  fval;
			
 
				-			long   lval;
			
 
				-		};
			
 
				-		Lock;
			
 
				-	} Node;
			
 
				-
			
 
				-	Lock*	lock;
			
 
				-	Node*	node;
			
 
				-</PRE></TT></DL>
			
 
				-The declaration of
			
 
				-<TT>Node</TT>
			
 
				-has an unnamed substructure of type
			
 
				-<TT>Lock</TT>
			
 
				-and an unnamed subunion.
			
 
				-One use of this feature allows references to elements of the
			
 
				-subunit to be accessed as if they were in
			
 
				-the outer structure.
			
 
				-Thus
			
 
				-<TT>node-&gt;dval</TT>
			
 
				-and
			
 
				-<TT>node-&gt;locked</TT>
			
 
				-are legitimate references.
			
 
				-<br>&#32;<br>
			
 
				-When an outer structure is used
			
 
				-in a context that is only legal for
			
 
				-an unnamed substructure,
			
 
				-the compiler promotes the reference to the
			
 
				-unnamed substructure.
			
 
				-This is true for references to structures and
			
 
				-to references to pointers to structures.
			
 
				-This happens in assignment statements and
			
 
				-in argument passing where prototypes have been
			
 
				-declared.
			
 
				-Thus, continuing with the example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	lock = node;
			
 
				-</PRE></TT></DL>
			
 
				-would assign a pointer to the unnamed
			
 
				-<TT>Lock</TT>
			
 
				-in
			
 
				-the
			
 
				-<TT>Node</TT>
			
 
				-to the variable
			
 
				-<TT>lock</TT>.
			
 
				-Another example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	extern void lock(Lock*);
			
 
				-	func(...)
			
 
				-	{
			
 
				-		...
			
 
				-		lock(node);
			
 
				-		...
			
 
				-	}
			
 
				-</PRE></TT></DL>
			
 
				-will pass a pointer to the
			
 
				-<TT>Lock</TT>
			
 
				-substructure.
			
 
				-<br>&#32;<br>
			
 
				-Finally, in places where context is insufficient to identify the unnamed structure,
			
 
				-the type name (it must be a
			
 
				-<TT>typedef</TT>)
			
 
				-of the unnamed structure can be used as an identifier.
			
 
				-In our example,
			
 
				-<TT>&amp;node-&gt;Lock</TT>
			
 
				-gives the address of the anonymous
			
 
				-<TT>Lock</TT>
			
 
				-structure.
			
 
				-<H4>3.4 Structure displays
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-A structure cast followed by a list of expressions in braces is
			
 
				-an expression with the type of the structure and elements assigned from
			
 
				-the corresponding list.
			
 
				-Structures are now almost first-class citizens of the language.
			
 
				-It is common to see code like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	r = (Rectangle){point1, (Point){x,y+2}};
			
 
				-</PRE></TT></DL>
			
 
				-<H4>3.5 Initialization indexes
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-In initializers of arrays,
			
 
				-one may place a constant expression
			
 
				-in square brackets before an initializer.
			
 
				-This causes the next initializer to assign
			
 
				-the indicated element.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	enum	errors
			
 
				-	{
			
 
				-		Etoobig,
			
 
				-		Ealarm,
			
 
				-		Egreg
			
 
				-	};
			
 
				-	char* errstrings[] =
			
 
				-	{
			
 
				-		[Ealarm]	"Alarm call",
			
 
				-		[Egreg]	"Panic: out of mbufs",
			
 
				-		[Etoobig]	"Arg list too long",
			
 
				-	};
			
 
				-</PRE></TT></DL>
			
 
				-In the same way,
			
 
				-individual structure members may
			
 
				-be initialized in any order by preceding the initialization with
			
 
				-<TT>.tagname</TT>.
			
 
				-Both forms allow an optional
			
 
				-<TT>=</TT>,
			
 
				-to be compatible with a proposed
			
 
				-extension to ANSI C.
			
 
				-<H4>3.6 External register
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The declaration
			
 
				-<TT>extern</TT>
			
 
				-<TT>register</TT>
			
 
				-will dedicate a register to
			
 
				-a variable on a global basis.
			
 
				-It can be used only under special circumstances.
			
 
				-External register variables must be identically
			
 
				-declared in all modules and
			
 
				-libraries.
			
 
				-The feature is not intended for efficiency,
			
 
				-although it can produce efficient code;
			
 
				-rather it represents a unique storage class that
			
 
				-would be hard to get any other way.
			
 
				-On a shared-memory multi-processor,
			
 
				-an external register is
			
 
				-one-per-processor and neither one-per-procedure (automatic)
			
 
				-nor one-per-system (external).
			
 
				-It is used for two variables in the Plan 9 kernel,
			
 
				-<TT>u</TT>
			
 
				-and
			
 
				-<TT>m</TT>.
			
 
				-<TT>U</TT>
			
 
				-is a pointer to the structure representing the currently running process
			
 
				-and
			
 
				-<TT>m</TT>
			
 
				-is a pointer to the per-machine data structure.
			
 
				-<H4>3.7 Long long
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The compilers accept
			
 
				-<TT>long</TT>
			
 
				-<TT>long</TT>
			
 
				-as a basic type meaning 64-bit integer.
			
 
				-On all of the machines
			
 
				-this type is synthesized from 32-bit instructions.
			
 
				-<H4>3.8 Pragma
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The compilers accept
			
 
				-<TT>#pragma</TT>
			
 
				-<TT>lib</TT>
			
 
				-<I>libname</I>
			
 
				-and pass the
			
 
				-library name string uninterpreted
			
 
				-to the loader.
			
 
				-The loader uses the library name to
			
 
				-find libraries to load.
			
 
				-If the name contains
			
 
				-<TT>%O</TT>,
			
 
				-it is replaced with
			
 
				-the single character object type of the compiler
			
 
				-(e.g.,
			
 
				-<TT>v</TT>
			
 
				-for the MIPS).
			
 
				-If the name contains
			
 
				-<TT>%M</TT>,
			
 
				-it is replaced with
			
 
				-the architecture type for the compiler
			
 
				-(e.g.,
			
 
				-<TT>mips</TT>
			
 
				-for the MIPS).
			
 
				-If the name starts with
			
 
				-<TT>/</TT>
			
 
				-it is an absolute pathname;
			
 
				-if it starts with
			
 
				-<TT>.</TT>
			
 
				-then it is searched for in the loader's current directory.
			
 
				-Otherwise, the name is searched from
			
 
				-<TT>/%M/lib</TT>.
			
 
				-Such
			
 
				-<TT>#pragma</TT>
			
 
				-statements in header files guarantee that the correct
			
 
				-libraries are always linked with a program without the
			
 
				-need to specify them explicitly at link time.
			
 
				-<br>&#32;<br>
			
 
				-They also accept
			
 
				-<TT>#pragma</TT>
			
 
				-<TT>hjdicks</TT>
			
 
				-<TT>on</TT>
			
 
				-(or
			
 
				-<TT>yes</TT>
			
 
				-or
			
 
				-<TT>1</TT>)
			
 
				-to cause subsequently declared data, until
			
 
				-<TT>#pragma</TT>
			
 
				-<TT>hjdicks</TT>
			
 
				-<TT>off</TT>
			
 
				-(or
			
 
				-<TT>no</TT>
			
 
				-or
			
 
				-<TT>0</TT>),
			
 
				-to be laid out in memory tightly packed in successive bytes, disregarding
			
 
				-the usual alignment rules.
			
 
				-Accessing such data can cause faults.
			
 
				-<br>&#32;<br>
			
 
				-Similarly, 
			
 
				-<TT>#pragma</TT>
			
 
				-<TT>profile</TT>
			
 
				-<TT>off</TT>
			
 
				-(or
			
 
				-<TT>no</TT>
			
 
				-or
			
 
				-<TT>0</TT>)
			
 
				-causes subsequently declared functions, until
			
 
				-<TT>#pragma</TT>
			
 
				-<TT>profile</TT>
			
 
				-<TT>on</TT>
			
 
				-(or
			
 
				-<TT>yes</TT>
			
 
				-or
			
 
				-<TT>1</TT>),
			
 
				-to be marked as unprofiled.
			
 
				-Such functions will not be profiled when 
			
 
				-profiling is enabled for the rest of the program.
			
 
				-<br>&#32;<br>
			
 
				-Two
			
 
				-<TT>#pragma</TT>
			
 
				-statements allow type-checking of
			
 
				-<TT>print</TT>-like
			
 
				-functions.
			
 
				-The first, of the form
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#pragma varargck argpos error 2
			
 
				-</PRE></TT></DL>
			
 
				-tells the compiler that the second argument to
			
 
				-<TT>error</TT>
			
 
				-is a
			
 
				-<TT>print</TT>
			
 
				-format string (see the manual page
			
 
				-<A href="/magic/man2html/2/print"><I>print</I>(2))
			
 
				-</A>that specifies how to format
			
 
				-<TT>error</TT>'s
			
 
				-subsequent arguments.
			
 
				-The second, of the form
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#pragma varargck type "s" char*
			
 
				-</PRE></TT></DL>
			
 
				-says that the
			
 
				-<TT>print</TT>
			
 
				-format verb
			
 
				-<TT>s</TT>
			
 
				-processes an argument of
			
 
				-type
			
 
				-<TT>char*</TT>.
			
 
				-If the compiler's
			
 
				-<TT>-F</TT>
			
 
				-option is enabled, the compiler will use this information
			
 
				-to report type violations in the arguments to
			
 
				-<TT>print</TT>,
			
 
				-<TT>error</TT>,
			
 
				-and similar routines.
			
 
				-<H4>4 Object module conventions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The overall conventions of the runtime environment
			
 
				-are important
			
 
				-to runtime efficiency.
			
 
				-In this section,
			
 
				-several of these conventions are discussed.
			
 
				-<H4>4.1 Register saving
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-In the Plan 9 compilers,
			
 
				-the caller of a procedure saves the registers.
			
 
				-With caller-saves,
			
 
				-the leaf procedures can use all the
			
 
				-registers and never save them.
			
 
				-If you spend a lot of time at the leaves,
			
 
				-this seems preferable.
			
 
				-With callee-saves,
			
 
				-the saving of the registers is done
			
 
				-in the single point of entry and return.
			
 
				-If you are interested in space,
			
 
				-this seems preferable.
			
 
				-In both,
			
 
				-there is a degree of uncertainty
			
 
				-about what registers need to be saved.
			
 
				-Callee-saved registers make it difficult to
			
 
				-find variables in registers in debuggers.
			
 
				-Callee-saved registers also complicate
			
 
				-the implementation of
			
 
				-<TT>longjmp</TT>.
			
 
				-The convincing argument is
			
 
				-that with caller-saves,
			
 
				-the decision to registerize a variable
			
 
				-can include the cost of saving the register
			
 
				-across calls.
			
 
				-For a further discussion of caller- vs. callee-saves,
			
 
				-see the paper by Davidson and Whalley [Dav91].
			
 
				-<br>&#32;<br>
			
 
				-In the Plan 9 operating system,
			
 
				-calls to the kernel look like normal procedure
			
 
				-calls, which means
			
 
				-the caller
			
 
				-has saved the registers and the system
			
 
				-entry does not have to.
			
 
				-This makes system calls considerably faster.
			
 
				-Since this is a potential security hole,
			
 
				-and can lead to non-determinism,
			
 
				-the system may eventually save the registers
			
 
				-on entry,
			
 
				-or more likely clear the registers on return.
			
 
				-<H4>4.2 Calling convention
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Older C compilers maintain a frame pointer, which is at a known constant
			
 
				-offset from the stack pointer within each function.
			
 
				-For machines where the stack grows towards zero,
			
 
				-the argument pointer is at a known constant offset
			
 
				-from the frame pointer.
			
 
				-Since the stack grows down in Plan 9,
			
 
				-the Plan 9 compilers
			
 
				-keep neither an
			
 
				-explicit frame pointer nor
			
 
				-an explicit argument pointer;
			
 
				-instead they generate addresses relative to the stack pointer.
			
 
				-<br>&#32;<br>
			
 
				-On some architectures, the first argument to a subroutine is passed in a register.
			
 
				-<H4>4.3 Functions returning structures
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Structures longer than one word are awkward to implement
			
 
				-since they do not fit in registers and must
			
 
				-be passed around in memory.
			
 
				-Functions that return structures
			
 
				-are particularly clumsy.
			
 
				-The Plan 9 compilers pass the return address of
			
 
				-a structure as the first argument of a
			
 
				-function that has a structure return value.
			
 
				-Thus
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	x = f(...)
			
 
				-</PRE></TT></DL>
			
 
				-is rewritten as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	f(&amp;x, ...).
			
 
				-</PRE></TT></DL>
			
 
				-This saves a copy and makes the compilation
			
 
				-much less clumsy.
			
 
				-A disadvantage is that if you call this
			
 
				-function without an assignment,
			
 
				-a dummy location must be invented.
			
 
				-<br>&#32;<br>
			
 
				-There is also a danger of calling a function
			
 
				-that returns a structure without declaring
			
 
				-it as such.
			
 
				-With ANSI C function prototypes,
			
 
				-this error need never occur.
			
 
				-<H4>5 Implementation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The compiler is divided internally into
			
 
				-four machine-independent passes,
			
 
				-four machine-dependent passes,
			
 
				-and an output pass.
			
 
				-The next nine sections describe each pass in order.
			
 
				-<H4>5.1 Parsing
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The first pass is a YACC-based parser
			
 
				-[Joh79].
			
 
				-Declarations are interpreted immediately,
			
 
				-building a block structured symbol table.
			
 
				-Executable statements are put into a parse tree
			
 
				-and collected,
			
 
				-without interpretation.
			
 
				-At the end of each procedure,
			
 
				-the parse tree for the function is
			
 
				-examined by the other passes of the compiler.
			
 
				-<br>&#32;<br>
			
 
				-The input stream of the parser is
			
 
				-a pushdown list of input activations.
			
 
				-The preprocessor
			
 
				-expansions of
			
 
				-macros
			
 
				-and
			
 
				-<TT>#include</TT>
			
 
				-are implemented as pushdowns.
			
 
				-Thus there is no separate
			
 
				-pass for preprocessing.
			
 
				-<H4>5.2 Typing
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The next pass distributes typing information
			
 
				-to every node of the tree.
			
 
				-Implicit operations on the tree are added,
			
 
				-such as type promotions and taking the
			
 
				-address of arrays and functions.
			
 
				-<H4>5.3 Machine-independent optimization
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The next pass performs optimizations
			
 
				-and transformations of the tree, such as converting
			
 
				-<TT>&amp;*x</TT>
			
 
				-and
			
 
				-<TT>*&amp;x</TT>
			
 
				-into
			
 
				-<TT>x</TT>.
			
 
				-Constant expressions are converted to constants in this pass.
			
 
				-<H4>5.4 Arithmetic rewrites
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-This is another machine-independent optimization.
			
 
				-Subtrees of add, subtract, and multiply of integers are
			
 
				-rewritten for easier compilation.
			
 
				-The major transformation is factoring:
			
 
				-<TT>4+8*a+16*b+5</TT>
			
 
				-is transformed into
			
 
				-<TT>9+8*(a+2*b)</TT>.
			
 
				-Such expressions arise from address
			
 
				-manipulation and array indexing.
			
 
				-<H4>5.5 Addressability
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-This is the first of the machine-dependent passes.
			
 
				-The addressability of a processor is defined as the set of
			
 
				-expressions that is legal in the address field
			
 
				-of a machine language instruction.
			
 
				-The addressability of different processors varies widely.
			
 
				-At one end of the spectrum are the 68020 and VAX,
			
 
				-which allow a complex mix of incrementing,
			
 
				-decrementing,
			
 
				-indexing, and relative addressing.
			
 
				-At the other end is the MIPS,
			
 
				-which allows only registers and constant offsets from the
			
 
				-contents of a register.
			
 
				-The addressability can be different for different instructions
			
 
				-within the same processor.
			
 
				-<br>&#32;<br>
			
 
				-It is important to the code generator to know when a
			
 
				-subtree represents an address of a particular type.
			
 
				-This is done with a bottom-up walk of the tree.
			
 
				-In this pass, the leaves are labeled with small integers.
			
 
				-When an internal node is encountered,
			
 
				-it is labeled by consulting a table indexed by the
			
 
				-labels on the left and right subtrees.
			
 
				-For example,
			
 
				-on the 68020 processor,
			
 
				-it is possible to address an
			
 
				-offset from a named location.
			
 
				-In C, this is represented by the expression
			
 
				-<TT>*(&amp;name+constant)</TT>.
			
 
				-This is marked addressable by the following table.
			
 
				-In the table,
			
 
				-a node represented by the left column is marked
			
 
				-with a small integer from the right column.
			
 
				-Marks of the form
			
 
				-<TT>A<small><small><sub>i</sub></small></small></TT>
			
 
				-are addressable while
			
 
				-marks of the form
			
 
				-<TT>N<small><small><sub>i</sub></small></small></TT>
			
 
				-are not addressable.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	Node	Marked
			
 
				-	name	A<small><small><sub>1</sub></small></small>
			
 
				-	const	A<small><small><sub>2</sub></small></small>
			
 
				-	&amp;A<small><small><sub>1</sub></small></small>	A<small><small><sub>3</sub></small></small>
			
 
				-	A<small><small><sub>3</sub></small></small>+A<small><small><sub>2</sub></small></small>	N<small><small><sub>1</sub></small></small> (note that this is not addressable)
			
 
				-	*N<small><small><sub>1</sub></small></small>	A<small><small><sub>4</sub></small></small>
			
 
				-</PRE></TT></DL>
			
 
				-Here there is a distinction between
			
 
				-a node marked
			
 
				-<TT>A<small><small><sub>1</sub></small></small></TT>
			
 
				-and a node marked
			
 
				-<TT>A<small><small><sub>4</sub></small></small></TT>
			
 
				-because the address operator of an
			
 
				-<TT>A<small><small><sub>4</sub></small></small></TT>
			
 
				-node is not addressable.
			
 
				-So to extend the table:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	Node	Marked
			
 
				-	&amp;A<small><small><sub>4</sub></small></small>	N<small><small><sub>2</sub></small></small>
			
 
				-	N<small><small><sub>2</sub></small></small>+N<small><small><sub>1</sub></small></small>	N<small><small><sub>1</sub></small></small>
			
 
				-</PRE></TT></DL>
			
 
				-The full addressability of the 68020 is expressed
			
 
				-in 18 rules like this,
			
 
				-while the addressability of the MIPS is expressed
			
 
				-in 11 rules.
			
 
				-When one ports the compiler,
			
 
				-this table is usually initialized
			
 
				-so that leaves are labeled as addressable and nothing else.
			
 
				-The code produced is poor,
			
 
				-but porting is easy.
			
 
				-The table can be extended later.
			
 
				-<br>&#32;<br>
			
 
				-This pass also rewrites some complex operators
			
 
				-into procedure calls.
			
 
				-Examples include 64-bit multiply and divide.
			
 
				-<br>&#32;<br>
			
 
				-In the same bottom-up pass of the tree,
			
 
				-the nodes are labeled with a Sethi-Ullman complexity
			
 
				-[Set70].
			
 
				-This number is roughly the number of registers required
			
 
				-to compile the tree on an ideal machine.
			
 
				-An addressable node is marked 0.
			
 
				-A function call is marked infinite.
			
 
				-A unary operator is marked as the
			
 
				-maximum of 1 and the mark of its subtree.
			
 
				-A binary operator with equal marks on its subtrees is
			
 
				-marked with a subtree mark plus 1.
			
 
				-A binary operator with unequal marks on its subtrees is
			
 
				-marked with the maximum mark of its subtrees.
			
 
				-The actual values of the marks are not too important,
			
 
				-but the relative values are.
			
 
				-The goal is to compile the harder
			
 
				-(larger mark)
			
 
				-subtree first.
			
 
				-<H4>5.6 Code generation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Code is generated by recursive
			
 
				-descent.
			
 
				-The Sethi-Ullman complexity completely guides the
			
 
				-order.
			
 
				-The addressability defines the leaves.
			
 
				-The only difficult part is compiling a tree
			
 
				-that has two infinite (function call)
			
 
				-subtrees.
			
 
				-In this case,
			
 
				-one subtree is compiled into the return register
			
 
				-(usually the most convenient place for a function call)
			
 
				-and then stored on the stack.
			
 
				-The other subtree is compiled into the return register
			
 
				-and then the operation is compiled with
			
 
				-operands from the stack and the return register.
			
 
				-<br>&#32;<br>
			
 
				-There is a separate boolean code generator that compiles
			
 
				-conditional expressions.
			
 
				-This is fundamentally different from compiling an arithmetic expression.
			
 
				-The result of the boolean code generator is the
			
 
				-position of the program counter and not an expression.
			
 
				-The boolean code generator makes extensive use of De Morgan's rule.
			
 
				-The boolean code generator is an expanded version of that described
			
 
				-in chapter 8 of Aho, Sethi, and Ullman
			
 
				-[Aho87].
			
 
				-<br>&#32;<br>
			
 
				-There is a considerable amount of talk in the literature
			
 
				-about automating this part of a compiler with a machine
			
 
				-description.
			
 
				-Since this code generator is so small
			
 
				-(less than 500 lines of C)
			
 
				-and easy,
			
 
				-it hardly seems worth the effort.
			
 
				-<H4>5.7 Registerization
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Up to now,
			
 
				-the compiler has operated on syntax trees
			
 
				-that are roughly equivalent to the original source language.
			
 
				-The previous pass has produced machine language in an internal
			
 
				-format.
			
 
				-The next two passes operate on the internal machine language
			
 
				-structures.
			
 
				-The purpose of the next pass is to reintroduce
			
 
				-registers for heavily used variables.
			
 
				-<br>&#32;<br>
			
 
				-All of the variables that can be
			
 
				-potentially registerized within a procedure are
			
 
				-placed in a table.
			
 
				-(Suitable variables are any automatic or external
			
 
				-scalars that do not have their addresses extracted.
			
 
				-Some constants that are hard to reference are also
			
 
				-considered for registerization.)
			
 
				-Four separate data flow equations are evaluated
			
 
				-over the procedure on all of these variables.
			
 
				-Two of the equations are the normal set-behind
			
 
				-and used-ahead
			
 
				-bits that define the life of a variable.
			
 
				-The two new bits tell if a variable life
			
 
				-crosses a function call ahead or behind.
			
 
				-By examining a variable over its lifetime,
			
 
				-it is possible to get a cost
			
 
				-for registerizing.
			
 
				-Loops are detected and the costs are multiplied
			
 
				-by three for every level of loop nesting.
			
 
				-Costs are sorted and the variables
			
 
				-are replaced by available registers on a greedy basis.
			
 
				-<br>&#32;<br>
			
 
				-The 68020 has two different
			
 
				-types of registers.
			
 
				-For the 68020,
			
 
				-two different costs are calculated for
			
 
				-each variable life and the register type that
			
 
				-affords the better cost is used.
			
 
				-Ties are broken by counting the number of available
			
 
				-registers of each type.
			
 
				-<br>&#32;<br>
			
 
				-Note that externals are registerized together with automatics.
			
 
				-This is done by evaluating the semantics of a ``call'' instruction
			
 
				-differently for externals and automatics.
			
 
				-Since a call goes outside the local procedure,
			
 
				-it is assumed that a call references all externals.
			
 
				-Similarly,
			
 
				-externals are assumed to be set before an ``entry'' instruction
			
 
				-and assumed to be referenced after a ``return'' instruction.
			
 
				-This makes sure that externals are in memory across calls.
			
 
				-<br>&#32;<br>
			
 
				-The overall results are satisfactory.
			
 
				-It would be nice to be able to do this processing in
			
 
				-a machine-independent way,
			
 
				-but it is impossible to get all of the costs and
			
 
				-side effects of different choices by examining the parse tree.
			
 
				-<br>&#32;<br>
			
 
				-Most of the code in the registerization pass is machine-independent.
			
 
				-The major machine-dependency is in
			
 
				-examining a machine instruction to ask if it sets or references
			
 
				-a variable.
			
 
				-<H4>5.8 Machine code optimization
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The next pass walks the machine code
			
 
				-for opportunistic optimizations.
			
 
				-For the most part,
			
 
				-this is highly specific to a particular
			
 
				-processor.
			
 
				-One optimization that is performed
			
 
				-on all of the processors is the
			
 
				-removal of unnecessary ``move''
			
 
				-instructions.
			
 
				-Ironically,
			
 
				-most of these instructions were inserted by
			
 
				-the previous pass.
			
 
				-There are two patterns that are repetitively
			
 
				-matched and replaced until no more matches are
			
 
				-found.
			
 
				-The first tries to remove ``move'' instructions
			
 
				-by relabeling variables.
			
 
				-<br>&#32;<br>
			
 
				-When a ``move'' instruction is encountered,
			
 
				-if the destination variable is set before the
			
 
				-source variable is referenced,
			
 
				-then all of the references to the destination
			
 
				-variable can be renamed to the source and the ``move''
			
 
				-can be deleted.
			
 
				-This transformation uses the reverse data flow
			
 
				-set up in the previous pass.
			
 
				-<br>&#32;<br>
			
 
				-An example of this pattern is depicted in the following
			
 
				-table.
			
 
				-The pattern is in the left column and the
			
 
				-replacement action is in the right column.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	MOVE	a-&gt;b		(remove)
			
 
				-	(sequence with no mention of <TT>a</TT>)
			
 
				-	USE	b		USE	a
			
 
				-	(sequence with no mention of <TT>a</TT>)
			
 
				-	SET	b		SET	b
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-Experiments have shown that it is marginally
			
 
				-worthwhile to rename uses of the destination variable
			
 
				-with uses of the source variable up to
			
 
				-the first use of the source variable.
			
 
				-<br>&#32;<br>
			
 
				-The second transform will do relabeling
			
 
				-without deleting instructions.
			
 
				-When a ``move'' instruction is encountered,
			
 
				-if the source variable has been set prior
			
 
				-to the use of the destination variable
			
 
				-then all of the references to the source
			
 
				-variable are replaced by the destination and
			
 
				-the ``move'' is inverted.
			
 
				-Typically,
			
 
				-this transformation will alter two ``move''
			
 
				-instructions and allow the first transformation
			
 
				-another chance to remove code.
			
 
				-This transformation uses the forward data flow
			
 
				-set up in the previous pass.
			
 
				-<br>&#32;<br>
			
 
				-Again,
			
 
				-the following is a depiction of the transformation where
			
 
				-the pattern is in the left column and the
			
 
				-rewrite is in the right column.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	SET	a		SET	b
			
 
				-	(sequence with no use of <TT>b</TT>)
			
 
				-	USE	a		USE	b
			
 
				-	(sequence with no use of <TT>b</TT>)
			
 
				-	MOVE	a-&gt;b		MOVE	b-&gt;a
			
 
				-</PRE></TT></DL>
			
 
				-Iterating these transformations
			
 
				-will usually get rid of all redundant ``move'' instructions.
			
 
				-<br>&#32;<br>
			
 
				-A problem with this organization is that the costs
			
 
				-of registerization calculated in the previous pass
			
 
				-must depend on how well this pass can detect and remove
			
 
				-redundant instructions.
			
 
				-Often,
			
 
				-a fine candidate for registerization is rejected
			
 
				-because of the cost of instructions that are later
			
 
				-removed.
			
 
				-<H4>5.9 Writing the object file
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The last pass walks the internal assembly language
			
 
				-and writes the object file.
			
 
				-The object file is reduced in size by about a factor
			
 
				-of three with simple compression
			
 
				-techniques.
			
 
				-The most important aspect of the object file
			
 
				-format is that it is independent of the compiling machine.
			
 
				-All integer and floating numbers in the object
			
 
				-code are converted to known formats and byte
			
 
				-orders.
			
 
				-<H4>6 The loader
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The loader is a multiple pass program that
			
 
				-reads object files and libraries and produces
			
 
				-an executable binary.
			
 
				-The loader also does some minimal
			
 
				-optimizations and code rewriting.
			
 
				-Many of the operations performed by the
			
 
				-loader are machine-dependent.
			
 
				-<br>&#32;<br>
			
 
				-The first pass of the loader reads the
			
 
				-object modules into an internal data
			
 
				-structure that looks like binary assembly language.
			
 
				-As the instructions are read,
			
 
				-code is reordered to remove
			
 
				-unconditional branch instructions.
			
 
				-Conditional branch instructions are inverted
			
 
				-to prevent the insertion of unconditional branches.
			
 
				-The loader will also make a copy of a few instructions
			
 
				-to remove an unconditional branch.
			
 
				-<br>&#32;<br>
			
 
				-The next pass allocates addresses for
			
 
				-all external data.
			
 
				-Typical of processors is the MIPS,
			
 
				-which can reference &#177;32K bytes from a
			
 
				-register.
			
 
				-The loader allocates the register
			
 
				-<TT>R30</TT>
			
 
				-as the static pointer.
			
 
				-The value placed in
			
 
				-<TT>R30</TT>
			
 
				-is the base of the data segment plus 32K.
			
 
				-It is then cheap to reference all data in the
			
 
				-first 64K of the data segment.
			
 
				-External variables are allocated to
			
 
				-the data segment
			
 
				-with the smallest variables allocated first.
			
 
				-If all of the data cannot fit into the first
			
 
				-64K of the data segment,
			
 
				-then usually only a few large arrays
			
 
				-need more expensive addressing modes.
			
 
				-<br>&#32;<br>
			
 
				-For the MIPS processor,
			
 
				-the loader makes a pass over the internal
			
 
				-structures,
			
 
				-exchanging instructions to try
			
 
				-to fill ``delay slots'' with useful work.
			
 
				-If a useful instruction cannot be found
			
 
				-to fill a delay slot,
			
 
				-the loader will insert
			
 
				-``noop''
			
 
				-instructions.
			
 
				-This pass is very expensive and does not
			
 
				-do a good job.
			
 
				-About 40% of all instructions are in
			
 
				-delay slots.
			
 
				-About 65% of these are useful instructions and
			
 
				-35% are ``noops.''
			
 
				-The vendor-supplied assembler does this job
			
 
				-more effectively,
			
 
				-filling about 80%
			
 
				-of the delay slots with useful instructions.
			
 
				-<br>&#32;<br>
			
 
				-On the 68020 processor,
			
 
				-branch instructions come in a variety of
			
 
				-sizes depending on the relative distance
			
 
				-of the branch.
			
 
				-Thus the size of branch instructions
			
 
				-can be mutually dependent.
			
 
				-The loader uses a multiple pass algorithm
			
 
				-to resolve the branch lengths
			
 
				-[Szy78].
			
 
				-Initially, all branches are assumed minimal length.
			
 
				-On each subsequent pass,
			
 
				-the branches are reassessed
			
 
				-and expanded if necessary.
			
 
				-When no more expansions occur,
			
 
				-the locations of the instructions in
			
 
				-the text segment are known.
			
 
				-<br>&#32;<br>
			
 
				-On the MIPS processor,
			
 
				-all instructions are one size.
			
 
				-A single pass over the instructions will
			
 
				-determine the locations of all addresses
			
 
				-in the text segment.
			
 
				-<br>&#32;<br>
			
 
				-The last pass of the loader produces the
			
 
				-executable binary.
			
 
				-A symbol table and other tables are
			
 
				-produced to help the debugger to
			
 
				-interpret the binary symbolically.
			
 
				-<br>&#32;<br>
			
 
				-The loader places absolute source line numbers in the symbol table.
			
 
				-The name and absolute line number of all
			
 
				-<TT>#include</TT>
			
 
				-files are also placed in the
			
 
				-symbol table so that the debuggers can
			
 
				-associate object code to source files.
			
 
				-<H4>7 Performance
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The following is a table of the source size of the MIPS
			
 
				-compiler.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	lines	module
			
 
				-	 509	machine-independent headers
			
 
				-	1070	machine-independent YACC source
			
 
				-	6090	machine-independent C source
			
 
				-
			
 
				-	 545	machine-dependent headers
			
 
				-	6532	machine-dependent C source
			
 
				-
			
 
				-	 298	loader headers
			
 
				-	5215	loader C source
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-The following table shows timing
			
 
				-of a test program
			
 
				-that plays checkers, running on a MIPS R4000.
			
 
				-The test program is 26 files totaling 12600 lines of C.
			
 
				-The execution time does not significantly
			
 
				-depend on library implementation.
			
 
				-Since no other compiler runs on Plan 9,
			
 
				-the Plan 9 tests were done with the Plan 9 operating system;
			
 
				-the other tests were done on the vendor's operating system.
			
 
				-The hardware was identical in both cases.
			
 
				-The optimizer in the vendor's compiler
			
 
				-is reputed to be extremely good.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	  4.49s	Plan 9 <TT>vc</TT> <TT>-N</TT> compile time (opposite of <TT>-O</TT>)
			
 
				-	  1.72s	Plan 9 <TT>vc</TT> <TT>-N</TT> load time
			
 
				-	148.69s	Plan 9 <TT>vc</TT> <TT>-N</TT> run time
			
 
				-
			
 
				-	 15.07s	Plan 9 <TT>vc</TT> compile time (<TT>-O</TT> implicit)
			
 
				-	  1.66s	Plan 9 <TT>vc</TT> load time
			
 
				-	 89.96s	Plan 9 <TT>vc</TT> run time
			
 
				-
			
 
				-	 14.83s	vendor <TT>cc</TT> compile time
			
 
				-	  0.38s	vendor <TT>cc</TT> load time
			
 
				-	104.75s	vendor <TT>cc</TT> run time
			
 
				-
			
 
				-	 43.59s	vendor <TT>cc</TT> <TT>-O</TT> compile time
			
 
				-	  0.38s	vendor <TT>cc</TT> <TT>-O</TT> load time
			
 
				-	 76.19s	vendor <TT>cc</TT> <TT>-O</TT> run time
			
 
				-
			
 
				-	  8.19s	vendor <TT>cc</TT> <TT>-O3</TT> compile time
			
 
				-	 35.97s	vendor <TT>cc</TT> <TT>-O3</TT> load time
			
 
				-	 71.16s	vendor <TT>cc</TT> <TT>-O3</TT> run time
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-To compare the Intel compiler,
			
 
				-a program that is about 40% bit manipulation and
			
 
				-about 60% single precision floating point was
			
 
				-run on the same 33 MHz 486, once under Windows
			
 
				-compiled with the Watcom compiler, version 10.0,
			
 
				-in 16-bit mode and once under
			
 
				-Plan 9 in 32-bit mode.
			
 
				-The Plan 9 execution time was 27 sec while the Windows
			
 
				-execution time was 31 sec.
			
 
				-<H4>8 Conclusions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The new compilers compile
			
 
				-quickly,
			
 
				-load slowly,
			
 
				-and produce
			
 
				-medium quality
			
 
				-object code.
			
 
				-The compilers are relatively
			
 
				-portable,
			
 
				-requiring but a couple of weeks' work to
			
 
				-produce a compiler for a different computer.
			
 
				-For Plan 9,
			
 
				-where we needed several compilers
			
 
				-with specialized features and
			
 
				-our own object formats,
			
 
				-this project was indispensable.
			
 
				-It is also necessary for us to
			
 
				-be able to freely distribute our compilers
			
 
				-with the Plan 9 distribution.
			
 
				-<br>&#32;<br>
			
 
				-Two problems have come up in retrospect.
			
 
				-The first has to do with the
			
 
				-division of labor between compiler and loader.
			
 
				-Plan 9 runs on multi-processors and as such
			
 
				-compilations are often done in parallel.
			
 
				-Unfortunately,
			
 
				-all compilations must be complete before loading
			
 
				-can begin.
			
 
				-The load is then single-threaded.
			
 
				-With this model,
			
 
				-any shift of work from compile to load
			
 
				-results in a significant increase in real time.
			
 
				-The same is true of libraries that are compiled
			
 
				-infrequently and loaded often.
			
 
				-In the future,
			
 
				-we may try to put some of the loader work
			
 
				-back into the compiler.
			
 
				-<br>&#32;<br>
			
 
				-The second problem comes from
			
 
				-the various optimizations performed over several
			
 
				-passes.
			
 
				-Often optimizations in different passes depend
			
 
				-on each other.
			
 
				-Iterating the passes could compromise efficiency,
			
 
				-or even loop.
			
 
				-We see no real solution to this problem.
			
 
				-<H4>9 References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Aho87] A. V. Aho, R. Sethi, and J. D. Ullman,
			
 
				-Compilers - Principles, Techniques, and Tools,
			
 
				-Addison Wesley,
			
 
				-Reading, MA,
			
 
				-1987.
			
 
				-<br>&#32;<br>
			
 
				-[ANSI90] <I>American National Standard for Information Systems -
			
 
				-Programming Language C</I>, American National Standards Institute, Inc.,
			
 
				-New York, 1990.
			
 
				-<br>&#32;<br>
			
 
				-[Dav91] J. W. Davidson and D. B. Whalley,
			
 
				-``Methods for Saving and Restoring Register Values across Function Calls'',
			
 
				-Software-Practice and Experience,
			
 
				-Vol 21(2), pp. 149-165, February 1991.
			
 
				-<br>&#32;<br>
			
 
				-[Joh79] S. C. Johnson,
			
 
				-``YACC - Yet Another Compiler Compiler'',
			
 
				-UNIX Programmer's Manual, Seventh Ed., Vol. 2A,
			
 
				-AT&amp;T Bell Laboratories,
			
 
				-Murray Hill, NJ,
			
 
				-1979.
			
 
				-<br>&#32;<br>
			
 
				-[Set70] R. Sethi and J. D. Ullman,
			
 
				-``The Generation of Optimal Code for Arithmetic Expressions'',
			
 
				-Journal of the ACM,
			
 
				-Vol 17(4), pp. 715-728, 1970.
			
 
				-<br>&#32;<br>
			
 
				-[Szy78] T. G. Szymanski,
			
 
				-``Assembling Code for Machines with Span-dependent Instructions'',
			
 
				-Communications of the ACM,
			
 
				-Vol 21(4), pp. 300-308, 1978.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/fs/fs.html
+++ b/sys/doc/fs/fs.html
@@ -1,837 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>The Plan 9 File Server
			
 
				-</H1>
			
 
				-<DL><DD><I>Ken Thompson<br>
			
 
				-ken@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-This paper describes the structure
			
 
				-and the operation of Plan 9 file servers.
			
 
				-The specifics apply to
			
 
				-our main Plan 9 file server
			
 
				-Emelie,
			
 
				-but
			
 
				-the code is also the basis for
			
 
				-the user level file server
			
 
				-<TT>kfs</TT>.
			
 
				-</DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-The Plan 9 file server
			
 
				-Emelie
			
 
				-is the oldest piece of system software
			
 
				-still in use on Plan 9.
			
 
				-It evolved from a user-level program that served
			
 
				-serial lines on a Sequent multi-processor.
			
 
				-The current implementation is neither clean nor
			
 
				-portable,
			
 
				-but it has slowly come to terms with
			
 
				-its particular set of cranky computers
			
 
				-and devices.
			
 
				-</P>
			
 
				-<H4>Process Structure
			
 
				-</H4>
			
 
				-<P>
			
 
				-The Plan 9 file system server is made from
			
 
				-an ancient version of the Plan 9 kernel.
			
 
				-The kernel contains process control,
			
 
				-synchronization,
			
 
				-locks,
			
 
				-and some memory
			
 
				-allocation.
			
 
				-The kernel has no user processes or
			
 
				-virtual memory.
			
 
				-</P>
			
 
				-<P>
			
 
				-The structure of the file system server
			
 
				-is a set of kernel processes
			
 
				-synchronizing mostly through message passing.
			
 
				-In Emelie there are 26 processes of 10 types:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-number name  function
			
 
				-  15       <TT>srv</TT>   Main file system server processes
			
 
				-   1       <TT>rah</TT>   Block read-ahead processes
			
 
				-   1       <TT>scp</TT>   Sync process
			
 
				-   1       <TT>wcp</TT>   WORM copy process
			
 
				-   1       <TT>con</TT>   Console process
			
 
				-   1       <TT>ilo</TT>   IL protocol process
			
 
				-   1       <TT>ilt</TT>   IL timer process
			
 
				-   2       <TT>ethi</TT>   Ethernet input process
			
 
				-   2       <TT>etho</TT>   Ethernet output process
			
 
				-   1       <TT>flo</TT>   Floppy disk process
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>The server processes
			
 
				-</H4>
			
 
				-<P>
			
 
				-The main file system algorithm is a set
			
 
				-of identical processes
			
 
				-named
			
 
				-<TT>srv</TT>
			
 
				-that honor the
			
 
				-9P protocol.
			
 
				-Each file system process waits on
			
 
				-a message queue for an incoming request.
			
 
				-The request contains a 9P message and
			
 
				-the address of a reply queue.
			
 
				-A
			
 
				-<TT>srv</TT>
			
 
				-process parses the message,
			
 
				-performs pseudo-disk I/O
			
 
				-to the corresponding file system block device,
			
 
				-formulates a response,
			
 
				-and sends the
			
 
				-response back to the reply queue.
			
 
				-</P>
			
 
				-<P>
			
 
				-The unit of storage is a
			
 
				-block of data on a device:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    enum
			
 
				-    {
			
 
				-        RBUFSIZE = 16*1024
			
 
				-    };
			
 
				-
			
 
				-    typedef
			
 
				-    struct
			
 
				-    {
			
 
				-        short   pad;
			
 
				-        short	tag;
			
 
				-        long	path;
			
 
				-    } Tag;
			
 
				-
			
 
				-    enum
			
 
				-    {
			
 
				-        BUFSIZE = RBUFSIZE - sizeof(Tag)
			
 
				-    };
			
 
				-
			
 
				-    typedef
			
 
				-    struct
			
 
				-    {
			
 
				-        uchar   data[BUFSIZE];
			
 
				-        Tag     tag;
			
 
				-    } Block;
			
 
				-</PRE></TT></DL>
			
 
				-All devices are idealized as a perfect disk
			
 
				-of contiguously numbered blocks each of size
			
 
				-<TT>RBUFSIZE</TT>.
			
 
				-Each block has a tag that identifies what type
			
 
				-of block it is and a unique id of the file or directory
			
 
				-where this block resides.
			
 
				-The remaining data in the block depends on
			
 
				-what type of block it is.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>srv</TT>
			
 
				-process's main data structure is the directory entry.
			
 
				-This is the equivalent of a UNIX i-node and
			
 
				-defines the set of block addresses that comprise a file or directory.
			
 
				-Unlike the i-node,
			
 
				-the directory entry also has the name of the
			
 
				-file or directory in it:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    enum
			
 
				-    {
			
 
				-        NAMELEN = 28,
			
 
				-        NDBLOCK = 6
			
 
				-    };
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    typedef
			
 
				-    struct
			
 
				-    {
			
 
				-        char    name[NAMELEN];
			
 
				-        short   uid;
			
 
				-        short   gid;
			
 
				-        ushort  mode;
			
 
				-        short   wuid;
			
 
				-        Qid     qid;
			
 
				-        long    size;
			
 
				-        long    dblock[NDBLOCK];
			
 
				-        long    iblock;
			
 
				-        long    diblock;
			
 
				-        long    atime;
			
 
				-        long    mtime;
			
 
				-    } Dentry;
			
 
				-</PRE></TT></DL>
			
 
				-Each directory entry holds the file or directory
			
 
				-name, protection mode, access times, user-id, group-id, and addressing
			
 
				-information.
			
 
				-The entry
			
 
				-<TT>wuid</TT>
			
 
				-is the user-id of the last writer of the file
			
 
				-and
			
 
				-<TT>size</TT>
			
 
				-is the size of the file in bytes.
			
 
				-The first 6
			
 
				-blocks of the file are held in the
			
 
				-<TT>dblock</TT>
			
 
				-array.
			
 
				-If the file is larger than that,
			
 
				-an indirect block is allocated that holds
			
 
				-the next
			
 
				-<TT>BUFSIZE/sizeof(long)</TT>
			
 
				-blocks of the file.
			
 
				-The indirect block address is held in the structure member
			
 
				-<TT>iblock</TT>.
			
 
				-If the file is larger yet,
			
 
				-then there is a double indirect block that points
			
 
				-at indirect blocks.
			
 
				-The double indirect address is held in
			
 
				-<TT>diblock</TT>
			
 
				-and can point at another
			
 
				-<TT>(BUFSIZE/sizeof(long))<sup>2</sup></TT>
			
 
				-blocks of data.
			
 
				-The maximum addressable size of a file is
			
 
				-therefore 275 Gbytes.
			
 
				-There is a tighter restriction of
			
 
				-2<sup>32</sup>
			
 
				-bytes because the length of a file is maintained in
			
 
				-a long.
			
 
				-Even so,
			
 
				-sloppy use of long arithmetic restricts the length to
			
 
				-2<sup>31</sup>
			
 
				-bytes.
			
 
				-These numbers are based on Emelie
			
 
				-which has a block size of 16K and
			
 
				-<TT>sizeof(long)</TT>
			
 
				-is 4.
			
 
				-It would be different if the size of a block
			
 
				-changed.
			
 
				-</P>
			
 
				-<P>
			
 
				-The declarations of the indirect and double indirect blocks
			
 
				-are as follows.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    enum
			
 
				-    {
			
 
				-        INDPERBUF = BUFSIZE/sizeof(long),
			
 
				-    };
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    typedef struct
			
 
				-    {
			
 
				-        long    dblock[INDPERBUF];
			
 
				-        Tag     ibtag;
			
 
				-    } Iblock;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    typedef struct
			
 
				-    {
			
 
				-        long    iblock[INDPERBUF];
			
 
				-        Tag     dibtag;
			
 
				-    } Diblock;
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The root of a file system is a single directory entry
			
 
				-at a known block address.
			
 
				-A directory is a file that consists of a list of
			
 
				-directory entries.
			
 
				-To make access easier,
			
 
				-a directory entry cannot cross blocks.
			
 
				-In Emelie there are 233 directory entries per block.
			
 
				-</P>
			
 
				-<P>
			
 
				-The device on which the blocks reside is implicit
			
 
				-and ultimately comes from the 9P
			
 
				-<TT>attach</TT>
			
 
				-message that specifies the name of the
			
 
				-device containing the root.
			
 
				-</P>
			
 
				-<H4>Buffer Cache
			
 
				-</H4>
			
 
				-<P>
			
 
				-When the file server is
			
 
				-booted,
			
 
				-all of the unused memory is allocated to
			
 
				-a block buffer pool.
			
 
				-There are two major operations on the buffer
			
 
				-pool.
			
 
				-<TT>Getbuf</TT>
			
 
				-will find the buffer associated with a
			
 
				-particular block on a particular device.
			
 
				-The returned buffer is locked so that the
			
 
				-caller has exclusive use.
			
 
				-If the requested buffer is not in the pool,
			
 
				-some other buffer will be relabeled and
			
 
				-the data will be read from the requested device.
			
 
				-<TT>Putbuf</TT>
			
 
				-will unlock a buffer and
			
 
				-if the contents are marked as modified,
			
 
				-the buffer will be written to the device before
			
 
				-the buffer is relabeled.
			
 
				-If there is some special mapping
			
 
				-or CPU cache flushing
			
 
				-that must occur in order for the physical I/O
			
 
				-device to access the buffers,
			
 
				-this is done between
			
 
				-<TT>getbuf</TT>
			
 
				-and
			
 
				-<TT>putbuf</TT>.
			
 
				-The contents of a buffer are never touched
			
 
				-except while it is locked between
			
 
				-<TT>getbuf</TT>
			
 
				-and
			
 
				-<TT>putbuf</TT>
			
 
				-calls.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-file system server processes
			
 
				-prevent deadlock in the buffers by
			
 
				-always locking parent and child
			
 
				-directory entries in that order.
			
 
				-Since the entire directory structure
			
 
				-is a hierarchy,
			
 
				-this makes the locking well-ordered,
			
 
				-preventing deadlock.
			
 
				-The major problem in the locking strategy is
			
 
				-that locks are at a block level and there are many
			
 
				-directory entries in a single block.
			
 
				-There are unnecessary lock conflicts
			
 
				-in the directory blocks.
			
 
				-When one of these directory blocks is tied up
			
 
				-accessing the very slow WORM,
			
 
				-then all I/O to dozens of unrelated directories
			
 
				-is blocked.
			
 
				-</P>
			
 
				-<H4>Block Devices
			
 
				-</H4>
			
 
				-<P>
			
 
				-The block device I/O system is like a
			
 
				-protocol stack of filters.
			
 
				-There is a set of pseudo-devices that call
			
 
				-recursively to other pseudo-devices and real devices.
			
 
				-The protocol stack is compiled from a configuration
			
 
				-string that specifies the order of pseudo-devices and devices.
			
 
				-Each pseudo-device and device has a set of entry points
			
 
				-that corresponds to the operations that the file system
			
 
				-requires of a device.
			
 
				-The most notable operations are
			
 
				-<TT>read</TT>,
			
 
				-<TT>write</TT>,
			
 
				-and
			
 
				-<TT>size</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The device stack can best be described by
			
 
				-describing the syntax of the configuration string
			
 
				-that specifies the stack.
			
 
				-Configuration strings are used
			
 
				-during the setup of the file system.
			
 
				-For a description see
			
 
				-<A href="/magic/man2html/8/fsconfig"><I>fsconfig</I>(8).
			
 
				-</A>In the following recursive definition,
			
 
				-<I>D</I>
			
 
				-represents a
			
 
				-string that specifies a block device.
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><I>D</I> = (<I>DD</I>...)<DD>
			
 
				-<br>
			
 
				-This is a set of devices that
			
 
				-are concatenated to form a single device.
			
 
				-The size of the catenated device is the
			
 
				-sum of the sizes of each sub-device.
			
 
				-<DT><I>D</I> = [<I>DD</I>...]<DD>
			
 
				-<br>
			
 
				-This is the interleaving of the
			
 
				-individual devices.
			
 
				-If there are N devices in the list,
			
 
				-then the pseudo-device is the N-way block
			
 
				-interleaving of the sub-devices.
			
 
				-The size of the interleaved device is
			
 
				-N times the size of the smallest sub-device.
			
 
				-<DT><I>D</I> = <TT>p</TT><I>DN1.N2</I><DD>
			
 
				-<br>
			
 
				-This is a partition of a sub-device.
			
 
				-The sub-device is partitioned into 100 equal pieces.
			
 
				-If the size of the sub-device is not divisible by 100,
			
 
				-then there will be some slop thrown away at the top.
			
 
				-The pseudo-device starts at the N1-th piece and
			
 
				-continues for N2 pieces. Thus
			
 
				-<TT>p<I>D</I>67.33</TT>
			
 
				-will be the
			
 
				-last third of the device
			
 
				-<I>D</I>.
			
 
				-<DT><I>D</I> = <TT>f</TT><I>D</I><DD>
			
 
				-<br>
			
 
				-This is a fake write-once-read-many device simulated by a
			
 
				-second read-write device.
			
 
				-This second device is partitioned
			
 
				-into a set of block flags and a set of blocks.
			
 
				-The flags are used to generate errors if a
			
 
				-block is ever written twice or read without being written first.
			
 
				-<DT><I>D</I> = <TT>c</TT><I>DD</I><DD>
			
 
				-<br>
			
 
				-This is the cache/WORM device made up of a cache (read-write)
			
 
				-device and a WORM (write-once-read-many) device.
			
 
				-More on this later.
			
 
				-<DT><I>D</I> = <TT>o</TT><DD>
			
 
				-<br>
			
 
				-This is the dump file system that is the
			
 
				-two-level hierarchy of all dumps ever taken on a cache/WORM.
			
 
				-The read-only root of the cache/WORM file system
			
 
				-(on the dump taken Feb 18, 1995) can
			
 
				-be referenced as
			
 
				-<TT>/1995/0218</TT>
			
 
				-in this pseudo device.
			
 
				-The second dump taken that day will be
			
 
				-<TT>/1995/02181</TT>.
			
 
				-<DT><I>D</I> = <TT>w</TT><I>N1.N2</I><DD>
			
 
				-<br>
			
 
				-This is a SCSI disk on controller N1 and target N2.
			
 
				-<DT><I>D</I> = <TT>l</TT><I>N1.N2</I><DD>
			
 
				-<br>
			
 
				-This is the same as
			
 
				-<TT>w</TT>,
			
 
				-but one block from the SCSI disk is removed for labeling.
			
 
				-<DT><I>D</I> = <TT>j(</TT><I>D<sub>1</sub></I><I>D<sub>2</sub></I><TT>*)</TT><I>D<sub>3</sub></I><DD>
			
 
				-<br>
			
 
				-<I>D<sub>1</sub></I>
			
 
				-is the juke box SCSI interface.
			
 
				-The
			
 
				-<I>D<sub>2</sub></I>'s
			
 
				-are the SCSI drives in the juke box
			
 
				-and  the
			
 
				-<I>D<sub>3</sub></I>'s
			
 
				-are the demountable platters in the juke box.
			
 
				-<I>D<sub>1</sub></I>
			
 
				-and
			
 
				-<I>D<sub>2</sub></I>
			
 
				-must be
			
 
				-<TT>w</TT>.
			
 
				-<I>D<sub>3</sub></I>
			
 
				-must be pseudo devices of
			
 
				-<TT>w</TT>
			
 
				-or
			
 
				-<TT>l</TT>
			
 
				-devices.
			
 
				-</dl>
			
 
				-<P>
			
 
				-For both
			
 
				-<TT>w</TT>
			
 
				-and
			
 
				-<TT>l</TT>
			
 
				-devices any of the configuration numbers
			
 
				-can be replaced by an iterator of the form
			
 
				-<TT>&#60;<I>N1-N2</I>&#62;</TT>.
			
 
				-Thus
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    [w0.&#60;2-6&#62;]
			
 
				-</PRE></TT></DL>
			
 
				-is the interleaved SCSI disks on SCSI targets
			
 
				-2 through 6 of SCSI controller 0.
			
 
				-The main file system on
			
 
				-Emelie
			
 
				-is defined by the configuration string
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    c[w1.&#60;0-5&#62;.0]j(w6w5w4w3w2)(l&#60;0-236&#62;l&#60;238-474&#62;)
			
 
				-</PRE></TT></DL>
			
 
				-This is a cache/WORM driver.
			
 
				-The cache is six interleaved disks on SCSI controller 1
			
 
				-targets 0, 1, 2, 3, 4, and 5.
			
 
				-The WORM half of the cache/WORM
			
 
				-is 474 jukebox platters.
			
 
				-</P>
			
 
				-<H4>The read-ahead processes
			
 
				-</H4>
			
 
				-<P>
			
 
				-There is a set of file system processes,
			
 
				-<TT>rah</TT>,
			
 
				-that wait for messages consisting of a device and block
			
 
				-address.
			
 
				-When a message comes in,
			
 
				-the process reads the specified block from the device.
			
 
				-This is done by calling
			
 
				-<TT>getbuf</TT>
			
 
				-and
			
 
				-<TT>putbuf</TT>.
			
 
				-The purpose of this is the hope that these blocks
			
 
				-will be used later and that they will reside in the
			
 
				-buffer cache long enough not to be discarded before
			
 
				-they are used.
			
 
				-</P>
			
 
				-<P>
			
 
				-The messages to the read-ahead processes are
			
 
				-generated by the server processes.
			
 
				-The server processes maintain a relative block mark in every
			
 
				-open file.
			
 
				-Whenever an open file reads that relative block,
			
 
				-the next 110 block addresses of the file are sent
			
 
				-to the read-ahead processes and
			
 
				-the relative block mark is advanced by 100.
			
 
				-The initial relative block is set to 1.
			
 
				-If the file is opened and
			
 
				-only a few bytes are read,
			
 
				-then no anticipating reads are performed
			
 
				-since the relative block mark is set to 1
			
 
				-and only block offset 0 is read.
			
 
				-This is to prevent some
			
 
				-fairly common action such as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    file *
			
 
				-</PRE></TT></DL>
			
 
				-from swamping the file system with read-ahead
			
 
				-requests that will never be used.
			
 
				-</P>
			
 
				-<H4>Cache/WORM Driver
			
 
				-</H4>
			
 
				-<P>
			
 
				-The cache/WORM (cw) driver is by far the
			
 
				-largest and most complicated device driver in the file server.
			
 
				-There are four devices involved in the cw driver.
			
 
				-It implements a read/write pseudo-device (the cw-device)
			
 
				-and a read-only pseudo-device (the dump device)
			
 
				-by performing operations on its two constituent devices
			
 
				-the read-write c-device and the write-once-read-many
			
 
				-w-device.
			
 
				-The block numbers on the four devices are distinct,
			
 
				-although the cw addresses,
			
 
				-dump addresses,
			
 
				-and the w addresses are
			
 
				-highly correlated.
			
 
				-</P>
			
 
				-<P>
			
 
				-The cw-driver uses the w-device as the
			
 
				-stable storage of the file system at the time of the
			
 
				-last dump.
			
 
				-All newly written and a large number of recently used
			
 
				-exact copies of blocks of the w-device are kept on the c-device.
			
 
				-The c-device is much smaller than the w-device and
			
 
				-so the subset of w-blocks that are kept on the c-device are
			
 
				-mapped through a hash table kept on a partition of the c-device.
			
 
				-</P>
			
 
				-<P>
			
 
				-The map portion of the c-device consists of blocks of buckets of entries.
			
 
				-The declarations follow.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    enum
			
 
				-    {
			
 
				-        BKPERBLK = 10,
			
 
				-        CEPERBK  = (BUFSIZE - BKPERBLK*sizeof(long)) /
			
 
				-                   (sizeof(Centry)*BKPERBLK),
			
 
				-    };
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    typedef
			
 
				-    struct
			
 
				-    {
			
 
				-        ushort   age;
			
 
				-        short    state;
			
 
				-        long     waddr;
			
 
				-    } Centry;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    typedef
			
 
				-    struct
			
 
				-    {
			
 
				-        long     agegen;
			
 
				-        Centry   entry[CEPERBK];
			
 
				-    } Bucket;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    Bucket   bucket[BKPERBLK];
			
 
				-</PRE></TT></DL>
			
 
				-There is exactly one entry structure for each block in the
			
 
				-data partition of the c-device.
			
 
				-A bucket contains all of the w-addresses that have
			
 
				-the same hash code.
			
 
				-There are as many buckets as will fit
			
 
				-in a block and enough blocks to have the required
			
 
				-number of entries.
			
 
				-The entries in the bucket are maintained
			
 
				-in FIFO order with an age variable and an incrementing age generator.
			
 
				-When the age generator is about to overflow,
			
 
				-all of the ages in the bucket are rescaled
			
 
				-from zero.
			
 
				-</P>
			
 
				-<P>
			
 
				-The following steps go into converting a w-address into a c-address.
			
 
				-The bucket is found by
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    bucket_number = w-address % total_buckets
			
 
				-    getbuf(c-device, bucket_offset + bucket_number/BKPERBLK);
			
 
				-</PRE></TT></DL>
			
 
				-After the desired bucket is found,
			
 
				-the desired entry is found by a linear search within the bucket for the
			
 
				-entry with the desired
			
 
				-<TT>waddr</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The state variable in the entry is
			
 
				-one of the following.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-    enum
			
 
				-    {
			
 
				-        Cnone    = 0,
			
 
				-        Cdirty,
			
 
				-        Cdump,
			
 
				-        Cread,
			
 
				-        Cwrite,
			
 
				-        Cdump1,
			
 
				-    };
			
 
				-</PRE></TT></DL>
			
 
				-Every w-address has a state.
			
 
				-Blocks that are not in the
			
 
				-c-device have the implied
			
 
				-state
			
 
				-<TT>Cnone</TT>.
			
 
				-The
			
 
				-<TT>Cread</TT>
			
 
				-state is for blocks that have the
			
 
				-same data as the corresponding block in
			
 
				-the w-device.
			
 
				-Since the c-device is much faster than the
			
 
				-w-device,
			
 
				-<TT>Cread</TT>
			
 
				-blocks are kept as long as possible and
			
 
				-used in preference to reading the w-device.
			
 
				-<TT>Cread</TT>
			
 
				-blocks may be discarded from the c-device
			
 
				-when the space is needed for newer data.
			
 
				-The
			
 
				-<TT>Cwrite</TT>
			
 
				-state is when the c-device contains newer data
			
 
				-than the corresponding block on the w-device.
			
 
				-This happens when a
			
 
				-<TT>Cnone</TT>,
			
 
				-<TT>Cread</TT>,
			
 
				-or
			
 
				-<TT>Cwrite</TT>
			
 
				-block is written.
			
 
				-The
			
 
				-<TT>Cdirty</TT>
			
 
				-state
			
 
				-is when the c-device contains
			
 
				-new data and the corresponding block
			
 
				-on the w-device has never been written.
			
 
				-This happens when a new block has been
			
 
				-allocated from the free space on the w-device.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Cwrite</TT>
			
 
				-and
			
 
				-<TT>Cdirty</TT>
			
 
				-blocks are created and never removed.
			
 
				-Unless something is done to
			
 
				-convert these blocks,
			
 
				-the c-device will gradually
			
 
				-fill up and stop functioning.
			
 
				-Once a day,
			
 
				-or by command,
			
 
				-a
			
 
				-<I>dump</I>
			
 
				-of the cw-device
			
 
				-is taken.
			
 
				-The purpose of
			
 
				-a dump is to queue the writes that
			
 
				-have been shunted to the c-device
			
 
				-to be written to the w-device.
			
 
				-Since the w-device is a WORM,
			
 
				-blocks cannot be rewritten.
			
 
				-Blocks that have already been written to the WORM must be
			
 
				-relocated to the unused portion of the w-device.
			
 
				-These are precisely the
			
 
				-blocks with
			
 
				-<TT>Cwrite</TT>
			
 
				-state.
			
 
				-</P>
			
 
				-<P>
			
 
				-The dump algorithm is as follows:
			
 
				-a) The tree on the cw-device is walked
			
 
				-as long as the blocks visited have been
			
 
				-modified since the last dump.
			
 
				-These are the blocks with state
			
 
				-<TT>Cwrite</TT>
			
 
				-and
			
 
				-<TT>Cdirty</TT>.
			
 
				-It is possible to restrict the search
			
 
				-to within these blocks
			
 
				-since the directory containing a modified
			
 
				-file must have been accessed to modify the
			
 
				-file and accessing a directory will set its
			
 
				-modified time thus causing the block containing it
			
 
				-to be written.
			
 
				-The directory containing that directory must be
			
 
				-modified for the same reason.
			
 
				-The tree walk is thus drastically restrained and the
			
 
				-tree walk does not take much time.
			
 
				-b) All
			
 
				-<TT>Cwrite</TT>
			
 
				-blocks found in the tree search
			
 
				-are relocated to new blank blocks on the w-device
			
 
				-and converted to
			
 
				-<TT>Cdump</TT>
			
 
				-state.
			
 
				-All
			
 
				-<TT>Cdirty</TT>
			
 
				-blocks are converted to
			
 
				-<TT>Cdump</TT>
			
 
				-state without relocation.
			
 
				-At this point,
			
 
				-all modified blocks in the cw-device
			
 
				-have w-addresses that point to unwritten
			
 
				-WORM blocks.
			
 
				-These blocks are marked for later
			
 
				-writing to the w-device
			
 
				-with the state
			
 
				-<TT>Cdump</TT>.
			
 
				-c) All open files that were pointing to modified
			
 
				-blocks are reopened to point at the corresponding
			
 
				-reallocated blocks.
			
 
				-This causes the directories leading to the
			
 
				-open files to be modified.
			
 
				-Thus the invariant discussed in a) is maintained.
			
 
				-d) The background dumping process will slowly
			
 
				-go through the map of the c-device and write out
			
 
				-all blocks with
			
 
				-<TT>Cdump</TT>
			
 
				-state.
			
 
				-</P>
			
 
				-<P>
			
 
				-The dump takes a few minutes to walk the tree
			
 
				-and mark the blocks.
			
 
				-It can take hours to write the marked blocks
			
 
				-to the WORM.
			
 
				-If a marked block is rewritten before the old
			
 
				-copy has been written to the WORM,
			
 
				-it must be forced to the WORM before it is rewritten.
			
 
				-There is no problem if another dump is taken before the first one
			
 
				-is finished.
			
 
				-The newly marked blocks are just added to the marked blocks
			
 
				-left from the first dump.
			
 
				-</P>
			
 
				-<P>
			
 
				-If there is an error writing a marked block
			
 
				-to the WORM
			
 
				-then the
			
 
				-<TT>Cdump</TT>
			
 
				-state is converted to
			
 
				-<TT>Cdump1</TT>
			
 
				-and manual intervention is needed.
			
 
				-(See the
			
 
				-<TT>cwcmd</TT>
			
 
				-<TT>mvstate</TT>
			
 
				-command in
			
 
				-<A href="/magic/man2html/8/fs"><I>fs</I>(8)).
			
 
				-</A>These blocks can be disposed of by converting
			
 
				-their state back to
			
 
				-<TT>Cdump</TT>
			
 
				-so that they will be written again.
			
 
				-They can also be converted to
			
 
				-<TT>Cwrite</TT>
			
 
				-state so that they will be allocated new
			
 
				-addresses at the next dump.
			
 
				-In most other respects,
			
 
				-a
			
 
				-<TT>Cdump1</TT>
			
 
				-block behaves like a
			
 
				-<TT>Cwrite</TT>
			
 
				-block.
			
 
				-</P>
			
 
				-<H4>Sync Copy and WORM Copy Processes
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>scp</TT>
			
 
				-process
			
 
				-wakes up every ten seconds and
			
 
				-issues writes to blocks in the buffer cache
			
 
				-that have been modified.
			
 
				-This is done automatically on important
			
 
				-console commands such as
			
 
				-<TT>halt</TT>
			
 
				-and
			
 
				-<TT>dump</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>wcp</TT>
			
 
				-process also wakes up every ten seconds
			
 
				-and tries to copy a
			
 
				-<TT>dump</TT>
			
 
				-block from the cache to the WORM.
			
 
				-As long as there are
			
 
				-<TT>dump</TT>
			
 
				-blocks to copy and there is no competition for
			
 
				-the WORM device,
			
 
				-the copy will continue at full speed.
			
 
				-Whenever there is competition for the WORM
			
 
				-or there are no more blocks to
			
 
				-copy,
			
 
				-then the process will sleep ten seconds
			
 
				-before looking again.
			
 
				-</P>
			
 
				-<P>
			
 
				-The HP WORM jukebox consists of
			
 
				-238 disks divided into 476 sides
			
 
				-or platters.
			
 
				-Platter 0 is the
			
 
				-<I>A</I>
			
 
				-side of disk 0.
			
 
				-Platter 1 is the
			
 
				-<I>A</I>
			
 
				-side of disk 1.
			
 
				-Platter 238 is the
			
 
				-<I>B</I>
			
 
				-side of disk 0.
			
 
				-On Emelie,
			
 
				-the main file system is configured
			
 
				-on both sides of the first 237 disks,
			
 
				-platters 0-236 and 238-474.
			
 
				-</P>
			
 
				-<H4>9P Protocol Drivers
			
 
				-</H4>
			
 
				-<P>
			
 
				-The file server described so far
			
 
				-waits for 9P protocol messages to
			
 
				-appear in its input queue.
			
 
				-It processes each message and
			
 
				-sends the reply back to the originator.
			
 
				-There are groups of processes that
			
 
				-perform protocol I/O on some network or
			
 
				-device and the resulting messages
			
 
				-are sent to the file system queue.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are two sets of processes
			
 
				-<TT>ethi</TT>
			
 
				-and
			
 
				-<TT>etho</TT>
			
 
				-that perform Ethernet input and output on two different networks.
			
 
				-These processes send Ethernet messages
			
 
				-to/from two more processes
			
 
				-<TT>ilo</TT>
			
 
				-and
			
 
				-<TT>ilt</TT>
			
 
				-that do the IL reliable datagram protocol
			
 
				-on top of IP packets.
			
 
				-</P>
			
 
				-<P>
			
 
				-The last process in Emelie,
			
 
				-<TT>con</TT>,
			
 
				-reads the console
			
 
				-and calls internal subroutines to
			
 
				-execute commands typed.
			
 
				-Since there is only one process,
			
 
				-only one command can be executing at a
			
 
				-time.
			
 
				-See
			
 
				-<A href="/magic/man2html/8/fs"><I>fs</I>(8)
			
 
				-</A>for a description of the
			
 
				-commands available at the console.
			
 
				-
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/il/il.html
+++ b/sys/doc/il/il.html
@@ -1,427 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				--
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>The IL protocol
			
 
				-</H1>
			
 
				-<DL><DD><I>Dave Presotto<br>
			
 
				-Phil Winterbottom<br>
			
 
				-<br>&#32;<br>
			
 
				-presotto,philw@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-To transport the remote procedure call messages of the Plan 9 file system
			
 
				-protocol 9P, we have implemented a new network protocol, called IL.
			
 
				-It is a connection-based, lightweight transport protocol that carries
			
 
				-datagrams encapsulated by IP.
			
 
				-IL provides retransmission of lost messages and in-sequence delivery, but has
			
 
				-no flow control and no blind retransmission.
			
 
				-</DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 uses a file system protocol, called 9P [PPTTW93], that assumes
			
 
				-in-sequence guaranteed delivery of delimited messages
			
 
				-holding remote procedure call
			
 
				-(RPC) requests and responses.
			
 
				-None of the standard IP protocols [RFC791] is suitable for transmission of
			
 
				-9P messages over an Ethernet or the Internet.
			
 
				-TCP [RFC793] has a high overhead and does not preserve delimiters.
			
 
				-UDP [RFC768], while cheap and preserving message delimiters, does not provide
			
 
				-reliable sequenced delivery.
			
 
				-When we were implementing IP, TCP, and UDP in our system we
			
 
				-tried to choose a protocol suitable for carrying 9P.
			
 
				-The properties we desired were:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>*<DD>
			
 
				-Reliable datagram service
			
 
				-<DT>*<DD>
			
 
				-In-sequence delivery
			
 
				-<DT>*<DD>
			
 
				-Internetworking using IP
			
 
				-<DT>*<DD>
			
 
				-Low complexity, high performance
			
 
				-<DT>*<DD>
			
 
				-Adaptive timeouts
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-No standard protocol met our needs so we designed a new one,
			
 
				-called IL (Internet Link).
			
 
				-<P>
			
 
				-IL is a lightweight protocol encapsulated by IP.
			
 
				-It is connection-based and
			
 
				-provides reliable transmission of sequenced messages.
			
 
				-No provision is made for flow control since the protocol
			
 
				-is designed to transport RPC
			
 
				-messages between client and server, a structure with inherent flow limitations.
			
 
				-A small window for outstanding messages prevents too
			
 
				-many incoming messages from being buffered;
			
 
				-messages outside the window are discarded
			
 
				-and must be retransmitted.
			
 
				-Connection setup uses a two-way handshake to generate
			
 
				-initial sequence numbers at each end of the connection;
			
 
				-subsequent data messages increment the
			
 
				-sequence numbers to allow
			
 
				-the receiver to resequence out of order messages. 
			
 
				-In contrast to other protocols, IL avoids blind retransmission.
			
 
				-This helps performance in congested networks,
			
 
				-where blind retransmission could cause further
			
 
				-congestion.
			
 
				-Like TCP, IL has adaptive timeouts,
			
 
				-so the protocol performs well both on the
			
 
				-Internet and on local Ethernets.
			
 
				-A round-trip timer is used
			
 
				-to calculate acknowledge and retransmission times
			
 
				-that match the network speed.
			
 
				-</P>
			
 
				-<H4>Connections
			
 
				-</H4>
			
 
				-<P>
			
 
				-An IL connection carries a stream of data between two end points.
			
 
				-While the connection persists,
			
 
				-data entering one side is sent to the other side in the same sequence.
			
 
				-The functioning of a connection is described by the state machine in Figure 1,
			
 
				-which shows the states (circles) and transitions between them (arcs).
			
 
				-Each transition is labeled with the list of events that can cause
			
 
				-the transition and, separated by a horizontal line,
			
 
				-the messages sent or received on that transition.
			
 
				-The remainder of this paper is a discussion of this state machine.
			
 
				-
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="-.15070.gif"><br>
			
 
				-
			
 
				-<DL><DD>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><I>ackok</I><DD>
			
 
				-any sequence number between id0 and next inclusive
			
 
				-<DT><I>!x</I><DD>
			
 
				-any value except x
			
 
				-<DT>-<DD>
			
 
				-any value
			
 
				-</DL>
			
 
				-<br>&#32;<br>
			
 
				-<I>Figure 1 - IL State Transitions</I>
			
 
				-</dl>
			
 
				-<P>
			
 
				-The IL state machine has five states:
			
 
				-<I>Closed</I>,
			
 
				-<I>Syncer</I>,
			
 
				-<I>Syncee</I>,
			
 
				-<I>Established</I>,
			
 
				-and
			
 
				-<I>Closing</I>.
			
 
				-The connection is identified by the IP address and port number used at each end.
			
 
				-The addresses ride in the IP protocol header, while the ports are part of the
			
 
				-18-byte IL header.
			
 
				-The local variables identifying the state of a connection are:
			
 
				-<DL><DD>
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>state<DD>
			
 
				-one of the states
			
 
				-<DT>laddr<DD>
			
 
				-32-bit local IP address
			
 
				-<DT>lport<DD>
			
 
				-16-bit local IL port
			
 
				-<DT>raddr<DD>
			
 
				-32-bit remote IP address
			
 
				-<DT>rport<DD>
			
 
				-16-bit remote IL port
			
 
				-<DT>id0<DD>
			
 
				-32-bit starting sequence number of the local side
			
 
				-<DT>rid0<DD>
			
 
				-32-bit starting sequence number of the remote side
			
 
				-<DT>next<DD>
			
 
				-sequence number of the next message to be sent from the local side
			
 
				-<DT>rcvd<DD>
			
 
				-the last in-sequence message received from the remote side
			
 
				-<DT>unacked<DD>
			
 
				-sequence number of the first unacked message
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<P>
			
 
				-Unused connections are in the
			
 
				-<I>Closed</I>
			
 
				-state with no assigned addresses or ports.
			
 
				-Two events open a connection: the reception of
			
 
				-a message whose addresses and ports match no open connection
			
 
				-or a user explicitly opening a connection.
			
 
				-In the first case, the message's source address and port become the
			
 
				-connection's remote address and port and the message's destination address
			
 
				-and port become the local address and port.
			
 
				-The connection state is set to
			
 
				-<I>Syncee</I>
			
 
				-and the message is processed.
			
 
				-In the second case, the user specifies both local and remote addresses and ports.
			
 
				-The connection's state is set to
			
 
				-<I>Syncer</I>
			
 
				-and a
			
 
				-<TT>sync</TT>
			
 
				-message is sent to the remote side.
			
 
				-The legal values for the local address are constrained by the IP implementation.
			
 
				-</P>
			
 
				-<H4>Sequence Numbers
			
 
				-</H4>
			
 
				-<P>
			
 
				-IL carries data messages.
			
 
				-Each message corresponds to a single write from
			
 
				-the operating system and is identified by a 32-bit
			
 
				-sequence number.
			
 
				-The starting sequence number for each direction in a
			
 
				-connection is picked at random and transmitted in the initial
			
 
				-<TT>sync</TT>
			
 
				-message.
			
 
				-The number is incremented for each subsequent data message.
			
 
				-A retransmitted message contains its original sequence number.
			
 
				-</P>
			
 
				-<H4>Transmission/Retransmission
			
 
				-</H4>
			
 
				-<P>
			
 
				-Each message contains two sequence numbers:
			
 
				-an identifier (ID) and an acknowledgement.
			
 
				-The acknowledgement is the last in-sequence
			
 
				-data message received by the transmitter of the message.
			
 
				-For
			
 
				-<TT>data</TT>
			
 
				-and
			
 
				-<TT>dataquery</TT>
			
 
				-messages, the ID is its sequence number.
			
 
				-For the control messages
			
 
				-<TT>sync</TT>,
			
 
				-<TT>ack</TT>,
			
 
				-<TT>query</TT>,
			
 
				-<TT>state</TT>,
			
 
				-and
			
 
				-<TT>close</TT>,
			
 
				-the ID is one greater than the sequence number of
			
 
				-the highest sent data message.
			
 
				-</P>
			
 
				-<P>
			
 
				-The sender transmits data messages with type
			
 
				-<TT>data</TT>.
			
 
				-Any messages traveling in the opposite direction carry acknowledgements.
			
 
				-An
			
 
				-<TT>ack</TT>
			
 
				-message will be sent within 200 milliseconds of receiving the data message
			
 
				-unless a returning message has already piggy-backed an
			
 
				-acknowledgement to the sender.
			
 
				-</P>
			
 
				-<P>
			
 
				-In IP, messages may be delivered out of order or
			
 
				-may be lost due to congestion or faults.
			
 
				-To overcome this,
			
 
				-IL uses a modified ``go back n'' protocol that also attempts
			
 
				-to avoid aggravating network congestion.
			
 
				-An average round trip time is maintained by measuring the delay between
			
 
				-the transmission of a message and the
			
 
				-receipt of its acknowledgement.
			
 
				-Until the first acknowledgement is received, the average round trip time
			
 
				-is assumed to be 100ms.
			
 
				-If an acknowledgement is not received within four round trip times
			
 
				-of the first unacknowledged message
			
 
				-(<I>rexmit timeout</I>
			
 
				-in Figure 1), IL assumes the message or the acknowledgement
			
 
				-has been lost.
			
 
				-The sender then resends only the first unacknowledged message,
			
 
				-setting the type to
			
 
				-<TT>dataquery</TT>.
			
 
				-When the receiver receives a
			
 
				-<TT>dataquery</TT>,
			
 
				-it responds with a
			
 
				-<TT>state</TT>
			
 
				-message acknowledging the highest received in-sequence data message.
			
 
				-This may be the retransmitted message or, if the receiver has been
			
 
				-saving up out-of-sequence messages, some higher numbered message.
			
 
				-Implementations of the receiver are free to choose whether to save out-of-sequence messages.
			
 
				-Our implementation saves up to 10 packets ahead.
			
 
				-When the sender receives the
			
 
				-<TT>state</TT>
			
 
				-message, it will immediately resend the next unacknowledged message
			
 
				-with type
			
 
				-<TT>dataquery</TT>.
			
 
				-This continues until all messages are acknowledged.
			
 
				-</P>
			
 
				-<P>
			
 
				-If no acknowledgement is received after the first
			
 
				-<TT>dataquery</TT>,
			
 
				-the transmitter continues to timeout and resend the
			
 
				-<TT>dataquery</TT>
			
 
				-message.
			
 
				-The intervals between retransmissions increase exponentially.
			
 
				-After 300 times the round trip time
			
 
				-(<I>death timeout</I>
			
 
				-in Figure 1), the sender gives up and
			
 
				-assumes the connection is dead.
			
 
				-</P>
			
 
				-<P>
			
 
				-Retransmission also occurs in the states
			
 
				-<I>Syncer</I>,
			
 
				-<I>Syncee</I>,
			
 
				-and
			
 
				-<I>Closing</I>.
			
 
				-The retransmission intervals are the same as for data messages.
			
 
				-</P>
			
 
				-<H4>Keep Alive
			
 
				-</H4>
			
 
				-<P>
			
 
				-Connections to dead systems must be discovered and torn down
			
 
				-lest they consume resources.
			
 
				-If the surviving system does not need to send any data and
			
 
				-all data it has sent has been acknowledged, the protocol
			
 
				-described so far will not discover these connections.
			
 
				-Therefore, in the
			
 
				-<I>Established</I>
			
 
				-state, if no other messages are sent for a 6 second period,
			
 
				-a
			
 
				-<TT>query</TT>
			
 
				-is sent.
			
 
				-The receiver always replies to a
			
 
				-<TT>query</TT>
			
 
				-with a
			
 
				-<TT>state</TT>
			
 
				-message.
			
 
				-If no messages are received for 30 seconds, the
			
 
				-connection is torn down.
			
 
				-This is not shown in Figure 1.
			
 
				-</P>
			
 
				-<H4>Byte Ordering
			
 
				-</H4>
			
 
				-<P>
			
 
				-All 32- and 16-bit quantities are transmitted high-order byte first, as
			
 
				-is the custom in IP.
			
 
				-</P>
			
 
				-<H4>Formats
			
 
				-</H4>
			
 
				-<P>
			
 
				-The following is a C language description of an IP+IL
			
 
				-header, assuming no IP options:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef unsigned char byte;
			
 
				-struct IPIL
			
 
				-{
			
 
				-	byte	vihl;       /* Version and header length */
			
 
				-	byte	tos;        /* Type of service */
			
 
				-	byte	length[2];  /* packet length */
			
 
				-	byte	id[2];      /* Identification */
			
 
				-	byte	frag[2];    /* Fragment information */
			
 
				-	byte	ttl;        /* Time to live */
			
 
				-	byte	proto;      /* Protocol */
			
 
				-	byte	cksum[2];   /* Header checksum */
			
 
				-	byte	src[4];     /* Ip source */
			
 
				-	byte	dst[4];     /* Ip destination */
			
 
				-	byte	ilsum[2];   /* Checksum including header */
			
 
				-	byte	illen[2];   /* Packet length */
			
 
				-	byte	iltype;     /* Packet type */
			
 
				-	byte	ilspec;     /* Special */
			
 
				-	byte	ilsrc[2];   /* Src port */
			
 
				-	byte	ildst[2];   /* Dst port */
			
 
				-	byte	ilid[4];    /* Sequence id */
			
 
				-	byte	ilack[4];   /* Acked sequence */
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-Data is assumed to immediately follow the header in the message.
			
 
				-<TT>Ilspec</TT>
			
 
				-is an extension reserved for future protocol changes.
			
 
				-<P>
			
 
				-The checksum is calculated with
			
 
				-<TT>ilsum</TT>
			
 
				-and
			
 
				-<TT>ilspec</TT>
			
 
				-set to zero.
			
 
				-It is the standard IP checksum, that is, the 16-bit one's complement of the one's
			
 
				-complement sum of all 16 bit words in the header and text.  If a
			
 
				-message contains an odd number of header and text bytes to be
			
 
				-checksummed, the last byte is padded on the right with zeros to
			
 
				-form a 16-bit word for the checksum.
			
 
				-The checksum covers from
			
 
				-<TT>cksum</TT>
			
 
				-to  the end of the data.
			
 
				-</P>
			
 
				-<P>
			
 
				-The possible
			
 
				-<I>iltype</I>
			
 
				-values are:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-enum {
			
 
				-	sync=		0,
			
 
				-	data=		1,
			
 
				-	dataquery=	2,
			
 
				-	ack=		3,
			
 
				-	query=		4,
			
 
				-	state=		5,
			
 
				-	close=		6,
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>illen</TT>
			
 
				-field is the size in bytes of the IL header (18 bytes) plus the size of the data.
			
 
				-<H4>Numbers
			
 
				-</H4>
			
 
				-<P>
			
 
				-The IP protocol number for IL is 40.
			
 
				-</P>
			
 
				-<P>
			
 
				-The assigned IL port numbers are:
			
 
				-<DL><DD>
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>7<DD>
			
 
				-echo all input to output
			
 
				-<DT>9<DD>
			
 
				-discard input
			
 
				-<DT>19<DD>
			
 
				-send a standard pattern to output
			
 
				-<DT>565<DD>
			
 
				-send IP addresses of caller and callee to output
			
 
				-<DT>566<DD>
			
 
				-Plan 9 authentication protocol
			
 
				-<DT>17005<DD>
			
 
				-Plan 9 CPU service, data
			
 
				-<DT>17006<DD>
			
 
				-Plan 9 CPU service, notes
			
 
				-<DT>17007<DD>
			
 
				-Plan 9 exported file systems
			
 
				-<DT>17008<DD>
			
 
				-Plan 9 file service
			
 
				-<DT>17009<DD>
			
 
				-Plan 9 remote execution
			
 
				-<DT>17030<DD>
			
 
				-Alef Name Server
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[PPTTW93] Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom,
			
 
				-``The Use of Name Spaces in Plan 9'',
			
 
				-<I>Op. Sys. Rev.,</I>
			
 
				-Vol. 27, No. 2, April 1993, pp. 72-76,
			
 
				-reprinted in this volume.
			
 
				-<br>
			
 
				-[RFC791] RFC791,
			
 
				-<I>Internet Protocol,</I>
			
 
				-<I>DARPA Internet Program Protocol Specification,</I>
			
 
				-September 1981.
			
 
				-<br>
			
 
				-[RFC793] RFC793,
			
 
				-<I>Transmission Control Protocol,</I>
			
 
				-<I>DARPA Internet Program Protocol Specification,</I>
			
 
				-September 1981.
			
 
				-<br>
			
 
				-[RFC768] J. Postel, RFC768,
			
 
				-<I>User Datagram Protocol,</I>
			
 
				-<I>DARPA Internet Program Protocol Specification,</I>
			
 
				-August 1980.
			
 
				-
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/index.html
+++ b/sys/doc/index.html
@@ -1,174 +0,0 @@
 
				-<HEAD>
			
 
				-<TITLE>Plan 9 Manual - Volume 2</TITLE>
			
 
				-</HEAD>
			
 
				-
			
 
				-<H2>Plan 9 Documents (Volume 2)</H2>
			
 
				-
			
 
				-<H3>Introduction</H3>
			
 
				-<DL>
			
 
				-
			
 
				-<DT>Plan 9 From Bell Labs
			
 
				-[<A HREF="9.html">html</A>, <A HREF="9.ps">ps</A>, <A HREF="9.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike, Dave Presotto, Sean Dorward, Bob Flandrena, Ken Thompson, Howard Trickey, and Phil Winterbottom</I>
			
 
				-<BR>An overview of the system; read at least this paper before you install.
			
 
				-
			
 
				-<DT>The Use of Name Spaces in Plan 9
			
 
				-[<A HREF="names.html">html</A>, <A HREF="names.ps">ps</A>, <A HREF="names.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom</I>
			
 
				-<BR>What's in a name?
			
 
				-
			
 
				-<DT>The Organization of Networks in Plan 9
			
 
				-[<A HREF="net/net.html">html</A>, <A HREF="net/net.ps">ps</A>, <A HREF="net/net.pdf">pdf</A>]
			
 
				-<DD><I>Dave Presotto and Phil Winterbottom</I>
			
 
				-<BR>Connecting the pieces.  The details in the paper are outdated but the ideas still apply.
			
 
				-
			
 
				-<DT>Security in Plan 9
			
 
				-[<A HREF="auth.html">html</A>, <A HREF="auth.ps">ps</A>, <A HREF="auth.pdf">pdf</A>]
			
 
				-<DD><I>Russ Cox, Eric Grosse, Rob Pike, Dave Presotto, and Sean Quinlan</I>
			
 
				-<BR>An overview of the security architecture.
			
 
				-</DL>
			
 
				-
			
 
				-<H3>Programming</H3>
			
 
				-<DL>
			
 
				-
			
 
				-<DT>How to Use the Plan 9 C Compiler
			
 
				-[<A HREF="comp.html">html</A>, <A HREF="comp.ps">ps</A>, <A HREF="comp.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR>The starting point for C programming under Plan 9.
			
 
				-
			
 
				-<DT>Changes to the Programming Environment in the Fourth Release of Plan 9
			
 
				-[<A HREF="prog4.html">html</A>, <A HREF="prog4.ps">ps</A>, <A HREF="prog4.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR>An overview of the changes to the C library since the Third Release.
			
 
				-
			
 
				-<DT>APE - The ANSI/POSIX Environment
			
 
				-[<A HREF="ape.html">html</A>, <A HREF="ape.ps">ps</A>, <A HREF="ape.pdf">pdf</A>]
			
 
				-<DD><I>Howard Trickey</I>
			
 
				-<BR>Moving C code between UNIX and Plan 9.
			
 
				-
			
 
				-<DT>Acid: A Debugger Built From A Language
			
 
				-[<A HREF="acidpaper.html">html</A>, <A HREF="acidpaper.ps">ps</A>, <A HREF="acidpaper.pdf">pdf</A>]
			
 
				-<DD><I>Phil Winterbottom</I>
			
 
				-<BR>An overview paper about the Acid debugger.
			
 
				-
			
 
				-<DT>Acid Manual
			
 
				-[<A HREF="acid.html">html</A>, <A HREF="acid.ps">ps</A>, <A HREF="acid.pdf">pdf</A>]
			
 
				-<DD><I>Phil Winterbottom</I>
			
 
				-<BR>The reference manual for the language and its libraries.
			
 
				-
			
 
				-<DT>Maintaining Files on Plan 9 with Mk
			
 
				-[<A HREF="mk.html">html</A>, <A HREF="mk.ps">ps</A>, <A HREF="mk.pdf">pdf</A>]
			
 
				-<DD><I>Andrew G. Hume and Bob Flandrena</I>
			
 
				-<BR>An introduction to Plan 9's replacement for <TT>make</TT>.
			
 
				-
			
 
				-<DT>Plan 9 Mkfiles
			
 
				-[<A HREF="mkfiles.html">html</A>, <A HREF="mkfiles.ps">ps</A>, <A HREF="mkfiles.pdf">pdf</A>]
			
 
				-<DD><I>Bob Flandrena</I>
			
 
				-<BR>The conventions for using <TT>mk</TT> in Plan 9.
			
 
				-
			
 
				-<DT>A Manual for the Plan 9 assembler
			
 
				-[<A HREF="asm.html">html</A>, <A HREF="asm.ps">ps</A>, <A HREF="asm.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR>Things you'd rather not know.
			
 
				-</DL>
			
 
				-
			
 
				-<H3>User Interfaces</H3>
			
 
				-<DL>
			
 
				-
			
 
				-<DT>8&#189;, the Plan 9 Window System
			
 
				-[<A HREF="8%bd/8%bd.html">html</A>, <A HREF="8%bd/8%bd.ps">ps</A>, <A HREF="8%bd/8%bd.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR>An introduction to the (previous) window system and its unusual implementation.
			
 
				-
			
 
				-<DT>Rc - The Plan 9 Shell
			
 
				-[<A HREF="rc.html">html</A>, <A HREF="rc.ps">ps</A>, <A HREF="rc.pdf">pdf</A>]
			
 
				-<DD><I>Tom Duff</I>
			
 
				-<BR>An introduction to the new shell, complete with examples.
			
 
				-
			
 
				-<DT>The Text Editor <TT>sam</TT>
			
 
				-[<A HREF="sam/sam.html">html</A>, <A HREF="sam/sam.ps">ps</A>, <A HREF="sam/sam.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR><TT>Sam</TT> is the standard editor on Plan 9.
			
 
				-
			
 
				-<DT>Acme: A User Interface for Programmers
			
 
				-[<A HREF="acme/acme.html">html</A>, <A HREF="acme/acme.ps">ps</A>, <A HREF="acme/acme.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR>A system with a more radical approach to programming and editing.
			
 
				-
			
 
				-<DT>Plumbing and Other Utilities
			
 
				-[<A HREF="plumb.html">html</A>, <A HREF="plumb.ps">ps</A>, <A HREF="plumb.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR>Inter-process communication that enlivens the interactive user interface.
			
 
				-</DL>
			
 
				-
			
 
				-<H3>Implementation</H3>
			
 
				-<DL>
			
 
				-
			
 
				-<DT>Hello World
			
 
				-[<A HREF="utf.html">html</A>, <A HREF="utf.ps">ps</A>, <A HREF="utf.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike and Ken Thompson</I>
			
 
				-<BR>The details about Plan 9's character set: the Unicode Standard plus an ASCII-compatible encoding.
			
 
				-
			
 
				-<DT>Plan 9 C Compilers
			
 
				-[<A HREF="compiler.html">html</A>, <A HREF="compiler.ps">ps</A>, <A HREF="compiler.pdf">pdf</A>]
			
 
				-<DD><I>Ken Thompson</I>
			
 
				-<BR>The design and some internals of the compiler suite.
			
 
				-
			
 
				-<DT>Adding Application Support for a New Architecture in Plan 9
			
 
				-[<A HREF="libmach.html">html</A>, <A HREF="libmach.ps">ps</A>, <A HREF="libmach.pdf">pdf</A>]
			
 
				-<DD><I>Bob Flandrena</I>
			
 
				-<BR>The procedures necessary to add a new instruction set to Plan 9's programming environment.
			
 
				-
			
 
				-<DT>The Plan 9 File Server
			
 
				-[<A HREF="fs/fs.html">html</A>, <A HREF="fs/fs.ps">ps</A>, <A HREF="fs/fs.pdf">pdf</A>]
			
 
				-<DD><I>Ken Thompson</I>
			
 
				-<BR>The design of the central file server and its novel backup system.
			
 
				-
			
 
				-<DT>Venti: A new approach to archival storage
			
 
				-[<A HREF="venti/venti.html">html</A>, <A HREF="venti/venti.ps">ps</A>, <A HREF="venti/venti.pdf">pdf</A>]
			
 
				-<DD><I>Sean Quinlan and Sean Dorward</I>
			
 
				-<BR>Archival block-level storage using secure hashes as block identifiers.
			
 
				-
			
 
				-<DT>The IL protocol
			
 
				-[<A HREF="il/il.html">html</A>, <A HREF="il/il.ps">ps</A>, <A HREF="il/il.pdf">pdf</A>]
			
 
				-<DD><I>Dave Presotto and Phil Winterbottom</I>
			
 
				-<BR>A description of the Internet protocol Plan 9 uses for internal communication.
			
 
				-
			
 
				-<DT>Lexical File Names in Plan 9, or, Getting Dot-Dot Right
			
 
				-[<A HREF="lexnames.html">html</A>, <A HREF="lexnames.ps">ps</A>, <A HREF="lexnames.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike</I>
			
 
				-<BR>A vexing old problem solved: how to make <TT>pwd</TT> get the right answer in the face of multiply-bound directories.
			
 
				-
			
 
				-<DT>Process Sleep and Wakeup on a Shared-memory Multiprocessor
			
 
				-[<A HREF="sleep.html">html</A>, <A HREF="sleep.ps">ps</A>, <A HREF="sleep.pdf">pdf</A>]
			
 
				-<DD><I>Rob Pike, Dave Presotto, Ken Thompson, and Gerard Holzmann</I>
			
 
				-<BR>A detailed study of a central issue in the Plan 9 kernel.
			
 
				-</DL>
			
 
				-
			
 
				-<H3>Miscellany</H3>
			
 
				-<DL>
			
 
				-
			
 
				-<DT>A Guide to the Lp Printer Spooler
			
 
				-[<A HREF="lp.html">html</A>, <A HREF="lp.ps">ps</A>, <A HREF="lp.pdf">pdf</A>]
			
 
				-<DD><I>Paul Glick</I>
			
 
				-<BR>Administering the suite of tools to drive PostScript printers.
			
 
				-
			
 
				-<DT>Troff User's Manual
			
 
				-[<A HREF="troff.html">html</A>, <A HREF="troff.ps">ps</A>, <A HREF="troff.pdf">pdf</A>]
			
 
				-<DD><I>Joseph F. Ossanna and Brian W. Kernighan</I>
			
 
				-<BR>The old warhorse, updated for Unicode characters.
			
 
				-
			
 
				-
			
 
				-<DT>Using SPIN
			
 
				-[<A HREF="spin.html">html</A>, <A HREF="spin.ps">ps</A>, <A HREF="spin.pdf">pdf</A>]
			
 
				-<DD><I>Gerard Holzmann</I>
			
 
				-<BR>An introduction to a tool for analyzing parallel and distributed programs.
			
 
				-</DL>
			
 
				-
			
 
				-<H3>Installation</H3>
			
 
				-<DL>
			
 
				-
			
 
				-<DT>The Various Ports
			
 
				-[<A HREF="port.html">html</A>, <A HREF="port.ps">ps</A>, <A HREF="port.pdf">pdf</A>]
			
 
				-<DD>
			
 
				-<BR>The hardware requirements for the Plan 9 compilers and kernels.
			
--- a/sys/doc/lexnames.html
+++ b/sys/doc/lexnames.html
@@ -1,1220 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Lexical File Names in Plan 9
			
 
				-<br>
			
 
				-or
			
 
				-<br>
			
 
				-Getting Dot-Dot Right
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-<TT>rob@plan9.bell-labs.com</TT>
			
 
				-Bell Laboratories, Murray Hill, NJ, 07974
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<br>&#32;<br>
			
 
				-Symbolic links make the Unix file system non-hierarchical, resulting in
			
 
				-multiple valid path names for a given file.
			
 
				-This ambiguity is a source of confusion, especially since some shells
			
 
				-work overtime to present a consistent view from programs such as
			
 
				-<TT>pwd</TT>,
			
 
				-while other programs and
			
 
				-the kernel itself do nothing about the problem.
			
 
				-<br>&#32;<br>
			
 
				-Plan 9 has no symbolic links but it does have other mechanisms that produce the same difficulty.
			
 
				-Moreover, Plan 9 is founded on the ability to control a program's environment
			
 
				-by manipulating its name space.
			
 
				-Ambiguous names muddle the result of operations such as copying a name space across
			
 
				-the network.
			
 
				-<br>&#32;<br>
			
 
				-To address these problems,
			
 
				-the Plan 9 kernel has been modified to maintain an accurate path name for every active
			
 
				-file (open file, working directory, mount table entry) in the system.
			
 
				-The definition of `accurate' is that the path name for a file is guaranteed to be the rooted,
			
 
				-absolute name
			
 
				-the program used to acquire it.
			
 
				-These names are maintained by an efficient method that combines lexical processing&#8212;such as
			
 
				-evaluating
			
 
				-<TT>..</TT>
			
 
				-by just removing the last path name element of a directory&#8212;with
			
 
				-local operations within the file system to maintain a consistent, easily understood view
			
 
				-of the name system.
			
 
				-Ambiguous situations are resolved by examining the lexically maintained names themselves.
			
 
				-<br>&#32;<br>
			
 
				-A new kernel call,
			
 
				-<TT>fd2path</TT>,
			
 
				-returns the file name associated with an open file,
			
 
				-permitting the use of reliable names to improve system
			
 
				-services ranging from
			
 
				-<TT>pwd</TT>
			
 
				-to debugging.
			
 
				-Although this work was done in Plan 9,
			
 
				-Unix systems could also benefit from the addition of
			
 
				-a method to recover the accurate name of an
			
 
				-open file or the current directory.
			
 
				-</DL>
			
 
				-<H4>Motivation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Consider the following unedited transcript of a session running the Bourne shell on a modern
			
 
				-Unix system:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% echo $HOME
			
 
				-/home/rob
			
 
				-% cd $HOME
			
 
				-% pwd
			
 
				-/n/bopp/v7/rob
			
 
				-% cd /home/rob
			
 
				-% cd /home/ken
			
 
				-% cd ../rob
			
 
				-../rob: bad directory
			
 
				-% 
			
 
				-</PRE></TT></DL>
			
 
				-(The same output results from running
			
 
				-<TT>tcsh</TT>;
			
 
				-we'll discuss
			
 
				-<TT>ksh</TT>
			
 
				-in a moment.)
			
 
				-To a neophyte being schooled in the delights of a hierarchical file name space,
			
 
				-this behavior must be baffling.
			
 
				-It is, of course, the consequence of a series of symbolic links intended to give users
			
 
				-the illusion they share a disk, when in fact their files are scattered over several devices:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% ls -ld /home/rob /home/ken
			
 
				-lrwxr-xr-x  1 root  sys   14 Dec 26  1998 /home/ken -&gt; /n/bopp/v6/ken
			
 
				-lrwxr-xr-x  1 root  sys   14 Dec 23  1998 /home/rob -&gt; /n/bopp/v7/rob
			
 
				-% 
			
 
				-</PRE></TT></DL>
			
 
				-The introduction of symbolic links has changed the Unix file system from a true
			
 
				-hierarchy into a directed graph, rendering
			
 
				-<TT>..</TT>
			
 
				-ambiguous and sowing confusion.
			
 
				-<br>&#32;<br>
			
 
				-Unix popularized hierarchical naming, but the introduction of symbolic links
			
 
				-made its naming irregular.
			
 
				-Worse, the
			
 
				-<TT>pwd</TT>
			
 
				-command, through the underlying
			
 
				-<TT>getwd</TT>
			
 
				-library routine,
			
 
				-uses a tricky, expensive algorithm that often delivers the wrong answer.
			
 
				-Starting from the current directory,
			
 
				-<TT>getwd</TT>
			
 
				-opens the parent,
			
 
				-<TT>..</TT>,
			
 
				-and searches it for an entry whose i-number matches the current directory;
			
 
				-the matching entry is the final path element of the ultimate result.
			
 
				-Applying this process iteratively,
			
 
				-<TT>getwd</TT>
			
 
				-works back towards the root.
			
 
				-Since
			
 
				-<TT>getwd</TT>
			
 
				-knows nothing about symbolic links, it will recover surprising names for
			
 
				-directories reached by them,
			
 
				-as illustrated by the example;
			
 
				-the backward paths
			
 
				-<TT>getwd</TT>
			
 
				-traverses will not backtrack across the links.
			
 
				-<br>&#32;<br>
			
 
				-Partly for efficiency and partly to make
			
 
				-<TT>cd</TT>
			
 
				-and
			
 
				-<TT>pwd</TT>
			
 
				-more predictable, the Korn shell
			
 
				-<TT>ksh</TT>
			
 
				-[Korn94]
			
 
				-implements
			
 
				-<TT>pwd</TT>
			
 
				-as a builtin.
			
 
				-(The
			
 
				-<TT>cd</TT>
			
 
				-command must be a builtin in any shell, since the current directory is unique to each process.)
			
 
				-<TT>Ksh</TT>
			
 
				-maintains its own private view of the file system to try to disguise symbolic links;
			
 
				-in particular,
			
 
				-<TT>cd</TT>
			
 
				-and
			
 
				-<TT>pwd</TT>
			
 
				-involve some lexical processing (somewhat like the
			
 
				-<TT>cleanname</TT>
			
 
				-function discussed later
			
 
				-in this paper), augmented by heuristics such as examining the environment
			
 
				-for names like
			
 
				-<TT>$HOME</TT>
			
 
				-and
			
 
				-<TT>$PWD</TT>
			
 
				-to assist initialization of the state of the private view. [Korn00]
			
 
				-<br>&#32;<br>
			
 
				-This transcript begins with a Bourne shell running:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cd /home/rob
			
 
				-% pwd
			
 
				-/n/bopp/v7/rob
			
 
				-% ksh
			
 
				-$ pwd
			
 
				-/home/rob
			
 
				-$ 
			
 
				-</PRE></TT></DL>
			
 
				-This result is encouraging.  Another example, again starting from a Bourne shell:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cd /home/rob
			
 
				-% cd ../ken
			
 
				-../ken: bad directory
			
 
				-% ksh
			
 
				-$ pwd
			
 
				-/home/rob
			
 
				-$ cd ../ken
			
 
				-$ pwd
			
 
				-/home/ken
			
 
				-$
			
 
				-</PRE></TT></DL>
			
 
				-By doing extra work,
			
 
				-the Korn shell is providing more sensible behavior,
			
 
				-but it is easy to defeat:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cd /home/rob
			
 
				-% pwd
			
 
				-/n/bopp/v7/rob
			
 
				-% cd bin
			
 
				-% pwd
			
 
				-/n/bopp/v7/rob/bin
			
 
				-% ksh
			
 
				-$ pwd
			
 
				-/n/bopp/v7/rob/bin
			
 
				-$ exit
			
 
				-% cd /home/ken
			
 
				-% pwd
			
 
				-/n/bopp/v6/ken
			
 
				-% ksh
			
 
				-$ pwd
			
 
				-/n/bopp/v6/ken
			
 
				-$ 
			
 
				-</PRE></TT></DL>
			
 
				-In these examples,
			
 
				-<TT>ksh</TT>'s
			
 
				-built-in
			
 
				-<TT>pwd</TT>
			
 
				-failed to produce the results
			
 
				-(<TT>/home/rob/bin</TT>
			
 
				-and
			
 
				-<TT>/home/ken</TT>)
			
 
				-that the previous example might have led us to expect.
			
 
				-The Korn shell is hiding the problem, not solving it, and in fact is not even hiding it very well.
			
 
				-<br>&#32;<br>
			
 
				-A deeper question is whether the shell should even be trying to make
			
 
				-<TT>pwd</TT>
			
 
				-and
			
 
				-<TT>cd</TT>
			
 
				-do a better job.
			
 
				-If it does, then the
			
 
				-<TT>getwd</TT>
			
 
				-library call and every program that uses it will behave differently from the shell,
			
 
				-a situation that is sure to confuse.
			
 
				-Moreover, the ability to change directory to
			
 
				-<TT>../ken</TT>
			
 
				-with the Korn shell's
			
 
				-<TT>cd</TT>
			
 
				-command but not with the
			
 
				-<TT>chdir</TT>
			
 
				-system call is a symptom of a diseased system, not a healthy shell.
			
 
				-<br>&#32;<br>
			
 
				-The operating system should provide names that work and make sense.
			
 
				-Symbolic links, though, are here to stay, so we need a way to provide
			
 
				-sensible, unambiguous names in the face of a non-hierarchical name space.
			
 
				-This paper shows how the challenge was met on Plan 9, an operating system
			
 
				-with Unix-like naming.
			
 
				-<H4>Names in Plan 9
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Except for some details involved with bootstrapping, file names in Plan 9 have the same syntax as in Unix.
			
 
				-Plan 9 has no symbolic links, but its name space construction operators,
			
 
				-<TT>bind</TT>
			
 
				-and
			
 
				-<TT>mount</TT>,
			
 
				-make it possible to build the same sort of non-hierarchical structures created
			
 
				-by symbolically linking directories on Unix.
			
 
				-<br>&#32;<br>
			
 
				-Plan 9's
			
 
				-<TT>mount</TT>
			
 
				-system call takes a file descriptor
			
 
				-and attaches to the local name space the file system service it represents:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mount(fd, "/dir", flags)
			
 
				-</PRE></TT></DL>
			
 
				-Here
			
 
				-<TT>fd</TT>
			
 
				-is a file descriptor to a communications port such as a pipe or network connection;
			
 
				-at the other end of the port is a service, such as a file server, that talks 9P, the Plan 9 file
			
 
				-system protocol.
			
 
				-After the call succeeds, the root directory of the service will be visible at the
			
 
				-<I>mount point</I>
			
 
				-<TT>/dir</TT>,
			
 
				-much as with the
			
 
				-<TT>mount</TT>
			
 
				-call of Unix.
			
 
				-The
			
 
				-<TT>flags</TT>
			
 
				-argument specifies the nature of the attachment:
			
 
				-<TT>MREPL</TT>
			
 
				-says that the contents of the root directory (appear to) replace the current contents of
			
 
				-<TT>/dir</TT>;
			
 
				-<TT>MAFTER</TT>
			
 
				-says that the current contents of
			
 
				-<TT>/dir</TT>
			
 
				-remain visible, with the mounted directory's contents appearing
			
 
				-<I>after</I>
			
 
				-any existing files;
			
 
				-and
			
 
				-<TT>MBEFORE</TT>
			
 
				-says that the contents remain visible, with
			
 
				-the mounted directory's contents appearing
			
 
				-<I>before</I>
			
 
				-any existing files.
			
 
				-These multicomponent directories are called
			
 
				-<I>union directories</I>
			
 
				-and are somewhat different from union directories in 4.4BSD-Lite [PeMc95], because
			
 
				-only the top-level directory itself is unioned, not its descendents, recursively.
			
 
				-(Plan 9's union directories are used differently from 4.4BSD-Lite's, as will become apparent.)
			
 
				-<br>&#32;<br>
			
 
				-For example, to bootstrap a diskless computer the system builds a local name space containing
			
 
				-only the root directory,
			
 
				-<TT>/</TT>,
			
 
				-then uses the network to open a connection
			
 
				-to the main file server.
			
 
				-It then executes
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mount(rootfd, "/", MREPL);
			
 
				-</PRE></TT></DL>
			
 
				-After this call, the entire file server's tree is visible, starting from the root of the local machine.
			
 
				-<br>&#32;<br>
			
 
				-While
			
 
				-<TT>mount</TT>
			
 
				-connects a new service to the local name space,
			
 
				-<TT>bind</TT>
			
 
				-rearranges the existing name space:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-bind("tofile", "fromfile", flags)
			
 
				-</PRE></TT></DL>
			
 
				-causes subsequent mention of the
			
 
				-<TT>fromfile</TT>
			
 
				-(which may be a plain file or a directory)
			
 
				-to behave as though
			
 
				-<TT>tofile</TT>
			
 
				-had been mentioned instead, somewhat like a symbolic link.
			
 
				-(Note, however, that the arguments are in the opposite order
			
 
				-compared to
			
 
				-<TT>ln</TT>
			
 
				-<TT>-s</TT>).
			
 
				-The
			
 
				-<TT>flags</TT>
			
 
				-argument is the same as with
			
 
				-<TT>mount</TT>.
			
 
				-<br>&#32;<br>
			
 
				-As an example, a sequence something like the following is done at bootstrap time to
			
 
				-assemble, under the single directory
			
 
				-<TT>/bin</TT>,
			
 
				-all of the binaries suitable for this architecture, represented by (say) the string
			
 
				-<TT>sparc</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-bind("/sparc/bin", "/bin", MREPL);
			
 
				-bind("/usr/rob/sparc/bin", "/bin", MAFTER);
			
 
				-</PRE></TT></DL>
			
 
				-This sequence of
			
 
				-<TT>binds</TT>
			
 
				-causes
			
 
				-<TT>/bin</TT>
			
 
				-to contain first the standard binaries, then the contents of
			
 
				-<TT>rob</TT>'s
			
 
				-private SPARC binaries.
			
 
				-The ability to build such union directories
			
 
				-obviates the need for a shell
			
 
				-<TT></TT><I>PATH</I><TT>
			
 
				-variable
			
 
				-while providing opportunities for managing heterogeneity.
			
 
				-If the system were a Power PC, the same sequence would be run with
			
 
				-</TT><TT>power</TT><TT>
			
 
				-textually substituted for
			
 
				-</TT><TT>sparc</TT><TT>
			
 
				-to place the Power PC binaries in
			
 
				-</TT><TT>/bin</TT><TT>
			
 
				-rather than the SPARC binaries.
			
 
				-</TT><br>&#32;<br>
			
 
				-Trouble is already brewing.  After these bindings are set up,
			
 
				-where does
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cd /bin
			
 
				-% cd ..
			
 
				-</PRE></TT></DL>
			
 
				-set the current working directory, to
			
 
				-<TT>/</TT>
			
 
				-or
			
 
				-<TT>/sparc</TT>
			
 
				-or
			
 
				-<TT>/usr/rob/sparc</TT>?
			
 
				-We will return to this issue.
			
 
				-<br>&#32;<br>
			
 
				-There are some important differences between
			
 
				-<TT>binds</TT>
			
 
				-and symbolic links.
			
 
				-First,
			
 
				-symbolic links are a static part of the file system, while
			
 
				-Plan 9 bindings are created at run time, are stored in the kernel,
			
 
				-and endure only as long as the system maintains them;
			
 
				-they are temporary.
			
 
				-Since they are known to the kernel but not the file system, they must
			
 
				-be set up each time the kernel boots or a user logs in;
			
 
				-permanent bindings are created by editing system initialization scripts
			
 
				-and user profiles rather than by building them in the file system itself.
			
 
				-<br>&#32;<br>
			
 
				-The Plan 9 kernel records what bindings are active for a process,
			
 
				-whereas symbolic links, being held on the Unix file server, may strike whenever the process evaluates
			
 
				-a file name.
			
 
				-Also, symbolic links apply to all processes that evaluate the affected file, whereas
			
 
				-<TT>bind</TT>
			
 
				-has a local scope, applying only to the process that executes it and possibly some of its
			
 
				-peers, as discussed in the next section.
			
 
				-Symbolic links cannot construct the sort of
			
 
				-<TT>/bin</TT>
			
 
				-directory built here; it is possible to have multiple directories point to
			
 
				-<TT>/bin</TT>
			
 
				-but not the other way around.
			
 
				-<br>&#32;<br>
			
 
				-Finally,
			
 
				-symbolic links are symbolic, like macros: they evaluate the associated names each time
			
 
				-they are accessed.
			
 
				-Bindings, on the other hand, are evaluated only once, when the bind is executed;
			
 
				-after the binding is set up, the kernel associates the underlying files, rather than their names.
			
 
				-In fact, the kernel's representation of a bind is identical to its representation of a mount;
			
 
				-in effect, a bind is a mount of the
			
 
				-<TT>tofile</TT>
			
 
				-upon the
			
 
				-<TT>fromfile</TT>.
			
 
				-The binds and mounts coexist in a single
			
 
				-<I>mount table</I>,
			
 
				-the subject of the next section.
			
 
				-<H4>The Mount Table
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Unix has a single global mount table
			
 
				-for all processes in the system, but Plan 9's mount tables are local to each process.
			
 
				-By default it is inherited when a process forks, so mounts and binds made by one
			
 
				-process affect the other, but a process may instead inherit a copy,
			
 
				-so modifications it makes will be invisible to other processes.
			
 
				-The convention is that related processes, such
			
 
				-as processes running in a single window, share a mount table, while sets of processes
			
 
				-in different windows have distinct mount tables.
			
 
				-In practice, the name spaces of the two windows will appear largely the same,
			
 
				-but the possibility for different processes to see different files (hence services) under
			
 
				-the same name is fundamental to the system,
			
 
				-affecting the design of key programs such as the
			
 
				-window system [Pike91].
			
 
				-<br>&#32;<br>
			
 
				-The Plan 9 mount table is little more than an ordered list of pairs, mapping the
			
 
				-<TT>fromfiles</TT>
			
 
				-to the
			
 
				-<TT>tofiles</TT>.
			
 
				-For mounts, the
			
 
				-<TT>tofile</TT>
			
 
				-will be an item called a
			
 
				-<TT>Channel</TT>,
			
 
				-similar to a Unix
			
 
				-<TT>vnode</TT>,
			
 
				-pointing to the root of the file service,
			
 
				-while for a bind it will be the
			
 
				-<TT>Channel</TT>
			
 
				-pointing to the
			
 
				-<TT>tofile</TT>
			
 
				-mentioned in the
			
 
				-<TT>bind</TT>
			
 
				-call.
			
 
				-In both cases, the
			
 
				-<TT>fromfile</TT>
			
 
				-entry in the table
			
 
				-will be a
			
 
				-<TT>Channel</TT>
			
 
				-pointing to the
			
 
				-<TT>fromfile</TT>
			
 
				-itself.
			
 
				-<br>&#32;<br>
			
 
				-The evaluation of a file name proceeds as follows.
			
 
				-If the name begins with a slash, start with the
			
 
				-<TT>Channel</TT>
			
 
				-for the root; otherwise start with the
			
 
				-<TT>Channel</TT>
			
 
				-for the current directory of the process.
			
 
				-For each path element in the name,
			
 
				-such as
			
 
				-<TT>usr</TT>
			
 
				-in
			
 
				-<TT>/usr/rob</TT>,
			
 
				-try to `walk' the
			
 
				-<TT>Channel</TT>
			
 
				-to that element [Pike93].
			
 
				-If the walk succeeds, look to see if the resulting
			
 
				-<TT>Channel</TT>
			
 
				-is the same as any
			
 
				-<TT>fromfile</TT>
			
 
				-in the mount table, and if so, replace it by the corresponding
			
 
				-<TT>tofile</TT>.
			
 
				-Advance to the next element and continue.
			
 
				-<br>&#32;<br>
			
 
				-There are a couple of nuances.  If the directory being walked is a union directory,
			
 
				-the walk is attempted in the elements of the union, in order, until a walk succeeds.
			
 
				-If none succeed, the operation fails.
			
 
				-Also, when the destination of a walk is a directory for a purpose such as the
			
 
				-<TT>chdir</TT>
			
 
				-system call or the
			
 
				-<TT>fromfile</TT>
			
 
				-in a
			
 
				-<TT>bind</TT>,
			
 
				-once the final walk of the sequence has completed the operation stops;
			
 
				-the final check through the mount table is not done.
			
 
				-Among other things, this simplifies the management of union directories;
			
 
				-for example, subsequent
			
 
				-<TT>bind</TT>
			
 
				-calls will append to the union associated with the underlying
			
 
				-<TT>fromfile</TT>
			
 
				-instead of what is bound upon it.
			
 
				-<H4>A Definition of Dot-Dot
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The ability to construct union directories and other intricate naming structures
			
 
				-introduces some thorny problems: as with symbolic links,
			
 
				-the name space is no longer hierarchical, files and directories can have multiple
			
 
				-names, and the meaning of
			
 
				-<TT>..</TT>,
			
 
				-the parent directory, can be ambiguous.
			
 
				-<br>&#32;<br>
			
 
				-The meaning of
			
 
				-<TT>..</TT>
			
 
				-is straightforward if the directory is in a locally hierarchical part of the name space,
			
 
				-but if we ask what
			
 
				-<TT>..</TT>
			
 
				-should identify when the current directory is a mount point or union directory or
			
 
				-multiply symlinked spot (which we will henceforth call just a mount point, for brevity),
			
 
				-there is no obvious answer.
			
 
				-Name spaces have been part of Plan 9 from the beginning, but the definition of
			
 
				-<TT>..</TT>
			
 
				-has changed several times as we grappled with this issue.
			
 
				-In fact, several attempts to clarify the meaning of
			
 
				-<TT>..</TT>
			
 
				-by clever coding
			
 
				-resulted in definitions that could charitably be summarized as `what the implementation gives.'
			
 
				-<br>&#32;<br>
			
 
				-Frustrated by this situation, and eager to have better-defined names for some of the
			
 
				-applications described later in this paper, we recently proposed the following definition
			
 
				-for
			
 
				-<TT>..</TT>:
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The parent of a directory
			
 
				-<I>X</I>,
			
 
				-<I>X</I><TT>/..</TT>,<TT>
			
 
				-is the same directory that would obtain if
			
 
				-we instead accessed the directory named by stripping away the last
			
 
				-path name element of
			
 
				-</TT><I>X</I><TT>.
			
 
				-</dl>
			
 
				-</TT><br>&#32;<br>
			
 
				-For example, if we are in the directory
			
 
				-<TT>/a/b/c</TT>
			
 
				-and
			
 
				-<TT>chdir</TT>
			
 
				-to
			
 
				-<TT>..</TT>,
			
 
				-the result is
			
 
				-<I>exactly</I>
			
 
				-as if we had executed a
			
 
				-<TT>chdir</TT>
			
 
				-to
			
 
				-<TT>/a/b</TT>.
			
 
				-<br>&#32;<br>
			
 
				-This definition is easy to understand and seems natural.
			
 
				-It is, however, a purely
			
 
				-<I>lexical</I>
			
 
				-definition that flatly ignores evaluated file names, mount tables, and
			
 
				-other kernel-resident data structures.
			
 
				-Our challenge is to implement it efficiently.
			
 
				-One obvious (and correct)
			
 
				-implementation is to rewrite path names lexically to fold out
			
 
				-<TT>..</TT>,
			
 
				-and then evaluate the file name forward from the root,
			
 
				-but this is expensive and unappealing.
			
 
				-We want to be able to use local operations to evaluate file names,
			
 
				-but maintain the global, lexical definition of dot-dot.
			
 
				-It isn't too hard.
			
 
				-<H4>The Implementation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-To operate lexically on file names, we associate a name with each open file in the kernel, that
			
 
				-is, with each 
			
 
				-<TT>Channel</TT>
			
 
				-data structure.
			
 
				-The first step is therefore to store a
			
 
				-<TT>char*</TT>
			
 
				-with each
			
 
				-<TT>Channel</TT>
			
 
				-in the system, called its
			
 
				-<TT>Cname</TT>,
			
 
				-that records the
			
 
				-<I>absolute</I>
			
 
				-rooted
			
 
				-file name for the
			
 
				-<TT>Channel</TT>.
			
 
				-<TT>Cnames</TT>
			
 
				-are stored as full text strings, shared copy-on-write for efficiency.
			
 
				-The task is to maintain each
			
 
				-<TT>Cname</TT>
			
 
				-as an accurate absolute name using only local operations.
			
 
				-<br>&#32;<br>
			
 
				-When a file is opened, the file name argument in the
			
 
				-<TT>open</TT>
			
 
				-(or
			
 
				-<TT>chdir</TT>
			
 
				-or
			
 
				-<TT>bind</TT>
			
 
				-or ...) call is recorded in the
			
 
				-<TT>Cname</TT>
			
 
				-of the resulting
			
 
				-<TT>Channel</TT>.
			
 
				-When the file name begins with a slash, the name is stored as is,
			
 
				-subject to a cleanup pass described in the next section.
			
 
				-Otherwise, it is a local name, and the file name must be made
			
 
				-absolute by prefixing it with the
			
 
				-<TT>Cname</TT>
			
 
				-of the current directory, followed by a slash.
			
 
				-For example, if we are in
			
 
				-<TT>/home/rob</TT>
			
 
				-and
			
 
				-<TT>chdir</TT>
			
 
				-to
			
 
				-<TT>bin</TT>,
			
 
				-the
			
 
				-<TT>Cname</TT>
			
 
				-of the resulting
			
 
				-<TT>Channel</TT>
			
 
				-will be the string
			
 
				-<TT>/home/rob/bin</TT>.
			
 
				-<br>&#32;<br>
			
 
				-This assumes, of course, that the local file name contains no
			
 
				-<TT>..</TT>
			
 
				-elements.
			
 
				-If it does, instead of storing for example
			
 
				-<TT>/home/rob/..</TT>
			
 
				-we delete the last element of the existing name and set the
			
 
				-<TT>Cname</TT>
			
 
				-to
			
 
				-<TT>/home</TT>.
			
 
				-To maintain the lexical naming property we must guarantee that the resulting
			
 
				-<TT>Cname</TT>,
			
 
				-if it were to be evaluated, would yield the identical directory to the one
			
 
				-we actually do get by the local
			
 
				-<TT>..</TT>
			
 
				-operation.
			
 
				-<br>&#32;<br>
			
 
				-If the current directory is not a mount point, it is easy to maintain the lexical property.
			
 
				-If it is a mount point, though, it is still possible to maintain it on Plan 9
			
 
				-because the mount table, a kernel-resident data structure, contains all the
			
 
				-information about the non-hierarchical connectivity of the name space.
			
 
				-(On Unix, by contrast, symbolic links are stored on the file server rather than in the kernel.)
			
 
				-Moreover, the presence of a full file name for each
			
 
				-<TT>Channel</TT>
			
 
				-in the mount table provides the information necessary to resolve ambiguities.
			
 
				-<br>&#32;<br>
			
 
				-The mount table is examined in the
			
 
				-<TT>from</TT>-&gt;<TT>to</TT>
			
 
				-direction when evaluating a name, but
			
 
				-<TT>..</TT>
			
 
				-points backwards in the hierarchy, so to evaluate
			
 
				-<TT>..</TT>
			
 
				-the table must be examined in the
			
 
				-<TT>to</TT>-&gt;<TT>from</TT>
			
 
				-direction.
			
 
				-(``How did we get here?'')
			
 
				-<br>&#32;<br>
			
 
				-The value of
			
 
				-<TT>..</TT>
			
 
				-is ambiguous when there are multiple bindings (mount points) that point to
			
 
				-the directories involved in the evaluation of
			
 
				-<TT>..</TT>.
			
 
				-For example, return to our original script with
			
 
				-<TT>/n/bopp/v6</TT>
			
 
				-(containing a home directory for
			
 
				-<TT>ken</TT>)
			
 
				-and
			
 
				-<TT>/n/bopp/v7</TT>
			
 
				-(containing a home directory for
			
 
				-<TT>rob</TT>)
			
 
				-unioned into
			
 
				-<TT>/home</TT>.
			
 
				-This is represented by two entries in the mount table,
			
 
				-<TT>from=/home</TT>,
			
 
				-<TT>to=/n/bopp/v6</TT>
			
 
				-and
			
 
				-<TT>from=/home</TT>,
			
 
				-<TT>to=/n/bopp/v7</TT>.
			
 
				-If we have set our current directory to
			
 
				-<TT>/home/rob</TT>
			
 
				-(which has landed us in the physical location
			
 
				-<TT>/n/bopp/v7/rob</TT>)
			
 
				-our current directory is not a mount point but its parent is.
			
 
				-The value of
			
 
				-<TT>..</TT>
			
 
				-is ambiguous: it could be
			
 
				-<TT>/home</TT>,
			
 
				-<TT>/n/bopp/v7</TT>,
			
 
				-or maybe even
			
 
				-<TT>/n/bopp/v6</TT>,
			
 
				-and the ambiguity is caused by two
			
 
				-<TT>tofiles</TT>
			
 
				-bound to the same
			
 
				-<TT>fromfile</TT>.
			
 
				-By our definition, if we now evaluate
			
 
				-<TT>..</TT>,
			
 
				-we should acquire the directory
			
 
				-<TT>/home</TT>;
			
 
				-otherwise
			
 
				-<TT>../ken</TT>
			
 
				-could not possibly result in
			
 
				-<TT>ken</TT>'s
			
 
				-home directory, which it should.
			
 
				-On the other hand, if we had originally gone to
			
 
				-<TT>/n/bopp/v7/rob</TT>,
			
 
				-the name
			
 
				-<TT>../ken</TT>
			
 
				-should
			
 
				-<I>not</I>
			
 
				-evaluate to
			
 
				-<TT>ken</TT>'s
			
 
				-home directory because there is no directory
			
 
				-<TT>/n/bopp/v7/ken</TT>
			
 
				-(<TT>ken</TT>'s
			
 
				-home directory is on
			
 
				-<TT>v6</TT>).
			
 
				-The problem is that by using local file operations, it is impossible
			
 
				-to distinguish these cases: regardless of whether we got here using the name
			
 
				-<TT>/home/rob</TT>
			
 
				-or
			
 
				-<TT>/n/bopp/v7/rob</TT>,
			
 
				-the resulting directory is the same.
			
 
				-Moreover, the mount table does not itself have enough information
			
 
				-to disambiguate: when we do a local operation to evaluate
			
 
				-<TT>..</TT>
			
 
				-and land in
			
 
				-<TT>/n/bopp/v7</TT>,
			
 
				-we discover that the directory is a
			
 
				-<TT>tofile</TT>
			
 
				-in the mount table; should we step back through the table to
			
 
				-<TT>/home</TT>
			
 
				-or not?
			
 
				-<br>&#32;<br>
			
 
				-The solution comes from the
			
 
				-<TT>Cnames</TT>
			
 
				-themselves.
			
 
				-Whether to step back through the mount point
			
 
				-<TT>from=/home</TT>,
			
 
				-<TT>to=/n/bopp/v7</TT>
			
 
				-when evaluating
			
 
				-<TT>..</TT>
			
 
				-in
			
 
				-<TT>rob</TT>'s
			
 
				-directory is trivially resolved by asking the question,
			
 
				-Does the
			
 
				-<TT>Cname</TT>
			
 
				-for the directory begin
			
 
				-<TT>/home</TT>?
			
 
				-If it does, then the path that was evaluated to get us to the current
			
 
				-directory must have gone through this mount point, and we should
			
 
				-back up through it to evaluate
			
 
				-<TT>..</TT>;
			
 
				-if not, then this mount table entry is irrelevant.
			
 
				-<br>&#32;<br>
			
 
				-More precisely,
			
 
				-both
			
 
				-<I>before</I>
			
 
				-and
			
 
				-<I>after</I>
			
 
				-each
			
 
				-<TT>..</TT>
			
 
				-element in the path name is evaluated,
			
 
				-if the directory is a
			
 
				-<TT>tofile</TT>
			
 
				-in the mount table, the corresponding
			
 
				-<TT>fromfile</TT>
			
 
				-is taken instead, provided the
			
 
				-<TT>Cname</TT>
			
 
				-of the corresponding
			
 
				-<TT>fromfile</TT>
			
 
				-is the prefix of the
			
 
				-<TT>Cname</TT>
			
 
				-of the original directory.
			
 
				-Since we always know the full name of the directory
			
 
				-we are evaluating, we can always compare it against all the entries in the mount table that point
			
 
				-to it, thereby resolving ambiguous situations
			
 
				-and maintaining the
			
 
				-lexical property of
			
 
				-<TT>..</TT>.
			
 
				-This check also guarantees we don't follow a misleading mount point, such as the entry pointing to
			
 
				-<TT>/home</TT>
			
 
				-when we are really in
			
 
				-<TT>/n/bopp/v7/rob</TT>.
			
 
				-Keeping the full names with the
			
 
				-<TT>Channels</TT>
			
 
				-makes it easy to use the mount table to decide how we got here and, therefore,
			
 
				-how to get back.
			
 
				-<br>&#32;<br>
			
 
				-In summary, the algorithm is as follows.
			
 
				-Use the usual file system operations to walk to
			
 
				-<TT>..</TT>;
			
 
				-call the resulting directory
			
 
				-<I>d</I>.
			
 
				-Lexically remove
			
 
				-the last element of the initial file name.
			
 
				-Examine all entries in the mount table whose
			
 
				-<TT>tofile</TT>
			
 
				-is
			
 
				-<I>d</I>
			
 
				-and whose
			
 
				-<TT>fromfile</TT>
			
 
				-has a
			
 
				-<TT>Cname</TT>
			
 
				-identical to the truncated name.
			
 
				-If one exists, that
			
 
				-<TT>fromfile</TT>
			
 
				-is the correct result; by construction, it also has the right
			
 
				-<TT>Cname</TT>.
			
 
				-In our example, evaluating
			
 
				-<TT>..</TT>
			
 
				-in
			
 
				-<TT>/home/rob</TT>
			
 
				-(really
			
 
				-<TT>/n/bopp/v7/rob</TT>)
			
 
				-will set
			
 
				-<I>d</I>
			
 
				-to
			
 
				-<TT>/n/bopp/v7</TT>;
			
 
				-that is a
			
 
				-<TT>tofile</TT>
			
 
				-whose
			
 
				-<TT>fromfile</TT>
			
 
				-is
			
 
				-<TT>/home</TT>.
			
 
				-Removing the
			
 
				-<TT>/rob</TT>
			
 
				-from the original
			
 
				-<TT>Cname</TT>,
			
 
				-we find the name
			
 
				-<TT>/home</TT>,
			
 
				-which matches that of the
			
 
				-<TT>fromfile</TT>,
			
 
				-so the result is the
			
 
				-<TT>fromfile</TT>,
			
 
				-<TT>/home</TT>.
			
 
				-<br>&#32;<br>
			
 
				-Since this implementation uses only local operations to maintain its names,
			
 
				-it is possible to confuse it by external changes to the file system.
			
 
				-Deleting or renaming directories and files that are part of a
			
 
				-<TT>Cname</TT>,
			
 
				-or modifying the mount table, can introduce errors.
			
 
				-With more implementation work, such mistakes could probably be caught,
			
 
				-but in a networked environment, with machines sharing a remote file server, renamings
			
 
				-and deletions made by one machine may go unnoticed by others.
			
 
				-These problems, however, are minor, uncommon and, most important, easy to understand.
			
 
				-The method maintains the lexical property of file names unless an external
			
 
				-agent changes the name surreptitiously;
			
 
				-within a stable file system, it is always maintained and
			
 
				-<TT>pwd</TT>
			
 
				-is always right.
			
 
				-<br>&#32;<br>
			
 
				-To recapitulate, maintaining the
			
 
				-<TT>Channel</TT>'s
			
 
				-absolute file names lexically and using the names to disambiguate the
			
 
				-mount table entries when evaluating
			
 
				-<TT>..</TT>
			
 
				-at a mount point
			
 
				-combine to maintain the lexical definition of
			
 
				-<TT>..</TT>
			
 
				-efficiently.
			
 
				-<H4>Cleaning names
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The lexical processing can generate names that are messy or redundant,
			
 
				-ones with extra slashes or embedded
			
 
				-<TT>../</TT>
			
 
				-or
			
 
				-<TT>./</TT>
			
 
				-elements and other extraneous artifacts.
			
 
				-As part of the kernel's implementation, we wrote a procedure,
			
 
				-<TT>cleanname</TT>,
			
 
				-that rewrites a name in place to canonicalize its appearance.
			
 
				-The procedure is useful enough that it is now part of the Plan 9 C
			
 
				-library and is employed by many programs to make sure they always
			
 
				-present clean file names.
			
 
				-<br>&#32;<br>
			
 
				-<TT>Cleanname</TT>
			
 
				-is analogous to the URL-cleaning rules defined in RFC 1808 [Field95], although
			
 
				-the rules are slightly different.
			
 
				-<TT>Cleanname</TT>
			
 
				-iteratively does the following until no further processing can be done:
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-1. Reduce multiple slashes to a single slash.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-2. Eliminate
			
 
				-<TT>.</TT>
			
 
				-path name elements
			
 
				-(the current directory).
			
 
				-<DT><DT>&#32;<DD>
			
 
				-3. Eliminate
			
 
				-<TT>..</TT>
			
 
				-path name elements (the parent directory) and the
			
 
				-non-<TT>.</TT>
			
 
				-non-<TT>..</TT>
			
 
				-element that precedes them.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-4. Eliminate
			
 
				-<TT>..</TT>
			
 
				-elements that begin a rooted path, that is, replace
			
 
				-<TT>/..</TT>
			
 
				-by
			
 
				-<TT>/</TT>
			
 
				-at the beginning of a path.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-5. Leave intact
			
 
				-<TT>..</TT>
			
 
				-elements that begin a non-rooted path.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-If the result of this process is a null string,
			
 
				-<TT>cleanname</TT>
			
 
				-returns the string
			
 
				-<TT>"."</TT>,
			
 
				-representing the current directory.
			
 
				-<H4>The fd2path system call
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Plan 9 has a new system call,
			
 
				-<TT>fd2path</TT>,
			
 
				-to enable programs to extract the
			
 
				-<TT>Cname</TT>
			
 
				-associated with an open file descriptor.
			
 
				-It takes three arguments: a file descriptor, a buffer, and the size of the buffer:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int fd2path(int fd, char *buf, int nbuf)
			
 
				-</PRE></TT></DL>
			
 
				-It returns an error if the file descriptor is invalid; otherwise it fills the buffer with the name
			
 
				-associated with
			
 
				-<TT>fd</TT>.
			
 
				-(If the name is too long, it is truncated; perhaps this condition should also draw an error.)
			
 
				-The
			
 
				-<TT>fd2path</TT>
			
 
				-system call is very cheap, since all it does is copy the
			
 
				-<TT>Cname</TT>
			
 
				-string to user space.
			
 
				-<br>&#32;<br>
			
 
				-The Plan 9 implementation of
			
 
				-<TT>getwd</TT>
			
 
				-uses
			
 
				-<TT>fd2path</TT>
			
 
				-rather than the tricky algorithm necessary in Unix:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-char*
			
 
				-getwd(char *buf, int nbuf)
			
 
				-{
			
 
				-	int n, fd;
			
 
				-
			
 
				-	fd = open(".", OREAD);
			
 
				-	if(fd &lt; 0)
			
 
				-		return NULL;
			
 
				-	n = fd2path(fd, buf, nbuf);
			
 
				-	close(fd);
			
 
				-	if(n &lt; 0)
			
 
				-		return NULL;
			
 
				-	return buf;
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-(The Unix specification of
			
 
				-<TT>getwd</TT>
			
 
				-does not include a count argument.)
			
 
				-This version of
			
 
				-<TT>getwd</TT>
			
 
				-is not only straightforward, it is very efficient, reducing the performance
			
 
				-advantage of a built-in
			
 
				-<TT>pwd</TT>
			
 
				-command while guaranteeing that all commands, not just
			
 
				-<TT>pwd</TT>,
			
 
				-see sensible directory names.
			
 
				-<br>&#32;<br>
			
 
				-Here is a routine that prints the file name associated
			
 
				-with each of its open file descriptors; it is useful for tracking down file descriptors
			
 
				-left open by network listeners, text editors that spawn commands, and the like:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void
			
 
				-openfiles(void)
			
 
				-{
			
 
				-	int i;
			
 
				-	char buf[256];
			
 
				-
			
 
				-	for(i=0; i&lt;NFD; i++)
			
 
				-		if(fd2path(i, buf, sizeof buf) &gt;= 0)
			
 
				-			print("%d: %s\n", i, buf);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<H4>Uses of good names
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Although
			
 
				-<TT>pwd</TT>
			
 
				-was the motivation for getting names right, good file names are useful in many contexts
			
 
				-and have become a key part of the Plan 9 programming environment.
			
 
				-The compilers record in the symbol table the full name of the source file, which makes
			
 
				-it easy to track down the source of buggy, old software and also permits the
			
 
				-implementation of a program,
			
 
				-<TT>src</TT>,
			
 
				-to automate tracking it down.
			
 
				-Given the name of a program,
			
 
				-<TT>src</TT>
			
 
				-reads its symbol table, extracts the file information,
			
 
				-and triggers the editor to open a window on the program's
			
 
				-source for its
			
 
				-<TT>main</TT>
			
 
				-routine.
			
 
				-No guesswork, no heuristics.
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>openfiles</TT>
			
 
				-routine was the inspiration for a new file in the
			
 
				-<TT>/proc</TT>
			
 
				-file system [Kill84].
			
 
				-For process
			
 
				-<I>n</I>,
			
 
				-the file
			
 
				-<TT>/proc/</TT><I>n</I><TT>/fd</TT><I>
			
 
				-is a list of all its open files, including its working directory,
			
 
				-with associated information including its open status,
			
 
				-I/O offset, unique id (analogous to i-number)
			
 
				-and file name.
			
 
				-Here are the contents of the
			
 
				-</I><TT>fd</TT><I>
			
 
				-file for a process in the window system on the machine being used to write this paper:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cat /proc/125099/fd 
			
 
				-/usr/rob
			
 
				-  0 r  M 5141 00000001.00000000        0 /mnt/term/dev/cons
			
 
				-  1 w  M 5141 00000001.00000000       51 /mnt/term/dev/cons
			
 
				-  2 w  M 5141 00000001.00000000       51 /mnt/term/dev/cons
			
 
				-  3 r  M 5141 0000000b.00000000     1166 /dev/snarf
			
 
				-  4 rw M 5141 0ffffffc.00000000      288 /dev/draw/new
			
 
				-  5 rw M 5141 00000036.00000000  4266337 /dev/draw/3/data
			
 
				-  6 r  M 5141 00000037.00000000        0 /dev/draw/3/refresh
			
 
				-  7 r  c    0 00000004.00000000  6199848 /dev/bintime
			
 
				-% 
			
 
				-</PRE></TT></DL>
			
 
				-(The Linux implementation of
			
 
				-</I><TT>/proc</TT><I>
			
 
				-provides a related service by giving a directory in which each file-descriptor-numbered file is
			
 
				-a symbolic link to the file itself.)
			
 
				-When debugging errant systems software, such information can be valuable.
			
 
				-</I><br>&#32;<br>
			
 
				-Another motivation for getting names right was the need to extract from the system
			
 
				-an accurate description of the mount table, so that a process's name space could be
			
 
				-recreated on another machine, in order to move (or simulate) a computing environment
			
 
				-across the network.
			
 
				-One program that does this is Plan 9's
			
 
				-<TT>cpu</TT>
			
 
				-command, which recreates the local name space on a remote machine, typically a large
			
 
				-fast multiprocessor.
			
 
				-Without accurate names, it was impossible to do the job right; now
			
 
				-<TT>/proc</TT>
			
 
				-provides a description of the name space of each process,
			
 
				-<TT>/proc/</TT><I>n</I><TT>/ns</TT><I>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cat /proc/125099/ns
			
 
				-bind  / /
			
 
				-mount -aC #s/boot / 
			
 
				-bind  #c /dev
			
 
				-bind  #d /fd
			
 
				-bind -c #e /env
			
 
				-bind  #p /proc
			
 
				-bind -c #s /srv
			
 
				-bind  /386/bin /bin
			
 
				-bind -a /rc/bin /bin
			
 
				-bind  /net /net
			
 
				-bind -a #l /net
			
 
				-mount -a #s/cs /net 
			
 
				-mount -a #s/dns /net 
			
 
				-bind -a #D /net
			
 
				-mount -c #s/boot /n/emelie 
			
 
				-bind -c /n/emelie/mail /mail
			
 
				-mount -c /net/il/134/data /mnt/term 
			
 
				-bind -a /usr/rob/bin/rc /bin
			
 
				-bind -a /usr/rob/bin/386 /bin
			
 
				-mount  #s/boot /n/emelieother other
			
 
				-bind -c /n/emelieother/rob /tmp
			
 
				-mount  #s/boot /n/dump dump
			
 
				-bind  /mnt/term/dev/cons /dev/cons
			
 
				-...
			
 
				-cd /usr/rob
			
 
				-% 
			
 
				-</PRE></TT></DL>
			
 
				-(The
			
 
				-</I><TT>#</TT><I>
			
 
				-notation identifies raw device drivers so they may be attached to the name space.)
			
 
				-The last line of the file gives the working directory of the process.
			
 
				-The format of this file is that used by a library routine,
			
 
				-</I><TT>newns</TT><I>,
			
 
				-which reads a textual description like this and reconstructs a name space.
			
 
				-Except for the need to quote
			
 
				-</I><TT>#</TT><I>
			
 
				-characters, the output is also a shell script that invokes the user-level commands
			
 
				-</I><TT>bind</TT><I>
			
 
				-and
			
 
				-</I><TT>mount</TT><I>,
			
 
				-which are just interfaces to the underlying system calls.
			
 
				-However,
			
 
				-files like
			
 
				-</I><TT>/net/il/134/data</TT><I>
			
 
				-represent network connections; to find out where they point, so that the corresponding
			
 
				-calls can be reestablished for another process,
			
 
				-they must be examined in more detail using the network device files [PrWi93].  Another program,
			
 
				-</I><TT>ns</TT><I>,
			
 
				-does this; it reads the
			
 
				-</I><TT>/proc/</TT><I>n</I><TT>/ns</TT><I>
			
 
				-file, decodes the information, and interprets it, translating the network
			
 
				-addresses and quoting the names when required:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-...
			
 
				-mount -a '#s/dns' /net 
			
 
				-...
			
 
				-mount -c il!135.104.3.100!12884 /mnt/term 
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-These tools make it possible to capture an accurate description of a process's
			
 
				-name space and recreate it elsewhere.
			
 
				-And like the open file descriptor table,
			
 
				-they are a boon to debugging; it is always helpful to know
			
 
				-exactly what resources a program is using.
			
 
				-</I><H4>Adapting to Unix
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-This work was done for the Plan 9 operating system, which has the advantage that
			
 
				-the non-hierarchical aspects of the name space are all known to the kernel.
			
 
				-It should be possible, though, to adapt it to a Unix system.
			
 
				-The problem is that Unix has nothing corresponding precisely to a
			
 
				-<TT>Channel</TT>,
			
 
				-which in Plan 9 represents the unique result of evaluating a name.
			
 
				-The
			
 
				-<TT>vnode</TT>
			
 
				-structure is a shared structure that may represent a file
			
 
				-known by several names, while the
			
 
				-<TT>file</TT>
			
 
				-structure refers only to open files, but for example the current working
			
 
				-directory of a process is not open.
			
 
				-Possibilities to address this discrepancy include
			
 
				-introducing a
			
 
				-<TT>Channel</TT>-like
			
 
				-structure that connects a name and a
			
 
				-<TT>vnode</TT>,
			
 
				-or maintaining a separate per-process table that maps names to
			
 
				-<TT>vnodes</TT>,
			
 
				-disambiguating using the techniques described here.
			
 
				-If it could be done
			
 
				-the result would be an implementation of
			
 
				-<TT>..</TT>
			
 
				-that reduces the need for a built-in
			
 
				-<TT>pwd</TT>
			
 
				-in the shell and offers a consistent, sensible interpretation of the `parent directory'.
			
 
				-<br>&#32;<br>
			
 
				-We have not done this adaptation, but we recommend that the Unix community try it.
			
 
				-<H4>Conclusions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-It should be easy to discover a well-defined, absolute path name for every open file and
			
 
				-directory in the system, even in the face of symbolic links and other non-hierarchical
			
 
				-elements of the file name space.
			
 
				-In earlier versions of Plan 9, and all current versions of Unix,
			
 
				-names can instead be inconsistent and confusing.
			
 
				-<br>&#32;<br>
			
 
				-The Plan 9 operating system now maintains an accurate name for each file,
			
 
				-using inexpensive lexical operations coupled with local file system actions.
			
 
				-Ambiguities are resolved by examining the names themselves;
			
 
				-since they reflect the path that was used to reach the file, they also reflect the path back,
			
 
				-permitting a dependable answer to be recovered even when stepping backwards through
			
 
				-a multiply-named directory.
			
 
				-<br>&#32;<br>
			
 
				-Names make sense again: they are sensible and consistent.
			
 
				-Now that dependable names are available, system services can depend on them,
			
 
				-and recent work in Plan 9 is doing just that.
			
 
				-We&#8212;the community of Unix and Unix-like systems&#8212;should have done this work a long time ago.
			
 
				-<H4>Acknowledgements
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Phil Winterbottom devised the
			
 
				-<TT>ns</TT>
			
 
				-command and the
			
 
				-<TT>fd</TT>
			
 
				-and
			
 
				-<TT>ns</TT>
			
 
				-files in
			
 
				-<TT>/proc</TT>,
			
 
				-based on an earlier implementation of path name management that
			
 
				-the work in this paper replaces.
			
 
				-Russ Cox wrote the final version of
			
 
				-<TT>cleanname</TT>
			
 
				-and helped debug the code for reversing the mount table.
			
 
				-Ken Thompson, Dave Presotto, and Jim McKie offered encouragement and consultation.
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Field95]
			
 
				-R. Fielding,
			
 
				-``Relative Uniform Resource Locators'',
			
 
				-<I>Network Working Group Request for Comments: 1808</I>,
			
 
				-June, 1995.
			
 
				-<br>&#32;<br>
			
 
				-[Kill84]
			
 
				-T. J. Killian,
			
 
				-``Processes as Files'',
			
 
				-<I>Proceedings of the Summer 1984 USENIX Conference</I>,
			
 
				-Salt Lake City, 1984, pp. 203-207.
			
 
				-<br>&#32;<br>
			
 
				-[Korn94]
			
 
				-David G. Korn,
			
 
				-``ksh: An Extensible High Level Language'',
			
 
				-<I>Proceedings of the USENIX Very High Level Languages Symposium</I>,
			
 
				-Santa Fe, 1994, pp. 129-146.
			
 
				-<br>&#32;<br>
			
 
				-[Korn00]
			
 
				-David G. Korn,
			
 
				-personal communication.
			
 
				-<br>&#32;<br>
			
 
				-[PeMc95]
			
 
				-Jan-Simon Pendry and Marshall Kirk McKusick,
			
 
				-``Union Mounts in 4.4BSD-Lite'',
			
 
				-<I>Proceedings of the 1995 USENIX Conference</I>,
			
 
				-New Orleans, 1995.
			
 
				-<br>&#32;<br>
			
 
				-[Pike91]
			
 
				-Rob Pike,
			
 
				-``8&#189;, the Plan 9 Window System'',
			
 
				-<I>Proceedings of the Summer 1991 USENIX Conference</I>,
			
 
				-Nashville, 1991, pp. 257-265.
			
 
				-<br>&#32;<br>
			
 
				-[Pike93]
			
 
				-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom,
			
 
				-``The Use of Name Spaces in Plan 9'',
			
 
				-<I>Operating Systems Review</I>,
			
 
				-<B>27</B>,
			
 
				-2, April 1993, pp. 72-76.
			
 
				-<br>&#32;<br>
			
 
				-[PrWi93]
			
 
				-Dave Presotto and Phil Winterbottom,
			
 
				-``The Organization of Networks in Plan 9'',
			
 
				-<I>Proceedings of the Winter 1993 USENIX Conference</I>,
			
 
				-San Diego, 1993, pp. 43-50.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/libmach.html
+++ b/sys/doc/libmach.html
@@ -1,910 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Adding Application Support for a New Architecture in Plan 9
			
 
				-</H1>
			
 
				-<DL><DD><I>Bob Flandrena<br>
			
 
				-bobf@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Plan 9 has five classes of architecture-dependent software:
			
 
				-headers, kernels, compilers and loaders, the
			
 
				-<TT>libc</TT>
			
 
				-system library, and a few application programs.  In general,
			
 
				-architecture-dependent programs
			
 
				-consist of a portable part shared by all architectures and a
			
 
				-processor-specific portion for each supported architecture.
			
 
				-The portable code is often compiled and stored in a library
			
 
				-associated with
			
 
				-each architecture.  A program is built by
			
 
				-compiling the architecture-specific code and loading it with the
			
 
				-library.  Support for a new architecture is provided
			
 
				-by building a compiler for the architecture, using it to
			
 
				-compile the portable code into libraries,
			
 
				-writing the architecture-specific code, and
			
 
				-then loading that code with
			
 
				-the libraries.
			
 
				-<br>&#32;<br>
			
 
				-This document describes the organization of the architecture-dependent
			
 
				-code and headers on Plan 9.
			
 
				-The first section briefly discusses the layout of
			
 
				-the headers and the source code for the kernels, compilers, loaders, and the
			
 
				-system library, 
			
 
				-<TT>libc</TT>.
			
 
				-The second section provides a detailed
			
 
				-discussion of the structure of
			
 
				-<TT>libmach</TT>,
			
 
				-a library containing almost
			
 
				-all architecture-dependent code
			
 
				-used by application programs.
			
 
				-The final section describes the steps required to add
			
 
				-application program support for a new architecture.
			
 
				-<H4>Directory Structure
			
 
				-</H4>
			
 
				-<P>
			
 
				-Architecture-dependent information for the new processor
			
 
				-is stored in the directory tree rooted at <TT>/</TT><I>m</I>
			
 
				-where
			
 
				-<I>m</I>
			
 
				-is the name of the new architecture (e.g.,
			
 
				-<TT>mips</TT>).
			
 
				-The new directory should be initialized with several important
			
 
				-subdirectories, notably
			
 
				-<TT>bin</TT>,
			
 
				-<TT>include</TT>,
			
 
				-and
			
 
				-<TT>lib</TT>.
			
 
				-The directory tree of an existing architecture
			
 
				-serves as a good model for the new tree.
			
 
				-The architecture-dependent
			
 
				-<TT>mkfile</TT>
			
 
				-must be stored in the newly created root directory
			
 
				-for the architecture.  It is easiest to copy the
			
 
				-mkfile for an existing architecture and modify
			
 
				-it for the new architecture.  When the mkfile
			
 
				-is correct, change the
			
 
				-<TT>OS</TT>
			
 
				-and
			
 
				-<TT>CPUS</TT>
			
 
				-variables in the
			
 
				-<TT>/sys/src/mkfile.proto</TT>
			
 
				-to reflect the addition of the new architecture.
			
 
				-</P>
			
 
				-<H4>Headers
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Architecture-dependent headers are stored in directory
			
 
				-<TT>/</TT><I>m</I><TT>/include</TT><I>
			
 
				-where
			
 
				-</I><I>m</I><I>
			
 
				-is the name of the architecture (e.g.,
			
 
				-</I><TT>mips</TT><I>).
			
 
				-Two header files are required:
			
 
				-</I><TT>u.h</TT><I>
			
 
				-and
			
 
				-</I><TT>ureg.h</TT><I>.
			
 
				-The first defines fundamental data types,
			
 
				-bit settings for the floating point
			
 
				-status and control registers, and
			
 
				-</I><TT>va_list</TT><I>
			
 
				-processing which depends on the stack
			
 
				-model for the architecture.  This file
			
 
				-is best built by copying and modifying the
			
 
				-</I><TT>u.h</TT><I>
			
 
				-file from an architecture
			
 
				-with a similar stack model.
			
 
				-The
			
 
				-</I><TT>ureg.h</TT><I>
			
 
				-file
			
 
				-contains a structure describing the layout
			
 
				-of the saved register set for
			
 
				-the architecture; it is defined by the kernel.
			
 
				-</I><br>&#32;<br>
			
 
				-Header file
			
 
				-<TT>/sys/include/a.out.h</TT>
			
 
				-contains the definitions of the magic
			
 
				-numbers used to identify executables for
			
 
				-each architecture.  When support for a new
			
 
				-architecture is added, the magic number
			
 
				-for the architecture must be added to this file.
			
 
				-<br>&#32;<br>
			
 
				-The header format of a bootable executable is defined by
			
 
				-each manufacturer.  Header file
			
 
				-<TT>/sys/include/bootexec.h</TT>
			
 
				-contains structures describing the headers currently
			
 
				-supported.  If the new architecture uses a common header
			
 
				-such as COFF,
			
 
				-the header format is probably already defined,
			
 
				-but if the bootable header format is non-standard,
			
 
				-a structure defining the format must be added to this file.
			
 
				-<br>&#32;<br>
			
 
				-<H4>Kernel
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Although the kernel depends critically on the properties of the underlying
			
 
				-hardware, most of the
			
 
				-higher-level kernel functions, including process
			
 
				-management, paging, pseudo-devices, and some
			
 
				-networking code, are independent of processor
			
 
				-architecture.  The portable kernel code
			
 
				-is divided into two parts: that implementing kernel
			
 
				-functions and that devoted to the boot process.
			
 
				-Code in the first class is stored in directory
			
 
				-<TT>/sys/src/9/port</TT>
			
 
				-and the portable boot code is stored in
			
 
				-<TT>/sys/src/9/boot</TT>.
			
 
				-Architecture-dependent kernel code is stored in the
			
 
				-subdirectories of
			
 
				-<TT>/sys/src/9</TT>
			
 
				-named for each architecture.
			
 
				-<br>&#32;<br>
			
 
				-The relationship between the kernel code and the boot code
			
 
				-is convoluted and subtle.  The portable boot code
			
 
				-is compiled into a library for each architecture.  An architecture-specific
			
 
				-main program is loaded with the appropriate library and the resulting
			
 
				-executable is compiled into the kernel where it is executed as
			
 
				-a user process during the final stages of kernel initialization.  The boot process
			
 
				-performs authentication, attaches the name space root to the appropriate
			
 
				-file system and starts the
			
 
				-<TT>init</TT>
			
 
				-process.
			
 
				-<br>&#32;<br>
			
 
				-The organization of the portable kernel source code differs from that
			
 
				-of most other architecture-specific code.
			
 
				-Instead of storing the portable code in a library
			
 
				-and loading it with the architecture-specific
			
 
				-code, the portable code is compiled directly into
			
 
				-the directory containing the architecture-specific code
			
 
				-and linked with the object files built from the source in that directory.
			
 
				-<br>&#32;<br>
			
 
				-<H4>Compilers and Loaders
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The compiler source code conforms to the usual
			
 
				-organization: portable code is compiled into a library
			
 
				-for each architecture
			
 
				-and the architecture-dependent code is loaded with
			
 
				-that library.
			
 
				-The common compiler code is stored in
			
 
				-<TT>/sys/src/cmd/cc</TT>.
			
 
				-The
			
 
				-<TT>mkfile</TT>
			
 
				-in this directory compiles the portable source and
			
 
				-archives the objects in a library for each architecture.
			
 
				-The architecture-specific compiler source
			
 
				-is stored in a subdirectory of
			
 
				-<TT>/sys/src/cmd</TT>
			
 
				-with the same name as the compiler (e.g.,
			
 
				-<TT>/sys/src/cmd/vc</TT>).
			
 
				-<br>&#32;<br>
			
 
				-There is no portable code shared by the loaders.
			
 
				-Each directory of loader source
			
 
				-code is self-contained, except for
			
 
				-a header file and an instruction name table
			
 
				-included from the
			
 
				-directory of the associated
			
 
				-compiler.
			
 
				-<br>&#32;<br>
			
 
				-<H4>Libraries
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Most C library modules are
			
 
				-portable; the source code is stored in
			
 
				-directories
			
 
				-<TT>/sys/src/libc/port</TT>
			
 
				-and
			
 
				-<TT>/sys/src/libc/9sys</TT>.
			
 
				-Architecture-dependent library code
			
 
				-is stored in the subdirectory of
			
 
				-<TT>/sys/src/libc</TT>
			
 
				-named the same as the target processor.
			
 
				-Non-portable functions not only
			
 
				-implement architecture-dependent operations
			
 
				-but also supply assembly language implementations
			
 
				-of functions where speed is critical.
			
 
				-Directory
			
 
				-<TT>/sys/src/libc/9syscall</TT>
			
 
				-is unusual because it
			
 
				-contains architecture-dependent information
			
 
				-for all architectures.
			
 
				-It holds only a header file defining
			
 
				-the names and numbers of system calls
			
 
				-and a
			
 
				-<TT>mkfile</TT>.
			
 
				-The
			
 
				-<TT>mkfile</TT>
			
 
				-executes an
			
 
				-<TT>rc</TT>
			
 
				-script that parses the header file, constructs
			
 
				-assembler language functions implementing the system
			
 
				-call for each architecture, assembles the code,
			
 
				-and archives the object files in
			
 
				-<TT>libc</TT>.
			
 
				-The assembler language syntax and the system interface
			
 
				-differ for each architecture.
			
 
				-The
			
 
				-<TT>rc</TT>
			
 
				-script in this
			
 
				-<TT>mkfile</TT>
			
 
				-must be modified to support a new architecture.
			
 
				-<br>&#32;<br>
			
 
				-<H4>Applications
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Application programs process two forms of architecture-dependent
			
 
				-information: executable images and intermediate object files.
			
 
				-Almost all processing is on executable files.
			
 
				-System library
			
 
				-<TT>libmach</TT>
			
 
				-provides functions that convert
			
 
				-architecture-specific data
			
 
				-to a portable format so application programs
			
 
				-can process this data independent of its
			
 
				-underlying representation.
			
 
				-Further, when a new architecture is implemented
			
 
				-almost all code changes
			
 
				-are confined to the library;
			
 
				-most affected application programs need only be reloaded.
			
 
				-The source code for the library is stored in
			
 
				-<TT>/sys/src/libmach</TT>.
			
 
				-<br>&#32;<br>
			
 
				-An application program running on one type of
			
 
				-processor must be able to interpret
			
 
				-architecture-dependent information for all
			
 
				-supported processors.
			
 
				-For example, a debugger must be able to debug
			
 
				-the executables of
			
 
				-all architectures, not just the
			
 
				-architecture on which it is executing, since
			
 
				-<TT>/proc</TT>
			
 
				-may be imported from a different machine.
			
 
				-<br>&#32;<br>
			
 
				-A small part of the application library
			
 
				-provides functions to
			
 
				-extract symbol references from object files.
			
 
				-The remainder provides the following processing
			
 
				-of executable files or memory images:
			
 
				-<DL><DD>
			
 
				-<br>&#32;<br>
			
 
				-<UL>
			
 
				-<LI>
			
 
				-Header interpretation.
			
 
				-<LI>
			
 
				-Symbol table interpretation.
			
 
				-<LI>
			
 
				-Execution context interpretation, such as stack traces
			
 
				-and stack frame location.
			
 
				-<LI>
			
 
				-Instruction interpretation including disassembly and
			
 
				-instruction size and follow-set calculations.
			
 
				-<LI>
			
 
				-Exception and floating point number interpretation.
			
 
				-<LI>
			
 
				-Architecture-independent read and write access through a
			
 
				-relocation map.
			
 
				-</UL>
			
 
				-</DL>
			
 
				-<br>&#32;<br>
			
 
				-Header file
			
 
				-<TT>/sys/include/mach.h</TT>
			
 
				-defines the interfaces to the
			
 
				-application library.  Manual pages
			
 
				-<A href="/magic/man2html/2/mach"><I>mach</I>(2),
			
 
				-</A><A href="/magic/man2html/2/symbol"><I>symbol</I>(2),
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/2/object"><I>object</I>(2)
			
 
				-</A>describe the details of the
			
 
				-library functions.
			
 
				-<br>&#32;<br>
			
 
				-Two data structures, called
			
 
				-<TT>Mach</TT>
			
 
				-and
			
 
				-<TT>Machdata</TT>,
			
 
				-contain architecture-dependent  parameters and
			
 
				-a jump table of functions.
			
 
				-Global variables
			
 
				-<TT>mach</TT>
			
 
				-and
			
 
				-<TT>machdata</TT>
			
 
				-point to the
			
 
				-<TT>Mach</TT>
			
 
				-and
			
 
				-<TT>Machdata</TT>
			
 
				-data structures associated with the target architecture.
			
 
				-An application determines the target architecture of
			
 
				-a file or executable image, sets the global pointers
			
 
				-to the data structures associated with that architecture,
			
 
				-and subsequently performs all references indirectly through the
			
 
				-pointers.
			
 
				-As a result, direct references to the tables for each
			
 
				-architecture are avoided and the application code intrinsically
			
 
				-supports all architectures (though only one at a time).
			
 
				-<br>&#32;<br>
			
 
				-Object file processing is handled similarly: architecture-dependent
			
 
				-functions identify and
			
 
				-decode the intermediate files for the processor.
			
 
				-The application indirectly
			
 
				-invokes a classification function to identify
			
 
				-the architecture of the object code and to select the
			
 
				-appropriate decoding function.  Subsequent calls
			
 
				-then use that function to decode each record.  Again,
			
 
				-the layer of indirection allows the application code
			
 
				-to support all architectures without modification.
			
 
				-<br>&#32;<br>
			
 
				-Splitting the architecture-dependent information
			
 
				-between the
			
 
				-<TT>Mach</TT>
			
 
				-and
			
 
				-<TT>Machdata</TT>
			
 
				-data structures
			
 
				-allows applications to choose
			
 
				-an appropriate level of service.  Even though an application
			
 
				-does not directly reference the architecture-specific data structures,
			
 
				-it must load the
			
 
				-architecture-dependent tables and code 
			
 
				-for all architectures it supports.  The size of this data
			
 
				-can be substantial and many applications do not require
			
 
				-the full range of architecture-dependent functionality.
			
 
				-For example, the
			
 
				-<TT>size</TT>
			
 
				-command does not require the disassemblers for every architecture;
			
 
				-it only needs to decode the header.
			
 
				-The
			
 
				-<TT>Mach</TT>
			
 
				-data structure contains a few architecture-specific parameters
			
 
				-and a description of the processor register set.
			
 
				-The size of the structure
			
 
				-varies with the size of the register
			
 
				-set but is generally small.
			
 
				-The
			
 
				-<TT>Machdata</TT>
			
 
				-data structure contains
			
 
				-a jump table of architecture-dependent functions;
			
 
				-the amount of code and data referenced by this table
			
 
				-is usually large.
			
 
				-<H4>Libmach Source Code Organization
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>libmach</TT>
			
 
				-library provides four classes of functionality:
			
 
				-<br>&#32;<br>
			
 
				-<DL COMPACT>
			
 
				-<DT>Header and Symbol Table Decoding - <DD>
			
 
				-Files
			
 
				-<TT>executable.c</TT>
			
 
				-and
			
 
				-<TT>sym.c</TT>
			
 
				-contain code to interpret the header and
			
 
				-symbol tables of
			
 
				-an executable file or executing image.
			
 
				-Function
			
 
				-<TT>crackhdr</TT>
			
 
				-decodes the header,
			
 
				-reformats the
			
 
				-information into an
			
 
				-<TT>Fhdr</TT>
			
 
				-data structure, and points
			
 
				-global variable
			
 
				-<TT>mach</TT>
			
 
				-to the
			
 
				-<TT>Mach</TT>
			
 
				-data structure of the target architecture.
			
 
				-The symbol table processing
			
 
				-uses the data in the
			
 
				-<TT>Fhdr</TT>
			
 
				-structure to decode the symbol table.
			
 
				-A variety of symbol table access functions then support
			
 
				-queries on the reformatted table.
			
 
				-<DT>Debugger Support - <DD>
			
 
				-Files named
			
 
				-<TT></TT><I>m</I><TT>.c</TT><I>,
			
 
				-where
			
 
				-</I><I>m</I><I>
			
 
				-is the code letter assigned to the architecture,
			
 
				-contain the initialized
			
 
				-</I><TT>Mach</TT><I>
			
 
				-data structure and the definition of the register
			
 
				-set for each architecture.
			
 
				-Architecture-specific debugger support functions and
			
 
				-an initialized
			
 
				-</I><TT>Machdata</TT><I>
			
 
				-structure are stored in
			
 
				-files named
			
 
				-</I><TT></TT><I>m</I><TT>db.c</TT><I>.
			
 
				-Files
			
 
				-</I><TT>machdata.c</TT><I>
			
 
				-and
			
 
				-</I><TT>setmach.c</TT><I>
			
 
				-contain debugger support functions shared
			
 
				-by multiple architectures.
			
 
				-<DT>Architecture-Independent Access - <DD>
			
 
				-Files
			
 
				-</I><TT>map.c</TT><I>,
			
 
				-</I><TT>access.c</TT><I>,
			
 
				-and
			
 
				-</I><TT>swap.c</TT><I>
			
 
				-provide accesses through a relocation map
			
 
				-to data in an executable file or executing image.
			
 
				-Byte-swapping is performed as needed.  Global variables
			
 
				-</I><TT>mach</TT><I>
			
 
				-and
			
 
				-</I><TT>machdata</TT><I>
			
 
				-must point to the
			
 
				-</I><TT>Mach</TT><I>
			
 
				-and
			
 
				-</I><TT>Machdata</TT><I>
			
 
				-data structures of the target architecture.
			
 
				-<DT>Object File Interpretation - <DD>
			
 
				-These files contain functions to identify the
			
 
				-target architecture of an
			
 
				-intermediate object file
			
 
				-and extract references to symbols.  File
			
 
				-</I><TT>obj.c</TT><I>
			
 
				-contains code common to all architectures;
			
 
				-file
			
 
				-</I><TT></TT><I>m</I><TT>obj.c</TT><I>
			
 
				-contains the architecture-specific source code
			
 
				-for the machine with code character
			
 
				-</I><I>m</I><I>.
			
 
				-</dl>
			
 
				-</I><br>&#32;<br>
			
 
				-The
			
 
				-<TT>Machdata</TT>
			
 
				-data structure is primarily a jump
			
 
				-table of architecture-dependent debugger support
			
 
				-functions. Functions select the
			
 
				-<TT>Machdata</TT>
			
 
				-structure for a target architecture based
			
 
				-on the value of the
			
 
				-<TT>type</TT>
			
 
				-code in the
			
 
				-<TT>Fhdr</TT>
			
 
				-structure or the name of the architecture.
			
 
				-The jump table provides functions to swap bytes, interpret
			
 
				-machine instructions,
			
 
				-perform stack
			
 
				-traces, find stack frames, format floating point
			
 
				-numbers, and decode machine exceptions.  Some functions, such as
			
 
				-machine exception decoding, are idiosyncratic and must be
			
 
				-supplied for each architecture.  Others depend
			
 
				-on the compiler run-time model and several
			
 
				-architectures may share code common to a model.  For
			
 
				-example, many architectures share the code to
			
 
				-process the fixed-frame stack model implemented by
			
 
				-several of the compilers.
			
 
				-Finally, some
			
 
				-functions, such as byte-swapping, provide a general capability and
			
 
				-the jump table need only select an implementation appropriate
			
 
				-to the architecture.
			
 
				-<br>&#32;<br>
			
 
				-<H4>Adding Application Support for a New Architecture
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-This section describes the
			
 
				-steps required to add application-level
			
 
				-support for a new architecture.
			
 
				-We assume
			
 
				-the kernel, compilers, loaders and system libraries
			
 
				-for the new architecture are already in place.  This
			
 
				-implies that a code-character has been assigned and
			
 
				-that the architecture-specific headers have been
			
 
				-updated.
			
 
				-With the exception of two programs,
			
 
				-application-level changes are confined to header
			
 
				-files and the source code in
			
 
				-<TT>/sys/src/libmach</TT>.
			
 
				-<br>&#32;<br>
			
 
				-<DL COMPACT>
			
 
				-<DT>1.<DD>
			
 
				-Begin by updating the application library
			
 
				-header file in
			
 
				-<TT>/sys/include/mach.h</TT>.
			
 
				-Add the following symbolic codes to the
			
 
				-<TT>enum</TT>
			
 
				-statement near the beginning of the file:
			
 
				-<DL><DD>
			
 
				-<DT>*<DD>
			
 
				-The processor type code, e.g., 
			
 
				-<TT>MSPARC</TT>.
			
 
				-<DT>*<DD>
			
 
				-The type of the executable.  There are usually
			
 
				-two codes needed: one for a bootable
			
 
				-executable (i.e., a kernel) and one for an
			
 
				-application executable.
			
 
				-<DT>*<DD>
			
 
				-The disassembler type code.  Add one entry for
			
 
				-each supported disassembler for the architecture.
			
 
				-<DT>*<DD>
			
 
				-A symbolic code for the object file.
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL COMPACT>
			
 
				-<DT>2.<DD>
			
 
				-In a file named
			
 
				-<TT>/sys/src/libmach/</TT><I>m</I><TT>.c</TT><I>
			
 
				-(where
			
 
				-</I><I>m</I><I>
			
 
				-is the identifier character assigned to the architecture),
			
 
				-initialize
			
 
				-</I><TT>Reglist</TT><I>
			
 
				-and
			
 
				-</I><TT>Mach</TT><I>
			
 
				-data structures with values defining
			
 
				-the register set and various system parameters.
			
 
				-The source file for a similar architecture
			
 
				-can serve as a template.
			
 
				-Most of the fields of the
			
 
				-</I><TT>Mach</TT><I>
			
 
				-data structure are obvious
			
 
				-but a few require further explanation.
			
 
				-<DL><DD>
			
 
				-<DT></I><TT>kbase</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of the kernel 
			
 
				-</I><TT>ublock</TT><I>.
			
 
				-The debuggers
			
 
				-assume the first entry of the kernel
			
 
				-</I><TT>ublock</TT><I>
			
 
				-points to the
			
 
				-</I><TT>Proc</TT><I>
			
 
				-structure for a kernel thread.
			
 
				-<DT></I><TT>ktmask</TT><I> - <DD>
			
 
				-This field
			
 
				-is a bit mask used to calculate the kernel text address from
			
 
				-the kernel 
			
 
				-</I><TT>ublock</TT><I>
			
 
				-address.
			
 
				-The first page of the
			
 
				-kernel text segment is calculated by
			
 
				-ANDing
			
 
				-the negation of this mask with
			
 
				-</I><TT>kbase</TT><I>.
			
 
				-<DT></I><TT>kspoff</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the byte offset in the
			
 
				-</I><TT>Proc</TT><I>
			
 
				-data structure to the saved kernel
			
 
				-stack pointer for a suspended kernel thread.  This
			
 
				-is the offset to the 
			
 
				-</I><TT>sched.sp</TT><I>
			
 
				-field of a
			
 
				-</I><TT>Proc</TT><I>
			
 
				-table entry.
			
 
				-<DT></I><TT>kpcoff</TT><I> - <DD>
			
 
				-This field contains the byte offset into the
			
 
				-</I><TT>Proc</TT><I>
			
 
				-data structure
			
 
				-of
			
 
				-the program counter of a suspended kernel thread.
			
 
				-This is the offset to
			
 
				-field
			
 
				-</I><TT>sched.pc</TT><I>
			
 
				-in that structure.
			
 
				-<DT></I><TT>kspdelta</TT><I> and </I><TT>kpcdelta</TT><I> - <DD>
			
 
				-These fields
			
 
				-contain corrections to be added to
			
 
				-the stack pointer and program counter, respectively,
			
 
				-to properly locate the stack and next
			
 
				-instruction of a kernel thread.  These
			
 
				-values bias the saved registers retrieved
			
 
				-from the
			
 
				-</I><TT>Label</TT><I>
			
 
				-structure named
			
 
				-</I><TT>sched</TT><I>
			
 
				-in the
			
 
				-</I><TT>Proc</TT><I>
			
 
				-data structure.
			
 
				-Most architectures require no bias
			
 
				-and these fields contain zeros.
			
 
				-<DT></I><TT>scalloff</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the byte offset of the
			
 
				-</I><TT>scallnr</TT><I>
			
 
				-field in the
			
 
				-</I><TT>ublock</TT><I>
			
 
				-data structure associated with a process.
			
 
				-The
			
 
				-</I><TT>scallnr</TT><I>
			
 
				-field contains the number of the
			
 
				-last system call executed by the process.
			
 
				-The location of the field varies depending on
			
 
				-the size of the floating point register set
			
 
				-which precedes it in the
			
 
				-</I><TT>ublock</TT><I>.
			
 
				-</DL>
			
 
				-</dl>
			
 
				-</I><br>&#32;<br>
			
 
				-<DL COMPACT>
			
 
				-<DT>3.<DD>
			
 
				-Add an entry to the initialization of the
			
 
				-<TT>ExecTable</TT>
			
 
				-data structure at the beginning of file
			
 
				-<TT>/sys/src/libmach/executable.c</TT>.
			
 
				-Most architectures
			
 
				-require two entries: one for
			
 
				-a normal executable and
			
 
				-one for a bootable
			
 
				-image.  Each table entry contains:
			
 
				-<DL><DD>
			
 
				-<DT>*<DD>
			
 
				-Magic Number - 
			
 
				-The big-endian magic number assigned to the architecture in
			
 
				-<TT>/sys/include/a.out.h</TT>.
			
 
				-<DT>*<DD>
			
 
				-Name - 
			
 
				-A string describing the executable.
			
 
				-<DT>*<DD>
			
 
				-Executable type code - 
			
 
				-The executable code assigned in
			
 
				-<TT>/sys/include/mach.h</TT>.
			
 
				-<DT>*<DD>
			
 
				-<TT>Mach</TT> pointer - 
			
 
				-The address of the initialized
			
 
				-<TT>Mach</TT>
			
 
				-data structure constructed in Step 2.
			
 
				-You must also add the name of this table to the
			
 
				-list of
			
 
				-<TT>Mach</TT>
			
 
				-table definitions immediately preceding the
			
 
				-<TT>ExecTable</TT>
			
 
				-initialization.
			
 
				-<DT>*<DD>
			
 
				-Header size - 
			
 
				-The number of bytes in the executable file header.
			
 
				-The size of a normal executable header is always
			
 
				-<TT>sizeof(Exec)</TT>.
			
 
				-The size of a bootable header is
			
 
				-determined by the size of the structure
			
 
				-for the architecture defined in
			
 
				-<TT>/sys/include/bootexec.h</TT>.
			
 
				-<DT>*<DD>
			
 
				-Byte-swapping function - 
			
 
				-The address of
			
 
				-<TT>beswal</TT>
			
 
				-or
			
 
				-<TT>leswal</TT>
			
 
				-for big-endian and little-endian
			
 
				-architectures, respectively.
			
 
				-<DT>*<DD>
			
 
				-Decoder function - 
			
 
				-The address of a function to decode the header.
			
 
				-Function
			
 
				-<TT>adotout</TT>
			
 
				-decodes the common header shared by all normal
			
 
				-(i.e., non-bootable) executable files.
			
 
				-The header format of bootable
			
 
				-executable files is defined by the manufacturer and
			
 
				-a custom function is almost always
			
 
				-required to decode it.
			
 
				-Header file
			
 
				-<TT>/sys/include/bootexec.h</TT>
			
 
				-contains data structures defining the bootable
			
 
				-headers for all architectures.  If the new architecture
			
 
				-uses an existing format, the appropriate
			
 
				-decoding function should already be in
			
 
				-<TT>executable.c</TT>.
			
 
				-If the header format is unique, then
			
 
				-a new function must be added to this file.
			
 
				-Usually the decoding function for an existing
			
 
				-architecture can be adopted with minor modifications.
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<DL COMPACT>
			
 
				-<DT>4.<DD>
			
 
				-Write an object file parser and
			
 
				-store it in file
			
 
				-<TT>/sys/src/libmach/</TT><I>m</I><TT>obj.c</TT><I>
			
 
				-where
			
 
				-</I><I>m</I><I>
			
 
				-is the identifier character assigned to the architecture.
			
 
				-Two functions are required: a predicate to identify an
			
 
				-object file for the architecture and a function to extract
			
 
				-symbol references from the object code.
			
 
				-The object code format is obscure but
			
 
				-it is often possible to adopt the
			
 
				-code of an existing architecture
			
 
				-with minor modifications.
			
 
				-When these
			
 
				-functions are in hand, insert their addresses
			
 
				-in the jump table at the beginning of file
			
 
				-</I><TT>/sys/src/libmach/obj.c</TT><I>.
			
 
				-</dl>
			
 
				-</I><br>&#32;<br>
			
 
				-<DL COMPACT>
			
 
				-<DT>5.<DD>
			
 
				-Implement the required debugger support functions and
			
 
				-initialize the parameters and jump table of the
			
 
				-<TT>Machdata</TT>
			
 
				-data structure for the architecture.
			
 
				-This code is conventionally stored in
			
 
				-a file named
			
 
				-<TT>/sys/src/libmach/</TT><I>m</I><TT>db.c</TT><I>
			
 
				-where
			
 
				-</I><I>m</I><I>
			
 
				-is the identifier character assigned to the architecture.
			
 
				-The fields of the
			
 
				-</I><TT>Machdata</TT><I>
			
 
				-structure are:
			
 
				-<DL><DD>
			
 
				-<DT></I><TT>bpinst</TT><I> and </I><TT>bpsize</TT><I> - <DD>
			
 
				-These fields
			
 
				-contain the breakpoint instruction and the size
			
 
				-of the instruction, respectively.
			
 
				-<DT></I><TT>swab</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to
			
 
				-byte-swap a 16-bit value.  Choose
			
 
				-</I><TT>leswab</TT><I>
			
 
				-or
			
 
				-</I><TT>beswab</TT><I>
			
 
				-for little-endian or big-endian architectures, respectively.
			
 
				-<DT></I><TT>swal</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to
			
 
				-byte-swap a 32-bit value.  Choose
			
 
				-</I><TT>leswal</TT><I>
			
 
				-or
			
 
				-</I><TT>beswal</TT><I>
			
 
				-for little-endian or big-endian architectures, respectively.
			
 
				-<DT></I><TT>ctrace</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to perform a
			
 
				-C-language stack trace.  Two general trace functions,
			
 
				-</I><TT>risctrace</TT><I>
			
 
				-and
			
 
				-</I><TT>cisctrace</TT><I>,
			
 
				-traverse fixed-frame and relative-frame stacks,
			
 
				-respectively.  If the compiler for the
			
 
				-new architecture conforms to one of
			
 
				-these models, select the appropriate function.  If the
			
 
				-stack model is unique,
			
 
				-supply a custom stack trace function.
			
 
				-<DT></I><TT>findframe</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to locate the stack
			
 
				-frame associated with a text address.
			
 
				-Generic functions
			
 
				-</I><TT>riscframe</TT><I>
			
 
				-and
			
 
				-</I><TT>ciscframe</TT><I>
			
 
				-process fixed-frame and relative-frame stack
			
 
				-models.
			
 
				-<DT></I><TT>ufixup</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to adjust
			
 
				-the base address of the register save area.
			
 
				-Currently, only the
			
 
				-68020 requires this bias
			
 
				-to offset over the active
			
 
				-exception frame.
			
 
				-<DT></I><TT>excep</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to produce a
			
 
				-text
			
 
				-string describing the
			
 
				-current exception.
			
 
				-Each architecture stores exception
			
 
				-information uniquely, so this code must always be supplied.
			
 
				-<DT></I><TT>bpfix</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to adjust an
			
 
				-address prior to laying down a breakpoint.
			
 
				-<DT></I><TT>sftos</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to convert a single
			
 
				-precision floating point value
			
 
				-to a string.  Choose
			
 
				-</I><TT>leieeesftos</TT><I>
			
 
				-for little-endian
			
 
				-or
			
 
				-</I><TT>beieeesftos</TT><I>
			
 
				-for big-endian architectures.
			
 
				-<DT></I><TT>dftos</TT><I> - <DD>
			
 
				-This field
			
 
				-contains the address of a function to convert a double
			
 
				-precision floating point value
			
 
				-to a string.  Choose
			
 
				-</I><TT>leieeedftos</TT><I>
			
 
				-for little-endian
			
 
				-or
			
 
				-</I><TT>beieeedftos</TT><I>
			
 
				-for big-endian architectures.
			
 
				-<DT></I><TT>foll</TT><I>, </I><TT>das</TT><I>, </I><TT>hexinst</TT><I>, and </I><TT>instsize</TT><I> - <DD>
			
 
				-These fields point to functions that interpret machine
			
 
				-instructions.
			
 
				-They rely on disassembly of the instruction
			
 
				-and are unique to each architecture.
			
 
				-</I><TT>Foll</TT><I>
			
 
				-calculates the follow set of an instruction.
			
 
				-</I><TT>Das</TT><I>
			
 
				-disassembles a machine instruction to assembly language.
			
 
				-</I><TT>Hexinst</TT><I>
			
 
				-formats a machine instruction as a text
			
 
				-string of
			
 
				-hexadecimal digits.
			
 
				-</I><TT>Instsize</TT><I>
			
 
				-calculates the size, in bytes, of an instruction.
			
 
				-Once the disassembler is written, the other functions
			
 
				-can usually be implemented as trivial extensions of it.
			
 
				-</dl>
			
 
				-</I><br>&#32;<br>
			
 
				-It is possible to provide support for a new architecture
			
 
				-incrementally by filling the jump table entries
			
 
				-of the
			
 
				-<TT>Machdata</TT>
			
 
				-structure as code is written.  In general, if
			
 
				-a jump table entry contains a zero, application
			
 
				-programs requiring that function will issue an
			
 
				-error message instead of attempting to
			
 
				-call the function.  For example,
			
 
				-the
			
 
				-<TT>foll</TT>,
			
 
				-<TT>das</TT>,
			
 
				-<TT>hexinst</TT>,
			
 
				-and
			
 
				-<TT>instsize</TT>
			
 
				-jump table slots can be zeroed until a
			
 
				-disassembler is written.
			
 
				-Other capabilities, such as
			
 
				-stack trace or variable inspection,
			
 
				-can be supplied and will be available to
			
 
				-the debuggers but attempts to use the
			
 
				-disassembler will result in an error message.
			
 
				-</DL>
			
 
				-<DL COMPACT>
			
 
				-<DT>6.<DD>
			
 
				-Update the table named
			
 
				-<TT>machines</TT>
			
 
				-near the beginning of
			
 
				-<TT>/sys/src/libmach/setmach.c</TT>.
			
 
				-This table binds the
			
 
				-file type code and machine name to the
			
 
				-<TT>Mach</TT>
			
 
				-and
			
 
				-<TT>Machdata</TT>
			
 
				-structures of an architecture.
			
 
				-The names of the initialized
			
 
				-<TT>Mach</TT>
			
 
				-and
			
 
				-<TT>Machdata</TT>
			
 
				-structures built in steps 2 and 5
			
 
				-must be added to the list of
			
 
				-structure definitions immediately
			
 
				-preceding the table initialization.
			
 
				-If both Plan 9 and
			
 
				-native disassembly are supported, add
			
 
				-an entry for each disassembler to the table.  The
			
 
				-entry for the default disassembler (usually
			
 
				-Plan 9) must be first.
			
 
				-<DT>7.<DD>
			
 
				-Add an entry describing the architecture to
			
 
				-the table named
			
 
				-<TT>trans</TT>
			
 
				-near the end of
			
 
				-<TT>/sys/src/cmd/prof.c</TT>.
			
 
				-</DL>
			
 
				-<DT>8.<DD>
			
 
				-Add an entry describing the architecture to
			
 
				-the table named
			
 
				-<TT>objtype</TT>
			
 
				-near the start of
			
 
				-<TT>/sys/src/cmd/pcc.c</TT>.
			
 
				-</DL>
			
 
				-<DT>9.<DD>
			
 
				-Recompile and install
			
 
				-all application programs that include header file
			
 
				-<TT>mach.h</TT>
			
 
				-and load with
			
 
				-<TT>libmach.a</TT>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/lp.html
+++ b/sys/doc/lp.html
@@ -1,746 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>A Guide to the Lp
			
 
				-Printer Spooler
			
 
				-</H1>
			
 
				-<DL><DD><I>Paul Glick<br>
			
 
				-pg@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<P>
			
 
				-<I>Lp</I>
			
 
				-is a collection of programs used to provide an easy-to-use
			
 
				-interface for printing a variety of document types on a variety
			
 
				-of printers.
			
 
				-<I>Lp</I>
			
 
				-is the glue that connects various document language
			
 
				-translators and printer communication programs together so that
			
 
				-the users may have a consistent view of printers.
			
 
				-Most of the glue
			
 
				-is shell script, which can be easily modified.
			
 
				-The user need not
			
 
				-specify options to get sensible output in most cases.
			
 
				-<I>Lp</I>
			
 
				-is described here
			
 
				-so that others may make additions and changes.
			
 
				-</DL>
			
 
				-
			
 
				-</P>
			
 
				-<H4>1 Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Lp</I>
			
 
				-is used to format and print data on a variety of output devices.
			
 
				-The need for
			
 
				-<I>lp</I>
			
 
				-was rooted in the inability of other printer spoolers to do simple
			
 
				-tasks without a great deal of user specification of options.
			
 
				-At the time
			
 
				-<I>lp</I>
			
 
				-was written, there were several printer
			
 
				-languages, such as ImPress and PostScript, and
			
 
				-an internally developed printer that would accept
			
 
				-<I>troff</I>
			
 
				-output.
			
 
				-Now, all our printers take PostScript,
			
 
				-but printers that use HPCL and HPGL abound and
			
 
				-support for those printers may be added easily.
			
 
				-A great deal of what underlies
			
 
				-<I>lp</I>
			
 
				-is taken from BSD's
			
 
				-<I>lpr</I>
			
 
				-and System V's
			
 
				-<I>lp</I>.
			
 
				-The important features of this system are that most of the programs
			
 
				-are easily modified shell scripts and the user need not
			
 
				-learn to use the large amount of underlying software developed by others.
			
 
				-<I>Lp</I>
			
 
				-runs under Plan 9 and several flavors of
			
 
				-UNIX.
			
 
				-This document deals with
			
 
				-<I>lp</I>
			
 
				-as it relates to Plan 9.
			
 
				-<I>Lp</I>
			
 
				-was developed using both Datakit and Ethernet to transport data between machines.
			
 
				-Now only the Ethernet transport mechanism remains.
			
 
				-</P>
			
 
				-<P>
			
 
				-Text, graphics, and formatted text files are appropriately processed and
			
 
				-placed into a spool directory from which they are taken to be printed by a daemon process.
			
 
				-Additional functions include checking the status of a printer queue
			
 
				-and removing jobs from the printer queue.
			
 
				-</P>
			
 
				-<P>
			
 
				-All the shell scripts (see
			
 
				-<A href="/magic/man2html/1/rc"><I>rc</I>(1))
			
 
				-</A>associated with
			
 
				-<I>lp</I>
			
 
				-reside in the spool directory
			
 
				-<TT>/sys/lib/lp</TT>
			
 
				-except for the
			
 
				-<I>lp</I>
			
 
				-command itself, which resides in
			
 
				-<TT>/rc/bin</TT>.
			
 
				-Commands related to
			
 
				-<I>lp</I>
			
 
				-that are not shell scripts can most often be found
			
 
				-in
			
 
				-<TT>/</TT><I>cputype/bin/aux</I><TT>.
			
 
				-The directory where all the
			
 
				-</TT><I>lp</I><TT>
			
 
				-scripts  reside is defined within
			
 
				-</TT><I>lp</I><TT>
			
 
				-by the shell variable
			
 
				-</TT><TT>LPLIB</TT><TT>.
			
 
				-In the remainder of this document, file names will be specified
			
 
				-with this shell variable as their root.
			
 
				-</P>
			
 
				-</TT><H4>2 Usage
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Lp</I>
			
 
				-requires an output device to be specified
			
 
				-before it will process input.
			
 
				-This can be done in any of three ways described here.
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>1)<DD>
			
 
				-The file
			
 
				-<TT></TT>LPLIB/defdevice<TT>
			
 
				-may contain the name of a default output device.
			
 
				-This may not be practical for environments where
			
 
				-there are many printers.
			
 
				-<DT>2)<DD>
			
 
				-The user's environment variable
			
 
				-</TT><TT>LPDEST</TT><TT>
			
 
				-may be set to the name of the device to be used.
			
 
				-This is often a more practical solution when there are several printers
			
 
				-available.
			
 
				-This overrides a
			
 
				-</TT><TT>defdevice</TT><TT>
			
 
				-specification.
			
 
				-<DT>3)<DD>
			
 
				-The
			
 
				-</TT><TT>-d</TT><TT>
			
 
				-</TT><I>printer</I><TT>
			
 
				-option to the
			
 
				-</TT><I>lp</I><TT>
			
 
				-command specifies
			
 
				-</TT><I>printer</I><TT>
			
 
				-as the device to which output should be directed, overriding the
			
 
				-previous two specifications.
			
 
				-</dl>
			
 
				-</TT><P>
			
 
				-<br>
			
 
				-If
			
 
				-<I>printer</I>
			
 
				-is
			
 
				-<TT>?</TT>,
			
 
				-a list of printers and other information in the
			
 
				-<TT>devices</TT>
			
 
				-file is printed, as shown in Figure 1.
			
 
				-Quote the question mark to prevent it from being
			
 
				-interpreted by the shell language as a metacharacter.
			
 
				-
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% lp -d'?'
			
 
				-device   location  host             class
			
 
				-fn       2C-501    helix            post/2+600dpi+duplex
			
 
				-pcclone  -         -                post+nohead
			
 
				-peacock  2C-501    cetus            post/2+300dpi+nohead+color
			
 
				-ps83     st8_fl3   rice             post+300dpi+reverse
			
 
				-psu      2C-501    cetus            post/2+1200dpi
			
 
				-     .
			
 
				-     .
			
 
				-     .
			
 
				-%
			
 
				-</PRE></TT></DL>
			
 
				-<I>Figure 1.  Sample listing of installed printers</I>
			
 
				-</P>
			
 
				-<P>
			
 
				-Normally,
			
 
				-<I>lp</I>
			
 
				-uses the
			
 
				-<TT>file</TT>
			
 
				-command to figure out what type of input it is receiving.
			
 
				-This is done within the
			
 
				-<TT>generic</TT>
			
 
				-process which is discussed later in this paper in the
			
 
				-<B>Process directory</B>
			
 
				-section.
			
 
				-To select a specific input processor the
			
 
				-<TT>-p</TT><I>process</I>
			
 
				-option is used where
			
 
				-<I>process</I>
			
 
				-is one of the shell scripts in the
			
 
				-<TT>process</TT>
			
 
				-directory.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-Troff
			
 
				-output can be printed, in this case, on printer
			
 
				-<I>fn</I>
			
 
				-with
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% troff -ms lp.ms | lp -dfn
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-A file can be converted to PostScript using the pseudo-printer
			
 
				-<TT>stdout</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% troff -ms lp.ms | lp -dstdout &gt; lp.ps
			
 
				-</PRE></TT></DL>
			
 
				-LaTeX (and analogously TeX)
			
 
				-documents are printed in two steps:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% latex lp.tex
			
 
				-     .
			
 
				-     .
			
 
				-% lp lp.dvi
			
 
				-     .
			
 
				-     .
			
 
				-%
			
 
				-</PRE></TT></DL>
			
 
				-LaTeX
			
 
				-produces a `.dvi' file and
			
 
				-does not permit the use of a pipe
			
 
				-connection to the standard input of
			
 
				-<I>lp</I>.
			
 
				-To look at the status and queue of a device, use
			
 
				-<TT>-q</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% lp -dpsu -q
			
 
				-daemon status:
			
 
				-:  67.17% sent
			
 
				-printer status:
			
 
				-%%[ status: busy; source: lpd ]%%
			
 
				-
			
 
				-queue on cetus:
			
 
				-job		user	try	size
			
 
				-rice29436.1	pg	0	17454
			
 
				-slocum17565.1	ches	1	49995
			
 
				-%
			
 
				-</PRE></TT></DL>
			
 
				-This command can print the status and queue of the local
			
 
				-and remote hosts.
			
 
				-Administrators should be advised that, when working in an environment where the
			
 
				-<I>lp</I>
			
 
				-spool directory is shared among the local and remote hosts,
			
 
				-no spooling should be done on the local hosts.
			
 
				-The format of the status and queue printout is up to the administrator.
			
 
				-The job started above can be killed with
			
 
				-<TT>-k</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-$ lp -dpsu -k rice29436.1
			
 
				-rice29436.1 removed from psu queue on cetus
			
 
				-</PRE></TT></DL>
			
 
				-<H4>3 Options
			
 
				-</H4>
			
 
				-<P>
			
 
				-There are options available to modify the way in which a job is handled.
			
 
				-It is the job of the
			
 
				-<I>lp</I>
			
 
				-programs to convert the option settings so they may be used by each of the
			
 
				-different translation and interface programs.
			
 
				-Not all options are applicable to all printer environments.
			
 
				-Table 1 lists the standard
			
 
				-<I>lp</I>
			
 
				-options, the shell variable settings, and description of the options.
			
 
				-
			
 
				-<br>&#32;<br>
			
 
				-<br><img src="data.19118320.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-<I>Table 1. Lp Option List</I>
			
 
				-<br>&#32;<br>
			
 
				-
			
 
				-</P>
			
 
				-<H4>4 Devices file
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>devices</TT>
			
 
				-file is found in the spool directory.
			
 
				-Each line in the file is composed of 12 fields, separated
			
 
				-by tabs or spaces, that describe the attributes
			
 
				-of the printer and how it should be serviced.
			
 
				-Within the
			
 
				-<TT>lp</TT>
			
 
				-command, a shell variable is set for each attribute;
			
 
				-the following list describes them:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><TT>0<DD>
			
 
				-LPDEST0</TT> is the name of the device as given to
			
 
				-<I>lp</I>
			
 
				-with the
			
 
				-<TT>-d</TT>
			
 
				-option
			
 
				-or as specified by the shell environment variable
			
 
				-<TT>LPDEST</TT>
			
 
				-or as specified by
			
 
				-the file
			
 
				-<TT></TT>LPLIB/defdevice<TT>.
			
 
				-This name is used in creating directories and log files that are associated with
			
 
				-the printer's operation.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-LOC0</TT><TT> just describes where the printer is physically located.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-DEST_HOST0</TT><TT> is the host from which the files are printed.
			
 
				-Files may be spooled on other machines before being transferred to the
			
 
				-destination host.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-OUT_DEV0</TT><TT> is the physical device name or network address needed by the printer daemon
			
 
				-to connect to the printer.
			
 
				-This field depends on the requirements of the daemon and may contain a `-'
			
 
				-if not required.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-SPEED0</TT><TT> is the baud rate setting for the port.
			
 
				-This field depends on the requirements of the daemon and may contain a `-'
			
 
				-if not required.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-LPCLASS0</TT><TT> is used to encode minor printer differences.
			
 
				-The keyword
			
 
				-</TT><TT>reverse</TT><TT>
			
 
				-is used by some of the preprocessors
			
 
				-to reverse the order the pages are printed to accommodate different output
			
 
				-trays (either face up or face down).
			
 
				-The keyword
			
 
				-</TT><TT>nohead</TT><TT>
			
 
				-is used to suppress the header page.
			
 
				-This is used for special and color printers.
			
 
				-The keyword
			
 
				-</TT><TT>duplex</TT><TT>
			
 
				-is used to coax double sided output from duplex printers.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-LPPROC0</TT><TT> is the command from the
			
 
				-</TT><TT>LPLIB/process</TT><TT>
			
 
				-directory to be used to convert input to a format
			
 
				-that will be accepted by the device.
			
 
				-The preprocessor is invoked by the spooler.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-SPOOLER0</TT><TT> is the command from the
			
 
				-</TT><TT>LPLIB/spooler</TT><TT>
			
 
				-directory which will select files using the
			
 
				-</TT><TT>SCHED</TT><TT>
			
 
				-command and invoke the
			
 
				-</TT><TT>LPPROC</TT><TT>
			
 
				-command, putting its output
			
 
				-into the remote spool directory.
			
 
				-The output is sent directly to the spool directory on the
			
 
				-destination machine to avoid conflicts when client and
			
 
				-server machines share spool directories.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-STAT0</TT><TT> is the command from the
			
 
				-</TT><TT>LPLIB/stat</TT><TT>
			
 
				-directory that prints the status of the device and the list of jobs
			
 
				-waiting on the queue for the device.
			
 
				-The status information depends on what is available from the printer
			
 
				-and interface software.
			
 
				-The queue information should be changed to show information
			
 
				-useful in tracking down problems.
			
 
				-The
			
 
				-</TT><TT>SCHED</TT><TT>
			
 
				-command is used to show the jobs in the order
			
 
				-in which they will be printed.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-KILL0</TT><TT> is the command from the
			
 
				-</TT><TT>LPLIB/kill</TT><TT>
			
 
				-directory that removes jobs from the queue.
			
 
				-The jobs to be removed are given as arguments to the
			
 
				-</TT><I>lp</I><TT>
			
 
				-command.
			
 
				-When possible, it should also abort the currently running job
			
 
				-if it has to be killed.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-DAEMON0</TT><TT> is the command from the
			
 
				-</TT><TT>LPLIB/daemon</TT><TT>
			
 
				-directory that is meant to run asynchronously to remove
			
 
				-jobs from the queue.
			
 
				-Jobs may either be passed on to another host or sent to the
			
 
				-printing device.
			
 
				-</TT><I>Lp</I><TT>
			
 
				-always tries to start a daemon process when one is specified.
			
 
				-<DT></TT><TT>0<DD>
			
 
				-SCHED0</TT><TT> is the command from the
			
 
				-</TT><TT>LPLIB/sched</TT><TT>
			
 
				-directory that is used to present the job names to the
			
 
				-daemon and stat programs
			
 
				-in some order, e.g., first-in-first-out, smallest first.
			
 
				-</dl>
			
 
				-</TT><H4>5 Support programs
			
 
				-</H4>
			
 
				-<P>
			
 
				-The following sections describe the basic functions of the programs
			
 
				-that are found in the subdirectories of
			
 
				-<TT></TT><I>LPLIB</I><TT>.
			
 
				-The programs in a specific directory vary with the
			
 
				-type of output device or networks that have to be used.
			
 
				-</P>
			
 
				-</TT><H4>5.1 Process directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>generic</TT>
			
 
				-preprocessor
			
 
				-is the default preprocessor for most printers.
			
 
				-It uses the
			
 
				-<A href="/magic/man2html/1/file"><I>file</I>(1)
			
 
				-</A>command to determine the format of the input file.
			
 
				-The appropriate preprocessor is then selected to transform the
			
 
				-file to a format suitable for the printer.
			
 
				-</P>
			
 
				-<P>
			
 
				-Here is a list of some of the preprocessors and
			
 
				-a description of their function.
			
 
				-A complete list of preprocessors and their descriptions can be found in the manual page
			
 
				-<A href="/magic/man2html/8/lp"><I>lp</I>(8).
			
 
				-</A><br>&#32;<br>
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><TT>dvipost</TT><DD>
			
 
				-Converts TeX or LaTeX output (<TT>.dvi</TT> files) to PostScript
			
 
				-<DT><TT>ppost</TT><DD>
			
 
				-Converts UTF text to PostScript.
			
 
				-The default font is Courier with Lucida fonts filling in
			
 
				-the remainder of the (available) Unicode character space.
			
 
				-<DT><TT>tr2post</TT><DD>
			
 
				-Converts (device independent) troff output for the device type
			
 
				-<TT>utf</TT>.
			
 
				-See
			
 
				-<TT>/sys/lib/troff/font/devutf</TT>
			
 
				-directory for troff font width table descriptions.
			
 
				-See also the
			
 
				-<TT>/sys/lib/postscript/troff</TT>
			
 
				-directory for mappings of
			
 
				-troff
			
 
				-<TT>UTF</TT>
			
 
				-character space to PostScript font space.
			
 
				-<DT><TT>p9bitpost</TT><DD>
			
 
				-Converts Plan 9 bitmaps (see
			
 
				-<I>bitfile</I>(9.6))
			
 
				-to PostScript.
			
 
				-<DT><TT>g3post</TT><DD>
			
 
				-Converts fax (CCITT-G31 format) to PostScript.
			
 
				-<DT><TT>hpost</TT><DD>
			
 
				-Does header page processing and page reversal processing, if
			
 
				-necessary.
			
 
				-Page reversal is done here so the header page always comes
			
 
				-out at the beginning of the job.
			
 
				-Header page processing is very location-dependent.
			
 
				-</dl>
			
 
				-<H4>5.2 Spool directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>generic</TT>
			
 
				-spooler is responsible for executing the preprocessor
			
 
				-and directing its output to a file in the printer's queue.
			
 
				-An additional file is created containing information such as the system name,
			
 
				-user id, job number, and number of times this job was attempted.
			
 
				-</P>
			
 
				-<P>
			
 
				-Certain printer handling programs do not require separate
			
 
				-preprocessing and spooling.
			
 
				-For such circumstances a
			
 
				-<TT>nospool</TT>
			
 
				-spooler is available that just executes the preprocessing program.
			
 
				-The processing and spooling functions are assumed by this program and the output is sent to
			
 
				-<TT>OUT_DEV</TT>
			
 
				-or standard output if
			
 
				-<TT>OUT_DEV</TT>
			
 
				-is '-'.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>pcclone</TT>
			
 
				-spooler is used to send print jobs directly to a printer connected
			
 
				-to a 386-compatible printer port (see
			
 
				-<A href="/magic/man2html/3/lpt"><I>lpt</I>(3)).
			
 
				-</A></P>
			
 
				-<H4>5.3 Stat directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-The function of the shell scripts in the
			
 
				-<TT>stat</TT>
			
 
				-directory is to present status information about the
			
 
				-printer and its queue.
			
 
				-When necessary, the
			
 
				-<TT>stat</TT>
			
 
				-scripts may be designed
			
 
				-to return information about the local queue as well as the remote queue.
			
 
				-This is not done on Plan 9 because many systems share the same queue directory.
			
 
				-The scheduler is used to print the queue in the order in which the jobs
			
 
				-will be executed.
			
 
				-</P>
			
 
				-<H4>5.4 Kill directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>kill</TT>
			
 
				-scripts receive command line arguments passed to them by
			
 
				-<I>lp</I>
			
 
				-and remove the job and id files which match the arguments
			
 
				-for the particular queue.
			
 
				-When a job is killed, the generic kill procedure:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>1)<DD>
			
 
				-kills the daemon for this queue if the job being killed
			
 
				-is first in the queue,
			
 
				-<DT>2)<DD>
			
 
				-removes the files associated with the job from the queue,
			
 
				-<DT>3)<DD>
			
 
				-attempts to restart the daemon.
			
 
				-</dl>
			
 
				-<H4>5.5 Daemon directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>daemon</TT>
			
 
				-shell scripts are the last to be invoked by
			
 
				-<I>lp</I>
			
 
				-if the
			
 
				-<TT>-Q</TT>
			
 
				-option has not been given.
			
 
				-The daemon process is executed asynchronously
			
 
				-with its standard output and standard error appended to
			
 
				-the printer log file.
			
 
				-The log file is described in a subsequent section.
			
 
				-Because the daemon runs asynchronously, it must
			
 
				-catch signals that could cause it to terminate abnormally.
			
 
				-The daemon first checks to see that it is the only one running
			
 
				-by using the
			
 
				-<TT>LOCK</TT>
			
 
				-program found in the
			
 
				-<TT>/</TT>cputype/bin/aux<TT>
			
 
				-directory.
			
 
				-The
			
 
				-</TT><TT>LOCK</TT><TT>
			
 
				-command creates a
			
 
				-</TT><TT>LOCK</TT><TT>
			
 
				-file in the printer's queue directory.
			
 
				-The daemon then executes the scheduler to obtain the name of the
			
 
				-next job on the queue.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-The processing of jobs may entail transfer to another host
			
 
				-or transmission to a printer.
			
 
				-The details of this are specific to the individual daemons.
			
 
				-If a job is processed without error, it is removed from the queue.
			
 
				-If a job does not succeed, the associated files may be
			
 
				-moved to a printer specific directory in
			
 
				-<TT></TT><I>LPLIB/prob</I><TT>.
			
 
				-In either case, the daemon can make an entry in the printer's
			
 
				-log file.
			
 
				-Before exiting, the daemon should clean up lock files by calling
			
 
				-</TT><TT>UNLOCK</TT><TT>.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-Several non-standard daemon programs have been designed
			
 
				-to suit various requirements and whims.
			
 
				-One such program announces job completion and empty paper trays
			
 
				-by causing icons to appear in people's
			
 
				-<TT>seemail</TT>
			
 
				-window.
			
 
				-Another, using a voice synthesizer, makes verbal announcements.
			
 
				-Other daemons may be designed to taste.
			
 
				-</P>
			
 
				-<H4>5.6 Sched directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-The scheduler must decide which job files should be executed and
			
 
				-in what order.
			
 
				-The most commonly used scheduler program is
			
 
				-<TT>FIFO</TT>,
			
 
				-which looks like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-ls -tr * | sed -n -e 's/.*  *//' \
			
 
				-  -e '/^[0-9][0-9]*.[1-9][0-9]*$/p'
			
 
				-</PRE></TT></DL>
			
 
				-This lists all the job files in this printer's queue in modification
			
 
				-time order.
			
 
				-Jobs entering the queue have a dot (.) prefixed to their name
			
 
				-to keep the scheduler from selecting them before they are complete.
			
 
				-</P>
			
 
				-<H4>6 Where Things Go Wrong
			
 
				-</H4>
			
 
				-<P>
			
 
				-There are four directories where
			
 
				-<I>lp</I>
			
 
				-writes files.
			
 
				-On the Plan 9 release these directories may be found
			
 
				-in a directory on a scratch filesystem that is not
			
 
				-backed-up.
			
 
				-This directory is
			
 
				-<TT>/n/emelieother/lp</TT>.
			
 
				-It is built on top of a file system
			
 
				-<TT>other</TT>
			
 
				-that is mounted on the file server
			
 
				-<TT>emelie</TT>.
			
 
				-The four directories in
			
 
				-this scratch directory
			
 
				-are
			
 
				-<TT>log</TT>,
			
 
				-<TT>prob</TT>,
			
 
				-<TT>queue</TT>,
			
 
				-and
			
 
				-<TT>tmp</TT>.
			
 
				-<I>Lp</I>
			
 
				-binds (see
			
 
				-<A href="/magic/man2html/1/bind"><I>bind</I>(1))
			
 
				-</A>the first three into the directory
			
 
				-<TT>/sys/lib/lp</TT>
			
 
				-for its processes and their children.
			
 
				-The
			
 
				-<TT>tmp</TT>
			
 
				-directory is bound to the
			
 
				-<TT>/tmp</TT>
			
 
				-directory so that the lp daemons, which run as user `none',
			
 
				-may write into this directory.
			
 
				-</P>
			
 
				-<P>
			
 
				-On any new installation, it is important that these directories
			
 
				-be set up and that the
			
 
				-<I>/rc/bin/lp</I>
			
 
				-command be edited to reflect the change.
			
 
				-If you do not have a scratch filesystem for these directories,
			
 
				-create the four directories
			
 
				-<TT>log</TT>,
			
 
				-<TT>prob</TT>,
			
 
				-<TT>queue</TT>,
			
 
				-and
			
 
				-<TT>tmp</TT>
			
 
				-in
			
 
				-<TT></TT>LPLIB<TT>
			
 
				-</TT><TT>(/sys/lib/lp)</TT><TT>
			
 
				-so that they are writable by anyone.
			
 
				-</P>
			
 
				-</TT><H4>6.1 Log directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-The log files for a particular
			
 
				-<I>printer</I>
			
 
				-appear in a subdirectory of the spool directory
			
 
				-<TT>log</TT>/<I>printer</I>.
			
 
				-There are currently two types of log files.
			
 
				-One is for the daemon to log errors and successful completions
			
 
				-of jobs.
			
 
				-These are named
			
 
				-<I>printer.day</I>
			
 
				-where
			
 
				-<I>day</I>
			
 
				-is the three letter abbreviation for the day of the week.
			
 
				-These are overwritten once a week to avoid the need for regular
			
 
				-cleanup.
			
 
				-The other type of log file contains the status of the printer and
			
 
				-is written by the program that communicates with the printer itself.
			
 
				-These are named
			
 
				-<I>printer</I>.<TT>st</TT>.
			
 
				-These are overwritten with each new job and are saved in the
			
 
				-<TT></TT><I>LPLIB/prob</I><TT>
			
 
				-directory along with the job under circumstances described below.
			
 
				-When a printer does not appear to be functioning these files are the
			
 
				-place to look first.
			
 
				-</P>
			
 
				-</TT><H4>6.2 Prob directory
			
 
				-</H4>
			
 
				-<P>
			
 
				-When a job fails to produce output,
			
 
				-the log files should be checked for any obvious problems.
			
 
				-If none can be found, a directory with full read and write permissions
			
 
				-should be created with the name of the printer in the
			
 
				-<TT></TT>LPLIB/prob<TT>
			
 
				-directory.
			
 
				-Subsequent failure of a job will cause the daemon to leave a
			
 
				-copy of the job and the printer communication log in
			
 
				-</TT><TT></TT><I>LPLIB/prob/</I><TT></TT><I>printer</I><TT>
			
 
				-directory.
			
 
				-It is common for a printer to enter states from which
			
 
				-it cannot be rescued except by manually cycling the power on the printer.
			
 
				-After this is done the print daemon should recover by itself
			
 
				-(give it a minute).
			
 
				-If it does not recover, remove the
			
 
				-</TT><TT>LOCK</TT><TT>
			
 
				-file from the printer's spool directory to kill the daemon.
			
 
				-The daemon will have to be restarted by sending another job
			
 
				-to the printer.
			
 
				-For PostScript printers just use:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo '%!PS' | lp
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-</TT><H4>6.3 Repairing Stuck Daemons
			
 
				-</H4>
			
 
				-<P>
			
 
				-There are conditions that occur which are not handled
			
 
				-by the daemons.
			
 
				-One such problem can only be described as the printer entering a
			
 
				-comatose state.
			
 
				-The printer does not respond to any messages sent to it.
			
 
				-The daemon should recover from the reset and an error message
			
 
				-will appear in the log files.
			
 
				-If all else fails, one can kill the first job in the queue
			
 
				-or remove the
			
 
				-<TT>LOCK</TT>
			
 
				-file from the queue directory.
			
 
				-This will kill the daemon, which will have to be restarted.
			
 
				-</P>
			
 
				-<H4>7 Interprocessor Communication
			
 
				-</H4>
			
 
				-<P>
			
 
				-A Plan 9 CPU server can be set up as a printer's spooling host.
			
 
				-That is, the machine where jobs are spooled and from which those jobs
			
 
				-are sent directly to the printer.
			
 
				-To do this, the CPU must listen on TCP port 515 which is the well known
			
 
				-port for the BSD line printer daemon.
			
 
				-The file
			
 
				-<TT>/rc/bin/service/tcp515</TT>
			
 
				-is executed when a call comes in on that port.
			
 
				-The Plan 9
			
 
				-<TT>lpdaemon</TT>
			
 
				-will accept jobs sent from BSD LPR/LPD systems.
			
 
				-The
			
 
				-<TT>/</TT>cputype/bin/aux/lpdaemon<TT>
			
 
				-command is executed from the service call and it accepts print jobs, requests for status,
			
 
				-and requests to kill jobs.
			
 
				-The command
			
 
				-</TT><TT>/</TT><I>cputype/bin/aux/lpsend</I><TT>
			
 
				-is used to send jobs
			
 
				-to other Plan 9 machines and is usually called from
			
 
				-within a spooler or daemon script.
			
 
				-The command
			
 
				-</TT><TT>/</TT><TT>cputype/bin/aux/lpdsend</TT><TT>
			
 
				-is used to send jobs
			
 
				-to machines and printers that use the BSD LPR/LPD protocol and is also usually called from
			
 
				-within a spooler or daemon script.
			
 
				-</P>
			
 
				-</TT><H4>8 Acknowledgements
			
 
				-</H4>
			
 
				-<P>
			
 
				-Special thanks to Rich Drechsler for supplying and maintaining most of
			
 
				-the PostScript translation and interface programs,
			
 
				-without which
			
 
				-<I>lp</I>
			
 
				-would be an empty shell.
			
 
				-Tomas Rokicki provided the
			
 
				-TeX
			
 
				-to PostScript
			
 
				-translation program.
			
 
				-</P>
			
 
				-<H4>9 References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Camp86] Ralph Campbell,
			
 
				-``4.3BSD Line Printer Spooler Manual'', UNIX System Manager's Manual,
			
 
				-May, 1986, Berkeley, CA
			
 
				-<br>
			
 
				-[RFC1179] Request for Comments: 1179, Line Printer Daemon Protocol, Aug 1990
			
 
				-<br>
			
 
				-[Sys5] System V manual, date unknown
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/mk.html
+++ b/sys/doc/mk.html
@@ -1,1594 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Maintaining Files on Plan 9 with Mk
			
 
				-</H1>
			
 
				-<DL><DD><I>Andrew G. Hume<br>
			
 
				-andrew@research.att.com<br>
			
 
				-Bob Flandrena<br>
			
 
				-bobf@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<P>
			
 
				-<TT>Mk</TT>
			
 
				-is a tool
			
 
				-for describing and maintaining dependencies between
			
 
				-files.
			
 
				-It is similar to the
			
 
				-UNIX program
			
 
				-<TT>make</TT>,
			
 
				-but provides several extensions.
			
 
				-<TT>Mk</TT>'<TT>s</TT>
			
 
				-flexible rule specifications, implied
			
 
				-dependency derivation, and parallel
			
 
				-execution of maintenance actions are
			
 
				-well-suited to the Plan 9 environment.
			
 
				-Almost all Plan 9 maintenance procedures
			
 
				-are automated using
			
 
				-<TT>mk</TT>.
			
 
				-</DL>
			
 
				-</P>
			
 
				-<H4>1 Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-This document describes how
			
 
				-<TT>mk</TT>,
			
 
				-a program functionally similar to
			
 
				-<TT>make</TT>
			
 
				-[Feld79],
			
 
				-is used to maintain dependencies between
			
 
				-files in Plan 9.
			
 
				-<TT>Mk</TT>
			
 
				-provides several extensions to the
			
 
				-capabilities of its predecessor that work
			
 
				-well in Plan 9's distributed, multi-architecture
			
 
				-environment.  It
			
 
				-exploits the power of multiprocessors by executing
			
 
				-maintenance actions in parallel and interacts with
			
 
				-the Plan 9 command interpreter
			
 
				-<TT>rc</TT>
			
 
				-to provide a powerful set of maintenance tools.
			
 
				-It accepts pattern-based dependency specifications
			
 
				-that are not limited to describing
			
 
				-rules for program construction.
			
 
				-The result is a tool that is flexible enough to
			
 
				-perform many maintenance tasks including
			
 
				-database maintenance,
			
 
				-hardware design, and document production.
			
 
				-</P>
			
 
				-<P>
			
 
				-This document begins by discussing 
			
 
				-the syntax of the control file,
			
 
				-the pattern matching capabilities, and
			
 
				-the special rules for maintaining archives.
			
 
				-A brief description of
			
 
				-<TT>mk</TT>'<TT>s</TT>
			
 
				-algorithm for deriving dependencies
			
 
				-is followed by a discussion
			
 
				-of the conventions used to resolve ambiguous
			
 
				-specifications.  The final sections
			
 
				-describe parallel execution
			
 
				-and special features.
			
 
				-</P>
			
 
				-<P>
			
 
				-An earlier paper [Hume87]
			
 
				-provides a detailed discussion of
			
 
				-<TT>mk</TT>'<TT>s</TT>
			
 
				-design and an appendix summarizes
			
 
				-the differences between
			
 
				-<TT>mk</TT>
			
 
				-and
			
 
				-<TT>make</TT>.
			
 
				-</P>
			
 
				-<H4>2 The <TT>Mkfile</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-<TT>Mk</TT>
			
 
				-reads a file describing relationships among files
			
 
				-and executes commands to bring the files up to date.
			
 
				-The specification file, called a
			
 
				-<TT>mkfile</TT>,
			
 
				-contains three types of statements:
			
 
				-assignments, includes, and rules.
			
 
				-Assignment and include statements are similar
			
 
				-to those in C.
			
 
				-Rules specify dependencies between a
			
 
				-<I>target</I>
			
 
				-and its
			
 
				-<I>prerequisites</I>.
			
 
				-When the target and prerequisites are files, their
			
 
				-modification times determine if they
			
 
				-are out of date.  Rules often contain a
			
 
				-<I>recipe</I>,
			
 
				-an
			
 
				-<A href="/magic/man2html/1/rc"><I>rc</I>(1)
			
 
				-</A>script that produces the target from
			
 
				-the prerequisites.
			
 
				-</P>
			
 
				-<P>
			
 
				-This simple
			
 
				-<TT>mkfile</TT>
			
 
				-produces an executable
			
 
				-from a C source file:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-CC=pcc
			
 
				-f1:	f1.c
			
 
				-	<I>CC -o f1 f1.c
			
 
				-</PRE></TT></DL>
			
 
				-The first line assigns the name of the portable ANSI/POSIX compiler
			
 
				-to the
			
 
				-</I><TT>mk</TT><I>
			
 
				-variable
			
 
				-</I><TT>CC</TT><I>;
			
 
				-subsequent references of the form
			
 
				-</I><TT></TT><I>CC</I><TT>
			
 
				-select this compiler.
			
 
				-The only rule specifies a dependence between the target file
			
 
				-</TT><TT>f1</TT><TT>
			
 
				-and the prerequisite file
			
 
				-</TT><TT>f1.c</TT><TT>.
			
 
				-If the target does not exist or if the
			
 
				-prerequisite has been modified more recently than
			
 
				-the target,
			
 
				-</TT><TT>mk</TT><TT>
			
 
				-passes the recipe to
			
 
				-</TT><TT>rc</TT><TT>
			
 
				-for execution.  Here,
			
 
				-</TT><TT>f1.c</TT><TT>
			
 
				-is compiled and loaded to produce
			
 
				-</TT><TT>f1</TT><TT>.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-The native Plan 9 environment
			
 
				-requires executables for
			
 
				-all architectures, not only the current one.
			
 
				-The Plan 9 version of the same
			
 
				-<TT>mkfile</TT>
			
 
				-looks like:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/$objtype/mkfile
			
 
				-
			
 
				-f1:	f1.$O
			
 
				-	$LD $LDFLAGS -o f1 f1.$O
			
 
				-f1.$O:	f1.c
			
 
				-	$CC $CFLAGS f1.c
			
 
				-</PRE></TT></DL>
			
 
				-The first line is an include statement
			
 
				-that replaces itself with the contents of the file
			
 
				-<TT>/</TT><I>objtype/mkfile</I><TT>.
			
 
				-The variable
			
 
				-</TT><TT></TT><TT>objtype</TT><TT>
			
 
				-is inherited from the environment and
			
 
				-contains the name of the target architecture.
			
 
				-The prototype
			
 
				-</TT><TT>mkfile</TT><TT>
			
 
				-for that architecture defines architecture-specific variables:
			
 
				-</TT><TT>CC</TT><TT>
			
 
				-and
			
 
				-</TT><TT>LD</TT><TT>
			
 
				-are the names of the compiler and loader,
			
 
				-</TT><TT>O</TT><TT>
			
 
				-is the code character of the architecture.
			
 
				-The rules compile the source file into an object
			
 
				-file and invoke the loader to produce
			
 
				-</TT><TT>f1</TT><TT>.
			
 
				-Invoking
			
 
				-</TT><TT>mk</TT><TT>
			
 
				-from the command line as follows
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% objtype=mips mk
			
 
				-vc -w f1.c
			
 
				-vl </TT><I>LDFLAGS -o f1 f1.k
			
 
				-%
			
 
				-</PRE></TT></DL>
			
 
				-produces the
			
 
				-</I><TT>mips</TT><I>
			
 
				-executable of program
			
 
				-</I><TT>f1</TT><I>
			
 
				-regardless of the current architecture type.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-We can extend the
			
 
				-<TT>mkfile</TT>
			
 
				-to build two programs:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/objtype/mkfile
			
 
				-ALL=f1 f2
			
 
				-
			
 
				-all:V:	<I>ALL
			
 
				-
			
 
				-f1:	f1.</I>O
			
 
				-	<I>LD </I>LDFLAGS -o f1 f1.<I>O
			
 
				-f1.</I>O:	f1.c
			
 
				-	<I>CC </I>CFLAGS f1.c
			
 
				-f2:	f2.<I>O
			
 
				-	</I>LD <I>LDFLAGS -o f2 f2.</I>O
			
 
				-f2.<I>O:	f2.c
			
 
				-	</I>CC <I>CFLAGS f2.c
			
 
				-</PRE></TT></DL>
			
 
				-The target
			
 
				-</I><TT>all</TT><I>,
			
 
				-modified by the
			
 
				-</I><I>attribute</I><I>
			
 
				-</I><TT>V</TT><I>,
			
 
				-builds both programs.
			
 
				-The attribute identifies 
			
 
				-</I><TT>all</TT><I>
			
 
				-as a dummy target that is
			
 
				-not related to a file of the same name;
			
 
				-its precise effect is explained later.
			
 
				-This example describes cascading dependencies:
			
 
				-the first target depends on another which depends on a third and
			
 
				-so on.
			
 
				-Here, individual rules build each
			
 
				-program; later we'll see how to do this with a
			
 
				-general rule.
			
 
				-</P>
			
 
				-</I><H4>3 Variables and the environment
			
 
				-</H4>
			
 
				-<P>
			
 
				-<TT>Mk</TT>
			
 
				-does not distinguish between its
			
 
				-internal variables and
			
 
				-<TT>rc</TT>
			
 
				-variables in the environment.
			
 
				-When
			
 
				-<TT>mk</TT>
			
 
				-starts, it imports each environment variable into a
			
 
				-<TT>mk</TT>
			
 
				-variable of the same name.  Before executing a recipe,
			
 
				-<TT>mk</TT>
			
 
				-exports all variables, including those
			
 
				-inherited from the environment,
			
 
				-to the environment in which
			
 
				-<TT>rc</TT>
			
 
				-executes the recipe.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are several ways for a
			
 
				-variable to take a value.
			
 
				-It can be set with an assignment statement,
			
 
				-inherited from the environment, or specified
			
 
				-on the command line.
			
 
				-<TT>Mk</TT>
			
 
				-also maintains several special internal variables
			
 
				-that are described in
			
 
				-<A href="/magic/man2html/1/mk"><I>mk</I>(1).
			
 
				-</A>Assignments have the following decreasing order of precedence:
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<DL><DT><DD>
			
 
				-1)  Command line assignment
			
 
				-<br>
			
 
				-2)  Assignment statement
			
 
				-<br>
			
 
				-3)  Imported from the environment
			
 
				-<br>
			
 
				-4)  Implicitly set by <TT>mk</TT>
			
 
				-</DL>
			
 
				-<br>&#32;<br>
			
 
				-For example, a command line assignment overrides
			
 
				-a value imported from the environment.
			
 
				-<P>
			
 
				-All variable values are strings.  They can be
			
 
				-used for pattern matching and
			
 
				-comparison but not for arithmetic.
			
 
				-A
			
 
				-<I>list</I>
			
 
				-is a string containing several values separated by
			
 
				-white space.  Each member is
			
 
				-handled individually during pattern matching,
			
 
				-target selection, and prerequisite evaluation.
			
 
				-</P>
			
 
				-<P>
			
 
				-A
			
 
				-<I>namelist</I>
			
 
				-is a list produced by
			
 
				-transforming the members of an existing list.
			
 
				-The transform applies a pattern to each member,
			
 
				-replacing each matched string with a new string,
			
 
				-much as in the substitute command in
			
 
				-<A href="/magic/man2html/1/sam"><I>sam</I>(1)
			
 
				-</A>or
			
 
				-<A href="/magic/man2html/1/ed"><I>ed</I>(1).
			
 
				-</A>The syntax is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-${<I>var</I>:A%B=C%D}
			
 
				-</PRE></TT></DL>
			
 
				-where
			
 
				-<I>var</I>
			
 
				-is a variable.
			
 
				-The pattern
			
 
				-<TT>A%B</TT>
			
 
				-matches a member beginning with the string
			
 
				-<I>A</I>
			
 
				-and ending with the string
			
 
				-<I>B</I>
			
 
				-with any string in between;
			
 
				-it behaves like the regular expression
			
 
				-<TT>A.*B</TT>.
			
 
				-When a member of the
			
 
				-<I>var</I>
			
 
				-list
			
 
				-matches this pattern,
			
 
				-the string
			
 
				-<I>C</I>
			
 
				-replaces
			
 
				-<I>A</I>,
			
 
				-<I>D</I>
			
 
				-replaces
			
 
				-<I>B</I>,
			
 
				-and the matched string replaces itself.
			
 
				-Any of
			
 
				-<I>A</I>,
			
 
				-<I>B</I>,
			
 
				-<I>C</I>,
			
 
				-or
			
 
				-<I>D</I>
			
 
				-may be the empty string.  In effect, a namelist is
			
 
				-generated by applying the
			
 
				-<A href="/magic/man2html/1/ed"><I>ed</I>(1)
			
 
				-</A>substitute command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	s/<I>A</I>(.*)<I>B</I>/<I>C</I>\1<I>D</I>/
			
 
				-</PRE></TT></DL>
			
 
				-to each member of a variable list.
			
 
				-</P>
			
 
				-<P>
			
 
				-Namelists are useful for generating
			
 
				-a list based on a predictable transformation.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	SRC=a.c b.c c.c
			
 
				-	OBJ=<I>{SRC:%.c=%.v}
			
 
				-</PRE></TT></DL>
			
 
				-assigns the list </I><TT>(a.v b.v c.v)</TT><I> to
			
 
				-</I><TT>OBJ</TT><I>.
			
 
				-A namelist may be used anywhere a variable is allowed
			
 
				-except in a recipe.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-Command output is assigned to a variable
			
 
				-using the normal
			
 
				-<TT>rc</TT>
			
 
				-syntax:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	var=`{rc command}
			
 
				-</PRE></TT></DL>
			
 
				-The command executes in an environment populated
			
 
				-with previously assigned variables, including those
			
 
				-inherited from
			
 
				-<TT>mk</TT>'<TT>s</TT>
			
 
				-execution environment.
			
 
				-The command may
			
 
				-be arbitrarily complex; for example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	TARG=`{ls -d *.[cy] | sed 's/..$//'}
			
 
				-</PRE></TT></DL>
			
 
				-assigns a list of the C and yacc source files in the current
			
 
				-directory, stripped of their suffix, to the variable
			
 
				-<TT>TARG</TT>.
			
 
				-</P>
			
 
				-<H4>4 The include statement
			
 
				-</H4>
			
 
				-<P>
			
 
				-The include statement
			
 
				-replaces itself with the contents of a file.
			
 
				-It is functionally similar to the C
			
 
				-<TT>#include</TT>
			
 
				-statement but uses a different syntax:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	&lt;<I>filename</I>
			
 
				-</PRE></TT></DL>
			
 
				-The contents of the file are evaluated
			
 
				-as they are read.
			
 
				-An include statement may be used anywhere except
			
 
				-in a recipe.
			
 
				-</P>
			
 
				-<P>
			
 
				-Unlike
			
 
				-<TT>make</TT>,
			
 
				-<TT>mk</TT>
			
 
				-has no built-in rules.  Instead,
			
 
				-the include statement allows generic rules
			
 
				-to be imported from a prototype
			
 
				-<TT>mkfile</TT>;
			
 
				-most Plan 9
			
 
				-<TT>mkfiles</TT>
			
 
				-use this approach [Flan95].
			
 
				-</P>
			
 
				-<H4>5 Rules
			
 
				-</H4>
			
 
				-<P>
			
 
				-A rule has four elements: targets,
			
 
				-prerequisites, attributes, and a recipe.
			
 
				-It has the form:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>targets</I>:<I>attributes</I>:<I>prerequisites</I>
			
 
				-	<I>recipe</I>
			
 
				-</PRE></TT></DL>
			
 
				-The first line, containing the
			
 
				-targets, attributes, and prerequisites is
			
 
				-the
			
 
				-<I>rule header</I>;
			
 
				-it
			
 
				-must begin at the left margin.
			
 
				-The recipe contains zero or more lines,
			
 
				-each of which begins with white space.
			
 
				-One or more targets must be specified but the
			
 
				-attributes, prerequisites, and recipe are optional.
			
 
				-A rule specifies
			
 
				-a dependency between the target(s) and its prerequisite(s),
			
 
				-the recipe brings the target(s)
			
 
				-up to date with the prerequisite(s) and
			
 
				-attributes modify
			
 
				-<TT>mk</TT>'<TT>s</TT>
			
 
				-evaluation of the dependency.
			
 
				-</P>
			
 
				-<P>
			
 
				-Normally the target is a file that depends
			
 
				-on one or more prerequisite files.
			
 
				-<TT>Mk</TT>
			
 
				-compares the modification times of each target
			
 
				-and each prerequisite; a target is considered out of date
			
 
				-when it does not exist or when a prerequisite has been modified
			
 
				-more recently.
			
 
				-When a target is out of date,
			
 
				-<TT>mk</TT>
			
 
				-executes the
			
 
				-recipe to bring it up to date.
			
 
				-When the recipe completes,
			
 
				-the modification time of the target is checked and
			
 
				-used in later dependency evaluations.
			
 
				-If the recipe does not update the target,
			
 
				-evaluation continues with the out of date target.
			
 
				-</P>
			
 
				-<P>
			
 
				-A prerequisite of one rule
			
 
				-may be the target of another.  When
			
 
				-this happens, the rules cascade
			
 
				-to define a multi-step procedure.
			
 
				-For example,
			
 
				-an executable target depends on prerequisite
			
 
				-object files, each of which is a target
			
 
				-in a rule with a C source file as the prerequisite.
			
 
				-<TT>Mk</TT>
			
 
				-follows a chain of dependencies until it encounters
			
 
				-a prerequisite that is not a target of another rule
			
 
				-or it finds a target that
			
 
				-is up to date.  It then
			
 
				-executes the recipes in reverse order to produce
			
 
				-the desired target.
			
 
				-</P>
			
 
				-<P>
			
 
				-The rule header is evaluated when the rule is read.
			
 
				-Variables are replaced by their values, namelists are
			
 
				-generated, and
			
 
				-commands are replaced by their
			
 
				-output at this time.
			
 
				-</P>
			
 
				-<P>
			
 
				-Most attributes modify
			
 
				-<TT>mk</TT>'<TT>s</TT>
			
 
				-evaluation of a rule.
			
 
				-An attribute is usually a single letter but some
			
 
				-are more complicated.
			
 
				-This paper only discusses commonly used attributes;
			
 
				-see
			
 
				-<A href="/magic/man2html/1/mk"><I>mk</I>(1)
			
 
				-</A>for a complete list.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>V</TT>
			
 
				-attribute identifies a
			
 
				-<I>virtual</I>
			
 
				-target;
			
 
				-that is, a target that is not a file.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-clean:V:
			
 
				-	rm *.$O $O.out
			
 
				-</PRE></TT></DL>
			
 
				-removes executables and compiler intermediate files.
			
 
				-The target is virtual because it does not refer to a file named
			
 
				-<TT>clean</TT>.
			
 
				-Without the attribute, the recipe would not be
			
 
				-executed if a file named
			
 
				-<TT>clean</TT>
			
 
				-existed.
			
 
				-The
			
 
				-<TT>Q</TT>
			
 
				-attribute
			
 
				-silences the printing of a recipe before
			
 
				-execution.
			
 
				-It is useful when the output of a recipe is
			
 
				-similar to the recipe:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-default:QV:
			
 
				-	echo 'No default target; use mk all or mk install'
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The recipe is an
			
 
				-<TT>rc</TT>
			
 
				-script.  It is optional but when it is
			
 
				-missing, the rule is handled specially, as described later.
			
 
				-Unlike
			
 
				-<TT>make</TT>,
			
 
				-<TT>mk</TT>
			
 
				-executes recipes without interpretation.
			
 
				-After
			
 
				-stripping the first white space character from each line
			
 
				-it passes the entire recipe to
			
 
				-<TT>rc</TT>
			
 
				-on standard input.
			
 
				-Since
			
 
				-<TT>mk</TT>
			
 
				-does not interpret a recipe,
			
 
				-escape conventions are exactly those of
			
 
				-<TT>rc</TT>.
			
 
				-Scripts for
			
 
				-<TT>awk</TT>
			
 
				-and
			
 
				-<TT>sed</TT>
			
 
				-commands can be embedded exactly as they would
			
 
				-be entered from the command line.
			
 
				-<TT>Mk</TT>
			
 
				-invokes
			
 
				-<TT>rc</TT>
			
 
				-with the
			
 
				-<TT>-e</TT>
			
 
				-flag, which causes
			
 
				-<TT>rc</TT>
			
 
				-to stop if any command
			
 
				-in the recipe exits with a non-zero status; the
			
 
				-<TT>E</TT>
			
 
				-attribute overrides this behavior and allows
			
 
				-<TT>rc</TT>
			
 
				-to continue executing in the face of errors.
			
 
				-Before a recipe is executed, variables are exported
			
 
				-to the environment where they are available to
			
 
				-<TT>rc</TT>.
			
 
				-Commands in the recipe may not read from
			
 
				-standard input because
			
 
				-<TT>mk</TT>
			
 
				-uses it internally.
			
 
				-</P>
			
 
				-<P>
			
 
				-References to a variable can yield different
			
 
				-values depending on the location of the
			
 
				-reference in the
			
 
				-<TT>mkfile</TT>.
			
 
				-<TT>Mk</TT>
			
 
				-resolves variable references
			
 
				-in assignment statements and rule headers
			
 
				-when the statement is read.  Variable references
			
 
				-in recipes are evaluated by
			
 
				-<TT>rc</TT>
			
 
				-when the recipe is executed; this
			
 
				-happens after the entire
			
 
				-<TT>mkfile</TT>
			
 
				-has been read.  The value of a variable in a recipe
			
 
				-is the last value assigned in the file.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-STRING=all
			
 
				-
			
 
				-all:VQ:
			
 
				-	echo <I>STRING
			
 
				-STRING=none
			
 
				-</PRE></TT></DL>
			
 
				-produces the message
			
 
				-</I><TT>none</TT><I>.
			
 
				-A variable assignment in a recipe
			
 
				-does not affect the value of the variable in the
			
 
				-</I><TT>mkfile</TT><I>
			
 
				-for two reasons.
			
 
				-First,
			
 
				-</I><TT>mk</TT><I>
			
 
				-does not import values from
			
 
				-the environment when a recipe completes;
			
 
				-one recipe cannot pass a value through
			
 
				-the environment to another recipe.
			
 
				-Second, no recipe is executed until 
			
 
				-</I><TT>mk</TT><I>
			
 
				-has completed its evaluation, so even if a variable
			
 
				-were changed,
			
 
				-it would not affect the dependency evaluation.
			
 
				-</P>
			
 
				-</I><H4>6 Metarules
			
 
				-</H4>
			
 
				-<P>
			
 
				-A
			
 
				-<I>metarule</I>
			
 
				-is a rule based on a pattern.
			
 
				-The pattern selects a class of target(s) and 
			
 
				-identifies related prerequisites.
			
 
				-<TT>Mk</TT>
			
 
				-metarules may select targets and prerequisites
			
 
				-based on any criterion that can be described by a pattern, not just
			
 
				-the suffix transformations associated with program
			
 
				-construction.
			
 
				-</P>
			
 
				-<P>
			
 
				-Metarule patterns are either
			
 
				-<I>intrinsic</I>
			
 
				-or regular expressions conforming to the
			
 
				-syntax of
			
 
				-<A href="/magic/man2html/6/regexp"><I>regexp</I>(6).
			
 
				-</A>The intrinsic patterns are shorthand
			
 
				-for common regular expressions.
			
 
				-The intrinsic pattern
			
 
				-<TT>%</TT>
			
 
				-matches one or more of anything; it is equivalent to
			
 
				-the regular expression
			
 
				-<TT>`.+'</TT>.
			
 
				-The other intrinsic pattern,
			
 
				-<TT>&</TT>,
			
 
				-matches one or more of any characters except <TT>`/'</TT>
			
 
				-and <TT>`.'</TT>.
			
 
				-It matches a portion of a path and is
			
 
				-equivalent to the regular expression
			
 
				-<TT>`[^./]+'</TT>.
			
 
				-An intrinsic pattern in a prerequisite references
			
 
				-the string matched by the same intrinsic pattern in the target.
			
 
				-For example, the rule
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	%.v:	%.c
			
 
				-</PRE></TT></DL>
			
 
				-says that a file ending in
			
 
				-<TT>.v</TT>
			
 
				-depends on a file of the same name with a
			
 
				-<TT>.c</TT>
			
 
				-suffix:
			
 
				-<TT>foo.v</TT>
			
 
				-depends on
			
 
				-<TT>foo.c</TT>,
			
 
				-<TT>bar.v</TT>
			
 
				-depends on
			
 
				-<TT>bar.c</TT>,
			
 
				-and so on.
			
 
				-The string matched by an intrinsic pattern in the target
			
 
				-is supplied to the recipe in the variable
			
 
				-<TT>$stem</TT>.
			
 
				-Thus the rule
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%.$O:	%.c
			
 
				-	$CC $CFLAGS $stem.c
			
 
				-</PRE></TT></DL>
			
 
				-creates an object file for the target architecture from
			
 
				-a similarly named C source file.  If several object
			
 
				-files are out of date, the rule is applied repeatedly and
			
 
				-<TT>$stem</TT>
			
 
				-refers to each file in turn.
			
 
				-Since there is only one
			
 
				-<TT>stem</TT>
			
 
				-variable, there can only be one
			
 
				-<TT>%</TT>
			
 
				-or
			
 
				-<TT>&</TT>
			
 
				-pattern in a target;
			
 
				-the pattern
			
 
				-<TT>%-%.c</TT>
			
 
				-is illegal.
			
 
				-</P>
			
 
				-<P>
			
 
				-Metarules simplify the
			
 
				-<TT>mkfile</TT>
			
 
				-for building programs
			
 
				-<TT>f1</TT>
			
 
				-and
			
 
				-<TT>f2</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/objtype/mkfile
			
 
				-
			
 
				-ALL=f1 f2
			
 
				-
			
 
				-all:V:	<I>ALL
			
 
				-
			
 
				-%:	%.</I>O
			
 
				-	<I>LD -o </I>target <I>prereq
			
 
				-%.</I>O:	%.c
			
 
				-	<I>CC </I>CFLAGS <I>stem.c
			
 
				-clean:V:
			
 
				-	rm -f </I>ALL *.[<I>OS]
			
 
				-</PRE></TT></DL>
			
 
				-(The variable
			
 
				-</I><TT></TT><I>OS</I><TT>
			
 
				-is a list of code characters for all architectures.)
			
 
				-Here, metarules specify
			
 
				-compile and load steps for all C source files.
			
 
				-The loader rule relies on two internal variables
			
 
				-set by
			
 
				-</TT><TT>mk</TT><TT>
			
 
				-during evaluation of the rule:
			
 
				-</TT><TT></TT><I>target</I><TT>
			
 
				-is the name of the target(s) and
			
 
				-</TT><TT></TT><TT>prereq</TT><TT>
			
 
				-the name of all prerequisite(s).
			
 
				-Metarules allow this
			
 
				-</TT><TT>mkfile</TT><TT>
			
 
				-to be easily extended; a new program
			
 
				-is supported by adding its name to the third line.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-A regular expression metarule must have an
			
 
				-<TT>R</TT>
			
 
				-attribute.
			
 
				-Prerequisites may reference matching substrings in
			
 
				-the target using the form
			
 
				-<TT>\</TT><I>n</I><TT></TT><I>
			
 
				-where
			
 
				-</I><I>n</I><I>
			
 
				-is a digit from 1 to 9 specifying the
			
 
				-</I><I>n</I><I>th
			
 
				-parenthesized sub-expression.  In a recipe,
			
 
				-</I><TT></TT><I>stem</I><I>n</I><I></I><I>
			
 
				-is the equivalent reference.
			
 
				-For example, a compile rule could be
			
 
				-specified using regular expressions:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-(.+)\.</I>O:R:	\1.c
			
 
				-	<I>CC </I>CFLAGS <I>stem1.c
			
 
				-</PRE></TT></DL>
			
 
				-Here,
			
 
				-</I><TT>\1</TT><I>
			
 
				-and
			
 
				-</I><TT></TT><I>stem1</I><TT>
			
 
				-refer to the name of the target object file without the
			
 
				-suffix.  The variable
			
 
				-</TT><TT></TT><I>stem</I><TT>
			
 
				-associated with an intrinsic pattern is undefined
			
 
				-in a regular expression metarule.
			
 
				-</P>
			
 
				-</TT><H4>7 Archives
			
 
				-</H4>
			
 
				-<P>
			
 
				-<TT>Mk</TT>
			
 
				-provides a special mechanism for maintaining an archive.
			
 
				-An archive member is referenced using the form
			
 
				-<TT></TT><I>lib</I><TT>(</TT><I>file</I><TT>)</TT><I>
			
 
				-where
			
 
				-</I><I>lib</I><I>
			
 
				-is the name of the archive and 
			
 
				-</I><I>file</I><I>
			
 
				-is the name of the member.  Two rules define the
			
 
				-dependency between an object file and its membership
			
 
				-in an archive:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I>LIB(foo.8):N:	foo.8
			
 
				-<I>LIB:	</I>LIB(foo.8)
			
 
				-	ar rv <I>LIB foo.8
			
 
				-</PRE></TT></DL>
			
 
				-The first rule establishes a dependency between the
			
 
				-archive member and the object file.
			
 
				-Normally,
			
 
				-</I><TT>mk</TT><I>
			
 
				-detects an error when a target does not exist and the rule
			
 
				-contains no recipe; the
			
 
				-</I><TT>N</TT><I>
			
 
				-attribute overrides this behavior because the subsequent rule
			
 
				-updates the member.
			
 
				-The second
			
 
				-rule establishes the dependency between the member and
			
 
				-the archive; its recipe inserts the member
			
 
				-into the archive.
			
 
				-This two-step specification allows the modification time
			
 
				-of the archive
			
 
				-to represent the state of its members.  Other rules
			
 
				-can then specify the archive as a prerequisite instead of
			
 
				-listing each member.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-A metarule generalizes library maintenance:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-LIB=lib.a
			
 
				-OBJS=etoa.O atoe.<I>O ebcdic.</I>O
			
 
				-
			
 
				-<I>LIB(%):N:	%
			
 
				-</I>LIB:	<I>{OBJS:%=</I>LIB(%)}
			
 
				-	ar rv <I>LIB </I>OBJS
			
 
				-</PRE></TT></DL>
			
 
				-The namelist prerequisite of the
			
 
				-<TT></TT><I>LIB</I><TT>
			
 
				-target generates archive member names for each object file name;
			
 
				-for example, 
			
 
				-</TT><TT>etoa.</TT><TT>O</TT><TT>
			
 
				-becomes
			
 
				-</TT><TT>lib.a(etoa.</TT><I>O)</I><TT>.
			
 
				-This formulation always updates all members.
			
 
				-This is acceptable for a small archive, but may 
			
 
				-be slow for a big one.
			
 
				-The rule
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</TT><I>LIB:	</I><I>{OBJS:%=</I><I>LIB(%)}
			
 
				-	ar rv </I><I>LIB `{membername </I><I>newprereq}
			
 
				-</PRE></TT></DL>
			
 
				-only updates out of date object files.
			
 
				-The internal variable
			
 
				-</I><TT></TT><I>newprereq</I><TT>
			
 
				-contains the names of the out of
			
 
				-date prerequisites.  The
			
 
				-</TT><TT>rc</TT><TT>
			
 
				-script
			
 
				-</TT><TT>membername</TT><TT>
			
 
				-transforms an archive member specification into a file name:
			
 
				-it translates
			
 
				-</TT><TT>lib.a(etoa.</TT><TT>O)</TT><TT>
			
 
				-into
			
 
				-</TT><TT>etoa.</TT><I>O</I><TT>.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-The
			
 
				-<TT>mkfile</TT>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/objtype/mkfile
			
 
				-LIB=lib.a
			
 
				-OBJS=etoa.<I>O atoe.</I>O ebcdic.<I>O
			
 
				-
			
 
				-prog:	main.</I>O <I>LIB
			
 
				-	</I>LD -o <I>target </I>prereq
			
 
				-
			
 
				-<I>LIB(%):N:	%
			
 
				-</I>LIB:	<I>{OBJS:%=</I>LIB(%)}
			
 
				-	ar rv <I>LIB </I>OBJS
			
 
				-</PRE></TT></DL>
			
 
				-builds a program by loading it with a library.
			
 
				-</P>
			
 
				-<H4>8 Evaluation algorithm
			
 
				-</H4>
			
 
				-<P>
			
 
				-For each target of interest,
			
 
				-<TT>mk</TT>
			
 
				-uses the rules in a
			
 
				-<TT>mkfile</TT>
			
 
				-to build a data
			
 
				-structure called a dependency graph.  The nodes of
			
 
				-the graph represent targets and prerequisites;
			
 
				-a directed arc
			
 
				-from one node to another indicates that
			
 
				-the file associated with the first node depends
			
 
				-on the file associated with the second.
			
 
				-When the
			
 
				-<TT>mkfile</TT>
			
 
				-has been completely read, the graph is analyzed.
			
 
				-In the first step, implied dependencies are resolved by
			
 
				-computing the
			
 
				-<I>transitive closure</I>
			
 
				-of the graph.
			
 
				-This calculation extends the graph to include all
			
 
				-targets that are potentially
			
 
				-derivable from the rules in the
			
 
				-<TT>mkfile</TT>.
			
 
				-Next the graph is checked for cycles;
			
 
				-<TT>make</TT>
			
 
				-accepts cyclic dependencies, but
			
 
				-<TT>mk</TT>
			
 
				-does not allow them.
			
 
				-Subsequent steps
			
 
				-prune subgraphs that are irrelevant for producing the
			
 
				-desired target and verify that there is only one way
			
 
				-to build it.
			
 
				-The recipes associated with the
			
 
				-nodes on the longest path between the
			
 
				-target and an out of date prerequisite
			
 
				-are then executed in reverse order.
			
 
				-</P>
			
 
				-<P>
			
 
				-The transitive closure calculation is sensitive to
			
 
				-metarules; the patterns often select many potential targets
			
 
				-and cause the graph to grow rapidly.
			
 
				-Fortunately,
			
 
				-dependencies associated with the desired target
			
 
				-usually form a small part of the graph, so, after
			
 
				-pruning, analysis is tractable.
			
 
				-For example, the rules
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%:	x.%
			
 
				-	recipe1
			
 
				-x.%:	%.k
			
 
				-	recipe2
			
 
				-%.k:	%.f
			
 
				-	recipe3
			
 
				-</PRE></TT></DL>
			
 
				-produce a graph with four nodes for each file in the
			
 
				-current directory.
			
 
				-If the desired target is
			
 
				-<TT>foo</TT>,
			
 
				-<TT>mk</TT>
			
 
				-detects the dependency between it
			
 
				-and the original file
			
 
				-<TT>foo.f</TT>
			
 
				-through intermediate dependencies on
			
 
				-<TT>foo.k</TT>
			
 
				-and
			
 
				-<TT>x.foo</TT>.
			
 
				-Nodes associated with other files are deleted during pruning because
			
 
				-they are irrelevant to the production of
			
 
				-<TT>foo</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Mk</TT>
			
 
				-avoids infinite cycles by evaluating
			
 
				-each metarule once.
			
 
				-Thus, the rule
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%:	%.z
			
 
				-	cp <I>prereq </I>prereq.z
			
 
				-</PRE></TT></DL>
			
 
				-copies the prerequisite file once.
			
 
				-</P>
			
 
				-<H4>9 Conventions for evaluating rules
			
 
				-</H4>
			
 
				-<P>
			
 
				-There must be only one
			
 
				-way to build each target.  However, during evaluation
			
 
				-metarule patterns often select potential targets that
			
 
				-conflict with the
			
 
				-targets of other rules.
			
 
				-<TT>Mk</TT>
			
 
				-uses several conventions to resolve ambiguities
			
 
				-and to select the proper dependencies.
			
 
				-</P>
			
 
				-<P>
			
 
				-When a target selects more than one rule,
			
 
				-<TT>mk</TT>
			
 
				-chooses a regular rule
			
 
				-over a metarule.
			
 
				-For example, the
			
 
				-<TT>mkfile</TT>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/<I>objtype/mkfile
			
 
				-
			
 
				-FILES=f1.</I>O f2.<I>O f3.</I>O
			
 
				-
			
 
				-prog:	<I>FILES
			
 
				-	</I>LD -o <I>target </I>prereq
			
 
				-
			
 
				-%.<I>O:	%.c
			
 
				-	</I>CC <I>CFLAGS </I>stem.c
			
 
				-
			
 
				-f2.<I>O:	f2.c
			
 
				-	</I>CC f2.c
			
 
				-</PRE></TT></DL>
			
 
				-contains two rules that could build
			
 
				-<TT>f2.</TT><I>O</I><TT>.
			
 
				-</TT><TT>Mk</TT><TT>
			
 
				-selects the last rule because its target,
			
 
				-</TT><TT>f2.</TT><TT>O</TT><TT>,
			
 
				-is explicitly specified, while the 
			
 
				-</TT><TT>%.</TT><I>O</I><TT>
			
 
				-rule is a metarule.  In effect,
			
 
				-the explicit rule for
			
 
				-</TT><TT>f2.</TT><TT>O</TT><TT>
			
 
				-overrides the general rule for building object files from
			
 
				-C source files.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-When a rule has a target and prerequisites but no recipe,
			
 
				-those prerequisites are added to all other rules with
			
 
				-recipes that have the same target.
			
 
				-All prerequisites, regardless of where they were specified, are
			
 
				-exported to the recipe in variable
			
 
				-<TT></TT><I>prereq</I><TT>.
			
 
				-For example, in
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/</TT>objtype/mkfile
			
 
				-
			
 
				-FILES=f1.<I>O f2.</I>O f3.<I>O
			
 
				-
			
 
				-prog:	</I>FILES
			
 
				-	<I>LD -o </I>target <I>prereq
			
 
				-
			
 
				-%.</I>O:	hdr.h
			
 
				-
			
 
				-%.<I>O:	%.c
			
 
				-	</I>CC <I>CFLAGS </I>stem.c
			
 
				-</PRE></TT></DL>
			
 
				-the second rule adds
			
 
				-<TT>hdr.h</TT>
			
 
				-as a prerequisite of the compile metarule;
			
 
				-an object file produced from a C source file
			
 
				-depends on
			
 
				-<TT>hdr.h</TT>
			
 
				-as well as the source file.  Notice that the recipe of 
			
 
				-the compile rule uses
			
 
				-<TT></TT><I>stem.c</I><TT>
			
 
				-instead of
			
 
				-</TT><TT></TT><TT>prereq</TT><TT>
			
 
				-because the latter specification would attempt to compile
			
 
				-</TT><TT>hdr.h</TT><TT>.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-When a target is virtual and there is no other rule with
			
 
				-the same target,
			
 
				-<TT>mk</TT>
			
 
				-evaluates each prerequisite.
			
 
				-For example, adding the rule
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-all:V:	prog
			
 
				-</PRE></TT></DL>
			
 
				-to the preceding example builds the executable
			
 
				-when either
			
 
				-<TT>prog</TT>
			
 
				-or
			
 
				-<TT>all</TT>
			
 
				-is the specified target.  In effect, the
			
 
				-<TT>all</TT>
			
 
				-target is an alias for
			
 
				-<TT>prog</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-When two rules have identical rule headers and both have
			
 
				-recipes, the later rule replaces the former one.
			
 
				-For example,
			
 
				-if a file named
			
 
				-<TT>mkrules</TT>
			
 
				-contains
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>O.out:	</I>OFILES
			
 
				-	<I>LD </I>LFLAGS <I>OFILES
			
 
				-%.</I>O:	%.c
			
 
				-	<I>CC </I>CFLAGS <I>stem.c
			
 
				-</PRE></TT></DL>
			
 
				-the
			
 
				-</I><TT>mkfile</TT><I>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-OFILES=f1.</I>O f2.<I>O f3.</I>O
			
 
				-
			
 
				-&lt;mkrules
			
 
				-
			
 
				-<I>O.out:	</I>OFILES
			
 
				-	<I>LD </I>LFLAGS -l <I>OFILES -lbio -lc
			
 
				-</PRE></TT></DL>
			
 
				-overrides the general loader rule with a special
			
 
				-rule using a non-standard library search sequence.
			
 
				-A rule is neutralized by overriding it with a rule
			
 
				-with a null recipe:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;mkrules
			
 
				-
			
 
				-</I>O.out:Q:	<I>OFILES
			
 
				-	;
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-</I><TT>Q</TT><I>
			
 
				-attribute suppresses the printing of the semicolon.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-When a rule has no prerequisites, the recipe is executed
			
 
				-only when the target does not exist.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-marker:
			
 
				-	touch target
			
 
				-</PRE></TT></DL>
			
 
				-defines a rule to manage a marker file.
			
 
				-If the file exists, it is considered up to date
			
 
				-regardless of its modification time.
			
 
				-When a virtual target has no prerequisites the
			
 
				-recipe is always executed.
			
 
				-The
			
 
				-<TT>clean</TT>
			
 
				-rule is of this type:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-clean:V:
			
 
				-	rm -f [<I>OS].out *.[</I>OS]
			
 
				-</PRE></TT></DL>
			
 
				-When a rule without prerequisites has multiple targets, the
			
 
				-extra targets are aliases for the rule.
			
 
				-For example, in
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-clean tidy nuke:V:
			
 
				-	rm -f [<I>OS].out *.[</I>OS]
			
 
				-</PRE></TT></DL>
			
 
				-the
			
 
				-rule can be invoked by any of three names.
			
 
				-The first rule in a
			
 
				-<TT>mkfile</TT>
			
 
				-is handled specially:
			
 
				-when
			
 
				-<TT>mk</TT>
			
 
				-is invoked without a command line target
			
 
				-all targets of the first non-metarule are built.
			
 
				-If that rule has multiple targets, the recipe
			
 
				-is executed once for each target; normally, the recipe
			
 
				-of a rule with multiple targets is only executed once.
			
 
				-</P>
			
 
				-<P>
			
 
				-A rule applies to a target only when its prerequisites
			
 
				-exist or can be derived.  More than one rule may have the
			
 
				-same target as long as only one rule with a recipe
			
 
				-remains applicable after the dependency evaluation completes.
			
 
				-For example, consider a program built from C
			
 
				-and assembler source files.  Two rules produce
			
 
				-object files:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%.<I>O:	%.c
			
 
				-	</I>CC <I>CFLAGS </I>stem.c
			
 
				-%.<I>O:	%.s
			
 
				-	</I>AS <I>AFLAGS </I>stem.s
			
 
				-</PRE></TT></DL>
			
 
				-As long as there are not two source files with names like
			
 
				-<I>foo</I><TT>.c</TT>
			
 
				-and
			
 
				-<I>foo</I><TT>.s</TT>,
			
 
				-<TT>mk</TT>
			
 
				-can unambiguously select the proper rule.
			
 
				-If both files exist,
			
 
				-the rules are ambiguous
			
 
				-and
			
 
				-<TT>mk</TT>
			
 
				-exits with an error message.
			
 
				-</P>
			
 
				-<P>
			
 
				-In Plan 9, many programs consist of portable code stored
			
 
				-in one directory and architecture-specific source stored in
			
 
				-another.
			
 
				-For example, the
			
 
				-<TT>mkfile</TT>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/<I>objtype/mkfile
			
 
				-
			
 
				-FILES=f1.</I>O f2.<I>O f3.</I>O f3.<I>O
			
 
				-
			
 
				-prog:	</I>FILES
			
 
				-	<I>LD -o </I>target <I>prereq
			
 
				-
			
 
				-%.</I>O:	%.<I>c
			
 
				-	</I>CC <I>CFLAGS </I>stem.c
			
 
				-
			
 
				-%.<I>O:	../port/%.c
			
 
				-	</I>CC <I>CFLAGS ../port/</I>stem.c
			
 
				-</PRE></TT></DL>
			
 
				-builds the program named
			
 
				-<TT>prog</TT>
			
 
				-using portable code in directory
			
 
				-<TT>../port</TT>
			
 
				-and architecture-specific code in the current directory.
			
 
				-As long as the
			
 
				-names of the C source files in 
			
 
				-<TT>../port</TT>
			
 
				-do not conflict with the names of files in the current directory,
			
 
				-<TT>mk</TT>
			
 
				-selects the appropriate rule to build the object file.
			
 
				-If like-named files exist in both directories, the
			
 
				-specification is ambiguous and an explicit target
			
 
				-must be specified to resolve the ambiguity.
			
 
				-For example,
			
 
				-adding the rule
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-f2.<I>O:	f2.c
			
 
				-	</I>CC <I>CFLAGS </I>f2.c
			
 
				-</PRE></TT></DL>
			
 
				-to the previous
			
 
				-<TT>mkfile</TT>
			
 
				-uses the architecture-specific version of
			
 
				-<TT>f2.c</TT>
			
 
				-instead of the portable one.
			
 
				-Here, the explicit rule unambiguously
			
 
				-documents which of the
			
 
				-like-named source files is used to build the program.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Mk</TT>'<TT>s</TT>
			
 
				-heuristics can produce unintended results
			
 
				-when rules are not carefully specified.
			
 
				-For example, the rules that build
			
 
				-object files from C or assembler source files
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%.<I>O:	%.c
			
 
				-	</I>CC <I>CFLAGS </I>stem.c
			
 
				-%.<I>O:	%.s
			
 
				-	</I>AS <I>AFLAGS </I>stem.s
			
 
				-</PRE></TT></DL>
			
 
				-illustrate a subtle pratfall.
			
 
				-Adding a header file dependency to the compile rule
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%.<I>O:	%.c hdr.h
			
 
				-	</I>CC <I>CFLAGS </I>stem.c
			
 
				-</PRE></TT></DL>
			
 
				-produces the error message
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<TT>don't know how to make '</TT><I>file</I><TT>.c'</TT><I>
			
 
				-</PRE></TT></DL>
			
 
				-when </I><I>file</I><I>.s is an assembler
			
 
				-source file.
			
 
				-This occurs because 
			
 
				-</I><TT></TT><I>file</I><TT>.s</TT><I>
			
 
				-satisfies the assemble rule and
			
 
				-</I><TT>hdr.h</TT><I>
			
 
				-satisfies the compile rule, so
			
 
				-either rule can potentially produce the target.
			
 
				-When a prerequisite exists or can be
			
 
				-derived,
			
 
				-all other prerequisites in that
			
 
				-rule header must exist or be derivable; here,
			
 
				-the existence of
			
 
				-</I><TT>hdr.h</TT><I>
			
 
				-forces the evaluation of a C source file.
			
 
				-Specifying the dependencies in different
			
 
				-rules avoids this interpretation:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%.</I><I>O:	hdr.h
			
 
				-%.</I><I>O:	%.c
			
 
				-	</I><I>CC </I><I>CFLAGS </I><I>stem.c
			
 
				-</PRE></TT></DL>
			
 
				-Although
			
 
				-</I><TT>hdr.h</TT><I>
			
 
				-is an additional prerequisite of the compile rule,
			
 
				-the two rules are evaluated independently and
			
 
				-the existence of the C source file is not linked
			
 
				-to the existence of the header file.
			
 
				-However, this specification describes a different
			
 
				-dependency.  Originally, only object
			
 
				-files derived from C files depended on
			
 
				-</I><TT>hdr.h</TT><I>;
			
 
				-now all object files, including those built
			
 
				-from assembler source, depend on the header file.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-Metarule patterns should be as restrictive as possible to
			
 
				-prevent conflicts with other rules.
			
 
				-Consider the
			
 
				-<TT>mkfile</TT>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/objtype/mkfile
			
 
				-BIN=/<I>objtype/bin
			
 
				-PROG=foo
			
 
				-
			
 
				-install:V:	</I>BIN/<I>PROG
			
 
				-
			
 
				-%:	%.c
			
 
				-	</I>CC <I>stem.c
			
 
				-	</I>LD -o <I>target </I>stem.<I>O
			
 
				-
			
 
				-</I>BIN/%:	%
			
 
				-	mv <I>stem </I>target
			
 
				-</PRE></TT></DL>
			
 
				-The first target builds an executable
			
 
				-in the local directory; the second
			
 
				-installs it in the directory
			
 
				-of executables for the architecture.
			
 
				-Invoking
			
 
				-<TT>mk</TT>
			
 
				-with the
			
 
				-<TT>install</TT>
			
 
				-target produces:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mk: ambiguous recipes for /mips/bin/foo:
			
 
				-/mips/bin/foo &lt;-(mkfile:8)- /mips/bin/foo.c &lt;-(mkfile:12)- foo.c
			
 
				-/mips/bin/foo &lt;-(mkfile:12)- foo &lt;-(mkfile:8)- foo.c
			
 
				-</PRE></TT></DL>
			
 
				-The prerequisite of the
			
 
				-<TT>install</TT>
			
 
				-rule,
			
 
				-<TT></TT><I>BIN/</I><TT>PROG</TT><I>,
			
 
				-matches both metarules because the
			
 
				-</I><TT>%</TT><I>
			
 
				-pattern matches everything.
			
 
				-The
			
 
				-</I><TT>&</TT><I>
			
 
				-pattern restricts the compile rule to files in the
			
 
				-current directory and avoids the conflict:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;:	&amp;.c
			
 
				-	</I><I>CC </I><I>stem.c
			
 
				-	</I><I>LD -o </I><I>target </I><I>stem.</I><I>O
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-</I><H4>10 Missing intermediates
			
 
				-</H4>
			
 
				-<P>
			
 
				-<TT>Mk</TT>
			
 
				-does not build a missing intermediate file if a target
			
 
				-is up to date with the prerequisites of the intermediate.
			
 
				-For example,
			
 
				-when an executable is up to date with its source file,
			
 
				-<TT>mk</TT>
			
 
				-does not compile the source to create a missing object file.
			
 
				-The evaluation only applies
			
 
				-when a target is considered up to date by pretending that the
			
 
				-intermediate exists.  Thus, it does not apply
			
 
				-when the intermediate is a command line target
			
 
				-or when it has no prerequisites.
			
 
				-</P>
			
 
				-<P>
			
 
				-This capability is useful for
			
 
				-maintaining archives.  We can modify the archive
			
 
				-update recipe to remove object files after
			
 
				-they are archived:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>LIB(%):N:	%
			
 
				-</I>LIB:	<I>{OBJS:%=</I>LIB(%)}
			
 
				-	names=`{membername <I>newprereq}
			
 
				-	ar rv </I>LIB <I>names
			
 
				-	rm -f </I>names
			
 
				-</PRE></TT></DL>
			
 
				-A subsequent
			
 
				-<TT>mk</TT>
			
 
				-does not remake the object files as long as the members
			
 
				-of the archive remain up to date with the source files.
			
 
				-The
			
 
				-<TT>-i</TT>
			
 
				-command line option overrides this behavior
			
 
				-and causes all intermediates to be built.
			
 
				-</P>
			
 
				-<H4>11 Alternative out-of-date determination
			
 
				-</H4>
			
 
				-<P>
			
 
				-Sometimes the modification time is not useful
			
 
				-for deciding when a target and prerequisite are out of date.
			
 
				-The
			
 
				-<TT>P</TT>
			
 
				-attribute replaces the default mechanism with the result of
			
 
				-a command.  The command immediately follows the attribute
			
 
				-and is repeatedly executed with each
			
 
				-target and each prerequisite as its arguments;
			
 
				-if its exit status is non-zero, they are considered out of date
			
 
				-and the recipe is executed.  Consider the
			
 
				-<TT>mkfile</TT>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-foo.ref:Pcmp -s:	foo
			
 
				-	cp <I>prereq </I>target
			
 
				-</PRE></TT></DL>
			
 
				-The command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cmp -s foo.ref foo
			
 
				-</PRE></TT></DL>
			
 
				-is executed and if 
			
 
				-<TT>foo.ref</TT>
			
 
				-differs from
			
 
				-<TT>foo</TT>,
			
 
				-the latter file is copied to the former.
			
 
				-</P>
			
 
				-<H4>12 Parallel processing
			
 
				-</H4>
			
 
				-<P>
			
 
				-When possible,
			
 
				-<TT>mk</TT>
			
 
				-executes recipes in parallel.
			
 
				-The variable
			
 
				-<TT></TT><I>NPROC</I><TT>
			
 
				-specifies the maximum number of simultaneously executing
			
 
				-recipes.
			
 
				-Normally it is imported from the environment,
			
 
				-where the system has set it to the number of available processors.
			
 
				-It can be decreased by assigning a new
			
 
				-value and can be set to 1 to force single-threaded recipe execution.
			
 
				-This is necessary when several targets access
			
 
				-a common resource such as
			
 
				-a status file or data base.
			
 
				-When there is no dependency between targets,
			
 
				-</TT><TT>mk</TT><TT>
			
 
				-assumes the
			
 
				-recipes can be
			
 
				-executed concurrently.
			
 
				-Normally, this allows
			
 
				-multiple prerequisites to be built simultaneously;
			
 
				-for example, the object file prerequisites of
			
 
				-a load rule can be produced by compiling the source files in parallel.
			
 
				-</TT><TT>Mk</TT><TT>
			
 
				-does not define the order of execution of independent recipes.
			
 
				-When the prerequisites of a rule are not independent,
			
 
				-the dependencies between them should be specified in a rule or the
			
 
				-</TT><TT>mkfile</TT><TT>
			
 
				-should be single-threaded.
			
 
				-For example, the archive update rules
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</TT>LIB(%):N:	%
			
 
				-<I>LIB:	</I>{OBJS:%=<I>LIB(%)}
			
 
				-	ar rv </I>LIB `{membername <I>newprereq}
			
 
				-</PRE></TT></DL>
			
 
				-compile source files in parallel but update
			
 
				-all members of the archive at once.
			
 
				-It is a mistake to merge the two rules
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I>LIB(%):	%
			
 
				-	ar rv <I>LIB </I>stem
			
 
				-</PRE></TT></DL>
			
 
				-because an
			
 
				-<TT>ar</TT>
			
 
				-command is executed for every
			
 
				-member of the library.  Not only is this
			
 
				-inefficient, but the archive is updated
			
 
				-in parallel, making interference likely.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>$nproc</TT>
			
 
				-environment variable contains a number associated
			
 
				-with the processor executing a recipe.
			
 
				-It can be used to create unique
			
 
				-names when the
			
 
				-recipe may be executing simultaneously on several processors.
			
 
				-Other maintenance tools provide mechanisms to control recipe
			
 
				-scheduling explicitly [Cmel86], but
			
 
				-<TT>mk</TT>'<TT>s</TT>
			
 
				-general rules are sufficient for all but the most unusual cases.
			
 
				-</P>
			
 
				-<H4>13 Deleting target files on errors
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>D</TT>
			
 
				-attribute
			
 
				-causes
			
 
				-<TT>mk</TT>
			
 
				-to remove the target file when a
			
 
				-recipe terminates prematurely.
			
 
				-The error message describing the
			
 
				-termination condition warns
			
 
				-of the deletion.
			
 
				-A partially built file is doubly dangerous:
			
 
				-it is not only wrong, but is also
			
 
				-considered to be up to date so
			
 
				-a subsequent
			
 
				-<TT>mk</TT>
			
 
				-will not rebuild it.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pic.out:D:	mk.ms
			
 
				-		pic $prereq | tbl | troff -ms &gt; $target
			
 
				-</PRE></TT></DL>
			
 
				-produces the message
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mk: pic mk.ms | ...  : exit status=rc 685: deleting 'pic.out'
			
 
				-</PRE></TT></DL>
			
 
				-if any program in the recipe exits with an error status.
			
 
				-</P>
			
 
				-<H4>14 Unspecified dependencies
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>-w</TT>
			
 
				-command line flag forces the
			
 
				-files following the flag to be treated
			
 
				-as if they were just modified.
			
 
				-We can use this flag with a command that selects files
			
 
				-to force a build based on the selection criterion.
			
 
				-For example, if the declaration of
			
 
				-a global variable named
			
 
				-<I>var</I>
			
 
				-is changed in a header file,
			
 
				-all source files that reference
			
 
				-it can be rebuilt with the command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				- mk -w`{grep -l <I>var</I> *.[cyl]}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>15 Conclusion
			
 
				-</H4>
			
 
				-<P>
			
 
				-There are many programs related to
			
 
				-<TT>make</TT>,
			
 
				-each choosing a different balance between
			
 
				-specialization and generality.
			
 
				-<TT>Mk</TT>
			
 
				-emphasizes generality but allows
			
 
				-customization through its pattern specifications and
			
 
				-include facilities.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 presents a difficult maintenance environment
			
 
				-with its heterogeneous
			
 
				-architectures and languages.
			
 
				-<TT>Mk</TT>'<TT>s</TT>
			
 
				-flexible specification language and simple
			
 
				-interaction with
			
 
				-<TT>rc</TT>
			
 
				-work well in this environment.
			
 
				-As a result,
			
 
				-Plan 9 relies on
			
 
				-<TT>mk</TT>
			
 
				-to automate almost all maintenance.
			
 
				-Tasks as diverse as updating the
			
 
				-network data base, producing the manual,
			
 
				-or building a release are expressed as
			
 
				-<TT>mk</TT>
			
 
				-procedures.
			
 
				-</P>
			
 
				-<H4>16 References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Cmel86] R. F. Cmelik,
			
 
				-``Concurrent Make: A Distributed Program in Concurrent C'',
			
 
				-AT&amp;T Bell Laboratories Technical Report, 1986.
			
 
				-<br>&#32;<br>
			
 
				-[Feld79] S. I. Feldman,
			
 
				-``Make &#173; a program for maintaining computer programs'',
			
 
				-Software Practice &amp; Experience ,
			
 
				-1979
			
 
				-Vol 9 #4,
			
 
				-pp. 255-266.
			
 
				-<br>&#32;<br>
			
 
				-[Flan95] Bob Flandrena,
			
 
				-``Plan 9 Mkfiles'',
			
 
				-this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Hume87] A. G. Hume,
			
 
				-``Mk: A Successor to Make'',
			
 
				-USENIX Summer Conf. Proc.,
			
 
				-Phoenix, Az.
			
 
				-<H4>17 Appendix: Differences between
			
 
				-<TT>make</TT>
			
 
				-and
			
 
				-<TT>mk</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-The differences between
			
 
				-<TT>mk</TT>
			
 
				-and
			
 
				-<TT>make</TT>
			
 
				-are:
			
 
				-</P>
			
 
				-<UL>
			
 
				-<LI>
			
 
				-<TT>Make</TT>
			
 
				-builds targets when it needs them, allowing systematic use of side effects.
			
 
				-<TT>Mk</TT>
			
 
				-constructs the entire dependency graph before building any target.
			
 
				-<LI>
			
 
				-<TT>Make</TT>
			
 
				-supports suffix rules and
			
 
				-<TT>%</TT>
			
 
				-metarules.
			
 
				-<TT>Mk</TT>
			
 
				-supports
			
 
				-<TT>%</TT>
			
 
				-and regular expression metarules.
			
 
				-(Older versions of
			
 
				-<TT>make</TT>
			
 
				-support only suffix rules.)
			
 
				-<LI>
			
 
				-<TT>Mk</TT>
			
 
				-performs transitive closure on metarules,
			
 
				-<TT>make</TT>
			
 
				-does not.
			
 
				-<LI>
			
 
				-<TT>Make</TT>
			
 
				-supports cyclic dependencies,
			
 
				-<TT>mk</TT>
			
 
				-does not.
			
 
				-<LI>
			
 
				-<TT>Make</TT>
			
 
				-evaluates recipes one line at a time, replacing variables by their values and
			
 
				-executing some commands internally.
			
 
				-<TT>Mk</TT>
			
 
				-passes the entire recipe to the shell without
			
 
				-interpretation or internal execution.
			
 
				-<LI>
			
 
				-<TT>Make</TT>
			
 
				-supports parallel execution of single-line recipes when building
			
 
				-the prerequisites for specified targets.
			
 
				-<TT>Mk</TT>
			
 
				-supports parallel execution of all recipes.
			
 
				-(Older versions of
			
 
				-<TT>make</TT>
			
 
				-did not support parallel execution.)
			
 
				-<LI>
			
 
				-<TT>Make</TT>
			
 
				-uses special targets (beginning with a period)
			
 
				-to indicate special processing.
			
 
				-<TT>Mk</TT>
			
 
				-uses attributes to modify rule evaluation.
			
 
				-<LI>
			
 
				-<TT>Mk</TT>
			
 
				-supports virtual
			
 
				-targets that are independent of the file system.
			
 
				-<LI>
			
 
				-<TT>Mk</TT>
			
 
				-allows non-standard out-of-date determination,
			
 
				-<TT>make</TT>
			
 
				-does not.
			
 
				-</ul>
			
 
				-<P>
			
 
				-It is usually easy to convert a
			
 
				-<TT>makefile</TT>
			
 
				-to or from an equivalent
			
 
				-<TT>mkfile</TT>.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/mkfiles.html
+++ b/sys/doc/mkfiles.html
@@ -1,666 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Plan 9 Mkfiles
			
 
				-</H1>
			
 
				-<DL><DD><I>Bob Flandrena<br>
			
 
				-bobf@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Every Plan 9 source directory contains a file, called
			
 
				-<TT>mkfile</TT>,
			
 
				-specifying the rules for building the executable or
			
 
				-library that is the product of the directory.
			
 
				-<A href="/magic/man2html/1/mk"><I>mk</I>(1)
			
 
				-</A>interprets the rules in the file, calculates
			
 
				-the dependencies, and executes an
			
 
				-<A href="/magic/man2html/1/rc"><I>rc</I>(1)
			
 
				-</A>script to construct the product.
			
 
				-If necessary components are supplied by
			
 
				-neighboring directories or sub-directories, the mkfiles in those
			
 
				-directories are first executed to build the components
			
 
				-before the local construction proceeds.
			
 
				-<br>&#32;<br>
			
 
				-Most application source directories produce one of
			
 
				-four types of product:
			
 
				-a single executable, several
			
 
				-executables, a local library, or
			
 
				-a system library.
			
 
				-Four generic
			
 
				-mkfiles
			
 
				-define the normal rules
			
 
				-for building each type of product.  The simplest
			
 
				-mkfiles need only
			
 
				-list the components
			
 
				-and include the appropriate
			
 
				-generic
			
 
				-mkfile 
			
 
				-to do the work.
			
 
				-More complex 
			
 
				-mkfiles
			
 
				-may supply additional rules
			
 
				-to augment, modify, or override the generic rules.
			
 
				-<H4>Using a Mkfile
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-To build a product, change to the directory containing
			
 
				-its source and invoke
			
 
				-<I>mk</I>
			
 
				-with the appropriate target as an argument.
			
 
				-All mkfiles provide the following standard targets:
			
 
				-<br><img src="data.19114400.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-If no target is specified on the
			
 
				-<TT>mk</TT>
			
 
				-command line, the
			
 
				-<TT>all</TT>
			
 
				-target is built by default.  In a directory
			
 
				-producing multiple executables, there is
			
 
				-no default target.
			
 
				-<br>&#32;<br>
			
 
				-In addition to the five standard targets,
			
 
				-additional targets may be supplied by each
			
 
				-generic mkfile or by the directory's mkfile.
			
 
				-<br>&#32;<br>
			
 
				-The environment variable
			
 
				-<TT>NPROC</TT>
			
 
				-is set by the system to the number of
			
 
				-available processors.
			
 
				-Setting
			
 
				-this variable, either in the environment or in
			
 
				-a mkfile, controls the amount of parallelism in
			
 
				-the build.  For example, the command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	NPROC=1 mk
			
 
				-</PRE></TT></DL>
			
 
				-restricts a build to a single thread of execution.
			
 
				-<H4>Creating a Mkfile
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The easiest way to build a new mkfile is to copy and modify
			
 
				-an existing mkfile of the same type.
			
 
				-Failing that, it is usually possible to create a new
			
 
				-mkfile with minimal effort, since the appropriate
			
 
				-generic mkfile predefines the rules that do all the work.
			
 
				-In the simplest and most common cases, the new mkfile
			
 
				-need only define a couple of variables and include the appropriate
			
 
				-architecture-specific
			
 
				-and generic mkfiles.
			
 
				-<H4></H4>
			
 
				-<br>&#32;<br>
			
 
				-There are four generic mkfiles containing commonly
			
 
				-used rules for building a product:
			
 
				-<TT>mkone</TT>,
			
 
				-<TT>mkmany</TT>,
			
 
				-<TT>mklib</TT>,
			
 
				-and
			
 
				-<TT>mksyslib</TT>.
			
 
				-These rules
			
 
				-perform such actions as compiling C source files,
			
 
				-loading object files, archiving libraries, and
			
 
				-installing executables in the
			
 
				-<TT>bin</TT>
			
 
				-directory of the appropriate architecture.
			
 
				-The generic mkfiles are stored in directory
			
 
				-<TT>/sys/src/cmd</TT>.
			
 
				-Mkfile
			
 
				-<TT>mkone</TT>
			
 
				-builds a single executable,
			
 
				-<TT>mkmany</TT>
			
 
				-builds several executables from the source in a single
			
 
				-directory, and
			
 
				-<TT>mklib</TT>
			
 
				-and
			
 
				-<TT>mksyslib</TT>,
			
 
				-maintain local and system libraries, respectively.
			
 
				-The rules in the generic mkfiles are driven by
			
 
				-the values of variables, some of which must be
			
 
				-set by the product mkfile and some of which are
			
 
				-supplied by the generic mkfile.  Variables in the
			
 
				-latter class include:
			
 
				-<br><img src="data.19114401.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-The following variables are set by the product mkfile
			
 
				-and used by the generic mkfile.
			
 
				-Any may be empty depending on the specific product being
			
 
				-made.
			
 
				-<br><img src="data.19114402.gif"><br>
			
 
				-<H4>Mkfile Organization
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-All
			
 
				-mkfiles
			
 
				-share the following common structure:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/<I>objtype/mkfile	# </I>architecture-dependent definitions<I>
			
 
				-<br>&#32;<br>
			
 
				-</I><I>variable definitions</I><I>		# TARG</I>, <I>OFILES</I>, <I>HFILES</I>, etc.<I>
			
 
				-<br>&#32;<br>
			
 
				-&lt;/sys/src/cmd/</I><I>generic</I><I>	# mkone</I>, <I>mkmany</I>, <I>mklib</I>, or <I>mksyslib
			
 
				-<br>&#32;<br>
			
 
				-</I><I>variable overrides</I><I>		# CFLAGS</I>, <I>objtype</I>, etc.<I>
			
 
				-<br>&#32;<br>
			
 
				-</I><I>extra rules</I><I>			# </I>overrides, augmented rules, additional targets<I>
			
 
				-</PRE></TT></DL>
			
 
				-Note that the architecture-dependent mkfiles include file
			
 
				-</I><TT>/sys/src/mkfile.proto</TT><I>
			
 
				-for system-wide variables that are common to all architectures.
			
 
				-</I><br>&#32;<br>
			
 
				-The variables driving the expansion of the generic mkfile
			
 
				-may be specified in any order as long as they are defined
			
 
				-before the inclusion of the generic mkfile.  The value
			
 
				-of a variable may be changed by assigning a new value
			
 
				-following the inclusion of the generic mkfile, but the
			
 
				-effects are sometimes counter-intuitive.
			
 
				-Such variable assignments do not apply to the target and
			
 
				-prerequisite portions of any previously defined rules;
			
 
				-the new values only apply to the recipes of rules preceding
			
 
				-the assignment statement and
			
 
				-to all parts of any rules following it.
			
 
				-<br>&#32;<br>
			
 
				-The rules supplied by the generic mkfile may
			
 
				-be overridden or augmented.  The new rules must
			
 
				-be specified after the inclusion of the generic
			
 
				-mkfile.  If the target and prerequisite portion
			
 
				-of the rule exactly match the target and prerequisite
			
 
				-portion of a previously defined rule and the new rule contains
			
 
				-a recipe, the new rule replaces the old one.
			
 
				-If the target of a new rule exactly matches the
			
 
				-target of a previous rule and one or more new
			
 
				-prerequisites are specified and the new rule contains
			
 
				-no recipe, the new prerequisites are added to the prerequisites
			
 
				-of the old rule.
			
 
				-<br>&#32;<br>
			
 
				-Following sections discuss
			
 
				-each generic mkfile in detail.
			
 
				-<H4>Mkone
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>mkone</TT>
			
 
				-generic mkfile contains rules for building
			
 
				-a single executable from one or more files
			
 
				-in a directory.
			
 
				-The variable
			
 
				-<TT>TARG</TT>
			
 
				-specifies the name of the executable and
			
 
				-variables
			
 
				-<TT>OFILES</TT>
			
 
				-and
			
 
				-<TT>YFILES</TT>
			
 
				-specify the object files and
			
 
				-<TT>yacc</TT>
			
 
				-source files used to build it.
			
 
				-<TT>HFILES</TT>
			
 
				-contains the names of the local header files
			
 
				-included in all source files.
			
 
				-<TT>BIN</TT>
			
 
				-is the name of the directory where the executable
			
 
				-is installed.
			
 
				-<TT>LIB</TT>
			
 
				-contains the names of local libraries used by the
			
 
				-linker.  This variable is rarely needed
			
 
				-as libraries referenced by a
			
 
				-<TT>#pragma</TT>
			
 
				-directive in an associated header file, including
			
 
				-all system libraries, are automatically
			
 
				-searched by the loader.
			
 
				-<br>&#32;<br>
			
 
				-If
			
 
				-<TT>mk</TT>
			
 
				-is executed without a target, the
			
 
				-<TT>all</TT>
			
 
				-target is built; it
			
 
				-produces an executable in
			
 
				-<TT></TT>O.out<TT>.
			
 
				-Variable
			
 
				-</TT><TT>HFILES</TT><TT>
			
 
				-identifies the header files that
			
 
				-are included in all or most of
			
 
				-the C source files.  Occasionally,
			
 
				-a program has other header files
			
 
				-that are only used in some
			
 
				-source files.  A
			
 
				-header can be added to the prerequisites for
			
 
				-those object files by adding a rule of
			
 
				-the following form following the inclusion of generic mkfile
			
 
				-</TT><TT>mkone</TT><TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-file.</TT><I>O:	header.h
			
 
				-</PRE></TT></DL>
			
 
				-</I><br>&#32;<br>
			
 
				-The mkfile for a directory producing a single
			
 
				-executable using the normal set of rules is
			
 
				-trivial: a list of some files followed by the
			
 
				-inclusion of
			
 
				-<I>mkone.</I>
			
 
				-For example, 
			
 
				-<TT>/sys/src/cmd/diff/mkfile</TT>
			
 
				-contains:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt; /objtype/mkfile
			
 
				-
			
 
				-TARG=diff
			
 
				-OFILES=\
			
 
				-	diffdir.<I>O\
			
 
				-	diffio.</I>O\
			
 
				-	diffreg.<I>O\
			
 
				-	main.</I>O\
			
 
				-
			
 
				-HFILES=diff.h
			
 
				-
			
 
				-BIN=/<I>objtype/bin
			
 
				-&lt;/sys/src/cmd/mkone
			
 
				-</PRE></TT></DL>
			
 
				-The more complex mkfile in
			
 
				-</I><TT>/sys/src/cmd/awk</TT><I>
			
 
				-overrides compiler and loader variables to
			
 
				-select the ANSI/POSIX Computing Environment with appropriately
			
 
				-defined command line variables.  It also overrides
			
 
				-the default
			
 
				-</I><TT>yacc</TT><I>
			
 
				-rule to place the output source in file
			
 
				-</I><TT>awkgram.c</TT><I>
			
 
				-and the
			
 
				-</I><TT>clean</TT><I>
			
 
				-and
			
 
				-</I><TT>nuke</TT><I>
			
 
				-rules, so it can remove the non-standard intermediate
			
 
				-files.  Finally, the last three rules build a version of
			
 
				-</I><TT>maketab</TT><I>
			
 
				-appropriate for the architecture where the
			
 
				-</I><TT>mk</TT><I>
			
 
				-is being
			
 
				-run and then executes it to create source file
			
 
				-</I><TT>proctab.c</TT><I>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/</I>objtype/mkfile
			
 
				-
			
 
				-TARG=awk
			
 
				-OFILES=re.<I>O\
			
 
				-	lex.</I>O\
			
 
				-	main.<I>O\
			
 
				-	parse.</I>O\
			
 
				-	proctab.<I>O\
			
 
				-	tran.</I>O\
			
 
				-	lib.<I>O\
			
 
				-	run.</I>O\
			
 
				-	awkgram.<I>O\
			
 
				-
			
 
				-HFILES=awk.h\
			
 
				-	y.tab.h\
			
 
				-	proto.h\
			
 
				-
			
 
				-YFILES=awkgram.y
			
 
				-
			
 
				-BIN=/</I>objtype/bin
			
 
				-&lt;/sys/src/cmd/mkone
			
 
				-CFLAGS=-c -D_REGEXP_EXTENSION -D_RESEARCH_SOURCE \
			
 
				-	-D_BSD_EXTENSION -DUTF
			
 
				-YFLAGS=-S -d -v
			
 
				-CC=pcc
			
 
				-LD=pcc
			
 
				-cpuobjtype=`{sed -n 's/^O=//p' /<I>cputype/mkfile}
			
 
				-
			
 
				-y.tab.h awkgram.c:	</I>YFILES
			
 
				-	<I>YACC -o awkgram.c </I>YFLAGS <I>prereq
			
 
				-
			
 
				-clean:V:
			
 
				-	rm -f *.[</I>OS] [<I>OS].out [</I>OS].maketab y.tab.? y.debug\
			
 
				-		 y.output <I>TARG
			
 
				-
			
 
				-nuke:V:
			
 
				-	rm -f *.[</I>OS] [<I>OS].out [</I>OS].maketab y.tab.? y.debug\
			
 
				-		 y.output awkgram.c <I>TARG
			
 
				-
			
 
				-proctab.c:	</I>cpuobjtype.maketab
			
 
				-	./<I>cpuobjtype.maketab &gt;proctab.c
			
 
				-
			
 
				-</I>cpuobjtype.maketab:	y.tab.h maketab.c
			
 
				-	objtype=<I>cputype
			
 
				-	mk maketab.</I>cputype
			
 
				-
			
 
				-maketab.<I>cputype:V:	y.tab.h maketab.</I>O
			
 
				-	<I>LD -o </I>O.maketab maketab.<I>O
			
 
				-</PRE></TT></DL>
			
 
				-</I><H4>Mkmany
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>mkmany</TT>
			
 
				-generic mkfile builds several
			
 
				-executables from the files in a
			
 
				-directory.  It differs from the operation of
			
 
				-<TT>mkone</TT>
			
 
				-in three respects:
			
 
				-<TT>TARG</TT>
			
 
				-specifies the names of all executables,
			
 
				-there is no default command-line target,
			
 
				-and additional rules allow a single executable to
			
 
				-be built or installed.
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>TARG</TT>
			
 
				-variable specifies the names of all
			
 
				-executables produced by the mkfile.  The
			
 
				-rules assume the name of each executable is also
			
 
				-the name of the file containing its
			
 
				-<TT>main</TT>
			
 
				-function.
			
 
				-<TT>OFILES</TT>
			
 
				-specifies files containing
			
 
				-common subroutines loaded with all executables.
			
 
				-Consider the mkfile:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/objtype/mkfile
			
 
				-
			
 
				-TARG=alpha beta
			
 
				-OFILES=common.<I>O
			
 
				-BIN=/</I>objtype/bin
			
 
				-&lt;/sys/src/cmd/mkmany
			
 
				-</PRE></TT></DL>
			
 
				-It assumes the main functions for executables
			
 
				-<TT>alpha</TT>
			
 
				-and
			
 
				-<TT>beta</TT>
			
 
				-are in files
			
 
				-<TT>alpha.</TT><I>O</I><TT>
			
 
				-and
			
 
				-</TT><TT>beta.</TT><TT>O</TT><TT>
			
 
				-and that both programs use the subroutines
			
 
				-in file
			
 
				-</TT><TT>common.</TT><I>O</I><TT>.
			
 
				-The
			
 
				-</TT><TT>all</TT><TT>
			
 
				-target builds all executables, leaving each in
			
 
				-a file with a name of the form
			
 
				-</TT><TT></TT><TT>O.</TT><I>progname</I><TT></TT><I>
			
 
				-where
			
 
				-</I><I>progname</I><I>
			
 
				-is the name of the executable.  In this
			
 
				-example the
			
 
				-</I><TT>all</TT><I>
			
 
				-target produces executables
			
 
				-</I><TT></TT><I>O.alpha</I><TT>
			
 
				-and 
			
 
				-</TT><TT></TT><TT>O.beta</TT><TT>.
			
 
				-</TT><br>&#32;<br>
			
 
				-The
			
 
				-<TT>mkmany</TT>
			
 
				-rules provide additional
			
 
				-targets for building a single
			
 
				-executable:
			
 
				-<br><img src="data.19114403.gif"><br>
			
 
				-<H4>Mklib
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>mklib</TT>
			
 
				-generic mkfile builds a local library.
			
 
				-Since this form of mkfile constructs no
			
 
				-executable, the
			
 
				-<TT>TARG</TT>
			
 
				-and
			
 
				-<TT>BIN</TT>
			
 
				-variables are not needed.  Instead, the
			
 
				-<TT>LIB</TT>
			
 
				-variable specifies the library
			
 
				-to be built or updated.  Variable
			
 
				-<TT>OFILES</TT>
			
 
				-contains the names of the object files to be archived
			
 
				-in the library.  The use of variables
			
 
				-<TT>YFILES</TT>
			
 
				-and
			
 
				-<TT>HFILES</TT>
			
 
				-does not change.  When possible, only the
			
 
				-out-of-date members of the library are updated.
			
 
				-<br>&#32;<br>
			
 
				-The variable
			
 
				-<TT>LIBDIR</TT>
			
 
				-contains the name of the directory where the
			
 
				-library is installed; by default it selects
			
 
				-the current directory.  It can be overridden
			
 
				-by assigning the new directory name after the
			
 
				-point where
			
 
				-<TT>mklib</TT>
			
 
				-is included.
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>clean</TT>
			
 
				-target removes object files and
			
 
				-<TT>yacc</TT>
			
 
				-intermediate files but does not touch the
			
 
				-library.  The
			
 
				-<TT>nuke</TT>
			
 
				-target removes the library as well as the
			
 
				-files removed by the
			
 
				-<TT>clean</TT>
			
 
				-target.  The command
			
 
				-<DL><DD>
			
 
				-<TT>mk -s clean all</TT>
			
 
				-</DL>
			
 
				-causes the existing library to be updated, or
			
 
				-created if it doesn't already exist.  The command
			
 
				-<DL><DD>
			
 
				-<TT>mk -s nuke all</TT>
			
 
				-</DL>
			
 
				-forces the library to be rebuilt from scratch.
			
 
				-<br>&#32;<br>
			
 
				-The mkfile from
			
 
				-<TT>/sys/src/cmd/upas/libString</TT>
			
 
				-contains the following specifications to
			
 
				-build the local library
			
 
				-<TT>libString.a</TT><I>O</I><TT>
			
 
				-for the object architecture referenced by
			
 
				-</TT><TT></TT><TT>O</TT>:<TT></TT>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/<I>objtype/mkfile
			
 
				-
			
 
				-LIB=libString.a</I>O
			
 
				-OFILES=	s_alloc.<I>O\
			
 
				-	s_append.</I>O\
			
 
				-	s_array.<I>O\
			
 
				-	s_copy.</I>O\
			
 
				-	s_getline.<I>O\
			
 
				-	s_grow.</I>O\
			
 
				-	s_nappend.<I>O\
			
 
				-	s_parse.</I>O\
			
 
				-	s_read.<I>O\
			
 
				-	s_read_line.</I>O\
			
 
				-	s_tolower.<I>O\
			
 
				-
			
 
				-&lt;/sys/src/cmd/mklib
			
 
				-
			
 
				-nuke:V:
			
 
				-	mk clean
			
 
				-	rm -f libString.a[</I>OS]
			
 
				-</PRE></TT></DL>
			
 
				-The override of the rule for target
			
 
				-<TT>nuke</TT>
			
 
				-removes the libraries for all architectures as
			
 
				-opposed to the default recipe for this target
			
 
				-which removes the library for the current architecture.
			
 
				-<H4>Mksyslib
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>mksyslib</TT>
			
 
				-generic mkfile is similar to the
			
 
				-<TT>mklib</TT>
			
 
				-mkfile except that it operates on a system library
			
 
				-instead of a local library.
			
 
				-The
			
 
				-<TT>install</TT>
			
 
				-and
			
 
				-<TT>all</TT>
			
 
				-targets are the same; since there is no local copy of
			
 
				-the library, all updates are performed on the
			
 
				-installed library.
			
 
				-The rule for the
			
 
				-<TT>nuke</TT>
			
 
				-target is identical to that of the
			
 
				-<TT>clean</TT>
			
 
				-target; unlike the
			
 
				-<TT>nuke</TT>
			
 
				-target for local libraries,
			
 
				-the library is never removed.
			
 
				-<br>&#32;<br>
			
 
				-No attempt is made to determine if individual library
			
 
				-members are up-to-date; all members of a
			
 
				-library are always updated.
			
 
				-Special targets support manipulation of a single
			
 
				-object file; the target
			
 
				-<TT>objfile</TT>
			
 
				-updates file
			
 
				-<TT>objfile</TT><TT>.</TT><I>O</I><TT></TT><I>
			
 
				-in the library of the current architecture and the target
			
 
				-</I><TT>objfile.all</TT><I>
			
 
				-updates
			
 
				-</I><TT>objfile</TT><TT>.</TT><TT>O</TT><TT></TT><TT>
			
 
				-in the libraries of all architectures.
			
 
				-</TT><H4>Overrides
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The rules provided by a generic mkfile or
			
 
				-the variables used to control the evaluation
			
 
				-of those rules may be overridden in most
			
 
				-circumstances.  Overrides
			
 
				-must be specified in the product mkfile
			
 
				-after the point where the generic
			
 
				-mkfile is included; in general, variable
			
 
				-and rule overrides occupy the end of a
			
 
				-product mkfile.
			
 
				-<br>&#32;<br>
			
 
				-The value of a variable is overridden by
			
 
				-assigning a new value to the variable.
			
 
				-Most variable overrides modify the
			
 
				-values of flags or the names of commands executed
			
 
				-in recipes.  For example, the default value of
			
 
				-<TT>CFLAGS</TT>
			
 
				-is often overridden or augmented and
			
 
				-the ANSI/POSIX Computing Environment is selected by
			
 
				-setting the
			
 
				-<TT>CC</TT>
			
 
				-and
			
 
				-<TT>LD</TT>
			
 
				-variables to
			
 
				-<TT>pcc.</TT>
			
 
				-<br>&#32;<br>
			
 
				-Modifying rules is trickier than modifying
			
 
				-variables.  Additional constraints can be added
			
 
				-to a rule by specifying the target and
			
 
				-the new prerequisite.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-%.<I>O:	header.h
			
 
				-</PRE></TT></DL>
			
 
				-adds file
			
 
				-</I><TT>header.h</TT><I>
			
 
				-to the set of prerequisites for all object files.
			
 
				-There is no mechanism for adding additional
			
 
				-commands to an existing recipe; if a
			
 
				-recipe is unsatisfactory, the rule and its recipe
			
 
				-must be completely overridden.
			
 
				-A rule is overridden only when the replacement rule
			
 
				-matches the target and prerequisite portions
			
 
				-of the original rule exactly.  The recipe
			
 
				-associated with the new rule
			
 
				-then replaces the recipe of the original rule.
			
 
				-For example,
			
 
				-</I><TT>/sys/src/cmd/lex/mkfile</TT><I>
			
 
				-overrides the default
			
 
				-</I><TT>installall</TT><I>
			
 
				-rule to perform the normal loop on all
			
 
				-architectures and then copy a prototype file
			
 
				-to the system library directory.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&lt;/</I>objtype/mkfile
			
 
				-
			
 
				-TARG=lex
			
 
				-OFILES=lmain.<I>O\
			
 
				-	y.tab.</I>O\
			
 
				-	sub1.<I>O\
			
 
				-	sub2.</I>O\
			
 
				-	header.<I>O\
			
 
				-
			
 
				-HFILES=ldefs.h\
			
 
				-
			
 
				-YFILES=parser.y\
			
 
				-
			
 
				-BIN=/</I>objtype/bin
			
 
				-&lt;/sys/src/cmd/mkone
			
 
				-
			
 
				-installall:V:
			
 
				-	for(objtype in <I>CPUS)
			
 
				-		mk install
			
 
				-	cp ncform /sys/lib/lex
			
 
				-</PRE></TT></DL>
			
 
				-Another way to perform the same override is to
			
 
				-add a dependency to the default
			
 
				-</I><TT>installall</TT><I>
			
 
				-rule that executes an additional rule to
			
 
				-install the prototype file:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-installall:V:	ncform.install
			
 
				-
			
 
				-ncform.install:V:
			
 
				-	cp ncform /sys/lib/lex
			
 
				-</PRE></TT></DL>
			
 
				-</I><H4>Special Tricks
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Two special cases
			
 
				-require extra deviousness.
			
 
				-<br>&#32;<br>
			
 
				-In the first, a file needed to build an
			
 
				-executable is generated by a program that,
			
 
				-in turn, is built from a source file that
			
 
				-is not part of the product.  In this case,
			
 
				-the
			
 
				-executable must be built for the
			
 
				-target architecture, but the intermediate
			
 
				-executable must be built for the architecture
			
 
				-<TT>mk</TT>
			
 
				-is executing on.  The intermediate executable
			
 
				-is built by recursively invoking
			
 
				-<TT>mk</TT>
			
 
				-with the appropriate target and the
			
 
				-executing architecture as the target
			
 
				-architecture.  When that
			
 
				-<TT>mk</TT>
			
 
				-completes, the intermediate is
			
 
				-executed to generate the source file to
			
 
				-complete the build for the target architecture.
			
 
				-The earlier example of
			
 
				-<TT>/sys/src/cmd/awk/mkfile</TT>
			
 
				-illustrates this technique.
			
 
				-<br>&#32;<br>
			
 
				-Another awkward situation
			
 
				-occurs when a directory contains
			
 
				-source to build an executable as
			
 
				-well as source for auxiliary executables
			
 
				-that are not to be installed.  In this case
			
 
				-the
			
 
				-<TT>mkmany</TT>
			
 
				-generic rules are inappropriate, because
			
 
				-all executables would be built and installed.
			
 
				-Instead, use the
			
 
				-<TT>mkone</TT>
			
 
				-generic file to build the primary executable
			
 
				-and provide extra targets to
			
 
				-build the auxiliary files.  This
			
 
				-approach is also useful when the auxiliary
			
 
				-files are not executables;
			
 
				-<TT>/sys/src/cmd/spell/mkfile</TT>
			
 
				-augments the default rules to build and install the
			
 
				-<TT>spell</TT>
			
 
				-executable with
			
 
				-elaborate rules to generate
			
 
				-and maintain the auxiliary spelling lists.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/names.html
+++ b/sys/doc/names.html
@@ -1,695 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>The Use of Name Spaces in Plan 9
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-Dave Presotto<br>
			
 
				-Ken Thompson<br>
			
 
				-Howard Trickey<br>
			
 
				-Phil Winterbottom<br>
			
 
				-Bell Laboratories, Murray Hill, NJ, 07974
			
 
				-USA<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Appeared in
			
 
				-Operating Systems Review,
			
 
				-Vol. 27, #2, April 1993, pp. 72-76
			
 
				-(reprinted from
			
 
				-Proceedings of the 5th ACM SIGOPS European Workshop,
			
 
				-Mont Saint-Michel, 1992, Paper n&#186; 34).
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-Plan 9 is a distributed system built at the Computing Sciences Research
			
 
				-Center of AT&amp;T Bell Laboratories (now Lucent Technologies, Bell Labs) over the last few years.
			
 
				-Its goal is to provide a production-quality system for software
			
 
				-development and general computation using heterogeneous hardware
			
 
				-and minimal software.  A Plan 9 system comprises CPU and file
			
 
				-servers in a central location connected together by fast networks.
			
 
				-Slower networks fan out to workstation-class machines that serve as
			
 
				-user terminals.  Plan 9 argues that given a few carefully
			
 
				-implemented abstractions
			
 
				-it is possible to
			
 
				-produce a small operating system that provides support for the largest systems
			
 
				-on a variety of architectures and networks. The foundations of the system are
			
 
				-built on two ideas: a per-process name space and a simple message-oriented 
			
 
				-file system protocol.
			
 
				-</DL>
			
 
				-<P>
			
 
				-The operating system for the CPU servers and terminals is
			
 
				-structured as a traditional kernel: a single compiled image
			
 
				-containing code for resource management, process control,
			
 
				-user processes,
			
 
				-virtual memory, and I/O.  Because the file server is a separate
			
 
				-machine, the file system is not compiled in, although the management
			
 
				-of the name space, a per-process attribute, is.
			
 
				-The entire kernel for the multiprocessor SGI Power Series machine
			
 
				-is 25000 lines of C,
			
 
				-the largest part of which is code for four networks including the
			
 
				-Ethernet with the Internet protocol suite.
			
 
				-Fewer than 1500 lines are machine-specific, and a
			
 
				-functional kernel with minimal I/O can be put together from
			
 
				-source files totaling 6000 lines. [Pike90]
			
 
				-</P>
			
 
				-<P>
			
 
				-The system is relatively small for several reasons.
			
 
				-First, it is all new: it has not had time to accrete as many fixes
			
 
				-and features as other systems.
			
 
				-Also, other than the network protocol, it adheres to no
			
 
				-external interface; in particular, it is not Unix-compatible.
			
 
				-Economy stems from careful selection of services and interfaces.
			
 
				-Finally, wherever possible the system is built around
			
 
				-two simple ideas:
			
 
				-every resource in the system, either local or remote,
			
 
				-is represented by a hierarchical file system; and
			
 
				-a user or process
			
 
				-assembles a private view of the system by constructing a file
			
 
				-name space
			
 
				-that connects these resources. [Needham]
			
 
				-</P>
			
 
				-<H4>File Protocol
			
 
				-</H4>
			
 
				-<P>
			
 
				-All resources in Plan 9 look like file systems.
			
 
				-That does not mean that they are repositories for
			
 
				-permanent files on disk, but that the interface to them
			
 
				-is file-oriented: finding files (resources) in a hierarchical
			
 
				-name tree, attaching to them by name, and accessing their contents
			
 
				-by read and write calls.
			
 
				-There are dozens of file system types in Plan 9, but only a few
			
 
				-represent traditional files.
			
 
				-At this level of abstraction, files in Plan 9 are similar
			
 
				-to objects, except that files are already provided with naming,
			
 
				-access, and protection methods that must be created afresh for
			
 
				-objects.  Object-oriented readers may approach the rest of this
			
 
				-paper as a study in how to make objects look like files.
			
 
				-</P>
			
 
				-<P>
			
 
				-The interface to file systems is defined by a protocol, called 9P,
			
 
				-analogous but not very similar to the NFS protocol.
			
 
				-The protocol talks about files, not blocks; given a connection to the root
			
 
				-directory of a file server,
			
 
				-the 9P messages navigate the file hierarchy, open files for I/O,
			
 
				-and read or write arbitrary bytes in the files.
			
 
				-9P contains 17 message types: three for
			
 
				-initializing and
			
 
				-authenticating a connection and fourteen for manipulating objects.
			
 
				-The messages are generated by the kernel in response to user- or
			
 
				-kernel-level I/O requests.
			
 
				-Here is a quick tour of the major message types.
			
 
				-The
			
 
				-<TT>auth</TT>
			
 
				-and
			
 
				-<TT>attach</TT>
			
 
				-messages authenticate a connection, established by means outside 9P,
			
 
				-and validate its user.
			
 
				-The result is an authenticated
			
 
				-<I>channel</I>
			
 
				-that points to the root of the
			
 
				-server.
			
 
				-The
			
 
				-<TT>clone</TT>
			
 
				-message makes a new channel identical to an existing channel,
			
 
				-which may be moved to a file on the server using a
			
 
				-<TT>walk</TT>
			
 
				-message to descend each level in the hierarchy.
			
 
				-The
			
 
				-<TT>stat</TT>
			
 
				-and
			
 
				-<TT>wstat</TT>
			
 
				-messages read and write the attributes of the file pointed to by a channel.
			
 
				-The
			
 
				-<TT>open</TT>
			
 
				-message prepares a channel for subsequent
			
 
				-<TT>read</TT>
			
 
				-and
			
 
				-<TT>write</TT>
			
 
				-messages to access the contents of the file, while
			
 
				-<TT>create</TT>
			
 
				-and
			
 
				-<TT>remove</TT>
			
 
				-perform, on the files, the actions implied by their names.
			
 
				-The
			
 
				-<TT>clunk</TT>
			
 
				-message discards a channel without affecting the file.
			
 
				-None of the 9P messages consider caching; file caches are provided,
			
 
				-when needed, either within the server (centralized caching)
			
 
				-or by implementing the cache as a transparent file system between the
			
 
				-client and the 9P connection to the server (client caching).
			
 
				-</P>
			
 
				-<P>
			
 
				-For efficiency, the connection to local
			
 
				-kernel-resident file systems, misleadingly called
			
 
				-<I>devices,</I>
			
 
				-is by regular rather than remote procedure calls.
			
 
				-The procedures map one-to-one with 9P message  types.
			
 
				-Locally each channel has an associated data structure
			
 
				-that holds a type field used to index
			
 
				-a table of procedure calls, one set per file system type,
			
 
				-analogous to selecting the method set for an object. 
			
 
				-One kernel-resident file system, the
			
 
				-mount device,
			
 
				-translates the local 9P procedure calls into RPC messages to
			
 
				-remote services over a separately provided transport protocol
			
 
				-such as TCP or IL, a new reliable datagram protocol, or over a pipe to
			
 
				-a user process.
			
 
				-Write and read calls transmit the messages over the transport layer.
			
 
				-The mount device is the sole bridge between the procedural
			
 
				-interface seen by user programs and remote and user-level services.
			
 
				-It does all associated marshaling, buffer
			
 
				-management, and multiplexing and is
			
 
				-the only integral RPC mechanism in Plan 9.
			
 
				-The mount device is in effect a proxy object.
			
 
				-There is no RPC stub compiler; instead the mount driver and
			
 
				-all servers just share a library that packs and unpacks 9P messages.
			
 
				-</P>
			
 
				-<H4>Examples
			
 
				-</H4>
			
 
				-<P>
			
 
				-One file system type serves
			
 
				-permanent files from the main file server,
			
 
				-a stand-alone multiprocessor system with a
			
 
				-350-gigabyte
			
 
				-optical WORM jukebox that holds the data, fronted by a two-level
			
 
				-block cache comprising 7 gigabytes of
			
 
				-magnetic disk and 128 megabytes of RAM.
			
 
				-Clients connect to the file server using any of a variety of
			
 
				-networks and protocols and access files using 9P.
			
 
				-The file server runs a distinct operating system and has no
			
 
				-support for user processes; other than a restricted set of commands
			
 
				-available on the console, all it does is answer 9P messages from clients.
			
 
				-</P>
			
 
				-<P>
			
 
				-Once a day, at 5:00 AM,
			
 
				-the file server sweeps through the cache blocks and marks dirty blocks
			
 
				-copy-on-write.
			
 
				-It creates a copy of the root directory
			
 
				-and labels it with the current date, for example
			
 
				-<TT>1995/0314</TT>.
			
 
				-It then starts a background process to copy the dirty blocks to the WORM.
			
 
				-The result is that the server retains an image of the file system as it was
			
 
				-early each morning.
			
 
				-The set of old root directories is accessible using 9P, so a client
			
 
				-may examine backup files using ordinary commands.
			
 
				-Several advantages stem from having the backup service implemented
			
 
				-as a plain file system.
			
 
				-Most obviously, ordinary commands can access them.
			
 
				-For example, to see when a bug was fixed
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-grep 'mouse bug fix' 1995/*/sys/src/cmd/8&#189;/file.c
			
 
				-</PRE></TT></DL>
			
 
				-The owner, access times, permissions, and other properties of the
			
 
				-files are also backed up.
			
 
				-Because it is a file system, the backup
			
 
				-still has protections;
			
 
				-it is not possible to subvert security by looking at the backup.
			
 
				-</P>
			
 
				-<P>
			
 
				-The file server is only one type of file system.
			
 
				-A number of unusual services are provided within the kernel as
			
 
				-local file systems.
			
 
				-These services are not limited to I/O devices such
			
 
				-as disks.  They include network devices and their associated protocols,
			
 
				-the bitmap display and mouse,
			
 
				-a representation of processes similar to
			
 
				-<TT>/proc</TT>
			
 
				-[Killian], the name/value pairs that form the `environment'
			
 
				-passed to a new process, profiling services,
			
 
				-and other resources.
			
 
				-Each of these is represented as a file system &#8212;
			
 
				-directories containing sets of files &#8212;
			
 
				-but the constituent files do not represent permanent storage on disk.
			
 
				-Instead, they are closer in properties to UNIX device files.
			
 
				-</P>
			
 
				-<P>
			
 
				-For example, the
			
 
				-<I>console</I>
			
 
				-device contains the file
			
 
				-<TT>/dev/cons</TT>,
			
 
				-similar to the UNIX file
			
 
				-<TT>/dev/console</TT>:
			
 
				-when written,
			
 
				-<TT>/dev/cons</TT>
			
 
				-appends to the console typescript; when read,
			
 
				-it returns characters typed on the keyboard.
			
 
				-Other files in the console device include
			
 
				-<TT>/dev/time</TT>,
			
 
				-the number of seconds since the epoch,
			
 
				-<TT>/dev/cputime</TT>,
			
 
				-the computation time used by the process reading the device,
			
 
				-<TT>/dev/pid</TT>,
			
 
				-the process id of the process reading the device, and
			
 
				-<TT>/dev/user</TT>,
			
 
				-the login name of the user accessing the device.
			
 
				-All these files contain text, not binary numbers,
			
 
				-so their use is free of byte-order problems.
			
 
				-Their contents are synthesized on demand when read; when written,
			
 
				-they cause modifications to kernel data structures.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<I>process</I>
			
 
				-device contains one directory per live local process, named by its numeric
			
 
				-process id:
			
 
				-<TT>/proc/1</TT>,
			
 
				-<TT>/proc/2</TT>,
			
 
				-etc.
			
 
				-Each directory contains a set of files that access the process.
			
 
				-For example, in each directory the file
			
 
				-<TT>mem</TT>
			
 
				-is an image of the virtual memory of the process that may be read or
			
 
				-written for debugging.
			
 
				-The
			
 
				-<TT>text</TT>
			
 
				-file is a sort of link to the file from which the process was executed;
			
 
				-it may be opened to read the symbol tables for the process.
			
 
				-The
			
 
				-<TT>ctl</TT>
			
 
				-file may be written textual messages such as
			
 
				-<TT>stop</TT>
			
 
				-or
			
 
				-<TT>kill</TT>
			
 
				-to control the execution of the process.
			
 
				-The
			
 
				-<TT>status</TT>
			
 
				-file contains a fixed-format line of text containing information about
			
 
				-the process: its name, owner, state, and so on.
			
 
				-Text strings written to the
			
 
				-<TT>note</TT>
			
 
				-file are delivered to the process as
			
 
				-<I>notes,</I>
			
 
				-analogous to UNIX signals.
			
 
				-By providing these services as textual I/O on files rather
			
 
				-than as system calls (such as
			
 
				-<TT>kill</TT>)
			
 
				-or special-purpose operations (such as
			
 
				-<TT>ptrace</TT>),
			
 
				-the Plan 9 process device simplifies the implementation of
			
 
				-debuggers and related programs.
			
 
				-For example, the command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cat /proc/*/status
			
 
				-</PRE></TT></DL>
			
 
				-is a crude form of the
			
 
				-<TT>ps</TT>
			
 
				-command; the actual
			
 
				-<TT>ps</TT>
			
 
				-merely reformats the data so obtained.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<I>bitmap</I>
			
 
				-device contains three files,
			
 
				-<TT>/dev/mouse</TT>,
			
 
				-<TT>/dev/screen</TT>,
			
 
				-and
			
 
				-<TT>/dev/bitblt</TT>,
			
 
				-that provide an interface to the local bitmap display (if any) and pointing device.
			
 
				-The
			
 
				-<TT>mouse</TT>
			
 
				-file returns a fixed-format record containing
			
 
				-1 byte of button state and 4 bytes each of
			
 
				-<I>x</I>
			
 
				-and
			
 
				-<I>y</I>
			
 
				-position of the mouse.
			
 
				-If the mouse has not moved since the file was last read, a subsequent read will
			
 
				-block.
			
 
				-The
			
 
				-<TT>screen</TT>
			
 
				-file contains a memory image of the contents of the display;
			
 
				-the
			
 
				-<TT>bitblt</TT>
			
 
				-file provides a procedural interface.
			
 
				-Calls to the graphics library are translated into messages that are written
			
 
				-to the
			
 
				-<TT>bitblt</TT>
			
 
				-file to perform bitmap graphics operations.  (This is essentially a nested
			
 
				-RPC protocol.)
			
 
				-</P>
			
 
				-<P>
			
 
				-The various services being used by a process are gathered together into the
			
 
				-process's
			
 
				-name space,
			
 
				-a single rooted hierarchy of file names.
			
 
				-When a process forks, the child process shares the name space with the parent.
			
 
				-Several system calls manipulate name spaces.
			
 
				-Given a file descriptor
			
 
				-<TT>fd</TT>
			
 
				-that holds an open communications channel to a service,
			
 
				-the call
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mount(int fd, char *old, int flags)
			
 
				-</PRE></TT></DL>
			
 
				-authenticates the user and attaches the file tree of the service to
			
 
				-the directory named by
			
 
				-<TT>old</TT>.
			
 
				-The
			
 
				-<TT>flags</TT>
			
 
				-specify how the tree is to be attached to
			
 
				-<TT>old</TT>:
			
 
				-replacing the current contents or appearing before or after the
			
 
				-current contents of the directory.
			
 
				-A directory with several services mounted is called a
			
 
				-<I>union</I>
			
 
				-directory and is searched in the specified order.
			
 
				-The call
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-bind(char *new, char *old, int flags)
			
 
				-</PRE></TT></DL>
			
 
				-takes the portion of the existing name space visible at
			
 
				-<TT>new</TT>,
			
 
				-either a file or a directory, and makes it also visible at
			
 
				-<TT>old</TT>.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-bind("1995/0301/sys/include", "/sys/include", REPLACE)
			
 
				-</PRE></TT></DL>
			
 
				-causes the directory of include files to be overlaid with its
			
 
				-contents from the dump on March first.
			
 
				-</P>
			
 
				-<P>
			
 
				-A process is created by the
			
 
				-<TT>rfork</TT>
			
 
				-system call, which takes as argument a bit vector defining which
			
 
				-attributes of the process are to be shared between parent
			
 
				-and child instead of copied.
			
 
				-One of the attributes is the name space: when shared, changes
			
 
				-made by either process are visible in the other; when copied,
			
 
				-changes are independent.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although there is no global name space,
			
 
				-for a process to function sensibly the local name spaces must adhere
			
 
				-to global conventions. 
			
 
				-Nonetheless, the use of local name spaces is critical to the system.
			
 
				-Both these ideas are illustrated by the use of the name space to
			
 
				-handle heterogeneity.
			
 
				-The binaries for a given architecture are contained in a directory
			
 
				-named by the architecture, for example
			
 
				-<TT>/mips/bin</TT>;
			
 
				-in use, that directory is bound to the conventional location
			
 
				-<TT>/bin</TT>.
			
 
				-Programs such as shell scripts need not know the CPU type they are
			
 
				-executing on to find binaries to run.
			
 
				-A directory of private binaries
			
 
				-is usually unioned with
			
 
				-<TT>/bin</TT>.
			
 
				-(Compare this to the
			
 
				-ad hoc
			
 
				-and special-purpose idea of the
			
 
				-<TT>PATH</TT>
			
 
				-variable, which is not used in the Plan 9 shell.)
			
 
				-Local bindings are also helpful for debugging, for example by binding
			
 
				-an old library to the standard place and linking a program to see
			
 
				-if recent changes to the library are responsible for a bug in the program.
			
 
				-</P>
			
 
				-<P>
			
 
				-The window system,
			
 
				-<TT>8&#189;</TT>
			
 
				-[Pike91], is a server for files such as
			
 
				-<TT>/dev/cons</TT>
			
 
				-and
			
 
				-<TT>/dev/bitblt</TT>.
			
 
				-Each client sees a distinct copy of these files in its local
			
 
				-name space: there are many instances of
			
 
				-<TT>/dev/cons</TT>,
			
 
				-each served by
			
 
				-<TT>8&#189;</TT>
			
 
				-to the local name space of a window.
			
 
				-Again,
			
 
				-<TT>8&#189;</TT>
			
 
				-implements services using
			
 
				-local name spaces plus the use
			
 
				-of I/O to conventionally named files.
			
 
				-Each client just connects its standard input, output, and error files
			
 
				-to
			
 
				-<TT>/dev/cons</TT>,
			
 
				-with analogous operations to access bitmap graphics.
			
 
				-Compare this to the implementation of
			
 
				-<TT>/dev/tty</TT>
			
 
				-on UNIX, which is done by special code in the kernel
			
 
				-that overloads the file, when opened,
			
 
				-with the standard input or output of the process.
			
 
				-Special arrangement must be made by a UNIX window system for
			
 
				-<TT>/dev/tty</TT>
			
 
				-to behave as expected;
			
 
				-<TT>8&#189;</TT>
			
 
				-instead uses the provision of the corresponding file as its
			
 
				-central idea, which to succeed depends critically on local name spaces.
			
 
				-</P>
			
 
				-<P>
			
 
				-The environment
			
 
				-<TT>8&#189;</TT>
			
 
				-provides its clients is exactly the environment under which it is implemented:
			
 
				-a conventional set of files in
			
 
				-<TT>/dev</TT>.
			
 
				-This permits the window system to be run recursively in one of its own
			
 
				-windows, which is handy for debugging.
			
 
				-It also means that if the files are exported to another machine,
			
 
				-as described below, the window system or client applications may be
			
 
				-run transparently on remote machines, even ones without graphics hardware.
			
 
				-This mechanism is used for Plan 9's implementation of the X window
			
 
				-system: X is run as a client of
			
 
				-<TT>8&#189;</TT>,
			
 
				-often on a remote machine with lots of memory.
			
 
				-In this configuration, using Ethernet to connect
			
 
				-MIPS machines, we measure only a 10% degradation in graphics
			
 
				-performance relative to running X on
			
 
				-a bare Plan 9 machine.
			
 
				-</P>
			
 
				-<P>
			
 
				-An unusual application of these ideas is a statistics-gathering
			
 
				-file system implemented by a command called
			
 
				-<TT>iostats</TT>.
			
 
				-The command encapsulates a process in a local name space, monitoring 9P
			
 
				-requests from the process to the outside world &#8212; the name space in which
			
 
				-<TT>iostats</TT>
			
 
				-is itself running.  When the command completes,
			
 
				-<TT>iostats</TT>
			
 
				-reports usage and performance figures for file activity.
			
 
				-For example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-iostats 8&#189;
			
 
				-</PRE></TT></DL>
			
 
				-can be used to discover how much I/O the window system
			
 
				-does to the bitmap device, font files, and so on.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>import</TT>
			
 
				-command connects a piece of name space from a remote system
			
 
				-to the local name space.
			
 
				-Its implementation is to dial the remote machine and start
			
 
				-a process there that serves the remote name space using 9P.
			
 
				-It then calls
			
 
				-<TT>mount</TT>
			
 
				-to attach the connection to the name space and finally dies;
			
 
				-the remote process continues to serve the files.
			
 
				-One use is to access devices not available
			
 
				-locally.  For example, to write a floppy one may say
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-import lab.pc /a: /n/dos
			
 
				-cp foo /n/dos/bar
			
 
				-</PRE></TT></DL>
			
 
				-The call to
			
 
				-<TT>import</TT>
			
 
				-connects the file tree from
			
 
				-<TT>/a:</TT>
			
 
				-on the machine
			
 
				-<TT>lab.pc</TT>
			
 
				-(which must support 9P) to the local directory
			
 
				-<TT>/n/dos</TT>.
			
 
				-Then the file
			
 
				-<TT>foo</TT>
			
 
				-can be written to the floppy just by copying it across.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another application is remote debugging:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-import helix /proc
			
 
				-</PRE></TT></DL>
			
 
				-makes the process file system on machine
			
 
				-<TT>helix</TT>
			
 
				-available locally; commands such as
			
 
				-<TT>ps</TT>
			
 
				-then see
			
 
				-<TT>helix</TT>'s
			
 
				-processes instead of the local ones.
			
 
				-The debugger may then look at a remote process:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-db /proc/27/text /proc/27/mem
			
 
				-</PRE></TT></DL>
			
 
				-allows breakpoint debugging of the remote process.
			
 
				-Since
			
 
				-<TT>db</TT>
			
 
				-infers the CPU type of the process from the executable header on
			
 
				-the text file, it supports
			
 
				-cross-architecture debugging, too.
			
 
				-Care is taken within
			
 
				-<TT>db</TT>
			
 
				-to handle issues of byte order and floating point; it is possible to
			
 
				-breakpoint debug a big-endian MIPS process from a little-endian i386.
			
 
				-</P>
			
 
				-<P>
			
 
				-Network interfaces are also implemented as file systems [Presotto].
			
 
				-For example,
			
 
				-<TT>/net/tcp</TT>
			
 
				-is a directory somewhat like
			
 
				-<TT>/proc</TT>:
			
 
				-it contains a set of numbered directories, one per connection,
			
 
				-each of which contains files to control and communicate on the connection.
			
 
				-A process allocates a new connection by accessing
			
 
				-<TT>/net/tcp/clone</TT>,
			
 
				-which evaluates to the directory of an unused connection.
			
 
				-To make a call, the process writes a textual message such as
			
 
				-<TT>'connect</TT>
			
 
				-<TT>135.104.53.2!512'</TT>
			
 
				-to the
			
 
				-<TT>ctl</TT>
			
 
				-file and then reads and writes the
			
 
				-<TT>data</TT>
			
 
				-file.
			
 
				-An
			
 
				-<TT>rlogin</TT>
			
 
				-service can be implemented in a few lines of shell code.
			
 
				-</P>
			
 
				-<P>
			
 
				-This structure makes network gatewaying easy to provide.
			
 
				-We have machines with Datakit interfaces but no Internet interface.
			
 
				-On such a machine one may type
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-import helix /net
			
 
				-telnet tcp!ai.mit.edu
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>import</TT>
			
 
				-uses Datakit to pull in the TCP interface from
			
 
				-<TT>helix</TT>,
			
 
				-which can then be used directly; the
			
 
				-<TT>tcp!</TT>
			
 
				-notation is necessary because we routinely use multiple networks
			
 
				-and protocols on Plan 9&#8212;it identifies the network in which
			
 
				-<TT>ai.mit.edu</TT>
			
 
				-is a valid name.
			
 
				-</P>
			
 
				-<P>
			
 
				-In practice we do not use
			
 
				-<TT>rlogin</TT>
			
 
				-or
			
 
				-<TT>telnet</TT>
			
 
				-between Plan 9 machines.  Instead a command called
			
 
				-<TT>cpu</TT>
			
 
				-in effect replaces the CPU in a window with that
			
 
				-on another machine, typically a fast multiprocessor CPU server.
			
 
				-The implementation is to recreate the
			
 
				-name space on the remote machine, using the equivalent of
			
 
				-<TT>import</TT>
			
 
				-to connect pieces of the terminal's name space to that of
			
 
				-the process (shell) on the CPU server, making the terminal
			
 
				-a file server for the CPU.
			
 
				-CPU-local devices such as fast file system connections
			
 
				-are still local; only terminal-resident devices are
			
 
				-imported.
			
 
				-The result is unlike UNIX
			
 
				-<TT>rlogin</TT>,
			
 
				-which moves into a distinct name space on the remote machine,
			
 
				-or file sharing with
			
 
				-<TT>NFS</TT>,
			
 
				-which keeps the name space the same but forces processes to execute
			
 
				-locally.
			
 
				-Bindings in
			
 
				-<TT>/bin</TT>
			
 
				-may change because of a change in CPU architecture, and
			
 
				-the networks involved may be different because of differing hardware,
			
 
				-but the effect feels like simply speeding up the processor in the
			
 
				-current name space.
			
 
				-</P>
			
 
				-<H4>Position
			
 
				-</H4>
			
 
				-<P>
			
 
				-These examples illustrate how the ideas of representing resources
			
 
				-as file systems and per-process name spaces can be used to solve
			
 
				-problems often left to more exotic mechanisms.
			
 
				-Nonetheless there are some operations in Plan 9 that are not
			
 
				-mapped into file I/O.
			
 
				-An example is process creation.
			
 
				-We could imagine a message to a control file in
			
 
				-<TT>/proc</TT>
			
 
				-that creates a process, but the details of
			
 
				-constructing the environment of the new process &#8212; its open files,
			
 
				-name space, memory image, etc. &#8212; are too intricate to
			
 
				-be described easily in a simple I/O operation.
			
 
				-Therefore new processes on Plan 9 are created by fairly conventional
			
 
				-<TT>rfork</TT>
			
 
				-and
			
 
				-<TT>exec</TT>
			
 
				-system calls;
			
 
				-<TT>/proc</TT>
			
 
				-is used only to represent and control existing processes.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 does not attempt to map network name spaces into the file
			
 
				-system name space, for several reasons.
			
 
				-The different addressing rules for various networks and protocols
			
 
				-cannot be mapped uniformly into a hierarchical file name space.
			
 
				-Even if they could be,
			
 
				-the various mechanisms to authenticate,
			
 
				-select a service,
			
 
				-and control the connection would not map consistently into
			
 
				-operations on a file.
			
 
				-</P>
			
 
				-<P>
			
 
				-Shared memory is another resource not adequately represented by a
			
 
				-file name space.
			
 
				-Plan 9 takes care to provide mechanisms
			
 
				-to allow groups of local processes to share and map memory.
			
 
				-Memory is controlled
			
 
				-by system calls rather than special files, however,
			
 
				-since a representation in the file system would imply that memory could
			
 
				-be imported from remote machines.
			
 
				-</P>
			
 
				-<P>
			
 
				-Despite these limitations, file systems and name spaces offer an effective
			
 
				-model around which to build a distributed system.
			
 
				-Used well, they can provide a uniform, familiar, transparent
			
 
				-interface to a diverse set of distributed resources.
			
 
				-They carry well-understood properties of access, protection,
			
 
				-and naming.
			
 
				-The integration of devices into the hierarchical file system
			
 
				-was the best idea in UNIX.
			
 
				-Plan 9 pushes the concepts much further and shows that
			
 
				-file systems, when used inventively, have plenty of scope
			
 
				-for productive research.
			
 
				-</P>
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Killian] T. Killian, ``Processes as Files'', USENIX Summer Conf. Proc., Salt Lake City, 1984
			
 
				-<br>
			
 
				-[Needham] R. Needham, ``Names'', in
			
 
				-Distributed systems,
			
 
				-S. Mullender, ed.,
			
 
				-Addison Wesley, 1989
			
 
				-<br>
			
 
				-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
			
 
				-``Plan 9 from Bell Labs'',
			
 
				-UKUUG Proc. of the Summer 1990 Conf.,
			
 
				-London, England,
			
 
				-1990
			
 
				-<br>
			
 
				-[Presotto] D. Presotto, ``Multiprocessor Streams for Plan 9'',
			
 
				-UKUUG Proc. of the Summer 1990 Conf.,
			
 
				-London, England,
			
 
				-1990
			
 
				-<br>
			
 
				-[Pike91] Pike, R., ``8.5, The Plan 9 Window System'', USENIX Summer
			
 
				-Conf. Proc., Nashville, 1991
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/net/net.html
+++ b/sys/doc/net/net.html
@@ -1,1379 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>The Organization of Networks in Plan 9
			
 
				-</H1>
			
 
				-<DL><DD><I>Dave Presotto<br>
			
 
				-Phil Winterbottom<br>
			
 
				-<br>&#32;<br>
			
 
				-presotto,philw@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Originally appeared in
			
 
				-Proc. of the Winter 1993 USENIX Conf.,
			
 
				-pp. 271-280,
			
 
				-San Diego, CA
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-In a distributed system networks are of paramount importance. This
			
 
				-paper describes the implementation, design philosophy, and organization
			
 
				-of network support in Plan 9. Topics include network requirements
			
 
				-for distributed systems, our kernel implementation, network naming, user interfaces,
			
 
				-and performance. We also observe that much of this organization is relevant to
			
 
				-current systems.
			
 
				-</DL>
			
 
				-<H4>1 Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 [Pike90] is a general-purpose, multi-user, portable distributed system
			
 
				-implemented on a variety of computers and networks.
			
 
				-What distinguishes Plan 9 is its organization.
			
 
				-The goals of this organization were to
			
 
				-reduce administration
			
 
				-and to promote resource sharing. One of the keys to its success as a distributed
			
 
				-system is the organization and management of its networks.
			
 
				-</P>
			
 
				-<P>
			
 
				-A Plan 9 system comprises file servers, CPU servers and terminals.
			
 
				-The file servers and CPU servers are typically centrally
			
 
				-located multiprocessor machines with large memories and
			
 
				-high speed interconnects.
			
 
				-A variety of workstation-class machines
			
 
				-serve as terminals
			
 
				-connected to the central servers using several networks and protocols.
			
 
				-The architecture of the system demands a hierarchy of network
			
 
				-speeds matching the needs of the components.
			
 
				-Connections between file servers and CPU servers are high-bandwidth point-to-point
			
 
				-fiber links.
			
 
				-Connections from the servers fan out to local terminals
			
 
				-using medium speed networks
			
 
				-such as Ethernet [Met80] and Datakit [Fra80].
			
 
				-Low speed connections via the Internet and
			
 
				-the AT&amp;T backbone serve users in Oregon and Illinois.
			
 
				-Basic Rate ISDN data service and 9600 baud serial lines provide slow
			
 
				-links to users at home.
			
 
				-</P>
			
 
				-<P>
			
 
				-Since CPU servers and terminals use the same kernel,
			
 
				-users may choose to run programs locally on
			
 
				-their terminals or remotely on CPU servers.
			
 
				-The organization of Plan 9 hides the details of system connectivity
			
 
				-allowing both users and administrators to configure their environment
			
 
				-to be as distributed or centralized as they wish.
			
 
				-Simple commands support the
			
 
				-construction of a locally represented name space
			
 
				-spanning many machines and networks.
			
 
				-At work, users tend to use their terminals like workstations,
			
 
				-running interactive programs locally and
			
 
				-reserving the CPU servers for data or compute intensive jobs
			
 
				-such as compiling and computing chess endgames.
			
 
				-At home or when connected over
			
 
				-a slow network, users tend to do most work on the CPU server to minimize
			
 
				-traffic on the slow links.
			
 
				-The goal of the network organization is to provide the same
			
 
				-environment to the user wherever resources are used.
			
 
				-</P>
			
 
				-<H4>2 Kernel Network Support
			
 
				-</H4>
			
 
				-<P>
			
 
				-Networks play a central role in any distributed system. This is particularly
			
 
				-true in Plan 9 where most resources are provided by servers external to the kernel.
			
 
				-The importance of the networking code within the kernel
			
 
				-is reflected by its size;
			
 
				-of 25,000 lines of kernel code, 12,500 are network and protocol related.
			
 
				-Networks are continually being added and the fraction of code
			
 
				-devoted to communications
			
 
				-is growing.
			
 
				-Moreover, the network code is complex.
			
 
				-Protocol implementations consist almost entirely of
			
 
				-synchronization and dynamic memory management, areas demanding 
			
 
				-subtle error recovery
			
 
				-strategies.
			
 
				-The kernel currently supports Datakit, point-to-point fiber links,
			
 
				-an Internet (IP) protocol suite and ISDN data service.
			
 
				-The variety of networks and machines
			
 
				-has raised issues not addressed by other systems running on commercial
			
 
				-hardware supporting only Ethernet or FDDI.
			
 
				-</P>
			
 
				-<H4>2.1 The File System protocol
			
 
				-</H4>
			
 
				-<P>
			
 
				-A central idea in Plan 9 is the representation of a resource as a hierarchical
			
 
				-file system.
			
 
				-Each process assembles a view of the system by building a
			
 
				-<I>name space</I>
			
 
				-[Needham] connecting its resources.
			
 
				-File systems need not represent disc files; in fact, most Plan 9 file systems have no
			
 
				-permanent storage.
			
 
				-A typical file system dynamically represents
			
 
				-some resource like a set of network connections or the process table.
			
 
				-Communication between the kernel, device drivers, and local or remote file servers uses a
			
 
				-protocol called 9P. The protocol consists of 17 messages
			
 
				-describing operations on files and directories.
			
 
				-Kernel resident device and protocol drivers use a procedural version
			
 
				-of the protocol while external file servers use an RPC form.
			
 
				-Nearly all traffic between Plan 9 systems consists
			
 
				-of 9P messages.
			
 
				-9P relies on several properties of the underlying transport protocol.
			
 
				-It assumes messages arrive reliably and in sequence and
			
 
				-that delimiters between messages
			
 
				-are preserved.
			
 
				-When a protocol does not meet these
			
 
				-requirements (for example, TCP does not preserve delimiters)
			
 
				-we provide mechanisms to marshal messages before handing them
			
 
				-to the system.
			
 
				-</P>
			
 
				-<P>
			
 
				-A kernel data structure, the
			
 
				-<I>channel</I>,
			
 
				-is a handle to a file server.
			
 
				-Operations on a channel generate the following 9P messages.
			
 
				-The
			
 
				-<TT>session</TT>
			
 
				-and
			
 
				-<TT>attach</TT>
			
 
				-messages authenticate a connection, established by means external to 9P,
			
 
				-and validate its user.
			
 
				-The result is an authenticated
			
 
				-channel
			
 
				-referencing the root of the
			
 
				-server.
			
 
				-The
			
 
				-<TT>clone</TT>
			
 
				-message makes a new channel identical to an existing channel, much like
			
 
				-the
			
 
				-<TT>dup</TT>
			
 
				-system call.
			
 
				-A
			
 
				-channel
			
 
				-may be moved to a file on the server using a
			
 
				-<TT>walk</TT>
			
 
				-message to descend each level in the hierarchy.
			
 
				-The
			
 
				-<TT>stat</TT>
			
 
				-and
			
 
				-<TT>wstat</TT>
			
 
				-messages read and write the attributes of the file referenced by a channel.
			
 
				-The
			
 
				-<TT>open</TT>
			
 
				-message prepares a channel for subsequent
			
 
				-<TT>read</TT>
			
 
				-and
			
 
				-<TT>write</TT>
			
 
				-messages to access the contents of the file.
			
 
				-<TT>Create</TT>
			
 
				-and
			
 
				-<TT>remove</TT>
			
 
				-perform the actions implied by their names on the file
			
 
				-referenced by the channel.
			
 
				-The
			
 
				-<TT>clunk</TT>
			
 
				-message discards a channel without affecting the file.
			
 
				-</P>
			
 
				-<P>
			
 
				-A kernel resident file server called the
			
 
				-<I>mount driver</I>
			
 
				-converts the procedural version of 9P into RPCs.
			
 
				-The
			
 
				-<I>mount</I>
			
 
				-system call provides a file descriptor, which can be
			
 
				-a pipe to a user process or a network connection to a remote machine, to
			
 
				-be associated with the mount point.
			
 
				-After a mount, operations
			
 
				-on the file tree below the mount point are sent as messages to the file server.
			
 
				-The
			
 
				-mount
			
 
				-driver manages buffers, packs and unpacks parameters from
			
 
				-messages, and demultiplexes among processes using the file server.
			
 
				-</P>
			
 
				-<H4>2.2 Kernel Organization
			
 
				-</H4>
			
 
				-<P>
			
 
				-The network code in the kernel is divided into three layers: hardware interface,
			
 
				-protocol processing, and program interface.
			
 
				-A device driver typically uses streams to connect the two interface layers.
			
 
				-Additional stream modules may be pushed on
			
 
				-a device to process protocols.
			
 
				-Each device driver is a kernel-resident file system.
			
 
				-Simple device drivers serve a single level
			
 
				-directory containing just a few files;
			
 
				-for example, we represent each UART
			
 
				-by a data and a control file.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cpu% cd /dev
			
 
				-cpu% ls -l eia*
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia1
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia1ctl
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia2
			
 
				---rw-rw-rw- t 0 bootes bootes 0 Jul 16 17:28 eia2ctl
			
 
				-cpu%
			
 
				-</PRE></TT></DL>
			
 
				-The control file is used to control the device;
			
 
				-writing the string
			
 
				-<TT>b1200</TT>
			
 
				-to
			
 
				-<TT>/dev/eia1ctl</TT>
			
 
				-sets the line to 1200 baud.
			
 
				-</P>
			
 
				-<P>
			
 
				-Multiplexed devices present
			
 
				-a more complex interface structure.
			
 
				-For example, the LANCE Ethernet driver
			
 
				-serves a two level file tree (Figure 1)
			
 
				-providing
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>*<DD>
			
 
				-device control and configuration
			
 
				-<DT>*<DD>
			
 
				-user-level protocols like ARP
			
 
				-<DT>*<DD>
			
 
				-diagnostic interfaces for snooping software.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-The top directory contains a
			
 
				-<TT>clone</TT>
			
 
				-file and a directory for each connection, numbered
			
 
				-<TT>1</TT>
			
 
				-to
			
 
				-<TT>n</TT>.
			
 
				-Each connection directory corresponds to an Ethernet packet type.
			
 
				-Opening the
			
 
				-<TT>clone</TT>
			
 
				-file finds an unused connection directory
			
 
				-and opens its
			
 
				-<TT>ctl</TT>
			
 
				-file.
			
 
				-Reading the control file returns the ASCII connection number; the user
			
 
				-process can use this value to construct the name of the proper 
			
 
				-connection directory.
			
 
				-In each connection directory files named
			
 
				-<TT>ctl</TT>,
			
 
				-<TT>data</TT>,
			
 
				-<TT>stats</TT>,
			
 
				-and 
			
 
				-<TT>type</TT>
			
 
				-provide access to the connection.
			
 
				-Writing the string
			
 
				-<TT>connect 2048</TT>
			
 
				-to the
			
 
				-<TT>ctl</TT>
			
 
				-file sets the packet type to 2048
			
 
				-and
			
 
				-configures the connection to receive
			
 
				-all IP packets sent to the machine.
			
 
				-Subsequent reads of the file
			
 
				-<TT>type</TT>
			
 
				-yield the string
			
 
				-<TT>2048</TT>.
			
 
				-The
			
 
				-<TT>data</TT>
			
 
				-file accesses the media;
			
 
				-reading it
			
 
				-returns the
			
 
				-next packet of the selected type.
			
 
				-Writing the file
			
 
				-queues a packet for transmission after
			
 
				-appending a packet header containing the source address and packet type.
			
 
				-The
			
 
				-<TT>stats</TT>
			
 
				-file returns ASCII text containing the interface address,
			
 
				-packet input/output counts, error statistics, and general information
			
 
				-about the state of the interface.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="data.7580.gif"><br>
			
 
				-</PRE></TT></DL>
			
 
				-If several connections on an interface
			
 
				-are configured for a particular packet type, each receives a
			
 
				-copy of the incoming packets.
			
 
				-The special packet type
			
 
				-<TT>-1</TT>
			
 
				-selects all packets.
			
 
				-Writing the strings
			
 
				-<TT>promiscuous</TT>
			
 
				-and
			
 
				-<TT>connect</TT>
			
 
				-<TT>-1</TT>
			
 
				-to the
			
 
				-<TT>ctl</TT>
			
 
				-file
			
 
				-configures a conversation to receive all packets on the Ethernet.
			
 
				-<P>
			
 
				-Although the driver interface may seem elaborate,
			
 
				-the representation of a device as a set of files using ASCII strings for
			
 
				-communication has several advantages.
			
 
				-Any mechanism supporting remote access to files immediately
			
 
				-allows a remote machine to use our interfaces as gateways.
			
 
				-Using ASCII strings to control the interface avoids byte order problems and
			
 
				-ensures a uniform representation for
			
 
				-devices on the same machine and even allows devices to be accessed remotely.
			
 
				-Representing dissimilar devices by the same set of files allows common tools
			
 
				-to serve
			
 
				-several networks or interfaces.
			
 
				-Programs like
			
 
				-<TT>stty</TT>
			
 
				-are replaced by
			
 
				-<TT>echo</TT>
			
 
				-and shell redirection.
			
 
				-</P>
			
 
				-<H4>2.3 Protocol devices
			
 
				-</H4>
			
 
				-<P>
			
 
				-Network connections are represented as pseudo-devices called protocol devices.
			
 
				-Protocol device drivers exist for the Datakit URP protocol and for each of the
			
 
				-Internet IP protocols TCP, UDP, and IL.
			
 
				-IL, described below, is a new communication protocol used by Plan 9 for
			
 
				-transmitting file system RPCs.
			
 
				-All protocol devices look identical so user programs contain no
			
 
				-network-specific code.
			
 
				-</P>
			
 
				-<P>
			
 
				-Each protocol device driver serves a directory structure
			
 
				-similar to that of the Ethernet driver.
			
 
				-The top directory contains a
			
 
				-<TT>clone</TT>
			
 
				-file and a directory for each connection numbered
			
 
				-<TT>0</TT>
			
 
				-to
			
 
				-<TT>n</TT>.
			
 
				-Each connection directory contains files to control one
			
 
				-connection and to send and receive information.
			
 
				-A TCP connection directory looks like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cpu% cd /net/tcp/2
			
 
				-cpu% ls -l
			
 
				---rw-rw---- I 0 ehg    bootes 0 Jul 13 21:14 ctl
			
 
				---rw-rw---- I 0 ehg    bootes 0 Jul 13 21:14 data
			
 
				---rw-rw---- I 0 ehg    bootes 0 Jul 13 21:14 listen
			
 
				---r--r--r-- I 0 bootes bootes 0 Jul 13 21:14 local
			
 
				---r--r--r-- I 0 bootes bootes 0 Jul 13 21:14 remote
			
 
				---r--r--r-- I 0 bootes bootes 0 Jul 13 21:14 status
			
 
				-cpu% cat local remote status
			
 
				-135.104.9.31 5012
			
 
				-135.104.53.11 564
			
 
				-tcp/2 1 Established connect
			
 
				-cpu%
			
 
				-</PRE></TT></DL>
			
 
				-The files
			
 
				-<TT>local</TT>,
			
 
				-<TT>remote</TT>,
			
 
				-and
			
 
				-<TT>status</TT>
			
 
				-supply information about the state of the connection.
			
 
				-The
			
 
				-<TT>data</TT>
			
 
				-and
			
 
				-<TT>ctl</TT>
			
 
				-files
			
 
				-provide access to the process end of the stream implementing the protocol.
			
 
				-The
			
 
				-<TT>listen</TT>
			
 
				-file is used to accept incoming calls from the network.
			
 
				-</P>
			
 
				-<P>
			
 
				-The following steps establish a connection.
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>1)<DD>
			
 
				-The clone device of the
			
 
				-appropriate protocol directory is opened to reserve an unused connection.
			
 
				-<DT>2)<DD>
			
 
				-The file descriptor returned by the open points to the
			
 
				-<TT>ctl</TT>
			
 
				-file of the new connection.
			
 
				-Reading that file descriptor returns an ASCII string containing
			
 
				-the connection number.
			
 
				-<DT>3)<DD>
			
 
				-A protocol/network specific ASCII address string is written to the
			
 
				-<TT>ctl</TT>
			
 
				-file.
			
 
				-<DT>4)<DD>
			
 
				-The path of the
			
 
				-<TT>data</TT>
			
 
				-file is constructed using the connection number.
			
 
				-When the
			
 
				-<TT>data</TT>
			
 
				-file is opened the connection is established.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-A process can read and write this file descriptor
			
 
				-to send and receive messages from the network.
			
 
				-If the process opens the
			
 
				-<TT>listen</TT>
			
 
				-file it blocks until an incoming call is received.
			
 
				-An address string written to the
			
 
				-<TT>ctl</TT>
			
 
				-file before the listen selects the
			
 
				-ports or services the process is prepared to accept.
			
 
				-When an incoming call is received, the open completes
			
 
				-and returns a file descriptor
			
 
				-pointing to the
			
 
				-<TT>ctl</TT>
			
 
				-file of the new connection.
			
 
				-Reading the
			
 
				-<TT>ctl</TT>
			
 
				-file yields a connection number used to construct the path of the
			
 
				-<TT>data</TT>
			
 
				-file.
			
 
				-A connection remains established while any of the files in the connection directory
			
 
				-are referenced or until a close is received from the network.
			
 
				-<H4>2.4 Streams
			
 
				-</H4>
			
 
				-<P>
			
 
				-A
			
 
				-<I>stream</I>
			
 
				-[Rit84a][Presotto] is a bidirectional channel connecting a
			
 
				-physical or pseudo-device to user processes.
			
 
				-The user processes insert and remove data at one end of the stream.
			
 
				-Kernel processes acting on behalf of a device insert data at
			
 
				-the other end.
			
 
				-Asynchronous communications channels such as pipes,
			
 
				-TCP conversations, Datakit conversations, and RS232 lines are implemented using
			
 
				-streams.
			
 
				-</P>
			
 
				-<P>
			
 
				-A stream comprises a linear list of
			
 
				-<I>processing modules</I>.
			
 
				-Each module has both an upstream (toward the process) and
			
 
				-downstream (toward the device)
			
 
				-<I>put routine</I>.
			
 
				-Calling the put routine of the module on either end of the stream
			
 
				-inserts data into the stream.
			
 
				-Each module calls the succeeding one to send data up or down the stream.
			
 
				-</P>
			
 
				-<P>
			
 
				-An instance of a processing module is represented by a pair of
			
 
				-<I>queues</I>,
			
 
				-one for each direction.
			
 
				-The queues point to the put procedures and can be used
			
 
				-to queue information traveling along the stream.
			
 
				-Some put routines queue data locally and send it along the stream at some
			
 
				-later time, either due to a subsequent call or an asynchronous
			
 
				-event such as a retransmission timer or a device interrupt.
			
 
				-Processing modules create helper kernel processes to
			
 
				-provide a context for handling asynchronous events.
			
 
				-For example, a helper kernel process awakens periodically
			
 
				-to perform any necessary TCP retransmissions.
			
 
				-The use of kernel processes instead of serialized run-to-completion service routines
			
 
				-differs from the implementation of Unix streams.
			
 
				-Unix service routines cannot
			
 
				-use any blocking kernel resource and they lack a local long-lived state.
			
 
				-Helper kernel processes solve these problems and simplify the stream code.
			
 
				-</P>
			
 
				-<P>
			
 
				-There is no implicit synchronization in our streams.
			
 
				-Each processing module must ensure that concurrent processes using the stream
			
 
				-are synchronized.
			
 
				-This maximizes concurrency but introduces the
			
 
				-possibility of deadlock.
			
 
				-However, deadlocks are easily avoided by careful programming; to
			
 
				-date they have not caused us problems.
			
 
				-</P>
			
 
				-<P>
			
 
				-Information is represented by linked lists of kernel structures called
			
 
				-<I>blocks</I>.
			
 
				-Each block contains a type, some state flags, and pointers to
			
 
				-an optional buffer.
			
 
				-Block buffers can hold either data or control information, i.e., directives
			
 
				-to the processing modules.
			
 
				-Blocks and block buffers are dynamically allocated from kernel memory.
			
 
				-</P>
			
 
				-<H4>2.4.1 User Interface
			
 
				-</H4>
			
 
				-<P>
			
 
				-A stream is represented at user level as two files, 
			
 
				-<TT>ctl</TT>
			
 
				-and
			
 
				-<TT>data</TT>.
			
 
				-The actual names can be changed by the device driver using the stream,
			
 
				-as we saw earlier in the example of the UART driver.
			
 
				-The first process to open either file creates the stream automatically.
			
 
				-The last close destroys it.
			
 
				-Writing to the
			
 
				-<TT>data</TT>
			
 
				-file copies the data into kernel blocks
			
 
				-and passes them to the downstream put routine of the first processing module.
			
 
				-A write of less than 32K is guaranteed to be contained by a single block.
			
 
				-Concurrent writes to the same stream are not synchronized, although the
			
 
				-32K block size assures atomic writes for most protocols.
			
 
				-The last block written is flagged with a delimiter
			
 
				-to alert downstream modules that care about write boundaries.
			
 
				-In most cases the first put routine calls the second, the second
			
 
				-calls the third, and so on until the data is output.
			
 
				-As a consequence, most data is output without context switching.
			
 
				-</P>
			
 
				-<P>
			
 
				-Reading from the
			
 
				-<TT>data</TT>
			
 
				-file returns data queued at the top of the stream.
			
 
				-The read terminates when the read count is reached
			
 
				-or when the end of a delimited block is encountered.
			
 
				-A per stream read lock ensures only one process
			
 
				-can read from a stream at a time and guarantees
			
 
				-that the bytes read were contiguous bytes from the
			
 
				-stream.
			
 
				-</P>
			
 
				-<P>
			
 
				-Like UNIX streams [Rit84a],
			
 
				-Plan 9 streams can be dynamically configured.
			
 
				-The stream system intercepts and interprets
			
 
				-the following control blocks:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><TT>push</TT> <I>name</I><DD>
			
 
				-adds an instance of the processing module 
			
 
				-<I>name</I>
			
 
				-to the top of the stream.
			
 
				-<DT><TT>pop</TT><DD>
			
 
				-removes the top module of the stream.
			
 
				-<DT><TT>hangup</TT><DD>
			
 
				-sends a hangup message
			
 
				-up the stream from the device end.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-Other control blocks are module-specific and are interpreted by each
			
 
				-processing module
			
 
				-as they pass.
			
 
				-<P>
			
 
				-The convoluted syntax and semantics of the UNIX
			
 
				-<TT>ioctl</TT>
			
 
				-system call convinced us to leave it out of Plan 9.
			
 
				-Instead,
			
 
				-<TT>ioctl</TT>
			
 
				-is replaced by the
			
 
				-<TT>ctl</TT>
			
 
				-file.
			
 
				-Writing to the
			
 
				-<TT>ctl</TT>
			
 
				-file
			
 
				-is identical to writing to a
			
 
				-<TT>data</TT>
			
 
				-file except the blocks are of type
			
 
				-<I>control</I>.
			
 
				-A processing module parses each control block it sees.
			
 
				-Commands in control blocks are ASCII strings, so
			
 
				-byte ordering is not an issue when one system
			
 
				-controls streams in a name space implemented on another processor.
			
 
				-The time to parse control blocks is not important, since control
			
 
				-operations are rare.
			
 
				-</P>
			
 
				-<H4>2.4.2 Device Interface
			
 
				-</H4>
			
 
				-<P>
			
 
				-The module at the downstream end of the stream is part of a device interface.
			
 
				-The particulars of the interface vary with the device.
			
 
				-Most device interfaces consist of an interrupt routine, an output
			
 
				-put routine, and a kernel process.
			
 
				-The output put routine stages data for the
			
 
				-device and starts the device if it is stopped.
			
 
				-The interrupt routine wakes up the kernel process whenever
			
 
				-the device has input to be processed or needs more output staged.
			
 
				-The kernel process puts information up the stream or stages more data for output.
			
 
				-The division of labor among the different pieces varies depending on
			
 
				-how much must be done at interrupt level.
			
 
				-However, the interrupt routine may not allocate blocks or call
			
 
				-a put routine since both actions require a process context.
			
 
				-</P>
			
 
				-<H4>2.4.3 Multiplexing
			
 
				-</H4>
			
 
				-<P>
			
 
				-The conversations using a protocol device must be
			
 
				-multiplexed onto a single physical wire.
			
 
				-We push a multiplexer processing module
			
 
				-onto the physical device stream to group the conversations.
			
 
				-The device end modules on the conversations add the necessary header
			
 
				-onto downstream messages and then put them to the module downstream
			
 
				-of the multiplexer.
			
 
				-The multiplexing module looks at each message moving up its stream and
			
 
				-puts it to the correct conversation stream after stripping
			
 
				-the header controlling the demultiplexing.
			
 
				-</P>
			
 
				-<P>
			
 
				-This is similar to the Unix implementation of multiplexer streams.
			
 
				-The major difference is that we have no general structure that
			
 
				-corresponds to a multiplexer.
			
 
				-Each attempt to produce a generalized multiplexer created a more complicated
			
 
				-structure and underlined the basic difficulty of generalizing this mechanism.
			
 
				-We now code each multiplexer from scratch and favor simplicity over
			
 
				-generality.
			
 
				-</P>
			
 
				-<H4>2.4.4 Reflections
			
 
				-</H4>
			
 
				-<P>
			
 
				-Despite five years' experience and the efforts of many programmers,
			
 
				-we remain dissatisfied with the stream mechanism.
			
 
				-Performance is not an issue;
			
 
				-the time to process protocols and drive
			
 
				-device interfaces continues to dwarf the
			
 
				-time spent allocating, freeing, and moving blocks
			
 
				-of data.
			
 
				-However, the mechanism remains inordinately
			
 
				-complex.
			
 
				-Much of the complexity results from our efforts
			
 
				-to make streams dynamically configurable, to
			
 
				-reuse processing modules on different devices
			
 
				-and to provide kernel synchronization
			
 
				-to ensure data structures
			
 
				-don't disappear under foot.
			
 
				-This is particularly irritating since we seldom use these properties.
			
 
				-</P>
			
 
				-<P>
			
 
				-Streams remain in our kernel because we are unable to
			
 
				-devise a better alternative.
			
 
				-Larry Peterson's X-kernel [Pet89a]
			
 
				-is the closest contender but
			
 
				-doesn't offer enough advantage to switch.
			
 
				-If we were to rewrite the streams code, we would probably statically
			
 
				-allocate resources for a large fixed number of conversations and burn
			
 
				-memory in favor of less complexity.
			
 
				-</P>
			
 
				-<H4>3 The IL Protocol
			
 
				-</H4>
			
 
				-<P>
			
 
				-None of the standard IP protocols is suitable for transmission of
			
 
				-9P messages over an Ethernet or the Internet.
			
 
				-TCP has a high overhead and does not preserve delimiters.
			
 
				-UDP, while cheap, does not provide reliable sequenced delivery.
			
 
				-Early versions of the system used a custom protocol that was
			
 
				-efficient but unsatisfactory for internetwork transmission.
			
 
				-When we implemented IP, TCP, and UDP we looked around for a suitable
			
 
				-replacement with the following properties:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>*<DD>
			
 
				-Reliable datagram service with sequenced delivery
			
 
				-<DT>*<DD>
			
 
				-Runs over IP
			
 
				-<DT>*<DD>
			
 
				-Low complexity, high performance
			
 
				-<DT>*<DD>
			
 
				-Adaptive timeouts
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-None met our needs so a new protocol was designed.
			
 
				-IL is a lightweight protocol designed to be encapsulated by IP.
			
 
				-It is a connection-based protocol
			
 
				-providing reliable transmission of sequenced messages between machines.
			
 
				-No provision is made for flow control since the protocol is designed to transport RPC
			
 
				-messages between client and server.
			
 
				-A small outstanding message window prevents too
			
 
				-many incoming messages from being buffered;
			
 
				-messages outside the window are discarded
			
 
				-and must be retransmitted.
			
 
				-Connection setup uses a two way handshake to generate
			
 
				-initial sequence numbers at each end of the connection;
			
 
				-subsequent data messages increment the
			
 
				-sequence numbers allowing
			
 
				-the receiver to resequence out of order messages. 
			
 
				-In contrast to other protocols, IL does not do blind retransmission.
			
 
				-If a message is lost and a timeout occurs, a query message is sent.
			
 
				-The query message is a small control message containing the current
			
 
				-sequence numbers as seen by the sender.
			
 
				-The receiver responds to a query by retransmitting missing messages.
			
 
				-This allows the protocol to behave well in congested networks,
			
 
				-where blind retransmission would cause further
			
 
				-congestion.
			
 
				-Like TCP, IL has adaptive timeouts.
			
 
				-A round-trip timer is used
			
 
				-to calculate acknowledge and retransmission times in terms of the network speed.
			
 
				-This allows the protocol to perform well on both the Internet and on local Ethernets.
			
 
				-<P>
			
 
				-In keeping with the minimalist design of the rest of the kernel, IL is small.
			
 
				-The entire protocol is 847 lines of code, compared to 2200 lines for TCP.
			
 
				-IL is our protocol of choice.
			
 
				-</P>
			
 
				-<H4>4 Network Addressing
			
 
				-</H4>
			
 
				-<P>
			
 
				-A uniform interface to protocols and devices is not sufficient to
			
 
				-support the transparency we require.
			
 
				-Since each network uses a different
			
 
				-addressing scheme,
			
 
				-the ASCII strings written to a control file have no common format.
			
 
				-As a result, every tool must know the specifics of the networks it
			
 
				-is capable of addressing.
			
 
				-Moreover, since each machine supplies a subset
			
 
				-of the available networks, each user must be aware of the networks supported
			
 
				-by every terminal and server machine.
			
 
				-This is obviously unacceptable.
			
 
				-</P>
			
 
				-<P>
			
 
				-Several possible solutions were considered and rejected; one deserves
			
 
				-more discussion.
			
 
				-We could have used a user-level file server
			
 
				-to represent the network name space as a Plan 9 file tree. 
			
 
				-This global naming scheme has been implemented in other distributed systems.
			
 
				-The file hierarchy provides paths to
			
 
				-directories representing network domains.
			
 
				-Each directory contains
			
 
				-files representing the names of the machines in that domain;
			
 
				-an example might be the path
			
 
				-<TT>/net/name/usa/edu/mit/ai</TT>.
			
 
				-Each machine file contains information like the IP address of the machine.
			
 
				-We rejected this representation for several reasons.
			
 
				-First, it is hard to devise a hierarchy encompassing all representations
			
 
				-of the various network addressing schemes in a uniform manner.
			
 
				-Datakit and Ethernet address strings have nothing in common.
			
 
				-Second, the address of a machine is
			
 
				-often only a small part of the information required to connect to a service on
			
 
				-the machine.
			
 
				-For example, the IP protocols require symbolic service names to be mapped into
			
 
				-numeric port numbers, some of which are privileged and hence special.
			
 
				-Information of this sort is hard to represent in terms of file operations.
			
 
				-Finally, the size and number of the networks being represented burdens users with
			
 
				-an unacceptably large amount of information about the organization of the network
			
 
				-and its connectivity.
			
 
				-In this case the Plan 9 representation of a
			
 
				-resource as a file is not appropriate.
			
 
				-</P>
			
 
				-<P>
			
 
				-If tools are to be network independent, a third-party server must resolve
			
 
				-network names.
			
 
				-A server on each machine, with local knowledge, can select the best network
			
 
				-for any particular destination machine or service.
			
 
				-Since the network devices present a common interface,
			
 
				-the only operation which differs between networks is name resolution.
			
 
				-A symbolic name must be translated to
			
 
				-the path of the clone file of a protocol
			
 
				-device and an ASCII address string to write to the
			
 
				-<TT>ctl</TT>
			
 
				-file.
			
 
				-A connection server (CS) provides this service.
			
 
				-</P>
			
 
				-<H4>4.1 Network Database
			
 
				-</H4>
			
 
				-<P>
			
 
				-On most systems several
			
 
				-files such as
			
 
				-<TT>/etc/hosts</TT>,
			
 
				-<TT>/etc/networks</TT>,
			
 
				-<TT>/etc/services</TT>,
			
 
				-<TT>/etc/hosts.equiv</TT>,
			
 
				-<TT>/etc/bootptab</TT>,
			
 
				-and
			
 
				-<TT>/etc/named.d</TT>
			
 
				-hold network information.
			
 
				-Much time and effort is spent
			
 
				-administering these files and keeping
			
 
				-them mutually consistent.
			
 
				-Tools attempt to
			
 
				-automatically derive one or more of the files from
			
 
				-information in other files but maintenance continues to be
			
 
				-difficult and error prone.
			
 
				-</P>
			
 
				-<P>
			
 
				-Since we were writing an entirely new system, we were free to
			
 
				-try a simpler approach.
			
 
				-One database on a shared server contains all the information
			
 
				-needed for network administration.
			
 
				-Two ASCII files comprise the main database:
			
 
				-<TT>/lib/ndb/local</TT>
			
 
				-contains locally administered information and
			
 
				-<TT>/lib/ndb/global</TT>
			
 
				-contains information imported from elsewhere.
			
 
				-The files contain sets of attribute/value pairs of the form
			
 
				-<I>attr<TT>=</TT>value</I>,
			
 
				-where
			
 
				-<I>attr</I>
			
 
				-and
			
 
				-<I>value</I>
			
 
				-are alphanumeric strings.
			
 
				-Systems are described by multi-line entries;
			
 
				-a header line at the left margin begins each entry followed by zero or more
			
 
				-indented attribute/value pairs specifying
			
 
				-names, addresses, properties, etc.
			
 
				-For example, the entry for our CPU server
			
 
				-specifies a domain name, an IP address, an Ethernet address,
			
 
				-a Datakit address, a boot file, and supported protocols.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-sys = helix
			
 
				-	dom=helix.research.bell-labs.com
			
 
				-	bootf=/mips/9power
			
 
				-	ip=135.104.9.31 ether=0800690222f0
			
 
				-	dk=nj/astro/helix
			
 
				-	proto=il flavor=9cpu
			
 
				-</PRE></TT></DL>
			
 
				-If several systems share entries such as
			
 
				-network mask and gateway, we specify that information
			
 
				-with the network or subnetwork instead of the system.
			
 
				-The following entries define a Class B IP network and 
			
 
				-a few subnets derived from it.
			
 
				-The entry for the network specifies the IP mask,
			
 
				-file system, and authentication server for all systems
			
 
				-on the network.
			
 
				-Each subnetwork specifies its default IP gateway.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-ipnet=mh-astro-net ip=135.104.0.0 ipmask=255.255.255.0
			
 
				-	fs=bootes.research.bell-labs.com
			
 
				-	auth=1127auth
			
 
				-ipnet=unix-room ip=135.104.117.0
			
 
				-	ipgw=135.104.117.1
			
 
				-ipnet=third-floor ip=135.104.51.0
			
 
				-	ipgw=135.104.51.1
			
 
				-ipnet=fourth-floor ip=135.104.52.0
			
 
				-	ipgw=135.104.52.1
			
 
				-</PRE></TT></DL>
			
 
				-Database entries also define the mapping of service names
			
 
				-to port numbers for TCP, UDP, and IL.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-tcp=echo	port=7
			
 
				-tcp=discard	port=9
			
 
				-tcp=systat	port=11
			
 
				-tcp=daytime	port=13
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-All programs read the database directly so
			
 
				-consistency problems are rare.
			
 
				-However the database files can become large.
			
 
				-Our global file, containing all information about
			
 
				-both Datakit and Internet systems in AT&amp;T, has 43,000
			
 
				-lines.
			
 
				-To speed searches, we build hash table files for each
			
 
				-attribute we expect to search often.
			
 
				-The hash file entries point to entries
			
 
				-in the master files.
			
 
				-Every hash file contains the modification time of its master
			
 
				-file so we can avoid using an out-of-date hash table.
			
 
				-Searches for attributes that aren't hashed or whose hash table
			
 
				-is out-of-date still work; they just take longer.
			
 
				-</P>
			
 
				-<H4>4.2 Connection Server
			
 
				-</H4>
			
 
				-<P>
			
 
				-On each system a user level connection server process, CS, translates
			
 
				-symbolic names to addresses.
			
 
				-CS uses information about available networks, the network database, and
			
 
				-other servers (such as DNS) to translate names.
			
 
				-CS is a file server serving a single file,
			
 
				-<TT>/net/cs</TT>.
			
 
				-A client writes a symbolic name to
			
 
				-<TT>/net/cs</TT>
			
 
				-then reads one line for each matching destination reachable
			
 
				-from this system.
			
 
				-The lines are of the form
			
 
				-<I>filename message</I>,
			
 
				-where
			
 
				-<I>filename</I>
			
 
				-is the path of the clone file to open for a new connection and
			
 
				-<I>message</I>
			
 
				-is the string to write to it to make the connection.
			
 
				-The following example illustrates this.
			
 
				-<TT>Ndb/csquery</TT>
			
 
				-is a program that prompts for strings to write to
			
 
				-<TT>/net/cs</TT>
			
 
				-and prints the replies.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% ndb/csquery
			
 
				-&#62; net!helix!9fs
			
 
				-/net/il/clone 135.104.9.31!17008
			
 
				-/net/dk/clone nj/astro/helix!9fs
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-CS provides meta-name translation to perform complicated
			
 
				-searches.
			
 
				-The special network name
			
 
				-<TT>net</TT>
			
 
				-selects any network in common between source and
			
 
				-destination supporting the specified service.
			
 
				-A host name of the form <TT>$</TT><I>attr</I>
			
 
				-is the name of an attribute in the network database.
			
 
				-The database search returns the value
			
 
				-of the matching attribute/value pair
			
 
				-most closely associated with the source host.
			
 
				-Most closely associated is defined on a per network basis.
			
 
				-For example, the symbolic name
			
 
				-<TT>tcp!$auth!rexauth</TT>
			
 
				-causes CS to search for the
			
 
				-<TT>auth</TT>
			
 
				-attribute in the database entry for the source system, then its
			
 
				-subnetwork (if there is one) and then its network.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% ndb/csquery
			
 
				-&#62; net!$auth!rexauth
			
 
				-/net/il/clone 135.104.9.34!17021
			
 
				-/net/dk/clone nj/astro/p9auth!rexauth
			
 
				-/net/il/clone 135.104.9.6!17021
			
 
				-/net/dk/clone nj/astro/musca!rexauth
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-Normally CS derives naming information from its database files.
			
 
				-For domain names however, CS first consults another user level
			
 
				-process, the domain name server (DNS).
			
 
				-If no DNS is reachable, CS relies on its own tables.
			
 
				-</P>
			
 
				-<P>
			
 
				-Like CS, the domain name server is a user level process providing
			
 
				-one file,
			
 
				-<TT>/net/dns</TT>.
			
 
				-A client writes a request of the form
			
 
				-<I>domain-name type</I>,
			
 
				-where
			
 
				-<I>type</I>
			
 
				-is a domain name service resource record type.
			
 
				-DNS performs a recursive query through the
			
 
				-Internet domain name system producing one line
			
 
				-per resource record found.  The client reads
			
 
				-<TT>/net/dns</TT>
			
 
				-to retrieve the records.
			
 
				-Like other domain name servers, DNS caches information
			
 
				-learned from the network.
			
 
				-DNS is implemented as a multi-process shared memory application
			
 
				-with separate processes listening for network and local requests.
			
 
				-</P>
			
 
				-<H4>5 Library routines
			
 
				-</H4>
			
 
				-<P>
			
 
				-The section on protocol devices described the details
			
 
				-of making and receiving connections across a network.
			
 
				-The dance is straightforward but tedious.
			
 
				-Library routines are provided to relieve
			
 
				-the programmer of the details.
			
 
				-</P>
			
 
				-<H4>5.1 Connecting
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>dial</TT>
			
 
				-library call establishes a connection to a remote destination.
			
 
				-It
			
 
				-returns an open file descriptor for the
			
 
				-<TT>data</TT>
			
 
				-file in the connection directory.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int  dial(char *dest, char *local, char *dir, int *cfdp)
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><TT>dest</TT><DD>
			
 
				-is the symbolic name/address of the destination.
			
 
				-<DT><TT>local</TT><DD>
			
 
				-is the local address.
			
 
				-Since most networks do not support this, it is
			
 
				-usually zero.
			
 
				-<DT><TT>dir</TT><DD>
			
 
				-is a pointer to a buffer to hold the path name of the protocol directory
			
 
				-representing this connection.
			
 
				-<TT>Dial</TT>
			
 
				-fills this buffer if the pointer is non-zero.
			
 
				-<DT><TT>cfdp</TT><DD>
			
 
				-is a pointer to a file descriptor for the
			
 
				-<TT>ctl</TT>
			
 
				-file of the connection.
			
 
				-If the pointer is non-zero,
			
 
				-<TT>dial</TT>
			
 
				-opens the control file and tucks the file descriptor here.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-Most programs call
			
 
				-<TT>dial</TT>
			
 
				-with a destination name and all other arguments zero.
			
 
				-<TT>Dial</TT>
			
 
				-uses CS to
			
 
				-translate the symbolic name to all possible destination addresses
			
 
				-and attempts to connect to each in turn until one works.
			
 
				-Specifying the special name
			
 
				-<TT>net</TT>
			
 
				-in the network portion of the destination
			
 
				-allows CS to pick a network/protocol in common
			
 
				-with the destination for which the requested service is valid.
			
 
				-For example, assume the system
			
 
				-<TT>research.bell-labs.com</TT>
			
 
				-has the Datakit address
			
 
				-<TT>nj/astro/research</TT>
			
 
				-and IP addresses
			
 
				-<TT>135.104.117.5</TT>
			
 
				-and
			
 
				-<TT>129.11.4.1</TT>.
			
 
				-The call
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-fd = dial("net!research.bell-labs.com!login", 0, 0, 0);
			
 
				-</PRE></TT></DL>
			
 
				-tries in succession to connect to
			
 
				-<TT>nj/astro/research!login</TT>
			
 
				-on the Datakit and both
			
 
				-<TT>135.104.117.5!513</TT>
			
 
				-and
			
 
				-<TT>129.11.4.1!513</TT>
			
 
				-across the Internet.
			
 
				-<P>
			
 
				-<TT>Dial</TT>
			
 
				-accepts addresses instead of symbolic names.
			
 
				-For example, the destinations
			
 
				-<TT>tcp!135.104.117.5!513</TT>
			
 
				-and
			
 
				-<TT>tcp!research.bell-labs.com!login</TT>
			
 
				-are equivalent
			
 
				-references to the same machine.
			
 
				-</P>
			
 
				-<H4>5.2 Listening
			
 
				-</H4>
			
 
				-<P>
			
 
				-A program uses
			
 
				-four routines to listen for incoming connections.
			
 
				-It first
			
 
				-<TT>announce()</TT>s
			
 
				-its intention to receive connections,
			
 
				-then
			
 
				-<TT>listen()</TT>s
			
 
				-for calls and finally
			
 
				-<TT>accept()</TT>s
			
 
				-or
			
 
				-<TT>reject()</TT>s
			
 
				-them.
			
 
				-<TT>Announce</TT>
			
 
				-returns an open file descriptor for the
			
 
				-<TT>ctl</TT>
			
 
				-file of a connection and fills
			
 
				-<TT>dir</TT>
			
 
				-with the
			
 
				-path of the protocol directory
			
 
				-for the announcement.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int  announce(char *addr, char *dir)
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Addr</TT>
			
 
				-is the symbolic name/address announced;
			
 
				-if it does not contain a service, the announcement is for
			
 
				-all services not explicitly announced.
			
 
				-Thus, one can easily write the equivalent of the
			
 
				-<TT>inetd</TT>
			
 
				-program without
			
 
				-having to announce each separate service.
			
 
				-An announcement remains in force until the control file is
			
 
				-closed.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Listen</TT>
			
 
				-returns an open file descriptor for the
			
 
				-<TT>ctl</TT>
			
 
				-file and fills
			
 
				-<TT>ldir</TT>
			
 
				-with the path
			
 
				-of the protocol directory
			
 
				-for the received connection.
			
 
				-It is passed
			
 
				-<TT>dir</TT>
			
 
				-from the announcement.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int  listen(char *dir, char *ldir)
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Accept</TT>
			
 
				-and
			
 
				-<TT>reject</TT>
			
 
				-are called with the control file descriptor and
			
 
				-<TT>ldir</TT>
			
 
				-returned by
			
 
				-<TT>listen.</TT>
			
 
				-Some networks such as Datakit accept a reason for a rejection;
			
 
				-networks such as IP ignore the third argument.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int  accept(int ctl, char *ldir)
			
 
				-int  reject(int ctl, char *ldir, char *reason)
			
 
				-</PRE></TT></DL>
			
 
				-<P>
			
 
				-The following code implements a typical TCP listener.
			
 
				-It announces itself, listens for connections, and forks a new
			
 
				-process for each.
			
 
				-The new process echoes data on the connection until the
			
 
				-remote end closes it.
			
 
				-The "*" in the symbolic name means the announcement is valid for
			
 
				-any addresses bound to the machine the program is run on.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int
			
 
				-echo_server(void)
			
 
				-{
			
 
				-	int afd, dfd, lcfd;
			
 
				-	char adir[40], ldir[40];
			
 
				-	int n;
			
 
				-	char buf[256];
			
 
				-
			
 
				-	afd = announce("tcp!*!echo", adir);
			
 
				-	if(afd &#60; 0)
			
 
				-		return -1;
			
 
				-
			
 
				-	for(;;){
			
 
				-		/* listen for a call */
			
 
				-		lcfd = listen(adir, ldir);
			
 
				-		if(lcfd &#60; 0)
			
 
				-			return -1;
			
 
				-
			
 
				-		/* fork a process to echo */
			
 
				-		switch(fork()){
			
 
				-		case 0:
			
 
				-			/* accept the call and open the data file */
			
 
				-			dfd = accept(lcfd, ldir);
			
 
				-			if(dfd &#60; 0)
			
 
				-				return -1;
			
 
				-
			
 
				-			/* echo until EOF */
			
 
				-			while((n = read(dfd, buf, sizeof(buf))) &#62; 0)
			
 
				-				write(dfd, buf, n);
			
 
				-			exits(0);
			
 
				-		case -1:
			
 
				-			perror("forking");
			
 
				-		default:
			
 
				-			close(lcfd);
			
 
				-			break;
			
 
				-		}
			
 
				-
			
 
				-	}
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>6 User Level
			
 
				-</H4>
			
 
				-<P>
			
 
				-Communication between Plan 9 machines is done almost exclusively in
			
 
				-terms of 9P messages. Only the two services
			
 
				-<TT>cpu</TT>
			
 
				-and
			
 
				-<TT>exportfs</TT>
			
 
				-are used.
			
 
				-The
			
 
				-<TT>cpu</TT>
			
 
				-service is analogous to
			
 
				-<TT>rlogin</TT>.
			
 
				-However, rather than emulating a terminal session
			
 
				-across the network,
			
 
				-<TT>cpu</TT>
			
 
				-creates a process on the remote machine whose name space is an analogue of the window
			
 
				-in which it was invoked.
			
 
				-<TT>Exportfs</TT>
			
 
				-is a user level file server which allows a piece of name space to be
			
 
				-exported from machine to machine across a network. It is used by the
			
 
				-<TT>cpu</TT>
			
 
				-command to serve the files in the terminal's name space when they are
			
 
				-accessed from the
			
 
				-cpu server.
			
 
				-</P>
			
 
				-<P>
			
 
				-By convention, the protocol and device driver file systems are mounted in a
			
 
				-directory called
			
 
				-<TT>/net</TT>.
			
 
				-Although the per-process name space allows users to configure an
			
 
				-arbitrary view of the system, in practice their profiles build
			
 
				-a conventional name space.
			
 
				-</P>
			
 
				-<H4>6.1 Exportfs
			
 
				-</H4>
			
 
				-<P>
			
 
				-<TT>Exportfs</TT>
			
 
				-is invoked by an incoming network call.
			
 
				-The
			
 
				-<I>listener</I>
			
 
				-(the Plan 9 equivalent of
			
 
				-<TT>inetd</TT>)
			
 
				-runs the profile of the user
			
 
				-requesting the service to construct a name space before starting
			
 
				-<TT>exportfs</TT>.
			
 
				-After an initial protocol
			
 
				-establishes the root of the file tree being
			
 
				-exported,
			
 
				-the remote process mounts the connection,
			
 
				-allowing
			
 
				-<TT>exportfs</TT>
			
 
				-to act as a relay file server. Operations in the imported file tree
			
 
				-are executed on the remote server and the results returned.
			
 
				-As a result
			
 
				-the name space of the remote machine appears to be exported into a
			
 
				-local file tree.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>import</TT>
			
 
				-command calls
			
 
				-<TT>exportfs</TT>
			
 
				-on a remote machine, mounts the result in the local name space,
			
 
				-and
			
 
				-exits.
			
 
				-No local process is required to serve mounts;
			
 
				-9P messages are generated by the kernel's mount driver and sent
			
 
				-directly over the network.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Exportfs</TT>
			
 
				-must be multithreaded since the system calls
			
 
				-<TT>open,</TT>
			
 
				-<TT>read</TT>
			
 
				-and
			
 
				-<TT>write</TT>
			
 
				-may block.
			
 
				-Plan 9 does not implement the 
			
 
				-<TT>select</TT>
			
 
				-system call but does allow processes to share file descriptors,
			
 
				-memory and other resources.
			
 
				-<TT>Exportfs</TT>
			
 
				-and the configurable name space
			
 
				-provide a means of sharing resources between machines.
			
 
				-It is a building block for constructing complex name spaces
			
 
				-served from many machines.
			
 
				-</P>
			
 
				-<P>
			
 
				-The simplicity of the interfaces encourages naive users to exploit the potential
			
 
				-of a richly connected environment.
			
 
				-Using these tools it is easy to gateway between networks.
			
 
				-For example a terminal with only a Datakit connection can import from the server
			
 
				-<TT>helix</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-import -a helix /net
			
 
				-telnet ai.mit.edu
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>import</TT>
			
 
				-command makes a Datakit connection to the machine
			
 
				-<TT>helix</TT>
			
 
				-where
			
 
				-it starts an instance of
			
 
				-<TT>exportfs</TT>
			
 
				-to serve
			
 
				-<TT>/net</TT>.
			
 
				-The
			
 
				-<TT>import</TT>
			
 
				-command mounts the remote
			
 
				-<TT>/net</TT>
			
 
				-directory after (the
			
 
				-<TT>-a</TT>
			
 
				-option to
			
 
				-<TT>import</TT>)
			
 
				-the existing contents
			
 
				-of the local
			
 
				-<TT>/net</TT>
			
 
				-directory.
			
 
				-The directory contains the union of the local and remote contents of
			
 
				-<TT>/net</TT>.
			
 
				-Local entries supersede remote ones of the same name so
			
 
				-networks on the local machine are chosen in preference
			
 
				-to those supplied remotely.
			
 
				-However, unique entries in the remote directory are now visible in the local
			
 
				-<TT>/net</TT>
			
 
				-directory.
			
 
				-All the networks connected to
			
 
				-<TT>helix</TT>,
			
 
				-not just Datakit,
			
 
				-are now available in the terminal. The effect on the name space is shown by the following
			
 
				-example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-philw-gnot% ls /net
			
 
				-/net/cs
			
 
				-/net/dk
			
 
				-philw-gnot% import -a musca /net
			
 
				-philw-gnot% ls /net
			
 
				-/net/cs
			
 
				-/net/cs
			
 
				-/net/dk
			
 
				-/net/dk
			
 
				-/net/dns
			
 
				-/net/ether
			
 
				-/net/il
			
 
				-/net/tcp
			
 
				-/net/udp
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>6.2 Ftpfs
			
 
				-</H4>
			
 
				-<P>
			
 
				-We decided to make our interface to FTP
			
 
				-a file system rather than the traditional command.
			
 
				-Our command,
			
 
				-<I>ftpfs,</I>
			
 
				-dials the FTP port of a remote system, prompts for login and password, sets image mode,
			
 
				-and mounts the remote file system onto
			
 
				-<TT>/n/ftp</TT>.
			
 
				-Files and directories are cached to reduce traffic.
			
 
				-The cache is updated whenever a file is created.
			
 
				-Ftpfs works with TOPS-20, VMS, and various Unix flavors
			
 
				-as the remote system.
			
 
				-</P>
			
 
				-<H4>7 Cyclone Fiber Links
			
 
				-</H4>
			
 
				-<P>
			
 
				-The file servers and CPU servers are connected by
			
 
				-high-bandwidth
			
 
				-point-to-point links.
			
 
				-A link consists of two VME cards connected by a pair of optical
			
 
				-fibers.
			
 
				-The VME cards use 33MHz Intel 960 processors and AMD's TAXI
			
 
				-fiber transmitter/receivers to drive the lines at 125 Mbit/sec.
			
 
				-Software in the VME card reduces latency by copying messages from system memory
			
 
				-to fiber without intermediate buffering.
			
 
				-</P>
			
 
				-<H4>8 Performance
			
 
				-</H4>
			
 
				-<P>
			
 
				-We measured both latency and throughput
			
 
				-of reading and writing bytes between two processes
			
 
				-for a number of different paths.
			
 
				-Measurements were made on two- and four-CPU SGI Power Series processors.
			
 
				-The CPUs are 25 MHz MIPS 3000s.
			
 
				-The latency is measured as the round trip time
			
 
				-for a byte sent from one process to another and
			
 
				-back again.
			
 
				-Throughput is measured using 16k writes from
			
 
				-one process to another.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="data.7581.gif"><br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>9 Conclusion
			
 
				-</H4>
			
 
				-<P>
			
 
				-The representation of all resources as file systems
			
 
				-coupled with an ASCII interface has proved more powerful
			
 
				-than we had originally imagined.
			
 
				-Resources can be used by any computer in our networks
			
 
				-independent of byte ordering or CPU type.
			
 
				-The connection server provides an elegant means
			
 
				-of decoupling tools from the networks they use.
			
 
				-Users successfully use Plan 9 without knowing the
			
 
				-topology of the system or the networks they use.
			
 
				-More information about 9P can be found in Section 5 of the Plan 9 Programmer's
			
 
				-Manual, Volume I.
			
 
				-</P>
			
 
				-<H4>10 References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
			
 
				-``Plan 9 from Bell Labs'',
			
 
				-UKUUG Proc. of the Summer 1990 Conf. ,
			
 
				-London, England,
			
 
				-1990.
			
 
				-<br>&#32;<br>
			
 
				-[Needham] R. Needham, ``Names'', in
			
 
				-Distributed systems,
			
 
				-S. Mullender, ed.,
			
 
				-Addison Wesley, 1989.
			
 
				-<br>&#32;<br>
			
 
				-[Presotto] D. Presotto, ``Multiprocessor Streams for Plan 9'',
			
 
				-UKUUG Proc. of the Summer 1990 Conf. ,
			
 
				-London, England, 1990.
			
 
				-<br>&#32;<br>
			
 
				-[Met80] R. Metcalfe, D. Boggs, C. Crane, E. Taft and J. Hupp, ``The
			
 
				-Ethernet Local Network: Three reports'',
			
 
				-CSL-80-2,
			
 
				-XEROX Palo Alto Research Center, February 1980.
			
 
				-<br>&#32;<br>
			
 
				-[Fra80] A. G. Fraser, ``Datakit - A Modular Network for Synchronous
			
 
				-and Asynchronous Traffic'', 
			
 
				-Proc. Int'l Conf. on Communication,
			
 
				-Boston, June 1980.
			
 
				-<br>&#32;<br>
			
 
				-[Pet89a] L. Peterson, ``RPC in the X-Kernel: Evaluating new Design Techniques'',
			
 
				-Proc. Twelfth Symp. on Op. Sys. Princ.,
			
 
				-Litchfield Park, AZ, December 1989.
			
 
				-<br>&#32;<br>
			
 
				-[Rit84a] D. M. Ritchie, ``A Stream Input-Output System'',
			
 
				-AT&amp;T Bell Laboratories Technical Journal, 63(8),
			
 
				-October 1984.
			
 
				-
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/plumb.html
+++ b/sys/doc/plumb.html
@@ -1,1406 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				--
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Plumbing and Other Utilities
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-Bell Laboratories, Murray Hill, NJ, 07974
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<br>&#32;<br>
			
 
				-Plumbing is a new mechanism for inter-process communication in Plan 9,
			
 
				-specifically the passing of messages between interactive programs as part of
			
 
				-the user interface.
			
 
				-Although plumbing shares some properties with familiar notions
			
 
				-such as cut and paste,
			
 
				-it offers a more general data exchange mechanism without imposing
			
 
				-a particular user interface.
			
 
				-<br>&#32;<br>
			
 
				-The core of the plumbing system is a program called the
			
 
				-<I>plumber</I>,
			
 
				-which handles all messages and dispatches and reformats them
			
 
				-according to configuration rules written in a special-purpose language.
			
 
				-This approach allows the contents and context of a piece of data to define how
			
 
				-it is handled.
			
 
				-Unlike with drag and drop or cut and paste,
			
 
				-the user doesn't need to deliver the data;
			
 
				-the contents of a plumbing message, as interpreted by the plumbing rules,
			
 
				-determine its destination.
			
 
				-<br>&#32;<br>
			
 
				-The plumber has an unusual architecture: it is a language-driven file server.
			
 
				-This design has distinct advantages.
			
 
				-It makes plumbing easy to add to an existing, Unix-like command environment;
			
 
				-it guarantees uniform handling of inter-application messages;
			
 
				-it off-loads from those applications most of the work of extracting and dispatching messages;
			
 
				-and it works transparently across a network.
			
 
				-</DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Data moves from program to program in myriad ways.
			
 
				-Command-line arguments,
			
 
				-shell pipe lines,
			
 
				-cut and paste,
			
 
				-drag and drop, and other user interface techniques all provide some form
			
 
				-of interprocess communication.
			
 
				-Then there are tricks associated with special domains,
			
 
				-such as HTML hyperlinks or the heuristics mail readers
			
 
				-use to highlight URLs embedded in mail messages.
			
 
				-Some systems provide implicit ways to automate the attachment of program to data&#8212;the
			
 
				-best known examples are probably the resource forks in MacOS and the
			
 
				-file name extension `associations' in Microsoft Windows&#8212;but in practice
			
 
				-humans must too often carry their data from program to program.
			
 
				-<br>&#32;<br>
			
 
				-Why should a human do the work?
			
 
				-Usually there is one obvious thing to do with a piece of data,
			
 
				-and the data itself suggests what this is.
			
 
				-Resource forks and associations speak to this issue directly, but statically and narrowly and with
			
 
				-little opportunity to control the behavior.
			
 
				-Mechanisms with more generality,
			
 
				-such as cut and paste or drag and drop, demand too much manipulation by
			
 
				-the user and are (therefore) too error-prone.
			
 
				-<br>&#32;<br>
			
 
				-We want a system that, given a piece of data,
			
 
				-hands it to the appropriate application by default with little or no human intervention,
			
 
				-while still permitting the user to override the defaults if desired.
			
 
				-<br>&#32;<br>
			
 
				-The plumbing system is an attempt to address some of these issues in a single,
			
 
				-coherent, central way.
			
 
				-It provides a mechanism for
			
 
				-formatting and sending arbitrary messages between applications,
			
 
				-typically interactive programs such as text editors, web browsers, and the window system,
			
 
				-under the control of a central message-handling server called the
			
 
				-<I>plumber</I>.
			
 
				-Interactive programs provide application-specific connections to the plumber,
			
 
				-triggering with minimal user action the transfer of data or control to other programs.
			
 
				-The result is similar to a hypertext system in which all the links are implicit,
			
 
				-extracted automatically by examining the data and the user's actions.
			
 
				-It obviates
			
 
				-cut and paste and other such hand-driven interprocess communication mechanisms.
			
 
				-Plumbing delivers the goods to the right place automatically.
			
 
				-<H4>Overview
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The plumber is implemented as a Plan 9 file server [Pike93];
			
 
				-programs send messages by writing them to the plumber's file
			
 
				-<TT>/mnt/plumb/send</TT>,
			
 
				-and receive messages by reading them from
			
 
				-<I>ports</I>,
			
 
				-which are other plumber files in
			
 
				-<TT>/mnt/plumb</TT>.
			
 
				-For example,
			
 
				-<TT>/mnt/plumb/edit</TT>
			
 
				-is by convention the file from which a text editor reads messages requesting it to
			
 
				-open and display a file for editing.
			
 
				-(See Figure 1.)
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="-.19117390.gif"><br>
			
 
				-</PRE></TT></DL>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-Figure 1. The plumber controls the flow of messages between applications.
			
 
				-Programs write to the file
			
 
				-<TT>send</TT>
			
 
				-and receive on `ports' of various names representing services such as
			
 
				-<TT>edit</TT>
			
 
				-or
			
 
				-<TT>web</TT>.
			
 
				-Although the figure doesn't illustrate it, some programs may both send and receive messages,
			
 
				-and some ports are read by multiple applications.
			
 
				-<br>&#32;<br>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-The plumber takes messages from the
			
 
				-<TT>send</TT>
			
 
				-file and interprets their contents using rules defined by
			
 
				-a special-purpose pattern-action language.
			
 
				-The language specifies any rewriting of the message that is to be done by the plumber
			
 
				-and defines how to dispose of a message, such as by sending it to a port or
			
 
				-starting a new process to handle it.
			
 
				-<br>&#32;<br>
			
 
				-The behavior is best described by example.
			
 
				-Imagine that the user has, in a terminal emulator window,
			
 
				-just run a compilation that has failed:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% make
			
 
				-cc -c rmstar.c
			
 
				-rmstar.c:32: syntax error
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-The user points the typing cursor somewhere in the string
			
 
				-<TT>rmstar.c:32:</TT>
			
 
				-and executes the
			
 
				-<TT>plumb</TT>
			
 
				-menu entry.
			
 
				-This causes the terminal emulator to format a plumbing message
			
 
				-containing the entire string surrounding the cursor,
			
 
				-<TT>rmstar.c:32:</TT>,
			
 
				-and to write it to
			
 
				-<TT>/mnt/plumb/send</TT>.
			
 
				-The plumber receives this message and compares it sequentially to the various
			
 
				-patterns in its configuration.
			
 
				-Eventually, it will find one that breaks the string into pieces,
			
 
				-<TT>rmstar.c</TT>,
			
 
				-a colon,
			
 
				-<TT>32</TT>,
			
 
				-and the final colon.
			
 
				-Other associated patterns verify that
			
 
				-<TT>rmstar.c</TT>
			
 
				-is a file in the current directory of the program generating
			
 
				-the message, and that
			
 
				-<TT>32</TT>
			
 
				-looks like a line number within it.
			
 
				-The plumber rewrites the message,
			
 
				-setting the data to the string
			
 
				-<TT>rmstar.c</TT>
			
 
				-and attaching an indication that
			
 
				-<TT>32</TT>
			
 
				-is a line number to display.
			
 
				-Finally, it sends the resulting message to the
			
 
				-<TT>edit</TT>
			
 
				-port.
			
 
				-The text editor picks up the message, opens
			
 
				-<TT>rmstar.c</TT>
			
 
				-(if it's not already open) and highlights line 32, the location of the syntax error.
			
 
				-<br>&#32;<br>
			
 
				-From the user's point of view, this process is simple: the error message appears,
			
 
				-it is `plumbed', and the editor jumps to the problem.
			
 
				-<br>&#32;<br>
			
 
				-Of course, there are many different ways to cause compiler messages to
			
 
				-pop up the source of an error,
			
 
				-but the design of the plumber addresses more general issues than the specific
			
 
				-goal of shortening the compile/debug/edit cycle.
			
 
				-It facilitates the general exchange of data among programs, interactive or otherwise,
			
 
				-throughout the environment, and its
			
 
				-architecture&#8212;a central, language-driven file server&#8212;although
			
 
				-unusual, has distinct advantages.
			
 
				-It makes plumbing easy to add to an existing, Unix-like command environment;
			
 
				-it guarantees uniform handling of inter-application messages;
			
 
				-it off-loads from those applications most of the work of extracting and dispatching messages;
			
 
				-and it works transparently and effortlessly across a network.
			
 
				-<br>&#32;<br>
			
 
				-This paper is organized bottom-up, beginning with the format of the messages
			
 
				-and proceeding through the plumbing language, the handling of messages,
			
 
				-and the interactive user interface.
			
 
				-The last sections discuss the implications of the design
			
 
				-and compare the plumbing system to other environments that
			
 
				-provide similar services.
			
 
				-<H4>Format of messages
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Since the language that controls the plumber is defined in terms of the
			
 
				-contents of plumbing messages, we begin by describing their layout.
			
 
				-<br>&#32;<br>
			
 
				-Plumbing messages have a fixed-format textual
			
 
				-header followed by a free-format data section.
			
 
				-The header consists of six lines of text, in set order,
			
 
				-each specifying a property of the message.
			
 
				-Any line may be blank except the last, which is the length of the data portion of the
			
 
				-message, as a decimal string.
			
 
				-The lines are, in order:
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The source application, the name of the program generating the message.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The destination port, the name of the port to which the messages should be sent.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The working directory in which the message was generated.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The type of the data, analogous to a MIME type, such as
			
 
				-<TT>text</TT>
			
 
				-or
			
 
				-<TT>image/gif</TT>.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-Attributes of the message, given as blank-separated
			
 
				-<I>name</I><TT>=</TT><I>value</I><TT>
			
 
				-pairs.
			
 
				-The values may be quoted to protect
			
 
				-blanks or quotes; values may not contain newlines.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The length of the data section, in bytes.
			
 
				-</dl>
			
 
				-</TT><br>&#32;<br>
			
 
				-Here is a sample message, one that (conventionally) tells the editor to open the file
			
 
				-<TT>/usr/rob/src/mem.c</TT>
			
 
				-and display line
			
 
				-27 within it:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-plumbtest
			
 
				-edit
			
 
				-/usr/rob/src
			
 
				-text
			
 
				-addr=27
			
 
				-5
			
 
				-mem.c
			
 
				-</PRE></TT></DL>
			
 
				-Because in general it need not be text, the data section of the message has no terminating newline.
			
 
				-<br>&#32;<br>
			
 
				-A library interface simplifies the processing of messages by translating them
			
 
				-to and from a data structure,
			
 
				-<TT>Plumbmsg</TT>,
			
 
				-defined like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef struct Plumbattr Plumbattr;
			
 
				-typedef struct Plumbmsg  Plumbmsg;
			
 
				-
			
 
				-struct Plumbmsg
			
 
				-{
			
 
				-	char			*src;		/* source application */
			
 
				-	char			*dst;		/* destination port */
			
 
				-	char			*wdir;	/* working directory */
			
 
				-	char			*type;	/* type of data */
			
 
				-	Plumbattr	*attr;	/* attribute list */
			
 
				-	int			ndata;	/* #bytes of data */
			
 
				-	char			*data;
			
 
				-};
			
 
				-
			
 
				-struct Plumbattr
			
 
				-{
			
 
				-	char			*name;
			
 
				-	char			*value;
			
 
				-	Plumbattr	*next;
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-The library also includes routines to send a message, receive a message,
			
 
				-manipulate the attribute list, and so on.
			
 
				-<H4>The Language
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-An instance of the plumber runs for each user on each terminal or workstation.
			
 
				-It
			
 
				-begins by reading its rules from the file
			
 
				-<TT>lib/plumbing</TT>
			
 
				-in the user's home directory,
			
 
				-which in turn may use
			
 
				-<TT>include</TT>
			
 
				-statements to interpolate macro definitions and
			
 
				-rules from standard plumbing rule libraries stored in
			
 
				-<TT>/sys/lib/plumb</TT>.
			
 
				-<br>&#32;<br>
			
 
				-The rules control the processing of messages.
			
 
				-They are written in
			
 
				-a pattern-action language comprising a sequence of blank-line-separated
			
 
				-<I>rule</I>
			
 
				-<I>sets</I>,
			
 
				-each of which contains one or more
			
 
				-<I>patterns</I>
			
 
				-followed by one or more
			
 
				-<I>actions</I>.
			
 
				-Each incoming message is compared against the rule sets in order.
			
 
				-If all the patterns within a rule set succeed,
			
 
				-one of the associated actions is taken and processing completes.
			
 
				-<br>&#32;<br>
			
 
				-The syntax of the language is straightforward.
			
 
				-Each rule (pattern or action) has three components, separated by white space:
			
 
				-an
			
 
				-<I>object</I>,
			
 
				-a
			
 
				-<I>verb</I>,
			
 
				-and optional
			
 
				-<I>arguments</I>.
			
 
				-The object
			
 
				-identifies a part of the message, such as
			
 
				-the source application
			
 
				-(<TT>src</TT>),
			
 
				-or the data
			
 
				-portion of the message
			
 
				-(<TT>data</TT>),
			
 
				-or the rule's own arguments
			
 
				-(<TT>arg</TT>);
			
 
				-or it is the keyword
			
 
				-<TT>plumb</TT>,
			
 
				-which introduces an action.
			
 
				-The verb specifies an operation to perform on the object, such as the word
			
 
				-`<TT>is</TT>'
			
 
				-to require precise equality between the object and the argument, or
			
 
				-`<TT>isdir</TT>'
			
 
				-to require that the object be the name of a directory.
			
 
				-<br>&#32;<br>
			
 
				-For instance, this rule set sends messages containing the names of files
			
 
				-ending in
			
 
				-<TT>.gif</TT>,
			
 
				-<TT>.jpg</TT>,
			
 
				-etc. to a program,
			
 
				-<TT>page</TT>,
			
 
				-to display them; it is analogous to a Windows association rule:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# image files go to page
			
 
				-type is text
			
 
				-data matches '[a-zA-Z0-9_\-./]+'
			
 
				-data matches '([a-zA-Z0-9_\-./]+)\.(jpe?g|gif|bit|tiff|ppm)'
			
 
				-arg isfile <I>0
			
 
				-plumb to image
			
 
				-plumb client page -wi
			
 
				-</PRE></TT></DL>
			
 
				-(Lines beginning with
			
 
				-</I><TT>#</TT><I>
			
 
				-are commentary.)
			
 
				-Consider how this rule handles the following message, annotated down the left column for clarity:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I><I>src</I><I>	plumbtest
			
 
				-</I><I>dst</I><I>
			
 
				-</I><I>wdir</I><I>	/usr/rob/pics
			
 
				-</I><I>type</I><I>	text
			
 
				-</I><I>attr</I><I>
			
 
				-</I><I>ndata</I><I>	9
			
 
				-</I><I>data</I><I>	horse.gif
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-</I><TT>is</TT><I>
			
 
				-verb specifies a precise match, and the
			
 
				-</I><TT>type</TT><I>
			
 
				-field of the message is the string
			
 
				-</I><TT>text</TT><I>,
			
 
				-so the first pattern succeeds.
			
 
				-The
			
 
				-</I><TT>matches</TT><I>
			
 
				-verb invokes a regular expression pattern match of the object (here
			
 
				-</I><TT>data</TT><I>)
			
 
				-against the argument pattern.
			
 
				-Both
			
 
				-</I><TT>matches</TT><I>
			
 
				-patterns in this rule set will succeed, and in the process set the variables
			
 
				-</I><TT></TT><I>0</I><TT>
			
 
				-to the matched string,
			
 
				-</TT><TT></TT><I>1</I><TT>
			
 
				-to the first parenthesized submatch, and so on (analogous to
			
 
				-</TT><TT>&</TT><TT>,
			
 
				-</TT><TT>\1</TT><TT>,
			
 
				-etc. in
			
 
				-</TT><TT>ed</TT><TT>'s
			
 
				-regular expressions).
			
 
				-The pattern
			
 
				-</TT><TT>arg</TT><TT>
			
 
				-</TT><TT>isfile</TT><TT>
			
 
				-</TT><TT></TT><TT>0</TT><TT>
			
 
				-verifies that the named file,
			
 
				-</TT><TT>horse.gif</TT><TT>,
			
 
				-is an actual file in the directory
			
 
				-</TT><TT>/usr/rob/pics</TT><TT>.
			
 
				-If all the patterns succeed, one of the actions will be executed.
			
 
				-</TT><br>&#32;<br>
			
 
				-There are two actions in this rule set.
			
 
				-The
			
 
				-<TT>plumb</TT>
			
 
				-<TT>to</TT>
			
 
				-rule specifies
			
 
				-<TT>image</TT>
			
 
				-as the destination port of the message.
			
 
				-By convention, the plumber mounts its services in the directory
			
 
				-<TT>/mnt/plumb</TT>,
			
 
				-so in this case if the file
			
 
				-<TT>/mnt/plumb/image</TT>
			
 
				-has been opened, the message will be made available to the program reading from it.
			
 
				-Note that the message does not name a port, but the rule set that matches
			
 
				-the message does, and that is sufficient to dispatch the message.
			
 
				-If on the other hand a message matches no rule but has an explicit port mentioned,
			
 
				-that too is sufficient.
			
 
				-<br>&#32;<br>
			
 
				-If no client has opened the
			
 
				-<TT>image</TT>
			
 
				-port,
			
 
				-that is, if the program
			
 
				-<TT>page</TT>
			
 
				-is not already running, the
			
 
				-<TT>plumb</TT>
			
 
				-<TT>client</TT>
			
 
				-action gives the execution script to start the application
			
 
				-and send the message on its way; the
			
 
				-<TT>-wi</TT>
			
 
				-arguments tell
			
 
				-<TT>page</TT>
			
 
				-to create a window and to receive its initial arguments from the plumbing port.
			
 
				-The process by which the plumber starts a program is described in more detail in the next section.
			
 
				-<br>&#32;<br>
			
 
				-It may seem odd that there are two
			
 
				-<TT>matches</TT>
			
 
				-rules in this example.
			
 
				-The reason is related to the way the plumber can use the rules themselves
			
 
				-to refine the
			
 
				-<I>data</I>
			
 
				-in the message, somewhat in the manner of Structural Regular Expressions [Pike87a].
			
 
				-For example, consider what happens if the cursor is at the last character of
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% make nightmare&gt;horse.gif
			
 
				-</PRE></TT></DL>
			
 
				-and the user asks to plumb what the cursor is pointing at.
			
 
				-The program creating the plumbing
			
 
				-message&#8212;in this case the terminal emulator running the window&#8212;can send the
			
 
				-entire white-space-delimited string
			
 
				-<TT>nightmare&gt;horse.gif</TT>
			
 
				-or even the entire line, and the combination of
			
 
				-<TT>matches</TT>
			
 
				-rules can determine that the user was referring to the string
			
 
				-<TT>horse.gif</TT>.
			
 
				-The user could of course select the entire string
			
 
				-<TT>horse.gif</TT>,
			
 
				-but it's more convenient just to point in the general location and let the machine
			
 
				-figure out what should be done.
			
 
				-The process is as follows.
			
 
				-<br>&#32;<br>
			
 
				-The application generating the message adds a special attribute to the message, named
			
 
				-<TT>click</TT>,
			
 
				-whose numerical value is the offset of the cursor&#8212;the selection point&#8212;within the data string.
			
 
				-This attribute tells the plumber two things:
			
 
				-first, that the regular expressions in
			
 
				-<TT>matches</TT>
			
 
				-rules should be used to identify the relevant data;
			
 
				-and second, approximately where the relevant data lies.
			
 
				-The plumber 
			
 
				-will then use the first
			
 
				-<TT>matches</TT>
			
 
				-pattern to identify the longest leftmost match that touches the cursor, which will extract the string
			
 
				-<TT>horse.gif</TT>,
			
 
				-and the second pattern will then verify that that names a picture file.
			
 
				-The rule set succeeds and the data is winnowed to the matching substring
			
 
				-before being sent to its destination.
			
 
				-<br>&#32;<br>
			
 
				-Each
			
 
				-<TT>matches</TT>
			
 
				-pattern within a given rule set must match the same portion of the string, which
			
 
				-guarantees that the rule set fails to match a string for which the
			
 
				-second pattern matches only a portion.
			
 
				-For instance, our example rule set should not execute if the data is the string
			
 
				-<TT>horse.gift</TT>,
			
 
				-and although the first pattern will match
			
 
				-<TT>horse.gift</TT>,
			
 
				-the second will match only
			
 
				-<TT>horse.gif</TT>
			
 
				-and the rule set will fail.
			
 
				-<br>&#32;<br>
			
 
				-The same approach of multiple
			
 
				-<TT>matches</TT>
			
 
				-rules can be used to exclude, for instance, a terminal period from
			
 
				-a file name or URL, so a file name or URL at the end of a sentence is recognized properly.
			
 
				-<br>&#32;<br>
			
 
				-If a
			
 
				-<TT>click</TT>
			
 
				-attribute is not specified, all patterns must match the entire string,
			
 
				-so the user has an option:
			
 
				-he or she may select exactly what data to send,
			
 
				-or may instead indicate where the data is by clicking the selection button on the mouse
			
 
				-and letting the machine locate the URL or image file name within the text.
			
 
				-In other words,
			
 
				-the user can control the contents of the message precisely when required,
			
 
				-but the default, simplest action in the user interface does the right thing most of the time.
			
 
				-<H4>How Messages are Handled in the Plumber
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-An application creates a message header, fills in whatever fields it wishes to define,
			
 
				-attaches the data, and writes the result to the file
			
 
				-<TT>send</TT>
			
 
				-in the plumber's service directory,
			
 
				-<TT>/mnt/plumb</TT>.
			
 
				-The plumber receives the message and applies the plumbing rules successively to it.
			
 
				-When a rule set matches, the message is dispatched as indicated by that rule set
			
 
				-and processing continues with the next message.
			
 
				-If no rule set matches the message, the plumber indicates this by returning a write
			
 
				-error to the application, that is, the write to
			
 
				-<TT>/mnt/plumb/send</TT>
			
 
				-fails, with the resulting error string
			
 
				-describing the failure.
			
 
				-(Plan 9 uses strings rather than pre-defined numbers to describe error conditions.)
			
 
				-Thus a program can discover whether a plumbing message has been sent successfully.
			
 
				-<br>&#32;<br>
			
 
				-After a matching rule set has been identified, the plumber applies a series of rewriting
			
 
				-steps to the message.  Some rewritings are defined by the rule set; others are implicit.
			
 
				-For example, if the message does not specify a destination port, the outgoing message
			
 
				-will be rewritten to identify it.
			
 
				-If the message does specify the port, the rule set will only match if any
			
 
				-<TT>plumb</TT>
			
 
				-<TT>to</TT>
			
 
				-action in the rule set names the same port.
			
 
				-(If it matches no rule sets, but mentions a port, it will be sent there unmodified.)
			
 
				-<br>&#32;<br>
			
 
				-The rule set may contain actions that explicitly rewrite components of the message.
			
 
				-These may modify the attribute list or replace the data section of the message.
			
 
				-Here is a sample rule set that does both.
			
 
				-It matches strings of the form
			
 
				-<TT>plumb.h</TT>
			
 
				-or
			
 
				-<TT>plumb.h:27</TT>.
			
 
				-If that string identifies a file in the standard C include directory,
			
 
				-<TT>/sys/include</TT>,
			
 
				-perhaps with an optional line number, the outgoing message
			
 
				-is rewritten to contain the full path name and an attribute,
			
 
				-<TT>addr</TT>,
			
 
				-to hold the line number:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# .h files are looked up in /sys/include and passed to edit
			
 
				-type is text
			
 
				-data matches '([a-zA-Z0-9]+\.h)(:([0-9]+))?'
			
 
				-arg isfile /sys/include/<I>1
			
 
				-data set /sys/include/</I>1
			
 
				-attr add addr=<I>3
			
 
				-plumb to edit
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-</I><TT>data</TT><I>
			
 
				-</I><TT>set</TT><I>
			
 
				-rule replaces the contents of the data, and the
			
 
				-</I><TT>attr</TT><I>
			
 
				-</I><TT>add</TT><I>
			
 
				-rule adds a new attribute to the message.
			
 
				-The intent of this rule is to permit one to plumb an include file name in a C program
			
 
				-to trigger the opening of that file, perhaps at a specified line, in the text editor.
			
 
				-A variant of this rule, discussed below,
			
 
				-tells the editor how to interpret syntax errors from the compiler,
			
 
				-or the output of
			
 
				-</I><TT>grep</TT><I>
			
 
				-</I><TT>-n</TT><I>,
			
 
				-both of which use a fixed syntax
			
 
				-</I><I>file</I><TT>:</TT><I>line</I><TT>
			
 
				-to identify a line of source.
			
 
				-</TT><br>&#32;<br>
			
 
				-The Plan 9 text editors interpret the
			
 
				-<TT>addr</TT>
			
 
				-attribute as the definition of which portion of the file to display.
			
 
				-In fact, the real rule includes a richer definition of the address syntax,
			
 
				-so one may plumb strings such as
			
 
				-<TT>plumb.h:/plumbsend</TT>
			
 
				-(using a regular expression after the
			
 
				-<TT>/</TT>)
			
 
				-to pop up the declaration of a function in a C header file.
			
 
				-<br>&#32;<br>
			
 
				-Another form of rewriting is that the plumber may modify the attribute list of
			
 
				-the message to clarify how to handle the message.
			
 
				-The primary example of this involves the treatment of the
			
 
				-<TT>click</TT>
			
 
				-attribute, described in the previous section.
			
 
				-If the message contains a
			
 
				-<TT>click</TT>
			
 
				-attribute and the matching rule set uses it to extract the matching substring from the data,
			
 
				-the plumber
			
 
				-deletes the
			
 
				-<TT>click</TT>
			
 
				-attribute and replaces the data with the matching substring.
			
 
				-<br>&#32;<br>
			
 
				-Once the message is rewritten, the actions of the matching rule set are examined.
			
 
				-If the rule set contains a
			
 
				-<TT>plumb</TT>
			
 
				-<TT>to</TT>
			
 
				-action and the corresponding port is open&#8212;that is, if a program is already reading
			
 
				-from that port&#8212;the message is delivered to the port.
			
 
				-The application will receive the message and handle it as it sees fit.
			
 
				-If the port is not open, a
			
 
				-<TT>plumb</TT>
			
 
				-<TT>start</TT>
			
 
				-or
			
 
				-<TT>plumb</TT>
			
 
				-<TT>client</TT>
			
 
				-action will start a new program to handle the message.
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>plumb</TT>
			
 
				-<TT>start</TT>
			
 
				-action is the simpler: its argument specifies a command to run
			
 
				-instead of passing on the message; the message is discarded.
			
 
				-Here for instance is a rule that, given the process id (pid) of an existing process,
			
 
				-starts the
			
 
				-<TT>acid</TT>
			
 
				-debugger [Wint94] in a new window to examine that process:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# processes go to acid (assuming strlen(pid) &gt;= 2)
			
 
				-type is text
			
 
				-data matches '[a-zA-Z0-9.:_\-/]+'
			
 
				-data matches '[0-9][0-9]+'
			
 
				-arg isdir /proc/$0
			
 
				-plumb start window acid <I>0
			
 
				-</PRE></TT></DL>
			
 
				-(Note the use of multiple
			
 
				-</I><TT>matches</TT><I>
			
 
				-rules to avoid misfires from strings like
			
 
				-</I><TT>party.1999</TT><I>.)
			
 
				-The
			
 
				-</I><TT>arg</TT><I>
			
 
				-</I><TT>isdir</TT><I>
			
 
				-rule checks that the pid represents a running process (or broken one; Plan 9 does not create
			
 
				-</I><TT>core</TT><I>
			
 
				-files but leaves broken processes around for debugging) by checking that the process file
			
 
				-system has a directory for that pid [Kill84].
			
 
				-Using this rule, one may plumb the pid string printed by the
			
 
				-</I><TT>ps</TT><I>
			
 
				-command or by the operating system when the program breaks;
			
 
				-the debugger will then start automatically.
			
 
				-</I><br>&#32;<br>
			
 
				-The other startup action,
			
 
				-<TT>plumb</TT>
			
 
				-<TT>client</TT>,
			
 
				-is used when a program will read messages from the plumbing port.
			
 
				-For example,
			
 
				-text editors can read files specified as command arguments, so one could use a
			
 
				-<TT>plumb</TT>
			
 
				-<TT>start</TT>
			
 
				-rule to begin editing a file.
			
 
				-If, however, the editor will read messages from the
			
 
				-<TT>edit</TT>
			
 
				-plumbing port, letting it read the message
			
 
				-from the port ensures that it uses other information in the message,
			
 
				-such as the line number to display.
			
 
				-The
			
 
				-<TT>plumb</TT>
			
 
				-<TT>client</TT>
			
 
				-action is therefore like
			
 
				-<TT>plumb</TT>
			
 
				-<TT>start</TT>,
			
 
				-but keeps the message around for delivery when the application opens the port.
			
 
				-Here is the full rule set to pass a regular file to the text editor:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# existing files, possibly tagged by address, go to editor
			
 
				-type is text
			
 
				-data matches '([.a-zA-Z0-9_/\-]*[a-zA-Z0-9_/\-])('$addr')?'
			
 
				-arg isfile $1
			
 
				-data set $1
			
 
				-attr add addr=$3
			
 
				-plumb to edit
			
 
				-plumb client window $editor
			
 
				-</PRE></TT></DL>
			
 
				-If the editor is already running, the
			
 
				-<TT>plumb</TT>
			
 
				-<TT>to</TT>
			
 
				-rule causes it to receive the message on the port.
			
 
				-If not,
			
 
				-the command
			
 
				-`<TT>window</TT>
			
 
				-<TT></TT><I>editor</I><TT>'
			
 
				-will create a new window (using the Plan 9 program
			
 
				-</TT><TT>window</TT><TT>)
			
 
				-to run the editor, and once that starts it will open the
			
 
				-</TT><TT>edit</TT><TT>
			
 
				-plumbing port as usual and discover this first message already waiting.
			
 
				-</TT><br>&#32;<br>
			
 
				-The variables
			
 
				-<TT>$editor</TT>
			
 
				-and
			
 
				-<TT>$addr</TT>
			
 
				-in this rule set
			
 
				-are macros defined in the plumbing rules file; they specify the name of the user's favorite text editor
			
 
				-and a regular expression
			
 
				-that matches that editor's address syntax, such as line numbers and patterns.
			
 
				-This rule set lives in a library of shared plumbing rules that
			
 
				-users' private rules can build on,
			
 
				-so the rule set needs to be adaptable to different editors and their address syntax.
			
 
				-The macro definitions for Acme and Sam [Pike94,Pike87b] look like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-editor=acme
			
 
				-# or editor=sam
			
 
				-addrelem='((#?[0-9]+)|(/[A-Za-z0-9_\^]+/?)|[.$])'
			
 
				-addr=:($addrelem([,;+\-]$addrelem)*)
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-Finally, the application reads the message from the appropriate port, such as
			
 
				-<TT>/mnt/plumb/edit</TT>,
			
 
				-unpacks it, and goes to work.
			
 
				-<H4>Message Delivery
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-In summary, a message is delivered by writing it to the
			
 
				-<TT>send</TT>
			
 
				-file and having the plumber, perhaps after some rewriting, send it to the destination
			
 
				-port or start a new application to handle it.
			
 
				-If no destination can be found by the plumber, the original write to the
			
 
				-<TT>send</TT>
			
 
				-file will fail, and the application will know the message could not be delivered.
			
 
				-<br>&#32;<br>
			
 
				-If multiple applications are reading from the destination port, each will receive
			
 
				-an identical copy of the message; that is, the plumber implements fan-out.
			
 
				-The number of messages delivered is equal to the number of clients that have
			
 
				-opened the destination port.
			
 
				-The plumber queues the messages and makes sure that each application that opened
			
 
				-the port before the message was written gets exactly one copy.
			
 
				-<br>&#32;<br>
			
 
				-This design minimizes blocking in the sending applications, since the write to the
			
 
				-<TT>send</TT>
			
 
				-file can complete as soon as the message has been queued for the appropriate port.
			
 
				-If the plumber waited for the message to be read by the recipient, the sender could
			
 
				-block unnecessarily.
			
 
				-Unfortunately, this design also means that there is no way for a sender to know when
			
 
				-the message has been handled; in fact, there are cases when
			
 
				-the message will not be delivered at all, such as if the recipient exits while there are
			
 
				-still messages in the queue.
			
 
				-Since the plumber is part of a user interface, and not
			
 
				-an autonomous message delivery system,
			
 
				-the decision was made to give the
			
 
				-non-blocking property priority over reliability of message delivery.
			
 
				-In practice, this tradeoff has worked out well:
			
 
				-applications almost always know when a message has failed to be delivered (the
			
 
				-<TT>write</TT>
			
 
				-fails because no destination could be found),
			
 
				-and those occasions when the sender believes incorrectly that the message has been delivered
			
 
				-are both extremely rare and easily recognized by the user&#8212;usually because the recipient
			
 
				-application has exited.
			
 
				-<H4>The Rules File
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The plumber begins execution by reading the user's startup plumbing rules file,
			
 
				-<TT>lib/plumbing</TT>.
			
 
				-Since the plumber is implemented as a file server, it can also present its current rules
			
 
				-as a dynamic file, a design that provides an easily understood way to maintain the rules.
			
 
				-<br>&#32;<br>
			
 
				-The file
			
 
				-<TT>/mnt/plumb/rules</TT>
			
 
				-is the text of the rule set the plumber is currently using,
			
 
				-and it may be edited like a regular file to update those rules.
			
 
				-To clear the rules, truncate that file;
			
 
				-to add a new rule set, append to it:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% echo 'type is text
			
 
				-data is self-destruct
			
 
				-plumb start rm -rf $HOME' &gt;&gt; /mnt/plumb/rules
			
 
				-</PRE></TT></DL>
			
 
				-This rule set will take effect immediately.
			
 
				-If it has a syntax error, the write will fail with an error message from the plumber,
			
 
				-such as `malformed rule' or `undefined verb'.
			
 
				-<br>&#32;<br>
			
 
				-To restore the plumber to its startup configuration,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cp /usr/user/lib/plumbing /mnt/plumb/rules
			
 
				-</PRE></TT></DL>
			
 
				-For more sophisticated changes,
			
 
				-one can of course use a regular text editor to modify
			
 
				-<TT>/mnt/plumb/rules</TT>.
			
 
				-<br>&#32;<br>
			
 
				-This simple way of maintaining an active service could profitably be adopted by other systems.
			
 
				-It avoids the need to reboot, to update registries with special tools, or to send asynchronous signals
			
 
				-to critical programs.
			
 
				-<H4>The User Interface
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-One unusual property of the plumbing system is that
			
 
				-the user interface that programs provide to access it can vary considerably, yet
			
 
				-the result is nonetheless a unifying force in the environment.
			
 
				-Shells talk to editors, image viewers, and web browsers; debuggers talk to editors;
			
 
				-editors talk to themselves; and the window system talks to everybody.
			
 
				-<br>&#32;<br>
			
 
				-The plumber grew out of some of the ideas of the Acme editor/window-system/user interface [Pike94],
			
 
				-in particular its `acquisition' feature.
			
 
				-With a three-button mouse, clicking the right button in Acme on a piece of text tells Acme to
			
 
				-get the thing being pointed to.
			
 
				-If it is a file name, open the file;
			
 
				-if it is a directory, open a viewer for its contents;
			
 
				-if a line number, go to that line;
			
 
				-if a regular expression, search for it.
			
 
				-This one-click access to anything describable textually was very powerful but had several
			
 
				-limitations, of which the most important were that Acme's rules for interpreting the
			
 
				-text (that is, the implicit hyperlinks) were hard-wired and inflexible, and
			
 
				-that they only applied to and within Acme itself.
			
 
				-One could not, for example, use Acme's power to open an image file, since Acme is
			
 
				-a text-only system.
			
 
				-<br>&#32;<br>
			
 
				-The plumber addresses these limitations, even with Acme itself:
			
 
				-Acme now uses the plumber to interpret the right button clicks for it.
			
 
				-When the right button is clicked on some text,
			
 
				-Acme constructs a plumbing message much as described above,
			
 
				-using the
			
 
				-<TT>click</TT>
			
 
				-attribute and the white-space-delimited text surrounding the click.
			
 
				-It then writes the message to the plumber; if the write succeeds, all is well.
			
 
				-If not, it falls back to its original, internal rules, which will result in a context search
			
 
				-for the word within the current document.
			
 
				-<br>&#32;<br>
			
 
				-If the message is sent successfully, the recipient is likely to be Acme itself, of course:
			
 
				-the request may be to open a file, for example.
			
 
				-Thus Acme has turned the plumber into an external component of its own operation,
			
 
				-while expanding the possibilities; the operation might be to start an image viewer to
			
 
				-open a picture file, something Acme cannot do itself.
			
 
				-The plumber expands the power of Acme's original user interface.
			
 
				-<br>&#32;<br>
			
 
				-Traditional menu-driven programs such as the text editor Sam [Pike87b] and the default
			
 
				-shell window of the window
			
 
				-system
			
 
				-<TT>8&#189;</TT>
			
 
				-[Pike91] cannot dedicate a mouse button solely to plumbing, but they can certainly
			
 
				-dedicate a menu entry.
			
 
				-The editing menu for such programs now contains an entry,
			
 
				-<TT>plumb</TT>,
			
 
				-that creates a plumbing message using the current selection.
			
 
				-(Acme manages to send a message by clicking on the text with one button;
			
 
				-other programs require a click with the select button and then a menu operation.)
			
 
				-For example, after this happens in a shell window:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% make
			
 
				-cc -c shaney.c
			
 
				-shaney.c:232: i undefined
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-one can click anywhere on the string
			
 
				-<TT>shaney.c:232</TT>,
			
 
				-execute the
			
 
				-<TT>plumb</TT>
			
 
				-menu entry, and have line 232 appear in the text editor, be it Sam or Acme&#8212;whichever has the
			
 
				-<TT>edit</TT>
			
 
				-port open.
			
 
				-(If this were an Acme shell window, it would be sufficient to right-click on the string.)
			
 
				-<br>&#32;<br>
			
 
				-[An interesting side line is how the window system knows what directory the
			
 
				-shell is running in; in other words, what value to place in the
			
 
				-<TT>wdir</TT>
			
 
				-field of the plumb message.
			
 
				-Recall that
			
 
				-<TT>8&#189;</TT>
			
 
				-is, like many Plan 9 programs, a file server.
			
 
				-It now serves a new file,
			
 
				-<TT>/dev/wdir</TT>,
			
 
				-that is private to each window.
			
 
				-Programs, in particular the
			
 
				-Plan 9 shell,
			
 
				-<TT>rc</TT>,
			
 
				-can write that file to inform the window system of its current directory.
			
 
				-When a
			
 
				-<TT>cd</TT>
			
 
				-command is executed in an interactive shell,
			
 
				-<TT>rc</TT>
			
 
				-updates the contents of
			
 
				-<TT>/dev/wdir</TT>
			
 
				-and plumbing can proceed with local file names.]
			
 
				-<br>&#32;<br>
			
 
				-Of course, users can plumb image file names, process ids, URLs, and other items&#8212;any string
			
 
				-whose syntax and disposition are defined in the plumbing rules file.
			
 
				-An example of how the pieces fit together is the way Plan 9 now handles mail, particularly
			
 
				-MIME-encoded messages.
			
 
				-<br>&#32;<br>
			
 
				-When a new mail message arrives, the mail receiver process sends a plumbing message to the
			
 
				-<TT>newmail</TT>
			
 
				-port, which notifies any interested process that new mail is here.
			
 
				-The plumbing message contains information about the mail, including
			
 
				-its sender, date, and current location in the file system.
			
 
				-The interested processes include a program,
			
 
				-<TT>faces</TT>,
			
 
				-that gives a graphical display of the mail box using
			
 
				-faces to represent the senders of messages [PiPr85],
			
 
				-as well as interactive mail programs such as the Acme mail viewer [Pike94].
			
 
				-The user can then click on the face that appears, and the
			
 
				-<TT>faces</TT>
			
 
				-program will send another plumbing message, this time to the
			
 
				-<TT>showmail</TT>
			
 
				-port.
			
 
				-Here is the rule for that port:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# faces -&gt; new mail window for message
			
 
				-type is text
			
 
				-data matches '[a-zA-Z0-9_\-./]+'
			
 
				-data matches '/mail/fs/[a-zA-Z0-9/]+/[0-9]+'
			
 
				-plumb to showmail
			
 
				-plumb start window edmail -s $0
			
 
				-</PRE></TT></DL>
			
 
				-If a program, such as the Acme mail reader, is reading that port, it will open a new window
			
 
				-in which to display the message.
			
 
				-If not, the
			
 
				-<TT>plumb</TT>
			
 
				-<TT>start</TT>
			
 
				-rule will create a new window and run
			
 
				-<TT>edmail</TT>,
			
 
				-a conventional mail reading process, to examine it.
			
 
				-Notice how the plumbing connects the components of the interface together the same way
			
 
				-regardless of which components are actually being used to view mail.
			
 
				-<br>&#32;<br>
			
 
				-There is more to the mail story.
			
 
				-Naturally, mail boxes in Plan 9 are treated as little file systems, which are synthesized
			
 
				-on demand by a special-purpose file server that takes a flat mail box file and converts
			
 
				-it into a set of directories, one per message, with component files containing the header,
			
 
				-body, MIME information, and so on.
			
 
				-Multi-part MIME messages are unpacked into multi-level directories, like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% ls -l /mail/fs/mbox/25
			
 
				-d-r-xr-xr-x M 20 rob rob     0 Nov 21 13:06 /mail/fs/mbox/25/1
			
 
				-d-r-xr-xr-x M 20 rob rob     0 Nov 21 13:06 /mail/fs/mbox/25/2
			
 
				---r--r--r-- M 20 rob rob 28678 Nov 21 13:06 /mail/fs/mbox/25/body
			
 
				---r--r--r-- M 20 rob rob     0 Nov 21 13:06 /mail/fs/mbox/25/cc
			
 
				-...
			
 
				-% mail
			
 
				-25 messages
			
 
				-: 25
			
 
				-From: presotto
			
 
				-Date: Sun Nov 21 13:05:51 EST 1999
			
 
				-To: rob
			
 
				-
			
 
				-Check this out.
			
 
				-
			
 
				-===&gt; 2/ (image/jpeg) [inline]
			
 
				-	/mail/fs/mbox/25/2/fabio.jpg
			
 
				-:
			
 
				-</PRE></TT></DL>
			
 
				-Since the components are all (synthetic) files, the user can plumb the pieces
			
 
				-to view embedded pictures, URLs, and so on.
			
 
				-Note that the mail program can plumb the contents of
			
 
				-<TT>inline</TT>
			
 
				-attachments automatically, without user interaction;
			
 
				-in other words, plumbing lets the mailer handle multimedia data
			
 
				-without itself interpreting it.
			
 
				-<br>&#32;<br>
			
 
				-At a more mundane level, a shell command,
			
 
				-<TT>plumb</TT>,
			
 
				-can be used to send messages:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% cd /usr/rob/src
			
 
				-% plumb mem.c
			
 
				-</PRE></TT></DL>
			
 
				-will send the appropriate message to the
			
 
				-<TT>edit</TT>
			
 
				-port.
			
 
				-A surprising use of the
			
 
				-<TT>plumb</TT>
			
 
				-command is in actions within the plumbing rules file.
			
 
				-In our lab, we commonly receive Microsoft Word documents by mail,
			
 
				-but we do not run Microsoft operating systems on our machines so we cannot
			
 
				-view them without at least rebooting.
			
 
				-Therefore, when a Word document arrives in mail, we could plumb the
			
 
				-<TT>.doc</TT>
			
 
				-file but the text editor could not decode it.
			
 
				-However, we have a program,
			
 
				-<TT>doc2txt</TT>,
			
 
				-that decodes the Word file format to extract and format the embedded text.
			
 
				-The solution is to use
			
 
				-<TT>plumb</TT>
			
 
				-in a
			
 
				-<TT>plumb</TT>
			
 
				-<TT>start</TT>
			
 
				-action to invoke
			
 
				-<TT>doc2txt</TT>
			
 
				-on
			
 
				-<TT>.doc</TT>
			
 
				-files and synthesize a plain text file:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# rule set for microsoft word documents
			
 
				-type is text
			
 
				-data matches '[a-zA-Z0-9_\-./]+'
			
 
				-data matches '([a-zA-Z0-9_\-./]+)\.doc'
			
 
				-arg isfile $0
			
 
				-plumb start doc2txt $data | \
			
 
				-    plumb -i -d edit -a action=showdata -a filename=$0
			
 
				-</PRE></TT></DL>
			
 
				-The arguments to
			
 
				-<TT>plumb</TT>
			
 
				-tell it to take standard input as its data rather than the text of the arguments
			
 
				-(<TT>-i</TT>),
			
 
				-define the destination port
			
 
				-(<TT>-d</TT>
			
 
				-<TT>edit</TT>),
			
 
				-and set a conventional attribute so the editor knows to show the message data
			
 
				-itself rather than interpret it as a file name
			
 
				-(<TT>-a</TT>
			
 
				-<TT>action=showdata</TT>)
			
 
				-and provide the original file name
			
 
				-(<TT>-a</TT>
			
 
				-<TT>filename=$0</TT>).
			
 
				-Now when a user plumbs a
			
 
				-<TT>.doc</TT>
			
 
				-file the plumbing rules run a process to extract the text and send it as a
			
 
				-temporary file to the editor for viewing.
			
 
				-It's imperfect, but it's easy and it beats rebooting.
			
 
				-<br>&#32;<br>
			
 
				-Another simple example is a rule that turns man pages into hypertext.
			
 
				-Manual page entries of the form
			
 
				-<TT>plumber(1)</TT>
			
 
				-can be clicked on to pop up a window containing the formatted `man page'.
			
 
				-That man page will in turn contain more such citations, which will also be clickable.
			
 
				-The rule is a little like that for Word documents:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# man index entries are synthesized
			
 
				-type is text
			
 
				-data matches '([a-zA-Z0-9_\-./]+)\(([0-9])\)'
			
 
				-plumb start man $2 $1 | \
			
 
				-    plumb -i -d edit -a action=showdata -a filename=/man/$1($2)
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-There are many other inventive uses of plumbing.
			
 
				-One more should give some of the flavor.
			
 
				-We have a shell script,
			
 
				-<TT>src</TT>,
			
 
				-that takes as argument the name of an executable binary file.
			
 
				-It examines the symbol table of the binary to find the source file
			
 
				-from which it was compiled.
			
 
				-Since the Plan 9 compilers place full source path names in the symbol table,
			
 
				-<TT>src</TT>
			
 
				-can discover the complete file name.
			
 
				-That is then passed to
			
 
				-<TT>plumb</TT>,
			
 
				-complete with the line number to find the
			
 
				-symbol
			
 
				-<TT>main</TT>.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% src plumb
			
 
				-</PRE></TT></DL>
			
 
				-is all it takes to pop up an editor window on the
			
 
				-<TT>main</TT>
			
 
				-routine of the
			
 
				-<TT>plumb</TT>
			
 
				-command, beginning at line 39 of
			
 
				-<TT>/sys/src/cmd/plumb/plumb.c</TT>.
			
 
				-Like most uses of plumbing,
			
 
				-this is not a breakthrough in functionality, but it is a great convenience.
			
 
				-<H4>Why This Architecture?
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The design of the plumbing system is peculiar:
			
 
				-a centralized language-based file server does most of the work,
			
 
				-while compared to other systems the applications themselves
			
 
				-contribute relatively little.
			
 
				-This architecture is deliberate, of course.
			
 
				-<br>&#32;<br>
			
 
				-That the plumber's behavior is derived from a linguistic description
			
 
				-gives the system great flexibility and dynamism&#8212;rules can be added
			
 
				-and changed at will, without rebooting&#8212;but the existence of a central library of rules
			
 
				-ensures that, for most users, the environment behaves in well-established ways.
			
 
				-<br>&#32;<br>
			
 
				-That the plumber is a file server is perhaps the most unusual aspect of its design,
			
 
				-but is also one of the most important.
			
 
				-Messages are passed by regular I/O operations on files, so no extra technology
			
 
				-such as remote procedure call or request brokers needs to be provided;
			
 
				-messages are transmitted by familiar means.
			
 
				-Almost every service in Plan 9 is a file server, so services can be exported
			
 
				-trivially using the system's remote file system operations [Pike93].
			
 
				-The plumber is no exception;
			
 
				-plumbing messages pass routinely across the network to remote applications without
			
 
				-any special provision,
			
 
				-in contrast to some commercial IPC mechanisms that become
			
 
				-significantly more complex when they involve multiple machines.
			
 
				-As I write this, my window system is talking to applications running on three
			
 
				-different machines, but they all share a single instance of the plumber and so
			
 
				-can interoperate to integrate my environment.
			
 
				-Plan 9 uses a shared file name space
			
 
				-to combine multiple networked machines&#8212;compute servers,
			
 
				-file servers, and interactive workstations&#8212;into a single
			
 
				-computing environment; plumbing's design as a file server
			
 
				-is a natural by-product of, and contributor to, the overall system architecture
			
 
				-[Pike92].
			
 
				-<br>&#32;<br>
			
 
				-The centrality of the plumber is also unusual.
			
 
				-Other systems tend to let the applications determine where messages will go;
			
 
				-consider mail readers that recognize and highlight URLs in the messages.
			
 
				-Why should just the mail readers do this, and why should they just do it for URLs?
			
 
				-(Acme was guilty of similar crimes.)
			
 
				-The plumber, by removing such decisions to a central authority,
			
 
				-guarantees that all applications behave the same and simultaneously
			
 
				-frees them all from figuring out what's important.
			
 
				-The ability for the plumber to excerpt useful data from within a message
			
 
				-is critical to the success of this model.
			
 
				-<br>&#32;<br>
			
 
				-The entire system is remarkably small.
			
 
				-The plumber itself is only about two thousand lines of C code.
			
 
				-Most applications work fine in a plumbing environment without knowing about it at all;
			
 
				-some need trivial changes such as to standardize their error output;
			
 
				-a few need to generate and receive plumbing messages.
			
 
				-But even to add the ability to send and receive messages in a program such as a text editor is short work,
			
 
				-involving typically a few dozen lines of code.
			
 
				-Plumbing fits well into the existing environment.
			
 
				-<br>&#32;<br>
			
 
				-But plumbing is new and it hasn't been pushed far enough yet.
			
 
				-Most of the work so far has been with textual messages, although
			
 
				-the underlying system is capable of handling general data.
			
 
				-We plan to reimplement some of the existing data movement operations,
			
 
				-such as cut and paste or drag and drop, to use plumbing as their exchange mechanism.
			
 
				-Since the plumber is a central message handler, it is an obvious place to store the `clipboard'.
			
 
				-The clipboard could be built as a special port that holds onto messages rather than
			
 
				-deleting them after delivery.
			
 
				-Since the clipboard would then be holding a plumbing
			
 
				-message rather than plain text, as in the current Plan 9 environment,
			
 
				-it would become possible to cut and paste arbitrary data without
			
 
				-providing new mechanism.
			
 
				-In effect, we would be providing a new user interface to the existing plumbing facilities.
			
 
				-<br>&#32;<br>
			
 
				-Another possible extension is the ability to override plumbing operations interactively.
			
 
				-Originally, the plan was to provide a mechanism, perhaps a pop-up menu, that one could
			
 
				-use to direct messages, for example to send a PostScript file to the editor rather than the
			
 
				-PostScript viewer by naming an explicit destination in the message.
			
 
				-Although this deficiency should one day be addressed, it should be done without
			
 
				-complicating the interface for invoking the default behavior.
			
 
				-Meanwhile, the default behavior seems to work very well in practice&#8212;as it
			
 
				-must if plumbing is to be successful&#8212;so the lack of
			
 
				-overrides is not keenly felt.
			
 
				-<H4>Comparison with Other Systems
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The ideas of the plumbing system grew from an
			
 
				-attempt to generalize the way Acme acquires files and data.
			
 
				-Systems further from that lineage also share some properties with plumbing.
			
 
				-Most, however, require explicit linking or message passing rather than
			
 
				-plumbing's implicit, context-based pattern matching, and none
			
 
				-has the plumber's design of a language-based file server.
			
 
				-<br>&#32;<br>
			
 
				-Reiss's FIELD system [Reis95] probably comes the closest to providing the facilities of the plumber.
			
 
				-It has a central message-passing mechanism that connects applications together through
			
 
				-a combination of a library and a pattern-matching central message dispatcher that handles
			
 
				-message send and reply.
			
 
				-The main differences between FIELD's message dispatcher and the plumber are first
			
 
				-that the plumber is based on a special-purpose language while the FIELD
			
 
				-system uses an object-oriented library, second that the plumber has no concept
			
 
				-of a reply to a message, and finally that the FIELD system
			
 
				-has no concept of port.
			
 
				-But the key distinction is probably in the level of use.
			
 
				-In FIELD, the message dispatcher is a critical integrating force of the underlying
			
 
				-programming environment, handling everything from debugging events to
			
 
				-changing the working directory of a program.
			
 
				-Plumbing, by contrast, is intended primarily for integrating the user interface
			
 
				-of existing tools; it is more modest and very much simpler.
			
 
				-The central advantage of the plumber is its convenience and dynamism;
			
 
				-the FIELD system does not share the ease with which
			
 
				-message dispatch rules can be added or modified.
			
 
				-<br>&#32;<br>
			
 
				-The inspiration for Acme was
			
 
				-the user interface to the object-oriented Oberon system [WiGu92].
			
 
				-Oberon's user interface interprets mouse clicks on strings such as
			
 
				-<TT>Obj.meth</TT>
			
 
				-to invoke calls to the method
			
 
				-<TT>meth</TT>
			
 
				-of the object
			
 
				-<TT>Obj</TT>.
			
 
				-This was the starting point for Acme's middle-button execution [Pike94],
			
 
				-but nothing in Oberon is much like Acme's right-button `acquisition',
			
 
				-which was the starting point for the plumber.
			
 
				-Oberon's implicit method-based linking is not nearly as general as the pattern-matched
			
 
				-linking of the plumber, nor does its style of user-triggered method call
			
 
				-correspond well to the more general idea of inter-application communication
			
 
				-of plumbing messages.
			
 
				-<br>&#32;<br>
			
 
				-Microsoft's OLE interface is another relative.
			
 
				-It allows one application to
			
 
				-<I>embed</I>
			
 
				-its own data within another's,
			
 
				-for example to place an Excel spreadsheet within a Frame document;
			
 
				-when Frame needs to format the page, it will start Excel itself, or at least some of its
			
 
				-DLLs, to format the spreadsheet.
			
 
				-OLE data can only be understood by the application that created it;
			
 
				-plumbing messages, by contrast, contain arbitrary data with a rigidly formatted header
			
 
				-that will be interpreted by the pattern matcher and the destination application.
			
 
				-The plumber's simplified message format may limit its
			
 
				-flexibility but makes messages easy and efficient to dispatch and to interpret.
			
 
				-At least for the cut-and-paste style of exchange OLE encourages,
			
 
				-plumbing gives up some power in return for simplicity, while avoiding
			
 
				-the need to invoke a vestigial program (if Excel can be called a vestige) every time
			
 
				-the pasted data is examined.
			
 
				-Plumbing is also better suited to
			
 
				-other styles of data exchange, such as connecting compiler errors to the
			
 
				-text editor.
			
 
				-<br>&#32;<br>
			
 
				-The Hyperbole [Wein] package for Emacs adds hypertext facilities to existing documents.
			
 
				-It includes explicit links and, like plumbing, a rule-driven way to form implicit links.
			
 
				-Since Emacs is purely textual, like Acme, Hyperbole does not easily extend to driving
			
 
				-graphical applications, nor does it provide a general interprocess communication method.
			
 
				-For instance, although Hyperbole provides some integration for mail applications,
			
 
				-it cannot provide the glue that allows a click on a face icon in an external program to open a
			
 
				-mail message within the viewer.
			
 
				-Moreover, since it is not implemented as a file server,
			
 
				-Hyperbole does not share the advantages of that architecture.
			
 
				-<br>&#32;<br>
			
 
				-Henry's
			
 
				-<TT>error</TT>
			
 
				-program in 4BSD echoes a small but common use of plumbing.
			
 
				-It takes the error messages produced by a compiler and drives a text editor
			
 
				-through the steps of looking at each one in turn; the notion is to quicken the
			
 
				-compile/edit/debug cycle.
			
 
				-Similar results are achieved in EMACS by writing special M-LISP
			
 
				-macros to parse the error messages from various compilers.
			
 
				-Although for this particular purpose they may be more convenient than plumbing,
			
 
				-these are specific solutions to a specific problem and lack plumbing's generality.
			
 
				-<br>&#32;<br>
			
 
				-Of course, the resource forks in MacOS and the association rules for
			
 
				-file name extensions in Windows also provide some of the functionality of
			
 
				-the plumber, although again without the generality or dynamic nature.
			
 
				-<br>&#32;<br>
			
 
				-Closer to home, Ousterhout's Tcl (Tool Command Language) [Oust90]
			
 
				-was originally designed to embed a little command interpreter
			
 
				-in each application to control interprocess communication and
			
 
				-provide a level of integration.
			
 
				-Plumbing, on the other hand, provides minimal support within
			
 
				-the application, offloading most of the message handling and all the
			
 
				-command execution to the central plumber.
			
 
				-<br>&#32;<br>
			
 
				-The most obvious relative to plumbing is perhaps the hypertext links of a web browser.
			
 
				-Plumbing differs by synthesizing
			
 
				-the links on demand.
			
 
				-Rather than constructing links within a document as in HTML,
			
 
				-plumbing uses the context of a button click to derive what it should link to.
			
 
				-That the rules for this decision can be modified dynamically gives it a more
			
 
				-fluid feel than a standard web browsing world.
			
 
				-One possibility for future work is to adapt a web browser to use
			
 
				-plumbing as its link-following engine, much as Acme used plumbing to offload
			
 
				-its acquisition rules.
			
 
				-This would connect the web browser to the existing tools, rather than the
			
 
				-current trend in most systems of replacing the tools by a browser.
			
 
				-<br>&#32;<br>
			
 
				-Each of these prior systems&#8212;and there are others, e.g. [Pasa93, Free93]&#8212;addresses
			
 
				-a particular need or subset of the
			
 
				-issues of system integration.
			
 
				-Plumbing differs because its particular choices were different.
			
 
				-It focuses on two key issues:
			
 
				-centralizing and automating the handling of interprocess communication
			
 
				-among interactive programs,
			
 
				-and maximizing the convenience (or minimizing the trouble) for the human user
			
 
				-of its services.
			
 
				-Moreover, the plumber's implementation as a file server, with messages
			
 
				-passed over files it controls,
			
 
				-permits the architecture to work transparently across a network.
			
 
				-None of the other systems discussed here integrates distributed systems
			
 
				-as smoothly as local ones without the addition of significant extra technology.
			
 
				-<H4>Discussion
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-There were a few surprises during the development of plumbing.
			
 
				-The first version of plumbing was done for the Inferno system [Dorw97a,Dorw97b],
			
 
				-using its file-to-channel mechanism to mediate the IPC.
			
 
				-Although it was very simple to build, it encountered difficulties because
			
 
				-the plumber was too disconnected from its clients; in particular, there was
			
 
				-no way to discover whether a port was in use.
			
 
				-When plumbing was implemented afresh for Plan 9, it was provided through a true file server.
			
 
				-Although this was much more work, it paid off handsomely.
			
 
				-The plumber now knows whether a port is open, which makes it easy to decide whether
			
 
				-a new program must be started to handle a message,
			
 
				-and the ability to edit the rules file dynamically is a major advantage.
			
 
				-Other advantages arise from the file-server design,
			
 
				-such as
			
 
				-the ease of exporting plumbing ports across the network to remote machines
			
 
				-and the implicit security model a file-based interface provides: no one has
			
 
				-permission to open my private plumbing files.
			
 
				-<br>&#32;<br>
			
 
				-On the other hand, Inferno was an all-new environment and the user interface for plumbing was
			
 
				-able to be made uniform for all applications.
			
 
				-This was impractical for Plan 9, so more
			
 
				-<I>ad hoc</I>
			
 
				-interfaces had to be provided for that environment.
			
 
				-Yet even in Plan 9 the advantages of efficient,
			
 
				-convenient, dynamic interprocess communication outweigh the variability of
			
 
				-the user interface.
			
 
				-In fact, it is perhaps a telling point that the system works well for a variety of interfaces;
			
 
				-the provision of a central, convenient message-passing
			
 
				-service is a good idea regardless of how the programs use it.
			
 
				-<br>&#32;<br>
			
 
				-Plumbing's rule language uses only regular expressions and a few special
			
 
				-rules such as
			
 
				-<TT>isfile</TT>
			
 
				-for matching text.
			
 
				-There is much more that could be done.  For example, in the current system a JPEG
			
 
				-file can be recognized by a
			
 
				-<TT>.jpg</TT>
			
 
				-suffix but not by its contents, since the plumbing language has no facility
			
 
				-for examining the
			
 
				-<I>contents</I>
			
 
				-of files named in its messages.
			
 
				-To address this issue without adding more special rules requires rethinking
			
 
				-the language itself.
			
 
				-Although the current system seems a good balance of complexity
			
 
				-and functionality,
			
 
				-perhaps a richer, more general-purpose language would
			
 
				-permit more exotic applications of the plumbing model.
			
 
				-<br>&#32;<br>
			
 
				-In conclusion, plumbing adds an effective, easy-to-use inter-application
			
 
				-communication mechanism to the Plan 9
			
 
				-user interface.
			
 
				-Its unusual design as a language-driven file server makes it easy to add
			
 
				-context-dependent, dynamically interpreted, general-purpose hyperlinks
			
 
				-to the desktop, for both existing tools and new ones.
			
 
				-<H4>Acknowledgements
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Dave Presotto wrote the mail file system and
			
 
				-<TT>edmail</TT>.
			
 
				-He, Russ Cox, Sape Mullender, and Cliff Young influenced the design, offered useful suggestions,
			
 
				-and suffered early versions of the software.
			
 
				-They also made helpful comments on this paper, as did Dennis Ritchie and Brian Kernighan.
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Dorw97a]
			
 
				-Sean Dorward, Rob Pike, David Leo Presotto, Dennis M. Ritchie,
			
 
				-Howard W. Trickey, and Philip Winterbottom,
			
 
				-``Inferno'',
			
 
				-<I>Proceedings of the IEEE Compcon 97 Conference</I>,
			
 
				-San Jose, 1997, pp. 241-244.
			
 
				-<br>&#32;<br>
			
 
				-[Dorw97b]
			
 
				-Sean Dorward, Rob Pike, David Leo Presotto, Dennis M. Ritchie,
			
 
				-Howard W. Trickey, and Philip Winterbottom,
			
 
				-``The Inferno Operating System'',
			
 
				-<I>Bell Labs Technical Journal</I>,
			
 
				-<B>2</B>,
			
 
				-1, Winter, 1997.
			
 
				-<br>&#32;<br>
			
 
				-[Free93]
			
 
				-FreeBSD,
			
 
				-Syslog configuration file manual
			
 
				-<A href="/magic/man2html/0/syslog.conf"><I>syslog.conf</I>(0).
			
 
				-</A><br>&#32;<br>
			
 
				-[Kill84]
			
 
				-T. J. Killian,
			
 
				-``Processes as Files'',
			
 
				-<I>Proceedings of the Summer 1984 USENIX Conference</I>,
			
 
				-Salt Lake City, 1984, pp. 203-207.
			
 
				-<br>&#32;<br>
			
 
				-[Oust90]
			
 
				-John K. Ousterhout,
			
 
				-``Tcl: An Embeddable Command Language'',
			
 
				-<I>Proceedings of the Winter 1990 USENIX Conference</I>,
			
 
				-Washington, 1990, pp. 133-146.
			
 
				-<br>&#32;<br>
			
 
				-[Pasa93]
			
 
				-Vern Paxson and Chris Saltmarsh,
			
 
				-"Glish: A User-Level Software Bus for Loosely-Coupled Distributed Systems" ,
			
 
				-<I>Proceedings of the Winter 1993 USENIX Conference</I>,
			
 
				-San Diego, 1993, pp. 141-155.
			
 
				-<br>&#32;<br>
			
 
				-[Pike87a]
			
 
				-Rob Pike,
			
 
				-``Structural Regular Expressions'',
			
 
				-<I>EUUG Spring 1987 Conference Proceedings</I>,
			
 
				-Helsinki, May 1987, pp. 21-28.
			
 
				-<br>&#32;<br>
			
 
				-[Pike87b]
			
 
				-Rob Pike,
			
 
				-``The Text Editor sam'',
			
 
				-<I>Software - Practice and Experience</I>,
			
 
				-<B>17</B>,
			
 
				-5, Nov. 1987, pp. 813-845.
			
 
				-<br>&#32;<br>
			
 
				-[Pike91]
			
 
				-Rob Pike,
			
 
				-``8&#189;, the Plan 9 Window System'',
			
 
				-<I>Proceedings of the Summer 1991 USENIX Conference</I>,
			
 
				-Nashville, 1991, pp. 257-265.
			
 
				-<br>&#32;<br>
			
 
				-[Pike93]
			
 
				-Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom,
			
 
				-``The Use of Name Spaces in Plan 9'',
			
 
				-<I>Operating Systems Review</I>,
			
 
				-<B>27</B>,
			
 
				-2, April 1993, pp. 72-76.
			
 
				-<br>&#32;<br>
			
 
				-[Pike94]
			
 
				-Rob Pike,
			
 
				-``Acme: A User Interface for Programmers'',
			
 
				-<I>Proceedings of the Winter 1994 USENIX Conference</I>,
			
 
				-San Francisco, 1994, pp. 223-234.
			
 
				-<br>&#32;<br>
			
 
				-[PiPr85]
			
 
				-Rob Pike and Dave Presotto,
			
 
				-``Face the Nation'',
			
 
				-<I>Proceedings of the USENIX Summer 1985 Conference</I>,
			
 
				-Portland, 1985, pg. 81.
			
 
				-<br>&#32;<br>
			
 
				-[Reis95]
			
 
				-Steven P. Reiss,
			
 
				-<I>The FIELD Programming Environment: A Friendly Integrated Environment for Learning and Development</I>,
			
 
				-Kluwer, Boston, 1995.
			
 
				-<br>&#32;<br>
			
 
				-[Wein]
			
 
				-Bob Weiner,
			
 
				-<I>Hyperbole User Manual</I>,
			
 
				-<TT>http://www.cs.indiana.edu/elisp/hyperbole/hyperbole_1.html</TT>
			
 
				-<br>&#32;<br>
			
 
				-[Wint94]
			
 
				-Philip Winterbottom,
			
 
				-``ACID: A Debugger based on a Language'',
			
 
				-<I>Proceedings of the USENIX Winter Conference</I>,
			
 
				-San Francisco, CA, 1994.
			
 
				-<br>&#32;<br>
			
 
				-[WiGu92]
			
 
				-Niklaus Wirth and Jurg Gutknecht,
			
 
				-<I>Project Oberon: The Design of an Operating System and Compilers</I>,
			
 
				-Addison-Wesley, Reading, 1992.
			
 
				-
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/port.html
+++ b/sys/doc/port.html
@@ -1,500 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>The Various Ports
			
 
				-</H1>
			
 
				-<P>
			
 
				-This document collects comments about the various
			
 
				-architectures supported by Plan 9.
			
 
				-The system tries to hide most of the differences between machines,
			
 
				-so the machines as seen by a Plan 9
			
 
				-user look different from how they are perceived through commercial software.
			
 
				-Also, because we are a small group, we couldn't do everything:
			
 
				-exploit every optimization, support every model,
			
 
				-drive every device.
			
 
				-This document records what we
			
 
				-<I>have</I>
			
 
				-done.
			
 
				-The first section discusses the compiler/assembler/loader suite for each machine.
			
 
				-The second talks about
			
 
				-the operating system implemented on each of the various
			
 
				-machines.
			
 
				-</P>
			
 
				-<H4>The Motorola MC68020 compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-This is the oldest compiler of the bunch.  Relative to its
			
 
				-competitors&#8212;commercial compilers for the same machine&#8212;it generates
			
 
				-quite good code.
			
 
				-It assumes at least a 68020 architecture: some of the addressing
			
 
				-modes it generates are not on the 68000 or 68010.
			
 
				-</P>
			
 
				-<P>
			
 
				-We also use this compiler for the 68040.  Except for a few
			
 
				-instructions and registers available only from assembly language,
			
 
				-the only user-visible difference between these machines is in
			
 
				-floating point.  Our 68020s all have 68881 or 68882 floating
			
 
				-point units attached, so to execute floating point programs we
			
 
				-depend on there being appropriate hardware.
			
 
				-Unfortunately, the 68040 is not quite so thorough in its implementation
			
 
				-of the IEEE 754 standard or in its provision of built-in instructions
			
 
				-for the
			
 
				-transcendental functions.  The latter was easy to get around: we
			
 
				-don't use them on the 68020 either, but we do have a library,
			
 
				-<TT>-l68881</TT>,
			
 
				-that you can use if you need the performance (which can be
			
 
				-substantial:
			
 
				-<TT>astro</TT>
			
 
				-runs twice as fast).
			
 
				-We don't use this library by default because we want to run the same
			
 
				-binaries on both machines and don't want to emulate
			
 
				-<TT>FCOSH</TT>
			
 
				-in the operating system.
			
 
				-</P>
			
 
				-<P>
			
 
				-The problem with IEEE is nastier.  We didn't really want to deal
			
 
				-with gradual underflow and all that, especially since we had
			
 
				-half a dozen machines we'd need to do it on, so on the 68040
			
 
				-we implement non-trapping underflow as truncation to zero and
			
 
				-do nothing about denormalized numbers and not-a-numbers.
			
 
				-This means the 68020
			
 
				-and the 68040 are not precisely compatible.
			
 
				-</P>
			
 
				-<H4>The Motorola MC68000 compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-This compiler is a stripped-down version of the MC68020 compiler
			
 
				-built for an abortive port to the Dragonball processor on the Palm Pilot.
			
 
				-It generates position-independent code whose overall quality is much
			
 
				-poorer than the code for the MC68020.
			
 
				-</P>
			
 
				-<H4>The MIPS compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-This compiler generates code for the R2000, R3000, and R4000 machines configured
			
 
				-to be big-endians.  The compiler generates no R4000-specific instructions
			
 
				-although the assembler and loader support the new user-mode instructions.
			
 
				-There is no support for little-endian machines.
			
 
				-(A little-endian port exists, but is not included in the distribution.
			
 
				-Contact us if you need it.)
			
 
				-Considering its speed, the Plan 9 compiler generates good code,
			
 
				-but the commercial
			
 
				-MIPS compiler with all the stops pulled out consistently beats it
			
 
				-by 20% or so, sometimes more.  Since ours compiles about 10 times
			
 
				-faster and we spend most of our time compiling anyway,
			
 
				-we are content with the tradeoff.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compiler is solid: we've used it for several big projects and, of course,
			
 
				-all our applications run under it.
			
 
				-The behavior of floating-point programs is much like on the 68040:
			
 
				-the operating system emulates where necessary to get past non-trapping
			
 
				-underflow and overflow, but does not handle gradual underflow or
			
 
				-denormalized numbers or not-a-numbers.
			
 
				-</P>
			
 
				-<H4>The SPARC compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-The SPARC compiler is also solid and fast, although we haven't
			
 
				-used it for a few years, due to a lack of current hardware.  We have seen it do
			
 
				-much better than GCC with all the optimizations, but on average
			
 
				-it is probably about the same.
			
 
				-</P>
			
 
				-<P>
			
 
				-We used to run some old SPARC machines with no multiply or divide instructions,
			
 
				-so the compiler
			
 
				-does not produce them by default.
			
 
				-Instead it calls internal subroutines.
			
 
				-A loader flag,
			
 
				-<TT>-M</TT>,
			
 
				-causes the instructions to be emitted.  The operating system has
			
 
				-trap code to emulate them if necessary, but the traps are slower than
			
 
				-emulating them in user mode.
			
 
				-In any modern lab, in which SPARCs have the instructions, it would be worth enabling the
			
 
				-<TT>-M</TT>
			
 
				-flag by default.
			
 
				-</P>
			
 
				-<P>
			
 
				-The floating point story is the same as on the MIPS.
			
 
				-</P>
			
 
				-<H4>The Intel i386 compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-This is really an
			
 
				-<I>x</I>86
			
 
				-compiler, for
			
 
				-<I>x</I>&gt;2.
			
 
				-It works only
			
 
				-if the machine is in 32-bit protected mode.
			
 
				-It is solid and generates tolerable code; it is our main compiler these days.
			
 
				-</P>
			
 
				-<P>
			
 
				-Floating point is well-behaved, but the compiler assumes i387-compatible
			
 
				-hardware to execute
			
 
				-the instructions.  With 387 hardware,
			
 
				-the system does the full IEEE 754 job, just like
			
 
				-the MC68881.  By default, the libraries don't use the 387 built-ins for
			
 
				-transcendentals.
			
 
				-If you want them,
			
 
				-build the code in
			
 
				-<TT>/sys/src/libc/386/387</TT>.
			
 
				-</P>
			
 
				-<H4>The Intel i960 compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-This compiler was built as a weekend hack to let us get the Cyclone
			
 
				-boards running.  It has only been used to run one program&#8212;the on-board
			
 
				-code in the Cyclone&#8212;and is therefore likely to be buggy.
			
 
				-There are a number of obvious optimizations to the code that have
			
 
				-never been attempted.
			
 
				-For example, the compiler does not support pipelining.
			
 
				-The code runs in little-endian mode.
			
 
				-</P>
			
 
				-<H4>The DEC Alpha compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-The Alpha compiler is based on a port done by David Hogan while
			
 
				-studying at the Basser Department of Computer Science, University of Sydney.
			
 
				-It has been used to build a running version of the operating system, but has
			
 
				-not been stressed as much as some of the other compilers.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although the Alpha is a 64-bit architecture, this compiler treats
			
 
				-<TT>int</TT>s,
			
 
				-<TT>long</TT>s
			
 
				-and pointers as 32 bits.  Access to the 64-bit operations is available through the
			
 
				-<TT>vlong</TT>
			
 
				-type, as with the other architectures.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compiler assumes that the target CPU supports the optional byte and
			
 
				-word memory operations (the ``BWX'' extension).
			
 
				-If you have an old system, you can generate code without using the extension
			
 
				-by passing the loader the
			
 
				-<TT>-x</TT>
			
 
				-option.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are a number of optimizations that the Alpha Architecture Handbook
			
 
				-recommends, but this compiler does not do.  In particular, there is currently
			
 
				-no support for the code alignment and code scheduling optimizations.
			
 
				-</P>
			
 
				-<P>
			
 
				-The compiler tries to conform to IEEE, but some Alpha CPUs do not implement
			
 
				-all of the rounding and trapping modes in silicon.  Fixing this problem requires
			
 
				-some software emulation code in the kernel; to date, this has not been attempted.
			
 
				-</P>
			
 
				-<H4>The PowerPC compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-The PowerPC compiler supports the 32-bit PowerPC architecture only;
			
 
				-it does not support either the 64-bit extensions or the POWER compatibility instructions.
			
 
				-It has been used for production operating system work on the 603, 603e, 604e, 821, 823, and 860.
			
 
				-On the 8xx floating-point instructions must be emulated.
			
 
				-Instruction scheduling is not implemented; otherwise the code generated
			
 
				-is similar to that for the other load-store architectures.
			
 
				-The compiler makes little or no use of unusual PowerPC features such as the
			
 
				-counter register, several condition code registers, and multiply-accumulate
			
 
				-instructions, but they are sometimes
			
 
				-used by assembly language routines in the libraries.
			
 
				-</P>
			
 
				-<H4>The Acorn ARM compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-The ARM compiler is fairly solid; it has been used for some production
			
 
				-operating system work including Inferno and the Plan 9 kernel
			
 
				-for the iPAQ, which uses a StrongArm SA1.
			
 
				-The compiler supports the ARMv4 architecture;
			
 
				-it does not support the Thumb instruction set.
			
 
				-It has been used on ARM7500FE processors and the StrongArm SA1 core machines.
			
 
				-The compiler generates instructions for the ARM floating-point coprocessor.
			
 
				-</P>
			
 
				-<H4>The AMD 29000 compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-This compiler was used to port an operating system to an AMD 29240 processor.
			
 
				-The project is long abandoned, but the compiler lives on.
			
 
				-</P>
			
 
				-<H4>The Carrera operating system
			
 
				-</H4>
			
 
				-<P>
			
 
				-We used to have a number of MIPS R4400 PC-like devices called Carreras,
			
 
				-with custom-built frame buffers, that we used as terminals.
			
 
				-They're almost all decommissioned now, but we're including the source as a reference
			
 
				-in case someone wants to get another MIPS-based system running.
			
 
				-</P>
			
 
				-<H4>The IBM PC operating system
			
 
				-</H4>
			
 
				-<P>
			
 
				-The PC version of Plan 9 can boot either from MS-DOS
			
 
				-or directly from a disk created by the
			
 
				-<TT>format</TT>
			
 
				-command; see
			
 
				-<A href="/magic/man2html/8/prep"><I>prep</I>(8).
			
 
				-</A>Plan 9 runs in 32-bit mode&#8212;which requires a 386 or later model x86 processor&#8212;and
			
 
				-has an interrupt-driven I/O system, so it does not
			
 
				-use the BIOS (except for a small portion of the boot program and floppy boot block).
			
 
				-This helps performance but limits the set of I/O devices that it can support without
			
 
				-special code.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 supports the ISA, EISA, and PCI buses as well as PCMCIA and PC card devices.
			
 
				-It is infeasible to list all the supported machines, because
			
 
				-the PC-clone marketplace is too volatile and there is
			
 
				-no guarantee that the machine you buy today will contain the
			
 
				-same components as the one you bought yesterday.
			
 
				-(For our lab, we buy components and assemble the machines
			
 
				-ourselves in an attempt to lessen this effect.)
			
 
				-Both IDE/ATA and SCSI disks are supported, and
			
 
				-there is support for large ATA drives.
			
 
				-CD-ROMs are supported two ways, either on the SCSI bus, or as ATA(PI) devices.
			
 
				-The SCSI adapter must be a member of the Mylex Multimaster (old Buslogic BT-*) series
			
 
				-or the Symbios 53C8XX series.
			
 
				-Supported Ethernet cards include the
			
 
				-AMD79C790,
			
 
				-3COM Etherlink III and 3C589 series,
			
 
				-Lucent Wavelan and compatibles,
			
 
				-NE2000,
			
 
				-WD8003,
			
 
				-WD8013,
			
 
				-SMC Elite and Elite Ultra,
			
 
				-Linksys Combo EthernetCard and EtherFast 10/100,
			
 
				-and a variety of controllers based on the
			
 
				-Intel i8255[789] and Digital (now Intel) 21114x chips.
			
 
				-We mostly use Etherlink III, i8255[789], and 21114x, so those drivers may be more robust.
			
 
				-There must be an explicit Plan 9 driver for peripherals;
			
 
				-it cannot use DOS or Windows drivers.
			
 
				-Also,
			
 
				-Plan 9 cannot exploit special hardware-related features that fall outside of the
			
 
				-IBM PC model,
			
 
				-such as power management,
			
 
				-unless architecture-dependent code is added to the kernel.
			
 
				-For more details see
			
 
				-<A href="/magic/man2html/8/plan9.ini"><I>plan9.ini</I>(8).
			
 
				-</A></P>
			
 
				-<P>
			
 
				-Over the years,
			
 
				-Plan 9 has run on a number of VGA cards.
			
 
				-Recent changes to the graphics system have not been
			
 
				-tested on most of the older cards; some effort may be needed to get them working again.
			
 
				-In our lab, most of our machines use the ATI Mach64, S3 ViRGE, or S3 Savage chips,
			
 
				-so such devices are probably
			
 
				-the most reliable.
			
 
				-We also use a few Matrox and TNT cards.
			
 
				-The system requires a hardware cursor.
			
 
				-For more details see
			
 
				-<A href="/magic/man2html/6/vgadb"><I>vgadb</I>(6)
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/8/vga"><I>vga</I>(8).
			
 
				-</A>The wiki
			
 
				-(<TT>http://plan9.bell-labs.com/wiki/plan9</TT>)
			
 
				-contains the definitive list of cards that are known to work; see the ``supported PC hardware''
			
 
				-page.
			
 
				-</P>
			
 
				-<P>
			
 
				-For audio, Plan 9 supports the Sound Blaster 16 and compatibles.
			
 
				-(Note that audio doesn't work under Plan 9 with 8-bit Sound Blasters.)
			
 
				-There is also user-level support for USB audio devices; see 
			
 
				-<A href="/magic/man2html/4/usb"><I>usb</I>(4).
			
 
				-</A></P>
			
 
				-<P>
			
 
				-Finally, it's important to have a three-button mouse with Plan 9.
			
 
				-The system currently works only with mice on the PS/2 port or USB.
			
 
				-Serial mouse support should return before long.
			
 
				-</P>
			
 
				-<P>
			
 
				-Once you have Plan 9 installed (see the wiki's installation document)
			
 
				-run the program
			
 
				-<TT>ld</TT>
			
 
				-from DOS
			
 
				-or use a boot disk.  See
			
 
				-<A href="/magic/man2html/8/booting"><I>booting</I>(8),
			
 
				-</A><A href="/magic/man2html/8/9load"><I>9load</I>(8),
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/8/prep"><I>prep</I>(8)
			
 
				-</A>for more information.
			
 
				-</P>
			
 
				-<H4>The Alpha PC operating system
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 runs on the Alpha PC 164.
			
 
				-The Alpha port has not been used as much as the others,
			
 
				-and should be considered a preliminary release.
			
 
				-</P>
			
 
				-<P>
			
 
				-The port uses the OSF/1 flavor
			
 
				-of PALcode, and should be booted from the SRM firmware (booting
			
 
				-from ARC is not supported).
			
 
				-Supported devices are a subset of the PC ones; currently
			
 
				-this includes DECchip 2114x-based ethernet cards, S3 VGA cards,
			
 
				-Sound Blaster 16-compatible audio, floppy drives, and ATA hard disks.
			
 
				-</P>
			
 
				-<P>
			
 
				-The system has to be booted via tftp.
			
 
				-See
			
 
				-<A href="/magic/man2html/8/booting"><I>booting</I>(8)
			
 
				-</A>for details.
			
 
				-</P>
			
 
				-<H4>The PowerPC operating system
			
 
				-</H4>
			
 
				-<P>
			
 
				-We have a version of the system that runs on the PowerPC
			
 
				-on a home-grown machine called Viaduct.
			
 
				-The Viaduct minibrick is a small (12x9x3 cm) low-cost embedded
			
 
				-computer consisting of a 50MHz MPC850, 16MB sdram, 2MB flash,
			
 
				-and two 10Mb Ethernet ports.  It is designed for home/SOHO
			
 
				-networking applications such as VPN, firewalls, NAT, etc.
			
 
				-</P>
			
 
				-<P>
			
 
				-The kernel has also been ported to the Motorola MTX embedded motherboard;
			
 
				-that port is included in the distribution.
			
 
				-The port only works with a 604e processor (the 603e is substantially different)
			
 
				-and at present only a single CPU is permitted.
			
 
				-</P>
			
 
				-<H4>The Compaq iPAQ operating system
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 was ported to Compaq's iPAQ Pocket PC,
			
 
				-which uses the StrongArm SA1 processor.
			
 
				-The model we have is a 3630; neighboring models also work.
			
 
				-The kernel can drive a PCMCIA sleeve with a WaveLAN card, but no other PCMCIA
			
 
				-devices have been ported yet.
			
 
				-</P>
			
 
				-<P>
			
 
				-The iPAQ runs
			
 
				-<TT>rio</TT>
			
 
				-with a small keyboard application that allows Palm-style handwriting
			
 
				-input as well as typing with the stylus on a miniature keyboard.
			
 
				-</P>
			
 
				-<P>
			
 
				-Fco. J. Ballesteros
			
 
				-(<TT>nemo@plan9.escet.urjc.es</TT>)
			
 
				-added support for hibernation, but we haven't been able to
			
 
				-get that to work again in the new kernel; the code is there, however,
			
 
				-for volunteers to play with.
			
 
				-See the file
			
 
				-<TT>/sys/src/9/bitsy/Booting101</TT>
			
 
				-for information about installing Plan 9 on the iPAQ.
			
 
				-</P>
			
 
				-<H4>The file server
			
 
				-</H4>
			
 
				-<P>
			
 
				-The file server runs on only a handful of distinct machines.
			
 
				-It is a stand-alone program, distantly related to the CPU server
			
 
				-code, that runs no user code: all it does is serve files on
			
 
				-network connections.
			
 
				-It supports only SCSI disks, which can be interleaved for
			
 
				-faster throughput.
			
 
				-A DOS file on
			
 
				-an IDE drive can hold the configuration information.
			
 
				-See
			
 
				-<A href="/magic/man2html/8/fsconfig"><I>fsconfig</I>(8)
			
 
				-</A>for an explanation of how
			
 
				-to configure a file server.
			
 
				-</P>
			
 
				-<P>
			
 
				-To boot a file server, follow the directions for booting a CPU server
			
 
				-using the file name
			
 
				-<TT>9</TT><I>machtype</I><TT>fs</TT>
			
 
				-where
			
 
				-<I>machtype</I>
			
 
				-is
			
 
				-<TT>pc</TT>,
			
 
				-etc. as appropriate.
			
 
				-We are releasing only the PC version.
			
 
				-</P>
			
 
				-<H4>The IBM PC file server
			
 
				-</H4>
			
 
				-<P>
			
 
				-Except for the restriction to SCSI disks,
			
 
				-the PC file server has the same hardware requirements as
			
 
				-the regular PC operating system.
			
 
				-However, only a subset of the supported SCSI (Adaptec 1542, Mylex Multimaster,
			
 
				-and Symbios 53C8XX) and Ethernet (Digital 2114x,
			
 
				-Intel 8255x, and 3Com) controllers
			
 
				-may be
			
 
				-used.
			
 
				-Any of the boot methods described in
			
 
				-<A href="/magic/man2html/8/9load"><I>9load</I>(8)
			
 
				-</A>will work.
			
 
				-</P>
			
 
				-<P>
			
 
				-To boot any PC, the file
			
 
				-<TT>9load</TT>
			
 
				-must reside on a MS-DOS formatted floppy, IDE disk,
			
 
				-or SCSI disk.
			
 
				-However, PCs have no non-volatile RAM in which the
			
 
				-file server can store its configuration information, so the system
			
 
				-stores it in a file on an MS-DOS file system instead.
			
 
				-This file, however, cannot live on a SCSI disk, only a floppy or IDE.
			
 
				-(This restriction avoids a lot of duplicated interfaces in the
			
 
				-system.)
			
 
				-Thus the file server cannot be all-SCSI.
			
 
				-See
			
 
				-<A href="/magic/man2html/8/plan9.ini"><I>plan9.ini</I>(8)
			
 
				-</A>for details about the
			
 
				-<I>nvr</I>
			
 
				-variable and specifying the console device.
			
 
				-</P>
			
 
				-<H4>Backup
			
 
				-</H4>
			
 
				-<P>
			
 
				-Our main file server is unlikely to be much like yours.
			
 
				-It is a PC with 128 megabytes
			
 
				-of cache memory, 56 gigabytes of SCSI magnetic
			
 
				-disk, and a Hewlett-Packard SureStore Optical 1200ex
			
 
				-magneto-optical jukebox, with 1.2 terabytes of storage.
			
 
				-This driver runs the SCSI standard jukebox protocol.
			
 
				-We also have a driver for a (non-standard)
			
 
				-SONY WDA-610
			
 
				-Writable Disk Auto Changer (WORM),
			
 
				-which stores almost 350 gigabytes of data.
			
 
				-</P>
			
 
				-<P>
			
 
				-The WORM is actually the prime storage; the SCSI disk is just
			
 
				-a cache to improve performance.
			
 
				-Early each morning the system constructs on WORM an image of
			
 
				-the entire system as it appears that day.  Our backup system
			
 
				-is therefore just a file server that lets
			
 
				-you look at yesterday's (or last year's) file system.
			
 
				-</P>
			
 
				-<P>
			
 
				-If you don't have a magneto-optical jukebox,
			
 
				-you might consider attaching a CD-R jukebox or even just
			
 
				-using a single WORM drive and managing the dumps a little less
			
 
				-automatically.  This is just a long way of saying that the
			
 
				-system as distributed has no explicit method of backup other
			
 
				-than through the WORM jukebox.
			
 
				-</P>
			
 
				-<P>
			
 
				-Not everyone can invest in such expensive hardware, however.
			
 
				-Although it wouldn't be as luxurious,
			
 
				-it would be possible to use
			
 
				-<A href="/magic/man2html/8/mkfs"><I>mkfs</I>(8)
			
 
				-</A>to build regular file system archives and use
			
 
				-<A href="/magic/man2html/8/scuzz"><I>scuzz</I>(8)
			
 
				-</A>to stream them to a SCSI 8mm tape drive.
			
 
				-<TT>Mkext</TT>
			
 
				-could then extract them.
			
 
				-Another alternative is to use
			
 
				-<I>dump9660</I>
			
 
				-(see
			
 
				-<A href="/magic/man2html/8/mk9660"><I>mk9660</I>(8)),
			
 
				-</A>which stores incremental backups on CD images
			
 
				-in the form of a dump hierarchy.
			
 
				-</P>
			
 
				-<P>
			
 
				-It is also possible to treat a regular disk, or even a part of a disk,
			
 
				-as a fake WORM, which can then be streamed to tape when it fills.
			
 
				-This is a bad idea for a production system but a good way to
			
 
				-learn about the WORM software.
			
 
				-Again, see
			
 
				-<A href="/magic/man2html/8/fsconfig"><I>fsconfig</I>(8)
			
 
				-</A>for details.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/prog4.html
+++ b/sys/doc/prog4.html
@@ -1,642 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Changes to the Programming Environment
			
 
				-<br>
			
 
				-in the
			
 
				-<br>
			
 
				-Fourth Release of Plan 9
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-<br>&#32;<br>
			
 
				-rob@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-The fourth release of Plan 9 includes changes at many levels of the system,
			
 
				-with repercussions in the libraries and program interfaces.
			
 
				-This document summarizes the changes and describes how
			
 
				-existing programs must be modified to run in the new release.
			
 
				-It is not exhaustive, of course; for further detail about any of the
			
 
				-topics refer to the manual pages, as always.
			
 
				-</P>
			
 
				-<P>
			
 
				-Programmers new to Plan 9 may find valuable tidbits here, but the
			
 
				-real audience for this paper is those with a need to update applications
			
 
				-and servers written in C for earlier releases of the Plan 9 operating system.
			
 
				-</P>
			
 
				-<H4>9P, NAMELEN, and strings
			
 
				-</H4>
			
 
				-<P>
			
 
				-The underlying file service protocol for Plan 9, 9P, retains its basic form
			
 
				-but has had a number of adjustments to deal with longer file names and error strings,
			
 
				-new authentication mechanisms, and to make it more efficient at
			
 
				-evaluating file names.
			
 
				-The change to file names affects a number of system interfaces;
			
 
				-because file name elements are no longer of fixed size, they can
			
 
				-no longer be stored as arrays.
			
 
				-</P>
			
 
				-<P>
			
 
				-9P used to be a fixed-format protocol with
			
 
				-<TT>NAMELEN</TT>-sized
			
 
				-byte arrays representing file name elements.
			
 
				-Now, it is a variable-format protocol, as described in
			
 
				-<A href="/magic/man2html/5/intro"><I>intro</I>(5),
			
 
				-</A>in which strings are represented by a count followed by that many bytes.
			
 
				-Thus, the string
			
 
				-<TT>ken</TT>
			
 
				-would previously have occupied 28
			
 
				-(<TT>NAMELEN</TT>)
			
 
				-bytes in the message; now it occupies 5: a two-byte count followed by the three bytes of
			
 
				-<TT>ken</TT>
			
 
				-and no terminal zero.
			
 
				-(And of course, a name could now be much longer.)
			
 
				-A similar format change has been made to
			
 
				-<TT>stat</TT>
			
 
				-buffers: they are no longer
			
 
				-<TT>DIRLEN</TT>
			
 
				-bytes long but instead have variable size prefixed by a two-byte count.
			
 
				-And in fact the entire 9P message syntax has changed: every message
			
 
				-now begins with a message length field that makes it trivial to break the
			
 
				-string into messages without parsing them, so
			
 
				-<TT>aux/fcall</TT>
			
 
				-is gone.
			
 
				-A new library entry point,
			
 
				-<TT>read9pmsg</TT>,
			
 
				-makes it easy for user-level servers to break the client data stream into 9P messages.
			
 
				-All servers should switch from using
			
 
				-<TT>read</TT>
			
 
				-(or the now gone
			
 
				-<TT>getS</TT>)
			
 
				-to using
			
 
				-<TT>read9pmsg</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-This change to 9P affects the way strings are handled by the kernel and throughout
			
 
				-the system.
			
 
				-The consequences are primarily that fixed-size arrays have been replaced
			
 
				-by pointers and counts in a variety of system interfaces.
			
 
				-Most programs will need at least some adjustment to the new style.
			
 
				-In summary:
			
 
				-<TT>NAMELEN</TT>
			
 
				-is gone, except as a vestige in the authentication libraries, where it has been
			
 
				-rechristened
			
 
				-<TT>ANAMELEN</TT>.
			
 
				-<TT>DIRLEN</TT>
			
 
				-and
			
 
				-<TT>ERRLEN</TT>
			
 
				-are also gone.
			
 
				-All programs that mention
			
 
				-these constants
			
 
				-will need to be fixed.
			
 
				-</P>
			
 
				-<P>
			
 
				-The simplest place to see this change is in the
			
 
				-<TT>errstr</TT>
			
 
				-system call, which no longer assumes a buffer of length
			
 
				-<TT>ERRLEN</TT>
			
 
				-but now requires a byte-count argument:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-char buf[...];
			
 
				-
			
 
				-errstr(buf, sizeof buf);
			
 
				-</PRE></TT></DL>
			
 
				-The buffer can be any size you like.
			
 
				-For convenience, the kernel stores error strings internally as 256-byte arrays,
			
 
				-so if you like &#8212; but it's not required &#8212; you can use the defined constant
			
 
				-<TT>ERRMAX=</TT>256
			
 
				-as a good buffer size.
			
 
				-Unlike the old
			
 
				-<TT>ERRLEN</TT>
			
 
				-(which had value 64),
			
 
				-<TT>ERRMAX</TT>
			
 
				-is advisory, not mandatory, and is not part of the 9P specification.
			
 
				-</P>
			
 
				-<P>
			
 
				-With names, stat buffers, and directories, there isn't even an echo of a fixed-size array any more.
			
 
				-</P>
			
 
				-<H4>Directories and wait messages
			
 
				-</H4>
			
 
				-<P>
			
 
				-With strings now variable-length, a number of system calls needed to change:
			
 
				-<TT>errstr</TT>,
			
 
				-<TT>stat</TT>,
			
 
				-<TT>fstat</TT>,
			
 
				-<TT>wstat</TT>,
			
 
				-<TT>fwstat</TT>,
			
 
				-and
			
 
				-<TT>wait</TT>
			
 
				-are all affected, as is
			
 
				-<TT>read</TT>
			
 
				-when applied to directories.
			
 
				-</P>
			
 
				-<P>
			
 
				-As far as directories are concerned, most programs don't use the system calls
			
 
				-directly anyway, since they operate on the machine-independent form, but
			
 
				-instead call the machine-dependent
			
 
				-<TT>Dir</TT>
			
 
				-routines
			
 
				-<TT>dirstat</TT>,
			
 
				-<TT>dirread</TT>,
			
 
				-etc.
			
 
				-These used to fill user-provided fixed-size buffers; now they return objects allocated
			
 
				-by
			
 
				-<TT>malloc</TT>
			
 
				-(which must therefore be freed after use).
			
 
				-To `stat' a file:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Dir *d;
			
 
				-
			
 
				-d = dirstat(filename);
			
 
				-if(d == nil){
			
 
				-	fprint(2, "can't stat %s: %r\n", filename);
			
 
				-	exits("stat");
			
 
				-}
			
 
				-use(d);
			
 
				-free(d);
			
 
				-</PRE></TT></DL>
			
 
				-A common new bug is to forget to free a
			
 
				-<TT>Dir</TT>
			
 
				-returned by
			
 
				-<TT>dirstat</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Dirfstat</TT>
			
 
				-and
			
 
				-<TT>Dirfwstat</TT>
			
 
				-work pretty much as before, but changes to 9P make
			
 
				-it possible to exercise finer-grained control on what fields
			
 
				-of the
			
 
				-<TT>Dir</TT>
			
 
				-are to be changed; see
			
 
				-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/5/stat"><I>stat</I>(5)
			
 
				-</A>for details.
			
 
				-</P>
			
 
				-<P>
			
 
				-Reading a directory works in a similar way to
			
 
				-<TT>dirstat</TT>,
			
 
				-with
			
 
				-<TT>dirread</TT>
			
 
				-allocating and filling in an array of
			
 
				-<TT>Dir</TT>
			
 
				-structures.
			
 
				-The return value is the number of elements of the array.
			
 
				-The arguments to
			
 
				-<TT>dirread</TT>
			
 
				-now include a pointer to a
			
 
				-<TT>Dir*</TT>
			
 
				-to be filled in with the address of the allocated array:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Dir *d;
			
 
				-int i, n;
			
 
				-
			
 
				-while((n = dirread(fd, &amp;d)) &gt; 0){
			
 
				-	for(i=0; i&lt;n; i++)
			
 
				-		use(&amp;d[i]);
			
 
				-	free(d);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-A new library function,
			
 
				-<TT>dirreadall</TT>,
			
 
				-has the same form as
			
 
				-<TT>dirread</TT>
			
 
				-but returns the entire directory in one call:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-n = dirreadall(fd, &amp;d)
			
 
				-for(i=0; i&lt;n; i++)
			
 
				-	use(&amp;d[i]);
			
 
				-free(d);
			
 
				-</PRE></TT></DL>
			
 
				-If your program insists on using the underlying
			
 
				-<TT>stat</TT>
			
 
				-system call or its relatives, or wants to operate directly on the
			
 
				-machine-independent format returned by
			
 
				-<TT>stat</TT>
			
 
				-or
			
 
				-<TT>read</TT>,
			
 
				-it will need to be modified.
			
 
				-Such programs are rare enough that we'll not discuss them here beyond referring to
			
 
				-the man page
			
 
				-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
			
 
				-</A>for details.
			
 
				-Be aware, though, that it used to be possible to regard the buffer returned by
			
 
				-<TT>stat</TT>
			
 
				-as a byte array that began with the zero-terminated
			
 
				-name of the file; this is no longer true.
			
 
				-With very rare exceptions, programs that call
			
 
				-<TT>stat</TT>
			
 
				-would be better recast to use the
			
 
				-<TT>dir</TT>
			
 
				-routines or, if their goal is just to test the existence of a file,
			
 
				-<TT>access</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Similar changes have affected the
			
 
				-<TT>wait</TT>
			
 
				-system call.  In fact,
			
 
				-<TT>wait</TT>
			
 
				-is no longer a system call but a library routine that calls the new
			
 
				-<TT>await</TT>
			
 
				-system call and returns a newly allocated machine-dependent
			
 
				-<TT>Waitmsg</TT>
			
 
				-structure:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Waitmsg *w;
			
 
				-
			
 
				-w = wait();
			
 
				-if(w == nil)
			
 
				-	error("wait: %r");
			
 
				-print("pid is %d; exit string %s\n", w-&gt;pid, w-&gt;msg);
			
 
				-free(w);
			
 
				-</PRE></TT></DL>
			
 
				-The exit string
			
 
				-<TT>w-&gt;msg</TT>
			
 
				-may be empty but it will never be a nil pointer.
			
 
				-Again, don't forget to free the structure returned by
			
 
				-<TT>wait</TT>.
			
 
				-If all you need is the pid, you can call
			
 
				-<TT>waitpid</TT>,
			
 
				-which reports just the pid and doesn't return an allocated structure:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int pid;
			
 
				-
			
 
				-pid = waitpid();
			
 
				-if(pid &lt; 0)
			
 
				-	error("wait: %r");
			
 
				-print("pid is %d\n", pid);
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>Quoted strings and tokenize
			
 
				-</H4>
			
 
				-<P>
			
 
				-<TT>Wait</TT>
			
 
				-gives us a good opportunity to describe how the system copes with all this
			
 
				-free-format data.
			
 
				-Consider the text returned by the
			
 
				-<TT>await</TT>
			
 
				-system call, which includes a set of integers (pids and times) and a string (the exit status).
			
 
				-This information is formatted free-form; here is the statement in the kernel that
			
 
				-generates the message:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-n = snprint(a, n, "%d %lud %lud %lud %q",
			
 
				-	wq-&gt;w.pid,
			
 
				-	wq-&gt;w.time[TUser], wq-&gt;w.time[TSys], wq-&gt;w.time[TReal],
			
 
				-	wq-&gt;w.msg);
			
 
				-</PRE></TT></DL>
			
 
				-Note the use of
			
 
				-<TT>%q</TT>
			
 
				-to produce a quoted-string representation of the exit status.
			
 
				-The
			
 
				-<TT>%q</TT>
			
 
				-format is like %s but will wrap
			
 
				-<TT>rc</TT>-style
			
 
				-single quotes around the string if it contains white space or is otherwise ambiguous.
			
 
				-The library routine
			
 
				-<TT>tokenize</TT>
			
 
				-can be used to parse data formatted this way: it splits white-space-separated
			
 
				-fields but understands the
			
 
				-<TT>%q</TT>
			
 
				-quoting conventions.
			
 
				-Here is how the
			
 
				-<TT>wait</TT>
			
 
				-library routine builds its
			
 
				-<TT>Waitmsg</TT>
			
 
				-from the data returned by
			
 
				-<TT>await</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Waitmsg*
			
 
				-wait(void)
			
 
				-{
			
 
				-	int n, l;
			
 
				-	char buf[512], *fld[5];
			
 
				-	Waitmsg *w;
			
 
				-
			
 
				-	n = await(buf, sizeof buf-1);
			
 
				-	if(n &lt; 0)
			
 
				-		return nil;
			
 
				-	buf[n] = '\0';
			
 
				-	if(tokenize(buf, fld, nelem(fld)) != nelem(fld)){
			
 
				-		werrstr("couldn't parse wait message");
			
 
				-		return nil;
			
 
				-	}
			
 
				-	l = strlen(fld[4])+1;
			
 
				-	w = malloc(sizeof(Waitmsg)+l);
			
 
				-	if(w == nil)
			
 
				-		return nil;
			
 
				-	w-&gt;pid = atoi(fld[0]);
			
 
				-	w-&gt;time[0] = atoi(fld[1]);
			
 
				-	w-&gt;time[1] = atoi(fld[2]);
			
 
				-	w-&gt;time[2] = atoi(fld[3]);
			
 
				-	w-&gt;msg = (char*)&amp;w[1];
			
 
				-	memmove(w-&gt;msg, fld[4], l);
			
 
				-	return w;
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-This style of quoted-string and
			
 
				-<TT>tokenize</TT>
			
 
				-is used all through the system now.
			
 
				-In particular, devices now
			
 
				-<TT>tokenize</TT>
			
 
				-the messages written to their
			
 
				-<TT>ctl</TT>
			
 
				-files, which means that you can send messages that contain white space, by quoting them,
			
 
				-and that you no longer need to worry about whether or not the device accepts a newline.
			
 
				-In other words, you can say
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo message &gt; /dev/xx/ctl
			
 
				-</PRE></TT></DL>
			
 
				-instead of
			
 
				-<TT>echo</TT>
			
 
				-<TT>-n</TT>
			
 
				-because
			
 
				-<TT>tokenize</TT>
			
 
				-treats the newline character as white space and discards it.
			
 
				-</P>
			
 
				-<P>
			
 
				-While we're on the subject of quotes and strings, note that the implementation of
			
 
				-<TT>await</TT>
			
 
				-used
			
 
				-<TT>snprint</TT>
			
 
				-rather than
			
 
				-<TT>sprint</TT>.
			
 
				-We now deprecate
			
 
				-<TT>sprint</TT>
			
 
				-because it has no protection against buffer overflow.
			
 
				-We prefer
			
 
				-<TT>snprint</TT>
			
 
				-or
			
 
				-<TT>seprint</TT>,
			
 
				-to constrain the output.
			
 
				-The
			
 
				-<TT>%q</TT>
			
 
				-format is cleverer than most in this regard:
			
 
				-if the string is too long to be represented in full,
			
 
				-<TT>%q</TT>
			
 
				-is smart enough to produce a truncated but correctly quoted
			
 
				-string within the available space.
			
 
				-</P>
			
 
				-<H4>Mount
			
 
				-</H4>
			
 
				-<P>
			
 
				-Although strings in 9P are now variable-length and not zero-terminated,
			
 
				-this has little direct effect in most of the system interfaces.
			
 
				-File and user names are still zero-terminated strings as always;
			
 
				-the kernel does the work of translating them as necessary for
			
 
				-transport.
			
 
				-And of course, they are now free to be as long as you might want;
			
 
				-the only hard limit is that their length must be represented in 16 bits.
			
 
				-</P>
			
 
				-<P>
			
 
				-One example where this matters is that the file system specification in the
			
 
				-<TT>mount</TT>
			
 
				-system call can now be much longer.
			
 
				-Programs like
			
 
				-<TT>rio</TT>
			
 
				-that used the specification string in creative ways were limited by the
			
 
				-<TT>NAMELEN</TT>
			
 
				-restriction; now they can use the string more freely.
			
 
				-<TT>Rio</TT>
			
 
				-now accepts a simple but less cryptic specification language for the window
			
 
				-to be created by the
			
 
				-<TT>mount</TT>
			
 
				-call, e.g.:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-% mount <I>wsys /mnt/wsys 'new -dx 250 -dy 250 -pid 1234'
			
 
				-</PRE></TT></DL>
			
 
				-In the old system, this sort of control was impossible through the
			
 
				-</I><TT>mount</TT><I>
			
 
				-interface.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-While we're on the subject of
			
 
				-<TT>mount</TT>,
			
 
				-note that with the new security architecture
			
 
				-(see
			
 
				-<A href="/magic/man2html/4/factotum"><I>factotum</I>(4)),
			
 
				-</A>9P has moved its authentication outside the protocol proper.
			
 
				-(For a full description of this change to 9P, see
			
 
				-<A href="/magic/man2html/2/fauth"><I>fauth</I>(2),
			
 
				-</A><A href="/magic/man2html/5/attach"><I>attach</I>(5),
			
 
				-</A>and the paper
			
 
				-<I>Security in Plan 9</I>.)<I>
			
 
				-The most explicit effect of this change is that
			
 
				-</I><TT>mount</TT><I>
			
 
				-now takes another argument,
			
 
				-</I><TT>afd</TT><I>,
			
 
				-a file descriptor for the
			
 
				-authentication file through which the authentication will be made.
			
 
				-For most user-level file servers, which do not require authentication, it is
			
 
				-sufficient to provide
			
 
				-</I><TT>-1</TT><I>
			
 
				-as the value of
			
 
				-</I><TT>afd:</TT><I>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-if(mount(fd, -1, "/mnt/wsys", MREPL,
			
 
				-   "new -dx 250 -dy 250 -pid 1234") &lt; 0)
			
 
				-	error("mount failed: %r");
			
 
				-</PRE></TT></DL>
			
 
				-To connect to servers that require authentication, use the new
			
 
				-</I><TT>fauth</TT><I>
			
 
				-system call or the reimplemented
			
 
				-</I><TT>amount</TT><I>
			
 
				-(authenticated mount) library call.
			
 
				-In fact, since
			
 
				-</I><TT>amount</TT><I>
			
 
				-handles both authenticating and non-authenticating servers, it is often
			
 
				-easiest just to replace calls to
			
 
				-</I><TT>mount</TT><I>
			
 
				-by calls to
			
 
				-</I><TT>amount</TT><I>;
			
 
				-see
			
 
				-<A href="/magic/man2html/2/auth"></I><I>auth</I><I>(2)
			
 
				-</A>for details.
			
 
				-</P>
			
 
				-</I><H4>Print
			
 
				-</H4>
			
 
				-<P>
			
 
				-The C library has been heavily reworked in places.
			
 
				-Besides the changes mentioned above, it
			
 
				-now has a much more complete set of routines for handling
			
 
				-<TT>Rune</TT>
			
 
				-strings (that is, zero-terminated arrays of 16-bit character values).
			
 
				-The most sweeping changes, however, are in the way formatted I/O is performed.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>print</TT>
			
 
				-routine and all its relatives have been reimplemented to offer a number
			
 
				-of improvements:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>(1)<DD>
			
 
				-Better buffer management, including the provision of an internal flush
			
 
				-routine, makes it unnecessary to provide large buffers.
			
 
				-For example,
			
 
				-<TT>print</TT>
			
 
				-uses a much smaller buffer now (reducing stack load) while simultaneously
			
 
				-removing the need to truncate the output string if it doesn't fit in the buffer.
			
 
				-<DT>(2)<DD>
			
 
				-Global variables have been eliminated so no locking is necessary.
			
 
				-<DT>(3)<DD>
			
 
				-The combination of (1) and (2) means that the standard implementation of
			
 
				-<TT>print</TT>
			
 
				-now works fine in threaded programs, and
			
 
				-<TT>threadprint</TT>
			
 
				-is gone.
			
 
				-<DT>(4)<DD>
			
 
				-The new routine
			
 
				-<TT>smprint</TT>
			
 
				-prints into, and returns, storage allocated on demand by
			
 
				-<TT>malloc</TT>.
			
 
				-<DT>(5)<DD>
			
 
				-It is now possible to print into a
			
 
				-<TT>Rune</TT>
			
 
				-string; for instance,
			
 
				-<TT>runesmprint</TT>
			
 
				-is the
			
 
				-<TT>Rune</TT>
			
 
				-analog of
			
 
				-<TT>smprint</TT>.
			
 
				-<DT>(6)<DD>
			
 
				-There is improved support for custom
			
 
				-print verbs and custom output routines such as error handlers.
			
 
				-The routine
			
 
				-<TT>doprint</TT>
			
 
				-is gone, but
			
 
				-<TT>vseprint</TT>
			
 
				-can always be used instead.
			
 
				-However, the new routines
			
 
				-<TT>fmtfdinit</TT>,
			
 
				-<TT>fmtstrinit</TT>,
			
 
				-<TT>fmtprint</TT>,
			
 
				-and friends
			
 
				-are often a better replacement.
			
 
				-The details are too long for exposition here;
			
 
				-<A href="/magic/man2html/2/fmtinstall"><I>fmtinstall</I>(2)
			
 
				-</A>explains the new interface and provides examples.
			
 
				-<DT>(7)<DD>
			
 
				-Two new format flags, space and comma, close somewhat the gap between
			
 
				-Plan 9 and ANSI C.
			
 
				-</dl>
			
 
				-<P>
			
 
				-Despite these changes, most programs will be unaffected;
			
 
				-<TT>print</TT>
			
 
				-is still
			
 
				-<TT>print</TT>.
			
 
				-Don't forget, though, that
			
 
				-you should eliminate calls to
			
 
				-<TT>sprint</TT>
			
 
				-and use the
			
 
				-<TT>%q</TT>
			
 
				-format when appropriate.
			
 
				-</P>
			
 
				-<H4>Binary compatibility
			
 
				-</H4>
			
 
				-<P>
			
 
				-The discussion so far has been about changes at the source level.
			
 
				-Existing binaries will probably run without change in the new
			
 
				-environment, since the kernel provides backward-compatible
			
 
				-system calls for
			
 
				-<TT>errstr</TT>,
			
 
				-<TT>stat</TT>,
			
 
				-<TT>wait</TT>,
			
 
				-etc.
			
 
				-The only exceptions are programs that do either a
			
 
				-<TT>mount</TT>
			
 
				-system call, because of the security changes and because
			
 
				-the file descriptor in
			
 
				-<TT>mount</TT>
			
 
				-must point to a new 9P connection; or a
			
 
				-<TT>read</TT>
			
 
				-system call on a directory, since the returned data will
			
 
				-be in the new format.
			
 
				-A moment's reflection will discover that this means old
			
 
				-user-level file servers will need to be fixed to run on the new system.
			
 
				-</P>
			
 
				-<H4>File servers
			
 
				-</H4>
			
 
				-<P>
			
 
				-A full description of what user-level servers must do to provide service with
			
 
				-the new 9P is beyond the scope of this paper.
			
 
				-Your best source of information is section 5 of the manual,
			
 
				-combined with study of a few examples.
			
 
				-<TT>/sys/src/cmd/ramfs.c</TT>
			
 
				-is a simple example; it has a counterpart
			
 
				-<TT>/sys/src/lib9p/ramfs.c</TT>
			
 
				-that implements the same service using the new
			
 
				-<A href="/magic/man2html/2/9p"><I>9p</I>(2)
			
 
				-</A>library.
			
 
				-</P>
			
 
				-<P>
			
 
				-That said, it's worth summarizing what to watch for when converting a file server.
			
 
				-The
			
 
				-<TT>session</TT>
			
 
				-message is gone, and there is now a
			
 
				-<TT>version</TT>
			
 
				-message that is exchanged at the start of a connection to establish
			
 
				-the version of the protocol to use (there's only one at the moment, identified by
			
 
				-the string
			
 
				-<TT>9P2000</TT>)
			
 
				-and what the maximum message size will be.
			
 
				-This negotiation makes it easier to handle 9P encapsulation, such as with
			
 
				-<TT>exportfs</TT>,
			
 
				-and also permits larger message sizes when appropriate.
			
 
				-</P>
			
 
				-<P>
			
 
				-If your server wants to authenticate, it will need to implement an authentication file
			
 
				-and implement the
			
 
				-<TT>auth</TT>
			
 
				-message; otherwise it should return a helpful error string to the
			
 
				-<TT>Tauth</TT>
			
 
				-request to signal that authentication is not required.
			
 
				-</P>
			
 
				-<P>
			
 
				-The handling of
			
 
				-<TT>stat</TT>
			
 
				-and directory reads will require some changes but they should not be fundamental.
			
 
				-Be aware that seeking on directories is forbidden, so it is fine if you disregard the
			
 
				-file offset when implementing directory reads; this makes it a little easier to handle
			
 
				-the variable-length entries.
			
 
				-You should still never return a partial directory entry; if the I/O count is too small
			
 
				-to return even one entry, you should return two bytes containing the byte count
			
 
				-required to represent the next entry in the directory.
			
 
				-User code can use this value to formulate a retry if it desires.
			
 
				-See the
			
 
				-DIAGNOSTICS section of
			
 
				-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
			
 
				-</A>for a description of this process.
			
 
				-</P>
			
 
				-<P>
			
 
				-The trickiest part of updating a file server is that the
			
 
				-<TT>clone</TT>
			
 
				-and
			
 
				-<TT>walk</TT>
			
 
				-messages have been merged into a single message, a sort of `clone-multiwalk'.
			
 
				-The new message, still called
			
 
				-<TT>walk</TT>,
			
 
				-proposes a sequence of file name elements to be evaluated using a possibly
			
 
				-cloned fid.
			
 
				-The return message contains the qids of the files reached by
			
 
				-walking to the sequential elements.
			
 
				-If all the elements can be walked, the fid will be cloned if requested.
			
 
				-If a non-zero number of elements are requested, but none
			
 
				-can be walked, an error should be returned.
			
 
				-If only some can be walked, the fid is not cloned, the original fid is left
			
 
				-where it was, and the returned
			
 
				-<TT>Rwalk</TT>
			
 
				-message should contain the partial list of successfully reached qids.
			
 
				-See
			
 
				-<A href="/magic/man2html/5/walk"><I>walk</I>(5)
			
 
				-</A>for a full description.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/rc.html
+++ b/sys/doc/rc.html
@@ -1,1668 +0,0 @@
 
				-<html>
			
 
				-
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Rc &#8212; The Plan 9 Shell
			
 
				-</H1>
			
 
				-<DL><DD><I>Tom Duff<br>
			
 
				-td@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<I>Rc</I>
			
 
				-is a command interpreter for Plan 9 that
			
 
				-provides similar facilities to UNIX's
			
 
				-Bourne shell,
			
 
				-with some small additions and less idiosyncratic syntax.
			
 
				-This paper uses numerous examples to describe
			
 
				-<I>rc</I>'s
			
 
				-features, and contrasts
			
 
				-<I>rc</I>
			
 
				-with the Bourne shell, a model that many readers will be familiar with.
			
 
				-</DL>
			
 
				-<H4>1 Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-is similar in spirit but different in detail from UNIX's
			
 
				-Bourne shell.  This paper describes
			
 
				-<I>rc</I>'s
			
 
				-principal features with many small examples and a few larger ones.
			
 
				-It assumes familiarity with the Bourne shell.
			
 
				-</P>
			
 
				-<H4>2 Simple commands
			
 
				-</H4>
			
 
				-<P>
			
 
				-For the simplest uses
			
 
				-<I>rc</I>
			
 
				-has syntax familiar to Bourne-shell users.
			
 
				-All of the following behave as expected:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-date
			
 
				-cat /lib/news/build
			
 
				-who &gt;user.names
			
 
				-who &gt;&gt;user.names
			
 
				-wc &lt;file
			
 
				-echo [a-f]*.c
			
 
				-who | wc
			
 
				-who; date
			
 
				-vc *.c &amp;
			
 
				-mk &amp;&amp; v.out /*/bin/fb/*
			
 
				-rm -r junk || echo rm failed!
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>3 Quotation
			
 
				-</H4>
			
 
				-<P>
			
 
				-An argument that contains a space or one of
			
 
				-<I>rc</I>'s
			
 
				-other syntax characters must be enclosed in apostrophes
			
 
				-(<TT>'</TT>):
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-rm 'odd file name'
			
 
				-</PRE></TT></DL>
			
 
				-An apostrophe in a quoted argument must be doubled:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo 'How''s your father?'
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>4 Patterns
			
 
				-</H4>
			
 
				-<P>
			
 
				-An unquoted argument that contains any of the characters
			
 
				-<TT>*</TT>
			
 
				-<TT>?</TT>
			
 
				-<TT>[</TT>
			
 
				-is a pattern to be matched against file names.
			
 
				-A
			
 
				-<TT>*</TT>
			
 
				-character matches any sequence of characters,
			
 
				-<TT>?</TT>
			
 
				-matches any single character, and
			
 
				-<TT>[</TT><I>class</I><TT>]</TT><I>
			
 
				-matches any character in the
			
 
				-</I><TT>class</TT><I>,
			
 
				-unless the first character of
			
 
				-</I><I>class</I><I>
			
 
				-is
			
 
				-</I><TT>~</TT><I>,
			
 
				-in which case the class is complemented.
			
 
				-The
			
 
				-</I><I>class</I><I>
			
 
				-may also contain pairs of characters separated by
			
 
				-</I><TT>-</TT><I>,
			
 
				-standing for all characters lexically between the two.
			
 
				-The character
			
 
				-</I><TT>/</TT><I>
			
 
				-must appear explicitly in a pattern, as must the path name components
			
 
				-</I><TT>.</TT><I>
			
 
				-and
			
 
				-</I><TT>..</TT><I>.
			
 
				-A pattern is replaced by a list of arguments, one for each path name matched,
			
 
				-except that a pattern matching no names is not replaced by the empty list;
			
 
				-rather it stands for itself.
			
 
				-</P>
			
 
				-</I><H4>5 Variables
			
 
				-</H4>
			
 
				-<P>
			
 
				-UNIX's Bourne shell offers string-valued variables.
			
 
				-<I>Rc</I>
			
 
				-provides variables whose values are lists of arguments &#8212;
			
 
				-that is, arrays of strings.  This is the principal difference
			
 
				-between
			
 
				-<I>rc</I>
			
 
				-and traditional UNIX command interpreters.
			
 
				-Variables may be given values by typing, for example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-path=(. /bin)
			
 
				-user=td
			
 
				-font=/lib/font/bit/pelm/ascii.9.font
			
 
				-</PRE></TT></DL>
			
 
				-The parentheses indicate that the value assigned to
			
 
				-<TT>path</TT>
			
 
				-is a list of two strings. The variables
			
 
				-<TT>user</TT>
			
 
				-and
			
 
				-<TT>font</TT>
			
 
				-are assigned lists containing a single string.
			
 
				-</P>
			
 
				-<P>
			
 
				-The value of a variable can be substituted into a command by
			
 
				-preceding its name with a
			
 
				-<TT>$</TT>,
			
 
				-like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo $path
			
 
				-</PRE></TT></DL>
			
 
				-If
			
 
				-<TT>path</TT>
			
 
				-had been set as above, this would be equivalent to
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo . /bin
			
 
				-</PRE></TT></DL>
			
 
				-Variables may be subscripted by numbers or lists of numbers,
			
 
				-like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo $path(2)
			
 
				-echo $path(2 1 2)
			
 
				-</PRE></TT></DL>
			
 
				-These are equivalent to
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo /bin
			
 
				-echo /bin . /bin
			
 
				-</PRE></TT></DL>
			
 
				-There can be no space separating the variable's name from the
			
 
				-left parenthesis; otherwise, the subscript would be considered
			
 
				-a separate parenthesized list.
			
 
				-</P>
			
 
				-<P>
			
 
				-The number of strings in a variable can be determined by the
			
 
				-<TT>$#</TT>
			
 
				-operator.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo $#path
			
 
				-</PRE></TT></DL>
			
 
				-would print 2 for this example.
			
 
				-</P>
			
 
				-<P>
			
 
				-The following two assignments are subtly different:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-empty=()
			
 
				-null=''
			
 
				-</PRE></TT></DL>
			
 
				-The first sets
			
 
				-<TT>empty</TT>
			
 
				-to a list containing no strings.
			
 
				-The second sets
			
 
				-<TT>null</TT>
			
 
				-to a list containing a single string,
			
 
				-but the string contains no characters.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although these may seem like more or less
			
 
				-the same thing (in Bourne's shell, they are
			
 
				-indistinguishable), they behave differently
			
 
				-in almost all circumstances.
			
 
				-Among other things
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo <I>$#empty
			
 
				-</PRE></TT></DL>
			
 
				-prints 0, whereas
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo </I>$#null
			
 
				-</PRE></TT></DL>
			
 
				-prints 1.
			
 
				-</P>
			
 
				-<P>
			
 
				-All variables that have never been set have the value
			
 
				-<TT>()</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Occasionally, it is convenient to treat a variable's value
			
 
				-as a single string.  The elements of a list are concatenated
			
 
				-into a single string, with spaces between the elements, by
			
 
				-the
			
 
				-<TT></TT><I>"</I><TT>
			
 
				-operator.
			
 
				-Thus, if we set
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-list=(How now brown cow)
			
 
				-string=</TT>"list
			
 
				-</PRE></TT></DL>
			
 
				-then both
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo <I>list
			
 
				-</PRE></TT></DL>
			
 
				-and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo </I>string
			
 
				-</PRE></TT></DL>
			
 
				-cause the same output, viz:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-How now brown cow
			
 
				-</PRE></TT></DL>
			
 
				-but
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo <I>#list </I>#string
			
 
				-</PRE></TT></DL>
			
 
				-will output
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-4 1
			
 
				-</PRE></TT></DL>
			
 
				-because
			
 
				-<TT></TT><I>list</I><TT>
			
 
				-has four members, but
			
 
				-</TT><TT></TT><TT>string</TT><TT>
			
 
				-has a single member, with three spaces separating its words.
			
 
				-</P>
			
 
				-</TT><H4>6 Arguments
			
 
				-</H4>
			
 
				-<P>
			
 
				-When
			
 
				-<I>rc</I>
			
 
				-is reading its input from a file, the file has access
			
 
				-to the arguments supplied on
			
 
				-<I>rc</I>'s
			
 
				-command line.  The variable
			
 
				-<TT></TT><I>*</I><TT>
			
 
				-initially has the list of arguments assigned to it.
			
 
				-The names
			
 
				-</TT><TT></TT><TT>1</TT><TT>,
			
 
				-</TT><TT></TT><I>2</I><TT>,
			
 
				-etc. are synonyms for
			
 
				-</TT><TT></TT><TT>*(1)</TT><TT>,
			
 
				-</TT><TT></TT><I>*(2)</I><TT>,
			
 
				-etc.
			
 
				-In addition,
			
 
				-</TT><TT></TT><TT>0</TT><TT>
			
 
				-is the name of the file from which
			
 
				-</TT><I>rc</I><TT>'s
			
 
				-input is being read.
			
 
				-</P>
			
 
				-</TT><H4>7 Concatenation
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-has a string concatenation operator, the caret 
			
 
				-<TT>^</TT>,
			
 
				-to build arguments out of pieces.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo hully^gully
			
 
				-</PRE></TT></DL>
			
 
				-is exactly equivalent to
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo hullygully
			
 
				-</PRE></TT></DL>
			
 
				-Suppose variable
			
 
				-<TT>i</TT>
			
 
				-contains the name of a command.
			
 
				-Then
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-vc <I>i^.c
			
 
				-vl -o </I>1 <I>i^.v
			
 
				-</PRE></TT></DL>
			
 
				-might compile the command's source code, leaving the
			
 
				-result in the appropriate file.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-Concatenation distributes over lists. The following
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo (a b c)^(1 2 3)
			
 
				-src=(main subr io)
			
 
				-cc src^.c
			
 
				-</PRE></TT></DL>
			
 
				-are equivalent to
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo a1 b2 c3
			
 
				-cc main.c subr.c io.c
			
 
				-</PRE></TT></DL>
			
 
				-In detail, the rule is: if both operands of
			
 
				-<TT>^</TT>
			
 
				-are lists of the same non-zero number of strings, they are concatenated
			
 
				-pairwise.  Otherwise, if one of the operands is a single string,
			
 
				-it is concatenated with each member of the other operand in turn.
			
 
				-Any other combination of operands is an error.
			
 
				-</P>
			
 
				-<H4>8 Free carets
			
 
				-</H4>
			
 
				-<P>
			
 
				-User demand has dictated that
			
 
				-<I>rc</I>
			
 
				-insert carets in certain places, to make the syntax
			
 
				-look more like the Bourne shell.  For example, this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cc -<I>flags </I>stems.c
			
 
				-</PRE></TT></DL>
			
 
				-is equivalent to
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cc -^<I>flags </I>stems^.c
			
 
				-</PRE></TT></DL>
			
 
				-In general,
			
 
				-<I>rc</I>
			
 
				-will insert
			
 
				-<TT>^</TT>
			
 
				-between two arguments that are not separated by white space.
			
 
				-Specifically, whenever one of
			
 
				-<TT></TT><I>'`</I><TT>
			
 
				-follows a quoted or unquoted word, or an unquoted word follows
			
 
				-a quoted word with no intervening blanks or tabs, an implicit
			
 
				-</TT><TT>^</TT><TT>
			
 
				-is inserted between the two.  If an unquoted word immediately following a
			
 
				-</TT><TT></TT><TT></TT><TT>
			
 
				-contains a character other than an alphanumeric, underscore or
			
 
				-</TT><TT>*</TT><TT>,
			
 
				-a
			
 
				-</TT><TT>^</TT><TT>
			
 
				-is inserted before the first such character.
			
 
				-</P>
			
 
				-</TT><H4>9 Command substitution
			
 
				-</H4>
			
 
				-<P>
			
 
				-It is often useful to build an argument list from the output of a command.
			
 
				-<I>Rc</I>
			
 
				-allows a command, enclosed in braces and preceded by a left quote,
			
 
				-<TT>`{...}</TT>,
			
 
				-anywhere that an argument is required.  The command is executed and its
			
 
				-standard output captured.
			
 
				-The characters stored in the variable
			
 
				-<TT>ifs</TT>
			
 
				-are used to split the output into arguments.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cat `{ls -tr|sed 10q}
			
 
				-</PRE></TT></DL>
			
 
				-will concatenate the ten oldest files in the current directory in temporal order, given the
			
 
				-default
			
 
				-<TT>ifs</TT>
			
 
				-setting of space, tab, and newline.
			
 
				-</P>
			
 
				-<H4>10 Pipeline branching
			
 
				-</H4>
			
 
				-<P>
			
 
				-The normal pipeline notation is general enough for almost all cases.
			
 
				-Very occasionally it is useful to have pipelines that are not linear.
			
 
				-Pipeline topologies more general than trees can require arbitrarily large pipe buffers,
			
 
				-or worse, can cause deadlock.
			
 
				-<I>Rc</I>
			
 
				-has syntax for some kinds of non-linear but treelike pipelines.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	cmp &lt;{old} &lt;{new}
			
 
				-</PRE></TT></DL>
			
 
				-will regression-test a new version of a command.
			
 
				-<TT>&lt;</TT>
			
 
				-or
			
 
				-<TT>&gt;</TT>
			
 
				-followed by a command in braces causes the command to be run with
			
 
				-its standard output or input attached to a pipe.  The parent command
			
 
				-(<TT>cmp</TT>
			
 
				-in the example)
			
 
				-is started with the other end of the pipe attached to some file descriptor
			
 
				-or other, and with an argument that will connect to the pipe when opened
			
 
				-(e.g.,
			
 
				-<TT>/dev/fd/6</TT>).
			
 
				-Some commands are unprepared to deal with input files that turn out not to be seekable.
			
 
				-For example
			
 
				-<TT>diff</TT>
			
 
				-needs to read its input twice.
			
 
				-</P>
			
 
				-<H4>11 Exit status
			
 
				-</H4>
			
 
				-<P>
			
 
				-When a command exits it returns status to the program that executed it.
			
 
				-On Plan 9 status is a character string describing an error condition.
			
 
				-On normal termination it is empty.
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-captures command exit status in the variable
			
 
				-<TT></TT><I>status</I><TT>.
			
 
				-For a simple command the value of
			
 
				-</TT><TT></TT><TT>status</TT><TT>
			
 
				-is just as described above.  For a pipeline
			
 
				-</TT><TT></TT><I>status</I><TT>
			
 
				-is set to the concatenation of the statuses of the pipeline components with
			
 
				-</TT><TT>|</TT><TT>
			
 
				-characters for separators.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-<I>Rc</I>
			
 
				-has several kinds of control flow,
			
 
				-many of them conditioned by the status returned from previously
			
 
				-executed commands.  Any
			
 
				-<TT></TT>status<TT>
			
 
				-containing only
			
 
				-</TT><TT>0</TT><TT>'s
			
 
				-and
			
 
				-</TT><TT>|</TT><TT>'s
			
 
				-has boolean value
			
 
				-</TT><I>true</I><TT>.
			
 
				-Any other status is
			
 
				-</TT><I>false</I><TT>.
			
 
				-</P>
			
 
				-</TT><H4>12 Command grouping
			
 
				-</H4>
			
 
				-<P>
			
 
				-A sequence of commands enclosed in
			
 
				-<TT>{}</TT>
			
 
				-may be used anywhere a command is required.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-{sleep 3600;echo 'Time''s up!'}&amp;
			
 
				-</PRE></TT></DL>
			
 
				-will wait an hour in the background, then print a message.
			
 
				-Without the braces,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-sleep 3600;echo 'Time''s up!'&amp;
			
 
				-</PRE></TT></DL>
			
 
				-would lock up the terminal for an hour,
			
 
				-then print the message in the background.
			
 
				-</P>
			
 
				-<H4>13 Control flow &#173; <TT>for</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-A command may be executed once for each member of a list
			
 
				-by typing, for example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(i in printf scanf putchar) look <I>i /usr/td/lib/dw.dat
			
 
				-</PRE></TT></DL>
			
 
				-This looks for each of the words
			
 
				-</I><TT>printf</TT><I>,
			
 
				-</I><TT>scanf</TT><I>
			
 
				-and
			
 
				-</I><TT>putchar</TT><I>
			
 
				-in the given file.
			
 
				-The general form is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(</I><I>name</I><I> in </I><I>list</I><I>) </I><I>command</I><I>
			
 
				-</PRE></TT></DL>
			
 
				-or
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(</I><I>name</I><I>) </I><I>command</I><I>
			
 
				-</PRE></TT></DL>
			
 
				-In the first case
			
 
				-</I><I>command</I><I>
			
 
				-is executed once for each member of
			
 
				-</I><I>list</I><I>
			
 
				-with that member assigned to variable
			
 
				-</I><I>name</I><I>.
			
 
				-If the clause
			
 
				-``</I><TT>in</TT><I>
			
 
				-</I><I>list</I><I>''
			
 
				-is missing,
			
 
				-``</I><TT>in</TT><I>
			
 
				-</I><TT></TT><I>*</I><TT>''
			
 
				-is assumed.
			
 
				-</P>
			
 
				-</TT><H4>14 Conditional execution &#173; <TT>if</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-also provides a general if-statement.  For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(i in *.c) if(cpp <I>i &gt;/tmp/</I>i) vc /tmp/<I>i
			
 
				-</PRE></TT></DL>
			
 
				-runs the C compiler on each C source program that
			
 
				-cpp processes without error.
			
 
				-An `if not' statement provides a two-tailed conditional.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(i){
			
 
				-    if(test -f /tmp/</I>i) echo <I>i already in /tmp
			
 
				-    if not cp </I>i /tmp
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-This loops over each file in
			
 
				-<TT></TT><I>*</I><TT>,
			
 
				-copying to
			
 
				-</TT><TT>/tmp</TT><TT>
			
 
				-those that do not already appear there, and
			
 
				-printing a message for those that do.
			
 
				-</P>
			
 
				-</TT><H4>15 Control flow &#173; <TT>while</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>'s
			
 
				-while statement looks like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-while(newer subr.v subr.c) sleep 5
			
 
				-</PRE></TT></DL>
			
 
				-This waits until
			
 
				-<TT>subr.v</TT>
			
 
				-is newer than
			
 
				-<TT>subr.c</TT>,
			
 
				-presumably because the C compiler finished with it.
			
 
				-</P>
			
 
				-<P>
			
 
				-If the controlling command is empty, the loop will not terminate.
			
 
				-Thus,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-while() echo y
			
 
				-</PRE></TT></DL>
			
 
				-emulates the
			
 
				-<I>yes</I>
			
 
				-command.
			
 
				-</P>
			
 
				-<H4>16 Control flow &#173; <TT>switch</TT>
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-provides a switch statement to do pattern-matching on
			
 
				-arbitrary strings.  Its general form is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-switch(<I>word</I>){
			
 
				-case <I>pattern ...</I>
			
 
				-    <I>commands</I>
			
 
				-case <I>pattern ...</I>
			
 
				-    <I>commands</I>
			
 
				-...
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<I>Rc</I>
			
 
				-attempts to match the word against the patterns in each case statement in turn.
			
 
				-Patterns are the same as for filename matching, except that
			
 
				-<TT>/</TT>
			
 
				-and
			
 
				-<TT>.</TT>
			
 
				-and
			
 
				-<TT>..</TT>
			
 
				-need not be matched explicitly.
			
 
				-</P>
			
 
				-<P>
			
 
				-If any pattern matches, the
			
 
				-commands following that case up to
			
 
				-the next case (or the end of the switch)
			
 
				-are executed, and execution of the switch
			
 
				-is complete.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-switch(#*){
			
 
				-case 1
			
 
				-    cat &gt;&gt;<I>1
			
 
				-case 2
			
 
				-    cat &gt;&gt;</I>2 &lt;<I>1
			
 
				-case *
			
 
				-    echo 'Usage: append [from] to'
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-is an append command.  Called with one file argument,
			
 
				-it appends its standard input to the named file.  With two, the
			
 
				-first is appended to the second.  Any other number
			
 
				-elicits an error message.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-The built-in
			
 
				-<TT>~</TT>
			
 
				-command also matches patterns, and is often more concise than a switch.
			
 
				-Its arguments are a string and a list of patterns.  It sets
			
 
				-<TT></TT>status<TT>
			
 
				-to true if and only if any of the patterns matches the string.
			
 
				-The following example processes option arguments for the
			
 
				-<A href="/magic/man2html/1/man"></TT><I>man</I><TT>(1)
			
 
				-</A>command:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-opt=()
			
 
				-while(~ </TT><I>1 -* [1-9] 10){
			
 
				-    switch(</I><TT>1){
			
 
				-    case [1-9] 10
			
 
				-        sec=</TT><I>1 secn=</I><TT>1
			
 
				-    case -f
			
 
				-        c=f s=f
			
 
				-    case -[qwnt]
			
 
				-        cmd=</TT><I>1
			
 
				-    case -T*
			
 
				-        T=</I><TT>1
			
 
				-    case -*
			
 
				-        opt=(</TT><I>opt </I><TT>1)
			
 
				-    }
			
 
				-    shift
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-</TT><H4>17 Functions
			
 
				-</H4>
			
 
				-<P>
			
 
				-Functions may be defined by typing
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-fn <I>name</I> { <I>commands</I> }
			
 
				-</PRE></TT></DL>
			
 
				-Subsequently, whenever a command named
			
 
				-<I>name</I>
			
 
				-is encountered, the remainder of the command's
			
 
				-argument list will be assigned to
			
 
				-<TT></TT><I>*</I><TT>
			
 
				-and
			
 
				-</TT><I>rc</I><TT>
			
 
				-will execute the
			
 
				-</TT><I>commands</I><TT>.
			
 
				-The value of
			
 
				-</TT><TT></TT><TT>*</TT><TT>
			
 
				-will be restored on completion.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-fn g {
			
 
				-    grep </TT><I>1 *.[hcyl]
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-defines
			
 
				-</I><TT>g</TT><I> pattern</I>
			
 
				-to look for occurrences of
			
 
				-<I>pattern</I>
			
 
				-in all program source files in the current directory.
			
 
				-</P>
			
 
				-<P>
			
 
				-Function definitions are deleted by writing
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-fn <I>name</I>
			
 
				-</PRE></TT></DL>
			
 
				-with no function body.
			
 
				-</P>
			
 
				-<H4>18 Command execution
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-does one of several things to execute a simple command.
			
 
				-If the command name is the name of a function defined using
			
 
				-<TT>fn</TT>,
			
 
				-the function is executed.
			
 
				-Otherwise, if it is the name of a built-in command, the
			
 
				-built-in is executed directly by
			
 
				-<I>rc</I>.
			
 
				-Otherwise, directories mentioned in the variable
			
 
				-<TT></TT>path<TT>
			
 
				-are searched until an executable file is found.
			
 
				-Extensive use of the
			
 
				-</TT><TT></TT><I>path</I><TT>
			
 
				-variable is discouraged in Plan 9.  Instead, use the default
			
 
				-</TT><TT>(.</TT><TT>
			
 
				-</TT><TT>/bin)</TT><TT>
			
 
				-and bind what you need into
			
 
				-</TT><TT>/bin</TT><TT>.
			
 
				-</P>
			
 
				-</TT><H4>19 Built-in commands
			
 
				-</H4>
			
 
				-<P>
			
 
				-Several commands are executed internally by
			
 
				-<I>rc</I>
			
 
				-because they are difficult to implement otherwise.
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><TT>.<DD>
			
 
				- [-i] </TT><I>file ...</I><TT></TT>
			
 
				-Execute commands from
			
 
				-<I>file</I>.
			
 
				-<TT></TT>*<TT>
			
 
				-is set for the duration to the remainder of the argument list following
			
 
				-</TT><I>file</I><TT>.
			
 
				-</TT><TT></TT><I>path</I><TT>
			
 
				-is used to search for
			
 
				-</TT><I>file</I><TT>.
			
 
				-Option
			
 
				-</TT><TT>-i</TT><TT>
			
 
				-indicates interactive input &#173; a prompt
			
 
				-(found in
			
 
				-</TT><TT></TT><TT>prompt</TT><TT>)
			
 
				-is printed before each command is read.
			
 
				-<DT></TT><TT>b<DD>
			
 
				-uiltin </TT><I>command ...</I><TT></TT>
			
 
				-Execute
			
 
				-<I>command</I>
			
 
				-as usual except that any function named
			
 
				-<I>command</I>
			
 
				-is ignored.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-fn cd{
			
 
				-    builtin cd <I>* &amp;&amp; pwd
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-defines a replacement for the
			
 
				-</I><TT>cd</TT><I>
			
 
				-built-in (see below) that announces the full name of the new directory.
			
 
				-<DT></I><TT>c<DD>
			
 
				-d [</TT><I>dir</I><TT>]</TT>.if w'<TT>cd [</TT><I>dir</I><TT>]</TT>'-4n .br
			
 
				-Change the current directory to
			
 
				-<I>dir</I>.
			
 
				-The default argument is
			
 
				-<TT></TT>home<TT>.
			
 
				-</TT><TT></TT><I>cdpath</I><TT>
			
 
				-is a list of places in which to search for
			
 
				-</TT><I>dir</I><TT>.
			
 
				-<DT></TT><TT>e<DD>
			
 
				-val [</TT><I>arg ...</I><TT>]</TT>.if w'<TT>eval [</TT><I>arg ...</I><TT>]</TT>'-4n .br
			
 
				-The arguments are concatenated (separated by spaces) into a string, read as input to
			
 
				-<I>rc</I>,
			
 
				-and executed.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-x='<TT>y'
			
 
				-y=Doody
			
 
				-eval echo Howdy, </TT><I>x
			
 
				-</PRE></TT></DL>
			
 
				-would echo
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Howdy, Doody
			
 
				-</PRE></TT></DL>
			
 
				-since the arguments of
			
 
				-</I><TT>eval</TT><I>
			
 
				-would be
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo Howdy, </I><TT>y
			
 
				-</PRE></TT></DL>
			
 
				-after substituting for
			
 
				-</TT><TT></TT><I>x</I><TT>.
			
 
				-<DT></TT><TT>e<DD>
			
 
				-xec [</TT><I>command ...</I><TT>]</TT>.if w'<TT>exec [</TT><I>command ...</I><TT>]</TT>'-4n .br
			
 
				-<I>Rc</I>
			
 
				-replaces itself with the given
			
 
				-<I>command</I>.
			
 
				-This is like a
			
 
				-<I>goto</I>
			
 
				-&#173;
			
 
				-<I>rc</I>
			
 
				-does not wait for the command to exit, and does not return to read any more commands.
			
 
				-<DT><TT>e<DD>
			
 
				-xit [</TT><I>status</I><TT>]</TT>.if w'<TT>exit [</TT><I>status</I><TT>]</TT>'-4n .br
			
 
				-<I>Rc</I>
			
 
				-exits immediately with the given status.  If none is given, the current value of
			
 
				-<TT></TT>status<TT>
			
 
				-is used.
			
 
				-<DT></TT><TT>f<DD>
			
 
				-lag </TT><I>f</I><TT> [+-]</TT>.if w'<TT>flag </TT><I>f</I><TT> [+-]</TT>'-4n .br
			
 
				-This command manipulates and tests the command line flags (described below).
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-flag <I>f</I><TT> +
			
 
				-</PRE></TT></DL>
			
 
				-sets flag
			
 
				-</TT><I>f</I><TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-flag </TT><I>f</I><TT> -
			
 
				-</PRE></TT></DL>
			
 
				-clears flag
			
 
				-</TT><I>f</I><TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-flag </TT><I>f</I><TT>
			
 
				-</PRE></TT></DL>
			
 
				-tests flag
			
 
				-</TT><I>f</I><TT>,
			
 
				-setting
			
 
				-</TT><TT></TT><I>status</I><TT>
			
 
				-appropriately.
			
 
				-Thus
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-if(flag x) flag v +
			
 
				-</PRE></TT></DL>
			
 
				-sets the
			
 
				-</TT><TT>-v</TT><TT>
			
 
				-flag if the
			
 
				-</TT><TT>-x</TT><TT>
			
 
				-flag is already set.
			
 
				-<DT></TT><TT>r<DD>
			
 
				-fork [nNeEsfF]</TT>.if w'<TT>rfork [nNeEsfF]</TT>'-4n .br
			
 
				-This uses the Plan 9
			
 
				-<I>rfork</I>
			
 
				-system entry to put
			
 
				-<I>rc</I>
			
 
				-into a new process group with the following attributes:
			
 
				-<br><img src="data.19116850.gif"><br>
			
 
				-Section
			
 
				-<A href="/magic/man2html/2/fork"><I>fork</I>(2)
			
 
				-</A>of the Programmer's Manual describes these attributes in more detail.
			
 
				-<DT><TT>s<DD>
			
 
				-hift [</TT><I>n</I><TT>]</TT>.if w'<TT>shift [</TT><I>n</I><TT>]</TT>'-4n .br
			
 
				-Delete the first
			
 
				-<I>n</I>
			
 
				-(default 1) elements of
			
 
				-<TT></TT>*<TT>.
			
 
				-<DT></TT><TT>w<DD>
			
 
				-ait [</TT><I>pid</I><TT>]</TT>.if w'<TT>wait [</TT><I>pid</I><TT>]</TT>'-4n .br
			
 
				-Wait for the process with the given
			
 
				-<I>pid</I>
			
 
				-to exit.  If no
			
 
				-<I>pid</I>
			
 
				-is given, all outstanding processes are waited for.
			
 
				-<DT><TT>w<DD>
			
 
				-hatis </TT><I>name ...</I><TT></TT>.if w'<TT>whatis </TT><I>name ...</I><TT></TT>'-4n .br
			
 
				-Print the value of each
			
 
				-<I>name</I>
			
 
				-in a form suitable for input to
			
 
				-<I>rc</I>.
			
 
				-The output is an assignment to a variable, the definition of a function,
			
 
				-a call to
			
 
				-<TT>builtin</TT>
			
 
				-for a built-in command, or the path name of a binary program.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-whatis path g cd who
			
 
				-</PRE></TT></DL>
			
 
				-might print
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-path=(. /bin)
			
 
				-fn g {gre -e <I>1 *.[hycl]}
			
 
				-builtin cd
			
 
				-/bin/who
			
 
				-</PRE></TT></DL>
			
 
				-<DT></I><TT>~<DD>
			
 
				- </TT><I>subject pattern ...</I><TT></TT>.if w'<TT>~ </TT><I>subject pattern ...</I><TT></TT>'-4n .br
			
 
				-The
			
 
				-<I>subject</I>
			
 
				-is matched against each
			
 
				-<I>pattern</I>
			
 
				-in turn.  On a match,
			
 
				-<TT></TT>status<TT>
			
 
				-is set to true.
			
 
				-Otherwise, it is set to 
			
 
				-</TT><TT>'no match'</TT><TT>.
			
 
				-Patterns are the same as for filename matching.
			
 
				-The
			
 
				-</TT><I>patterns</I><TT>
			
 
				-are not subjected to filename replacement before the
			
 
				-</TT><TT>~</TT><TT>
			
 
				-command is executed, so they need not be enclosed in
			
 
				-quotation marks, unless of course, a literal match for
			
 
				-</TT><TT>*</TT><TT>
			
 
				-</TT><TT>[</TT><TT>
			
 
				-or
			
 
				-</TT><TT>?</TT><TT>
			
 
				-is required.
			
 
				-For example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-~ </TT><I>1 ?
			
 
				-</PRE></TT></DL>
			
 
				-matches any single character, whereas
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-~ </I><TT>1 '?'
			
 
				-</PRE></TT></DL>
			
 
				-only matches a literal question mark.
			
 
				-</dl>
			
 
				-</TT><H4>20 Advanced I/O Redirection
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-allows redirection of file descriptors other than 0 and 1
			
 
				-(standard input and output) by specifying the file descriptor
			
 
				-in square brackets
			
 
				-<TT>[ ]</TT>
			
 
				-after the
			
 
				-<TT>&lt;</TT>
			
 
				-or
			
 
				-<TT>&gt;</TT>.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-vc junk.c &gt;[2]junk.diag
			
 
				-</PRE></TT></DL>
			
 
				-saves the compiler's diagnostics from standard error in
			
 
				-<TT>junk.diag</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-File descriptors may be replaced by a copy, in the sense of
			
 
				-<A href="/magic/man2html/2/dup"><I>dup</I>(2),
			
 
				-</A>of an already-open file by typing, for example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-vc junk.c &gt;[2=1]
			
 
				-</PRE></TT></DL>
			
 
				-This replaces file descriptor 2 with a copy of file descriptor 1.
			
 
				-It is more useful in conjunction with other redirections, like this
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-vc junk.c &gt;junk.out &gt;[2=1]
			
 
				-</PRE></TT></DL>
			
 
				-Redirections are evaluated from left to right, so this redirects
			
 
				-file descriptor 1 to
			
 
				-<TT>junk.out</TT>,
			
 
				-then points file descriptor 2 at the same file.
			
 
				-By contrast,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-vc junk.c &gt;[2=1] &gt;junk.out
			
 
				-</PRE></TT></DL>
			
 
				-redirects file descriptor 2 to a copy of file descriptor 1
			
 
				-(presumably the terminal), and then directs file descriptor 1
			
 
				-to a file.  In the first case, standard and diagnostic output
			
 
				-will be intermixed in
			
 
				-<TT>junk.out</TT>.
			
 
				-In the second, diagnostic output will appear on the terminal,
			
 
				-and standard output will be sent to the file.
			
 
				-</P>
			
 
				-<P>
			
 
				-File descriptors may be closed by using the duplication notation
			
 
				-with an empty right-hand side.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-vc junk.c &gt;[2=]
			
 
				-</PRE></TT></DL>
			
 
				-will discard diagnostics from the compilation.
			
 
				-</P>
			
 
				-<P>
			
 
				-Arbitrary file descriptors may be sent through
			
 
				-a pipe by typing, for example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-vc junk.c |[2] grep -v '^<I>'
			
 
				-</PRE></TT></DL>
			
 
				-This deletes blank lines
			
 
				-from the C compiler's error output.  Note that the output
			
 
				-of
			
 
				-</I><TT>grep</TT><I>
			
 
				-still appears on file descriptor 1.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-Occasionally you may wish to connect the input side of
			
 
				-a pipe to some file descriptor other than zero.
			
 
				-The notation
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cmd1 |[5=19] cmd2
			
 
				-</PRE></TT></DL>
			
 
				-creates a pipeline with
			
 
				-<TT>cmd1</TT>'s
			
 
				-file descriptor 5 connected through a pipe to
			
 
				-<TT>cmd2</TT>'s
			
 
				-file descriptor 19.
			
 
				-</P>
			
 
				-<H4>21 Here documents
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-procedures may include data, called ``here documents'',
			
 
				-to be provided as input to commands, as in this version of the
			
 
				-<I>tel</I>
			
 
				-command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(i) grep $i &lt;&lt;!
			
 
				-...
			
 
				-tor 2T-402 2912
			
 
				-kevin 2C-514 2842
			
 
				-bill 2C-562 7214
			
 
				-...
			
 
				-!
			
 
				-</PRE></TT></DL>
			
 
				-A here document is introduced by the redirection symbol
			
 
				-<TT>&lt;&lt;</TT>,
			
 
				-followed by an arbitrary EOF marker
			
 
				-(<TT>!</TT>
			
 
				-in the example).  Lines following the command,
			
 
				-up to a line containing only the EOF marker are saved
			
 
				-in a temporary file that is connected to the command's
			
 
				-standard input when it is run.
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-does variable substitution in here documents.  The following command:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-ed <I>3 &lt;&lt;EOF
			
 
				-g/</I>1/s//<I>2/g
			
 
				-w
			
 
				-EOF
			
 
				-</PRE></TT></DL>
			
 
				-changes all occurrences of
			
 
				-</I><TT></TT><I>1</I><TT>
			
 
				-to
			
 
				-</TT><TT></TT><I>2</I><TT>
			
 
				-in file
			
 
				-</TT><TT></TT><TT>3</TT><TT>.
			
 
				-To include a literal
			
 
				-</TT><TT></TT><I></I><TT>
			
 
				-in a here document, type
			
 
				-</TT><TT></TT><TT></TT><I></I><TT>.
			
 
				-If the name of a variable is followed immediately by
			
 
				-</TT><TT>^</TT><TT>,
			
 
				-the caret is deleted.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-Variable substitution can be entirely suppressed by enclosing
			
 
				-the EOF marker following
			
 
				-<TT>&lt;&lt;</TT>
			
 
				-in quotation marks, as in
			
 
				-<TT>&lt;&lt;'EOF'</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Here documents may be provided on file descriptors other than 0 by typing, for example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cmd &lt;&lt;[4]End
			
 
				-...
			
 
				-End
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-If a here document appears within a compound block, the contents of the document
			
 
				-must be after the whole block:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(i in *){
			
 
				-	mail <I>i &lt;&lt;EOF
			
 
				-}
			
 
				-words to live by
			
 
				-EOF
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-</I><H4>22 Catching Notes
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-scripts normally terminate when an interrupt is received from the terminal.
			
 
				-A function with the name of a UNIX signal, in lower case, is defined in the usual way,
			
 
				-but called when
			
 
				-<I>rc</I>
			
 
				-receives the corresponding note.
			
 
				-The
			
 
				-<A href="/magic/man2html/2/notify"><I>notify</I>(2)
			
 
				-</A>section of the Programmer's Manual discusses notes in some detail.
			
 
				-Notes of interest are:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT><TT>s<DD>
			
 
				-ighup</TT>.if w'<TT>sighup</TT>'-4n .br
			
 
				-The note was `hangup'.
			
 
				-Plan 9 sends this when the terminal has disconnected from
			
 
				-<I>rc</I>.
			
 
				-<DT><TT>s<DD>
			
 
				-igint</TT>.if w'<TT>sigint</TT>'-4n .br
			
 
				-The note was `interrupt', usually sent when
			
 
				-the interrupt character (ASCII DEL) is typed on the terminal.
			
 
				-<DT><TT>s<DD>
			
 
				-igterm</TT>.if w'<TT>sigterm</TT>'-4n .br
			
 
				-The note was `kill', normally sent by
			
 
				-<A href="/magic/man2html/1/kill"><I>kill</I>(1).
			
 
				-</A><DT><TT>s<DD>
			
 
				-igexit</TT>.if w'<TT>sigexit</TT>'-4n .br
			
 
				-An artificial note sent when
			
 
				-<I>rc</I>
			
 
				-is about to exit.
			
 
				-</dl>
			
 
				-<P>
			
 
				-As an example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-fn sigint{
			
 
				-    rm /tmp/junk
			
 
				-    exit
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-sets a trap for the keyboard interrupt that
			
 
				-removes a temporary file before exiting.
			
 
				-</P>
			
 
				-<P>
			
 
				-Notes will be ignored if the note routine is set to
			
 
				-<TT>{}</TT>.
			
 
				-Signals revert to their default behavior when their handlers'
			
 
				-definitions are deleted.
			
 
				-</P>
			
 
				-<H4>23 Environment
			
 
				-</H4>
			
 
				-<P>
			
 
				-The environment is a list of name-value pairs made available to
			
 
				-executing binaries.
			
 
				-On Plan 9, the environment is stored in a file system named
			
 
				-<TT>#e</TT>,
			
 
				-normally mounted on
			
 
				-<TT>/env</TT>.
			
 
				-The value of each variable is stored in a separate file, with components
			
 
				-terminated by zero bytes.
			
 
				-(The file system is
			
 
				-maintained entirely in core, so no disk or network access is involved.)
			
 
				-The contents of
			
 
				-<TT>/env</TT>
			
 
				-are shared on a per-process group basis - when a new process group is
			
 
				-created it effectively attaches
			
 
				-<TT>/env</TT>
			
 
				-to a new file system initialized with a copy of the old one.
			
 
				-A consequence of this organization is that commands can change environment
			
 
				-entries and see the changes reflected in
			
 
				-<I>rc</I>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Functions also appear in the environment, named by prefixing
			
 
				-<TT>fn#</TT>
			
 
				-to their names, like
			
 
				-<TT>/env/fn#roff</TT>.
			
 
				-</P>
			
 
				-<H4>24 Local Variables
			
 
				-</H4>
			
 
				-<P>
			
 
				-It is often useful to set a variable for the duration
			
 
				-of a single command.  An assignment followed by a command
			
 
				-has this effect.  For example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-a=global
			
 
				-a=local echo a
			
 
				-echo <I>a
			
 
				-</PRE></TT></DL>
			
 
				-will print
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-local
			
 
				-global
			
 
				-</PRE></TT></DL>
			
 
				-This works even for compound commands, like
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-f=/fairly/long/file/name {
			
 
				-    { wc </I>f; spell <I>f; diff </I>f.old <I>f } |
			
 
				-      pr -h 'Facts about '</I>f | lp -dfn
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>25 Examples &#173; <I>cd, pwd</I>
			
 
				-</H4>
			
 
				-<P>
			
 
				-Here is a pair of functions that provide
			
 
				-enhanced versions of the standard
			
 
				-<TT>cd</TT>
			
 
				-and
			
 
				-<TT>pwd</TT>
			
 
				-commands.  (Thanks to Rob Pike for these.)
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-ps1='% '	# default prompt
			
 
				-tab='	'	# a tab character
			
 
				-fn cd{
			
 
				-  builtin cd <I>1 &amp;&amp;
			
 
				-  switch(</I>#*){
			
 
				-  case 0
			
 
				-    dir=<I>home
			
 
				-    prompt=(</I>ps1 <I>tab)
			
 
				-  case *
			
 
				-    switch(</I>1)
			
 
				-    case /*
			
 
				-      dir=<I>1
			
 
				-      prompt=(`{basename `{pwd}}^</I>ps1 <I>tab)
			
 
				-    case */* ..*
			
 
				-      dir=()
			
 
				-      prompt=(`{basename `{pwd}}^</I>ps1 <I>tab)
			
 
				-    case *
			
 
				-      dir=()
			
 
				-      prompt=(</I>1^<I>ps1 </I>tab)
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-fn pwd{
			
 
				-  if(~ <I>#dir 0)
			
 
				-    dir=`{/bin/pwd}
			
 
				-  echo </I>dir
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Function
			
 
				-<TT>pwd</TT>
			
 
				-is a version of the standard
			
 
				-<TT>pwd</TT>
			
 
				-that caches its value in variable
			
 
				-<TT></TT><I>dir</I><TT>,
			
 
				-because the genuine
			
 
				-</TT><TT>pwd</TT><TT>
			
 
				-can be quite slow to execute.
			
 
				-(Recent versions of Plan 9 have very fast implementations of
			
 
				-</TT><TT>pwd</TT><TT>,
			
 
				-reducing the advantage of the
			
 
				-</TT><TT>pwd</TT><TT>
			
 
				-function.)
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-Function
			
 
				-<TT>cd</TT>
			
 
				-calls the
			
 
				-<TT>cd</TT>
			
 
				-built-in, and checks that it was successful.
			
 
				-If so, it sets
			
 
				-<TT></TT>dir<TT>
			
 
				-and
			
 
				-</TT><TT></TT><I>prompt</I><TT>.
			
 
				-The prompt will include the last component of the
			
 
				-current directory (except in the home directory,
			
 
				-where it will be null), and
			
 
				-</TT><TT></TT><TT>dir</TT><TT>
			
 
				-will be reset either to the correct value or to
			
 
				-</TT><TT>()</TT><TT>,
			
 
				-so that the
			
 
				-</TT><TT>pwd</TT><TT>
			
 
				-function will work correctly.
			
 
				-</P>
			
 
				-</TT><H4>26 Examples &#173; <I>man</I>
			
 
				-</H4>
			
 
				-<P>
			
 
				-The
			
 
				-<I>man</I>
			
 
				-command prints pages of the Programmer's Manual.
			
 
				-It is called, for example, as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-man 2 sinh
			
 
				-man rc
			
 
				-man -t cat
			
 
				-</PRE></TT></DL>
			
 
				-In the first case, the page for
			
 
				-<I>sinh</I>
			
 
				-in section 2 is printed.
			
 
				-In the second case, the manual page for
			
 
				-<I>rc</I>
			
 
				-is printed.  Since no manual section is specified,
			
 
				-all sections are searched for the page, and it is found
			
 
				-in section 1.
			
 
				-In the third case, the page for
			
 
				-<I>cat</I>
			
 
				-is typeset (the
			
 
				-<TT>-t</TT>
			
 
				-option).
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-cd /sys/man || {
			
 
				-  echo <I>0: No manual! &gt;[1=2]
			
 
				-  exit 1
			
 
				-}
			
 
				-NT=n  # default nroff
			
 
				-s='*' # section, default try all
			
 
				-for(i) switch(</I>i){
			
 
				-case -t
			
 
				-  NT=t
			
 
				-case -n
			
 
				-  NT=n
			
 
				-case -*
			
 
				-  echo Usage: <I>0 '[-nt] [section] page ...' &gt;[1=2]
			
 
				-  exit 1
			
 
				-case [1-9] 10
			
 
				-  s=</I>i
			
 
				-case *
			
 
				-  eval 'pages='<I>s/</I>i
			
 
				-  for(page in <I>pages){
			
 
				-    if(test -f </I>page)
			
 
				-      <I>NT^roff -man </I>page
			
 
				-    if not
			
 
				-      echo <I>0: </I>i not found &gt;[1=2]
			
 
				-  }
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Note the use of
			
 
				-<TT>eval</TT>
			
 
				-to make a list of candidate manual pages.
			
 
				-Without
			
 
				-<TT>eval</TT>,
			
 
				-the
			
 
				-<TT>*</TT>
			
 
				-stored in
			
 
				-<TT></TT><I>s</I><TT>
			
 
				-would not trigger filename matching
			
 
				-&#173; it's enclosed in quotation marks,
			
 
				-and even if it weren't, it would be expanded
			
 
				-when assigned to
			
 
				-</TT><TT></TT><TT>s</TT><TT>.
			
 
				-Eval causes its arguments
			
 
				-to be re-processed by
			
 
				-</TT><I>rc</I><TT>'s
			
 
				-parser and interpreter, effectively delaying
			
 
				-evaluation of the
			
 
				-</TT><TT>*</TT><TT>
			
 
				-until the assignment to
			
 
				-</TT><TT></TT><I>pages</I><TT>.
			
 
				-</P>
			
 
				-</TT><H4>27 Examples &#173; <I>holmdel</I>
			
 
				-</H4>
			
 
				-<P>
			
 
				-The following
			
 
				-<I>rc</I>
			
 
				-script plays the deceptively simple game
			
 
				-<I>holmdel</I>,
			
 
				-in which the players alternately name Bell Labs locations,
			
 
				-the winner being the first to mention Holmdel.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-t=/tmp/holmdelpid
			
 
				-fn read{
			
 
				-	<I>1=`{awk '{print;exit}'}
			
 
				-}
			
 
				-ifs='
			
 
				-'	# just a newline
			
 
				-fn sigexit sigint sigquit sighup{
			
 
				-	rm -f </I>t
			
 
				-	exit
			
 
				-}
			
 
				-cat &lt;&lt;'!' &gt;<I>t
			
 
				-Allentown 
			
 
				-Atlanta
			
 
				-Cedar Crest
			
 
				-Chester
			
 
				-Columbus
			
 
				-Elmhurst
			
 
				-Fullerton
			
 
				-Holmdel
			
 
				-Indian Hill
			
 
				-Merrimack Valley
			
 
				-Morristown
			
 
				-Neptune
			
 
				-Piscataway
			
 
				-Reading
			
 
				-Short Hills
			
 
				-South Plainfield
			
 
				-Summit
			
 
				-Whippany
			
 
				-West Long Branch
			
 
				-!
			
 
				-while(){
			
 
				-   lab=`{fortune </I>t}
			
 
				-   echo <I>lab
			
 
				-   if(~ </I>lab Holmdel){
			
 
				-      echo You lose.
			
 
				-      exit
			
 
				-   }
			
 
				-   while(read lab; ! grep -i -s <I>lab </I>t) echo No such location.
			
 
				-   if(~ <I>lab [hH]olmdel){
			
 
				-      echo You win.
			
 
				-      exit
			
 
				-   }
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-</I><P>
			
 
				-This script is worth describing in detail
			
 
				-(rather, it would be if it weren't so silly.)
			
 
				-</P>
			
 
				-<P>
			
 
				-Variable
			
 
				-<TT></TT>t<TT>
			
 
				-is an abbreviation for the name of a temporary file.
			
 
				-Including
			
 
				-</TT><TT></TT><I>pid</I><TT>,
			
 
				-initialized by
			
 
				-</TT><I>rc</I><TT>
			
 
				-to its process-id,
			
 
				-in the names of temporary files insures that their
			
 
				-names won't collide, in case more than one instance
			
 
				-of the script is running at a time.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-Function
			
 
				-<TT>read</TT>'s
			
 
				-argument is the name of a variable into which a
			
 
				-line gathered from standard input is read.
			
 
				-<TT></TT>ifs<TT>
			
 
				-is set to just a newline.  Thus
			
 
				-</TT><TT>read</TT><TT>'s
			
 
				-input is not split apart at spaces, but the terminating
			
 
				-newline is deleted.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-A handler is set to catch
			
 
				-<TT>sigint</TT>,
			
 
				-<TT>sigquit</TT>,
			
 
				-and
			
 
				-<TT>sighup</TT>,
			
 
				-and the artificial
			
 
				-<TT>sigexit</TT>
			
 
				-signal.  It just removes the temporary file and exits.
			
 
				-</P>
			
 
				-<P>
			
 
				-The temporary file is initialized from a here
			
 
				-document containing a list of Bell Labs locations, and
			
 
				-the main loop starts.
			
 
				-</P>
			
 
				-<P>
			
 
				-First, the program guesses a location (in
			
 
				-<TT></TT><I>lab</I><TT>)
			
 
				-using the
			
 
				-</TT><TT>fortune</TT><TT>
			
 
				-program to pick a random line from the location list.
			
 
				-It prints the location, and if it guessed Holmdel, prints
			
 
				-a message and exits.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-Then it uses the
			
 
				-<TT>read</TT>
			
 
				-function to get lines from standard input and validity-check
			
 
				-them until it gets a legal name.
			
 
				-Note that the condition part of a
			
 
				-<TT>while</TT>
			
 
				-can be a compound command.  Only the exit status of the
			
 
				-last command in the sequence is checked.
			
 
				-</P>
			
 
				-<P>
			
 
				-Again, if the result
			
 
				-is Holmdel, it prints a message and exits.
			
 
				-Otherwise it goes back to the top of the loop.
			
 
				-</P>
			
 
				-<H4>28 Design Principles
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Rc</I>
			
 
				-draws heavily from Steve Bourne's
			
 
				-<TT>/bin/sh</TT>.
			
 
				-Any successor of the Bourne shell is bound to
			
 
				-suffer in comparison.  I have tried to fix its
			
 
				-best-acknowledged shortcomings and to simplify things
			
 
				-wherever possible, usually by omitting inessential features.
			
 
				-Only when irresistibly tempted have I introduced novel ideas.
			
 
				-Obviously I have tinkered extensively with Bourne's syntax.
			
 
				-</P>
			
 
				-<P>
			
 
				-The most important principle in
			
 
				-<I>rc</I>'s
			
 
				-design is that it's not a macro processor.  Input is never
			
 
				-scanned more than once by the lexical and syntactic analysis
			
 
				-code (except, of course, by the
			
 
				-<TT>eval</TT>
			
 
				-command, whose
			
 
				-<I>raison d'&ecirc;tre</I>
			
 
				-is to break the rule).
			
 
				-</P>
			
 
				-<P>
			
 
				-Bourne shell scripts can often be made
			
 
				-to run wild by passing them arguments containing spaces.
			
 
				-These will be split into multiple arguments using
			
 
				-<TT>IFS</TT>,
			
 
				-often at inopportune times.
			
 
				-In
			
 
				-<I>rc</I>,
			
 
				-values of variables, including command line arguments, are not re-read
			
 
				-when substituted into a command.
			
 
				-Arguments have presumably been scanned in the parent process, and ought
			
 
				-not to be re-read.
			
 
				-</P>
			
 
				-<P>
			
 
				-Why does Bourne re-scan commands after variable substitution?
			
 
				-He needs to be able to store lists of arguments in variables whose values are
			
 
				-character strings.
			
 
				-If we eliminate re-scanning, we must change the type of variables, so that
			
 
				-they can explicitly carry lists of strings.
			
 
				-</P>
			
 
				-<P>
			
 
				-This introduces some
			
 
				-conceptual complications.  We need a notation for lists of words.
			
 
				-There are two different kinds of concatenation, for strings &#173;
			
 
				-<TT>$a^$b</TT>,
			
 
				-and lists &#173;
			
 
				-<TT>($a $b)</TT>.
			
 
				-The difference between
			
 
				-<TT>()</TT>
			
 
				-and
			
 
				-<TT>''</TT>
			
 
				-is confusing to novices,
			
 
				-although the distinction is arguably sensible &#173;
			
 
				-a null argument is not the same as no argument.
			
 
				-</P>
			
 
				-<P>
			
 
				-Bourne also rescans input when doing command substitution.
			
 
				-This is because the text enclosed in back-quotes is not
			
 
				-a string, but a command.  Properly, it ought to
			
 
				-be parsed when the enclosing command is, but this makes
			
 
				-it difficult to
			
 
				-handle nested command substitutions, like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-size=`wc -l \`ls -t|sed 1q\``
			
 
				-</PRE></TT></DL>
			
 
				-The inner back-quotes must be escaped
			
 
				-to avoid terminating the outer command.
			
 
				-This can get much worse than the above example;
			
 
				-the number of
			
 
				-<TT>\</TT>'s
			
 
				-required is exponential in the nesting depth.
			
 
				-<I>Rc</I>
			
 
				-fixes this by making the backquote a unary operator
			
 
				-whose argument is a command, like this:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-size=`{wc -l `{ls -t|sed 1q}}
			
 
				-</PRE></TT></DL>
			
 
				-No escapes are ever required, and the whole thing
			
 
				-is parsed in one pass.
			
 
				-</P>
			
 
				-<P>
			
 
				-For similar reasons
			
 
				-<I>rc</I>
			
 
				-defines signal handlers as though they were functions,
			
 
				-instead of associating a string with each signal, as Bourne does,
			
 
				-with the attendant possibility of getting a syntax error message
			
 
				-in response to typing the interrupt character.  Since
			
 
				-<I>rc</I>
			
 
				-parses input when typed, it reports errors when you make them.
			
 
				-</P>
			
 
				-<P>
			
 
				-For all this trouble, we gain substantial semantic simplifications.
			
 
				-There is no need for the distinction between
			
 
				-<TT></TT>*<TT>
			
 
				-and
			
 
				-</TT><TT></TT><I>@</I><TT>.
			
 
				-There is no need for four types of quotation, nor the
			
 
				-extremely complicated rules that govern them.  In
			
 
				-</TT><I>rc</I><TT>
			
 
				-you use quotation marks when you want a syntax character
			
 
				-to appear in an argument, or an argument that is the empty string,
			
 
				-and at no other time.
			
 
				-</TT><TT>IFS</TT><TT>
			
 
				-is no longer used, except in the one case where it was indispensable:
			
 
				-converting command output into argument lists during command substitution.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-This also avoids an important UNIX security hole.
			
 
				-In UNIX, the
			
 
				-<I>system</I>
			
 
				-and
			
 
				-<I>popen</I>
			
 
				-functions call
			
 
				-<TT>/bin/sh</TT>
			
 
				-to execute a command.  It is impossible to use either
			
 
				-of these routines with any assurance that the specified command will
			
 
				-be executed, even if the caller of
			
 
				-<I>system</I>
			
 
				-or
			
 
				-<I>popen</I>
			
 
				-specifies a full path name for the command.  This can be devastating
			
 
				-if it occurs in a set-userid program.
			
 
				-The problem is that
			
 
				-<TT>IFS</TT>
			
 
				-is used to split the command into words, so an attacker can just
			
 
				-set
			
 
				-<TT>IFS=/</TT>
			
 
				-in his environment and leave a Trojan horse
			
 
				-named
			
 
				-<TT>usr</TT>
			
 
				-or
			
 
				-<TT>bin</TT>
			
 
				-in the current working directory before running the privileged program.
			
 
				-<I>Rc</I>
			
 
				-fixes this by never rescanning input for any reason.
			
 
				-</P>
			
 
				-<P>
			
 
				-Most of the other differences between
			
 
				-<I>rc</I>
			
 
				-and the Bourne shell are not so serious.  I eliminated Bourne's
			
 
				-peculiar forms of variable substitution, like
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-echo ${a=b} ${c-d} ${e?error}
			
 
				-</PRE></TT></DL>
			
 
				-because they are little used, redundant and easily
			
 
				-expressed in less abstruse terms.
			
 
				-I deleted the builtins
			
 
				-<TT>export</TT>,
			
 
				-<TT>readonly</TT>,
			
 
				-<TT>break</TT>,
			
 
				-<TT>continue</TT>,
			
 
				-<TT>read</TT>,
			
 
				-<TT>return</TT>,
			
 
				-<TT>set</TT>,
			
 
				-<TT>times</TT>
			
 
				-and
			
 
				-<TT>unset</TT>
			
 
				-because they seem redundant or
			
 
				-only marginally useful.
			
 
				-</P>
			
 
				-<P>
			
 
				-Where Bourne's syntax draws from Algol 68,
			
 
				-<I>rc</I>'s
			
 
				-is based on C or Awk.  This is harder to defend.
			
 
				-I believe that, for example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-if(test -f junk) rm junk
			
 
				-</PRE></TT></DL>
			
 
				-is better syntax than
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-if test -f junk; then rm junk; fi
			
 
				-</PRE></TT></DL>
			
 
				-because it is less cluttered with keywords,
			
 
				-it avoids the semicolons that Bourne requires
			
 
				-in odd places,
			
 
				-and the syntax characters better set off the
			
 
				-active parts of the command.
			
 
				-</P>
			
 
				-<P>
			
 
				-The one bit of large-scale syntax that Bourne
			
 
				-unquestionably does better than
			
 
				-<I>rc</I>
			
 
				-is the
			
 
				-<TT>if</TT>
			
 
				-statement with
			
 
				-<TT>else</TT>
			
 
				-clause.
			
 
				-<I>Rc</I>'s
			
 
				-<TT>if</TT>
			
 
				-has no terminating
			
 
				-<TT>fi</TT>-like
			
 
				-bracket.  As a result, the parser cannot
			
 
				-tell whether or not to expect an
			
 
				-<TT>else</TT>
			
 
				-clause without looking ahead in its input.
			
 
				-The problem is that after reading, for example
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-if(test -f junk) echo junk found
			
 
				-</PRE></TT></DL>
			
 
				-in interactive mode,
			
 
				-<I>rc</I>
			
 
				-cannot decide whether to execute it immediately and print
			
 
				-<TT></TT><I>prompt(1)</I><TT>,
			
 
				-or to print
			
 
				-</TT><TT></TT><TT>prompt(2)</TT><TT>
			
 
				-and wait for the
			
 
				-</TT><TT>else</TT><TT>
			
 
				-to be typed.
			
 
				-In the Bourne shell, this is not a problem, because the
			
 
				-</TT><TT>if</TT><TT>
			
 
				-command must end with
			
 
				-</TT><TT>fi</TT><TT>,
			
 
				-regardless of whether it contains an
			
 
				-</TT><TT>else</TT><TT>
			
 
				-or not.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-<I>Rc</I>'s
			
 
				-admittedly feeble solution is to declare that the
			
 
				-<TT>else</TT>
			
 
				-clause is a separate statement, with the semantic
			
 
				-proviso that it must immediately follow an
			
 
				-<TT>if</TT>,
			
 
				-and to call it
			
 
				-<TT>if not</TT>
			
 
				-rather than
			
 
				-<TT>else</TT>,
			
 
				-as a reminder that something odd is going on.
			
 
				-The only noticeable consequence of this is that
			
 
				-the braces are required in the construction
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-for(i){
			
 
				-    if(test -f <I>i) echo </I>i found
			
 
				-    if not echo <I>i not found
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-and that
			
 
				-</I><I>rc</I><I>
			
 
				-resolves the ``dangling else'' ambiguity in opposition
			
 
				-to most people's expectations.
			
 
				-</P>
			
 
				-</I><P>
			
 
				-It is remarkable that in the four most recent editions of the UNIX system
			
 
				-programmer's manual the Bourne shell grammar described in the manual page
			
 
				-does not admit the command
			
 
				-<TT>who|wc</TT>.
			
 
				-This is surely an oversight, but it suggests something darker:
			
 
				-nobody really knows what the Bourne shell's grammar is.  Even examination
			
 
				-of the source code is little help.  The parser is implemented by recursive
			
 
				-descent, but the routines corresponding to the syntactic categories all
			
 
				-have a flag argument that subtly changes their operation depending on the
			
 
				-context.
			
 
				-<I>Rc</I>'s
			
 
				-parser is implemented using
			
 
				-<I>yacc</I>,
			
 
				-so I can say precisely what the grammar is.
			
 
				-</P>
			
 
				-<H4>29 Acknowledgements
			
 
				-</H4>
			
 
				-<P>
			
 
				-Rob Pike, Howard Trickey and other Plan 9 users have been insistent, incessant
			
 
				-sources of good ideas and criticism.  Some examples in this document are plagiarized
			
 
				-from [Bourne],
			
 
				-as are most of
			
 
				-<I>rc</I>'s
			
 
				-good features.
			
 
				-</P>
			
 
				-<H4>30 Reference
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-S. R. Bourne,
			
 
				-UNIX Time-Sharing System: The UNIX Shell,
			
 
				-Bell System Technical Journal, Volume 57 number 6, July-August 1978
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/release3.html
+++ b/sys/doc/release3.html
@@ -1,214 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Plan 9 From Bell Labs
			
 
				-<br>
			
 
				-Third Release Notes
			
 
				-<br>
			
 
				-June 7, 2000
			
 
				-</H1>
			
 
				-<br>&#32;<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Copyright &#169; 2000 Lucent Technologies Inc.
			
 
				-All Rights Reserved
			
 
				-<br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-The third release of the Plan 9 operating system from Bell Labs
			
 
				-is something of a snapshot of the current system.
			
 
				-This differs from the previous, 1995 release,
			
 
				-which was a more coordinated, well-defined release of an already-out-of-date
			
 
				-system.
			
 
				-Also, the previous releases were distributed on fixed media, while this release
			
 
				-is being done over the web.
			
 
				-The other major difference is that the third release is licensed under
			
 
				-an open source agreement, which we hope will encourage people
			
 
				-to experiment with it.
			
 
				-<br>&#32;<br>
			
 
				-Beyond that, there are innumerable little changes throughout the code.
			
 
				-Although superficially it is the same environment, there is hardly an aspect
			
 
				-of the system that has not been redesigned, rewritten, or replaced.
			
 
				-The following is an incomplete list of changes.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The list of architectures has changed; more compilers are included
			
 
				-and the list of kernels has changed.
			
 
				-There is solid support for Intel x86 multiprocessors.
			
 
				-Also, although the sources are available for
			
 
				-other architectures, the binaries and libraries are built only for the
			
 
				-Intel x86 architectures.
			
 
				-Kernel source is available for x86, Mips, DEC Alpha, and Power PC architectures.
			
 
				-Compilers also exist for AMD 29000, Motorola MC68000 and MC68020,
			
 
				-Intel i960, and SPARC.
			
 
				-(Unlike the last release, no SPARC kernel exists for the current system.)
			
 
				-The compilers and related tools
			
 
				-have been made easier to port to Unix and Windows.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The kernel now has a file cache to improve I/O performance.
			
 
				-Other kernel changes include the replacement of the streams interface
			
 
				-with a simpler, faster, but less flexible I/O queue structure.
			
 
				-The x86 kernels support PCI and PCMCIA devices.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Network management has been simplified and generalized.
			
 
				-DNS supports a resolver mode and the DNS server is now solid.
			
 
				-DHCP is supported both at the client and server ends.
			
 
				-The system can handle multiple IP stacks, which are also
			
 
				-no longer Ethernet-specific.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The organization of disks in the kernel has been unified, providing
			
 
				-a consistent interface to all disks and controllers: SCSI or ATAPI,
			
 
				-magnetic or CD-ROM.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-File offsets, such as in the
			
 
				-<TT>seek</TT>
			
 
				-system call, are now 64-bit values.
			
 
				-The 1995 release defined the type
			
 
				-<TT>Length</TT>
			
 
				-for the x86 as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef union
			
 
				-{
			
 
				-	char	clength[8];
			
 
				-	vlong	vlength;
			
 
				-	struct
			
 
				-	{
			
 
				-		long	hlength;
			
 
				-		long	length;
			
 
				-	};
			
 
				-} Length;
			
 
				-</PRE></TT></DL>
			
 
				-which is the wrong byte order.
			
 
				-Now, for all architectures,
			
 
				-<TT>Length</TT>
			
 
				-is well handled by a
			
 
				-<TT>vlong</TT>
			
 
				-(<TT>long</TT>
			
 
				-<TT>long</TT>)
			
 
				-type, although for compatibility it's still held in a union:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef union
			
 
				-{
			
 
				-	vlong	length;
			
 
				-} Length;
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The kernel now maintains a file name associated with each open file or
			
 
				-directory, which can be cheaply recovered by the
			
 
				-<TT>fd2path</TT>
			
 
				-system call.
			
 
				-Plan 9 now does a much better job with
			
 
				-<TT>..</TT>
			
 
				-(dot-dot).
			
 
				-On a related note, a description of a process's name space may be
			
 
				-read with the
			
 
				-<TT>ns</TT>
			
 
				-file in
			
 
				-<TT>/proc</TT>,
			
 
				-or by the
			
 
				-<TT>ns</TT>
			
 
				-command.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The security model is the same, although
			
 
				-the key format has changed.
			
 
				-If you have an old key file, use
			
 
				-<TT>auth/convkeys2</TT>
			
 
				-(see
			
 
				-<A href="/magic/man2html/8/auth"><I>auth</I>(8))
			
 
				-</A>to update it.
			
 
				-There are new libraries for multiprecision arithmetic and security.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The graphics model is very different.
			
 
				-It is based on the Porter-Duff compositing algebra rather than
			
 
				-<TT>bitblt</TT>,
			
 
				-and the system supports everything from bitmaps to true-color displays.
			
 
				-Some of the graphics drivers exploit hardware acceleration.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Coupled to the graphics changes, the image and font file formats have
			
 
				-changed.
			
 
				-They can represent a wider range of pixel formats and compress the data.
			
 
				-Also the white/black sense of value is reversed (zero is now black; pixels
			
 
				-represent light, not ink).
			
 
				-Most of the tools can handle the old format, but they all write the new format only.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The user interface now incorporates plumbing, a language-driven
			
 
				-way for applications to communicate.  See
			
 
				-<A href="/magic/man2html/6/plumb"><I>plumb</I>(6)
			
 
				-</A>for information.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Building on plumbing and a program that presents the mail box as a file
			
 
				-system, Plan 9 now has convenient support for MIME mail messages.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-<TT>8&#189;</TT>
			
 
				-has been replaced by
			
 
				-<TT>rio</TT>,
			
 
				-which has a similar appearance but a different architecture.
			
 
				-Although still a file server, it is much more efficient: the kernel driver
			
 
				-multiplexes graphics output so
			
 
				-<TT>rio</TT>
			
 
				-is not in the display path.
			
 
				-<TT>Rio</TT>
			
 
				-handles input and window control only.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-PC booting is more sophisticated.  PCs can now boot Plan 9 directly from
			
 
				-the disk without running DOS.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Alef is gone.
			
 
				-It was deemed too difficult to maintain two sets of compilers and libraries
			
 
				-for all architectures.
			
 
				-Alef programs were translated into C, with the help of a new thread library
			
 
				-that preserves much of Alef's functionality, but none of its syntax.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Mothra is gone.  There is no web browser included in this release,
			
 
				-but something may well appear before long.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The
			
 
				-<TT>fb</TT>
			
 
				-(frame buffer) suite is gone. Most of its tools are
			
 
				-supplanted by new ones, such as
			
 
				-<TT>page</TT>,
			
 
				-<TT>jpg</TT>,
			
 
				-and
			
 
				-<TT>togif</TT>.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Also gone from this release are the games and support for
			
 
				-international input
			
 
				-(<TT>ktrans</TT>
			
 
				-etc.).
			
 
				-Both may return.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-New things include an implementation of
			
 
				-<TT>ssh</TT>,
			
 
				-an IMAP4 server,
			
 
				-and some spam-filtering software (see
			
 
				-<A href="/magic/man2html/8/scanmail"><I>scanmail</I>(8)).
			
 
				-</A><br>&#32;<br>
			
 
				-There's lots more.
			
 
				-If you have problems, mail
			
 
				-<TT>9trouble@plan9.bell-labs.com</TT>.
			
 
				-Please don't mail us individually.
			
 
				-<br>&#32;<br>
			
 
				-Good Luck!
			
 
				-
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/release4.html
+++ b/sys/doc/release4.html
@@ -1,180 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-Plan 9 From Bell Labs: Fourth Release Notes
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Plan 9 From Bell Labs
			
 
				-<br>
			
 
				-Fourth Release Notes
			
 
				-<br>
			
 
				-April, 2002
			
 
				-<br>
			
 
				-updated June, 2003
			
 
				-</H1>
			
 
				-<br>&#32;<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Copyright &#169; 2002-2003 Lucent Technologies Inc.
			
 
				-All Rights Reserved
			
 
				-<br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-The fourth release of the Plan 9 operating system from Bell Labs
			
 
				-packages a major overhaul of the system at every level.
			
 
				-From the underlying file system protocol, 9P, through the kernel,
			
 
				-libraries, and applications, almost everything has been modified
			
 
				-and, in many cases, redesigned or rewritten.
			
 
				-<br>&#32;<br>
			
 
				-The most significant change is that 9P has been redesigned to address
			
 
				-a number of shortcomings, most important, its previous inability to handle long
			
 
				-file names.
			
 
				-Unfortunately, squeezing long names onto the disks of existing
			
 
				-file servers is a messy business that we're still grappling with,
			
 
				-so at the moment
			
 
				-<A href="/magic/man2html/4/fs"><I>fs</I>(4)
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/4/kfs"><I>kfs</I>(4)
			
 
				-</A>can't yet handle long names,
			
 
				-although they do talk the new protocol.
			
 
				-(In fact, they
			
 
				-talk both old and new, as required, to ease transition.)
			
 
				-In the meantime, there is a workaround &#8212;
			
 
				-<A href="/magic/man2html/4/lnfs"><I>lnfs</I>(4)
			
 
				-</A>&#8212;
			
 
				-and many of the other file servers such as
			
 
				-<A href="/magic/man2html/4/ramfs"><I>ramfs</I>(4)
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/4/u9fs"><I>u9fs</I>(4)
			
 
				-</A>work just fine with long names.
			
 
				-It's only the old disk-resident file servers
			
 
				-that don't.
			
 
				-The new file server
			
 
				-<A href="/magic/man2html/4/fossil"><I>fossil</I>(4)
			
 
				-</A>supports long names and many other features.
			
 
				-The older servers are now deprecated.
			
 
				-<br>&#32;<br>
			
 
				-The following is a partial list of the major changes throughout the system.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The file system protocol, 9P, has been reworked.
			
 
				-It now has variable-length names, so it can handle long names
			
 
				-but also is more compact when handling short ones.
			
 
				-It uses a different format that is easily parsed, eliminating the need for the old
			
 
				-<TT>aux/fcall</TT>
			
 
				-utility,
			
 
				-and delegates its authentication duties to an external agent,
			
 
				-<TT>factotum</TT>.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Security has been a focus of attention.
			
 
				-A new security agent,
			
 
				-<A href="/magic/man2html/4/factotum"><I>factotum</I>(4),
			
 
				-</A>manages passwords and other secrets and, coupled with a new secure file store
			
 
				-<A href="/magic/man2html/8/secstore"><I>secstore</I>(8),
			
 
				-</A>enables secure single sign-on.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-<TT>Cpu</TT>,
			
 
				-<TT>import</TT>,
			
 
				-and
			
 
				-<TT>exportfs</TT>
			
 
				-all encrypt their connections now, and since they use the new 9P they
			
 
				-also use new network port numbers.
			
 
				-A new service
			
 
				-<A href="/magic/man2html/1/aan"><I>aan</I>(1)
			
 
				-</A>is used by
			
 
				-<TT>import</TT>
			
 
				-to make its network connections more reliable in the face of network outages.
			
 
				-The old ports still work, through the agency of a protocol conversion filter
			
 
				-<A href="/magic/man2html/4/srvold9p"><I>srvold9p</I>(4).
			
 
				-</A><br>&#32;<br>
			
 
				-*
			
 
				-We are phasing out the IL protocol since it doesn't handle long-distance connections
			
 
				-well (and long-distance networks don't handle it well, either).
			
 
				-IL is still used by
			
 
				-<A href="/magic/man2html/4/fs"><I>fs</I>(4)
			
 
				-</A>but TCP has become the standard protocol for all other services.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The software for the new network-resident secure block store,
			
 
				-<A href="/magic/man2html/8/venti"><I>venti</I>(8),
			
 
				-</A>is included with this distribution.
			
 
				-The new
			
 
				-file server
			
 
				-<A href="/magic/man2html/4/fossil"><I>fossil</I>(4)
			
 
				-</A>uses Venti rather than a WORM as its permanent block repository/backup medium.
			
 
				-It is still being developed, but is mature enough that a handful of users
			
 
				-throughout the world are using it as their primary file server.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The need to handle longer file names triggered a rethinking of the way the
			
 
				-system handles strings in general.
			
 
				-The kernel is now more explanatory when it gives an error message and
			
 
				-more consistent in how it handles strings such as commands to devices.
			
 
				-The interfaces to many of the system calls, such as
			
 
				-<A href="/magic/man2html/2/errstr"><I>errstr</I>(2)
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/2/wait"><I>wait</I>(2)
			
 
				-</A>all had to change as a result, as did the library interface to read directories,
			
 
				-<A href="/magic/man2html/2/stat"><I>stat</I>(2)
			
 
				-</A>and its relatives.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The formatted I/O package described in
			
 
				-<A href="/magic/man2html/2/print"><I>print</I>(2)
			
 
				-</A>and
			
 
				-<A href="/magic/man2html/2/fmtinstall"><I>fmtinstall</I>(2)
			
 
				-</A>has been redesigned.
			
 
				-Although the basic interface is unchanged, it now runs without locks and
			
 
				-has an internal buffer management mechanism that means
			
 
				-<TT>print</TT>
			
 
				-no longer needs a large on-stack buffer.
			
 
				-The interface for writing custom print verbs and custom formatted I/O routines
			
 
				-has also been greatly improved.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-The thread library
			
 
				-<A href="/magic/man2html/2/thread"><I>thread</I>(2)
			
 
				-</A>has been completely rewritten.
			
 
				-The main visible change is that, coupled with the changes to printing,
			
 
				-<TT>threadprint</TT>
			
 
				-is gone; you can just use
			
 
				-<TT>print</TT>
			
 
				-or
			
 
				-<TT>fprint</TT>
			
 
				-at will.
			
 
				-<br>&#32;<br>
			
 
				-*
			
 
				-Support for electronic mail has been extended in many ways and now includes
			
 
				-some new spam filtering tools,
			
 
				-much better (and more standard) handling of MIME messages,
			
 
				-the ability to render incoming HTML mail,
			
 
				-and much more.
			
 
				-<br>&#32;<br>
			
 
				-There are so many changes to the programming interfaces of the system
			
 
				-that they are described in a separate document, entitled
			
 
				-Changes to the Programming Environment in the Fourth Release of Plan 9.
			
 
				-Please read it before you start updating your own software to run under the new system.
			
 
				-<br>&#32;<br>
			
 
				-The installation method has also changed and we're moving towards a new
			
 
				-method for maintaining updates.
			
 
				-The Plan 9 Wiki
			
 
				-(<TT>http://plan9.bell-labs.com/wiki/plan9</TT>)
			
 
				-and Usenet group
			
 
				-(<TT>comp.os.plan9</TT>)
			
 
				-are the places to visit to learn more and stay current.
			
 
				-In particular, the installation notes are now maintained in the Wiki;
			
 
				-the traditional papers on installation and start-up are gone.
			
 
				-<br>&#32;<br>
			
 
				-There's lots more new stuff.
			
 
				-If you have problems, mail
			
 
				-<TT>9trouble@plan9.bell-labs.com</TT>
			
 
				-or, better, check the wiki
			
 
				-<TT>http://plan9.bell-labs.com/wiki/plan9</TT>
			
 
				-or ask the Usenet newsgroup
			
 
				-<TT>comp.os.plan9</TT>.
			
 
				-<br>&#32;<br>
			
 
				-Good Luck!
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/sam/sam.html
+++ b/sys/doc/sam/sam.html
@@ -1,3291 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-The Text Editor sam
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>The Text Editor <TT>sam</TT>
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-rob@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-is an interactive multi-file text editor intended for
			
 
				-bitmap displays.
			
 
				-A textual command language
			
 
				-supplements the mouse-driven, cut-and-paste interface
			
 
				-to make complex or
			
 
				-repetitive editing tasks easy to specify.
			
 
				-The language is characterized by the composition of regular expressions
			
 
				-to describe the structure of the text being modified.
			
 
				-The treatment of files as a database, with changes logged
			
 
				-as atomic transactions, guides the implementation and
			
 
				-makes a general `undo' mechanism straightforward.
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-is implemented as two processes connected by a low-bandwidth stream,
			
 
				-one process handling the display and the other the editing
			
 
				-algorithms.  Therefore it can run with the display process
			
 
				-in a bitmap terminal and the editor on a local host,
			
 
				-with both processes on a bitmap-equipped host, or with
			
 
				-the display process in the terminal and the editor in a
			
 
				-remote host.
			
 
				-By suppressing the display process,
			
 
				-it can even run without a bitmap terminal.
			
 
				-</P>
			
 
				-<P>
			
 
				-This paper is reprinted from Software&#8212;Practice and Experience,
			
 
				-Vol 17, number 11, pp. 813-845, November 1987.
			
 
				-The paper has not been updated for the Plan 9 manuals.  Although
			
 
				-<TT>Sam</TT>
			
 
				-has not changed much since the paper was written, the system around it certainly has.
			
 
				-Nonetheless, the description here still stands as the best introduction to the editor.
			
 
				-</DL>
			
 
				-</P>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-is an interactive text editor that combines cut-and-paste interactive editing with
			
 
				-an unusual command language based on the composition of regular expressions.
			
 
				-It is written as two programs: one, the `host part,' runs on a UNIX system
			
 
				-and implements the command language and provides file access; the other, the
			
 
				-`terminal part,' runs asynchronously
			
 
				-on a machine with a mouse and bitmap display
			
 
				-and supports the display and interactive editing.
			
 
				-The host part may even be run in isolation on an ordinary terminal
			
 
				-to edit text using the command
			
 
				-language, much like a traditional line editor,
			
 
				-without assistance from a mouse or display.
			
 
				-Most often,
			
 
				-the terminal part runs on a Blit<sup>1</sup> terminal
			
 
				-(actually on a Teletype DMD 5620, the production version of the Blit), whose
			
 
				-host connection is an ordinary 9600 bps RS232 link;
			
 
				-on the SUN computer the host and display processes run on a single machine,
			
 
				-connected by a pipe.
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-edits uninterpreted
			
 
				-ASCII text.
			
 
				-It has no facilities for multiple fonts, graphics or tables,
			
 
				-unlike MacWrite,<sup>2</sup> Bravo,<sup>3</sup> Tioga<sup>4</sup>
			
 
				-or Lara.<sup>5</sup>
			
 
				-Also unlike them, it has a rich command language.
			
 
				-(Throughout this paper, the phrase
			
 
				-command language
			
 
				-refers to
			
 
				-textual commands; commands activated from the mouse form the
			
 
				-<I>mouse</I>
			
 
				-<I>language.</I>)
			
 
				-<TT>Sam</TT>
			
 
				-developed as an editor for use by programmers, and tries to join
			
 
				-the styles of the UNIX text editor
			
 
				-<TT>ed</TT><sup>6,7</sup>
			
 
				-with that of interactive cut-and-paste editors by
			
 
				-providing a comfortable mouse-driven interface
			
 
				-to a program with a solid command language driven by regular expressions.
			
 
				-The command language developed more than the mouse language, and
			
 
				-acquired a notation for describing the structure of files
			
 
				-more richly than as a sequence of lines,
			
 
				-using a dataflow-like syntax for specifying changes.
			
 
				-</P>
			
 
				-<P>
			
 
				-The interactive style was influenced by
			
 
				-<TT>jim</TT>,<sup>1</sup>
			
 
				-an early cut-and-paste editor for the Blit, and by
			
 
				-<TT>mux</TT>,<sup>8</sup>
			
 
				-the Blit window system.
			
 
				-<TT>Mux</TT>
			
 
				-merges the original Blit window system,
			
 
				-<TT>mpx</TT>,<sup>1</sup>
			
 
				-with cut-and-paste editing, forming something like a
			
 
				-multiplexed version of
			
 
				-<TT>jim</TT>
			
 
				-that edits the output of (and input to) command sessions rather than files.
			
 
				-</P>
			
 
				-<P>
			
 
				-The first part of this paper describes the command language, then the mouse
			
 
				-language, and explains how they interact.
			
 
				-That is followed by a description of the implementation,
			
 
				-first of the host part, then of the terminal part.
			
 
				-A principle that influenced the design of
			
 
				-<TT>sam</TT>
			
 
				-is that it should have no explicit limits, such as upper limits on
			
 
				-file size or line length.
			
 
				-A secondary consideration is that it be efficient.
			
 
				-To honor these two goals together requires a method for efficiently
			
 
				-manipulating
			
 
				-huge strings (files) without breaking them into lines,
			
 
				-perhaps while making thousands of changes
			
 
				-under control of the command language.
			
 
				-<TT>Sam</TT>'s
			
 
				-method is to
			
 
				-treat the file as a transaction database, implementing changes as atomic
			
 
				-updates.  These updates may be unwound easily to `undo' changes.
			
 
				-Efficiency is achieved through a collection of caches that minimizes
			
 
				-disc traffic and data motion, both within the two parts of the program
			
 
				-and between them.
			
 
				-</P>
			
 
				-<P>
			
 
				-The terminal part of
			
 
				-<TT>sam</TT>
			
 
				-is fairly straightforward.
			
 
				-More interesting is how the two halves of the editor stay
			
 
				-synchronized when either half may initiate a change.
			
 
				-This is achieved through a data structure that organizes the
			
 
				-communications and is maintained in parallel by both halves.
			
 
				-</P>
			
 
				-<P>
			
 
				-The last part of the paper chronicles the writing of
			
 
				-<TT>sam</TT>
			
 
				-and discusses the lessons that were learned through its development and use.
			
 
				-</P>
			
 
				-<P>
			
 
				-The paper is long, but is composed largely of two papers of reasonable length:
			
 
				-a description of the user interface of
			
 
				-<TT>sam</TT>
			
 
				-and a discussion of its implementation.
			
 
				-They are combined because the implementation is strongly influenced by
			
 
				-the user interface, and vice versa.
			
 
				-</P>
			
 
				-<H4>The Interface
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-is a text editor for multiple files.
			
 
				-File names may be provided when it is invoked:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-sam file1 file2 ...
			
 
				-</PRE></TT></DL>
			
 
				-and there are commands
			
 
				-to add new files and discard unneeded ones.
			
 
				-Files are not read until necessary
			
 
				-to complete some command.
			
 
				-Editing operations apply to an internal copy
			
 
				-made when the file is read; the UNIX file associated with the copy
			
 
				-is changed only by an explicit command.
			
 
				-To simplify the discussion, the internal copy is here called a
			
 
				-<I>file</I>,
			
 
				-while the disc-resident original is called a
			
 
				-disc file.
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-is usually connected to a bitmap display that presents a cut-and-paste
			
 
				-editor driven by the mouse.
			
 
				-In this mode, the command language is still available:
			
 
				-text typed in a special window, called the
			
 
				-<TT>sam</TT>
			
 
				-<I>window,</I>
			
 
				-is interpreted
			
 
				-as commands to be executed in the current file.
			
 
				-Cut-and-paste editing may be used in any window &#8212; even in the
			
 
				-<TT>sam</TT>
			
 
				-window to construct commands.
			
 
				-The other mode of operation, invoked by starting
			
 
				-<TT>sam</TT>
			
 
				-with the option
			
 
				-<TT>-d</TT>
			
 
				-(for `no download'),
			
 
				-does not use the mouse or bitmap display, but still permits
			
 
				-editing using the textual command language, even on an ordinary terminal,
			
 
				-interactively or from a script.
			
 
				-</P>
			
 
				-<P>
			
 
				-The following sections describe first the command language (under
			
 
				-<TT>sam -d</TT>
			
 
				-and in the
			
 
				-<TT>sam</TT>
			
 
				-window), and then the mouse interface.
			
 
				-These two languages are nearly independent, but connect through the
			
 
				-<I>current</I>
			
 
				-<I>text,</I>
			
 
				-described below.
			
 
				-</P>
			
 
				-<H4>The Command Language
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-A file consists of its contents, which are an array of characters
			
 
				-(that is, a string); the
			
 
				-<I>name</I>
			
 
				-of the associated disc file; the
			
 
				-modified bit
			
 
				-that states whether the contents match those of
			
 
				-the disc file;
			
 
				-and a substring of the contents, called the
			
 
				-current text
			
 
				-or
			
 
				-<I>dot</I>
			
 
				-(see Figures 1 and 2).
			
 
				-If the current text is a null string, dot falls between characters.
			
 
				-The
			
 
				-<I>value</I>
			
 
				-of dot is the location of the current text; the
			
 
				-<I>contents</I>
			
 
				-of dot are the characters it contains.
			
 
				-<TT>Sam</TT>
			
 
				-imparts to the text no two-dimensional interpretation such as columns
			
 
				-or fields; text is always one-dimensional.
			
 
				-Even the idea of a `line' of text as understood by most UNIX programs
			
 
				-&#8212; a sequence of characters terminated by a newline character &#8212;
			
 
				-is only weakly supported.
			
 
				-<P>
			
 
				-The
			
 
				-current file
			
 
				-is the file to which editing commands refer.
			
 
				-The current text is therefore dot in the current file.
			
 
				-If a command doesn't explicitly name a particular file or piece of text,
			
 
				-the command is assumed to apply to the current text.
			
 
				-For the moment, ignore the presence of multiple files and consider
			
 
				-editing a single file.
			
 
				-<br><img src="fig1.ps.11760.gif"><br>
			
 
				-<br>
			
 
				-<I>Figure 1. A typical
			
 
				-</I><TT>sam</TT><I>
			
 
				-screen, with the editing menu presented.
			
 
				-The
			
 
				-</I><TT>sam</TT><I>
			
 
				-(command language) window is in the middle, with file windows above and below.
			
 
				-(The user interface makes it easy to create these abutting windows.)
			
 
				-The partially obscured window is a third file window.
			
 
				-The uppermost window is that to which typing and mouse operations apply,
			
 
				-as indicated by its heavy border.
			
 
				-Each window has its current text highlighted in reverse video.
			
 
				-The
			
 
				-</I><TT>sam</TT><I>
			
 
				-window's current text is the null string on the last visible line,
			
 
				-indicated by a vertical bar.
			
 
				-See also Figure 2.
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I><br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-Commands have one-letter names.
			
 
				-Except for non-editing commands such as writing
			
 
				-the file to disc, most commands make some change
			
 
				-to the text in dot and leave dot set to the text resulting from the change.
			
 
				-For example, the delete command,
			
 
				-<TT>d</TT>,
			
 
				-deletes the text in dot, replacing it by the null string and setting dot
			
 
				-to the result.
			
 
				-The change command,
			
 
				-<TT>c</TT>,
			
 
				-replaces dot by text delimited by an arbitrary punctuation character,
			
 
				-conventionally
			
 
				-a slash.  Thus,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-c/Peter/
			
 
				-</PRE></TT></DL>
			
 
				-replaces the text in dot by the string
			
 
				-<TT>Peter</TT>.
			
 
				-Similarly,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-a/Peter/
			
 
				-</PRE></TT></DL>
			
 
				-(append) adds the string after dot, and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-i/Peter/
			
 
				-</PRE></TT></DL>
			
 
				-(insert) inserts before dot.
			
 
				-All three leave dot set to the new text,
			
 
				-<TT>Peter</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Newlines are part of the syntax of commands:
			
 
				-the newline character lexically terminates a command.
			
 
				-Within the inserted text, however, newlines are never implicit.
			
 
				-But since it is often convenient to insert multiple lines of text,
			
 
				-<TT>sam</TT>
			
 
				-has a special
			
 
				-syntax for that case:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-a
			
 
				-some lines of text
			
 
				-to be inserted in the file,
			
 
				-terminated by a period
			
 
				-on a line by itself
			
 
				-.
			
 
				-</PRE></TT></DL>
			
 
				-In the one-line syntax, a newline character may be specified by a C-like
			
 
				-escape, so
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-c/\n/
			
 
				-</PRE></TT></DL>
			
 
				-replaces dot by a single newline character.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-also has a substitute command,
			
 
				-<TT>s</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-s/<I>expression</I>/<I>replacement</I>/
			
 
				-</PRE></TT></DL>
			
 
				-substitutes the replacement text for the first match, in dot,
			
 
				-of the regular expression.
			
 
				-Thus, if dot is the string
			
 
				-<TT>Peter</TT>,
			
 
				-the command
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-s/t/st/
			
 
				-</PRE></TT></DL>
			
 
				-changes it to
			
 
				-<TT>Pester</TT>.
			
 
				-In general,
			
 
				-<TT>s</TT>
			
 
				-is unnecessary, but it was inherited from
			
 
				-<TT>ed</TT>
			
 
				-and it has some convenient variations.
			
 
				-For instance, the replacement text may include the matched text,
			
 
				-specified by
			
 
				-<TT>&amp;</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-s/Peter/Oh, &amp;, &amp;, &amp;, &amp;!/
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-There are also three commands that apply programs
			
 
				-to text:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&#60; <I>UNIX program</I>
			
 
				-</PRE></TT></DL>
			
 
				-replaces dot by the output of the UNIX program.
			
 
				-Similarly, the
			
 
				-<TT>></TT>
			
 
				-command
			
 
				-runs the program with dot as its standard input, and
			
 
				-<TT>|</TT>
			
 
				-does both.  For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-| sort
			
 
				-</PRE></TT></DL>
			
 
				-replaces dot by the result of applying the standard sorting utility to it.
			
 
				-Again, newlines have no special significance for these
			
 
				-<TT>sam</TT>
			
 
				-commands.
			
 
				-The text acted upon and resulting from these commands is not necessarily
			
 
				-bounded by newlines, although for connection with UNIX programs,
			
 
				-newlines may be necessary to obey conventions.
			
 
				-</P>
			
 
				-<P>
			
 
				-One more command:
			
 
				-<TT>p</TT>
			
 
				-prints the contents of dot.
			
 
				-Table I summarizes
			
 
				-<TT>sam</TT>'s
			
 
				-commands.
			
 
				-<br><img src="-.11761.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-</P>
			
 
				-<P>
			
 
				-The value of dot may be changed by
			
 
				-specifying an
			
 
				-<I>address</I>
			
 
				-for the command.
			
 
				-The simplest address is a line number:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-3
			
 
				-</PRE></TT></DL>
			
 
				-refers to the third line of the file, so
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-3d
			
 
				-</PRE></TT></DL>
			
 
				-deletes the third line of the file, and implicitly renumbers
			
 
				-the lines so the old line 4 is now numbered 3.
			
 
				-(This is one of the few places where
			
 
				-<TT>sam</TT>
			
 
				-deals with lines directly.)
			
 
				-Line
			
 
				-<TT>0</TT>
			
 
				-is the null string at the beginning of the file.
			
 
				-If a command consists of only an address, a
			
 
				-<TT>p</TT>
			
 
				-command is assumed, so typing an unadorned
			
 
				-<TT>3</TT>
			
 
				-prints line 3 on the terminal.
			
 
				-There are a couple of other basic addresses:
			
 
				-a period addresses dot itself; and
			
 
				-a dollar sign
			
 
				-(<TT>$</TT>)
			
 
				-addresses the null string at the end of the file.
			
 
				-</P>
			
 
				-<P>
			
 
				-An address is always a single substring of the file.
			
 
				-Thus, the address
			
 
				-<TT>3</TT>
			
 
				-addresses the characters
			
 
				-after the second newline of
			
 
				-the file through the third newline of the file.
			
 
				-A
			
 
				-compound address
			
 
				-is constructed by the comma operator
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I>address1</I>,<I>address2</I>
			
 
				-</PRE></TT></DL>
			
 
				-and addresses the substring of the file from the beginning of
			
 
				-<I>address1</I>
			
 
				-to the end of
			
 
				-<I>address2</I>.
			
 
				-For example, the command
			
 
				-<TT>3,5p</TT>
			
 
				-prints the third through fifth lines of the file and
			
 
				-<TT>.,$d</TT>
			
 
				-deletes the text from the beginning of dot to the end of the file.
			
 
				-</P>
			
 
				-<P>
			
 
				-These addresses are all absolute positions in the file, but
			
 
				-<TT>sam</TT>
			
 
				-also has relative addresses, indicated by
			
 
				-<TT>+</TT>
			
 
				-or
			
 
				-<TT>-</TT>.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-$-3
			
 
				-</PRE></TT></DL>
			
 
				-is the third line before the end of the file and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.+1
			
 
				-</PRE></TT></DL>
			
 
				-is the line after dot.
			
 
				-If no address appears to the left of the
			
 
				-<TT>+</TT>
			
 
				-or
			
 
				-<TT>-</TT>,
			
 
				-dot is assumed;
			
 
				-if nothing appears to the right,
			
 
				-<TT>1</TT>
			
 
				-is assumed.
			
 
				-Therefore,
			
 
				-<TT>.+1</TT>
			
 
				-may be abbreviated to just a plus sign.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>+</TT>
			
 
				-operator acts relative to the end of its first argument, while the
			
 
				-<TT>-</TT>
			
 
				-operator acts relative to the beginning.  Thus
			
 
				-<TT>.+1</TT>
			
 
				-addresses the first line after dot,
			
 
				-<TT>.-</TT>
			
 
				-addresses the first line before dot, and
			
 
				-<TT>+-</TT>
			
 
				-refers to the line containing the end of dot.  (Dot may span multiple lines, and
			
 
				-<TT>+</TT>
			
 
				-selects the line after the end of dot, then
			
 
				-<TT>-</TT>
			
 
				-backs up one line.)
			
 
				-</P>
			
 
				-<P>
			
 
				-The final type of address is a regular expression, which addresses the
			
 
				-text matched by the expression.  The expression is enclosed in slashes, as in
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/<I>expression</I>/
			
 
				-</PRE></TT></DL>
			
 
				-The expressions are the same as those in the UNIX program
			
 
				-<TT>egrep</TT>,<sup>6,7</sup>
			
 
				-and include closures, alternations, and so on.
			
 
				-They find the
			
 
				-leftmost longest
			
 
				-string that matches the expression, that is,
			
 
				-the first match after the point where the search is started,
			
 
				-and if more than one match begins at the same spot, the longest such match.
			
 
				-(I assume familiarity with the syntax for regular expressions in UNIX programs.<sup>9</sup>)
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/x/
			
 
				-</PRE></TT></DL>
			
 
				-matches the next
			
 
				-<TT>x</TT>
			
 
				-character in the file,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/xx*/
			
 
				-</PRE></TT></DL>
			
 
				-matches the next run of one or more
			
 
				-<TT>x</TT>'s,
			
 
				-and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/x|Peter/
			
 
				-</PRE></TT></DL>
			
 
				-matches the next
			
 
				-<TT>x</TT>
			
 
				-or
			
 
				-<TT>Peter</TT>.
			
 
				-For compatibility with other UNIX programs, the `any character' operator,
			
 
				-a period,
			
 
				-does not match a newline, so
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/.*/
			
 
				-</PRE></TT></DL>
			
 
				-matches the text from dot to the end of the line, but excludes the newline
			
 
				-and so will not match across
			
 
				-the line boundary.
			
 
				-</P>
			
 
				-<P>
			
 
				-Regular expressions are always relative addresses.
			
 
				-The direction is forwards by default,
			
 
				-so
			
 
				-<TT>/Peter/</TT>
			
 
				-is really an abbreviation for
			
 
				-<TT>+/Peter/</TT>.
			
 
				-The search can be reversed with a minus sign, so
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<TT>-/Peter/</TT>
			
 
				-</PRE></TT></DL>
			
 
				-finds the first
			
 
				-<TT>Peter</TT>
			
 
				-before dot.
			
 
				-Regular expressions may be used with other address forms, so
			
 
				-<TT>0+/Peter/</TT>
			
 
				-finds the first
			
 
				-<TT>Peter</TT>
			
 
				-in the file and
			
 
				-<TT>$-/Peter/</TT>
			
 
				-finds the last.
			
 
				-Table II summarizes
			
 
				-<TT>sam</TT>'s
			
 
				-addresses.
			
 
				-<br><img src="-.11762.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-</P>
			
 
				-<P>
			
 
				-The language discussed so far will not seem novel
			
 
				-to people who use UNIX text editors
			
 
				-such as
			
 
				-<TT>ed</TT>
			
 
				-or
			
 
				-<TT>vi</TT>.<sup>9</sup>
			
 
				-Moreover, the kinds of editing operations these commands allow, with the exception
			
 
				-of regular expressions and line numbers,
			
 
				-are clearly more conveniently handled by a mouse-based interface.
			
 
				-Indeed,
			
 
				-<TT>sam</TT>'s
			
 
				-mouse language (discussed at length below) is the means by which
			
 
				-simple changes are usually made.
			
 
				-For large or repetitive changes, however, a textual language
			
 
				-outperforms a manual interface.
			
 
				-</P>
			
 
				-<P>
			
 
				-Imagine that, instead of deleting just one occurrence of the string
			
 
				-<TT>Peter</TT>,
			
 
				-we wanted to eliminate every
			
 
				-<TT>Peter</TT>.
			
 
				-What's needed is an iterator that runs a command for each occurrence of some
			
 
				-text.
			
 
				-<TT>Sam</TT>'s
			
 
				-iterator is called
			
 
				-<TT>x</TT>,
			
 
				-for extract:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-x/<I>expression</I>/ <I>command</I>
			
 
				-</PRE></TT></DL>
			
 
				-finds all matches in dot of the specified expression, and for each
			
 
				-such match, sets dot to the text matched and runs the command.
			
 
				-So to delete all the
			
 
				-<TT>Peters:</TT>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-0,$ x/Peter/ d
			
 
				-</PRE></TT></DL>
			
 
				-(Blanks in these examples are to improve readability;
			
 
				-<TT>sam</TT>
			
 
				-neither requires nor interprets them.)
			
 
				-This searches the entire file
			
 
				-(<TT>0,$</TT>)
			
 
				-for occurrences of the string
			
 
				-<TT>Peter</TT>,
			
 
				-and runs the
			
 
				-<TT>d</TT>
			
 
				-command with dot set to each such occurrence.
			
 
				-(By contrast, the comparable
			
 
				-<TT>ed</TT>
			
 
				-command would delete all
			
 
				-<I>lines</I>
			
 
				-containing
			
 
				-<TT>Peter</TT>;
			
 
				-<TT>sam</TT>
			
 
				-deletes only the
			
 
				-<TT>Peters</TT>.)
			
 
				-The address
			
 
				-<TT>0,$</TT>
			
 
				-is commonly used, and may be abbreviated to just a comma.
			
 
				-As another example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/Peter/ p
			
 
				-</PRE></TT></DL>
			
 
				-prints a list of
			
 
				-<TT>Peters,</TT>
			
 
				-one for each appearance in the file, with no intervening text (not even newlines
			
 
				-to separate the instances).
			
 
				-</P>
			
 
				-<P>
			
 
				-Of course, the text extracted by
			
 
				-<TT>x</TT>
			
 
				-may be selected by a regular expression,
			
 
				-which complicates deciding what set of matches is chosen &#8212;
			
 
				-matches may overlap.  This is resolved by generating the matches
			
 
				-starting from the beginning of dot using the leftmost-longest rule,
			
 
				-and searching for each match starting from the end of the previous one.
			
 
				-Regular expressions may also match null strings, but a null match
			
 
				-adjacent to a non-null match is never selected; at least one character
			
 
				-must intervene.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, c/AAA/
			
 
				-x/B*/ c/-/
			
 
				-, p
			
 
				-</PRE></TT></DL>
			
 
				-produces as output
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				--A-A-A-
			
 
				-</PRE></TT></DL>
			
 
				-because the pattern
			
 
				-<TT>B*</TT>
			
 
				-matches the null strings separating the
			
 
				-<TT>A</TT>'s.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>x</TT>
			
 
				-command has a complement,
			
 
				-<TT>y</TT>,
			
 
				-with similar syntax, that executes the command with dot set to the text
			
 
				-<I>between</I>
			
 
				-the matches of the expression.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, c/AAA/
			
 
				-y/A/ c/-/
			
 
				-, p
			
 
				-</PRE></TT></DL>
			
 
				-produces the same result as the example above.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>x</TT>
			
 
				-and
			
 
				-<TT>y</TT>
			
 
				-commands are looping constructs, and
			
 
				-<TT>sam</TT>
			
 
				-has a pair of conditional commands to go with them.
			
 
				-They have similar syntax:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-g/<I>expression</I>/ <I>command</I>
			
 
				-</PRE></TT></DL>
			
 
				-(guard)
			
 
				-runs the command exactly once if dot contains a match of the expression.
			
 
				-This is different from
			
 
				-<TT>x</TT>,
			
 
				-which runs the command for
			
 
				-<I>each</I>
			
 
				-match:
			
 
				-<TT>x</TT>
			
 
				-loops;
			
 
				-<TT>g</TT>
			
 
				-merely tests, without changing the value of dot.
			
 
				-Thus,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/Peter/ d
			
 
				-</PRE></TT></DL>
			
 
				-deletes all occurrences of
			
 
				-<TT>Peter</TT>,
			
 
				-but
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, g/Peter/ d
			
 
				-</PRE></TT></DL>
			
 
				-deletes the whole file (reduces it to a null string) if
			
 
				-<TT>Peter</TT>
			
 
				-occurs anywhere in the text.
			
 
				-The complementary conditional is
			
 
				-<TT>v</TT>,
			
 
				-which runs the command if there is
			
 
				-<I>no</I>
			
 
				-match of the expression.
			
 
				-</P>
			
 
				-<P>
			
 
				-These control-structure-like commands may be composed to construct more
			
 
				-involved operations.  For example, to print those lines of text that
			
 
				-contain the string
			
 
				-<TT>Peter</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/.*\n/ g/Peter/ p
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>x</TT>
			
 
				-breaks the file into lines, the
			
 
				-<TT>g</TT>
			
 
				-selects those lines containing
			
 
				-<TT>Peter</TT>,
			
 
				-and the
			
 
				-<TT>p</TT>
			
 
				-prints them.
			
 
				-This command gives an address for the
			
 
				-<TT>x</TT>
			
 
				-command (the whole file), but because
			
 
				-<TT>g</TT>
			
 
				-does not have an explicit address, it applies to the value of
			
 
				-dot produced by the
			
 
				-<TT>x</TT>
			
 
				-command, that is, to each line.
			
 
				-All commands in
			
 
				-<TT>sam</TT>
			
 
				-except for the command to write a file to disc use dot for the
			
 
				-default address.
			
 
				-</P>
			
 
				-<P>
			
 
				-Composition may be continued indefinitely.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/.*\n/ g/Peter/ v/SaltPeter/ p
			
 
				-</PRE></TT></DL>
			
 
				-prints those lines containing
			
 
				-<TT>Peter</TT>
			
 
				-but
			
 
				-<I>not</I>
			
 
				-those containing
			
 
				-<TT>SaltPeter</TT>.
			
 
				-</P>
			
 
				-<H4>Structural Regular Expressions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Unlike other UNIX text editors,
			
 
				-including the non-interactive ones such as
			
 
				-<TT>sed</TT>
			
 
				-and
			
 
				-<TT>awk</TT>,<sup>7</sup>
			
 
				-<TT>sam</TT>
			
 
				-is good for manipulating files with multi-line `records.'
			
 
				-An example is an on-line phone book composed of records,
			
 
				-separated by blank lines, of the form
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Herbert Tic
			
 
				-44 Turnip Ave., Endive, NJ
			
 
				-201-5555642
			
 
				-
			
 
				-Norbert Twinge
			
 
				-16 Potato St., Cabbagetown, NJ
			
 
				-201-5553145
			
 
				-
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-The format may be encoded as a regular expression:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-(.+\n)+
			
 
				-</PRE></TT></DL>
			
 
				-that is, a sequence of one or more non-blank lines.
			
 
				-The command to print Mr. Tic's entire record is then
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/(.+\n)+/ g/^Herbert Tic$/ p
			
 
				-</PRE></TT></DL>
			
 
				-and that to extract just the phone number is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/(.+\n)+/ g/^Herbert Tic$/ x/^[0-9]*-[0-9]*\n/ p
			
 
				-</PRE></TT></DL>
			
 
				-The latter command breaks the file into records,
			
 
				-chooses Mr. Tic's record,
			
 
				-extracts the phone number from the record,
			
 
				-and finally prints the number.
			
 
				-<P>
			
 
				-A more involved problem is that of
			
 
				-renaming a particular variable, say
			
 
				-<TT>n</TT>,
			
 
				-to
			
 
				-<TT>num</TT>
			
 
				-in a C program.
			
 
				-The obvious first attempt,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/n/ c/num/
			
 
				-</PRE></TT></DL>
			
 
				-is badly flawed: it changes not only the variable
			
 
				-<TT>n</TT>
			
 
				-but any letter
			
 
				-<TT>n</TT>
			
 
				-that appears.
			
 
				-We need to extract all the variables, and select those that match
			
 
				-<TT>n</TT>
			
 
				-and only
			
 
				-<TT>n</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, x/[A-Za-z_][A-Za-z_0-9]*/ g/n/ v/../ c/num/
			
 
				-</PRE></TT></DL>
			
 
				-The pattern
			
 
				-<TT>[A-Za-z_][A-Za-z_0-9]*</TT>
			
 
				-matches C identifiers.
			
 
				-Next
			
 
				-<TT>g/n/</TT>
			
 
				-selects those containing an
			
 
				-<TT>n</TT>.
			
 
				-Then
			
 
				-<TT>v/../</TT>
			
 
				-rejects those containing two (or more) characters, and finally
			
 
				-<TT>c/num/</TT>
			
 
				-changes the remainder (identifiers
			
 
				-<TT>n</TT>)
			
 
				-to
			
 
				-<TT>num</TT>.
			
 
				-This version clearly works much better, but there may still be problems.
			
 
				-For example, in C character and string constants, the sequence
			
 
				-<TT>\n
			
 
				-is interpreted as a newline character, and we don't want to change it to
			
 
				-</TT><TT>\num.</TT><TT>
			
 
				-This problem can be forestalled with a
			
 
				-</TT><TT>y</TT><TT>
			
 
				-command:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-, y/\\n/ x/[A-Za-z_][A-Za-z_0-9]*/ g/n/ v/../ c/num/
			
 
				-</PRE></TT></DL>
			
 
				-(the second
			
 
				-</TT><TT>\</TT><TT>
			
 
				-is necessary because of lexical conventions in regular expressions),
			
 
				-or we could even reject character constants and strings outright:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-,y/'[^']*'/ y/"[^"]*"/ x/[A-Za-z_][A-Za-z_0-9]*/ g/n/ v/../ c/num/
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-</TT><TT>y</TT><TT>
			
 
				-commands in this version exclude from consideration all character constants
			
 
				-and strings.
			
 
				-The only remaining problem is to deal with the possible occurrence of
			
 
				-</TT><TT>\'</TT><TT>
			
 
				-or
			
 
				-</TT><TT>\"</TT><TT>
			
 
				-within these sequences, but it's easy to see how to resolve this difficulty.
			
 
				-</P>
			
 
				-</TT><P>
			
 
				-The point of these composed commands is successive refinement.
			
 
				-A simple version of the command is tried, and if it's not good enough,
			
 
				-it can be honed by adding a clause or two.
			
 
				-(Mistakes can be undone; see below.
			
 
				-Also, the mouse language makes it unnecessary to retype the command each time.)
			
 
				-The resulting chains of commands are somewhat reminiscent of
			
 
				-shell pipelines.<sup>7</sup>
			
 
				-Unlike pipelines, though, which pass along modified
			
 
				-<I>data</I>,
			
 
				-<TT>sam</TT>
			
 
				-commands pass a
			
 
				-<I>view</I>
			
 
				-of the data.
			
 
				-The text at each step of the command is the same, but which pieces
			
 
				-are selected is refined step by step until the correct piece is
			
 
				-available to the final step of the command line, which ultimately makes the change.
			
 
				-</P>
			
 
				-<P>
			
 
				-In other UNIX programs, regular expressions are used only for selection,
			
 
				-as in the
			
 
				-<TT>sam</TT>
			
 
				-<TT>g</TT>
			
 
				-command, never for extraction as in the
			
 
				-<TT>x</TT>
			
 
				-or
			
 
				-<TT>y</TT>
			
 
				-command.
			
 
				-For example, patterns in
			
 
				-<TT>awk</TT><sup>7</sup>
			
 
				-are used to select lines to be operated on, but cannot be used
			
 
				-to describe the format of the input text, or to handle newline-free text.
			
 
				-The use of regular expressions to describe the structure of a piece
			
 
				-of text rather than its contents, as in the
			
 
				-<TT>x</TT>
			
 
				-command, 
			
 
				-has been given a name:
			
 
				-structural regular expressions.
			
 
				-When they are composed, as in the above example,
			
 
				-they are pleasantly expressive.
			
 
				-Their use is discussed at greater length elsewhere.<sup>10</sup>
			
 
				-</P>
			
 
				-<P>
			
 
				-</P>
			
 
				-<H4>Multiple files
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-has a few other commands, mostly relating to input and output.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-e discfilename
			
 
				-</PRE></TT></DL>
			
 
				-replaces the contents and name of the current file with those of the named
			
 
				-disc file;
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-w discfilename
			
 
				-</PRE></TT></DL>
			
 
				-writes the contents to the named disc file; and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-r discfilename
			
 
				-</PRE></TT></DL>
			
 
				-replaces dot with the contents of the named disc file.
			
 
				-All these commands use the current file's name if none is specified.
			
 
				-Finally,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-f discfilename
			
 
				-</PRE></TT></DL>
			
 
				-changes the name associated with the file and displays the result:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-'-. discfilename
			
 
				-</PRE></TT></DL>
			
 
				-This output is called the file's
			
 
				-menu line,
			
 
				-because it is the contents of the file's line in the button 3 menu (described
			
 
				-in the
			
 
				-next section).
			
 
				-The first three characters are a concise notation for the state of the file.
			
 
				-The apostrophe signifies that the file is modified.
			
 
				-The minus sign indicates the number of windows
			
 
				-open on the file (see the next section):
			
 
				-<TT>-</TT>
			
 
				-means none,
			
 
				-<TT>+</TT>
			
 
				-means one, and
			
 
				-<TT>*</TT>
			
 
				-means more than one.
			
 
				-Finally, the period indicates that this is the current file.
			
 
				-These characters are useful for controlling the
			
 
				-<TT>X</TT>
			
 
				-command, described shortly.
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-may be started with a set of disc files (such as all the source for
			
 
				-a program) by invoking it with a list of file names as arguments, and
			
 
				-more may be added or deleted on demand.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-B discfile1 discfile2 ...
			
 
				-</PRE></TT></DL>
			
 
				-adds the named files to
			
 
				-<TT>sam</TT>'s
			
 
				-list, and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-D discfile1 discfile2 ...
			
 
				-</PRE></TT></DL>
			
 
				-removes them from
			
 
				-<TT>sam</TT>'s
			
 
				-memory (without effect on associated disc files).
			
 
				-Both these commands have a syntax for using the shell<sup>7</sup>
			
 
				-(the UNIX command interpreter) to generate the lists:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-B &#60;echo *.c
			
 
				-</PRE></TT></DL>
			
 
				-will add all C source files, and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-B &#60;grep -l variable *.c
			
 
				-</PRE></TT></DL>
			
 
				-will add all C source files referencing a particular variable
			
 
				-(the UNIX command
			
 
				-<TT>grep -l</TT>
			
 
				-lists all files in its arguments that contain matches of
			
 
				-the specified regular expression).
			
 
				-Finally,
			
 
				-<TT>D</TT>
			
 
				-without arguments deletes the current file.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are two ways to change which file is current:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-b filename
			
 
				-</PRE></TT></DL>
			
 
				-makes the named file current.
			
 
				-The
			
 
				-<TT>B</TT>
			
 
				-command
			
 
				-does the same, but also adds any new files to
			
 
				-<TT>sam</TT>'s
			
 
				-list.
			
 
				-(In practice, of course, the current file
			
 
				-is usually chosen by mouse actions, not by textual commands.)
			
 
				-The other way is to use a form of address that refers to files:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-"<I>expression</I>" <I>address</I>
			
 
				-</PRE></TT></DL>
			
 
				-refers to the address evaluated in the file whose menu line
			
 
				-matches the expression (there must be exactly one match).
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-"peter.c" 3
			
 
				-</PRE></TT></DL>
			
 
				-refers to the third line of the file whose name matches
			
 
				-<TT>peter.c</TT>.
			
 
				-This is most useful in the move
			
 
				-(<TT>m</TT>)
			
 
				-and copy
			
 
				-(<TT>t</TT>)
			
 
				-commands:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-0,$ t "peter.c" 0
			
 
				-</PRE></TT></DL>
			
 
				-makes a copy of the current file at the beginning of
			
 
				-<TT>peter.c</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>X</TT>
			
 
				-command
			
 
				-is a looping construct, like
			
 
				-<TT>x</TT>,
			
 
				-that refers to files instead of strings:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-X/<I>expression</I>/ <I>command</I>
			
 
				-</PRE></TT></DL>
			
 
				-runs the command in all
			
 
				-files whose menu lines match the expression.  The best example is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-X/'/ w
			
 
				-</PRE></TT></DL>
			
 
				-which writes to disc all modified files.
			
 
				-<TT>Y</TT>
			
 
				-is the complement of
			
 
				-<TT>X</TT>:
			
 
				-it runs the command on all files whose menu lines don't match the expression:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Y/\.c/ D
			
 
				-</PRE></TT></DL>
			
 
				-deletes all files that don't have
			
 
				-<TT>.c</TT>
			
 
				-in their names, that is, it keeps all C source files and deletes the rest.
			
 
				-</P>
			
 
				-<P>
			
 
				-Braces allow commands to be grouped, so
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-{
			
 
				-	<I>command1</I>
			
 
				-	<I>command2</I>
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-is syntactically a single command that runs two commands.
			
 
				-Thus,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-X/\.c/ ,g/variable/ {
			
 
				-	f
			
 
				-	, x/.*\n/ g/variable/ p
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-finds all occurrences of
			
 
				-<TT>variable</TT>
			
 
				-in C source files, and prints
			
 
				-out the file names and lines of each match.
			
 
				-The precise semantics of compound operations is discussed in the implementation
			
 
				-sections below.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally,
			
 
				-the undo command,
			
 
				-<TT>u</TT>,
			
 
				-undoes the last command,
			
 
				-no matter how many files were affected.
			
 
				-Multiple undo operations move further back in time, so
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-u
			
 
				-u
			
 
				-</PRE></TT></DL>
			
 
				-(which may be abbreviated
			
 
				-<TT>u2</TT>)
			
 
				-undoes the last two commands.  An undo may not be undone, however, nor
			
 
				-may any command that adds or deletes files.
			
 
				-Everything else is undoable, though, including for example
			
 
				-<TT>e</TT>
			
 
				-commands:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-e filename
			
 
				-u
			
 
				-</PRE></TT></DL>
			
 
				-restores the state of the file completely, including its name, dot,
			
 
				-and modified bit.  Because of the undo, potentially dangerous commands
			
 
				-are not guarded by confirmations.  Only
			
 
				-<TT>D</TT>,
			
 
				-which destroys the information necessary to restore itself, is protected.
			
 
				-It will not delete a modified file, but a second
			
 
				-<TT>D</TT>
			
 
				-of the same file will succeed regardless.
			
 
				-The
			
 
				-<TT>q</TT>
			
 
				-command, which exits
			
 
				-<TT>sam</TT>,
			
 
				-is similarly guarded.
			
 
				-</P>
			
 
				-<H4>Mouse Interface
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-is most commonly run
			
 
				-connected to a bitmap display and mouse for interactive editing.
			
 
				-The only difference in the command language
			
 
				-between regular, mouse-driven
			
 
				-<TT>sam</TT>
			
 
				-and
			
 
				-<TT>sam -d</TT>
			
 
				-is that if an address
			
 
				-is provided without a command,
			
 
				-<TT>sam -d</TT>
			
 
				-will print the text referenced by the address, but
			
 
				-regular
			
 
				-<TT>sam</TT>
			
 
				-will highlight it on the screen &#8212; in fact,
			
 
				-dot is always highlighted (see Figure 2).
			
 
				-<br><img src="fig3.ps.11763.gif"><br>
			
 
				-<br>
			
 
				-<I>Figure 2. A
			
 
				-</I><TT>sam</TT><I>
			
 
				-window.  The scroll bar down the left
			
 
				-represents the file, with the bubble showing the fraction
			
 
				-visible in the window.
			
 
				-The scroll bar may be manipulated by the mouse for convenient browsing.
			
 
				-The current text,
			
 
				-which is highlighted, need not fit on a line.  Here it consists of one partial
			
 
				-line, one complete line, and a final partial line.
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I><br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-<P>
			
 
				-Each file may have zero or more windows open on the display.
			
 
				-At any time, only one window in all of
			
 
				-<TT>sam</TT>
			
 
				-is the
			
 
				-current window,
			
 
				-that is, the window to which typing and mouse actions refer;
			
 
				-this may be the
			
 
				-<TT>sam</TT>
			
 
				-window (that in which commands may be typed)
			
 
				-or one of the file windows.
			
 
				-When a file has multiple windows, the image of the file in each window
			
 
				-is always kept up to date.
			
 
				-The current file is the last file affected by a command,
			
 
				-so if the
			
 
				-<TT>sam</TT>
			
 
				-window is current,
			
 
				-the current window is not a window on the current file.
			
 
				-However, each window on a file has its own value of dot,
			
 
				-and when switching between windows on a single file,
			
 
				-the file's value of dot is changed to that of the window.
			
 
				-Thus, flipping between windows behaves in the obvious, convenient way.
			
 
				-</P>
			
 
				-<P>
			
 
				-The mouse on the Blit has three buttons, numbered left to right.
			
 
				-Button 3 has a list of commands to manipulate windows,
			
 
				-followed by a list of `menu lines' exactly as printed by the
			
 
				-<TT>f</TT>
			
 
				-command, one per file (not one per window).
			
 
				-These menu lines are sorted by file name.
			
 
				-If the list is long, the Blit menu software will make it more manageable
			
 
				-by generating a scrolling menu instead of an unwieldy long list.
			
 
				-Using the menu to select a file from the list makes that file the current
			
 
				-file, and the most recently current window in that file the current window.
			
 
				-But if that file is already current, selecting it in the menu cycles through
			
 
				-the windows on the file; this simple trick avoids a special menu to
			
 
				-choose windows on a file.
			
 
				-If there is no window open on the file,
			
 
				-<TT>sam</TT>
			
 
				-changes the mouse cursor to prompt the user to create one.
			
 
				-</P>
			
 
				-<P>
			
 
				-The commands on the button 3 menu are straightforward (see Figure 3), and
			
 
				-are like the commands to manipulate windows in
			
 
				-<TT>mux</TT>,<sup>8</sup>
			
 
				-the Blit's window system.
			
 
				-<TT>New</TT>
			
 
				-makes a new file, and gives it one empty window, whose size is determined
			
 
				-by a rectangle swept by the mouse.
			
 
				-<TT>Zerox</TT>
			
 
				-prompts for a window to be selected, and
			
 
				-makes a clone of that window; this is how multiple windows are created on one file.
			
 
				-<TT>Reshape</TT>
			
 
				-changes the size of the indicated window, and
			
 
				-<TT>close</TT>
			
 
				-deletes it.  If that is the last window open on the file,
			
 
				-<TT>close</TT>
			
 
				-first does a
			
 
				-<TT>D</TT>
			
 
				-command on the file.
			
 
				-<TT>Write</TT>
			
 
				-is identical to a
			
 
				-<TT>w</TT>
			
 
				-command on the file; it is in the menu purely for convenience.
			
 
				-Finally,
			
 
				-<TT>~~sam~~</TT>
			
 
				-is a menu item that appears between the commands and the file names.
			
 
				-Selecting it makes the
			
 
				-<TT>sam</TT>
			
 
				-window the current window,
			
 
				-causing subsequent typing to be interpreted as commands.
			
 
				-<br><img src="fig2.ps.11764.gif"><br>
			
 
				-<br>
			
 
				-<I>Figure 3. The menu on button 3.
			
 
				-The black rectangle on the left is a scroll bar; the menu is limited to
			
 
				-the length shown to prevent its becoming unwieldy.
			
 
				-Above the
			
 
				-</I><TT>~~sam~~</TT><I>
			
 
				-line is a list of commands;
			
 
				-beneath it is a list of files, presented exactly as with the
			
 
				-</I><TT>f</TT><I>
			
 
				-command.
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I><br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-When
			
 
				-<TT>sam</TT>
			
 
				-requests that a window be swept, in response to
			
 
				-<TT>new</TT>,
			
 
				-<TT>zerox</TT>
			
 
				-or
			
 
				-<TT>reshape</TT>,
			
 
				-it changes the mouse cursor from the usual arrow to a box with
			
 
				-a small arrow.
			
 
				-In this state, the mouse may be used to indicate an arbitrary rectangle by
			
 
				-pressing button 3 at one corner and releasing it at the opposite corner.
			
 
				-More conveniently,
			
 
				-button 3 may simply be clicked,
			
 
				-whereupon
			
 
				-<TT>sam</TT>
			
 
				-creates the maximal rectangle that contains the cursor
			
 
				-and abuts the
			
 
				-<TT>sam</TT>
			
 
				-window.
			
 
				-By placing the
			
 
				-<TT>sam</TT>
			
 
				-window in the middle of the screen, the user can define two regions (one above,
			
 
				-one below) in which stacked fully-overlapping
			
 
				-windows can be created with minimal fuss (see Figure 1).
			
 
				-This simple user interface trick makes window creation noticeably easier.
			
 
				-</P>
			
 
				-<P>
			
 
				-The cut-and-paste editor is essentially the same as that in Smalltalk-80.<sup>11</sup>
			
 
				-The text in dot is always highlighted on the screen.
			
 
				-When a character is typed it replaces dot, and sets dot to the null
			
 
				-string after the character.  Thus, ordinary typing inserts text.
			
 
				-Button 1 is used for selection:
			
 
				-pressing the button, moving the mouse, and lifting the button
			
 
				-selects (sets dot to) the text between the points where the
			
 
				-button was pressed and released.
			
 
				-Pressing and releasing at the same point selects a null string; this
			
 
				-is called clicking.  Clicking twice quickly, or
			
 
				-double clicking,
			
 
				-selects larger objects;
			
 
				-for example, double clicking in a word selects the word,
			
 
				-double clicking just inside an opening bracket selects the text
			
 
				-contained in the brackets (handling nested brackets correctly),
			
 
				-and similarly for
			
 
				-parentheses, quotes, and so on.
			
 
				-The double-clicking rules reflect a bias toward
			
 
				-programmers.
			
 
				-If
			
 
				-<TT>sam</TT>
			
 
				-were intended more for word processing, double-clicks would probably
			
 
				-select linguistic structures such as sentences.
			
 
				-</P>
			
 
				-<P>
			
 
				-If button 1 is pressed outside the current window, it makes the indicated
			
 
				-window current.
			
 
				-This is the easiest way to switch between windows and files.
			
 
				-</P>
			
 
				-<P>
			
 
				-Pressing button 2 brings up a menu of editing functions (see Figure 4).
			
 
				-These mostly apply to the selected text:
			
 
				-<TT>cut</TT>
			
 
				-deletes the selected text, and remembers it in a hidden buffer called the
			
 
				-snarf buffer,
			
 
				-<TT>paste</TT>
			
 
				-replaces the selected text by the contents of the snarf buffer,
			
 
				-<TT>snarf</TT>
			
 
				-just copies the selected text to the snarf buffer,
			
 
				-<TT>look</TT>
			
 
				-searches forward for the next literal occurrence of the selected text, and
			
 
				-<TT>&#60;mux&#62;</TT>
			
 
				-exchanges snarf buffers with the window system in which
			
 
				-<TT>sam</TT>
			
 
				-is running.
			
 
				-Finally, the last regular expression used appears as a menu entry
			
 
				-to search
			
 
				-forward for the next occurrence of a match for the expression.
			
 
				-<br><img src="fig4.ps.11765.gif"><br>
			
 
				-<br>
			
 
				-<I>Figure 4. The menu on button 2.
			
 
				-The bottom entry tracks the most recently used regular expression, which may
			
 
				-be literal text.
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I><br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The relationship between the command language and the mouse language is
			
 
				-entirely due to the equality of dot and the selected text chosen
			
 
				-with button 1 on the mouse.
			
 
				-For example, to make a set of changes in a C subroutine, dot can be
			
 
				-set by double clicking on the left brace that begins the subroutine,
			
 
				-which sets dot for the command language.
			
 
				-An address-free command then typed in the
			
 
				-<TT>sam</TT>
			
 
				-window will apply only to the text between the opening and closing
			
 
				-braces of the function.
			
 
				-The idea is to select what you want, and then say what you want
			
 
				-to do with it, whether invoked by a menu selection or by a typed command.
			
 
				-And of course, the value of dot is highlighted on
			
 
				-the display after the command completes.
			
 
				-This relationship between mouse interface and command language
			
 
				-is clumsy to explain, but comfortable, even natural, in practice.
			
 
				-</P>
			
 
				-<H4>The Implementation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The next few sections describe how
			
 
				-<TT>sam</TT>
			
 
				-is put together, first the host part,
			
 
				-then the inter-component communication,
			
 
				-then the terminal part.
			
 
				-After explaining how the command language is implemented,
			
 
				-the discussion follows (roughly) the path of a character
			
 
				-from the temporary file on disc to the screen.
			
 
				-The presentation centers on the data structures,
			
 
				-because that is how the program was designed and because
			
 
				-the algorithms are easy to provide, given the right data
			
 
				-structures.
			
 
				-<H4>Parsing and execution
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The command language is interpreted by parsing each command with a
			
 
				-table-driven recursive
			
 
				-descent parser, and when a complete command is assembled, invoking a top-down
			
 
				-executor.
			
 
				-Most editors instead employ a simple character-at-a-time
			
 
				-lexical scanner.
			
 
				-Use of a parser makes it
			
 
				-easy and unambiguous to detect when a command is complete,
			
 
				-which has two advantages.
			
 
				-First, escape conventions such as backslashes to quote
			
 
				-multiple-line commands are unnecessary;  if the command isn't finished,
			
 
				-the parser keeps reading.  For example, a multiple-line append driven by an
			
 
				-<TT>x</TT>
			
 
				-command is straightforward:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-x/.*\n/ g/Peter/ a
			
 
				-one line about Peter
			
 
				-another line about Peter
			
 
				-.
			
 
				-</PRE></TT></DL>
			
 
				-Other UNIX editors would require a backslash after all but the last line.
			
 
				-<P>
			
 
				-The other advantage is specific to the two-process structure of
			
 
				-<TT>sam</TT>.
			
 
				-The host process must decide when a command is completed so the
			
 
				-command interpreter can be called.  This problem is easily resolved
			
 
				-by having the lexical analyzer read the single stream of events from the
			
 
				-terminal, directly executing all typing and mouse commands,
			
 
				-but passing to the parser characters typed to the
			
 
				-<TT>sam</TT>
			
 
				-command window.
			
 
				-This scheme is slightly complicated by the availability of cut-and-paste
			
 
				-editing in the
			
 
				-<TT>sam</TT>
			
 
				-window, but that difficulty is resolved by applying the rules
			
 
				-used in
			
 
				-<TT>mux</TT>:
			
 
				-when a newline is typed to the
			
 
				-<TT>sam</TT>
			
 
				-window, all text between the newline and the previously typed newline
			
 
				-is made available to the parser.
			
 
				-This permits arbitrary editing to be done to a command before
			
 
				-typing newline and thereby requesting execution.
			
 
				-</P>
			
 
				-<P>
			
 
				-The parser is driven by a table because the syntax of addresses
			
 
				-and commands is regular enough
			
 
				-to be encoded compactly.  There are few special cases, such as the
			
 
				-replacement text in a substitution, so the syntax of almost all commands
			
 
				-can be encoded with a few flags.
			
 
				-These include whether the command allows an address (for example,
			
 
				-<TT>e</TT>
			
 
				-does not), whether it takes a regular expression (as in
			
 
				-<TT>x</TT>
			
 
				-and
			
 
				-<TT>s</TT>),
			
 
				-whether it takes replacement text (as in
			
 
				-<TT>c</TT>
			
 
				-or
			
 
				-<TT>i</TT>),
			
 
				-which may be multi-line, and so on.
			
 
				-The internal syntax of regular expressions is handled by a separate
			
 
				-parser; a regular expression is a leaf of the command parse tree.
			
 
				-Regular expressions are discussed fully in the next section.
			
 
				-</P>
			
 
				-<P>
			
 
				-The parser table also has information about defaults, so the interpreter
			
 
				-is always called with a complete tree.  For example, the parser fills in
			
 
				-the implicit
			
 
				-<TT>0</TT>
			
 
				-and
			
 
				-<TT>$</TT>
			
 
				-in the abbreviated address
			
 
				-<TT>,</TT>
			
 
				-(comma),
			
 
				-inserts a
			
 
				-<TT>+</TT>
			
 
				-to the left of an unadorned regular expression in an address,
			
 
				-and provides the usual default address
			
 
				-<TT>.</TT>
			
 
				-(dot) for commands that expect an address but are not given one.
			
 
				-</P>
			
 
				-<P>
			
 
				-Once a complete command is parsed, the evaluation is easy.
			
 
				-The address is evaluated left-to-right starting from the value of dot,
			
 
				-with a mostly ordinary expression evaluator.
			
 
				-Addresses, like many of the data structures in
			
 
				-<TT>sam</TT>,
			
 
				-are held in a C structure and passed around by value:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef long Posn;    /* Position in a file */
			
 
				-typedef struct Range{
			
 
				-        Posn    p1, p2;
			
 
				-}Range;
			
 
				-typedef struct Address{
			
 
				-        Range   r;
			
 
				-        File    *f;
			
 
				-}Address;
			
 
				-</PRE></TT></DL>
			
 
				-An address is encoded as a substring (character positions
			
 
				-<TT>p1</TT>
			
 
				-to
			
 
				-<TT>p2</TT>)
			
 
				-in a file
			
 
				-<TT>f</TT>.
			
 
				-(The data type
			
 
				-<TT>File</TT>
			
 
				-is described in detail below.)
			
 
				-</P>
			
 
				-<P>
			
 
				-The address interpreter is an
			
 
				-<TT>Address</TT>-valued
			
 
				-function that traverses the parse tree describing an address (the
			
 
				-parse tree for the address has type
			
 
				-<TT>Addrtree</TT>):
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Address
			
 
				-address(ap, a, sign)
			
 
				-	Addrtree *ap;
			
 
				-	Address a;
			
 
				-	int sign;
			
 
				-{
			
 
				-	Address a2;
			
 
				-	do
			
 
				-		switch(ap-&#62;type){
			
 
				-		case '.':
			
 
				-			a=a.f-&#62;dot;
			
 
				-			break;
			
 
				-		case '$':
			
 
				-			a.r.p1=a.r.p2=a.f-&#62;nbytes;
			
 
				-			break;
			
 
				-		case '"':	
			
 
				-			a=matchfile(a, ap-&#62;aregexp)-&#62;dot; 
			
 
				-			break;
			
 
				-		case ',':
			
 
				-			a2=address(ap-&#62;right, a, 0);
			
 
				-			a=address(ap-&#62;left, a, 0);
			
 
				-			if(a.f!=a2.f || a2.r.p2&#60;a.r.p1)
			
 
				-				error(Eorder);
			
 
				-			a.r.p2=a2.r.p2;
			
 
				-			return a;
			
 
				-		/* and so on */
			
 
				-		}
			
 
				-	while((ap=ap-&#62;right)!=0);
			
 
				-	return a;
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-Throughout, errors are handled by a non-local
			
 
				-<TT>goto</TT>
			
 
				-(a
			
 
				-<TT>setjmp/longjmp</TT>
			
 
				-in C terminology)
			
 
				-hidden in a routine called
			
 
				-<TT>error</TT>
			
 
				-that immediately aborts the execution, retracts any
			
 
				-partially made changes (see the section below on `undoing'), and
			
 
				-returns to the top level of the parser.
			
 
				-The argument to
			
 
				-<TT>error</TT>
			
 
				-is an enumeration type that
			
 
				-is translated to a terse but possibly helpful
			
 
				-message such as `?addresses out of order.'
			
 
				-Very common messages are kept short; for example the message for
			
 
				-a failed regular expression search is `?search.'
			
 
				-</P>
			
 
				-<P>
			
 
				-Character addresses such as
			
 
				-<TT>#3</TT>
			
 
				-are trivial to implement, as the
			
 
				-<TT>File</TT>
			
 
				-data structure is accessible by character number.
			
 
				-However,
			
 
				-<TT>sam</TT>
			
 
				-keeps no information about the position of newlines &#8212; it is too
			
 
				-expensive to track dynamically &#8212; so line addresses are computed by reading
			
 
				-the file, counting newlines.  Except in very large files, this has proven
			
 
				-acceptable: file access is fast enough to make the technique practical,
			
 
				-and lines are not central to the structure of the command language.
			
 
				-</P>
			
 
				-<P>
			
 
				-The command interpreter, called
			
 
				-<TT>cmdexec</TT>,
			
 
				-is also straightforward.  The parse table includes a
			
 
				-function to call to interpret a particular command.  That function
			
 
				-receives as arguments
			
 
				-the calculated address
			
 
				-for the command
			
 
				-and the command tree (of type
			
 
				-<TT>Cmdtree</TT>),
			
 
				-which may contain information such as the subtree for compound commands.
			
 
				-Here, for example, is the function for the
			
 
				-<TT>g</TT>
			
 
				-and
			
 
				-<TT>v</TT>
			
 
				-commands:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-int
			
 
				-g_cmd(a, cp)
			
 
				-	Address a;
			
 
				-	Cmdtree *cp;
			
 
				-{
			
 
				-	compile(cp-&#62;regexp);
			
 
				-	if(execute(a.f, a.r.p1, a.r.p2)!=(cp-&#62;cmdchar=='v')){
			
 
				-		a.f-&#62;dot=a;
			
 
				-		return cmdexec(a, cp-&#62;subcmd);
			
 
				-	}
			
 
				-	return TRUE;	/* cause execution to continue */
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-(<TT>Compile</TT>
			
 
				-and
			
 
				-<TT>execute</TT>
			
 
				-are part of the regular expression code, described in the next section.)
			
 
				-Because the parser and the
			
 
				-<TT>File</TT>
			
 
				-data structure do most of the work, most commands
			
 
				-are similarly brief.
			
 
				-</P>
			
 
				-<H4>Regular expressions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The regular expression code in
			
 
				-<TT>sam</TT>
			
 
				-is an interpreted, rather than compiled on-the-fly, implementation of Thompson's
			
 
				-non-deterministic finite automaton algorithm.<sup>12</sup>
			
 
				-The syntax and semantics of the expressions are as in the UNIX program
			
 
				-<TT>egrep</TT>,
			
 
				-including alternation, closures, character classes, and so on.
			
 
				-The only changes in the notation are two additions:
			
 
				-<TT>\n</TT>
			
 
				-is translated to, and matches, a newline character, and
			
 
				-<TT>@</TT>
			
 
				-matches any character.  In
			
 
				-<TT>egrep</TT>,
			
 
				-the character
			
 
				-<TT>.</TT>
			
 
				-matches any character except newline, and in
			
 
				-<TT>sam</TT>
			
 
				-the same rule seemed safest, to prevent idioms like
			
 
				-<TT>.*</TT>
			
 
				-from spanning newlines.
			
 
				-<TT>Egrep</TT>
			
 
				-expressions are arguably too complicated for an interactive editor &#8212;
			
 
				-certainly it would make sense if all the special characters were two-character
			
 
				-sequences, so that most of the punctuation characters wouldn't have
			
 
				-peculiar meanings &#8212; but for an interesting command language, full
			
 
				-regular expressions are necessary, and
			
 
				-<TT>egrep</TT>
			
 
				-defines the full regular expression syntax for UNIX programs.
			
 
				-Also, it seemed superfluous to define a new syntax, since various UNIX programs
			
 
				-(<TT>ed</TT>,
			
 
				-<TT>egrep</TT>
			
 
				-and
			
 
				-<TT>vi</TT>)
			
 
				-define too many already.
			
 
				-<P>
			
 
				-The expressions are compiled by a routine,
			
 
				-<TT>compile</TT>,
			
 
				-that generates the description of the non-deterministic finite state machine.
			
 
				-A second routine,
			
 
				-<TT>execute</TT>,
			
 
				-interprets the machine to generate the leftmost-longest match of the
			
 
				-expression in a substring of the file.
			
 
				-The algorithm is described elsewhere.<sup>12,13</sup>
			
 
				-<TT>Execute</TT>
			
 
				-reports
			
 
				-whether a match was found, and sets a global variable,
			
 
				-of type
			
 
				-<TT>Range</TT>,
			
 
				-to the substring matched.
			
 
				-</P>
			
 
				-<P>
			
 
				-A trick is required to evaluate the expression in reverse, such as when
			
 
				-searching backwards for an expression.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				--/P.*r/
			
 
				-</PRE></TT></DL>
			
 
				-looks backwards through the file for a match of the expression.
			
 
				-The expression, however, is defined for a forward search.
			
 
				-The solution is to construct a machine identical to the machine
			
 
				-for a forward search except for a reversal of all the concatenation
			
 
				-operators (the other operators are symmetric under direction reversal),
			
 
				-to exchange the meaning of the operators
			
 
				-<TT>^</TT>
			
 
				-and
			
 
				-<TT>$</TT>,
			
 
				-and then to read the file backwards, looking for the
			
 
				-usual earliest longest match.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Execute</TT>
			
 
				-generates only one match each time it is called.
			
 
				-To interpret looping constructs such as the
			
 
				-<TT>x</TT>
			
 
				-command,
			
 
				-<TT>sam</TT>
			
 
				-must therefore synchronize between
			
 
				-calls of
			
 
				-<TT>execute</TT>
			
 
				-to avoid
			
 
				-problems with null matches.
			
 
				-For example, even given the leftmost-longest rule,
			
 
				-the expression
			
 
				-<TT>a*</TT>
			
 
				-matches three times in the string
			
 
				-<TT>ab</TT>
			
 
				-(the character
			
 
				-<TT>a</TT>,
			
 
				-the null string between the
			
 
				-<TT>a</TT>
			
 
				-and
			
 
				-<TT>b</TT>,
			
 
				-and the final null string).
			
 
				-After returning a match for the
			
 
				-<TT>a</TT>,
			
 
				-<TT>sam</TT>
			
 
				-must not match the null string before the
			
 
				-<TT>b</TT>.
			
 
				-The algorithm starts
			
 
				-<TT>execute</TT>
			
 
				-at the end of its previous match, and
			
 
				-if the match it returns
			
 
				-is null and abuts the previous match, rejects the match and advances
			
 
				-the initial position one character.
			
 
				-</P>
			
 
				-<H4>Memory allocation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The C language has no memory allocation primitives, although a standard
			
 
				-library routine,
			
 
				-<TT>malloc</TT>,
			
 
				-provides adequate service for simple programs.
			
 
				-For specific uses, however,
			
 
				-it can be better to write a custom allocator.
			
 
				-The allocator (or rather, pair of allocators) described here
			
 
				-work in both the terminal and host parts of
			
 
				-<TT>sam</TT>.
			
 
				-They are designed for efficient manipulation of strings,
			
 
				-which are allocated and freed frequently and vary in length from essentially
			
 
				-zero to 32 Kbytes (very large strings are written to disc).
			
 
				-More important, strings may be large and change size often,
			
 
				-so to minimize memory usage it is helpful to reclaim and to coalesce the
			
 
				-unused portions of strings when they are truncated.
			
 
				-<P>
			
 
				-Objects to be allocated in
			
 
				-<TT>sam</TT>
			
 
				-are of two flavors:
			
 
				-the first is C
			
 
				-<TT>structs</TT>,
			
 
				-which are small and often addressed by pointer variables;
			
 
				-the second is variable-sized arrays of characters
			
 
				-or integers whose
			
 
				-base pointer is always used to access them.
			
 
				-The memory allocator in
			
 
				-<TT>sam</TT>
			
 
				-is therefore in two parts:
			
 
				-first, a traditional first-fit allocator that provides fixed storage for
			
 
				-<TT>structs</TT>;
			
 
				-and second, a garbage-compacting allocator that reduces storage
			
 
				-overhead for variable-sized objects, at the cost of some bookkeeping.
			
 
				-The two types of objects are allocated from adjoining arenas, with
			
 
				-the garbage-compacting allocator controlling the arena with higher addresses.
			
 
				-Separating into two arenas simplifies compaction and prevents fragmentation due
			
 
				-to immovable objects.
			
 
				-The access rules for garbage-compactable objects
			
 
				-(discussed in the next paragraph) allow them to be relocated, so when
			
 
				-the first-fit arena needs space, it moves the garbage-compacted arena
			
 
				-to higher addresses to make room.  Storage is therefore created only
			
 
				-at successively higher addresses, either when more garbage-compacted
			
 
				-space is needed or when the first-fit arena pushes up the other arena.
			
 
				-</P>
			
 
				-<P>
			
 
				-Objects that may be compacted declare to the
			
 
				-allocator a cell that is guaranteed to be the sole repository of the
			
 
				-address of the object whenever a compaction can occur.
			
 
				-The compactor can then update the address when the object is moved.
			
 
				-For example, the implementation of type
			
 
				-<TT>List</TT>
			
 
				-(really a variable-length array)
			
 
				-is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef struct List{
			
 
				-        int     nused;
			
 
				-        long    *ptr;
			
 
				-}List;
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>ptr</TT>
			
 
				-cell must always be used directly, and never copied.  When a
			
 
				-<TT>List</TT>
			
 
				-is to be created the
			
 
				-<TT>List</TT>
			
 
				-structure is allocated in the ordinary first-fit arena
			
 
				-and its
			
 
				-<TT>ptr</TT>
			
 
				-is allocated in the garbage-compacted arena.
			
 
				-A similar data type for strings, called
			
 
				-<TT>String</TT>,
			
 
				-stores variable-length character arrays of up to 32767 elements.
			
 
				-</P>
			
 
				-<P>
			
 
				-A related matter of programming style:
			
 
				-<TT>sam</TT>
			
 
				-frequently passes structures by value, which
			
 
				-simplifies the code.
			
 
				-Traditionally, C programs have
			
 
				-passed structures by reference, but implicit allocation on
			
 
				-the stack is easier to use.
			
 
				-Structure passing is a relatively new feature of C
			
 
				-(it is not in the 
			
 
				-standard reference manual for C<sup>14</sup>), and is poorly supported in most
			
 
				-commercial C compilers.
			
 
				-It's convenient and expressive, though,
			
 
				-and simplifies memory management by
			
 
				-avoiding the allocator altogether
			
 
				-and eliminating pointer aliases.
			
 
				-</P>
			
 
				-<H4>Data structures for manipulating files
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Experience with
			
 
				-<TT>jim</TT>
			
 
				-showed that the requirements
			
 
				-of the file data structure were few, but strict.
			
 
				-First, files need to be read and written quickly;
			
 
				-adding a fresh file must be painless.
			
 
				-Second, the implementation must place no arbitrary upper limit on
			
 
				-the number or sizes of files.  (It should be practical to edit many files,
			
 
				-and files up to megabytes in length should be handled gracefully.)
			
 
				-This implies that files be stored on disc, not in main memory.
			
 
				-(Aficionados of virtual memory may argue otherwise, but the
			
 
				-implementation of virtual
			
 
				-memory in our system is not something to depend on
			
 
				-for good performance.)
			
 
				-Third, changes to files need be made by only two primitives:
			
 
				-deletion and insertion.
			
 
				-These are inverses of each other,
			
 
				-which simplifies the implementation of the undo operation.
			
 
				-Finally,
			
 
				-it must be easy and efficient to access the file, either
			
 
				-forwards or backwards, a byte at a time.
			
 
				-<P>
			
 
				-The
			
 
				-<TT>File</TT>
			
 
				-data type is constructed from three simpler data structures that hold arrays
			
 
				-of characters.
			
 
				-Each of these types has an insertion and deletion operator, and the
			
 
				-insertion and deletion operators of the
			
 
				-<TT>File</TT>
			
 
				-type itself are constructed from them.
			
 
				-</P>
			
 
				-<P>
			
 
				-The simplest type is the
			
 
				-<TT>String</TT>,
			
 
				-which is used to hold strings in main memory.
			
 
				-The code that manages
			
 
				-<TT>Strings</TT>
			
 
				-guarantees that they will never be longer
			
 
				-than some moderate size, and in practice they are rarely larger than 8 Kbytes.
			
 
				-<TT>Strings</TT>
			
 
				-have two purposes: they hold short strings like file names with little overhead,
			
 
				-and because they are deliberately small, they are efficient to modify.
			
 
				-They are therefore used as the data structure for in-memory caches.
			
 
				-</P>
			
 
				-<P>
			
 
				-The disc copy of the file is managed by a data structure called a
			
 
				-<TT>Disc</TT>,
			
 
				-which corresponds to a temporary file.  A
			
 
				-<TT>Disc</TT>
			
 
				-has no storage in main memory other than bookkeeping information;
			
 
				-the actual data being held is all on the disc.
			
 
				-To reduce the number of open files needed,
			
 
				-<TT>sam</TT>
			
 
				-opens a dozen temporary UNIX files and multiplexes the
			
 
				-<TT>Discs</TT>
			
 
				-upon them.
			
 
				-This permits many files to
			
 
				-be edited; the entire
			
 
				-<TT>sam</TT>
			
 
				-source (48 files) may be edited comfortably with a single
			
 
				-instance of
			
 
				-<TT>sam</TT>.
			
 
				-Allocating one temporary file per
			
 
				-<TT>Disc</TT>
			
 
				-would strain the operating system's limit on the number of open files.
			
 
				-Also, spreading the traffic among temporary files keeps the files shorter,
			
 
				-and shorter files are more efficiently implemented by the UNIX
			
 
				-I/O subsystem.
			
 
				-</P>
			
 
				-<P>
			
 
				-A
			
 
				-<TT>Disc</TT>
			
 
				-is an array of fixed-length blocks, each of which contains
			
 
				-between 1 and 4096 characters of active data.
			
 
				-(The block size of our UNIX file system is 4096 bytes.)
			
 
				-The block addresses within the temporary file and the length of each
			
 
				-block are stored in a
			
 
				-<TT>List</TT>.
			
 
				-When changes are made the live part of blocks may change size.
			
 
				-Blocks are created and coalesced when necessary to try to keep the sizes
			
 
				-between 2048 and 4096 bytes.
			
 
				-An actively changing part of the
			
 
				-<TT>Disc</TT>
			
 
				-therefore typically has about a kilobyte of slop that can be
			
 
				-inserted or deleted
			
 
				-without changing more than one block or affecting the block order.
			
 
				-When an insertion would overflow a block, the block is split, a new one
			
 
				-is allocated to receive the overflow, and the memory-resident list of blocks
			
 
				-is rearranged to reflect the insertion of the new block.
			
 
				-</P>
			
 
				-<P>
			
 
				-Obviously, going to the disc for every modification to the file is
			
 
				-prohibitively expensive.
			
 
				-The data type
			
 
				-<TT>Buffer</TT>
			
 
				-consists of a
			
 
				-<TT>Disc</TT>
			
 
				-to hold the data and a
			
 
				-<TT>String</TT>
			
 
				-that acts as a cache.
			
 
				-This is the first of a series of caches throughout the data structures in
			
 
				-<TT>sam</TT>.
			
 
				-The caches not only improve performance, they provide a way to organize
			
 
				-the flow of data, particularly in the communication between the host
			
 
				-and terminal.
			
 
				-This idea is developed below, in the section on communications.
			
 
				-</P>
			
 
				-<P>
			
 
				-To reduce disc traffic, changes to a
			
 
				-<TT>Buffer</TT>
			
 
				-are mediated by a variable-length string, in memory, that acts as a cache.
			
 
				-When an insertion or deletion is made to a
			
 
				-<TT>Buffer</TT>,
			
 
				-if the change can be accommodated by the cache, it is done there.
			
 
				-If the cache becomes bigger than a block because of an insertion,
			
 
				-some of it is written to the
			
 
				-<TT>Disc</TT>
			
 
				-and deleted from the cache.
			
 
				-If the change does not intersect the cache, the cache is flushed.
			
 
				-The cache is only loaded at the new position if the change is smaller than a block;
			
 
				-otherwise, it is sent directly to the
			
 
				-<TT>Disc</TT>.
			
 
				-This is because
			
 
				-large changes are typically sequential,
			
 
				-whereupon the next change is unlikely to overlap the current one.
			
 
				-</P>
			
 
				-<P>
			
 
				-A
			
 
				-<TT>File</TT>
			
 
				-comprises a
			
 
				-<TT>String</TT>
			
 
				-to hold the file name and some ancillary data such as dot and the modified bit.
			
 
				-The most important components, though, are a pair of
			
 
				-<TT>Buffers</TT>,
			
 
				-one called the transcript and the other the contents.
			
 
				-Their use is described in the next section.
			
 
				-</P>
			
 
				-<P>
			
 
				-The overall structure is shown in Figure 5.
			
 
				-Although it may seem that the data is touched many times on its
			
 
				-way from the
			
 
				-<TT>Disc</TT>,
			
 
				-it is read (by one UNIX system call) directly into the cache of the
			
 
				-associated
			
 
				-<TT>Buffer</TT>;
			
 
				-no extra copy is done.
			
 
				-Similarly, when flushing the cache, the text is written
			
 
				-directly from the cache to disc.
			
 
				-Most operations act directly on the text in the cache.
			
 
				-A principle applied throughout
			
 
				-<TT>sam</TT>
			
 
				-is that the fewer times the data is copied, the faster the program will run
			
 
				-(see also the paper by Waite<sup>15</sup>).
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="-.11766.gif"><br>
			
 
				-<br>
			
 
				-</PRE></TT></DL>
			
 
				-<I>Figure 5. File data structures.
			
 
				-The temporary files are stored in the standard repository for such files
			
 
				-on the host system.</I>
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The contents of a
			
 
				-<TT>File</TT>
			
 
				-are accessed by a routine that
			
 
				-copies to a buffer a substring of a file starting at a specified offset.
			
 
				-To read a byte at a time, a
			
 
				-per-<TT>File</TT>
			
 
				-array is loaded starting from a specified initial position,
			
 
				-and bytes may then be read from the array.
			
 
				-The implementation is done by a macro similar to the C standard I/O
			
 
				-<TT>getc</TT>
			
 
				-macro.<sup>14</sup>
			
 
				-Because the reading may be done at any address, a minor change to the
			
 
				-macro allows the file to be read backwards.
			
 
				-This array is read-only; there is no
			
 
				-<TT>putc</TT>.
			
 
				-</P>
			
 
				-<H4>Doing and undoing
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-has an unusual method for managing changes to files.
			
 
				-The command language makes it easy to specify multiple variable-length changes
			
 
				-to a file millions of bytes long, and such changes
			
 
				-must be made efficiently if the editor is to be practical.
			
 
				-The usual techniques for inserting and deleting strings
			
 
				-are inadequate under these conditions.
			
 
				-The
			
 
				-<TT>Buffer</TT>
			
 
				-and
			
 
				-<TT>Disc</TT>
			
 
				-data structures are designed for efficient random access to long strings,
			
 
				-but care must be taken to avoid super-linear behavior when making
			
 
				-many changes simultaneously.
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-uses a two-pass algorithm for making changes, and treats each file as a database
			
 
				-against which transactions are registered.
			
 
				-Changes are not made directly to the contents.
			
 
				-Instead, when a command is started, a `mark' containing
			
 
				-a sequence number is placed in the transcript
			
 
				-<TT>Buffer</TT>,
			
 
				-and each change made to the file, either an insertion or deletion
			
 
				-or a change to the file name,
			
 
				-is appended to the end of the transcript.
			
 
				-When the command is complete, the transcript is rewound to the
			
 
				-mark and applied to the contents.
			
 
				-</P>
			
 
				-<P>
			
 
				-One reason for separating evaluation from
			
 
				-application in this way is to simplify tracking the addresses of changes
			
 
				-made in the middle of a long sequence.
			
 
				-The two-pass algorithm also allows all changes to apply to the
			
 
				-<I>original</I>
			
 
				-data: no change can affect another change made in the same command.
			
 
				-This is particularly important when evaluating an
			
 
				-<TT>x</TT>
			
 
				-command because it prevents regular expression matches
			
 
				-from stumbling over changes made earlier in the execution.
			
 
				-Also, the two-pass
			
 
				-algorithm is cleaner than the way other UNIX editors allow changes to
			
 
				-affect each other;
			
 
				-for example,
			
 
				-<TT>ed</TT>'s
			
 
				-idioms to do things like delete every other line
			
 
				-depend critically on the implementation.
			
 
				-Instead,
			
 
				-<TT>sam</TT>'s
			
 
				-simple model, in which all changes in a command occur effectively
			
 
				-simultaneously, is easy to explain and to understand.
			
 
				-</P>
			
 
				-<P>
			
 
				-The records in the transcript are of the form ``delete substring from
			
 
				-locations
			
 
				-123 to 456'' and ``insert 11 characters `hello there' at location 789.''
			
 
				-(It is an error if the changes are not at monotonically greater
			
 
				-positions through the file.)
			
 
				-While the update is occurring, these numbers must be
			
 
				-offset by earlier changes, but that is straightforward and
			
 
				-local to the update routine;
			
 
				-moreover, all the numbers have been computed
			
 
				-before the first is examined.
			
 
				-</P>
			
 
				-<P>
			
 
				-Treating the file as a transaction system has another advantage:
			
 
				-undo is trivial.
			
 
				-All it takes is to invert the transcript after it has been
			
 
				-implemented, converting insertions
			
 
				-into deletions and vice versa, and saving them in a holding
			
 
				-<TT>Buffer</TT>.
			
 
				-The `do' transcript can then be deleted from
			
 
				-the transcript
			
 
				-<TT>Buffer</TT>
			
 
				-and replaced by the `undo' transcript.
			
 
				-If an undo is requested, the transcript is rewound and the undo transcript
			
 
				-executed.
			
 
				-Because the transcript
			
 
				-<TT>Buffer</TT>
			
 
				-is not truncated after each command, it accumulates
			
 
				-successive changes.
			
 
				-A sequence of undo commands
			
 
				-can therefore back up the file arbitrarily,
			
 
				-which is more helpful than the more commonly implemented self-inverse form of undo.
			
 
				-(<TT>Sam</TT>
			
 
				-provides no way to undo an undo, but if it were desired,
			
 
				-it would be easy to provide by re-interpreting the `do' transcript.)
			
 
				-Each mark in the transcript contains a sequence number and the offset into
			
 
				-the transcript of the previous mark, to aid in unwinding the transcript.
			
 
				-Marks also contain the value of dot and the modified bit so these can be
			
 
				-restored easily.
			
 
				-Undoing multiple files is easy; it merely demands undoing all files whose
			
 
				-latest change has the same sequence number as the current file.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another benefit of having a transcript is that errors encountered in the middle
			
 
				-of a complicated command need not leave the files in an intermediate state.
			
 
				-By rewinding the transcript to the mark beginning the command,
			
 
				-the partial command can be trivially undone.
			
 
				-</P>
			
 
				-<P>
			
 
				-When the update algorithm was first implemented, it was unacceptably slow,
			
 
				-so a cache was added to coalesce nearby changes,
			
 
				-replacing multiple small changes by a single larger one.
			
 
				-This reduced the number
			
 
				-of insertions into the transaction
			
 
				-<TT>Buffer</TT>,
			
 
				-and made a dramatic improvement in performance,
			
 
				-but made it impossible
			
 
				-to handle changes in non-monotonic order in the file; the caching method
			
 
				-only works if changes don't overlap.
			
 
				-Before the cache was added, the transaction could in principle be sorted
			
 
				-if the changes were out of order, although
			
 
				-this was never done.
			
 
				-The current status is therefore acceptable performance with a minor
			
 
				-restriction on global changes, which is sometimes, but rarely, an annoyance.
			
 
				-</P>
			
 
				-<P>
			
 
				-The update algorithm obviously paws the data more than simpler
			
 
				-algorithms, but it is not prohibitively expensive;
			
 
				-the caches help.
			
 
				-(The principle of avoiding copying the data is still honored here,
			
 
				-although not as piously:
			
 
				-the data is moved from contents' cache to
			
 
				-the transcript's all at once and through only one internal buffer.)
			
 
				-Performance figures confirm the efficiency.
			
 
				-To read from a dead start a hundred kilobyte file on a VAX-11/750
			
 
				-takes 1.4 seconds of user time, 2.5 seconds of system time,
			
 
				-and 5 seconds of real time.
			
 
				-Reading the same file in
			
 
				-<TT>ed</TT>
			
 
				-takes 6.0 seconds of user time, 1.7 seconds of system time,
			
 
				-and 8 seconds of real time.
			
 
				-<TT>Sam</TT>
			
 
				-uses about half the CPU time.
			
 
				-A more interesting example is the one stated above:
			
 
				-inserting a character between every pair of characters in the file.
			
 
				-The
			
 
				-<TT>sam</TT>
			
 
				-command is
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-,y/@/ a/x/
			
 
				-</PRE></TT></DL>
			
 
				-and takes 3 CPU seconds per kilobyte of input file, of which
			
 
				-about a third is spent in the regular expression code.
			
 
				-This translates to about 500 changes per second.
			
 
				-<TT>Ed</TT>
			
 
				-takes 1.5 seconds per kilobyte to make a similar change (ignoring newlines),
			
 
				-but cannot undo it.
			
 
				-The same example in
			
 
				-<TT>ex</TT>,<sup>9</sup>
			
 
				-a variant of
			
 
				-<TT>ed</TT>
			
 
				-done at the University of California at Berkeley,
			
 
				-which allows one level of undoing, again takes 3 seconds.
			
 
				-In summary,
			
 
				-<TT>sam</TT>'s
			
 
				-performance is comparable to that of other UNIX editors, although it solves
			
 
				-a harder problem.
			
 
				-</P>
			
 
				-<H4>Communications
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The discussion so far has described the implementation of the host part of
			
 
				-<TT>sam</TT>;
			
 
				-the next few sections explain how a machine with mouse and bitmap display
			
 
				-can be engaged to improve interaction.
			
 
				-<TT>Sam</TT>
			
 
				-is not the first editor to be written as two processes,<sup>16</sup>
			
 
				-but its implementation
			
 
				-has some unusual aspects.
			
 
				-<P>
			
 
				-There are several ways
			
 
				-<TT>sam</TT>'s
			
 
				-host and terminal parts may be connected.
			
 
				-The first and simplest is to forgo the terminal part and use the host
			
 
				-part's command language to edit text on an ordinary terminal.
			
 
				-This mode is invoked by starting
			
 
				-<TT>sam</TT>
			
 
				-with the
			
 
				-<TT>-d</TT>
			
 
				-option.
			
 
				-With no options,
			
 
				-<TT>sam</TT>
			
 
				-runs separate host and terminal programs,
			
 
				-communicating with a message protocol over the physical
			
 
				-connection that joins them.
			
 
				-Typically, the connection is an RS-232 link between a Blit
			
 
				-(the prototypical display for
			
 
				-<TT>sam</TT>)
			
 
				-and a host running
			
 
				-the Ninth Edition of the UNIX operating system.<sup>8</sup>
			
 
				-(This is the version of the system used in the Computing Sciences Research
			
 
				-Center at AT&amp;T Bell Laboratories [now Lucent Technologies, Bell Labs], where I work.  Its relevant
			
 
				-aspects are discussed in the Blit paper.<sup>1</sup>)
			
 
				-The implementation of
			
 
				-<TT>sam</TT>
			
 
				-for the SUN computer runs both processes on the same machine and
			
 
				-connects them by a pipe.
			
 
				-</P>
			
 
				-<P>
			
 
				-The low bandwidth of an RS-232 link
			
 
				-necessitated the split between
			
 
				-the two programs.
			
 
				-The division is a mixed blessing:
			
 
				-a program in two parts is much harder to write and to debug
			
 
				-than a self-contained one,
			
 
				-but the split makes several unusual configurations possible.
			
 
				-The terminal may be physically separated from the host, allowing the conveniences
			
 
				-of a mouse and bitmap display to be taken home while leaving the files at work.
			
 
				-It is also possible to run the host part on a remote machine:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-sam -r host
			
 
				-</PRE></TT></DL>
			
 
				-connects to the terminal in the usual way, and then makes a call
			
 
				-across the network to establish the host part of
			
 
				-<TT>sam</TT>
			
 
				-on the named machine.
			
 
				-Finally, it cross-connects the I/O to join the two parts.
			
 
				-This allows
			
 
				-<TT>sam</TT>
			
 
				-to be run on machines that do not support bitmap displays;
			
 
				-for example,
			
 
				-<TT>sam</TT>
			
 
				-is the editor of choice on our Cray X-MP/24.
			
 
				-<TT>Sam</TT>
			
 
				-<TT>-r</TT>
			
 
				-involves
			
 
				-<I>three</I>
			
 
				-machines: the remote host, the terminal, and the local host.
			
 
				-The local host's job is simple but vital: it passes the data
			
 
				-between the remote host and terminal.
			
 
				-</P>
			
 
				-<P>
			
 
				-The host and terminal exchange messages asynchronously
			
 
				-(rather than, say, as remote procedure calls) but there is no
			
 
				-error detection or correction
			
 
				-because, whatever the configuration, the connection is reliable.
			
 
				-Because the terminal handles mundane interaction tasks such as
			
 
				-popping up menus and interpreting the responses, the messages are about
			
 
				-data, not actions.
			
 
				-For example, the host knows nothing about what is displayed on the screen,
			
 
				-and when the user types a character, the message sent to the host says
			
 
				-``insert a one-byte string at location 123 in file 7,'' not ``a character
			
 
				-was typed at the current position in the current file.''
			
 
				-In other words, the messages look very much like the transaction records
			
 
				-in the transcripts.
			
 
				-</P>
			
 
				-<P>
			
 
				-Either the host or terminal part of
			
 
				-<TT>sam</TT>
			
 
				-may initiate a change to a file.
			
 
				-The command language operates on the host, while typing and some
			
 
				-mouse operations are executed directly in the terminal to optimize response.
			
 
				-Changes initiated by the host program must be transmitted to the terminal,
			
 
				-and
			
 
				-vice versa.
			
 
				-(A token is exchanged to determine which end is in control,
			
 
				-which means that characters typed while a time-consuming command runs
			
 
				-must be buffered and do not appear until the command is complete.)
			
 
				-To maintain consistent information,
			
 
				-the host and terminal track changes through a per-file
			
 
				-data structure that records what portions of the file
			
 
				-the terminal has received.
			
 
				-The data structure, called a
			
 
				-<TT>Rasp</TT>
			
 
				-(a weak pun: it's a file with holes)
			
 
				-is held and updated by both the host and terminal.
			
 
				-A
			
 
				-<TT>Rasp</TT>
			
 
				-is a list of
			
 
				-<TT>Strings</TT>
			
 
				-holding those parts of the file known to the terminal,
			
 
				-separated by counts of the number of bytes in the interstices.
			
 
				-Of course, the host doesn't keep a separate copy of the data (it only needs
			
 
				-the lengths of the various pieces),
			
 
				-but the structure is the same on both ends.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Rasp</TT>
			
 
				-in the terminal doubles as a cache.
			
 
				-Since the terminal keeps the text for portions of the file it has displayed,
			
 
				-it need not request data from the host when revisiting old parts of the file
			
 
				-or redrawing obscured windows, which speeds things up considerably
			
 
				-over low-speed links.
			
 
				-</P>
			
 
				-<P>
			
 
				-It's trivial for the terminal to maintain its
			
 
				-<TT>Rasp</TT>,
			
 
				-because all changes made on the terminal apply to parts of the file
			
 
				-already loaded there.
			
 
				-Changes made by the host are compared against the
			
 
				-<TT>Rasp</TT>
			
 
				-during the update sequence after each command.
			
 
				-Small changes to pieces of the file loaded in the terminal
			
 
				-are sent in their entirety.
			
 
				-Larger changes, and changes that fall entirely in the holes,
			
 
				-are transmitted as messages without literal data:
			
 
				-only the lengths of the deleted and inserted strings are transmitted.
			
 
				-When a command is completed, the terminal examines its visible
			
 
				-windows to see if any holes in their
			
 
				-<TT>Rasps</TT>
			
 
				-intersect the visible portion of the file.
			
 
				-It then requests the missing data from the host,
			
 
				-along with up to 512 bytes of surrounding data, to minimize
			
 
				-the number of messages when visiting a new portion of the file.
			
 
				-This technique provides a kind of two-level lazy evaluation for the terminal.
			
 
				-The first level sends a minimum of information about
			
 
				-parts of the file not being edited interactively;
			
 
				-the second level waits until a change is displayed before
			
 
				-transmitting the new data.
			
 
				-Of course,
			
 
				-performance is also helped by having the terminal respond immediately to typing
			
 
				-and simple mouse requests.
			
 
				-Except for small changes to active pieces of the file, which are
			
 
				-transmitted to the terminal without negotiation,
			
 
				-the terminal is wholly responsible for deciding what is displayed;
			
 
				-the host uses the
			
 
				-<TT>Rasp</TT>
			
 
				-only to tell the terminal what might be relevant.
			
 
				-</P>
			
 
				-<P>
			
 
				-When a change is initiated by the host,
			
 
				-the messages to the terminal describing the change
			
 
				-are generated by the routine that applies the transcript of the changes
			
 
				-to the contents of the
			
 
				-<TT>File</TT>.
			
 
				-Since changes are undone by the same update routine,
			
 
				-undoing requires
			
 
				-no extra code in the communications;
			
 
				-the usual messages describing changes to the file are sufficient
			
 
				-to back up the screen image.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Rasp</TT>
			
 
				-is a particularly good example of the way caches are used in
			
 
				-<TT>sam</TT>.
			
 
				-First, it facilitates access to the active portion of the text by placing
			
 
				-the busy text in main memory.
			
 
				-In so doing, it provides efficient access
			
 
				-to a large data structure that does not fit in memory.
			
 
				-Since the form of data is to be imposed by the user, not by the program,
			
 
				-and because characters will frequently be scanned sequentially,
			
 
				-files are stored as flat objects.
			
 
				-Caches help keep performance good and linear when working with such
			
 
				-data.
			
 
				-</P>
			
 
				-<P>
			
 
				-Second, the
			
 
				-<TT>Rasp</TT>
			
 
				-and several of the other caches have some
			
 
				-<I>read-ahead</I>;
			
 
				-that is, the cache is loaded with more information than is needed for
			
 
				-the job immediately at hand.
			
 
				-When manipulating linear structures, the accesses are usually sequential,
			
 
				-and read-ahead can significantly reduce the average time to access the
			
 
				-next element of the object.
			
 
				-Sequential access is a common mode for people as well as programs;
			
 
				-consider scrolling through a document while looking for something.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally, like any good data structure,
			
 
				-the cache guides the algorithm, or at least the implementation.
			
 
				-The
			
 
				-<TT>Rasp</TT>
			
 
				-was actually invented to control the communications between the host and
			
 
				-terminal parts, but I realized very early that it was also a form of
			
 
				-cache.  Other caches were more explicitly intended to serve a double
			
 
				-purpose: for example, the caches in
			
 
				-<TT>Files</TT>
			
 
				-that coalesce updates not only reduce traffic to the
			
 
				-transcript and contents
			
 
				-<TT>Buffers</TT>,
			
 
				-they also clump screen updates so that complicated changes to the
			
 
				-screen are achieved in
			
 
				-just a few messages to the terminal.
			
 
				-This saved me considerable work: I did not need to write special
			
 
				-code to optimize the message traffic to the
			
 
				-terminal.
			
 
				-Caches pay off in surprising ways.
			
 
				-Also, they tend to be independent, so their performance improvements
			
 
				-are multiplicative.
			
 
				-</P>
			
 
				-<H4>Data structures in the terminal
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The terminal's job is to display and to maintain a consistent image of
			
 
				-pieces of the files being edited.
			
 
				-Because the text is always in memory, the data structures are
			
 
				-considerably simpler than those in the host part.
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-typically has far more windows than does
			
 
				-<TT>mux</TT>,
			
 
				-the window system within which its Blit implementation runs.
			
 
				-<TT>Mux</TT>
			
 
				-has a fairly small number of asynchronously updated windows;
			
 
				-<TT>sam</TT>
			
 
				-needs a large number of synchronously updated windows that are
			
 
				-usually static and often fully obscured.
			
 
				-The different tradeoffs guided
			
 
				-<TT>sam</TT>
			
 
				-away from the memory-intensive implementation of windows, called
			
 
				-<TT>Layers</TT>,<sup>17</sup>
			
 
				-used in
			
 
				-<TT>mux</TT>.
			
 
				-Rather than depending on a complete bitmap image of the display for each window,
			
 
				-<TT>sam</TT>
			
 
				-regenerates the image from its in-memory text
			
 
				-(stored in the
			
 
				-<TT>Rasp</TT>)
			
 
				-when necessary, although it will use such an image if it is available.
			
 
				-Like
			
 
				-<TT>Layers</TT>,
			
 
				-though,
			
 
				-<TT>sam</TT>
			
 
				-uses the screen bitmap as active storage in which to update the image using
			
 
				-<TT>bitblt</TT>.<sup>18,19</sup>
			
 
				-The resulting organization, pictured in Figure 6,
			
 
				-has a global array of windows, called
			
 
				-<TT>Flayers</TT>,
			
 
				-each of which holds an image of a piece of text held in a data structure
			
 
				-called a
			
 
				-<TT>Frame</TT>,
			
 
				-which in turn represents
			
 
				-a rectangular window full of text displayed in some
			
 
				-<TT>Bitmap</TT>.
			
 
				-Each
			
 
				-<TT>Flayer</TT>
			
 
				-appears in a global list that orders them all front-to-back
			
 
				-on the display, and simultaneously as an element of a per-file array
			
 
				-that holds all the open windows for that file.
			
 
				-The complement in the terminal of the
			
 
				-<TT>File</TT>
			
 
				-on the host is called a
			
 
				-<TT>Text</TT>;
			
 
				-each connects its
			
 
				-<TT>Flayers</TT>
			
 
				-to the associated
			
 
				-<TT>Rasp</TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="-.11767.gif"><br>
			
 
				-<br>
			
 
				-</PRE></TT></DL>
			
 
				-<I>Figure 6. Data structures in the terminal.
			
 
				-</I><TT>Flayers</TT><I>
			
 
				-are also linked together into a front-to-back list.
			
 
				-</I><TT>Boxes</TT><I>
			
 
				-are discussed in the next section.
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I><br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Bitmap</TT>
			
 
				-for a
			
 
				-<TT>Frame</TT>
			
 
				-contains the image of the text.
			
 
				-For a fully visible window, the
			
 
				-<TT>Bitmap</TT>
			
 
				-will be the screen (or at least the
			
 
				-<TT>Layer</TT>
			
 
				-in which
			
 
				-<TT>sam</TT>
			
 
				-is being run),
			
 
				-while for partially obscured windows the
			
 
				-<TT>Bitmap</TT>
			
 
				-will be off-screen.
			
 
				-If the window is fully obscured, the
			
 
				-<TT>Bitmap</TT>
			
 
				-will be null.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Bitmap</TT>
			
 
				-is a kind of cache.
			
 
				-When making changes to the display, most of the original image will
			
 
				-look the same in the final image, and the update algorithms exploit this.
			
 
				-The
			
 
				-<TT>Frame</TT>
			
 
				-software updates the image in the
			
 
				-<TT>Bitmap</TT>
			
 
				-incrementally; the
			
 
				-<TT>Bitmap</TT>
			
 
				-is not just an image, it is a data structure.<sup>18,19</sup>
			
 
				-The job of the software that updates the display is therefore
			
 
				-to use as much as possible of the existing image (converting the
			
 
				-text from ASCII characters to pixels is expensive) in a sort of two-dimensional
			
 
				-string insertion algorithm.
			
 
				-The details of this process are described in the next section.
			
 
				-</P>
			
 
				-<P>
			
 
				-The
			
 
				-<TT>Frame</TT>
			
 
				-software has no code to support overlapping windows;
			
 
				-its job is to keep a single
			
 
				-<TT>Bitmap</TT>
			
 
				-up to date.
			
 
				-It falls to the
			
 
				-<TT>Flayer</TT>
			
 
				-software to multiplex the various
			
 
				-<TT>Bitmaps</TT>
			
 
				-onto the screen.
			
 
				-The problem of maintaining overlapping
			
 
				-<TT>Flayers</TT>
			
 
				-is easier than for
			
 
				-<TT>Layers</TT><sup>17</sup>
			
 
				-because changes are made synchronously and because the contents of the window
			
 
				-can be reconstructed from the data stored in the
			
 
				-<TT>Frame</TT>;
			
 
				-the
			
 
				-<TT>Layers</TT>
			
 
				-software
			
 
				-makes no such assumptions.
			
 
				-In
			
 
				-<TT>sam</TT>,
			
 
				-the window being changed is almost always fully visible, because the current
			
 
				-window is always fully visible, by construction.
			
 
				-However, when multi-file changes are being made, or when
			
 
				-more than one window is open on a file,
			
 
				-it may be necessary to update partially obscured windows.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are three cases: the window is 
			
 
				-fully visible, invisible (fully obscured), or partially visible.
			
 
				-If fully visible, the
			
 
				-<TT>Bitmap</TT>
			
 
				-is part of the screen, so when the
			
 
				-<TT>Flayer</TT>
			
 
				-update routine calls the
			
 
				-<TT>Frame</TT>
			
 
				-update routine, the screen will be updated directly.
			
 
				-If the window is invisible,
			
 
				-there is no associated
			
 
				-<TT>Bitmap</TT>,
			
 
				-and all that is necessary is to update the
			
 
				-<TT>Frame</TT>
			
 
				-data structure, not the image.
			
 
				-If the window is partially visible, the
			
 
				-<TT>Frame</TT>
			
 
				-routine is called to update the image in the off-screen
			
 
				-<TT>Bitmap</TT>,
			
 
				-which may require regenerating it from the text of the window.
			
 
				-The
			
 
				-<TT>Flayer</TT>
			
 
				-code then clips this
			
 
				-<TT>Bitmap</TT>
			
 
				-against the
			
 
				-<TT>Bitmaps</TT>
			
 
				-of all
			
 
				-<TT>Frames</TT>
			
 
				-in front of the
			
 
				-<TT>Frame</TT>
			
 
				-being modified, and the remainder is copied to the display.
			
 
				-</P>
			
 
				-<P>
			
 
				-This is much faster than recreating the image off-screen
			
 
				-for every change, or clipping all the changes made to the image
			
 
				-during its update.
			
 
				-Unfortunately, these caches can also consume prohibitive amounts of
			
 
				-memory, so they are freed fairly liberally &#8212; after every change to the
			
 
				-front-to-back order of the
			
 
				-<TT>Flayers</TT>.
			
 
				-The result is that
			
 
				-the off-screen
			
 
				-<TT>Bitmaps</TT>
			
 
				-exist only while multi-window changes are occurring,
			
 
				-which is the only time the performance improvement they provide is needed.
			
 
				-Also, the user interface causes fully-obscured windows to be the
			
 
				-easiest to make &#8212;
			
 
				-creating a canonically sized and placed window requires only a button click
			
 
				-&#8212; which reduces the need for caching still further.
			
 
				-</P>
			
 
				-<P>
			
 
				-</P>
			
 
				-<H4>Screen update
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Only two low-level primitives are needed for incremental update:
			
 
				-<TT>bitblt</TT>,
			
 
				-which copies rectangles of pixels, and
			
 
				-<TT>string</TT>
			
 
				-(which in turn calls
			
 
				-<TT>bitblt</TT>),
			
 
				-which draws a null-terminated character string in a
			
 
				-<TT>Bitmap</TT>.
			
 
				-A
			
 
				-<TT>Frame</TT>
			
 
				-contains a list of
			
 
				-<TT>Boxes</TT>,
			
 
				-each of which defines a horizontal strip of text in the window
			
 
				-(see Figure 7).
			
 
				-A
			
 
				-<TT>Box</TT>
			
 
				-has a character string
			
 
				-<TT>str</TT>,
			
 
				-and a
			
 
				-<TT>Rectangle</TT>
			
 
				-<TT>rect</TT>
			
 
				-that defines the location of the strip in the window.
			
 
				-(The text in
			
 
				-<TT>str</TT>
			
 
				-is stored in the
			
 
				-<TT>Box</TT>
			
 
				-separately from the
			
 
				-<TT>Rasp</TT>
			
 
				-associated with the window's file, so
			
 
				-<TT>Boxes</TT>
			
 
				-are self-contained.)
			
 
				-The invariant is that
			
 
				-the image of the
			
 
				-<TT>Box</TT>
			
 
				-can be reproduced by calling
			
 
				-<TT>string</TT>
			
 
				-with argument
			
 
				-<TT>str</TT>
			
 
				-to draw the string in
			
 
				-<TT>rect</TT>,
			
 
				-and the resulting picture fits perfectly within
			
 
				-<TT>rect</TT>.
			
 
				-In other words, the
			
 
				-<TT>Boxes</TT>
			
 
				-define the tiling of the window.
			
 
				-The tiling may be complicated by long lines of text, which
			
 
				-are folded onto the next line.
			
 
				-Some editors use horizontal scrolling to avoid this complication,
			
 
				-but to be comfortable this technique requires that lines not be
			
 
				-<I>too</I>
			
 
				-long;
			
 
				-<TT>sam</TT>
			
 
				-has no such restriction.
			
 
				-Also, and perhaps more importantly, UNIX programs and terminals traditionally fold
			
 
				-long lines to make their contents fully visible.
			
 
				-<P>
			
 
				-Two special kinds of
			
 
				-<TT>Boxes</TT>
			
 
				-contain a single
			
 
				-character: either a newline or a tab.
			
 
				-Newlines and tabs are white space.
			
 
				-A newline
			
 
				-<TT>Box</TT>
			
 
				-always extends to the right edge of the window,
			
 
				-forcing the following
			
 
				-<TT>Box</TT>
			
 
				-to the next line.
			
 
				-The width of a tab depends on where it is located:
			
 
				-it forces the next
			
 
				-<TT>Box</TT>
			
 
				-to begin at a tab location.
			
 
				-Tabs also
			
 
				-have a minimum width equivalent to a blank (blanks are
			
 
				-drawn by
			
 
				-<TT>string</TT>
			
 
				-and are not treated specially); newlines have a minimum width of zero.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br><img src="-.11768.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-<br>
			
 
				-</PRE></TT></DL>
			
 
				-<I>Figure 7. A line of text showing its
			
 
				-</I><TT>Boxes</TT><I>.
			
 
				-The first two blank
			
 
				-</I><TT>Boxes</TT><I>
			
 
				-contain tabs; the last contains a newline.
			
 
				-Spaces are handled as ordinary characters.
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I><br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The update algorithms always use the
			
 
				-<TT>Bitmap</TT>
			
 
				-image of the text (either the display or cache
			
 
				-<TT>Bitmap</TT>);
			
 
				-they never examine the characters within a
			
 
				-<TT>Box</TT>
			
 
				-except when the
			
 
				-<TT>Box</TT>
			
 
				-needs to be split in two.
			
 
				-Before a change, the window consists of a tiling of
			
 
				-<TT>Boxes</TT>;
			
 
				-after the change the window is tiled differently.
			
 
				-The update algorithms rearrange the tiles in place, without
			
 
				-backup storage.
			
 
				-The algorithms are not strictly optimal &#8212; for example, they can
			
 
				-clear a pixel that is later going to be written upon &#8212;
			
 
				-but they never move a tile that doesn't need to be moved,
			
 
				-and they move each tile at most once.
			
 
				-<TT>Frinsert</TT>
			
 
				-on a Blit can absorb over a thousand characters a second if the strings
			
 
				-being inserted are a few tens of characters long.
			
 
				-</P>
			
 
				-<P>
			
 
				-Consider
			
 
				-<TT>frdelete</TT>.
			
 
				-Its job is to delete a substring from a
			
 
				-<TT>Frame</TT>
			
 
				-and restore the image of the
			
 
				-<TT>Frame</TT>.
			
 
				-The image of a substring has a peculiar shape (see Figure 2) comprising
			
 
				-possibly a partial line,
			
 
				-zero or more full lines,
			
 
				-and possibly a final partial line.
			
 
				-For reference, call this the
			
 
				-Z-shape.
			
 
				-<TT>Frdelete</TT>
			
 
				-begins by splitting, if necessary, the
			
 
				-<TT>Boxes</TT>
			
 
				-containing the ends of
			
 
				-the substring so the substring begins and ends on
			
 
				-<TT>Box</TT>
			
 
				-boundaries.
			
 
				-Because the substring is being deleted, its image is not needed,
			
 
				-so the Z-shape is then cleared.
			
 
				-Then, tiles (that is, the images of
			
 
				-<TT>Boxes</TT>)
			
 
				-are copied, using
			
 
				-<TT>bitblt</TT>,
			
 
				-from immediately after the Z-shape to
			
 
				-the beginning of the Z-shape,
			
 
				-resulting in a new Z-shape.
			
 
				-(<TT>Boxes</TT>
			
 
				-whose contents would span two lines in the new position must first be split.)
			
 
				-</P>
			
 
				-<P>
			
 
				-Copying the remainder of the
			
 
				-<TT>Frame</TT>
			
 
				-tile by tile
			
 
				-this way will clearly accomplish the deletion but eventually,
			
 
				-typically when the copying algorithm encounters a tab or newline,
			
 
				-the old and new
			
 
				-<TT>x</TT>
			
 
				-coordinates of the tile
			
 
				-to be copied are the same.
			
 
				-This correspondence implies
			
 
				-that the Z-shape has its beginning and ending edges aligned
			
 
				-vertically, and a sequence of at most two
			
 
				-<TT>bitblts</TT>
			
 
				-can be used to copy the remaining tiles.
			
 
				-The last step is to clear out the resulting empty space at the bottom
			
 
				-of the window;
			
 
				-the number of lines to be cleared is the number of complete lines in the
			
 
				-Z-shape closed by the final
			
 
				-<TT>bitblts</TT>.
			
 
				-The final step is to merge horizontally adjacent
			
 
				-<TT>Boxes</TT>
			
 
				-of plain text.
			
 
				-The complete source to
			
 
				-<TT>frdelete</TT>
			
 
				-is less than 100 lines of C.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Frinsert</TT>
			
 
				-is more complicated because it must do four passes:
			
 
				-one to construct the
			
 
				-<TT>Box</TT>
			
 
				-list for the inserted string,
			
 
				-one to reconnoitre,
			
 
				-one to copy (in opposite order to
			
 
				-<TT>frdelete</TT>)
			
 
				-the
			
 
				-<TT>Boxes</TT>
			
 
				-to make the hole for the new text,
			
 
				-and finally one to copy the new text into place.
			
 
				-Overall, though,
			
 
				-<TT>frinsert</TT>
			
 
				-has a similar flavor to
			
 
				-<TT>frdelete</TT>,
			
 
				-and needn't be described further.
			
 
				-<TT>Frinsert</TT>
			
 
				-and its subsidiary routines comprise 211 lines of C.
			
 
				-</P>
			
 
				-<P>
			
 
				-The terminal source code is 3024 lines of C,
			
 
				-and the host source is 5797 lines.
			
 
				-</P>
			
 
				-<H4>Discussion
			
 
				-</H4>
			
 
				-<H4>History
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The immediate ancestor of
			
 
				-<TT>sam</TT>
			
 
				-was the original text editor for the Blit, called
			
 
				-<TT>jim</TT>.
			
 
				-<TT>Sam</TT>
			
 
				-inherited
			
 
				-<TT>jim</TT>'s
			
 
				-two-process structure and mouse language almost unchanged, but
			
 
				-<TT>jim</TT>
			
 
				-suffered from several drawbacks that were addressed in the design of
			
 
				-<TT>sam</TT>.
			
 
				-The most important of these was the lack of a command language.
			
 
				-Although
			
 
				-<TT>jim</TT>
			
 
				-was easy to use for simple editing, it provided no direct help with
			
 
				-large or repetitive editing tasks.  Instead, it provided a command to pass
			
 
				-selected text through a shell pipeline,
			
 
				-but this was no more satisfactory than could be expected of a stopgap measure.
			
 
				-<P>
			
 
				-<TT>Jim</TT>
			
 
				-was written primarily as a vehicle for experimenting with a mouse-based
			
 
				-interface to text, and the experiment was successful.
			
 
				-<TT>Jim</TT>
			
 
				-had some spin-offs:
			
 
				-<TT>mux</TT>,
			
 
				-the second window system for the Blit, is essentially a multiplexed
			
 
				-version of the terminal part of
			
 
				-<TT>jim</TT>;
			
 
				-and the debugger
			
 
				-<TT>pi</TT>'s
			
 
				-user interface<sup>20</sup> was closely modeled on
			
 
				-<TT>jim</TT>'s.
			
 
				-But after a couple of years,
			
 
				-<TT>jim</TT>
			
 
				-had become difficult to maintain and limiting to use,
			
 
				-and its replacement was overdue.
			
 
				-</P>
			
 
				-<P>
			
 
				-I began the design of
			
 
				-<TT>sam</TT>
			
 
				-by asking
			
 
				-<TT>jim</TT>
			
 
				-customers what they wanted.
			
 
				-This was probably a mistake; the answers were essentially a list of features
			
 
				-to be found in other editors, which did not provide any of the
			
 
				-guiding principles I was seeking.
			
 
				-For instance, one common request was for a ``global substitute,''
			
 
				-but no one suggested how to provide it within a cut-and-paste editor.
			
 
				-I was looking for a scheme that would
			
 
				-support such specialized features comfortably in the context of some
			
 
				-general command language.
			
 
				-Ideas were not forthcoming, though, particularly given my insistence
			
 
				-on removing all limits on file sizes, line lengths and so on.
			
 
				-Even worse, I recognized that, since the mouse could easily
			
 
				-indicate a region of the screen that was not an integral number of lines,
			
 
				-the command language would best forget about newlines altogether,
			
 
				-and that meant the command language had to treat the file as a single
			
 
				-string, not an array of lines.
			
 
				-</P>
			
 
				-<P>
			
 
				-Eventually, I decided that thinking was not getting me very far and it was
			
 
				-time to try building.
			
 
				-I knew that the terminal part could be built easily &#8212;
			
 
				-that part of
			
 
				-<TT>jim</TT>
			
 
				-behaved acceptably well &#8212; and that most of the hard work was going
			
 
				-to be in the host part: the file interface, command interpreter and so on.
			
 
				-Moreover, I had some ideas about how the architecture of
			
 
				-<TT>jim</TT>
			
 
				-could be improved without destroying its basic structure, which I liked
			
 
				-in principle but which hadn't worked out as well as I had hoped.
			
 
				-So I began by designing the file data structure,
			
 
				-starting with the way
			
 
				-<TT>jim</TT>
			
 
				-worked &#8212; comparable to a single structure merging
			
 
				-<TT>Disc</TT>
			
 
				-and
			
 
				-<TT>Buffer</TT>,
			
 
				-which I split to make the cache more general
			
 
				-&#8212; and thinking about how global substitute could be implemented.
			
 
				-The answer was clearly that it had to be done in two passes,
			
 
				-and the transcript-oriented implementation fell out naturally.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-was written bottom-up,
			
 
				-starting from the data structures and algorithms for manipulating text,
			
 
				-through the command language and up to the code for maintaining
			
 
				-the display.
			
 
				-In retrospect, it turned out well, but this implementation method is
			
 
				-not recommended in general.
			
 
				-There were several times when I had a large body of interesting code
			
 
				-assembled and no clue how to proceed with it.
			
 
				-The command language, in particular, took almost a year to figure out,
			
 
				-but can be implemented (given what was there at the beginning of that year)
			
 
				-in a day or two.  Similarly, inventing the
			
 
				-<TT>Rasp</TT>
			
 
				-data structure delayed the
			
 
				-connection of the host and terminal pieces by another few months.
			
 
				-<TT>Sam</TT>
			
 
				-took about two years to write, although only about four months were
			
 
				-spent actually working on it.
			
 
				-</P>
			
 
				-<P>
			
 
				-Part of the design process was unusual:
			
 
				-the subset of the protocol that maintains the
			
 
				-<TT>Rasp</TT>
			
 
				-was simulated, debugged
			
 
				-and verified by an automatic protocol analyzer,<sup>21</sup> and was bug-free
			
 
				-from the start.
			
 
				-The rest of the protocol, concerned mostly
			
 
				-with keeping menus up to date,
			
 
				-was unfortunately too unwieldy for such analysis,
			
 
				-and was debugged by more traditional methods, primarily
			
 
				-by logging in a file all messages in and out of the host.
			
 
				-</P>
			
 
				-<H4>Reflections
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-is essentially the only interactive editor used by the sixty or so members of
			
 
				-the computing science research center in which I work.
			
 
				-The same could not be said of
			
 
				-<TT>jim</TT>;
			
 
				-the lack of a command language kept some people from adopting it.
			
 
				-The union of a user interface as comfortable as
			
 
				-<TT>jim</TT>'s
			
 
				-with a command language as powerful as
			
 
				-<TT>ed</TT>'s&#8224;
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> &#8224;The people who criticize
			
 
				-<TT>ed</TT>
			
 
				-as an interactive program often forget that it and its close relative
			
 
				-<TT>sed</TT><sup>7</sup>
			
 
				-still thrive as programmable editors.  The strength of these programs is
			
 
				-independent of their convenience for interactive editing.
			
 
				-<br>
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-is essential to
			
 
				-<TT>sam</TT>'s
			
 
				-success.
			
 
				-When
			
 
				-<TT>sam</TT>
			
 
				-was first made available to the
			
 
				-<TT>jim</TT>
			
 
				-community,
			
 
				-almost everyone switched to it within two or three days.
			
 
				-In the months that followed, even people who had never adopted
			
 
				-<TT>jim</TT>
			
 
				-started using
			
 
				-<TT>sam</TT>
			
 
				-exclusively.
			
 
				-<P>
			
 
				-To be honest,
			
 
				-<TT>ed</TT>
			
 
				-still gets occasional use, but usually when
			
 
				-something quick needs to be done and the overhead of
			
 
				-downloading the terminal part of
			
 
				-<TT>sam</TT>
			
 
				-isn't worth the trouble.
			
 
				-Also, as a `line' editor,
			
 
				-<TT>sam</TT>
			
 
				-<TT>-d</TT>
			
 
				-is a bit odd;
			
 
				-when using a good old ASCII terminal, it's comforting to have
			
 
				-a true line editor.
			
 
				-But it is fair to say that
			
 
				-<TT>sam</TT>'s
			
 
				-command language has displaced
			
 
				-<TT>ed</TT>'s
			
 
				-for most of the complicated editing that has kept line editors
			
 
				-(that is, command-driven editors) with us.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Sam</TT>'s
			
 
				-command language is even fancier than
			
 
				-<TT>ed</TT>'s,
			
 
				-and most
			
 
				-<TT>sam</TT>
			
 
				-customers don't come near to using all its capabilities.
			
 
				-Does it need to be so sophisticated?
			
 
				-I think the answer is yes, for two reasons.
			
 
				-</P>
			
 
				-<P>
			
 
				-First, the
			
 
				-<I>model</I>
			
 
				-for
			
 
				-<TT>sam</TT>'s
			
 
				-command language is really relatively simple, and certainly simpler than that of
			
 
				-<TT>ed</TT>.
			
 
				-For instance, there is only one kind of textual loop in
			
 
				-<TT>sam</TT>
			
 
				-&#8212; the
			
 
				-<TT>x</TT>
			
 
				-command &#8212;
			
 
				-while
			
 
				-<TT>ed</TT>
			
 
				-has three (the
			
 
				-<TT>g</TT>
			
 
				-command, the global flag on substitutions, and the implicit loop over
			
 
				-lines in multi-line substitutions).
			
 
				-Also,
			
 
				-<TT>ed</TT>'s
			
 
				-substitute command is necessary to make changes within lines, but in
			
 
				-<TT>sam</TT>
			
 
				-the
			
 
				-<TT>s</TT>
			
 
				-command is more of a familiar convenience than a necessity;
			
 
				-<TT>c</TT>
			
 
				-and
			
 
				-<TT>t</TT>
			
 
				-can do all the work.
			
 
				-</P>
			
 
				-<P>
			
 
				-Second,
			
 
				-given a community that expects an editor to be about as powerful as
			
 
				-<TT>ed</TT>,
			
 
				-it's hard to see how
			
 
				-<TT>sam</TT>
			
 
				-could really be much simpler and still satisfy that expectation.
			
 
				-People want to do ``global substitutes,'' and most are content
			
 
				-to have the recipe for that and a few other fancy changes.
			
 
				-The sophistication of the command language is really just a veneer
			
 
				-over a design that makes it possible to do global substitutes
			
 
				-in a screen editor.
			
 
				-Some people will always want something more, however, and it's gratifying to
			
 
				-be able to provide it.
			
 
				-The real power of
			
 
				-<TT>sam</TT>'s
			
 
				-command language comes from composability of the operators, which is by
			
 
				-nature orthogonal to the underlying model.
			
 
				-In other words,
			
 
				-<TT>sam</TT>
			
 
				-is not itself complex, but it makes complex things possible.
			
 
				-If you don't want to do anything complex, you can ignore the
			
 
				-complexity altogether, and many people do so.
			
 
				-</P>
			
 
				-<P>
			
 
				-Sometimes I am asked the opposite question: why didn't I just make
			
 
				-<TT>sam</TT>
			
 
				-a real programmable editor, with macros and variables and so on?
			
 
				-The main reason is a matter of taste: I like the editor
			
 
				-to be the same every time I use it.
			
 
				-There is one technical reason, though:
			
 
				-programmability in editors is largely a workaround for insufficient
			
 
				-interactivity.
			
 
				-Programmable editors are used to make particular, usually short-term,
			
 
				-things easy to do, such as by providing shorthands for common actions.
			
 
				-If things are generally easy to do in the first place,
			
 
				-shorthands are not as helpful.
			
 
				-<TT>Sam</TT>
			
 
				-makes common editing operations very easy, and the solutions to
			
 
				-complex editing problems seem commensurate with the problems themselves.
			
 
				-Also, the ability to edit the
			
 
				-<TT>sam</TT>
			
 
				-window makes it easy to repeat commands &#8212; it only takes a mouse button click
			
 
				-to execute a command again.
			
 
				-</P>
			
 
				-<H4>Pros and cons
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sam</TT>
			
 
				-has several other good points,
			
 
				-and its share of problems.
			
 
				-Among the good things is the idea of
			
 
				-structural regular expressions,
			
 
				-whose usefulness has only begun to be explored.
			
 
				-They were arrived at serendipitously when I attempted to distill the essence of
			
 
				-<TT>ed</TT>'s
			
 
				-way of doing global substitution and recognized that the looping command in
			
 
				-<TT>ed</TT>
			
 
				-was implicitly imposing a structure (an array of lines) on the file.
			
 
				-<P>
			
 
				-Another of
			
 
				-<TT>sam</TT>'s
			
 
				-good things is its undo capability.
			
 
				-I had never before used an editor with a true undo,
			
 
				-but I would never go back now.
			
 
				-Undo
			
 
				-<I>must</I>
			
 
				-be done well, but if it is, it can be relied on.
			
 
				-For example,
			
 
				-it's safe to experiment if you're not sure how to write some intricate command,
			
 
				-because if you make a mistake, it can be fixed simply and reliably.
			
 
				-I learned two things about undo from writing
			
 
				-<TT>sam</TT>:
			
 
				-first, it's easy to provide if you design it in from the beginning, and
			
 
				-second, it's necessary, particularly if the system has some subtle
			
 
				-properties that may be unfamiliar or error-prone for users.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Sam</TT>'s
			
 
				-lack of internal limits and sizes is a virtue.
			
 
				-Because it avoids all fixed-size tables and data structures,
			
 
				-<TT>sam</TT>
			
 
				-is able to make global changes to files that some of our other
			
 
				-tools cannot even read.
			
 
				-Moreover, the design keeps the performance linear when doing such
			
 
				-operations, although I must admit
			
 
				-<TT>sam</TT>
			
 
				-does get slow when editing a huge file.
			
 
				-</P>
			
 
				-<P>
			
 
				-Now, the problems.
			
 
				-Externally, the most obvious is that it is poorly integrated into the
			
 
				-surrounding window system.
			
 
				-By design, the user interface in
			
 
				-<TT>sam</TT>
			
 
				-feels almost identical to that of
			
 
				-<TT>mux</TT>,
			
 
				-but a thick wall separates text in
			
 
				-<TT>sam</TT>
			
 
				-from the programs running in
			
 
				-<TT>mux</TT>.
			
 
				-For instance, the `snarf buffer' in
			
 
				-<TT>sam</TT>
			
 
				-must be maintained separately from that in
			
 
				-<TT>mux</TT>.
			
 
				-This is regrettable, but probably necessary given the unusual configuration
			
 
				-of the system, with a programmable terminal on the far end of an RS-232 link.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Sam</TT>
			
 
				-is reliable; otherwise, people wouldn't use it.
			
 
				-But it was written over such a long time, and has so many new (to me)
			
 
				-ideas in it, that I would like to see it done over again to clean
			
 
				-up the code and remove many of the lingering problems in the implementation.
			
 
				-The worst part is in the interconnection of the host and terminal parts,
			
 
				-which might even be able to go away in a redesign for a more
			
 
				-conventional window system.
			
 
				-The program must be split in two to use the terminal effectively,
			
 
				-but the low bandwidth of the connection forces the separation to
			
 
				-occur in an inconvenient part of the design if performance is to be acceptable.
			
 
				-A simple remote procedure call
			
 
				-protocol driven by the host, emitting only graphics
			
 
				-commands, would be easy to write but wouldn't have nearly the
			
 
				-necessary responsiveness.  On the other hand, if the terminal were in control
			
 
				-and requested much simpler file services from the host, regular expression
			
 
				-searches would require that the terminal read the entire file over its RS-232
			
 
				-link, which would be unreasonably slow.
			
 
				-A compromise in which either end can take control is necessary.
			
 
				-In retrospect, the communications protocol should have been
			
 
				-designed and verified formally, although I do not know of any tool
			
 
				-that can adequately relate the protocol to
			
 
				-its implementation.
			
 
				-</P>
			
 
				-<P>
			
 
				-Not all of
			
 
				-<TT>sam</TT>'s
			
 
				-users are comfortable with its command language, and few are adept.
			
 
				-Some (venerable) people use a sort of
			
 
				-<TT>ed</TT>
			
 
				-``
			
 
				-subset'' of
			
 
				-<TT>sam</TT>'s
			
 
				-command language,
			
 
				-and even ask why
			
 
				-<TT>sam</TT>'s
			
 
				-command language is not exactly
			
 
				-<TT>ed</TT>'s.
			
 
				-(The reason, of course, is that
			
 
				-<TT>sam</TT>'s
			
 
				-model for text does not include newlines, which are central to
			
 
				-<TT>ed</TT>.
			
 
				-Making the text an array of newlines to the command language would
			
 
				-be too much of a break from the seamless model provided by the mouse.
			
 
				-Some editors, such as
			
 
				-<TT>vi</TT>,
			
 
				-are willing to make this break, though.)
			
 
				-The difficulty is that
			
 
				-<TT>sam</TT>'s
			
 
				-syntax is so close to
			
 
				-<TT>ed</TT>'s
			
 
				-that people believe it
			
 
				-<I>should</I>
			
 
				-be the same.
			
 
				-I thought, with some justification in hindsight,
			
 
				-that making
			
 
				-<TT>sam</TT>
			
 
				-similar to
			
 
				-<TT>ed</TT>
			
 
				-would make it easier to learn and to accept.
			
 
				-But I may have overstepped and raised the users'
			
 
				-expectations too much.
			
 
				-It's hard to decide which way to resolve this problem.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally, there is a tradeoff in
			
 
				-<TT>sam</TT>
			
 
				-that was decided by the environment in which it runs:
			
 
				-<TT>sam</TT>
			
 
				-is a multi-file editor, although in a different system there might instead be
			
 
				-multiple single-file editors.
			
 
				-The decision was made primarily because starting a new program in a Blit is
			
 
				-time-consuming.
			
 
				-If the choice could be made freely, however, I would
			
 
				-still choose the multi-file architecture, because it allows
			
 
				-groups of files to be handled as a unit;
			
 
				-the usefulness of the multi-file commands is incontrovertible.
			
 
				-It is delightful to have the source to an entire program
			
 
				-available at your fingertips.
			
 
				-</P>
			
 
				-<H4>Acknowledgements
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Tom Cargill suggested the idea behind the
			
 
				-<TT>Rasp</TT>
			
 
				-data structure.
			
 
				-Norman Wilson and Ken Thompson influenced the command language.
			
 
				-This paper was improved by comments from
			
 
				-Al Aho,
			
 
				-Jon Bentley,
			
 
				-Chris Fraser,
			
 
				-Gerard Holzmann,
			
 
				-Brian Kernighan,
			
 
				-Ted Kowalski,
			
 
				-Doug McIlroy
			
 
				-and
			
 
				-Dennis Ritchie.
			
 
				-<H4>REFERENCES
			
 
				-</H4>
			
 
				-<P>
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT> 1.<DD>
			
 
				-R. Pike,
			
 
				-`The Blit: a multiplexed graphics terminal,'
			
 
				-AT&amp;T Bell Labs. Tech. J.,
			
 
				-<B>63</B>,
			
 
				-(8),
			
 
				-1607-1631 (1984).
			
 
				-<DT> 2.<DD>
			
 
				-L. Johnson,
			
 
				-<I>MacWrite,</I>
			
 
				-Apple Computer Inc., Cupertino, Calif. 1983.
			
 
				-<DT> 3.<DD>
			
 
				-B. Lampson,
			
 
				-`Bravo Manual,'
			
 
				-in
			
 
				-Alto User's Handbook,
			
 
				-pp. 31-62,
			
 
				-Xerox Palo Alto Research Center,
			
 
				-Palo Alto, Calif.
			
 
				-1979.
			
 
				-<DT> 4.<DD>
			
 
				-W. Teitelman,
			
 
				-`A tour through Cedar,'
			
 
				-IEEE Software,
			
 
				-<B>1</B>
			
 
				-(2), 44-73 (1984).
			
 
				-<DT> 5.<DD>
			
 
				-J. Gutknecht,
			
 
				-`Concepts of the text editor Lara,'
			
 
				-Comm. ACM,
			
 
				-<B>28</B>,
			
 
				-(9),
			
 
				-942-960 (1985).
			
 
				-<DT> 6.<DD>
			
 
				-Bell Telephone Laboratories,
			
 
				-UNIX Programmer's Manual,
			
 
				-Holt, Rinehart and Winston, New York 1983.
			
 
				-<DT> 7.<DD>
			
 
				-B. W. Kernighan and R. Pike,
			
 
				-The Unix Programming Environment,
			
 
				-Prentice-Hall, Englewood Cliffs, New Jersey 1984.
			
 
				-<DT> 8.<DD>
			
 
				-Unix Time-Sharing System Programmer's Manual, Research Version, Ninth Edition,
			
 
				-Volume 1,
			
 
				-AT&amp;T Bell Laboratories, Murray Hill, New Jersey 1986.
			
 
				-<DT> 9.<DD>
			
 
				-Unix Time-Sharing System Programmer's Manual, 4.1 Berkeley Software Distribution,
			
 
				-Volumes 1 and 2C,
			
 
				-University of California, Berkeley, Calif. 1981.
			
 
				-<DT>10.<DD>
			
 
				-R. Pike,
			
 
				-`Structural Regular Expressions,'
			
 
				-Proc. EUUG Spring Conf., Helsinki 1987,
			
 
				-Eur. Unix User's Group, Buntingford, Herts, UK 1987.
			
 
				-<DT>11.<DD>
			
 
				-A. Goldberg,
			
 
				-Smalltalk-80 &#8212; The Interactive Programming Environment,
			
 
				-Addison-Wesley, Reading, Mass. 1984.
			
 
				-<DT>12.<DD>
			
 
				-K. Thompson,
			
 
				-`Regular expression search algorithm,'
			
 
				-Comm. ACM,
			
 
				-<B>11</B>,
			
 
				-(6),
			
 
				-419-422 (1968).
			
 
				-<DT>13.<DD>
			
 
				-A. V. Aho, J. E. Hopcroft and J. D. Ullman,
			
 
				-The Design and Analysis of Computer Algorithms,
			
 
				-Addison-Wesley, Reading, Mass. 1974.
			
 
				-<DT>14.<DD>
			
 
				-B. W. Kernighan and D. M. Ritchie,
			
 
				-The C Programming Language,
			
 
				-Prentice-Hall, Englewood Cliffs, New Jersey 1978.
			
 
				-<DT>15.<DD>
			
 
				-W. M. Waite,
			
 
				-`The cost of lexical analysis,'
			
 
				-Softw. Pract. Exp.,
			
 
				-<B>16</B>,
			
 
				-(5),
			
 
				-473-488 (1986).
			
 
				-<DT>16.<DD>
			
 
				-C. W. Fraser,
			
 
				-`A generalized text editor,'
			
 
				-Comm. ACM,
			
 
				-<B>23</B>,
			
 
				-(3),
			
 
				-154-158 (1980).
			
 
				-<DT>17.<DD>
			
 
				-R. Pike,
			
 
				-`Graphics in overlapping bitmap layers,'
			
 
				-ACM Trans. on Graph.,
			
 
				-<B>2</B>,
			
 
				-(2)
			
 
				-135-160 (1983).
			
 
				-<DT>18.<DD>
			
 
				-L. J. Guibas and J. Stolfi,
			
 
				-`A language for bitmap manipulation,'
			
 
				-ACM Trans. on Graph.,
			
 
				-<B>1</B>,
			
 
				-(3),
			
 
				-191-214 (1982).
			
 
				-<DT>19.<DD>
			
 
				-R. Pike, B. Locanthi and J. Reiser,
			
 
				-`Hardware/software trade-offs for bitmap graphics on the Blit,'
			
 
				-Softw. Pract. Exp.,
			
 
				-<B>15</B>,
			
 
				-(2),
			
 
				-131-151 (1985).
			
 
				-<DT>20.<DD>
			
 
				-T. A. Cargill,
			
 
				-`The feel of Pi,'
			
 
				-Winter USENIX Conference Proceedings,
			
 
				-Denver 1986,
			
 
				-62-71,
			
 
				-USENIX Assoc., El Cerrito, CA.
			
 
				-<DT>21.<DD>
			
 
				-G. J. Holzmann,
			
 
				-`Tracing protocols,'
			
 
				-AT&amp;T Tech. J.,
			
 
				-<B>64</B>,
			
 
				-(10),
			
 
				-2413-2434 (1985).
			
 
				-
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/sleep.html
+++ b/sys/doc/sleep.html
@@ -1,547 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-Process Sleep and Wakeup on a Shared-memory Multiprocessor
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Process Sleep and Wakeup on a Shared-memory Multiprocessor
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-Dave Presotto<br>
			
 
				-Ken Thompson<br>
			
 
				-Gerard Holzmann<br>
			
 
				-<br>&#32;<br>
			
 
				-rob,presotto,ken,gerard@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Appeared in a slightly different form in
			
 
				-Proceedings of the Spring 1991 EurOpen Conference,
			
 
				-Troms&oslash;, Norway, 1991, pp. 161-166.
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-The problem of enabling a `sleeping' process on a shared-memory multiprocessor
			
 
				-is a difficult one, especially if the process is to be awakened by an interrupt-time
			
 
				-event.  We present here the code
			
 
				-for sleep and wakeup primitives that we use in our multiprocessor system.
			
 
				-The code has been exercised by years of active use and by a verification
			
 
				-system.
			
 
				-</DL>
			
 
				-<br>&#32;<br>
			
 
				-Our problem is to synchronise processes on a symmetric shared-memory multiprocessor.
			
 
				-Processes suspend execution, or
			
 
				-<I>sleep,</I>
			
 
				-while awaiting an enabling event such as an I/O interrupt.
			
 
				-When the event occurs, the process is issued a
			
 
				-<I>wakeup</I>
			
 
				-to resume its execution.
			
 
				-During these events, other processes may be running and other interrupts
			
 
				-occurring on other processors.
			
 
				-<br>&#32;<br>
			
 
				-More specifically, we wish to implement subroutines called
			
 
				-<TT>sleep</TT>,
			
 
				-callable by a process to relinquish control of its current processor,
			
 
				-and
			
 
				-<TT>wakeup</TT>,
			
 
				-callable by another process or an interrupt to resume the execution
			
 
				-of a suspended process.
			
 
				-The calling conventions of these subroutines will remain unspecified
			
 
				-for the moment.
			
 
				-<br>&#32;<br>
			
 
				-We assume the processors have an atomic test-and-set or equivalent
			
 
				-operation but no other synchronisation method.  Also, we assume interrupts
			
 
				-can occur on any processor at any time, except on a processor that has
			
 
				-locally inhibited them.
			
 
				-<br>&#32;<br>
			
 
				-The problem is the generalisation to a multiprocessor of a familiar
			
 
				-and well-understood uniprocessor problem.  It may be reduced to a
			
 
				-uniprocessor problem by using a global test-and-set to serialise the
			
 
				-sleeps and wakeups,
			
 
				-which is equivalent to synchronising through a monitor.
			
 
				-For performance and cleanliness, however,
			
 
				-we prefer to allow the interrupt handling and process control to be multiprocessed.
			
 
				-<br>&#32;<br>
			
 
				-Our attempts to solve the sleep/wakeup problem in Plan 9
			
 
				-[Pik90]
			
 
				-prompted this paper.
			
 
				-We implemented solutions several times over several months and each
			
 
				-time convinced ourselves &#8212; wrongly &#8212; they were correct.
			
 
				-Multiprocessor algorithms can be
			
 
				-difficult to prove correct by inspection and formal reasoning about them
			
 
				-is impractical.  We finally developed an algorithm we trust by
			
 
				-verifying our code using an
			
 
				-empirical testing tool.
			
 
				-We present that code here, along with some comments about the process by
			
 
				-which it was designed.
			
 
				-<H4>History
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Since processes in Plan 9 and the UNIX
			
 
				-system have similar structure and properties, one might ask if
			
 
				-UNIX
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-[Bac86]
			
 
				-could not easily be adapted from their standard uniprocessor implementation
			
 
				-to our multiprocessor needs.
			
 
				-The short answer is, no.
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-UNIX
			
 
				-routines
			
 
				-take as argument a single global address
			
 
				-that serves as a unique
			
 
				-identifier to connect the wakeup with the appropriate process or processes.
			
 
				-This has several inherent disadvantages.
			
 
				-From the point of view of
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>,
			
 
				-it is difficult to associate a data structure with an arbitrary address;
			
 
				-the routines are unable to maintain a state variable recording the
			
 
				-status of the event and processes.
			
 
				-(The reverse is of course easy &#8212; we could
			
 
				-require the address to point to a special data structure &#8212;
			
 
				-but we are investigating
			
 
				-UNIX
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>,
			
 
				-not the code that calls them.)
			
 
				-Also, multiple processes sleep `on' a given address, so
			
 
				-<TT>wakeup</TT>
			
 
				-must enable them all, and let process scheduling determine which process
			
 
				-actually benefits from the event.
			
 
				-This is inefficient;
			
 
				-a queueing mechanism would be preferable
			
 
				-but, again, it is difficult to associate a queue with a general address.
			
 
				-Moreover, the lack of state means that
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-cannot know what the corresponding process (or interrupt) is doing;
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-must be executed atomically.
			
 
				-On a uniprocessor it suffices to disable interrupts during their
			
 
				-execution.
			
 
				-On a multiprocessor, however,
			
 
				-most processors
			
 
				-can inhibit interrupts only on the current processor,
			
 
				-so while a process is executing
			
 
				-<TT>sleep</TT>
			
 
				-the desired interrupt can come and go on another processor.
			
 
				-If the wakeup is to be issued by another process, the problem is even harder.
			
 
				-Some inter-process mutual exclusion mechanism must be used,
			
 
				-which, yet again, is difficult to do without a way to communicate state.
			
 
				-<br>&#32;<br>
			
 
				-In summary, to be useful on a multiprocessor,
			
 
				-UNIX
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-must either be made to run atomically on a single
			
 
				-processor (such as by using a monitor)
			
 
				-or they need a richer model for their communication.
			
 
				-<H4>The design
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Consider the case of an interrupt waking up a sleeping process.
			
 
				-(The other case, a process awakening a second process, is easier because
			
 
				-atomicity can be achieved using an interlock.)
			
 
				-The sleeping process is waiting for some event to occur, which may be
			
 
				-modeled by a condition coming true.
			
 
				-The condition could be just that the event has happened, or something
			
 
				-more subtle such as a queue draining below some low-water mark.
			
 
				-We represent the condition by a function of one
			
 
				-argument of type
			
 
				-<TT>void*</TT>;
			
 
				-the code supporting the device generating the interrupts
			
 
				-provides such a function to be used by
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-to synchronise.  The function returns
			
 
				-<TT>false</TT>
			
 
				-if the event has not occurred, and
			
 
				-<TT>true</TT>
			
 
				-some time after the event has occurred.
			
 
				-The
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-routines must, of course, work correctly if the
			
 
				-event occurs while the process is executing
			
 
				-<TT>sleep</TT>.
			
 
				-<br>&#32;<br>
			
 
				-We assume that a particular call to
			
 
				-<TT>sleep</TT>
			
 
				-corresponds to a particular call to
			
 
				-<TT>wakeup</TT>,
			
 
				-that is,
			
 
				-at most one process is asleep waiting for a particular event.
			
 
				-This can be guaranteed in the code that calls
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-by appropriate interlocks.
			
 
				-We also assume for the moment that there will be only one interrupt
			
 
				-and that it may occur at any time, even before
			
 
				-<TT>sleep</TT>
			
 
				-has been called.
			
 
				-<br>&#32;<br>
			
 
				-For performance,
			
 
				-we desire that multiple instances of
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-may be running simultaneously on our multiprocessor.
			
 
				-For example, a process calling
			
 
				-<TT>sleep</TT>
			
 
				-to await a character from an input channel need not
			
 
				-wait for another process to finish executing
			
 
				-<TT>sleep</TT>
			
 
				-to await a disk block.
			
 
				-At a finer level, we would like a process reading from one input channel
			
 
				-to be able to execute
			
 
				-<TT>sleep</TT>
			
 
				-in parallel with a process reading from another input channel.
			
 
				-A standard approach to synchronisation is to interlock the channel `driver'
			
 
				-so that only one process may be executing in the channel code at once.
			
 
				-This method is clearly inadequate for our purposes; we need
			
 
				-fine-grained synchronisation, and in particular to apply
			
 
				-interlocks at the level of individual channels rather than at the level
			
 
				-of the channel driver.
			
 
				-<br>&#32;<br>
			
 
				-Our approach is to use an object called a
			
 
				-<I>rendezvous</I>,
			
 
				-which is a data structure through which
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-synchronise.
			
 
				-(The similarly named construct in Ada is a control structure;
			
 
				-ours is an unrelated data structure.)
			
 
				-A rendezvous
			
 
				-is allocated for each active source of events:
			
 
				-one for each I/O channel,
			
 
				-one for each end of a pipe, and so on.
			
 
				-The rendezvous serves as an interlockable structure in which to record
			
 
				-the state of the sleeping process, so that
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-can communicate if the event happens before or while
			
 
				-<TT>sleep</TT>
			
 
				-is executing.
			
 
				-<br>&#32;<br>
			
 
				-Our design for
			
 
				-<TT>sleep</TT>
			
 
				-is therefore a function
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void sleep(Rendezvous *r, int (*condition)(void*), void *arg)
			
 
				-</PRE></TT></DL>
			
 
				-called by the sleeping process.
			
 
				-The argument
			
 
				-<TT>r</TT>
			
 
				-connects the call to
			
 
				-<TT>sleep</TT>
			
 
				-with the call to
			
 
				-<TT>wakeup</TT>,
			
 
				-and is part of the data structure for the (say) device.
			
 
				-The function
			
 
				-<TT>condition</TT>
			
 
				-is described above;
			
 
				-called with argument
			
 
				-<TT>arg</TT>,
			
 
				-it is used by
			
 
				-<TT>sleep</TT>
			
 
				-to decide whether the event has occurred.
			
 
				-<TT>Wakeup</TT>
			
 
				-has a simpler specification:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void wakeup(Rendezvous *r).
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Wakeup</TT>
			
 
				-must be called after the condition has become true.
			
 
				-<H4>An implementation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-<TT>Rendezvous</TT>
			
 
				-data type is defined as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef struct{
			
 
				-	Lock	l;
			
 
				-	Proc	*p;
			
 
				-}Rendezvous;
			
 
				-</PRE></TT></DL>
			
 
				-Our
			
 
				-<TT>Locks</TT>
			
 
				-are test-and-set spin locks.
			
 
				-The routine
			
 
				-<TT>lock(Lock *l)</TT>
			
 
				-returns when the current process holds that lock;
			
 
				-<TT>unlock(Lock *l)</TT>
			
 
				-releases the lock.
			
 
				-<br>&#32;<br>
			
 
				-Here is our implementation of
			
 
				-<TT>sleep</TT>.
			
 
				-Its details are discussed below.
			
 
				-<TT>Thisp</TT>
			
 
				-is a pointer to the current process on the current processor.
			
 
				-(Its value differs on each processor.)
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void
			
 
				-sleep(Rendezvous *r, int (*condition)(void*), void *arg)
			
 
				-{
			
 
				-	int s;
			
 
				-
			
 
				-	s = inhibit();		/* interrupts */
			
 
				-	lock(&amp;r-&gt;l);
			
 
				-
			
 
				-	/*
			
 
				-	 * if condition happened, never mind
			
 
				-	 */
			
 
				-	if((*condition)(arg)){	
			
 
				-		unlock(&amp;r-&gt;l);
			
 
				-		allow();	/* interrupts */
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * now we are committed to
			
 
				-	 * change state and call scheduler
			
 
				-	 */
			
 
				-	if(r-&gt;p)
			
 
				-		error("double sleep %d %d", r-&gt;p-&gt;pid, thisp-&gt;pid);
			
 
				-	thisp-&gt;state = Wakeme;
			
 
				-	r-&gt;p = thisp;
			
 
				-	unlock(&amp;r-&gt;l);
			
 
				-	allow(s);	/* interrupts */
			
 
				-	sched();	/* relinquish CPU */
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Here is
			
 
				-<TT>wakeup</TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void
			
 
				-wakeup(Rendezvous *r)
			
 
				-{
			
 
				-	Proc *p;
			
 
				-	int s;
			
 
				-
			
 
				-	s = inhibit();	/* interrupts; return old state */
			
 
				-	lock(&amp;r-&gt;l);
			
 
				-	p = r-&gt;p;
			
 
				-	if(p){
			
 
				-		r-&gt;p = 0;
			
 
				-		if(p-&gt;state != Wakeme)
			
 
				-			panic("wakeup: not Wakeme");
			
 
				-		ready(p);
			
 
				-	}
			
 
				-	unlock(&amp;r-&gt;l);
			
 
				-	if(s)
			
 
				-		allow();
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-both begin by disabling interrupts
			
 
				-and then locking the rendezvous structure.
			
 
				-Because
			
 
				-<TT>wakeup</TT>
			
 
				-may be called in an interrupt routine, the lock must be set only
			
 
				-with interrupts disabled on the current processor,
			
 
				-so that if the interrupt comes during
			
 
				-<TT>sleep</TT>
			
 
				-it will occur only on a different processor;
			
 
				-if it occurred on the processor executing
			
 
				-<TT>sleep</TT>,
			
 
				-the spin lock in
			
 
				-<TT>wakeup</TT>
			
 
				-would hang forever.
			
 
				-At the end of each routine, the lock is released and processor priority
			
 
				-returned to its previous value.
			
 
				-(<TT>Wakeup</TT>
			
 
				-needs to inhibit interrupts in case
			
 
				-it is being called by a process;
			
 
				-this is a no-op if called by an interrupt.)
			
 
				-<br>&#32;<br>
			
 
				-<TT>Sleep</TT>
			
 
				-checks to see if the condition has become true, and returns if so.
			
 
				-Otherwise the process posts its name in the rendezvous structure where
			
 
				-<TT>wakeup</TT>
			
 
				-may find it, marks its state as waiting to be awakened
			
 
				-(this is for error checking only) and goes to sleep by calling
			
 
				-<TT>sched()</TT>.
			
 
				-The manipulation of the rendezvous structure is all done under the lock,
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-only examines it under lock, so atomicity and mutual exclusion
			
 
				-are guaranteed.
			
 
				-<br>&#32;<br>
			
 
				-<TT>Wakeup</TT>
			
 
				-has a simpler job.  When it is called, the condition has implicitly become true,
			
 
				-so it locks the rendezvous, sees if a process is waiting, and readies it to run.
			
 
				-<H4>Discussion
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The synchronisation technique used here
			
 
				-is similar to known methods, even as far back as Saltzer's thesis
			
 
				-[Sal66].
			
 
				-The code looks trivially correct in retrospect: all access to data structures is done
			
 
				-under lock, and there is no place that things may get out of order.
			
 
				-Nonetheless, it took us several iterations to arrive at the above
			
 
				-implementation, because the things that
			
 
				-<I>can</I>
			
 
				-go wrong are often hard to see.  We had four earlier implementations
			
 
				-that were examined at great length and only found faulty when a new,
			
 
				-different style of device or activity was added to the system.
			
 
				-<br>&#32;<br>
			
 
				-Here, for example, is an incorrect implementation of wakeup,
			
 
				-closely related to one of our versions.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-void
			
 
				-wakeup(Rendezvous *r)
			
 
				-{
			
 
				-	Proc *p;
			
 
				-	int s;
			
 
				-
			
 
				-	p = r-&gt;p;
			
 
				-	if(p){
			
 
				-		s = inhibit();
			
 
				-		lock(&amp;r-&gt;l);
			
 
				-		r-&gt;p = 0;
			
 
				-		if(p-&gt;state != Wakeme)
			
 
				-			panic("wakeup: not Wakeme");
			
 
				-		ready(p);
			
 
				-		unlock(&amp;r-&gt;l);
			
 
				-		if(s)
			
 
				-			allow();
			
 
				-	}
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The mistake is that the reading of
			
 
				-<TT>r-&gt;p</TT>
			
 
				-may occur just as the other process calls
			
 
				-<TT>sleep</TT>,
			
 
				-so when the interrupt examines the structure it sees no one to wake up,
			
 
				-and the sleeping process misses its wakeup.
			
 
				-We wrote the code this way because we reasoned that the fetch
			
 
				-<TT>p</TT>
			
 
				-<TT>=</TT>
			
 
				-<TT>r-&gt;p</TT>
			
 
				-was inherently atomic and need not be interlocked.
			
 
				-The bug was found by examination when a new, very fast device
			
 
				-was added to the system and sleeps and interrupts were closely overlapped.
			
 
				-However, it was in the system for a couple of months without causing an error.
			
 
				-<br>&#32;<br>
			
 
				-How many errors lurk in our supposedly correct implementation above?
			
 
				-We would like a way to guarantee correctness; formal proofs are beyond
			
 
				-our abilities when the subtleties of interrupts and multiprocessors are
			
 
				-involved.
			
 
				-With that in mind, the first three authors approached the last to see
			
 
				-if his automated tool for checking protocols
			
 
				-[Hol91]
			
 
				-could be
			
 
				-used to verify our new
			
 
				-<TT>sleep</TT>
			
 
				-and
			
 
				-<TT>wakeup</TT>
			
 
				-for correctness.
			
 
				-The code was translated into the language for that system
			
 
				-(with, unfortunately, no way of proving that the translation is itself correct)
			
 
				-and validated by exhaustive simulation.
			
 
				-<br>&#32;<br>
			
 
				-The validator found a bug.
			
 
				-Under our assumption that there is only one interrupt, the bug cannot
			
 
				-occur, but in the more general case of multiple interrupts synchronising
			
 
				-through the same condition function and rendezvous,
			
 
				-the process and interrupt can enter a peculiar state.
			
 
				-A process may return from
			
 
				-<TT>sleep</TT>
			
 
				-with the condition function false
			
 
				-if there is a delay between
			
 
				-the condition coming true and
			
 
				-<TT>wakeup</TT>
			
 
				-being called,
			
 
				-with the delay occurring
			
 
				-just as the receiving process calls
			
 
				-<TT>sleep</TT>.
			
 
				-The condition is now true, so that process returns immediately,
			
 
				-does whatever is appropriate, and then (say) decides to call
			
 
				-<TT>sleep</TT>
			
 
				-again.  This time the condition is false, so it goes to sleep.
			
 
				-The wakeup process then finds a sleeping process,
			
 
				-and wakes it up, but the condition is now false.
			
 
				-<br>&#32;<br>
			
 
				-There is an easy (and verified) solution: at the end of
			
 
				-<TT>sleep</TT>
			
 
				-or after
			
 
				-<TT>sleep</TT>
			
 
				-returns,
			
 
				-if the condition is false, execute
			
 
				-<TT>sleep</TT>
			
 
				-again.  This re-execution cannot repeat; the second synchronisation is guaranteed
			
 
				-to function under the external conditions we are supposing.
			
 
				-<br>&#32;<br>
			
 
				-Even though the original code is completely
			
 
				-protected by interlocks and had been examined carefully by all of us
			
 
				-and believed correct, it still had problems.
			
 
				-It seems to us that some exhaustive automated analysis is
			
 
				-required of multiprocessor algorithms to guarantee their safety.
			
 
				-Our experience has confirmed that it is almost impossible to
			
 
				-guarantee by inspection or simple testing the correctness
			
 
				-of a multiprocessor algorithm.  Testing can demonstrate the presence
			
 
				-of bugs but not their absence
			
 
				-[Dij72].
			
 
				-<br>&#32;<br>
			
 
				-We close by claiming that the code above with
			
 
				-the suggested modification passes all tests we have for correctness
			
 
				-under the assumptions used in the validation.
			
 
				-We would not, however, go so far as to claim that it is universally correct.
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Bac86] Maurice J. Bach,
			
 
				-<I>The Design of the UNIX Operating System,</I>
			
 
				-Prentice-Hall,
			
 
				-Englewood Cliffs,
			
 
				-1986.
			
 
				-<br>&#32;<br>
			
 
				-[Dij72] Edsger W. Dijkstra,
			
 
				-``The Humble Programmer - 1972 Turing Award Lecture'',
			
 
				-<I>Comm. ACM,</I>
			
 
				-15(10), pp. 859-866, 
			
 
				-October 1972.
			
 
				-<br>&#32;<br>
			
 
				-[Hol91] Gerard J. Holzmann,
			
 
				-<I>Design and Validation of Computer Protocols,</I>
			
 
				-Prentice-Hall,
			
 
				-Englewood Cliffs,
			
 
				-1991.
			
 
				-<br>&#32;<br>
			
 
				-[Pik90]
			
 
				-Rob Pike,
			
 
				-Dave Presotto,
			
 
				-Ken Thompson,
			
 
				-Howard Trickey,
			
 
				-``Plan 9 from Bell Labs'',
			
 
				-<I>Proceedings of the Summer 1990 UKUUG Conference,</I>
			
 
				-pp. 1-9,
			
 
				-London,
			
 
				-July, 1990.
			
 
				-<br>&#32;<br>
			
 
				-[Sal66] Jerome H. Saltzer,
			
 
				-<I>Traffic Control in a Multiplexed Computer System,</I>
			
 
				-MIT,
			
 
				-Cambridge, Mass.,
			
 
				-1966.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/spin.html
+++ b/sys/doc/spin.html
@@ -1,2490 +0,0 @@
 
				-<html>
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-<br><img src="-.19126690.gif"><br>
			
 
				-<title>
			
 
				--
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Using <small>SPIN</small>
			
 
				-</H1>
			
 
				-<DL><DD><I>Gerard J. Holzmann<br>
			
 
				-gerard@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<small>SPIN</small> can be used for proving or disproving logical properties
			
 
				-of concurrent systems.
			
 
				-To render the proofs, a concurrent system is first
			
 
				-modeled in a formal specification language called <small>PROMELA</small>.
			
 
				-The language allows one to specify the behaviors
			
 
				-of asynchronously executing
			
 
				-processes that may interact through synchronous
			
 
				-or asynchronous message passing, or through direct
			
 
				-access to shared variables.
			
 
				-<br>&#32;<br>
			
 
				-System models specified in this way can be verified
			
 
				-for both safety and liveness properties. The specification
			
 
				-of general properties in linear time temporal logic is
			
 
				-also supported.
			
 
				-<br>&#32;<br>
			
 
				-The first part of this manual
			
 
				-discusses the basic features of the specification language <small>PROMELA</small>.
			
 
				-The second part describes the verifier <small>SPIN</small>.
			
 
				-</DL>
			
 
				-<H4>1 The Language <small>PROMELA</small>
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<small>PROMELA</small> is short for Protocol Meta Language [Ho91].
			
 
				-<small>PROMELA</small> is a <I>modeling</I> language, not a programming language.
			
 
				-A formal model differs in two essential ways from an implementation.
			
 
				-First, a model is meant to be an abstraction of a design
			
 
				-that contains only those aspects of the design that are
			
 
				-directly relevant to the properties one is interested in proving.
			
 
				-Second, a formal model must contain things that are typically not part
			
 
				-of an implementation, such as worst-case assumptions about
			
 
				-the behavior of the environment that may interact with the
			
 
				-system being studied, and a formal statement of relevant correctness
			
 
				-properties. It is possible to mechanically extract abstract models
			
 
				-from implementation level code, as discussed, for instance in [HS99].
			
 
				-<br>&#32;<br>
			
 
				-Verification with <small>SPIN</small> is often performed in a series of steps,
			
 
				-with the construction of increasingly detailed models.
			
 
				-Each model can be verified under different types of
			
 
				-assumptions about the environment and for different
			
 
				-types of correctness properties.
			
 
				-If a property is not valid for the given assumptions about
			
 
				-system behavior, the verifier can produce a counter-example
			
 
				-that demonstrates how the property may be violated.
			
 
				-If a property is valid, it may be possible to simplify the
			
 
				-model based on that fact, and prove still other properties.
			
 
				-<br>&#32;<br>
			
 
				-Section 1.1 covers the basic building blocks of the language.
			
 
				-Section 1.2 introduces the control flow structures.
			
 
				-Section 1.3 explains how correctness properties are specified.
			
 
				-Section 1.4 concludes the first part with a discussion of
			
 
				-special predefined variables and functions that can be used to
			
 
				-express some correctness properties.
			
 
				-<br>&#32;<br>
			
 
				-Up to date manual pages for <small>SPIN</small> can always be found online at:
			
 
				-http://cm.bell-labs.com/cm/cs/what/spin/Man/
			
 
				-<H4>1.1 Basics
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-A <small>PROMELA</small> model can contain three different types of objects:
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DD>
			
 
				-* Processes (section 1.1.1),
			
 
				-<br>
			
 
				-* Variables (section 1.1.2),
			
 
				-<br>
			
 
				-* Message channels (section 1.1.3).
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-All processes are global objects.
			
 
				-For obvious reasons, a <small>PROMELA</small> model must contain at least one
			
 
				-process to be meaningful.
			
 
				-Since <small>SPIN</small> is specifically meant to prove properties of
			
 
				-concurrent systems, a model typically contains more than
			
 
				-one process.
			
 
				-<br>&#32;<br>
			
 
				-Message channels and variables, the two basic types of data objects,
			
 
				-can be declared with either a global scope or a local scope.
			
 
				-A data object with global scope can be referred to by all processes.
			
 
				-A data object with a local scope can be referred to by just a
			
 
				-single process: the process that declares and instantiates the object.
			
 
				-As usual, all objects must be declared in the specification
			
 
				-before they are referenced.
			
 
				-<H4>1.1.1 Processes
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Here is a simple process that does nothing except print
			
 
				-a line of text:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-init {
			
 
				-	printf("it works\n")
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-There are a few things to note.
			
 
				-<TT>Init</TT>
			
 
				-is a predefined keyword from the language.
			
 
				-It can be used to declare and instantiate
			
 
				-a single initial process in the model.
			
 
				-(It is comparable to the
			
 
				-<TT>main</TT>
			
 
				-procedure of a C program.)
			
 
				-The
			
 
				-<TT>init</TT>
			
 
				-process does not take arguments, but it can
			
 
				-start up (instantiate) other processes that do.
			
 
				-<TT>Printf</TT>
			
 
				-is one of a few built-in procedures in the language.
			
 
				-It behaves the same as the C version.
			
 
				-Note, finally, that no semicolon follows the single
			
 
				-<TT>printf</TT>
			
 
				-statement in the above example.
			
 
				-In <small>PROMELA</small>, semicolons are used as statement separators,
			
 
				-not statement terminators.  (The <small>SPIN</small> parser, however, is
			
 
				-lenient on this issue.)
			
 
				-<br>&#32;<br>
			
 
				-Any process can start new processes by using another
			
 
				-built-in procedure called
			
 
				-<TT>run</TT>.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype you_run(byte x)
			
 
				-{
			
 
				-	printf("my x is: %d\n", x)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-init {
			
 
				-	run you_run(1);
			
 
				-	run you_run(2)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The word
			
 
				-<TT>proctype</TT>
			
 
				-is again a keyword that introduces the declaration
			
 
				-of a new type of process.
			
 
				-In this case, we have named that type
			
 
				-<TT>you_run</TT>
			
 
				-and declared that all instantiations of processes
			
 
				-of this type will take one argument:  a data object
			
 
				-of type
			
 
				-<TT>byte</TT>,
			
 
				-that can be referred to within this process by the name
			
 
				-<TT>x</TT>.
			
 
				-Instances of a
			
 
				-<TT>proctype</TT>
			
 
				-can be created with the predefined procedure
			
 
				-<TT>run</TT>,
			
 
				-as shown in the example.
			
 
				-When the
			
 
				-<TT>run</TT>
			
 
				-statement completes, a copy of the process
			
 
				-has been started, and all its arguments have been
			
 
				-initialized with the arguments provided.
			
 
				-The process may, but need not, have performed
			
 
				-any statement executions at this point.
			
 
				-It is now part of the concurrent system,
			
 
				-and its execution can be interleaved arbitrarily with
			
 
				-those of the other, already executing processes.
			
 
				-(More about the semantics of execution follows shortly.)
			
 
				-<br>&#32;<br>
			
 
				-In many cases, we are only interested in creating a
			
 
				-single instance of each process type that is declared,
			
 
				-and the processes require no arguments.
			
 
				-We can define this by prefixing the keyword
			
 
				-<TT>proctype</TT>
			
 
				-from the process declaration with another keyword:
			
 
				-<TT>active</TT>.
			
 
				-Instances of all active proctypes are created when the
			
 
				-system itself is initialized.
			
 
				-We could, for instance, have avoided the use of
			
 
				-<TT>init</TT>
			
 
				-by declaring the corresponding process in the last example
			
 
				-as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype main() {
			
 
				-	run you_run(1);
			
 
				-	run you_run(2)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Note that there are no parameters to instantiate in this
			
 
				-case.  Had they been declared, they would default to a
			
 
				-zero value, just like all other data objects
			
 
				-that are not explicitly instantiated.
			
 
				-<br>&#32;<br>
			
 
				-Multiple copies of a process type can also be created in
			
 
				-this way.  For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active [4] proctype try_me() {
			
 
				-	printf("hi, i am process %d\n", _pid)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-creates four processes.
			
 
				-A predefined variable
			
 
				-<TT>_pid</TT>
			
 
				-is assigned to each running process, and holds
			
 
				-its unique process instantiation number.
			
 
				-In some cases, this number is needed when a reference
			
 
				-has to be made to a specific process.
			
 
				-<br>&#32;<br>
			
 
				-Summarizing:  process behavior is declared in
			
 
				-<TT>proctype</TT>
			
 
				-definitions, and it is instantiated with either
			
 
				-<TT>run</TT>
			
 
				-statements or with the prefix
			
 
				-<TT>active</TT>.
			
 
				-Within a proctype declaration, statements are separated
			
 
				-(not terminated) by semicolons.
			
 
				-As we shall see in examples that follow, instead of the
			
 
				-semicolon, one can also use the alternative separator
			
 
				-<TT>-&gt;</TT>
			
 
				-(arrow), wherever that may help to clarify the structure
			
 
				-of a <small>PROMELA</small> model.
			
 
				-<H4>Semantics of Execution
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-In <small>PROMELA</small> there is no difference between a condition or
			
 
				-expression and a statement.
			
 
				-Fundamental to the semantics of the language is the
			
 
				-notion of the <I>executability</I> of statements.
			
 
				-Statements are either executable or blocked.
			
 
				-Executability is the basic means of enforcing
			
 
				-synchronization between the processes in a distributed system.
			
 
				-A process can wait for an event to happen by waiting
			
 
				-for a statement to become executable.
			
 
				-For instance, instead of writing a busy wait loop:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-while (a != b)	/* not valid Promela syntax */
			
 
				-	skip;	/* wait for a==b */
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-we achieve the same effect in <small>PROMELA</small> with the statement
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-(a == b);
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-Often we indicate that the continuation of an execution
			
 
				-is conditional on the truth of some expression by using
			
 
				-the alternate statement separator:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-(a == b) -&gt; ...
			
 
				-</PRE></TT></DL>
			
 
				-Assignments and
			
 
				-<TT>printf</TT>
			
 
				-statements are always executable in <small>PROMELA</small>.
			
 
				-A condition, however, can only be executed (passed) when it holds.
			
 
				-If the condition does not hold, execution blocks until it does.
			
 
				-There are similar rules for determining the executability
			
 
				-of all other primitive and compound statements in the
			
 
				-language.
			
 
				-The semantics of each statement is defined in terms of
			
 
				-rules for executability and effect.
			
 
				-The rules for executability set a precondition on the state
			
 
				-of the system in which a statement can be executed.
			
 
				-The effect defines how a statement will alter a
			
 
				-system state when executed.
			
 
				-<br>&#32;<br>
			
 
				-<small>PROMELA</small> assumes that all individual statements are executed
			
 
				-atomically: that is, they model the smallest meaningful entities
			
 
				-of execution in the system being studied.
			
 
				-This means that <small>PROMELA</small> defines the standard asynchronous interleaving
			
 
				-model of execution, where a supposed scheduler is free at
			
 
				-each point in the execution to select any one of the processes
			
 
				-to proceed by executing a single primitive statement.
			
 
				-Synchronization constraints can be used to influence the
			
 
				-interleaving patterns.  It is the purpose of a concurrent system's
			
 
				-design to constrain those patterns in such a way that no
			
 
				-correctness requirements can be violated, and all service
			
 
				-requirements are met.  It is the purpose of the verifier
			
 
				-either to find counter-examples to a designer's claim that this
			
 
				-goal has been met, or to demonstrate that the claim is indeed valid.
			
 
				-<H4>1.1.2 Variables
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The table summarizes the five basic data types used in <small>PROMELA</small>.
			
 
				-<TT>Bit</TT>
			
 
				-and
			
 
				-<TT>bool</TT>
			
 
				-are synonyms for a single bit of information.
			
 
				-The first three types can store only unsigned quantities.
			
 
				-The last two can hold either positive or negative values.
			
 
				-The precise value ranges of variables of types
			
 
				-<TT>short</TT>
			
 
				-and
			
 
				-<TT>int</TT>
			
 
				-are implementation dependent, and correspond
			
 
				-to those of the same types in C programs
			
 
				-that are compiled for the same hardware.
			
 
				-The values given in the table are most common.
			
 
				-<br><img src="-.19126691.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-The following example program declares an array of
			
 
				-two elements of type
			
 
				-<TT>bool</TT>
			
 
				-and a scalar variable
			
 
				-<TT>turn</TT>
			
 
				-of the same type.
			
 
				-Note that the example relies on the fact that
			
 
				-<TT>_pid</TT>
			
 
				-is either 0 or 1 here.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-/*
			
 
				- * Peterson's algorithm for enforcing
			
 
				- * mutual exclusion between two processes
			
 
				- * competing for access to a critical section
			
 
				- */
			
 
				-bool turn, want[2];
			
 
				-
			
 
				-active [2] proctype user()
			
 
				-{
			
 
				-again:
			
 
				-	want[_pid] = 1; turn = _pid;
			
 
				-
			
 
				-	/* wait until this condition holds: */
			
 
				-	(want[1 - _pid] == 0 || turn == 1 - _pid);
			
 
				-
			
 
				-	/* enter */
			
 
				-critical:	skip;
			
 
				-	/* leave */
			
 
				-
			
 
				-	want[_pid] = 0;
			
 
				-	goto again
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-In the above case, all variables are initialized to zero.
			
 
				-The general syntax for declaring and instantiating a
			
 
				-variable, respectively for scalar and array variables, is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-type name = expression;
			
 
				-type name[constant] = expression
			
 
				-</PRE></TT></DL>
			
 
				-In the latter case, all elements of the array are initialized
			
 
				-to the value of the expression.
			
 
				-A missing initializer field defaults to the value zero.
			
 
				-As usual, multiple variables of the same type can be grouped
			
 
				-behind a single type name, as in:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-byte a, b[3], c = 4
			
 
				-</PRE></TT></DL>
			
 
				-In this example, the variable
			
 
				-<TT>c</TT>
			
 
				-is initialized to the value 4; variable
			
 
				-<TT>a</TT>
			
 
				-and the elements of array
			
 
				-<TT>b</TT>
			
 
				-are all initialized to zero.
			
 
				-<br>&#32;<br>
			
 
				-Variables can also be declared as structures.
			
 
				-For example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef Field {
			
 
				-        short f = 3;
			
 
				-        byte  g
			
 
				-};
			
 
				-
			
 
				-typedef Msg {
			
 
				-        byte a[3];
			
 
				-        int fld1;
			
 
				-        Field fld2;
			
 
				-        chan p[3];
			
 
				-        bit b
			
 
				-};
			
 
				-
			
 
				-Msg foo;
			
 
				-</PRE></TT></DL>
			
 
				-introduces two user-defined data types, the first named
			
 
				-<TT>Field</TT>
			
 
				-and the second named
			
 
				-<TT>Msg</TT>.
			
 
				-A single variable named
			
 
				-<TT>foo</TT>
			
 
				-of type
			
 
				-<TT>Msg</TT>
			
 
				-is declared.
			
 
				-All fields of
			
 
				-<TT>foo</TT>
			
 
				-that are not explicitly initialized (in the example, all fields except
			
 
				-<TT>foo.fld2.f</TT>)
			
 
				-are initialized to zero.
			
 
				-References to the elements of a structure are written as:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-foo.a[2] = foo.fld2.f + 12
			
 
				-</PRE></TT></DL>
			
 
				-A variable of a user-defined type can be passed as a single
			
 
				-argument to a new process in
			
 
				-<TT>run</TT>
			
 
				-statements.
			
 
				-For instance,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype me(Msg z) {
			
 
				-	z.a[2] = 12
			
 
				-}
			
 
				-init {
			
 
				-	Msg foo;
			
 
				-	run me(foo)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-Note that even though <small>PROMELA</small> supports only one-dimensional arrays,
			
 
				-a two-dimensional array can be created indirectly with user-defined
			
 
				-structures, for instance as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef Array {
			
 
				-	byte el[4]
			
 
				-};
			
 
				-
			
 
				-Array a[4];
			
 
				-</PRE></TT></DL>
			
 
				-This creates a data structure of 16 elements that can be
			
 
				-referenced, for instance, as
			
 
				-<TT>a[i].el[j]</TT>.
			
 
				-<br>&#32;<br>
			
 
				-As in C, the indices of an array of
			
 
				-<TT>N</TT>
			
 
				-elements range from 0 to
			
 
				-<TT>N-1</TT>.
			
 
				-<H4>Expressions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Expressions must be side-effect free in <small>PROMELA</small>.
			
 
				-Specifically, this means that an expression cannot
			
 
				-contain assignments, or send and receive operations (see section 1.1.3).
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-c = c + 1; c = c - 1
			
 
				-</PRE></TT></DL>
			
 
				-and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-c++; c--
			
 
				-</PRE></TT></DL>
			
 
				-are assignments in <small>PROMELA</small>, with the same effects.
			
 
				-But, unlike in C,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-b = c++
			
 
				-</PRE></TT></DL>
			
 
				-is not a valid assignment, because the right-hand side
			
 
				-operand is not a valid expression in <small>PROMELA</small> (it is not side-effect free).
			
 
				-<br>&#32;<br>
			
 
				-It is also possible to write a side-effect free conditional
			
 
				-expression, with the following syntax:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-(expr1 -&gt; expr2 : expr3)
			
 
				-</PRE></TT></DL>
			
 
				-The parentheses around the conditional expression are required to
			
 
				-avoid misinterpretation of the arrow.
			
 
				-The example expression has the value of <TT>expr2</TT> when <TT>expr1</TT>
			
 
				-evaluates to a non-zero value, and the value of <TT>expr3</TT> otherwise.
			
 
				-<br>&#32;<br>
			
 
				-In assignments like
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-variable = expression
			
 
				-</PRE></TT></DL>
			
 
				-the values of all operands used inside the expression are first cast to
			
 
				-signed integers before the operators are applied.
			
 
				-After the evaluation of the expression completes, the value produced
			
 
				-is cast to the type of the target variable before the assignment takes place.
			
 
				-<H4>1.1.3 Message Channels
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Message channels are used to model the transfer of data
			
 
				-between processes.
			
 
				-They are declared either locally or globally,
			
 
				-for instance as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-chan qname = [16] of { short, byte }
			
 
				-</PRE></TT></DL>
			
 
				-The keyword
			
 
				-<TT>chan</TT>
			
 
				-introduces a channel declaration.
			
 
				-In this case, the channel is named
			
 
				-<TT>qname</TT>,
			
 
				-and it is declared to be capable of storing up
			
 
				-to 16 messages.
			
 
				-Each message stored in the channel is declared here to
			
 
				-consist of two fields: one of type
			
 
				-<TT>short</TT>
			
 
				-and one of type
			
 
				-<TT>byte</TT>.
			
 
				-The fields of a message can be any one of the basic types
			
 
				-<TT>bit</TT>,
			
 
				-<TT>bool</TT>,
			
 
				-<TT>byte</TT>,
			
 
				-<TT>short</TT>,
			
 
				-<TT>int</TT>,
			
 
				-and
			
 
				-<TT>chan</TT>,
			
 
				-or any user-defined type.
			
 
				-Message fields cannot be declared as arrays.
			
 
				-<br>&#32;<br>
			
 
				-A message field of type
			
 
				-<TT>chan</TT>
			
 
				-can be used to pass a channel identifier
			
 
				-through a channel from one process to another.
			
 
				-<br>&#32;<br>
			
 
				-The statement
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname!expr1,expr2
			
 
				-</PRE></TT></DL>
			
 
				-sends the values of expressions
			
 
				-<TT>expr1</TT>
			
 
				-and
			
 
				-<TT>expr2</TT>
			
 
				-to the channel that we just created.  It appends
			
 
				-the message field created from the values of the two
			
 
				-expressions (and cast to the appropriate types of the
			
 
				-message fields declared for
			
 
				-<TT>qname</TT>)
			
 
				-to the tail of the message buffer of 16 slots that belongs
			
 
				-to channel
			
 
				-<TT>qname</TT>.
			
 
				-By default the send statement is only executable if the target
			
 
				-channel is non-full.
			
 
				-(This default semantics can be changed in the verifier into
			
 
				-one where the send statement is always executable, but the
			
 
				-message will be lost when an attempt is made to append it to
			
 
				-a full channel.)
			
 
				-<br>&#32;<br>
			
 
				-The statement
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname?var1,var2
			
 
				-</PRE></TT></DL>
			
 
				-retrieves a message from the head of the same buffer,
			
 
				-and stores the two expressions in variables
			
 
				-<TT>var1</TT>
			
 
				-and
			
 
				-<TT>var2</TT>.
			
 
				-<br>&#32;<br>
			
 
				-The receive statement is executable only if the source channel
			
 
				-is non-empty.
			
 
				-<br>&#32;<br>
			
 
				-If more parameters are sent per message than were declared
			
 
				-for the message channel, the redundant parameters are lost.
			
 
				-If fewer parameters are sent than declared,
			
 
				-the value of the remaining parameters is undefined.
			
 
				-Similarly, if the receive operation tries to retrieve more
			
 
				-parameters than available, the value of the extra parameters is
			
 
				-undefined; if it receives fewer than the number of parameters
			
 
				-sent, the extra information is lost.
			
 
				-<br>&#32;<br>
			
 
				-An alternative, and equivalent, notation for the
			
 
				-send and receive operations is to structure the
			
 
				-message fields with parentheses, as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname!expr1(expr2,expr3)
			
 
				-qname?var1(var2,var3)
			
 
				-</PRE></TT></DL>
			
 
				-In the above case, we assume that
			
 
				-<TT>qname</TT>
			
 
				-was declared to hold messages consisting of three fields.
			
 
				-<P>
			
 
				-Some or all of the arguments of the receive operation
			
 
				-can be given as constants instead of as variables:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname?cons1,var2,cons2
			
 
				-</PRE></TT></DL>
			
 
				-In this case, an extra condition on the executability of the
			
 
				-receive operation is that the value of all message fields
			
 
				-specified as constants match the value of the corresponding
			
 
				-fields in the message that is to be received.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-Here is an example that uses some of the mechanisms introduced
			
 
				-so far.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype A(chan q1)
			
 
				-{	chan q2;
			
 
				-	q1?q2;
			
 
				-	q2!123
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype B(chan qforb)
			
 
				-{	int x;
			
 
				-	qforb?x;
			
 
				-	printf("x = %d\n", x)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-init {
			
 
				-	chan qname = [1] of { chan };
			
 
				-	chan qforb = [1] of { int };
			
 
				-	run A(qname);
			
 
				-	run B(qforb);
			
 
				-	qname!qforb
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The value printed by the process of type
			
 
				-<TT>B</TT>
			
 
				-will be
			
 
				-<TT>123</TT>.
			
 
				-<br>&#32;<br>
			
 
				-A predefined function
			
 
				-<TT>len(qname)</TT>
			
 
				-returns the number of messages currently
			
 
				-stored in channel
			
 
				-<TT>qname</TT>.
			
 
				-Two shorthands for the most common uses of this
			
 
				-function are
			
 
				-<TT>empty(qname)</TT>
			
 
				-and
			
 
				-<TT>full(qname)</TT>,
			
 
				-with the obvious connotations.
			
 
				-<br>&#32;<br>
			
 
				-Since all expressions must be side-effect free,
			
 
				-it is not valid to say:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-(qname?var == 0)
			
 
				-</PRE></TT></DL>
			
 
				-or
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-(a &gt; b &amp;&amp; qname!123)
			
 
				-</PRE></TT></DL>
			
 
				-We could rewrite the second example (using an atomic sequence,
			
 
				-as explained further in section 1.2.1):
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-atomic { (a &gt; b &amp;&amp; !full(qname)) -&gt; qname!123 }
			
 
				-</PRE></TT></DL>
			
 
				-The meaning of the first example is ambiguous.  It could mean
			
 
				-that we want the condition to be true if the receive operation
			
 
				-is unexecutable.  In that case, we can rewrite it without
			
 
				-side-effects as:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-empty(qname)
			
 
				-</PRE></TT></DL>
			
 
				-It could also mean that we want the condition
			
 
				-to be true when the channel does contain a message with
			
 
				-value zero.
			
 
				-We can specify that as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-atomic { qname?[0] -&gt; qname?var }
			
 
				-</PRE></TT></DL>
			
 
				-The first statement of this atomic sequence is
			
 
				-an expression without side-effects that
			
 
				-evaluates to a non-zero value only if the
			
 
				-receive operation
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname?0
			
 
				-</PRE></TT></DL>
			
 
				-would have been executable at that
			
 
				-point (i.e., channel
			
 
				-<TT>qname</TT>
			
 
				-contains at least one message and the oldest
			
 
				-message stored consists of one message field
			
 
				-equal to zero).
			
 
				-Any receive statement can be turned into
			
 
				-a side-effect free expression by placing square
			
 
				-brackets around the list of all message parameters.
			
 
				-The channel contents remain undisturbed by the
			
 
				-evaluation of such expressions.
			
 
				-<br>&#32;<br>
			
 
				-Note carefully, however, that in non-atomic sequences
			
 
				-of two statements such as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-!full(qname) -&gt; qname!msgtype
			
 
				-</PRE></TT></DL>
			
 
				-and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname?[msgtype] -&gt; qname?msgtype
			
 
				-</PRE></TT></DL>
			
 
				-the second statement is not necessarily executable
			
 
				-after the first one has been executed.
			
 
				-There may be race conditions when access to the channels
			
 
				-is shared between several processes.
			
 
				-Another process can send a message to the channel
			
 
				-just after this process determined that it was not full,
			
 
				-or another process can steal away the
			
 
				-message just after our process determined its presence.
			
 
				-<br>&#32;<br>
			
 
				-Two other types of send and receive statements are used
			
 
				-less frequently: sorted send and random receive.
			
 
				-A sorted send operation is written with two, instead of one,
			
 
				-exclamation marks, as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname!!msg
			
 
				-</PRE></TT></DL>
			
 
				-A sorted send operation will insert a message into the channel's buffer
			
 
				-in numerical order, instead of in FIFO order.
			
 
				-The channel contents are scanned from the first message towards the
			
 
				-last, and the message is inserted immediately before the first message
			
 
				-that follows it in numerical order.
			
 
				-To determine the numerical order, all message fields are
			
 
				-taken into account.
			
 
				-<br>&#32;<br>
			
 
				-The logical counterpart of the sorted send operation
			
 
				-is the random receive.
			
 
				-It is written with two, instead of one, question marks:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-qname??msg
			
 
				-</PRE></TT></DL>
			
 
				-A random receive operation is executable if it is executable for <I>any</I>
			
 
				-message that is currently buffered in a message channel (instead of
			
 
				-only for the first message in the channel).
			
 
				-Normal send and receive operations can freely be combined with
			
 
				-sorted send and random receive operations.
			
 
				-<H4>Rendezvous Communication
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-So far we have talked about asynchronous communication between processes
			
 
				-via message channels, declared in statements such as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-chan qname = [N] of { byte }
			
 
				-</PRE></TT></DL>
			
 
				-where
			
 
				-<TT>N</TT>
			
 
				-is a positive constant that defines the buffer size.
			
 
				-A logical extension is to allow for the declaration
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-chan port = [0] of { byte }
			
 
				-</PRE></TT></DL>
			
 
				-to define a rendezvous port.
			
 
				-The channel size is zero, that is, the channel
			
 
				-<TT>port</TT>
			
 
				-can pass, but cannot store, messages.
			
 
				-Message interactions via such rendezvous ports are
			
 
				-by definition synchronous.
			
 
				-Consider the following example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#define msgtype 33
			
 
				-
			
 
				-chan name = [0] of { byte, byte };
			
 
				-
			
 
				-active proctype A()
			
 
				-{	name!msgtype(124);
			
 
				-	name!msgtype(121)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype B()
			
 
				-{	byte state;
			
 
				-	name?msgtype(state)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Channel
			
 
				-<TT>name</TT>
			
 
				-is a global rendezvous port.
			
 
				-The two processes will synchronously execute their first statement:
			
 
				-a handshake on message
			
 
				-<TT>msgtype</TT>
			
 
				-and a transfer of the value 124 to local variable
			
 
				-<TT>state</TT>.
			
 
				-The second statement in process
			
 
				-<TT>A</TT>
			
 
				-will be unexecutable,
			
 
				-because there is no matching receive operation in process
			
 
				-<TT>B</TT>.
			
 
				-<br>&#32;<br>
			
 
				-If the channel
			
 
				-<TT>name</TT>
			
 
				-is defined with a non-zero buffer capacity,
			
 
				-the behavior is different.
			
 
				-If the buffer size is at least 2, the process of type
			
 
				-<TT>A</TT>
			
 
				-can complete its execution, before its peer even starts.
			
 
				-If the buffer size is 1, the sequence of events is as follows.
			
 
				-The process of type
			
 
				-<TT>A</TT>
			
 
				-can complete its first send action, but it blocks on the
			
 
				-second, because the channel is now filled to capacity.
			
 
				-The process of type
			
 
				-<TT>B</TT>
			
 
				-can then retrieve the first message and complete.
			
 
				-At this point
			
 
				-<TT>A</TT>
			
 
				-becomes executable again and completes,
			
 
				-leaving its last message as a residual in the channel.
			
 
				-<br>&#32;<br>
			
 
				-Rendezvous communication is binary: only two processes,
			
 
				-a sender and a receiver, can be synchronized in a
			
 
				-rendezvous handshake.
			
 
				-<br>&#32;<br>
			
 
				-As the example shows, symbolic constants can be defined
			
 
				-with preprocessor macros using
			
 
				-<TT>#define</TT>.
			
 
				-The source text of a <small>PROMELA</small> model is translated by the standard
			
 
				-C preprocessor.
			
 
				-The disadvantage of defining symbolic names in this way is,
			
 
				-however, that the <small>PROMELA</small> parser will only see the expanded text,
			
 
				-and cannot refer to the symbolic names themselves.
			
 
				-To prevent that, <small>PROMELA</small> also supports another way to define
			
 
				-symbolic names, which are preserved in error reports.
			
 
				-For instance, by including the declaration
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mtype = { ack, msg, error, data };
			
 
				-</PRE></TT></DL>
			
 
				-at the top of a <small>PROMELA</small> model, the names provided between the
			
 
				-curly braces are equivalent to integers of type
			
 
				-<TT>byte</TT>,
			
 
				-but known by their symbolic names to the <small>SPIN</small> parser and the
			
 
				-verifiers it generates.
			
 
				-The constant values assigned start at 1, and count up.
			
 
				-There can be only one
			
 
				-<TT>mtype</TT>
			
 
				-declaration per model.
			
 
				-<H4>1.2 Control Flow
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-So far, we have seen only some of the basic statements
			
 
				-of <small>PROMELA</small>, and the way in which they can be combined to
			
 
				-model process behaviors.
			
 
				-The five types of statements we have mentioned are:
			
 
				-<TT>printf</TT>,
			
 
				-<TT>assignment</TT>,
			
 
				-<TT>condition</TT>,
			
 
				-<TT>send</TT>,
			
 
				-and
			
 
				-<TT>receive</TT>.
			
 
				-<br>&#32;<br>
			
 
				-The pseudo-statement
			
 
				-<TT>skip</TT>
			
 
				-is syntactically and semantically equivalent to the
			
 
				-condition
			
 
				-<TT>(1)</TT>
			
 
				-(i.e., to true), and is in fact quietly replaced with this
			
 
				-expression by the lexical analyzer of <small>SPIN</small>.
			
 
				-<br>&#32;<br>
			
 
				-There are also five types of compound statements.
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DD>
			
 
				-*
			
 
				-Atomic sequences (section 1.2.1),
			
 
				-<br>
			
 
				-*
			
 
				-Deterministic steps (section 1.2.2),
			
 
				-<br>
			
 
				-*
			
 
				-Selections (section 1.2.3),
			
 
				-<br>
			
 
				-*
			
 
				-Repetitions (section 1.2.4),
			
 
				-<br>
			
 
				-*
			
 
				-Escape sequences (section 1.2.5).
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<H4>1.2.1 Atomic Sequences
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The simplest compound statement is the
			
 
				-<TT>atomic</TT>
			
 
				-sequence:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-atomic {	/* swap the values of a and b */
			
 
				-	tmp = b;
			
 
				-	b = a;
			
 
				-	a = tmp
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-In the example, the values of two variables
			
 
				-<TT>a</TT>
			
 
				-and
			
 
				-<TT>b</TT>
			
 
				-are swapped in a sequence of statement executions
			
 
				-that is defined to be uninterruptible.
			
 
				-That is, in the interleaving of process executions, no
			
 
				-other process can execute statements from the moment that
			
 
				-the first statement of this sequence begins to execute until
			
 
				-the last one has completed.
			
 
				-<br>&#32;<br>
			
 
				-It is often useful to use
			
 
				-<TT>atomic</TT>
			
 
				-sequences to start a series of processes in such a
			
 
				-way that none of them can start executing statements
			
 
				-until all of them have been initialized:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-init {
			
 
				-	atomic {
			
 
				-		run A(1,2);
			
 
				-		run B(2,3);
			
 
				-		run C(3,1)
			
 
				-	}
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Atomic</TT>
			
 
				-sequences may be non-deterministic.
			
 
				-If any statement inside an
			
 
				-<TT>atomic</TT>
			
 
				-sequence is found to be unexecutable, however,
			
 
				-the atomic chain is broken, and another process can take over
			
 
				-control.
			
 
				-When the blocking statement becomes executable later,
			
 
				-control can non-deterministically return to the process,
			
 
				-and the atomic execution of the sequence resumes as if
			
 
				-it had not been interrupted.
			
 
				-<H4>1.2.2 Deterministic Steps
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Another way to define an indivisible sequence of actions
			
 
				-is to use the
			
 
				-<TT>d_step</TT>
			
 
				-statement.
			
 
				-In the above case, for instance, we could also have written:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-d_step {	/* swap the values of a and b */
			
 
				-	tmp = b;
			
 
				-	b = a;
			
 
				-	a = tmp
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The differences between a
			
 
				-<TT>d_step</TT>
			
 
				-sequence
			
 
				-and an
			
 
				-<TT>atomic</TT>
			
 
				-sequence are:
			
 
				-<UL>
			
 
				-<LI>
			
 
				-A
			
 
				-<TT>d_step</TT>
			
 
				-sequence must be completely deterministic.
			
 
				-(If non-determinism is nonetheless encountered,
			
 
				-it is always resolved in a fixed and deterministic
			
 
				-way: i.e., the first true guard in selection or
			
 
				-repetition structures is always selected.)
			
 
				-<LI>
			
 
				-No
			
 
				-<TT>goto</TT>
			
 
				-jumps into or out of a
			
 
				-<TT>d_step</TT>
			
 
				-sequence are permitted.
			
 
				-<LI>
			
 
				-The execution of a
			
 
				-<TT>d_step</TT>
			
 
				-sequence cannot be interrupted when a
			
 
				-blocking statement is encountered.
			
 
				-It is an error if any statement other than
			
 
				-the first one in a
			
 
				-<TT>d_step</TT>
			
 
				-sequence is found to be unexecutable.
			
 
				-<LI>
			
 
				-A
			
 
				-<TT>d_step</TT>
			
 
				-sequence is executed as one single statement.
			
 
				-In a way, it is a mechanism for adding new types
			
 
				-of statements to the language.
			
 
				-</ul>
			
 
				-<br>&#32;<br>
			
 
				-None of the items listed above apply to
			
 
				-<TT>atomic</TT>
			
 
				-sequences.
			
 
				-This means that the keyword
			
 
				-<TT>d_step</TT>
			
 
				-can always be replaced with the keyword
			
 
				-<TT>atomic</TT>,
			
 
				-but the reverse is not true.
			
 
				-(The main, perhaps the only, reason for using
			
 
				-<TT>d_step</TT>
			
 
				-sequences is to improve the efficiency of
			
 
				-verifications.)
			
 
				-<H4>1.2.3 Selection Structures
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-A more interesting construct is the selection structure.
			
 
				-Using the relative values of two variables
			
 
				-<TT>a</TT>
			
 
				-and
			
 
				-<TT>b</TT>
			
 
				-to choose between two options, for instance, we can write:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-if
			
 
				-:: (a != b) -&gt; option1
			
 
				-:: (a == b) -&gt; option2
			
 
				-fi
			
 
				-</PRE></TT></DL>
			
 
				-The selection structure above contains two execution sequences,
			
 
				-each preceded by a double colon.
			
 
				-Only one sequence from the list will be executed.
			
 
				-A sequence can be selected only if its first statement is executable.
			
 
				-The first statement is therefore called a <I>guard</I>.
			
 
				-<br>&#32;<br>
			
 
				-In the above example the guards are mutually exclusive, but they
			
 
				-need not be.
			
 
				-If more than one guard is executable, one of the corresponding sequences
			
 
				-is selected nondeterministically.
			
 
				-If all guards are unexecutable the process will block until at least
			
 
				-one of them can be selected.
			
 
				-There is no restriction on the type of statements that can be used
			
 
				-as a guard: it may include sends or receives, assignments,
			
 
				-<TT>printf</TT>,
			
 
				-<TT>skip</TT>,
			
 
				-etc.
			
 
				-The rules of executability determine in each case what the semantics
			
 
				-of the complete selection structure will be.
			
 
				-The following example, for instance, uses receive statements
			
 
				-as guards in a selection.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-mtype = { a, b };
			
 
				-
			
 
				-chan ch = [1] of { byte };
			
 
				-
			
 
				-active proctype A()
			
 
				-{	ch!a
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype B()
			
 
				-{	ch!b
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype C()
			
 
				-{	if
			
 
				-	:: ch?a
			
 
				-	:: ch?b
			
 
				-	fi
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The example defines three processes and one channel.
			
 
				-The first option in the selection structure of the process
			
 
				-of type
			
 
				-<TT>C</TT>
			
 
				-is executable if the channel contains
			
 
				-a message named
			
 
				-<TT>a</TT>,
			
 
				-where
			
 
				-<TT>a</TT>
			
 
				-is a symbolic constant defined in the
			
 
				-<TT>mtype</TT>
			
 
				-declaration at the start of the program.
			
 
				-The second option is executable if it contains a message
			
 
				-<TT>b</TT>,
			
 
				-where, similarly,
			
 
				-<TT>b</TT>
			
 
				-is a symbolic constant.
			
 
				-Which message will be available depends on the unknown
			
 
				-relative speeds of the processes.
			
 
				-<br>&#32;<br>
			
 
				-A process of the following type will either increment
			
 
				-or decrement the value of variable
			
 
				-<TT>count</TT>
			
 
				-once.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-byte count;
			
 
				-
			
 
				-active proctype counter()
			
 
				-{
			
 
				-	if
			
 
				-	:: count++
			
 
				-	:: count--
			
 
				-	fi
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Assignments are always executable, so the choice made
			
 
				-here is truly a non-deterministic one that is independent
			
 
				-of the initial value of the variable (zero in this case).
			
 
				-<H4>1.2.4 Repetition Structures
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-We can modify the above program as follows, to obtain
			
 
				-a cyclic program that randomly changes the value of
			
 
				-the variable up or down, by replacing the selection
			
 
				-structure with a repetition.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-byte count;
			
 
				-
			
 
				-active proctype counter()
			
 
				-{
			
 
				-	do
			
 
				-	:: count++
			
 
				-	:: count--
			
 
				-	:: (count == 0) -&gt; break
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Only one option can be selected for execution at a time.
			
 
				-After the option completes, the execution of the structure
			
 
				-is repeated.
			
 
				-The normal way to terminate the repetition structure is
			
 
				-with a
			
 
				-<TT>break</TT>
			
 
				-statement.
			
 
				-In the example, the loop can be
			
 
				-broken only when the count reaches zero.
			
 
				-Note, however, that it need not terminate since the other
			
 
				-two options remain executable.
			
 
				-To force termination we could modify the program as follows.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype counter()
			
 
				-{
			
 
				-	do
			
 
				-	:: (count != 0) -&gt;
			
 
				-		if
			
 
				-		:: count++
			
 
				-		:: count--
			
 
				-		fi
			
 
				-	:: (count == 0) -&gt; break
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-A special type of statement that is useful in selection
			
 
				-and repetition structures is the
			
 
				-<TT>else</TT>
			
 
				-statement.
			
 
				-An
			
 
				-<TT>else</TT>
			
 
				-statement becomes executable only if no other statement
			
 
				-within the same process, at the same control-flow point,
			
 
				-is executable.
			
 
				-We could try to use it in two places in the above example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype counter()
			
 
				-{
			
 
				-	do
			
 
				-	:: (count != 0) -&gt;
			
 
				-		if
			
 
				-		:: count++
			
 
				-		:: count--
			
 
				-		:: else
			
 
				-		fi
			
 
				-	:: else -&gt; break
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The first
			
 
				-<TT>else</TT>,
			
 
				-inside the nested selection structure, can never become
			
 
				-executable though, and is therefore redundant (both alternative
			
 
				-guards of the selection are assignments, which are always
			
 
				-executable).
			
 
				-The second usage of the
			
 
				-<TT>else</TT>,
			
 
				-however, becomes executable exactly when
			
 
				-<TT>!(count != 0)</TT>
			
 
				-or
			
 
				-<TT>(count == 0)</TT>,
			
 
				-and is therefore equivalent to using the latter as the guard that breaks from the loop.
			
 
				-<br>&#32;<br>
			
 
				-There is also an alternative way to exit the do-loop, without
			
 
				-using a
			
 
				-<TT>break</TT>
			
 
				-statement:  the infamous
			
 
				-<TT>goto</TT>.
			
 
				-This is illustrated in the following implementation of
			
 
				-Euclid's algorithm for finding the greatest common divisor
			
 
				-of two non-zero, positive numbers:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype Euclid(int x, y)
			
 
				-{
			
 
				-	do
			
 
				-	:: (x &gt;  y) -&gt; x = x - y
			
 
				-	:: (x &lt;  y) -&gt; y = y - x
			
 
				-	:: (x == y) -&gt; goto done
			
 
				-	od;
			
 
				-done:
			
 
				-	skip
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-init { run Euclid(36, 12) }
			
 
				-</PRE></TT></DL>
			
 
				-The
			
 
				-<TT>goto</TT>
			
 
				-in this example jumps to a label named
			
 
				-<TT>done</TT>.
			
 
				-Since a label can only appear before a statement,
			
 
				-we have added the dummy statement
			
 
				-<TT>skip</TT>.
			
 
				-Like a
			
 
				-<TT>skip</TT>,
			
 
				-a
			
 
				-<TT>goto</TT>
			
 
				-statement is always executable and has no other
			
 
				-effect than to change the control-flow point
			
 
				-of the process that executes it.
			
 
				-<br>&#32;<br>
			
 
				-As a final example, consider the following implementation of
			
 
				-a Dijkstra semaphore, which is implemented with the help of
			
 
				-a synchronous channel.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#define p	0
			
 
				-#define v	1
			
 
				-
			
 
				-chan sema = [0] of { bit };
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype Dijkstra()
			
 
				-{	byte count = 1;
			
 
				-
			
 
				-	do
			
 
				-	:: (count == 1) -&gt;
			
 
				-		sema!p; count = 0
			
 
				-	:: (count == 0) -&gt;
			
 
				-		sema?v; count = 1
			
 
				-	od	
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active [3] proctype user()
			
 
				-{	do
			
 
				-	:: sema?p;
			
 
				-	   /* critical section */
			
 
				-	   sema!v;
			
 
				-	   /* non-critical section */
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The semaphore guarantees that only one of the three user processes
			
 
				-can enter its critical section at a time.
			
 
				-It does not necessarily prevent the monopolization of
			
 
				-the access to the critical section by one of the processes.
			
 
				-<br>&#32;<br>
			
 
				-<small>PROMELA</small> does not have a mechanism for defining functions or
			
 
				-procedures.  Where necessary, though, these may be
			
 
				-modeled with the help of additional processes.
			
 
				-The return value of a function, for instance, can be passed
			
 
				-back to the calling process via global variables or messages.
			
 
				-The following program illustrates this by recursively
			
 
				-calculating the factorial of a number
			
 
				-<TT>n</TT>.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype fact(int n; chan p)
			
 
				-{	chan child = [1] of { int };
			
 
				-	int result;
			
 
				-
			
 
				-	if
			
 
				-	:: (n &lt;= 1) -&gt; p!1
			
 
				-	:: (n &gt;= 2) -&gt;
			
 
				-		run fact(n-1, child);
			
 
				-		child?result;
			
 
				-		p!n*result
			
 
				-	fi
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-init
			
 
				-{	chan child = [1] of { int };
			
 
				-	int result;
			
 
				-
			
 
				-	run fact(7, child);
			
 
				-	child?result;
			
 
				-	printf("result: %d\n", result)
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Each process creates a private channel and uses it
			
 
				-to communicate with its direct descendant.
			
 
				-There are no input statements in <small>PROMELA</small>.
			
 
				-The reason is that models must always be complete to
			
 
				-allow for logical verifications, and input statements
			
 
				-would leave at least the source of some information unspecified.
			
 
				-A way to read input
			
 
				-would presuppose a source of information that is not
			
 
				-part of the model.
			
 
				-<br>&#32;<br>
			
 
				-We have already discussed a few special types of statement:
			
 
				-<TT>skip</TT>,
			
 
				-<TT>break</TT>,
			
 
				-and
			
 
				-<TT>else</TT>.
			
 
				-Another statement in this class is the
			
 
				-<TT>timeout</TT>.
			
 
				-The
			
 
				-<TT>timeout</TT>
			
 
				-is comparable to a system level
			
 
				-<TT>else</TT>
			
 
				-statement: it becomes executable if and only if no other
			
 
				-statement in any of the processes is executable.
			
 
				-<TT>Timeout</TT>
			
 
				-is a modeling feature that provides for an escape from a
			
 
				-potential deadlock state.
			
 
				-The
			
 
				-<TT>timeout</TT>
			
 
				-takes no parameters, because the types of properties we
			
 
				-would like to prove for <small>PROMELA</small> models must be proven independent
			
 
				-of all absolute and relative timing considerations.
			
 
				-In particular, the relative speeds of processes can never be
			
 
				-known with certainty in an asynchronous system.
			
 
				-<H4>1.2.5 Escape Sequences
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The last type of compound structure to be discussed is the
			
 
				-<TT>unless</TT>
			
 
				-statement.
			
 
				-It is used as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-{ P } unless { E }
			
 
				-</PRE></TT></DL>
			
 
				-where the letters
			
 
				-<TT>P</TT>
			
 
				-and
			
 
				-<TT>E</TT>
			
 
				-represent arbitrary <small>PROMELA</small> fragments.
			
 
				-Execution of the
			
 
				-<TT>unless</TT>
			
 
				-statement begins with the execution of statements from
			
 
				-<TT>P</TT>.
			
 
				-Before each statement execution in
			
 
				-<TT>P</TT>
			
 
				-the executability of the first statement of
			
 
				-<TT>E</TT>
			
 
				-is checked, using the normal <small>PROMELA</small> semantics of executability.
			
 
				-Execution of statements from
			
 
				-<TT>P</TT>
			
 
				-proceeds only while the first statement of
			
 
				-<TT>E</TT>
			
 
				-remains unexecutable.
			
 
				-The first time that this `guard of the escape sequence'
			
 
				-is found to be executable, control changes to it,
			
 
				-and execution continues as defined for
			
 
				-<TT>E</TT>.
			
 
				-Individual statement executions remain indivisible,
			
 
				-so control can only change from inside
			
 
				-<TT>P</TT>
			
 
				-to the start of
			
 
				-<TT>E</TT>
			
 
				-in between individual statement executions.
			
 
				-If the guard of the escape sequence
			
 
				-does not become executable during the
			
 
				-execution of
			
 
				-<TT>P</TT>,
			
 
				-then it is skipped entirely when
			
 
				-<TT>P</TT>
			
 
				-terminates.
			
 
				-<br>&#32;<br>
			
 
				-An example of the use of escape sequences is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-A;
			
 
				-do
			
 
				-:: b1 -&gt; B1
			
 
				-:: b2 -&gt; B2
			
 
				-...
			
 
				-od
			
 
				-unless { c -&gt; C };
			
 
				-D
			
 
				-</PRE></TT></DL>
			
 
				-As shown in the example, the curly braces around the main sequence
			
 
				-(or the escape sequence) can be deleted if there can be no confusion
			
 
				-about which statements belong to those sequences.
			
 
				-In the example, condition
			
 
				-<TT>c</TT>
			
 
				-acts as a watchdog on the repetition construct from the main sequence.
			
 
				-Note that this is not necessarily equivalent to the construct
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-A;
			
 
				-do
			
 
				-:: b1 -&gt; B1
			
 
				-:: b2 -&gt; B2
			
 
				-...
			
 
				-:: c -&gt; break
			
 
				-od;
			
 
				-C; D
			
 
				-</PRE></TT></DL>
			
 
				-if
			
 
				-<TT>B1</TT>
			
 
				-or
			
 
				-<TT>B2</TT>
			
 
				-are non-empty.
			
 
				-In the first version of the example, execution of the iteration can
			
 
				-be interrupted at <I>any</I> point inside each option sequence.
			
 
				-In the second version, execution can only be interrupted at the
			
 
				-start of the option sequences.
			
 
				-<H4>1.3 Correctness Properties
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-There are three ways to express correctness properties in <small>PROMELA</small>,
			
 
				-using:
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DD>
			
 
				-<br>
			
 
				-*
			
 
				-Assertions (section 1.3.1),
			
 
				-<br>
			
 
				-*
			
 
				-Special labels (section 1.3.2),
			
 
				-<br>
			
 
				-*
			
 
				-<TT>Never</TT>
			
 
				-claims (section 1.3.3).
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<H4>1.3.1 Assertions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Statements of the form
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-assert(expression)
			
 
				-</PRE></TT></DL>
			
 
				-are always executable.
			
 
				-If the expression evaluates to a non-zero value (i.e., the
			
 
				-corresponding condition holds), the statement has no effect
			
 
				-when executed.
			
 
				-The correctness property expressed, though, is that it is
			
 
				-impossible for the expression to evaluate to zero (i.e., for
			
 
				-the condition to be false).
			
 
				-A failing assertion will cause execution to be aborted.
			
 
				-<H4>1.3.2 Special Labels
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Labels in a <small>PROMELA</small> specification ordinarily serve as
			
 
				-targets for unconditional
			
 
				-<TT>goto</TT>
			
 
				-jumps, as usual.
			
 
				-There are, however, also three types of labels that
			
 
				-have a special meaning to the verifier.
			
 
				-We discuss them in the next three subsections.
			
 
				-<H4>1.3.2.1 End-State Labels
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-When a <small>PROMELA</small> model is checked for reachable deadlock states
			
 
				-by the verifier, it must be able to distinguish valid <I>end states</I>
			
 
				-from invalid ones.
			
 
				-By default, the only valid end states are those in which
			
 
				-every <small>PROMELA</small> process that was instantiated has reached the end of
			
 
				-its code.
			
 
				-Not all <small>PROMELA</small> processes, however, are meant to reach the
			
 
				-end of their code.
			
 
				-Some may very well linger in a known wait
			
 
				-state, or they may sit patiently in a loop
			
 
				-ready to spring into action when new input arrives.
			
 
				-<br>&#32;<br>
			
 
				-To make it clear to the verifier that these alternate end states
			
 
				-are also valid, we can define special end-state labels.
			
 
				-We can do so, for instance, in the process type
			
 
				-<TT>Dijkstra</TT>,
			
 
				-from an earlier example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype Dijkstra()
			
 
				-{	byte count = 1;
			
 
				-
			
 
				-end:	do
			
 
				-	:: (count == 1) -&gt;
			
 
				-		sema!p; count = 0
			
 
				-	:: (count == 0) -&gt;
			
 
				-		sema?v; count = 1
			
 
				-	od	
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The label
			
 
				-<TT>end</TT>
			
 
				-defines that it is not an error if, at the end of an
			
 
				-execution sequence, a process of this type
			
 
				-has not reached its closing curly brace, but waits at the label.
			
 
				-Of course, such a state could still be part of a deadlock state, but
			
 
				-if so, it is not caused by this particular process.
			
 
				-<br>&#32;<br>
			
 
				-There may be more than one end-state label per <small>PROMELA</small> model.
			
 
				-If so, all labels that occur within the same process body must
			
 
				-be unique.
			
 
				-The rule is that every label name with the prefix
			
 
				-<TT>end</TT>
			
 
				-is taken to be an end-state label.
			
 
				-<H4>1.3.2.2 Progress-State Labels
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-In the same spirit, <small>PROMELA</small> also allows for the definition of
			
 
				-<TT>progress</TT>
			
 
				-labels.
			
 
				-Passing a progress label during an execution is interpreted
			
 
				-as a good thing:  the process is not just idling while
			
 
				-waiting for things to happen elsewhere, but is making
			
 
				-effective progress in its execution.
			
 
				-The implicit correctness property expressed here is that any
			
 
				-infinite execution cycle allowed by the model that does not
			
 
				-pass through at least one of these progress labels is a
			
 
				-potential starvation loop.
			
 
				-In the
			
 
				-<TT>Dijkstra</TT>
			
 
				-example, for instance, we can label the
			
 
				-successful passing of a semaphore test as progress and
			
 
				-ask a verifier to make sure that there is no cycle elsewhere
			
 
				-in the system.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-proctype Dijkstra()
			
 
				-{	byte count = 1;
			
 
				-
			
 
				-end:	do
			
 
				-	:: (count == 1) -&gt;
			
 
				-progress:	sema!p; count = 0
			
 
				-	:: (count == 0) -&gt;
			
 
				-		sema?v; count = 1
			
 
				-	od	
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-If more than one state carries a progress label,
			
 
				-variations with a common prefix are again valid.
			
 
				-<H4>1.3.2.3 Accept-State Labels
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The last type of label, the accept-state label, is used
			
 
				-primarily in combination with
			
 
				-<TT>never</TT>
			
 
				-claims.
			
 
				-Briefly, by labeling a state with any label starting
			
 
				-with the prefix
			
 
				-<TT>accept</TT>
			
 
				-we can ask the verifier to find all cycles that <I>do</I>
			
 
				-pass through at least one of those labels.
			
 
				-The implicit correctness claim is that this cannot happen.
			
 
				-The primary place where accept labels are used is inside
			
 
				-<TT>never</TT>
			
 
				-claims.
			
 
				-We discuss
			
 
				-<TT>never</TT>
			
 
				-claims next.
			
 
				-<H4>1.3.3 Never Claims
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Up to this point we have talked about the specification
			
 
				-of correctness criteria with assertions
			
 
				-and with three special types of labels.
			
 
				-Powerful types of correctness criteria can already
			
 
				-be expressed with these tools, yet so far our only option is
			
 
				-to add them to individual
			
 
				-<TT>proctype</TT>
			
 
				-declarations.
			
 
				-We can, for instance, express the claim ``every system state
			
 
				-in which property
			
 
				-<TT>P</TT>
			
 
				-is true eventually leads to a system state in which property
			
 
				-<TT>Q</TT>
			
 
				-is true,'' with an extra monitor process, such as:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype monitor()
			
 
				-{
			
 
				-progress:
			
 
				-	do
			
 
				-	:: P -&gt; Q
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-If we require that property
			
 
				-<TT>P</TT>
			
 
				-must <I>remain</I> true while we are waiting for
			
 
				-<TT>Q</TT>
			
 
				-to become true, we can try to change this to:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-active proctype monitor()
			
 
				-{
			
 
				-progress:
			
 
				-	do
			
 
				-	:: P -&gt; assert(P || Q)
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-but this does not quite do the job.
			
 
				-Note that we cannot make any assumptions about the
			
 
				-relative execution speeds of processes in a <small>PROMELA</small> model.
			
 
				-This means that if in the remainder of the system the
			
 
				-property
			
 
				-<TT>P</TT>
			
 
				-becomes true, we can move to the state just before the
			
 
				-<TT>assert</TT>,
			
 
				-and wait there for an unknown amount of time (anything between
			
 
				-a zero delay and an infinite delay is possible here, since
			
 
				-no other synchronizations apply).
			
 
				-If
			
 
				-<TT>Q</TT>
			
 
				-becomes true, we may pass the assertion, but we need not
			
 
				-do so.
			
 
				-Even if
			
 
				-<TT>P</TT>
			
 
				-becomes false only <I>after</I>
			
 
				-<TT>Q</TT>
			
 
				-has become true, we may still fail the assertion,
			
 
				-as long as there exists some later state where neither
			
 
				-<TT>P</TT>
			
 
				-nor
			
 
				-<TT>Q</TT>
			
 
				-is true.
			
 
				-This is clearly unsatisfactory, and we need another mechanism
			
 
				-to express these important types of liveness properties.
			
 
				-<H4>The Connection with Temporal Logic
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-A general way to express system properties of the type we
			
 
				-have just discussed is to use linear time temporal logic (LTL)
			
 
				-formulae.
			
 
				-Every <small>PROMELA</small> expression is automatically also a valid LTL formula.
			
 
				-An LTL formula can also contain the unary temporal operators &#9633;
			
 
				-(pronounced always), &#9671; (pronounced eventually), and
			
 
				-two binary temporal operators
			
 
				-<TT>U</TT>
			
 
				-(pronounced weak until) and
			
 
				-<B><I>U</I></B>
			
 
				-(pronounced strong until).
			
 
				-<br>&#32;<br>
			
 
				-Where the value of a <small>PROMELA</small> expression without temporal operators can be
			
 
				-defined uniquely for individual system states, without further context,
			
 
				-the truth value of an LTL formula is defined for sequences of states:
			
 
				-specifically, it is defined for the first state of a given infinite
			
 
				-sequence of system states (a trace).
			
 
				-Given, for instance, the sequence of system states:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-s0;s1;s2;...
			
 
				-</PRE></TT></DL>
			
 
				-the LTL formula
			
 
				-<TT>pUq</TT>,
			
 
				-with
			
 
				-<TT>p</TT>
			
 
				-and
			
 
				-<TT>q</TT>
			
 
				-standard <small>PROMELA</small> expressions, is true for
			
 
				-<TT>s0</TT>
			
 
				-either if
			
 
				-<TT>q</TT>
			
 
				-is true in
			
 
				-<TT>s0</TT>,
			
 
				-or if
			
 
				-<TT>p</TT>
			
 
				-is true in
			
 
				-<TT>s0</TT>
			
 
				-and
			
 
				-<TT>pUq</TT>
			
 
				-holds for the remainder of the sequence after
			
 
				-<TT>s0</TT>.
			
 
				-<br>&#32;<br>
			
 
				-Informally,
			
 
				-<TT>pUq</TT>
			
 
				-says that
			
 
				-<TT>p</TT>
			
 
				-is required to hold at least until
			
 
				-<TT>q</TT>
			
 
				-becomes true.
			
 
				-If, instead, we would write <TT>p</TT><B><I>U</I></B><TT>q</TT>,
			
 
				-then we also require that there exists at least
			
 
				-one state in the sequence where
			
 
				-<TT>q</TT>
			
 
				-does indeed become true.
			
 
				-<br>&#32;<br>
			
 
				-The temporal operators &#9633; and &#9671;
			
 
				-can be defined in terms of the strong until operator
			
 
				-<B><I>U</I></B>,
			
 
				-as follows.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&#9633; p = !&#9671; !p = !(true <B><I>U</I></B> !p)
			
 
				-</PRE></TT></DL>
			
 
				-Informally, &#9633;
			
 
				-<TT>p</TT>
			
 
				-says that property
			
 
				-<TT>p</TT>
			
 
				-must hold in all states of a trace, and &#9671;
			
 
				-<TT>p</TT>
			
 
				-says that
			
 
				-<TT>p</TT>
			
 
				-holds in at least one state of the trace.
			
 
				-<br>&#32;<br>
			
 
				-To express our original example requirement: ``every system state
			
 
				-in which property
			
 
				-<TT>P</TT>
			
 
				-is true eventually leads to a system state in which property
			
 
				-<TT>Q</TT>
			
 
				-is true,''
			
 
				-we can write the LTL formula:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&#9633; (P -&gt; &#9671; Q)
			
 
				-</PRE></TT></DL>
			
 
				-where the logical implication symbol
			
 
				-<TT>-&gt;</TT>
			
 
				-is defined in the usual way as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-P -&gt; Q means !P || Q
			
 
				-</PRE></TT></DL>
			
 
				-<H4>Mapping LTL Formulae onto Never Claims
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<small>PROMELA</small> does not include syntax for specifying LTL formulae
			
 
				-directly, but it relies on the fact that every such
			
 
				-formula can be translated into a special type of
			
 
				-automaton, known as a B&uuml;chi automaton.
			
 
				-In the syntax of <small>PROMELA</small> this automaton is called a
			
 
				-<TT>never</TT>
			
 
				-claim.
			
 
				-If you don't care too much about the details of
			
 
				-<TT>never</TT>
			
 
				-claims, you can skip the remainder of this section and
			
 
				-simply remember that <small>SPIN</small> can convert any LTL formula
			
 
				-automatically into the proper never claim syntax with
			
 
				-the command:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -f "...formula..."
			
 
				-</PRE></TT></DL>
			
 
				-Here are the details.
			
 
				-The syntax of a never claim is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {
			
 
				-	...
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-where the dots can contain any <small>PROMELA</small> fragment, including
			
 
				-arbitrary repetition, selection, unless constructs,
			
 
				-jumps, etc.
			
 
				-<br>&#32;<br>
			
 
				-There is an important difference in semantics between a
			
 
				-<TT>proctype</TT>
			
 
				-declaration and a
			
 
				-<TT>never</TT>
			
 
				-claim.
			
 
				-Every statement inside a
			
 
				-<TT>never</TT>
			
 
				-claim is interpreted as a proposition, i.e., a condition.
			
 
				-A
			
 
				-<TT>never</TT>
			
 
				-claim should therefore only contain expressions and never
			
 
				-statements that can have side-effects (assignments, sends or
			
 
				-receives, run-statements, etc.)
			
 
				-<br>&#32;<br>
			
 
				-<TT>Never</TT>
			
 
				-claims are used to express behaviors that are considered
			
 
				-undesirable or illegal.
			
 
				-We say that a
			
 
				-<TT>never</TT>
			
 
				-claim is `matched' if the undesirable behavior can be realized,
			
 
				-contrary to the claim, and thus the correctness requirement violated.
			
 
				-The claims are evaluated over system executions, that is, the
			
 
				-propositions that are listed in the claim are evaluated over the
			
 
				-traces from the remainder of the system.
			
 
				-The claim, therefore, should not alter that behavior: it merely
			
 
				-monitors it.
			
 
				-Every time that the system reaches a new state, by asynchronously
			
 
				-executing statements from the model, the claim will evaluate the
			
 
				-appropriate propositions to determine if a counter-example can
			
 
				-be constructed to the implicit LTL formula that is specified.
			
 
				-<br>&#32;<br>
			
 
				-Since LTL formulae are only defined for infinite executions,
			
 
				-the behavior of a
			
 
				-<TT>never</TT>
			
 
				-claim can only be matched by an infinite system execution.
			
 
				-This by itself would restrict us to the use of progress labels
			
 
				-and accept labels as the only means we have discussed so far
			
 
				-for expressing properties of infinite behaviors.
			
 
				-To conform to standard omega automata theory, the behaviors of
			
 
				-<TT>never</TT>
			
 
				-claims are expressed exclusively with
			
 
				-<TT>accept</TT>
			
 
				-labels (never with
			
 
				-<TT>progress</TT>
			
 
				-labels).
			
 
				-To match a claim, therefore, an infinite sequence of true propositions
			
 
				-must exist, at least one of which is labeled with an
			
 
				-<TT>accept</TT>
			
 
				-label (inside the never claim).
			
 
				-<br>&#32;<br>
			
 
				-Since <small>PROMELA</small> models can also express terminating system behaviors,
			
 
				-we have to define the semantics of the
			
 
				-<TT>never</TT>
			
 
				-claims also for those behaviors.
			
 
				-To facilitate this, it is defined that a
			
 
				-<TT>never</TT>
			
 
				-claim can also be matched when it reaches its closing curly brace
			
 
				-(i.e., when it appears to terminate).
			
 
				-This semantics is based on what is usually referred to as a `stuttering
			
 
				-semantics.'
			
 
				-With stuttering semantics, any terminating execution can be extended
			
 
				-into an equivalent infinite execution (for the purposes of evaluating
			
 
				-LTL properties) by repeating (stuttering) the final state infinitely often.
			
 
				-As a syntactical convenience, the final state of a
			
 
				-<TT>never</TT>
			
 
				-claim is defined to be accepting, i.e., it could be replaced with
			
 
				-the explicit repetition construct:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-accept: do :: skip od
			
 
				-</PRE></TT></DL>
			
 
				-Every process behavior, similarly, is (for the purposes of evaluating the
			
 
				-<TT>never</TT>
			
 
				-claims) thought to be extended with a dummy self-loop on all final states:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-	do :: skip od
			
 
				-</PRE></TT></DL>
			
 
				-(Note that the
			
 
				-<TT>accept</TT>
			
 
				-labels only occur in the
			
 
				-<TT>never</TT>
			
 
				-claim, not in the system.)
			
 
				-<H4>The Semantics of a Never Claim
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>Never</TT>
			
 
				-claims are probably the hardest part of the language to understand,
			
 
				-so it is worth spending a few extra words on them.
			
 
				-On an initial reading, feel free to skip the remainder of this
			
 
				-section.
			
 
				-<br>&#32;<br>
			
 
				-The difference between a
			
 
				-<TT>never</TT>
			
 
				-claim and the remainder of a <small>PROMELA</small> system can be explained
			
 
				-as follows.
			
 
				-A <small>PROMELA</small> model defines an asynchronous interleaving product of the
			
 
				-behaviors of individual processes.
			
 
				-Given an arbitrary system state, its successor states are
			
 
				-conceptually obtained in two steps.
			
 
				-In a first step, all the executable statements in the
			
 
				-individual processes are identified.
			
 
				-In a second step, each one of these statements is executed,
			
 
				-each one producing one potential successor for the current state.
			
 
				-The complete system behavior is thus defined recursively and
			
 
				-represents all possible interleavings of the individual process behaviors.
			
 
				-It is this asynchronous product machine that we call the `global
			
 
				-system behavior'.
			
 
				-<br>&#32;<br>
			
 
				-The addition of a
			
 
				-<TT>never</TT>
			
 
				-claim defines a <I>synchronous</I> product of the global system behavior
			
 
				-with the behavior expressed in the claim.
			
 
				-This synchronous product can be thought of as the construction of a
			
 
				-new global state machine, in which every state is defined as a pair
			
 
				-<TT>(s,n)</TT>
			
 
				-with
			
 
				-<TT>s</TT>
			
 
				-a state from the global system (the asynchronous product of processes), and
			
 
				-<TT>n</TT>
			
 
				-a state from the claim.
			
 
				-Every transition in the new global machine is similarly defined by a pair
			
 
				-of transitions, with the first element a statement from the system, and the
			
 
				-second a proposition from the claim.
			
 
				-In other words, every transition in this final synchronous product is
			
 
				-defined as a joint transition of the system and the claim.
			
 
				-Of course, that transition can only occur if the proposition from the
			
 
				-second half of the transition pair evaluates to true in the current state
			
 
				-of the system (the first half of the state pair).
			
 
				-<H4>Examples
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-To manually translate an LTL formula into a
			
 
				-<TT>never</TT>
			
 
				-claim (e.g., forgoing the built-in translation that <small>SPIN</small>
			
 
				-offers), we must carefully consider whether the
			
 
				-formula expresses a positive or a negative property.
			
 
				-A positive property expresses a good behavior that we
			
 
				-would like our system to have.
			
 
				-A negative property expresses a bad behavior that we
			
 
				-claim the system does not have.
			
 
				-A
			
 
				-<TT>never</TT>
			
 
				-claim can express only negative claims, not positive ones.
			
 
				-Fortunately, the two are exchangeable:  if we want to express
			
 
				-that a good behavior is unavoidable, we can formalize all
			
 
				-ways in which the good behavior could be violated, and express
			
 
				-that in the
			
 
				-<TT>never</TT>
			
 
				-claim.
			
 
				-<br>&#32;<br>
			
 
				-Suppose that the LTL formula &#9671;&#9633;
			
 
				-<TT>p</TT>,
			
 
				-with
			
 
				-<TT>p</TT>
			
 
				-a <small>PROMELA</small> expression, expresses a negative claim
			
 
				-(i.e., it is considered a correctness violation if
			
 
				-there exists any execution sequence in which
			
 
				-<TT>p</TT>
			
 
				-can eventually remain true infinitely long).
			
 
				-This can be written in a
			
 
				-<TT>never</TT>
			
 
				-claim as:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {	/* &lt;&gt;[]p */
			
 
				-	do
			
 
				-	:: skip	/* after an arbitrarily long prefix */
			
 
				-	:: p -&gt; break	/* p becomes true */
			
 
				-	od;
			
 
				-accept:	do
			
 
				-	:: p	/* and remains true forever after */
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Note that in this case the claim does not terminate, and
			
 
				-also does not necessarily match all system behaviors.
			
 
				-It is sufficient if it precisely captures all violations
			
 
				-of our correctness requirement, and no more.
			
 
				-<br>&#32;<br>
			
 
				-If the LTL formula expressed a positive property, we first
			
 
				-have to invert it to the corresponding negative property
			
 
				-<TT>&#9671;!p</TT>
			
 
				-and translate that into a
			
 
				-<TT>never</TT>
			
 
				-claim.
			
 
				-The requirement now says that it is a violation if
			
 
				-<TT>p</TT>
			
 
				-does not hold infinitely long.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {	/* &lt;&gt;!p*/
			
 
				-	do
			
 
				-	:: skip
			
 
				-	:: !p -&gt; break
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-We have used the implicit match of a claim upon reaching the
			
 
				-closing terminating brace.
			
 
				-Since the first violation of the property suffices to disprove
			
 
				-it, we could also have written:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {	/* &lt;&gt;!p*/
			
 
				-	do
			
 
				-	:: p
			
 
				-	:: !p -&gt; break
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-or, if we abandon the connection with LTL for a moment,
			
 
				-even more tersely as:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never { do :: assert(p) od }
			
 
				-</PRE></TT></DL>
			
 
				-Suppose we wish to express that it is a violation of our
			
 
				-correctness requirements if there exists any execution in
			
 
				-the system where
			
 
				-<TT>&#9633; (p -&gt; &#9671; q)</TT>
			
 
				-is violated (i.e., the negation of this formula is satisfied).
			
 
				-The following
			
 
				-<TT>never</TT>
			
 
				-claim expresses that property:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {
			
 
				-	do
			
 
				-	:: skip
			
 
				-	:: p &amp;&amp; !q -&gt; break
			
 
				-	od;
			
 
				-accept:
			
 
				-	do
			
 
				-	:: !q
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Note that using
			
 
				-<TT>(!p || q)</TT>
			
 
				-instead of
			
 
				-<TT>skip</TT>
			
 
				-in the first repetition construct would imply a check for just
			
 
				-the first occurrence of proposition
			
 
				-<TT>p</TT>
			
 
				-becoming true in the execution sequence, while
			
 
				-<TT>q</TT>
			
 
				-is false.
			
 
				-The above formalization checks for all occurrences, anywhere in a trace.
			
 
				-<br>&#32;<br>
			
 
				-Finally, consider a formalization of the LTL property
			
 
				-<TT>&#9633; (p -&gt; (q U r))</TT>.
			
 
				-The corresponding claim is:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {
			
 
				-	do
			
 
				-	:: skip		/* to match any occurrence */
			
 
				-	:: p &amp;&amp;  q &amp;&amp; !r -&gt; break
			
 
				-	:: p &amp;&amp; !q &amp;&amp; !r -&gt; goto error
			
 
				-	od;
			
 
				-	do
			
 
				-	::  q &amp;&amp; !r
			
 
				-	:: !q &amp;&amp; !r -&gt; break
			
 
				-	od;
			
 
				-error:	skip
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-Note again the use of
			
 
				-<TT>skip</TT>
			
 
				-instead of
			
 
				-<TT>(!p || r)</TT>
			
 
				-to avoid matching just the first occurrence of
			
 
				-<TT>(p &amp;&amp; !r)</TT>
			
 
				-in a trace.
			
 
				-<H4>1.4 Predefined Variables and Functions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The following predefined variables and functions
			
 
				-can be especially useful in
			
 
				-<TT>never</TT>
			
 
				-claims.
			
 
				-<br>&#32;<br>
			
 
				-The predefined variables are:
			
 
				-<TT>_pid</TT>
			
 
				-and
			
 
				-<TT>_last</TT>.
			
 
				-<br>&#32;<br>
			
 
				-<TT>_pid</TT>
			
 
				-is a predefined local variable in each process
			
 
				-that holds the unique instantiation number for
			
 
				-that process.
			
 
				-It is always a non-negative number.
			
 
				-<br>&#32;<br>
			
 
				-<TT>_last</TT>
			
 
				-is a predefined global variable that always holds the
			
 
				-instantiation number of the process that performed the last
			
 
				-step in the current execution sequence.
			
 
				-Its value is not part of the system state unless it is
			
 
				-explicitly used in a specification.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {
			
 
				-	/* it is not possible for the process with pid=1
			
 
				-	 * to execute precisely every other step forever
			
 
				-	 */
			
 
				-accept:
			
 
				-	do
			
 
				-	:: _last != 1 -&gt; _last == 1
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The initial value of
			
 
				-<TT>_last</TT>
			
 
				-is zero.
			
 
				-<br>&#32;<br>
			
 
				-Three predefined functions are specifically intended to be used in
			
 
				-<TT>never</TT>
			
 
				-claims, and may not be used elsewhere in a model:
			
 
				-<TT>pc_value(pid)</TT>,
			
 
				-<TT>enabled(pid)</TT>,
			
 
				-<TT>procname[pid]@label</TT>.
			
 
				-<br>&#32;<br>
			
 
				-The function
			
 
				-<TT>pc_value(pid)</TT>
			
 
				-returns the current control state
			
 
				-of the process with instantiation number
			
 
				-<TT>pid</TT>,
			
 
				-or zero if no such process exists.
			
 
				-<br>&#32;<br>
			
 
				-Example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {
			
 
				-	/* Whimsical use: claim that it is impossible
			
 
				-	 * for process 1 to remain in the same control
			
 
				-	 * state as process 2, or one with smaller value.
			
 
				-	 */
			
 
				-accept:	do
			
 
				-	:: pc_value(1) &lt;= pc_value(2)
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The function
			
 
				-<TT>enabled(pid)</TT>
			
 
				-tells whether the process with instantiation number
			
 
				-<TT>pid</TT>
			
 
				-has an executable statement that it can execute next.
			
 
				-<br>&#32;<br>
			
 
				-Example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-never {
			
 
				-	/* it is not possible for the process with pid=1
			
 
				-	 * to remain enabled without ever executing
			
 
				-	 */
			
 
				-accept:
			
 
				-	do
			
 
				-	:: _last != 1 &amp;&amp; enabled(1)
			
 
				-	od
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-The last function
			
 
				-<TT>procname[pid]@label</TT>
			
 
				-tells whether the process with instantiation number
			
 
				-<TT>pid</TT>
			
 
				-is currently in the state labeled with
			
 
				-<TT>label</TT>
			
 
				-in
			
 
				-<TT>proctype procname</TT>.
			
 
				-It is an error if the process referred to is not an instantiation
			
 
				-of that proctype.
			
 
				-<H4>2 Verifications with <small>SPIN</small>
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The easiest way to use <small>SPIN</small> is probably on a Windows terminal
			
 
				-with the Tcl/Tk implementation of <small>XSPIN</small>.
			
 
				-All functionality of <small>SPIN</small>, however, is accessible from
			
 
				-any plain ASCII terminal, and there is something to be
			
 
				-said for directly interacting with the tool itself.
			
 
				-<br>&#32;<br>
			
 
				-The description in this paper gives a short walk-through of
			
 
				-a common mode of operation in using the verifier.
			
 
				-A more tutorial style description of the verification
			
 
				-process can be found in [Ho93].
			
 
				-More detail on the verification of large systems with the
			
 
				-help of <small>SPIN</small>'s supertrace (bitstate) verification algorithm
			
 
				-can be found in [Ho95].
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DD>
			
 
				-<br>
			
 
				-*
			
 
				-Random and interactive simulations (section 2.1),
			
 
				-<br>
			
 
				-*
			
 
				-Generating a verifier (section 2.2),
			
 
				-<br>
			
 
				-*
			
 
				-Compilation for different types of searches (section 2.3),
			
 
				-<br>
			
 
				-*
			
 
				-Performing the verification (section 2.4),
			
 
				-<br>
			
 
				-*
			
 
				-Inspecting error traces produced by the verifier (section 2.5),
			
 
				-<br>
			
 
				-*
			
 
				-Exploiting partial order reductions (section 2.6).
			
 
				-</DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<H4>2.1 Random and Interactive Simulations
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Given a model in <small>PROMELA</small>, say stored in a file called
			
 
				-<TT>spec</TT>,
			
 
				-the easiest mode of operation is to perform a random simulation.
			
 
				-For instance,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -p spec
			
 
				-</PRE></TT></DL>
			
 
				-tells <small>SPIN</small> to perform a random simulation, while printing the
			
 
				-process moves selected for execution at each step (by default
			
 
				-nothing is printed, other than explicit
			
 
				-<TT>printf</TT>
			
 
				-statements that appear in the model itself).
			
 
				-A range of options exists to make the traces more verbose,
			
 
				-e.g., by adding printouts of local variables (add option
			
 
				-<TT>-l</TT>),
			
 
				-global variables (add option
			
 
				-<TT>-g</TT>),
			
 
				-send statements (add option
			
 
				-<TT>-s</TT>),
			
 
				-or receive statements (add option
			
 
				-<TT>-r</TT>).
			
 
				-Use option
			
 
				-<TT>-n</TT>N
			
 
				-(with N any number) to fix the seed on <small>SPIN</small>'s internal
			
 
				-random number generator, and thus make the simulation runs
			
 
				-reproducible.
			
 
				-By default the current time is used to seed the random number
			
 
				-generator.
			
 
				-For instance:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -p -l -g -r -s -n1 spec
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-If you don't like the system randomly resolving non-deterministic
			
 
				-choices for you, you can select an interactive simulation:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -i -p spec
			
 
				-</PRE></TT></DL>
			
 
				-In this case you will be offered a menu with choices each time
			
 
				-the execution could proceed in more than one way.
			
 
				-<br>&#32;<br>
			
 
				-Simulations, of course, are intended primarily for the
			
 
				-debugging of a model.  They cannot prove anything about it.
			
 
				-Assertions will be evaluated during simulation runs, and
			
 
				-any violations that result will be reported, but none of
			
 
				-the other correctness requirements can be checked in this way.
			
 
				-<H4>2.2 Generating the Verifier
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-A model-specific verifier is generated as follows:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -a spec
			
 
				-</PRE></TT></DL>
			
 
				-This generates a C program in a number of files (with names
			
 
				-starting with
			
 
				-<TT>pan</TT>).
			
 
				-<H4>2.3 Compiling the Verifier
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-At this point it is good to know the physical limitations of
			
 
				-the computer system that you will run the verification on.
			
 
				-If you know how much physical (not virtual) memory your system
			
 
				-has, you can take advantage of that.
			
 
				-Initially, you can simply compile the verifier for a straight
			
 
				-exhaustive verification run (constituting the strongest type
			
 
				-of proof if it can be completed).
			
 
				-Compile as follows.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pcc -o pan pan.c		# standard exhaustive search
			
 
				-</PRE></TT></DL>
			
 
				-If you know a memory bound that you want to restrict the run to
			
 
				-(e.g., to avoid paging), find the nearest power of 2 (e.g., 23
			
 
				-for the bound 2<sup>23</sup> bytes) and compile as follows.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pcc '-DMEMCNT=23' -o pan pan.c
			
 
				-</PRE></TT></DL>
			
 
				-or equivalently in terms of MegaBytes:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pcc '-DMEMLIM=8' -o pan pan.c
			
 
				-</PRE></TT></DL>
			
 
				-If the verifier runs out of memory before completing its task,
			
 
				-you can decide to increase the bound or to switch to a frugal
			
 
				-supertrace verification.  In the latter case, compile as follows.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pcc -DBITSTATE -o pan pan.c
			
 
				-</PRE></TT></DL>
			
 
				-<H4>2.4 Performing the Verification
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-There are three specific decisions to make to
			
 
				-perform verifications optimally: estimating the
			
 
				-size of the reachable state space (section 2.4.1),
			
 
				-estimating the maximum length of a unique execution
			
 
				-sequence (2.4.2), and selecting the type of correctness
			
 
				-property (2.4.3).
			
 
				-No great harm is done if the estimates from the first two
			
 
				-steps are off.  The feedback from the verifier usually provides
			
 
				-enough clues to determine quickly what the optimal settings
			
 
				-for peak performance should be.
			
 
				-<H4>2.4.1 Reachable States
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-For a standard exhaustive run, you can override the default choice
			
 
				-for the size of the hash table (2<sup>18</sup> slots) with option
			
 
				-<TT>-w</TT>.
			
 
				-For instance,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -w23
			
 
				-</PRE></TT></DL>
			
 
				-selects 2<sup>23</sup> slots.
			
 
				-The hash table size should optimally be roughly equal to the number of
			
 
				-reachable states you expect (within say a factor of two or three).
			
 
				-Too large a number merely wastes memory, too low a number wastes
			
 
				-CPU time, but neither can affect the correctness of the result.
			
 
				-<br>&#32;<br>
			
 
				-For a supertrace run, the hash table <I>is</I> the memory arena, and
			
 
				-you can override the default of 2<sup>22</sup> bits with any other number.
			
 
				-Set it to the maximum size of physical memory you can grab without
			
 
				-making the system page, again within a factor of say two or three.
			
 
				-Use, for instance
			
 
				-<TT>-w23</TT>
			
 
				-if you expect 8 million reachable states and have access to at least
			
 
				-8 million (2<sup>23</sup>) bits of memory (i.e., 2<sup>20</sup> bytes, or 1 Megabyte of RAM).
			
 
				-<H4>2.4.2 Search Depth
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-By default the analyzers have a search depth restriction of 10,000 steps.
			
 
				-If this isn't enough, the search will truncate at 9,999 steps (watch for
			
 
				-it in the printout).
			
 
				-Define a different search depth with the -m flag.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -m100000
			
 
				-</PRE></TT></DL>
			
 
				-If you also exceed this limit, it is probably good to take some
			
 
				-time to consider if the model you have specified is indeed finite.
			
 
				-Check, for instance, if no unbounded number of processes is created.
			
 
				-If satisfied that the model is finite, increase the search depth at
			
 
				-least as far as is required to avoid truncation completely.
			
 
				-<br>&#32;<br>
			
 
				-If you find a particularly nasty error that takes a large number of steps
			
 
				-to hit, you may also set lower search depths to find the shortest variant
			
 
				-of an error sequence.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -m40
			
 
				-</PRE></TT></DL>
			
 
				-Go up or down by powers of two until you find the place where the
			
 
				-error first appears or disappears and then home in on the first
			
 
				-depth where the error becomes apparent, and use the error trail of
			
 
				-that verification run for guided simulation.
			
 
				-<br>&#32;<br>
			
 
				-Note that if a run with a given search depth fails to find
			
 
				-an error, this does not necessarily mean that no violation of a
			
 
				-correctness requirement is possible within that number of steps.
			
 
				-The verifier performs its search for errors by using a standard
			
 
				-depth-first graph search.  If the search is truncated at N steps,
			
 
				-and a state at level N-1 happens to be reachable also within fewer
			
 
				-steps from the initial state, the second time it is reached it
			
 
				-will not be explored again, and thus neither will its successors.
			
 
				-Those successors may contain error states that are reachable within
			
 
				-N steps from the initial state.
			
 
				-Normally, the verification should be run in such a way that no
			
 
				-execution paths can be truncated, but to force the complete exploration
			
 
				-of also truncated searches one can override the defaults with a compile-time
			
 
				-flag
			
 
				-<TT>-DREACH</TT>.
			
 
				-When the verifier is compiled with that additional directive, the depth at
			
 
				-which each state is visited is remembered, and a state is now considered
			
 
				-unvisited if it is revisited via a shorter path later in the search.
			
 
				-(This option cannot be used with a supertrace search.)
			
 
				-<H4>2.4.3 Liveness or Safety Verification
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-For the last, and perhaps the most critical, runtime decision:
			
 
				-it must be decided if the system is to be checked for safety
			
 
				-violations or for liveness violations.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -l	# search for non-progress cycles
			
 
				-pan -a	# search for acceptance cycles
			
 
				-</PRE></TT></DL>
			
 
				-(In the first case, though, you must compile pan.c with -DNP as an
			
 
				-additional directive. If you forget, the executable will remind you.)
			
 
				-If you don't use either of the above two options, the default types of
			
 
				-correctness properties are checked (assertion violations,
			
 
				-completeness, race conditions, etc.).
			
 
				-Note that the use of a
			
 
				-<TT>never</TT>
			
 
				-claim that contains
			
 
				-<TT>accept</TT>
			
 
				-labels requires the use of the
			
 
				-<TT>-a</TT>
			
 
				-flag for complete verification.
			
 
				-<br>&#32;<br>
			
 
				-Adding option
			
 
				-<TT>-f</TT>
			
 
				-restricts the search for liveness properties further under
			
 
				-a standard <I>weak fairness</I> constraint:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -f -l	# search for weakly fair non-progress cycles
			
 
				-pan -f -a	# search for weakly fair acceptance cycles
			
 
				-</PRE></TT></DL>
			
 
				-With this constraint, each process is required to appear
			
 
				-infinitely often in the infinite trace that constitutes
			
 
				-the violation of a liveness property (e.g., a non-progress cycle
			
 
				-or an acceptance cycle), unless it is permanently blocked
			
 
				-(i.e., has no executable statements after a certain point in
			
 
				-the trace is reached).
			
 
				-Adding the fairness constraint increases the time complexity
			
 
				-of the verification by a factor that is linear in the number
			
 
				-of active processes.
			
 
				-<br>&#32;<br>
			
 
				-By default, the verifier will report on unreachable code in
			
 
				-the model only when a verification run is successfully
			
 
				-completed.
			
 
				-This default behavior can be turned off with the runtime option
			
 
				-<TT>-n</TT>,
			
 
				-as in:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -n -f -a
			
 
				-</PRE></TT></DL>
			
 
				-(The order in which the options such as these are listed is
			
 
				-always irrelevant.)
			
 
				-A brief explanation of these and other runtime options can
			
 
				-be determined by typing:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan --
			
 
				-</PRE></TT></DL>
			
 
				-<H4>2.5 Inspecting Error Traces
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-If the verification run reports an error,
			
 
				-any error, <small>SPIN</small> dumps an error trail into a file named
			
 
				-<TT>spec.trail</TT>,
			
 
				-where
			
 
				-<TT>spec</TT>
			
 
				-is the name of your original <small>PROMELA</small> file.
			
 
				-To inspect the trail, and determine the cause of the error,
			
 
				-you must use the guided simulation option.
			
 
				-For instance:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -t -c spec
			
 
				-</PRE></TT></DL>
			
 
				-gives you a summary of message exchanges in the trail, or
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -t -p spec
			
 
				-</PRE></TT></DL>
			
 
				-gives a printout of every single step executed.
			
 
				-Add as many extra or different options as you need to pin down the error:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-spin -t -r -s -l -g spec
			
 
				-</PRE></TT></DL>
			
 
				-Make sure the file
			
 
				-<TT>spec</TT>
			
 
				-didn't change since you generated the analyzer from it.
			
 
				-<br>&#32;<br>
			
 
				-If you find non-progress cycles, add or delete progress labels
			
 
				-and repeat the verification until you are content that you have found what
			
 
				-you were looking for.
			
 
				-<br>&#32;<br>
			
 
				-If you are not interested in the first error reported,
			
 
				-use pan option
			
 
				-<TT>-c</TT>
			
 
				-to report on specific others:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -c3
			
 
				-</PRE></TT></DL>
			
 
				-ignores the first two errors and reports on the third one that
			
 
				-is discovered.
			
 
				-If you just want to count all errors and not see them, use
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -c0
			
 
				-</PRE></TT></DL>
			
 
				-<H4>State Assignments
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-Internally, the verifiers produced by <small>SPIN</small> deal with a formalization of
			
 
				-a <small>PROMELA</small> model in terms of extended finite state machines.
			
 
				-<small>SPIN</small> therefore assigns state numbers to all statements in the model.
			
 
				-The state numbers are listed in all the relevant output to make it
			
 
				-completely unambiguous (source line references unfortunately do not
			
 
				-have that property).
			
 
				-To confirm the precise state assignments, there is a runtime option
			
 
				-to the analyzer generated:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-pan -d	# print state machines
			
 
				-</PRE></TT></DL>
			
 
				-which will print out a table with all state assignments for each
			
 
				-<TT>proctype</TT>
			
 
				-in the model.
			
 
				-<H4>2.6 Exploiting Partial Order Reductions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-The search algorithm used by <small>SPIN</small> is optimized
			
 
				-according to the rules of a partial order theory explained in [HoPe94].
			
 
				-The effect of the reduction, however, can be increased considerably if the verifier
			
 
				-has extra information about the access of processes to global
			
 
				-message channels.
			
 
				-For this purpose, there are two keywords in the language that
			
 
				-allow one to assert that specific channels are used exclusively
			
 
				-by specific processes.
			
 
				-For example, the assertions
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-xr q1;
			
 
				-xs q2;
			
 
				-</PRE></TT></DL>
			
 
				-claim that the process that executes them is the <I>only</I> process
			
 
				-that will receive messages from channel
			
 
				-<TT>q1</TT>,
			
 
				-and the <I>only</I> process that will send messages to channel
			
 
				-<TT>q2</TT>.
			
 
				-<br>&#32;<br>
			
 
				-If an exclusive usage assertion turns out to be invalid, the
			
 
				-verifier will be able to detect this, and report it as a violation
			
 
				-of an implicit correctness requirement.
			
 
				-<br>&#32;<br>
			
 
				-Every read or write access to a message channel can introduce
			
 
				-new dependencies that may diminish the maximum effect of the
			
 
				-partial order reduction strategies.
			
 
				-If, for instance, a process uses the
			
 
				-<TT>len</TT>
			
 
				-function to check the number of messages stored in a channel,
			
 
				-this counts as a read access, which can in some cases invalidate
			
 
				-an exclusive access pattern that might otherwise exist.
			
 
				-There are two special functions that can be used to poll the
			
 
				-size of a channel in a safe way that is compatible with the
			
 
				-reduction strategy.
			
 
				-<br>&#32;<br>
			
 
				-The expression
			
 
				-<TT>nfull(qname)</TT>
			
 
				-returns true if channel
			
 
				-<TT>qname</TT>
			
 
				-is not full, and
			
 
				-<TT>nempty(qname)</TT>
			
 
				-returns true if channel
			
 
				-<TT>qname</TT>
			
 
				-contains at least one message.
			
 
				-Note that the parser will not recognize the free form expressions
			
 
				-<TT>!full(qname)</TT>
			
 
				-and
			
 
				-<TT>!empty(qname)</TT>
			
 
				-as equally safe, and it will forbid constructions such as
			
 
				-<TT>!nfull(qname)</TT>
			
 
				-or
			
 
				-<TT>!nempty(qname)</TT>.
			
 
				-More detail on this aspect of the reduction algorithms can be
			
 
				-found in [HoPe94].
			
 
				-<H4>Keywords
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-For reference, the following table contains all the keywords,
			
 
				-predefined functions, predefined variables, and
			
 
				-special label-prefixes of the language <small>PROMELA</small>,
			
 
				-and refers to the section of this paper in
			
 
				-which they were discussed.
			
 
				-<br><img src="-.19126692.gif"><br>
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[Ho91]
			
 
				-G.J. Holzmann,
			
 
				-Design and Validation of Computer Protocols,
			
 
				-Prentice Hall, 1991.
			
 
				-<br>&#32;<br>
			
 
				-[Ho93]
			
 
				-G.J. Holzmann, ``Tutorial: Design and Validation of Protocols,''
			
 
				-Computer Networks and ISDN Systems,
			
 
				-1993, Vol. 25, No. 9, pp. 981-1017.
			
 
				-<br>&#32;<br>
			
 
				-[HoPe94]
			
 
				-G.J. Holzmann and D.A. Peled, ``An improvement in
			
 
				-formal verification,''
			
 
				-Proc. 7th Int. Conf. on Formal Description Techniques,
			
 
				-FORTE94, Berne, Switzerland. October 1994.
			
 
				-<br>&#32;<br>
			
 
				-[Ho95]
			
 
				-G.J. Holzmann, ``An Analysis of Bitstate Hashing,''
			
 
				-technical report 2/95, available from author.
			
 
				-<br>&#32;<br>
			
 
				-[HS99]
			
 
				-G.J. Holzmann, ``Software model checking: extracting
			
 
				-verification models from source code,''
			
 
				-Proc. Formal Methods in Software Engineering and Distributed
			
 
				-Systems,
			
 
				-PSTV/FORTE99, Beijing, China, Oct. 1999, Kluwer, pp. 481-497.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/troff.html
+++ b/sys/doc/troff.html
@@ -1,3348 +0,0 @@
 
				-<html>
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-<title>
			
 
				--
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Troff User's Manual
			
 
				-</H1>
			
 
				-<DL><DD><I>Joseph F. Ossanna<br>
			
 
				-Brian W. Kernighan<br>
			
 
				-<br>&#32;<br>
			
 
				-bwk@research.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Troff</I> and <I>nroff</I> are text processors 
			
 
				-that format text for typesetter- and
			
 
				-typewriter-like terminals, respectively.
			
 
				-They accept lines of text interspersed with lines of
			
 
				-format control information and
			
 
				-format the text into a printable, paginated document
			
 
				-having a user-designed style.
			
 
				-<I>Troff</I> and <I>nroff</I> offer
			
 
				-unusual freedom in document styling:
			
 
				-arbitrary style headers and footers;
			
 
				-arbitrary style footnotes;
			
 
				-multiple automatic sequence numbering for paragraphs, sections, etc;
			
 
				-multiple column output;
			
 
				-dynamic font and point-size control;
			
 
				-arbitrary horizontal and vertical local motions at any point;
			
 
				-and
			
 
				-a family of automatic overstriking, bracket construction, and
			
 
				-line-drawing functions.
			
 
				-
			
 
				-
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Troff</I>
			
 
				-produces its output in a device-independent form,
			
 
				-although parameterized for a specific device;
			
 
				-<I>troff</I> output must be processed by a driver for that
			
 
				-device to produce printed output.
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Troff</I> and <I>nroff</I> are highly compatible with each other and it is almost always
			
 
				-possible to prepare input acceptable to both.
			
 
				-Conditional input is provided to enable
			
 
				-the user to embed input expressly destined for either program.
			
 
				-<I>Nroff</I> can prepare output directly for a variety of terminal types and
			
 
				-is capable of utilizing the full resolution of each terminal.
			
 
				-<I>Nroff</I> is the same program as <I>troff</I>; in fact, on Plan 9 
			
 
				-<I>nroff</I> is a shell script that calls <I>troff</I> with the
			
 
				-<TT>-N</TT> argument.
			
 
				-</P>
			
 
				-<H4>Background to the Plan 9 Edition
			
 
				-</H4>
			
 
				-<P>
			
 
				-The primary change to <I>troff</I> and <I>nroff</I> for Plan 9 is
			
 
				-support of the Unicode Standard, which was added during
			
 
				-1992 and 1993.  There are two results.  First, there is much
			
 
				-less need for the myriad of two-character names that are so
			
 
				-much a part of <I>troff</I> lore; in Plan 9, for example, one naturally uses the
			
 
				-Unicode character &#189; instead of <I>troff</I>'s <TT>\(12</TT>.
			
 
				-Second, the output device, though called
			
 
				-is almost always a form of PostScript printer;
			
 
				-the panoply of special drivers for different typesetters
			
 
				-has largely disappeared.
			
 
				-Unfortunately, not all PostScript printers can cope
			
 
				-with Unicode characters, so there remains a need for
			
 
				-programs that synthesize PostScript characters from bitmaps;
			
 
				-this is especially true for Asian languages.
			
 
				-</P>
			
 
				-<H4>Background to the Second Edition
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Troff</I>
			
 
				-was originally written by the late Joe Ossanna
			
 
				-in about 1973, in assembly language for the
			
 
				-PDP-11,
			
 
				-to drive the Graphic Systems CAT typesetter.
			
 
				-It was rewritten in C around 1975,
			
 
				-and underwent slow but steady evolution until
			
 
				-Ossanna's death late in 1977.
			
 
				-</P>
			
 
				-<P>
			
 
				-In 1979, Brian Kernighan
			
 
				-modified
			
 
				-<I>troff</I>
			
 
				-so that it would produce output for a variety of typesetters,
			
 
				-while retaining its input specifications.
			
 
				-Over the decade from 1979 to 1989,
			
 
				-the internals
			
 
				-have been modestly revised,
			
 
				-though much of the code remains as it was when Ossanna wrote it.
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Troff</I>
			
 
				-reads parameter files
			
 
				-each time it is invoked, to
			
 
				-set values for machine resolution,
			
 
				-legal type sizes and fonts, and character names,
			
 
				-character widths
			
 
				-and the like.
			
 
				-<I>Troff</I>
			
 
				-output is
			
 
				-ASCII
			
 
				-characters
			
 
				-in a simple language
			
 
				-that describes where each character is to be placed
			
 
				-and in what size and font.
			
 
				-A post-processor must be written for each device
			
 
				-to convert this typesetter-independent language
			
 
				-into specific instructions for that device.
			
 
				-</P>
			
 
				-<P>
			
 
				-The output language contains information that was not readily
			
 
				-identifiable in the older output.
			
 
				-In the newer language, the beginning of each page, line, and word
			
 
				-is marked,
			
 
				-so post-processors can do device-specific optimizations
			
 
				-such as sorting the data vertically or printing it boustrophedonically,
			
 
				-independent of
			
 
				-<I>troff</I>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Capabilities for graphics have been added:
			
 
				-<I>troff</I>
			
 
				-recognizes commands for drawing diagonal lines,
			
 
				-circles, ellipses, circular arcs,
			
 
				-and quadratic B-splines.
			
 
				-There are also ways to pass arbitrary information to the output,
			
 
				-unprocessed by
			
 
				-<I>troff</I>.
			
 
				-</P>
			
 
				-<P>
			
 
				-A number of limitations have been eased or eliminated.
			
 
				-A document may have an arbitrary number of fonts on any page
			
 
				-(if the output device permits it, of course).
			
 
				-Fonts may be accessed merely by naming them;
			
 
				-``mounting'' is no longer necessary.
			
 
				-There are no limits on the number of characters.
			
 
				-Character height and slant may be set
			
 
				-independently of width.
			
 
				-</P>
			
 
				-<P>
			
 
				-The remainder of this document contains a description of
			
 
				-usage and command-line options;
			
 
				-a summary of requests, escape sequences, and pre-defined number registers;
			
 
				-a reference manual;
			
 
				-tutorial examples;
			
 
				-and a list of commonly-available characters.
			
 
				-</P>
			
 
				-<H4>Acknowledgements
			
 
				-</H4>
			
 
				-<P>
			
 
				-Joe Ossanna's
			
 
				-<I>troff</I>
			
 
				-remains a remarkable accomplishment.
			
 
				-For more than twenty years, it has proven a robust tool,
			
 
				-taking unbelievable abuse from a variety of preprocessors
			
 
				-and being forced into uses that were never conceived of
			
 
				-in the original design,
			
 
				-all with considerable grace under fire.
			
 
				-</P>
			
 
				-<P>
			
 
				-Recent versions of <I>troff</I> have profited from
			
 
				-significant code improvements by
			
 
				-Jaap Akkerhuis, Dennis Ritchie, Ken Thompson, and Molly Wagner.
			
 
				-UTF facilities owe much to Jaap Akkerhuis.
			
 
				-Andrew Hume, Doug McIlroy, Peter Nelson and Ravi Sethi made valuable suggestions on the manual.
			
 
				-I fear that the remaining bugs are my fault.
			
 
				-<br>&#32;<br>
			
 
				-<HR>
			
 
				-<br>&#32;<br>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B>Usage
			
 
				-</B><P>
			
 
				-<I>Troff</I> or <I>nroff</I> is invoked as
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-troff  <I>options  files</I>
			
 
				-nroff  <I>options  files</I>
			
 
				-</PRE></TT></DL>
			
 
				-where <I>options</I> represents any of a number of option arguments
			
 
				-and <I>files</I> represents the list of files containing the document
			
 
				-to be formatted.
			
 
				-An argument consisting of a single minus
			
 
				-represents standard input.
			
 
				-If no filenames are given, input is taken from the standard input.
			
 
				-The options, which may appear in any order so long as they appear
			
 
				-before the files, are:
			
 
				-<br><img src="-.16251.gif"><br>
			
 
				-</P>
			
 
				-<P>
			
 
				-Each option is a separate argument;
			
 
				-for example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-troff -Tutf -ms -mpictures -o4,6,8-10 <I>file1 file2</I>
			
 
				-</PRE></TT></DL>
			
 
				-requests formatting of pages 4, 6, and 8 through 10 of a document contained in the files
			
 
				-named <I>file1</I> and <I>file2</I>,
			
 
				-specifies the output in UTF,
			
 
				-and invokes the macro packages
			
 
				-and
			
 
				-</P>
			
 
				-<P>
			
 
				-Various pre- and post-processors are available for use with <I>nroff</I> and <I>troff</I>.
			
 
				-These include the equation preprocessor
			
 
				-<I>eqn</I>
			
 
				-(for <I>troff</I> only),
			
 
				-the table-construction preprocessor
			
 
				-<I>tbl</I>,
			
 
				-and
			
 
				-<I>pic</I>
			
 
				-and
			
 
				-<I>grap</I>
			
 
				-for various forms of graphics.
			
 
				-<br>&#32;<br>
			
 
				-<HR>
			
 
				-<br>&#32;<br>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B>Request Summary
			
 
				-</B><P>
			
 
				-In the following table,
			
 
				-the notation &#177;<I>N</I> in the
			
 
				-<B><I>column means that the forms </I></B><I>N</I><B><I>, </I></B><I>+N</I><B><I>, or </I></B><I>-N</I><B><I> are permitted,
			
 
				-to set the parameter to </I></B><I>N</I><B><I>, increment it by </I></B><I>N</I><B><I>, or decrement it by </I></B><I>N</I><B><I>,
			
 
				-respectively.
			
 
				-Plain </I></B><I>N</I><B><I> means that the value is used to set the parameter.
			
 
				-</I></B><B><I>separated by 
			
 
				-</I></B>are for
			
 
				-<I>troff</I>
			
 
				-and
			
 
				-<I>nroff</I>
			
 
				-respectively.
			
 
				-In the 
			
 
				-<B><I>column,
			
 
				-<br><img src="-.16252.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-<br><img src="-.16253.gif"><br>
			
 
				-<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-</I></B><TT>ab</TT><B><I>	20
			
 
				-</I></B><TT>ad</TT><B><I>	4
			
 
				-</I></B><TT>af</TT><B><I>	8
			
 
				-</I></B><TT>am</TT><B><I>	7
			
 
				-</I></B><TT>as</TT><B><I>	7
			
 
				-</I></B><TT>bd</TT><B><I>	2
			
 
				-</I></B><TT>bp</TT><B><I>	3
			
 
				-</I></B><TT>br</TT><B><I>	4
			
 
				-</I></B><TT>c2</TT><B><I>	10
			
 
				-</I></B><TT>cc</TT><B><I>	10
			
 
				-</I></B><TT>ce</TT><B><I>	4
			
 
				-</I></B><TT>cf</TT><B><I>	19
			
 
				-</I></B><TT>ch</TT><B><I>	7
			
 
				-</I></B><TT>cs</TT><B><I>	2
			
 
				-</I></B><TT>cu</TT><B><I>	10
			
 
				-</I></B><TT>da</TT><B><I>	7
			
 
				-</I></B><TT>de</TT><B><I>	7
			
 
				-</I></B><TT>di</TT><B><I>	7
			
 
				-</I></B><TT>ds</TT><B><I>	7
			
 
				-</I></B><TT>dt</TT><B><I>	7
			
 
				-</I></B><TT>ec</TT><B><I>	10
			
 
				-</I></B><TT>el</TT><B><I>	16
			
 
				-</I></B><TT>em</TT><B><I>	7
			
 
				-</I></B><TT>eo</TT><B><I>	10
			
 
				-</I></B><TT>ev</TT><B><I>	17
			
 
				-</I></B><TT>ex</TT><B><I>	18
			
 
				-</I></B><TT>fc</TT><B><I>	9
			
 
				-</I></B><TT>fi</TT><B><I>	4
			
 
				-</I></B><TT>fl</TT><B><I>	20
			
 
				-</I></B><TT>fp</TT><B><I>	2
			
 
				-</I></B><TT>ft</TT><B><I>	2
			
 
				-</I></B><TT>hc</TT><B><I>	13
			
 
				-</I></B><TT>hw</TT><B><I>	13
			
 
				-</I></B><TT>hy</TT><B><I>	13
			
 
				-</I></B><TT>ie</TT><B><I>	16
			
 
				-</I></B><TT>if</TT><B><I>	16
			
 
				-</I></B><TT>ig</TT><B><I>	20
			
 
				-</I></B><TT>in</TT><B><I>	6
			
 
				-</I></B><TT>it</TT><B><I>	7
			
 
				-</I></B><TT>lc</TT><B><I>	9
			
 
				-</I></B><TT>lg</TT><B><I>	10
			
 
				-</I></B><TT>lf</TT><B><I>	20
			
 
				-</I></B><TT>ll</TT><B><I>	6
			
 
				-</I></B><TT>ls</TT><B><I>	5
			
 
				-</I></B><TT>lt</TT><B><I>	14
			
 
				-</I></B><TT>mc</TT><B><I>	20
			
 
				-</I></B><TT>mk</TT><B><I>	3
			
 
				-</I></B><TT>na</TT><B><I>	4
			
 
				-</I></B><TT>ne</TT><B><I>	3
			
 
				-</I></B><TT>nf</TT><B><I>	4
			
 
				-</I></B><TT>nh</TT><B><I>	13
			
 
				-</I></B><TT>nm</TT><B><I>	15
			
 
				-</I></B><TT>nn</TT><B><I>	15
			
 
				-</I></B><TT>nr</TT><B><I>	8
			
 
				-</I></B><TT>ns</TT><B><I>	5
			
 
				-</I></B><TT>nx</TT><B><I>	19
			
 
				-</I></B><TT>os</TT><B><I>	5
			
 
				-</I></B><TT>pc</TT><B><I>	14
			
 
				-</I></B><TT>pi</TT><B><I>	19
			
 
				-</I></B><TT>pl</TT><B><I>	3
			
 
				-</I></B><TT>pm</TT><B><I>	20
			
 
				-</I></B><TT>pn</TT><B><I>	3
			
 
				-</I></B><TT>po</TT><B><I>	3
			
 
				-</I></B><TT>ps</TT><B><I>	2
			
 
				-</I></B><TT>rd</TT><B><I>	18
			
 
				-</I></B><TT>rm</TT><B><I>	7
			
 
				-</I></B><TT>rn</TT><B><I>	7
			
 
				-</I></B><TT>rr</TT><B><I>	8
			
 
				-</I></B><TT>rs</TT><B><I>	5
			
 
				-</I></B><TT>rt</TT><B><I>	3
			
 
				-</I></B><TT>so</TT><B><I>	19
			
 
				-</I></B><TT>sp</TT><B><I>	5
			
 
				-</I></B><TT>ss</TT><B><I>	2
			
 
				-</I></B><TT>sv</TT><B><I>	5
			
 
				-</I></B><TT>sy</TT><B><I>	19
			
 
				-</I></B><TT>ta</TT><B><I>	9
			
 
				-</I></B><TT>tc</TT><B><I>	9
			
 
				-</I></B><TT>ti</TT><B><I>	6
			
 
				-</I></B><TT>tl</TT><B><I>	14
			
 
				-</I></B><TT>tm</TT><B><I>	20
			
 
				-</I></B><TT>tr</TT><B><I>	10
			
 
				-</I></B><TT>uf</TT><B><I>	10
			
 
				-</I></B><TT>ul</TT><B><I>	10
			
 
				-</I></B><TT>vs</TT><B><I>	5
			
 
				-</I></B><TT>wh</TT><B><I>	7
			
 
				-<br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-</I></B><br>&#32;<br>
			
 
				-<B>Alphabetical Request and Section Number Cross Reference
			
 
				-</B><br>&#32;<br>
			
 
				-<br>&#32;<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<br>&#32;<br>
			
 
				-<br>&#32;<br>
			
 
				-<HR>
			
 
				-<br>&#32;<br>
			
 
				-</PRE></TT></DL>
			
 
				-<br>&#32;<br>
			
 
				-<B>Escape Sequences for Characters, Indicators, and Functions
			
 
				-</B><br>&#32;<br>
			
 
				-<br><img src="-.16254.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-The escape sequences
			
 
				-and
			
 
				-are interpreted in copy mode (&#167;7.2).
			
 
				- 
			
 
				-<br>&#32;<br>
			
 
				-<HR>
			
 
				-<br>&#32;<br>
			
 
				-<br>&#32;<br>
			
 
				-<B>Predefined Number Registers
			
 
				-</B><br>&#32;<br>
			
 
				-<br><img src="-.16255.gif"><br>
			
 
				-
			
 
				-
			
 
				-<br>&#32;<br>
			
 
				-<B>Predefined Read-Only Number Registers
			
 
				-</B><br>&#32;<br>
			
 
				-<br><img src="-.16256.gif"><br>
			
 
				-<br>&#32;<br>
			
 
				-<HR>
			
 
				-<br>&#32;<br>
			
 
				-<br>&#32;<br>
			
 
				-<B>Reference Manual
			
 
				-</B><H4>1 General Explanation
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>1.1.  Form of input.</I></B> 
			
 
				-Input consists of <I>text lines</I>, which are destined to be printed,
			
 
				-interspersed with <I>control lines</I>,
			
 
				-which set parameters or otherwise control subsequent processing.
			
 
				-Control lines begin with a <I>control character</I>&#173;
			
 
				-normally <TT>.</TT> (period) or <TT>'</TT> (single quote)&#173;
			
 
				-followed by a one or two character name that specifies
			
 
				-a basic <I>request</I> or the substitution of
			
 
				-a user-defined <I>macro</I> in place of the control line.
			
 
				-The control character <TT>'</TT> suppresses the <I>break</I> function&#173;
			
 
				-the forced output of a partially filled line&#173;
			
 
				-caused by certain requests.
			
 
				-The control character may be separated from the request/macro name by
			
 
				-white space (spaces and/or tabs) for aesthetic reasons.
			
 
				-Names should be followed by either
			
 
				-space or newline.
			
 
				-Control lines with unrecognized names are ignored.
			
 
				-<P>
			
 
				-Various special functions may be introduced anywhere in the input by
			
 
				-means of an <I>escape</I> character, normally <TT>\</TT>.
			
 
				-For example, the function
			
 
				-causes the interpolation of the contents of the
			
 
				-<I>number register R</I>
			
 
				-in place of the function;
			
 
				-here <I>R</I> is either a single character name
			
 
				-as in <TT>\n</TT><I>x</I>,
			
 
				-or a two-character name introduced by
			
 
				-a left-parenthesis, as in <TT>\n(</TT><I>xx</I>.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>1.2.  Formatter and device resolution.</I></B> 
			
 
				-<I>Troff</I> internally stores and processes dimensions in units that correspond to
			
 
				-the particular device for which output is being prepared;
			
 
				-values from 300 to 1200/inch are typical.
			
 
				-See &#167;23.
			
 
				-<I>Nroff</I> internally uses 240 units/inch,
			
 
				-corresponding to the least common multiple of the
			
 
				-horizontal and vertical resolutions of various
			
 
				-typewriter-like output devices.
			
 
				-<I>Troff</I> rounds horizontal/vertical numerical parameter input to the actual
			
 
				-horizontal/vertical resolution of the output device indicated by the <TT>-T</TT> option
			
 
				-(default
			
 
				-<I>Nroff</I> similarly rounds numerical input to the actual resolution
			
 
				-of its output device
			
 
				-(default Model 37 Teletype).
			
 
				-<br>&#32;<br>
			
 
				-<B><I>1.3.  Numerical parameter input.</I></B> 
			
 
				-Both <I>nroff</I> and <I>troff</I>
			
 
				-accept numerical input with the appended scale
			
 
				-indicators
			
 
				-shown in the following table,
			
 
				-where
			
 
				-<I>S</I> is the current type size in points and
			
 
				-<I>V</I> is the current vertical line spacing in
			
 
				-basic units.
			
 
				-<br><img src="-.16257.gif"><br>
			
 
				-In <I>nroff</I>, both the em and the en are taken to be equal to the
			
 
				-nominal character width,
			
 
				-which is output-device dependent;
			
 
				-common values are 1/10 and 1/12 inch.
			
 
				-Actual character widths in <I>nroff</I> need not be all the same and constructed characters
			
 
				-such as -&#62; (->) are often extra wide.
			
 
				-The default scaling is
			
 
				-for the horizontally-oriented requests
			
 
				-and functions
			
 
				-and horizontal coordinates of
			
 
				-for the vertically-oriented requests and functions
			
 
				-and vertical coordinates of
			
 
				-for the
			
 
				-request;
			
 
				-and
			
 
				-for the requests
			
 
				-and
			
 
				-<I>All</I> other requests ignore any scale indicators.
			
 
				-When a number register containing an already appropriately scaled number
			
 
				-is interpolated to provide numerical input,
			
 
				-the unit scale indicator
			
 
				-<TT>u</TT> may need to be appended to prevent
			
 
				-an additional inappropriate default scaling.
			
 
				-The number, <I>N</I>, may be specified in decimal-fraction form
			
 
				-but the parameter finally stored is rounded to an integer number of basic units.
			
 
				-Internal computations are performed in integer arithmetic.
			
 
				-<P>
			
 
				-The <I>absolute position</I> indicator <TT>|</TT> may be prefixed
			
 
				-to a number <I>N</I>
			
 
				-to generate the distance to the vertical or horizontal place <I>N</I>.
			
 
				-For vertically-oriented requests and functions, <TT>|</TT><I>N</I>
			
 
				-becomes the distance in basic units from the current vertical place on the page or in a <I>diversion</I> (&#167;7.4)
			
 
				-to the vertical place <I>N</I>.
			
 
				-For <I>all</I> other requests and functions,
			
 
				-<TT>|</TT><I>N</I>
			
 
				-becomes the distance from
			
 
				-the current horizontal place on the <I>input</I> line to the horizontal place <I>N</I>.
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.sp |3.2c
			
 
				-</PRE></TT></DL>
			
 
				-will space in the required direction to 3.2 centimeters from the top of the page.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>1.4.  Numerical expressions.</I></B> 
			
 
				-Wherever numerical input is expected,
			
 
				-an expression involving parentheses,
			
 
				-the arithmetic operators <TT>+</TT>, <TT>-</TT>, <TT>/</TT>, <TT>*</TT>, <TT>%</TT> (mod),
			
 
				-and the logical operators
			
 
				-<TT>&#60;</TT>,
			
 
				-<TT>&#62;</TT>,
			
 
				-<TT>&#60;=</TT>,
			
 
				-<TT>&#62;=</TT>,
			
 
				-<TT>=</TT> (or <TT>==</TT>),
			
 
				-<TT>&amp;</TT> (and),
			
 
				-<TT>:</TT> (or)
			
 
				-may be used.
			
 
				-Except where controlled by parentheses, evaluation of expressions is left-to-right;
			
 
				-there is no operator precedence.
			
 
				-In the case of certain requests, an initial <TT>+</TT> or <TT>-</TT> is stripped
			
 
				-and interpreted as an increment or decrement indicator respectively.
			
 
				-In the presence of default scaling, the desired scale indicator must be
			
 
				-attached to <I>every</I> number in an expression
			
 
				-for which the desired and default scaling differ.
			
 
				-For example,
			
 
				-if the number register <TT>x</TT> contains 2
			
 
				-and the current point size is 10,
			
 
				-then
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.ll (4.25i+\nxP+3)/2u
			
 
				-</PRE></TT></DL>
			
 
				-will set the line length to 1/2 the sum of 4.25 inches + 2 picas + 3 ems.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>1.5.  Notation.</I></B> 
			
 
				-Numerical parameters are indicated in this manual in two ways.
			
 
				-&#177;<I>N</I> means that the argument may take the forms <I>N</I>, <I>+N</I>, or <I>-N</I> and
			
 
				-that the corresponding effect is to set the parameter
			
 
				-to <I>N</I>, to increment it by <I>N</I>, or to decrement it by <I>N</I> respectively.
			
 
				-Plain <I>N</I> means that an initial algebraic sign is <I>not</I>
			
 
				-an increment indicator,
			
 
				-but merely the sign of <I>N</I>.
			
 
				-Generally, unreasonable numerical input is either ignored
			
 
				-or truncated to a reasonable value.
			
 
				-For example,
			
 
				-most requests expect to set parameters to non-negative
			
 
				-values;
			
 
				-exceptions are
			
 
				-and
			
 
				-The requests
			
 
				-and
			
 
				-restore the previous parameter value in the absence
			
 
				-of an argument.
			
 
				-<P>
			
 
				-Single character arguments are indicated by single lower case letters
			
 
				-and
			
 
				-one/two character arguments are indicated by a pair of lower case letters.
			
 
				-Character string arguments are indicated by multi-character mnemonics.
			
 
				-</P>
			
 
				-<H4>2 Font and Character Size Control
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>2.1.  Character set.</I></B> 
			
 
				-The <I>troff</I> character set is defined by a description file specific to each output device (&#167;23).
			
 
				-There are normally several regular fonts and one or more special fonts.
			
 
				-Characters are input as themselves,
			
 
				-as <TT>\(</TT><I>xx</I>, as <TT>\C'</TT><I>name</I><TT>'</TT>,
			
 
				-or as 
			
 
				-The form
			
 
				-permits a name of any length;
			
 
				-the form
			
 
				-refers to the <I>n</I>-th character on the current font,
			
 
				-whether named or not.
			
 
				-<P>
			
 
				-Normally the input characters
			
 
				-and
			
 
				-are printed as `, ', and - respectively;
			
 
				-and
			
 
				-produce `, ', and -.
			
 
				-If the character does not exist in the font, <I>troff</I> assumes the width is 1 em and
			
 
				-outputs the character with a
			
 
				-name as defined in Section 22.
			
 
				-(This is independent of how the device handles characters unknown to it.)
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Nroff</I> has an analogous, but different, mechanism for defining legal characters
			
 
				-and how to print them.
			
 
				-By default all characters are valid.
			
 
				-There are such
			
 
				-additional characters as may be available on
			
 
				-the output device,
			
 
				-such characters as may be constructed
			
 
				-by overstriking or other combination,
			
 
				-and those that can reasonably be mapped
			
 
				-into other printable characters.
			
 
				-The exact behavior is determined by a driving
			
 
				-table prepared for each device.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>2.2.  Fonts.</I></B> 
			
 
				-<I>Troff</I>
			
 
				-begins execution by reading information for a set of default fonts,
			
 
				-said to be
			
 
				-<I>mounted</I>;
			
 
				-conventionally, the first four are
			
 
				-Times Roman (<TT>R</TT>),
			
 
				-Times Italic
			
 
				-(<TT>I</TT>),
			
 
				-Times Bold
			
 
				-(<TT>B</TT>),
			
 
				-and
			
 
				-Times Bold Italic
			
 
				-(<TT>BI</TT>),
			
 
				-and the last is a Special font
			
 
				-containing miscellaneous characters.
			
 
				-(This document uses Lucida Sans in place of Times.)
			
 
				-The set of fonts and positions is determined by the device description file,
			
 
				-described in &#167;23.
			
 
				-<P>
			
 
				-The current font, initially Roman, may be changed
			
 
				-by the <TT>ft</TT> request,
			
 
				-or by embedding at any desired point
			
 
				-<TT>\f</TT><I>x</I>, <TT>\f(</TT><I>xx</I>, or <TT>\f</TT><I>N</I>,
			
 
				-where
			
 
				-<I>x</I> and <I>xx</I> are the name of a font
			
 
				-and <I>N</I> is a numerical font position.
			
 
				-</P>
			
 
				-<P>
			
 
				-It is not necessary to change to the Special font;
			
 
				-characters on that font are automatically handled
			
 
				-as if they were physically part of the current font.
			
 
				-The Special font may actually be several fonts;
			
 
				-the name
			
 
				-is reserved and is generally used for one of these.
			
 
				-All special fonts must be mounted after regular fonts.
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Troff</I> can be informed that any particular font is mounted
			
 
				-by use of the <TT>fp</TT> request.
			
 
				-The list of known fonts is installation dependent.
			
 
				-In the subsequent discussion of font-related requests,
			
 
				-<I>F</I> represents either a one/two-character
			
 
				-font name or the numerical font position.
			
 
				-The current font is available (as a numerical position) in the read-only number register <TT>.f</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-A request for a named but not-mounted font is honored
			
 
				-if the font description information exists.
			
 
				-In this way, there is no limit on the number of fonts that may be printed
			
 
				-in any part of a document.
			
 
				-Mounted fonts may be handled more efficiently,
			
 
				-and they may be referred to by their mount positions,
			
 
				-but there is no other difference.
			
 
				-Mention of an unmounted font loads it temporarily at font position
			
 
				-zero, which serves as a one-font cache.
			
 
				-</P>
			
 
				-<P>
			
 
				-The function
			
 
				-causes the current font to be slanted by
			
 
				-&#177;<I>N</I>
			
 
				-degrees.
			
 
				-Not all devices support slanting.
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Nroff</I> understands font control
			
 
				-and normally underlines italic characters (see &#167;10.5).
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>2.3.  Character size.</I></B> 
			
 
				-Character point sizes available depend on the specific output device;
			
 
				-a typical (historical) set of values is
			
 
				-6, 7, 8, 9, 10, 11, 12, 14, 16, 18, 20, 22, 24, 28, and 36.
			
 
				-This is a range of 1/12 inch to 1/2 inch.
			
 
				-The <TT>ps</TT> request is used to change or restore the point size.
			
 
				-Alternatively the point size may be changed between any two characters
			
 
				-by embedding a
			
 
				-at the desired point
			
 
				-to set the size to <I>N</I>,
			
 
				-or a
			
 
				-(1&#60;=<I>N</I>&#60;=9)
			
 
				-to increment/decrement the size by <I>N</I>;
			
 
				-restores the previous size.
			
 
				-Requested point size values that are between two valid
			
 
				-sizes yield the larger of the two.
			
 
				-<P>
			
 
				-Note that through an accident of history, a construction like
			
 
				-is parsed as size 39, and thus converted to size 36 (given the sizes above),
			
 
				-while
			
 
				-is parsed as size 4 followed by
			
 
				-The forms
			
 
				-<I></I><TT></TT><I>nn</I> and <I></I><TT></TT>&#177;<I></I><TT>(</TT><I>nn</I>
			
 
				-permit specification of sizes that would otherwise be ambiguous.
			
 
				-</P>
			
 
				-<P>
			
 
				-The current size is available in the <TT>.s</TT> register.
			
 
				-<I>Nroff</I> ignores type size requests.
			
 
				-</P>
			
 
				-<P>
			
 
				-The function
			
 
				-sets the height of the current font to
			
 
				-<I>N</I>, or increments it by <I>+N</I>, or decrements it by <I>-N</I>;
			
 
				-if <I>N=</I>0, the height is restored to the current point size.
			
 
				-In each case, the width is unchanged.
			
 
				-Not all devices support independent height and width for characters.
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> *The fields have the same meaning as described earlier in the Request Summary.
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-<br>&#32;<br>
			
 
				-<I>Request</I>	<I>Initial</I>	<I>If No</I>
			
 
				-<br>
			
 
				-<I>Form</I>	<I>Value</I>	<I>Argument</I>	<I>Notes</I>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ps</TT><I> &#177;N</I>*	10point	previous	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Point size
			
 
				-set to &#177;<I>N</I>.
			
 
				-Alternatively, embed
			
 
				-or
			
 
				-</TT>Any positive size value may be requested;
			
 
				-if invalid, the next larger valid size will result, with a
			
 
				-maximum of 36.
			
 
				-A paired sequence
			
 
				-<I>+N</I>, <I>-N</I>
			
 
				-will work because the previous requested value is also remembered.
			
 
				-Ignored in <I>nroff</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ss</TT><I> N</I>	12/36em	ignored	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Space-character size
			
 
				-(i.e., inter-word gap)
			
 
				-is set to <I>N</I>/36 ems.
			
 
				-This size is the minimum word spacing in adjusted text.
			
 
				-Ignored in <I>nroff</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.cs</TT><I> F N M</I>	off	-	P
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Constant character space
			
 
				-(width) mode is
			
 
				-set on for font <I>F</I> (if mounted); the width of every character will be
			
 
				-taken to be <I>N</I>/36 ems.
			
 
				-If <I>M</I> is absent,
			
 
				-the em is that of the character's point size;
			
 
				-if <I>M</I> is given,
			
 
				-the em is <I>M</I> points.
			
 
				-All affected characters
			
 
				-are centered in this space, including those with an actual width
			
 
				-larger than this space.
			
 
				-Special Font characters occurring while the current font
			
 
				-is <I>F</I> are also so treated.
			
 
				-If <I>N</I> is absent, the mode is turned off.
			
 
				-The mode must be in effect when the characters are physically printed.
			
 
				-Ignored in <I>nroff</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.bd</TT><I> F N</I>	off	-	P
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The characters in font <I>F</I> will be artificially
			
 
				-emboldened by printing each one twice, separated by <I>N-</I>1 basic units.
			
 
				-A reasonable value for <I>N</I> is 3 when the character size is near 10 points.
			
 
				-If <I>N</I> is missing the embolden mode is turned off.
			
 
				-The emboldening value <I>N</I> is in the <TT>.b</TT> register.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-This paragraph is printed with <TT>.bd R 3</TT>.
			
 
				-The mode must be in effect when the characters are physically printed.
			
 
				-Ignored in <I>nroff</I>.
			
 
				-<br>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.bd S </TT><I>F N</I>	off	-	P
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The characters in the Special font
			
 
				-will be emboldened whenever the current font is <I>F</I>.
			
 
				-The mode must be in effect when the characters are physically printed.
			
 
				-Ignored in <I>nroff</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ft</TT> <I>F</I>	Roman	previous	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Font changed to
			
 
				-<I>F</I>.
			
 
				-Alternatively, embed
			
 
				-The font name <TT>P</TT> is reserved to mean the previous font,
			
 
				-and the name
			
 
				-for the special font.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.fp </TT><I>N F L</I>	R,I,B,...,S	ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Font position.
			
 
				-This is a statement
			
 
				-that a font named <I>F</I> is associated with position <I>N</I>.
			
 
				-It is a fatal error if <I>F</I> is not known.
			
 
				-For fonts with names longer than two characters,
			
 
				-<I>L</I>
			
 
				-refers to the long name,
			
 
				-and
			
 
				-<I>F</I>
			
 
				-becomes a synonym.
			
 
				-There is generally a limit of about 10 mounted fonts.
			
 
				-</dl>
			
 
				-<H4>3 Page control
			
 
				-</H4>
			
 
				-<P>
			
 
				-Top and bottom margins are not automatically provided;
			
 
				-it is conventional to define two <I>macros</I> and to set <I>traps</I>
			
 
				-for them at vertical positions 0 (top) and <I>-N</I> (distance <I>N</I> up from the bottom).
			
 
				-See &#167;7 and Tutorial Examples &#167;T2.
			
 
				-A pseudo-page transition onto the first page occurs
			
 
				-either when the first <I>break</I> occurs or
			
 
				-when the first <I>non-diverted</I> text processing occurs.
			
 
				-Arrangements
			
 
				-for a trap to occur at the top of the first page
			
 
				-must be completed before this transition.
			
 
				-In the following, references to the <I>current diversion</I> (&#167;7.4)
			
 
				-mean that the mechanism being described works during both
			
 
				-ordinary and diverted output (the former considered as the top diversion level).
			
 
				-</P>
			
 
				-<P>
			
 
				-The limitations on <I>troff</I> and <I>nroff</I> output dimensions
			
 
				-are device dependent.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.pl</TT><I> &#177;N</I>	11in	11in	<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Page length set to &#177;<I>N</I>.
			
 
				-The current page length is available in the <TT>.p</TT> register.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.bp</TT><I> &#177;N</I>	<I>N=</I>1	-	B,<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Begin page.
			
 
				-The current page is ejected and a new page is begun.
			
 
				-If &#177;<I>N</I> is given, the new page number will be &#177;<I>N</I>.
			
 
				-Also see request <TT>ns</TT>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.pn</TT><I> &#177;N</I>	<I>N</I>=1	ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Page number.
			
 
				-The next page (when it occurs) will have the page number &#177;<I>N</I>.
			
 
				-A <TT>pn</TT> must occur before the initial pseudo-page transition
			
 
				-to affect the page number of the first page.
			
 
				-The current page number is in the <TT>%</TT> register.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.po</TT><I> &#177;N</I>	1in; 0	previous	<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Page offset.
			
 
				-The current <I>left margin</I> is set to &#177;<I>N</I>.
			
 
				-The <I>troff</I> initial value provides 1 inch of paper margin
			
 
				-on a typical device.
			
 
				-The current page offset is available in the <TT>.o</TT> register.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ne</TT><I> N</I>	-	<I>N=</I>1<I>V</I>	D,<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Need <I>N</I> vertical space.
			
 
				-If the distance <I>D</I> to the next trap position (see &#167;7.5) is less than <I>N</I>,
			
 
				-a forward vertical space of size <I>D</I> occurs,
			
 
				-which will spring the trap.
			
 
				-If there are no remaining
			
 
				-traps on the page,
			
 
				-<I>D</I> is the distance to the bottom of the page.
			
 
				-If <I>D&#60;V</I>, another line could still be output
			
 
				-and spring the trap.
			
 
				-In a diversion, <I>D</I> is the distance to the <I>diversion trap</I>, if any,
			
 
				-or is very large.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.mk</TT><I> R</I>	none	internal	D
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Mark the current vertical place
			
 
				-in an internal register (both associated with the current diversion level),
			
 
				-or in register <I>R</I>, if given.
			
 
				-See <TT>rt</TT> request.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.rt</TT><I> &#177;N</I>	none	internal	D,<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Return <I>upward only</I> to a marked vertical place
			
 
				-in the current diversion.
			
 
				-If &#177;<I>N</I> (with respect to current place) is given,
			
 
				-the place is &#177;<I>N</I> from the top of the page or diversion
			
 
				-or, if <I>N</I> is absent, to a
			
 
				-place marked by a previous <TT>mk</TT>.
			
 
				-The <TT>sp</TT> request (&#167;5.3) may be used
			
 
				-instead of <TT>rt</TT>
			
 
				-by spacing to the absolute place stored in an explicit register,
			
 
				-e.g., using
			
 
				-<I>R</I> ...
			
 
				-this also works when the motion is downwards.
			
 
				-</dl>
			
 
				-<H4>4 Text Filling, Adjusting, and Centering
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>4.1.  Filling and adjusting.</I></B> 
			
 
				-Normally,
			
 
				-words are collected from input text lines
			
 
				-and assembled into an output text line
			
 
				-until some word does not fit.
			
 
				-An attempt is then made
			
 
				-to hyphenate the word to put part
			
 
				-of it into the output line.
			
 
				-The spaces between the words on the output line
			
 
				-are then increased to spread out the line
			
 
				-to the current <I>line length</I>
			
 
				-minus any current <I>indent</I>.
			
 
				-A <I>word</I> is any string of characters delimited by
			
 
				-the <I>space</I> character or the beginning/end of the input line.
			
 
				-Any adjacent pair of words that must be kept together
			
 
				-(neither split across output lines nor spread apart
			
 
				-in the adjustment process)
			
 
				-can be tied together by separating them with the
			
 
				-<I>unpaddable space</I> character
			
 
				-``<TT>\ </TT>'' (backslash-space).
			
 
				-The adjusted word spacings are uniform in <I>troff</I>
			
 
				-and the minimum interword spacing can be controlled
			
 
				-with the <TT>ss</TT> request (&#167;2).
			
 
				-In <I>nroff</I>, they are normally nonuniform because of
			
 
				-quantization to character-size spaces;
			
 
				-however,
			
 
				-the command line option <TT>-e</TT> causes uniform
			
 
				-spacing with full output device resolution.
			
 
				-Filling, adjustment, and hyphenation (&#167;13) can all be
			
 
				-prevented or controlled.
			
 
				-The text length on the last line output is available in the <TT>.n</TT> register,
			
 
				-and text baseline position on the page for this line is in the <TT>nl</TT> register.
			
 
				-The text baseline high-water mark (lowest place) on the current page is in
			
 
				-the <TT>.h</TT> register.
			
 
				-The current horizontal output position is in the <TT>.k</TT> register.
			
 
				-<P>
			
 
				-An input text line
			
 
				-<I>ending</I>
			
 
				-with <TT>.</TT>, <TT>?</TT>, or <TT>!</TT>,
			
 
				-optionally followed by any number of
			
 
				-or
			
 
				-&#191;,
			
 
				-is taken
			
 
				-to be the end of a sentence, and an additional space character is
			
 
				-automatically provided during filling.
			
 
				-To prevent this, add
			
 
				-to the end of the input line.
			
 
				-Multiple inter-word space characters found in the input are retained,
			
 
				-except for trailing spaces;
			
 
				-initial spaces also cause a break.
			
 
				-</P>
			
 
				-<P>
			
 
				-When filling is in effect, a <TT>\p</TT> may be embedded or attached to a word to
			
 
				-cause a break at the end of the word and have the resulting output
			
 
				-line spread out to fill the current line length.
			
 
				-</P>
			
 
				-<P>
			
 
				-A text input line that happens to begin
			
 
				-with a control character can
			
 
				-be made not to look like a control line
			
 
				-by prefixing it with
			
 
				-the non-printing, zero-width filler character <TT>\&amp;</TT>.
			
 
				-Still another way is to specify output translation of some
			
 
				-convenient character into the control character
			
 
				-using <TT>tr</TT> (&#167;10.5).
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>4.2.  Interrupted text.</I></B> 
			
 
				-The copying of an input line in <I>nofill</I> (non-fill) mode can be interrupted
			
 
				-by terminating
			
 
				-the partial line with a <TT>\c</TT>.
			
 
				-The next encountered input text line will be considered to be a continuation
			
 
				-of the same line of input text.
			
 
				-Similarly,
			
 
				-a word within <I>filled</I> text may be interrupted by terminating the
			
 
				-word (and line) with <TT>\c</TT>;
			
 
				-the next encountered text will be taken as a continuation of the
			
 
				-interrupted word.
			
 
				-If the intervening control lines cause a break,
			
 
				-any partial line will be forced out along with any partial word.
			
 
				-<br>&#32;<br>
			
 
				-<TT>.br</TT>	-	-	B
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Break.
			
 
				-The filling of the line currently
			
 
				-being collected is stopped and
			
 
				-the line is output without adjustment.
			
 
				-Text lines beginning with space characters
			
 
				-(but not tabs)
			
 
				-and empty text lines (blank lines) also cause a break.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.fi</TT>	fill on	-	B,E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Fill subsequent output lines.
			
 
				-The register <TT>.u</TT> is 1 in fill mode and 0 in nofill mode.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.nf</TT>	fill on	-	B,E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Nofill.
			
 
				-Subsequent output lines are neither filled nor adjusted.
			
 
				-Input text lines are copied directly to output lines
			
 
				-without regard for the current line length.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ad</TT><I> c</I>	adj, both	adjust	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Line adjustment is begun.
			
 
				-If fill mode is not on, adjustment will be deferred until
			
 
				-fill mode is back on.
			
 
				-If the type indicator <I>c</I> is present,
			
 
				-the adjustment type is changed as shown in the following table.
			
 
				-<br><img src="-.16258.gif"><br>
			
 
				-The number register
			
 
				-contains the current value of the
			
 
				-setting;
			
 
				-its value can be recorded and used subsequently to set adjustment.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.na</TT>	adjust	-	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Noadjust.
			
 
				-Adjustment is turned off;
			
 
				-the right margin will be ragged.
			
 
				-The adjustment type for <TT>ad</TT> is not changed.
			
 
				-Output line filling still occurs if fill mode is on.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ce</TT><I> N</I>	off	<I>N=</I>1	B,E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Center the next <I>N</I> input text lines
			
 
				-within the current available horizontal space (line-length minus indent).
			
 
				-If <I>N=</I>0, any residual count is cleared.
			
 
				-A break occurs after each of the <I>N</I> input lines.
			
 
				-If the input line is too long,
			
 
				-it will be left adjusted.
			
 
				-</dl>
			
 
				-<H4>5 Vertical Spacing
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>5.1.  Baseline spacing.</I></B> 
			
 
				-The vertical spacing (<I>V</I>) between the baselines of successive
			
 
				-output lines can be set
			
 
				-using the <TT>vs</TT> request.
			
 
				-<I>V</I> should be large enough to accommodate the character sizes
			
 
				-on the affected output lines.
			
 
				-For the common type sizes (9-12 points),
			
 
				-usual typesetting practice is to set <I>V</I> to 2 points greater than the
			
 
				-point size;
			
 
				-<I>troff</I> default is 10-point type on a 12-point spacing
			
 
				-(as in this document).
			
 
				-The current <I>V</I> is available in the <TT>.v</TT> register.
			
 
				-Multiple-<I>V</I> line separation (e.g., double spacing) may be requested
			
 
				-with <TT>ls</TT>,
			
 
				-but it is better to use a large
			
 
				-instead;
			
 
				-certain preprocessors assume single spacing.
			
 
				-The current line spacing is available in the <TT>.L</TT> register.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>5.2.  Extra line-space.</I></B> 
			
 
				-If a word contains a tall construct requiring
			
 
				-the output line containing it to have extra vertical space
			
 
				-before and/or after it,
			
 
				-the <I>extra-line-space</I> function <TT>\x'</TT><I>N</I><TT>'</TT>
			
 
				-can be embedded in or attached to that word.
			
 
				-If <I>N</I> is negative,
			
 
				-the output line containing the word will
			
 
				-be preceded by <I>N</I> extra vertical space;
			
 
				-if <I>N</I> is positive,
			
 
				-the output line containing the word
			
 
				-will be followed by <I>N</I> extra vertical space.
			
 
				-If successive requests for extra space apply to the same line,
			
 
				-the maximum values are used.
			
 
				-The most recently utilized post-line extra line-space is available in the <TT>.a</TT> register.
			
 
				-<P>
			
 
				-In
			
 
				-and other functions having a pair of delimiters around
			
 
				-their parameter,
			
 
				-the delimiter choice (here 
			
 
				-is arbitrary,
			
 
				-except that it can not look like the continuation of a number expression for <I>N</I>.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>5.3.  Blocks of vertical space.</I></B> 
			
 
				-A block of vertical space is ordinarily requested using <TT>sp</TT>,
			
 
				-which honors the <I>no-space</I> mode and which does
			
 
				-not space past a trap.
			
 
				-A contiguous block of vertical space may be reserved using <TT>sv</TT>.
			
 
				-<br>&#32;<br>
			
 
				-<TT>.vs </TT><I>N</I>	12pts; 1/6in	previous	E,<B>p</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Set vertical baseline spacing size <I>V</I>.
			
 
				-Transient extra vertical space is available with <TT>\x</TT><I>'N'</I> (see above).
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ls </TT><I>N</I>	<I>N=</I>1	previous	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-<I>Line</I> spacing
			
 
				-set to &#177;<I>N</I>.
			
 
				-<I>N-</I>1 <I>V</I>s (blank lines) are
			
 
				-appended to each output text line.
			
 
				-Appended blank lines are omitted, if the text or previous appended blank line reached a trap position.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.sp </TT><I>N</I>	-	<I>N=</I>1 <I>V</I>	B,<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Space vertically in either direction.
			
 
				-If <I>N</I> is negative, the motion is backward (upward)
			
 
				-and is limited to the distance to the top of the page.
			
 
				-Forward (downward) motion is truncated to the distance to the
			
 
				-nearest trap.
			
 
				-(Recall the use of
			
 
				-from &#167;1.3.)
			
 
				-If the no-space mode is on,
			
 
				-no spacing occurs (see <TT>ns</TT> and <TT>rs</TT> below).
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.sv</TT><I> N</I>	-	<I>N=</I>1 <I>V</I>	<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Save a contiguous vertical block of size <I>N</I>.
			
 
				-If the distance to the next trap is greater
			
 
				-than <I>N</I>, <I>N</I> vertical space is output.
			
 
				-No-space mode has no effect.
			
 
				-If this distance is less than <I>N</I>,
			
 
				-no vertical space is immediately output,
			
 
				-but <I>N</I> is remembered for later output (see <TT>os</TT>).
			
 
				-Subsequent <TT>sv</TT> requests will overwrite any still remembered <I>N</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.os</TT>	-	-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Output saved vertical space.
			
 
				-No-space mode has no effect.
			
 
				-Used to finally output a block of vertical space requested
			
 
				-by an earlier <TT>sv</TT> request.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ns</TT>	space	-	D
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-No-space mode turned on.
			
 
				-When on, no-space mode inhibits <TT>sp</TT> requests and
			
 
				-<TT>bp</TT> requests <I>without</I> a next page number.
			
 
				-No-space mode is turned off when a line of
			
 
				-output occurs, or with <TT>rs</TT>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.rs</TT>	space	-	D
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Restore spacing.
			
 
				-The no-space mode is turned off.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-Blank text line.		-	B
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Causes a break and
			
 
				-output of a blank line exactly like <TT>sp 1</TT>.
			
 
				-</dl>
			
 
				-<H4>6 Line Length and Indenting
			
 
				-</H4>
			
 
				-<P>
			
 
				-The maximum line length for fill mode may be set with <TT>ll</TT>.
			
 
				-The indent may be set with <TT>in</TT>;
			
 
				-an indent applicable to only the next output line may be set with <TT>ti</TT>.
			
 
				-The line length includes indent space but not
			
 
				-page offset space.
			
 
				-The line length minus the indent is the basis for centering with <TT>ce</TT>.
			
 
				-The effect of <TT>ll</TT>, <TT>in</TT>, or <TT>ti</TT>
			
 
				-is delayed, if a partially collected line exists,
			
 
				-until after that line is output.
			
 
				-In fill mode the length of text on an output line is less than or equal to
			
 
				-the line length minus the indent.
			
 
				-The current line length and indent are available in registers <TT>.l</TT> and <TT>.i</TT> respectively.
			
 
				-The length of <I>three-part titles</I> produced by <TT>tl</TT>
			
 
				-(see &#167;14) is independently set by <TT>lt</TT>.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ll</TT><I> &#177;N</I>	6.5in	previous	E,<B>m</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Line length is set to &#177;<I>N</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.in</TT><I> &#177;N</I>	<I>N=</I>0	previous	B,E,<B>m</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Indent is set to &#177;<I>N</I>.
			
 
				-The indent is prefixed to each output line.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ti</TT><I> &#177;N</I>	-	ignored	B,E,<B>m</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Temporary indent.
			
 
				-The next output text line will be indented a distance &#177;<I>N</I>
			
 
				-with respect to the current indent.
			
 
				-The resulting total indent may not be negative.
			
 
				-The current indent is not changed.
			
 
				-</dl>
			
 
				-<H4>7 Macros, Strings, Diversion, and Position Traps
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>7.1.  Macros and strings.</I></B> 
			
 
				-A <I>macro</I> is a named set of arbitrary <I>lines</I> that may be invoked by name or
			
 
				-with a <I>trap</I>.
			
 
				-A <I>string</I> is a named string of <I>characters</I>,
			
 
				-not including a newline character,
			
 
				-that may be interpolated by name at any point.
			
 
				-Request, macro, and string names share the same name list.
			
 
				-Macro and string names
			
 
				-may be one or two characters long and may usurp previously defined
			
 
				-request, macro, or string names;
			
 
				-this implies that built-in operations may be (irrevocably) redefined.
			
 
				-Any of these entities may be renamed with <TT>rn</TT>
			
 
				-or removed with <TT>rm</TT>.
			
 
				-<P>
			
 
				-Macros are created by <TT>de</TT> and <TT>di</TT>, and appended to by <TT>am</TT> and <TT>da</TT>;
			
 
				-<TT>di</TT> and <TT>da</TT> cause normal output to be stored in a macro.
			
 
				-A macro is invoked in the same way as a request;
			
 
				-a control line beginning <TT>.</TT><I>xx</I> will interpolate the contents of macro <I>xx</I>.
			
 
				-The remainder of the line may contain up to nine <I>arguments</I>.
			
 
				-</P>
			
 
				-<P>
			
 
				-Strings are created by <TT>ds</TT> and appended to by <TT>as</TT>.
			
 
				-The strings <I>x</I> and <I>xx</I> are interpolated at any desired point with
			
 
				-<TT>\*</TT><I>x</I> and <TT>\*(</TT><I>xx</I> respectively.
			
 
				-String references and macro invocations may be nested.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>7.2.  Copy mode input interpretation.</I></B> 
			
 
				-During the definition and extension
			
 
				-of strings and macros (not by diversion)
			
 
				-the input is read in <I>copy mode</I>.
			
 
				-In copy mode, input is copied without interpretation
			
 
				-except that:
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-^* The contents of number registers indicated by <TT>\n</TT> are interpolated.
			
 
				-^* Strings indicated by <TT>\*</TT> are interpolated.
			
 
				-^* Arguments indicated by <TT>\$</TT> are interpolated.
			
 
				-^* Concealed newlines indicated by <TT>\</TT><I>newline</I> are eliminated.
			
 
				-^* Comments indicated by <TT>\"</TT> are eliminated.
			
 
				-^* <TT>\t</TT> and <TT>\a</TT> are interpreted as ASCII horizontal tab and SOH respectively (&#167;9).
			
 
				-^* <TT>\\</TT> is interpreted as <TT>\</TT>.
			
 
				-^* <TT>\.</TT> is interpreted as ``<TT>.</TT>''.
			
 
				-</PRE></TT></DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-These interpretations can be suppressed by
			
 
				-prefixing
			
 
				-a <TT>\</TT>.
			
 
				-For example, since <TT>\\</TT> maps into a <TT>\</TT>, <TT>\\n</TT> will copy as <TT>\n</TT>, which
			
 
				-will be interpreted as a number register indicator when the
			
 
				-macro or string is reread.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>7.3.  Arguments.</I></B> 
			
 
				-When a macro is invoked by name, the remainder of the line is
			
 
				-taken to contain up to nine arguments.
			
 
				-The argument separator is the space character (not tab), and arguments
			
 
				-may be surrounded by double quotes to permit embedded space characters.
			
 
				-Pairs of double quotes may be embedded in double-quoted arguments to
			
 
				-represent a single double-quote character.
			
 
				-The argument <TT>""</TT>
			
 
				-is explicitly null.
			
 
				-If the desired arguments won't fit on a line,
			
 
				-a concealed newline may be used to continue on the next line.
			
 
				-A trailing double quote may be omitted.
			
 
				-<P>
			
 
				-When a macro is invoked the <I>input level</I> is <I>pushed down</I> and
			
 
				-any arguments available at the previous level become unavailable
			
 
				-until the macro is completely read and the previous level is restored.
			
 
				-A macro's own arguments can be interpolated at any point
			
 
				-within the macro with <TT>\$</TT><I>N</I>,
			
 
				-which interpolates the <I>N</I>th
			
 
				-argument
			
 
				-(1&#8804;<I>N</I>&#8804;9).
			
 
				-If an invoked argument does not exist,
			
 
				-a null string results.
			
 
				-For example, the macro <I>xx</I> may be defined by
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.de xx	\" begin definition
			
 
				-Today is \\$1 the \\$2.
			
 
				-..	\" end definition
			
 
				-</PRE></TT></DL>
			
 
				-and called by
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.xx Monday 14th
			
 
				-</PRE></TT></DL>
			
 
				-to produce the text
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-Today is Monday the 14th.
			
 
				-</PRE></TT></DL>
			
 
				-Note that each <TT>\$</TT>
			
 
				-was concealed in the definition with a prefixed <TT>\</TT>.
			
 
				-The number of
			
 
				-arguments is in the <TT>.$</TT> register.
			
 
				-</P>
			
 
				-<P>
			
 
				-No arguments are available at the top (non-macro) level,
			
 
				-within a string, or within a trap-invoked macro.
			
 
				-</P>
			
 
				-<P>
			
 
				-Arguments are copied in copy mode onto a stack
			
 
				-where they are available for reference.
			
 
				-It is advisable to
			
 
				-conceal string references (with an extra <TT>\</TT>)
			
 
				-to delay interpolation until argument reference time.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>7.4.  Diversions.</I></B> 
			
 
				-Processed output may be diverted into a macro for purposes
			
 
				-such as footnote processing (see Tutorial &#167;T5)
			
 
				-or determining the horizontal and vertical size of some text for
			
 
				-conditional changing of pages or columns.
			
 
				-A single diversion trap may be set at a specified vertical position.
			
 
				-The number registers <TT>dn</TT> and <TT>dl</TT> respectively contain the
			
 
				-vertical and horizontal size of the most
			
 
				-recently ended diversion.
			
 
				-Processed text that is diverted into a macro
			
 
				-retains the vertical size of each of its lines when reread
			
 
				-in <I>nofill</I> mode
			
 
				-regardless of the current <I>V</I>.
			
 
				-Constant-spaced (<TT>cs</TT>) or emboldened (<TT>bd</TT>) text that is diverted
			
 
				-can be reread correctly only if these modes are again or still in effect
			
 
				-at reread time.
			
 
				-One way to do this is to embed in the diversion the appropriate
			
 
				-<TT>cs</TT> or <TT>bd</TT> requests with the <I>transparent</I>
			
 
				-mechanism described in &#167;10.6.
			
 
				-<P>
			
 
				-Diversions may be nested
			
 
				-and certain parameters and registers
			
 
				-are associated
			
 
				-with the current diversion level
			
 
				-(the top non-diversion level may be thought of as the
			
 
				-0th diversion level).
			
 
				-These are the diversion trap and associated macro,
			
 
				-no-space mode,
			
 
				-the internally-saved marked place (see <TT>mk</TT> and <TT>rt</TT>),
			
 
				-the current vertical place (<TT>.d</TT> register),
			
 
				-the current high-water text baseline (<TT>.h</TT> register),
			
 
				-and the current diversion name (<TT>.z</TT> register).
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>7.5.  Traps.</I></B> 
			
 
				-Three types of trap mechanisms are available&#8212;page traps, a diversion trap, and
			
 
				-an input-line-count trap.
			
 
				-Macro-invocation traps may be planted using <TT>wh</TT> at any page position including the top.
			
 
				-This trap position may be changed using <TT>ch</TT>.
			
 
				-Trap positions at or below the bottom of the page
			
 
				-have no effect unless or until
			
 
				-moved to within the page or rendered effective by an increase in page length.
			
 
				-Two traps may be planted at the same position only by first planting them at different
			
 
				-positions and then moving one of the traps;
			
 
				-the first planted trap will conceal the second unless and until the first one is moved
			
 
				-(see Tutorial Examples).
			
 
				-If the first one is moved back, it again conceals the second trap.
			
 
				-The macro associated with a page trap is automatically
			
 
				-invoked when a line of text is output whose vertical size reaches
			
 
				-or sweeps past the trap position.
			
 
				-Reaching the bottom of a page springs the top-of-page trap, if any,
			
 
				-provided there is a next page.
			
 
				-The distance to the next trap position is available in the <TT>.t</TT> register;
			
 
				-if there are no traps between the current position and the bottom of the page,
			
 
				-the distance returned is the distance to the page bottom.
			
 
				-<P>
			
 
				-A macro-invocation trap effective in the current diversion may be planted using <TT>dt</TT>.
			
 
				-The <TT>.t</TT> register works in a diversion; if there is no subsequent trap a large
			
 
				-distance is returned.
			
 
				-For a description of input-line-count traps, see <TT>it</TT> below.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.de</TT><I> xx yy</I>	-	<I>.yy=</I><TT>..</TT>	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Define or redefine the macro <I>xx</I>.
			
 
				-The contents of the macro begin on the next input line.
			
 
				-Input lines are copied in <I>copy mode</I> until the definition is terminated by a
			
 
				-line beginning with <TT>.</TT><I>yy</I>,
			
 
				-whereupon the macro <I>yy</I> is called.
			
 
				-In the absence of <I>yy</I>, the definition
			
 
				-is terminated by a
			
 
				-line beginning with ``<TT>..</TT>''.
			
 
				-A macro may contain <TT>de</TT> requests
			
 
				-provided the terminating macros differ
			
 
				-or the contained definition terminator is concealed.
			
 
				-``<TT>..</TT>'' can be concealed as
			
 
				-<TT>\\..</TT> which will copy as <TT>\..</TT> and be reread as ``<TT>..</TT>''.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.am</TT><I> xx yy</I>	-	<I>.yy=</I><TT>..</TT>	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Append to macro
			
 
				-<I>xx</I>
			
 
				-(append version of <TT>de</TT>).
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ds</TT><I> xx string</I>	-	ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Define a string
			
 
				-<I>xx</I> containing <I>string</I>.
			
 
				-Any initial double quote in <I>string</I> is stripped off to permit
			
 
				-initial blanks.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.as</TT><I> xx string</I>	-	ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Append
			
 
				-<I>string</I> to string <I>xx</I>
			
 
				-(append version of <TT>ds</TT>).
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.rm</TT><I> xx</I>	-	ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Remove
			
 
				-request, macro, or string.
			
 
				-The name <I>xx</I> is removed from the name list and
			
 
				-any related storage space is freed.
			
 
				-Subsequent references will have no effect.
			
 
				-If many macros and strings are being created dynamically, it
			
 
				-may become necessary to remove unused ones
			
 
				-to recapture internal storage space for newer registers.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.rn</TT><I> xx yy</I>	-	ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Rename request, macro, or string
			
 
				-<I>xx</I> to <I>yy</I>.
			
 
				-If <I>yy</I> exists, it is first removed.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.di</TT><I> xx</I>	-	end	D
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Divert output to macro <I>xx</I>.
			
 
				-Normal text processing occurs during diversion
			
 
				-except that page offsetting is not done.
			
 
				-The diversion ends when the request <TT>di</TT> or <TT>da</TT> is encountered without an argument;
			
 
				-extraneous
			
 
				-requests of this type should not appear when nested diversions are being used.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.da </TT><I>xx</I>	-	end	D
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Divert, appending to macro <I>xx</I>
			
 
				-(append version of <TT>di</TT>).
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.wh</TT><I> N xx</I>	-	-	<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Install
			
 
				-a trap to invoke <I>xx</I> at page position <I>N</I>;
			
 
				-a negative <I>N</I> will be interpreted as a distance from the
			
 
				-page bottom.
			
 
				-Any macro previously planted at <I>N</I> is replaced by <I>xx</I>.
			
 
				-A zero <I>N</I> refers to the top of a page.
			
 
				-In the absence of <I>xx</I>, the first trap found at <I>N</I>, if any, is removed.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ch</TT><I> xx N</I>	-	-	<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Change
			
 
				-the trap position for macro <I>xx</I> to be <I>N</I>.
			
 
				-In the absence of <I>N</I>, the trap, if any, is removed.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.dt</TT><I> N xx</I>	-	off	D,<B>v</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Install a diversion trap
			
 
				-at position <I>N</I> in the <I>current</I> diversion to invoke
			
 
				-macro <I>xx</I>.
			
 
				-Another <TT>dt</TT> will redefine the diversion trap.
			
 
				-If no arguments are given, the diversion trap is removed.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.it</TT><I> N xx</I>	-	off	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Set an input-line-count trap
			
 
				-to invoke the macro <I>xx</I> after <I>N</I> lines of <I>text</I> input
			
 
				-have been read
			
 
				-(control or request lines do not count).
			
 
				-The text may be inline text or
			
 
				-text interpolated by inline or trap-invoked macros.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.em</TT><I> xx</I>	none	none	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The
			
 
				-macro <I>xx</I> will be invoked
			
 
				-when all input has ended.
			
 
				-The effect is almost as if the contents of <I>xx</I> had been at the end
			
 
				-of the last file processed,
			
 
				-but all processing ceases at the next page eject.
			
 
				-</dl>
			
 
				-<H4>8 Number Registers
			
 
				-</H4>
			
 
				-<P>
			
 
				-A variety of parameters are available to the user as
			
 
				-predefined <I>number registers</I> (see Summary).
			
 
				-In addition, users may define their own registers.
			
 
				-Register names are one or two characters long and do not conflict
			
 
				-with request, macro, or string names.
			
 
				-Except for certain predefined read-only registers,
			
 
				-a number register can be read, written, automatically
			
 
				-incremented or decremented, and interpolated
			
 
				-into the input in a variety of formats.
			
 
				-One common use of user-defined registers is to
			
 
				-automatically number sections, paragraphs, lines, etc.
			
 
				-A number register may be used any time numerical input is expected or desired
			
 
				-and may be used in numerical <I>expressions</I> (&#167;1.4).
			
 
				-</P>
			
 
				-<P>
			
 
				-Number registers are created and modified using <TT>nr</TT>, which
			
 
				-specifies the name, numerical value, and the auto-increment size.
			
 
				-Registers are also modified, if accessed
			
 
				-with an auto-incrementing sequence.
			
 
				-If the registers <I>x</I> and <I>xx</I> both contain
			
 
				-<I>N</I> and have the auto-increment size <I>M</I>,
			
 
				-the following access sequences have the effect shown:
			
 
				-<br><img src="-.16259.gif"><br>
			
 
				-When interpolated, a number register is converted to
			
 
				-decimal (default),
			
 
				-decimal with leading zeros,
			
 
				-lower-case Roman,
			
 
				-upper-case Roman,
			
 
				-lower-case sequential alphabetic,
			
 
				-or
			
 
				-upper-case sequential alphabetic
			
 
				-according to the format specified by <TT>af</TT>.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.nr</TT><I> R &#177;N M</I>		-	<B>u</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The number register
			
 
				-<I>R</I> is assigned the value &#177;<I>N</I>
			
 
				-with respect to the previous value, if any.
			
 
				-The increment for auto-incrementing is set to <I>M</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.af</TT><I> R c</I>	arabic	-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Assign
			
 
				-format <I>c</I> to register <I>R</I>.
			
 
				-The available formats are:
			
 
				-<br><img src="-.162510.gif"><br>
			
 
				-An arabic format having <I>N</I> digits
			
 
				-specifies a field width of <I>N</I> digits (example 2 above).
			
 
				-The read-only registers and the width function
			
 
				-(&#167;11.2)
			
 
				-are always arabic.
			
 
				-Warning: the value of a number register in a non-Arabic format
			
 
				-is not numeric, and will not produce the expected results in expressions.
			
 
				-<DT><DT>&#32;<DD>
			
 
				-The function <TT>\g</TT><I>x</I>
			
 
				-or <TT>\g(</TT><I>xx</I>
			
 
				-returns the format of a number register in a form suitable for <TT>af</TT>;
			
 
				-it returns nothing if the register has not been used.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.rr</TT><I> R</I>	-	ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Remove number register <I>R</I>.
			
 
				-If many registers are being created dynamically, it
			
 
				-may become necessary to remove unused registers
			
 
				-to recapture internal storage space for newer registers.
			
 
				-The register <TT>.R</TT>
			
 
				-contains the number of number registers still available.
			
 
				-</dl>
			
 
				-<H4>9 Tabs, Leaders, and Fields
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>9.1.  Tabs and leaders.</I></B> 
			
 
				-The ASCII horizontal tab character and the ASCII
			
 
				-SOH (control-A, hereafter called the <I>leader</I> character)
			
 
				-can both be used to generate either horizontal motion or
			
 
				-a string of repeated characters.
			
 
				-The length of the generated entity is governed
			
 
				-by internal <I>tab stops</I> specifiable
			
 
				-with <TT>ta</TT>.
			
 
				-The default difference is that tabs generate motion and leaders generate
			
 
				-a string of periods;
			
 
				-<TT>tc</TT> and <TT>lc</TT>
			
 
				-offer the choice of repeated character or motion.
			
 
				-There are three types of internal tab stops&#8212;
			
 
				-<I>left</I> adjusting, <I>right</I> adjusting,
			
 
				-and <I>centering</I>.
			
 
				-In the following table,
			
 
				-<I>D</I> is the distance from the current position on the <I>input</I> line
			
 
				-(where a tab or leader was found)
			
 
				-to the next tab stop,
			
 
				-<I>next-string</I> consists
			
 
				-of the input characters following the tab (or leader) up to the next tab (or leader) or end of line,
			
 
				-and
			
 
				-<I>W</I> is the width of <I>next-string</I>.
			
 
				-<br><img src="-.162511.gif"><br>
			
 
				-The length of generated motion is allowed to be negative, but
			
 
				-that of a repeated character string cannot be.
			
 
				-Repeated character strings contain an integer number of characters, and
			
 
				-any residual distance is prepended as motion.
			
 
				-Tabs or leaders found after the last tab stop are ignored, but may be used
			
 
				-as <I>next-string</I> terminators.
			
 
				-<P>
			
 
				-Tabs and leaders are not interpreted in copy mode.
			
 
				-<TT>\t</TT> and <TT>\a</TT> always generate a non-interpreted
			
 
				-tab and leader respectively, and
			
 
				-are equivalent to actual tabs and leaders in copy mode.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>9.2.  Fields.</I></B> 
			
 
				-A <I>field</I> is contained between
			
 
				-a pair of <I>field delimiter</I> characters,
			
 
				-and consists of substrings
			
 
				-separated by <I>padding</I> indicator characters.
			
 
				-The field length is the distance on the
			
 
				-<I>input</I> line from the position where the field begins to the next tab stop.
			
 
				-The difference between the total length of all the substrings
			
 
				-and the field length is incorporated as horizontal
			
 
				-padding space that is divided among the indicated
			
 
				-padding places.
			
 
				-The incorporated padding is allowed to be negative.
			
 
				-For example,
			
 
				-if the field delimiter is <TT>#</TT> and the padding indicator is <TT>^</TT>,
			
 
				-<TT>#^</TT><I>xxx</I><TT>^</TT><I>right</I><TT>#</TT>
			
 
				-specifies a right-adjusted string with the string <I>xxx</I> centered
			
 
				-in the remaining space.
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ta</TT><I> Nt ...</I>	0.8; 0.5in	none	E,<B>m</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Set tab stops and types.
			
 
				-<I>t=</I><TT>R</TT>, right adjusting;
			
 
				-<I>t=</I><TT>C</TT>, centering;
			
 
				-<I>t</I> absent, left adjusting.
			
 
				-<I>Troff</I> tab stops are preset every 0.5in.,
			
 
				-<I>nroff</I> every 0.8in.
			
 
				-The stop values are separated by spaces, and
			
 
				-a value preceded by <TT>+</TT>
			
 
				-is treated as an increment to the previous stop value.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.tc</TT><I> c</I>	none	none	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The tab repetition character
			
 
				-becomes <I>c</I>,
			
 
				-or is removed, thus specifying motion.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.lc</TT><I> c</I>	<TT>.</TT>	none	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The leader repetition character
			
 
				-becomes <I>c</I>,
			
 
				-or is removed, thus specifying motion.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.fc</TT><I> a b</I>	off	off	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The field delimiter
			
 
				-is set to <I>a</I>;
			
 
				-the padding indicator is set to the space character or to
			
 
				-<I>b</I>, if given.
			
 
				-In the absence of arguments the field mechanism is turned off.
			
 
				-</dl>
			
 
				-<H4>10 Input and Output Conventions and Character Translations
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.1.  Input character translations.</I></B> 
			
 
				-Ways of inputting the valid character set were
			
 
				-discussed in &#167;2.1.
			
 
				-The ASCII control characters horizontal tab (&#167;9.1),
			
 
				-SOH (&#167;9.1), and backspace (&#167;10.3) are discussed elsewhere.
			
 
				-The newline delimits input lines.
			
 
				-In addition,
			
 
				-STX, ETX, ENQ, ACK, and BEL
			
 
				-are accepted,
			
 
				-and may be used as delimiters or translated into a graphic with <TT>tr</TT> (&#167;10.5).
			
 
				-All others are ignored.
			
 
				-<P>
			
 
				-The <I>escape</I> character <TT>\</TT>
			
 
				-introduces <I>escape sequences</I>,
			
 
				-which cause the following character to mean
			
 
				-another character, or to indicate
			
 
				-some function.
			
 
				-A complete list of such sequences is given in the Summary.
			
 
				-The escape character <TT>\</TT>
			
 
				-should not be confused with the ASCII control character ESC.
			
 
				-The escape character <TT>\</TT> can be input with the sequence <TT>\\</TT>.
			
 
				-The escape character can be changed with <TT>ec</TT>,
			
 
				-and all that has been said about the default <TT>\</TT> becomes true
			
 
				-for the new escape character.
			
 
				-<TT>\e</TT> can be used to print whatever the current escape character is.
			
 
				-The escape mechanism may be turned off with <TT>eo</TT>,
			
 
				-and restored with <TT>ec</TT>.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ec</TT><I> c</I>	<TT>\</TT>	<TT>\</TT>	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Set escape character
			
 
				-to <TT>\</TT>, or to <I>c</I>, if given.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.eo</TT>	on	-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Turn escape mechanism off.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.2.  Ligatures.</I></B> 
			
 
				-The set of available ligatures is device and font dependent,
			
 
				-but is often a subset of
			
 
				-<B>fi</B>, <B>fl</B>, <B>ff</B>, <B>ffi</B>, and <B>ffl</B>.
			
 
				-They may be input by
			
 
				-<TT>\(fi</TT>, <TT>\(fl</TT>, <TT>\(ff</TT>, <TT>\(Fi</TT>, and <TT>\(Fl</TT> respectively.
			
 
				-The ligature mode is normally on in <I>troff</I>, and automatically invokes 
			
 
				-ligatures during input.
			
 
				-<br>&#32;<br>
			
 
				-<TT>.lg</TT><I> N</I>	on; off	on	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Ligature mode
			
 
				-is turned on if <I>N</I> is absent or non-zero,
			
 
				-and turned off if <I>N=</I>0.
			
 
				-If <I>N=</I>2, only the two-character ligatures are automatically invoked.
			
 
				-Ligature mode is inhibited for
			
 
				-request, macro, string, register, or file names,
			
 
				-and in copy mode.
			
 
				-No effect in <I>nroff</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.3.  Backspacing, underlining, overstriking, etc.</I></B> 
			
 
				-Unless in copy mode, the ASCII backspace character is replaced
			
 
				-by a backward horizontal motion having the width of the
			
 
				-space character.
			
 
				-Underlining as a form of line-drawing is discussed in &#167;12.4.
			
 
				-A generalized overstriking function is described in &#167;12.1.
			
 
				-<P>
			
 
				-<I>Nroff</I> automatically underlines
			
 
				-characters in the <I>underline</I> font,
			
 
				-specifiable with <TT>uf</TT>,
			
 
				-normally that on font position 2.
			
 
				-In addition to <TT>ft</TT> and <TT>\f</TT><I>F</I>,
			
 
				-the underline font may be selected by <TT>ul</TT> and <TT>cu</TT>.
			
 
				-Underlining is restricted to an output-device-dependent
			
 
				-subset of reasonable characters.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ul</TT><I> N</I>	off	<I>N=</I>1	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Italicize in <I>troff</I>
			
 
				-(underline in <I>nroff</I>) the next <I>N</I>
			
 
				-input text lines.
			
 
				-Actually, switch to underline font, saving the
			
 
				-current font for later restoration;
			
 
				-other font changes within the span of a <TT>ul</TT>
			
 
				-will take effect,
			
 
				-but the restoration will undo the last change.
			
 
				-Output generated by <TT>tl</TT> (&#167;14) is affected by the
			
 
				-font change, but does not decrement <I>N</I>.
			
 
				-If <I>N&#62;</I>1, there is the risk that
			
 
				-a trap interpolated macro may provide text
			
 
				-lines within the span;
			
 
				-environment switching can prevent this.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.cu</TT><I> N</I>	off	<I>N=</I>1	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Continuous underline.
			
 
				-A variant
			
 
				-of <TT>ul</TT> that causes <I>every</I> character to be underlined in <I>nroff</I>.
			
 
				-Identical to <TT>ul</TT> in <I>troff</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.uf</TT><I> F</I>	Italic	Italic	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Underline font set to <I>F</I>.
			
 
				-In <I>nroff</I>,
			
 
				-<I>F</I> may not be on position 1.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.4.  Control characters.</I></B> 
			
 
				-Both the control character <TT>.</TT> and the <I>no-break</I>
			
 
				-control character <TT>'</TT> may be changed.
			
 
				-Such a change must be compatible with the design
			
 
				-of any macros used in the span of the change,
			
 
				-and
			
 
				-particularly of any trap-invoked macros.
			
 
				-<br>&#32;<br>
			
 
				-<TT>.cc</TT><I> c</I>	<TT>.</TT>	<TT>.</TT>	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The basic control character
			
 
				-is set to <I>c</I>,
			
 
				-or reset to ``<TT>.</TT>''.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.c2</TT><I> c</I>	<TT>'	'</TT>	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The <I>no-break</I> control character is set
			
 
				-to <I>c</I>, or reset to ``<TT>'</TT>''.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.5.  Output translation.</I></B> 
			
 
				-One character can be made a stand-in for another character using <TT>tr</TT>.
			
 
				-All text processing (e.g., character comparisons) takes place
			
 
				-with the input (stand-in) character, which appears to have the width of the final
			
 
				-character.
			
 
				-The graphic translation occurs at the moment of output
			
 
				-(including diversion).
			
 
				-<br>&#32;<br>
			
 
				-<TT>.tr</TT><I> abcd....</I>	none	-	O
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Translate
			
 
				-<I>a</I> into <I>b</I>, <I>c</I> into <I>d</I>, etc.
			
 
				-If an odd number of characters is given,
			
 
				-the last one will be mapped into the space character.
			
 
				-To be consistent, a particular translation
			
 
				-must stay in effect from <I>input</I> to <I>output</I> time.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.6.  Transparent throughput.</I></B> 
			
 
				-An input line beginning with a <TT>\!</TT> is read in copy mode and <I>transparently</I> output
			
 
				-(without the initial <TT>\!</TT>);
			
 
				-the text processor is otherwise unaware of the line's presence.
			
 
				-This mechanism may be used to pass control information to a post-processor
			
 
				-or to embed control lines in a macro created by a diversion.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.7.  Transparent output.</I></B> 
			
 
				-The sequence <TT>\X'</TT><I>anything</I><TT>'</TT>
			
 
				-copies
			
 
				-<I>anything</I>
			
 
				-to the output, as a device control function of the form <TT>x X</TT>
			
 
				-<I>anything</I>
			
 
				-(&#167;22).
			
 
				-Escape sequences in
			
 
				-<I>anything</I>
			
 
				-are processed.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>10.8.  Comments and concealed newlines.</I></B> 
			
 
				-An uncomfortably long input line that must stay
			
 
				-one line (e.g., a string definition, or nofilled text)
			
 
				-can be split into several physical lines by ending all but
			
 
				-the last one with the escape <TT>\</TT>.
			
 
				-The sequence <TT>\</TT><I>newline</I> is always ignored,
			
 
				-except in a comment.
			
 
				-Comments may be embedded at the end of any line by
			
 
				-prefacing them with <TT>\"</TT>.
			
 
				-The newline at the end of a comment cannot be concealed.
			
 
				-A line beginning with <TT>\"</TT> will appear as a blank line and
			
 
				-behave like <TT>.sp 1</TT>;
			
 
				-a comment can be on a line by itself by beginning the line with <TT>.\"</TT>.
			
 
				-<H4>11 Local Horizontal and Vertical Motions, and the Width Function
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>11.1.  Local Motions.</I></B> 
			
 
				-The functions <TT>\v'</TT><I>N</I><TT>'</TT> and
			
 
				-<TT>\h'</TT><I>N</I><TT>'</TT>
			
 
				-can be used for <I>local</I> vertical and horizontal motion respectively.
			
 
				-The distance <I>N</I> may be negative; the positive directions
			
 
				-are rightward and downward.
			
 
				-A local motion is one contained within a line.
			
 
				-To avoid unexpected vertical dislocations, it is necessary that
			
 
				-the net vertical local motion within a word in filled text
			
 
				-and otherwise within a line balance to zero.
			
 
				-The escape sequences providing local motion are
			
 
				-summarized in the following table.
			
 
				-<br><img src="-.162512.gif"><br>
			
 
				-As an example,
			
 
				-<TT>E^2</TT>
			
 
				-could be generated by a sequence of size changes and motions:
			
 
				-<TT>E\s-2\v'-0.4m'2\v'0.4m'\s+2</TT>;
			
 
				-note that
			
 
				-the 0.4 em vertical motions are at the smaller size.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>11.2.  Width Function.</I></B> 
			
 
				-The <I>width</I> function <TT>\w'</TT><I>string</I><TT>'</TT>
			
 
				-generates the numerical width of <I>string</I> (in basic units).
			
 
				-Size and font changes may be embedded in <I>string</I>,
			
 
				-and will not affect the current environment.
			
 
				-For example,
			
 
				-<TT>.ti -\w'\fB1. 'u</TT> could be used to
			
 
				-temporarily indent leftward a distance equal to the
			
 
				-size of the string ``<TT>1. </TT>'' in font <B>B</B>.
			
 
				-<P>
			
 
				-The width function also sets three number registers.
			
 
				-The registers <TT>st</TT> and <TT>sb</TT> are set respectively to the highest and
			
 
				-lowest extent of <I>string</I> relative to the baseline;
			
 
				-then, for example,
			
 
				-the total height of the string is <TT>\n(stu-\n(sbu</TT>.
			
 
				-In <I>troff</I> the number register <TT>ct</TT> is set to a value
			
 
				-between 0 and 3.
			
 
				-The value
			
 
				-0 means that all of the characters in <I>string</I> were short lower
			
 
				-case characters without descenders (like <TT>e</TT>);
			
 
				-1 means that at least one character has a descender (like <TT>y</TT>);
			
 
				-2 means that at least one character is tall (like <TT>H</TT>);
			
 
				-and 3 means that both tall characters and characters with
			
 
				-descenders are present.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>11.3.  Mark horizontal place.</I></B> 
			
 
				-The function <TT>\k</TT><I>x</I> causes the current horizontal
			
 
				-position in the <I>input line</I> to be stored in register <I>x</I>.
			
 
				-For example,
			
 
				-the construction <TT>\kx</TT><I>word</I><TT>\h'|\nxu+3u'</TT><I>word</I><TT></TT>
			
 
				-will embolden <I>word</I> by backing up to almost its beginning and overprinting it,
			
 
				-resulting in <B><I>word</I></B>.
			
 
				-<H4>12 Overstrike, Bracket, Line-drawing, Graphics, and Zero-width Functions
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>12.1.  Overstriking.</I></B> 
			
 
				-Automatically centered overstriking of up to nine characters
			
 
				-is provided by the <I>overstrike</I> function
			
 
				-<TT>\o'</TT><I>string</I><TT>'</TT>.
			
 
				-The characters in <I>string</I> are overprinted with centers aligned; the total width
			
 
				-is that of the widest character.
			
 
				-<I>string</I> may not contain local vertical motion.
			
 
				-As examples,
			
 
				-<TT>\o'e\''</TT> produces &#233;, and
			
 
				-<TT>\o'\(mo\(sl'</TT> produces &#8713;.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>12.2.  Zero-width characters.</I></B> 
			
 
				-The function <TT>\z</TT><I>c</I>
			
 
				-will output <I>c</I> without spacing over
			
 
				-it, and can be used to produce left-aligned overstruck
			
 
				-combinations.
			
 
				-As examples,
			
 
				-<TT>\z\(ci\(pl</TT> will produce &#8853;, and
			
 
				-<TT>\(br\z\(rn\(ul\(br</TT> will produce a small
			
 
				-badly constructed box |z _|.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>12.3.  Large Brackets.</I></B> 
			
 
				-The Special Font usually contains a number of bracket construction pieces
			
 
				-(())|||||||
			
 
				-that can be combined into various bracket styles.
			
 
				-The function <TT>\b'</TT><I>string</I><TT>'</TT> may be used to pile
			
 
				-up vertically the characters in <I>string</I>
			
 
				-(the first character on top and the last at the bottom);
			
 
				-the characters are vertically separated by 1 em and the total
			
 
				-pile is centered 1/2 em above the current baseline
			
 
				-(&#189; line in <I>nroff</I>).
			
 
				-For example,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-\b'\(lc\(lf'E\b'\(rc\(rf'\x'-0.5m'\x'0.5m'
			
 
				-</PRE></TT></DL>
			
 
				-produces
			
 
				-x'-.5m'x'.5m'b'||'Eb'||'.
			
 
				-<br>&#32;<br>
			
 
				-<B><I>12.4.  Line drawing.</I></B> 
			
 
				-The function <TT>\l'</TT><I>Nc</I><TT>'</TT> (backslash-ell) draws a string of repeated <I>c</I>'s towards the right for a distance <I>N</I>.
			
 
				-If <I>c</I> looks like a continuation of
			
 
				-an expression for <I>N</I>, it may be insulated from <I>N</I> with <TT>\&amp;</TT>.
			
 
				-If <I>c</I> is not specified, the <TT>_</TT> (baseline rule) is used
			
 
				-(underline character in <I>nroff</I>).
			
 
				-If <I>N</I> is negative, a backward horizontal motion
			
 
				-of size <I>N</I> is made before drawing the string.
			
 
				-Any space resulting from <I>N</I>/(size of <I>c</I>) having a remainder is put at the beginning (left end)
			
 
				-of the string.
			
 
				-If <I>N</I> is less than the width of <I>c</I>,
			
 
				-a single <I>c</I> is centered on a distance <I>N</I>.
			
 
				-In the case of characters
			
 
				-that are designed to be connected, such as
			
 
				-baseline-rule <TT>_</TT>,
			
 
				-under-rule <TT>_</TT>,
			
 
				-and
			
 
				-root-en <TT> </TT>,
			
 
				-the remainder space is covered by overlapping.
			
 
				-As an example, a macro to underscore a string can be written
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.de us
			
 
				-\\$1\l'|0\(ul'
			
 
				-..
			
 
				-</PRE></TT></DL>
			
 
				-or one to draw a box around a string
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.de bx
			
 
				-\(br\|\\$1\|\(br\l'|0\(rn'\l'|0\(ul'
			
 
				-..
			
 
				-</PRE></TT></DL>
			
 
				-such that
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.us "underlined words"
			
 
				-</PRE></TT></DL>
			
 
				-and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.bx "words in a box"
			
 
				-</PRE></TT></DL>
			
 
				-yield
			
 
				-underlined words<HR>
			
 
				-and
			
 
				-|words in a box|<HR><HR>
			
 
				-h'-w'.'u'.
			
 
				-<P>
			
 
				-The function <TT>\L'</TT><I>Nc</I><TT>'</TT> draws a vertical line consisting
			
 
				-of the (optional) character <I>c</I> stacked vertically apart 1em
			
 
				-(1 line in <I>nroff</I>),
			
 
				-with the first two characters overlapped,
			
 
				-if necessary, to form a continuous line.
			
 
				-The default character is the <I>box rule</I> | (<TT>\(br</TT>);
			
 
				-the other suitable character is the <I>bold vertical</I> | (<TT>\(bv</TT>).
			
 
				-The line is begun without any initial motion relative to the
			
 
				-current baseline.
			
 
				-A positive <I>N</I> specifies a line drawn downward and
			
 
				-a negative <I>N</I> specifies a line drawn upward.
			
 
				-After the line is drawn no compensating
			
 
				-motions are made;
			
 
				-the instantaneous baseline is at the end of the line.
			
 
				-</P>
			
 
				-<P>
			
 
				-The horizontal and vertical line drawing functions may be used
			
 
				-in combination to produce large boxes.
			
 
				-The zero-width <I>box-rule</I> and the &#189;-em wide <I>under-rule</I>
			
 
				-were designed to form corners when using 1-em vertical
			
 
				-spacings.
			
 
				-For example the macro
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.de eb
			
 
				-.sp -1	\"compensate for next automatic baseline spacing
			
 
				-.nf	\"avoid possibly overflowing word buffer
			
 
				-\h'-.5n'\L'|\\nau-1'\l'\\n(.lu+1n\(ul'\L'-|\\nau+1'\l'|0u-.5n\(ul'
			
 
				-.fi
			
 
				-..
			
 
				-</PRE></TT></DL>
			
 
				-will draw a box around some text whose beginning vertical place was
			
 
				-saved in number register <I>a</I>
			
 
				-(e.g., using <TT>.mk a</TT>)
			
 
				-as was done for this paragraph.
			
 
				-<br>&#32;<br>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-h'-.5n'L'|0+1u-1'<HR>L'-|0+1u+1'<HR>
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>12.5.  Graphics.</I></B> 
			
 
				-The function <TT>\D'</TT><I>c</I><TT>...'</TT>
			
 
				-draws a graphic object of type <I>c</I>
			
 
				-according to a sequence of parameters,
			
 
				-which are generally pairs of numbers.
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<TT>D'l </TT><I>dh</I><TT> </TT><I>dv</I><TT>'	</TT>draw line from current position by <I>dh</I>, <I>dv</I><TT>
			
 
				-</TT><TT>D'c </TT><I>d</I><TT>'	</TT>draw circle of diameter <I>d</I> with left side at current position<TT>
			
 
				-</TT><TT>D'e </TT><I>d</I><TT></TT>1<TT></TT><I>d</I><TT></TT>2<TT>'	</TT>draw ellipse of diameters <I>d</I>1 and <I>d</I>2<TT>
			
 
				-</TT><TT>D'a </TT><I>dh</I><TT></TT>1<TT> </TT><I>dv</I><TT></TT>1<TT> </TT><I>dh</I><TT></TT>2<TT> </TT><I>dv</I><TT></TT>2<TT>'</TT><TT>	</TT>draw arc from current position to <I>dh</I>1<I>+dh</I>2, <I>dv</I>1<I>+dv</I>2,<TT>
			
 
				-	   </TT>with center at <I>dh</I>1, <I>dv</I>1 from current position<TT>
			
 
				-</TT><TT>D'~ </TT><I>dh</I><TT></TT>1<TT></TT><I>dv</I><TT></TT>1<TT></TT><I>dh</I><TT></TT>2<TT></TT><I>dv</I><TT></TT>2<TT></TT><I>...</I><TT>'</TT><TT>	</TT>draw B-spline from current position by <I>dh</I>1<I></I>,<I>dv</I>1,<TT>
			
 
				-	   </TT>then by <I>dh</I>2,<I>dv</I>2, then by <I>dh</I>2,<I>dv</I>2, then ...<TT>
			
 
				-</PRE></TT></DL>
			
 
				-</dl>
			
 
				-</TT><br>&#32;<br>
			
 
				-For example,
			
 
				-draws the ellipse
			
 
				-D'e.2i .1i',
			
 
				-and
			
 
				-the line
			
 
				-D'l.2i -.1i'D'l.1i .1i'.
			
 
				-A <TT>\D</TT>
			
 
				-with an unknown <I>c</I> is processed and copied through to the output
			
 
				-for unspecified interpretation;
			
 
				-coordinates are interpreted alternately as horizontal and vertical
			
 
				-values.
			
 
				-<P>
			
 
				-Numbers taken as horizontal (first, third, etc.) have default scaling of ems;
			
 
				-vertical numbers (second, fourth, etc.) have default scaling of <I>V</I>s (&#167;1.3).
			
 
				-The position after a graphical object has been drawn is
			
 
				-at its end; for circles and ellipses, the ``end''
			
 
				-is at the right side.
			
 
				-</P>
			
 
				-<H4>13 Hyphenation.
			
 
				-</H4>
			
 
				-<P>
			
 
				-Automatic hyphenation may be switched off and on.
			
 
				-When switched on with <TT>hy</TT>,
			
 
				-several variants may be set.
			
 
				-A <I>hyphenation indicator</I> character may be embedded in a word to
			
 
				-specify desired hyphenation points,
			
 
				-or may be prefixed to suppress hyphenation.
			
 
				-In addition,
			
 
				-the user may specify a small list of exception words.
			
 
				-</P>
			
 
				-<P>
			
 
				-Only words that consist of a central alphabetic string
			
 
				-surrounded by (usually null) non-alphabetic strings
			
 
				-are candidates for automatic hyphenation.
			
 
				-Words that contain hyphens
			
 
				-(minus),
			
 
				-em-dashes (<TT>\(em</TT>),
			
 
				-or hyphenation indicator characters
			
 
				-are always subject to splitting after those characters,
			
 
				-whether automatic hyphenation is on or off.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.nh</TT>	hyphenate	-	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Automatic hyphenation is turned off.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.hy</TT> <I>N</I>	on, <I>N=</I>1	on, <I>N=</I>1	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Automatic hyphenation is turned on
			
 
				-for <I>N</I>>=1, or off for <I>N=</I>0.
			
 
				-If <I>N=</I>2, last lines (ones that will cause a trap)
			
 
				-are not hyphenated.
			
 
				-For <I>N=</I>4 and 8, the last and first two characters
			
 
				-respectively of a word are not split off.
			
 
				-These values are additive;
			
 
				-i.e., <I>N=</I>14 will invoke all three restrictions.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.hc</TT><I> c</I>	<TT>\%	\%</TT>	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Hyphenation indicator character is set
			
 
				-to <I>c</I> or to the default <TT>\%</TT>.
			
 
				-The indicator does not appear in the output.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.hw</TT><I> word ...</I>		ignored	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Specify
			
 
				-hyphenation points in words
			
 
				-with embedded minus signs.
			
 
				-Versions of a word with terminal <I>s</I> are implied;
			
 
				-i.e., <TT>dig-it</TT>
			
 
				-implies <TT>dig-its</TT>.
			
 
				-This list is examined initially and after
			
 
				-each suffix stripping.
			
 
				-The space available is small.
			
 
				-</dl>
			
 
				-<H4>14 Three-Part Titles.
			
 
				-</H4>
			
 
				-<P>
			
 
				-The titling function <TT>tl</TT> provides for automatic placement
			
 
				-of three fields at the left, center, and right of a line
			
 
				-with a title length
			
 
				-specifiable with <TT>lt</TT>.
			
 
				-<TT>tl</TT> may be used anywhere, and is independent of the
			
 
				-normal text collecting process.
			
 
				-A common use is in header and footer macros.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.tl '</TT><I>left</I><TT>'</TT><I>center</I><TT>'</TT><I>right</I><TT>'</TT>	-	-	
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The strings
			
 
				-<I>left</I>, <I>center</I>, and <I>right</I> are
			
 
				-respectively left-adjusted, centered, and right-adjusted
			
 
				-in the current title length.
			
 
				-Any of the strings may be empty,
			
 
				-and overlapping is permitted.
			
 
				-If the page-number character (initially <TT>%</TT>) is found within any of the fields it is replaced
			
 
				-by the current page number in the format assigned to register <TT>%</TT>.
			
 
				-Any character may be used in place of <TT>'</TT>
			
 
				-as the string delimiter.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.pc</TT><I> c</I>	<TT>%</TT>	off	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The page number character is set to <I>c</I>,
			
 
				-or removed.
			
 
				-The page number register remains <TT>%</TT>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.lt</TT><I> &#177;N</I>	6.5in	previous	E,<B>m</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Length of title
			
 
				-is set to &#177;<I>N</I>.
			
 
				-The line length and the title length are independent.
			
 
				-Indents do not apply to titles; page offsets do.
			
 
				-</dl>
			
 
				-<H4>15 Output Line Numbering.
			
 
				-</H4>
			
 
				-<P>
			
 
				-Automatic sequence numbering of output lines may be
			
 
				-requested with <TT>nm</TT>.
			
 
				-When in effect,
			
 
				-a three-digit, arabic number plus a digit-space
			
 
				-is prefixed to output text lines.
			
 
				-The text lines are thus offset by four digit-spaces,
			
 
				-and otherwise retain their line length;
			
 
				-a reduction in line length may be desired to keep the right margin
			
 
				-aligned with an earlier margin.
			
 
				-Blank lines, other vertical spaces, and lines generated by <TT>tl</TT>
			
 
				-are not numbered.
			
 
				-Numbering can be temporarily suspended with <TT>nn</TT>,
			
 
				-or with an <TT>.nm</TT> followed by a later <TT>.nm +0</TT>.
			
 
				-In addition,
			
 
				-a line number indent <I>I</I>, and the number-text separation <I>S</I>
			
 
				-may be specified in digit-spaces.
			
 
				-Further, it can be specified that only those line numbers that are
			
 
				-multiples of some number <I>M</I> are to be printed (the others will appear
			
 
				-as blank number fields).
			
 
				-<br>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.nm</TT><I> &#177;N M S I</I>		off	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Line number mode.
			
 
				-If &#177;<I>N</I> is given,
			
 
				-line numbering is turned on,
			
 
				-and the next output line numbered is numbered &#177;<I>N</I>.
			
 
				-Default values are <I>M=</I>1, <I>S=</I>1, and <I>I=</I>0.
			
 
				-Parameters corresponding to missing arguments are unaffected;
			
 
				-a non-numeric argument is considered missing.
			
 
				-In the absence of all arguments, numbering is turned off;
			
 
				-the next line number is preserved for possible further use
			
 
				-in number register <TT>ln</TT>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.nn</TT><I> N</I>	-	<I>N=</I>1	E
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-The next <I>N</I> text output lines are not
			
 
				-numbered.
			
 
				-</dl>
			
 
				-<P>
			
 
				-As an example, the paragraph portions of this section
			
 
				-are numbered with <I>M=</I>3:
			
 
				-<TT>.nm 1 3</TT> was placed at the beginning;
			
 
				-<TT>.nm</TT> was placed at the end of the first paragraph;
			
 
				-and <TT>.nm +0</TT> was placed in front of this paragraph;
			
 
				-and <TT>.nm</TT> finally placed at the end.
			
 
				-Line lengths were also changed (by <TT>\w'0000'u</TT>) to keep the right side aligned.
			
 
				-Another example is <TT>.nm +5 5 x 3</TT>
			
 
				-which turns on numbering with the line number of the next
			
 
				-line to be 5 greater than the last numbered line,
			
 
				-with <I>M=</I>5, with spacing <I>S</I> untouched, and with the indent <I>I</I> set to 3.
			
 
				-<br>
			
 
				-</P>
			
 
				-<H4>16 Conditional Acceptance of Input
			
 
				-</H4>
			
 
				-<P>
			
 
				-In the following,
			
 
				-<I>c</I> is a one-character built-in <I>condition</I> name,
			
 
				-<TT>!</TT> signifies <I>not</I>,
			
 
				-<I>N</I> is a numerical expression,
			
 
				-<I>string1</I> and <I>string2</I> are strings delimited by any non-blank, non-numeric character not in the strings,
			
 
				-and
			
 
				-<I>anything</I> represents what is conditionally accepted.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.if</TT><I> c anything</I>	-	-	
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-If condition
			
 
				-<I>c</I> true, accept <I>anything</I> as input;
			
 
				-in multi-line case use \{<I>anything</I>\}.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.if !</TT><I>c anything</I>	-	-	
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-If condition <I>c</I> false, accept <I>anything</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.if</TT><I> N anything</I>		-	<B>u</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-If expression <I>N</I> > 0, accept <I>anything</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.if !</TT><I>N anything</I>		-	<B>u</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-If expression <I>N</I> &lt;= 0 [sic], accept <I>anything</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.if '</TT><I>string1</I><TT>'</TT><I>string2</I><TT>'</TT><I> anything</I>	-		
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-If <I>string1</I> identical to <I>string2</I>,
			
 
				-accept <I>anything</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.if !'</TT><I>string1</I><TT>'</TT><I>string2</I><TT>'</TT><I> anything</I>	-		
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-If <I>string1</I> not identical to <I>string2</I>,
			
 
				-accept <I>anything</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ie</TT><I> c anything</I>		-	<B>u</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-If portion of if-else;
			
 
				-all of the forms for <TT>if</TT> above are valid.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.el</TT><I> anything</I>	-	-	
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Else portion of if-else.
			
 
				-</dl>
			
 
				-<P>
			
 
				-The built-in condition names are:
			
 
				-<br><img src="-.162513.gif"><br>
			
 
				-If the condition <I>c</I> is true, or if the number <I>N</I> is greater than zero,
			
 
				-or if the strings compare identically (including motions and character size and font),
			
 
				-<I>anything</I> is accepted as input.
			
 
				-If a <TT>!</TT> precedes the condition, number, or string comparison,
			
 
				-the sense of the acceptance is reversed.
			
 
				-</P>
			
 
				-<P>
			
 
				-Any spaces between the condition and the beginning of <I>anything</I> are skipped over.
			
 
				-The <I>anything</I> can be either a single input line (text, macro, or whatever)
			
 
				-or a number of input lines.
			
 
				-In the multi-line case,
			
 
				-the first line must begin with a left delimiter <TT>\{</TT> and
			
 
				-the last line must end with a right delimiter <TT>\}</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The request <TT>ie</TT> (if-else) is identical to <TT>if</TT>
			
 
				-except that the acceptance state is remembered.
			
 
				-A subsequent and matching <TT>el</TT> (else) request then uses the reverse sense of that state.
			
 
				-<TT>ie</TT>-<TT>el</TT> pairs may be nested.
			
 
				-</P>
			
 
				-<P>
			
 
				-Some examples are:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.if e .tl 'Even Page %'''
			
 
				-</PRE></TT></DL>
			
 
				-which outputs a title if the page number is even; and
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-.ie \n%&#62;1 \{\
			
 
				-'	sp 0.5i
			
 
				-.	tl 'Page %'''
			
 
				-'	sp |1.2i \}
			
 
				-.el .sp |2.5i
			
 
				-</PRE></TT></DL>
			
 
				-which treats page 1 differently from other pages.
			
 
				-</P>
			
 
				-<H4>17 Environment Switching.
			
 
				-</H4>
			
 
				-<P>
			
 
				-A number of the parameters that
			
 
				-control the text processing are gathered together into an
			
 
				-<I>environment</I>, which can be switched by the user.
			
 
				-The environment parameters are those associated
			
 
				-with requests noting E in their <I>Notes</I> column;
			
 
				-in addition, partially collected lines and words are in the environment.
			
 
				-Everything else is global; examples are page-oriented parameters,
			
 
				-diversion-oriented parameters, number registers, and macro and string definitions.
			
 
				-All environments are initialized with default parameter values.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ev</TT><I> N</I>	<I>N=</I>0	previous	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Environment switched to
			
 
				-environment 0&lt;=<I>N</I>&lt;=2.
			
 
				-Switching is done in push-down fashion so that
			
 
				-restoring a previous environment <I>must</I> be done with <TT>.ev</TT>
			
 
				-rather than specific reference.
			
 
				-Note that what is pushed down and restored is the environment
			
 
				-<I>number,</I>
			
 
				-not its contents.
			
 
				-</dl>
			
 
				-<H4>18 Insertions from the Standard Input
			
 
				-</H4>
			
 
				-<P>
			
 
				-The input can be temporarily switched to the system standard input
			
 
				-with <TT>rd</TT>,
			
 
				-which will switch back when two consecutive newlines
			
 
				-are found (the extra blank line is not used).
			
 
				-This mechanism is intended for insertions in form-letter-like documentation.
			
 
				-The standard input can be the user's keyboard,
			
 
				-a pipe, or a file.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.rd</TT><I> prompt</I>	-	<I>prompt=</I>BEL	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Read insertion
			
 
				-from the standard input until two newlines in a row are found.
			
 
				-If the standard input is the user's keyboard, <I>prompt</I> (or a BEL)
			
 
				-is written onto the standard output.
			
 
				-<TT>rd</TT> behaves like a macro,
			
 
				-and arguments may be placed after <I>prompt</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ex</TT>	-	-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Exit from <I>nroff</I>/<I>troff</I>.
			
 
				-Text processing is terminated exactly as if all input had ended.
			
 
				-</dl>
			
 
				-<P>
			
 
				-If insertions are to be
			
 
				-taken from the terminal keyboard while output is being printed
			
 
				-on the terminal, the command line option <TT>-q</TT> will turn off the echoing
			
 
				-of keyboard input and prompt only with BEL.
			
 
				-The regular input and insertion input cannot
			
 
				-simultaneously come from the standard input.
			
 
				-</P>
			
 
				-<P>
			
 
				-As an example,
			
 
				-multiple copies of a form letter may be prepared by entering the insertions
			
 
				-for all the copies in one file to be used as the standard input,
			
 
				-and causing the file containing the letter to reinvoke itself with <TT>nx</TT> (&#167;19);
			
 
				-the process would ultimately be ended by an <TT>ex</TT> in the insertion file.
			
 
				-</P>
			
 
				-<H4>19 Input/Output File Switching
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.so</TT><I> filename</I>		-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Switch source file.
			
 
				-The top input (file reading) level is switched to <I>filename</I>.
			
 
				-When the new file ends,
			
 
				-input is again taken from the original file.
			
 
				-<TT>so</TT>'s may be nested.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.nx</TT><I> filename</I>		end-of-file	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Next file is <I>filename</I>.
			
 
				-The current file is considered ended, and the input is immediately switched
			
 
				-to <I>filename</I>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.sy</TT><I> string</I>		-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Execute program from <I>string</I>,
			
 
				-which is the rest of the input line.
			
 
				-The output is not collected automatically.
			
 
				-The number register <TT>$$</TT>,
			
 
				-which contains the process id of the <I>troff</I> process,
			
 
				-may be useful in generating unique filenames for output.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.pi</TT><I> string</I>		-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Pipe output to <I>string</I>,
			
 
				-which is the rest of the input line.
			
 
				-This request must occur before any printing occurs;
			
 
				-typically it is the first line of input.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.cf</TT><I> filename</I>		-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Copy
			
 
				-contents of file
			
 
				-<I>filename</I>
			
 
				-to output, completely unprocessed.
			
 
				-The file is assumed to contain something meaningful
			
 
				-to subsequent processes.
			
 
				-</dl>
			
 
				-<H4>20 Miscellaneous
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.mc</TT><I> c N</I>	-	off	E,<B>m</B>
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Specifies
			
 
				-that a <I>margin</I> character <I>c</I> appear a distance
			
 
				-<I>N</I> to the right of the right margin
			
 
				-after each non-empty text line (except those produced by <TT>tl</TT>).
			
 
				-If the output line is too long (as can happen in nofill mode)
			
 
				-the character will be appended to the line.
			
 
				-If <I>N</I> is not given, the previous <I>N</I> is used; the initial <I>N</I> is
			
 
				-0.2 inches in <I>nroff</I> and 1 em in <I>troff</I>.
			
 
				-The margin character used with this paragraph was a 12-point box-rule.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.tm</TT><I> string</I>	-	newline	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-After skipping initial blanks,
			
 
				-<I>string</I> (rest of the line) is read in copy mode
			
 
				-and written on the standard error.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ab</TT><I> string</I>	-	newline	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-After skipping initial blanks,
			
 
				-<I>string</I> (rest of the line) is read in copy mode
			
 
				-and written on the standard error.
			
 
				-<I>Troff</I> or <I>nroff</I> then exit.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.ig</TT><I> yy</I>	-	<I>.yy=</I><TT>..</TT>	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Ignore
			
 
				-input lines.
			
 
				-<TT>ig</TT> behaves exactly like <TT>de</TT> (&#167;7) except that the
			
 
				-input is discarded.
			
 
				-The input is read in copy mode, and any auto-incremented
			
 
				-registers will be affected.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.lf</TT><I> N filename</I>		-	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Set
			
 
				-line number to <I>N</I> and filename to <I>filename</I>
			
 
				-for purposes of subsequent error messages, etc.
			
 
				-The number register [sic] <TT>.F</TT>
			
 
				-contains the name of the current input file,
			
 
				-as set by command line argument, <TT>so</TT>, <TT>nx</TT>,
			
 
				-or <TT>lf</TT>.
			
 
				-The number register <TT>.c</TT>
			
 
				-contains the number of input lines read from the current file,
			
 
				-again perhaps as modified by <TT>lf</TT>.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.pm</TT><I> t</I>	-	all	-
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Print macros.
			
 
				-The names and sizes of all of the defined macros and strings are printed
			
 
				-on the standard error;
			
 
				-if <I>t</I> is given, only the total of the sizes is printed.
			
 
				-The sizes is given in blocks
			
 
				-of 128 characters.
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-<TT>.fl</TT>	-	-	B
			
 
				-<DL COMPACT>
			
 
				-<DT><DD>
			
 
				-Flush output buffer.
			
 
				-Force output, including any pending position information.
			
 
				-</dl>
			
 
				-<H4>21 Output and Error Messages.
			
 
				-</H4>
			
 
				-<P>
			
 
				-The output from <TT>tm</TT>, <TT>pm</TT>, and the prompt from <TT>rd</TT>,
			
 
				-as well as various error messages, are written onto
			
 
				-the standard error.
			
 
				-The latter is different from the standard output,
			
 
				-where formatted text goes.
			
 
				-By default, both are written onto the user's terminal,
			
 
				-but they can be independently redirected.
			
 
				-</P>
			
 
				-<P>
			
 
				-Various error conditions may occur during
			
 
				-the operation of <I>nroff</I> and <I>troff</I>.
			
 
				-Certain less serious errors having only local impact do not
			
 
				-cause processing to terminate.
			
 
				-Two examples are <I>word overflow</I>, caused by a word that is too large
			
 
				-to fit into the word buffer (in fill mode), and
			
 
				-<I>line overflow</I>, caused by an output line that grew too large
			
 
				-to fit in the line buffer.
			
 
				-In both cases, a message is printed, the offending excess
			
 
				-is discarded,
			
 
				-and the affected word or line is marked at the point of truncation
			
 
				-with a * in <I>nroff</I> and a <= in <I>troff</I>.
			
 
				-Processing continues if possible,
			
 
				-on the grounds that output useful for debugging may be produced.
			
 
				-If a serious error occurs, processing terminates,
			
 
				-and a message is printed, along with a list of the macro names currently active.
			
 
				-Examples of serious errors include the inability to create, read, or write files,
			
 
				-and the exceeding of certain internal limits that
			
 
				-make future output unlikely to be useful.
			
 
				-</P>
			
 
				-<H4>22 Output Language
			
 
				-</H4>
			
 
				-<P>
			
 
				-<I>Troff</I>
			
 
				-produces its output in a language that is independent of any
			
 
				-specific output device,
			
 
				-except that the numbers in it have been computed on the basis
			
 
				-of the resolution of the device,
			
 
				-and the sizes, fonts, and characters that that device can print.
			
 
				-Nevertheless it is quite possible to interpret that output
			
 
				-on a different device, within the latter's capabilities.
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I></I><TT>s</TT><I>n</I>	set point size to <I>n</I>
			
 
				-<I></I><TT>f</TT><I>n</I>	set font to <I>n</I>
			
 
				-<I></I><TT>c</TT><I>c</I>	print character <I>c</I>
			
 
				-<I></I><TT>C</TT><I>name</I>	print the character called <I>name</I>; terminate <I>name</I> by white space
			
 
				-<I></I><TT>N</TT><I>n</I>	print character <I>n</I> on current font
			
 
				-<I></I><TT>H</TT><I>n</I>	go to absolute horizontal position <I>n</I> (<I>n</I>>=0)
			
 
				-<I></I><TT>V</TT><I>n</I>	go to absolute vertical position <I>n</I> (<I>n</I>>=0, down is positive)
			
 
				-<I></I><TT>h</TT><I>n</I>	go <I>n</I> units horizontally; <I>n</I><I><</I>0 is to the left
			
 
				-<I></I><TT>v</TT><I>n</I>	go <I>n</I> units vertically; <I>n</I><I><</I>0 is up
			
 
				-<I>nnc</I>	move right <I>nn</I>, then print UTF character <I>c</I>;  <I>nn</I> must be exactly 2 digits
			
 
				-<I></I><TT>p</TT><I>n</I>	new page <I>n</I> begins&#173;set vertical position to 0
			
 
				-<I></I><TT>n</TT><I>b</I> <I>a</I>	end of line (information only&#173;no action);  <I>b</I> = space before line, <I>a</I> = after
			
 
				-<I></I><TT>w</TT>	paddable word space (information only&#173;no action)
			
 
				-<I></I><TT>D</TT><I>c</I> ...0graphics function <I>c</I>; see below
			
 
				-<I></I><TT>x</TT> ...0device control functions; see below
			
 
				-<I></I><TT>#</TT> ...0comment
			
 
				-</PRE></TT></DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-All position values are in units.
			
 
				-Sequences that end in digits must be followed by a non-digit.
			
 
				-Blanks, tabs and newlines may occur as separators
			
 
				-in the input, and are mandatory to separate constructions
			
 
				-that would otherwise be confused.
			
 
				-Graphics functions, device control functions, and comments extend to the
			
 
				-end of the line they occur on.
			
 
				-<P>
			
 
				-The device control and graphics commands are intended as open-ended
			
 
				-families, to be expanded as needed.
			
 
				-The graphics functions coincide directly with the
			
 
				-sequences:
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I></I><TT>Dl</TT> <I>dh dv</I>	draw line from current position by <I>dh</I>, <I>dv</I>
			
 
				-<I></I><TT>Dc</TT> <I>d</I>	draw circle of diameter <I>d</I> with left side here
			
 
				-<I></I><TT>De</TT> <I>dh</I>1 <I>dv</I>2	draw ellipse of diameters <I>dh</I>1 and <I>dv</I>2
			
 
				-<I></I><TT>Da</TT> <I>dh</I>1 <I>dv</I>1 <I>dh</I>2 <I>dv</I>2	draw arc from current position to <I>dh</I>1<I>+dh</I>2, <I>dv</I>1<I>+dv</I>2,
			
 
				-		   center at <I>dh</I>1, <I>dv</I>1 from current position
			
 
				-<I></I><TT>D~</TT> <I>dh</I>1 <I>dv</I>1 <I>dh</I>2 <I>dv</I>2 ...	draw B-spline from current position to <I>dh</I>1, <I>dv</I>1,
			
 
				-		   then to <I>dh</I>2, <I>dv</I>2, then to ...
			
 
				-<I></I><TT>D</TT><I>z</I> <I>dh</I>1 <I>dv</I>1 <I>dh</I>2 <I>dv</I>2 ...	for any other <I>z</I> is uninterpreted
			
 
				-</PRE></TT></DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-In all of these, <I>dh</I>, <I>dv</I> is an increment on the current horizontal and
			
 
				-vertical position,
			
 
				-with down and right positive.
			
 
				-All distances and dimensions are in units.
			
 
				-<P>
			
 
				-The device control functions begin with
			
 
				-then a command, then other parameters.
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-x T <I>s</I>	name of typesetter is <I>s</I><TT>
			
 
				-x r </TT><I>n h v</I><TT>	</TT>resolution is <I>n</I> units/inch;<TT>
			
 
				-		</TT><I>h</I> = minimum horizontal motion, <I>v</I> = minimum vertical<TT>
			
 
				-x i	</TT>initialize<TT>
			
 
				-x f </TT><I>n s</I><TT>	</TT>mount font <I>s</I> on font position <I>n</I><TT>
			
 
				-x p	</TT>pause&#173;can restart<TT>
			
 
				-x s	</TT>stop&#173;done forever<TT>
			
 
				-x t	</TT>generate trailer information, if any<TT>
			
 
				-x H </TT><I>n</I><TT>	</TT>set character height to <I>n</I><TT>
			
 
				-x S </TT><I>n</I><TT>	</TT>set slant to <I>n</I><TT>
			
 
				-x X </TT><I>any</I><TT>	</TT>generated by the <TT>\X</TT> function<TT>
			
 
				-x </TT><I>any</I><TT>	</TT>to be ignored if not recognized<TT>
			
 
				-</PRE></TT></DL>
			
 
				-</dl>
			
 
				-</TT><br>&#32;<br>
			
 
				-Subcommands like
			
 
				-may be spelled out like
			
 
				-<P>
			
 
				-The commands
			
 
				-and
			
 
				-must occur first;
			
 
				-fonts must be mounted before they can be used;
			
 
				-comes last.
			
 
				-There are no other order requirements.
			
 
				-</P>
			
 
				-<P>
			
 
				-The following is the output from
			
 
				-for a typical printer,
			
 
				-as described in &#167;23:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-x T utf
			
 
				-x res 720 1 1
			
 
				-x init
			
 
				-V0
			
 
				-p1
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-x font 1 R
			
 
				-x font 2 I
			
 
				-x font 3 B
			
 
				-x font 4 BI
			
 
				-x font 5 CW
			
 
				-x font 6 H
			
 
				-x font 7 HB
			
 
				-x font 8 HX
			
 
				-x font 9 S1
			
 
				-x font 10 S
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-s10
			
 
				-f1
			
 
				-H0
			
 
				-s10
			
 
				-f1
			
 
				-V0
			
 
				-H720
			
 
				-V120
			
 
				-ch
			
 
				-50e44l28l28o50,w58w72o50r33l28dn120 0
			
 
				-x trailer
			
 
				-V7920
			
 
				-x stop
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-<I>Troff</I> output is normally not redundant;
			
 
				-size and font changes and position information are not included
			
 
				-unless needed.
			
 
				-Nevertheless, each page is self-contained, for the benefit of postprocessors
			
 
				-that re-order pages or process only a subset.
			
 
				-</P>
			
 
				-<H4>23 Device and Font Description Files
			
 
				-</H4>
			
 
				-<P>
			
 
				-The parameters that describe a output device
			
 
				-<I>name</I>
			
 
				-are read
			
 
				-from the directory
			
 
				-each time
			
 
				-<I>troff</I>
			
 
				-is invoked.
			
 
				-The device name is provided by default,
			
 
				-by the environment variable
			
 
				-or by a command-line argument
			
 
				-The default device name is
			
 
				-for UTFencoded Unicode characters.
			
 
				-The pre-defined string
			
 
				-contains the name of the device.
			
 
				-The
			
 
				-command-line option may be used to change the default directory.
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>0.0s.  Device description file.</I></B> 
			
 
				-General parameters of the device are stored, one per line, in
			
 
				-the file 
			
 
				-as a sequence of names and values.
			
 
				-<I>Troff</I> recognizes these parameters, and ignores any
			
 
				-others that may be present for specific drivers:
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I></I><TT>fonts</TT> <I>n</I> <I>F</I>1 <I>F</I>2 <I>.</I><I>.</I><I>.</I> <I>F</I><I>n</I>
			
 
				-<I></I><TT>sizes</TT> <I>s</I>1 <I>s</I>2 <I>.</I><I>.</I><I>.</I><I></I><TT>0</TT>
			
 
				-<I></I><TT>res</TT> <I>n</I>
			
 
				-<I></I><TT>hor</TT> <I>n</I>
			
 
				-<I></I><TT>vert</TT> <I>n</I>
			
 
				-<I></I><TT>unitwidth</TT> <I>n</I>
			
 
				-<I></I><TT>charset</TT>
			
 
				-<I>list of multi-character character names (optional)</I>
			
 
				-</PRE></TT></DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-The <I>F</I><I>i</I> are font names
			
 
				-to be initially mounted.
			
 
				-The list of sizes is a set of integers representing
			
 
				-some or all of the legal sizes the device can produce,
			
 
				-terminated by a zero.
			
 
				-The 
			
 
				-parameter gives the resolution of the machine in units per inch;
			
 
				-and
			
 
				-give the minimum number of units that can be moved
			
 
				-horizontally and vertically.
			
 
				-<P>
			
 
				-Character widths for each font are assumed to be given in machine units
			
 
				-at point size
			
 
				-(In other words, a character with a width of
			
 
				-<I>n</I> is <I>n</I> units wide at size
			
 
				-All widths are integers at all sizes.
			
 
				-</P>
			
 
				-<P>
			
 
				-A list of valid character names may be introduced by
			
 
				-the list of names is optional.
			
 
				-</P>
			
 
				-<P>
			
 
				-A line whose first non-blank character is
			
 
				-is a comment.
			
 
				-Except that
			
 
				-must occur last, parameters may appear in any order.
			
 
				-</P>
			
 
				-<P>
			
 
				-Here is a subset of the
			
 
				-file for a typical Postscript printer:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-# Description file for Postscript printers.
			
 
				-
			
 
				-fonts 10 R I B BI CW H HB HX S1 S
			
 
				-sizes 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
			
 
				-   24 25 26 27 28 29 30 31 32 33 34 35 36 38 40 44 48 54 60 72 0
			
 
				-res 720
			
 
				-hor 1
			
 
				-vert 1
			
 
				-unitwidth 10
			
 
				-charset
			
 
				-hy ct fi fl ff Fi Fl dg em 14 34 12 en aa
			
 
				-ga ru sc dd -&#62; br Sl ps cs cy as os =. ld
			
 
				-rd le ge pp -+ ob vr
			
 
				-sq bx ci fa te ** pl mi eq ~= *A *B *X *D
			
 
				-*E *F *G *Y *I *K *L *M *N *O *P *R *H *S *T *U *W
			
 
				-*C *Q *Z ul rn *a *b *x *d *e *f *g *y *i *k
			
 
				-*l *m *n *o *p *h *r *s *t *u *w *c *q *z
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B><I>0.0s.  Font description files.</I></B> 
			
 
				-Each font is described by an analogous description file,
			
 
				-which begins with parameters of the font, one per line, followed by a
			
 
				-list of characters and widths.
			
 
				-The file for font
			
 
				-<I>f</I>
			
 
				-is
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-<I></I><TT>name</TT> <I>str</I>	name of font is <I>str</I>
			
 
				-<I></I><TT>ligatures</TT> <I>. . .</I> <I></I><TT>0</TT>	list of ligatures
			
 
				-<I></I><TT>spacewidth</TT> <I>n</I>	width of a space on this font
			
 
				-<I></I><TT>special</TT>	this is a special font
			
 
				-<I></I><TT>charset</TT>
			
 
				-<I>list of character name, width, ascender/descender, code</I>, tab separated
			
 
				-</PRE></TT></DL>
			
 
				-</dl>
			
 
				-<br>&#32;<br>
			
 
				-The
			
 
				-and
			
 
				-fields are mandatory;
			
 
				-must be last.
			
 
				-Comments are permitted,
			
 
				-as are other unrecognized parameters.
			
 
				-<P>
			
 
				-Each line following
			
 
				-describes one character: its name, its width in units as described above,
			
 
				-ascender/descender information, and a decimal, octal or hexadecimal value
			
 
				-by which the output device knows it
			
 
				-(the
			
 
				-``number'' of the character).
			
 
				-The character name is arbitrary, except that
			
 
				-signifies an unnamed character.
			
 
				-If the width field contains
			
 
				-the name is a synonym for the previous character.
			
 
				-The ascender/descender field is 1 if
			
 
				-the character has a descender (hangs below the baseline, like
			
 
				-is 2 if it has an ascender (is tall, like
			
 
				-is 3 if both,
			
 
				-and is 0 if neither.
			
 
				-The value is returned
			
 
				-in the 
			
 
				-register, as computed by the
			
 
				-function (&#167;11.2).
			
 
				-</P>
			
 
				-<P>
			
 
				-Here are excerpts from a typical font description file
			
 
				-for the same Postscript printer.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-hy	33	0	45	hyphen \(hy
			
 
				--	"			- is a synonym for \(hy
			
 
				-<br>&#32;<br>
			
 
				-Q	72	3	81
			
 
				-<br>&#32;<br>
			
 
				-a	44	0	97
			
 
				-b	50	2	98
			
 
				-c	44	0	99
			
 
				-d	50	2	100
			
 
				-y	50	1	121
			
 
				-<br>&#32;<br>
			
 
				-em	100	0	208
			
 
				----	44	2	220	Pound symbol &#163;, \N'220'
			
 
				----	36	0	221	centered dot \N'221'
			
 
				-</PRE></TT></DL>
			
 
				-This says, for example, that the width of the letter
			
 
				-is 44 units at point size 10,
			
 
				-the value of 
			
 
				-Point sizes are scaled linearly and rounded, so the width of
			
 
				-will be 44 at size 10, 40 at size 9, 35 at size 8,
			
 
				-and so on.
			
 
				-<br>&#32;<br>
			
 
				-<HR>
			
 
				-<br>&#32;<br>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B>Tutorial Examples
			
 
				-<br>&#32;<br>
			
 
				-<br>&#32;<br>
			
 
				-<br>&#32;<br>
			
 
				-<br>&#32;<br>
			
 
				-</B><H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-It is almost always necessary to
			
 
				-prepare at least a small set of macro definitions
			
 
				-to describe a document.
			
 
				-Such common formatting needs
			
 
				-as page margins and footnotes
			
 
				-are deliberately not built into <I>nroff</I> and <I>troff</I>.
			
 
				-Instead,
			
 
				-the macro and string definition, number register, diversion,
			
 
				-environment switching, page-position trap, and conditional input mechanisms
			
 
				-provide the basis for user-defined implementations.
			
 
				-</P>
			
 
				-<P>
			
 
				-For most uses, a standard package like
			
 
				-or
			
 
				-is the right choice.
			
 
				-The next stage is to augment that,
			
 
				-or to selectively replace macros from the standard package.
			
 
				-The last stage, much harder,
			
 
				-is to write one's own from scratch.
			
 
				-This is not a task for the novice.
			
 
				-</P>
			
 
				-<P>
			
 
				-The examples discussed here are intended to be useful and somewhat realistic,
			
 
				-but will not necessarily cover all relevant contingencies.
			
 
				-Explicit numerical parameters are used
			
 
				-in the examples
			
 
				-to make them easier to read and to
			
 
				-illustrate typical values.
			
 
				-In many cases, number registers would be used
			
 
				-to reduce the number of places where numerical
			
 
				-information is kept,
			
 
				-and to concentrate conditional parameter initialization
			
 
				-like that which depends on whether <I>troff</I> or <I>nroff</I> is being used.
			
 
				-</P>
			
 
				-<H4>Page Margins
			
 
				-</H4>
			
 
				-<P>
			
 
				-As discussed in &#167;3,
			
 
				-header and footer macros are usually defined
			
 
				-to describe the top and bottom page margin areas respectively.
			
 
				-A trap is planted at page position 0 for the header, and at
			
 
				-<I>-N</I> (<I>N</I> from the page bottom) for the footer.
			
 
				-The simplest such definitions might be
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de hd	\"define header
			
 
				-'sp 1i
			
 
				-&amp;&amp;	\"end definition
			
 
				-&amp;de fo	\"define footer
			
 
				-'bp
			
 
				-&amp;&amp;	\"end definition
			
 
				-&amp;wh 0 hd
			
 
				-&amp;wh -1i fo
			
 
				-</PRE></TT></DL>
			
 
				-which provide blank 1 inch top and bottom margins.
			
 
				-The header will occur on the <I>first</I> page
			
 
				-only if the definition and trap exist prior to
			
 
				-the initial pseudo-page transition (&#167;3).
			
 
				-In fill mode, the output line that springs the footer trap
			
 
				-was typically forced out because some part or whole word didn't fit on it.
			
 
				-If anything in the footer and header that follows causes a break,
			
 
				-that word or part word will be forced out.
			
 
				-In this and other examples,
			
 
				-requests like <TT>bp</TT> and <TT>sp</TT> that normally cause breaks are invoked using
			
 
				-the no-break control character <TT>'</TT>
			
 
				-to avoid this.
			
 
				-When the header/footer design contains material
			
 
				-requiring independent text processing, the
			
 
				-environment may be switched, avoiding
			
 
				-most interaction with the running text.
			
 
				-</P>
			
 
				-<P>
			
 
				-A more realistic example would be
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de hd	\"header
			
 
				-&amp;if \\n%&#62;1 \{\
			
 
				-'sp ~0.5i-1	\"tl base at 0.5i
			
 
				-&amp;tl ''- % -''	\"centered page number
			
 
				-&amp;ps	\"restore size
			
 
				-&amp;ft	\"restore font
			
 
				-&amp;vs  \}	\"restore vs
			
 
				-'sp ~1.0i  	\"space to 1.0i
			
 
				-&amp;ns	\"turn on no-space mode
			
 
				-&amp;&amp;
			
 
				-&amp;de fo	\"footer
			
 
				-&amp;ps 10	\"set footer/header size
			
 
				-&amp;ft R	\"set font
			
 
				-&amp;vs 12p	\"set baseline spacing
			
 
				-&amp;if \\n%=1 \{\
			
 
				-'sp ~\\n(.pu-0.5i-1  \"tl base 0.5i up
			
 
				-&amp;tl ''- % -'' \}  \"first page number
			
 
				-'bp
			
 
				-&amp;&amp;
			
 
				-&amp;wh 0 hd
			
 
				-&amp;wh -1i fo
			
 
				-</PRE></TT></DL>
			
 
				-which sets the size, font, and baseline spacing for the
			
 
				-header/footer material, and ultimately restores them.
			
 
				-The material in this case is a page number at the bottom of the
			
 
				-first page and at the top of the remaining pages.
			
 
				-The <TT>sp</TT>'s refer to absolute positions to avoid
			
 
				-dependence on the baseline spacing.
			
 
				-Another reason for doing this in the footer
			
 
				-is that the footer is invoked by printing a line whose
			
 
				-vertical spacing swept past the trap position by possibly
			
 
				-as much as the baseline spacing.
			
 
				-No-space mode is turned on at the end of <TT>hd</TT>
			
 
				-to render ineffective
			
 
				-accidental occurrences of <TT>sp</TT> at the top of the running text.
			
 
				-</P>
			
 
				-<P>
			
 
				-This method of restoring size, font, etc., presupposes
			
 
				-that such requests (that set <I>previous</I> value) are <I>not</I>
			
 
				-used in the running text.
			
 
				-A better scheme is to save and restore both the current <I>and</I>
			
 
				-previous values as shown for size in the following:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de fo
			
 
				-&amp;nr s1 \\n(.s	\"current size
			
 
				-&amp;ps
			
 
				-&amp;nr s2 \\n(.s	\"previous size
			
 
				-&amp;  ---	\"rest of footer
			
 
				-&amp;&amp;
			
 
				-&amp;de hd
			
 
				-&amp;  ---	\"header stuff
			
 
				-&amp;ps \\n(s2  \"restore previous size
			
 
				-&amp;ps \\n(s1  \"restore current size
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-Page numbers may be printed in the bottom margin
			
 
				-by a separate macro triggered during the footer's
			
 
				-page ejection:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de bn	\"bottom number
			
 
				-&amp;tl ''- % -''	\"centered page number
			
 
				-&amp;&amp;
			
 
				-&amp;wh -0.5i-1v bn	 \"tl base 0.5i up
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<H4>Paragraphs and Headings
			
 
				-</H4>
			
 
				-<P>
			
 
				-The housekeeping
			
 
				-associated with starting a new paragraph should be collected
			
 
				-in a paragraph macro
			
 
				-that, for example,
			
 
				-does the desired preparagraph spacing,
			
 
				-forces the correct font, size, baseline spacing, and indent,
			
 
				-checks that enough space remains for <I>more than one</I> line,
			
 
				-and
			
 
				-requests a temporary indent.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de pg    \"paragraph
			
 
				-&amp;br       \"break
			
 
				-&amp;ft R     \"force font,
			
 
				-&amp;ps 10    \"size,
			
 
				-&amp;vs 12p   \"spacing,
			
 
				-&amp;in 0     \"and indent
			
 
				-&amp;sp 0.4   \"prespace
			
 
				-&amp;ne 1+\\n(.Vu  \"want more than 1 line
			
 
				-&amp;ti 0.2i         \"temp indent
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-The first break in <TT>pg</TT>
			
 
				-will force out any previous partial lines,
			
 
				-and must occur before the <TT>vs</TT>.
			
 
				-The forcing of font, etc., is
			
 
				-partly a defense against prior error and
			
 
				-partly to permit
			
 
				-things like section heading macros to
			
 
				-set parameters only once.
			
 
				-The prespacing parameter is suitable for <I>troff</I>;
			
 
				-a larger space, at least as big as the output device vertical resolution, would be
			
 
				-more suitable in <I>nroff</I>.
			
 
				-The choice of remaining space to test for in the <TT>ne</TT>
			
 
				-is the smallest amount greater than one line
			
 
				-(the <TT>.V</TT> is the available vertical resolution).
			
 
				-</P>
			
 
				-<P>
			
 
				-A macro to automatically number section headings
			
 
				-might look like:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de sc	\"section
			
 
				-&amp;  ---	\"force font, etc.
			
 
				-&amp;sp 0.4	\"prespace
			
 
				-&amp;ne 2.4+\\n(.Vu \"want 2.4+ lines
			
 
				-&amp;fi
			
 
				-\\n+S.
			
 
				-&amp;&amp;
			
 
				-&amp;nr S 0 1	\"init S
			
 
				-</PRE></TT></DL>
			
 
				-The usage is <TT>.sc</TT>,
			
 
				-followed by the section heading text,
			
 
				-followed by <TT>.pg</TT>.
			
 
				-The <TT>ne</TT> test value includes one line of heading,
			
 
				-0.4 line in the following <TT>pg</TT>, and
			
 
				-one line of the paragraph text.
			
 
				-A word consisting of the next section number and a period is
			
 
				-produced to begin the heading line.
			
 
				-The format of the number may be set by <TT>af</TT> (&#167;8).
			
 
				-</P>
			
 
				-<P>
			
 
				-Another common form is the labeled, indented paragraph,
			
 
				-where the label protrudes left into the indent space.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de lp	\"labeled paragraph
			
 
				-&amp;pg
			
 
				-&amp;in 0.5i	\"paragraph indent
			
 
				-&amp;ta 0.2i 0.5i	\"label, paragraph
			
 
				-&amp;ti 0
			
 
				-\t\\$1\t\c	\"flow into paragraph
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-The intended usage is ``<TT>.lp</TT> <I>label</I>'';
			
 
				-<I>label</I> will begin at 0.2 inch, and
			
 
				-cannot exceed a length of 0.3 inch without intruding into
			
 
				-the paragraph.
			
 
				-The label could be right adjusted against 0.4 inch by
			
 
				-setting the tabs instead with <TT>.ta|0.4iR|0.5i</TT>.
			
 
				-The last line of <TT>lp</TT> ends with <TT>\c</TT> so that
			
 
				-it will become a part of the first line of the text
			
 
				-that follows.
			
 
				-</P>
			
 
				-<H4>Multiple Column Output
			
 
				-</H4>
			
 
				-<P>
			
 
				-The production of multiple column pages requires
			
 
				-the footer macro to decide whether it was
			
 
				-invoked by other than the last column,
			
 
				-so that it will begin a new column rather than
			
 
				-produce the bottom margin.
			
 
				-The header can initialize a column register that
			
 
				-the footer will increment and test.
			
 
				-The following is arranged for two columns, but
			
 
				-is easily modified for more.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de hd	\"header
			
 
				-&amp;  ---
			
 
				-&amp;nr cl 0 1	\"init column count
			
 
				-&amp;mk	\"mark top of text
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de fo	\"footer
			
 
				-&amp;ie \\n+(cl&#60;2 \{\
			
 
				-&amp;po +3.4i	\"next column; 3.1+0.3
			
 
				-&amp;rt	\"back to mark
			
 
				-&amp;ns \}	\"no-space mode
			
 
				-&amp;el \{\
			
 
				-&amp;po \\nMu	\"restore left margin
			
 
				-&amp;  ---
			
 
				-'bp \}
			
 
				-&amp;&amp;
			
 
				-&amp;ll 3.1i	\"column width
			
 
				-&amp;nr M \\n(.o	\"save left margin
			
 
				-</PRE></TT></DL>
			
 
				-Typically a portion of the top of the first page
			
 
				-contains full width text;
			
 
				-the request for the narrower line length,
			
 
				-as well as another <TT>.mk</TT> would
			
 
				-be made where the two column output was to begin.
			
 
				-</P>
			
 
				-<H4>Footnotes
			
 
				-</H4>
			
 
				-<P>
			
 
				-The footnote mechanism to be described is used by
			
 
				-embedding the footnotes in the input text at the
			
 
				-point of reference,
			
 
				-demarcated by an initial <TT>.fn</TT> and a terminal <TT>.ef</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;fn
			
 
				-<I>Footnote text and control lines...</I>
			
 
				-&amp;ef
			
 
				-</PRE></TT></DL>
			
 
				-In the following,
			
 
				-footnotes are processed in a separate environment and diverted
			
 
				-for later printing in the space immediately prior to the bottom
			
 
				-margin.
			
 
				-There is provision for the case where the last collected
			
 
				-footnote doesn't completely fit in the available space.
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de hd	\"header
			
 
				-&amp;  ---
			
 
				-&amp;nr x 0 1	\"init footnote count
			
 
				-&amp;nr y 0-\\nb	\"current footer place
			
 
				-&amp;ch fo -\\nbu	\"reset footer trap
			
 
				-&amp;if \\n(dn .fz	\"leftover footnote
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de fo	\"footer
			
 
				-&amp;nr dn 0  \"zero last diversion size
			
 
				-&amp;if \\nx \{\
			
 
				-&amp;ev 1	\"expand footnotes in ev1
			
 
				-&amp;nf	\"retain vertical size
			
 
				-&amp;FN	\"footnotes
			
 
				-&amp;rm FN	\"delete it
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;if "\\n(.z"fy" .di  \"end overflow di
			
 
				-&amp;nr x 0	\"disable fx
			
 
				-&amp;ev  \}	\"pop environment
			
 
				-&amp;  ---
			
 
				-'bp
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de fx	\"process footnote overflow
			
 
				-&amp;if \\nx .di fy	\"divert overflow
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de fn	\"start footnote
			
 
				-&amp;da FN	\"divert (append) footnote
			
 
				-&amp;ev 1	\"in environment 1
			
 
				-&amp;if \\n+x=1 .fs   \"if 1st, separator
			
 
				-&amp;fi	\"fill mode
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de ef	\"end footnote
			
 
				-&amp;br	\"finish output
			
 
				-&amp;nr z \\n(.v	\"save spacing
			
 
				-&amp;ev	\"pop ev
			
 
				-&amp;di	\"end diversion
			
 
				-&amp;nr y -\\n(dn	\"new footer position,
			
 
				-&amp;if \\nx=1 .nr y -(\\n(.v-\\nz) \
			
 
				-	\"uncertainty correction
			
 
				-&amp;ch fo \\nyu	\"y is negative
			
 
				-&amp;if (\\n(nl+1v)&#62;(\\n(.p+\\ny) \
			
 
				-&amp;ch fo \\n(nlu+1v	 \"didn't fit
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de fs	\"separator
			
 
				-\l'1i'	\"1 inch rule
			
 
				-&amp;br
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de fz	\"get leftover footnote
			
 
				-&amp;fn
			
 
				-&amp;nf	\"retain vertical size
			
 
				-&amp;fy	\"where fx put it
			
 
				-&amp;ef
			
 
				-&amp;&amp;
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;nr b 1.0i  \"bottom margin size
			
 
				-&amp;wh 0 hd    \"header trap
			
 
				-&amp;wh 12i fo  \"footer trap-&#62;temp pos
			
 
				-&amp;wh -\\nbu fx	\"fx at footer position
			
 
				-&amp;ch fo -\\nbu	\"conceal fx with fo
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-The header <TT>hd</TT> initializes a footnote count register <TT>x</TT>,
			
 
				-and sets both the current footer trap position register <TT>y</TT> and
			
 
				-the footer trap itself to a nominal position specified in
			
 
				-register <TT>b</TT>.
			
 
				-In addition, if the register <TT>dn</TT> indicates a leftover footnote,
			
 
				-<TT>fz</TT> is invoked to reprocess it.
			
 
				-The footnote start macro <TT>fn</TT> begins a diversion (append) in environment 1,
			
 
				-and increments the count <TT>x</TT>; if the count is one, the footnote separator <TT>fs</TT>
			
 
				-is interpolated.
			
 
				-The separator is kept in a separate macro to permit user redefinition.
			
 
				-</P>
			
 
				-<P>
			
 
				-The footnote end macro <TT>ef</TT> restores
			
 
				-the previous environment and ends the diversion after saving the spacing size in register <TT>z</TT>.
			
 
				-<TT>y</TT> is then decremented by the size of the footnote, available in <TT>dn</TT>;
			
 
				-then on the first footnote, <TT>y</TT> is further decremented by the difference
			
 
				-in vertical baseline spacings of the two environments, to
			
 
				-prevent the late triggering of the footer trap from causing the last
			
 
				-line of the combined footnotes to overflow.
			
 
				-The footer trap is then set to the lower (on the page) of <TT>y</TT> or the current page position (<TT>nl</TT>)
			
 
				-plus one line, to allow for printing the reference line.
			
 
				-</P>
			
 
				-<P>
			
 
				-If indicated by <TT>x</TT>, the footer <TT>fo</TT> rereads the footnotes from <TT>FN</TT> in nofill mode
			
 
				-in environment 1,
			
 
				-and deletes <TT>FN</TT>.
			
 
				-If the footnotes were too large to fit, the macro <TT>fx</TT> will be trap-invoked to redivert
			
 
				-the overflow into <TT>fy</TT>,
			
 
				-and the register <TT>dn</TT> will later indicate to the header whether <TT>fy</TT> is empty.
			
 
				-</P>
			
 
				-<P>
			
 
				-Both <TT>fo</TT> and <TT>fx</TT> are planted in the nominal footer trap position in an order
			
 
				-that causes <TT>fx</TT> to be concealed unless the <TT>fo</TT> trap is moved.
			
 
				-The footer then terminates the overflow diversion, if necessary, and
			
 
				-zeros <TT>x</TT> to disable <TT>fx</TT>,
			
 
				-because the uncertainty correction
			
 
				-together with a not-too-late triggering of the footer can result
			
 
				-in the footnote rereading finishing before reaching the <TT>fx</TT> trap.
			
 
				-</P>
			
 
				-<P>
			
 
				-A good exercise for the student is to combine the multiple-column and footnote mechanisms.
			
 
				-</P>
			
 
				-<H4>The Last Page
			
 
				-</H4>
			
 
				-<P>
			
 
				-After the last input file has ended, <I>nroff</I> and <I>troff</I>
			
 
				-invoke the <I>end macro</I> (&#167;7), if any,
			
 
				-and when it finishes, eject the remainder of the page.
			
 
				-During the eject, any traps encountered are processed normally.
			
 
				-At the end of this last page, processing terminates
			
 
				-unless a partial line, word, or partial word remains.
			
 
				-If it is desired that another page be started, the end-macro
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-&amp;de en	\"end-macro
			
 
				-\c
			
 
				-'bp
			
 
				-&amp;&amp;
			
 
				-&amp;em en
			
 
				-</PRE></TT></DL>
			
 
				-will deposit a null partial word,
			
 
				-and produce another last page.
			
 
				-<br>&#32;<br>
			
 
				-<HR>
			
 
				-<br>&#32;<br>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<B>Special Character Names
			
 
				-</B><P>
			
 
				-The following table lists names for a set of characters,
			
 
				-most of which have traditionally been provided by <I>troff</I> using
			
 
				-the `special' or `symbol' font.
			
 
				-Many of these sequences are old ways to get what are now Unicode
			
 
				-characters;
			
 
				-Lucida Sans, for example, has glyphs corresponding to many of these
			
 
				-but does not have the special sequences.
			
 
				-Therefore
			
 
				-the <I>troff</I> sequence
			
 
				-gives the character &#191; from the Times font instead of the
			
 
				-character &#191; from the current font, in this case Lucida Sans.
			
 
				-Not all sequences print on any particular device, including this one; Peter
			
 
				-faces appear in their place.
			
 
				-<br><img src="-.162514.gif"><br>
			
 
				-</P>
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2000 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/doc/utf.html
+++ b/sys/doc/utf.html
@@ -1,1323 +0,0 @@
 
				-<html>
			
 
				-<title>
			
 
				-data
			
 
				-</title>
			
 
				-<body BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000FF" VLINK="#330088" ALINK="#FF0044">
			
 
				-<H1>Hello World
			
 
				-<br>
			
 
				-or
			
 
				-<br>
			
 
				-&#922;&#945;&#955;&#951;&#956;&#941;&#961;&#945; &#954;&#972;&#963;&#956;&#949;
			
 
				-<br>
			
 
				-or
			
 
				-<br>
			
 
				-&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;
			
 
				-</H1>
			
 
				-<DL><DD><I>Rob Pike<br>
			
 
				-Ken Thompson<br>
			
 
				-<br>&#32;<br>
			
 
				-rob,ken@plan9.bell-labs.com<br>
			
 
				-</I></DL>
			
 
				-<DL><DD><H4>ABSTRACT</H4>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> Originally appeared, in a slightly different form, in
			
 
				-Proc. of the Winter 1993 USENIX Conf.,
			
 
				-pp. 43-50,
			
 
				-San Diego
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-Plan 9 from Bell Labs has recently been converted from ASCII
			
 
				-to an ASCII-compatible variant of the Unicode Standard, a 16-bit character set.
			
 
				-In this paper we explain the reasons for the change,
			
 
				-describe the character set and representation we chose,
			
 
				-and present the programming models and software changes
			
 
				-that support the new text format.
			
 
				-Although we stopped short of full internationalization&#8212;for
			
 
				-example, system error messages are in Unixese, not Japanese&#8212;we
			
 
				-believe Plan 9 is the first system to treat the representation
			
 
				-of all major languages on a uniform, equal footing throughout all its
			
 
				-software.
			
 
				-</DL>
			
 
				-<H4>Introduction
			
 
				-</H4>
			
 
				-<P>
			
 
				-The world is multilingual but most computer systems
			
 
				-are based on English and ASCII.
			
 
				-The first release of Plan 9 [Pike90], a new distributed operating
			
 
				-system from Bell Laboratories, seemed a good occasion
			
 
				-to correct this chauvinism.
			
 
				-It is easier to make such deep changes when building new systems than
			
 
				-by refitting old ones.
			
 
				-</P>
			
 
				-<P>
			
 
				-The ANSI C standard [ANSIC] contains some guidance on the matter of
			
 
				-`wide' and `multi-byte' characters but falls far short of
			
 
				-solving the myriad associated problems.
			
 
				-We could find no literature on how to convert a
			
 
				-<I>system</I>
			
 
				-to larger character sets, although some individual
			
 
				-<I>programs</I>
			
 
				-had been converted.
			
 
				-This paper reports what we discovered as we
			
 
				-explored the problem of representing multilingual
			
 
				-text at all levels of an operating system,
			
 
				-from the file system and kernel through
			
 
				-the applications and up to the window system
			
 
				-and display.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 has not been `internationalized':
			
 
				-its manuals are in English,
			
 
				-its error messages are in English,
			
 
				-and it can display text that goes from left to right only.
			
 
				-But before we can address these other problems,
			
 
				-we need to handle, uniformly and comfortably,
			
 
				-the textual representation of all the major written languages.
			
 
				-That subproblem is richer than we had anticipated.
			
 
				-</P>
			
 
				-<H4>Standards
			
 
				-</H4>
			
 
				-<P>
			
 
				-Our first step was to select a standard.
			
 
				-At the time (January 1992),
			
 
				-there were only two viable options:
			
 
				-ISO 10646 [ISO10646] and Unicode [Unicode].
			
 
				-The documents describing both proposals were still in the draft stage.
			
 
				-</P>
			
 
				-<P>
			
 
				-The draft of ISO 10646 was not
			
 
				-very attractive to us.
			
 
				-It defined a sparse set of 32-bit characters,
			
 
				-which would be
			
 
				-hard to implement
			
 
				-and have punitive storage requirements.
			
 
				-Also, the draft attempted to
			
 
				-mollify national interests by allocating
			
 
				-16-bit subspaces to national committees
			
 
				-to partition individually.
			
 
				-The suggested mode of use was to
			
 
				-``flip'' between separate national
			
 
				-standards to implement the international standard.
			
 
				-This did not strike us as a sound basis for a character set.
			
 
				-As well, transmitting 32-bit values in a byte stream,
			
 
				-such as in pipes, would be expensive and hard to implement.
			
 
				-Since the standard does not define a byte order for such
			
 
				-transmission, the byte stream would also have to carry
			
 
				-state to enable the values to be recovered.
			
 
				-</P>
			
 
				-<P>
			
 
				-The Unicode Standard is a proposal by a consortium of mostly American
			
 
				-computer companies formed
			
 
				-to protest the technical
			
 
				-failings of ISO 10646.
			
 
				-It defines a uniform 16-bit code based on the
			
 
				-principle of unification:
			
 
				-two characters are the same if they look the
			
 
				-same even though they are from different
			
 
				-languages.
			
 
				-This principle, called Han unification,
			
 
				-allows the large Japanese, Chinese, and Korean
			
 
				-character sets to be packed comfortably into a 16-bit representation.
			
 
				-</P>
			
 
				-<P>
			
 
				-We chose the Unicode Standard for its technical merits and because its
			
 
				-code space was better defined.
			
 
				-Moreover,
			
 
				-the Unicode Consortium was derailing the
			
 
				-ISO 10646 standard.
			
 
				-(Now, in 1995,
			
 
				-ISO 10646 is a standard
			
 
				-with one 16-bit group defined,
			
 
				-which is almost exactly the Unicode Standard.
			
 
				-As most people expected, the two standards bodies
			
 
				-reached a d&eacute;tente and
			
 
				-ISO 10646 and Unicode represent the same character set.)
			
 
				-</P>
			
 
				-<P>
			
 
				-The Unicode Standard defines an adequate character set
			
 
				-but an unreasonable representation.
			
 
				-It states that all characters
			
 
				-are 16 bits wide and are communicated and stored in
			
 
				-16-bit units.
			
 
				-It also reserves a pair of characters
			
 
				-(hexadecimal FFFE and FEFF) to detect byte order
			
 
				-in transmitted text, requiring state in the byte stream.
			
 
				-(The Unicode Consortium was thinking of files, not pipes.)
			
 
				-To adopt this encoding,
			
 
				-we would have had to convert all text going
			
 
				-into and out of Plan 9 between ASCII and Unicode, which cannot be done.
			
 
				-Within a single program, in command of all its input and output,
			
 
				-it is possible to define characters as 16-bit quantities;
			
 
				-in the context of a networked system with
			
 
				-hundreds of applications on diverse machines
			
 
				-by different manufacturers,
			
 
				-it is impossible.
			
 
				-</P>
			
 
				-<P>
			
 
				-We needed a way to adapt the Unicode Standard to the tools-and-pipes
			
 
				-model of text processing embodied by the Unix system.
			
 
				-To do that, we
			
 
				-needed an ASCII-compatible textual
			
 
				-representation of Unicode characters for transmission
			
 
				-and storage.
			
 
				-In the draft ISO standard there was an informative
			
 
				-(non-required)
			
 
				-Annex
			
 
				-called UTF
			
 
				-that provided a byte stream encoding
			
 
				-of the 32-bit ISO code.
			
 
				-The encoding uses multibyte sequences composed
			
 
				-from the 190 printable characters of Latin-1
			
 
				-to represent character values larger
			
 
				-than 159.
			
 
				-</P>
			
 
				-<P>
			
 
				-The UTF encoding has several good properties.
			
 
				-By far the most important is that
			
 
				-a byte in the ASCII range 0-127 represents
			
 
				-itself in UTF.
			
 
				-Thus UTF is backward compatible with ASCII.
			
 
				-</P>
			
 
				-<P>
			
 
				-UTF has other advantages.
			
 
				-It is a byte encoding and is
			
 
				-therefore byte-order independent.
			
 
				-ASCII control characters appear in the byte stream
			
 
				-only as themselves, never as an element of a sequence
			
 
				-encoding another character,
			
 
				-so newline bytes separate lines of UTF text.
			
 
				-Finally, ANSI C's
			
 
				-<TT>strcmp</TT>
			
 
				-function applied to UTF strings preserves the ordering of Unicode characters.
			
 
				-</P>
			
 
				-<P>
			
 
				-To encode and decode UTF is expensive (involving multiplication,
			
 
				-division, and modulo operations) but workable.
			
 
				-UTF's major disadvantage is that the encoding
			
 
				-is not self-synchronizing.
			
 
				-It is in general impossible to find the character
			
 
				-boundaries in a UTF string without reading from
			
 
				-the beginning of the string, although in practice
			
 
				-control characters such as newlines,
			
 
				-tabs, and blanks provide synchronization points.
			
 
				-</P>
			
 
				-<P>
			
 
				-In August 1992,
			
 
				-X-Open circulated a proposal for another UTF-like
			
 
				-byte encoding of Unicode characters.
			
 
				-Their major concern was that an embedded character
			
 
				-in a file name
			
 
				-(in particular a slash)
			
 
				-could be part of an escape sequence in UTF and
			
 
				-therefore confuse a traditional file system.
			
 
				-Their proposal would allow all 7-bit ASCII characters
			
 
				-to represent themselves
			
 
				-<I>and only themselves</I>
			
 
				-in text.
			
 
				-Multibyte sequences would contain only characters
			
 
				-with the high bit set.
			
 
				-We proposed a modification to the new UTF that
			
 
				-would address our synchronization problem.
			
 
				-Our proposal, which was originally known informally as UTF-2 and FSS-UTF,
			
 
				-is now referred to as UTF-8 and has been approved by ISO to become
			
 
				-Annex P to ISO 10646.
			
 
				-</P>
			
 
				-<P>
			
 
				-The model for text in Plan 9 is chosen from these
			
 
				-three standards*:
			
 
				-</P>
			
 
				-<DL>
			
 
				-<DT><DT>&#32;<DD>
			
 
				-NOTE:<I> * ``That's the nice thing about standards&#8212;there's so many to choose from.'' - Andy Tannenbaum (no, the other one)
			
 
				-</I><DT>&#32;<DD></dl>
			
 
				-<br>
			
 
				-the Unicode character set encoded as a byte stream by
			
 
				-UTF-8, from
			
 
				-(soon to be) Annex P of ISO 10646.
			
 
				-Although this mixture may seem like a precarious position for us to adopt,
			
 
				-it is not as bad as it sounds.
			
 
				-ISO 10646 and the Unicode Standard have converged,
			
 
				-other systems such as Linux have adopted the same character set and encoding,
			
 
				-and the general feeling seems to be that Unicode and UTF-8 will be accepted as the way
			
 
				-to exchange text between systems.
			
 
				-The prognosis for wide acceptance is good.
			
 
				-<P>
			
 
				-There are a couple of aspects of the Unicode Standard we have not faced.
			
 
				-One is the issue of right-to-left text such as Hebrew or Arabic.
			
 
				-Since that is an issue of display, not representation, we believe
			
 
				-we can defer that problem for the moment without affecting our
			
 
				-ability to solve it later.
			
 
				-Another issue is diacriticals and `combining characters',
			
 
				-which cause overstriking of multiple Unicode characters.
			
 
				-Although necessary for some scripts, such as Thai, Arabic, and Hebrew,
			
 
				-such characters confuse the issues for Latin languages because they
			
 
				-generate multiple representations for accented characters.
			
 
				-ISO 10646 describes three levels of implementation;
			
 
				-in Plan 9 we decided not to address the issue.
			
 
				-Again, this can be labeled as a display issue and its finer points are still being debated,
			
 
				-so we felt comfortable deferring.  Ma&ntilde;ana.
			
 
				-</P>
			
 
				-<P>
			
 
				-Although we converted Plan 9 in the altruistic interests of
			
 
				-serving foreign languages, we have found the large character
			
 
				-set attractive for other reasons.  The Unicode Standard includes many
			
 
				-characters&#8212;mathematical symbols, scientific notation,
			
 
				-more general punctuation, and more&#8212;that we now use
			
 
				-daily in our work.  We no longer test our imaginations
			
 
				-to find ways to include non-ASCII symbols in our text;
			
 
				-why type
			
 
				-<TT>:-)</TT>
			
 
				-when you can use the character &#9786;?
			
 
				-Most compelling is the ability to absorb documents
			
 
				-and data that contain non-ASCII characters; our browser for the
			
 
				-Oxford English Dictionary
			
 
				-lets us see the dictionary as it really is, with pronunciation
			
 
				-in the IPA font, foreign phrases properly rendered, and so on,
			
 
				-<I>in plain text.</I>
			
 
				-</P>
			
 
				-<P>
			
 
				-In the rest of this paper, except when
			
 
				-stated otherwise, the term `UTF' refers to the UTF-8 encoding
			
 
				-of Unicode characters as adopted by Plan 9.
			
 
				-</P>
			
 
				-<H4>C Compiler
			
 
				-</H4>
			
 
				-<P>
			
 
				-The first program to be converted to UTF
			
 
				-was the C Compiler.
			
 
				-There are two levels of conversion.
			
 
				-On the syntactic level,
			
 
				-input to the C compiler
			
 
				-is UTF; on the semantic level,
			
 
				-the C language needs to define
			
 
				-how compiled programs manipulate
			
 
				-the UTF set.
			
 
				-</P>
			
 
				-<P>
			
 
				-The syntactic part is simple.
			
 
				-The ANSI C language standard defines the
			
 
				-source character set to be ASCII.
			
 
				-Since UTF is backward compatible with ASCII,
			
 
				-the compiler needs little change.
			
 
				-The only places where a larger character set
			
 
				-is allowed are in character constants, strings, and comments.
			
 
				-Since 7-bit ASCII characters can represent only
			
 
				-themselves in UTF,
			
 
				-the compiler does not have to be careful while looking
			
 
				-for the termination of a string or comment.
			
 
				-</P>
			
 
				-<P>
			
 
				-The Plan 9 compiler extends ANSI C to treat any Unicode
			
 
				-character with a value outside of the ASCII range as
			
 
				-an alphabetic.
			
 
				-To a Greek programmer or an English mathematician,
			
 
				-&#945; is a sensible and now valid variable name.
			
 
				-</P>
			
 
				-<P>
			
 
				-On the semantic level, ANSI C allows,
			
 
				-but does not tie down,
			
 
				-the notion of a
			
 
				-<I>wide character</I>
			
 
				-and admits string and character constants
			
 
				-of this type.
			
 
				-We chose the wide character type to be
			
 
				-<TT>unsigned</TT>
			
 
				-<TT>short</TT>.
			
 
				-In the libraries, the word
			
 
				-<TT>Rune</TT>
			
 
				-is defined by a
			
 
				-<TT>typedef</TT>
			
 
				-to be equivalent to
			
 
				-<TT>unsigned</TT>
			
 
				-<TT>short</TT>
			
 
				-and is
			
 
				-used to signify a Unicode character.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are surprises; for example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-L'x'	is 120
			
 
				-'x'	is 120
			
 
				-L'&yuml;'	is 255
			
 
				-'&yuml;'	is -1, stdio EOF (if char is signed)
			
 
				-L'&#945;'	is 945
			
 
				-'&#945;'	is illegal
			
 
				-</PRE></TT></DL>
			
 
				-In the string constants,
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-"&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;"
			
 
				-L"&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;",
			
 
				-</PRE></TT></DL>
			
 
				-the former is an array of
			
 
				-<TT>chars</TT>
			
 
				-with 22 elements
			
 
				-and a null byte,
			
 
				-while the latter is an array of
			
 
				-<TT>unsigned</TT>
			
 
				-<TT>shorts</TT>
			
 
				-(<TT>Runes</TT>)
			
 
				-with 8 elements and a null
			
 
				-<TT>Rune</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The Plan 9 library provides an output conversion function,
			
 
				-<TT>print</TT>
			
 
				-(analogous to
			
 
				-<TT>printf</TT>),
			
 
				-with formats
			
 
				-<TT>%c</TT>,
			
 
				-<TT>%C</TT>,
			
 
				-<TT>%s</TT>,
			
 
				-and
			
 
				-<TT>%S</TT>.
			
 
				-Since
			
 
				-<TT>print</TT>
			
 
				-produces text, its output is always UTF.
			
 
				-The character conversion
			
 
				-<TT>%c</TT>
			
 
				-(lower case) masks its argument
			
 
				-to 8 bits before converting to UTF.
			
 
				-Thus
			
 
				-<TT>L'&yuml;'</TT>
			
 
				-and
			
 
				-<TT>'&yuml;'</TT>
			
 
				-printed under
			
 
				-<TT>%c</TT>
			
 
				-will be identical,
			
 
				-but
			
 
				-<TT>L'</TT>&#945;<TT>'</TT>
			
 
				-will print as the Unicode
			
 
				-character with decimal value 177.
			
 
				-The character conversion
			
 
				-<TT>%C</TT>
			
 
				-(upper case) masks its argument
			
 
				-to 16 bits before converting to UTF.
			
 
				-Thus
			
 
				-<TT>L'&yuml;'</TT>
			
 
				-and
			
 
				-<TT>L'</TT>&#945;<TT>'</TT>
			
 
				-will print correctly under
			
 
				-<TT>%C</TT>,
			
 
				-but
			
 
				-<TT>'&yuml;'</TT>
			
 
				-will not.
			
 
				-The conversion
			
 
				-<TT>%s</TT>
			
 
				-(lower case)
			
 
				-expects a pointer to
			
 
				-<TT>char</TT>
			
 
				-and copies UTF sequences up to a null byte.
			
 
				-The conversion
			
 
				-<TT>%S</TT>
			
 
				-(upper case) expects a pointer to
			
 
				-<TT>Rune</TT>
			
 
				-and
			
 
				-performs sequential
			
 
				-<TT>%C</TT>
			
 
				-conversions until a null
			
 
				-<TT>Rune</TT>
			
 
				-is encountered.
			
 
				-</P>
			
 
				-<P>
			
 
				-Another problem in format conversion
			
 
				-is the definition of
			
 
				-<TT>%10s</TT>:
			
 
				-does the number refer to bytes or characters?
			
 
				-We decided that such formats were most
			
 
				-often used to align output columns and
			
 
				-so made the number count characters.
			
 
				-Some programs, however, use the count
			
 
				-to place blank-padded strings
			
 
				-in fixed-sized arrays.
			
 
				-These programs must be found and corrected.
			
 
				-</P>
			
 
				-<P>
			
 
				-Here is a complete example:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-#include &lt;u.h&gt;
			
 
				-
			
 
				-char c[] = "&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;";
			
 
				-Rune s[] = L"&#12371;&#12435;&#12395;&#12385;&#12399; &#19990;&#30028;";
			
 
				-
			
 
				-main(void)
			
 
				-{
			
 
				-	print("%d, %d\n", sizeof(c), sizeof(s));
			
 
				-	print("%s\n", c);
			
 
				-	print("%S\n", s);
			
 
				-}
			
 
				-</PRE></TT></DL>
			
 
				-</P>
			
 
				-<P>
			
 
				-This program prints
			
 
				-<TT>23,</TT>
			
 
				-<TT>18</TT>
			
 
				-and then two identical lines of
			
 
				-UTF text.
			
 
				-In practice,
			
 
				-<TT>%S</TT>
			
 
				-and
			
 
				-<TT>L"..."</TT>
			
 
				-are rare in programs; one reason is
			
 
				-that most formatted I/O is done in unconverted UTF.
			
 
				-</P>
			
 
				-<H4>Ramifications
			
 
				-</H4>
			
 
				-<P>
			
 
				-All programs in Plan 9 now read and write text as UTF, not ASCII.
			
 
				-This change breaks two deep-rooted symmetries implicit in most C programs:
			
 
				-</P>
			
 
				-<DL COMPACT>
			
 
				-<DT>1.<DD>
			
 
				-A character is no longer a
			
 
				-<TT>char</TT>.
			
 
				-<DT>2.<DD>
			
 
				-The internal representation (Rune) of a character now differs from its
			
 
				-external representation (UTF).
			
 
				-</dl>
			
 
				-<P>
			
 
				-In the sections that follow,
			
 
				-we show how these issues were faced in the layers of
			
 
				-system software from the operating system up to the applications.
			
 
				-The effects are wide-reaching and often surprising.
			
 
				-</P>
			
 
				-<H4>Operating system
			
 
				-</H4>
			
 
				-<P>
			
 
				-Since UTF is the only format for text in Plan 9,
			
 
				-the interface to the operating system had to be converted to UTF.
			
 
				-Text strings cross the interface in several places:
			
 
				-command arguments,
			
 
				-file names,
			
 
				-user names (people can log in using their native name),
			
 
				-error messages,
			
 
				-and miscellaneous minor places such as commands to the I/O system.
			
 
				-Little change was required: null-terminated UTF strings
			
 
				-are equivalent to null-terminated ASCII strings for most purposes
			
 
				-of the operating system.
			
 
				-The library routines described in the next section made that
			
 
				-change straightforward.
			
 
				-</P>
			
 
				-<P>
			
 
				-The window system, once called
			
 
				-<TT>8.5</TT>,
			
 
				-is now rightfully called
			
 
				-<TT>8&#189;</TT>.
			
 
				-</P>
			
 
				-<H4>Libraries
			
 
				-</H4>
			
 
				-<P>
			
 
				-A header file included by all programs (see [Pike92]) declares
			
 
				-the
			
 
				-<TT>Rune</TT>
			
 
				-type to hold 16-bit character values:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-typedef unsigned short Rune;
			
 
				-</PRE></TT></DL>
			
 
				-Also defined are several constants relevant to UTF:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-enum
			
 
				-{
			
 
				-    UTFmax    = 3,    /* maximum bytes per rune */
			
 
				-    Runesync  = 0x80, /* can't appear in UTF sequence (&lt;) */
			
 
				-    Runeself  = 0x80, /* rune==UTF sequence (&lt;) */
			
 
				-    Runeerror = 0x80, /* decoding error in UTF */
			
 
				-};
			
 
				-</PRE></TT></DL>
			
 
				-(With the original UTF,
			
 
				-<TT>Runesync</TT>
			
 
				-was hexadecimal 21 and
			
 
				-<TT>Runeself</TT>
			
 
				-was A0.)
			
 
				-<TT>UTFmax</TT>
			
 
				-bytes are sufficient
			
 
				-to hold the UTF encoding of any Unicode character.
			
 
				-Characters of value less than
			
 
				-<TT>Runesync</TT>
			
 
				-only appear in a UTF string as
			
 
				-themselves, never as part of a sequence encoding another character.
			
 
				-Characters of value less than
			
 
				-<TT>Runeself</TT>
			
 
				-encode into single bytes
			
 
				-of the same value.
			
 
				-Finally, when the library detects errors in UTF input&#8212;byte sequences
			
 
				-that are not valid UTF sequences&#8212;it converts the first byte of the
			
 
				-error sequence to the character
			
 
				-<TT>Runeerror</TT>.
			
 
				-There is little a rune-oriented program can do when given bad data
			
 
				-except exit, which is unreasonable, or carry on.
			
 
				-Originally the conversion routines, described below,
			
 
				-returned errors when given invalid UTF,
			
 
				-but we found ourselves repeatedly checking for errors and ignoring them.
			
 
				-We therefore decided to convert a bad sequence to a valid rune
			
 
				-and continue processing.
			
 
				-(The ANSI C routines, on the other hand, return errors.)
			
 
				-</P>
			
 
				-<P>
			
 
				-This technique does have the unfortunate property that converting
			
 
				-invalid UTF byte strings in and out of runes does not preserve the input,
			
 
				-but this circumstance only occurs when non-textual input is
			
 
				-given to a textual program.
			
 
				-The Unicode Standard defines an error character, value FFFD, to stand for
			
 
				-characters from other sets that it does not represent.
			
 
				-The
			
 
				-<TT>Runeerror</TT>
			
 
				-character is a different concept, related to the encoding rather than the character set, so we
			
 
				-chose a different character for it.
			
 
				-</P>
			
 
				-<P>
			
 
				-The Plan 9 C library contains a number of routines for
			
 
				-manipulating runes.
			
 
				-The first set converts between runes and UTF strings:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-extern	int	runetochar(char*, Rune*);
			
 
				-extern	int	chartorune(Rune*, char*);
			
 
				-extern	int	runelen(long);
			
 
				-extern	int	fullrune(char*, int);
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Runetochar</TT>
			
 
				-translates a single
			
 
				-<TT>Rune</TT>
			
 
				-to a UTF sequence and returns the number of bytes produced.
			
 
				-<TT>Chartorune</TT>
			
 
				-goes the other way, reporting how many bytes were consumed.
			
 
				-<TT>Runelen</TT>
			
 
				-returns the number of bytes in the UTF encoding of a rune.
			
 
				-<TT>Fullrune</TT>
			
 
				-examines a UTF string up to a specified number of bytes
			
 
				-and reports whether the string begins with a complete UTF encoding.
			
 
				-All these routines use the
			
 
				-<TT>Runeerror</TT>
			
 
				-character to work around encoding problems.
			
 
				-</P>
			
 
				-<P>
			
 
				-There is also a set of routines for examining null-terminated UTF strings,
			
 
				-based on the model of the ANSI standard
			
 
				-<TT>str</TT>
			
 
				-routines, but with
			
 
				-<TT>utf</TT>
			
 
				-substituted for
			
 
				-<TT>str</TT>
			
 
				-and
			
 
				-<TT>rune</TT>
			
 
				-for
			
 
				-<TT>chr</TT>:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-extern	int	utflen(char*);
			
 
				-extern	char*	utfrune(char*, long);
			
 
				-extern	char*	utfrrune(char*, long);
			
 
				-extern	char*	utfutf(char*, char*);
			
 
				-</PRE></TT></DL>
			
 
				-<TT>Utflen</TT>
			
 
				-returns the number of runes in a UTF string;
			
 
				-<TT>utfrune</TT>
			
 
				-returns a pointer to the first occurrence of a rune in a UTF string;
			
 
				-and
			
 
				-<TT>utfrrune</TT>
			
 
				-a pointer to the last.
			
 
				-<TT>Utfutf</TT>
			
 
				-searches for the first occurrence of a UTF string in another UTF string.
			
 
				-Given the synchronizing property of UTF-8,
			
 
				-<TT>utfutf</TT>
			
 
				-is the same as
			
 
				-<TT>strstr</TT>
			
 
				-if the arguments point to valid UTF strings.
			
 
				-</P>
			
 
				-<P>
			
 
				-It is a mistake to use
			
 
				-<TT>strchr</TT>
			
 
				-or
			
 
				-<TT>strrchr</TT>
			
 
				-unless searching for a 7-bit ASCII character, that is, a character
			
 
				-less than
			
 
				-<TT>Runeself</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-We have no routines for manipulating null-terminated arrays of
			
 
				-<TT>Runes</TT>.
			
 
				-Although they should probably exist for completeness, we have
			
 
				-found no need for them, for the same reason that
			
 
				-<TT>%S</TT>
			
 
				-and
			
 
				-<TT>L"..."</TT>
			
 
				-are rarely used.
			
 
				-</P>
			
 
				-<P>
			
 
				-Most Plan 9 programs use a new buffered I/O library, BIO, in place of
			
 
				-Standard I/O.
			
 
				-BIO contains routines to read and write UTF streams, converting to and from
			
 
				-runes.
			
 
				-<TT>Bgetrune</TT>
			
 
				-returns, as a
			
 
				-<TT>Rune</TT>
			
 
				-within a
			
 
				-<TT>long</TT>,
			
 
				-the next character in the UTF input stream;
			
 
				-<TT>Bputrune</TT>
			
 
				-takes a rune and writes its UTF representation.
			
 
				-<TT>Bungetrune</TT>
			
 
				-puts a rune back into the input stream for rereading.
			
 
				-</P>
			
 
				-<P>
			
 
				-Plan 9 programs use a simple set of macros to process command line arguments.
			
 
				-Converting these macros to UTF automatically updated the
			
 
				-argument processing of most programs.
			
 
				-In general,
			
 
				-argument flag names can no longer be held in bytes and
			
 
				-arrays of 256 bytes cannot be used to hold a set of flags.
			
 
				-</P>
			
 
				-<P>
			
 
				-We have done nothing analogous to ANSI C's locales, partly because
			
 
				-we do not feel qualified to define locales and partly because we remain
			
 
				-unconvinced of that model for dealing with the problems.
			
 
				-That is really more an issue of internationalization than conversion
			
 
				-to a larger character set; on the other hand,
			
 
				-because we have chosen a single character set that encompasses
			
 
				-most languages, some of the need for
			
 
				-locales is eliminated.
			
 
				-(We have a utility,
			
 
				-<TT>tcs</TT>,
			
 
				-that translates between UTF and other character sets.)
			
 
				-</P>
			
 
				-<P>
			
 
				-There are several reasons why our library does not follow the ANSI design
			
 
				-for wide and multi-byte characters.
			
 
				-The ANSI model was designed by a committee, untried, almost
			
 
				-as an afterthought, whereas
			
 
				-we wanted to design as we built.
			
 
				-(We made several major changes to the interface
			
 
				-as we became familiar with the problems involved.)
			
 
				-We disagree with ANSI C's handling of invalid multi-byte sequences.
			
 
				-Also, the ANSI C library is incomplete:
			
 
				-although it contains some crucial routines for handling
			
 
				-wide and multi-byte characters, there are some serious omissions.
			
 
				-For example, our software can exploit
			
 
				-the fact that UTF preserves ASCII characters in the byte stream.
			
 
				-We could remove that assumption by replacing all
			
 
				-calls to
			
 
				-<TT>strchr</TT>
			
 
				-with
			
 
				-<TT>utfrune</TT>
			
 
				-and so on.
			
 
				-(Because of the weaker properties of the original UTF,
			
 
				-we have actually done so.)
			
 
				-ANSI C cannot:
			
 
				-the standard says nothing about the representation, so portable code should
			
 
				-<I>never</I>
			
 
				-call
			
 
				-<TT>strchr</TT>,
			
 
				-yet there is no ANSI equivalent to
			
 
				-<TT>utfrune</TT>.
			
 
				-ANSI C simultaneously invalidates
			
 
				-<TT>strchr</TT>
			
 
				-and offers no replacement.
			
 
				-</P>
			
 
				-<P>
			
 
				-Finally, ANSI did nothing to integrate wide characters
			
 
				-into the I/O system: it gives no method for printing
			
 
				-wide characters.
			
 
				-We therefore needed to invent some things and decided to invent
			
 
				-everything.
			
 
				-In the end, some of our entry points do correspond closely to
			
 
				-ANSI routines&#8212;for example
			
 
				-<TT>chartorune</TT>
			
 
				-and
			
 
				-<TT>runetochar</TT>
			
 
				-are similar to
			
 
				-<TT>mbtowc</TT>
			
 
				-and
			
 
				-<TT>wctomb</TT>&#8212;but
			
 
				-Plan 9's library defines more functionality, enough
			
 
				-to write real applications comfortably.
			
 
				-</P>
			
 
				-<H4>Converting the tools
			
 
				-</H4>
			
 
				-<P>
			
 
				-The source for our tools and applications had already been converted to
			
 
				-work with Latin-1, so it was `8-bit safe', but the conversion to the Unicode
			
 
				-Standard and UTF is more involved.
			
 
				-Some programs needed no change at all:
			
 
				-<TT>cat</TT>,
			
 
				-for instance,
			
 
				-interprets its argument strings, delivered in UTF,
			
 
				-as file names that it passes uninterpreted to the
			
 
				-<TT>open</TT>
			
 
				-system call,
			
 
				-and then just copies bytes from its input to its output;
			
 
				-it never makes decisions based on the values of the bytes.
			
 
				-(Plan 9
			
 
				-<TT>cat</TT>
			
 
				-has no options such as
			
 
				-<TT>-v</TT>
			
 
				-to complicate matters.)
			
 
				-Most programs, however, needed modest change.
			
 
				-</P>
			
 
				-<P>
			
 
				-It is difficult to
			
 
				-find automatically the places that need attention,
			
 
				-but
			
 
				-<TT>grep</TT>
			
 
				-helps.
			
 
				-Software that uses the libraries conscientiously can be searched
			
 
				-for calls to library routines that examine bytes as characters:
			
 
				-<TT>strchr</TT>,
			
 
				-<TT>strrchr</TT>,
			
 
				-<TT>strstr</TT>,
			
 
				-etc.
			
 
				-Replacing these by calls to
			
 
				-<TT>utfrune</TT>,
			
 
				-<TT>utfrrune</TT>,
			
 
				-and
			
 
				-<TT>utfutf</TT>
			
 
				-is enough to fix many programs.
			
 
				-Few tools actually need to operate on runes internally;
			
 
				-more typically they need only to look for the final slash in a file
			
 
				-name and similar trivial tasks.
			
 
				-Of the 170 C source programs in the top levels of
			
 
				-<TT>/sys/src/cmd</TT>,
			
 
				-only 23 now contain the word
			
 
				-<TT>Rune</TT>.
			
 
				-</P>
			
 
				-<P>
			
 
				-The programs that
			
 
				-<I>do</I>
			
 
				-store runes internally
			
 
				-are mostly those whose
			
 
				-<I>raison</I>
			
 
				-<I>d'&ecirc;tre</I>
			
 
				-is character manipulation:
			
 
				-<TT>sam</TT>
			
 
				-(the text editor),
			
 
				-<TT>sed</TT>,
			
 
				-<TT>sort</TT>,
			
 
				-<TT>tr</TT>,
			
 
				-<TT>troff</TT>,
			
 
				-<TT>8&#189;</TT>
			
 
				-(the window system and terminal emulator),
			
 
				-and so on.
			
 
				-To decide whether to compute using runes
			
 
				-or UTF-encoded byte strings requires balancing the cost of converting
			
 
				-the data when read and written
			
 
				-against the cost of converting relevant text on demand.
			
 
				-For programs such as editors that run a long time with a relatively
			
 
				-constant dataset, runes are the better choice.
			
 
				-There are space considerations too, but they are more complicated:
			
 
				-plain ASCII text grows when converted to runes; UTF-encoded Japanese
			
 
				-shrinks.
			
 
				-</P>
			
 
				-<P>
			
 
				-Again, it is hard to automate the conversion of a program from
			
 
				-<TT>chars</TT>
			
 
				-to
			
 
				-<TT>Runes</TT>.
			
 
				-It is not enough just to change the type of variables; the assumption
			
 
				-that bytes and characters are equivalent can be insidious.
			
 
				-For instance, to clear a character array by
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-memset(buf, 0, BUFSIZE)
			
 
				-</PRE></TT></DL>
			
 
				-becomes wrong if
			
 
				-<TT>buf</TT>
			
 
				-is changed from an array of
			
 
				-<TT>chars</TT>
			
 
				-to an array of
			
 
				-<TT>Runes</TT>.
			
 
				-Any program that indexes tables based on character values needs
			
 
				-rethinking.
			
 
				-Consider
			
 
				-<TT>tr</TT>,
			
 
				-which originally used multiple 256-byte arrays for the mapping.
			
 
				-The na&iuml;ve conversion would yield multiple 65536-rune arrays.
			
 
				-Instead Plan 9
			
 
				-<TT>tr</TT>
			
 
				-saves space by building in effect
			
 
				-a run-encoded version of the map.
			
 
				-</P>
			
 
				-<P>
			
 
				-<TT>Sort</TT>
			
 
				-has related problems.
			
 
				-The cooperation of UTF and
			
 
				-<TT>strcmp</TT>
			
 
				-means that a simple sort&#173;one with no options&#173;can be done
			
 
				-on the original UTF strings using
			
 
				-<TT>strcmp</TT>.
			
 
				-With sorting options enabled, however,
			
 
				-<TT>sort</TT>
			
 
				-may need to convert its input to runes: for example,
			
 
				-option
			
 
				-<TT>-t</TT>&#945;<TT></TT>
			
 
				-requires searching for alphas in the input text to
			
 
				-crack the input into fields.
			
 
				-The field specifier
			
 
				-<TT>+3.2</TT>
			
 
				-refers to 2 runes beyond the third field.
			
 
				-Some of the other options are hopelessly provincial:
			
 
				-consider the case-folding and dictionary order options
			
 
				-(Japanese doesn't even have an official dictionary order) or
			
 
				-<TT>-M</TT>
			
 
				-which compares by case-insensitive English month name.
			
 
				-Handling these options involves the
			
 
				-larger issues of internationalization and is beyond the scope
			
 
				-of this paper and our expertise.
			
 
				-Plan 9
			
 
				-<TT>sort</TT>
			
 
				-works sensibly with options that make sense relative to the input.
			
 
				-The simple and most important options are, however, usually meaningful.
			
 
				-In particular,
			
 
				-<TT>sort</TT>
			
 
				-sorts UTF into the same order that
			
 
				-<TT>look</TT>
			
 
				-expects.
			
 
				-</P>
			
 
				-<P>
			
 
				-Regular expression-matching algorithms need rethinking to
			
 
				-be applied to UTF text.
			
 
				-Deterministic automata are usually applied to bytes;
			
 
				-converting them to operate on variable-sized byte sequences is awkward.
			
 
				-On the other hand, converting the input stream to runes adds measurable
			
 
				-expense
			
 
				-and the state tables expand
			
 
				-from size 256 to 65536; it can be expensive just to generate them.
			
 
				-For simple string searching,
			
 
				-the Boyer-Moore algorithm works with UTF provided the input is
			
 
				-guaranteed to be only valid UTF strings; however, it does not work
			
 
				-with the old UTF encoding.
			
 
				-At a more mundane level, even character classes are harder:
			
 
				-the usual bit-vector representation within a non-deterministic automaton
			
 
				-is unwieldy with 65536 characters in the alphabet.
			
 
				-</P>
			
 
				-<P>
			
 
				-We compromised.
			
 
				-An existing library for compiling and executing regular expressions
			
 
				-was adapted to work on runes, with two entry points for searching
			
 
				-in arrays of runes and arrays of chars (the pattern is always UTF text).
			
 
				-Character classes are represented internally as runs of runes;
			
 
				-the reserved value
			
 
				-<TT>FFFF</TT>
			
 
				-marks the end of the class.
			
 
				-Then
			
 
				-<I>all</I>
			
 
				-utilities that use regular expressions&#173;editors,
			
 
				-<TT>grep</TT>,
			
 
				-<TT>awk</TT>,
			
 
				-etc.&#173;except the shell, whose notation
			
 
				-was grandfathered, were converted to use the library.
			
 
				-For some programs, there was a concomitant loss of performance,
			
 
				-but there was also a strong advantage.
			
 
				-To our knowledge, Plan 9 is the only Unix-like system
			
 
				-that has a single definition and implementation of
			
 
				-regular expressions; patterns are written and interpreted
			
 
				-identically by all the programs in the system.
			
 
				-</P>
			
 
				-<P>
			
 
				-A handful of programs have the notion of character built into them
			
 
				-so strongly as to confuse the issue of what they should do with UTF input.
			
 
				-Such programs were treated as individual special cases.
			
 
				-For example,
			
 
				-<TT>wc</TT>
			
 
				-is, by default, unchanged in behavior and output; a new option,
			
 
				-<TT>-r</TT>,
			
 
				-counts the number of correctly encoded runes&#173;valid UTF sequences&#173;in
			
 
				-its input;
			
 
				-<TT>-b</TT>
			
 
				-the number of invalid sequences.
			
 
				-</P>
			
 
				-<P>
			
 
				-It took us several months to convert all the software in the system
			
 
				-to the Unicode Standard and the old UTF.
			
 
				-When we decided to convert from that to the new UTF,
			
 
				-only three things needed to be done.
			
 
				-First, we rewrote the library routines to encode and decode the
			
 
				-new UTF.  This took an evening.
			
 
				-Next, we converted all the files containing UTF
			
 
				-to the new encoding.
			
 
				-We wrote a trivial program to look for non-ASCII bytes in
			
 
				-text files and used a Plan 9 program called
			
 
				-<TT>tcs</TT>
			
 
				-(translate character set) to change encodings.
			
 
				-Finally, we recompiled all the system software;
			
 
				-the library interface was unchanged, so recompilation was sufficient
			
 
				-to effect the transformation.
			
 
				-The second two steps were done concurrently and took an afternoon.
			
 
				-We concluded that the actual encoding is relatively unimportant to the
			
 
				-software; the adoption of large characters and a byte-stream encoding
			
 
				-<I>per</I>
			
 
				-<I>se</I>
			
 
				-are much deeper issues.
			
 
				-</P>
			
 
				-<H4>Graphics and fonts
			
 
				-</H4>
			
 
				-<P>
			
 
				-Plan 9 provides only minimal support for plain text terminals.
			
 
				-It is instead designed to be used with all character input and
			
 
				-output mediated by a window system such as
			
 
				-<TT>8&#189;</TT>.
			
 
				-The window system and related software are responsible for the
			
 
				-display of UTF text as Unicode character images.
			
 
				-For plain text, the window system must provide a user-settable
			
 
				-<I>font</I>
			
 
				-that provides a (possibly empty) picture for each Unicode character.
			
 
				-Fancier applications that use bold and Italic characters
			
 
				-need multiple fonts storing multiple pictures for each
			
 
				-Unicode value.
			
 
				-All the issues are apparent, though,
			
 
				-in just the problem of
			
 
				-displaying a single image for each character, that is, the
			
 
				-Unicode equivalent of a plain text terminal.
			
 
				-With 128 or even 256 characters, a font can be just
			
 
				-an array of bitmaps.  With 65536 characters,
			
 
				-a more sophisticated design is necessary.  To store the ideographs
			
 
				-for just Japanese as 16&#215;16&#215;1 bit images,
			
 
				-the smallest they can reasonably be, takes over a quarter of a
			
 
				-megabyte.  Make the images a little larger, store more bits per
			
 
				-pixel, and hold a copy in every running application, and the
			
 
				-memory cost becomes unreasonable.
			
 
				-</P>
			
 
				-<P>
			
 
				-The structure of the bitmap graphics services is described at length elsewhere
			
 
				-[Pike91].
			
 
				-In summary, the memory holding the bitmaps is stored in the same machine that has
			
 
				-the display, mouse, and keyboard: the terminal in Plan 9 terminology,
			
 
				-the workstation in others'.
			
 
				-Access to that memory and associated services is provided
			
 
				-by device files served by system
			
 
				-software on the terminal.  One of those files,
			
 
				-<TT>/dev/bitblt</TT>,
			
 
				-interprets messages written upon it as requests for actions
			
 
				-corresponding to entry points in the graphics library:
			
 
				-allocate a bitmap, execute a raster operation, draw a text string, etc.
			
 
				-The window system
			
 
				-acts as a multiplexer that mediates access to the services
			
 
				-and resources of the terminal by simulating in each client window
			
 
				-a set of files mirroring those provided by the system.
			
 
				-That is, each window has a distinct
			
 
				-<TT>/dev/mouse</TT>,
			
 
				-<TT>/dev/bitblt</TT>,
			
 
				-and so on through which applications drive graphical
			
 
				-input and output.
			
 
				-</P>
			
 
				-<P>
			
 
				-One of the resources managed by
			
 
				-<TT>8&#189;</TT>
			
 
				-and the terminal is the set of active
			
 
				-<I>subfonts.</I>
			
 
				-Each subfont holds the
			
 
				-bitmaps and associated data structures for a sequential set of Unicode
			
 
				-characters.
			
 
				-Subfonts are stored in files and loaded into the terminal by
			
 
				-<TT>8&#189;</TT>
			
 
				-or an application.
			
 
				-For example, one subfont
			
 
				-might hold the images of the first 256 characters of the Unicode space,
			
 
				-corresponding to the Latin-1 character set;
			
 
				-another might hold the standard phonetic character set, Unicode characters
			
 
				-with value 0250 to 02E9.
			
 
				-These files are collected in directories corresponding to typefaces:
			
 
				-<TT>/lib/font/bit/pelm</TT>
			
 
				-contains the Pellucida Monospace character set, with subfonts holding
			
 
				-the Latin-1, Greek, Cyrillic and other components of the typeface.
			
 
				-A suffix on subfont files encodes (in a subfont-specific
			
 
				-way) the size of the images:
			
 
				-<TT>/lib/font/bit/pelm/latin1.9</TT>
			
 
				-contains the Latin-1 Pellucida Monospace characters with lower
			
 
				-case letters 9 pixels high;
			
 
				-<TT>/lib/font/bit/jis/jis5400.16</TT>
			
 
				-contains 16-pixel high
			
 
				-ideographs starting at Unicode value 5400.
			
 
				-</P>
			
 
				-<P>
			
 
				-The subfonts do not identify which portion of the Unicode space
			
 
				-they cover.  Instead, a
			
 
				-font file, in plain text,
			
 
				-describes how to assemble subfonts into a complete
			
 
				-character set.
			
 
				-The font file is presented as an argument to the window system
			
 
				-to determine how plain text is displayed in text windows and
			
 
				-applications.
			
 
				-Here is the beginning of the font file
			
 
				-<TT>/lib/font/bit/pelm/jis.9.font</TT>,
			
 
				-which describes the layout of a font covering that portion of
			
 
				-the Unicode Standard for which we have characters of typical
			
 
				-display size, using Japanese characters
			
 
				-to cover the Han space:
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-18	14
			
 
				-0x0000	0x00FF	latin1.9
			
 
				-0x0100	0x017E	latineur.9
			
 
				-0x0250	0x02E9	ipa.9
			
 
				-0x0386	0x03F5	greek.9
			
 
				-0x0400	0x0475	cyrillic.9
			
 
				-0x2000	0x2044	../misc/genpunc.9
			
 
				-0x2070	0x208E	supsub.9
			
 
				-0x20A0	0x20AA	currency.9
			
 
				-0x2100	0x2138	../misc/letterlike.9
			
 
				-0x2190	0x21EA	../misc/arrows
			
 
				-0x2200	0x227F	../misc/math1
			
 
				-0x2280	0x22F1	../misc/math2
			
 
				-0x2300	0x232C	../misc/tech
			
 
				-0x2500	0x257F	../misc/chart
			
 
				-0x2600	0x266F	../misc/ding
			
 
				-</PRE></TT></DL>
			
 
				-<DL><DT><DD><TT><PRE>
			
 
				-0x3000	0x303f	../jis/jis3000.16
			
 
				-0x30a1	0x30fe	../jis/katakana.16
			
 
				-0x3041	0x309e	../jis/hiragana.16
			
 
				-0x4e00	0x4fff	../jis/jis4e00.16
			
 
				-0x5000	0x51ff	../jis/jis5000.16
			
 
				-...
			
 
				-</PRE></TT></DL>
			
 
				-The first two numbers set the interline spacing of the font (18
			
 
				-pixels) and the distance from the baseline to the top of the
			
 
				-line (14 pixels).
			
 
				-When characters are displayed, they are placed so as best
			
 
				-to fit within those constraints; characters
			
 
				-too large to fit will be truncated.
			
 
				-The rest of the file associates subfont files
			
 
				-with portions of Unicode space.
			
 
				-The first four such files are in the Pellucida Monospace typeface
			
 
				-and directory; others reside in other directories.  The file names
			
 
				-are relative to the font file's own location.
			
 
				-</P>
			
 
				-<P>
			
 
				-There are several advantages to this two-level structure.
			
 
				-First, it simultaneously breaks the huge Unicode space into manageable
			
 
				-components and provides a unifying architecture for
			
 
				-assembling fonts from disjoint pieces.
			
 
				-Second, the structure promotes sharing.
			
 
				-For example, we have only one set of Japanese
			
 
				-characters but dozens of typefaces for the Latin-1 characters,
			
 
				-and this structure permits us to store only one copy of the
			
 
				-Japanese set but use it with any Roman typeface.
			
 
				-Also, customization is easy.
			
 
				-English-speaking users who don't need Japanese characters
			
 
				-but may want to read an on-line Oxford English Dictionary can
			
 
				-assemble a custom font with the
			
 
				-Latin-1 (or even just ASCII) characters and the International
			
 
				-Phonetic Alphabet (IPA).
			
 
				-Moreover, to do so requires just editing a plain text file,
			
 
				-not using a special font editing tool.
			
 
				-Finally, the structure guides the design of
			
 
				-caching protocols to improve performance and memory usage.
			
 
				-</P>
			
 
				-<P>
			
 
				-To load a complete Unicode character set into each application
			
 
				-would consume too
			
 
				-much memory and, particularly on slow terminal lines, would take
			
 
				-unreasonably long.
			
 
				-Instead, Plan 9 assembles a multi-level cache structure for
			
 
				-each font.
			
 
				-An application opens a font file, reads and parses it,
			
 
				-and allocates a data structure.
			
 
				-A message written to
			
 
				-<TT>/dev/bitblt</TT>
			
 
				-allocates an associated structure held in the terminal, in particular,
			
 
				-a bitmap to act as a cache
			
 
				-for recently used character images.
			
 
				-Other messages copy these images to bitmaps such as the screen
			
 
				-by loading characters from subfonts into the cache on demand and
			
 
				-from there to the destination bitmap.
			
 
				-The protocol to draw characters is in terms of cache indices,
			
 
				-not Unicode character number or UTF sequences.
			
 
				-These details are hidden from the application, which instead
			
 
				-sees only a subroutine to draw a string in a bitmap from a
			
 
				-given font, functions to discover character size information,
			
 
				-and routines to allocate and to free fonts.
			
 
				-</P>
			
 
				-<P>
			
 
				-As needed, whole
			
 
				-subfonts are opened by the graphics library, read, and then downloaded
			
 
				-to the terminal.
			
 
				-They are held open by the library in an LRU-replacement list.
			
 
				-Even when the program closes a subfont, it is retained
			
 
				-in the terminal for later use.
			
 
				-When the application opens the subfont, it asks the terminal
			
 
				-if it already has a copy to avoid reading it from the file
			
 
				-server if possible.
			
 
				-This level of cache has the property that the bitmaps for, say,
			
 
				-all the Japanese characters are stored only once, in the terminal;
			
 
				-the applications read only size and width information from the terminal
			
 
				-and share the images.
			
 
				-</P>
			
 
				-<P>
			
 
				-The sizes of the character and subfont caches held by the
			
 
				-application are adaptive.
			
 
				-A simple algorithm monitors the cache miss rate to enlarge and
			
 
				-shrink the caches as required.
			
 
				-The size of the character cache is limited to 2048 images maximum,
			
 
				-which in practice seems enough even for Japanese text.
			
 
				-For plain ASCII-like text it naturally stays around 128 images.
			
 
				-</P>
			
 
				-<P>
			
 
				-This mechanism sounds complicated but is implemented by only about
			
 
				-500 lines in the library and considerably less in each of the
			
 
				-terminal's graphics driver and
			
 
				-<TT>8&#189;</TT>.
			
 
				-It has the advantage that only characters that are
			
 
				-being used are loaded into memory.
			
 
				-It is also efficient: if the characters being drawn
			
 
				-are in the cache the extra overhead is negligible.
			
 
				-It works particularly well for alphabetic character sets,
			
 
				-but also adapts on demand for ideographic sets.
			
 
				-When a user first looks at Japanese text, it takes a few
			
 
				-seconds to read all the font data, but thereafter the
			
 
				-text is drawn almost as fast as regular text (the images
			
 
				-are larger, so draw a little slower).
			
 
				-Also, because the bitmaps are remembered by the terminal,
			
 
				-if a second application then looks at Japanese text
			
 
				-it starts faster than the first.
			
 
				-</P>
			
 
				-<P>
			
 
				-We considered
			
 
				-building a `font server'
			
 
				-to cache character images and associated data
			
 
				-for the applications, the window system, and the terminal.
			
 
				-We rejected this design because, although isolating
			
 
				-many of the problems of font management into a separate program,
			
 
				-it didn't simplify the applications.
			
 
				-Moreover, in a distributed system such as Plan 9 it is easy
			
 
				-to have too many special purpose servers.
			
 
				-Making the management of the fonts the concern of only
			
 
				-the essential components simplifies the system and makes
			
 
				-bootstrapping less intricate.
			
 
				-</P>
			
 
				-<H4>Input
			
 
				-</H4>
			
 
				-<P>
			
 
				-A completely different problem is how to type Unicode characters
			
 
				-as input to the system.
			
 
				-We selected an unused key on our ASCII keyboards
			
 
				-to serve as a prefix for multi-keystroke
			
 
				-sequences that generate Unicode characters.
			
 
				-For example, the character
			
 
				-<TT>&uuml;</TT>
			
 
				-is generated by the prefix key
			
 
				-(typically
			
 
				-<TT>ALT</TT>
			
 
				-or
			
 
				-<TT>Compose</TT>)
			
 
				-followed by a double quote and a lower-case
			
 
				-<TT>u</TT>.
			
 
				-When that character is read by the application, from the file
			
 
				-<TT>/dev/cons</TT>,
			
 
				-it is of course presented as its UTF encoding.
			
 
				-Such sequences generate characters from an arbitrary set that
			
 
				-includes all of Latin-1 plus a selection of mathematical
			
 
				-and technical characters.
			
 
				-An arbitrary Unicode character may be generated by typing the prefix,
			
 
				-an upper case X, and four hexadecimal digits that identify
			
 
				-the Unicode value.
			
 
				-</P>
			
 
				-<P>
			
 
				-These simple mechanisms are adequate for most of our day-to-day needs:
			
 
				-it's easy to remember to type `ALT 1 2' for &#189; or `ALT accent letter'
			
 
				-for accented Latin letters.
			
 
				-For the occasional unusual character, the cut and paste features of
			
 
				-<TT>8&#189;</TT>
			
 
				-serve well.  A program called (perhaps misleadingly)
			
 
				-<TT>unicode</TT>
			
 
				-takes as argument a hexadecimal value, and prints the UTF representation of that character,
			
 
				-which may then be picked up with the mouse and used as input.
			
 
				-</P>
			
 
				-<P>
			
 
				-These methods
			
 
				-are clearly unsatisfactory when working in a non-English language.
			
 
				-In the native country of such a language
			
 
				-the appropriate keyboard is likely to be at hand.
			
 
				-But it's also reasonable&#173;especially now that the system handles Unicode characters&#173;to
			
 
				-work in a language foreign to the keyboard.
			
 
				-</P>
			
 
				-<P>
			
 
				-For alphabetic languages such as Greek or Russian, it is
			
 
				-straightforward to construct a program that does phonetic substitution,
			
 
				-so that, for example, typing a Latin `a' yields the Greek `&#945;'.
			
 
				-Within Plan 9, such a program can be inserted transparently
			
 
				-between the real keyboard and a program such as the window system,
			
 
				-providing a manageable input device for such languages.
			
 
				-</P>
			
 
				-<P>
			
 
				-For ideographic languages such as Chinese or Japanese the problem is harder.
			
 
				-Native users of such languages have adopted methods for dealing with
			
 
				-Latin keyboards that involve a hybrid technique based on phonetics
			
 
				-to generate a list of possible symbols followed by menu selection to
			
 
				-choose the desired one.
			
 
				-Such methods can be
			
 
				-effective, but their design must be rooted in information about
			
 
				-the language unknown to non-native speakers.
			
 
				-(<TT>Cxterm</TT>,
			
 
				-a Chinese terminal emulator built by and for
			
 
				-Chinese programmers,
			
 
				-employs such a technique
			
 
				-[Pong and Zhang].)
			
 
				-Although the technical problem of implementing such a device
			
 
				-is easy in Plan 9&#173;it is just an elaboration of the technique for
			
 
				-alphabetic languages&#173;our lack of familiarity with such languages
			
 
				-has restrained our enthusiasm for building one.
			
 
				-</P>
			
 
				-<P>
			
 
				-The input problem is technically the least interesting but perhaps
			
 
				-emotionally the most important of the problems of converting a system
			
 
				-to an international character set.
			
 
				-Beyond that remain the deeper problems of internationalization
			
 
				-such as multi-lingual error messages and command names,
			
 
				-problems we are not qualified to solve.
			
 
				-With the ability to treat text of most languages on an equal
			
 
				-footing, though, we can begin down that path.
			
 
				-Perhaps people in non-English speaking countries will
			
 
				-consider adopting Plan 9, solving the input problem locally&#173;perhaps
			
 
				-just by plugging in their local terminals&#173;and begin to use
			
 
				-a system with at least the capacity to be international.
			
 
				-</P>
			
 
				-<H4>Acknowledgements
			
 
				-</H4>
			
 
				-<P>
			
 
				-Dennis Ritchie provided consultation and encouragement.
			
 
				-Bob Flandrena converted most of the standard tools to UTF.
			
 
				-Brian Kernighan suffered cheerfully with several
			
 
				-inadequate implementations and converted
			
 
				-<TT>troff</TT>
			
 
				-to UTF.
			
 
				-Rich Drechsler converted his Postscript driver to UTF.
			
 
				-John Hobby built the Postscript &#9786;.
			
 
				-We thank them all.
			
 
				-</P>
			
 
				-<H4>References
			
 
				-</H4>
			
 
				-<br>&#32;<br>
			
 
				-[ANSIC] <I>American National Standard for Information Systems -
			
 
				-Programming Language C</I>, American National Standards Institute, Inc.,
			
 
				-New York, 1990.
			
 
				-<br>&#32;<br>
			
 
				-[ISO10646]
			
 
				-ISO/IEC DIS 10646-1:1993
			
 
				-<I>Information technology -
			
 
				-Universal Multiple-Octet Coded Character Set (UCS) &#173;
			
 
				-Part 1: Architecture and Basic Multilingual Plane</I>.
			
 
				-<br>&#32;<br>
			
 
				-[Pike90] R. Pike, D. Presotto, K. Thompson, H. Trickey,
			
 
				-``Plan 9 from Bell Labs'',
			
 
				-UKUUG Proc. of the Summer 1990 Conf.,
			
 
				-London, England,
			
 
				-1990.
			
 
				-<br>&#32;<br>
			
 
				-[Pike91] R. Pike, ``8&#189;, The Plan 9 Window System'', USENIX Summer
			
 
				-Conf. Proc., Nashville, 1991, reprinted in this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Pike92] R. Pike, ``How to Use the Plan 9 C Compiler'', this volume.
			
 
				-<br>&#32;<br>
			
 
				-[Pong and Zhang] Man-Chi Pong and Yongguang Zhang, ``cxterm:
			
 
				-A Chinese Terminal Emulator for the X Window System'',
			
 
				-Software&#173;Practice and Experience,
			
 
				-Vol 22(1), 809-926, October 1992.
			
 
				-<br>&#32;<br>
			
 
				-[Unicode]
			
 
				-<I>The Unicode Standard,
			
 
				-Worldwide Character Encoding,
			
 
				-Version 1.0, Volume 1</I>,
			
 
				-The Unicode Consortium,
			
 
				-Addison Wesley,
			
 
				-New York,
			
 
				-1991.
			
 
				-<br>&#32;<br>
			
 
				-<A href=http://www.lucent.com/copyright.html>
			
 
				-Copyright</A> &#169; 2004 Lucent Technologies Inc.  All rights reserved.
			
 
				-</body></html>
			
--- a/sys/man/3/uart
+++ b/sys/man/3/uart
@@ -36,28 +36,27 @@ It accepts the following commands:
 
				 Set the baud rate to
			
 
				 .IR n .
			
 
				 .TP
			
 
				+.BI c n
			
 
				+Set hangup on DCD if
			
 
				+.I n
			
 
				+is non-zero; else clear it.
			
 
				+.TP
			
 
				 .BI d n
			
 
				 Set DTR if
			
 
				 .I n
			
 
				 is non-zero;
			
 
				 else clear it.
			
 
				 .TP
			
 
				-.BI k n
			
 
				-Send a break lasting
			
 
				+.BI e n
			
 
				+Set hangup on DSR if
			
 
				 .I n
			
 
				-milliseconds.
			
 
				+is non-zero; else clear it.
			
 
				 .TP
			
 
				-.BI r n
			
 
				-Set RTS if
			
 
				-.I n
			
 
				-is non-zero;
			
 
				-else clear it.
			
 
				+.B f
			
 
				+Flush output queue.
			
 
				 .TP
			
 
				-.BI m n
			
 
				-Obey modem CTS signal if
			
 
				-.I n
			
 
				-is non-zero;
			
 
				-else clear it.
			
 
				+.B h
			
 
				+Close input and output queues.
			
 
				 .TP
			
 
				 .BI i n
			
 
				 Enable/disable the FIFOs.
			
@@ -75,6 +74,25 @@ value of
 
				 .I n
			
 
				 causes the maximum-supported trigger level to be set.
			
 
				 .TP
			
 
				+.BI k n
			
 
				+Send a break lasting
			
 
				+.I n
			
 
				+milliseconds.
			
 
				+.TP
			
 
				+.BI l n
			
 
				+Set number of bits per byte to
			
 
				+.IR n .
			
 
				+Legal values are 5, 6, 7, or 8.
			
 
				+.TP
			
 
				+.BI m n
			
 
				+Obey modem CTS signal if
			
 
				+.I n
			
 
				+is non-zero;
			
 
				+else clear it.
			
 
				+.TP
			
 
				+.B n
			
 
				+Make writes non-blocking.
			
 
				+.TP
			
 
				 .BI p c
			
 
				 Set parity to odd if
			
 
				 .I c
			
@@ -86,16 +104,21 @@ is
 
				 .BR e ;
			
 
				 else set no parity.
			
 
				 .TP
			
 
				+.BI q n
			
 
				+Set input and output queue limits to
			
 
				+.IR n .
			
 
				+.TP
			
 
				+.BI r n
			
 
				+Set RTS if
			
 
				+.I n
			
 
				+is non-zero;
			
 
				+else clear it.
			
 
				+.TP
			
 
				 .BI s n
			
 
				 Set number of stop bits to
			
 
				 .IR n .
			
 
				 Legal values are 1 or 2.
			
 
				 .TP
			
 
				-.BI l n
			
 
				-Set number of bits per byte to
			
 
				-.IR n .
			
 
				-Legal values are 5, 6, 7, or 8.
			
 
				-.TP
			
 
				 .BI w n
			
 
				 Set the uart clock timer to
			
 
				 n times 100us.
			
--- a/sys/man/6/smtpd
+++ b/sys/man/6/smtpd
@@ -204,18 +204,20 @@ relaying is allowed only if the source IP address is in
 
				 or the destination domain is specified in
			
 
				 .BR ourdomains .
			
 
				 .SS Blocked Addresses
			
 
				-When
			
 
				-.B /mail/lib/blocked
			
 
				-exists and is readable,
			
 
				-.I smtpd
			
 
				-reads a list of banned addresses from it.
			
 
				+.I Smtpd
			
 
				+consults
			
 
				+.B /mail/ratify
			
 
				+(see
			
 
				+.IR ratfs (4))
			
 
				+for a list of banned addresses.
			
 
				 Messages received from these addresses are
			
 
				 rejected with a 5\fIxx\fP-series SMTP error code.
			
 
				 There is no option
			
 
				-to turn blocking on or off; if the file is accessible,
			
 
				-blocking is enabled on all
			
 
				+to turn blocking on or off; if 
			
 
				+.B /mail/ratify
			
 
				+is mounted,
			
 
				 .I smtpd
			
 
				-sessions, including those from trusted networks.
			
 
				+will use it, even for connections from trusted networks.
			
 
				 .PP
			
 
				 The command line format and address specifications
			
 
				 conform to the notation described above.  If the parameters
			
@@ -303,4 +305,5 @@ command line arguments applicable
 
				 to exposed systems.
			
 
				 .SH "SEE ALSO"
			
 
				 .IR mail (1),
			
 
				+.IR ratfs (4),
			
 
				 .IR scanmail (8)
			
--- a/sys/src/libventi/client.c
+++ b/sys/src/libventi/client.c
@@ -29,12 +29,19 @@ vtDial(char *host, int canfail)
 
				 	if(host == nil)
			
 
				 		host = "$venti";
			
 
				 
			
 
				-	na = netmkaddr(host, 0, "venti");
			
 
				-	fd = dial(na, 0, 0, 0);
			
 
				+	if (host == nil) {
			
 
				+		if (!canfail)
			
 
				+			werrstr("no venti host set");
			
 
				+		na = "";
			
 
				+		fd = -1;
			
 
				+	} else {
			
 
				+		na = netmkaddr(host, 0, "venti");
			
 
				+		fd = dial(na, 0, 0, 0);
			
 
				+	}
			
 
				 	if(fd < 0){
			
 
				 		rerrstr(e, sizeof e);
			
 
				 		if(!canfail){
			
 
				-			vtSetError("%s", e);
			
 
				+			vtSetError("venti dialstring %s: %s", na, e);
			
 
				 			return nil;
			
 
				 		}
			
 
				 	}