1
0

9.ms 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330
  1. .HTML "Plan 9 from Bell Labs"
  2. .TL
  3. Plan 9 from Bell Labs
  4. .AU
  5. Rob Pike
  6. Dave Presotto
  7. Sean Dorward
  8. Bob Flandrena
  9. Ken Thompson
  10. Howard Trickey
  11. Phil Winterbottom
  12. .AI
  13. .MH
  14. USA
  15. .SH
  16. Motivation
  17. .PP
  18. .FS
  19. Appeared in a slightly different form in
  20. .I
  21. Computing Systems,
  22. .R
  23. Vol 8 #3, Summer 1995, pp. 221-254.
  24. .FE
  25. By the mid 1980's, the trend in computing was
  26. away from large centralized time-shared computers towards
  27. networks of smaller, personal machines,
  28. typically UNIX `workstations'.
  29. People had grown weary of overloaded, bureaucratic timesharing machines
  30. and were eager to move to small, self-maintained systems, even if that
  31. meant a net loss in computing power.
  32. As microcomputers became faster, even that loss was recovered, and
  33. this style of computing remains popular today.
  34. .PP
  35. In the rush to personal workstations, though, some of their weaknesses
  36. were overlooked.
  37. First, the operating system they run, UNIX, is itself an old timesharing system and
  38. has had trouble adapting to ideas
  39. born after it. Graphics and networking were added to UNIX well into
  40. its lifetime and remain poorly integrated and difficult to administer.
  41. More important, the early focus on having private machines
  42. made it difficult for networks of machines to serve as seamlessly as the old
  43. monolithic timesharing systems.
  44. Timesharing centralized the management
  45. and amortization of costs and resources;
  46. personal computing fractured, democratized, and ultimately amplified
  47. administrative problems.
  48. The choice of
  49. an old timesharing operating system to run those personal machines
  50. made it difficult to bind things together smoothly.
  51. .PP
  52. Plan 9 began in the late 1980's as an attempt to have it both
  53. ways: to build a system that was centrally administered and cost-effective
  54. using cheap modern microcomputers as its computing elements.
  55. The idea was to build a time-sharing system out of workstations, but in a novel way.
  56. Different computers would handle
  57. different tasks: small, cheap machines in people's offices would serve
  58. as terminals providing access to large, central, shared resources such as computing
  59. servers and file servers. For the central machines, the coming wave of
  60. shared-memory multiprocessors seemed obvious candidates.
  61. The philosophy is much like that of the Cambridge
  62. Distributed System [NeHe82].
  63. The early catch phrase was to build a UNIX out of a lot of little systems,
  64. not a system out of a lot of little UNIXes.
  65. .PP
  66. The problems with UNIX were too deep to fix, but some of its ideas could be
  67. brought along. The best was its use of the file system to coordinate
  68. naming of and access to resources, even those, such as devices, not traditionally
  69. treated as files.
  70. For Plan 9, we adopted this idea by designing a network-level protocol, called 9P,
  71. to enable machines to access files on remote systems.
  72. Above this, we built a naming
  73. system that lets people and their computing agents build customized views
  74. of the resources in the network.
  75. This is where Plan 9 first began to look different:
  76. a Plan 9 user builds a private computing environment and recreates it wherever
  77. desired, rather than doing all computing on a private machine.
  78. It soon became clear that this model was richer
  79. than we had foreseen, and the ideas of per-process name spaces
  80. and file-system-like resources were extended throughout
  81. the system\(emto processes, graphics, even the network itself.
  82. .PP
  83. By 1989 the system had become solid enough
  84. that some of us began using it as our exclusive computing environment.
  85. This meant bringing along many of the services and applications we had
  86. used on UNIX. We used this opportunity to revisit many issues, not just
  87. kernel-resident ones, that we felt UNIX addressed badly.
  88. Plan 9 has new compilers,
  89. languages,
  90. libraries,
  91. window systems,
  92. and many new applications.
  93. Many of the old tools were dropped, while those brought along have
  94. been polished or rewritten.
  95. .PP
  96. Why be so all-encompassing?
  97. The distinction between operating system, library, and application
  98. is important to the operating system researcher but uninteresting to the
  99. user. What matters is clean functionality.
  100. By building a complete new system,
  101. we were able to solve problems where we thought they should be solved.
  102. For example, there is no real `tty driver' in the kernel; that is the job of the window
  103. system.
  104. In the modern world, multi-vendor and multi-architecture computing
  105. are essential, yet the usual compilers and tools assume the program is being
  106. built to run locally; we needed to rethink these issues.
  107. Most important, though, the test of a system is the computing
  108. environment it provides.
  109. Producing a more efficient way to run the old UNIX warhorses
  110. is empty engineering;
  111. we were more interested in whether the new ideas suggested by
  112. the architecture of the underlying system encourage a more effective way of working.
  113. Thus, although Plan 9 provides an emulation environment for
  114. running POSIX commands, it is a backwater of the system.
  115. The vast majority
  116. of system software is developed in the `native' Plan 9 environment.
  117. .PP
  118. There are benefits to having an all-new system.
  119. First, our laboratory has a history of building experimental peripheral boards.
  120. To make it easy to write device drivers,
  121. we want a system that is available in source form
  122. (no longer guaranteed with UNIX, even
  123. in the laboratory in which it was born).
  124. Also, we want to redistribute our work, which means the software
  125. must be locally produced. For example, we could have used some vendors'
  126. C compilers for our system, but even had we overcome the problems with
  127. cross-compilation, we would have difficulty
  128. redistributing the result.
  129. .PP
  130. This paper serves as an overview of the system. It discusses the architecture
  131. from the lowest building blocks to the computing environment seen by users.
  132. It also serves as an introduction to the rest of the Plan 9 Programmer's Manual,
  133. which it accompanies. More detail about topics in this paper
  134. can be found elsewhere in the manual.
  135. .SH
  136. Design
  137. .PP
  138. The view of the system is built upon three principles.
  139. First, resources are named and accessed like files in a hierarchical file system.
  140. Second, there is a standard protocol, called 9P, for accessing these
  141. resources.
  142. Third, the disjoint hierarchies provided by different services are
  143. joined together into a single private hierarchical file name space.
  144. The unusual properties of Plan 9 stem from the consistent, aggressive
  145. application of these principles.
  146. .PP
  147. A large Plan 9 installation has a number of computers networked
  148. together, each providing a particular class of service.
  149. Shared multiprocessor servers provide computing cycles;
  150. other large machines offer file storage.
  151. These machines are located in an air-conditioned machine
  152. room and are connected by high-performance networks.
  153. Lower bandwidth networks such as Ethernet or ISDN connect these
  154. servers to office- and home-resident workstations or PCs, called terminals
  155. in Plan 9 terminology.
  156. Figure 1 shows the arrangement.
  157. .KF
  158. .PS < network.pic
  159. .IP
  160. .ps -1
  161. .in .25i
  162. .ll -.25i
  163. .ps -1
  164. .vs -1
  165. .I "Figure 1. Structure of a large Plan 9 installation."
  166. CPU servers and file servers share fast local-area networks,
  167. while terminals use slower wider-area networks such as Ethernet,
  168. Datakit, or telephone lines to connect to them.
  169. Gateway machines, which are just CPU servers connected to multiple
  170. networks, allow machines on one network to see another.
  171. .ps +1
  172. .vs +1
  173. .ll +.25i
  174. .in 0
  175. .ps
  176. .sp
  177. .KE
  178. .PP
  179. The modern style of computing offers each user a dedicated workstation or PC.
  180. Plan 9's approach is different.
  181. The various machines with screens, keyboards, and mice all provide
  182. access to the resources of the network, so they are functionally equivalent,
  183. in the manner of the terminals attached to old timesharing systems.
  184. When someone uses the system, though,
  185. the terminal is temporarily personalized by that user.
  186. Instead of customizing the hardware, Plan 9 offers the ability to customize
  187. one's view of the system provided by the software.
  188. That customization is accomplished by giving local, personal names for the
  189. publicly visible resources in the network.
  190. Plan 9 provides the mechanism to assemble a personal view of the public
  191. space with local names for globally accessible resources.
  192. Since the most important resources of the network are files, the model
  193. of that view is file-oriented.
  194. .PP
  195. The client's local name space provides a way to customize the user's
  196. view of the network. The services available in the network all export file
  197. hierarchies.
  198. Those important to the user are gathered together into
  199. a custom name space; those of no immediate interest are ignored.
  200. This is a different style of use from the idea of a `uniform global name space'.
  201. In Plan 9, there are known names for services and uniform names for
  202. files exported by those services,
  203. but the view is entirely local. As an analogy, consider the difference
  204. between the phrase `my house' and the precise address of the speaker's
  205. home. The latter may be used by anyone but the former is easier to say and
  206. makes sense when spoken.
  207. It also changes meaning depending on who says it,
  208. yet that does not cause confusion.
  209. Similarly, in Plan 9 the name
  210. .CW /dev/cons
  211. always refers to the user's terminal and
  212. .CW /bin/date
  213. the correct version of the date
  214. command to run,
  215. but which files those names represent depends on circumstances such as the
  216. architecture of the machine executing
  217. .CW date .
  218. Plan 9, then, has local name spaces that obey globally understood
  219. conventions;
  220. it is the conventions that guarantee sane behavior in the presence
  221. of local names.
  222. .PP
  223. The 9P protocol is structured as a set of transactions that
  224. send a request from a client to a (local or remote) server and return the result.
  225. 9P controls file systems, not just files:
  226. it includes procedures to resolve file names and traverse the name
  227. hierarchy of the file system provided by the server.
  228. On the other hand,
  229. the client's name space is held by the client system alone, not on or with the server,
  230. a distinction from systems such as Sprite [OCDNW88].
  231. Also, file access is at the level of bytes, not blocks, which distinguishes
  232. 9P from protocols like NFS and RFS.
  233. A paper by Welch compares Sprite, NFS, and Plan 9's network file system structures [Welc94].
  234. .PP
  235. This approach was designed with traditional files in mind,
  236. but can be extended
  237. to many other resources.
  238. Plan 9 services that export file hierarchies include I/O devices,
  239. backup services,
  240. the window system,
  241. network interfaces,
  242. and many others.
  243. One example is the process file system,
  244. .CW /proc ,
  245. which provides a clean way
  246. to examine and control running processes.
  247. Precursor systems had a similar idea [Kill84], but Plan 9 pushes the
  248. file metaphor much further [PPTTW93].
  249. The file system model is well understood, both by system builders and general users,
  250. so services that present file-like interfaces are easy to build, easy to understand,
  251. and easy to use.
  252. Files come with agreed-upon rules for
  253. protection,
  254. naming,
  255. and access both local and remote,
  256. so services built this way are ready-made for a distributed system.
  257. (This is a distinction from `object-oriented' models, where these issues
  258. must be faced anew for every class of object.)
  259. Examples in the sections that follow illustrate these ideas in action.
  260. .SH
  261. The Command-level View
  262. .PP
  263. Plan 9 is meant to be used from a machine with a screen running
  264. the window system.
  265. It has no notion of `teletype' in the UNIX sense. The keyboard handling of
  266. the bare system is rudimentary, but once the window system, 8½ [Pike91],
  267. is running,
  268. text can be edited with `cut and paste' operations from a pop-up menu,
  269. copied between windows, and so on.
  270. 8½ permits editing text from the past, not just on the current input line.
  271. The text-editing capabilities of 8½ are strong enough to displace
  272. special features such as history in the shell,
  273. paging and scrolling,
  274. and mail editors.
  275. 8½ windows do not support cursor addressing and,
  276. except for one terminal emulator to simplify connecting to traditional systems,
  277. there is no cursor-addressing software in Plan 9.
  278. .PP
  279. Each window is created in a separate name space.
  280. Adjustments made to the name space in a window do not affect other windows
  281. or programs, making it safe to experiment with local modifications to the name
  282. space, for example
  283. to substitute files from the dump file system when debugging.
  284. Once the debugging is done, the window can be deleted and all trace of the
  285. experimental apparatus is gone.
  286. Similar arguments apply to the private space each window has for environment
  287. variables, notes (analogous to UNIX signals), etc.
  288. .PP
  289. Each window is created running an application, such as the shell, with
  290. standard input and output connected to the editable text of the window.
  291. Each window also has a private bitmap and multiplexed access to the
  292. keyboard, mouse, and other graphical resources through files like
  293. .CW /dev/mouse ,
  294. .CW /dev/bitblt ,
  295. and
  296. .CW /dev/cons
  297. (analogous to UNIX's
  298. .CW /dev/tty ).
  299. These files are provided by 8½, which is implemented as a file server.
  300. Unlike X windows, where a new application typically creates a new window
  301. to run in, an 8½ graphics application usually runs in the window where it starts.
  302. It is possible and efficient for an application to create a new window, but
  303. that is not the style of the system.
  304. Again contrasting to X, in which a remote application makes a network
  305. call to the X server to start running,
  306. a remote 8½ application sees the
  307. .CW mouse ,
  308. .CW bitblt ,
  309. and
  310. .CW cons
  311. files for the window as usual in
  312. .CW /dev ;
  313. it does not know whether the files are local.
  314. It just reads and writes them to control the window;
  315. the network connection is already there and multiplexed.
  316. .PP
  317. The intended style of use is to run interactive applications such as the window
  318. system and text editor on the terminal and to run computation- or file-intensive
  319. applications on remote servers.
  320. Different windows may be running programs on different machines over
  321. different networks, but by making the name space equivalent in all windows,
  322. this is transparent: the same commands and resources are available, with the same names,
  323. wherever the computation is performed.
  324. .PP
  325. The command set of Plan 9 is similar to that of UNIX.
  326. The commands fall into several broad classes. Some are new programs for
  327. old jobs: programs like
  328. .CW ls ,
  329. .CW cat ,
  330. and
  331. .CW who
  332. have familiar names and functions but are new, simpler implementations.
  333. .CW Who ,
  334. for example, is a shell script, while
  335. .CW ps
  336. is just 95 lines of C code.
  337. Some commands are essentially the same as their UNIX ancestors:
  338. .CW awk ,
  339. .CW troff ,
  340. and others have been converted to ANSI C and extended to handle
  341. Unicode, but are still the familiar tools.
  342. Some are entirely new programs for old niches: the shell
  343. .CW rc ,
  344. text editor
  345. .CW sam ,
  346. debugger
  347. .CW acid ,
  348. and others
  349. displace the better-known UNIX tools with similar jobs.
  350. Finally, about half the commands are new.
  351. .PP
  352. Compatibility was not a requirement for the system.
  353. Where the old commands or notation seemed good enough, we
  354. kept them. When they didn't, we replaced them.
  355. .SH
  356. The File Server
  357. .PP
  358. A central file server stores permanent files and presents them to the network
  359. as a file hierarchy exported using 9P.
  360. The server is a stand-alone system, accessible only over the network,
  361. designed to do its one job well.
  362. It runs no user processes, only a fixed set of routines compiled into the
  363. boot image.
  364. Rather than a set of disks or separate file systems,
  365. the main hierarchy exported by the server is a single
  366. tree, representing files on many disks.
  367. That hierarchy is
  368. shared by many users over a wide area on a variety of networks.
  369. Other file trees exported by
  370. the server include
  371. special-purpose systems such as temporary storage and, as explained
  372. below, a backup service.
  373. .PP
  374. The file server has three levels of storage.
  375. The central server in our installation has
  376. about 100 megabytes of memory buffers,
  377. 27 gigabytes of magnetic disks,
  378. and 350 gigabytes of
  379. bulk storage in a write-once-read-many (WORM) jukebox.
  380. The disk is a cache for the WORM and the memory is a cache for the disk;
  381. each is much faster, and sees about an order of magnitude more traffic,
  382. than the level it caches.
  383. The addressable data in the file system can be larger than the size of the
  384. magnetic disks, because they are only a cache;
  385. our main file server has about 40 gigabytes of active storage.
  386. .PP
  387. The most unusual feature of the file server
  388. comes from its use of a WORM device for
  389. stable storage.
  390. Every morning at 5 o'clock, a
  391. .I dump
  392. of the file system occurs automatically.
  393. The file system is frozen and
  394. all blocks modified since the last dump
  395. are queued to be written to the WORM.
  396. Once the blocks are queued,
  397. service is restored and
  398. the read-only root of the dumped
  399. file system appears in a
  400. hierarchy of all dumps ever taken, named by its date.
  401. For example, the directory
  402. .CW /n/dump/1995/0315
  403. is the root directory of an image of the file system
  404. as it appeared in the early morning of March 15, 1995.
  405. It takes a few minutes to queue the blocks,
  406. but the process to copy blocks to the WORM, which runs in the background, may take hours.
  407. .PP
  408. There are two ways the dump file system is used.
  409. The first is by the users themselves, who can browse the
  410. dump file system directly or attach pieces of
  411. it to their name space.
  412. For example, to track down a bug,
  413. it is straightforward to try the compiler from three months ago
  414. or to link a program with yesterday's library.
  415. With daily snapshots of all files,
  416. it is easy to find when a particular change was
  417. made or what changes were made on a particular date.
  418. People feel free to make large speculative changes
  419. to files in the knowledge that they can be backed
  420. out with a single
  421. copy command.
  422. There is no backup system as such;
  423. instead, because the dump
  424. is in the file name space,
  425. backup problems can be solved with
  426. standard tools
  427. such as
  428. .CW cp ,
  429. .CW ls ,
  430. .CW grep ,
  431. and
  432. .CW diff .
  433. .PP
  434. The other (very rare) use is complete system backup.
  435. In the event of disaster,
  436. the active file system can be initialized from any dump by clearing the
  437. disk cache and setting the root of
  438. the active file system to be a copy
  439. of the dumped root.
  440. Although easy to do, this is not to be taken lightly:
  441. besides losing any change made after the date of the dump, this recovery method
  442. results in a very slow system.
  443. The cache must be reloaded from WORM, which is much
  444. slower than magnetic disks.
  445. The file system takes a few days to reload the working
  446. set and regain its full performance.
  447. .PP
  448. Access permissions of files in the dump are the same
  449. as they were when the dump was made.
  450. Normal utilities have normal
  451. permissions in the dump without any special arrangement.
  452. The dump file system is read-only, though,
  453. which means that files in the dump cannot be written regardless of their permission bits;
  454. in fact, since directories are part of the read-only structure,
  455. even the permissions cannot be changed.
  456. .PP
  457. Once a file is written to WORM, it cannot be removed,
  458. so our users never see
  459. ``please clean up your files''
  460. messages and there is no
  461. .CW df
  462. command.
  463. We regard the WORM jukebox as an unlimited resource.
  464. The only issue is how long it will take to fill.
  465. Our WORM has served a community of about 50 users
  466. for five years and has absorbed daily dumps, consuming a total of
  467. 65% of the storage in the jukebox.
  468. In that time, the manufacturer has improved the technology,
  469. doubling the capacity of the individual disks.
  470. If we were to upgrade to the new media,
  471. we would have more free space than in the original empty jukebox.
  472. Technology has created storage faster than we can use it.
  473. .SH
  474. Unusual File Servers
  475. .PP
  476. Plan 9 is characterized by a variety of servers that offer
  477. a file-like interface to unusual services.
  478. Many of these are implemented by user-level processes, although the distinction
  479. is unimportant to their clients; whether a service is provided by the kernel,
  480. a user process, or a remote server is irrelevant to the way it is used.
  481. There are dozens of such servers; in this section we present three representative ones.
  482. .PP
  483. Perhaps the most remarkable file server in Plan 9 is 8½, the window system.
  484. It is discussed at length elsewhere [Pike91], but deserves a brief explanation here.
  485. 8½ provides two interfaces: to the user seated at the terminal, it offers a traditional
  486. style of interaction with multiple windows, each running an application, all controlled
  487. by a mouse and keyboard.
  488. To the client programs, the view is also fairly traditional:
  489. programs running in a window see a set of files in
  490. .CW /dev
  491. with names like
  492. .CW mouse ,
  493. .CW screen ,
  494. and
  495. .CW cons .
  496. Programs that want to print text to their window write to
  497. .CW /dev/cons ;
  498. to read the mouse, they read
  499. .CW /dev/mouse .
  500. In the Plan 9 style, bitmap graphics is implemented by providing a file
  501. .CW /dev/bitblt
  502. on which clients write encoded messages to execute graphical operations such as
  503. .CW bitblt
  504. (RasterOp).
  505. What is unusual is how this is done:
  506. 8½ is a file server, serving the files in
  507. .CW /dev
  508. to the clients running in each window.
  509. Although every window looks the same to its client,
  510. each window has a distinct set of files in
  511. .CW /dev .
  512. 8½ multiplexes its clients' access to the resources of the terminal
  513. by serving multiple sets of files. Each client is given a private name space
  514. with a
  515. .I different
  516. set of files that behave the same as in all other windows.
  517. There are many advantages to this structure.
  518. One is that 8½ serves the same files it needs for its own implementation\(emit
  519. multiplexes its own interface\(emso it may be run, recursively, as a client of itself.
  520. Also, consider the implementation of
  521. .CW /dev/tty
  522. in UNIX, which requires special code in the kernel to redirect
  523. .CW open
  524. calls to the appropriate device.
  525. Instead, in 8½ the equivalent service falls out
  526. automatically: 8½ serves
  527. .CW /dev/cons
  528. as its basic function; there is nothing extra to do.
  529. When a program wants to
  530. read from the keyboard, it opens
  531. .CW /dev/cons ,
  532. but it is a private file, not a shared one with special properties.
  533. Again, local name spaces make this possible; conventions about the consistency of
  534. the files within them make it natural.
  535. .PP
  536. 8½ has a unique feature made possible by its design.
  537. Because it is implemented as a file server,
  538. it has the power to postpone answering read requests for a particular window.
  539. This behavior is toggled by a reserved key on the keyboard.
  540. Toggling once suspends client reads from the window;
  541. toggling again resumes normal reads, which absorb whatever text has been prepared,
  542. one line at a time.
  543. This allows the user to edit multi-line input text on the screen before the application sees it,
  544. obviating the need to invoke a separate editor to prepare text such as mail
  545. messages.
  546. A related property is that reads are answered directly from the
  547. data structure defining the text on the display: text may be edited until
  548. its final newline makes the prepared line of text readable by the client.
  549. Even then, until the line is read, the text the client will read can be changed.
  550. For example, after typing
  551. .P1
  552. % make
  553. rm *
  554. .P2
  555. to the shell, the user can backspace over the final newline at any time until
  556. .CW make
  557. finishes, holding off execution of the
  558. .CW rm
  559. command, or even point with the mouse
  560. before the
  561. .CW rm
  562. and type another command to be executed first.
  563. .PP
  564. There is no
  565. .CW ftp
  566. command in Plan 9. Instead, a user-level file server called
  567. .CW ftpfs
  568. dials the FTP site, logs in on behalf of the user, and uses the FTP protocol
  569. to examine files in the remote directory.
  570. To the local user, it offers a file hierarchy, attached to
  571. .CW /n/ftp
  572. in the local name space, mirroring the contents of the FTP site.
  573. In other words, it translates the FTP protocol into 9P to offer Plan 9 access to FTP sites.
  574. The implementation is tricky;
  575. .CW ftpfs
  576. must do some sophisticated caching for efficiency and
  577. use heuristics to decode remote directory information.
  578. But the result is worthwhile:
  579. all the local file management tools such as
  580. .CW cp ,
  581. .CW grep ,
  582. .CW diff ,
  583. and of course
  584. .CW ls
  585. are available to FTP-served files exactly as if they were local files.
  586. Other systems such as Jade and Prospero
  587. have exploited the same opportunity [Rao81, Neu92],
  588. but because of local name spaces and the simplicity of implementing 9P,
  589. this approach
  590. fits more naturally into Plan 9 than into other environments.
  591. .PP
  592. One server,
  593. .CW exportfs ,
  594. is a user process that takes a portion of its own name space and
  595. makes it available to other processes by
  596. translating 9P requests into system calls to the Plan 9 kernel.
  597. The file hierarchy it exports may contain files from multiple servers.
  598. .CW Exportfs
  599. is usually run as a remote server
  600. started by a local program,
  601. either
  602. .CW import
  603. or
  604. .CW cpu .
  605. .CW Import
  606. makes a network call to the remote machine, starts
  607. .CW exportfs
  608. there, and attaches its 9P connection to the local name space. For example,
  609. .P1
  610. import helix /net
  611. .P2
  612. makes Helix's network interfaces visible in the local
  613. .CW /net
  614. directory. Helix is a central server and
  615. has many network interfaces, so this permits a machine with one network to
  616. access any of Helix's networks. After such an import, the local
  617. machine may make calls on any of the networks connected to Helix.
  618. Another example is
  619. .P1
  620. import helix /proc
  621. .P2
  622. which makes Helix's processes visible in the local
  623. .CW /proc ,
  624. permitting local debuggers to examine remote processes.
  625. .PP
  626. The
  627. .CW cpu
  628. command connects the local terminal to a remote
  629. CPU server.
  630. It works in the opposite direction to
  631. .CW import :
  632. after calling the server, it starts a
  633. .I local
  634. .CW exportfs
  635. and mounts it in the name space of a process, typically a newly created shell, on the
  636. server.
  637. It then rearranges the name space
  638. to make local device files (such as those served by
  639. the terminal's window system) visible in the server's
  640. .CW /dev
  641. directory.
  642. The effect of running a
  643. .CW cpu
  644. command is therefore to start a shell on a fast machine, one more tightly
  645. coupled to the file server,
  646. with a name space analogous
  647. to the local one.
  648. All local device files are visible remotely, so remote applications have full
  649. access to local services such as bitmap graphics,
  650. .CW /dev/cons ,
  651. and so on.
  652. This is not the same as
  653. .CW rlogin ,
  654. which does nothing to reproduce the local name space on the remote system,
  655. nor is it the same as
  656. file sharing with, say, NFS, which can achieve some name space equivalence but
  657. not the combination of access to local hardware devices, remote files, and remote
  658. CPU resources.
  659. The
  660. .CW cpu
  661. command is a uniquely transparent mechanism.
  662. For example, it is reasonable
  663. to start a window system in a window running a
  664. .CW cpu
  665. command; all windows created there automatically start processes on the CPU server.
  666. .SH
  667. Configurability and administration
  668. .PP
  669. The uniform interconnection of components in Plan 9 makes it possible to configure
  670. a Plan 9 installation many different ways.
  671. A single laptop PC can function as a stand-alone Plan 9 system;
  672. at the other extreme, our setup has central multiprocessor CPU
  673. servers and file servers and scores of terminals ranging from small PCs to
  674. high-end graphics workstations.
  675. It is such large installations that best represent how Plan 9 operates.
  676. .PP
  677. The system software is portable and the same
  678. operating system runs on all hardware.
  679. Except for performance, the appearance of the system on, say,
  680. an SGI workstation is the same
  681. as on a laptop.
  682. Since computing and file services are centralized, and terminals have
  683. no permanent file storage, all terminals are functionally identical.
  684. In this way, Plan 9 has one of the good properties of old timesharing systems, where
  685. a user could sit in front of any machine and see the same system. In the modern
  686. workstation community, machines tend to be owned by people who customize them
  687. by storing private information on local disk.
  688. We reject this style of use,
  689. although the system itself can be used this way.
  690. In our group, we have a laboratory with many public-access machines\(ema terminal
  691. room\(emand a user may sit down at any one of them and work.
  692. .PP
  693. Central file servers centralize not just the files, but also their administration
  694. and maintenance.
  695. In fact, one server is the main server, holding all system files; other servers provide
  696. extra storage or are available for debugging and other special uses, but the system
  697. software resides on one machine.
  698. This means that each program
  699. has a single copy of the binary for each architecture, so it is
  700. trivial to install updates and bug fixes.
  701. There is also a single user database; there is no need to synchronize distinct
  702. .CW /etc/passwd
  703. files.
  704. On the other hand, depending on a single central server does limit the size of an installation.
  705. .PP
  706. Another example of the power of centralized file service
  707. is the way Plan 9 administers network information.
  708. On the central server there is a directory,
  709. .CW /lib/ndb ,
  710. that contains all the information necessary to administer the local Ethernet and
  711. other networks.
  712. All the machines use the same database to talk to the network; there is no
  713. need to manage a distributed naming system or keep parallel files up to date.
  714. To install a new machine on the local Ethernet, choose a
  715. name and IP address and add these to a single file in
  716. .CW /lib/ndb ;
  717. all the machines in the installation will be able to talk to it immediately.
  718. To start running, plug the machine into the network, turn it on, and use BOOTP
  719. and TFTP to load the kernel.
  720. All else is automatic.
  721. .PP
  722. Finally,
  723. the automated dump file system frees all users from the need to maintain
  724. their systems, while providing easy access to backup files without
  725. tapes, special commands, or the involvement of support staff.
  726. It is difficult to overstate the improvement in lifestyle afforded by this service.
  727. .PP
  728. Plan 9 runs on a variety of hardware without
  729. constraining how to configure an installation.
  730. In our laboratory, we
  731. chose to use central servers because they amortize costs and administration.
  732. A sign that this is a good decision is that our cheap
  733. terminals remain comfortable places
  734. to work for about five years, much longer than workstations that must provide
  735. the complete computing environment.
  736. We do, however, upgrade the central machines, so
  737. the computation available from even old Plan 9 terminals improves with time.
  738. The money saved by avoiding regular upgrades of terminals
  739. is instead spent on the newest, fastest multiprocessor servers.
  740. We estimate this costs about half as much as networked workstations
  741. yet provides general access to more powerful machines.
  742. .SH
  743. C Programming
  744. .PP
  745. Plan 9 utilities are written in several languages.
  746. Some are scripts for the shell,
  747. .CW rc
  748. [Duff90]; a handful
  749. are written in a new C-like concurrent language called Alef [Wint95], described below.
  750. The great majority, though, are written in a dialect of ANSI C [ANSIC].
  751. Of these, most are entirely new programs, but some
  752. originate in pre-ANSI C code
  753. from our research UNIX system [UNIX85].
  754. These have been updated to ANSI C
  755. and reworked for portability and cleanliness.
  756. .PP
  757. The Plan 9 C dialect has some minor extensions,
  758. described elsewhere [Pike95], and a few major restrictions.
  759. The most important restriction is that the compiler demands that
  760. all function definitions have ANSI prototypes
  761. and all function calls appear in the scope of a prototyped declaration
  762. of the function.
  763. As a stylistic rule,
  764. the prototyped declaration is placed in a header file
  765. included by all files that call the function.
  766. Each system library has an associated header file, declaring all
  767. functions in that library.
  768. For example, the standard Plan 9 library is called
  769. .CW libc ,
  770. so all C source files include
  771. .CW <libc.h> .
  772. These rules guarantee that all functions
  773. are called with arguments having the expected types \(em something
  774. that was not true with pre-ANSI C programs.
  775. .PP
  776. Another restriction is that the C compilers accept only a subset of the
  777. preprocessor directives required by ANSI.
  778. The main omission is
  779. .CW #if ,
  780. since we believe it
  781. is never necessary and often abused.
  782. Also, its effect is
  783. better achieved by other means.
  784. For instance, an
  785. .CW #if
  786. used to toggle a feature at compile time can be written
  787. as a regular
  788. .CW if
  789. statement, relying on compile-time constant folding and
  790. dead code elimination to discard object code.
  791. .PP
  792. Conditional compilation, even with
  793. .CW #ifdef ,
  794. is used sparingly in Plan 9.
  795. The only architecture-dependent
  796. .CW #ifdefs
  797. in the system are in low-level routines in the graphics library.
  798. Instead, we avoid such dependencies or, when necessary, isolate
  799. them in separate source files or libraries.
  800. Besides making code hard to read,
  801. .CW #ifdefs
  802. make it impossible to know what source is compiled into the binary
  803. or whether source protected by them will compile or work properly.
  804. They make it harder to maintain software.
  805. .PP
  806. The standard Plan 9 library overlaps much of
  807. ANSI C and POSIX [POSIX], but diverges
  808. when appropriate to Plan 9's goals or implementation.
  809. When the semantics of a function
  810. change, we also change the name.
  811. For instance, instead of UNIX's
  812. .CW creat ,
  813. Plan 9 has a
  814. .CW create
  815. function that takes three arguments,
  816. the original two plus a third that, like the second
  817. argument of
  818. .CW open ,
  819. defines whether the returned file descriptor is to be opened for reading,
  820. writing, or both.
  821. This design was forced by the way 9P implements creation,
  822. but it also simplifies the common use of
  823. .CW create
  824. to initialize a temporary file.
  825. .PP
  826. Another departure from ANSI C is that Plan 9 uses a 16-bit character set
  827. called Unicode [ISO10646, Unicode].
  828. Although we stopped short of full internationalization,
  829. Plan 9 treats the representation
  830. of all major languages uniformly throughout all its
  831. software.
  832. To simplify the exchange of text between programs, the characters are packed into
  833. a byte stream by an encoding we designed, called UTF-8,
  834. which is now
  835. becoming accepted as a standard [FSSUTF].
  836. It has several attractive properties,
  837. including byte-order independence,
  838. backwards compatibility with ASCII,
  839. and ease of implementation.
  840. .PP
  841. There are many problems in adapting existing software to a large
  842. character set with an encoding that represents characters with
  843. a variable number of bytes.
  844. ANSI C addresses some of the issues but
  845. falls short of
  846. solving them all.
  847. It does not pick a character set encoding and does not
  848. define all the necessary I/O library routines.
  849. Furthermore, the functions it
  850. .I does
  851. define have engineering problems.
  852. Since the standard left too many problems unsolved,
  853. we decided to build our own interface.
  854. A separate paper has the details [Pike93].
  855. .PP
  856. A small class of Plan 9 programs do not follow the conventions
  857. discussed in this section.
  858. These are programs imported from and maintained by
  859. the UNIX community;
  860. .CW tex
  861. is a representative example.
  862. To avoid reconverting such programs every time a new version
  863. is released,
  864. we built a porting environment, called the ANSI C/POSIX Environment, or APE [Tric95].
  865. APE comprises separate include files, libraries, and commands,
  866. conforming as much as possible to the strict ANSI C and base-level
  867. POSIX specifications.
  868. To port network-based software such as X Windows, it was necessary to add
  869. some extensions to those
  870. specifications, such as the BSD networking functions.
  871. .SH
  872. Portability and Compilation
  873. .PP
  874. Plan 9 is portable across a variety of processor architectures.
  875. Within a single computing session, it is common to use
  876. several architectures: perhaps the window system running on
  877. an Intel processor connected to a MIPS-based CPU server with files
  878. resident on a SPARC system.
  879. For this heterogeneity to be transparent, there must be conventions
  880. about data interchange between programs; for software maintenance
  881. to be straightforward, there must be conventions about cross-architecture
  882. compilation.
  883. .PP
  884. To avoid byte order problems,
  885. data is communicated between programs as text whenever practical.
  886. Sometimes, though, the amount of data is large enough that a binary
  887. format is necessary;
  888. such data is communicated as a byte stream with a pre-defined encoding
  889. for multi-byte values.
  890. In the rare cases where a format
  891. is complex enough to be defined by a data structure,
  892. the structure is never
  893. communicated as a unit; instead, it is decomposed into
  894. individual fields, encoded as an ordered byte stream, and then
  895. reassembled by the recipient.
  896. These conventions affect data
  897. ranging from kernel or application program state information to object file
  898. intermediates generated by the compiler.
  899. .PP
  900. Programs, including the kernel, often present their data
  901. through a file system interface,
  902. an access mechanism that is inherently portable.
  903. For example, the system clock is represented by a decimal number in the file
  904. .CW /dev/time ;
  905. the
  906. .CW time
  907. library function (there is no
  908. .CW time
  909. system call) reads the file and converts it to binary.
  910. Similarly, instead of encoding the state of an application
  911. process in a series of flags and bits in private memory,
  912. the kernel
  913. presents a text string in the file named
  914. .CW status
  915. in the
  916. .CW /proc
  917. file system associated with each process.
  918. The Plan 9
  919. .CW ps
  920. command is trivial: it prints the contents of
  921. the desired status files after some minor reformatting; moreover, after
  922. .P1
  923. import helix /proc
  924. .P2
  925. a local
  926. .CW ps
  927. command reports on the status of Helix's processes.
  928. .PP
  929. Each supported architecture has its own compilers and loader.
  930. The C and Alef compilers produce intermediate files that
  931. are portably encoded; the contents
  932. are unique to the target architecture but the format of the
  933. file is independent of compiling processor type.
  934. When a compiler for a given architecture is compiled on
  935. another type of processor and then used to compile a program
  936. there,
  937. the intermediate produced on
  938. the new architecture is identical to the intermediate
  939. produced on the native processor. From the compiler's
  940. point of view, every compilation is a cross-compilation.
  941. .PP
  942. Although each architecture's loader accepts only intermediate files produced
  943. by compilers for that architecture,
  944. such files could have been generated by a compiler executing
  945. on any type of processor.
  946. For instance, it is possible to run
  947. the MIPS compiler on a 486, then use the MIPS loader on a
  948. SPARC to produce a MIPS executable.
  949. .PP
  950. Since Plan 9 runs on a variety of architectures, even in a single installation,
  951. distinguishing the compilers and intermediate names
  952. simplifies multi-architecture
  953. development from a single source tree.
  954. The compilers and the loader for each architecture are
  955. uniquely named; there is no
  956. .CW cc
  957. command.
  958. The names are derived by concatenating a code letter
  959. associated with the target architecture with the name of the
  960. compiler or loader. For example, the letter `8' is
  961. the code letter for Intel
  962. .I x 86
  963. processors; the C compiler is named
  964. .CW 8c ,
  965. the Alef compiler
  966. .CW 8al ,
  967. and the loader is called
  968. .CW 8l .
  969. Similarly, the compiler intermediate files are suffixed
  970. .CW .8 ,
  971. not
  972. .CW .o .
  973. .PP
  974. The Plan 9
  975. build program
  976. .CW mk ,
  977. a relative of
  978. .CW make ,
  979. reads the names of the current and target
  980. architectures from environment variables called
  981. .CW $cputype
  982. and
  983. .CW $objtype .
  984. By default the current processor is the target, but setting
  985. .CW $objtype
  986. to the name of another architecture
  987. before invoking
  988. .CW mk
  989. results in a cross-build:
  990. .P1
  991. % objtype=sparc mk
  992. .P2
  993. builds a program for the SPARC architecture regardless of the executing machine.
  994. The value of
  995. .CW $objtype
  996. selects a
  997. file of architecture-dependent variable definitions
  998. that configures the build to use the appropriate compilers and loader.
  999. Although simple-minded, this technique works well in practice:
  1000. all applications in Plan 9 are built from a single source tree
  1001. and it is possible to build the various architectures in parallel without conflict.
  1002. .SH
  1003. Parallel programming
  1004. .PP
  1005. Plan 9's support for parallel programming has two aspects.
  1006. First, the kernel provides
  1007. a simple process model and a few carefully designed system calls for
  1008. synchronization and sharing.
  1009. Second, a new parallel programming language called Alef
  1010. supports concurrent programming.
  1011. Although it is possible to write parallel
  1012. programs in C, Alef is the parallel language of choice.
  1013. .PP
  1014. There is a trend in new operating systems to implement two
  1015. classes of processes: normal UNIX-style processes and light-weight
  1016. kernel threads.
  1017. Instead, Plan 9 provides a single class of process but allows fine control of the
  1018. sharing of a process's resources such as memory and file descriptors.
  1019. A single class of process is a
  1020. feasible approach in Plan 9 because the kernel has an efficient system
  1021. call interface and cheap process creation and scheduling.
  1022. .PP
  1023. Parallel programs have three basic requirements:
  1024. management of resources shared between processes,
  1025. an interface to the scheduler,
  1026. and fine-grain process synchronization using spin locks.
  1027. On Plan 9,
  1028. new processes are created using the
  1029. .CW rfork
  1030. system call.
  1031. .CW Rfork
  1032. takes a single argument,
  1033. a bit vector that specifies
  1034. which of the parent process's resources should be shared,
  1035. copied, or created anew
  1036. in the child.
  1037. The resources controlled by
  1038. .CW rfork
  1039. include the name space,
  1040. the environment,
  1041. the file descriptor table,
  1042. memory segments,
  1043. and notes (Plan 9's analog of UNIX signals).
  1044. One of the bits controls whether the
  1045. .CW rfork
  1046. call will create a new process; if the bit is off, the resulting
  1047. modification to the resources occurs in the process making the call.
  1048. For example, a process calls
  1049. .CW rfork(RFNAMEG)
  1050. to disconnect its name space from its parent's.
  1051. Alef uses a
  1052. fine-grained fork in which all the resources, including
  1053. memory, are shared between parent
  1054. and child, analogous to creating a kernel thread in many systems.
  1055. .PP
  1056. An indication that
  1057. .CW rfork
  1058. is the right model is the variety of ways it is used.
  1059. Other than the canonical use in the library routine
  1060. .CW fork ,
  1061. it is hard to find two calls to
  1062. .CW rfork
  1063. with the same bits set; programs
  1064. use it to create many different forms of sharing and resource allocation.
  1065. A system with just two types of processes\(emregular processes and threads\(emcould
  1066. not handle this variety.
  1067. .PP
  1068. There are two ways to share memory.
  1069. First, a flag to
  1070. .CW rfork
  1071. causes all the memory segments of the parent to be shared with the child
  1072. (except the stack, which is
  1073. forked copy-on-write regardless).
  1074. Alternatively, a new segment of memory may be
  1075. attached using the
  1076. .CW segattach
  1077. system call; such a segment
  1078. will always be shared between parent and child.
  1079. .PP
  1080. The
  1081. .CW rendezvous
  1082. system call provides a way for processes to synchronize.
  1083. Alef uses it to implement communication channels,
  1084. queuing locks,
  1085. multiple reader/writer locks, and
  1086. the sleep and wakeup mechanism.
  1087. .CW Rendezvous
  1088. takes two arguments, a tag and a value.
  1089. When a process calls
  1090. .CW rendezvous
  1091. with a tag it sleeps until another process
  1092. presents a matching tag.
  1093. When a pair of tags match, the values are exchanged
  1094. between the two processes and both
  1095. .CW rendezvous
  1096. calls return.
  1097. This primitive is sufficient to implement the full set of synchronization routines.
  1098. .PP
  1099. Finally, spin locks are provided by
  1100. an architecture-dependent library at user level.
  1101. Most processors provide atomic test-and-set instructions that
  1102. can be used to implement locks.
  1103. A notable exception is the MIPS R3000, so the SGI
  1104. Power series multiprocessors have special lock hardware on the bus.
  1105. User processes gain access to the lock hardware
  1106. by mapping pages of hardware locks
  1107. into their address space using the
  1108. .CW segattach
  1109. system call.
  1110. .PP
  1111. A Plan 9 process in a system call will block regardless of its `weight'.
  1112. This means that when a program wishes to read from a slow
  1113. device without blocking the entire calculation, it must fork a process to do
  1114. the read for it. The solution is to start a satellite
  1115. process that does the I/O and delivers the answer to the main program
  1116. through shared memory or perhaps a pipe.
  1117. This sounds onerous but works easily and efficiently in practice; in fact,
  1118. most interactive Plan 9 applications, even relatively ordinary ones written
  1119. in C, such as
  1120. the text editor Sam [Pike87], run as multiprocess programs.
  1121. .PP
  1122. The kernel support for parallel programming in Plan 9 is a few hundred lines
  1123. of portable code; a handful of simple primitives enable the problems to be handled
  1124. cleanly at user level.
  1125. Although the primitives work fine from C,
  1126. they are particularly expressive from within Alef.
  1127. The creation
  1128. and management of slave I/O processes can be written in a few lines of Alef,
  1129. providing the foundation for a consistent means of multiplexing
  1130. data flows between arbitrary processes.
  1131. Moreover, implementing it in a language rather than in the kernel
  1132. ensures consistent semantics between all devices
  1133. and provides a more general multiplexing primitive.
  1134. Compare this to the UNIX
  1135. .CW select
  1136. system call:
  1137. .CW select
  1138. applies only to a restricted set of devices,
  1139. legislates a style of multiprogramming in the kernel,
  1140. does not extend across networks,
  1141. is difficult to implement, and is hard to use.
  1142. .PP
  1143. Another reason
  1144. parallel programming is important in Plan 9 is that
  1145. multi-threaded user-level file servers are the preferred way
  1146. to implement services.
  1147. Examples of such servers include the programming environment
  1148. Acme [Pike94],
  1149. the name space exporting tool
  1150. .CW exportfs
  1151. [PPTTW93],
  1152. the HTTP daemon,
  1153. and the network name servers
  1154. .CW cs
  1155. and
  1156. .CW dns
  1157. [PrWi93].
  1158. Complex applications such as Acme prove that
  1159. careful operating system support can reduce the difficulty of writing
  1160. multi-threaded applications without moving threading and
  1161. synchronization primitives into the kernel.
  1162. .SH
  1163. Implementation of Name Spaces
  1164. .PP
  1165. User processes construct name spaces using three system calls:
  1166. .CW mount ,
  1167. .CW bind ,
  1168. and
  1169. .CW unmount .
  1170. The
  1171. .CW mount
  1172. system call attaches a tree served by a file server to
  1173. the current name space. Before calling
  1174. .CW mount ,
  1175. the client must (by outside means) acquire a connection to the server in
  1176. the form of a file descriptor that may be written and read to transmit 9P messages.
  1177. That file descriptor represents a pipe or network connection.
  1178. .PP
  1179. The
  1180. .CW mount
  1181. call attaches a new hierarchy to the existing name space.
  1182. The
  1183. .CW bind
  1184. system call, on the other hand, duplicates some piece of existing name space at
  1185. another point in the name space.
  1186. The
  1187. .CW unmount
  1188. system call allows components to be removed.
  1189. .PP
  1190. Using
  1191. either
  1192. .CW bind
  1193. or
  1194. .CW mount ,
  1195. multiple directories may be stacked at a single point in the name space.
  1196. In Plan 9 terminology, this is a
  1197. .I union
  1198. directory and behaves like the concatenation of the constituent directories.
  1199. A flag argument to
  1200. .CW bind
  1201. and
  1202. .CW mount
  1203. specifies the position of a new directory in the union,
  1204. permitting new elements
  1205. to be added either at the front or rear of the union or to replace it entirely.
  1206. When a file lookup is performed in a union directory, each component
  1207. of the union is searched in turn and the first match taken; likewise,
  1208. when a union directory is read, the contents of each of the component directories
  1209. are read in turn.
  1210. Union directories are one of the most widely used organizational features
  1211. of the Plan 9 name space.
  1212. For instance, the directory
  1213. .CW /bin
  1214. is built as a union of
  1215. .CW /$cputype/bin
  1216. (program binaries),
  1217. .CW /rc/bin
  1218. (shell scripts),
  1219. and perhaps more directories provided by the user.
  1220. This construction makes the shell
  1221. .CW $PATH
  1222. variable unnecessary.
  1223. .PP
  1224. One question raised by union directories
  1225. is which element of the union receives a newly created file.
  1226. After several designs, we decided on the following.
  1227. By default, directories in unions do not accept new files, although the
  1228. .CW create
  1229. system call applied to an existing file succeeds normally.
  1230. When a directory is added to the union, a flag to
  1231. .CW bind
  1232. or
  1233. .CW mount
  1234. enables create permission (a property of the name space) in that directory.
  1235. When a file is being created with a new name in a union, it is created in the
  1236. first directory of the union with create permission; if that creation fails,
  1237. the entire
  1238. .CW create
  1239. fails.
  1240. This scheme enables the common use of placing a private directory anywhere
  1241. in a union of public ones,
  1242. while allowing creation only in the private directory.
  1243. .PP
  1244. By convention, kernel device file systems
  1245. are bound into the
  1246. .CW /dev
  1247. directory, but to bootstrap the name space building process it is
  1248. necessary to have a notation that permits
  1249. direct access to the devices without an existing name space.
  1250. The root directory
  1251. of the tree served by a device driver can be accessed using the syntax
  1252. .CW # \f2c\f1,
  1253. where
  1254. .I c
  1255. is a unique character (typically a letter) identifying the
  1256. .I type
  1257. of the device.
  1258. Simple device drivers serve a single level directory containing a few files.
  1259. As an example,
  1260. each serial port is represented by a data and a control file:
  1261. .P1
  1262. % bind -a '#t' /dev
  1263. % cd /dev
  1264. % ls -l eia*
  1265. --rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia1
  1266. --rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia1ctl
  1267. --rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia2
  1268. --rw-rw-rw- t 0 bootes bootes 0 Feb 24 21:14 eia2ctl
  1269. .P2
  1270. The
  1271. .CW bind
  1272. program is an encapsulation of the
  1273. .CW bind
  1274. system call; its
  1275. .CW -a
  1276. flag positions the new directory at the end of the union.
  1277. The data files
  1278. .CW eia1
  1279. and
  1280. .CW eia2
  1281. may be read and written to communicate over the serial line.
  1282. Instead of using special operations on these files to control the devices,
  1283. commands written to the files
  1284. .CW eia1ctl
  1285. and
  1286. .CW eia2ctl
  1287. control the corresponding device;
  1288. for example,
  1289. writing the text string
  1290. .CW b1200
  1291. to
  1292. .CW /dev/eia1ctl
  1293. sets the speed of that line to 1200 baud.
  1294. Compare this to the UNIX
  1295. .CW ioctl
  1296. system call: in Plan 9, devices are controlled by textual messages,
  1297. free of byte order problems, with clear semantics for reading and writing.
  1298. It is common to configure or debug devices using shell scripts.
  1299. .PP
  1300. It is the universal use of the 9P protocol that
  1301. connects Plan 9's components together to form a
  1302. distributed system.
  1303. Rather than inventing a unique protocol for each
  1304. service such as
  1305. .CW rlogin ,
  1306. FTP, TFTP, and X windows,
  1307. Plan 9 implements services
  1308. in terms of operations on file objects,
  1309. and then uses a single, well-documented protocol to exchange information between
  1310. computers.
  1311. Unlike NFS, 9P treats files as a sequence of bytes rather than blocks.
  1312. Also unlike NFS, 9P is stateful: clients perform
  1313. remote procedure calls to establish pointers to objects in the remote
  1314. file server.
  1315. These pointers are called file identifiers or
  1316. .I fids .
  1317. All operations on files supply a fid to identify an object in the remote file system.
  1318. .PP
  1319. The 9P protocol defines 17 messages, providing
  1320. means to authenticate users, navigate fids around
  1321. a file system hierarchy, copy fids, perform I/O, change file attributes,
  1322. and create and delete files.
  1323. Its complete specification is in Section 5 of the Programmer's Manual [9man].
  1324. Here is the procedure to gain access to the name hierarchy supplied by a server.
  1325. A file server connection is established via a pipe or network connection.
  1326. An initial
  1327. .CW session
  1328. message performs a bilateral authentication between client and server.
  1329. An
  1330. .CW attach
  1331. message then connects a fid suggested by the client to the root of the server file
  1332. tree.
  1333. The
  1334. .CW attach
  1335. message includes the identity of the user performing the attach; henceforth all
  1336. fids derived from the root fid will have permissions associated with
  1337. that user.
  1338. Multiple users may share the connection, but each must perform an attach to
  1339. establish his or her identity.
  1340. .PP
  1341. The
  1342. .CW walk
  1343. message moves a fid through a single level of the file system hierarchy.
  1344. The
  1345. .CW clone
  1346. message takes an established fid and produces a copy that points
  1347. to the same file as the original.
  1348. Its purpose is to enable walking to a file in a directory without losing the fid
  1349. on the directory.
  1350. The
  1351. .CW open
  1352. message locks a fid to a specific file in the hierarchy,
  1353. checks access permissions,
  1354. and prepares the fid
  1355. for I/O.
  1356. The
  1357. .CW read
  1358. and
  1359. .CW write
  1360. messages allow I/O at arbitrary offsets in the file;
  1361. the maximum size transferred is defined by the protocol.
  1362. The
  1363. .CW clunk
  1364. message indicates the client has no further use for a fid.
  1365. The
  1366. .CW remove
  1367. message behaves like
  1368. .CW clunk
  1369. but causes the file associated with the fid to be removed and any associated
  1370. resources on the server to be deallocated.
  1371. .PP
  1372. 9P has two forms: RPC messages sent on a pipe or network connection and a procedural
  1373. interface within the kernel.
  1374. Since kernel device drivers are directly addressable,
  1375. there is no need to pass messages to
  1376. communicate with them;
  1377. instead each 9P transaction is implemented by a direct procedure call.
  1378. For each fid,
  1379. the kernel maintains a local representation in a data structure called a
  1380. .I channel ,
  1381. so all operations on files performed by the kernel involve a channel connected
  1382. to that fid.
  1383. The simplest example is a user process's file descriptors, which are
  1384. indexes into an array of channels.
  1385. A table in the kernel provides a list
  1386. of entry points corresponding one to one with the 9P messages for each device.
  1387. A system call such as
  1388. .CW read
  1389. from the user translates into one or more procedure calls
  1390. through that table, indexed by the type character stored in the channel:
  1391. .CW procread ,
  1392. .CW eiaread ,
  1393. etc.
  1394. Each call takes at least
  1395. one channel as an argument.
  1396. A special kernel driver, called the
  1397. .I mount
  1398. driver, translates procedure calls to messages, that is,
  1399. it converts local procedure calls to remote ones.
  1400. In effect, this special driver
  1401. becomes a local proxy for the files served by a remote file server.
  1402. The channel pointer in the local call is translated to the associated fid
  1403. in the transmitted message.
  1404. .PP
  1405. The mount driver is the sole RPC mechanism employed by the system.
  1406. The semantics of the supplied files, rather than the operations performed upon
  1407. them, create a particular service such as the
  1408. .CW cpu
  1409. command.
  1410. The mount driver demultiplexes protocol
  1411. messages between clients sharing a communication channel
  1412. with a file server.
  1413. For each outgoing RPC message,
  1414. the mount driver allocates a buffer labeled by a small unique integer,
  1415. called a
  1416. .I tag .
  1417. The reply to the RPC is labeled with the same tag, which is used by
  1418. the mount driver to match the reply with the request.
  1419. .PP
  1420. The kernel representation of the name space
  1421. is called the
  1422. .I "mount table" ,
  1423. which stores a list of bindings between channels.
  1424. Each entry in the mount table contains a pair of channels: a
  1425. .I from
  1426. channel and a
  1427. .I to
  1428. channel.
  1429. Every time a walk succeeds in moving a channel to a new location in the name space,
  1430. the mount table is consulted to see if a `from' channel matches the new name; if
  1431. so the `to' channel is cloned and substituted for the original.
  1432. Union directories are implemented by converting the `to'
  1433. channel into a list of channels:
  1434. a successful walk to a union directory returns a `to' channel that forms
  1435. the head of
  1436. a list of channels, each representing a component directory
  1437. of the union.
  1438. If a walk
  1439. fails to find a file in the first directory of the union, the list is followed,
  1440. the next component cloned, and walk tried on that directory.
  1441. .PP
  1442. Each file in Plan 9 is uniquely identified by a set of integers:
  1443. the type of the channel (used as the index of the function call table),
  1444. the server or device number
  1445. distinguishing the server from others of the same type (decided locally by the driver),
  1446. and a
  1447. .I qid
  1448. formed from two 32-bit numbers called
  1449. .I path
  1450. and
  1451. .I version .
  1452. The path is a unique file number assigned by a device driver or
  1453. file server when a file is created.
  1454. The version number is updated whenever
  1455. the file is modified; as described in the next section,
  1456. it can be used to maintain cache coherency between
  1457. clients and servers.
  1458. .PP
  1459. The type and device number are analogous to UNIX major and minor
  1460. device numbers;
  1461. the qid is analogous to the i-number.
  1462. The device and type
  1463. connect the channel to a device driver and the qid
  1464. identifies the file within that device.
  1465. If the file recovered from a walk has the same type, device, and qid path
  1466. as an entry in the mount table, they are the same file and the
  1467. corresponding substitution from the mount table is made.
  1468. This is how the name space is implemented.
  1469. .SH
  1470. File Caching
  1471. .PP
  1472. The 9P protocol has no explicit support for caching files on a client.
  1473. The large memory of the central file server acts as a shared cache for all its clients,
  1474. which reduces the total amount of memory needed across all machines in the network.
  1475. Nonetheless, there are sound reasons to cache files on the client, such as a slow
  1476. connection to the file server.
  1477. .PP
  1478. The version field of the qid is changed whenever the file is modified,
  1479. which makes it possible to do some weakly coherent forms of caching.
  1480. The most important is client caching of text and data segments of executable files.
  1481. When a process
  1482. .CW execs
  1483. a program, the file is re-opened and the qid's version is compared with that in the cache;
  1484. if they match, the local copy is used.
  1485. The same method can be used to build a local caching file server.
  1486. This user-level server interposes on the 9P connection to the remote server and
  1487. monitors the traffic, copying data to a local disk.
  1488. When it sees a read of known data, it answers directly,
  1489. while writes are passed on immediately\(emthe cache is write-through\(emto keep
  1490. the central copy up to date.
  1491. This is transparent to processes on the terminal and requires no change to 9P;
  1492. it works well on home machines connected over serial lines.
  1493. A similar method can be applied to build a general client cache in unused local
  1494. memory, but this has not been done in Plan 9.
  1495. .SH
  1496. Networks and Communication Devices
  1497. .PP
  1498. Network interfaces are kernel-resident file systems, analogous to the EIA device
  1499. described earlier.
  1500. Call setup and shutdown are achieved by writing text strings to the control file
  1501. associated with the device;
  1502. information is sent and received by reading and writing the data file.
  1503. The structure and semantics of the devices are common to all networks so,
  1504. other than a file name substitution,
  1505. the same procedure makes a call using TCP over Ethernet as URP over Datakit [Fra80].
  1506. .PP
  1507. This example illustrates the structure of the TCP device:
  1508. .P1
  1509. % ls -lp /net/tcp
  1510. d-r-xr-xr-x I 0 bootes bootes 0 Feb 23 20:20 0
  1511. d-r-xr-xr-x I 0 bootes bootes 0 Feb 23 20:20 1
  1512. --rw-rw-rw- I 0 bootes bootes 0 Feb 23 20:20 clone
  1513. % ls -lp /net/tcp/0
  1514. --rw-rw---- I 0 rob bootes 0 Feb 23 20:20 ctl
  1515. --rw-rw---- I 0 rob bootes 0 Feb 23 20:20 data
  1516. --rw-rw---- I 0 rob bootes 0 Feb 23 20:20 listen
  1517. --r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 local
  1518. --r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 remote
  1519. --r--r--r-- I 0 bootes bootes 0 Feb 23 20:20 status
  1520. %
  1521. .P2
  1522. The top directory,
  1523. .CW /net/tcp ,
  1524. contains a
  1525. .CW clone
  1526. file and a directory for each connection, numbered
  1527. .CW 0
  1528. to
  1529. .I n .
  1530. Each connection directory corresponds to a TCP/IP connection.
  1531. Opening
  1532. .CW clone
  1533. reserves an unused connection and returns its control file.
  1534. Reading the control file returns the textual connection number, so the user
  1535. process can construct the full name of the newly allocated
  1536. connection directory.
  1537. The
  1538. .CW local ,
  1539. .CW remote ,
  1540. and
  1541. .CW status
  1542. files are diagnostic; for example,
  1543. .CW remote
  1544. contains the address (for TCP, the IP address and port number) of the remote side.
  1545. .PP
  1546. A call is initiated by writing a connect message with a network-specific address as
  1547. its argument; for example, to open a Telnet session (port 23) to a remote machine
  1548. with IP address 135.104.9.52,
  1549. the string is:
  1550. .P1
  1551. connect 135.104.9.52!23
  1552. .P2
  1553. The write to the control file blocks until the connection is established;
  1554. if the destination is unreachable, the write returns an error.
  1555. Once the connection is established, the
  1556. .CW telnet
  1557. application reads and writes the
  1558. .CW data
  1559. file
  1560. to talk to the remote Telnet daemon.
  1561. On the other end, the Telnet daemon would start by writing
  1562. .P1
  1563. announce 23
  1564. .P2
  1565. to its control file to indicate its willingness to receive calls to this port.
  1566. Such a daemon is called a
  1567. .I listener
  1568. in Plan 9.
  1569. .PP
  1570. A uniform structure for network devices cannot hide all the details
  1571. of addressing and communication for dissimilar networks.
  1572. For example, Datakit uses textual, hierarchical addresses unlike IP's 32-bit addresses, so
  1573. an application given a control file must still know what network it represents.
  1574. Rather than make every application know the addressing of every network,
  1575. Plan 9 hides these details in a
  1576. .I connection
  1577. .I server ,
  1578. called
  1579. .CW cs .
  1580. .CW Cs
  1581. is a file system mounted in a known place.
  1582. It supplies a single control file that an application uses to discover how to connect
  1583. to a host.
  1584. The application writes the symbolic address and service name for
  1585. the connection it wishes to make,
  1586. and reads back the name of the
  1587. .CW clone
  1588. file to open and the address to present to it.
  1589. If there are multiple networks between the machines,
  1590. .CW cs
  1591. presents a list of possible networks and addresses to be tried in sequence;
  1592. it uses heuristics to decide the order.
  1593. For instance, it presents the highest-bandwidth choice first.
  1594. .PP
  1595. A single library function called
  1596. .CW dial
  1597. talks to
  1598. .CW cs
  1599. to establish the connection.
  1600. An application that uses
  1601. .CW dial
  1602. needs no changes, not even recompilation, to adapt to new networks;
  1603. the interface to
  1604. .CW cs
  1605. hides the details.
  1606. .PP
  1607. The uniform structure for networks in Plan 9 makes the
  1608. .CW import
  1609. command all that is needed to construct gateways.
  1610. .SH
  1611. Kernel structure for networks
  1612. .PP
  1613. The kernel plumbing used to build Plan 9 communications
  1614. channels is called
  1615. .I streams
  1616. [Rit84][Presotto].
  1617. A stream is a bidirectional channel connecting a
  1618. physical or pseudo-device to a user process.
  1619. The user process inserts and removes data at one end of the stream;
  1620. a kernel process acting on behalf of a device operates at
  1621. the other end.
  1622. A stream comprises a linear list of
  1623. .I "processing modules" .
  1624. Each module has both an upstream (toward the process) and
  1625. downstream (toward the device)
  1626. .I "put routine" .
  1627. Calling the put routine of the module on either end of the stream
  1628. inserts data into the stream.
  1629. Each module calls the succeeding one to send data up or down the stream.
  1630. Like UNIX streams [Rit84],
  1631. Plan 9 streams can be dynamically configured.
  1632. .SH
  1633. The IL Protocol
  1634. .PP
  1635. The 9P protocol must run above a reliable transport protocol with delimited messages.
  1636. 9P has no mechanism to recover from transmission errors and
  1637. the system assumes that each read from a communication channel will
  1638. return a single 9P message;
  1639. it does not parse the data stream to discover message boundaries.
  1640. Pipes and some network protocols already have these properties but
  1641. the standard IP protocols do not.
  1642. TCP does not delimit messages, while
  1643. UDP [RFC768] does not provide reliable in-order delivery.
  1644. .PP
  1645. We designed a new protocol, called IL (Internet Link), to transmit 9P messages over IP.
  1646. It is a connection-based protocol that provides
  1647. reliable transmission of sequenced messages between machines.
  1648. Since a process can have only a single outstanding 9P request,
  1649. there is no need for flow control in IL.
  1650. Like TCP, IL has adaptive timeouts: it scales acknowledge and retransmission times
  1651. to match the network speed.
  1652. This allows the protocol to perform well on both the Internet and on local Ethernets.
  1653. Also, IL does no blind retransmission,
  1654. to avoid adding to the congestion of busy networks.
  1655. Full details are in another paper [PrWi95].
  1656. .PP
  1657. In Plan 9, the implementation of IL is smaller and faster than TCP.
  1658. IL is our main Internet transport protocol.
  1659. .SH
  1660. Overview of authentication
  1661. .PP
  1662. Authentication establishes the identity of a
  1663. user accessing a resource.
  1664. The user requesting the resource is called the
  1665. .I client
  1666. and the user granting access to the resource is called the
  1667. .I server .
  1668. Authentication is usually done under the auspices of a 9P attach message.
  1669. A user may be a client in one authentication exchange and a server in another.
  1670. Servers always act on behalf of some user,
  1671. either a normal client or some administrative entity, so authentication
  1672. is defined to be between users, not machines.
  1673. .PP
  1674. Each Plan 9 user has an associated DES [NBS77] authentication key;
  1675. the user's identity is verified by the ability to
  1676. encrypt and decrypt special messages called challenges.
  1677. Since knowledge of a user's key gives access to that user's resources,
  1678. the Plan 9 authentication protocols never transmit a message containing
  1679. a cleartext key.
  1680. .PP
  1681. Authentication is bilateral:
  1682. at the end of the authentication exchange,
  1683. each side is convinced of the other's identity.
  1684. Every machine begins the exchange with a DES key in memory.
  1685. In the case of CPU and file servers, the key, user name, and domain name
  1686. for the server are read from permanent storage,
  1687. usually non-volatile RAM.
  1688. In the case of terminals,
  1689. the key is derived from a password typed by the user at boot time.
  1690. A special machine, known as the
  1691. .I authentication
  1692. .I server ,
  1693. maintains a database of keys for all users in its administrative domain and
  1694. participates in the authentication protocols.
  1695. .PP
  1696. The authentication protocol is as follows:
  1697. after exchanging challenges, one party
  1698. contacts the authentication server to create
  1699. permission-granting
  1700. .I tickets
  1701. encrypted with
  1702. each party's secret key and containing a new conversation key.
  1703. Each
  1704. party decrypts its own ticket and uses the conversation key to
  1705. encrypt the other party's challenge.
  1706. .PP
  1707. This structure is somewhat like Kerberos [MBSS87], but avoids
  1708. its reliance on synchronized clocks.
  1709. Also
  1710. unlike Kerberos, Plan 9 authentication supports a `speaks for'
  1711. relation [LABW91] that enables one user to have the authority
  1712. of another;
  1713. this is how a CPU server runs processes on behalf of its clients.
  1714. .PP
  1715. Plan 9's authentication structure builds
  1716. secure services rather than depending on firewalls.
  1717. Whereas firewalls require special code for every service penetrating the wall,
  1718. the Plan 9 approach permits authentication to be done in a single place\(em9P\(emfor
  1719. all services.
  1720. For example, the
  1721. .CW cpu
  1722. command works securely across the Internet.
  1723. .SH
  1724. Authenticating external connections
  1725. .PP
  1726. The regular Plan 9 authentication protocol is not suitable for text-based services such as
  1727. Telnet
  1728. or FTP.
  1729. In such cases, Plan 9 users authenticate with hand-held DES calculators called
  1730. .I authenticators .
  1731. The authenticator holds a key for the user, distinct from
  1732. the user's normal authentication key.
  1733. The user `logs on' to the authenticator using a 4-digit PIN.
  1734. A correct PIN enables the authenticator for a challenge/response exchange with the server.
  1735. Since a correct challenge/response exchange is valid only once
  1736. and keys are never sent over the network,
  1737. this procedure is not susceptible to replay attacks, yet
  1738. is compatible with protocols like Telnet and FTP.
  1739. .SH
  1740. Special users
  1741. .PP
  1742. Plan 9 has no super-user.
  1743. Each server is responsible for maintaining its own security, usually permitting
  1744. access only from the console, which is protected by a password.
  1745. For example, file servers have a unique administrative user called
  1746. .CW adm ,
  1747. with special privileges that apply only to commands typed at the server's
  1748. physical console.
  1749. These privileges concern the day-to-day maintenance of the server,
  1750. such as adding new users and configuring disks and networks.
  1751. The privileges do
  1752. .I not
  1753. include the ability to modify, examine, or change the permissions of any files.
  1754. If a file is read-protected by a user, only that user may grant access to others.
  1755. .PP
  1756. CPU servers have an equivalent user name that allows administrative access to
  1757. resources on that server such as the control files of user processes.
  1758. Such permission is necessary, for example, to kill rogue processes, but
  1759. does not extend beyond that server.
  1760. On the other hand, by means of a key
  1761. held in protected non-volatile RAM,
  1762. the identity of the administrative user is proven to the
  1763. authentication server.
  1764. This allows the CPU server to authenticate remote users, both
  1765. for access to the server itself and when the CPU server is acting
  1766. as a proxy on their behalf.
  1767. .PP
  1768. Finally, a special user called
  1769. .CW none
  1770. has no password and is always allowed to connect;
  1771. anyone may claim to be
  1772. .CW none .
  1773. .CW None
  1774. has restricted permissions; for example, it is not allowed to examine dump files
  1775. and can read only world-readable files.
  1776. .PP
  1777. The idea behind
  1778. .CW none
  1779. is analogous to the anonymous user in FTP
  1780. services.
  1781. On Plan 9, guest FTP servers are further confined within a special
  1782. restricted name space.
  1783. It disconnects guest users from system programs, such as the contents of
  1784. .CW /bin ,
  1785. but makes it possible to make local files available to guests
  1786. by binding them explicitly into the space.
  1787. A restricted name space is more secure than the usual technique of exporting
  1788. an ad hoc directory tree; the result is a kind of cage around untrusted users.
  1789. .SH
  1790. The cpu command and proxied authentication
  1791. .PP
  1792. When a call is made to a CPU server for a user, say Peter,
  1793. the intent is that Peter wishes to run processes with his own authority.
  1794. To implement this property,
  1795. the CPU server does the following when the call is received.
  1796. First, the listener forks off a process to handle the call.
  1797. This process changes to the user
  1798. .CW none
  1799. to avoid giving away permissions if it is compromised.
  1800. It then performs the authentication protocol to verify that the
  1801. calling user really is Peter, and to prove to Peter that
  1802. the machine is itself trustworthy.
  1803. Finally, it reattaches to all relevant file servers using the
  1804. authentication protocol to identify itself as Peter.
  1805. In this case, the CPU server is a client of the file server and performs the
  1806. client portion of the authentication exchange on behalf of Peter.
  1807. The authentication server will give the process tickets to
  1808. accomplish this only if the CPU server's administrative user name is allowed to
  1809. .I "speak for"
  1810. Peter.
  1811. .PP
  1812. The
  1813. .I "speaks for"
  1814. relation [LABW91] is kept in a table on the authentication server.
  1815. To simplify the management of users computing in different authentication domains,
  1816. it also contains mappings between user names in different domains,
  1817. for example saying that user
  1818. .CW rtm
  1819. in one domain is the same person as user
  1820. .CW rtmorris
  1821. in another.
  1822. .SH
  1823. File Permissions
  1824. .PP
  1825. One of the advantages of constructing services as file systems
  1826. is that the solutions to ownership and permission problems fall out naturally.
  1827. As in UNIX,
  1828. each file or directory has separate read, write, and execute/search permissions
  1829. for the file's owner, the file's group, and anyone else.
  1830. The idea of group is unusual:
  1831. any user name is potentially a group name.
  1832. A group is just a user with a list of other users in the group.
  1833. Conventions make the distinction: most people have user names without group members,
  1834. while groups have long lists of attached names. For example, the
  1835. .CW sys
  1836. group traditionally has all the system programmers,
  1837. and system files are accessible
  1838. by group
  1839. .CW sys .
  1840. Consider the following two lines of a user database stored on a server:
  1841. .P1
  1842. pjw:pjw:
  1843. sys::pjw,ken,philw,presotto
  1844. .P2
  1845. The first establishes user
  1846. .CW pjw
  1847. as a regular user. The second establishes user
  1848. .CW sys
  1849. as a group and lists four users who are
  1850. .I members
  1851. of that group.
  1852. The empty colon-separated field is space for a user to be named as the
  1853. .I group
  1854. .I leader .
  1855. If a group has a leader, that user has special permissions for the group,
  1856. such as freedom to change the group permissions
  1857. of files in that group.
  1858. If no leader is specified, each member of the group is considered equal, as if each were
  1859. the leader.
  1860. In our example, only
  1861. .CW pjw
  1862. can add members to his group, but all of
  1863. .CW sys 's
  1864. members are equal partners in that group.
  1865. .PP
  1866. Regular files are owned by the user that creates them.
  1867. The group name is inherited from the directory holding the new file.
  1868. Device files are treated specially:
  1869. the kernel may arrange the ownership and permissions of
  1870. a file appropriate to the user accessing the file.
  1871. .PP
  1872. A good example of the generality this offers is process files,
  1873. which are owned and read-protected by the owner of the process.
  1874. If the owner wants to let someone else access the memory of a process,
  1875. for example to let the author of a program debug a broken image, the standard
  1876. .CW chmod
  1877. command applied to the process files does the job.
  1878. .PP
  1879. Another unusual application of file permissions
  1880. is the dump file system, which is not only served by the same file
  1881. server as the original data, but represented by the same user database.
  1882. Files in the dump are therefore given the same protection as files in the regular
  1883. file system;
  1884. if a file is owned by
  1885. .CW pjw
  1886. and read-protected, once it is in the dump file system it is still owned by
  1887. .CW pjw
  1888. and read-protected.
  1889. Also, since the dump file system is immutable, the file cannot be changed;
  1890. it is read-protected forever.
  1891. Drawbacks are that if the file is readable but should have been read-protected,
  1892. it is readable forever, and that user names are hard to re-use.
  1893. .SH
  1894. Performance
  1895. .PP
  1896. As a simple measure of the performance of the Plan 9 kernel,
  1897. we compared the
  1898. time to do some simple operations on Plan 9 and on SGI's IRIX Release 5.3
  1899. running on an SGI Challenge M with a 100MHz MIPS R4400 and a 1-megabyte
  1900. secondary cache.
  1901. The test program was written in Alef,
  1902. compiled with the same compiler,
  1903. and run on identical hardware,
  1904. so the only variables are the operating system and libraries.
  1905. .PP
  1906. The program tests the time to do a context switch
  1907. .CW rendezvous "" (
  1908. on Plan 9,
  1909. .CW blockproc
  1910. on IRIX);
  1911. a trivial system call
  1912. .CW rfork(0) "" (
  1913. and
  1914. .CW nap(0) );
  1915. and
  1916. lightweight fork
  1917. .CW rfork(RFPROC) "" (
  1918. and
  1919. .CW sproc(PR_SFDS|PR_SADDR) ).
  1920. It also measures the time to send a byte on a pipe from one process
  1921. to another and the throughput on a pipe between two processes.
  1922. The results appear in Table 1.
  1923. .KS
  1924. .TS
  1925. center,box;
  1926. ccc
  1927. lnn.
  1928. Test	Plan 9	IRIX
  1929. _
  1930. Context switch	39 µs	150 µs
  1931. System call	6 µs	36 µs
  1932. Light fork	1300 µs	2200 µs
  1933. Pipe latency	110 µs	200 µs
  1934. Pipe bandwidth	11678 KB/s	14545 KB/s
  1935. .TE
  1936. .ce
  1937. .I
  1938. Table 1. Performance comparison.
  1939. .R
  1940. .KE
  1941. .LP
  1942. Although the Plan 9 times are not spectacular, they show that the kernel is
  1943. competitive with commercial systems.
  1944. .SH
  1945. Discussion
  1946. .PP
  1947. Plan 9 has a relatively conventional kernel;
  1948. the system's novelty lies in the pieces outside the kernel and the way they interact.
  1949. When building Plan 9, we considered all aspects
  1950. of the system together, solving problems where the solution fit best.
  1951. Sometimes the solution spanned many components.
  1952. An example is the problem of heterogeneous instruction architectures,
  1953. which is addressed by the compilers (different code characters, portable
  1954. object code),
  1955. the environment
  1956. .CW $cputype "" (
  1957. and
  1958. .CW $objtype ),
  1959. the name space
  1960. (binding in
  1961. .CW /bin ),
  1962. and other components.
  1963. Sometimes many issues could be solved in a single place.
  1964. The best example is 9P,
  1965. which centralizes naming, access, and authentication.
  1966. 9P is really the core
  1967. of the system;
  1968. it is fair to say that the Plan 9 kernel is primarily a 9P multiplexer.
  1969. .PP
  1970. Plan 9's focus on files and naming is central to its expressiveness.
  1971. Particularly in distributed computing, the way things are named has profound
  1972. influence on the system [Nee89].
  1973. The combination of
  1974. local name spaces and global conventions to interconnect networked resources
  1975. avoids the difficulty of maintaining a global uniform name space,
  1976. while naming everything like a file makes the system easy to understand, even for
  1977. novices.
  1978. Consider the dump file system, which is trivial to use for anyone familiar with
  1979. hierarchical file systems.
  1980. At a deeper level, building all the resources above a single uniform interface
  1981. makes interoperability easy.
  1982. Once a resource exports a 9P interface,
  1983. it can combine transparently
  1984. with any other part of the system to build unusual applications;
  1985. the details are hidden.
  1986. This may sound object-oriented, but there are distinctions.
  1987. First, 9P defines a fixed set of `methods'; it is not an extensible protocol.
  1988. More important,
  1989. files are well-defined and well-understood
  1990. and come prepackaged with familiar methods of access, protection, naming, and
  1991. networking.
  1992. Objects, despite their generality, do not come with these attributes defined.
  1993. By reducing `object' to `file', Plan 9 gets some technology for free.
  1994. .PP
  1995. Nonetheless, it is possible to push the idea of file-based computing too far.
  1996. Converting every resource in the system into a file system is a kind of metaphor,
  1997. and metaphors can be abused.
  1998. A good example of restraint is
  1999. .CW /proc ,
  2000. which is only a view of a process, not a representation.
  2001. To run processes, the usual
  2002. .CW fork
  2003. and
  2004. .CW exec
  2005. calls are still necessary, rather than doing something like
  2006. .P1
  2007. cp /bin/date /proc/clone/mem
  2008. .P2
  2009. The problem with such examples is that they require the server to do things
  2010. not under its control.
  2011. The ability to assign meaning to a command like this does not
  2012. imply the meaning will fall naturally out of the structure of answering the 9P requests
  2013. it generates.
  2014. As a related example, Plan 9 does not put machines' network names in the file
  2015. name space.
  2016. The network interfaces provide a very different model of naming, because using
  2017. .CW open ,
  2018. .CW create ,
  2019. .CW read ,
  2020. and
  2021. .CW write
  2022. on such files would not offer a suitable place to encode all the details of call
  2023. setup for an arbitrary network.
  2024. This does not mean that the network interface cannot be file-like, just that it must
  2025. have a more tightly defined structure.
  2026. .PP
  2027. What would we do differently next time?
  2028. Some elements of the implementation are unsatisfactory.
  2029. Using streams to implement network interfaces in the kernel
  2030. allows protocols to be connected together dynamically,
  2031. such as to attach the same TTY driver to TCP, URP, and
  2032. IL connections,
  2033. but Plan 9 makes no use of this configurability.
  2034. (It was exploited, however, in the research UNIX system for which
  2035. streams were invented.)
  2036. Replacing streams by static I/O queues would
  2037. simplify the code and make it faster.
  2038. .PP
  2039. Although the main Plan 9 kernel is portable across many machines,
  2040. the file server is implemented separately.
  2041. This has caused several problems:
  2042. drivers that must be written twice,
  2043. bugs that must be fixed twice,
  2044. and weaker portability of the file system code.
  2045. The solution is easy: the file server kernel should be maintained
  2046. as a variant of the regular operating system, with no user processes and
  2047. special compiled-in
  2048. kernel processes to implement file service.
  2049. Another improvement to the file system would be a change of internal structure.
  2050. The WORM jukebox is the least reliable piece of the hardware, but because
  2051. it holds the metadata of the file system, it must be present in order to serve files.
  2052. The system could be restructured so the WORM is a backup device only, with the
  2053. file system proper residing on magnetic disks.
  2054. This would require no change to the external interface.
  2055. .PP
  2056. Although Plan 9 has per-process name spaces, it has no mechanism to give the
  2057. description of a process's name space to another process except by direct inheritance.
  2058. The
  2059. .CW cpu
  2060. command, for example, cannot in general reproduce the terminal's name space;
  2061. it can only re-interpret the user's login profile and make substitutions for things like
  2062. the name of the binary directory to load.
  2063. This misses any local modifications made before running
  2064. .CW cpu .
  2065. It should instead be possible to capture the terminal's name space and transmit
  2066. its description to a remote process.
  2067. .PP
  2068. Despite these problems, Plan 9 works well.
  2069. It has matured into the system that supports our research,
  2070. rather than being the subject of the research itself.
  2071. Experimental new work includes developing interfaces to faster networks,
  2072. file caching in the client kernel,
  2073. encapsulating and exporting name spaces,
  2074. and the ability to re-establish the client state after a server crash.
  2075. Attention is now focusing on using the system to build distributed applications.
  2076. .PP
  2077. One reason for Plan 9's success is that we use it for our daily work, not just as a research tool.
  2078. Active use forces us to address shortcomings as they arise and to adapt the system
  2079. to solve our problems.
  2080. Through this process, Plan 9 has become a comfortable, productive programming
  2081. environment, as well as a vehicle for further systems research.
  2082. .SH
  2083. References
  2084. .nr PS -1
  2085. .nr VS -2
  2086. .IP [9man] 9
  2087. .I
  2088. Plan 9 Programmer's Manual,
  2089. Volume 1,
  2090. .R
  2091. AT&T Bell Laboratories,
  2092. Murray Hill, NJ,
  2093. 1995.
  2094. .IP [ANSIC] 9
  2095. \f2American National Standard for Information Systems \-
  2096. Programming Language C\f1, American National Standards Institute, Inc.,
  2097. New York, 1990.
  2098. .IP [Duff90] 9
  2099. Tom Duff, ``Rc - A Shell for Plan 9 and UNIX systems'',
  2100. .I
  2101. Proc. of the Summer 1990 UKUUG Conf.,
  2102. .R
  2103. London, July, 1990, pp. 21-33, reprinted, in a different form, in this volume.
  2104. .IP [Fra80] 9
  2105. A.G. Fraser,
  2106. ``Datakit \- A Modular Network for Synchronous and Asynchronous Traffic'',
  2107. .I
  2108. Proc. Int. Conf. on Commun.,
  2109. .R
  2110. June 1980, Boston, MA.
  2111. .IP [FSSUTF] 9
  2112. .I
  2113. File System Safe UCS Transformation Format (FSS-UTF),
  2114. .R
  2115. X/Open Preliminary Specification, 1993.
  2116. ISO designation is
  2117. ISO/IEC JTC1/SC2/WG2 N 1036, dated 1994-08-01.
  2118. .IP "[ISO10646] " 9
  2119. ISO/IEC DIS 10646-1:1993
  2120. .I
  2121. Information technology \-
  2122. Universal Multiple-Octet Coded Character Set (UCS) \(em
  2123. Part 1: Architecture and Basic Multilingual Plane.
  2124. .R
  2125. .IP [Kill84] 9
  2126. T.J. Killian,
  2127. ``Processes as Files'',
  2128. .I
  2129. USENIX Summer 1984 Conf. Proc.,
  2130. .R
  2131. June 1984, Salt Lake City, UT.
  2132. .IP "[LABW91] " 9
  2133. Butler Lampson,
  2134. Martín Abadi,
  2135. Michael Burrows, and
  2136. Edward Wobber,
  2137. ``Authentication in Distributed Systems: Theory and Practice'',
  2138. .I
  2139. Proc. 13th ACM Symp. on Op. Sys. Princ.,
  2140. .R
  2141. Asilomar, 1991,
  2142. pp. 165-182.
  2143. .IP "[MBSS87] " 9
  2144. S. P. Miller,
  2145. B. C. Neuman,
  2146. J. I. Schiller, and
  2147. J. H. Saltzer,
  2148. ``Kerberos Authentication and Authorization System'',
  2149. Massachusetts Institute of Technology,
  2150. 1987.
  2151. .IP [NBS77] 9
  2152. National Bureau of Standards (U.S.),
  2153. .I
  2154. Federal Information Processing Standard 46,
  2155. .R
  2156. National Technical Information Service, Springfield, VA, 1977.
  2157. .IP [Nee89] 9
  2158. R. Needham, ``Names'', in
  2159. .I
  2160. Distributed systems,
  2161. .R
  2162. S. Mullender, ed.,
  2163. Addison Wesley, 1989.
  2164. .IP "[NeHe82] " 9
  2165. R.M. Needham and A.J. Herbert,
  2166. .I
  2167. The Cambridge Distributed Computing System,
  2168. .R
  2169. Addison-Wesley, London, 1982.
  2170. .IP [Neu92] 9
  2171. B. Clifford Neuman,
  2172. ``The Prospero File System'',
  2173. .I
  2174. USENIX File Systems Workshop Proc.,
  2175. .R
  2176. Ann Arbor, 1992, pp. 13-28.
  2177. .IP "[OCDNW88] " 9
  2178. John Ousterhout, Andrew Cherenson, Fred Douglis, Mike Nelson, and Brent Welch,
  2179. ``The Sprite Network Operating System'',
  2180. .I
  2181. IEEE Computer,
  2182. .R
  2183. 21(2), 23-38, Feb. 1988.
  2184. .IP [Pike87] 9
  2185. Rob Pike, ``The Text Editor \f(CWsam\fP'',
  2186. .I
  2187. Software - Practice and Experience,
  2188. .R
  2189. Nov 1987, \f3\&17\f1(11), pp. 813-845; reprinted in this volume.
  2190. .IP [Pike91] 9
  2191. Rob Pike, ``8½, the Plan 9 Window System'',
  2192. .I
  2193. USENIX Summer Conf. Proc.,
  2194. .R
  2195. Nashville, June, 1991, pp. 257-265,
  2196. reprinted in this volume.
  2197. .IP [Pike93] 9
  2198. Rob Pike and Ken Thompson, ``Hello World or Καλημέρα κόσμε or
  2199. \f(Jpこんにちは 世界\fP'',
  2200. .I
  2201. USENIX Winter Conf. Proc.,
  2202. .R
  2203. San Diego, 1993, pp. 43-50,
  2204. reprinted in this volume.
  2205. .IP [Pike94] 9
  2206. Rob Pike,
  2207. ``Acme: A User Interface for Programmers'',
  2208. .I
  2209. USENIX Proc. of the Winter 1994 Conf.,
  2210. .R
  2211. San Francisco, CA, 1994.
  2212. .IP [Pike95] 9
  2213. Rob Pike,
  2214. ``How to Use the Plan 9 C Compiler'',
  2215. .I
  2216. Plan 9 Programmer's Manual,
  2217. Volume 2,
  2218. .R
  2219. AT&T Bell Laboratories,
  2220. Murray Hill, NJ,
  2221. 1995.
  2222. .IP [POSIX] 9
  2223. .I
  2224. Information Technology\(emPortable Operating
  2225. System Interface (POSIX) Part 1:
  2226. System Application Program Interface (API)
  2227. [C Language],
  2228. .R
  2229. IEEE, New York, 1990.
  2230. .IP "[PPTTW93] " 9
  2231. Rob Pike, Dave Presotto, Ken Thompson, Howard Trickey, and Phil Winterbottom, ``The Use of Name Spaces in Plan 9'',
  2232. .I
  2233. Op. Sys. Rev.,
  2234. .R
  2235. Vol. 27, No. 2, April 1993, pp. 72-76,
  2236. reprinted in this volume.
  2237. .IP [Presotto] 9
  2238. Dave Presotto,
  2239. ``Multiprocessor Streams for Plan 9'',
  2240. .I
  2241. UKUUG Summer 1990 Conf. Proc.,
  2242. .R
  2243. July 1990, pp. 11-19.
  2244. .IP [PrWi93] 9
  2245. Dave Presotto and Phil Winterbottom,
  2246. ``The Organization of Networks in Plan 9'',
  2247. .I
  2248. USENIX Proc. of the Winter 1993 Conf.,
  2249. .R
  2250. San Diego, CA,
  2251. pp. 271-280,
  2252. reprinted in this volume.
  2253. .IP [PrWi95] 9
  2254. Dave Presotto and Phil Winterbottom,
  2255. ``The IL Protocol'',
  2256. .I
  2257. Plan 9 Programmer's Manual,
  2258. Volume 2,
  2259. .R
  2260. AT&T Bell Laboratories,
  2261. Murray Hill, NJ,
  2262. 1995.
  2263. .IP "[RFC768] " 9
  2264. J. Postel, RFC768,
  2265. .I "User Datagram Protocol,
  2266. .I "DARPA Internet Program Protocol Specification,
  2267. August 1980.
  2268. .IP "[RFC793] " 9
  2269. J. Postel, RFC793,
  2270. .I "Transmission Control Protocol,
  2271. .I "DARPA Internet Program Protocol Specification,
  2272. September 1981.
  2273. .IP [Rao91] 9
  2274. Herman Chung-Hwa Rao,
  2275. .I
  2276. The Jade File System,
  2277. .R
  2278. (Ph. D. Dissertation),
  2279. Dept. of Comp. Sci.,
  2280. University of Arizona,
  2281. TR 91-18.
  2282. .IP [Rit84] 9
  2283. D.M. Ritchie,
  2284. ``A Stream Input-Output System'',
  2285. .I
  2286. AT&T Bell Laboratories Technical Journal,
  2287. \f3\&63\f1(8), October, 1984.
  2288. .IP [Tric95] 9
  2289. Howard Trickey,
  2290. ``APE \(em The ANSI/POSIX Environment'',
  2291. .I
  2292. Plan 9 Programmer's Manual,
  2293. Volume 2,
  2294. .R
  2295. AT&T Bell Laboratories,
  2296. Murray Hill, NJ,
  2297. 1995.
  2298. .IP [Unicode] 9
  2299. .I
  2300. The Unicode Standard,
  2301. Worldwide Character Encoding,
  2302. Version 1.0, Volume 1,
  2303. .R
  2304. The Unicode Consortium,
  2305. Addison Wesley,
  2306. New York,
  2307. 1991.
  2308. .IP [UNIX85] 9
  2309. .I
  2310. UNIX Time-Sharing System Programmer's Manual,
  2311. Research Version, Eighth Edition, Volume 1.
  2312. .R
  2313. AT&T Bell Laboratories, Murray Hill, NJ, 1985.
  2314. .IP [Welc94] 9
  2315. Brent Welch,
  2316. ``A Comparison of Three Distributed File System Architectures: Vnode, Sprite, and Plan 9'',
  2317. .I
  2318. Computing Systems,
  2319. .R
  2320. 7(2), pp. 175-199, Spring, 1994.
  2321. .IP [Wint95] 9
  2322. Phil Winterbottom,
  2323. ``Alef Language Reference Manual'',
  2324. .I
  2325. Plan 9 Programmer's Manual,
  2326. Volume 2,
  2327. .R
  2328. AT&T Bell Laboratories,
  2329. Murray Hill, NJ,
  2330. 1995.