ip 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201
  1. .TH IP 3
  2. .SH NAME
  3. ip \- network protocols over IP
  4. .SH SYNOPSIS
  5. .nf
  6. .2C
  7. .B bind -a #I\fIspec\fP /net
  8. .B /net/ipifc
  9. .B /net/ipifc/clone
  10. .B /net/ipifc/stats
  11. .BI /net/ipifc/ n
  12. .BI /net/ipifc/ n /status
  13. .BI /net/ipifc/ n /ctl
  14. \&...
  15. .B /net/arp
  16. .B /net/bootp
  17. .B /net/iproute
  18. .B /net/ipselftab
  19. .B /net/log
  20. .B /net/ndb
  21. .B /net/esp
  22. .B /net/gre
  23. .B /net/icmp
  24. .B /net/icmpv6
  25. .B /net/il
  26. .B /net/ipmux
  27. .B /net/rudp
  28. .B /net/tcp
  29. .B /net/udp
  30. .B /net/tcp/clone
  31. .B /net/tcp/stats
  32. .BI /net/tcp/ n
  33. .BI /net/tcp/ n /data
  34. .BI /net/tcp/ n /ctl
  35. .BI /net/tcp/ n /local
  36. .BI /net/tcp/ n /remote
  37. .BI /net/tcp/ n /status
  38. .BI /net/tcp/ n /listen
  39. \&...
  40. .1C
  41. .fi
  42. .SH DESCRIPTION
  43. The
  44. .I ip
  45. device provides the interface to Internet Protocol stacks.
  46. .I Spec
  47. is an integer from 0 to 15 identifying a stack.
  48. Each stack implements IPv4 and IPv6.
  49. Each stack is independent of all others:
  50. the only information transfer between them is via programs that
  51. mount multiple stacks.
  52. Normally a system uses only one stack.
  53. However, multiple stacks can be used for debugging
  54. new IP networks or implementing firewalls or proxy
  55. services.
  56. .PP
  57. All addresses used are 16-byte IPv6 addresses.
  58. IPv4 addresses are a subset of the IPv6 addresses and both standard
  59. .SM ASCII
  60. formats are accepted.
  61. In binary representation, all v4 addresses start with these 12 bytes, in hex:
  62. .IP
  63. .EX
  64. 00 00 00 00 00 00 00 00 00 00 ff ff
  65. .EE
  66. .
  67. .SS "Configuring interfaces
  68. Each stack may have multiple interfaces and each interface
  69. may have multiple addresses.
  70. The
  71. .B /net/ipifc
  72. directory contains a
  73. .B clone
  74. file, a
  75. .B stats
  76. file, and numbered subdirectories for each physical interface.
  77. .PP
  78. Opening the
  79. .B clone
  80. file reserves an interface.
  81. The file descriptor returned from the
  82. .IR open (2)
  83. will point to the control file,
  84. .BR ctl ,
  85. of the newly allocated interface.
  86. Reading
  87. .B ctl
  88. returns a text string representing the number of the interface.
  89. Writing
  90. .B ctl
  91. alters aspects of the interface.
  92. The possible
  93. .I ctl
  94. messages are:
  95. .\" .TF "bind loopback"
  96. .TF "bind netdev"
  97. .PD
  98. .TP
  99. .BI "bind ether " path
  100. Treat the device mounted at
  101. .I path
  102. as an Ethernet medium carrying IP and ARP packets
  103. and associate it with this interface.
  104. The kernel will
  105. .IR dial (2)
  106. .IR path !0x800
  107. and
  108. .IR path !0x806
  109. and use the two connections for IPv4 and
  110. ARP respectively.
  111. .TP
  112. .B "bind pkt
  113. Treat this interface as a packet interface. Assume
  114. a user program will read and write the
  115. .I data
  116. file to receive and transmit IP packets to the kernel.
  117. This is used by programs such as
  118. .IR ppp (8)
  119. to mediate IP packet transfer between the kernel and
  120. a PPP encoded device.
  121. .TP
  122. .BI "bind netdev " path
  123. Treat this interface as a packet interface.
  124. The kernel will open
  125. .I path
  126. and read and write the resulting file descriptor
  127. to receive and transmit IP packets.
  128. .TP
  129. .BI "bind loopback "
  130. Treat this interface as a local loopback. Anything
  131. written to it will be looped back.
  132. .TP
  133. .B "unbind
  134. Disassociate the physical device from an IP interface.
  135. .TP
  136. .BI add\ "local mask remote mtu " proxy
  137. .PD 0
  138. .TP
  139. .BI try\ "local mask remote mtu " proxy
  140. .PD
  141. Add a local IP address to the interface.
  142. .I try
  143. adds the local address as a tentative address
  144. if it's an IPv6 address.
  145. The
  146. .IR mask ,
  147. .IR remote ,
  148. .IR mtu ,
  149. and
  150. .B proxy
  151. arguments are all optional. The default mask is
  152. the class mask for the local address. The default
  153. remote address is
  154. .I local
  155. ANDed with
  156. .IR mask .
  157. The default mtu is 1514 for Ethernet and 4096 for packet
  158. media.
  159. .IR Proxy ,
  160. if specified, means that this machine should answer
  161. ARP requests for the remote address.
  162. .IR Ppp (8)
  163. does this to make remote machines appear
  164. to be connected to the local Ethernet.
  165. .TP
  166. .BI remove\ "local mask"
  167. Remove a local IP address from an interface.
  168. .TP
  169. .BI addmulti\ Media-addr
  170. Treat the multicast
  171. .I Media-addr
  172. on this interface as a local address.
  173. .TP
  174. .BI remmulti\ Media-addr
  175. Remove the multicast address
  176. .I Media-addr
  177. from this interface.
  178. .TP
  179. .BI mtu\ n
  180. Set the maximum transfer unit for this device to
  181. .IR n .
  182. The mtu is the maximum size of the packet including any
  183. medium-specific headers.
  184. .TP
  185. .BI reassemble
  186. Reassemble IP fragments before forwarding to this interface.
  187. .TP
  188. .BI iprouting\ n
  189. Allow
  190. .RI ( n
  191. is missing or non-zero) or disallow
  192. .RI ( n
  193. is 0) forwarding packets between this interface and
  194. others.
  195. .TP
  196. .B bridge
  197. Enable bridging (see
  198. .IR bridge (3)).
  199. .TP
  200. .B promiscuous
  201. Set the interface into promiscuous mode,
  202. which makes it accept all incoming packets,
  203. whether addressed to it or not.
  204. .TP
  205. .BI "connect " type
  206. marks the Ethernet packet
  207. .I type
  208. as being in use, if not already in use
  209. on this interface.
  210. A
  211. .I type
  212. of -1 means `all' but appears to be a no-op.
  213. .TP
  214. .B scanbs
  215. Make the wireless interface scan for base stations.
  216. .TP
  217. .B headersonly
  218. Set the interface to pass only packet headers, not data too.
  219. .TP
  220. .BI "add6 " "v6addr pfx-len [onlink auto validlt preflt]"
  221. Add the local IPv6 address
  222. .I v6addr
  223. with prefix length
  224. .I pfx-len
  225. to this interface.
  226. See RFC 2461 §6.2.1 for more detail.
  227. The remaining arguments are optional:
  228. .RS
  229. .TF onlink
  230. .TP
  231. .I onlink
  232. flag: address is `on-link'
  233. .TP
  234. .I auto
  235. flag: autonomous
  236. .TP
  237. .I validlt
  238. valid life-time in seconds
  239. .TP
  240. .I preflt
  241. preferred life-time in seconds
  242. .RE
  243. .PD
  244. .TP
  245. .BI "ra6 " "keyword value ..."
  246. Set IPv6 router advertisement (RA) parameter
  247. .IR keyword 's
  248. .IR value .
  249. Known
  250. .IR keyword s
  251. and the meanings of their values follow.
  252. See RFC 2461 §6.2.1 for more detail.
  253. Flags are true iff non-zero.
  254. .RS
  255. .TF minraint
  256. .TP
  257. .B recvra
  258. flag: receive and process RAs.
  259. .TP
  260. .B sendra
  261. flag: generate and send RAs.
  262. .TP
  263. .B mflag
  264. flag: ``Managed address configuration'',
  265. goes into RAs.
  266. .TP
  267. .B oflag
  268. flag: ``Other stateful configuration'',
  269. goes into RAs.
  270. .TP
  271. .B maxraint
  272. ``maximum time allowed between sending unsolicited multicast''
  273. RAs from the interface, in ms.
  274. .TP
  275. .B minraint
  276. ``minimum time allowed between sending unsolicited multicast''
  277. RAs from the interface, in ms.
  278. .TP
  279. .B linkmtu
  280. ``value to be placed in MTU options sent by the router.''
  281. Zero indicates none.
  282. .TP
  283. .B reachtime
  284. sets the Reachable Time field in RAs sent by the router.
  285. ``Zero means unspecified (by this router).''
  286. .TP
  287. .B rxmitra
  288. sets the Retrans Timer field in RAs sent by the router.
  289. ``Zero means unspecified (by this router).''
  290. .TP
  291. .B ttl
  292. default value of the Cur Hop Limit field in RAs sent by the router.
  293. Should be set to the ``current diameter of the Internet.''
  294. ``Zero means unspecified (by this router).''
  295. .TP
  296. .B routerlt
  297. sets the Router Lifetime field of RAs sent from the interface, in ms.
  298. Zero means the router is not to be used as a default router.
  299. .PD
  300. .RE
  301. .PP
  302. Reading the interface's
  303. .I status
  304. file returns information about the interface, one line for each
  305. local address on that interface. The first line
  306. has 9 white-space-separated fields: device, mtu, local address,
  307. mask, remote or network address, packets in, packets out, input errors,
  308. output errors. Each subsequent line contains all but the device and mtu.
  309. See
  310. .I readipifc
  311. in
  312. .IR ip (2).
  313. .
  314. .SS "Routing
  315. The file
  316. .I iproute
  317. controls information about IP routing.
  318. When read, it returns one line per routing entry.
  319. Each line contains six white-space-separated fields:
  320. target address, target mask, address of next hop, flags,
  321. tag, and interface number.
  322. The entry used for routing an IP packet is the one with
  323. the longest mask for which destination address ANDed with
  324. target mask equals the target address.
  325. The one-character flags are:
  326. .TF m
  327. .TP
  328. .B 4
  329. IPv4 route
  330. .TP
  331. .B 6
  332. IPv6 route
  333. .TP
  334. .B i
  335. local interface
  336. .TP
  337. .B b
  338. broadcast address
  339. .TP
  340. .B u
  341. local unicast address
  342. .TP
  343. .B m
  344. multicast route
  345. .TP
  346. .B p
  347. point-to-point route
  348. .PD
  349. .PP
  350. The tag is an arbitrary string of up to 4 characters. It is normally used to
  351. indicate what routing protocol originated the route.
  352. .PP
  353. Writing to
  354. .B /net/iproute
  355. changes the route table. The messages are:
  356. .TF "tag str"
  357. .PD
  358. .TP
  359. .B flush
  360. Remove all routes.
  361. .TP
  362. .BI tag\ string
  363. Associate the tag,
  364. .IR string ,
  365. with all subsequent routes added via this file descriptor.
  366. .TP
  367. .BI add\ "target mask nexthop"
  368. Add the route to the table. If one already exists with the
  369. same target and mask, replace it.
  370. .TP
  371. .BI remove\ "target mask"
  372. Remove a route with a matching target and mask.
  373. .
  374. .SS "Address resolution
  375. The file
  376. .B /net/arp
  377. controls information about address resolution.
  378. The kernel automatically updates the v4 ARP and v6 Neighbour Discovery
  379. information for Ethernet interfaces.
  380. When read, the file returns one line per address containing the
  381. type of medium, the status of the entry (OK, WAIT), the IP
  382. address, and the medium address.
  383. Writing to
  384. .B /net/arp
  385. administers the ARP information.
  386. The control messages are:
  387. .TF "del addr"
  388. .PD
  389. .TP
  390. .B flush
  391. Remove all entries.
  392. .TP
  393. .BI add\ "type IP-addr Media-addr"
  394. Add an entry or replace an existing one for the
  395. same IP address.
  396. .TP
  397. .BI del\ "IP-addr"
  398. Delete an individual entry.
  399. .PP
  400. ARP entries do not time out. The ARP table is a
  401. cache with an LRU replacement policy. The IP stack
  402. listens for all ARP requests and, if the requester is in
  403. the table, the entry is updated.
  404. Also, whenever a new address is configured onto an
  405. Ethernet, an ARP request is sent to help
  406. update the table on other systems.
  407. .PP
  408. Currently, the only medium type is
  409. .BR ether .
  410. .br
  411. .ne 3
  412. .
  413. .SS "Debugging and stack information
  414. If any process is holding
  415. .B /net/log
  416. open, the IP stack queues debugging information to it.
  417. This is intended primarily for debugging the IP stack.
  418. The information provided is implementation-defined;
  419. see the source for details. Generally, what is returned is error messages
  420. about bad packets.
  421. .PP
  422. Writing to
  423. .B /net/log
  424. controls debugging. The control messages are:
  425. .TF "clear addr"
  426. .PD
  427. .TP
  428. .BI set\ arglist
  429. .I Arglist
  430. is a space-separated list of items for which to enable debugging.
  431. The possible items are:
  432. .BR ppp ,
  433. .BR ip ,
  434. .BR fs ,
  435. .BR tcp ,
  436. .BR il ,
  437. .BR icmp ,
  438. .BR udp ,
  439. .BR compress ,
  440. .BR ilmsg ,
  441. .BR gre ,
  442. .BR tcpmsg ,
  443. .BR udpmsg ,
  444. .BR ipmsg ,
  445. and
  446. .BR esp .
  447. .TP
  448. .BI clear\ arglist
  449. .I Arglist
  450. is a space-separated list of items for which to disable debugging.
  451. .TP
  452. .BI only\ addr
  453. If
  454. .I addr
  455. is non-zero, restrict debugging to only those
  456. packets whose source or destination is that
  457. address.
  458. .PP
  459. The file
  460. .B /net/ndb
  461. can be read or written by
  462. programs. It is normally used by
  463. .IR ipconfig (8)
  464. to leave configuration information for other programs
  465. such as
  466. .B dns
  467. and
  468. .B cs
  469. (see
  470. .IR ndb (8)).
  471. .B /net/ndb
  472. may contain up to 1024 bytes.
  473. .PP
  474. The file
  475. .B /net/ipselftab
  476. is a read-only file containing all the IP addresses
  477. considered local. Each line in the file contains
  478. three white-space-separated fields: IP address, usage count,
  479. and flags. The usage count is the number of interfaces to which
  480. the address applies. The flags are the same as for routing
  481. entries.
  482. Note that the `IPv4 route' flag will never be set.
  483. .br
  484. .ne 3
  485. .
  486. .SS "Protocol directories
  487. The
  488. .I ip
  489. device
  490. supports IP as well as several protocols that run over it:
  491. TCP, UDP, RUDP, ICMP, IL, GRE, and ESP.
  492. TCP and UDP provide the standard Internet
  493. protocols for reliable stream and unreliable datagram
  494. communication.
  495. RUDP is a locally developed reliable datagram protocol based on
  496. UDP.
  497. ICMP is IP's catch-all control protocol used to send
  498. low level error messages and to implement
  499. .IR ping (8).
  500. IL provides a reliable datagram service for communication
  501. between Plan 9 machines but is now deprecated.
  502. GRE is a general encapsulation protocol.
  503. ESP is the encapsulation protocol for IPsec.
  504. .PP
  505. Each protocol is a subdirectory of the IP stack.
  506. The top level directory of each protocol contains a
  507. .B clone
  508. file, a
  509. .B stats
  510. file, and subdirectories numbered from zero to the number of connections
  511. opened for this protocol.
  512. .PP
  513. Opening the
  514. .B clone
  515. file reserves a connection. The file descriptor returned from the
  516. .IR open (2)
  517. will point to the control file,
  518. .BR ctl ,
  519. of the newly allocated connection.
  520. Reading
  521. .B ctl
  522. returns a text
  523. string representing the number of the
  524. connection.
  525. Connections may be used either to listen for incoming calls
  526. or to initiate calls to other machines.
  527. .PP
  528. A connection is controlled by writing text strings to the associated
  529. .B ctl
  530. file.
  531. After a connection has been established data may be read from
  532. and written to
  533. .BR data .
  534. A connection can be actively established using the
  535. .B connect
  536. message (see also
  537. .IR dial (2)).
  538. A connection can be established passively by first
  539. using an
  540. .B announce
  541. message (see
  542. .IR dial (2))
  543. to bind to a local port and then
  544. opening the
  545. .B listen
  546. file (see
  547. .IR dial (2))
  548. to receive incoming calls.
  549. .PP
  550. The following control messages are supported:
  551. .TF announceX
  552. .PD
  553. .TP
  554. .BI connect\ ip-address ! port "!r " local
  555. Establish a connection to the remote
  556. .I ip-address
  557. and
  558. .IR port .
  559. If
  560. .I local
  561. is specified, it is used as the local port number.
  562. If
  563. .I local
  564. is not specified but
  565. .B !r
  566. is, the system will allocate
  567. a restricted port number (less than 1024) for the connection to allow communication
  568. with Unix
  569. .B login
  570. and
  571. .B exec
  572. services.
  573. Otherwise a free port number starting at 5000 is chosen.
  574. The connect fails if the combination of local and remote address/port pairs
  575. is already assigned to another port.
  576. .TP
  577. .BI announce\ X
  578. .I X
  579. is a decimal port number or
  580. .LR * .
  581. Set the local port
  582. number to
  583. .I X
  584. and accept calls to
  585. .IR X .
  586. If
  587. .I X
  588. is
  589. .LR * ,
  590. accept
  591. calls for any port that no process has explicitly announced.
  592. The local IP address cannot be set.
  593. .B Announce
  594. fails if the connection is already announced or connected.
  595. .TP
  596. .BI bind\ X
  597. .I X
  598. is a decimal port number or
  599. .LR * .
  600. Set the local port number to
  601. .IR X .
  602. This exists to support emulation
  603. of BSD sockets by the APE libraries (see
  604. .IR pcc (1))
  605. and is not otherwise used.
  606. .TP
  607. .BI backlog\ n
  608. Set the maximum number of unanswered (queued) incoming
  609. connections to an announced port to
  610. .IR n .
  611. By default
  612. .I n
  613. is set to five. If more than
  614. .I n
  615. connections are pending,
  616. further requests for a service will be rejected.
  617. .TP
  618. .BI ttl\ n
  619. Set the time to live IP field in outgoing packets to
  620. .IR n .
  621. .TP
  622. .BI tos\ n
  623. Set the service type IP field in outgoing packets to
  624. .IR n .
  625. .PP
  626. Port numbers must be in the range 1 to 32767.
  627. .PP
  628. Several files report the status of a
  629. connection.
  630. The
  631. .B remote
  632. and
  633. .B local
  634. files contain the IP address and port number for the remote and local side of the
  635. connection. The
  636. .B status
  637. file contains protocol-dependent information to help debug network connections.
  638. On receiving an error or EOF reading or writing the
  639. .B data
  640. file, the
  641. .B err
  642. file contains the reason for error.
  643. .PP
  644. A process may accept incoming connections by
  645. .IR open (2)ing
  646. the
  647. .B listen
  648. file.
  649. The
  650. .B open
  651. will block until a new connection request arrives.
  652. Then
  653. .B open
  654. will return an open file descriptor which points to the control file of the
  655. newly accepted connection.
  656. This procedure will accept all calls for the
  657. given protocol.
  658. See
  659. .IR dial (2).
  660. .
  661. .SS TCP
  662. TCP connections are reliable point-to-point byte streams; there are no
  663. message delimiters.
  664. A connection is determined by the address and port numbers of the two
  665. ends.
  666. TCP
  667. .B ctl
  668. files support the following additional messages:
  669. .TF keepaliven
  670. .PD
  671. .TP
  672. .B hangup
  673. close down a TCP connection
  674. .TP
  675. .BI keepalive \ n
  676. turn on keep alive messages.
  677. .IR N ,
  678. if given, is the number of milliseconds between keepalives
  679. (default 30000).
  680. .
  681. .SS UDP
  682. UDP connections carry unreliable and unordered datagrams. A read from
  683. .B data
  684. will return the next datagram, discarding anything
  685. that doesn't fit in the read buffer.
  686. A write is sent as a single datagram.
  687. .PP
  688. By default, a UDP connection is a point-to-point link.
  689. Either a
  690. .B connect
  691. establishes a local and remote address/port pair or
  692. after an
  693. .BR announce ,
  694. each datagram coming from a different remote address/port pair
  695. establishes a new incoming connection.
  696. However, many-to-one semantics is also possible.
  697. .PP
  698. If, after an
  699. .BR announce ,
  700. the message
  701. .L headers
  702. is written to
  703. .BR ctl ,
  704. then all messages sent to the announced port
  705. are received on the announced connection prefixed
  706. with the corresponding structure,
  707. declared in
  708. .BR <ip.h> :
  709. .IP
  710. .EX
  711. typedef struct Udphdr Udphdr;
  712. struct Udphdr
  713. {
  714. uchar raddr[16]; /* V6 remote address */
  715. uchar laddr[16]; /* V6 local address */
  716. uchar ifcaddr[16]; /* V6 interface address (receive only) */
  717. uchar rport[2]; /* remote port */
  718. uchar lport[2]; /* local port */
  719. };
  720. .EE
  721. .PP
  722. Before a write, a user must prefix a similar structure to each message.
  723. The system overrides the user specified local port with the announced
  724. one. If the user specifies an address that isn't a unicast address in
  725. .BR /net/ipselftab ,
  726. that too is overridden.
  727. Since the prefixed structure is the same in read and write, it is relatively
  728. easy to write a server that responds to client requests by just copying new
  729. data into the message body and then writing back the same buffer that was
  730. read.
  731. .PP
  732. In this case (writing
  733. .L headers
  734. to the
  735. .I ctl
  736. file),
  737. neither
  738. .I listen
  739. nor
  740. .I accept
  741. is needed;
  742. otherwise,
  743. the usual sequence of
  744. .IR announce ,
  745. .IR listen ,
  746. .I accept
  747. must be executed before performing I/O on the corresponding
  748. .I data
  749. file.
  750. .
  751. .SS RUDP
  752. RUDP is a reliable datagram protocol based on UDP.
  753. Packets are delivered in order.
  754. RUDP does not support
  755. .BR listen .
  756. One must write either
  757. .B connect
  758. or
  759. .B announce
  760. followed immediately by
  761. .B headers
  762. to
  763. .BR ctl .
  764. .PP
  765. Unlike IL or TCP, the reboot of one end of a connection does
  766. not force a closing of the connection. Communications will
  767. resume when the rebooted machine resumes talking. Any unacknowledged
  768. packets queued before the reboot will be lost. A reboot can
  769. be detected by reading the
  770. .B err
  771. file. It will contain the message
  772. .IP
  773. .BI hangup\ address ! port
  774. .PP
  775. where
  776. .I address
  777. and
  778. .I port
  779. are of the far side of the connection.
  780. Retransmitting a datagram more than 10 times
  781. is treated like a reboot:
  782. all queued messages are dropped, an error is queued to the
  783. .B err
  784. file, and the conversation resumes.
  785. .
  786. .SS ICMP
  787. ICMP is a datagram protocol for IPv4 used to exchange control requests and
  788. their responses with other machines' IP implementations.
  789. ICMP is primarily a kernel-to-kernel protocol, but it is possible
  790. to generate `echo request' and read `echo reply' packets from user programs.
  791. .
  792. .SS ICMPV6
  793. ICMPv6 is the IPv6 equivalent of ICMP.
  794. If, after an
  795. .BR announce ,
  796. the message
  797. .L headers
  798. is written to
  799. .BR ctl ,
  800. then before a write,
  801. a user must prefix each message with a corresponding structure,
  802. declared in
  803. .BR <ip.h> :
  804. .IP
  805. .EX
  806. /*
  807. * user level icmpv6 with control message "headers"
  808. */
  809. typedef struct Icmp6hdr Icmp6hdr;
  810. struct Icmp6hdr {
  811. uchar unused[8];
  812. uchar laddr[IPaddrlen]; /* local address */
  813. uchar raddr[IPaddrlen]; /* remote address */
  814. };
  815. .EE
  816. .PP
  817. In this case (writing
  818. .L headers
  819. to the
  820. .I ctl
  821. file),
  822. neither
  823. .I listen
  824. nor
  825. .I accept
  826. is needed;
  827. otherwise,
  828. the usual sequence of
  829. .IR announce ,
  830. .IR listen ,
  831. .I accept
  832. must be executed before performing I/O on the corresponding
  833. .I data
  834. file.
  835. .
  836. .SS IL
  837. IL is a reliable point-to-point datagram protocol that runs over IPv4.
  838. Like TCP, IL delivers datagrams
  839. reliably and in order. Also like TCP, a connection is
  840. determined by the address and port numbers of the two ends.
  841. Like UDP, each read and write transfers a single datagram.
  842. .PP
  843. IL is efficient for LANs but doesn't have the
  844. congestion control features needed for use through
  845. the Internet.
  846. It is no longer necessary, except to communicate with old standalone
  847. .IR fs (4)
  848. file servers.
  849. Its use is now deprecated.
  850. .
  851. .SS GRE
  852. GRE is the encapsulation protocol used by PPTP.
  853. The kernel implements just enough of the protocol
  854. to multiplex it.
  855. Our implementation encapsulates in IPv4, per RFC 1702.
  856. .B Announce
  857. is not allowed in GRE, only
  858. .BR connect .
  859. Since GRE has no port numbers, the port number in the connect
  860. is actually the 16 bit
  861. .B eproto
  862. field in the GRE header.
  863. .PP
  864. Reads and writes transfer a
  865. GRE datagram starting at the GRE header.
  866. On write, the kernel fills in the
  867. .B eproto
  868. field with the port number specified
  869. in the connect message.
  870. .br
  871. .ne 3
  872. .
  873. .SS ESP
  874. ESP is the Encapsulating Security Payload (RFC 1827, obsoleted by RFC 4303)
  875. for IPsec (RFC 4301).
  876. We currently implement only tunnel mode, not transport mode.
  877. It is used to set up an encrypted tunnel between machines.
  878. Like GRE, ESP has no port numbers. Instead, the
  879. port number in the
  880. .B connect
  881. message is the SPI (Security Parameters Index).
  882. IP packets are written to and read from
  883. .BR data .
  884. The kernel encrypts any packets written to
  885. .BR data ,
  886. appends a MAC, and prefixes an ESP header before
  887. sending to the other end of the tunnel.
  888. Received packets are checked against their MACs,
  889. decrypted, and queued for reading from
  890. .BR data .
  891. The control messages are:
  892. .TF "alg secret"
  893. .PD
  894. .TP
  895. .BI esp\ "alg secret
  896. Encrypt with the algorithm,
  897. .IR alg ,
  898. using
  899. .I secret
  900. as the key.
  901. Possible algorithms are:
  902. .BR null ,
  903. .BR des_56_cbc ,
  904. and eventually
  905. .BR des3_cbc ,
  906. .BR aes_128_cbc ,
  907. and
  908. .BR aes_ctr .
  909. .TP
  910. .BI ah\ "alg secret
  911. Use the hash algorithm,
  912. .IR alg ,
  913. with
  914. .I secret
  915. as the key for generating the MAC.
  916. Possible algorithms are:
  917. .BR null ,
  918. .BR hmac_sha1_96 ,
  919. .BR hmac_md5_96 ,
  920. and eventually
  921. .BR aes_xcbc_mac_96 .
  922. .TP
  923. .B header
  924. Turn on header mode. Every buffer read from
  925. .B data
  926. starts with 4 unused bytes, and the first 4 bytes
  927. of every buffer written to
  928. .B data
  929. are ignored.
  930. .TP
  931. .B noheader
  932. Turn off header mode.
  933. .
  934. .SS "IP packet filter
  935. The directory
  936. .B /net/ipmux
  937. looks like another protocol directory.
  938. It is a packet filter built on top of IP.
  939. Each numbered
  940. subdirectory represents a different filter.
  941. The connect messages written to the
  942. .I ctl
  943. file describe the filter. Packets matching the filter can be read on the
  944. .B data
  945. file. Packets written to the
  946. .B data
  947. file are routed to an interface and transmitted.
  948. .PP
  949. A filter is a semicolon-separated list of
  950. relations. Each relation describes a portion
  951. of a packet to match. The possible relations are:
  952. .TF "iph[n:m]=expr"
  953. .PD
  954. .TP
  955. .BI proto= n
  956. the IP protocol number must be
  957. .IR n .
  958. .TP
  959. .BI data[ n : m ]= expr
  960. bytes
  961. .I n
  962. through
  963. .I m
  964. following the IP packet header must match
  965. .IR expr .
  966. .TP
  967. .BI iph[ n : m ]= expr
  968. bytes
  969. .I n
  970. through
  971. .I m
  972. of the IP packet header must match
  973. .IR expr .
  974. .TP
  975. .BI ifc= expr
  976. the packet must have been received on an interface whose address
  977. matches
  978. .IR expr .
  979. .TP
  980. .BI src= expr
  981. the source address in the packet must match
  982. .IR expr .
  983. .TP
  984. .BI dst= expr
  985. the destination address in the packet must match
  986. .IR expr .
  987. .PP
  988. .I Expr
  989. is of the form:
  990. .TP
  991. .I \ value
  992. .TP
  993. .IB \ value | value | ...
  994. .TP
  995. .IB \ value & mask
  996. .TP
  997. .IB \ value | value & mask
  998. .PP
  999. If a mask is given, the relevant field is first ANDed with
  1000. the mask. The result is compared against the value or list
  1001. of values for a match. In the case of
  1002. .BR ifc ,
  1003. .BR dst ,
  1004. and
  1005. .B src
  1006. the value is a dot-formatted IP address and the mask is a dot-formatted
  1007. IP mask. In the case of
  1008. .BR data ,
  1009. .B iph
  1010. and
  1011. .BR proto ,
  1012. both value and mask are strings of 2 hexadecimal digits representing
  1013. 8-bit values.
  1014. .PP
  1015. A packet is delivered to only one filter.
  1016. The filters are merged into a single comparison tree.
  1017. If two filters match the same packet, the following
  1018. rules apply in order (here '>' means "is preferred to"):
  1019. .IP 1)
  1020. protocol > data > source > destination > interface
  1021. .IP 2)
  1022. lower data offsets > higher data offsets
  1023. .IP 3)
  1024. longer matches > shorter matches
  1025. .IP 4)
  1026. older > younger
  1027. .PP
  1028. So far this has just been used to implement a version of
  1029. OSPF in Inferno
  1030. and 6to4 tunnelling.
  1031. .br
  1032. .ne 5
  1033. .
  1034. .SS Statistics
  1035. The
  1036. .B stats
  1037. files are read only and contain statistics useful to network monitoring.
  1038. .br
  1039. .ne 12
  1040. .PP
  1041. Reading
  1042. .B /net/ipifc/stats
  1043. returns a list of 19 tagged and newline-separated fields representing:
  1044. .EX
  1045. .ft 1
  1046. .2C
  1047. .in +0.25i
  1048. forwarding status (0 and 2 mean forwarding off,
  1049. 1 means on)
  1050. default TTL
  1051. input packets
  1052. input header errors
  1053. input address errors
  1054. packets forwarded
  1055. input packets for unknown protocols
  1056. input packets discarded
  1057. input packets delivered to higher level protocols
  1058. output packets
  1059. output packets discarded
  1060. output packets with no route
  1061. timed out fragments in reassembly queue
  1062. requested reassemblies
  1063. successful reassemblies
  1064. failed reassemblies
  1065. successful fragmentations
  1066. unsuccessful fragmentations
  1067. fragments created
  1068. .in -0.25i
  1069. .1C
  1070. .ft
  1071. .EE
  1072. .br
  1073. .ne 16
  1074. .PP
  1075. Reading
  1076. .B /net/icmp/stats
  1077. returns a list of 26 tagged and newline-separated fields representing:
  1078. .EX
  1079. .ft 1
  1080. .2C
  1081. .in +0.25i
  1082. messages received
  1083. bad received messages
  1084. unreachables received
  1085. time exceededs received
  1086. input parameter problems received
  1087. source quenches received
  1088. redirects received
  1089. echo requests received
  1090. echo replies received
  1091. timestamps received
  1092. timestamp replies received
  1093. address mask requests received
  1094. address mask replies received
  1095. messages sent
  1096. transmission errors
  1097. unreachables sent
  1098. time exceededs sent
  1099. input parameter problems sent
  1100. source quenches sent
  1101. redirects sent
  1102. echo requests sent
  1103. echo replies sent
  1104. timestamps sent
  1105. timestamp replies sent
  1106. address mask requests sent
  1107. address mask replies sent
  1108. .in -0.25i
  1109. .1C
  1110. .EE
  1111. .PP
  1112. Reading
  1113. .B /net/tcp/stats
  1114. returns a list of 11 tagged and newline-separated fields representing:
  1115. .EX
  1116. .ft 1
  1117. .2C
  1118. .in +0.25i
  1119. maximum number of connections
  1120. total outgoing calls
  1121. total incoming calls
  1122. number of established connections to be reset
  1123. number of currently established connections
  1124. segments received
  1125. segments sent
  1126. segments retransmitted
  1127. retransmit timeouts
  1128. bad received segments
  1129. transmission failures
  1130. .in -0.25i
  1131. .1C
  1132. .EE
  1133. .PP
  1134. Reading
  1135. .B /net/udp/stats
  1136. returns a list of 4 tagged and newline-separated fields representing:
  1137. .EX
  1138. .ft 1
  1139. .2C
  1140. .in +0.25i
  1141. datagrams received
  1142. datagrams received for bad ports
  1143. malformed datagrams received
  1144. datagrams sent
  1145. .in -0.25i
  1146. .1C
  1147. .EE
  1148. .PP
  1149. Reading
  1150. .B /net/il/stats
  1151. returns a list of 6 tagged and newline-separated fields representing:
  1152. .EX
  1153. .ft 1
  1154. .2C
  1155. .in +0.25i
  1156. checksum errors
  1157. header length errors
  1158. out of order messages
  1159. retransmitted messages
  1160. duplicate messages
  1161. duplicate bytes
  1162. .in -0.25i
  1163. .1C
  1164. .EE
  1165. .PP
  1166. Reading
  1167. .B /net/gre/stats
  1168. returns a single tagged number representing:
  1169. .EX
  1170. .ft 1
  1171. .in +0.25i
  1172. header length errors
  1173. .in -0.25i
  1174. .EE
  1175. .SH "SEE ALSO"
  1176. .IR dial (2),
  1177. .IR ip (2),
  1178. .IR ndb (6),
  1179. .IR listen (8)
  1180. .br
  1181. .PD 0
  1182. .TF /lib/rfc/rfc2822
  1183. .TP
  1184. .B /lib/rfc/rfc2460
  1185. IPv6
  1186. .TP
  1187. .B /lib/rfc/rfc4291
  1188. IPv6 address architecture
  1189. .TP
  1190. .B /lib/rfc/rfc4443
  1191. ICMPv6
  1192. .SH SOURCE
  1193. .B /sys/src/9/ip
  1194. .SH BUGS
  1195. .I Ipmux
  1196. has not been heavily used and should be considered experimental.
  1197. It may disappear in favor of a more traditional packet filter in the future.