ip 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167
  1. .TH IP 3
  2. .SH NAME
  3. ip \- network protocols over IP
  4. .SH SYNOPSIS
  5. .nf
  6. .2C
  7. .B bind -a #I\fIspec\fP /net
  8. .B /net/ipifc
  9. .B /net/ipifc/clone
  10. .B /net/ipifc/stats
  11. .BI /net/ipifc/ n
  12. .BI /net/ipifc/ n /status
  13. .BI /net/ipifc/ n /ctl
  14. \&...
  15. .B /net/arp
  16. .B /net/bootp
  17. .B /net/iproute
  18. .B /net/ipselftab
  19. .B /net/log
  20. .B /net/ndb
  21. .B /net/esp
  22. .B /net/gre
  23. .B /net/icmp
  24. .B /net/icmpv6
  25. .B /net/ipmux
  26. .B /net/rudp
  27. .B /net/tcp
  28. .B /net/udp
  29. .B /net/tcp/clone
  30. .B /net/tcp/stats
  31. .BI /net/tcp/ n
  32. .BI /net/tcp/ n /data
  33. .BI /net/tcp/ n /ctl
  34. .BI /net/tcp/ n /local
  35. .BI /net/tcp/ n /remote
  36. .BI /net/tcp/ n /status
  37. .BI /net/tcp/ n /listen
  38. \&...
  39. .1C
  40. .fi
  41. .SH DESCRIPTION
  42. The
  43. .I ip
  44. device provides the interface to Internet Protocol stacks.
  45. .I Spec
  46. is an integer from 0 to 15 identifying a stack.
  47. Each stack implements IPv4 and IPv6.
  48. Each stack is independent of all others:
  49. the only information transfer between them is via programs that
  50. mount multiple stacks.
  51. Normally a system uses only one stack.
  52. However multiple stacks can be used for debugging
  53. new IP networks or implementing firewalls or proxy
  54. services.
  55. .PP
  56. All addresses used are 16-byte IPv6 addresses.
  57. IPv4 addresses are a subset of the IPv6 addresses and both standard
  58. .SM ASCII
  59. formats are accepted.
  60. In binary representation, all v4 addresses start with the following 12 bytes, shown in hex:
  61. .IP
  62. .EX
  63. 00 00 00 00 00 00 00 00 00 00 ff ff
  64. .EE
  65. .
  66. .SS "Configuring interfaces
  67. Each stack may have multiple interfaces and each interface
  68. may have multiple addresses.
  69. The
  70. .B /net/ipifc
  71. directory contains a
  72. .B clone
  73. file, a
  74. .B stats
  75. file, and numbered subdirectories for each physical interface.
  76. .PP
  77. Opening the
  78. .B clone
  79. file reserves an interface.
  80. The file descriptor returned from the
  81. .IR open (2)
  82. will point to the control file,
  83. .BR ctl ,
  84. of the newly allocated interface.
  85. Reading
  86. .B ctl
  87. returns a text string representing the number of the interface.
  88. Writing
  89. .B ctl
  90. alters aspects of the interface.
  91. The possible
  92. .I ctl
  93. messages are:
  94. .\" .TF "bind loopback"
  95. .TF "bind netdev"
  96. .PD
  97. .TP
  98. .BI "bind ether " path
  99. Treat the device mounted at
  100. .I path
  101. as an Ethernet medium carrying IP and ARP packets
  102. and associate it with this interface.
  103. The kernel will
  104. .IR dial (2)
  105. .IR path !0x800
  106. and
  107. .IR path !0x806
  108. and use the two connections for IPv4 and
  109. ARP respectively.
  110. .TP
  111. .B "bind pkt
  112. Treat this interface as a packet interface. Assume
  113. a user program will read and write the
  114. .I data
  115. file to receive and transmit IP packets to the kernel.
  116. This is used by programs such as
  117. .IR ppp (8)
  118. to mediate IP packet transfer between the kernel and
  119. a PPP encoded device.
  120. .TP
  121. .BI "bind netdev " path
  122. Treat this interface as a packet interface.
  123. The kernel will open
  124. .I path
  125. and read and write the resulting file descriptor
  126. to receive and transmit IP packets.
  127. .TP
  128. .BI "bind loopback "
  129. Treat this interface as a local loopback. Anything
  130. written to it will be looped back.
  131. .TP
  132. .B "unbind
  133. Disassociate the physical device from an IP interface.
  134. .TP
  135. .BI add\ "local mask remote mtu " proxy
  136. .PD 0
  137. .TP
  138. .BI try\ "local mask remote mtu " proxy
  139. .PD
  140. Add a local IP address to the interface.
  141. .I try
  142. adds the local address as a tentative address
  143. if it's an IPv6 address.
  144. The
  145. .IR mask ,
  146. .IR remote ,
  147. .IR mtu ,
  148. and
  149. .B proxy
  150. arguments are all optional. The default mask is
  151. the class mask for the local address. The default
  152. remote address is
  153. .I local
  154. ANDed with
  155. .IR mask .
  156. The default mtu is 1514 for Ethernet and 4096 for packet
  157. media.
  158. .IR Proxy ,
  159. if specified, means that this machine should answer
  160. ARP requests for the remote address.
  161. .IR Ppp (8)
  162. does this to make remote machines appear
  163. to be connected to the local Ethernet.
  164. .TP
  165. .BI remove\ "local mask"
  166. Remove a local IP address from an interface.
  167. .TP
  168. .BI addmulti\ Media-addr
  169. Treat the multicast
  170. .I Media-addr
  171. on this interface as a local address.
  172. .TP
  173. .BI remmulti\ Media-addr
  174. Remove the multicast address
  175. .I Media-addr
  176. from this interface.
  177. .TP
  178. .BI mtu\ n
  179. Set the maximum transfer unit for this device to
  180. .IR n .
  181. The mtu is the maximum size of the packet including any
  182. medium-specific headers.
  183. .TP
  184. .BI reassemble
  185. Reassemble IP fragments before forwarding to this interface.
  186. .TP
  187. .BI iprouting\ n
  188. Allow
  189. .RI ( n
  190. is missing or non-zero) or disallow
  191. .RI ( n
  192. is 0) forwarding packets between this interface and
  193. others.
  194. .TP
  195. .B bridge
  196. Enable bridging (see
  197. .IR bridge (3)).
  198. .TP
  199. .B promiscuous
  200. Set the interface into promiscuous mode,
  201. which makes it accept all incoming packets,
  202. whether addressed to it or not.
  203. .TP
  204. .BI "connect " type
  205. Marks the Ethernet packet
  206. .I type
  207. as being in use, if not already in use
  208. on this interface.
  209. A
  210. .I type
  211. of -1 means `all' but appears to be a no-op.
  212. .TP
  213. .B scanbs
  214. Make the wireless interface scan for base stations.
  215. .TP
  216. .B headersonly
  217. Set the interface to pass only packet headers, not data too.
  218. .TP
  219. .BI "add6 " "v6addr pfx-len [onlink auto validlt preflt]"
  220. Add the local IPv6 address
  221. .I v6addr
  222. with prefix length
  223. .I pfx-len
  224. to this interface.
  225. See RFC 2461 §6.2.1 for more detail.
  226. The remaining arguments are optional:
  227. .RS
  228. .TF onlink
  229. .TP
  230. .I onlink
  231. flag: address is `on-link'
  232. .TP
  233. .I auto
  234. flag: autonomous
  235. .TP
  236. .I validlt
  237. valid life-time in seconds
  238. .TP
  239. .I preflt
  240. preferred life-time in seconds
  241. .RE
  242. .PD
  243. .TP
  244. .BI "ra6 " "keyword value ..."
  245. Set IPv6 router advertisement (RA) parameter
  246. .IR keyword 's
  247. .IR value .
  248. Known
  249. .IR keyword s
  250. and the meanings of their values follow.
  251. See RFC 2461 §6.2.1 for more detail.
  252. Flags are true iff non-zero.
  253. .RS
  254. .TF minraint
  255. .TP
  256. .B recvra
  257. flag: receive and process RAs.
  258. .TP
  259. .B sendra
  260. flag: generate and send RAs.
  261. .TP
  262. .B mflag
  263. flag: ``Managed address configuration'',
  264. goes into RAs.
  265. .TP
  266. .B oflag
  267. flag: ``Other stateful configuration'',
  268. goes into RAs.
  269. .TP
  270. .B maxraint
  271. ``maximum time allowed between sending unsolicited multicast''
  272. RAs from the interface, in ms.
  273. .TP
  274. .B minraint
  275. ``minimum time allowed between sending unsolicited multicast''
  276. RAs from the interface, in ms.
  277. .TP
  278. .B linkmtu
  279. ``value to be placed in MTU options sent by the router.''
  280. Zero indicates none.
  281. .TP
  282. .B reachtime
  283. sets the Reachable Time field in RAs sent by the router.
  284. ``Zero means unspecified (by this router).''
  285. .TP
  286. .B rxmitra
  287. sets the Retrans Timer field in RAs sent by the router.
  288. ``Zero means unspecified (by this router).''
  289. .TP
  290. .B ttl
  291. default value of the Cur Hop Limit field in RAs sent by the router.
  292. Should be set to the ``current diameter of the Internet.''
  293. ``Zero means unspecified (by this router).''
  294. .TP
  295. .B routerlt
  296. sets the Router Lifetime field of RAs sent from the interface, in ms.
  297. Zero means the router is not to be used as a default router.
  298. .PD
  299. .RE
  300. .PP
  301. Reading the interface's
  302. .I status
  303. file returns information about the interface, one line for each
  304. local address on that interface. The first line
  305. has 9 white-space-separated fields: device, mtu, local address,
  306. mask, remote or network address, packets in, packets out, input errors,
  307. output errors. Each subsequent line contains all but the device and mtu.
  308. See
  309. .I readipifc
  310. in
  311. .IR ip (2).
  312. .
  313. .SS "Routing
  314. The file
  315. .I iproute
  316. controls information about IP routing.
  317. When read, it returns one line per routing entry.
  318. Each line contains six white-space-separated fields:
  319. target address, target mask, address of next hop, flags,
  320. tag, and interface number.
  321. The entry used for routing an IP packet is the one with
  322. the longest mask for which destination address ANDed with
  323. target mask equals the target address.
  324. The one-character flags are:
  325. .TF m
  326. .TP
  327. .B 4
  328. IPv4 route
  329. .TP
  330. .B 6
  331. IPv6 route
  332. .TP
  333. .B i
  334. local interface
  335. .TP
  336. .B b
  337. broadcast address
  338. .TP
  339. .B u
  340. local unicast address
  341. .TP
  342. .B m
  343. multicast route
  344. .TP
  345. .B p
  346. point-to-point route
  347. .PD
  348. .PP
  349. The tag is an arbitrary string of up to 4 characters. It is normally used to
  350. indicate what routing protocol originated the route.
  351. .PP
  352. Writing to
  353. .B /net/iproute
  354. changes the route table. The messages are:
  355. .TF "tag str"
  356. .PD
  357. .TP
  358. .B flush
  359. Remove all routes.
  360. .TP
  361. .BI tag\ string
  362. Associate the tag,
  363. .IR string ,
  364. with all subsequent routes added via this file descriptor.
  365. .TP
  366. .BI add\ "target mask nexthop"
  367. Add the route to the table. If one already exists with the
  368. same target and mask, replace it.
  369. .TP
  370. .BI remove\ "target mask"
  371. Remove a route with a matching target and mask.
  372. .
  373. .SS "Address resolution
  374. The file
  375. .B /net/arp
  376. controls information about address resolution.
  377. The kernel automatically updates the v4 ARP and v6 Neighbour Discovery
  378. information for Ethernet interfaces.
  379. When read, the file returns one line per address containing the
  380. type of medium, the status of the entry (OK, WAIT), the IP
  381. address, and the medium address.
  382. Writing to
  383. .B /net/arp
  384. administers the ARP information.
  385. The control messages are:
  386. .TF "del addr"
  387. .PD
  388. .TP
  389. .B flush
  390. Remove all entries.
  391. .TP
  392. .BI add\ "type IP-addr Media-addr"
  393. Add an entry or replace an existing one for the
  394. same IP address.
  395. .TP
  396. .BI del\ "IP-addr"
  397. Delete an individual entry.
  398. .PP
  399. ARP entries do not time out. The ARP table is a
  400. cache with an LRU replacement policy. The IP stack
  401. listens for all ARP requests and, if the requester is in
  402. the table, the entry is updated.
  403. Also, whenever a new address is configured onto an
  404. Ethernet, an ARP request is sent to help
  405. update the table on other systems.
  406. .PP
  407. Currently, the only medium type is
  408. .BR ether .
  409. .br
  410. .ne 3
  411. .
  412. .SS "Debugging and stack information
  413. If any process is holding
  414. .B /net/log
  415. open, the IP stack queues debugging information to it.
  416. This is intended primarily for debugging the IP stack.
  417. The information provided is implementation-defined;
  418. see the source for details. Generally, what is returned is error messages
  419. about bad packets.
  420. .PP
  421. Writing to
  422. .B /net/log
  423. controls debugging. The control messages are:
  424. .TF "clear addr"
  425. .PD
  426. .TP
  427. .BI set\ arglist
  428. .I Arglist
  429. is a space-separated list of items for which to enable debugging.
  430. The possible items are:
  431. .BR ppp ,
  432. .BR ip ,
  433. .BR fs ,
  434. .BR tcp ,
  435. .BR icmp ,
  436. .BR udp ,
  437. .BR compress ,
  438. .BR gre ,
  439. .BR tcpwin ,
  440. .BR tcprxmt ,
  441. .BR udpmsg ,
  442. .BR ipmsg ,
  443. and
  444. .BR esp .
  445. .TP
  446. .BI clear\ arglist
  447. .I Arglist
  448. is a space-separated list of items for which to disable debugging.
  449. .TP
  450. .BI only\ addr
  451. If
  452. .I addr
  453. is non-zero, restrict debugging to only those
  454. packets whose source or destination is that
  455. address.
  456. .PP
  457. The file
  458. .B /net/ndb
  459. can be read or written by
  460. programs. It is normally used by
  461. .IR ipconfig (8)
  462. to leave configuration information for other programs
  463. such as
  464. .B dns
  465. and
  466. .B cs
  467. (see
  468. .IR ndb (8)).
  469. .B /net/ndb
  470. may contain up to 1024 bytes.
  471. .PP
  472. The file
  473. .B /net/ipselftab
  474. is a read-only file containing all the IP addresses
  475. considered local. Each line in the file contains
  476. three white-space-separated fields: IP address, usage count,
  477. and flags. The usage count is the number of interfaces to which
  478. the address applies. The flags are the same as for routing
  479. entries.
  480. Note that the `IPv4 route' flag will never be set.
  481. .br
  482. .ne 3
  483. .
  484. .SS "Protocol directories
  485. The
  486. .I ip
  487. device
  488. supports IP as well as several protocols that run over it:
  489. TCP, UDP, RUDP, ICMP, GRE, and ESP.
  490. TCP and UDP provide the standard Internet
  491. protocols for reliable stream and unreliable datagram
  492. communication.
  493. RUDP is a locally-developed reliable datagram protocol based on UDP.
  494. ICMP is IP's catch-all control protocol used to send
  495. low level error messages and to implement
  496. .IR ping (8).
  497. GRE is a general encapsulation protocol.
  498. ESP is the encapsulation protocol for IPsec.
  499. IL provided a reliable datagram service for communication
  500. between Plan 9 machines over IPv4, but is no longer part of the system.
  501. .PP
  502. Each protocol is a subdirectory of the IP stack.
  503. The top level directory of each protocol contains a
  504. .B clone
  505. file, a
  506. .B stats
  507. file, and subdirectories numbered from zero to the number of connections
  508. opened for this protocol.
  509. .PP
  510. Opening the
  511. .B clone
  512. file reserves a connection. The file descriptor returned from the
  513. .IR open (2)
  514. will point to the control file,
  515. .BR ctl ,
  516. of the newly allocated connection.
  517. Reading
  518. .B ctl
  519. returns a text
  520. string representing the number of the
  521. connection.
  522. Connections may be used either to listen for incoming calls
  523. or to initiate calls to other machines.
  524. .PP
  525. A connection is controlled by writing text strings to the associated
  526. .B ctl
  527. file.
  528. After a connection has been established data may be read from
  529. and written to
  530. .BR data .
  531. A connection can be actively established using the
  532. .B connect
  533. message (see also
  534. .IR dial (2)).
  535. A connection can be established passively by first
  536. using an
  537. .B announce
  538. message (see
  539. .IR dial (2))
  540. to bind to a local port and then
  541. opening the
  542. .B listen
  543. file (see
  544. .IR dial (2))
  545. to receive incoming calls.
  546. .PP
  547. The following control messages are supported:
  548. .TF announceX
  549. .PD
  550. .TP
  551. .BI connect\ ip-address ! port "!r " local
  552. Establish a connection to the remote
  553. .I ip-address
  554. and
  555. .IR port .
  556. If
  557. .I local
  558. is specified, it is used as the local port number.
  559. If
  560. .I local
  561. is not specified but
  562. .B !r
  563. is, the system will allocate
  564. a restricted port number (less than 1024) for the connection to allow communication
  565. with Unix
  566. .B login
  567. and
  568. .B exec
  569. services.
  570. Otherwise a free port number starting at 5000 is chosen.
  571. The connect fails if the combination of local and remote address/port pairs
  572. is already assigned to another connection.
  573. .TP
  574. .BI announce\ X
  575. .I X
  576. is a decimal port number or
  577. .LR * .
  578. Set the local port
  579. number to
  580. .I X
  581. and accept calls to
  582. .IR X .
  583. If
  584. .I X
  585. is
  586. .LR * ,
  587. accept
  588. calls for any port that no process has explicitly announced.
  589. The local IP address cannot be set.
  590. .B Announce
  591. fails if the connection is already announced or connected.
  592. .TP
  593. .BI bind\ X
  594. .I X
  595. is a decimal port number or
  596. .LR * .
  597. Set the local port number to
  598. .IR X .
  599. This exists to support emulation
  600. of BSD sockets by the APE libraries (see
  601. .IR pcc (1))
  602. and is not otherwise used.
  603. .TP
  604. .BI backlog\ n
  605. Set the maximum number of unanswered (queued) incoming
  606. connections to an announced port to
  607. .IR n .
  608. By default
  609. .I n
  610. is set to five. If more than
  611. .I n
  612. connections are pending,
  613. further requests for a service will be rejected.
  614. .TP
  615. .BI ttl\ n
  616. Set the time to live IP field in outgoing packets to
  617. .IR n .
  618. .TP
  619. .BI tos\ n
  620. Set the service type IP field in outgoing packets to
  621. .IR n .
  622. .PP
  623. Port numbers must be in the range 1 to 32767.
  624. .PP
  625. Several files report the status of a
  626. connection.
  627. The
  628. .B remote
  629. and
  630. .B local
  631. files contain the IP address and port number for the remote and local side of the
  632. connection. The
  633. .B status
  634. file contains protocol-dependent information to help debug network connections.
  635. On receiving an error or EOF reading or writing the
  636. .B data
  637. file, the
  638. .B err
  639. file contains the reason for error.
  640. .PP
  641. A process may accept incoming connections by
  642. .IR open (2)ing
  643. the
  644. .B listen
  645. file.
  646. The
  647. .B open
  648. will block until a new connection request arrives.
  649. Then
  650. .B open
  651. will return an open file descriptor which points to the control file of the
  652. newly accepted connection.
  653. This procedure will accept all calls for the
  654. given protocol.
  655. See
  656. .IR dial (2).
  657. .
  658. .SS TCP
  659. TCP connections are reliable point-to-point byte streams; there are no
  660. message delimiters.
  661. A connection is determined by the address and port numbers of the two
  662. ends.
  663. TCP
  664. .B ctl
  665. files support the following additional messages:
  666. .TF keepaliven
  667. .PD
  668. .TP
  669. .B hangup
  670. Close down a TCP connection.
  671. .TP
  672. .BI keepalive \ n
  673. Turn on keepalive messages.
  674. .IR N ,
  675. if given, is the milliseconds between keepalives
  676. (default 30000).
  677. .
  678. .SS UDP
  679. UDP connections carry unreliable and unordered datagrams. A read from
  680. .B data
  681. will return the next datagram, discarding anything
  682. that doesn't fit in the read buffer.
  683. A write is sent as a single datagram.
  684. .PP
  685. By default, a UDP connection is a point-to-point link.
  686. Either a
  687. .B connect
  688. establishes a local and remote address/port pair or
  689. after an
  690. .BR announce ,
  691. each datagram coming from a different remote address/port pair
  692. establishes a new incoming connection.
  693. However, many-to-one semantics is also possible.
  694. .PP
  695. If, after an
  696. .BR announce ,
  697. the message
  698. .L headers
  699. is written to
  700. .BR ctl ,
  701. then all messages sent to the announced port
  702. are received on the announced connection prefixed
  703. with the corresponding structure,
  704. declared in
  705. .BR <ip.h> :
  706. .IP
  707. .EX
  708. typedef struct Udphdr Udphdr;
  709. struct Udphdr
  710. {
  711. uchar raddr[16]; /* V6 remote address */
  712. uchar laddr[16]; /* V6 local address */
  713. uchar ifcaddr[16]; /* V6 interface address (receive only) */
  714. uchar rport[2]; /* remote port */
  715. uchar lport[2]; /* local port */
  716. };
  717. .EE
  718. .PP
  719. Before a write, a user must prefix a similar structure to each message.
  720. The system overrides the user specified local port with the announced
  721. one. If the user specifies an address that isn't a unicast address in
  722. .BR /net/ipselftab ,
  723. that too is overridden.
  724. Since the prefixed structure is the same in read and write, it is relatively
  725. easy to write a server that responds to client requests by just copying new
  726. data into the message body and then writing back the same buffer that was
  727. read.
  728. .PP
  729. In this case (writing
  730. .L headers
  731. to the
  732. .I ctl
  733. file),
  734. neither
  735. .I listen
  736. nor
  737. .I accept
  738. is needed;
  739. otherwise,
  740. the usual sequence of
  741. .IR announce ,
  742. .IR listen ,
  743. .I accept
  744. must be executed before performing I/O on the corresponding
  745. .I data
  746. file.
  747. .
  748. .SS RUDP
  749. RUDP is a reliable datagram protocol based on UDP,
  750. currently only for IPv4.
  751. Packets are delivered in order.
  752. RUDP does not support
  753. .BR listen .
  754. One must write either
  755. .L connect
  756. or
  757. .L announce
  758. followed immediately by
  759. .L headers
  760. to
  761. .BR ctl .
  762. .PP
  763. Unlike TCP, the reboot of one end of a connection does
  764. not force a closing of the connection. Communications will
  765. resume when the rebooted machine resumes talking. Any unacknowledged
  766. packets queued before the reboot will be lost. A reboot can
  767. be detected by reading the
  768. .B err
  769. file. It will contain the message
  770. .IP
  771. .BI hangup\ address ! port
  772. .PP
  773. where
  774. .I address
  775. and
  776. .I port
  777. are of the far side of the connection.
  778. Retransmitting a datagram more than 10 times
  779. is treated like a reboot:
  780. all queued messages are dropped, an error is queued to the
  781. .B err
  782. file, and the conversation resumes.
  783. .
  784. .SS ICMP
  785. ICMP is a datagram protocol for IPv4 used to exchange control requests and
  786. their responses with other machines' IP implementations.
  787. ICMP is primarily a kernel-to-kernel protocol, but it is possible
  788. to generate `echo request' and read `echo reply' packets from user programs.
  789. .
  790. .SS ICMPV6
  791. ICMPv6 is the IPv6 equivalent of ICMP.
  792. If, after an
  793. .BR announce ,
  794. the message
  795. .L headers
  796. is written to
  797. .BR ctl ,
  798. then before a write,
  799. a user must prefix each message with a corresponding structure,
  800. declared in
  801. .BR <ip.h> :
  802. .IP
  803. .EX
  804. /*
  805. * user level icmpv6 with control message "headers"
  806. */
  807. typedef struct Icmp6hdr Icmp6hdr;
  808. struct Icmp6hdr {
  809. uchar unused[8];
  810. uchar laddr[IPaddrlen]; /* local address */
  811. uchar raddr[IPaddrlen]; /* remote address */
  812. };
  813. .EE
  814. .PP
  815. In this case (writing
  816. .L headers
  817. to the
  818. .I ctl
  819. file),
  820. neither
  821. .I listen
  822. nor
  823. .I accept
  824. is needed;
  825. otherwise,
  826. the usual sequence of
  827. .IR announce ,
  828. .IR listen ,
  829. .I accept
  830. must be executed before performing I/O on the corresponding
  831. .I data
  832. file.
  833. .
  834. .SS GRE
  835. GRE is the encapsulation protocol used by PPTP.
  836. The kernel implements just enough of the protocol
  837. to multiplex it.
  838. Our implementation encapsulates in IPv4, per RFC 1702.
  839. .B Announce
  840. is not allowed in GRE, only
  841. .BR connect .
  842. Since GRE has no port numbers, the port number in the connect
  843. is actually the 16 bit
  844. .B eproto
  845. field in the GRE header.
  846. .PP
  847. Reads and writes transfer a
  848. GRE datagram starting at the GRE header.
  849. On write, the kernel fills in the
  850. .B eproto
  851. field with the port number specified
  852. in the connect message.
  853. .br
  854. .ne 3
  855. .
  856. .SS ESP
  857. ESP is the Encapsulating Security Payload (RFC 1827, obsoleted by RFC 4303)
  858. for IPsec (RFC 4301).
  859. We currently implement only tunnel mode, not transport mode.
  860. It is used to set up an encrypted tunnel between machines.
  861. Like GRE, ESP has no port numbers. Instead, the
  862. port number in the
  863. .B connect
  864. message is the SPI (Security Association Identifier (sic)).
  865. IP packets are written to and read from
  866. .BR data .
  867. The kernel encrypts any packets written to
  868. .BR data ,
  869. appends a MAC, and prefixes an ESP header before
  870. sending to the other end of the tunnel.
  871. Received packets are checked against their MACs,
  872. decrypted, and queued for reading from
  873. .BR data .
  874. The control messages are:
  875. .TF "alg secret"
  876. .PD
  877. .TP
  878. .BI esp\ "alg secret
  879. Encrypt with the algorithm,
  880. .IR alg ,
  881. using
  882. .I secret
  883. as the key.
  884. Possible algorithms are:
  885. .BR null ,
  886. .BR des_56_cbc ,
  887. .BR des3_cbc ,
  888. and eventually
  889. .BR aes_128_cbc ,
  890. and
  891. .BR aes_ctr .
  892. .TP
  893. .BI ah\ "alg secret
  894. Use the hash algorithm,
  895. .IR alg ,
  896. with
  897. .I secret
  898. as the key for generating the MAC.
  899. Possible algorithms are:
  900. .BR null ,
  901. .BR hmac_sha1_96 ,
  902. .BR hmac_md5_96 ,
  903. and eventually
  904. .BR aes_xcbc_mac_96 .
  905. .TP
  906. .B header
  907. Turn on header mode. Every buffer read from
  908. .B data
  909. starts with 4 unused bytes, and the first 4 bytes
  910. of every buffer written to
  911. .B data
  912. are ignored.
  913. .TP
  914. .B noheader
  915. Turn off header mode.
  916. .
  917. .SS "IP packet filter
  918. The directory
  919. .B /net/ipmux
  920. looks like another protocol directory.
  921. It is a packet filter built on top of IP.
  922. Each numbered
  923. subdirectory represents a different filter.
  924. The connect messages written to the
  925. .I ctl
  926. file describe the filter. Packets matching the filter can be read on the
  927. .B data
  928. file. Packets written to the
  929. .B data
  930. file are routed to an interface and transmitted.
  931. .PP
  932. A filter is a semicolon-separated list of
  933. relations. Each relation describes a portion
  934. of a packet to match. The possible relations are:
  935. .TF "iph[n:m]=expr"
  936. .PD
  937. .TP
  938. .BI proto= n
  939. the IP protocol number must be
  940. .IR n .
  941. .TP
  942. .BI data[ n : m ]= expr
  943. bytes
  944. .I n
  945. through
  946. .I m
  947. following the IP packet must match
  948. .IR expr .
  949. .TP
  950. .BI iph[ n : m ]= expr
  951. bytes
  952. .I n
  953. through
  954. .I m
  955. of the IP packet header must match
  956. .IR expr .
  957. .TP
  958. .BI ifc= expr
  959. the packet must have been received on an interface whose address
  960. matches
  961. .IR expr .
  962. .TP
  963. .BI src= expr
  964. The source address in the packet must match
  965. .IR expr .
  966. .TP
  967. .BI dst= expr
  968. The destination address in the packet must match
  969. .IR expr .
  970. .PP
  971. .I Expr
  972. is of the form:
  973. .TP
  974. .I \ value
  975. .TP
  976. .IB \ value | value | ...
  977. .TP
  978. .IB \ value & mask
  979. .TP
  980. .IB \ value | value & mask
  981. .PP
  982. If a mask is given, the relevant field is first ANDed with
  983. the mask. The result is compared against the value or list
  984. of values for a match. In the case of
  985. .BR ifc ,
  986. .BR dst ,
  987. and
  988. .B src
  989. the value is a dot-formatted IP address and the mask is a dot-formatted
  990. IP mask. In the case of
  991. .BR data ,
  992. .B iph
  993. and
  994. .BR proto ,
  995. both value and mask are strings of 2 hexadecimal digits representing
  996. 8-bit values.
  997. .PP
  998. A packet is delivered to only one filter.
  999. The filters are merged into a single comparison tree.
  1000. If two filters match the same packet, the following
  1001. rules apply in order (here '>' means 'is preferred to'):
  1002. .IP 1)
  1003. protocol > data > source > destination > interface
  1004. .IP 2)
  1005. lower data offsets > higher data offsets
  1006. .IP 3)
  1007. longer matches > shorter matches
  1008. .IP 4)
  1009. older > younger
  1010. .PP
  1011. So far this filter mechanism has been used only to implement a version of
  1012. OSPF in Inferno
  1013. and 6to4 tunnelling.
  1014. .br
  1015. .ne 5
  1016. .
  1017. .SS Statistics
  1018. The
  1019. .B stats
  1020. files are read-only and contain statistics useful for network monitoring.
  1021. .br
  1022. .ne 12
  1023. .PP
  1024. Reading
  1025. .B /net/ipifc/stats
  1026. returns a list of 19 tagged and newline-separated fields representing:
  1027. .EX
  1028. .ft 1
  1029. .2C
  1030. .in +0.25i
  1031. forwarding status (0 and 2 mean forwarding off,
  1032. 1 means on)
  1033. default TTL
  1034. input packets
  1035. input header errors
  1036. input address errors
  1037. packets forwarded
  1038. input packets for unknown protocols
  1039. input packets discarded
  1040. input packets delivered to higher level protocols
  1041. output packets
  1042. output packets discarded
  1043. output packets with no route
  1044. timed out fragments in reassembly queue
  1045. requested reassemblies
  1046. successful reassemblies
  1047. failed reassemblies
  1048. successful fragmentations
  1049. unsuccessful fragmentations
  1050. fragments created
  1051. .in -0.25i
  1052. .1C
  1053. .ft
  1054. .EE
  1055. .br
  1056. .ne 16
  1057. .PP
  1058. Reading
  1059. .B /net/icmp/stats
  1060. returns a list of 26 tagged and newline-separated fields representing:
  1061. .EX
  1062. .ft 1
  1063. .2C
  1064. .in +0.25i
  1065. messages received
  1066. bad received messages
  1067. unreachables received
  1068. time exceededs received
  1069. input parameter problems received
  1070. source quenches received
  1071. redirects received
  1072. echo requests received
  1073. echo replies received
  1074. timestamps received
  1075. timestamp replies received
  1076. address mask requests received
  1077. address mask replies received
  1078. messages sent
  1079. transmission errors
  1080. unreachables sent
  1081. time exceededs sent
  1082. input parameter problems sent
  1083. source quenches sent
  1084. redirects sent
  1085. echo requests sent
  1086. echo replies sent
  1087. timestamps sent
  1088. timestamp replies sent
  1089. address mask requests sent
  1090. address mask replies sent
  1091. .in -0.25i
  1092. .1C
  1093. .EE
  1094. .PP
  1095. Reading
  1096. .B /net/tcp/stats
  1097. returns a list of 11 tagged and newline-separated fields representing:
  1098. .EX
  1099. .ft 1
  1100. .2C
  1101. .in +0.25i
  1102. maximum number of connections
  1103. total outgoing calls
  1104. total incoming calls
  1105. number of established connections to be reset
  1106. number of currently established connections
  1107. segments received
  1108. segments sent
  1109. segments retransmitted
  1110. retransmit timeouts
  1111. bad received segments
  1112. transmission failures
  1113. .in -0.25i
  1114. .1C
  1115. .EE
  1116. .PP
  1117. Reading
  1118. .B /net/udp/stats
  1119. returns a list of 4 tagged and newline-separated fields representing:
  1120. .EX
  1121. .ft 1
  1122. .2C
  1123. .in +0.25i
  1124. datagrams received
  1125. datagrams received for bad ports
  1126. malformed datagrams received
  1127. datagrams sent
  1128. .in -0.25i
  1129. .1C
  1130. .EE
  1131. .PP
  1132. Reading
  1133. .B /net/gre/stats
  1134. returns a single tagged number representing:
  1135. .EX
  1136. .ft 1
  1137. .in +0.25i
  1138. header length errors
  1139. .in -0.25i
  1140. .EE
  1141. .SH "SEE ALSO"
  1142. .IR dial (2),
  1143. .IR ip (2),
  1144. .IR ndb (6),
  1145. .IR listen (8)
  1146. .br
  1147. .PD 0
  1148. .TF /lib/rfc/rfc2822
  1149. .TP
  1150. .B /lib/rfc/rfc2460
  1151. IPv6
  1152. .TP
  1153. .B /lib/rfc/rfc4291
  1154. IPv6 address architecture
  1155. .TP
  1156. .B /lib/rfc/rfc4443
  1157. ICMPv6
  1158. .SH SOURCE
  1159. .B /sys/src/9/ip
  1160. .SH BUGS
  1161. .I Ipmux
  1162. has not been heavily used and should be considered experimental.
  1163. It may disappear in favor of a more traditional packet filter in the future.