ip 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221
  1. .TH IP 3
  2. .SH NAME
  3. ip, esp, gre, icmp, icmpv6, ipmux, rudp, tcp, udp \- network protocols over IP
  4. .SH SYNOPSIS
  5. .nf
  6. .2C
  7. .B bind -a #I\fIspec\fP /net
  8. .sp 0.3v
  9. .B /net/ipifc
  10. .B /net/ipifc/clone
  11. .B /net/ipifc/stats
  12. .BI /net/ipifc/ n
  13. .BI /net/ipifc/ n /status
  14. .BI /net/ipifc/ n /ctl
  15. \&...
  16. .sp 0.3v
  17. .B /net/arp
  18. .B /net/bootp
  19. .B /net/iproute
  20. .B /net/ipselftab
  21. .B /net/log
  22. .B /net/ndb
  23. .sp 0.3v
  24. .B /net/esp
  25. .B /net/gre
  26. .B /net/icmp
  27. .B /net/icmpv6
  28. .B /net/ipmux
  29. .B /net/rudp
  30. .B /net/tcp
  31. .B /net/udp
  32. .sp 0.3v
  33. .B /net/tcp/clone
  34. .B /net/tcp/stats
  35. .BI /net/tcp/ n
  36. .BI /net/tcp/ n /data
  37. .BI /net/tcp/ n /ctl
  38. .BI /net/tcp/ n /local
  39. .BI /net/tcp/ n /remote
  40. .BI /net/tcp/ n /status
  41. .BI /net/tcp/ n /listen
  42. \&...
  43. .1C
  44. .fi
  45. .SH DESCRIPTION
  46. The
  47. .I ip
  48. device provides the interface to Internet Protocol stacks.
  49. .I Spec
  50. is an integer from 0 to 15 identifying a stack.
  51. Each stack implements IPv4 and IPv6.
  52. Each stack is independent of all others:
  53. the only information transfer between them is via programs that
  54. mount multiple stacks.
  55. Normally a system uses only one stack.
  56. However, multiple stacks can be used for debugging
  57. new IP networks or implementing firewalls or proxy
  58. services.
  59. .PP
  60. All addresses used are 16-byte IPv6 addresses.
  61. IPv4 addresses are a subset of the IPv6 addresses and both standard
  62. .SM ASCII
  63. formats are accepted.
  64. In binary representation, all v4 addresses start with the following 12 bytes, in hex:
  65. .IP
  66. .EX
  67. 00 00 00 00 00 00 00 00 00 00 ff ff
  68. .EE
  69. .
  70. .SS "Configuring interfaces
  71. Each stack may have multiple interfaces and each interface
  72. may have multiple addresses.
  73. The
  74. .B /net/ipifc
  75. directory contains a
  76. .B clone
  77. file, a
  78. .B stats
  79. file, and numbered subdirectories for each physical interface.
  80. .PP
  81. Opening the
  82. .B clone
  83. file reserves an interface.
  84. The file descriptor returned from the
  85. .IR open (2)
  86. will point to the control file,
  87. .BR ctl ,
  88. of the newly allocated interface.
  89. Reading
  90. .B ctl
  91. returns a text string representing the number of the interface.
  92. Writing
  93. .B ctl
  94. alters aspects of the interface.
  95. The possible
  96. .I ctl
  97. messages are:
  98. .\" .TF "bind loopback"
  99. .TF "bind netdev"
  100. .PD
  101. .TP
  102. .BI "bind ether " path
  103. Treat the device mounted at
  104. .I path
  105. as an Ethernet medium carrying IP and ARP packets
  106. and associate it with this interface.
  107. The kernel will
  108. .IR dial (2)
  109. .IR path !0x800
  110. and
  111. .IR path !0x806
  112. and use the two connections for IPv4 and
  113. ARP respectively.
  114. .TP
  115. .B "bind pkt
  116. Treat this interface as a packet interface. Assume
  117. a user program will read and write the
  118. .I data
  119. file to receive and transmit IP packets to the kernel.
  120. This is used by programs such as
  121. .IR ppp (8)
  122. to mediate IP packet transfer between the kernel and
  123. a PPP encoded device.
  124. .TP
  125. .BI "bind netdev " path
  126. Treat this interface as a packet interface.
  127. The kernel will open
  128. .I path
  129. and read and write the resulting file descriptor
  130. to receive and transmit IP packets.
  131. .TP
  132. .BI "bind loopback "
  133. Treat this interface as a local loopback. Anything
  134. written to it will be looped back.
  135. .TP
  136. .B "unbind
  137. Disassociate the physical device from an IP interface.
  138. .TP
  139. .BI add\ "local mask remote mtu " proxy
  140. .PD 0
  141. .TP
  142. .BI try\ "local mask remote mtu " proxy
  143. .PD
  144. Add a local IP address to the interface.
  145. .I try
  146. adds the local address as a tentative address
  147. if it's an IPv6 address.
  148. The
  149. .IR mask ,
  150. .IR remote ,
  151. .IR mtu ,
  152. and
  153. .B proxy
  154. arguments are all optional. The default mask is
  155. the class mask for the local address. The default
  156. remote address is
  157. .I local
  158. ANDed with
  159. .IR mask .
  160. The default mtu is 1514 for Ethernet and 4096 for packet
  161. media.
  162. .IR Proxy ,
  163. if specified, means that this machine should answer
  164. ARP requests for the remote address.
  165. .IR Ppp (8)
  166. does this to make remote machines appear
  167. to be connected to the local Ethernet.
  168. .TP
  169. .BI remove\ "local mask"
  170. Remove a local IP address from an interface.
  171. .TP
  172. .BI addmulti\ Media-addr
  173. Treat the multicast
  174. .I Media-addr
  175. on this interface as a local address.
  176. .TP
  177. .BI remmulti\ Media-addr
  178. Remove the multicast address
  179. .I Media-addr
  180. from this interface.
  181. .TP
  182. .BI mtu\ n
  183. Set the maximum transfer unit for this device to
  184. .IR n .
  185. The mtu is the maximum size of the packet including any
  186. medium-specific headers.
  187. .TP
  188. .BI reassemble
  189. Reassemble IP fragments before forwarding to this interface.
  190. .TP
  191. .BI iprouting\ n
  192. Allow
  193. .RI ( n
  194. is missing or non-zero) or disallow
  195. .RI ( n
  196. is 0) forwarding packets between this interface and
  197. others.
  198. .TP
  199. .B bridge
  200. Enable bridging (see
  201. .IR bridge (3)).
  202. .TP
  203. .B promiscuous
  204. Set the interface into promiscuous mode,
  205. which makes it accept all incoming packets,
  206. whether addressed to it or not.
  207. .TP
  208. .BI "connect " type
  209. Mark the Ethernet packet
  210. .I type
  211. as being in use, if not already in use
  212. on this interface.
  213. A
  214. .I type
  215. of -1 means `all' but appears to be a no-op.
  216. .TP
  217. .B scanbs
  218. Make the wireless interface scan for base stations.
  219. .TP
  220. .B headersonly
  221. Set the interface to pass only packet headers, not data too.
  222. .TP
  223. .BI "add6 " "v6addr pfx-len [onlink auto validlt preflt]"
  224. Add the local IPv6 address
  225. .I v6addr
  226. with prefix length
  227. .I pfx-len
  228. to this interface.
  229. See RFC 2461 §6.2.1 for more detail.
  230. The remaining arguments are optional:
  231. .RS
  232. .TF onlink
  233. .TP
  234. .I onlink
  235. flag: address is `on-link'
  236. .TP
  237. .I auto
  238. flag: autonomous
  239. .TP
  240. .I validlt
  241. valid life-time in seconds
  242. .TP
  243. .I preflt
  244. preferred life-time in seconds
  245. .RE
  246. .PD
  247. .TP
  248. .BI "ra6 " "keyword value ..."
  249. Set IPv6 router advertisement (RA) parameter
  250. .IR keyword 's
  251. .IR value .
  252. Known
  253. .IR keyword s
  254. and the meanings of their values follow.
  255. See RFC 2461 §6.2.1 for more detail.
  256. Flags are true iff non-zero.
  257. .RS
  258. .TF minraint
  259. .TP
  260. .B recvra
  261. flag: receive and process RAs.
  262. .TP
  263. .B sendra
  264. flag: generate and send RAs.
  265. .TP
  266. .B mflag
  267. flag: ``Managed address configuration'',
  268. goes into RAs.
  269. .TP
  270. .B oflag
  271. flag: ``Other stateful configuration'',
  272. goes into RAs.
  273. .TP
  274. .B maxraint
  275. ``maximum time allowed between sending unsolicited multicast''
  276. RAs from the interface, in ms.
  277. .TP
  278. .B minraint
  279. ``minimum time allowed between sending unsolicited multicast''
  280. RAs from the interface, in ms.
  281. .TP
  282. .B linkmtu
  283. ``value to be placed in MTU options sent by the router.''
  284. Zero indicates none.
  285. .TP
  286. .B reachtime
  287. sets the Reachable Time field in RAs sent by the router.
  288. ``Zero means unspecified (by this router).''
  289. .TP
  290. .B rxmitra
  291. sets the Retrans Timer field in RAs sent by the router.
  292. ``Zero means unspecified (by this router).''
  293. .TP
  294. .B ttl
  295. default value of the Cur Hop Limit field in RAs sent by the router.
  296. Should be set to the ``current diameter of the Internet.''
  297. ``Zero means unspecified (by this router).''
  298. .TP
  299. .B routerlt
  300. sets the Router Lifetime field of RAs sent from the interface, in ms.
  301. Zero means the router is not to be used as a default router.
  302. .PD
  303. .RE
  304. .PP
  305. Reading the interface's
  306. .I status
  307. file returns information about the interface, one line for each
  308. local address on that interface. The first line
  309. has 9 white-space-separated fields: device, mtu, local address,
  310. mask, remote or network address, packets in, packets out, input errors,
  311. output errors. Each subsequent line contains all but the device and mtu.
  312. See
  313. .I readipifc
  314. in
  315. .IR ip (2).
  316. .
  317. .SS "Routing
  318. The file
  319. .I iproute
  320. controls information about IP routing.
  321. When read, it returns one line per routing entry.
  322. Each line contains six white-space-separated fields:
  323. target address, target mask, address of next hop, flags,
  324. tag, and interface number.
  325. The entry used for routing an IP packet is the one with
  326. the longest mask for which destination address ANDed with
  327. target mask equals the target address.
  328. The one-character flags are:
  329. .TF m
  330. .TP
  331. .B 4
  332. IPv4 route
  333. .TP
  334. .B 6
  335. IPv6 route
  336. .TP
  337. .B i
  338. local interface
  339. .TP
  340. .B b
  341. broadcast address
  342. .TP
  343. .B u
  344. local unicast address
  345. .TP
  346. .B m
  347. multicast route
  348. .TP
  349. .B p
  350. point-to-point route
  351. .PD
  352. .PP
  353. The tag is an arbitrary string of up to 4 characters. It is normally used to
  354. indicate what routing protocol originated the route.
  355. .PP
  356. Writing to
  357. .B /net/iproute
  358. changes the route table. The messages are:
  359. .TF "tag str"
  360. .PD
  361. .TP
  362. .B flush
  363. Remove all routes.
  364. .TP
  365. .BI tag\ string
  366. Associate the tag,
  367. .IR string ,
  368. with all subsequent routes added via this file descriptor.
  369. .TP
  370. .BI add\ "target mask nexthop"
  371. Add the route to the table. If one already exists with the
  372. same target and mask, replace it.
  373. .TP
  374. .BI remove\ "target mask"
  375. Remove a route with a matching target and mask.
  376. .
  377. .SS "Address resolution
  378. The file
  379. .B /net/arp
  380. controls information about address resolution.
  381. The kernel automatically updates the v4 ARP and v6 Neighbour Discovery
  382. information for Ethernet interfaces.
  383. When read, the file returns one line per address containing the
  384. type of medium, the status of the entry (OK, WAIT), the IP
  385. address, and the medium address.
  386. Writing to
  387. .B /net/arp
  388. administers the ARP information.
  389. The control messages are:
  390. .TF "del addr"
  391. .PD
  392. .TP
  393. .B flush
  394. Remove all entries.
  395. .TP
  396. .BI add\ "type IP-addr Media-addr"
  397. Add an entry or replace an existing one for the
  398. same IP address.
  399. .TP
  400. .BI del\ "IP-addr"
  401. Delete an individual entry.
  402. .PP
  403. ARP entries do not time out. The ARP table is a
  404. cache with an LRU replacement policy. The IP stack
  405. listens for all ARP requests and, if the requester is in
  406. the table, the entry is updated.
  407. Also, whenever a new address is configured onto an
  408. Ethernet, an ARP request is sent to help
  409. update the table on other systems.
  410. .PP
  411. Currently, the only medium type is
  412. .BR ether .
  413. .br
  414. .ne 3
  415. .
  416. .SS "Debugging and stack information
  417. If any process is holding
  418. .B /net/log
  419. open, the IP stack queues debugging information to it.
  420. This is intended primarily for debugging the IP stack.
  421. The information provided is implementation-defined;
  422. see the source for details. Generally, what is returned is error messages
  423. about bad packets.
  424. .PP
  425. Writing to
  426. .B /net/log
  427. controls debugging. The control messages are:
  428. .TF "clear addr"
  429. .PD
  430. .TP
  431. .BI set\ arglist
  432. .I Arglist
  433. is a space-separated list of items for which to enable debugging.
  434. The possible items are:
  435. .BR ppp ,
  436. .BR ip ,
  437. .BR fs ,
  438. .BR tcp ,
  439. .BR icmp ,
  440. .BR udp ,
  441. .BR compress ,
  442. .BR gre ,
  443. .BR tcpwin ,
  444. .BR tcprxmt ,
  445. .BR udpmsg ,
  446. .BR ipmsg ,
  447. and
  448. .BR esp .
  449. .TP
  450. .BI clear\ arglist
  451. .I Arglist
  452. is a space-separated list of items for which to disable debugging.
  453. .TP
  454. .BI only\ addr
  455. If
  456. .I addr
  457. is non-zero, restrict debugging to only those
  458. packets whose source or destination is that
  459. address.
  460. .PP
  461. The file
  462. .B /net/ndb
  463. can be read or written by
  464. programs. It is normally used by
  465. .IR ipconfig (8)
  466. to leave configuration information for other programs
  467. such as
  468. .B dns
  469. and
  470. .B cs
  471. (see
  472. .IR ndb (8)).
  473. .B /net/ndb
  474. may contain up to 1024 bytes.
  475. .PP
  476. The file
  477. .B /net/ipselftab
  478. is a read-only file containing all the IP addresses
  479. considered local. Each line in the file contains
  480. three white-space-separated fields: IP address, usage count,
  481. and flags. The usage count is the number of interfaces to which
  482. the address applies. The flags are the same as for routing
  483. entries.
  484. Note that the `IPv4 route' flag will never be set.
  485. .br
  486. .ne 3
  487. .
  488. .SS "Protocol directories
  489. The
  490. .I ip
  491. device
  492. supports IP as well as several protocols that run over it:
  493. TCP, UDP, RUDP, ICMP, GRE, and ESP.
  494. TCP and UDP provide the standard Internet
  495. protocols for reliable stream and unreliable datagram
  496. communication.
  497. RUDP is a locally-developed reliable datagram protocol based on UDP.
  498. ICMP is IP's catch-all control protocol used to send
  499. low level error messages and to implement
  500. .IR ping (8).
  501. GRE is a general encapsulation protocol.
  502. ESP is the encapsulation protocol for IPsec.
  503. IL provided a reliable datagram service for communication
  504. between Plan 9 machines over IPv4, but is no longer part of the system.
  505. .PP
  506. Each protocol is a subdirectory of the IP stack.
  507. The top level directory of each protocol contains a
  508. .B clone
  509. file, a
  510. .B stats
  511. file, and subdirectories numbered from zero to the number of connections
  512. opened for this protocol.
  513. .PP
  514. Opening the
  515. .B clone
  516. file reserves a connection. The file descriptor returned from the
  517. .IR open (2)
  518. will point to the control file,
  519. .BR ctl ,
  520. of the newly allocated connection.
  521. Reading
  522. .B ctl
  523. returns a text
  524. string representing the number of the
  525. connection.
  526. Connections may be used either to listen for incoming calls
  527. or to initiate calls to other machines.
  528. .PP
  529. A connection is controlled by writing text strings to the associated
  530. .B ctl
  531. file.
  532. After a connection has been established data may be read from
  533. and written to
  534. .BR data .
  535. A connection can be actively established using the
  536. .B connect
  537. message (see also
  538. .IR dial (2)).
  539. A connection can be established passively by first
  540. using an
  541. .B announce
  542. message (see
  543. .IR dial (2))
  544. to bind to a local port and then
  545. opening the
  546. .B listen
  547. file (see
  548. .IR dial (2))
  549. to receive incoming calls.
  550. .PP
  551. The following control messages are supported:
  552. .TF announceX
  553. .PD
  554. .TP
  555. .BI connect\ ip-address ! port "!r " local
  556. Establish a connection to the remote
  557. .I ip-address
  558. and
  559. .IR port .
  560. If
  561. .I local
  562. is specified, it is used as the local port number.
  563. If
  564. .I local
  565. is not specified but
  566. .B !r
  567. is, the system will allocate
  568. a restricted port number (less than 1024) for the connection to allow communication
  569. with Unix
  570. .B login
  571. and
  572. .B exec
  573. services.
  574. Otherwise a free port number starting at 5000 is chosen.
  575. The connect fails if the combination of local and remote address/port pairs
  576. is already assigned to another port.
  577. .TP
  578. .BI announce\ X
  579. .I X
  580. is a decimal port number or
  581. .LR * .
  582. Set the local port
  583. number to
  584. .I X
  585. and accept calls to
  586. .IR X .
  587. If
  588. .I X
  589. is
  590. .LR * ,
  591. accept
  592. calls for any port that no process has explicitly announced.
  593. The local IP address cannot be set.
  594. .B Announce
  595. fails if the connection is already announced or connected.
  596. .TP
  597. .BI bind\ X
  598. .I X
  599. is a decimal port number or
  600. .LR * .
  601. Set the local port number to
  602. .IR X .
  603. This exists to support emulation
  604. of BSD sockets by the APE libraries (see
  605. .IR pcc (1))
  606. and is not otherwise used.
  607. .TP
  608. .BI backlog\ n
  609. Set the maximum number of unanswered (queued) incoming
  610. connections to an announced port to
  611. .IR n .
  612. By default
  613. .I n
  614. is set to five. If more than
  615. .I n
  616. connections are pending,
  617. further requests for a service will be rejected.
  618. .TP
  619. .BI ttl\ n
  620. Set the time to live IP field in outgoing packets to
  621. .IR n .
  622. .TP
  623. .BI tos\ n
  624. Set the service type IP field in outgoing packets to
  625. .IR n .
  626. .PP
  627. Port numbers must be in the range 1 to 32767.
  628. .PP
  629. Several files report the status of a
  630. connection.
  631. The
  632. .B remote
  633. and
  634. .B local
  635. files contain the IP address and port number for the remote and local side of the
  636. connection. The
  637. .B status
  638. file contains protocol-dependent information to help debug network connections.
  639. On receiving an error or EOF reading or writing the
  640. .B data
  641. file, the
  642. .B err
  643. file contains the reason for error.
  644. .PP
  645. A process may accept incoming connections by
  646. .IR open (2)ing
  647. the
  648. .B listen
  649. file.
  650. The
  651. .B open
  652. will block until a new connection request arrives.
  653. Then
  654. .B open
  655. will return an open file descriptor which points to the control file of the
  656. newly accepted connection.
  657. This procedure will accept all calls for the
  658. given protocol.
  659. See
  660. .IR dial (2).
  661. .
  662. .SS TCP
  663. TCP connections are reliable point-to-point byte streams; there are no
  664. message delimiters.
  665. A connection is determined by the address and port numbers of the two
  666. ends.
  667. TCP
  668. .B ctl
  669. files support the following additional messages:
  670. .TF checksumnn
  671. .PD
  672. .TP
  673. .B hangup
  674. close down this TCP connection
  675. .TP
  676. .BI keepalive \ n
  677. turn on keep alive messages.
  678. .IR N ,
  679. if given, is the number of milliseconds between keepalives
  680. (default 30000).
  681. .TP
  682. .BI checksum \ n
  683. emit TCP checksums of zero if
  684. .I n
  685. is zero; otherwise, and by default,
  686. TCP checksums are computed and sent normally.
  687. .TP
  688. .BI tcpporthogdefense \ onoff
  689. .I onoff
  690. of
  691. .L on
  692. enables the TCP port-hog defense for all TCP connections;
  693. .I onoff
  694. of
  695. .L off
  696. disables it.
  697. The defense is a solution to hijacked systems staking out ports
  698. as a form of denial-of-service attack.
  699. To avoid stateless TCP conversation hogs,
  700. .I ip
  701. picks a TCP sequence number at random for keepalives.
  702. If that number gets acked by the other end,
  703. .I ip
  704. shuts down the connection.
  705. Some firewalls,
  706. notably ones that perform stateful inspection,
  707. discard such out-of-specification keepalives,
  708. so connections through such firewalls
  709. will be killed after five minutes
  710. by the lack of keepalives.
  711. .
  712. .SS UDP
  713. UDP connections carry unreliable and unordered datagrams. A read from
  714. .B data
  715. will return the next datagram, discarding anything
  716. that doesn't fit in the read buffer.
  717. A write is sent as a single datagram.
  718. .PP
  719. By default, a UDP connection is a point-to-point link.
  720. Either a
  721. .B connect
  722. establishes a local and remote address/port pair or
  723. after an
  724. .BR announce ,
  725. each datagram coming from a different remote address/port pair
  726. establishes a new incoming connection.
  727. However, many-to-one semantics is also possible.
  728. .PP
  729. If, after an
  730. .BR announce ,
  731. the message
  732. .L headers
  733. is written to
  734. .BR ctl ,
  735. then all messages sent to the announced port
  736. are received on the announced connection prefixed
  737. with the corresponding structure,
  738. declared in
  739. .BR <ip.h> :
  740. .IP
  741. .EX
  742. typedef struct Udphdr Udphdr;
  743. struct Udphdr
  744. {
  745. uchar raddr[16]; /* V6 remote address */
  746. uchar laddr[16]; /* V6 local address */
  747. uchar ifcaddr[16]; /* V6 interface address (receive only) */
  748. uchar rport[2]; /* remote port */
  749. uchar lport[2]; /* local port */
  750. };
  751. .EE
  752. .PP
  753. Before a write, a user must prefix a similar structure to each message.
  754. The system overrides the user specified local port with the announced
  755. one. If the user specifies an address that isn't a unicast address in
  756. .BR /net/ipselftab ,
  757. that too is overridden.
  758. Since the prefixed structure is the same in read and write, it is relatively
  759. easy to write a server that responds to client requests by just copying new
  760. data into the message body and then writing back the same buffer that was
  761. read.
  762. .PP
  763. In this case (writing
  764. .L headers
  765. to the
  766. .I ctl
  767. file),
  768. neither
  769. .I listen
  770. nor
  771. .I accept
  772. is needed;
  773. otherwise,
  774. the usual sequence of
  775. .IR announce ,
  776. .IR listen ,
  777. .I accept
  778. must be executed before performing I/O on the corresponding
  779. .I data
  780. file.
  781. .
  782. .SS RUDP
  783. RUDP is a reliable datagram protocol based on UDP,
  784. currently only for IPv4.
  785. Packets are delivered in order.
  786. RUDP does not support
  787. .BR listen .
  788. One must write either
  789. .L connect
  790. or
  791. .L announce
  792. followed immediately by
  793. .L headers
  794. to
  795. .BR ctl .
  796. .PP
  797. Unlike TCP, the reboot of one end of a connection does
  798. not force a closing of the connection. Communications will
  799. resume when the rebooted machine resumes talking. Any unacknowledged
  800. packets queued before the reboot will be lost. A reboot can
  801. be detected by reading the
  802. .B err
  803. file. It will contain the message
  804. .IP
  805. .BI hangup\ address ! port
  806. .PP
  807. where
  808. .I address
  809. and
  810. .I port
  811. are of the far side of the connection.
  812. Retransmitting a datagram more than 10 times
  813. is treated like a reboot:
  814. all queued messages are dropped, an error is queued to the
  815. .B err
  816. file, and the conversation resumes.
  817. .PP
  818. RUDP
  819. .I ctl
  820. files accept the following messages:
  821. .TF "randdrop percent"
  822. .TP
  823. .B headers
  824. Corresponds to the
  825. .L headers
  826. format of UDP.
  827. .TP
  828. .BI "hangup " "IP port"
  829. Drop the connection to address
  830. .I IP
  831. and
  832. .IR port .
  833. .TP
  834. .BI "randdrop " "[ percent ]"
  835. Randomly drop
  836. .I percent
  837. of outgoing packets.
  838. Default is 10%.
  839. .
  840. .SS ICMP
  841. ICMP is a datagram protocol for IPv4 used to exchange control requests and
  842. their responses with other machines' IP implementations.
  843. ICMP is primarily a kernel-to-kernel protocol, but it is possible
  844. to generate `echo request' and read `echo reply' packets from user programs.
  845. .
  846. .SS ICMPV6
  847. ICMPv6 is the IPv6 equivalent of ICMP.
  848. If, after an
  849. .BR announce ,
  850. the message
  851. .L headers
  852. is written to
  853. .BR ctl ,
  854. then before a write,
  855. a user must prefix each message with a corresponding structure,
  856. declared in
  857. .BR <ip.h> :
  858. .IP
  859. .EX
  860. /*
  861. * user level icmpv6 with control message "headers"
  862. */
  863. typedef struct Icmp6hdr Icmp6hdr;
  864. struct Icmp6hdr {
  865. uchar unused[8];
  866. uchar laddr[IPaddrlen]; /* local address */
  867. uchar raddr[IPaddrlen]; /* remote address */
  868. };
  869. .EE
  870. .PP
  871. In this case (writing
  872. .L headers
  873. to the
  874. .I ctl
  875. file),
  876. neither
  877. .I listen
  878. nor
  879. .I accept
  880. is needed;
  881. otherwise,
  882. the usual sequence of
  883. .IR announce ,
  884. .IR listen ,
  885. .I accept
  886. must be executed before performing I/O on the corresponding
  887. .I data
  888. file.
  889. .
  890. .SS GRE
  891. GRE is the encapsulation protocol used by PPTP.
  892. The kernel implements just enough of the protocol
  893. to multiplex it.
  894. Our implementation encapsulates in IPv4, per RFC 1702.
  895. .B Announce
  896. is not allowed in GRE, only
  897. .BR connect .
  898. Since GRE has no port numbers, the port number in the connect
  899. is actually the 16 bit
  900. .B eproto
  901. field in the GRE header.
  902. .PP
  903. Reads and writes transfer a
  904. GRE datagram starting at the GRE header.
  905. On write, the kernel fills in the
  906. .B eproto
  907. field with the port number specified
  908. in the connect message.
  909. .br
  910. .ne 3
  911. .
  912. .SS ESP
  913. ESP is the Encapsulating Security Payload (RFC 1827, obsoleted by RFC 4303)
  914. for IPsec (RFC 4301).
  915. We currently implement only tunnel mode, not transport mode.
  916. It is used to set up an encrypted tunnel between machines.
  917. Like GRE, ESP has no port numbers. Instead, the
  918. port number in the
  919. .B connect
  920. message is the SPI (Security Parameters Index).
  921. IP packets are written to and read from
  922. .BR data .
  923. The kernel encrypts any packets written to
  924. .BR data ,
  925. appends a MAC, and prefixes an ESP header before
  926. sending to the other end of the tunnel.
  927. Received packets are checked against their MACs,
  928. decrypted, and queued for reading from
  929. .BR data .
  930. The control messages are:
  931. .TF "alg secret"
  932. .PD
  933. .TP
  934. .BI esp\ "alg secret
  935. Encrypt with the algorithm,
  936. .IR alg ,
  937. using
  938. .I secret
  939. as the key.
  940. Possible algorithms are:
  941. .BR null ,
  942. .BR des_56_cbc ,
  943. .BR des3_cbc ,
  944. and eventually
  945. .BR aes_128_cbc ,
  946. and
  947. .BR aes_ctr .
  948. .TP
  949. .BI ah\ "alg secret
  950. Use the hash algorithm,
  951. .IR alg ,
  952. with
  953. .I secret
  954. as the key for generating the MAC.
  955. Possible algorithms are:
  956. .BR null ,
  957. .BR hmac_sha1_96 ,
  958. .BR hmac_md5_96 ,
  959. and eventually
  960. .BR aes_xcbc_mac_96 .
  961. .TP
  962. .B header
  963. Turn on header mode. Every buffer read from
  964. .B data
  965. starts with 4 unused bytes, and the first 4 bytes
  966. of every buffer written to
  967. .B data
  968. are ignored.
  969. .TP
  970. .B noheader
  971. Turn off header mode.
  972. .
  973. .SS "IP packet filter
  974. The directory
  975. .B /net/ipmux
  976. looks like another protocol directory.
  977. It is a packet filter built on top of IP.
  978. Each numbered
  979. subdirectory represents a different filter.
  980. The connect messages written to the
  981. .B ctl
  982. file describe the filter. Packets matching the filter can be read on the
  983. .B data
  984. file. Packets written to the
  985. .B data
  986. file are routed to an interface and transmitted.
  987. .PP
  988. A filter is a semicolon-separated list of
  989. relations. Each relation describes a portion
  990. of a packet to match. The possible relations are:
  991. .TF "iph[n:m]=expr"
  992. .PD
  993. .TP
  994. .BI proto= n
  995. The IP protocol number must be
  996. .IR n .
  997. .TP
  998. .BI data[ n : m ]= expr
  999. Bytes
  1000. .I n
  1001. through
  1002. .I m
  1003. following the IP packet header must match
  1004. .IR expr .
  1005. .TP
  1006. .BI iph[ n : m ]= expr
  1007. Bytes
  1008. .I n
  1009. through
  1010. .I m
  1011. of the IP packet header must match
  1012. .IR expr .
  1013. .TP
  1014. .BI ifc= expr
  1015. The packet must have been received on an interface whose address
  1016. matches
  1017. .IR expr .
  1018. .TP
  1019. .BI src= expr
  1020. The source address in the packet must match
  1021. .IR expr .
  1022. .TP
  1023. .BI dst= expr
  1024. The destination address in the packet must match
  1025. .IR expr .
  1026. .PP
  1027. .I Expr
  1028. is of the form:
  1029. .TP
  1030. .I \ value
  1031. .TP
  1032. .IB \ value | value | ...
  1033. .TP
  1034. .IB \ value & mask
  1035. .TP
  1036. .IB \ value | value & mask
  1037. .PP
  1038. If a mask is given, the relevant field is first ANDed with
  1039. the mask. The result is compared against the value or list
  1040. of values for a match. In the case of
  1041. .BR ifc ,
  1042. .BR dst ,
  1043. and
  1044. .B src
  1045. the value is a dot-formatted IP address and the mask is a dot-formatted
  1046. IP mask. In the case of
  1047. .BR data ,
  1048. .B iph
  1049. and
  1050. .BR proto ,
  1051. both value and mask are strings of 2 hexadecimal digits representing
  1052. 8-bit values.
  1053. .PP
  1054. A packet is delivered to only one filter.
  1055. The filters are merged into a single comparison tree.
  1056. If two filters match the same packet, the following
  1057. rules apply in order (here `>' means `is preferred to'):
  1058. .IP 1)
  1059. protocol > data > source > destination > interface
  1060. .IP 2)
  1061. lower data offsets > higher data offsets
  1062. .IP 3)
  1063. longer matches > shorter matches
  1064. .IP 4)
  1065. older > younger
  1066. .PP
  1067. So far this has just been used to implement a version of
  1068. OSPF in Inferno
  1069. and 6to4 tunnelling.
  1070. .br
  1071. .ne 5
  1072. .
  1073. .SS Statistics
  1074. The
  1075. .B stats
  1076. files are read-only and contain statistics useful for network monitoring.
  1077. .br
  1078. .ne 12
  1079. .PP
  1080. Reading
  1081. .B /net/ipifc/stats
  1082. returns a list of 19 tagged and newline-separated fields representing:
  1083. .EX
  1084. .ft 1
  1085. .2C
  1086. .in +0.25i
  1087. forwarding status (0 and 2 mean forwarding off,
  1088. 1 means on)
  1089. default TTL
  1090. input packets
  1091. input header errors
  1092. input address errors
  1093. packets forwarded
  1094. input packets for unknown protocols
  1095. input packets discarded
  1096. input packets delivered to higher level protocols
  1097. output packets
  1098. output packets discarded
  1099. output packets with no route
  1100. timed out fragments in reassembly queue
  1101. requested reassemblies
  1102. successful reassemblies
  1103. failed reassemblies
  1104. successful fragmentations
  1105. unsuccessful fragmentations
  1106. fragments created
  1107. .in -0.25i
  1108. .1C
  1109. .ft
  1110. .EE
  1111. .br
  1112. .ne 16
  1113. .PP
  1114. Reading
  1115. .B /net/icmp/stats
  1116. returns a list of 26 tagged and newline-separated fields representing:
  1117. .EX
  1118. .ft 1
  1119. .2C
  1120. .in +0.25i
  1121. messages received
  1122. bad received messages
  1123. unreachables received
  1124. time exceededs received
  1125. input parameter problems received
  1126. source quenches received
  1127. redirects received
  1128. echo requests received
  1129. echo replies received
  1130. timestamps received
  1131. timestamp replies received
  1132. address mask requests received
  1133. address mask replies received
  1134. messages sent
  1135. transmission errors
  1136. unreachables sent
  1137. time exceededs sent
  1138. input parameter problems sent
  1139. source quenches sent
  1140. redirects sent
  1141. echo requests sent
  1142. echo replies sent
  1143. timestamps sent
  1144. timestamp replies sent
  1145. address mask requests sent
  1146. address mask replies sent
  1147. .in -0.25i
  1148. .1C
  1149. .EE
  1150. .PP
  1151. Reading
  1152. .B /net/tcp/stats
  1153. returns a list of 11 tagged and newline-separated fields representing:
  1154. .EX
  1155. .ft 1
  1156. .2C
  1157. .in +0.25i
  1158. maximum number of connections
  1159. total outgoing calls
  1160. total incoming calls
  1161. number of established connections to be reset
  1162. number of currently established connections
  1163. segments received
  1164. segments sent
  1165. segments retransmitted
  1166. retransmit timeouts
  1167. bad received segments
  1168. transmission failures
  1169. .in -0.25i
  1170. .1C
  1171. .EE
  1172. .PP
  1173. Reading
  1174. .B /net/udp/stats
  1175. returns a list of 4 tagged and newline-separated fields representing:
  1176. .EX
  1177. .ft 1
  1178. .2C
  1179. .in +0.25i
  1180. datagrams received
  1181. datagrams received for bad ports
  1182. malformed datagrams received
  1183. datagrams sent
  1184. .in -0.25i
  1185. .1C
  1186. .EE
  1187. .PP
  1188. Reading
  1189. .B /net/gre/stats
  1190. returns a single tagged number representing:
  1191. .EX
  1192. .ft 1
  1193. .in +0.25i
  1194. header length errors
  1195. .in -0.25i
  1196. .EE
  1197. .SH "SEE ALSO"
  1198. .IR dial (2),
  1199. .IR ip (2),
  1200. .IR bridge (3),
  1201. .\" .IR ike (4),
  1202. .IR ndb (6),
  1203. .IR listen (8)
  1204. .br
  1205. .PD 0
  1206. .TF /lib/rfc/rfc2822
  1207. .TP
  1208. .B /lib/rfc/rfc2460
  1209. IPv6
  1210. .TP
  1211. .B /lib/rfc/rfc4291
  1212. IPv6 address architecture
  1213. .TP
  1214. .B /lib/rfc/rfc4443
  1215. ICMPv6
  1216. .SH SOURCE
  1217. .B /sys/src/9/ip
  1218. .SH BUGS
  1219. .I Ipmux
  1220. has not been heavily used and should be considered experimental.
  1221. It may disappear in favor of a more traditional packet filter in the future.