ip 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269
  1. .TH IP 3
  2. .SH NAME
  3. ip, esp, gre, icmp, icmpv6, ipmux, rudp, tcp, udp \- network protocols over IP
  4. .SH SYNOPSIS
  5. .nf
  6. .2C
  7. .B bind -a #I\fIspec\fP /net
  8. .sp 0.3v
  9. .B /net/ipifc
  10. .B /net/ipifc/clone
  11. .B /net/ipifc/stats
  12. .BI /net/ipifc/ n
  13. .BI /net/ipifc/ n /status
  14. .BI /net/ipifc/ n /ctl
  15. \&...
  16. .sp 0.3v
  17. .B /net/arp
  18. .B /net/bootp
  19. .B /net/iproute
  20. .B /net/ipselftab
  21. .B /net/log
  22. .B /net/ndb
  23. .sp 0.3v
  24. .B /net/esp
  25. .B /net/gre
  26. .B /net/icmp
  27. .B /net/icmpv6
  28. .B /net/ipmux
  29. .B /net/rudp
  30. .B /net/tcp
  31. .B /net/udp
  32. .sp 0.3v
  33. .B /net/tcp/clone
  34. .B /net/tcp/stats
  35. .BI /net/tcp/ n
  36. .BI /net/tcp/ n /data
  37. .BI /net/tcp/ n /ctl
  38. .BI /net/tcp/ n /local
  39. .BI /net/tcp/ n /remote
  40. .BI /net/tcp/ n /status
  41. .BI /net/tcp/ n /listen
  42. \&...
  43. .1C
  44. .fi
  45. .SH DESCRIPTION
  46. The
  47. .I ip
  48. device provides the interface to Internet Protocol stacks.
  49. .I Spec
  50. is an integer from 0 to 15 identifying a stack.
  51. Each stack implements IPv4 and IPv6.
  52. Each stack is independent of all others:
  53. the only information transfer between them is via programs that
  54. mount multiple stacks.
  55. Normally a system uses only one stack.
  56. However multiple stacks can be used for debugging
  57. new IP networks or implementing firewalls or proxy
  58. services.
  59. .PP
  60. All addresses used are 16-byte IPv6 addresses.
  61. IPv4 addresses are a subset of the IPv6 addresses and both standard
  62. .SM ASCII
  63. formats are accepted.
  64. In binary representation, all v4 addresses start with these 12 bytes, in hex:
  65. .IP
  66. .EX
  67. 00 00 00 00 00 00 00 00 00 00 ff ff
  68. .EE
  69. .
  70. .SS "Configuring interfaces
  71. Each stack may have multiple interfaces and each interface
  72. may have multiple addresses.
  73. The
  74. .B /net/ipifc
  75. directory contains a
  76. .B clone
  77. file, a
  78. .B stats
  79. file, and numbered subdirectories for each physical interface.
  80. .PP
  81. Opening the
  82. .B clone
  83. file reserves an interface.
  84. The file descriptor returned from the
  85. .IR open (2)
  86. will point to the control file,
  87. .BR ctl ,
  88. of the newly allocated interface.
  89. Reading
  90. .B ctl
  91. returns a text string representing the number of the interface.
  92. Writing
  93. .B ctl
  94. alters aspects of the interface.
  95. The possible
  96. .I ctl
  97. messages are those described under
  98. .B "Protocol directories"
  99. below and these:
  100. .TF "\fLbind loopback\fR"
  101. .PD
  102. .
  103. .\" from devip.c
  104. .
  105. .TP
  106. .BI "bind ether " path
  107. Treat the device mounted at
  108. .I path
  109. as an Ethernet medium carrying IP and ARP packets
  110. and associate it with this interface.
  111. The kernel will
  112. .IR dial (2)
  113. .IR path !0x800
  114. and
  115. .IR path !0x806
  116. and use the two connections for IPv4 and
  117. ARP respectively.
  118. .TP
  119. .B "bind pkt
  120. Treat this interface as a packet interface. Assume
  121. a user program will read and write the
  122. .I data
  123. file to receive and transmit IP packets to the kernel.
  124. This is used by programs such as
  125. .IR ppp (8)
  126. to mediate IP packet transfer between the kernel and
  127. a PPP encoded device.
  128. .TP
  129. .BI "bind netdev " path
  130. Treat this interface as a packet interface.
  131. The kernel will open
  132. .I path
  133. and read and write the resulting file descriptor
  134. to receive and transmit IP packets.
  135. .TP
  136. .BI "bind loopback "
  137. Treat this interface as a local loopback. Anything
  138. written to it will be looped back.
  139. .
  140. .\" from ipifc.c
  141. .
  142. .TP
  143. .B "unbind
  144. Disassociate the physical device from an IP interface.
  145. .TP
  146. .BI add\ "local mask remote mtu " proxy
  147. .PD 0
  148. .TP
  149. .BI try\ "local mask remote mtu " proxy
  150. .PD
  151. Add a local IP address to the interface.
  152. .I Try
  153. adds the
  154. .I local
  155. address as a tentative address
  156. if it's an IPv6 address.
  157. The
  158. .IR mask ,
  159. .IR remote ,
  160. .IR mtu ,
  161. and
  162. .B proxy
  163. arguments are all optional.
  164. The default
  165. .I mask
  166. is the class mask for the local address.
  167. The default
  168. .I remote
  169. address is
  170. .I local
  171. ANDed with
  172. .IR mask .
  173. The default
  174. .I mtu
  175. (maximum transmission unit)
  176. is 1514 for Ethernet and 4096 for packet media.
  177. The
  178. .I mtu
  179. is the size in bytes of the largest packet that this interface can send.
  180. .IR Proxy ,
  181. if specified, means that this machine should answer
  182. ARP requests for the remote address.
  183. .IR Ppp (8)
  184. does this to make remote machines appear
  185. to be connected to the local Ethernet.
  186. .TP
  187. .BI remove\ "local mask"
  188. Remove a local IP address from an interface.
  189. .TP
  190. .BI mtu\ n
  191. Set the maximum transmission unit for this device to
  192. .IR n .
  193. The mtu is the maximum size of the packet including any
  194. medium-specific headers.
  195. .TP
  196. .BI reassemble
  197. Reassemble IP fragments before forwarding to this interface.
  198. .TP
  199. .BI iprouting\ n
  200. Allow
  201. .RI ( n
  202. is missing or non-zero) or disallow
  203. .RI ( n
  204. is 0) forwarding packets between this interface and
  205. others.
  206. .
  207. .\" remainder from netif.c (thus called from devether.c),
  208. .\" except add6 and ra6 from ipifc.c
  209. .
  210. .TP
  211. .B bridge
  212. Enable bridging (see
  213. .IR bridge (3)).
  214. .TP
  215. .B promiscuous
  216. Set the interface into promiscuous mode,
  217. which makes it accept all incoming packets,
  218. whether addressed to it or not.
  219. .TP
  220. .BI "connect " type
  221. Mark the Ethernet packet
  222. .I type
  223. as being in use, if not already in use
  224. on this interface.
  225. A
  226. .I type
  227. of -1 means `all' but appears to be a no-op.
  228. .TP
  229. .BI addmulti\ Media-addr
  230. Treat the multicast
  231. .I Media-addr
  232. on this interface as a local address.
  233. .TP
  234. .BI remmulti\ Media-addr
  235. Remove the multicast address
  236. .I Media-addr
  237. from this interface.
  238. .TP
  239. .B scanbs
  240. Make the wireless interface scan for base stations.
  241. .TP
  242. .B headersonly
  243. Set the interface to pass only packet headers, not data too.
  244. .
  245. .\" remainder from ipifc.c; tedious, so put them last
  246. .
  247. .TP
  248. .BI "add6 " "v6addr pfx-len [onlink auto validlt preflt]"
  249. Add the local IPv6 address
  250. .I v6addr
  251. with prefix length
  252. .I pfx-len
  253. to this interface.
  254. See RFC 2461 §6.2.1 for more detail.
  255. The remaining arguments are optional:
  256. .RS
  257. .TF "\fIonlink\fR"
  258. .TP
  259. .I onlink
  260. flag: address is `on-link'
  261. .TP
  262. .I auto
  263. flag: autonomous
  264. .TP
  265. .I validlt
  266. valid life-time in seconds
  267. .TP
  268. .I preflt
  269. preferred life-time in seconds
  270. .RE
  271. .PD
  272. .TP
  273. .BI "ra6 " "keyword value ..."
  274. Set IPv6 router advertisement (RA) parameter
  275. .IR keyword 's
  276. .IR value .
  277. Known
  278. .IR keyword s
  279. and the meanings of their values follow.
  280. See RFC 2461 §6.2.1 for more detail.
  281. Flags are true iff non-zero.
  282. .RS
  283. .TF "\fLreachtime\fR"
  284. .TP
  285. .B recvra
  286. flag: receive and process RAs.
  287. .TP
  288. .B sendra
  289. flag: generate and send RAs.
  290. .TP
  291. .B mflag
  292. flag: ``Managed address configuration'',
  293. goes into RAs.
  294. .TP
  295. .B oflag
  296. flag: ``Other stateful configuration'',
  297. goes into RAs.
  298. .TP
  299. .B maxraint
  300. ``maximum time allowed between sending unsolicited multicast''
  301. RAs from the interface, in ms.
  302. .TP
  303. .B minraint
  304. ``minimum time allowed between sending unsolicited multicast''
  305. RAs from the interface, in ms.
  306. .TP
  307. .B linkmtu
  308. ``value to be placed in MTU options sent by the router.''
  309. Zero indicates none.
  310. .TP
  311. .B reachtime
  312. sets the Reachable Time field in RAs sent by the router.
  313. ``Zero means unspecified (by this router).''
  314. .TP
  315. .B rxmitra
  316. sets the Retrans Timer field in RAs sent by the router.
  317. ``Zero means unspecified (by this router).''
  318. .TP
  319. .B ttl
  320. default value of the Cur Hop Limit field in RAs sent by the router.
  321. Should be set to the ``current diameter of the Internet.''
  322. ``Zero means unspecified (by this router).''
  323. .TP
  324. .B routerlt
  325. sets the Router Lifetime field of RAs sent from the interface, in ms.
  326. Zero means the router is not to be used as a default router.
  327. .PD
  328. .RE
  329. .PP
  330. Reading the interface's
  331. .I status
  332. file returns information about the interface, one line for each
  333. local address on that interface. The first line
  334. has 9 white-space-separated fields: device, mtu, local address,
  335. mask, remote or network address, packets in, packets out, input errors,
  336. output errors. Each subsequent line contains all but the device and mtu.
  337. See
  338. .I readipifc
  339. in
  340. .IR ip (2).
  341. .
  342. .SS "Routing
  343. The file
  344. .I iproute
  345. controls information about IP routing.
  346. When read, it returns one line per routing entry.
  347. Each line contains six white-space-separated fields:
  348. target address, target mask, address of next hop, flags,
  349. tag, and interface number.
  350. The entry used for routing an IP packet is the one with
  351. the longest mask for which destination address ANDed with
  352. target mask equals the target address.
  353. The one-character flags are:
  354. .TF m
  355. .TP
  356. .B 4
  357. IPv4 route
  358. .TP
  359. .B 6
  360. IPv6 route
  361. .TP
  362. .B i
  363. local interface
  364. .TP
  365. .B b
  366. broadcast address
  367. .TP
  368. .B u
  369. local unicast address
  370. .TP
  371. .B m
  372. multicast route
  373. .TP
  374. .B p
  375. point-to-point route
  376. .PD
  377. .PP
  378. The tag is an arbitrary, up to 4 character, string. It is normally used to
  379. indicate what routing protocol originated the route.
  380. .PP
  381. Writing to
  382. .B /net/iproute
  383. changes the route table. The messages are:
  384. .TF "\fLtag \fIstring\fR"
  385. .PD
  386. .TP
  387. .B flush
  388. Remove all routes.
  389. .TP
  390. .BI tag\ string
  391. Associate the tag,
  392. .IR string ,
  393. with all subsequent routes added via this file descriptor.
  394. .TP
  395. .BI add\ "target mask nexthop"
  396. Add the route to the table. If one already exists with the
  397. same target and mask, replace it.
  398. .TP
  399. .BI remove\ "target mask"
  400. Remove a route with a matching target and mask.
  401. .
  402. .SS "Address resolution
  403. The file
  404. .B /net/arp
  405. controls information about address resolution.
  406. The kernel automatically updates the v4 ARP and v6 Neighbour Discovery
  407. information for Ethernet interfaces.
  408. When read, the file returns one line per address containing the
  409. type of medium, the status of the entry (OK, WAIT), the IP
  410. address, and the medium address.
  411. Writing to
  412. .B /net/arp
  413. administers the ARP information.
  414. The control messages are:
  415. .TF "\fLdel \fIIP-addr\fR"
  416. .PD
  417. .TP
  418. .B flush
  419. Remove all entries.
  420. .TP
  421. .BI add\ "type IP-addr Media-addr"
  422. Add an entry or replace an existing one for the
  423. same IP address.
  424. .TP
  425. .BI del\ "IP-addr"
  426. Delete an individual entry.
  427. .PP
  428. ARP entries do not time out. The ARP table is a
  429. cache with an LRU replacement policy. The IP stack
  430. listens for all ARP requests and, if the requester is in
  431. the table, the entry is updated.
  432. Also, whenever a new address is configured onto an
  433. Ethernet, an ARP request is sent to help
  434. update the table on other systems.
  435. .PP
  436. Currently, the only medium type is
  437. .BR ether .
  438. .br
  439. .ne 3
  440. .
  441. .SS "Debugging and stack information
  442. If any process is holding
  443. .B /net/log
  444. open, the IP stack queues debugging information to it.
  445. This is intended primarily for debugging the IP stack.
  446. The information provided is implementation-defined;
  447. see the source for details. Generally, what is returned is error messages
  448. about bad packets.
  449. .PP
  450. Writing to
  451. .B /net/log
  452. controls debugging. The control messages are:
  453. .TF "\fLclear \fIarglist\fR"
  454. .PD
  455. .TP
  456. .BI set\ arglist
  457. .I Arglist
  458. is a space-separated list of items for which to enable debugging.
  459. The possible items are:
  460. .BR ppp ,
  461. .BR ip ,
  462. .BR fs ,
  463. .BR tcp ,
  464. .BR icmp ,
  465. .BR udp ,
  466. .BR compress ,
  467. .BR gre ,
  468. .BR tcpwin ,
  469. .BR tcprxmt ,
  470. .BR udpmsg ,
  471. .BR ipmsg ,
  472. and
  473. .BR esp .
  474. .TP
  475. .BI clear\ arglist
  476. .I Arglist
  477. is a space-separated list of items for which to disable debugging.
  478. .TP
  479. .BI only\ addr
  480. If
  481. .I addr
  482. is non-zero, restrict debugging to only those
  483. packets whose source or destination is that
  484. address.
  485. .PP
  486. The file
  487. .B /net/ndb
  488. can be read or written by
  489. programs. It is normally used by
  490. .IR ipconfig (8)
  491. to leave configuration information for other programs
  492. such as
  493. .B dns
  494. and
  495. .B cs
  496. (see
  497. .IR ndb (8)).
  498. .B /net/ndb
  499. may contain up to 1024 bytes.
  500. .PP
  501. The file
  502. .B /net/ipselftab
  503. is a read-only file containing all the IP addresses
  504. considered local. Each line in the file contains
  505. three white-space-separated fields: IP address, usage count,
  506. and flags. The usage count is the number of interfaces to which
  507. the address applies. The flags are the same as for routing
  508. entries.
  509. Note that the `IPv4 route' flag will never be set.
  510. .br
  511. .ne 3
  512. .
  513. .SS "Protocol directories
  514. The
  515. .I ip
  516. device
  517. supports IP as well as several protocols that run over it:
  518. TCP, UDP, RUDP, ICMP, GRE, and ESP.
  519. TCP and UDP provide the standard Internet
  520. protocols for reliable stream and unreliable datagram
  521. communication.
  522. RUDP is a locally-developed reliable datagram protocol based on UDP.
  523. ICMP is IP's catch-all control protocol used to send
  524. low level error messages and to implement
  525. .IR ping (8).
  526. GRE is a general encapsulation protocol.
  527. ESP is the encapsulation protocol for IPsec.
  528. IL provided a reliable datagram service for communication
  529. between Plan 9 machines over IPv4, but is no longer part of the system.
  530. .PP
  531. Each protocol is a subdirectory of the IP stack.
  532. The top level directory of each protocol contains a
  533. .B clone
  534. file, a
  535. .B stats
  536. file, and subdirectories numbered from zero to the number of connections
  537. opened for this protocol.
  538. .PP
  539. Opening the
  540. .B clone
  541. file reserves a connection. The file descriptor returned from the
  542. .IR open (2)
  543. will point to the control file,
  544. .BR ctl ,
  545. of the newly allocated connection.
  546. Reading
  547. .B ctl
  548. returns a text
  549. string representing the number of the
  550. connection.
  551. Connections may be used either to listen for incoming calls
  552. or to initiate calls to other machines.
  553. .PP
  554. A connection is controlled by writing text strings to the associated
  555. .B ctl
  556. file.
  557. After a connection has been established data may be read from
  558. and written to
  559. .BR data .
  560. A connection can be actively established using the
  561. .B connect
  562. message (see also
  563. .IR dial (2)).
  564. A connection can be established passively by first
  565. using an
  566. .B announce
  567. message (see
  568. .IR dial (2))
  569. to bind to a local port and then
  570. opening the
  571. .B listen
  572. file (see
  573. .IR dial (2))
  574. to receive incoming calls.
  575. .PP
  576. The following control messages are supported:
  577. .TF "\fLremmulti \fIip\fR"
  578. .PD
  579. .TP
  580. .BI connect\ ip-address ! port "!r " local
  581. Establish a connection to the remote
  582. .I ip-address
  583. and
  584. .IR port .
  585. If
  586. .I local
  587. is specified, it is used as the local port number.
  588. If
  589. .I local
  590. is not specified but
  591. .B !r
  592. is, the system will allocate
  593. a restricted port number (less than 1024) for the connection to allow communication
  594. with Unix
  595. .B login
  596. and
  597. .B exec
  598. services.
  599. Otherwise a free port number starting at 5000 is chosen.
  600. The connect fails if the combination of local and remote address/port pairs
  601. is already assigned to another port.
  602. .TP
  603. .BI announce\ X
  604. .I X
  605. is a decimal port number or
  606. .LR * .
  607. Set the local port
  608. number to
  609. .I X
  610. and accept calls to
  611. .IR X .
  612. If
  613. .I X
  614. is
  615. .LR * ,
  616. accept
  617. calls for any port that no process has explicitly announced.
  618. The local IP address cannot be set.
  619. .B Announce
  620. fails if the connection is already announced or connected.
  621. .TP
  622. .BI bind\ X
  623. .I X
  624. is a decimal port number or
  625. .LR * .
  626. Set the local port number to
  627. .IR X .
  628. This exists to support emulation
  629. of BSD sockets by the APE libraries (see
  630. .IR pcc (1))
  631. and is not otherwise used.
  632. .\" this is gone
  633. .\" .TP
  634. .\" .BI backlog\ n
  635. .\" Set the maximum number of unanswered (queued) incoming
  636. .\" connections to an announced port to
  637. .\" .IR n .
  638. .\" By default
  639. .\" .I n
  640. .\" is set to five. If more than
  641. .\" .I n
  642. .\" connections are pending,
  643. .\" further requests for a service will be rejected.
  644. .TP
  645. .BI ttl\ n
  646. Set the time to live IP field in outgoing packets to
  647. .IR n .
  648. .TP
  649. .BI tos\ n
  650. Set the service type IP field in outgoing packets to
  651. .IR n .
  652. .TP
  653. .B ignoreadvice
  654. Don't break (UDP) connections because of ICMP errors.
  655. .TP
  656. .BI addmulti\ "ifc-ip [ mcast-ip ]"
  657. Treat
  658. .I ifc-ip
  659. on this multicast interface as a local address.
  660. If
  661. .I mcast-ip
  662. is present,
  663. use it as the interface's multicast address.
  664. .TP
  665. .BI remmulti\ ip
  666. Remove the address
  667. .I ip
  668. from this multicast interface.
  669. .PP
  670. Port numbers must be in the range 1 to 32767.
  671. .PP
  672. Several files report the status of a
  673. connection.
  674. The
  675. .B remote
  676. and
  677. .B local
  678. files contain the IP address and port number for the remote and local side of the
  679. connection. The
  680. .B status
  681. file contains protocol-dependent information to help debug network connections.
  682. On receiving an error or EOF reading or writing the
  683. .B data
  684. file, the
  685. .B err
  686. file contains the reason for error.
  687. .PP
  688. A process may accept incoming connections by
  689. .IR open (2)ing
  690. the
  691. .B listen
  692. file.
  693. The
  694. .B open
  695. will block until a new connection request arrives.
  696. Then
  697. .B open
  698. will return an open file descriptor which points to the control file of the
  699. newly accepted connection.
  700. This procedure will accept all calls for the
  701. given protocol.
  702. See
  703. .IR dial (2).
  704. .
  705. .SS TCP
  706. TCP connections are reliable point-to-point byte streams; there are no
  707. message delimiters.
  708. A connection is determined by the address and port numbers of the two
  709. ends.
  710. TCP
  711. .B ctl
  712. files support the following additional messages:
  713. .TF "\fLkeepalive\fI n\fR"
  714. .PD
  715. .TP
  716. .B hangup
  717. close down this TCP connection.
  718. .TP
  719. .BI keepalive \ n
  720. turn on keep alive messages.
  721. .IR N ,
  722. if given, is the milliseconds between keepalives
  723. (default 30000).
  724. .TP
  725. .BI checksum \ n
  726. emit TCP checksums of zero if
  727. .I n
  728. is zero; otherwise, and by default,
  729. TCP checksums are computed and sent normally.
  730. .TP
  731. .BI tcpporthogdefense \ onoff
  732. .I onoff
  733. of
  734. .L on
  735. enables the TCP port-hog defense for all TCP connections;
  736. .I onoff
  737. of
  738. .L off
  739. disables it.
  740. The defense is a solution to hijacked systems staking out ports
  741. as a form of denial-of-service attack.
  742. To avoid stateless TCP conversation hogs,
  743. .I ip
  744. picks a TCP sequence number at random for keepalives.
  745. If that number gets acked by the other end,
  746. .I ip
  747. shuts down the connection.
  748. Some firewalls,
  749. notably ones that perform stateful inspection,
  750. discard such out-of-specification keepalives,
  751. so connections through such firewalls
  752. will be killed after five minutes
  753. by the lack of keepalives.
  754. .
  755. .SS UDP
  756. UDP connections carry unreliable and unordered datagrams. A read from
  757. .B data
  758. will return the next datagram, discarding anything
  759. that doesn't fit in the read buffer.
  760. A write is sent as a single datagram.
  761. .PP
  762. By default, a UDP connection is a point-to-point link.
  763. Either a
  764. .B connect
  765. establishes a local and remote address/port pair or
  766. after an
  767. .BR announce ,
  768. each datagram coming from a different remote address/port pair
  769. establishes a new incoming connection.
  770. However, many-to-one semantics is also possible.
  771. .PP
  772. If, after an
  773. .BR announce ,
  774. the message
  775. .L headers
  776. is written to
  777. .BR ctl ,
  778. then all messages sent to the announced port
  779. are received on the announced connection prefixed
  780. with the corresponding structure,
  781. declared in
  782. .BR <ip.h> :
  783. .IP
  784. .EX
  785. typedef struct Udphdr Udphdr;
  786. struct Udphdr
  787. {
  788. uchar raddr[16]; /* V6 remote address and port */
  789. uchar laddr[16]; /* V6 local address and port */
  790. uchar ifcaddr[16]; /* V6 interface address (receive only) */
  791. uchar rport[2]; /* remote port */
  792. uchar lport[2]; /* local port */
  793. };
  794. .EE
  795. .PP
  796. Before a write, a user must prefix a similar structure to each message.
  797. The system overrides the user specified local port with the announced
  798. one. If the user specifies an address that isn't a unicast address in
  799. .BR /net/ipselftab ,
  800. that too is overridden.
  801. Since the prefixed structure is the same in read and write, it is relatively
  802. easy to write a server that responds to client requests by just copying new
  803. data into the message body and then writing back the same buffer that was
  804. read.
  805. .PP
  806. In this case (writing
  807. .L headers
  808. to the
  809. .I ctl
  810. file),
  811. neither
  812. .I listen
  813. nor
  814. .I accept
  815. is needed;
  816. otherwise,
  817. the usual sequence of
  818. .IR announce ,
  819. .IR listen ,
  820. .I accept
  821. must be executed before performing I/O on the corresponding
  822. .I data
  823. file.
  824. .
  825. .SS RUDP
  826. RUDP is a reliable datagram protocol based on UDP,
  827. currently only for IPv4.
  828. Packets are delivered in order.
  829. RUDP does not support
  830. .BR listen .
  831. One must write either
  832. .L connect
  833. or
  834. .L announce
  835. followed immediately by
  836. .L headers
  837. to
  838. .BR ctl .
  839. .PP
  840. Unlike TCP, the reboot of one end of a connection does
  841. not force a closing of the connection. Communications will
  842. resume when the rebooted machine resumes talking. Any unacknowledged
  843. packets queued before the reboot will be lost. A reboot can
  844. be detected by reading the
  845. .B err
  846. file. It will contain the message
  847. .IP
  848. .BI hangup\ address ! port
  849. .PP
  850. where
  851. .I address
  852. and
  853. .I port
  854. are of the far side of the connection.
  855. Retransmitting a datagram more than 10 times
  856. is treated like a reboot:
  857. all queued messages are dropped, an error is queued to the
  858. .B err
  859. file, and the conversation resumes.
  860. .PP
  861. RUDP
  862. .I ctl
  863. files accept the following messages:
  864. .TF "\fLranddrop \fI[ percent ]\fR"
  865. .TP
  866. .B headers
  867. Corresponds to the
  868. .L headers
  869. format of UDP.
  870. .TP
  871. .BI "hangup " "IP port"
  872. Drop the connection to address
  873. .I IP
  874. and
  875. .IR port .
  876. .TP
  877. .BI "randdrop " "[ percent ]"
  878. Randomly drop
  879. .I percent
  880. of outgoing packets.
  881. Default is 10%.
  882. .
  883. .SS ICMP
  884. ICMP is a datagram protocol for IPv4 used to exchange control requests and
  885. their responses with other machines' IP implementations.
  886. ICMP is primarily a kernel-to-kernel protocol, but it is possible
  887. to generate `echo request' and read `echo reply' packets from user programs.
  888. .
  889. .SS ICMPV6
  890. ICMPv6 is the IPv6 equivalent of ICMP.
  891. If, after an
  892. .BR announce ,
  893. the message
  894. .L headers
  895. is written to
  896. .BR ctl ,
  897. then before a write,
  898. a user must prefix each message with a corresponding structure,
  899. declared in
  900. .BR <ip.h> :
  901. .IP
  902. .EX
  903. /*
  904. * user level icmpv6 with control message "headers"
  905. */
  906. typedef struct Icmp6hdr Icmp6hdr;
  907. struct Icmp6hdr {
  908. uchar unused[8];
  909. uchar laddr[IPaddrlen]; /* local address */
  910. uchar raddr[IPaddrlen]; /* remote address */
  911. };
  912. .EE
  913. .PP
  914. In this case (writing
  915. .L headers
  916. to the
  917. .I ctl
  918. file),
  919. neither
  920. .I listen
  921. nor
  922. .I accept
  923. is needed;
  924. otherwise,
  925. the usual sequence of
  926. .IR announce ,
  927. .IR listen ,
  928. .I accept
  929. must be executed before performing I/O on the corresponding
  930. .I data
  931. file.
  932. .
  933. .SS GRE
  934. GRE is the encapsulation protocol used by PPTP.
  935. The kernel implements just enough of the protocol
  936. to multiplex it.
  937. Our implementation encapsulates in IPv4, per RFC 1702.
  938. .B Announce
  939. is not allowed in GRE, only
  940. .BR connect .
  941. Since GRE has no port numbers, the port number in the connect
  942. is actually the 16 bit
  943. .B eproto
  944. field in the GRE header.
  945. .PP
  946. Reads and writes transfer a
  947. GRE datagram starting at the GRE header.
  948. On write, the kernel fills in the
  949. .B eproto
  950. field with the port number specified
  951. in the connect message.
  952. .br
  953. .ne 3
  954. .
  955. .SS ESP
  956. ESP is the Encapsulating Security Payload (RFC 1827, obsoleted by RFC 4303)
  957. for IPsec (RFC 4301).
  958. We currently implement only tunnel mode, not transport mode.
  959. It is used to set up an encrypted tunnel between machines.
  960. Like GRE, ESP has no port numbers. Instead, the
  961. port number in the
  962. .B connect
  963. message is the SPI (Security Parameters Index; RFC 1827 calls it the Security Association Identifier).
  964. IP packets are written to and read from
  965. .BR data .
  966. The kernel encrypts any packets written to
  967. .BR data ,
  968. appends a MAC, and prefixes an ESP header before
  969. sending to the other end of the tunnel.
  970. Received packets are checked against their MACs,
  971. decrypted, and queued for reading from
  972. .BR data .
  973. In the following,
  974. .I secret
  975. is the hexadecimal encoding of a key,
  976. without a leading
  977. .LR 0x .
  978. The control messages are:
  979. .TF "\fLesp \fIalg secret\fR"
  980. .PD
  981. .TP
  982. .BI esp\ "alg secret
  983. Encrypt with the algorithm,
  984. .IR alg ,
  985. using
  986. .I secret
  987. as the key.
  988. Possible algorithms are:
  989. .BR null ,
  990. .BR des_56_cbc ,
  991. .BR des3_cbc ,
  992. and eventually
  993. .BR aes_128_cbc ,
  994. and
  995. .BR aes_ctr .
  996. .TP
  997. .BI ah\ "alg secret
  998. Use the hash algorithm,
  999. .IR alg ,
  1000. with
  1001. .I secret
  1002. as the key for generating the MAC.
  1003. Possible algorithms are:
  1004. .BR null ,
  1005. .BR hmac_sha1_96 ,
  1006. .BR hmac_md5_96 ,
  1007. and eventually
  1008. .BR aes_xcbc_mac_96 .
  1009. .TP
  1010. .B header
  1011. Turn on header mode. Every buffer read from
  1012. .B data
  1013. starts with 4 unused bytes, and the first 4 bytes
  1014. of every buffer written to
  1015. .B data
  1016. are ignored.
  1017. .TP
  1018. .B noheader
  1019. Turn off header mode.
  1020. .
  1021. .SS "IP packet filter
  1022. The directory
  1023. .B /net/ipmux
  1024. looks like another protocol directory.
  1025. It is a packet filter built on top of IP.
  1026. Each numbered
  1027. subdirectory represents a different filter.
  1028. The connect messages written to the
  1029. .I ctl
  1030. file describe the filter. Packets matching the filter can be read on the
  1031. .B data
  1032. file. Packets written to the
  1033. .B data
  1034. file are routed to an interface and transmitted.
  1035. .PP
  1036. A filter is a semicolon-separated list of
  1037. relations. Each relation describes a portion
  1038. of a packet to match. The possible relations are:
  1039. .TF "\fLdata[\fIn\fL:\fIm\fL]=\fIexpr\fR "
  1040. .PD
  1041. .TP
  1042. .BI proto= n
  1043. the IP protocol number must be
  1044. .IR n .
  1045. .TP
  1046. .BI data[ n : m ]= expr
  1047. bytes
  1048. .I n
  1049. through
  1050. .I m
  1051. following the IP header must match
  1052. .IR expr .
  1053. .TP
  1054. .BI iph[ n : m ]= expr
  1055. bytes
  1056. .I n
  1057. through
  1058. .I m
  1059. of the IP packet header must match
  1060. .IR expr .
  1061. .TP
  1062. .BI ifc= expr
  1063. the packet must have been received on an interface whose address
  1064. matches
  1065. .IR expr .
  1066. .TP
  1067. .BI src= expr
  1068. The source address in the packet must match
  1069. .IR expr .
  1070. .TP
  1071. .BI dst= expr
  1072. The destination address in the packet must match
  1073. .IR expr .
  1074. .PP
  1075. .I Expr
  1076. is of the form:
  1077. .TP
  1078. .I \ value
  1079. .TP
  1080. .IB \ value | value | ...
  1081. .TP
  1082. .IB \ value & mask
  1083. .TP
  1084. .IB \ value | value & mask
  1085. .PP
  1086. If a mask is given, the relevant field is first ANDed with
  1087. the mask. The result is compared against the value or list
  1088. of values for a match. In the case of
  1089. .BR ifc ,
  1090. .BR dst ,
  1091. and
  1092. .B src
  1093. the value is a dot-formatted IP address and the mask is a dot-formatted
  1094. IP mask. In the case of
  1095. .BR data ,
  1096. .B iph
  1097. and
  1098. .BR proto ,
  1099. both value and mask are strings of 2 hexadecimal digits representing
  1100. 8-bit values.
  1101. .PP
  1102. A packet is delivered to only one filter.
  1103. The filters are merged into a single comparison tree.
  1104. If two filters match the same packet, the following
  1105. rules apply in order (here '>' means is preferred to):
  1106. .IP 1)
  1107. protocol > data > source > destination > interface
  1108. .IP 2)
  1109. lower data offsets > higher data offsets
  1110. .IP 3)
  1111. longer matches > shorter matches
  1112. .IP 4)
  1113. older > younger
  1114. .PP
  1115. So far this has just been used to implement a version of
  1116. OSPF in Inferno
  1117. and 6to4 tunnelling.
  1118. .br
  1119. .ne 5
  1120. .
  1121. .SS Statistics
  1122. The
  1123. .B stats
  1124. files are read only and contain statistics useful to network monitoring.
  1125. .br
  1126. .ne 12
  1127. .PP
  1128. Reading
  1129. .B /net/ipifc/stats
  1130. returns a list of 19 tagged and newline-separated fields representing:
  1131. .EX
  1132. .ft 1
  1133. .2C
  1134. .in +0.25i
  1135. forwarding status (0 and 2 mean forwarding off,
  1136. 1 means on)
  1137. default TTL
  1138. input packets
  1139. input header errors
  1140. input address errors
  1141. packets forwarded
  1142. input packets for unknown protocols
  1143. input packets discarded
  1144. input packets delivered to higher level protocols
  1145. output packets
  1146. output packets discarded
  1147. output packets with no route
  1148. timed out fragments in reassembly queue
  1149. requested reassemblies
  1150. successful reassemblies
  1151. failed reassemblies
  1152. successful fragmentations
  1153. unsuccessful fragmentations
  1154. fragments created
  1155. .in -0.25i
  1156. .1C
  1157. .ft
  1158. .EE
  1159. .br
  1160. .ne 16
  1161. .PP
  1162. Reading
  1163. .B /net/icmp/stats
  1164. returns a list of 26 tagged and newline-separated fields representing:
  1165. .EX
  1166. .ft 1
  1167. .2C
  1168. .in +0.25i
  1169. messages received
  1170. bad received messages
  1171. unreachables received
  1172. time exceededs received
  1173. input parameter problems received
  1174. source quenches received
  1175. redirects received
  1176. echo requests received
  1177. echo replies received
  1178. timestamps received
  1179. timestamp replies received
  1180. address mask requests received
  1181. address mask replies received
  1182. messages sent
  1183. transmission errors
  1184. unreachables sent
  1185. time exceededs sent
  1186. input parameter problems sent
  1187. source quenches sent
  1188. redirects sent
  1189. echo requests sent
  1190. echo replies sent
  1191. timestamps sent
  1192. timestamp replies sent
  1193. address mask requests sent
  1194. address mask replies sent
  1195. .in -0.25i
  1196. .1C
  1197. .EE
  1198. .PP
  1199. Reading
  1200. .B /net/tcp/stats
  1201. returns a list of 11 tagged and newline-separated fields representing:
  1202. .EX
  1203. .ft 1
  1204. .2C
  1205. .in +0.25i
  1206. maximum number of connections
  1207. total outgoing calls
  1208. total incoming calls
  1209. number of established connections to be reset
  1210. number of currently established connections
  1211. segments received
  1212. segments sent
  1213. segments retransmitted
  1214. retransmit timeouts
  1215. bad received segments
  1216. transmission failures
  1217. .in -0.25i
  1218. .1C
  1219. .EE
  1220. .PP
  1221. Reading
  1222. .B /net/udp/stats
  1223. returns a list of 4 tagged and newline-separated fields representing:
  1224. .EX
  1225. .ft 1
  1226. .2C
  1227. .in +0.25i
  1228. datagrams received
  1229. datagrams received for bad ports
  1230. malformed datagrams received
  1231. datagrams sent
  1232. .in -0.25i
  1233. .1C
  1234. .EE
  1235. .PP
  1236. Reading
  1237. .B /net/gre/stats
  1238. returns a single tagged number representing:
  1239. .EX
  1240. .ft 1
  1241. .in +0.25i
  1242. header length errors
  1243. .in -0.25i
  1244. .EE
  1245. .SH "SEE ALSO"
  1246. .IR dial (2),
  1247. .IR ip (2),
  1248. .IR bridge (3),
  1249. .\" .IR ike (4),
  1250. .IR ndb (6),
  1251. .IR listen (8)
  1252. .br
  1253. .PD 0
  1254. .TF "\fL/lib/rfc/rfc2822"
  1255. .TP
  1256. .B /lib/rfc/rfc2460
  1257. IPv6
  1258. .TP
  1259. .B /lib/rfc/rfc4291
  1260. IPv6 address architecture
  1261. .TP
  1262. .B /lib/rfc/rfc4443
  1263. ICMPv6
  1264. .SH SOURCE
  1265. .B /sys/src/9/ip
  1266. .SH BUGS
  1267. .I Ipmux
  1268. has not been heavily used and should be considered experimental.
  1269. It may disappear in favor of a more traditional packet filter in the future.