ip 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253
  1. .TH IP 3
  2. .SH NAME
  3. ip, esp, gre, icmp, icmpv6, ipmux, rudp, tcp, udp \- network protocols over IP
  4. .SH SYNOPSIS
  5. .nf
  6. .2C
  7. .B bind -a #I\fIspec\fP /net
  8. .sp 0.3v
  9. .B /net/ipifc
  10. .B /net/ipifc/clone
  11. .B /net/ipifc/stats
  12. .BI /net/ipifc/ n
  13. .BI /net/ipifc/ n /status
  14. .BI /net/ipifc/ n /ctl
  15. \&...
  16. .sp 0.3v
  17. .B /net/arp
  18. .B /net/bootp
  19. .B /net/iproute
  20. .B /net/ipselftab
  21. .B /net/log
  22. .B /net/ndb
  23. .sp 0.3v
  24. .B /net/esp
  25. .B /net/gre
  26. .B /net/icmp
  27. .B /net/icmpv6
  28. .B /net/ipmux
  29. .B /net/rudp
  30. .B /net/tcp
  31. .B /net/udp
  32. .sp 0.3v
  33. .B /net/tcp/clone
  34. .B /net/tcp/stats
  35. .BI /net/tcp/ n
  36. .BI /net/tcp/ n /data
  37. .BI /net/tcp/ n /ctl
  38. .BI /net/tcp/ n /local
  39. .BI /net/tcp/ n /remote
  40. .BI /net/tcp/ n /status
  41. .BI /net/tcp/ n /listen
  42. \&...
  43. .1C
  44. .fi
  45. .SH DESCRIPTION
  46. The
  47. .I ip
  48. device provides the interface to Internet Protocol stacks.
  49. .I Spec
  50. is an integer from 0 to 15 identifying a stack.
  51. Each stack implements IPv4 and IPv6.
  52. Each stack is independent of all others:
  53. the only information transfer between them is via programs that
  54. mount multiple stacks.
  55. Normally a system uses only one stack.
  56. However multiple stacks can be used for debugging
  57. new IP networks or implementing firewalls or proxy
  58. services.
  59. .PP
  60. All addresses used are 16-byte IPv6 addresses.
  61. IPv4 addresses are a subset of the IPv6 addresses and both standard
  62. .SM ASCII
  63. formats are accepted.
  64. In binary representation, all v4 addresses start with the 12 bytes, in hex:
  65. .IP
  66. .EX
  67. 00 00 00 00 00 00 00 00 00 00 ff ff
  68. .EE
  69. .
  70. .SS "Configuring interfaces
  71. Each stack may have multiple interfaces and each interface
  72. may have multiple addresses.
  73. The
  74. .B /net/ipifc
  75. directory contains a
  76. .B clone
  77. file, a
  78. .B stats
  79. file, and numbered subdirectories for each physical interface.
  80. .PP
  81. Opening the
  82. .B clone
  83. file reserves an interface.
  84. The file descriptor returned from the
  85. .IR open (2)
  86. will point to the control file,
  87. .BR ctl ,
  88. of the newly allocated interface.
  89. Reading
  90. .B ctl
  91. returns a text string representing the number of the interface.
  92. Writing
  93. .B ctl
  94. alters aspects of the interface.
  95. The possible
  96. .I ctl
  97. messages are those described under
  98. .B "Protocol directories"
  99. below and these:
  100. .TF "\fLbind loopback\fR"
  101. .PD
  102. .
  103. .\" from devip.c
  104. .
  105. .TP
  106. .BI "bind ether " path
  107. Treat the device mounted at
  108. .I path
  109. as an Ethernet medium carrying IP and ARP packets
  110. and associate it with this interface.
  111. The kernel will
  112. .IR dial (2)
  113. .IR path !0x800
  114. and
  115. .IR path !0x806
  116. and use the two connections for IPv4 and
  117. ARP respectively.
  118. .TP
  119. .B "bind pkt
  120. Treat this interface as a packet interface. Assume
  121. a user program will read and write the
  122. .I data
  123. file to receive and transmit IP packets to the kernel.
  124. This is used by programs such as
  125. .IR ppp (8)
  126. to mediate IP packet transfer between the kernel and
  127. a PPP encoded device.
  128. .TP
  129. .BI "bind netdev " path
  130. Treat this interface as a packet interface.
  131. The kernel will open
  132. .I path
  133. and read and write the resulting file descriptor
  134. to receive and transmit IP packets.
  135. .TP
  136. .BI "bind loopback "
  137. Treat this interface as a local loopback. Anything
  138. written to it will be looped back.
  139. .
  140. .\" from ipifc.c
  141. .
  142. .TP
  143. .B "unbind
  144. Disassociate the physical device from an IP interface.
  145. .TP
  146. .BI add\ "local mask remote mtu " proxy
  147. .PD 0
  148. .TP
  149. .BI try\ "local mask remote mtu " proxy
  150. .PD
  151. Add a local IP address to the interface.
  152. .I try
  153. adds the local address as a tentative address
  154. if it's an IPv6 address.
  155. The
  156. .IR mask ,
  157. .IR remote ,
  158. .IR mtu ,
  159. and
  160. .B proxy
  161. arguments are all optional. The default mask is
  162. the class mask for the local address. The default
  163. remote address is
  164. .I local
  165. ANDed with
  166. .IR mask .
  167. The default mtu is 1514 for Ethernet and 4096 for packet
  168. media.
  169. .IR Proxy ,
  170. if specified, means that this machine should answer
  171. ARP requests for the remote address.
  172. .IR Ppp (8)
  173. does this to make remote machines appear
  174. to be connected to the local Ethernet.
  175. .TP
  176. .BI remove\ "local mask"
  177. Remove a local IP address from an interface.
  178. .TP
  179. .BI mtu\ n
  180. Set the maximum transfer unit for this device to
  181. .IR n .
  182. The mtu is the maximum size of the packet including any
  183. medium-specific headers.
  184. .TP
  185. .BI reassemble
  186. Reassemble IP fragments before forwarding to this interface.
  187. .TP
  188. .BI iprouting\ n
  189. Allow
  190. .RI ( n
  191. is missing or non-zero) or disallow
  192. .RI ( n
  193. is 0) forwarding packets between this interface and
  194. others.
  195. .
  196. .\" remainder from netif.c (thus called from devether.c),
  197. .\" except add6 and ra6 from ipifc.c
  198. .
  199. .TP
  200. .B bridge
  201. Enable bridging (see
  202. .IR bridge (3)).
  203. .TP
  204. .B promiscuous
  205. Set the interface into promiscuous mode,
  206. which makes it accept all incoming packets,
  207. whether addressed to it or not.
  208. .TP
  209. .BI "connect " type
  210. Mark the Ethernet packet
  211. .I type
  212. as being in use, if not already in use
  213. on this interface.
  214. A
  215. .I type
  216. of -1 means `all' but appears to be a no-op.
  217. .TP
  218. .BI addmulti\ Media-addr
  219. Treat the multicast
  220. .I Media-addr
  221. on this interface as a local address.
  222. .TP
  223. .BI remmulti\ Media-addr
  224. Remove the multicast address
  225. .I Media-addr
  226. from this interface.
  227. .TP
  228. .B scanbs
  229. Make the wireless interface scan for base stations.
  230. .TP
  231. .B headersonly
  232. Set the interface to pass only packet headers, not data too.
  233. .
  234. .\" remainder from ipifc.c; tedious, so put them last
  235. .
  236. .TP
  237. .BI "add6 " "v6addr pfx-len [onlink auto validlt preflt]"
  238. Add the local IPv6 address
  239. .I v6addr
  240. with prefix length
  241. .I pfx-len
  242. to this interface.
  243. See RFC 2461 §6.2.1 for more detail.
  244. The remaining arguments are optional:
  245. .RS
  246. .TF "\fIonlink\fR"
  247. .TP
  248. .I onlink
  249. flag: address is `on-link'
  250. .TP
  251. .I auto
  252. flag: autonomous
  253. .TP
  254. .I validlt
  255. valid life-time in seconds
  256. .TP
  257. .I preflt
  258. preferred life-time in seconds
  259. .RE
  260. .PD
  261. .TP
  262. .BI "ra6 " "keyword value ..."
  263. Set IPv6 router advertisement (RA) parameter
  264. .IR keyword 's
  265. .IR value .
  266. Known
  267. .IR keyword s
  268. and the meanings of their values follow.
  269. See RFC 2461 §6.2.1 for more detail.
  270. Flags are true iff non-zero.
  271. .RS
  272. .TF "\fLreachtime\fR"
  273. .TP
  274. .B recvra
  275. flag: receive and process RAs.
  276. .TP
  277. .B sendra
  278. flag: generate and send RAs.
  279. .TP
  280. .B mflag
  281. flag: ``Managed address configuration'',
  282. goes into RAs.
  283. .TP
  284. .B oflag
  285. flag: ``Other stateful configuration'',
  286. goes into RAs.
  287. .TP
  288. .B maxraint
  289. ``maximum time allowed between sending unsolicited multicast''
  290. RAs from the interface, in ms.
  291. .TP
  292. .B minraint
  293. ``minimum time allowed between sending unsolicited multicast''
  294. RAs from the interface, in ms.
  295. .TP
  296. .B linkmtu
  297. ``value to be placed in MTU options sent by the router.''
  298. Zero indicates none.
  299. .TP
  300. .B reachtime
  301. sets the Reachable Time field in RAs sent by the router.
  302. ``Zero means unspecified (by this router).''
  303. .TP
  304. .B rxmitra
  305. sets the Retrans Timer field in RAs sent by the router.
  306. ``Zero means unspecified (by this router).''
  307. .TP
  308. .B ttl
  309. default value of the Cur Hop Limit field in RAs sent by the router.
  310. Should be set to the ``current diameter of the Internet.''
  311. ``Zero means unspecified (by this router).''
  312. .TP
  313. .B routerlt
  314. sets the Router Lifetime field of RAs sent from the interface, in ms.
  315. Zero means the router is not to be used as a default router.
  316. .PD
  317. .RE
  318. .PP
  319. Reading the interface's
  320. .I status
  321. file returns information about the interface, one line for each
  322. local address on that interface. The first line
  323. has 9 white-space-separated fields: device, mtu, local address,
  324. mask, remote or network address, packets in, packets out, input errors,
  325. output errors. Each subsequent line contains all but the device and mtu.
  326. See
  327. .I readipifc
  328. in
  329. .IR ip (2).
  330. .
  331. .SS "Routing
  332. The file
  333. .I iproute
  334. controls information about IP routing.
  335. When read, it returns one line per routing entry.
  336. Each line contains six white-space-separated fields:
  337. target address, target mask, address of next hop, flags,
  338. tag, and interface number.
  339. The entry used for routing an IP packet is the one with
  340. the longest mask for which destination address ANDed with
  341. target mask equals the target address.
  342. The one-character flags are:
  343. .TF m
  344. .TP
  345. .B 4
  346. IPv4 route
  347. .TP
  348. .B 6
  349. IPv6 route
  350. .TP
  351. .B i
  352. local interface
  353. .TP
  354. .B b
  355. broadcast address
  356. .TP
  357. .B u
  358. local unicast address
  359. .TP
  360. .B m
  361. multicast route
  362. .TP
  363. .B p
  364. point-to-point route
  365. .PD
  366. .PP
  367. The tag is an arbitrary string of up to 4 characters. It is normally used to
  368. indicate what routing protocol originated the route.
  369. .PP
  370. Writing to
  371. .B /net/iproute
  372. changes the route table. The messages are:
  373. .TF "\fLtag \fIstring\fR"
  374. .PD
  375. .TP
  376. .B flush
  377. Remove all routes.
  378. .TP
  379. .BI tag\ string
  380. Associate the tag,
  381. .IR string ,
  382. with all subsequent routes added via this file descriptor.
  383. .TP
  384. .BI add\ "target mask nexthop"
  385. Add the route to the table. If one already exists with the
  386. same target and mask, replace it.
  387. .TP
  388. .BI remove\ "target mask"
  389. Remove a route with a matching target and mask.
  390. .
  391. .SS "Address resolution
  392. The file
  393. .B /net/arp
  394. controls information about address resolution.
  395. The kernel automatically updates the v4 ARP and v6 Neighbour Discovery
  396. information for Ethernet interfaces.
  397. When read, the file returns one line per address containing the
  398. type of medium, the status of the entry (OK, WAIT), the IP
  399. address, and the medium address.
  400. Writing to
  401. .B /net/arp
  402. administers the ARP information.
  403. The control messages are:
  404. .TF "\fLdel \fIIP-addr\fR"
  405. .PD
  406. .TP
  407. .B flush
  408. Remove all entries.
  409. .TP
  410. .BI add\ "type IP-addr Media-addr"
  411. Add an entry or replace an existing one for the
  412. same IP address.
  413. .TP
  414. .BI del\ "IP-addr"
  415. Delete an individual entry.
  416. .PP
  417. ARP entries do not time out. The ARP table is a
  418. cache with an LRU replacement policy. The IP stack
  419. listens for all ARP requests and, if the requester is in
  420. the table, the entry is updated.
  421. Also, whenever a new address is configured onto an
  422. Ethernet, an ARP request is sent to help
  423. update the table on other systems.
  424. .PP
  425. Currently, the only medium type is
  426. .BR ether .
  427. .br
  428. .ne 3
  429. .
  430. .SS "Debugging and stack information
  431. If any process is holding
  432. .B /net/log
  433. open, the IP stack queues debugging information to it.
  434. This is intended primarily for debugging the IP stack.
  435. The information provided is implementation-defined;
  436. see the source for details. Generally, what is returned is error messages
  437. about bad packets.
  438. .PP
  439. Writing to
  440. .B /net/log
  441. controls debugging. The control messages are:
  442. .TF "\fLclear \fIarglist\fR"
  443. .PD
  444. .TP
  445. .BI set\ arglist
  446. .I Arglist
  447. is a space-separated list of items for which to enable debugging.
  448. The possible items are:
  449. .BR ppp ,
  450. .BR ip ,
  451. .BR fs ,
  452. .BR tcp ,
  453. .BR icmp ,
  454. .BR udp ,
  455. .BR compress ,
  456. .BR gre ,
  457. .BR tcpwin ,
  458. .BR tcprxmt ,
  459. .BR udpmsg ,
  460. .BR ipmsg ,
  461. and
  462. .BR esp .
  463. .TP
  464. .BI clear\ arglist
  465. .I Arglist
  466. is a space-separated list of items for which to disable debugging.
  467. .TP
  468. .BI only\ addr
  469. If
  470. .I addr
  471. is non-zero, restrict debugging to only those
  472. packets whose source or destination is that
  473. address.
  474. .PP
  475. The file
  476. .B /net/ndb
  477. can be read or written by
  478. programs. It is normally used by
  479. .IR ipconfig (8)
  480. to leave configuration information for other programs
  481. such as
  482. .B dns
  483. and
  484. .B cs
  485. (see
  486. .IR ndb (8)).
  487. .B /net/ndb
  488. may contain up to 1024 bytes.
  489. .PP
  490. The file
  491. .B /net/ipselftab
  492. is a read-only file containing all the IP addresses
  493. considered local. Each line in the file contains
  494. three white-space-separated fields: IP address, usage count,
  495. and flags. The usage count is the number of interfaces to which
  496. the address applies. The flags are the same as for routing
  497. entries.
  498. Note that the `IPv4 route' flag will never be set.
  499. .br
  500. .ne 3
  501. .
  502. .SS "Protocol directories
  503. The
  504. .I ip
  505. device
  506. supports IP as well as several protocols that run over it:
  507. TCP, UDP, RUDP, ICMP, GRE, and ESP.
  508. TCP and UDP provide the standard Internet
  509. protocols for reliable stream and unreliable datagram
  510. communication.
  511. RUDP is a locally-developed reliable datagram protocol based on UDP.
  512. ICMP is IP's catch-all control protocol used to send
  513. low level error messages and to implement
  514. .IR ping (8).
  515. GRE is a general encapsulation protocol.
  516. ESP is the encapsulation protocol for IPsec.
  517. IL provided a reliable datagram service for communication
  518. between Plan 9 machines over IPv4, but is no longer part of the system.
  519. .PP
  520. Each protocol is a subdirectory of the IP stack.
  521. The top level directory of each protocol contains a
  522. .B clone
  523. file, a
  524. .B stats
  525. file, and subdirectories numbered from zero to the number of connections
  526. opened for this protocol.
  527. .PP
  528. Opening the
  529. .B clone
  530. file reserves a connection. The file descriptor returned from the
  531. .IR open (2)
  532. will point to the control file,
  533. .BR ctl ,
  534. of the newly allocated connection.
  535. Reading
  536. .B ctl
  537. returns a text
  538. string representing the number of the
  539. connection.
  540. Connections may be used either to listen for incoming calls
  541. or to initiate calls to other machines.
  542. .PP
  543. A connection is controlled by writing text strings to the associated
  544. .B ctl
  545. file.
  546. After a connection has been established data may be read from
  547. and written to
  548. .BR data .
  549. A connection can be actively established using the
  550. .B connect
  551. message (see also
  552. .IR dial (2)).
  553. A connection can be established passively by first
  554. using an
  555. .B announce
  556. message (see
  557. .IR dial (2))
  558. to bind to a local port and then
  559. opening the
  560. .B listen
  561. file (see
  562. .IR dial (2))
  563. to receive incoming calls.
  564. .PP
  565. The following control messages are supported:
  566. .TF "\fLremmulti \fIip\fR"
  567. .PD
  568. .TP
  569. .BI connect\ ip-address ! port "!r " local
  570. Establish a connection to the remote
  571. .I ip-address
  572. and
  573. .IR port .
  574. If
  575. .I local
  576. is specified, it is used as the local port number.
  577. If
  578. .I local
  579. is not specified but
  580. .B !r
  581. is, the system will allocate
  582. a restricted port number (less than 1024) for the connection to allow communication
  583. with Unix
  584. .B login
  585. and
  586. .B exec
  587. services.
  588. Otherwise a free port number starting at 5000 is chosen.
  589. The connect fails if the combination of local and remote address/port pairs
  590. is already assigned to another connection.
  591. .TP
  592. .BI announce\ X
  593. .I X
  594. is a decimal port number or
  595. .LR * .
  596. Set the local port
  597. number to
  598. .I X
  599. and accept calls to
  600. .IR X .
  601. If
  602. .I X
  603. is
  604. .LR * ,
  605. accept
  606. calls for any port that no process has explicitly announced.
  607. The local IP address cannot be set.
  608. .B Announce
  609. fails if the connection is already announced or connected.
  610. .TP
  611. .BI bind\ X
  612. .I X
  613. is a decimal port number or
  614. .LR * .
  615. Set the local port number to
  616. .IR X .
  617. This exists to support emulation
  618. of BSD sockets by the APE libraries (see
  619. .IR pcc (1))
  620. and is not otherwise used.
  621. .\" this is gone
  622. .\" .TP
  623. .\" .BI backlog\ n
  624. .\" Set the maximum number of unanswered (queued) incoming
  625. .\" connections to an announced port to
  626. .\" .IR n .
  627. .\" By default
  628. .\" .I n
  629. .\" is set to five. If more than
  630. .\" .I n
  631. .\" connections are pending,
  632. .\" further requests for a service will be rejected.
  633. .TP
  634. .BI ttl\ n
  635. Set the time to live IP field in outgoing packets to
  636. .IR n .
  637. .TP
  638. .BI tos\ n
  639. Set the service type IP field in outgoing packets to
  640. .IR n .
  641. .TP
  642. .B ignoreadvice
  643. Don't break (UDP) connections because of ICMP errors.
  644. .TP
  645. .BI addmulti\ "ifc-ip [ mcast-ip ]"
  646. Treat
  647. .I ifc-ip
  648. on this multicast interface as a local address.
  649. If
  650. .I mcast-ip
  651. is present,
  652. use it as the interface's multicast address.
  653. .TP
  654. .BI remmulti\ ip
  655. Remove the address
  656. .I ip
  657. from this multicast interface.
  658. .PP
  659. Port numbers must be in the range 1 to 32767.
  660. .PP
  661. Several files report the status of a
  662. connection.
  663. The
  664. .B remote
  665. and
  666. .B local
  667. files contain the IP address and port number for the remote and local side of the
  668. connection. The
  669. .B status
  670. file contains protocol-dependent information to help debug network connections.
  671. On receiving an error or EOF reading or writing the
  672. .B data
  673. file, the
  674. .B err
  675. file contains the reason for error.
  676. .PP
  677. A process may accept incoming connections by
  678. .IR open (2)ing
  679. the
  680. .B listen
  681. file.
  682. The
  683. .B open
  684. will block until a new connection request arrives.
  685. Then
  686. .B open
  687. will return an open file descriptor which points to the control file of the
  688. newly accepted connection.
  689. This procedure will accept all calls for the
  690. given protocol.
  691. See
  692. .IR dial (2).
  693. .
  694. .SS TCP
  695. TCP connections are reliable point-to-point byte streams; there are no
  696. message delimiters.
  697. A connection is determined by the address and port numbers of the two
  698. ends.
  699. TCP
  700. .B ctl
  701. files support the following additional messages:
  702. .TF "\fLkeepalive\fI n\fR"
  703. .PD
  704. .TP
  705. .B hangup
  706. Close down this TCP connection.
  707. .TP
  708. .BI keepalive \ n
  709. Turn on keepalive messages.
  710. .IR N ,
  711. if given, is the number of milliseconds between keepalives
  712. (default 30000).
  713. .TP
  714. .BI checksum \ n
  715. Emit TCP checksums of zero if
  716. .I n
  717. is zero; otherwise, and by default,
  718. TCP checksums are computed and sent normally.
  719. .TP
  720. .BI tcpporthogdefense \ onoff
  721. .I onoff
  722. of
  723. .L on
  724. enables the TCP port-hog defense for all TCP connections;
  725. .I onoff
  726. of
  727. .L off
  728. disables it.
  729. The defense is a solution to hijacked systems staking out ports
  730. as a form of denial-of-service attack.
  731. To avoid stateless TCP conversation hogs,
  732. .I ip
  733. picks a TCP sequence number at random for keepalives.
  734. If that number gets acked by the other end,
  735. .I ip
  736. shuts down the connection.
  737. Some firewalls,
  738. notably ones that perform stateful inspection,
  739. discard such out-of-specification keepalives,
  740. so connections through such firewalls
  741. will be killed after five minutes
  742. by the lack of keepalives.
  743. .
  744. .SS UDP
  745. UDP connections carry unreliable and unordered datagrams. A read from
  746. .B data
  747. will return the next datagram, discarding anything
  748. that doesn't fit in the read buffer.
  749. A write is sent as a single datagram.
  750. .PP
  751. By default, a UDP connection is a point-to-point link.
  752. Either a
  753. .B connect
  754. establishes a local and remote address/port pair or
  755. after an
  756. .BR announce ,
  757. each datagram coming from a different remote address/port pair
  758. establishes a new incoming connection.
  759. However, many-to-one semantics is also possible.
  760. .PP
  761. If, after an
  762. .BR announce ,
  763. the message
  764. .L headers
  765. is written to
  766. .BR ctl ,
  767. then all messages sent to the announced port
  768. are received on the announced connection prefixed
  769. with the corresponding structure,
  770. declared in
  771. .BR <ip.h> :
  772. .IP
  773. .EX
  774. typedef struct Udphdr Udphdr;
  775. struct Udphdr
  776. {
  777. uchar raddr[16]; /* V6 remote address */
  778. uchar laddr[16]; /* V6 local address */
  779. uchar ifcaddr[16]; /* V6 interface address (receive only) */
  780. uchar rport[2]; /* remote port */
  781. uchar lport[2]; /* local port */
  782. };
  783. .EE
  784. .PP
  785. Before a write, a user must prefix a similar structure to each message.
  786. The system overrides the user specified local port with the announced
  787. one. If the user specifies an address that isn't a unicast address in
  788. .BR /net/ipselftab ,
  789. that too is overridden.
  790. Since the prefixed structure is the same in read and write, it is relatively
  791. easy to write a server that responds to client requests by just copying new
  792. data into the message body and then writing back the same buffer that was
  793. read.
  794. .PP
  795. In this case (writing
  796. .L headers
  797. to the
  798. .I ctl
  799. file),
  800. neither
  801. .I listen
  802. nor
  803. .I accept
  804. is needed;
  805. otherwise,
  806. the usual sequence of
  807. .IR announce ,
  808. .IR listen ,
  809. .I accept
  810. must be executed before performing I/O on the corresponding
  811. .I data
  812. file.
  813. .
  814. .SS RUDP
  815. RUDP is a reliable datagram protocol based on UDP,
  816. currently only for IPv4.
  817. Packets are delivered in order.
  818. RUDP does not support
  819. .BR listen .
  820. One must write either
  821. .L connect
  822. or
  823. .L announce
  824. followed immediately by
  825. .L headers
  826. to
  827. .BR ctl .
  828. .PP
  829. Unlike TCP, the reboot of one end of a connection does
  830. not force a closing of the connection. Communications will
  831. resume when the rebooted machine resumes talking. Any unacknowledged
  832. packets queued before the reboot will be lost. A reboot can
  833. be detected by reading the
  834. .B err
  835. file. It will contain the message
  836. .IP
  837. .BI hangup\ address ! port
  838. .PP
  839. where
  840. .I address
  841. and
  842. .I port
  843. are of the far side of the connection.
  844. Retransmitting a datagram more than 10 times
  845. is treated like a reboot:
  846. all queued messages are dropped, an error is queued to the
  847. .B err
  848. file, and the conversation resumes.
  849. .PP
  850. RUDP
  851. .I ctl
  852. files accept the following messages:
  853. .TF "\fLranddrop \fI[ percent ]\fR"
  854. .TP
  855. .B headers
  856. Corresponds to the
  857. .L headers
  858. format of UDP.
  859. .TP
  860. .BI "hangup " "IP port"
  861. Drop the connection to address
  862. .I IP
  863. and
  864. .IR port .
  865. .TP
  866. .BI "randdrop " "[ percent ]"
  867. Randomly drop
  868. .I percent
  869. of outgoing packets.
  870. Default is 10%.
  871. .
  872. .SS ICMP
  873. ICMP is a datagram protocol for IPv4 used to exchange control requests and
  874. their responses with other machines' IP implementations.
  875. ICMP is primarily a kernel-to-kernel protocol, but it is possible
  876. to generate `echo request' and read `echo reply' packets from user programs.
  877. .
  878. .SS ICMPV6
  879. ICMPv6 is the IPv6 equivalent of ICMP.
  880. If, after an
  881. .BR announce ,
  882. the message
  883. .L headers
  884. is written to
  885. .BR ctl ,
  886. then before a write,
  887. a user must prefix each message with a corresponding structure,
  888. declared in
  889. .BR <ip.h> :
  890. .IP
  891. .EX
  892. /*
  893. * user level icmpv6 with control message "headers"
  894. */
  895. typedef struct Icmp6hdr Icmp6hdr;
  896. struct Icmp6hdr {
  897. uchar unused[8];
  898. uchar laddr[IPaddrlen]; /* local address */
  899. uchar raddr[IPaddrlen]; /* remote address */
  900. };
  901. .EE
  902. .PP
  903. In this case (writing
  904. .L headers
  905. to the
  906. .I ctl
  907. file),
  908. neither
  909. .I listen
  910. nor
  911. .I accept
  912. is needed;
  913. otherwise,
  914. the usual sequence of
  915. .IR announce ,
  916. .IR listen ,
  917. .I accept
  918. must be executed before performing I/O on the corresponding
  919. .I data
  920. file.
  921. .
  922. .SS GRE
  923. GRE is the encapsulation protocol used by PPTP.
  924. The kernel implements just enough of the protocol
  925. to multiplex it.
  926. Our implementation encapsulates in IPv4, per RFC 1702.
  927. .B Announce
  928. is not allowed in GRE, only
  929. .BR connect .
  930. Since GRE has no port numbers, the port number in the connect
  931. is actually the 16 bit
  932. .B eproto
  933. field in the GRE header.
  934. .PP
  935. Reads and writes transfer a
  936. GRE datagram starting at the GRE header.
  937. On write, the kernel fills in the
  938. .B eproto
  939. field with the port number specified
  940. in the connect message.
  941. .br
  942. .ne 3
  943. .
  944. .SS ESP
  945. ESP is the Encapsulating Security Payload (RFC 1827, obsoleted by RFC 4303)
  946. for IPsec (RFC 4301).
  947. We currently implement only tunnel mode, not transport mode.
  948. It is used to set up an encrypted tunnel between machines.
  949. Like GRE, ESP has no port numbers. Instead, the
  950. port number in the
  951. .B connect
  952. message is the SPI (Security Association Identifier (sic)).
  953. IP packets are written to and read from
  954. .BR data .
  955. The kernel encrypts any packets written to
  956. .BR data ,
  957. appends a MAC, and prefixes an ESP header before
  958. sending to the other end of the tunnel.
  959. Received packets are checked against their MACs,
  960. decrypted, and queued for reading from
  961. .BR data .
  962. The control messages are:
  963. .TF "\fLesp \fIalg secret\fR"
  964. .PD
  965. .TP
  966. .BI esp\ "alg secret
  967. Encrypt with the algorithm,
  968. .IR alg ,
  969. using
  970. .I secret
  971. as the key.
  972. Possible algorithms are:
  973. .BR null ,
  974. .BR des_56_cbc ,
  975. .BR des3_cbc ,
  976. and eventually
  977. .B aes_128_cbc
  978. and
  979. .BR aes_ctr .
  980. .TP
  981. .BI ah\ "alg secret
  982. Use the hash algorithm,
  983. .IR alg ,
  984. with
  985. .I secret
  986. as the key for generating the MAC.
  987. Possible algorithms are:
  988. .BR null ,
  989. .BR hmac_sha1_96 ,
  990. .BR hmac_md5_96 ,
  991. and eventually
  992. .BR aes_xcbc_mac_96 .
  993. .TP
  994. .B header
  995. Turn on header mode. Every buffer read from
  996. .B data
  997. starts with 4 unused bytes, and the first 4 bytes
  998. of every buffer written to
  999. .B data
  1000. are ignored.
  1001. .TP
  1002. .B noheader
  1003. Turn off header mode.
  1004. .
  1005. .SS "IP packet filter
  1006. The directory
  1007. .B /net/ipmux
  1008. looks like another protocol directory.
  1009. It is a packet filter built on top of IP.
  1010. Each numbered
  1011. subdirectory represents a different filter.
  1012. The connect messages written to the
  1013. .I ctl
  1014. file describe the filter. Packets matching the filter can be read on the
  1015. .B data
  1016. file. Packets written to the
  1017. .B data
  1018. file are routed to an interface and transmitted.
  1019. .PP
  1020. A filter is a semicolon-separated list of
  1021. relations. Each relation describes a portion
  1022. of a packet to match. The possible relations are:
  1023. .TF "\fLdata[\fIn\fL:\fIm\fL]=\fIexpr\fR "
  1024. .PD
  1025. .TP
  1026. .BI proto= n
  1027. the IP protocol number must be
  1028. .IR n .
  1029. .TP
  1030. .BI data[ n : m ]= expr
  1031. bytes
  1032. .I n
  1033. through
  1034. .I m
  1035. following the IP packet header must match
  1036. .IR expr .
  1037. .TP
  1038. .BI iph[ n : m ]= expr
  1039. bytes
  1040. .I n
  1041. through
  1042. .I m
  1043. of the IP packet header must match
  1044. .IR expr .
  1045. .TP
  1046. .BI ifc= expr
  1047. the packet must have been received on an interface whose address
  1048. matches
  1049. .IR expr .
  1050. .TP
  1051. .BI src= expr
  1052. the source address in the packet must match
  1053. .IR expr .
  1054. .TP
  1055. .BI dst= expr
  1056. the destination address in the packet must match
  1057. .IR expr .
  1058. .PP
  1059. .I Expr
  1060. is of the form:
  1061. .TP
  1062. .I \ value
  1063. .TP
  1064. .IB \ value | value | ...
  1065. .TP
  1066. .IB \ value & mask
  1067. .TP
  1068. .IB \ value | value & mask
  1069. .PP
  1070. If a mask is given, the relevant field is first ANDed with
  1071. the mask. The result is compared against the value or list
  1072. of values for a match. In the case of
  1073. .BR ifc ,
  1074. .BR dst ,
  1075. and
  1076. .B src
  1077. the value is a dot-formatted IP address and the mask is a dot-formatted
  1078. IP mask. In the case of
  1079. .BR data ,
  1080. .B iph
  1081. and
  1082. .BR proto ,
  1083. both value and mask are strings of 2 hexadecimal digits representing
  1084. 8-bit values.
  1085. .PP
  1086. A packet is delivered to only one filter.
  1087. The filters are merged into a single comparison tree.
  1088. If two filters match the same packet, the following
  1089. rules apply in order (here '>' means is preferred to):
  1090. .IP 1)
  1091. protocol > data > source > destination > interface
  1092. .IP 2)
  1093. lower data offsets > higher data offsets
  1094. .IP 3)
  1095. longer matches > shorter matches
  1096. .IP 4)
  1097. older > younger
  1098. .PP
  1099. So far this has just been used to implement a version of
  1100. OSPF in Inferno
  1101. and 6to4 tunnelling.
  1102. .br
  1103. .ne 5
  1104. .
  1105. .SS Statistics
  1106. The
  1107. .B stats
  1108. files are read-only and contain statistics useful for network monitoring.
  1109. .br
  1110. .ne 12
  1111. .PP
  1112. Reading
  1113. .B /net/ipifc/stats
  1114. returns a list of 19 tagged and newline-separated fields representing:
  1115. .EX
  1116. .ft 1
  1117. .2C
  1118. .in +0.25i
  1119. forwarding status (0 and 2 mean forwarding off,
  1120. 1 means on)
  1121. default TTL
  1122. input packets
  1123. input header errors
  1124. input address errors
  1125. packets forwarded
  1126. input packets for unknown protocols
  1127. input packets discarded
  1128. input packets delivered to higher level protocols
  1129. output packets
  1130. output packets discarded
  1131. output packets with no route
  1132. timed out fragments in reassembly queue
  1133. requested reassemblies
  1134. successful reassemblies
  1135. failed reassemblies
  1136. successful fragmentations
  1137. unsuccessful fragmentations
  1138. fragments created
  1139. .in -0.25i
  1140. .1C
  1141. .ft
  1142. .EE
  1143. .br
  1144. .ne 16
  1145. .PP
  1146. Reading
  1147. .B /net/icmp/stats
  1148. returns a list of 26 tagged and newline-separated fields representing:
  1149. .EX
  1150. .ft 1
  1151. .2C
  1152. .in +0.25i
  1153. messages received
  1154. bad received messages
  1155. unreachables received
  1156. time exceededs received
  1157. input parameter problems received
  1158. source quenches received
  1159. redirects received
  1160. echo requests received
  1161. echo replies received
  1162. timestamps received
  1163. timestamp replies received
  1164. address mask requests received
  1165. address mask replies received
  1166. messages sent
  1167. transmission errors
  1168. unreachables sent
  1169. time exceededs sent
  1170. input parameter problems sent
  1171. source quenches sent
  1172. redirects sent
  1173. echo requests sent
  1174. echo replies sent
  1175. timestamps sent
  1176. timestamp replies sent
  1177. address mask requests sent
  1178. address mask replies sent
  1179. .in -0.25i
  1180. .1C
  1181. .EE
  1182. .PP
  1183. Reading
  1184. .B /net/tcp/stats
  1185. returns a list of 11 tagged and newline-separated fields representing:
  1186. .EX
  1187. .ft 1
  1188. .2C
  1189. .in +0.25i
  1190. maximum number of connections
  1191. total outgoing calls
  1192. total incoming calls
  1193. number of established connections to be reset
  1194. number of currently established connections
  1195. segments received
  1196. segments sent
  1197. segments retransmitted
  1198. retransmit timeouts
  1199. bad received segments
  1200. transmission failures
  1201. .in -0.25i
  1202. .1C
  1203. .EE
  1204. .PP
  1205. Reading
  1206. .B /net/udp/stats
  1207. returns a list of 4 tagged and newline-separated fields representing:
  1208. .EX
  1209. .ft 1
  1210. .2C
  1211. .in +0.25i
  1212. datagrams received
  1213. datagrams received for bad ports
  1214. malformed datagrams received
  1215. datagrams sent
  1216. .in -0.25i
  1217. .1C
  1218. .EE
  1219. .PP
  1220. Reading
  1221. .B /net/gre/stats
  1222. returns a single tagged number representing:
  1223. .EX
  1224. .ft 1
  1225. .in +0.25i
  1226. header length errors
  1227. .in -0.25i
  1228. .EE
  1229. .SH "SEE ALSO"
  1230. .IR dial (2),
  1231. .IR ip (2),
  1232. .IR bridge (3),
  1233. .\" .IR ike (4),
  1234. .IR ndb (6),
  1235. .IR listen (8)
  1236. .br
  1237. .PD 0
  1238. .TF "\fL/lib/rfc/rfc2822"
  1239. .TP
  1240. .B /lib/rfc/rfc2460
  1241. IPv6
  1242. .TP
  1243. .B /lib/rfc/rfc4291
  1244. IPv6 address architecture
  1245. .TP
  1246. .B /lib/rfc/rfc4443
  1247. ICMPv6
  1248. .SH SOURCE
  1249. .B /sys/src/9/ip
  1250. .SH BUGS
  1251. .I Ipmux
  1252. has not been heavily used and should be considered experimental.
  1253. It may disappear in favor of a more traditional packet filter in the future.