tcp.c 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025
  1. #include "u.h"
  2. #include "../port/lib.h"
  3. #include "mem.h"
  4. #include "dat.h"
  5. #include "fns.h"
  6. #include "../port/error.h"
  7. #include "ip.h"
  /*
   * Protocol constants, grouped by purpose.  Every name keeps the
   * value it has always had; only the grouping is new.
   */

  /* queue limit, protocol number and header geometry (bytes) */
  enum
  {
      QMAX            = 64*1024-1,
      IP_TCPPROTO     = 6,

      TCP4_IPLEN      = 8,
      TCP4_PHDRSIZE   = 12,
      TCP4_HDRSIZE    = 20,
      TCP4_TCBPHDRSZ  = 40,
      TCP4_PKT        = TCP4_IPLEN+TCP4_PHDRSIZE,

      TCP6_IPLEN      = 0,
      TCP6_PHDRSIZE   = 40,
      TCP6_HDRSIZE    = 20,
      TCP6_TCBPHDRSZ  = 60,
      TCP6_PKT        = TCP6_IPLEN+TCP6_PHDRSIZE
  };

  /* values of Tcptimer.state */
  enum
  {
      TcptimerOFF     = 0,
      TcptimerON      = 1,
      TcptimerDONE    = 2
  };

  /* times and tick counts */
  enum
  {
      MAX_TIME        = (1<<20),  /* Forever */
      TCP_ACK         = 50,       /* Timed ack sequence in ms */
      MAXBACKMS       = 30000,    /* longest backoff time (ms) before hangup */
      MSL2            = 10,
      MSPTICK         = 50,       /* Milliseconds per timer tick */
      DEF_RTT         = 500,      /* Default round trip */
      DEF_KAT         = 30000,    /* Default time (ms) between keep alives */
      SYNACK_RXTIMER  = 250       /* ms between SYNACK retransmits */
  };

  /* flag bits carried in the TCP header */
  enum
  {
      URG = 0x20,     /* Data marked urgent */
      ACK = 0x10,     /* Acknowledge is valid */
      PSH = 0x08,     /* Whole data pipe is pushed */
      RST = 0x04,     /* Reset connection */
      SYN = 0x02,     /* Pkt. is synchronise */
      FIN = 0x01      /* Start close down */
  };

  /* TCP options and segment-size defaults */
  enum
  {
      EOLOPT      = 0,
      NOOPOPT     = 1,
      MSSOPT      = 2,
      MSS_LENGTH  = 4,    /* bytes added to the header when an MSS option is sent */
      DEF_MSS     = 1460, /* Default segment size */
      DEF_MSS6    = 1280  /* Default segment size (min) for v6 */
  };

  /* mode argument of tcpstart() */
  enum
  {
      TCP_LISTEN  = 0,    /* Listen connection */
      TCP_CONNECT = 1     /* Outgoing connection */
  };

  /* retransmission tuning and Tcpctl.flags bits */
  enum
  {
      TCPREXMTTHRESH  = 3,    /* dupack threshold for rxt */

      FORCE   = 1,
      CLONE   = 2,
      RETRAN  = 4,
      ACTIVE  = 8,
      SYNACK  = 16,

      LOGAGAIN = 3,   /* the initial srtt is tcp_irtt<<LOGAGAIN */
      LOGDGAIN = 2
  };

  /* Connection states; tcpstates[] must list them in this order */
  enum
  {
      Closed = 0,
      Listen,
      Syn_sent,
      Syn_received,
      Established,
      Finwait1,
      Finwait2,
      Close_wait,
      Closing,
      Last_ack,
      Time_wait
  };

  /* limbo (half-open call) limits */
  enum
  {
      Maxlimbo    = 1000, /* maximum procs waiting for response to SYN ACK */
      NLHT        = 256,  /* hash table size, must be a power of 2 */
      LHTMASK     = NLHT-1
  };
  /*
   * Printable state names, indexed by connection state.
   * The order must match the state enumeration (Closed .. Time_wait).
   */
  char *tcpstates[] =
  {
      "Closed",
      "Listen",
      "Syn_sent",
      "Syn_received",
      "Established",
      "Finwait1",
      "Finwait2",
      "Close_wait",
      "Closing",
      "Last_ack",
      "Time_wait"
  };
  /*
   * One countdown timer.  Running timers are chained on Tcppriv.timers
   * (see timerstate) and ticked by tcpackproc.
   */
  typedef struct Tcptimer Tcptimer;
  struct Tcptimer
  {
      Tcptimer *next, *prev;  /* links in the list of running timers */
      Tcptimer *readynext;    /* link in tcpackproc's list of expired timers */
      int state;              /* TcptimerOFF, TcptimerON or TcptimerDONE */
      int start;              /* value tcpgo loads into count */
      int count;              /* decremented by tcpackproc; expires on reaching 0 */
      void (*func)(void*);    /* called as (*func)(arg) after expiry */
      void *arg;
  };
  89. /*
  90. * v4 and v6 pseudo headers used for
  91. * checksuming tcp
  92. */
  93. typedef struct Tcp4hdr Tcp4hdr;
  94. struct Tcp4hdr
  95. {
  96. uchar vihl; /* Version and header length */
  97. uchar tos; /* Type of service */
  98. uchar length[2]; /* packet length */
  99. uchar id[2]; /* Identification */
  100. uchar frag[2]; /* Fragment information */
  101. uchar Unused;
  102. uchar proto;
  103. uchar tcplen[2];
  104. uchar tcpsrc[4];
  105. uchar tcpdst[4];
  106. uchar tcpsport[2];
  107. uchar tcpdport[2];
  108. uchar tcpseq[4];
  109. uchar tcpack[4];
  110. uchar tcpflag[2];
  111. uchar tcpwin[2];
  112. uchar tcpcksum[2];
  113. uchar tcpurg[2];
  114. /* Options segment */
  115. uchar tcpopt[2];
  116. uchar tcpmss[2];
  117. };
  118. typedef struct Tcp6hdr Tcp6hdr;
  119. struct Tcp6hdr
  120. {
  121. uchar vcf[4];
  122. uchar ploadlen[2];
  123. uchar proto;
  124. uchar ttl;
  125. uchar tcpsrc[IPaddrlen];
  126. uchar tcpdst[IPaddrlen];
  127. uchar tcpsport[2];
  128. uchar tcpdport[2];
  129. uchar tcpseq[4];
  130. uchar tcpack[4];
  131. uchar tcpflag[2];
  132. uchar tcpwin[2];
  133. uchar tcpcksum[2];
  134. uchar tcpurg[2];
  135. /* Options segment */
  136. uchar tcpopt[2];
  137. uchar tcpmss[2];
  138. };
  139. /*
  140. * this represents the control info
  141. * for a single packet. It is derived from
  142. * a packet in ntohtcp{4,6}() and stuck into
  143. * a packet in htontcp{4,6}().
  144. */
  145. typedef struct Tcp Tcp;
  146. struct Tcp
  147. {
  148. ushort source;
  149. ushort dest;
  150. ulong seq;
  151. ulong ack;
  152. uchar flags;
  153. ushort wnd;
  154. ushort urg;
  155. ushort mss;
  156. ushort len; /* size of data */
  157. };
  158. /*
  159. * this header is malloc'd to thread together fragments
  160. * waiting to be coalesced
  161. */
  162. typedef struct Reseq Reseq;
  163. struct Reseq
  164. {
  165. Reseq *next;
  166. Tcp seg;
  167. Block *bp;
  168. ushort length;
  169. };
  170. /*
  171. * the qlock in the Conv locks this structure
  172. */
  173. typedef struct Tcpctl Tcpctl;
  174. struct Tcpctl
  175. {
  176. uchar state; /* Connection state */
  177. uchar type; /* Listening or active connection */
  178. uchar code; /* Icmp code */
  179. struct {
  180. ulong una; /* Unacked data pointer */
  181. ulong nxt; /* Next sequence expected */
  182. ulong ptr; /* Data pointer */
  183. ushort wnd; /* Tcp send window */
  184. ulong urg; /* Urgent data pointer */
  185. ulong wl2;
  186. /* to implement tahoe and reno TCP */
  187. ulong dupacks; /* number of duplicate acks rcvd */
  188. int recovery; /* loss recovery flag */
  189. ulong rxt; /* right window marker for recovery */
  190. } snd;
  191. struct {
  192. ulong nxt; /* Receive pointer to next uchar slot */
  193. ushort wnd; /* Receive window incoming */
  194. ulong urg; /* Urgent pointer */
  195. ulong lastacked; /* Last ack sent */
  196. int blocked;
  197. int una; /* unacked data segs */
  198. } rcv;
  199. ulong iss; /* Initial sequence number */
  200. ushort cwind; /* Congestion window */
  201. ushort ssthresh; /* Slow start threshold */
  202. int resent; /* Bytes just resent */
  203. int irs; /* Initial received squence */
  204. ushort mss; /* Mean segment size */
  205. int rerecv; /* Overlap of data rerecevived */
  206. ushort window; /* Recevive window */
  207. uchar backoff; /* Exponential backoff counter */
  208. int backedoff; /* ms we've backed off for rexmits */
  209. uchar flags; /* State flags */
  210. Reseq *reseq; /* Resequencing queue */
  211. Tcptimer timer; /* Activity timer */
  212. Tcptimer acktimer; /* Acknowledge timer */
  213. Tcptimer rtt_timer; /* Round trip timer */
  214. Tcptimer katimer; /* keep alive timer */
  215. ulong rttseq; /* Round trip sequence */
  216. int srtt; /* Shortened round trip */
  217. int mdev; /* Mean deviation of round trip */
  218. int kacounter; /* count down for keep alive */
  219. uint sndsyntime; /* time syn sent */
  220. ulong time; /* time Finwait2 or Syn_received was sent */
  221. int nochecksum; /* non-zero means don't send checksums */
  222. int flgcnt; /* number of flags in the sequence (FIN,SEQ) */
  223. union {
  224. Tcp4hdr tcp4hdr;
  225. Tcp6hdr tcp6hdr;
  226. } protohdr; /* prototype header */
  227. };
  228. /*
  229. * New calls are put in limbo rather than having a conversation structure
  230. * allocated. Thus, a SYN attack results in lots of limbo'd calls but not
  231. * any real Conv structures mucking things up. Calls in limbo rexmit their
  232. * SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
  233. *
  234. * In particular they aren't on a listener's queue so that they don't figure
  235. * in the input queue limit.
  236. *
  237. * If 1/2 of a T3 was attacking SYN packets, we'ld have a permanent queue
  238. * of 70000 limbo'd calls. Not great for a linear list but doable. Therefore
  239. * there is no hashing of this list.
  240. */
  241. typedef struct Limbo Limbo;
  242. struct Limbo
  243. {
  244. Limbo *next;
  245. uchar laddr[IPaddrlen];
  246. uchar raddr[IPaddrlen];
  247. ushort lport;
  248. ushort rport;
  249. ulong irs; /* initial received sequence */
  250. ulong iss; /* initial sent sequence */
  251. ushort mss; /* mss from the other end */
  252. ulong lastsend; /* last time we sent a synack */
  253. uchar version; /* v4 or v6 */
  254. uchar rexmits; /* number of retransmissions */
  255. };
  256. int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */
  257. ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
  /*
   * Indices into Tcppriv.stats[] and statnames[].
   * Nstats is the number of counters.
   */
  enum {
      /* MIB stats */
      MaxConn         = 0,
      ActiveOpens     = 1,
      PassiveOpens    = 2,
      EstabResets     = 3,
      CurrEstab       = 4,
      InSegs          = 5,
      OutSegs         = 6,
      RetransSegs     = 7,
      RetransTimeouts = 8,
      InErrs          = 9,
      OutRsts         = 10,

      /* non-MIB stats */
      CsumErrs        = 11,
      HlenErrs        = 12,
      LenErrs         = 13,
      OutOfOrder      = 14,

      Nstats          = 15
  };
  278. static char *statnames[] =
  279. {
  280. [MaxConn] "MaxConn",
  281. [ActiveOpens] "ActiveOpens",
  282. [PassiveOpens] "PassiveOpens",
  283. [EstabResets] "EstabResets",
  284. [CurrEstab] "CurrEstab",
  285. [InSegs] "InSegs",
  286. [OutSegs] "OutSegs",
  287. [RetransSegs] "RetransSegs",
  288. [RetransTimeouts] "RetransTimeouts",
  289. [InErrs] "InErrs",
  290. [OutRsts] "OutRsts",
  291. [CsumErrs] "CsumErrs",
  292. [HlenErrs] "HlenErrs",
  293. [LenErrs] "LenErrs",
  294. [OutOfOrder] "OutOfOrder",
  295. };
  296. typedef struct Tcppriv Tcppriv;
  297. struct Tcppriv
  298. {
  299. /* List of active timers */
  300. QLock tl;
  301. Tcptimer *timers;
  302. Rendez tcpr; /* used by tcpackproc */
  303. /* hash table for matching conversations */
  304. Ipht ht;
  305. /* calls in limbo waiting for an ACK to our SYN ACK */
  306. int nlimbo;
  307. Limbo *lht[NLHT];
  308. /* for keeping track of tcpackproc */
  309. QLock apl;
  310. int ackprocstarted;
  311. ulong stats[Nstats];
  312. };
  /*
   * Setting tcpporthogdefense to non-zero enables Dong Lin's
   * solution to hijacked systems staking out ports as a form
   * of DoS attack.
   *
   * To avoid stateless Conv hogs, we pick a sequence number at random.
   * If that number gets acked by the other end, we shut down the
   * connection.  Look for tcpporthogdefense in the code.
   */
  int tcpporthogdefense = 0;
  323. int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
  324. void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
  325. void localclose(Conv*, char*);
  326. void procsyn(Conv*, Tcp*);
  327. void tcpiput(Proto*, Ipifc*, Block*);
  328. void tcpoutput(Conv*);
  329. int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
  330. void tcpstart(Conv*, int, ushort);
  331. void tcptimeout(void*);
  332. void tcpsndsyn(Tcpctl*);
  333. void tcprcvwin(Conv*);
  334. void tcpacktimer(void*);
  335. void tcpkeepalive(void*);
  336. void tcpsetkacounter(Tcpctl*);
  337. void tcprxmit(Conv*);
  338. void tcpsettimer(Tcpctl*);
  339. void tcpsynackrtt(Conv*);
  340. static void limborexmit(Proto*);
  341. static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
  342. void
  343. tcpsetstate(Conv *s, uchar newstate)
  344. {
  345. Tcpctl *tcb;
  346. uchar oldstate;
  347. Tcppriv *tpriv;
  348. tpriv = s->p->priv;
  349. tcb = (Tcpctl*)s->ptcl;
  350. oldstate = tcb->state;
  351. if(oldstate == newstate)
  352. return;
  353. if(oldstate == Established)
  354. tpriv->stats[CurrEstab]--;
  355. if(newstate == Established)
  356. tpriv->stats[CurrEstab]++;
  357. /**
  358. print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
  359. tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
  360. **/
  361. switch(newstate) {
  362. case Closed:
  363. qclose(s->rq);
  364. qclose(s->wq);
  365. qclose(s->eq);
  366. break;
  367. case Close_wait: /* Remote closes */
  368. qhangup(s->rq, nil);
  369. break;
  370. }
  371. tcb->state = newstate;
  372. if(oldstate == Syn_sent && newstate != Closed)
  373. Fsconnected(s, nil);
  374. }
  375. static char*
  376. tcpconnect(Conv *c, char **argv, int argc)
  377. {
  378. char *e;
  379. e = Fsstdconnect(c, argv, argc);
  380. if(e != nil)
  381. return e;
  382. tcpstart(c, TCP_CONNECT, QMAX);
  383. return nil;
  384. }
  385. static int
  386. tcpstate(Conv *c, char *state, int n)
  387. {
  388. Tcpctl *s;
  389. s = (Tcpctl*)(c->ptcl);
  390. return snprint(state, n,
  391. "%s srtt %d mdev %d cwin %d swin %d rwin %d timer.start %d timer.count %d rerecv %d\n",
  392. tcpstates[s->state], s->srtt, s->mdev,
  393. s->cwind, s->snd.wnd, s->rcv.wnd,
  394. s->timer.start, s->timer.count, s->rerecv);
  395. }
  396. static int
  397. tcpinuse(Conv *c)
  398. {
  399. Tcpctl *s;
  400. s = (Tcpctl*)(c->ptcl);
  401. return s->state != Closed;
  402. }
  403. static char*
  404. tcpannounce(Conv *c, char **argv, int argc)
  405. {
  406. char *e;
  407. e = Fsstdannounce(c, argv, argc);
  408. if(e != nil)
  409. return e;
  410. tcpstart(c, TCP_LISTEN, QMAX);
  411. Fsconnected(c, nil);
  412. return nil;
  413. }
  414. /*
  415. * tcpclose is always called with the q locked
  416. */
  417. static void
  418. tcpclose(Conv *c)
  419. {
  420. Tcpctl *tcb;
  421. tcb = (Tcpctl*)c->ptcl;
  422. qhangup(c->rq, nil);
  423. qhangup(c->wq, nil);
  424. qhangup(c->eq, nil);
  425. qflush(c->rq);
  426. switch(tcb->state) {
  427. case Listen:
  428. /*
  429. * reset any incoming calls to this listener
  430. */
  431. Fsconnected(c, "Hangup");
  432. localclose(c, nil);
  433. break;
  434. case Closed:
  435. case Syn_sent:
  436. localclose(c, nil);
  437. break;
  438. case Syn_received:
  439. case Established:
  440. tcb->flgcnt++;
  441. tcb->snd.nxt++;
  442. tcpsetstate(c, Finwait1);
  443. tcpoutput(c);
  444. break;
  445. case Close_wait:
  446. tcb->flgcnt++;
  447. tcb->snd.nxt++;
  448. tcpsetstate(c, Last_ack);
  449. tcpoutput(c);
  450. break;
  451. }
  452. }
  453. void
  454. tcpkick(void *x)
  455. {
  456. Conv *s = x;
  457. Tcpctl *tcb;
  458. tcb = (Tcpctl*)s->ptcl;
  459. if(waserror()){
  460. qunlock(s);
  461. nexterror();
  462. }
  463. qlock(s);
  464. switch(tcb->state) {
  465. case Syn_sent:
  466. case Syn_received:
  467. case Established:
  468. case Close_wait:
  469. /*
  470. * Push data
  471. */
  472. tcprcvwin(s);
  473. tcpoutput(s);
  474. break;
  475. default:
  476. localclose(s, "Hangup");
  477. break;
  478. }
  479. qunlock(s);
  480. poperror();
  481. }
  482. void
  483. tcprcvwin(Conv *s) /* Call with tcb locked */
  484. {
  485. int w;
  486. Tcpctl *tcb;
  487. tcb = (Tcpctl*)s->ptcl;
  488. w = QMAX - qlen(s->rq);
  489. if(w < 0)
  490. w = 0;
  491. tcb->rcv.wnd = w;
  492. if(w == 0)
  493. tcb->rcv.blocked = 1;
  494. }
  495. void
  496. tcpacktimer(void *v)
  497. {
  498. Tcpctl *tcb;
  499. Conv *s;
  500. s = v;
  501. tcb = (Tcpctl*)s->ptcl;
  502. if(waserror()){
  503. qunlock(s);
  504. nexterror();
  505. }
  506. qlock(s);
  507. if(tcb->state != Closed){
  508. tcb->flags |= FORCE;
  509. tcprcvwin(s);
  510. tcpoutput(s);
  511. }
  512. qunlock(s);
  513. poperror();
  514. }
  515. static void
  516. tcpcreate(Conv *c)
  517. {
  518. c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
  519. c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
  520. }
  521. static void
  522. timerstate(Tcppriv *priv, Tcptimer *t, int newstate)
  523. {
  524. if(newstate != TcptimerON){
  525. if(t->state == TcptimerON){
  526. // unchain
  527. if(priv->timers == t){
  528. priv->timers = t->next;
  529. if(t->prev != nil)
  530. panic("timerstate1");
  531. }
  532. if(t->next)
  533. t->next->prev = t->prev;
  534. if(t->prev)
  535. t->prev->next = t->next;
  536. t->next = t->prev = nil;
  537. }
  538. } else {
  539. if(t->state != TcptimerON){
  540. // chain
  541. if(t->prev != nil || t->next != nil)
  542. panic("timerstate2");
  543. t->prev = nil;
  544. t->next = priv->timers;
  545. if(t->next)
  546. t->next->prev = t;
  547. priv->timers = t;
  548. }
  549. }
  550. t->state = newstate;
  551. }
  552. void
  553. tcpackproc(void *a)
  554. {
  555. Tcptimer *t, *tp, *timeo;
  556. Proto *tcp;
  557. Tcppriv *priv;
  558. int loop;
  559. tcp = a;
  560. priv = tcp->priv;
  561. for(;;) {
  562. tsleep(&priv->tcpr, return0, 0, MSPTICK);
  563. qlock(&priv->tl);
  564. timeo = nil;
  565. loop = 0;
  566. for(t = priv->timers; t != nil; t = tp) {
  567. if(loop++ > 10000)
  568. panic("tcpackproc1");
  569. tp = t->next;
  570. if(t->state == TcptimerON) {
  571. t->count--;
  572. if(t->count == 0) {
  573. timerstate(priv, t, TcptimerDONE);
  574. t->readynext = timeo;
  575. timeo = t;
  576. }
  577. }
  578. }
  579. qunlock(&priv->tl);
  580. loop = 0;
  581. for(t = timeo; t != nil; t = t->readynext) {
  582. if(loop++ > 10000)
  583. panic("tcpackproc2");
  584. if(t->state == TcptimerDONE && t->func != nil && !waserror()){
  585. (*t->func)(t->arg);
  586. poperror();
  587. }
  588. }
  589. limborexmit(tcp);
  590. }
  591. }
  592. void
  593. tcpgo(Tcppriv *priv, Tcptimer *t)
  594. {
  595. if(t == nil || t->start == 0)
  596. return;
  597. qlock(&priv->tl);
  598. t->count = t->start;
  599. timerstate(priv, t, TcptimerON);
  600. qunlock(&priv->tl);
  601. }
  602. void
  603. tcphalt(Tcppriv *priv, Tcptimer *t)
  604. {
  605. if(t == nil)
  606. return;
  607. qlock(&priv->tl);
  608. timerstate(priv, t, TcptimerOFF);
  609. qunlock(&priv->tl);
  610. }
  /*
   * Backoff multiplier for retry number n.
   *
   * Returns 1<<n for n in 0..4 (1, 2, 4, 8, 16) and 64 for n >= 5.
   * The jump from 16 straight to 64 is existing behaviour and is kept.
   * A negative n is treated as 0, because shifting by a negative
   * count is undefined in C.
   */
  int
  backoff(int n)
  {
      if(n < 0)
          return 1;
      if(n < 5)
          return 1 << n;
      return 64;
  }
  618. void
  619. localclose(Conv *s, char *reason) /* called with tcb locked */
  620. {
  621. Tcpctl *tcb;
  622. Reseq *rp,*rp1;
  623. Tcppriv *tpriv;
  624. tpriv = s->p->priv;
  625. tcb = (Tcpctl*)s->ptcl;
  626. iphtrem(&tpriv->ht, s);
  627. tcphalt(tpriv, &tcb->timer);
  628. tcphalt(tpriv, &tcb->rtt_timer);
  629. tcphalt(tpriv, &tcb->acktimer);
  630. tcphalt(tpriv, &tcb->katimer);
  631. /* Flush reassembly queue; nothing more can arrive */
  632. for(rp = tcb->reseq; rp != nil; rp = rp1) {
  633. rp1 = rp->next;
  634. freeblist(rp->bp);
  635. free(rp);
  636. }
  637. tcb->reseq = nil;
  638. if(tcb->state == Syn_sent)
  639. Fsconnected(s, reason);
  640. if(s->state == Announced)
  641. wakeup(&s->listenr);
  642. qhangup(s->rq, reason);
  643. qhangup(s->wq, reason);
  644. tcpsetstate(s, Closed);
  645. }
  646. /* mtu (- TCP + IP hdr len) of 1st hop */
  647. int
  648. tcpmtu(Proto *tcp, uchar *addr, int version)
  649. {
  650. Ipifc *ifc;
  651. int mtu;
  652. ifc = findipifc(tcp->f, addr, 0);
  653. switch(version){
  654. default:
  655. case V4:
  656. mtu = DEF_MSS;
  657. if(ifc != nil)
  658. mtu = ifc->maxmtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
  659. break;
  660. case V6:
  661. mtu = DEF_MSS6;
  662. if(ifc != nil)
  663. mtu = ifc->maxmtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
  664. break;
  665. }
  666. return mtu;
  667. }
  668. void
  669. inittcpctl(Conv *s, int mode)
  670. {
  671. Tcpctl *tcb;
  672. Tcp4hdr* h4;
  673. Tcp6hdr* h6;
  674. tcb = (Tcpctl*)s->ptcl;
  675. memset(tcb, 0, sizeof(Tcpctl));
  676. tcb->ssthresh = 65535;
  677. tcb->srtt = tcp_irtt<<LOGAGAIN;
  678. tcb->mdev = 0;
  679. /* setup timers */
  680. tcb->timer.start = tcp_irtt / MSPTICK;
  681. tcb->timer.func = tcptimeout;
  682. tcb->timer.arg = s;
  683. tcb->rtt_timer.start = MAX_TIME;
  684. tcb->acktimer.start = TCP_ACK / MSPTICK;
  685. tcb->acktimer.func = tcpacktimer;
  686. tcb->acktimer.arg = s;
  687. tcb->katimer.start = DEF_KAT / MSPTICK;
  688. tcb->katimer.func = tcpkeepalive;
  689. tcb->katimer.arg = s;
  690. /* create a prototype(pseudo) header */
  691. if(mode != TCP_LISTEN){
  692. if(ipcmp(s->laddr, IPnoaddr) == 0)
  693. findlocalip(s->p->f, s->laddr, s->raddr);
  694. switch(s->ipversion){
  695. case V4:
  696. h4 = &tcb->protohdr.tcp4hdr;
  697. memset(h4, 0, sizeof(*h4));
  698. h4->proto = IP_TCPPROTO;
  699. hnputs(h4->tcpsport, s->lport);
  700. hnputs(h4->tcpdport, s->rport);
  701. v6tov4(h4->tcpsrc, s->laddr);
  702. v6tov4(h4->tcpdst, s->raddr);
  703. break;
  704. case V6:
  705. h6 = &tcb->protohdr.tcp6hdr;
  706. memset(h6, 0, sizeof(*h6));
  707. h6->proto = IP_TCPPROTO;
  708. hnputs(h6->tcpsport, s->lport);
  709. hnputs(h6->tcpdport, s->rport);
  710. ipmove(h6->tcpsrc, s->laddr);
  711. ipmove(h6->tcpdst, s->raddr);
  712. break;
  713. default:
  714. panic("inittcpctl: version %d", s->ipversion);
  715. }
  716. }
  717. tcb->mss = tcb->cwind = tcpmtu(s->p, s->laddr, s->ipversion);
  718. }
  719. /*
  720. * called with s qlocked
  721. */
  722. void
  723. tcpstart(Conv *s, int mode, ushort window)
  724. {
  725. Tcpctl *tcb;
  726. Tcppriv *tpriv;
  727. char kpname[KNAMELEN];
  728. tpriv = s->p->priv;
  729. if(tpriv->ackprocstarted == 0){
  730. qlock(&tpriv->apl);
  731. if(tpriv->ackprocstarted == 0){
  732. sprint(kpname, "#I%dtcpack", s->p->f->dev);
  733. kproc(kpname, tcpackproc, s->p);
  734. tpriv->ackprocstarted = 1;
  735. }
  736. qunlock(&tpriv->apl);
  737. }
  738. tcb = (Tcpctl*)s->ptcl;
  739. /* Send SYN, go into SYN_SENT state */
  740. inittcpctl(s, mode);
  741. tcb->window = window;
  742. tcb->rcv.wnd = window;
  743. iphtadd(&tpriv->ht, s);
  744. switch(mode) {
  745. case TCP_LISTEN:
  746. tpriv->stats[PassiveOpens]++;
  747. tcb->flags |= CLONE;
  748. tcpsetstate(s, Listen);
  749. break;
  750. case TCP_CONNECT:
  751. tpriv->stats[ActiveOpens]++;
  752. tcb->flags |= ACTIVE;
  753. tcpsndsyn(tcb);
  754. tcpsetstate(s, Syn_sent);
  755. tcpoutput(s);
  756. break;
  757. }
  758. }
  759. static char*
  760. tcpflag(ushort flag)
  761. {
  762. static char buf[128];
  763. sprint(buf, "%d", flag>>10); /* Head len */
  764. if(flag & URG)
  765. strcat(buf, " URG");
  766. if(flag & ACK)
  767. strcat(buf, " ACK");
  768. if(flag & PSH)
  769. strcat(buf, " PSH");
  770. if(flag & RST)
  771. strcat(buf, " RST");
  772. if(flag & SYN)
  773. strcat(buf, " SYN");
  774. if(flag & FIN)
  775. strcat(buf, " FIN");
  776. return buf;
  777. }
  778. Block *
  779. htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
  780. {
  781. int dlen;
  782. Tcp6hdr *h;
  783. ushort csum;
  784. ushort hdrlen;
  785. hdrlen = TCP6_HDRSIZE;
  786. if(tcph->mss)
  787. hdrlen += MSS_LENGTH;
  788. if(data) {
  789. dlen = blocklen(data);
  790. data = padblock(data, hdrlen + TCP6_PKT);
  791. if(data == nil)
  792. return nil;
  793. }
  794. else {
  795. dlen = 0;
  796. data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
  797. if(data == nil)
  798. return nil;
  799. data->wp += hdrlen + TCP6_PKT;
  800. }
  801. /* copy in pseudo ip header plus port numbers */
  802. h = (Tcp6hdr *)(data->rp);
  803. memmove(h, ph, TCP6_TCBPHDRSZ);
  804. /* compose pseudo tcp header, do cksum calculation */
  805. hnputl(h->vcf, hdrlen + dlen);
  806. h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
  807. h->ttl = ph->proto;
  808. /* copy in variable bits */
  809. hnputl(h->tcpseq, tcph->seq);
  810. hnputl(h->tcpack, tcph->ack);
  811. hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
  812. hnputs(h->tcpwin, tcph->wnd);
  813. hnputs(h->tcpurg, tcph->urg);
  814. if(tcph->mss != 0){
  815. h->tcpopt[0] = MSSOPT;
  816. h->tcpopt[1] = MSS_LENGTH;
  817. hnputs(h->tcpmss, tcph->mss);
  818. }
  819. if(tcb != nil && tcb->nochecksum){
  820. h->tcpcksum[0] = h->tcpcksum[1] = 0;
  821. } else {
  822. csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
  823. hnputs(h->tcpcksum, csum);
  824. }
  825. /* move from pseudo header back to normal ip header */
  826. memset(h->vcf, 0, 4);
  827. h->vcf[0] = IP_VER6;
  828. hnputs(h->ploadlen, hdrlen+dlen);
  829. h->proto = ph->proto;
  830. return data;
  831. }
  832. Block *
  833. htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
  834. {
  835. int dlen;
  836. Tcp4hdr *h;
  837. ushort csum;
  838. ushort hdrlen;
  839. hdrlen = TCP4_HDRSIZE;
  840. if(tcph->mss)
  841. hdrlen += MSS_LENGTH;
  842. if(data) {
  843. dlen = blocklen(data);
  844. data = padblock(data, hdrlen + TCP4_PKT);
  845. if(data == nil)
  846. return nil;
  847. }
  848. else {
  849. dlen = 0;
  850. data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
  851. if(data == nil)
  852. return nil;
  853. data->wp += hdrlen + TCP4_PKT;
  854. }
  855. /* copy in pseudo ip header plus port numbers */
  856. h = (Tcp4hdr *)(data->rp);
  857. memmove(h, ph, TCP4_TCBPHDRSZ);
  858. /* copy in variable bits */
  859. hnputs(h->tcplen, hdrlen + dlen);
  860. hnputl(h->tcpseq, tcph->seq);
  861. hnputl(h->tcpack, tcph->ack);
  862. hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
  863. hnputs(h->tcpwin, tcph->wnd);
  864. hnputs(h->tcpurg, tcph->urg);
  865. if(tcph->mss != 0){
  866. h->tcpopt[0] = MSSOPT;
  867. h->tcpopt[1] = MSS_LENGTH;
  868. hnputs(h->tcpmss, tcph->mss);
  869. }
  870. if(tcb != nil && tcb->nochecksum){
  871. h->tcpcksum[0] = h->tcpcksum[1] = 0;
  872. } else {
  873. csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE);
  874. hnputs(h->tcpcksum, csum);
  875. }
  876. return data;
  877. }
  878. int
  879. ntohtcp6(Tcp *tcph, Block **bpp)
  880. {
  881. Tcp6hdr *h;
  882. uchar *optr;
  883. ushort hdrlen;
  884. ushort optlen;
  885. int n;
  886. *bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
  887. if(*bpp == nil)
  888. return -1;
  889. h = (Tcp6hdr *)((*bpp)->rp);
  890. tcph->source = nhgets(h->tcpsport);
  891. tcph->dest = nhgets(h->tcpdport);
  892. tcph->seq = nhgetl(h->tcpseq);
  893. tcph->ack = nhgetl(h->tcpack);
  894. hdrlen = (h->tcpflag[0] & 0xf0)>>2;
  895. if(hdrlen < TCP6_HDRSIZE) {
  896. freeblist(*bpp);
  897. return -1;
  898. }
  899. tcph->flags = h->tcpflag[1];
  900. tcph->wnd = nhgets(h->tcpwin);
  901. tcph->urg = nhgets(h->tcpurg);
  902. tcph->mss = 0;
  903. tcph->len = nhgets(h->ploadlen) - hdrlen;
  904. *bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
  905. if(*bpp == nil)
  906. return -1;
  907. optr = h->tcpopt;
  908. n = hdrlen - TCP6_HDRSIZE;
  909. while(n > 0 && *optr != EOLOPT) {
  910. if(*optr == NOOPOPT) {
  911. n--;
  912. optr++;
  913. continue;
  914. }
  915. optlen = optr[1];
  916. if(optlen < 2 || optlen > n)
  917. break;
  918. switch(*optr) {
  919. case MSSOPT:
  920. if(optlen == MSS_LENGTH)
  921. tcph->mss = nhgets(optr+2);
  922. break;
  923. }
  924. n -= optlen;
  925. optr += optlen;
  926. }
  927. return hdrlen;
  928. }
  929. int
  930. ntohtcp4(Tcp *tcph, Block **bpp)
  931. {
  932. Tcp4hdr *h;
  933. uchar *optr;
  934. ushort hdrlen;
  935. ushort optlen;
  936. int n;
  937. *bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
  938. if(*bpp == nil)
  939. return -1;
  940. h = (Tcp4hdr *)((*bpp)->rp);
  941. tcph->source = nhgets(h->tcpsport);
  942. tcph->dest = nhgets(h->tcpdport);
  943. tcph->seq = nhgetl(h->tcpseq);
  944. tcph->ack = nhgetl(h->tcpack);
  945. hdrlen = (h->tcpflag[0] & 0xf0)>>2;
  946. if(hdrlen < TCP4_HDRSIZE) {
  947. freeblist(*bpp);
  948. return -1;
  949. }
  950. tcph->flags = h->tcpflag[1];
  951. tcph->wnd = nhgets(h->tcpwin);
  952. tcph->urg = nhgets(h->tcpurg);
  953. tcph->mss = 0;
  954. tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
  955. *bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
  956. if(*bpp == nil)
  957. return -1;
  958. optr = h->tcpopt;
  959. n = hdrlen - TCP4_HDRSIZE;
  960. while(n > 0 && *optr != EOLOPT) {
  961. if(*optr == NOOPOPT) {
  962. n--;
  963. optr++;
  964. continue;
  965. }
  966. optlen = optr[1];
  967. if(optlen < 2 || optlen > n)
  968. break;
  969. switch(*optr) {
  970. case MSSOPT:
  971. if(optlen == MSS_LENGTH)
  972. tcph->mss = nhgets(optr+2);
  973. break;
  974. }
  975. n -= optlen;
  976. optr += optlen;
  977. }
  978. return hdrlen;
  979. }
  980. /*
  981. * For outgiing calls, generate an initial sequence
  982. * number and put a SYN on the send queue
  983. */
  984. void
  985. tcpsndsyn(Tcpctl *tcb)
  986. {
  987. tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
  988. tcb->rttseq = tcb->iss;
  989. tcb->snd.wl2 = tcb->iss;
  990. tcb->snd.una = tcb->iss;
  991. tcb->snd.ptr = tcb->rttseq;
  992. tcb->snd.nxt = tcb->rttseq;
  993. tcb->flgcnt++;
  994. tcb->flags |= FORCE;
  995. tcb->sndsyntime = NOW;
  996. }
  997. void
  998. sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
  999. {
  1000. Block *hbp;
  1001. uchar rflags;
  1002. Tcppriv *tpriv;
  1003. Tcp4hdr ph4;
  1004. Tcp6hdr ph6;
  1005. netlog(tcp->f, Logtcp, "sndrst: %s", reason);
  1006. tpriv = tcp->priv;
  1007. if(seg->flags & RST)
  1008. return;
  1009. /* make pseudo header */
  1010. switch(version) {
  1011. case V4:
  1012. memset(&ph4, 0, sizeof(ph4));
  1013. ph4.vihl = IP_VER4;
  1014. v6tov4(ph4.tcpsrc, dest);
  1015. v6tov4(ph4.tcpdst, source);
  1016. ph4.proto = IP_TCPPROTO;
  1017. hnputs(ph4.tcplen, TCP4_HDRSIZE);
  1018. hnputs(ph4.tcpsport, seg->dest);
  1019. hnputs(ph4.tcpdport, seg->source);
  1020. break;
  1021. case V6:
  1022. memset(&ph6, 0, sizeof(ph6));
  1023. ph6.vcf[0] = IP_VER6;
  1024. ipmove(ph6.tcpsrc, dest);
  1025. ipmove(ph6.tcpdst, source);
  1026. ph6.proto = IP_TCPPROTO;
  1027. hnputs(ph6.ploadlen, TCP6_HDRSIZE);
  1028. hnputs(ph6.tcpsport, seg->dest);
  1029. hnputs(ph6.tcpdport, seg->source);
  1030. break;
  1031. default:
  1032. panic("sndrst: version %d", version);
  1033. }
  1034. tpriv->stats[OutRsts]++;
  1035. rflags = RST;
  1036. /* convince the other end that this reset is in band */
  1037. if(seg->flags & ACK) {
  1038. seg->seq = seg->ack;
  1039. seg->ack = 0;
  1040. }
  1041. else {
  1042. rflags |= ACK;
  1043. seg->ack = seg->seq;
  1044. seg->seq = 0;
  1045. if(seg->flags & SYN)
  1046. seg->ack++;
  1047. seg->ack += length;
  1048. if(seg->flags & FIN)
  1049. seg->ack++;
  1050. }
  1051. seg->flags = rflags;
  1052. seg->wnd = 0;
  1053. seg->urg = 0;
  1054. seg->mss = 0;
  1055. switch(version) {
  1056. case V4:
  1057. hbp = htontcp4(seg, nil, &ph4, nil);
  1058. if(hbp == nil)
  1059. return;
  1060. ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
  1061. break;
  1062. case V6:
  1063. hbp = htontcp6(seg, nil, &ph6, nil);
  1064. if(hbp == nil)
  1065. return;
  1066. ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
  1067. break;
  1068. default:
  1069. panic("sndrst2: version %d", version);
  1070. }
  1071. }
  1072. /*
  1073. * send a reset to the remote side and close the conversation
  1074. * called with s qlocked
  1075. */
  1076. char*
  1077. tcphangup(Conv *s)
  1078. {
  1079. Tcp seg;
  1080. Tcpctl *tcb;
  1081. Block *hbp;
  1082. tcb = (Tcpctl*)s->ptcl;
  1083. if(waserror())
  1084. return commonerror();
  1085. if(s->raddr != 0) {
  1086. seg.flags = RST | ACK;
  1087. seg.ack = tcb->rcv.nxt;
  1088. tcb->rcv.lastacked = tcb->rcv.nxt;
  1089. seg.seq = tcb->snd.ptr;
  1090. seg.wnd = 0;
  1091. seg.urg = 0;
  1092. seg.mss = 0;
  1093. switch(s->ipversion) {
  1094. case V4:
  1095. tcb->protohdr.tcp4hdr.vihl = IP_VER4;
  1096. hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
  1097. ipoput4(s->p->f, hbp, 0, s->ttl, s->tos);
  1098. break;
  1099. case V6:
  1100. tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
  1101. hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
  1102. ipoput6(s->p->f, hbp, 0, s->ttl, s->tos);
  1103. break;
  1104. default:
  1105. panic("tcphangup: version %d", s->ipversion);
  1106. }
  1107. }
  1108. localclose(s, nil);
  1109. poperror();
  1110. return nil;
  1111. }
  1112. /*
  1113. * (re)send a SYN ACK
  1114. */
  1115. int
  1116. sndsynack(Proto *tcp, Limbo *lp)
  1117. {
  1118. Block *hbp;
  1119. Tcp4hdr ph4;
  1120. Tcp6hdr ph6;
  1121. Tcp seg;
  1122. /* make pseudo header */
  1123. switch(lp->version) {
  1124. case V4:
  1125. memset(&ph4, 0, sizeof(ph4));
  1126. ph4.vihl = IP_VER4;
  1127. v6tov4(ph4.tcpsrc, lp->laddr);
  1128. v6tov4(ph4.tcpdst, lp->raddr);
  1129. ph4.proto = IP_TCPPROTO;
  1130. hnputs(ph4.tcplen, TCP4_HDRSIZE);
  1131. hnputs(ph4.tcpsport, lp->lport);
  1132. hnputs(ph4.tcpdport, lp->rport);
  1133. break;
  1134. case V6:
  1135. memset(&ph6, 0, sizeof(ph6));
  1136. ph6.vcf[0] = IP_VER6;
  1137. ipmove(ph6.tcpsrc, lp->laddr);
  1138. ipmove(ph6.tcpdst, lp->raddr);
  1139. ph6.proto = IP_TCPPROTO;
  1140. hnputs(ph6.ploadlen, TCP6_HDRSIZE);
  1141. hnputs(ph6.tcpsport, lp->lport);
  1142. hnputs(ph6.tcpdport, lp->rport);
  1143. break;
  1144. default:
  1145. panic("sndrst: version %d", lp->version);
  1146. }
  1147. seg.seq = lp->iss;
  1148. seg.ack = lp->irs+1;
  1149. seg.flags = SYN|ACK;
  1150. seg.urg = 0;
  1151. seg.mss = tcpmtu(tcp, lp->laddr, lp->version);
  1152. seg.wnd = QMAX;
  1153. switch(lp->version) {
  1154. case V4:
  1155. hbp = htontcp4(&seg, nil, &ph4, nil);
  1156. if(hbp == nil)
  1157. return -1;
  1158. ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
  1159. break;
  1160. case V6:
  1161. hbp = htontcp6(&seg, nil, &ph6, nil);
  1162. if(hbp == nil)
  1163. return -1;
  1164. ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
  1165. break;
  1166. default:
  1167. panic("sndsnack: version %d", lp->version);
  1168. }
  1169. lp->lastsend = NOW;
  1170. return 0;
  1171. }
  /* limbo hash: last two address bytes plus the port, masked to the table size */
  #define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )
  1173. /*
  1174. * put a call into limbo and respond with a SYN ACK
  1175. *
  1176. * called with proto locked
  1177. */
  1178. static void
  1179. limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
  1180. {
  1181. Limbo *lp, **l;
  1182. Tcppriv *tpriv;
  1183. int h;
  1184. tpriv = s->p->priv;
  1185. h = hashipa(source, seg->source);
  1186. for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
  1187. lp = *l;
  1188. if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
  1189. continue;
  1190. if(ipcmp(lp->raddr, source) != 0)
  1191. continue;
  1192. if(ipcmp(lp->laddr, dest) != 0)
  1193. continue;
  1194. /* each new SYN restarts the retransmits */
  1195. lp->irs = seg->seq;
  1196. break;
  1197. }
  1198. lp = *l;
  1199. if(lp == nil){
  1200. if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
  1201. lp = tpriv->lht[h];
  1202. tpriv->lht[h] = lp->next;
  1203. lp->next = nil;
  1204. } else {
  1205. lp = malloc(sizeof(*lp));
  1206. if(lp == nil)
  1207. return;
  1208. tpriv->nlimbo++;
  1209. }
  1210. *l = lp;
  1211. lp->version = version;
  1212. ipmove(lp->laddr, dest);
  1213. ipmove(lp->raddr, source);
  1214. lp->lport = seg->dest;
  1215. lp->rport = seg->source;
  1216. lp->mss = seg->mss;
  1217. lp->irs = seg->seq;
  1218. lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
  1219. }
  1220. if(sndsynack(s->p, lp) < 0){
  1221. *l = lp->next;
  1222. tpriv->nlimbo--;
  1223. free(lp);
  1224. }
  1225. }
  1226. /*
  1227. * resend SYN ACK's once every SYNACK_RXTIMER ms.
  1228. */
  1229. static void
  1230. limborexmit(Proto *tcp)
  1231. {
  1232. Tcppriv *tpriv;
  1233. Limbo **l, *lp;
  1234. int h;
  1235. int seen;
  1236. ulong now;
  1237. tpriv = tcp->priv;
  1238. if(!canqlock(tcp))
  1239. return;
  1240. seen = 0;
  1241. now = NOW;
  1242. for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
  1243. for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){
  1244. lp = *l;
  1245. seen++;
  1246. if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER)
  1247. continue;
  1248. /* time it out after 1 second */
  1249. if(++(lp->rexmits) > 5){
  1250. tpriv->nlimbo--;
  1251. *l = lp->next;
  1252. free(lp);
  1253. continue;
  1254. }
  1255. /* if we're being attacked, don't bother resending SYN ACK's */
  1256. if(tpriv->nlimbo > 100)
  1257. continue;
  1258. if(sndsynack(tcp, lp) < 0){
  1259. tpriv->nlimbo--;
  1260. *l = lp->next;
  1261. free(lp);
  1262. continue;
  1263. }
  1264. l = &lp->next;
  1265. }
  1266. }
  1267. qunlock(tcp);
  1268. }
  1269. /*
  1270. * lookup call in limbo. if found, throw it out.
  1271. *
  1272. * called with proto locked
  1273. */
  1274. static void
  1275. limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
  1276. {
  1277. Limbo *lp, **l;
  1278. int h;
  1279. Tcppriv *tpriv;
  1280. tpriv = s->p->priv;
  1281. /* find a call in limbo */
  1282. h = hashipa(src, segp->source);
  1283. for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
  1284. lp = *l;
  1285. if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
  1286. continue;
  1287. if(ipcmp(lp->laddr, dst) != 0)
  1288. continue;
  1289. if(ipcmp(lp->raddr, src) != 0)
  1290. continue;
  1291. /* RST can only follow the SYN */
  1292. if(segp->seq == lp->irs+1){
  1293. tpriv->nlimbo--;
  1294. *l = lp->next;
  1295. free(lp);
  1296. }
  1297. break;
  1298. }
  1299. }
  1300. /*
  1301. * lookup call in limbo. if found, create a new conversation
  1302. *
  1303. * called with proto locked
  1304. */
  1305. static Conv*
  1306. tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
  1307. {
  1308. Conv *new;
  1309. Tcpctl *tcb;
  1310. Tcppriv *tpriv;
  1311. Tcp4hdr *h4;
  1312. Tcp6hdr *h6;
  1313. Limbo *lp, **l;
  1314. int h;
  1315. /* unless it's just an ack, it can't be someone coming out of limbo */
  1316. if((segp->flags & SYN) || (segp->flags & ACK) == 0)
  1317. return nil;
  1318. tpriv = s->p->priv;
  1319. /* find a call in limbo */
  1320. h = hashipa(src, segp->source);
  1321. for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
  1322. netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
  1323. src, segp->source, lp->raddr, lp->rport,
  1324. dst, segp->dest, lp->laddr, lp->lport,
  1325. version, lp->version
  1326. );
  1327. if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
  1328. continue;
  1329. if(ipcmp(lp->laddr, dst) != 0)
  1330. continue;
  1331. if(ipcmp(lp->raddr, src) != 0)
  1332. continue;
  1333. /* we're assuming no data with the initial SYN */
  1334. if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
  1335. netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
  1336. segp->seq, lp->irs+1, segp->ack, lp->iss+1);
  1337. lp = nil;
  1338. } else {
  1339. tpriv->nlimbo--;
  1340. *l = lp->next;
  1341. }
  1342. break;
  1343. }
  1344. if(lp == nil)
  1345. return nil;
  1346. new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
  1347. if(new == nil)
  1348. return nil;
  1349. memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
  1350. tcb = (Tcpctl*)new->ptcl;
  1351. tcb->flags &= ~CLONE;
  1352. tcb->timer.arg = new;
  1353. tcb->timer.state = TcptimerOFF;
  1354. tcb->acktimer.arg = new;
  1355. tcb->acktimer.state = TcptimerOFF;
  1356. tcb->katimer.arg = new;
  1357. tcb->katimer.state = TcptimerOFF;
  1358. tcb->rtt_timer.arg = new;
  1359. tcb->rtt_timer.state = TcptimerOFF;
  1360. tcb->irs = lp->irs;
  1361. tcb->rcv.nxt = tcb->irs+1;
  1362. tcb->rcv.urg = tcb->rcv.nxt;
  1363. tcb->iss = lp->iss;
  1364. tcb->rttseq = tcb->iss;
  1365. tcb->snd.wl2 = tcb->iss;
  1366. tcb->snd.una = tcb->iss+1;
  1367. tcb->snd.ptr = tcb->iss+1;
  1368. tcb->snd.nxt = tcb->iss+1;
  1369. tcb->flgcnt = 0;
  1370. tcb->flags |= SYNACK;
  1371. /* our sending max segment size cannot be bigger than what he asked for */
  1372. if(lp->mss != 0 && lp->mss < tcb->mss)
  1373. tcb->mss = lp->mss;
  1374. /* the congestion window always starts out as a single segment */
  1375. tcb->snd.wnd = segp->wnd;
  1376. tcb->cwind = tcb->mss;
  1377. /* set initial round trip time */
  1378. tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
  1379. tcpsynackrtt(new);
  1380. free(lp);
  1381. /* set up proto header */
  1382. switch(version){
  1383. case V4:
  1384. h4 = &tcb->protohdr.tcp4hdr;
  1385. memset(h4, 0, sizeof(*h4));
  1386. h4->proto = IP_TCPPROTO;
  1387. hnputs(h4->tcpsport, new->lport);
  1388. hnputs(h4->tcpdport, new->rport);
  1389. v6tov4(h4->tcpsrc, dst);
  1390. v6tov4(h4->tcpdst, src);
  1391. break;
  1392. case V6:
  1393. h6 = &tcb->protohdr.tcp6hdr;
  1394. memset(h6, 0, sizeof(*h6));
  1395. h6->proto = IP_TCPPROTO;
  1396. hnputs(h6->tcpsport, new->lport);
  1397. hnputs(h6->tcpdport, new->rport);
  1398. ipmove(h6->tcpsrc, dst);
  1399. ipmove(h6->tcpdst, src);
  1400. break;
  1401. default:
  1402. panic("tcpincoming: version %d", new->ipversion);
  1403. }
  1404. tcpsetstate(new, Established);
  1405. iphtadd(&tpriv->ht, new);
  1406. return new;
  1407. }
  /*
   *  Return 1 if x lies in the closed range [low, high], else 0.
   *  When low > high the range is taken to wrap around the top of the
   *  number space, so x is inside if it is >= low or <= high.
   */
  int
  seq_within(ulong x, ulong low, ulong high)
  {
      if(low > high)
          return x >= low || x <= high;
      return low <= x && x <= high;
  }
  /* Non-zero if the difference x-y, taken as a signed int, is negative. */
  int
  seq_lt(ulong x, ulong y)
  {
      int diff;

      diff = (int)(x-y);
      return diff < 0;
  }
  /* Non-zero if the difference x-y, taken as a signed int, is zero or negative. */
  int
  seq_le(ulong x, ulong y)
  {
      int diff;

      diff = (int)(x-y);
      return diff <= 0;
  }
  /* Non-zero if the difference x-y, taken as a signed int, is positive. */
  int
  seq_gt(ulong x, ulong y)
  {
      int diff;

      diff = (int)(x-y);
      return diff > 0;
  }
  /* Non-zero if the difference x-y, taken as a signed int, is zero or positive. */
  int
  seq_ge(ulong x, ulong y)
  {
      int diff;

      diff = (int)(x-y);
      return diff >= 0;
  }
  1441. /*
  1442. * use the time between the first SYN and it's ack as the
  1443. * initial round trip time
  1444. */
  1445. void
  1446. tcpsynackrtt(Conv *s)
  1447. {
  1448. Tcpctl *tcb;
  1449. int delta;
  1450. Tcppriv *tpriv;
  1451. tcb = (Tcpctl*)s->ptcl;
  1452. tpriv = s->p->priv;
  1453. delta = NOW - tcb->sndsyntime;
  1454. tcb->srtt = delta<<LOGAGAIN;
  1455. tcb->mdev = delta<<LOGDGAIN;
  1456. /* halt round trip timer */
  1457. tcphalt(tpriv, &tcb->rtt_timer);
  1458. }
  1459. void
  1460. update(Conv *s, Tcp *seg)
  1461. {
  1462. int rtt, delta;
  1463. Tcpctl *tcb;
  1464. ushort acked, expand;
  1465. Tcppriv *tpriv;
  1466. tpriv = s->p->priv;
  1467. tcb = (Tcpctl*)s->ptcl;
  1468. /* if everything has been acked, force output(?) */
  1469. if(seq_gt(seg->ack, tcb->snd.nxt)) {
  1470. tcb->flags |= FORCE;
  1471. return;
  1472. }
  1473. /* added by Dong Lin for fast retransmission */
  1474. if(seg->ack == tcb->snd.una
  1475. && tcb->snd.una != tcb->snd.nxt
  1476. && seg->len == 0
  1477. && seg->wnd == tcb->snd.wnd) {
  1478. /* this is a pure ack w/o window update */
  1479. netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
  1480. tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
  1481. if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
  1482. /*
  1483. * tahoe tcp rxt the packet, half sshthresh,
  1484. * and set cwnd to one packet
  1485. */
  1486. tcb->snd.recovery = 1;
  1487. tcb->snd.rxt = tcb->snd.nxt;
  1488. netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
  1489. tcprxmit(s);
  1490. } else {
  1491. /* do reno tcp here. */
  1492. }
  1493. }
  1494. /*
  1495. * update window
  1496. */
  1497. if( seq_gt(seg->ack, tcb->snd.wl2)
  1498. || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
  1499. tcb->snd.wnd = seg->wnd;
  1500. tcb->snd.wl2 = seg->ack;
  1501. }
  1502. if(!seq_gt(seg->ack, tcb->snd.una)){
  1503. /*
  1504. * don't let us hangup if sending into a closed window and
  1505. * we're still getting acks
  1506. */
  1507. if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
  1508. tcb->backedoff = MAXBACKMS/4;
  1509. }
  1510. return;
  1511. }
  1512. /*
  1513. * any positive ack turns off fast rxt,
  1514. * (should we do new-reno on partial acks?)
  1515. */
  1516. if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
  1517. tcb->snd.dupacks = 0;
  1518. tcb->snd.recovery = 0;
  1519. } else
  1520. netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
  1521. /* Compute the new send window size */
  1522. acked = seg->ack - tcb->snd.una;
  1523. /* avoid slow start and timers for SYN acks */
  1524. if((tcb->flags & SYNACK) == 0) {
  1525. tcb->flags |= SYNACK;
  1526. acked--;
  1527. tcb->flgcnt--;
  1528. goto done;
  1529. }
  1530. /* slow start as long as we're not recovering from lost packets */
  1531. if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
  1532. if(tcb->cwind < tcb->ssthresh) {
  1533. expand = tcb->mss;
  1534. if(acked < expand)
  1535. expand = acked;
  1536. }
  1537. else
  1538. expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
  1539. if(tcb->cwind + expand < tcb->cwind)
  1540. expand = 65535 - tcb->cwind;
  1541. if(tcb->cwind + expand > tcb->snd.wnd)
  1542. expand = tcb->snd.wnd - tcb->cwind;
  1543. if(expand != 0)
  1544. tcb->cwind += expand;
  1545. }
  1546. /* Adjust the timers according to the round trip time */
  1547. if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
  1548. tcphalt(tpriv, &tcb->rtt_timer);
  1549. if((tcb->flags&RETRAN) == 0) {
  1550. tcb->backoff = 0;
  1551. tcb->backedoff = 0;
  1552. rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
  1553. if(rtt == 0)
  1554. rtt = 1; /* otherwise all close systems will rexmit in 0 time */
  1555. rtt *= MSPTICK;
  1556. if(tcb->srtt == 0) {
  1557. tcb->srtt = rtt << LOGAGAIN;
  1558. tcb->mdev = rtt << LOGDGAIN;
  1559. } else {
  1560. delta = rtt - (tcb->srtt>>LOGAGAIN);
  1561. tcb->srtt += delta;
  1562. if(tcb->srtt <= 0)
  1563. tcb->srtt = 1;
  1564. delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
  1565. tcb->mdev += delta;
  1566. if(tcb->mdev <= 0)
  1567. tcb->mdev = 1;
  1568. }
  1569. tcpsettimer(tcb);
  1570. }
  1571. }
  1572. done:
  1573. if(qdiscard(s->wq, acked) < acked)
  1574. tcb->flgcnt--;
  1575. tcb->snd.una = seg->ack;
  1576. if(seq_gt(seg->ack, tcb->snd.urg))
  1577. tcb->snd.urg = seg->ack;
  1578. if(tcb->snd.una != tcb->snd.nxt)
  1579. tcpgo(tpriv, &tcb->timer);
  1580. else
  1581. tcphalt(tpriv, &tcb->timer);
  1582. if(seq_lt(tcb->snd.ptr, tcb->snd.una))
  1583. tcb->snd.ptr = tcb->snd.una;
  1584. tcb->flags &= ~RETRAN;
  1585. tcb->backoff = 0;
  1586. tcb->backedoff = 0;
  1587. }
  1588. void
  1589. tcpiput(Proto *tcp, Ipifc*, Block *bp)
  1590. {
  1591. Tcp seg;
  1592. Tcp4hdr *h4;
  1593. Tcp6hdr *h6;
  1594. int hdrlen;
  1595. Tcpctl *tcb;
  1596. ushort length;
  1597. uchar source[IPaddrlen], dest[IPaddrlen];
  1598. Conv *s;
  1599. Fs *f;
  1600. Tcppriv *tpriv;
  1601. uchar version;
  1602. f = tcp->f;
  1603. tpriv = tcp->priv;
  1604. tpriv->stats[InSegs]++;
  1605. h4 = (Tcp4hdr*)(bp->rp);
  1606. h6 = (Tcp6hdr*)(bp->rp);
  1607. if((h4->vihl&0xF0)==IP_VER4) {
  1608. version = V4;
  1609. length = nhgets(h4->length);
  1610. v4tov6(dest, h4->tcpdst);
  1611. v4tov6(source, h4->tcpsrc);
  1612. h4->Unused = 0;
  1613. hnputs(h4->tcplen, length-TCP4_PKT);
  1614. if((h4->tcpcksum[0] || h4->tcpcksum[1]) &&
  1615. ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
  1616. tpriv->stats[CsumErrs]++;
  1617. tpriv->stats[InErrs]++;
  1618. netlog(f, Logtcp, "bad tcp proto cksum\n");
  1619. freeblist(bp);
  1620. return;
  1621. }
  1622. hdrlen = ntohtcp4(&seg, &bp);
  1623. if(hdrlen < 0){
  1624. tpriv->stats[HlenErrs]++;
  1625. tpriv->stats[InErrs]++;
  1626. netlog(f, Logtcp, "bad tcp hdr len\n");
  1627. return;
  1628. }
  1629. /* trim the packet to the size claimed by the datagram */
  1630. length -= hdrlen+TCP4_PKT;
  1631. bp = trimblock(bp, hdrlen+TCP4_PKT, length);
  1632. if(bp == nil){
  1633. tpriv->stats[LenErrs]++;
  1634. tpriv->stats[InErrs]++;
  1635. netlog(f, Logtcp, "tcp len < 0 after trim\n");
  1636. return;
  1637. }
  1638. }
  1639. else {
  1640. int ttl = h6->ttl;
  1641. int proto = h6->proto;
  1642. version = V6;
  1643. length = nhgets(h6->ploadlen);
  1644. ipmove(dest, h6->tcpdst);
  1645. ipmove(source, h6->tcpsrc);
  1646. h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
  1647. h6->ttl = proto;
  1648. hnputl(h6->vcf, length);
  1649. if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
  1650. ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
  1651. tpriv->stats[CsumErrs]++;
  1652. tpriv->stats[InErrs]++;
  1653. netlog(f, Logtcp, "bad tcp proto cksum\n");
  1654. freeblist(bp);
  1655. return;
  1656. }
  1657. h6->ttl = ttl;
  1658. h6->proto = proto;
  1659. hnputs(h6->ploadlen, length);
  1660. hdrlen = ntohtcp6(&seg, &bp);
  1661. if(hdrlen < 0){
  1662. tpriv->stats[HlenErrs]++;
  1663. tpriv->stats[InErrs]++;
  1664. netlog(f, Logtcp, "bad tcp hdr len\n");
  1665. return;
  1666. }
  1667. /* trim the packet to the size claimed by the datagram */
  1668. length -= hdrlen;
  1669. bp = trimblock(bp, hdrlen+TCP6_PKT, length);
  1670. if(bp == nil){
  1671. tpriv->stats[LenErrs]++;
  1672. tpriv->stats[InErrs]++;
  1673. netlog(f, Logtcp, "tcp len < 0 after trim\n");
  1674. return;
  1675. }
  1676. }
  1677. /* lock protocol while searching for a conversation */
  1678. qlock(tcp);
  1679. /* Look for a matching conversation */
  1680. s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
  1681. if(s == nil){
  1682. netlog(f, Logtcp, "iphtlook failed");
  1683. reset:
  1684. qunlock(tcp);
  1685. sndrst(tcp, source, dest, length, &seg, version, "no conversation");
  1686. freeblist(bp);
  1687. return;
  1688. }
  1689. /* if it's a listener, look for the right flags and get a new conv */
  1690. tcb = (Tcpctl*)s->ptcl;
  1691. if(tcb->state == Listen){
  1692. if(seg.flags & RST){
  1693. limborst(s, &seg, source, dest, version);
  1694. qunlock(tcp);
  1695. freeblist(bp);
  1696. return;
  1697. }
  1698. /* if this is a new SYN, put the call into limbo */
  1699. if((seg.flags & SYN) && (seg.flags & ACK) == 0){
  1700. limbo(s, source, dest, &seg, version);
  1701. qunlock(tcp);
  1702. freeblist(bp);
  1703. return;
  1704. }
  1705. /*
  1706. * if there's a matching call in limbo, tcpincoming will
  1707. * return it in state Syn_received
  1708. */
  1709. s = tcpincoming(s, &seg, source, dest, version);
  1710. if(s == nil)
  1711. goto reset;
  1712. }
  1713. /* The rest of the input state machine is run with the control block
  1714. * locked and implements the state machine directly out of the RFC.
  1715. * Out-of-band data is ignored - it was always a bad idea.
  1716. */
  1717. tcb = (Tcpctl*)s->ptcl;
  1718. if(waserror()){
  1719. qunlock(s);
  1720. nexterror();
  1721. }
  1722. qlock(s);
  1723. qunlock(tcp);
  1724. if(tcb->kacounter > 0)
  1725. tcb->kacounter = MAXBACKMS / (tcb->katimer.start*MSPTICK);
  1726. if(tcb->kacounter < 3)
  1727. tcb->kacounter = 3;
  1728. switch(tcb->state) {
  1729. case Closed:
  1730. sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
  1731. goto raise;
  1732. case Syn_sent:
  1733. if(seg.flags & ACK) {
  1734. if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
  1735. sndrst(tcp, source, dest, length, &seg, version,
  1736. "bad seq in Syn_sent");
  1737. goto raise;
  1738. }
  1739. }
  1740. if(seg.flags & RST) {
  1741. if(seg.flags & ACK)
  1742. localclose(s, Econrefused);
  1743. goto raise;
  1744. }
  1745. if(seg.flags & SYN) {
  1746. procsyn(s, &seg);
  1747. if(seg.flags & ACK){
  1748. update(s, &seg);
  1749. tcpsynackrtt(s);
  1750. tcpsetstate(s, Established);
  1751. }
  1752. else {
  1753. tcb->time = NOW;
  1754. tcpsetstate(s, Syn_received);
  1755. }
  1756. if(length != 0 || (seg.flags & FIN))
  1757. break;
  1758. freeblist(bp);
  1759. goto output;
  1760. }
  1761. else
  1762. freeblist(bp);
  1763. qunlock(s);
  1764. poperror();
  1765. return;
  1766. case Syn_received:
  1767. /* doesn't matter if it's the correct ack, we're just trying to set timing */
  1768. if(seg.flags & ACK)
  1769. tcpsynackrtt(s);
  1770. break;
  1771. }
  1772. /*
  1773. * One DOS attack is to open connections to us and then forget about them,
  1774. * thereby tying up a conv at no long term cost to the attacker.
  1775. * This is an attempt to defeat these stateless DOS attacks. See
  1776. * corresponding code in tcpsendka().
  1777. */
  1778. if(tcb->state != Syn_received && (seg.flags & RST) == 0){
  1779. if(tcpporthogdefense
  1780. && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
  1781. print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
  1782. source, seg.source, dest, seg.dest, seg.flags,
  1783. tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
  1784. localclose(s, "stateless hog");
  1785. }
  1786. }
  1787. /* Cut the data to fit the receive window */
  1788. if(tcptrim(tcb, &seg, &bp, &length) == -1) {
  1789. netlog(f, Logtcp, "tcp len < 0, %lux\n", seg.seq);
  1790. update(s, &seg);
  1791. if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
  1792. tcphalt(tpriv, &tcb->rtt_timer);
  1793. tcphalt(tpriv, &tcb->acktimer);
  1794. tcphalt(tpriv, &tcb->katimer);
  1795. tcpsetstate(s, Time_wait);
  1796. tcb->timer.start = MSL2*(1000 / MSPTICK);
  1797. tcpgo(tpriv, &tcb->timer);
  1798. }
  1799. if(!(seg.flags & RST)) {
  1800. tcb->flags |= FORCE;
  1801. goto output;
  1802. }
  1803. qunlock(s);
  1804. poperror();
  1805. return;
  1806. }
  1807. /* Cannot accept so answer with a rst */
  1808. if(length && tcb->state == Closed) {
  1809. sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
  1810. goto raise;
  1811. }
  1812. /* The segment is beyond the current receive pointer so
  1813. * queue the data in the resequence queue
  1814. */
  1815. if(seg.seq != tcb->rcv.nxt)
  1816. if(length != 0 || (seg.flags & (SYN|FIN))) {
  1817. update(s, &seg);
  1818. if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
  1819. print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
  1820. tcb->flags |= FORCE;
  1821. goto output;
  1822. }
  1823. /*
  1824. * keep looping till we've processed this packet plus any
  1825. * adjacent packets in the resequence queue
  1826. */
  1827. for(;;) {
  1828. if(seg.flags & RST) {
  1829. if(tcb->state == Established)
  1830. tpriv->stats[EstabResets]++;
  1831. localclose(s, Econrefused);
  1832. goto raise;
  1833. }
  1834. if((seg.flags&ACK) == 0)
  1835. goto raise;
  1836. switch(tcb->state) {
  1837. case Syn_received:
  1838. if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
  1839. sndrst(tcp, source, dest, length, &seg, version,
  1840. "bad seq in Syn_received");
  1841. goto raise;
  1842. }
  1843. update(s, &seg);
  1844. tcpsetstate(s, Established);
  1845. case Established:
  1846. case Close_wait:
  1847. update(s, &seg);
  1848. break;
  1849. case Finwait1:
  1850. update(s, &seg);
  1851. if(qlen(s->wq)+tcb->flgcnt == 0){
  1852. tcphalt(tpriv, &tcb->rtt_timer);
  1853. tcphalt(tpriv, &tcb->acktimer);
  1854. tcpsetkacounter(tcb);
  1855. tcb->time = NOW;
  1856. tcpsetstate(s, Finwait2);
  1857. tcb->katimer.start = MSL2 * (1000 / MSPTICK);
  1858. tcpgo(tpriv, &tcb->katimer);
  1859. }
  1860. break;
  1861. case Finwait2:
  1862. update(s, &seg);
  1863. break;
  1864. case Closing:
  1865. update(s, &seg);
  1866. if(qlen(s->wq)+tcb->flgcnt == 0) {
  1867. tcphalt(tpriv, &tcb->rtt_timer);
  1868. tcphalt(tpriv, &tcb->acktimer);
  1869. tcphalt(tpriv, &tcb->katimer);
  1870. tcpsetstate(s, Time_wait);
  1871. tcb->timer.start = MSL2*(1000 / MSPTICK);
  1872. tcpgo(tpriv, &tcb->timer);
  1873. }
  1874. break;
  1875. case Last_ack:
  1876. update(s, &seg);
  1877. if(qlen(s->wq)+tcb->flgcnt == 0) {
  1878. localclose(s, nil);
  1879. goto raise;
  1880. }
  1881. case Time_wait:
  1882. tcb->flags |= FORCE;
  1883. if(tcb->timer.state != TcptimerON)
  1884. tcpgo(tpriv, &tcb->timer);
  1885. }
  1886. if((seg.flags&URG) && seg.urg) {
  1887. if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
  1888. tcb->rcv.urg = seg.urg + seg.seq;
  1889. pullblock(&bp, seg.urg);
  1890. }
  1891. }
  1892. else
  1893. if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
  1894. tcb->rcv.urg = tcb->rcv.nxt;
  1895. if(length == 0) {
  1896. if(bp != nil)
  1897. freeblist(bp);
  1898. }
  1899. else {
  1900. switch(tcb->state){
  1901. default:
  1902. /* Ignore segment text */
  1903. if(bp != nil)
  1904. freeblist(bp);
  1905. break;
  1906. case Syn_received:
  1907. case Established:
  1908. case Finwait1:
  1909. /* If we still have some data place on
  1910. * receive queue
  1911. */
  1912. if(bp) {
  1913. bp = packblock(bp);
  1914. if(bp == nil)
  1915. panic("tcp packblock");
  1916. qpassnolim(s->rq, bp);
  1917. bp = nil;
  1918. /* force an ack every 2 data messages */
  1919. if(++(tcb->rcv.una) >= 2)
  1920. tcb->flags |= FORCE;
  1921. }
  1922. tcb->rcv.nxt += length;
  1923. /*
  1924. * update our rcv window
  1925. */
  1926. tcprcvwin(s);
  1927. /*
  1928. * force an ack if we've got 2 segs since we
  1929. * last acked.
  1930. */
  1931. if(tcb->rcv.nxt - tcb->rcv.lastacked >= 2*tcb->mss)
  1932. tcb->flags |= FORCE;
  1933. /*
  1934. * turn on the acktimer if there's something
  1935. * to ack
  1936. */
  1937. if(tcb->acktimer.state != TcptimerON)
  1938. tcpgo(tpriv, &tcb->acktimer);
  1939. break;
  1940. case Finwait2:
  1941. /* no process to read the data, send a reset */
  1942. if(bp != nil)
  1943. freeblist(bp);
  1944. sndrst(tcp, source, dest, length, &seg, version,
  1945. "send to Finwait2");
  1946. qunlock(s);
  1947. poperror();
  1948. return;
  1949. }
  1950. }
  1951. if(seg.flags & FIN) {
  1952. tcb->flags |= FORCE;
  1953. switch(tcb->state) {
  1954. case Syn_received:
  1955. case Established:
  1956. tcb->rcv.nxt++;
  1957. tcpsetstate(s, Close_wait);
  1958. break;
  1959. case Finwait1:
  1960. tcb->rcv.nxt++;
  1961. if(qlen(s->wq)+tcb->flgcnt == 0) {
  1962. tcphalt(tpriv, &tcb->rtt_timer);
  1963. tcphalt(tpriv, &tcb->acktimer);
  1964. tcphalt(tpriv, &tcb->katimer);
  1965. tcpsetstate(s, Time_wait);
  1966. tcb->timer.start = MSL2*(1000/MSPTICK);
  1967. tcpgo(tpriv, &tcb->timer);
  1968. }
  1969. else
  1970. tcpsetstate(s, Closing);
  1971. break;
  1972. case Finwait2:
  1973. tcb->rcv.nxt++;
  1974. tcphalt(tpriv, &tcb->rtt_timer);
  1975. tcphalt(tpriv, &tcb->acktimer);
  1976. tcphalt(tpriv, &tcb->katimer);
  1977. tcpsetstate(s, Time_wait);
  1978. tcb->timer.start = MSL2 * (1000/MSPTICK);
  1979. tcpgo(tpriv, &tcb->timer);
  1980. break;
  1981. case Close_wait:
  1982. case Closing:
  1983. case Last_ack:
  1984. break;
  1985. case Time_wait:
  1986. tcpgo(tpriv, &tcb->timer);
  1987. break;
  1988. }
  1989. }
  1990. /*
  1991. * get next adjacent segment from the resequence queue.
  1992. * dump/trim any overlapping segments
  1993. */
  1994. for(;;) {
  1995. if(tcb->reseq == nil)
  1996. goto output;
  1997. if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
  1998. goto output;
  1999. getreseq(tcb, &seg, &bp, &length);
  2000. if(tcptrim(tcb, &seg, &bp, &length) == 0)
  2001. break;
  2002. }
  2003. }
  2004. output:
  2005. tcpoutput(s);
  2006. qunlock(s);
  2007. poperror();
  2008. return;
  2009. raise:
  2010. qunlock(s);
  2011. poperror();
  2012. freeblist(bp);
  2013. tcpkick(s);
  2014. }
  2015. /*
  2016. * always enters and exits with the s locked. We drop
  2017. * the lock to ipoput the packet so some care has to be
  2018. * taken by callers.
  2019. */
  2020. void
  2021. tcpoutput(Conv *s)
  2022. {
  2023. Tcp seg;
  2024. int msgs;
  2025. Tcpctl *tcb;
  2026. Block *hbp, *bp;
  2027. int sndcnt, n;
  2028. ulong ssize, dsize, usable, sent;
  2029. Fs *f;
  2030. Tcppriv *tpriv;
  2031. uchar version;
  2032. f = s->p->f;
  2033. tpriv = s->p->priv;
  2034. version = s->ipversion;
  2035. for(msgs = 0; msgs < 100; msgs++) {
  2036. tcb = (Tcpctl*)s->ptcl;
  2037. switch(tcb->state) {
  2038. case Listen:
  2039. case Closed:
  2040. case Finwait2:
  2041. return;
  2042. }
  2043. /* force an ack when a window has opened up */
  2044. if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
  2045. tcb->rcv.blocked = 0;
  2046. tcb->flags |= FORCE;
  2047. }
  2048. sndcnt = qlen(s->wq)+tcb->flgcnt;
  2049. sent = tcb->snd.ptr - tcb->snd.una;
  2050. /* Don't send anything else until our SYN has been acked */
  2051. if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
  2052. break;
  2053. /* Compute usable segment based on offered window and limit
  2054. * window probes to one
  2055. */
  2056. if(tcb->snd.wnd == 0){
  2057. if(sent != 0) {
  2058. if((tcb->flags&FORCE) == 0)
  2059. break;
  2060. // tcb->snd.ptr = tcb->snd.una;
  2061. }
  2062. usable = 1;
  2063. }
  2064. else {
  2065. usable = tcb->cwind;
  2066. if(tcb->snd.wnd < usable)
  2067. usable = tcb->snd.wnd;
  2068. usable -= sent;
  2069. }
  2070. ssize = sndcnt-sent;
  2071. if(ssize && usable < 2)
  2072. netlog(s->p->f, Logtcp, "throttled snd.wnd 0x%ux cwind 0x%ux\n",
  2073. tcb->snd.wnd, tcb->cwind);
  2074. if(usable < ssize)
  2075. ssize = usable;
  2076. if(tcb->mss < ssize)
  2077. ssize = tcb->mss;
  2078. dsize = ssize;
  2079. seg.urg = 0;
  2080. if(ssize == 0)
  2081. if((tcb->flags&FORCE) == 0)
  2082. break;
  2083. tcphalt(tpriv, &tcb->acktimer);
  2084. tcb->flags &= ~FORCE;
  2085. tcprcvwin(s);
  2086. /* By default we will generate an ack */
  2087. tcb->rcv.una = 0;
  2088. seg.source = s->lport;
  2089. seg.dest = s->rport;
  2090. seg.flags = ACK;
  2091. seg.mss = 0;
  2092. switch(tcb->state){
  2093. case Syn_sent:
  2094. seg.flags = 0;
  2095. if(tcb->snd.ptr == tcb->iss){
  2096. seg.flags |= SYN;
  2097. dsize--;
  2098. seg.mss = tcpmtu(s->p, s->laddr, s->ipversion);
  2099. }
  2100. break;
  2101. case Syn_received:
  2102. /*
  2103. * don't send any data with a SYN/ACK packet
  2104. * because Linux rejects the packet in its
  2105. * attempt to solve the SYN attack problem
  2106. */
  2107. if(tcb->snd.ptr == tcb->iss){
  2108. seg.flags |= SYN;
  2109. dsize = 0;
  2110. ssize = 1;
  2111. seg.mss = tcpmtu(s->p, s->laddr, s->ipversion);
  2112. }
  2113. break;
  2114. }
  2115. seg.seq = tcb->snd.ptr;
  2116. seg.ack = tcb->rcv.nxt;
  2117. tcb->rcv.lastacked = tcb->rcv.nxt;
  2118. seg.wnd = tcb->rcv.wnd;
  2119. /* Pull out data to send */
  2120. bp = nil;
  2121. if(dsize != 0) {
  2122. bp = qcopy(s->wq, dsize, sent);
  2123. if(BLEN(bp) != dsize) {
  2124. seg.flags |= FIN;
  2125. dsize--;
  2126. }
  2127. }
  2128. if(sent+dsize == sndcnt)
  2129. seg.flags |= PSH;
  2130. /* keep track of balance of resent data */
  2131. if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
  2132. n = tcb->snd.nxt - tcb->snd.ptr;
  2133. if(ssize < n)
  2134. n = ssize;
  2135. tcb->resent += n;
  2136. netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
  2137. s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
  2138. tpriv->stats[RetransSegs]++;
  2139. }
  2140. tcb->snd.ptr += ssize;
  2141. /* Pull up the send pointer so we can accept acks
  2142. * for this window
  2143. */
  2144. if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
  2145. tcb->snd.nxt = tcb->snd.ptr;
  2146. /* Build header, link data and compute cksum */
  2147. switch(version){
  2148. case V4:
  2149. tcb->protohdr.tcp4hdr.vihl = IP_VER4;
  2150. hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
  2151. if(hbp == nil) {
  2152. freeblist(bp);
  2153. return;
  2154. }
  2155. break;
  2156. case V6:
  2157. tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
  2158. hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
  2159. if(hbp == nil) {
  2160. freeblist(bp);
  2161. return;
  2162. }
  2163. break;
  2164. default:
  2165. hbp = nil; /* to suppress a warning */
  2166. panic("tcpoutput: version %d", version);
  2167. }
  2168. /* Start the transmission timers if there is new data and we
  2169. * expect acknowledges
  2170. */
  2171. if(ssize != 0){
  2172. if(tcb->timer.state != TcptimerON)
  2173. tcpgo(tpriv, &tcb->timer);
  2174. /* If round trip timer isn't running, start it.
  2175. * measure the longest packet only in case the
  2176. * transmission time dominates RTT
  2177. */
  2178. if(tcb->rtt_timer.state != TcptimerON)
  2179. if(ssize == tcb->mss) {
  2180. tcpgo(tpriv, &tcb->rtt_timer);
  2181. tcb->rttseq = tcb->snd.ptr;
  2182. }
  2183. }
  2184. tpriv->stats[OutSegs]++;
  2185. if(tcb->kacounter > 0)
  2186. tcpgo(tpriv, &tcb->katimer);
  2187. switch(version){
  2188. case V4:
  2189. ipoput4(f, hbp, 0, s->ttl, s->tos);
  2190. break;
  2191. case V6:
  2192. ipoput6(f, hbp, 0, s->ttl, s->tos);
  2193. break;
  2194. default:
  2195. panic("tcpoutput2: version %d", version);
  2196. }
  2197. if((msgs%4) == 1){
  2198. qunlock(s);
  2199. sched();
  2200. qlock(s);
  2201. }
  2202. }
  2203. }
  2204. /*
  2205. * the BSD convention (hack?) for keep alives. resend last uchar acked.
  2206. */
  2207. void
  2208. tcpsendka(Conv *s)
  2209. {
  2210. Tcp seg;
  2211. Tcpctl *tcb;
  2212. Block *hbp,*dbp;
  2213. tcb = (Tcpctl*)s->ptcl;
  2214. dbp = nil;
  2215. seg.urg = 0;
  2216. seg.source = s->lport;
  2217. seg.dest = s->rport;
  2218. seg.flags = ACK|PSH;
  2219. seg.mss = 0;
  2220. if(tcpporthogdefense)
  2221. seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20);
  2222. else
  2223. seg.seq = tcb->snd.una-1;
  2224. seg.ack = tcb->rcv.nxt;
  2225. tcb->rcv.lastacked = tcb->rcv.nxt;
  2226. seg.wnd = tcb->rcv.wnd;
  2227. if(tcb->state == Finwait2){
  2228. seg.flags |= FIN;
  2229. } else {
  2230. dbp = allocb(1);
  2231. dbp->wp++;
  2232. }
  2233. if(isv4(s->raddr)) {
  2234. /* Build header, link data and compute cksum */
  2235. tcb->protohdr.tcp4hdr.vihl = IP_VER4;
  2236. hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb);
  2237. if(hbp == nil) {
  2238. freeblist(dbp);
  2239. return;
  2240. }
  2241. ipoput4(s->p->f, hbp, 0, s->ttl, s->tos);
  2242. }
  2243. else {
  2244. /* Build header, link data and compute cksum */
  2245. tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
  2246. hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb);
  2247. if(hbp == nil) {
  2248. freeblist(dbp);
  2249. return;
  2250. }
  2251. ipoput6(s->p->f, hbp, 0, s->ttl, s->tos);
  2252. }
  2253. }
  2254. /*
  2255. * if we've timed out, close the connection
  2256. * otherwise, send a keepalive and restart the timer
  2257. */
  2258. void
  2259. tcpsetkacounter(Tcpctl *tcb)
  2260. {
  2261. tcb->kacounter = MAXBACKMS / (tcb->katimer.start*MSPTICK);;
  2262. if(tcb->kacounter < 3)
  2263. tcb->kacounter = 3;
  2264. }
  2265. void
  2266. tcpkeepalive(void *v)
  2267. {
  2268. Tcpctl *tcb;
  2269. Conv *s;
  2270. s = v;
  2271. tcb = (Tcpctl*)s->ptcl;
  2272. if(waserror()){
  2273. qunlock(s);
  2274. nexterror();
  2275. }
  2276. qlock(s);
  2277. if(tcb->state != Closed){
  2278. if(--(tcb->kacounter) <= 0) {
  2279. localclose(s, Etimedout);
  2280. } else {
  2281. tcpsendka(s);
  2282. tcpgo(s->p->priv, &tcb->katimer);
  2283. }
  2284. }
  2285. qunlock(s);
  2286. poperror();
  2287. }
  2288. /*
  2289. * start keepalive timer
  2290. */
  2291. char*
  2292. tcpstartka(Conv *s, char **f, int n)
  2293. {
  2294. Tcpctl *tcb;
  2295. int x;
  2296. tcb = (Tcpctl*)s->ptcl;
  2297. if(n > 1){
  2298. x = atoi(f[1]);
  2299. if(x >= MSPTICK)
  2300. tcb->katimer.start = x/MSPTICK;
  2301. }
  2302. tcpsetkacounter(tcb);
  2303. tcpgo(s->p->priv, &tcb->katimer);
  2304. return nil;
  2305. }
  2306. /*
  2307. * turn checksums on/off
  2308. */
  2309. char*
  2310. tcpsetchecksum(Conv *s, char **f, int)
  2311. {
  2312. Tcpctl *tcb;
  2313. tcb = (Tcpctl*)s->ptcl;
  2314. tcb->nochecksum = !atoi(f[1]);
  2315. return nil;
  2316. }
  2317. void
  2318. tcprxmit(Conv *s)
  2319. {
  2320. Tcpctl *tcb;
  2321. tcb = (Tcpctl*)s->ptcl;
  2322. tcb->flags |= RETRAN|FORCE;
  2323. tcb->snd.ptr = tcb->snd.una;
  2324. /*
  2325. * We should be halving the slow start thershhold (down to one
  2326. * mss) but leaving it at mss seems to work well enough
  2327. */
  2328. tcb->ssthresh = tcb->mss;
  2329. /*
  2330. * pull window down to a single packet
  2331. */
  2332. tcb->cwind = tcb->mss;
  2333. tcpoutput(s);
  2334. }
  2335. void
  2336. tcptimeout(void *arg)
  2337. {
  2338. Conv *s;
  2339. Tcpctl *tcb;
  2340. int maxback;
  2341. Tcppriv *tpriv;
  2342. s = (Conv*)arg;
  2343. tpriv = s->p->priv;
  2344. tcb = (Tcpctl*)s->ptcl;
  2345. if(waserror()){
  2346. qunlock(s);
  2347. nexterror();
  2348. }
  2349. qlock(s);
  2350. switch(tcb->state){
  2351. default:
  2352. tcb->backoff++;
  2353. if(tcb->state == Syn_sent)
  2354. maxback = MAXBACKMS/2;
  2355. else
  2356. maxback = MAXBACKMS;
  2357. tcb->backedoff += tcb->timer.start * MSPTICK;
  2358. if(tcb->backedoff >= maxback) {
  2359. localclose(s, Etimedout);
  2360. break;
  2361. }
  2362. netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
  2363. tcpsettimer(tcb);
  2364. tcprxmit(s);
  2365. tpriv->stats[RetransTimeouts]++;
  2366. tcb->snd.dupacks = 0;
  2367. break;
  2368. case Time_wait:
  2369. localclose(s, nil);
  2370. break;
  2371. case Closed:
  2372. break;
  2373. }
  2374. qunlock(s);
  2375. poperror();
  2376. }
  2377. int
  2378. inwindow(Tcpctl *tcb, int seq)
  2379. {
  2380. return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
  2381. }
  2382. /*
  2383. * set up state for a received SYN (or SYN ACK) packet
  2384. */
  2385. void
  2386. procsyn(Conv *s, Tcp *seg)
  2387. {
  2388. Tcpctl *tcb;
  2389. tcb = (Tcpctl*)s->ptcl;
  2390. tcb->flags |= FORCE;
  2391. tcb->rcv.nxt = seg->seq + 1;
  2392. tcb->rcv.urg = tcb->rcv.nxt;
  2393. tcb->irs = seg->seq;
  2394. /* our sending max segment size cannot be bigger than what he asked for */
  2395. if(seg->mss != 0 && seg->mss < tcb->mss)
  2396. tcb->mss = seg->mss;
  2397. /* the congestion window always starts out as a single segment */
  2398. tcb->snd.wnd = seg->wnd;
  2399. tcb->cwind = tcb->mss;
  2400. }
  2401. int
  2402. addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
  2403. {
  2404. Reseq *rp, *rp1;
  2405. int i;
  2406. static int once;
  2407. rp = malloc(sizeof(Reseq));
  2408. if(rp == nil){
  2409. freeblist(bp); /* bp always consumed by add_reseq */
  2410. return 0;
  2411. }
  2412. rp->seg = *seg;
  2413. rp->bp = bp;
  2414. rp->length = length;
  2415. /* Place on reassembly list sorting by starting seq number */
  2416. rp1 = tcb->reseq;
  2417. if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
  2418. rp->next = rp1;
  2419. tcb->reseq = rp;
  2420. if(rp->next != nil)
  2421. tpriv->stats[OutOfOrder]++;
  2422. return 0;
  2423. }
  2424. length = 0;
  2425. for(i = 0;; i++) {
  2426. length += rp1->length;
  2427. if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
  2428. rp->next = rp1->next;
  2429. rp1->next = rp;
  2430. if(rp->next != nil)
  2431. tpriv->stats[OutOfOrder]++;
  2432. break;
  2433. }
  2434. rp1 = rp1->next;
  2435. }
  2436. if(length > QMAX && once++ == 0){
  2437. print("very long tcp resequence queue: %d\n", length);
  2438. for(rp1 = tcb->reseq, i = 0; i < 10 && rp1 != nil; rp1 = rp1->next, i++)
  2439. print("0x%lux 0x%lux 0x%ux\n", rp1->seg.seq, rp1->seg.ack,
  2440. rp1->seg.flags);
  2441. return -1;
  2442. }
  2443. return 0;
  2444. }
  2445. void
  2446. getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
  2447. {
  2448. Reseq *rp;
  2449. rp = tcb->reseq;
  2450. if(rp == nil)
  2451. return;
  2452. tcb->reseq = rp->next;
  2453. *seg = rp->seg;
  2454. *bp = rp->bp;
  2455. *length = rp->length;
  2456. free(rp);
  2457. }
  2458. int
  2459. tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
  2460. {
  2461. ushort len;
  2462. uchar accept;
  2463. int dupcnt, excess;
  2464. accept = 0;
  2465. len = *length;
  2466. if(seg->flags & SYN)
  2467. len++;
  2468. if(seg->flags & FIN)
  2469. len++;
  2470. if(tcb->rcv.wnd == 0) {
  2471. if(len == 0 && seg->seq == tcb->rcv.nxt)
  2472. return 0;
  2473. }
  2474. else {
  2475. /* Some part of the segment should be in the window */
  2476. if(inwindow(tcb,seg->seq))
  2477. accept++;
  2478. else
  2479. if(len != 0) {
  2480. if(inwindow(tcb, seg->seq+len-1) ||
  2481. seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
  2482. accept++;
  2483. }
  2484. }
  2485. if(!accept) {
  2486. freeblist(*bp);
  2487. return -1;
  2488. }
  2489. dupcnt = tcb->rcv.nxt - seg->seq;
  2490. if(dupcnt > 0){
  2491. tcb->rerecv += dupcnt;
  2492. if(seg->flags & SYN){
  2493. seg->flags &= ~SYN;
  2494. seg->seq++;
  2495. if(seg->urg > 1)
  2496. seg->urg--;
  2497. else
  2498. seg->flags &= ~URG;
  2499. dupcnt--;
  2500. }
  2501. if(dupcnt > 0){
  2502. pullblock(bp, (ushort)dupcnt);
  2503. seg->seq += dupcnt;
  2504. *length -= dupcnt;
  2505. if(seg->urg > dupcnt)
  2506. seg->urg -= dupcnt;
  2507. else {
  2508. seg->flags &= ~URG;
  2509. seg->urg = 0;
  2510. }
  2511. }
  2512. }
  2513. excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);
  2514. if(excess > 0) {
  2515. tcb->rerecv += excess;
  2516. *length -= excess;
  2517. *bp = trimblock(*bp, 0, *length);
  2518. if(*bp == nil)
  2519. panic("presotto is a boofhead");
  2520. seg->flags &= ~FIN;
  2521. }
  2522. return 0;
  2523. }
  2524. void
  2525. tcpadvise(Proto *tcp, Block *bp, char *msg)
  2526. {
  2527. Tcp4hdr *h4;
  2528. Tcp6hdr *h6;
  2529. Tcpctl *tcb;
  2530. uchar source[IPaddrlen];
  2531. uchar dest[IPaddrlen];
  2532. ushort psource, pdest;
  2533. Conv *s, **p;
  2534. h4 = (Tcp4hdr*)(bp->rp);
  2535. h6 = (Tcp6hdr*)(bp->rp);
  2536. if((h4->vihl&0xF0)==IP_VER4) {
  2537. v4tov6(dest, h4->tcpdst);
  2538. v4tov6(source, h4->tcpsrc);
  2539. psource = nhgets(h4->tcpsport);
  2540. pdest = nhgets(h4->tcpdport);
  2541. }
  2542. else {
  2543. ipmove(dest, h6->tcpdst);
  2544. ipmove(source, h6->tcpsrc);
  2545. psource = nhgets(h6->tcpsport);
  2546. pdest = nhgets(h6->tcpdport);
  2547. }
  2548. /* Look for a connection */
  2549. qlock(tcp);
  2550. for(p = tcp->conv; *p; p++) {
  2551. s = *p;
  2552. tcb = (Tcpctl*)s->ptcl;
  2553. if(s->rport == pdest)
  2554. if(s->lport == psource)
  2555. if(tcb->state != Closed)
  2556. if(ipcmp(s->raddr, dest) == 0)
  2557. if(ipcmp(s->laddr, source) == 0){
  2558. qlock(s);
  2559. qunlock(tcp);
  2560. switch(tcb->state){
  2561. case Syn_sent:
  2562. localclose(s, msg);
  2563. break;
  2564. }
  2565. qunlock(s);
  2566. freeblist(bp);
  2567. return;
  2568. }
  2569. }
  2570. qunlock(tcp);
  2571. freeblist(bp);
  2572. }
  2573. /* called with c qlocked */
  2574. char*
  2575. tcpctl(Conv* c, char** f, int n)
  2576. {
  2577. if(n == 1 && strcmp(f[0], "hangup") == 0)
  2578. return tcphangup(c);
  2579. if(n >= 1 && strcmp(f[0], "keepalive") == 0)
  2580. return tcpstartka(c, f, n);
  2581. if(n >= 1 && strcmp(f[0], "checksum") == 0)
  2582. return tcpsetchecksum(c, f, n);
  2583. return "unknown control request";
  2584. }
  2585. int
  2586. tcpstats(Proto *tcp, char *buf, int len)
  2587. {
  2588. Tcppriv *priv;
  2589. char *p, *e;
  2590. int i;
  2591. priv = tcp->priv;
  2592. p = buf;
  2593. e = p+len;
  2594. for(i = 0; i < Nstats; i++)
  2595. p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
  2596. return p - buf;
  2597. }
  2598. /*
  2599. * garbage collect any stale conversations:
  2600. * - SYN received but no SYN-ACK after 5 seconds (could be the SYN attack)
  2601. * - Finwait2 after 5 minutes
  2602. *
  2603. * this is called whenever we run out of channels. Both checks are
  2604. * of questionable validity so we try to use them only when we're
  2605. * up against the wall.
  2606. */
  2607. int
  2608. tcpgc(Proto *tcp)
  2609. {
  2610. Conv *c, **pp, **ep;
  2611. int n;
  2612. Tcpctl *tcb;
  2613. n = 0;
  2614. ep = &tcp->conv[tcp->nc];
  2615. for(pp = tcp->conv; pp < ep; pp++) {
  2616. c = *pp;
  2617. if(c == nil)
  2618. break;
  2619. if(!canqlock(c))
  2620. continue;
  2621. tcb = (Tcpctl*)c->ptcl;
  2622. switch(tcb->state){
  2623. case Syn_received:
  2624. if(NOW - tcb->time > 5000){
  2625. localclose(c, "timed out");
  2626. n++;
  2627. }
  2628. break;
  2629. case Finwait2:
  2630. if(NOW - tcb->time > 5*60*1000){
  2631. localclose(c, "timed out");
  2632. n++;
  2633. }
  2634. break;
  2635. }
  2636. qunlock(c);
  2637. }
  2638. return n;
  2639. }
  2640. void
  2641. tcpsettimer(Tcpctl *tcb)
  2642. {
  2643. int x;
  2644. /* round trip depenency */
  2645. x = backoff(tcb->backoff) *
  2646. (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
  2647. /* bounded twixt 1/2 and 10 seconds */
  2648. if(x < 500/MSPTICK)
  2649. x = 500/MSPTICK;
  2650. else if(x > (10000/MSPTICK))
  2651. x = 10000/MSPTICK;
  2652. tcb->timer.start = x;
  2653. }
  2654. void
  2655. tcpinit(Fs *fs)
  2656. {
  2657. Proto *tcp;
  2658. Tcppriv *tpriv;
  2659. tcp = smalloc(sizeof(Proto));
  2660. tpriv = tcp->priv = smalloc(sizeof(Tcppriv));
  2661. tcp->name = "tcp";
  2662. tcp->connect = tcpconnect;
  2663. tcp->announce = tcpannounce;
  2664. tcp->ctl = tcpctl;
  2665. tcp->state = tcpstate;
  2666. tcp->create = tcpcreate;
  2667. tcp->close = tcpclose;
  2668. tcp->rcv = tcpiput;
  2669. tcp->advise = tcpadvise;
  2670. tcp->stats = tcpstats;
  2671. tcp->inuse = tcpinuse;
  2672. tcp->gc = tcpgc;
  2673. tcp->ipproto = IP_TCPPROTO;
  2674. tcp->nc = scalednconv();
  2675. tcp->ptclsize = sizeof(Tcpctl);
  2676. tpriv->stats[MaxConn] = tcp->nc;
  2677. Fsproto(fs, tcp);
  2678. }