/* devbridge.c */
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
/*
 * IPv4 Ethernet bridge
 */
  12. #include "u.h"
  13. #include "../port/lib.h"
  14. #include "mem.h"
  15. #include "dat.h"
  16. #include "fns.h"
  17. #include "../ip/ip.h"
  18. #include "../port/netif.h"
  19. #include "../port/error.h"
  20. typedef struct Bridge Bridge;
  21. typedef struct Port Port;
  22. typedef struct Centry Centry;
  23. typedef struct Iphdr Iphdr;
  24. typedef struct Tcphdr Tcphdr;
  25. enum
  26. {
  27. Qtopdir= 1, /* top level directory */
  28. Qbridgedir, /* bridge* directory */
  29. Qbctl,
  30. Qstats,
  31. Qcache,
  32. Qlog,
  33. Qportdir, /* directory for a protocol */
  34. Qpctl,
  35. Qlocal,
  36. Qstatus,
  37. MaxQ,
  38. Maxbridge= 4,
  39. Maxport= 128, // power of 2
  40. CacheHash= 257, // prime
  41. CacheLook= 5, // how many cache entries to examine
  42. CacheSize= (CacheHash+CacheLook-1),
  43. CacheTimeout= 5*60, // timeout for cache entry in seconds
  44. MaxMTU= IP_MAX, // allow for jumbo frames and large UDP
  45. TcpMssMax = 1300, // max desirable Tcp MSS value
  46. TunnelMtu = 1400,
  47. };
  48. static Dirtab bridgedirtab[]={
  49. "ctl", {Qbctl}, 0, 0666,
  50. "stats", {Qstats}, 0, 0444,
  51. "cache", {Qcache}, 0, 0444,
  52. "log", {Qlog}, 0, 0666,
  53. };
  54. static Dirtab portdirtab[]={
  55. "ctl", {Qpctl}, 0, 0666,
  56. "local", {Qlocal}, 0, 0444,
  57. "status", {Qstatus}, 0, 0444,
  58. };
  59. enum {
  60. Logcache= (1<<0),
  61. Logmcast= (1<<1),
  62. };
  63. // types of interfaces
  64. enum
  65. {
  66. Tether,
  67. Ttun,
  68. };
  69. static Logflag logflags[] =
  70. {
  71. { "cache", Logcache, },
  72. { "multicast", Logmcast, },
  73. { nil, 0, },
  74. };
  75. static Dirtab *dirtab[MaxQ];
  76. #define TYPE(x) (((uint32_t)(x).path) & 0xff)
  77. #define PORT(x) ((((uint32_t)(x).path) >> 8)&(Maxport-1))
  78. #define QID(x, y) (((x)<<8) | (y))
  79. struct Centry
  80. {
  81. uint8_t d[Eaddrlen];
  82. int port;
  83. long expire; // entry expires this many seconds after bootime
  84. long src;
  85. long dst;
  86. };
  87. struct Bridge
  88. {
  89. QLock QLock;
  90. int nport;
  91. Port *port[Maxport];
  92. Centry cache[CacheSize];
  93. unsigned long hit;
  94. unsigned long miss;
  95. unsigned long copy;
  96. long delay0; // constant microsecond delay per packet
  97. long delayn; // microsecond delay per byte
  98. int tcpmss; // modify tcpmss value
  99. Log Log;
  100. };
  101. struct Port
  102. {
  103. Ref Ref;
  104. int id;
  105. Bridge *bridge;
  106. int closed;
  107. Chan *data[2]; // channel to data
  108. Proc *readp; // read proc
  109. // the following uniquely identifies the port
  110. int type;
  111. char name[KNAMELEN];
  112. // owner hash - avoids bind/unbind races
  113. unsigned long ownhash;
  114. // various stats
  115. int in; // number of packets read
  116. int inmulti; // multicast or broadcast
  117. int inunknown; // unknown address
  118. int out; // number of packets read
  119. int outmulti; // multicast or broadcast
  120. int outunknown; // unknown address
  121. int outfrag; // fragmented the packet
  122. int nentry; // number of cache entries for this port
  123. };
  124. enum {
  125. IP_TCPPROTO = 6,
  126. EOLOPT = 0,
  127. NOOPOPT = 1,
  128. MSSOPT = 2,
  129. MSS_LENGTH = 4, /* Mean segment size */
  130. SYN = 0x02, /* Pkt. is synchronise */
  131. IPHDR = 20, /* sizeof(Iphdr) */
  132. };
  133. struct Iphdr
  134. {
  135. uint8_t vihl; /* Version and header length */
  136. uint8_t tos; /* Type of service */
  137. uint8_t length[2]; /* packet length */
  138. uint8_t id[2]; /* ip->identification */
  139. uint8_t frag[2]; /* Fragment information */
  140. uint8_t ttl; /* Time to live */
  141. uint8_t proto; /* Protocol */
  142. uint8_t cksum[2]; /* Header checksum */
  143. uint8_t src[4]; /* IP source */
  144. uint8_t dst[4]; /* IP destination */
  145. };
  146. struct Tcphdr
  147. {
  148. uint8_t sport[2];
  149. uint8_t dport[2];
  150. uint8_t seq[4];
  151. uint8_t ack[4];
  152. uint8_t flag[2];
  153. uint8_t win[2];
  154. uint8_t cksum[2];
  155. uint8_t urg[2];
  156. };
  157. static Bridge bridgetab[Maxbridge];
  158. static int m2p[] = {
  159. [OREAD] = 4,
  160. [OWRITE] = 2,
  161. [ORDWR] = 6
  162. };
  163. static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
  164. static void portbind(Bridge *b, int argc, char *argv[]);
  165. static void portunbind(Bridge *b, int argc, char *argv[]);
  166. static void etherread(void *a);
  167. static char *cachedump(Bridge *b);
  168. static void portfree(Port *port);
  169. static void cacheflushport(Bridge *b, int port);
  170. static void etherwrite(Port *port, Block *bp);
  171. static void
  172. bridgeinit(void)
  173. {
  174. int i;
  175. Dirtab *dt;
  176. // setup dirtab with non directory entries
  177. for(i=0; i<nelem(bridgedirtab); i++) {
  178. dt = bridgedirtab + i;
  179. dirtab[TYPE(dt->qid)] = dt;
  180. }
  181. for(i=0; i<nelem(portdirtab); i++) {
  182. dt = portdirtab + i;
  183. dirtab[TYPE(dt->qid)] = dt;
  184. }
  185. }
  186. static Chan*
  187. bridgeattach(char* spec)
  188. {
  189. Chan *c;
  190. int dev;
  191. dev = atoi(spec);
  192. print("at %s %d\n", spec, dev);
  193. if(dev<0 || dev >= Maxbridge)
  194. error("bad specification");
  195. c = devattach('B', spec);
  196. mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
  197. c->devno = dev;
  198. return c;
  199. }
  200. static Walkqid*
  201. bridgewalk(Chan *c, Chan *nc, char **name, int nname)
  202. {
  203. return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
  204. }
  205. static int
  206. bridgestat(Chan* c, uint8_t* db, int n)
  207. {
  208. return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
  209. }
  210. static Chan*
  211. bridgeopen(Chan* c, int omode)
  212. {
  213. int perm;
  214. Bridge *b;
  215. omode &= 3;
  216. perm = m2p[omode];
  217. USED(perm);
  218. b = bridgetab + c->devno;
  219. USED(b);
  220. switch(TYPE(c->qid)) {
  221. default:
  222. break;
  223. case Qlog:
  224. logopen(&b->Log);
  225. break;
  226. case Qcache:
  227. c->aux = cachedump(b);
  228. break;
  229. }
  230. c->mode = openmode(omode);
  231. c->flag |= COPEN;
  232. c->offset = 0;
  233. return c;
  234. }
  235. static void
  236. bridgeclose(Chan* c)
  237. {
  238. Bridge *b = bridgetab + c->devno;
  239. switch(TYPE(c->qid)) {
  240. case Qcache:
  241. if(c->flag & COPEN)
  242. free(c->aux);
  243. break;
  244. case Qlog:
  245. if(c->flag & COPEN)
  246. logclose(&b->Log);
  247. break;
  248. }
  249. }
  250. static int32_t
  251. bridgeread(Chan *c, void *a, int32_t n, int64_t off)
  252. {
  253. Proc *up = externup();
  254. char buf[256];
  255. Bridge *b = bridgetab + c->devno;
  256. Port *port;
  257. int i, ingood, outgood;
  258. USED(off);
  259. switch(TYPE(c->qid)) {
  260. default:
  261. error(Egreg);
  262. case Qtopdir:
  263. case Qbridgedir:
  264. case Qportdir:
  265. return devdirread(c, a, n, 0, 0, bridgegen);
  266. case Qlog:
  267. return logread(&b->Log, a, off, n);
  268. case Qlocal:
  269. return 0; /* TO DO */
  270. case Qstatus:
  271. qlock(&b->QLock);
  272. if(waserror()){
  273. qunlock(&b->QLock);
  274. nexterror();
  275. }
  276. port = b->port[PORT(c->qid)];
  277. if(port == 0)
  278. strcpy(buf, "unbound\n");
  279. else {
  280. i = 0;
  281. switch(port->type) {
  282. default:
  283. panic("bridgeread: unknown port type: %d",
  284. port->type);
  285. case Tether:
  286. i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
  287. break;
  288. case Ttun:
  289. i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
  290. break;
  291. }
  292. ingood = port->in - port->inmulti - port->inunknown;
  293. outgood = port->out - port->outmulti - port->outunknown;
  294. snprint(buf+i, sizeof(buf)-i,
  295. "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
  296. port->in, ingood, port->inmulti, port->inunknown,
  297. port->out, outgood, port->outmulti,
  298. port->outunknown, port->outfrag);
  299. }
  300. poperror();
  301. qunlock(&b->QLock);
  302. return readstr(off, a, n, buf);
  303. case Qbctl:
  304. snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
  305. b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
  306. n = readstr(off, a, n, buf);
  307. return n;
  308. case Qcache:
  309. n = readstr(off, a, n, c->aux);
  310. return n;
  311. case Qstats:
  312. snprint(buf, sizeof(buf), "hit=%lu miss=%lu copy=%lu\n",
  313. b->hit, b->miss, b->copy);
  314. n = readstr(off, a, n, buf);
  315. return n;
  316. }
  317. }
  318. static void
  319. bridgeoption(Bridge *b, char *option, int value)
  320. {
  321. if(strcmp(option, "tcpmss") == 0)
  322. b->tcpmss = value;
  323. else
  324. error("unknown bridge option");
  325. }
  326. static int32_t
  327. bridgewrite(Chan *c, void *a, int32_t n, int64_t off)
  328. {
  329. Proc *up = externup();
  330. Bridge *b = bridgetab + c->devno;
  331. Cmdbuf *cb;
  332. char *arg0, *p;
  333. USED(off);
  334. switch(TYPE(c->qid)) {
  335. default:
  336. error(Eperm);
  337. case Qbctl:
  338. cb = parsecmd(a, n);
  339. qlock(&b->QLock);
  340. if(waserror()) {
  341. qunlock(&b->QLock);
  342. free(cb);
  343. nexterror();
  344. }
  345. if(cb->nf == 0)
  346. error("short write");
  347. arg0 = cb->f[0];
  348. if(strcmp(arg0, "bind") == 0) {
  349. portbind(b, cb->nf-1, cb->f+1);
  350. } else if(strcmp(arg0, "unbind") == 0) {
  351. portunbind(b, cb->nf-1, cb->f+1);
  352. } else if(strcmp(arg0, "cacheflush") == 0) {
  353. log(&b->Log, Logcache, "cache flush\n");
  354. memset(b->cache, 0, CacheSize*sizeof(Centry));
  355. } else if(strcmp(arg0, "set") == 0) {
  356. if(cb->nf != 2)
  357. error("usage: set option");
  358. bridgeoption(b, cb->f[1], 1);
  359. } else if(strcmp(arg0, "clear") == 0) {
  360. if(cb->nf != 2)
  361. error("usage: clear option");
  362. bridgeoption(b, cb->f[1], 0);
  363. } else if(strcmp(arg0, "delay") == 0) {
  364. if(cb->nf != 3)
  365. error("usage: delay delay0 delayn");
  366. b->delay0 = strtol(cb->f[1], nil, 10);
  367. b->delayn = strtol(cb->f[2], nil, 10);
  368. } else
  369. error("unknown control request");
  370. poperror();
  371. qunlock(&b->QLock);
  372. free(cb);
  373. return n;
  374. case Qlog:
  375. cb = parsecmd(a, n);
  376. p = logctl(&b->Log, cb->nf, cb->f, logflags);
  377. free(cb);
  378. if(p != nil)
  379. error(p);
  380. return n;
  381. }
  382. }
  383. static int
  384. bridgegen(Chan *c, char *_, Dirtab*__, int ___, int s, Dir *dp)
  385. {
  386. Proc *up = externup();
  387. Bridge *b = bridgetab + c->devno;
  388. int type = TYPE(c->qid);
  389. Dirtab *dt;
  390. Qid qid;
  391. if(s == DEVDOTDOT){
  392. switch(TYPE(c->qid)){
  393. case Qtopdir:
  394. case Qbridgedir:
  395. snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->devno);
  396. mkqid(&qid, Qtopdir, 0, QTDIR);
  397. devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
  398. break;
  399. case Qportdir:
  400. snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->devno);
  401. mkqid(&qid, Qbridgedir, 0, QTDIR);
  402. devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
  403. break;
  404. default:
  405. panic("bridgewalk %llux", c->qid.path);
  406. }
  407. return 1;
  408. }
  409. switch(type) {
  410. default:
  411. /* non-directory entries end up here */
  412. if(c->qid.type & QTDIR)
  413. panic("bridgegen: unexpected directory");
  414. if(s != 0)
  415. return -1;
  416. dt = dirtab[TYPE(c->qid)];
  417. if(dt == nil)
  418. panic("bridgegen: unknown type: %lu", TYPE(c->qid));
  419. devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
  420. return 1;
  421. case Qtopdir:
  422. if(s != 0)
  423. return -1;
  424. snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->devno);
  425. mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
  426. devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
  427. return 1;
  428. case Qbridgedir:
  429. if(s<nelem(bridgedirtab)) {
  430. dt = bridgedirtab+s;
  431. devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
  432. return 1;
  433. }
  434. s -= nelem(bridgedirtab);
  435. if(s >= b->nport)
  436. return -1;
  437. mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
  438. snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
  439. devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
  440. return 1;
  441. case Qportdir:
  442. if(s>=nelem(portdirtab))
  443. return -1;
  444. dt = portdirtab+s;
  445. mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
  446. devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
  447. return 1;
  448. }
  449. }
  450. // parse mac address; also in netif.c
  451. int
  452. parseaddr(uint8_t *to, char *from, int alen)
  453. {
  454. char nip[4];
  455. char *p;
  456. int i;
  457. p = from;
  458. for(i = 0; i < alen; i++){
  459. if(*p == 0)
  460. return -1;
  461. nip[0] = *p++;
  462. if(*p == 0)
  463. return -1;
  464. nip[1] = *p++;
  465. nip[2] = 0;
  466. to[i] = strtoul(nip, 0, 16);
  467. if(*p == ':')
  468. p++;
  469. }
  470. return 0;
  471. }
  472. // assumes b is locked
  473. static void
  474. portbind(Bridge *b, int argc, char *argv[])
  475. {
  476. Proc *up = externup();
  477. Port *port;
  478. Chan *ctl;
  479. int type = 0, i, n;
  480. unsigned long ownhash;
  481. char *dev, *dev2 = nil;
  482. char buf[100], name[KNAMELEN], path[8*KNAMELEN];
  483. static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
  484. extern Dev *devtab[];
  485. memset(name, 0, KNAMELEN);
  486. if(argc < 4)
  487. error(usage);
  488. if(strcmp(argv[0], "ether") == 0) {
  489. if(argc != 4)
  490. error(usage);
  491. type = Tether;
  492. strncpy(name, argv[1], KNAMELEN);
  493. name[KNAMELEN-1] = 0;
  494. // parseaddr(addr, argv[1], Eaddrlen);
  495. } else if(strcmp(argv[0], "tunnel") == 0) {
  496. if(argc != 5)
  497. error(usage);
  498. type = Ttun;
  499. strncpy(name, argv[1], KNAMELEN);
  500. name[KNAMELEN-1] = 0;
  501. // parseip(addr, argv[1]);
  502. dev2 = argv[4];
  503. } else
  504. error(usage);
  505. ownhash = atoi(argv[2]);
  506. dev = argv[3];
  507. for(i=0; i<b->nport; i++) {
  508. port = b->port[i];
  509. if(port != nil && port->type == type &&
  510. memcmp(port->name, name, KNAMELEN) == 0)
  511. error("port in use");
  512. }
  513. for(i=0; i<Maxport; i++)
  514. if(b->port[i] == nil)
  515. break;
  516. if(i == Maxport)
  517. error("no more ports");
  518. port = smalloc(sizeof(Port));
  519. port->Ref.ref = 1;
  520. port->id = i;
  521. port->ownhash = ownhash;
  522. if(waserror()) {
  523. portfree(port);
  524. nexterror();
  525. }
  526. port->type = type;
  527. memmove(port->name, name, KNAMELEN);
  528. switch(port->type) {
  529. default:
  530. panic("portbind: unknown port type: %d", type);
  531. case Tether:
  532. snprint(path, sizeof(path), "%s/clone", dev);
  533. ctl = namec(path, Aopen, ORDWR, 0);
  534. if(waserror()) {
  535. cclose(ctl);
  536. nexterror();
  537. }
  538. // check addr?
  539. // get directory name
  540. n = devtab[ctl->qid.type]->read(ctl, buf, sizeof(buf)-1, 0);
  541. buf[n] = 0;
  542. snprint(path, sizeof(path), "%s/%lu/data", dev, strtoul(buf, 0, 0));
  543. // setup connection to be promiscuous
  544. snprint(buf, sizeof(buf), "connect -1");
  545. devtab[ctl->qid.type]->write(ctl, buf, strlen(buf), 0);
  546. snprint(buf, sizeof(buf), "promiscuous");
  547. devtab[ctl->qid.type]->write(ctl, buf, strlen(buf), 0);
  548. snprint(buf, sizeof(buf), "bridge");
  549. devtab[ctl->qid.type]->write(ctl, buf, strlen(buf), 0);
  550. // open data port
  551. port->data[0] = namec(path, Aopen, ORDWR, 0);
  552. // dup it
  553. incref(&port->data[0]->r);
  554. port->data[1] = port->data[0];
  555. poperror();
  556. cclose(ctl);
  557. break;
  558. case Ttun:
  559. port->data[0] = namec(dev, Aopen, OREAD, 0);
  560. port->data[1] = namec(dev2, Aopen, OWRITE, 0);
  561. break;
  562. }
  563. poperror();
  564. /* committed to binding port */
  565. b->port[port->id] = port;
  566. port->bridge = b;
  567. if(b->nport <= port->id)
  568. b->nport = port->id+1;
  569. // assumes kproc always succeeds
  570. incref(&port->Ref);
  571. snprint(buf, sizeof(buf), "bridge:%s", dev);
  572. kproc(buf, etherread, port);
  573. }
  574. // assumes b is locked
  575. static void
  576. portunbind(Bridge *b, int argc, char *argv[])
  577. {
  578. int type = 0, i;
  579. char name[KNAMELEN];
  580. unsigned long ownhash;
  581. Port *port = nil;
  582. static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
  583. memset(name, 0, KNAMELEN);
  584. if(argc < 2 || argc > 3)
  585. error(usage);
  586. if(strcmp(argv[0], "ether") == 0) {
  587. type = Tether;
  588. strncpy(name, argv[1], KNAMELEN);
  589. name[KNAMELEN-1] = 0;
  590. // parseaddr(addr, argv[1], Eaddrlen);
  591. } else if(strcmp(argv[0], "tunnel") == 0) {
  592. type = Ttun;
  593. strncpy(name, argv[1], KNAMELEN);
  594. name[KNAMELEN-1] = 0;
  595. // parseip(addr, argv[1]);
  596. } else
  597. error(usage);
  598. if(argc == 3)
  599. ownhash = atoi(argv[2]);
  600. else
  601. ownhash = 0;
  602. for(i=0; i<b->nport; i++) {
  603. port = b->port[i];
  604. if(port != nil && port->type == type &&
  605. memcmp(port->name, name, KNAMELEN) == 0)
  606. break;
  607. }
  608. if(i == b->nport)
  609. error("port not found");
  610. if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
  611. error("bad owner hash");
  612. port->closed = 1;
  613. b->port[i] = nil; // port is now unbound
  614. cacheflushport(b, i);
  615. // try and stop reader
  616. if(port->readp)
  617. postnote(port->readp, 1, "unbind", 0);
  618. portfree(port);
  619. }
  620. // assumes b is locked
  621. static Centry *
  622. cachelookup(Bridge *b, uint8_t d[Eaddrlen])
  623. {
  624. int i;
  625. uint h;
  626. Centry *p;
  627. long sec;
  628. // dont cache multicast or broadcast
  629. if(d[0] & 1)
  630. return 0;
  631. h = 0;
  632. for(i=0; i<Eaddrlen; i++) {
  633. h *= 7;
  634. h += d[i];
  635. }
  636. h %= CacheHash;
  637. p = b->cache + h;
  638. sec = TK2SEC(machp()->ticks);
  639. for(i=0; i<CacheLook; i++,p++) {
  640. if(memcmp(d, p->d, Eaddrlen) == 0) {
  641. p->dst++;
  642. if(sec >= p->expire) {
  643. log(&b->Log, Logcache, "expired cache entry: %E %d\n",
  644. d, p->port);
  645. return nil;
  646. }
  647. p->expire = sec + CacheTimeout;
  648. return p;
  649. }
  650. }
  651. log(&b->Log, Logcache, "cache miss: %E\n", d);
  652. return nil;
  653. }
  654. // assumes b is locked
  655. static void
  656. cacheupdate(Bridge *b, uint8_t d[Eaddrlen], int port)
  657. {
  658. int i;
  659. uint h;
  660. Centry *p, *pp;
  661. long sec;
  662. // dont cache multicast or broadcast
  663. if(d[0] & 1) {
  664. log(&b->Log, Logcache, "bad source address: %E\n", d);
  665. return;
  666. }
  667. h = 0;
  668. for(i=0; i<Eaddrlen; i++) {
  669. h *= 7;
  670. h += d[i];
  671. }
  672. h %= CacheHash;
  673. p = b->cache + h;
  674. pp = p;
  675. sec = p->expire;
  676. // look for oldest entry
  677. for(i=0; i<CacheLook; i++,p++) {
  678. if(memcmp(p->d, d, Eaddrlen) == 0) {
  679. p->expire = TK2SEC(machp()->ticks) + CacheTimeout;
  680. if(p->port != port) {
  681. log(&b->Log, Logcache, "NIC changed port %d->%d: %E\n",
  682. p->port, port, d);
  683. p->port = port;
  684. }
  685. p->src++;
  686. return;
  687. }
  688. if(p->expire < sec) {
  689. sec = p->expire;
  690. pp = p;
  691. }
  692. }
  693. if(pp->expire != 0)
  694. log(&b->Log, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
  695. pp->expire = TK2SEC(machp()->ticks) + CacheTimeout;
  696. memmove(pp->d, d, Eaddrlen);
  697. pp->port = port;
  698. pp->src = 1;
  699. pp->dst = 0;
  700. log(&b->Log, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
  701. }
  702. // assumes b is locked
  703. static void
  704. cacheflushport(Bridge *b, int port)
  705. {
  706. Centry *ce;
  707. int i;
  708. ce = b->cache;
  709. for(i=0; i<CacheSize; i++,ce++) {
  710. if(ce->port != port)
  711. continue;
  712. memset(ce, 0, sizeof(Centry));
  713. }
  714. }
  715. static char *
  716. cachedump(Bridge *b)
  717. {
  718. Proc *up = externup();
  719. int i, n;
  720. long sec, off;
  721. char *buf, *p, *ep;
  722. Centry *ce;
  723. char c;
  724. qlock(&b->QLock);
  725. if(waserror()) {
  726. qunlock(&b->QLock);
  727. nexterror();
  728. }
  729. sec = TK2SEC(machp()->ticks);
  730. n = 0;
  731. for(i=0; i<CacheSize; i++)
  732. if(b->cache[i].expire != 0)
  733. n++;
  734. n *= 51; // change if print format is changed
  735. n += 10; // some slop at the end
  736. buf = malloc(n);
  737. if(buf == nil)
  738. error(Enomem);
  739. p = buf;
  740. ep = buf + n;
  741. ce = b->cache;
  742. off = seconds() - sec;
  743. for(i=0; i<CacheSize; i++,ce++) {
  744. if(ce->expire == 0)
  745. continue;
  746. c = (sec < ce->expire)?'v':'e';
  747. p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
  748. ce->port, ce->src, ce->dst, ce->expire+off, c);
  749. }
  750. *p = 0;
  751. poperror();
  752. qunlock(&b->QLock);
  753. return buf;
  754. }
  755. // assumes b is locked, no error return
  756. static void
  757. ethermultiwrite(Bridge *b, Block *bp, Port *port)
  758. {
  759. Port *oport;
  760. Etherpkt *ep;
  761. int i, mcast;
  762. ep = (Etherpkt*)bp->rp;
  763. mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
  764. oport = nil;
  765. for(i=0; i<b->nport; i++) {
  766. if(i == port->id || b->port[i] == nil)
  767. continue;
  768. /*
  769. * we need to forward multicast packets for ipv6,
  770. * so always do it.
  771. */
  772. if(mcast)
  773. b->port[i]->outmulti++;
  774. else
  775. b->port[i]->outunknown++;
  776. // delay one so that the last write does not copy
  777. if(oport != nil) {
  778. b->copy++;
  779. etherwrite(oport, copyblock(bp, blocklen(bp)));
  780. }
  781. oport = b->port[i];
  782. }
  783. // last write free block
  784. if(oport)
  785. etherwrite(oport, bp);
  786. else
  787. freeb(bp);
  788. }
  789. static void
  790. tcpmsshack(Etherpkt *epkt, int n)
  791. {
  792. int hl, optlen;
  793. Iphdr *iphdr;
  794. Tcphdr *tcphdr;
  795. unsigned long mss, cksum;
  796. uint8_t *optr;
  797. /* ignore non-ipv4 packets */
  798. if(nhgets(epkt->type) != ETIP4)
  799. return;
  800. iphdr = (Iphdr*)(epkt->data);
  801. n -= ETHERHDRSIZE;
  802. if(n < IPHDR)
  803. return;
  804. /* ignore bad packets */
  805. if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
  806. hl = (iphdr->vihl&0xF)<<2;
  807. if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
  808. return;
  809. } else
  810. hl = IP_HLEN4<<2;
  811. /* ignore non-tcp packets */
  812. if(iphdr->proto != IP_TCPPROTO)
  813. return;
  814. n -= hl;
  815. if(n < sizeof(Tcphdr))
  816. return;
  817. tcphdr = (Tcphdr*)((uint8_t*)(iphdr) + hl);
  818. // MSS can only appear in SYN packet
  819. if(!(tcphdr->flag[1] & SYN))
  820. return;
  821. hl = (tcphdr->flag[0] & 0xf0)>>2;
  822. if(n < hl)
  823. return;
  824. // check for MSS option
  825. optr = (uint8_t*)tcphdr + sizeof(Tcphdr);
  826. n = hl - sizeof(Tcphdr);
  827. for(;;) {
  828. if(n <= 0 || *optr == EOLOPT)
  829. return;
  830. if(*optr == NOOPOPT) {
  831. n--;
  832. optr++;
  833. continue;
  834. }
  835. optlen = optr[1];
  836. if(optlen < 2 || optlen > n)
  837. return;
  838. if(*optr == MSSOPT && optlen == MSS_LENGTH)
  839. break;
  840. n -= optlen;
  841. optr += optlen;
  842. }
  843. mss = nhgets(optr+2);
  844. if(mss <= TcpMssMax)
  845. return;
  846. // fit checksum
  847. cksum = nhgets(tcphdr->cksum);
  848. if(optr-(uint8_t*)tcphdr & 1) {
  849. print("tcpmsshack: odd alignment!\n");
  850. // odd alignments are a pain
  851. cksum += nhgets(optr+1);
  852. cksum -= (optr[1]<<8)|(TcpMssMax>>8);
  853. cksum += (cksum>>16);
  854. cksum &= 0xffff;
  855. cksum += nhgets(optr+3);
  856. cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
  857. cksum += (cksum>>16);
  858. } else {
  859. cksum += mss;
  860. cksum -= TcpMssMax;
  861. cksum += (cksum>>16);
  862. }
  863. hnputs(tcphdr->cksum, cksum);
  864. hnputs(optr+2, TcpMssMax);
  865. }
  866. /*
  867. * process to read from the ethernet
  868. */
  869. static void
  870. etherread(void *a)
  871. {
  872. Proc *up = externup();
  873. Port *port = a;
  874. Bridge *b = port->bridge;
  875. Block *bp;
  876. Etherpkt *ep;
  877. Centry *ce;
  878. long md, n;
  879. extern Dev *devtab[];
  880. qlock(&b->QLock);
  881. port->readp = up; /* hide identity under a rock for unbind */
  882. while(!port->closed){
  883. // release lock to read - error means it is time to quit
  884. qunlock(&b->QLock);
  885. if(waserror()) {
  886. print("etherread read error: %s\n", up->errstr);
  887. qlock(&b->QLock);
  888. break;
  889. }
  890. bp = devtab[port->data[0]->qid.type]->bread(port->data[0], MaxMTU, 0);
  891. poperror();
  892. qlock(&b->QLock);
  893. if(bp == nil)
  894. break;
  895. n = blocklen(bp);
  896. if(port->closed || n < ETHERMINTU){
  897. freeb(bp);
  898. continue;
  899. }
  900. if(waserror()) {
  901. // print("etherread bridge error\n");
  902. freeb(bp);
  903. continue;
  904. }
  905. port->in++;
  906. ep = (Etherpkt*)bp->rp;
  907. cacheupdate(b, ep->s, port->id);
  908. if(b->tcpmss)
  909. tcpmsshack(ep, n);
  910. /*
  911. * delay packets to simulate a slow link
  912. */
  913. if(b->delay0 != 0 || b->delayn != 0){
  914. md = b->delay0 + b->delayn * n;
  915. if(md > 0)
  916. microdelay(md);
  917. }
  918. poperror(); /* must now dispose of bp */
  919. if(ep->d[0] & 1) {
  920. log(&b->Log, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
  921. port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
  922. port->inmulti++;
  923. ethermultiwrite(b, bp, port);
  924. } else {
  925. ce = cachelookup(b, ep->d);
  926. if(ce == nil) {
  927. b->miss++;
  928. port->inunknown++;
  929. ethermultiwrite(b, bp, port);
  930. }else if(ce->port != port->id){
  931. b->hit++;
  932. etherwrite(b->port[ce->port], bp);
  933. }else
  934. freeb(bp);
  935. }
  936. }
  937. // print("etherread: trying to exit\n");
  938. port->readp = nil;
  939. portfree(port);
  940. qunlock(&b->QLock);
  941. pexit("hangup", 1);
  942. }
  943. static int
  944. fragment(Etherpkt *epkt, int n)
  945. {
  946. Iphdr *iphdr;
  947. if(n <= TunnelMtu)
  948. return 0;
  949. /* ignore non-ipv4 packets */
  950. if(nhgets(epkt->type) != ETIP4)
  951. return 0;
  952. iphdr = (Iphdr*)(epkt->data);
  953. n -= ETHERHDRSIZE;
  954. /*
  955. * ignore: IP runt packets, bad packets (I don't handle IP
  956. * options for the moment), packets with don't-fragment set,
  957. * and short blocks.
  958. */
  959. if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
  960. iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
  961. return 0;
  962. return 1;
  963. }
  964. static void
  965. etherwrite(Port *port, Block *bp)
  966. {
  967. Proc *up = externup();
  968. Iphdr *eh, *feh;
  969. Etherpkt *epkt;
  970. int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
  971. Block *xp, *nb;
  972. uint16_t fragoff, frag;
  973. extern Dev *devtab[];
  974. port->out++;
  975. epkt = (Etherpkt*)bp->rp;
  976. n = blocklen(bp);
  977. if(port->type != Ttun || !fragment(epkt, n)) {
  978. if(!waserror()){
  979. devtab[port->data[1]->qid.type]->bwrite(port->data[1], bp, 0);
  980. poperror();
  981. }
  982. return;
  983. }
  984. port->outfrag++;
  985. if(waserror()){
  986. freeblist(bp);
  987. return;
  988. }
  989. seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
  990. eh = (Iphdr*)(epkt->data);
  991. len = nhgets(eh->length);
  992. frag = nhgets(eh->frag);
  993. mf = frag & IP_MF;
  994. frag <<= 3;
  995. dlen = len - IPHDR;
  996. xp = bp;
  997. lid = nhgets(eh->id);
  998. offset = ETHERHDRSIZE+IPHDR;
  999. while(xp != nil && offset && offset >= BLEN(xp)) {
  1000. offset -= BLEN(xp);
  1001. xp = xp->next;
  1002. }
  1003. xp->rp += offset;
  1004. if(0)
  1005. print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
  1006. seglen, dlen, mf, frag);
  1007. for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
  1008. nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
  1009. feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
  1010. memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
  1011. nb->wp += ETHERHDRSIZE+IPHDR;
  1012. if((fragoff + seglen) >= dlen) {
  1013. seglen = dlen - fragoff;
  1014. hnputs(feh->frag, (frag+fragoff)>>3 | mf);
  1015. }
  1016. else
  1017. hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
  1018. hnputs(feh->length, seglen + IPHDR);
  1019. hnputs(feh->id, lid);
  1020. /* Copy up the data area */
  1021. chunk = seglen;
  1022. while(chunk) {
  1023. blklen = chunk;
  1024. if(BLEN(xp) < chunk)
  1025. blklen = BLEN(xp);
  1026. memmove(nb->wp, xp->rp, blklen);
  1027. nb->wp += blklen;
  1028. xp->rp += blklen;
  1029. chunk -= blklen;
  1030. if(xp->rp == xp->wp)
  1031. xp = xp->next;
  1032. }
  1033. feh->cksum[0] = 0;
  1034. feh->cksum[1] = 0;
  1035. hnputs(feh->cksum, ipcsum(&feh->vihl));
  1036. /* don't generate small packets */
  1037. if(BLEN(nb) < ETHERMINTU)
  1038. nb->wp = nb->rp + ETHERMINTU;
  1039. devtab[port->data[1]->qid.type]->bwrite(port->data[1], nb, 0);
  1040. }
  1041. poperror();
  1042. freeblist(bp);
  1043. }
  1044. // hold b lock
  1045. static void
  1046. portfree(Port *port)
  1047. {
  1048. if(decref(&port->Ref) != 0)
  1049. return;
  1050. if(port->data[0])
  1051. cclose(port->data[0]);
  1052. if(port->data[1])
  1053. cclose(port->data[1]);
  1054. memset(port, 0, sizeof(Port));
  1055. free(port);
  1056. }
  1057. Dev bridgedevtab = {
  1058. 'B',
  1059. "bridge",
  1060. devreset,
  1061. bridgeinit,
  1062. devshutdown,
  1063. bridgeattach,
  1064. bridgewalk,
  1065. bridgestat,
  1066. bridgeopen,
  1067. devcreate,
  1068. bridgeclose,
  1069. bridgeread,
  1070. devbread,
  1071. bridgewrite,
  1072. devbwrite,
  1073. devremove,
  1074. devwstat,
  1075. };