Browse Source

devbridge from Charles Forsyth.

We talked about the license and based on provenance the standard UCB
GPL is the correct one.

Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>
Ronald G. Minnich 7 years ago
parent
commit
73c07f87aa
5 changed files with 1388 additions and 1 deletions
  1. 1 0
      sys/src/9/amd64/build.json
  2. 2 0
      sys/src/9/amd64/core.json
  3. 1196 0
      sys/src/9/port/devbridge.c
  4. 188 0
      sys/src/9/port/log.c
  5. 1 1
      sys/src/9/port/portfns.h

+ 1 - 0
sys/src/9/amd64/build.json

@@ -18,6 +18,7 @@
 				"Dev": [
 					"acpi",
 					"arch",
+					"bridge",
 					"cap",
 					"cons",
 					"coreboot",

+ 2 - 0
sys/src/9/amd64/core.json

@@ -74,6 +74,7 @@
 			"archamd64.c",
 			"asm.c",
 			"backtrace.c",
+			"../port/devbridge.c",
 		        "coreboot.c",
 			"ctype.c",
 			"devarch.c",
@@ -85,6 +86,7 @@
 			"i8254.c",
 			"i8259.c",
 			"ioapic.c",
+			"../port/log.c",
 			"main.c",
 			"map.c",
 			"memory.c",

+ 1196 - 0
sys/src/9/port/devbridge.c

@@ -0,0 +1,1196 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * IPv4 Ethernet bridge
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../ip/ip.h"
+#include "../port/netif.h"
+#include "../port/error.h"
+
+typedef struct Bridge 	Bridge;
+typedef struct Port 	Port;
+typedef struct Centry	Centry;
+typedef struct Iphdr	Iphdr;
+typedef struct Tcphdr	Tcphdr;
+
+/*
+ * Qid file types (kept in the low byte of qid.path, see TYPE()) followed
+ * by the sizing constants of the bridge.
+ */
+enum
+{
+	Qtopdir=	1,		/* top level directory */
+
+	Qbridgedir,			/* bridge* directory */
+	Qbctl,
+	Qstats,
+	Qcache,
+	Qlog,
+
+	Qportdir,			/* directory for one port of a bridge */
+	Qpctl,
+	Qlocal,
+	Qstatus,
+
+	MaxQ,
+
+	Maxbridge=	4,
+	Maxport=	128,		// power of 2: PORT() masks with Maxport-1
+	CacheHash=	257,		// prime
+	CacheLook=	5,		// how many cache entries to examine
+	CacheSize=	(CacheHash+CacheLook-1),	// room to probe CacheLook slots from any hash
+	CacheTimeout=	5*60,		// timeout for cache entry in seconds
+	MaxMTU=	IP_MAX,	// allow for jumbo frames and large UDP
+
+	TcpMssMax = 1300,		// max desirable Tcp MSS value
+	TunnelMtu = 1400,		// frames longer than this are fragmented on tunnel ports
+};
+
+static Dirtab bridgedirtab[]={
+	"ctl",		{Qbctl},	0,	0666,
+	"stats",	{Qstats},	0,	0444,
+	"cache",	{Qcache},	0,	0444,
+	"log",		{Qlog},		0,	0666,
+};
+
+static Dirtab portdirtab[]={
+	"ctl",		{Qpctl},	0,	0666,
+	"local",	{Qlocal},	0,	0444,
+	"status",	{Qstatus},	0,	0444,
+};
+
+enum {
+	Logcache=	(1<<0),
+	Logmcast=	(1<<1),
+};
+
+// types of interfaces
+enum
+{
+	Tether,
+	Ttun,
+};
+
+static Logflag logflags[] =
+{
+	{ "cache",	Logcache, },
+	{ "multicast",	Logmcast, },
+	{ nil,		0, },
+};
+
+static Dirtab	*dirtab[MaxQ];
+
+#define TYPE(x) 	(((uint32_t)(x).path) & 0xff)
+#define PORT(x) 	((((uint32_t)(x).path) >> 8)&(Maxport-1))
+#define QID(x, y) 	(((x)<<8) | (y))
+
+/*
+ * One entry of the per-bridge address cache: an Ethernet address and the
+ * port it was last seen on.
+ */
+struct Centry
+{
+	uint8_t	d[Eaddrlen];
+	int	port;
+	long	expire;		// entry expires this many seconds after boot time
+	long	src;		// times the address was seen as a source (cacheupdate)
+	long	dst;		// times the address was looked up as a destination (cachelookup)
+};
+
+struct Bridge
+{
+	QLock QLock;
+	int	nport;
+	Port	*port[Maxport];
+	Centry	cache[CacheSize];
+	unsigned long	hit;
+	unsigned long	miss;
+	unsigned long	copy;
+	long	delay0;		// constant microsecond delay per packet
+	long	delayn;		// microsecond delay per byte
+	int	tcpmss;		// modify tcpmss value
+
+	Log Log;
+};
+
+/*
+ * One interface bound to a bridge.  Reference counted: one reference is
+ * held by the bridge's port table and one by the reader process.
+ */
+struct Port
+{
+	Ref Ref;
+	int	id;
+	Bridge	*bridge;
+	int	closed;		// set by portunbind; tells the reader to stop
+
+	Chan	*data[2];	// data[0] is read by etherread, data[1] is written by etherwrite
+
+	Proc	*readp;		// read proc
+	
+	// the following uniquely identifies the port
+	int	type;
+	char	name[KNAMELEN];
+	
+	// owner hash - avoids bind/unbind races
+	unsigned long	ownhash;
+
+	// various stats
+	int	in;		// number of packets read
+	int	inmulti;	// multicast or broadcast
+	int	inunknown;	// unknown address
+	int	out;		// number of packets written
+	int	outmulti;	// multicast or broadcast
+	int	outunknown;	// unknown address
+	int	outfrag;	// fragmented the packet
+	int	nentry;		// number of cache entries for this port (not updated anywhere in this file)
+};
+
+/* constants used when parsing IPv4/TCP headers in tcpmsshack and etherwrite */
+enum {
+	IP_TCPPROTO	= 6,
+	EOLOPT		= 0,
+	NOOPOPT		= 1,
+	MSSOPT		= 2,
+	MSS_LENGTH	= 4,		/* length in bytes of the TCP MSS option */
+	SYN		= 0x02,		/* Pkt. is synchronise */
+	IPHDR		= 20,		/* sizeof(Iphdr) */
+};
+
+struct Iphdr
+{
+	uint8_t	vihl;		/* Version and header length */
+	uint8_t	tos;		/* Type of service */
+	uint8_t	length[2];	/* packet length */
+	uint8_t	id[2];		/* ip->identification */
+	uint8_t	frag[2];	/* Fragment information */
+	uint8_t	ttl;		/* Time to live */
+	uint8_t	proto;		/* Protocol */
+	uint8_t	cksum[2];	/* Header checksum */
+	uint8_t	src[4];		/* IP source */
+	uint8_t	dst[4];		/* IP destination */
+};
+
+struct Tcphdr
+{
+	uint8_t	sport[2];
+	uint8_t	dport[2];
+	uint8_t	seq[4];
+	uint8_t	ack[4];
+	uint8_t	flag[2];
+	uint8_t	win[2];
+	uint8_t	cksum[2];
+	uint8_t	urg[2];
+};
+
+static Bridge bridgetab[Maxbridge];
+
+static int m2p[] = {
+	[OREAD] =	4,
+	[OWRITE] =	2,
+	[ORDWR] =	6
+};
+
+static int	bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
+static void	portbind(Bridge *b, int argc, char *argv[]);
+static void	portunbind(Bridge *b, int argc, char *argv[]);
+static void	etherread(void *a);
+static char	*cachedump(Bridge *b);
+static void	portfree(Port *port);
+static void	cacheflushport(Bridge *b, int port);
+static void	etherwrite(Port *port, Block *bp);
+
+/*
+ * Build the dirtab[] lookup table: every plain file listed in
+ * bridgedirtab and portdirtab is entered under the TYPE() of its qid,
+ * so bridgegen can find a file's Dirtab entry from its qid alone.
+ */
+static void
+bridgeinit(void)
+{
+	Dirtab *ent;
+
+	for(ent = bridgedirtab; ent < bridgedirtab + nelem(bridgedirtab); ent++)
+		dirtab[TYPE(ent->qid)] = ent;
+	for(ent = portdirtab; ent < portdirtab + nelem(portdirtab); ent++)
+		dirtab[TYPE(ent->qid)] = ent;
+}
+
+/*
+ * Attach to the bridge selected by spec, which is parsed as a decimal
+ * bridge number.  Numbers outside 0..Maxbridge-1 raise
+ * "bad specification".  The returned channel refers to the top level
+ * directory and records the bridge number in c->devno.
+ */
+static Chan*
+bridgeattach(char* spec)
+{
+	Chan *c;
+	int dev;
+
+	dev = atoi(spec);
+	if(dev<0 || dev >= Maxbridge)
+		error("bad specification");
+
+	c = devattach('B', spec);
+	mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
+	c->devno = dev;
+	return c;
+}
+
+/*
+ * Walk within the bridge name space.  There is no static table;
+ * every name is produced by bridgegen.
+ */
+static Walkqid*
+bridgewalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	Walkqid *wq;
+
+	wq = devwalk(c, nc, name, nname, nil, 0, bridgegen);
+	return wq;
+}
+
+/*
+ * Stat a bridge file.  As with walking, directory entries come
+ * from bridgegen rather than from a static table.
+ */
+static int
+bridgestat(Chan* c, uint8_t* db, int n)
+{
+	int nstat;
+
+	nstat = devstat(c, db, n, nil, 0, bridgegen);
+	return nstat;
+}
+
+/*
+ * Open a bridge file.
+ *
+ * Opening "log" registers a reader with the bridge's Log; opening
+ * "cache" takes a text snapshot of the address cache and keeps it in
+ * c->aux until close.  Other files need no per-open state.
+ */
+static Chan*
+bridgeopen(Chan* c, int omode)
+{
+	int perm;
+	Bridge *b;
+
+	omode &= 3;
+	/*
+	 * m2p[] only has entries for OREAD, OWRITE and ORDWR; after the
+	 * mask omode can still be 3, which would index past its end.
+	 */
+	perm = omode < nelem(m2p) ? m2p[omode] : 0;
+	USED(perm);
+
+	b = bridgetab + c->devno;
+	USED(b);
+
+	switch(TYPE(c->qid)) {
+	default:
+		break;
+	case Qlog:
+		logopen(&b->Log);
+		break;
+	case Qcache:
+		c->aux = cachedump(b);
+		break;
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	return c;
+}
+
+/*
+ * Release what bridgeopen set up: the cache snapshot held in c->aux
+ * for "cache", and the reader registration for "log".  Channels that
+ * were never opened (COPEN clear) hold nothing.
+ */
+static void
+bridgeclose(Chan* c)
+{
+	Bridge *br;
+	int kind;
+
+	if((c->flag & COPEN) == 0)
+		return;
+
+	br = &bridgetab[c->devno];
+	kind = TYPE(c->qid);
+	if(kind == Qcache)
+		free(c->aux);
+	else if(kind == Qlog)
+		logclose(&br->Log);
+}
+
+/*
+ * Read from one of the bridge files.
+ *
+ * Directories are generated through bridgegen.  "log" is served by
+ * logread, "status" formats the counters of the port selected by the
+ * qid (or "unbound" if the slot is empty), "ctl" reports the tcpmss and
+ * delay settings, "cache" returns the snapshot taken at open time
+ * (c->aux) and "stats" reports the hit/miss/copy counters.  "local" is
+ * not implemented and always reads as empty.  Any other type raises
+ * Egreg.
+ */
+static int32_t
+bridgeread(Chan *c, void *a, int32_t n, int64_t off)
+{
+	Proc *up = externup();
+	char buf[256];
+	Bridge *b = bridgetab + c->devno;
+	Port *port;
+	int i, ingood, outgood;
+
+	USED(off);
+	switch(TYPE(c->qid)) {
+	default:
+		error(Egreg);
+	case Qtopdir:
+	case Qbridgedir:
+	case Qportdir:
+		return devdirread(c, a, n, 0, 0, bridgegen);
+	case Qlog:
+		/* logread takes the byte count first, then the (ignored) offset */
+		return logread(&b->Log, a, n, off);
+	case Qlocal:
+		return 0;	/* TO DO */
+	case Qstatus:
+		/* hold the bridge lock so the port cannot be unbound while it is formatted */
+		qlock(&b->QLock);
+		if(waserror()){
+			qunlock(&b->QLock);
+			nexterror();
+		}
+		port = b->port[PORT(c->qid)];
+		if(port == 0)
+			strcpy(buf, "unbound\n");
+		else {
+			i = 0;
+			switch(port->type) {
+			default:
+				panic("bridgeread: unknown port type: %d",
+					port->type);
+			case Tether:
+				i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
+				break;
+			case Ttun:
+				i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
+				break;
+			}
+			/* "good" packets are those that were neither multicast nor to an unknown address */
+			ingood = port->in - port->inmulti - port->inunknown;
+			outgood = port->out - port->outmulti - port->outunknown;
+			snprint(buf+i, sizeof(buf)-i,
+				"in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
+				port->in, ingood, port->inmulti, port->inunknown,
+				port->out, outgood, port->outmulti,
+				port->outunknown, port->outfrag);
+		}
+		poperror();
+		qunlock(&b->QLock);
+		return readstr(off, a, n, buf);
+	case Qbctl:
+		snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
+			b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
+		n = readstr(off, a, n, buf);
+		return n;
+	case Qcache:
+		n = readstr(off, a, n, c->aux);
+		return n;
+	case Qstats:
+		snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
+			b->hit, b->miss, b->copy);
+		n = readstr(off, a, n, buf);
+		return n;
+	}
+}
+
+/*
+ * Set or clear a named bridge option.  "tcpmss" is the only option
+ * known; any other name raises "unknown bridge option".
+ */
+static void
+bridgeoption(Bridge *b, char *option, int value)
+{
+	if(strcmp(option, "tcpmss") != 0)
+		error("unknown bridge option");
+	b->tcpmss = value;
+}
+
+
+/*
+ * Write to a bridge file.  Only "ctl" and "log" accept writes; any
+ * other file raises Eperm.
+ *
+ * ctl requests, executed with the bridge locked:
+ *	bind ...		add a port (see portbind)
+ *	unbind ...		remove a port (see portunbind)
+ *	cacheflush		forget every cached address
+ *	set option		turn a bridge option on (see bridgeoption)
+ *	clear option		turn a bridge option off
+ *	delay delay0 delayn	set the per-packet and per-byte delays
+ *
+ * Writes to log are handed to logctl together with logflags.
+ * Returns n, the number of bytes written.
+ */
+static int32_t
+bridgewrite(Chan *c, void *a, int32_t n, int64_t off)
+{
+	Proc *up = externup();
+	Bridge *b = bridgetab + c->devno;
+	Cmdbuf *cb;
+	char *arg0, *p;
+	
+	USED(off);
+	switch(TYPE(c->qid)) {
+	default:
+		error(Eperm);
+	case Qbctl:
+		cb = parsecmd(a, n);
+		qlock(&b->QLock);
+		/* on any error below: drop the lock and the parsed command, then re-raise */
+		if(waserror()) {
+			qunlock(&b->QLock);
+			free(cb);
+			nexterror();
+		}
+		if(cb->nf == 0)
+			error("short write");
+		arg0 = cb->f[0];
+		if(strcmp(arg0, "bind") == 0) {
+			portbind(b, cb->nf-1, cb->f+1);
+		} else if(strcmp(arg0, "unbind") == 0) {
+			portunbind(b, cb->nf-1, cb->f+1);
+		} else if(strcmp(arg0, "cacheflush") == 0) {
+			log(&b->Log, Logcache, "cache flush\n");
+			memset(b->cache, 0, CacheSize*sizeof(Centry));
+		} else if(strcmp(arg0, "set") == 0) {
+			if(cb->nf != 2)
+				error("usage: set option");
+			bridgeoption(b, cb->f[1], 1);
+		} else if(strcmp(arg0, "clear") == 0) {
+			if(cb->nf != 2)
+				error("usage: clear option");
+			bridgeoption(b, cb->f[1], 0);
+		} else if(strcmp(arg0, "delay") == 0) {
+			if(cb->nf != 3)
+				error("usage: delay delay0 delayn");
+			b->delay0 = strtol(cb->f[1], nil, 10);
+			b->delayn = strtol(cb->f[2], nil, 10);
+		} else
+			error("unknown control request");
+		poperror();
+		qunlock(&b->QLock);
+		free(cb);
+		return n;
+	case Qlog:
+		cb = parsecmd(a, n);
+		p = logctl(&b->Log, cb->nf, cb->f, logflags);
+		/* free the command before raising: logctl returns a static message, not part of cb */
+		free(cb);
+		if(p != nil)
+			error(p);
+		return n;
+	}
+}
+
+/*
+ * Directory generator used by bridgewalk, bridgestat and bridgeread.
+ * The name, table and table-size arguments are ignored; the entry to
+ * produce is chosen from the type of c->qid and the index s.
+ *
+ * Layout:
+ *	top directory		one entry, "bridgeN" (N = c->devno)
+ *	bridgeN directory	the files of bridgedirtab, then one
+ *				directory per port slot below b->nport,
+ *				named by its number
+ *	port directory		the files of portdirtab
+ *
+ * s == DEVDOTDOT asks for the parent of c.  Returns 1 after filling
+ * in dp, or -1 when s is past the last entry.
+ */
+static int
+bridgegen(Chan *c, char *_, Dirtab*__, int ___, int s, Dir *dp)
+{
+	Proc *up = externup();
+	Bridge *b = bridgetab + c->devno;
+	int type = TYPE(c->qid);
+	Dirtab *dt;
+	Qid qid;
+
+	if(s  == DEVDOTDOT){
+		switch(TYPE(c->qid)){
+		case Qtopdir:
+		case Qbridgedir:
+			/* the parent of both the top and the bridge directory is the top directory */
+			snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->devno);
+			mkqid(&qid, Qtopdir, 0, QTDIR);
+			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
+			break;
+		case Qportdir:
+			snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->devno);
+			mkqid(&qid, Qbridgedir, 0, QTDIR);
+			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
+			break;
+		default:
+			panic("bridgewalk %llux", c->qid.path);
+		}
+		return 1;
+	}
+
+	switch(type) {
+	default:
+		/* non-directory entries end up here */
+		if(c->qid.type & QTDIR)
+			panic("bridgegen: unexpected directory");	
+		if(s != 0)
+			return -1;
+		/* dirtab[] was filled in by bridgeinit */
+		dt = dirtab[TYPE(c->qid)];
+		if(dt == nil)
+			panic("bridgegen: unknown type: %lud", TYPE(c->qid));
+		devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
+		return 1;
+	case Qtopdir:
+		if(s != 0)
+			return -1;
+		snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->devno);
+		mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
+		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
+		return 1;
+	case Qbridgedir:
+		if(s<nelem(bridgedirtab)) {
+			dt = bridgedirtab+s;
+			devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
+			return 1;
+		}
+		/* entries after the fixed files are the port directories */
+		s -= nelem(bridgedirtab);
+		if(s >= b->nport)
+			return -1;
+		mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
+		snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
+		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
+		return 1;
+	case Qportdir:
+		if(s>=nelem(portdirtab))
+			return -1;
+		dt = portdirtab+s;
+		/* files inherit the port number of the directory they live in */
+		mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
+		devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
+		return 1;
+	}
+}
+
+/*
+ * Parse alen bytes of an address written as pairs of hex digits, with
+ * an optional ':' after each pair, from `from' into `to'.
+ * Returns 0 on success, or -1 if the string ends before alen pairs
+ * have been read.  The digits themselves are not validated.
+ * (A copy of this routine also lives in netif.c.)
+ */
+int
+parseaddr(uint8_t *to, char *from, int alen)
+{
+	char pair[4];
+	char *cur;
+	int idx;
+
+	cur = from;
+	for(idx = 0; idx < alen; idx++){
+		/* both digits of the pair must be present */
+		if(cur[0] == 0 || cur[1] == 0)
+			return -1;
+		pair[0] = cur[0];
+		pair[1] = cur[1];
+		pair[2] = 0;
+		cur += 2;
+		to[idx] = strtoul(pair, 0, 16);
+		if(*cur == ':')
+			cur++;
+	}
+	return 0;
+}
+
+/*
+ * Handle "bind ether|tunnel name ownhash dev [dev2]": add a port to
+ * bridge b and start a reader process for it.
+ *
+ * ether ports open dev/clone, put the resulting connection into
+ * promiscuous bridge mode and use its data file for both directions;
+ * tunnel ports read from dev and write to dev2.  Raises an error if a
+ * port with the same type and name is already bound, if every port
+ * slot is taken, or if any of the opens or control writes fail.
+ */
+// assumes b is locked
+static void
+portbind(Bridge *b, int argc, char *argv[])
+{
+	Proc *up = externup();
+	Port *port;
+	Chan *ctl;
+	int type = 0, i, n;
+	unsigned long ownhash;
+	char *dev, *dev2 = nil;
+	char buf[100], name[KNAMELEN], path[8*KNAMELEN];
+	static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
+	extern Dev *devtab[];
+
+	/* name is compared with memcmp over all KNAMELEN bytes, so it must be zero padded */
+	memset(name, 0, KNAMELEN);
+	if(argc < 4)
+		error(usage);
+	if(strcmp(argv[0], "ether") == 0) {
+		if(argc != 4)
+			error(usage);
+		type = Tether;
+		strncpy(name, argv[1], KNAMELEN);
+		name[KNAMELEN-1] = 0;
+//		parseaddr(addr, argv[1], Eaddrlen);
+	} else if(strcmp(argv[0], "tunnel") == 0) {
+		if(argc != 5)
+			error(usage);
+		type = Ttun;
+		strncpy(name, argv[1], KNAMELEN);
+		name[KNAMELEN-1] = 0;
+//		parseip(addr, argv[1]);
+		dev2 = argv[4];
+	} else
+		error(usage);
+	ownhash = atoi(argv[2]);
+	dev = argv[3];
+	for(i=0; i<b->nport; i++) {
+		port = b->port[i];
+		if(port != nil && port->type == type &&
+		    memcmp(port->name, name, KNAMELEN) == 0)
+			error("port in use");
+	}
+	/* take the first free slot; its index becomes the port id */
+	for(i=0; i<Maxport; i++)
+		if(b->port[i] == nil)
+			break;
+	if(i == Maxport)
+		error("no more ports");
+	port = smalloc(sizeof(Port));
+	port->Ref.ref = 1;
+	port->id = i;
+	port->ownhash = ownhash;
+
+	/* until the port is committed, any error drops the only reference to it */
+	if(waserror()) {
+		portfree(port);
+		nexterror();
+	}
+	port->type = type;
+	memmove(port->name, name, KNAMELEN);
+	switch(port->type) {
+	default:
+		panic("portbind: unknown port type: %d", type);
+	case Tether:
+		snprint(path, sizeof(path), "%s/clone", dev);
+		ctl = namec(path, Aopen, ORDWR, 0);
+		if(waserror()) {
+			cclose(ctl);
+			nexterror();
+		}
+		// check addr?
+
+		// get directory name
+		/*
+		 * NOTE(review): the device is looked up with qid.type as the
+		 * index into devtab, here and for the writes below; confirm
+		 * this is the intended device index for the channel.
+		 */
+		n = devtab[ctl->qid.type]->read(ctl, buf, sizeof(buf)-1, 0);
+		buf[n] = 0;
+		snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(buf, 0, 0));
+
+		// setup connection to be promiscuous
+		snprint(buf, sizeof(buf), "connect -1");
+		devtab[ctl->qid.type]->write(ctl, buf, strlen(buf), 0);
+		snprint(buf, sizeof(buf), "promiscuous");
+		devtab[ctl->qid.type]->write(ctl, buf, strlen(buf), 0);
+		snprint(buf, sizeof(buf), "bridge");
+		devtab[ctl->qid.type]->write(ctl, buf, strlen(buf), 0);
+
+		// open data port
+		port->data[0] = namec(path, Aopen, ORDWR, 0);
+		// dup it
+		/* both directions share one Chan; the extra reference lets portfree cclose each slot */
+		incref(&port->data[0]->r);
+		port->data[1] = port->data[0];
+
+		poperror();
+		cclose(ctl);		
+
+		break;
+	case Ttun:
+		port->data[0] = namec(dev, Aopen, OREAD, 0);
+		port->data[1] = namec(dev2, Aopen, OWRITE, 0);
+		break;
+	}
+
+	poperror();
+
+	/* committed to binding port */
+	b->port[port->id] = port;
+	port->bridge = b;
+	if(b->nport <= port->id)
+		b->nport = port->id+1;
+
+	// assumes kproc always succeeds
+	/* second reference belongs to the reader process; etherread drops it on exit */
+	incref(&port->Ref);
+	snprint(buf, sizeof(buf), "bridge:%s", dev);
+	kproc(buf, etherread, port);
+}
+
+/*
+ * Handle "unbind ether|tunnel name [ownhash]": remove the port with the
+ * given type and name from bridge b.
+ *
+ * Raises "port not found" if no such port is bound and "bad owner hash"
+ * if both the request and the port carry a non-zero owner hash and they
+ * differ.  The port's cache entries are flushed, its reader process is
+ * sent an "unbind" note, and the bridge's reference to it is dropped.
+ */
+// assumes b is locked
+static void
+portunbind(Bridge *b, int argc, char *argv[])
+{
+	int type = 0, i;
+	char name[KNAMELEN];
+	unsigned long ownhash;
+	Port *port = nil;
+	static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
+
+	/* zero padded so the memcmp over KNAMELEN bytes below is well defined */
+	memset(name, 0, KNAMELEN);
+	if(argc < 2 || argc > 3)
+		error(usage);
+	if(strcmp(argv[0], "ether") == 0) {
+		type = Tether;
+		strncpy(name, argv[1], KNAMELEN);
+		name[KNAMELEN-1] = 0;
+//		parseaddr(addr, argv[1], Eaddrlen);
+	} else if(strcmp(argv[0], "tunnel") == 0) {
+		type = Ttun;
+		strncpy(name, argv[1], KNAMELEN);
+		name[KNAMELEN-1] = 0;
+//		parseip(addr, argv[1]);
+	} else
+		error(usage);
+	/* an omitted owner hash is treated as 0, which matches any port */
+	if(argc == 3)
+		ownhash = atoi(argv[2]);
+	else
+		ownhash = 0;
+	for(i=0; i<b->nport; i++) {
+		port = b->port[i];
+		if(port != nil && port->type == type &&
+		    memcmp(port->name, name, KNAMELEN) == 0)
+			break;
+	}
+	if(i == b->nport)
+		error("port not found");
+	if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
+		error("bad owner hash");
+
+	port->closed = 1;
+	b->port[i] = nil;	// port is now unbound
+	cacheflushport(b, i);
+
+	// try and stop reader
+	if(port->readp)
+		postnote(port->readp, 1, "unbind", 0);
+	/* drops the bridge's reference; the reader holds its own until it exits */
+	portfree(port);
+}
+
+/*
+ * Find the cache entry for destination address d.
+ *
+ * Multicast and broadcast addresses are never cached.  Otherwise the
+ * CacheLook slots starting at the address's hash are examined; a
+ * matching entry has its dst count bumped and, unless it has expired,
+ * its lifetime renewed.  Returns the entry, or nil on a miss or an
+ * expired match.
+ */
+// assumes b is locked
+static Centry *
+cachelookup(Bridge *b, uint8_t d[Eaddrlen])
+{
+	uint hash;
+	int k;
+	Centry *slot, *last;
+	long now;
+
+	if((d[0] & 1) != 0)
+		return nil;
+
+	hash = 0;
+	for(k = 0; k < Eaddrlen; k++)
+		hash = hash*7 + d[k];
+	slot = &b->cache[hash % CacheHash];
+	now = TK2SEC(machp()->ticks);
+	for(last = slot + CacheLook; slot < last; slot++) {
+		if(memcmp(d, slot->d, Eaddrlen) != 0)
+			continue;
+		slot->dst++;
+		if(now >= slot->expire) {
+			log(&b->Log, Logcache, "expired cache entry: %E %d\n",
+				d, slot->port);
+			return nil;
+		}
+		slot->expire = now + CacheTimeout;
+		return slot;
+	}
+	log(&b->Log, Logcache, "cache miss: %E\n", d);
+	return nil;
+}
+
+/*
+ * Record that source address d was seen on the given port.
+ *
+ * Multicast and broadcast source addresses are logged and ignored.  If
+ * the address is already among the CacheLook slots starting at its
+ * hash, that entry is refreshed (and moved to the new port if it
+ * changed).  Otherwise the slot with the smallest expire time in that
+ * window is overwritten.
+ */
+// assumes b is locked
+static void
+cacheupdate(Bridge *b, uint8_t d[Eaddrlen], int port)
+{
+	int i;
+	uint h;
+	Centry *p, *pp;
+	long sec;
+
+	// dont cache multicast or broadcast
+	if(d[0] & 1) {
+		log(&b->Log, Logcache, "bad source address: %E\n", d);
+		return;
+	}
+	
+	h = 0;
+	for(i=0; i<Eaddrlen; i++) {
+		h *= 7;
+		h += d[i];
+	}
+	h %= CacheHash;
+	p = b->cache + h;
+	/* pp tracks the replacement candidate, sec the smallest expire time seen so far */
+	pp = p;
+	sec = p->expire;
+
+	// look for oldest entry
+	for(i=0; i<CacheLook; i++,p++) {
+		if(memcmp(p->d, d, Eaddrlen) == 0) {
+			p->expire = TK2SEC(machp()->ticks) + CacheTimeout;
+			if(p->port != port) {
+				log(&b->Log, Logcache, "NIC changed port %d->%d: %E\n",
+					p->port, port, d);
+				p->port = port;
+			}
+			p->src++;
+			return;
+		}
+		if(p->expire < sec) {
+			sec = p->expire;
+			pp = p;
+		}
+	}
+	/* expire == 0 marks a slot that was never used (or was flushed) */
+	if(pp->expire != 0)
+		log(&b->Log, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
+	pp->expire = TK2SEC(machp()->ticks) + CacheTimeout;
+	memmove(pp->d, d, Eaddrlen);
+	pp->port = port;
+	pp->src = 1;
+	pp->dst = 0;
+	log(&b->Log, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
+}
+
+/*
+ * Clear every cache entry of bridge b that refers to the given port.
+ */
+// assumes b is locked
+static void
+cacheflushport(Bridge *b, int port)
+{
+	Centry *ent, *end;
+
+	end = b->cache + CacheSize;
+	for(ent = b->cache; ent < end; ent++)
+		if(ent->port == port)
+			memset(ent, 0, sizeof(*ent));
+}
+
+/*
+ * Format the address cache of bridge b as text, one line per entry in
+ * use: address, port, src count, dst count, expiry time and 'v' (still
+ * valid) or 'e' (expired).
+ *
+ * Takes the bridge lock itself.  Returns a malloc'ed, NUL terminated
+ * buffer that the caller must free; raises Enomem if it cannot be
+ * allocated.
+ */
+static char *
+cachedump(Bridge *b)
+{
+	Proc *up = externup();
+	int i, n;
+	long sec, off;
+	char *buf, *p, *ep;
+	Centry *ce;
+	char c;
+
+	qlock(&b->QLock);
+	if(waserror()) {
+		qunlock(&b->QLock);
+		nexterror();
+	}
+	sec = TK2SEC(machp()->ticks);
+	/* count the entries in use to size the buffer */
+	n = 0;
+	for(i=0; i<CacheSize; i++)
+		if(b->cache[i].expire != 0)
+			n++;
+	
+	n *= 51;	// change if print format is changed
+	n += 10;	// some slop at the end
+	buf = malloc(n);
+	if(buf == nil)
+		error(Enomem);
+	p = buf;
+	ep = buf + n;
+	ce = b->cache;
+	/* expire is kept relative to the tick clock; off converts it to the seconds() time base */
+	off = seconds() - sec;
+	for(i=0; i<CacheSize; i++,ce++) {
+		if(ce->expire == 0)
+			continue;	
+		c = (sec < ce->expire)?'v':'e';
+		p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
+			ce->port, ce->src, ce->dst, ce->expire+off, c);
+	}
+	*p = 0;
+	poperror();
+	qunlock(&b->QLock);
+
+	return buf;
+}
+
+
+
+/*
+ * Send bp out of every bound port of bridge b except the one it
+ * arrived on.  Used for multicast/broadcast frames and for frames whose
+ * destination is not in the cache; the per-port outmulti or outunknown
+ * counter is bumped accordingly.
+ *
+ * bp is consumed: it is written to the last eligible port (earlier
+ * ports get copies), or freed if there is no other port.
+ */
+// assumes b is locked, no error return
+static void
+ethermultiwrite(Bridge *b, Block *bp, Port *port)
+{
+	Port *oport;
+	Etherpkt *ep;
+	int i, mcast;
+
+	ep = (Etherpkt*)bp->rp;
+	mcast = ep->d[0] & 1;		/* multicast bit of ethernet address */
+
+	oport = nil;
+	for(i=0; i<b->nport; i++) {
+		if(i == port->id || b->port[i] == nil)
+			continue;
+		/*
+		 * we need to forward multicast packets for ipv6,
+		 * so always do it.
+		 */
+		if(mcast)
+			b->port[i]->outmulti++;
+		else
+			b->port[i]->outunknown++;
+
+		// delay one so that the last write does not copy
+		if(oport != nil) {
+			b->copy++;
+			etherwrite(oport, copyblock(bp, blocklen(bp)));
+		}
+		oport = b->port[i];
+	}
+
+	// last write free block
+	if(oport)
+		etherwrite(oport, bp);
+	else
+		freeb(bp);
+}
+
+/*
+ * Clamp the TCP MSS option of an IPv4 SYN segment to TcpMssMax.
+ *
+ * epkt is an Ethernet frame of n bytes.  Frames that are not IPv4/TCP,
+ * are not SYNs, are too short for the headers they claim, or carry no
+ * MSS option above TcpMssMax are left untouched.  Otherwise the option
+ * value is rewritten in place and the TCP checksum is adjusted
+ * incrementally to match.
+ */
+static void
+tcpmsshack(Etherpkt *epkt, int n)
+{
+	int hl, optlen;
+	Iphdr *iphdr;
+	Tcphdr *tcphdr;
+	unsigned long mss, cksum;
+	uint8_t *optr;
+
+	/* ignore non-ipv4 packets */
+	if(nhgets(epkt->type) != ETIP4)
+		return;
+	iphdr = (Iphdr*)(epkt->data);
+	n -= ETHERHDRSIZE;
+	if(n < IPHDR)
+		return;
+
+	/* ignore bad packets */
+	if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
+		hl = (iphdr->vihl&0xF)<<2;
+		if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
+			return;
+	} else
+		hl = IP_HLEN4<<2;
+
+	/* ignore non-tcp packets */
+	if(iphdr->proto != IP_TCPPROTO)
+		return;
+	n -= hl;
+	/*
+	 * compare as int: n is negative when the IP header length exceeds
+	 * what was received, and would pass an unsigned comparison
+	 */
+	if(n < (int)sizeof(Tcphdr))
+		return;
+	tcphdr = (Tcphdr*)((uint8_t*)(iphdr) + hl);
+	// MSS can only appear in SYN packet
+	if(!(tcphdr->flag[1] & SYN))
+		return;
+	/* TCP header length in bytes, from the data offset field */
+	hl = (tcphdr->flag[0] & 0xf0)>>2;
+	if(hl < (int)sizeof(Tcphdr) || n < hl)
+		return;
+
+	// check for MSS option
+	optr = (uint8_t*)tcphdr + sizeof(Tcphdr);
+	n = hl - (int)sizeof(Tcphdr);
+	for(;;) {
+		if(n <= 0 || *optr == EOLOPT)
+			return;
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		/* every other option has a length byte; don't read it past the header */
+		if(n < 2)
+			return;
+		optlen = optr[1];
+		if(optlen < 2 || optlen > n)
+			return;
+		if(*optr == MSSOPT && optlen == MSS_LENGTH)
+			break;
+		n -= optlen;
+		optr += optlen;
+	}
+
+	mss = nhgets(optr+2);
+	if(mss <= TcpMssMax)
+		return;
+	// fit checksum
+	cksum = nhgets(tcphdr->cksum);
+	if(optr-(uint8_t*)tcphdr & 1) {
+print("tcpmsshack: odd alignment!\n");
+		// odd alignments are a pain
+		/* the MSS value straddles two 16-bit checksum words; fix each in turn */
+		cksum += nhgets(optr+1);
+		cksum -= (optr[1]<<8)|(TcpMssMax>>8);
+		cksum += (cksum>>16);
+		cksum &= 0xffff;
+		cksum += nhgets(optr+3);
+		cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
+		cksum += (cksum>>16);
+	} else {
+		cksum += mss;
+		cksum -= TcpMssMax;
+		cksum += (cksum>>16);
+	}
+	hnputs(tcphdr->cksum, cksum);
+	hnputs(optr+2, TcpMssMax);
+}
+
+/*
+ *  process to read from the ethernet
+ *
+ *  Started by portbind with the Port as argument.  Loops reading
+ *  frames from port->data[0] until the port is closed, the read
+ *  fails or returns nil.  Each frame updates the address cache with
+ *  its source, optionally has its TCP MSS clamped and is delayed,
+ *  and is then forwarded: to every other port if the destination is
+ *  multicast or unknown, to the cached port otherwise, or dropped if
+ *  the destination lives on the port it arrived on.
+ *
+ *  The bridge lock is held except while blocked in the read.
+ */
+static void
+etherread(void *a)
+{
+	Proc *up = externup();
+	Port *port = a;
+	Bridge *b = port->bridge;
+	Block *bp;
+	Etherpkt *ep;
+	Centry *ce;
+	long md, n;
+	extern Dev *devtab[];
+	
+	qlock(&b->QLock);
+	port->readp = up;	/* hide identity under a rock for unbind */
+
+	while(!port->closed){
+		// release lock to read - error means it is time to quit
+		qunlock(&b->QLock);
+		if(waserror()) {
+			print("etherread read error: %s\n", up->errstr);
+			qlock(&b->QLock);
+			break;
+		}
+		/*
+		 * NOTE(review): devtab is indexed by qid.type here; confirm
+		 * this is the intended device index for the channel.
+		 */
+		bp = devtab[port->data[0]->qid.type]->bread(port->data[0], MaxMTU, 0);
+		poperror();
+		qlock(&b->QLock);
+		if(bp == nil)
+			break;
+		n = blocklen(bp);
+		/* drop runts, and anything that arrived after the port was unbound */
+		if(port->closed || n < ETHERMINTU){
+			freeb(bp);
+			continue;
+		}
+		if(waserror()) {
+//			print("etherread bridge error\n");
+			freeb(bp);
+			continue;
+		}
+		port->in++;
+
+		ep = (Etherpkt*)bp->rp;
+		cacheupdate(b, ep->s, port->id);
+		if(b->tcpmss)
+			tcpmsshack(ep, n);
+
+		/*
+		 * delay packets to simulate a slow link
+		 */
+		if(b->delay0 != 0 || b->delayn != 0){
+			md = b->delay0 + b->delayn * n;
+			if(md > 0)
+				microdelay(md);
+		}
+
+		poperror();	/* must now dispose of bp */
+
+		if(ep->d[0] & 1) {
+			log(&b->Log, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
+				port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
+			port->inmulti++;
+			ethermultiwrite(b, bp, port);
+		} else {
+			ce = cachelookup(b, ep->d);
+			if(ce == nil) {
+				b->miss++;
+				port->inunknown++;
+				ethermultiwrite(b, bp, port);
+			}else if(ce->port != port->id){
+				b->hit++;
+				etherwrite(b->port[ce->port], bp);
+			}else
+				freeb(bp);
+		}
+	}
+//	print("etherread: trying to exit\n");
+	port->readp = nil;
+	/* drop the reference portbind took on behalf of this process */
+	portfree(port);
+	qunlock(&b->QLock);
+	pexit("hangup", 1);
+}
+
+/*
+ * Decide whether the n-byte Ethernet frame epkt should be fragmented
+ * before being written to a tunnel.  Returns 1 only for IPv4 frames
+ * longer than TunnelMtu that have a plain 20-byte IP header, do not
+ * have don't-fragment set, and whose IP length fits in the frame;
+ * returns 0 for everything else.
+ */
+static int
+fragment(Etherpkt *epkt, int n)
+{
+	Iphdr *ip;
+	int iplen;
+
+	/* short enough to go out as it is */
+	if(n <= TunnelMtu)
+		return 0;
+	/* only IPv4 can be fragmented here */
+	if(nhgets(epkt->type) != ETIP4)
+		return 0;
+
+	ip = (Iphdr*)epkt->data;
+	iplen = n - ETHERHDRSIZE;
+	/* IP runt */
+	if(iplen < IPHDR)
+		return 0;
+	/* wrong version, or IP options present (not handled) */
+	if(ip->vihl != (IP_VER4|IP_HLEN4))
+		return 0;
+	/* don't-fragment is set */
+	if((ip->frag[0] & (IP_DF>>8)) != 0)
+		return 0;
+	/* block is shorter than the IP length claims */
+	if(nhgets(ip->length) > iplen)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Write the frame bp to port->data[1], consuming bp.
+ *
+ * Frames go out unchanged unless the port is a tunnel and fragment()
+ * accepts the frame; in that case the IP payload is split into
+ * fragments whose data is a multiple of 8 bytes and which fit within
+ * TunnelMtu, each sent as its own frame with a copy of the original
+ * Ethernet and IP headers and a recomputed IP header checksum.
+ * Errors from the underlying write are caught here and not propagated.
+ */
+static void
+etherwrite(Port *port, Block *bp)
+{
+	Proc *up = externup();
+	Iphdr *eh, *feh;
+	Etherpkt *epkt;
+	int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
+	Block *xp, *nb;
+	uint16_t fragoff, frag;
+	extern Dev *devtab[];
+
+	port->out++;
+	epkt = (Etherpkt*)bp->rp;
+	n = blocklen(bp);
+	if(port->type != Ttun || !fragment(epkt, n)) {
+		/*
+		 * NOTE(review): devtab is indexed by qid.type here and in
+		 * the fragment loop; confirm this is the intended device
+		 * index for the channel.
+		 */
+		if(!waserror()){
+			devtab[port->data[1]->qid.type]->bwrite(port->data[1], bp, 0);
+			poperror();
+		}
+		return;
+	}
+	port->outfrag++;
+	if(waserror()){
+		freeblist(bp);	
+		return;
+	}
+
+	/* payload bytes per fragment, rounded down to a multiple of 8 */
+	seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
+	eh = (Iphdr*)(epkt->data);
+	len = nhgets(eh->length);
+	frag = nhgets(eh->frag);
+	mf = frag & IP_MF;
+	/* the shift drops the flag bits and turns the offset field (8-byte units) into bytes */
+	frag <<= 3;
+	dlen = len - IPHDR;
+	xp = bp;
+	lid = nhgets(eh->id);
+	/* advance xp/rp past the Ethernet and IP headers to the start of the payload */
+	offset = ETHERHDRSIZE+IPHDR;
+	while(xp != nil && offset && offset >= BLEN(xp)) {
+		offset -= BLEN(xp);
+		xp = xp->next;
+	}
+	xp->rp += offset;
+	
+	if(0)
+		print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
+			seglen, dlen, mf, frag);
+	for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
+		nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
+		
+		feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
+
+		/* epkt still points at the original headers even though xp->rp has moved */
+		memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
+		nb->wp += ETHERHDRSIZE+IPHDR;
+
+		if((fragoff + seglen) >= dlen) {
+			/* last fragment: carries the remainder and the original more-fragments bit */
+			seglen = dlen - fragoff;
+			hnputs(feh->frag, (frag+fragoff)>>3 | mf);
+		}
+		else	
+			hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
+
+		hnputs(feh->length, seglen + IPHDR);
+		hnputs(feh->id, lid);
+
+		/* Copy up the data area */
+		chunk = seglen;
+		while(chunk) {
+			blklen = chunk;
+			if(BLEN(xp) < chunk)
+				blklen = BLEN(xp);
+			memmove(nb->wp, xp->rp, blklen);
+			nb->wp += blklen;
+			xp->rp += blklen;
+			chunk -= blklen;
+			if(xp->rp == xp->wp)
+				xp = xp->next;
+		} 
+
+		/* the checksum field must be zero while the header checksum is computed */
+		feh->cksum[0] = 0;
+		feh->cksum[1] = 0;
+		hnputs(feh->cksum, ipcsum(&feh->vihl));
+	
+		/* don't generate small packets */
+		if(BLEN(nb) < ETHERMINTU)
+			nb->wp = nb->rp + ETHERMINTU;
+		devtab[port->data[1]->qid.type]->bwrite(port->data[1], nb, 0);
+	}
+	poperror();
+	freeblist(bp);	
+}
+
+/*
+ * Drop one reference to port.  When the last reference goes, close
+ * whichever data channels are open, scrub the structure and free it.
+ */
+// hold b lock
+static void
+portfree(Port *port)
+{
+	int k;
+
+	if(decref(&port->Ref) != 0)
+		return;
+
+	for(k = 0; k < nelem(port->data); k++)
+		if(port->data[k] != nil)
+			cclose(port->data[k]);
+	memset(port, 0, sizeof(*port));
+	free(port);
+}
+
+/*
+ * Device table entry for the bridge: device character 'B', name
+ * "bridge".  The bridge* routines are the ones defined in this file;
+ * the dev* entries are the generic routines used where the bridge
+ * defines none of its own.
+ */
+Dev bridgedevtab = {
+	'B',
+	"bridge",
+
+	devreset,
+	bridgeinit,
+	devshutdown,
+	bridgeattach,
+	bridgewalk,
+	bridgestat,
+	bridgeopen,
+	devcreate,
+	bridgeclose,
+	bridgeread,
+	devbread,
+	bridgewrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};

+ 188 - 0
sys/src/9/port/log.c

@@ -0,0 +1,188 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+static char Ebadlogctl[] = "unknown log ctl message";
+
+/*
+ * Register one more opener of the log.  The first opener fills in
+ * defaults for the buffer size (4*1024 bytes) and minimum read size (1)
+ * if they are unset, allocates the buffer if there is none, and resets
+ * the log to empty.  Raises Enomem if the buffer cannot be allocated.
+ */
+void
+logopen(Log *alog)
+{
+	Proc *up = externup();
+	lock(&alog->l);
+	if(waserror()){
+		unlock(&alog->l);
+		nexterror();
+	}
+	if(alog->opens == 0){
+		if(alog->nlog == 0)
+			alog->nlog = 4*1024;
+		if(alog->minread == 0)
+			alog->minread = 1;
+		if(alog->buf == nil && (alog->buf = malloc(alog->nlog)) == nil)
+			error(Enomem);
+		/* start with an empty buffer: read pointer at the base, nothing stored */
+		alog->rptr = alog->buf;
+		alog->end = alog->buf + alog->nlog;
+		alog->len = 0;
+	}
+	alog->opens++;
+	unlock(&alog->l);
+	poperror();
+}
+
+/*
+ * Drop one opener of the log; the buffer is released when the last
+ * opener goes away.
+ */
+void
+logclose(Log *alog)
+{
+	lock(&alog->l);
+	if(--alog->opens == 0){
+		free(alog->buf);
+		alog->buf = nil;
+	}
+	unlock(&alog->l);
+}
+
+/*
+ * Sleep condition for logread: true once the log holds at least
+ * minread bytes.  `a' is the Log.
+ */
+int
+logready(void *a)
+{
+	Log *lg;
+
+	lg = a;
+	if(lg->len < lg->minread)
+		return 0;
+	return 1;
+}
+
+/*
+ * Read up to n bytes of logged data into a, sleeping until the log
+ * holds at least minread bytes (or at least n bytes).  The last
+ * argument (the file offset at the one call site in devbridge.c) is
+ * ignored: the log is consumed as a stream.
+ *
+ * Readers are serialised by alog->readq.  Returns the number of bytes
+ * copied.
+ */
+int32_t
+logread(Log *alog, void *a, int32_t n, int64_t _)
+{
+	Proc *up = externup();
+	int i, d;
+	char *p, *rptr;
+
+	qlock(&alog->readq);
+	if(waserror()){
+		qunlock(&alog->readq);
+		nexterror();
+	}
+
+	for(;;){
+		lock(&alog->l);
+		if(alog->len >= alog->minread || alog->len >= n){
+			if(n > alog->len)
+				n = alog->len;
+			/* d is the number of bytes that wrap around to the start of the buffer */
+			d = 0;
+			rptr = alog->rptr;
+			alog->rptr += n;
+			if(alog->rptr >= alog->end){
+				d = alog->rptr - alog->end;
+				alog->rptr = alog->buf + d;
+			}
+			alog->len -= n;
+			unlock(&alog->l);
+
+			/* copy the part up to the end of the buffer, then the wrapped part */
+			i = n-d;
+			p = a;
+			memmove(p, rptr, i);
+			memmove(p+i, alog->buf, d);
+			break;
+		}
+		else
+			unlock(&alog->l);
+
+		sleep(&alog->readr, logready, alog);
+	}
+
+	qunlock(&alog->readq);
+	poperror();
+
+	return n;
+}
+
+/*
+ * Apply a log control request: argv[0] is "set" or "clear" and each
+ * further argument names a flag from the nil-terminated table `flags'
+ * whose mask is added to or removed from alog->logmask.  Names not in
+ * the table are silently skipped.
+ *
+ * Returns nil on success, or an error string if there are fewer than
+ * two arguments or the verb is neither "set" nor "clear".
+ */
+char*
+logctl(Log *alog, int argc, char *argv[], Logflag *flags)
+{
+	int k, on;
+	Logflag *f;
+
+	if(argc < 2)
+		return Ebadlogctl;
+
+	if(strcmp("set", argv[0]) == 0)
+		on = 1;
+	else if(strcmp("clear", argv[0]) == 0)
+		on = 0;
+	else
+		return Ebadlogctl;
+
+	for(k = 1; k < argc; k++){
+		for(f = flags; f->name != nil; f++){
+			if(strcmp(f->name, argv[k]) != 0)
+				continue;
+			if(on)
+				alog->logmask |= f->mask;
+			else
+				alog->logmask &= ~f->mask;
+			break;
+		}
+	}
+
+	return nil;
+}
+
+/*
+ * Append n bytes from buf to the log's circular buffer, provided that
+ * `mask' is enabled in alog->logmask, the log has at least one opener
+ * and n fits in the buffer.  When there is not enough room, the oldest
+ * bytes are discarded to make space.  Wakes a sleeping reader once
+ * minread bytes are available.
+ */
+void
+logn(Log *alog, int mask, void *buf, int n)
+{
+	char *fp, *t;
+	int dowake, i;
+
+	if(!(alog->logmask & mask))
+		return;
+
+	if(alog->opens == 0)
+		return;
+
+	if(n > alog->nlog)
+		return;
+
+	lock(&alog->l);
+	/* i is the number of bytes by which the new data would overflow the buffer */
+	i = alog->len + n - alog->nlog;
+	if(i > 0){
+		alog->len -= i;
+		alog->rptr += i;
+		if(alog->rptr >= alog->end)
+			alog->rptr = alog->buf + (alog->rptr - alog->end);
+	}
+	/* t is the write position: just past the stored data, wrapped in the loop below */
+	t = alog->rptr + alog->len;
+	fp = buf;
+	alog->len += n;
+	while(n-- > 0){
+		if(t >= alog->end)
+			t = alog->buf + (t - alog->end);
+		*t++ = *fp++;
+	}
+	dowake = alog->len >= alog->minread;
+	unlock(&alog->l);
+
+	if(dowake)
+		wakeup(&alog->readr);
+}
+
+/*
+ * Format a message and append it to the log.  Nothing is formatted
+ * unless `mask' is enabled in alog->logmask and the log has an opener.
+ * Messages are formatted into a 128-byte buffer.
+ */
+void
+log(Log *alog, int mask, char *fmt, ...)
+{
+	va_list ap;
+	char line[128];
+	char *end;
+
+	if((alog->logmask & mask) == 0 || alog->opens == 0)
+		return;
+
+	va_start(ap, fmt);
+	end = vseprint(line, line+sizeof(line), fmt, ap);
+	va_end(ap);
+
+	logn(alog, mask, line, end - line);
+}

+ 1 - 1
sys/src/9/port/portfns.h

@@ -181,7 +181,7 @@ void		logclose(Log*);
 char*		logctl(Log*, int, char**, Logflag*);
 void		logn(Log*, int, void*, int);
 void		logopen(Log*);
-int32_t		logread(Log*, void*, uint32_t, int32_t);
+int32_t		logread(Log*, void*, int32_t, int64_t);
 Page*		lookpage(Image*, uint32_t);
 Cmdtab*		lookupcmd(Cmdbuf*, Cmdtab*, int);
 void		mallocinit(void);