Browse Source

Added back USB support, but I'm sure drivers won't work. ACHTUNG! xalloc.c

Elbing Miss 9 years ago
parent
commit
ec95c1f09c

+ 5 - 3
sys/src/9/k10/archk10.c

@@ -12,8 +12,10 @@
 #include "mem.h"
 #include "dat.h"
 #include "fns.h"
+
 #undef DBG
 #define DBG iprint
+
 static int
 cpuidinit(void)
 {
@@ -132,9 +134,9 @@ cpuidhz(uint32_t *info0, uint32_t *info1)
 				msr = 0;
 				r = rdmsr(0x2a) & 0x1f;
 			}
-iprint("r %d\n", r);
+//iprint("r %d\n", r);
 			f = rdmsr(0xcd) & 0x07;
-iprint("f %d\n", f);
+//iprint("f %d\n", f);
 			switch(f){
 			default:
 				return 0;
@@ -160,7 +162,7 @@ iprint("f %d\n", f);
 				hz = 400000000000ll;
 				break;
 			}
-iprint("hz %d r %d\n", hz, r);
+//iprint("hz %d r %d\n", hz, r);
 			/*
 			 * Hz is *1000 at this point.
 			 * Do the scaling then round it.

+ 1502 - 0
sys/src/9/k10/devusb.c

@@ -0,0 +1,1502 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * USB device driver framework.
+ *
+ * This is in charge of providing access to actual HCIs
+ * and providing I/O to the various endpoints of devices.
+ * A separate user program (usbd) is in charge of
+ * enumerating the bus, setting up endpoints and
+ * starting devices (also user programs).
+ *
+ * The interface provided is a violation of the standard:
+ * you're welcome.
+ *
+ * The interface consists of a root directory with several files
+ * plus a directory (epN.M) with two files per endpoint.
+ * A device is represented by its first endpoint, which
+ * is a control endpoint automatically allocated for each device.
+ * Device control endpoints may be used to create new endpoints.
+ * Devices corresponding to hubs may also allocate new devices,
+ * perhaps also hubs. Initially, a hub device is allocated for
+ * each controller present, to represent its root hub. Those can
+ * never be removed.
+ *
+ * All endpoints refer to the first endpoint (epN.0) of the device,
+ * which keeps per-device information, and also to the HCI used
+ * to reach them, although every endpoint caches that information too.
+ *
+ * epN.M/data files permit I/O and are considered DMEXCL.
+ * epN.M/ctl files provide status info and accept control requests.
+ *
+ * Endpoints may be given file names to be listed also at #u,
+ * for those drivers that have nothing to do after configuring the
+ * device and its endpoints.
+ *
+ * Drivers for different controllers are kept at usb[oue]hci.c
+ * It's likely we could factor out much from controllers into
+ * a generic controller driver, the problem is that details
+ * regarding how to handle toggles, tokens, Tds, etc. will
+ * get in the way. Thus, code is probably easier the way it is.
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+#include	"../port/usb.h"
+
+typedef struct Hcitype Hcitype;
+
+enum
+{
+	/* Qid numbers */
+	Qdir = 0,		/* #u */
+	Qusbdir,			/* #u/usb */
+	Qctl,			/* #u/usb/ctl - control requests */
+
+	Qep0dir,			/* #u/usb/ep0.0 - endpoint 0 dir */
+	Qep0io,			/* #u/usb/ep0.0/data - endpoint 0 I/O */
+	Qep0ctl,		/* #u/usb/ep0.0/ctl - endpoint 0 ctl. */
+	Qep0dummy,		/* give 4 qids to each endpoint */
+
+	Qepdir = 0,		/* (qid-qep0dir)&3 is one of these */
+	Qepio,			/* to identify which file for the endpoint */
+	Qepctl,
+
+	/* ... */
+
+	/* Usb ctls. */
+	CMdebug = 0,		/* debug on|off */
+	CMdump,			/* dump (data structures for debug) */
+
+	/* Ep. ctls */
+	CMnew = 0,		/* new nb ctl|bulk|intr|iso r|w|rw (endpoint) */
+	CMnewdev,		/* newdev full|low|high portnb (allocate new devices) */
+	CMhub,			/* hub (set the device as a hub) */
+	CMspeed,		/* speed full|low|high|no */
+	CMmaxpkt,		/* maxpkt size */
+	CMntds,			/* ntds nb (max nb. of tds per µframe) */
+	CMclrhalt,		/* clrhalt (halt was cleared on endpoint) */
+	CMpollival,		/* pollival interval (interrupt/iso) */
+	CMhz,			/* hz n (samples/sec; iso) */
+	CMsamplesz,		/* samplesz n (sample size; iso) */
+	CMinfo,			/* info infostr (ke.ep info for humans) */
+	CMdetach,		/* detach (abort I/O forever on this ep). */
+	CMaddress,		/* address (address is assigned) */
+	CMdebugep,		/* debug n (set/clear debug for this ep) */
+	CMname,			/* name str (show up as #u/name as well) */
+	CMtmout,		/* timeout n (activate timeouts for ep) */
+	CMpreset,		/* reset the port */
+
+	/* Hub feature selectors */
+	Rportenable	= 1,
+	Rportreset	= 4,
+
+};
+
+/*
+ * A registered kind of host controller interface: its name and the
+ * routine that resets a controller of that kind.
+ * Entries are recorded by addhcitype() and consulted by hciprobe().
+ */
+struct Hcitype
+{
+	char*	type;		/* type name; hciprobe compares it with cistrcmp */
+	int	(*reset)(Hci*);	/* hciprobe gives up on the controller when this returns < 0 */
+};
+
+#define QID(q)	((int)(q).path)
+
+static char Edetach[] = "device is detached";
+static char Enotconf[] = "endpoint not configured";
+char Estalled[] = "endpoint stalled";
+
+/*
+ * Requests accepted by #u/usb/ctl; usbctl() looks them up here.
+ * NOTE(review): the third field is presumably the argument count
+ * lookupcmd expects, command word included -- confirm against Cmdtab.
+ */
+static Cmdtab usbctls[] =
+{
+	{CMdebug,	"debug",	2},
+	{CMdump,	"dump",		1},
+};
+
+/*
+ * Requests accepted by the per-endpoint ctl files; epctl() looks
+ * them up here. The rows are not in enum order: each row names its
+ * own CM* index in the first field.
+ * NOTE(review): the third field is presumably the argument count
+ * lookupcmd expects (0 for "info", whose text epctl parses itself)
+ * -- confirm against Cmdtab.
+ */
+static Cmdtab epctls[] =
+{
+	{CMnew,		"new",		4},
+	{CMnewdev,	"newdev",	3},
+	{CMhub,		"hub",		1},
+	{CMspeed,	"speed",	2},
+	{CMmaxpkt,	"maxpkt",	2},
+	{CMntds,	"ntds",		2},
+	{CMpollival,	"pollival",	2},
+	{CMsamplesz,	"samplesz",	2},
+	{CMhz,		"hz",		2},
+	{CMinfo,	"info",		0},
+	{CMdetach,	"detach",	1},
+	{CMaddress,	"address",	1},
+	{CMdebugep,	"debug",	2},
+	{CMclrhalt,	"clrhalt",	1},
+	{CMname,	"name",		2},
+	{CMtmout,	"timeout",	2},
+	{CMpreset,	"reset",	1},
+};
+
+static Dirtab usbdir[] =
+{
+	"ctl",		{Qctl},		0,	0666,
+};
+
+char *usbmodename[] =
+{
+	[OREAD]	"r",
+	[OWRITE]	"w",
+	[ORDWR]	"rw",
+};
+
+static char *ttname[] =
+{
+	[Tnone]	"none",
+	[Tctl]	"control",
+	[Tiso]	"iso",
+	[Tintr]	"interrupt",
+	[Tbulk]	"bulk",
+};
+
+static char *spname[] =
+{
+	[Fullspeed]	"full",
+	[Lowspeed]	"low",
+	[Highspeed]	"high",
+	[Nospeed]	"no",
+};
+
+static int	debug;
+static Hcitype	hcitypes[Nhcis];
+static Hci*	hcis[Nhcis];
+static QLock	epslck;		/* add, del, lookup endpoints */
+static Ep*	eps[Neps];	/* all endpoints known */
+static int	epmax;		/* 1 + last endpoint index used  */
+static int	usbidgen;	/* device address generator */
+
+/*
+ * Append to s (bounded by se) the address and length of the buffer d
+ * followed by a hex dump of at most its first Maxshown bytes; "..."
+ * is appended when the buffer is longer than that.
+ * Returns what the last seprint call returned.
+ * Is there something like this in a library? should it be?
+ */
+char*
+seprintdata(char *s, char *se, unsigned char *d, int n)
+{
+	enum { Maxshown = 10 };
+	int k, shown;
+
+	shown = n > Maxshown ? Maxshown : n;
+	s = seprint(s, se, " %#p[%d]: ", d, n);
+	k = 0;
+	while(k < shown){
+		s = seprint(s, se, " %2.2ux", d[k]);
+		k++;
+	}
+	if(n > shown)
+		s = seprint(s, se, "...");
+	return s;
+}
+
+/*
+ * Translate a speed name into its index in spname[].
+ * Names not listed there yield Nospeed.
+ */
+static int
+name2speed(char *name)
+{
+	int sp;
+
+	sp = 0;
+	while(sp < nelem(spname)){
+		if(strcmp(name, spname[sp]) == 0)
+			return sp;
+		sp++;
+	}
+	return Nospeed;
+}
+
+/*
+ * Translate a transfer type name into its index in ttname[].
+ * When the name is not listed, it is read as a number and that
+ * number plus one is accepted if it is one of Tctl, Tiso, Tbulk
+ * or Tintr. Anything else yields Tnone.
+ */
+static int
+name2ttype(char *name)
+{
+	char **tp;
+	int num;
+
+	for(tp = ttname; tp < &ttname[nelem(ttname)]; tp++)
+		if(strcmp(name, *tp) == 0)
+			return tp - ttname;
+	/* may be a std. USB ep. type */
+	num = strtol(name, nil, 0);
+	if(num+1 == Tctl || num+1 == Tiso || num+1 == Tbulk || num+1 == Tintr)
+		return num+1;
+	return Tnone;
+}
+
+/*
+ * Translate an I/O mode name into its index in usbmodename[],
+ * or -1 when the name is not listed there.
+ */
+static int
+name2mode(char *mode)
+{
+	int k, found;
+
+	found = -1;
+	for(k = 0; found < 0 && k < nelem(usbmodename); k++)
+		if(strcmp(mode, usbmodename[k]) == 0)
+			found = k;
+	return found;
+}
+
+/*
+ * Map a qid path to the index in eps[] of the endpoint owning it.
+ * Every endpoint owns four consecutive qids starting at Qep0dir.
+ * Returns -1 for qids below Qep0dir (which belong to no endpoint),
+ * for indexes at or past epmax and for empty slots.
+ */
+static int
+qid2epidx(int q)
+{
+	/*
+	 * Reject non-endpoint qids before dividing: (q-Qep0dir)/4
+	 * truncates toward zero, so Qdir, Qusbdir and Qctl would
+	 * otherwise be mistaken for endpoint index 0.
+	 */
+	if(q < Qep0dir)
+		return -1;
+	q = (q-Qep0dir)/4;
+	if(q >= epmax || eps[q] == nil)
+		return -1;
+	return q;
+}
+
+/*
+ * Report whether qid path q is an endpoint qid whose low two bits,
+ * counted from Qep0dir, equal type (Qepdir, Qepio or Qepctl).
+ * Qids below Qep0dir never match.
+ */
+static int
+isqtype(int q, int type)
+{
+	return q >= Qep0dir && ((q - Qep0dir) & 3) == type;
+}
+
+/*
+ * Record a host controller interface type named t whose reset
+ * routine is r, in the next free slot of hcitypes[].
+ * Panics once Nhcis types have already been recorded.
+ */
+void
+addhcitype(char* t, int (*r)(Hci*))
+{
+	static int ntype;
+	Hcitype *slot;
+
+	if(ntype == Nhcis)
+		panic("too many USB host interface types");
+	slot = &hcitypes[ntype++];
+	slot->type = t;
+	slot->reset = r;
+}
+
+/*
+ * Append a one-line description of endpoint ep to s (bounded by se)
+ * and return the new end of the text.
+ * With all set, the description is prefixed with the device and
+ * endpoint numbers and extended with load, reference count, index,
+ * name and timeout; for an endpoint 0 it also lists the controller
+ * number and the endpoints present in the device.
+ * The endpoint is kept qlocked while it is being described.
+ */
+static char*
+seprintep(char *s, char *se, Ep *ep, int all)
+{
+	Mach *m = machp();
+	static char* dsnames[] = { "config", "enabled", "detached", "reset" };
+	Udev *d;
+	int i;
+	int di;
+
+	d = ep->dev;
+
+	qlock(ep);
+	/* on error, release the lock before passing the error on */
+	if(waserror()){
+		qunlock(ep);
+		nexterror();
+	}
+	di = ep->dev->nb;
+	if(all)
+		s = seprint(s, se, "dev %d ep %d ", di, ep->nb);
+	s = seprint(s, se, "%s", dsnames[ep->dev->state]);
+	s = seprint(s, se, " %s", ttname[ep->ttype]);
+	/* usbmodename[] only has entries for these three modes */
+	assert(ep->mode == OREAD || ep->mode == OWRITE || ep->mode == ORDWR);
+	s = seprint(s, se, " %s", usbmodename[ep->mode]);
+	s = seprint(s, se, " speed %s", spname[d->speed]);
+	s = seprint(s, se, " maxpkt %ld", ep->maxpkt);
+	s = seprint(s, se, " pollival %ld", ep->pollival);
+	s = seprint(s, se, " samplesz %ld", ep->samplesz);
+	s = seprint(s, se, " hz %ld", ep->hz);
+	s = seprint(s, se, " hub %d", ep->dev->hub);
+	s = seprint(s, se, " port %d", ep->dev->port);
+	if(ep->inuse)
+		s = seprint(s, se, " busy");
+	else
+		s = seprint(s, se, " idle");
+	if(all){
+		s = seprint(s, se, " load %uld", ep->load);
+		s = seprint(s, se, " ref %ld addr %#p", ep->ref, ep);
+		s = seprint(s, se, " idx %d", ep->idx);
+		if(ep->name != nil)
+			s = seprint(s, se, " name '%s'", ep->name);
+		if(ep->tmout != 0)
+			s = seprint(s, se, " tmout");
+		/* only the device's endpoint 0 lists its sibling endpoints */
+		if(ep == ep->ep0){
+			s = seprint(s, se, " ctlrno %#x", ep->hp->ctlrno);
+			s = seprint(s, se, " eps:");
+			for(i = 0; i < nelem(d->eps); i++)
+				if(d->eps[i] != nil)
+					s = seprint(s, se, " ep%d.%d", di, i);
+		}
+	}
+	/* the info string, when set, goes on a second line with the HCI type */
+	if(ep->info != nil)
+		s = seprint(s, se, "\n%s %s\n", ep->info, ep->hp->type);
+	else
+		s = seprint(s, se, "\n");
+	qunlock(ep);
+	poperror();
+	return s;
+}
+
+/*
+ * Allocate an endpoint for controller hp, holding one reference,
+ * and enter it in the first free slot of eps[] (growing epmax when
+ * needed). maxpkt starts at 8 and ntds at 1.
+ * Returns nil, after printing a diagnostic, when eps[] is full.
+ */
+static Ep*
+epalloc(Hci *hp)
+{
+	Ep *nep;
+	int slot;
+
+	/* nobody else can see nep until it is entered in eps[] below */
+	nep = smalloc(sizeof(Ep));
+	nep->ref = 1;
+	nep->hp = hp;
+	nep->maxpkt = 8;
+	nep->ntds = 1;
+	/* make them void */
+	nep->hz = 0;
+	nep->pollival = 0;
+	nep->samplesz = 0;
+
+	qlock(&epslck);
+	slot = 0;
+	while(slot < Neps && eps[slot] != nil)
+		slot++;
+	if(slot == Neps){
+		qunlock(&epslck);
+		free(nep);
+		print("usb: bug: too few endpoints.\n");
+		return nil;
+	}
+	nep->idx = slot;
+	if(slot >= epmax)
+		epmax = slot+1;
+	eps[slot] = nep;
+	qunlock(&epslck);
+	return nep;
+}
+
+/*
+ * Return the endpoint at index i of eps[] with a new reference
+ * taken on it, or nil when i is out of range or the slot is empty.
+ * The caller drops the reference with putep().
+ */
+static Ep*
+getep(int i)
+{
+	Ep *found;
+
+	/* unlocked early rejection; the slot is read again under the lock */
+	if(i >= 0 && i < epmax && eps[i] != nil){
+		qlock(&epslck);
+		if((found = eps[i]) != nil)
+			incref(found);
+		qunlock(&epslck);
+		return found;
+	}
+	return nil;
+}
+
+/*
+ * Drop a reference to ep; nil is accepted and ignored.
+ * When the last reference goes away the endpoint is removed from
+ * eps[] and from its device's endpoint table, the reference it held
+ * on the device's endpoint 0 is dropped in turn, and the endpoint
+ * together with its info and name strings is freed.
+ */
+static void
+putep(Ep *ep)
+{
+	Udev *d;
+
+	if(ep == nil || decref(ep) != 0)
+		return;
+	d = ep->dev;
+	/*
+	 * Everything below allows for a nil device, so the debug
+	 * print must not dereference it unchecked either.
+	 */
+	if(d != nil){
+		deprint("usb: ep%d.%d %#p released\n", d->nb, ep->nb, ep);
+	}
+	qlock(&epslck);
+	eps[ep->idx] = nil;
+	if(ep->idx == epmax-1)
+		epmax--;
+	/* an endpoint 0 holding the newest address gives it back */
+	if(ep == ep->ep0 && d != nil && d->nb == usbidgen)
+		usbidgen--;
+	qunlock(&epslck);
+	if(d != nil){
+		qlock(ep->ep0);
+		d->eps[ep->nb] = nil;
+		qunlock(ep->ep0);
+	}
+	if(ep->ep0 != ep){
+		putep(ep->ep0);
+		ep->ep0 = nil;
+	}
+	free(ep->info);
+	free(ep->name);
+	free(ep);
+}
+
+/*
+ * Print a description of every known endpoint (both the portable
+ * one and the controller's own) and then ask each controller to
+ * dump its state. Each endpoint is referenced while it is printed.
+ */
+static void
+dumpeps(void)
+{
+	Mach *m = machp();
+	static char buf[512];
+	char *p, *end;
+	Hci **hpp;
+	Ep *ep;
+	int i;
+
+	print("usb dump eps: epmax %d Neps %d (ref=1+ for dump):\n", epmax, Neps);
+	end = buf+sizeof(buf);
+	for(i = 0; i < epmax; i++){
+		ep = getep(i);
+		if(ep == nil)
+			continue;
+		if(waserror()){
+			putep(ep);
+			nexterror();
+		}
+		p = seprint(buf, end, "ep%d.%d ", ep->dev->nb, ep->nb);
+		seprintep(p, end, ep, 1);
+		print("%s", buf);
+		ep->hp->seprintep(buf, end, ep);
+		print("%s", buf);
+		poperror();
+		putep(ep);
+	}
+	print("usb dump hcis:\n");
+	for(hpp = hcis; hpp < &hcis[Nhcis]; hpp++)
+		if(*hpp != nil)
+			(*hpp)->dump(*hpp);
+}
+
+/*
+ * Hand out the next device address from usbidgen, under epslck.
+ * A complaint is printed once addresses reach 0x7F, but the
+ * address is returned all the same. hci is not used.
+ */
+static int
+newusbid(Hci *hci)
+{
+	int addr;
+
+	qlock(&epslck);
+	usbidgen++;
+	addr = usbidgen;
+	if(addr >= 0x7F)
+		print("#u: too many device addresses; reuse them more\n");
+	qunlock(&epslck);
+	return addr;
+}
+
+/*
+ * Create endpoint 0 for a new device on controller hp.
+ * The device gets a fresh address, starts in state Dconfig and is
+ * high speed when the controller is, full speed otherwise.
+ * The endpoint returned is a control endpoint open for both
+ * directions; the caller owns the reference epalloc() gave it.
+ * Raises an error when no endpoint slot is left.
+ */
+static Ep*
+newdev(Hci *hp, int ishub, int isroot)
+{
+	Ep *ep;
+	Udev *d;
+
+	ep = epalloc(hp);
+	/* epalloc returns nil when eps[] is full: do not dereference it */
+	if(ep == nil)
+		error("no free endpoints");
+	d = smalloc(sizeof(Udev));
+	d->nb = newusbid(hp);
+	d->eps[0] = ep;
+	d->ishub = ishub;
+	d->isroot = isroot;
+	if(hp->highspeed != 0)
+		d->speed = Highspeed;
+	else
+		d->speed = Fullspeed;
+	d->state = Dconfig;		/* address not yet set */
+	ep->dev = d;
+	ep->nb = 0;
+	ep->toggle[0] = ep->toggle[1] = 0;
+	ep->ep0 = ep;			/* no ref counted here */
+	ep->ttype = Tctl;
+	ep->tmout = Xfertmout;
+	ep->mode = ORDWR;
+	dprint("newdev %#p ep%d.%d %#p\n", d, d->nb, ep->nb, ep);
+	return ep;
+}
+
+/*
+ * Create a new endpoint for the device
+ * accessed via the given endpoint 0.
+ *
+ * i is the endpoint number within the device (the caller checks its
+ * range), tt the transfer type and mode the I/O mode. The new
+ * endpoint takes a reference on ep, inherits its debug setting and
+ * gets defaults that depend on the transfer type.
+ * Returns the new endpoint. Raises an error when endpoint i
+ * already exists or when no endpoint slot is left.
+ */
+static Ep*
+newdevep(Ep *ep, int i, int tt, int mode)
+{
+	Ep *nep;
+	Udev *d;
+
+	d = ep->dev;
+	if(d->eps[i] != nil)
+		error("endpoint already in use");
+	nep = epalloc(ep->hp);
+	/* epalloc returns nil when eps[] is full: do not dereference it */
+	if(nep == nil)
+		error("no free endpoints");
+	incref(ep);
+	d->eps[i] = nep;
+	nep->nb = i;
+	nep->toggle[0] = nep->toggle[1] = 0;
+	nep->ep0 = ep;
+	nep->dev = ep->dev;
+	nep->mode = mode;
+	nep->ttype = tt;
+	nep->debug = ep->debug;
+	/* set defaults */
+	switch(tt){
+	case Tctl:
+		nep->tmout = Xfertmout;
+		break;
+	case Tintr:
+		nep->pollival = 10;
+		break;
+	case Tiso:
+		nep->tmout = Xfertmout;
+		nep->pollival = 10;
+		nep->samplesz = 4;
+		nep->hz = 44100;
+		break;
+	}
+	deprint("newdevep ep%d.%d %#p\n", d->nb, nep->nb, nep);
+	/* hand back the endpoint just created, not its endpoint 0 */
+	return nep;
+}
+
+/*
+ * Permissions for an endpoint data file given the endpoint's I/O
+ * mode: read-only, write-only or read-write for owner and group,
+ * always with DMEXCL set.
+ */
+static int
+epdataperm(int mode)
+{
+	int rw;
+
+	if(mode == OREAD)
+		rw = 0440;
+	else if(mode == OWRITE)
+		rw = 0220;
+	else
+		rw = 0660;
+	return rw|DMEXCL;
+}
+
+/*
+ * Directory generator handed to devwalk, devstat, devdirread and
+ * devopen. Fills dp with entry number s of the directory that
+ * c->qid refers to (or with the parent when s is DEVDOTDOT).
+ * Returns 1 when dp was filled, 0 when slot s holds nothing to
+ * list and -1 when there is no such entry.
+ * The l, d and n arguments are not used.
+ *
+ * Layout generated:
+ *	#u		"usb" plus one data file per named endpoint
+ *	#u/usb		"ctl" plus one epN.M directory per endpoint
+ *	#u/usb/epN.M	"data" and "ctl"
+ */
+static int
+usbgen(Chan *c, char *l, Dirtab *d, int n, int s, Dir *dp)
+{
+	Mach *m = machp();
+	Qid q;
+	Dirtab *dir;
+	int perm;
+	char *se;
+	Ep *ep;
+	int nb;
+	int mode;
+
+	if(0)ddprint("usbgen q %#x s %d...", QID(c->qid), s);
+	if(s == DEVDOTDOT){
+		/* the parent of #u and of #u/usb is #u; of anything deeper, #u/usb */
+		if(QID(c->qid) <= Qusbdir){
+			mkqid(&q, Qdir, 0, QTDIR);
+			devdir(c, q, "#u", 0, eve, 0555, dp);
+		}else{
+			mkqid(&q, Qusbdir, 0, QTDIR);
+			devdir(c, q, "usb", 0, eve, 0555, dp);
+		}
+		if(0)ddprint("ok\n");
+		return 1;
+	}
+
+	switch(QID(c->qid)){
+	case Qdir:				/* list #u */
+		if(s == 0){
+			mkqid(&q, Qusbdir, 0, QTDIR);
+			devdir(c, q, "usb", 0, eve, 0555, dp);
+			if(0)ddprint("ok\n");
+			return 1;
+		}
+		/* entries after "usb" are endpoint slots; only named ones show up */
+		s--;
+		if(s < 0 || s >= epmax)
+			goto Fail;
+		ep = getep(s);
+		if(ep == nil || ep->name == nil){
+			if(ep != nil)
+				putep(ep);
+			if(0)ddprint("skip\n");
+			return 0;
+		}
+		if(waserror()){
+			putep(ep);
+			nexterror();
+		}
+		/* the named file is the endpoint's data file */
+		mkqid(&q, Qep0io+s*4, 0, QTFILE);
+		devdir(c, q, ep->name, 0, eve, epdataperm(ep->mode), dp);
+		putep(ep);
+		poperror();
+		if(0)ddprint("ok\n");
+		return 1;
+
+	case Qusbdir:				/* list #u/usb */
+	Usbdir:
+		if(s < nelem(usbdir)){
+			dir = &usbdir[s];
+			mkqid(&q, dir->qid.path, 0, QTFILE);
+			devdir(c, q, dir->name, dir->length, eve, dir->perm, dp);
+			if(0)ddprint("ok\n");
+			return 1;
+		}
+		/* entries after the fixed files are endpoint slots */
+		s -= nelem(usbdir);
+		if(s < 0 || s >= epmax)
+			goto Fail;
+		ep = getep(s);
+		if(ep == nil){
+			if(0)ddprint("skip\n");
+			return 0;
+		}
+		if(waserror()){
+			putep(ep);
+			nexterror();
+		}
+		/* the directory name is built in the process's genbuf */
+		se = m->externup->genbuf+sizeof(m->externup->genbuf);
+		seprint(m->externup->genbuf, se, "ep%d.%d", ep->dev->nb, ep->nb);
+		mkqid(&q, Qep0dir+4*s, 0, QTDIR);
+		putep(ep);
+		poperror();
+		devdir(c, q, m->externup->genbuf, 0, eve, 0755, dp);
+		if(0)ddprint("ok\n");
+		return 1;
+
+	case Qctl:
+		/* c is the ctl file itself: describe the first #u/usb entry */
+		s = 0;
+		goto Usbdir;
+
+	default:				/* list #u/usb/epN.M */
+		nb = qid2epidx(QID(c->qid));
+		ep = getep(nb);
+		if(ep == nil)
+			goto Fail;
+		/* only the mode is needed; do not keep the reference */
+		mode = ep->mode;
+		putep(ep);
+		if(isqtype(QID(c->qid), Qepdir)){
+		Epdir:
+			switch(s){
+			case 0:
+				mkqid(&q, Qep0io+nb*4, 0, QTFILE);
+				perm = epdataperm(mode);
+				devdir(c, q, "data", 0, eve, perm, dp);
+				break;
+			case 1:
+				mkqid(&q, Qep0ctl+nb*4, 0, QTFILE);
+				devdir(c, q, "ctl", 0, eve, 0664, dp);
+				break;
+			default:
+				goto Fail;
+			}
+		}else if(isqtype(QID(c->qid), Qepctl)){
+			/* c is the endpoint's ctl file: describe entry 1 */
+			s = 1;
+			goto Epdir;
+		}else{
+			/* otherwise describe the data file, entry 0 */
+			s = 0;
+			goto Epdir;
+		}
+		if(0)ddprint("ok\n");
+		return 1;
+	}
+Fail:
+	if(0)ddprint("fail\n");
+	return -1;
+}
+
+/*
+ * Try to bring up controller number ctlrno using the host
+ * controller type at index cardno of hcitypes[].
+ * A negative cardno asks for the type to be found by name; the
+ * isaconfig("usb", ctlrno, hp) lookup that would supply the name is
+ * disabled, so the name falls back to "uhci" when hp->type is
+ * unset.
+ * Returns the new Hci with its interrupt enabled, or nil when there
+ * is no such type or the type's reset routine fails.
+ */
+static Hci*
+hciprobe(int cardno, int ctlrno)
+{
+	Hci *hp;
+	Hcitype *ht;
+	char *type;
+	char name[64];
+	static int epnb = 1;	/* guess the endpoint nb. for the controller */
+
+	ddprint("hciprobe %d %d\n", cardno, ctlrno);
+	hp = smalloc(sizeof(Hci));
+	hp->ctlrno = ctlrno;
+	hp->tbdf = BUSUNKNOWN;
+
+	if(cardno < 0){
+		/* look for the first registered type with the wanted name */
+		type = hp->type;
+		if(type==nil || *type==0)
+			type = "uhci";
+		cardno = 0;
+		while(cardno < Nhcis && hcitypes[cardno].type != nil
+		&& cistrcmp(hcitypes[cardno].type, type) != 0)
+			cardno++;
+	}
+
+	if(cardno >= Nhcis || hcitypes[cardno].type == nil){
+		free(hp);
+		return nil;
+	}
+	ht = &hcitypes[cardno];
+	dprint("%s...", ht->type);
+	if(ht->reset(hp) < 0){
+		free(hp);
+		return nil;
+	}
+
+	/*
+	 * IRQ2 doesn't really exist, it's used to gang the interrupt
+	 * controllers together. A device set to IRQ2 will appear on
+	 * the second interrupt controller as IRQ9.
+	 */
+	if(hp->irq == 2)
+		hp->irq = 9;
+	snprint(name, sizeof(name), "usb%s", ht->type);
+	intrenable(hp->irq, hp->interrupt, hp, hp->tbdf, name);
+
+	/*
+	 * modern machines have too many usb controllers to list on
+	 * the console.
+	 */
+	dprint("#u/usb/ep%d.0: %s: port 0x%luX irq %d\n",
+		epnb, ht->type, hp->port, hp->irq);
+	epnb++;
+	return hp;
+}
+
+/*
+ * Find the host controllers and record them in hcis[].
+ * A first pass probes every controller number by type name; a
+ * second pass walks the registered types in order, probing each
+ * one into the free slots of hcis[] until its probe fails.
+ * (The getconf("*nousbprobe") escape is disabled.)
+ */
+static void
+usbreset(void)
+{
+	int type, slot;
+	Hci *hp;
+
+	dprint("usbreset\n");
+
+	for(slot = 0; slot < Nhcis; slot++){
+		hp = hciprobe(-1, slot);
+		if(hp != nil)
+			hcis[slot] = hp;
+	}
+
+	type = 0;
+	slot = 0;
+	while(type < Nhcis && slot < Nhcis && hcitypes[type].type != nil){
+		if(hcis[slot] != nil){
+			/* slot already taken */
+			slot++;
+			continue;
+		}
+		hp = hciprobe(type, slot);
+		hcis[slot++] = hp;
+		/* a failed probe means this type has no controllers left */
+		if(hp == nil)
+			type++;
+	}
+	if(hcis[Nhcis-1] != nil)
+		print("usbreset: bug: Nhcis (%d) too small\n", Nhcis);
+}
+
+/*
+ * Initialize every controller found by usbreset() and create the
+ * device standing for its root hub: enabled at once, with a maxpkt
+ * of 64 and an info string giving the controller's port count.
+ */
+static void
+usbinit(void)
+{
+	Hci **hpp, *hp;
+	Ep *rhub;
+	char info[40];
+
+	dprint("usbinit\n");
+	for(hpp = hcis; hpp < &hcis[Nhcis]; hpp++){
+		hp = *hpp;
+		if(hp == nil)
+			continue;
+		if(hp->init != nil)
+			hp->init(hp);
+		rhub = newdev(hp, 1, 1);		/* new root hub */
+		rhub->dev->state = Denabled;	/* although addr == 0 */
+		rhub->maxpkt = 64;
+		snprint(info, sizeof(info), "ports %d", hp->nports);
+		kstrdup(&rhub->info, info);
+	}
+}
+
+/*
+ * Attach to the device: hands spec to devattach under the device
+ * character 'u'.
+ */
+static Chan*
+usbattach(char *spec)
+{
+	return devattach(L'u', spec);
+}
+
+/*
+ * Walk: handled by devwalk with no static table, using usbgen to
+ * generate the directory entries.
+ */
+static Walkqid*
+usbwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, nil, 0, usbgen);
+}
+
+/*
+ * Stat: handled by devstat with no static table, using usbgen to
+ * generate the directory entries.
+ */
+static int
+usbstat(Chan *c, unsigned char *db, int n)
+{
+	return devstat(c, db, n, nil, 0, usbgen);
+}
+
+/*
+ * µs for the given transfer, for bandwidth allocation.
+ * This is a very rough worst case for what 5.11.3
+ * of the usb 2.0 spec says.
+ * Also, we are using maxpkt and not actual transfer sizes.
+ * Only when we are sure we
+ * are not exceeding b/w might we consider adjusting it.
+ *
+ * The cost is worked out in ns from ten times maxpkt plus
+ * per-speed constants, then divided down to µs.
+ * An unknown speed is reported and costs 0.
+ */
+static uint32_t
+usbload(int speed, int maxpkt)
+{
+	enum{ Hostns = 1000, Hubns = 333 };
+	uint32_t tenx;
+	uint32_t ns;
+
+	tenx = 10UL * maxpkt;
+	if(speed == Highspeed)
+		ns = 55*8*2 + 2 * (3 + tenx) + Hostns;
+	else if(speed == Fullspeed)
+		ns = 9107 + 84 * (4 + tenx) + Hostns;
+	else if(speed == Lowspeed)
+		ns = 64107 + 2 * Hubns + 667 * (3 + tenx) + Hostns;
+	else{
+		print("usbload: bad speed %d\n", speed);
+		ns = 0;		/* let it run */
+	}
+	return ns / 1000UL;	/* in µs */
+}
+
+/*
+ * Open a file of the device. Directories and ctl files go through
+ * devopen. Opening an endpoint data file claims the endpoint for
+ * exclusive use, checks the requested mode against the endpoint's,
+ * computes its load when still unset and has the controller open
+ * it. The endpoint reference taken here stays with the channel
+ * until usbclose().
+ */
+static Chan*
+usbopen(Chan *c, int omode)
+{
+	Mach *m = machp();
+	int path, want, busy;
+	Ep *ep;
+
+	want = openmode(omode);
+	path = QID(c->qid);
+
+	if(path < Qep0dir)
+		return devopen(c, omode, nil, 0, usbgen);
+	if(qid2epidx(path) < 0)
+		error(Eio);
+	if(isqtype(path, Qepctl) || isqtype(path, Qepdir))
+		return devopen(c, omode, nil, 0, usbgen);
+
+	ep = getep(qid2epidx(path));
+	if(ep == nil)
+		error(Eio);
+	deprint("usbopen q %#x fid %d omode %d\n", path, c->fid, want);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	/* claim the endpoint; the error is raised with the lock released */
+	qlock(ep);
+	busy = ep->inuse;
+	if(!busy)
+		ep->inuse = 1;
+	qunlock(ep);
+	if(busy)
+		error(Einuse);
+	if(waserror()){
+		ep->inuse = 0;
+		nexterror();
+	}
+	if((want != OREAD && ep->mode == OREAD)
+	|| (want != OWRITE && ep->mode == OWRITE))
+		error(Eperm);
+	if(ep->ttype == Tnone)
+		error(Enotconf);
+	ep->clrhalt = 0;
+	ep->rhrepl = -1;
+	if(ep->load == 0)
+		ep->load = usbload(ep->dev->speed, ep->maxpkt);
+	ep->hp->epopen(ep);
+
+	poperror();	/* ep->inuse */
+	poperror();	/* don't putep(): ref kept for fid using the ep. */
+
+	c->aux = nil;	/* paranoia */
+	c->offset = 0;
+	c->flag |= COPEN;
+	c->mode = want;
+	return c;
+}
+
+/*
+ * If the endpoint is in use, have its controller close it and mark
+ * it free again. Runs with the endpoint qlocked; the lock is also
+ * released when the controller raises an error.
+ */
+static void
+epclose(Ep *ep)
+{
+	Mach *m = machp();
+
+	qlock(ep);
+	if(waserror()){
+		qunlock(ep);
+		nexterror();
+	}
+	if(ep->inuse != 0){
+		ep->hp->epclose(ep);
+		ep->inuse = 0;
+	}
+	poperror();
+	qunlock(ep);
+}
+
+/*
+ * Close a channel. Only endpoint data files need work: if the
+ * channel was open, its aux string is freed, the endpoint is closed
+ * and the reference kept since usbopen() is dropped.
+ */
+static void
+usbclose(Chan *c)
+{
+	Mach *m = machp();
+	int path;
+	Ep *ep;
+
+	path = QID(c->qid);
+	if(path < Qep0dir)
+		return;
+	if(isqtype(path, Qepctl) || isqtype(path, Qepdir))
+		return;
+
+	if((ep = getep(qid2epidx(path))) == nil)
+		return;
+	deprint("usbclose q %#x fid %d ref %ld\n", path, c->fid, ep->ref);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if((c->flag & COPEN) != 0){
+		free(c->aux);
+		c->aux = nil;
+		epclose(ep);
+		putep(ep);	/* release ref kept since usbopen */
+		c->flag &= ~COPEN;
+	}
+	poperror();
+	putep(ep);	/* the ref taken by getep above */
+}
+
+/*
+ * Read from a ctl file.
+ * #u/usb/ctl yields one description line per known endpoint.
+ * An endpoint's ctl file yields that endpoint's description, unless
+ * c->aux holds a string left by an earlier write to this channel
+ * (epctl's "newdev" stores the new endpoint's name there): then
+ * that string is returned once and discarded.
+ * The text is built in a READSTR buffer and served with readstr.
+ */
+static int32_t
+ctlread(Chan *c, void *a, int32_t n, int64_t offset)
+{
+	Mach *m = machp();
+	int q;
+	char *s;
+	char *us;
+	char *se;
+	Ep *ep;
+	int i;
+
+	q = QID(c->qid);
+	/* us keeps the start of the buffer; s advances as text is added */
+	us = s = smalloc(READSTR);
+	se = s + READSTR;
+	if(waserror()){
+		free(us);
+		nexterror();
+	}
+	if(q == Qctl)
+		for(i = 0; i < epmax; i++){
+			ep = getep(i);
+			if(ep != nil){
+				if(waserror()){
+					putep(ep);
+					nexterror();
+				}
+				s = seprint(s, se, "ep%d.%d ", ep->dev->nb, ep->nb);
+				s = seprintep(s, se, ep, 0);
+				poperror();
+			}
+			/* putep accepts nil, so this is fine for empty slots too */
+			putep(ep);
+		}
+	else{
+		ep = getep(qid2epidx(q));
+		if(ep == nil)
+			error(Eio);
+		if(waserror()){
+			putep(ep);
+			nexterror();
+		}
+		if(c->aux != nil){
+			/* After a new endpoint request we read
+			 * the new endpoint name back.
+			 */
+			strecpy(s, se, c->aux);
+			free(c->aux);
+			c->aux = nil;
+		}else
+			seprintep(s, se, ep, 0);
+		poperror();
+		putep(ep);
+	}
+	n = readstr(offset, a, n, us);
+	poperror();
+	free(us);
+	return n;
+}
+
+/*
+ * Fake root hub emulation.
+ */
+/*
+ * Read the pending reply of the emulated root hub.
+ * Returns -1 when ep is not endpoint 0 of a root hub, when fewer
+ * than 2 bytes are asked for or when no reply is pending (rhrepl is
+ * negative). Otherwise a is zeroed, the reply is stored in it with
+ * PUT2, the pending reply is cleared and n is returned.
+ */
+static int32_t
+rhubread(Ep *ep, void *a, int32_t n)
+{
+	char *reply;
+
+	if(ep->dev->isroot != 0 && ep->nb == 0 && n >= 2 && ep->rhrepl >= 0){
+		reply = a;
+		memset(reply, 0, n);
+		PUT2(reply, ep->rhrepl);
+		ep->rhrepl = -1;
+		return n;
+	}
+	return -1;
+}
+
+/*
+ * Handle a setup request written to the emulated root hub.
+ * Returns -1 when ep is not endpoint 0 of a root hub, so that the
+ * caller can fall back to the controller. Otherwise the request
+ * must be Rsetuplen bytes long and be a class request to "other"
+ * in either direction; it is carried out through the controller's
+ * port routines and the result is left in ep->rhrepl for the next
+ * rhubread(). Unknown feature selectors leave a reply of 0.
+ * Returns n; raises an error for malformed requests or bad ports.
+ */
+static int32_t
+rhubwrite(Ep *ep, void *a, int32_t n)
+{
+	unsigned char *setup;
+	int req, sel, portno, on;
+	Hci *hp;
+
+	if(ep->dev == nil || ep->dev->isroot == 0 || ep->nb != 0)
+		return -1;
+	if(n != Rsetuplen)
+		error("root hub is a toy hub");
+	ep->rhrepl = -1;
+	setup = a;
+	if(!(setup[Rtype] == (Rh2d|Rclass|Rother) || setup[Rtype] == (Rd2h|Rclass|Rother)))
+		error("root hub is a toy hub");
+	hp = ep->hp;
+	req = setup[Rreq];
+	sel = GET2(setup+Rvalue);
+	portno = GET2(setup+Rindex);
+	if(portno < 1 || portno > hp->nports)
+		error("bad hub port number");
+	/* set-feature turns the feature on, anything else turns it off */
+	on = req == Rsetfeature;
+	if(sel == Rportenable)
+		ep->rhrepl = hp->portenable(hp, portno, on);
+	else if(sel == Rportreset)
+		ep->rhrepl = hp->portreset(hp, portno, on);
+	else if(sel == Rgetstatus)
+		ep->rhrepl = hp->portstatus(hp, portno);
+	else
+		ep->rhrepl = 0;
+	return n;
+}
+
+/*
+ * Read from the device. Directories are served by devdirread and
+ * ctl files by ctlread(). Reads from an endpoint data file require
+ * a device that is not detached and an endpoint that is open and
+ * not write-only; control endpoints are offered to the root hub
+ * emulation first, everything else goes to the controller.
+ */
+static int32_t
+usbread(Chan *c, void *a, int32_t n, int64_t offset)
+{
+	Mach *m = machp();
+	int q;
+	Ep *ep;
+	int nr;
+
+	q = QID(c->qid);
+
+	if(c->qid.type == QTDIR)
+		return devdirread(c, a, n, nil, 0, usbgen);
+
+	if(q == Qctl || isqtype(q, Qepctl))
+		return ctlread(c, a, n, offset);
+
+	if((ep = getep(qid2epidx(q))) == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(ep->inuse == 0 || ep->mode == OWRITE)
+		error(Ebadusefd);
+	if(ep->ttype == Tnone)
+		error("endpoint not configured");
+
+	/* a negative nr means the root hub emulation did not take it */
+	nr = -1;
+	if(ep->ttype == Tctl)
+		nr = rhubread(ep, a, n);
+	if(nr >= 0)
+		n = nr;
+	else{
+		ddeprint("\nusbread q %#x fid %d cnt %ld off %lld\n",q,c->fid,n,offset);
+		n = ep->hp->epread(ep, a, n);
+	}
+	poperror();
+	putep(ep);
+	return n;
+}
+
+/*
+ * Two to the power n, computed by shifting 1 left n places.
+ */
+static int32_t
+pow2(int n)
+{
+	int32_t v;
+
+	v = 1 << n;
+	return v;
+}
+
+/*
+ * Recompute maxpkt for the endpoint from its hz, pollival, ntds
+ * and samplesz: the number of samples per packet, rounded up, times
+ * the sample size. High speed devices divide by 8000 and take ntds
+ * into account; the others divide by 1000.
+ * The result is capped at 1024. s names the request that led here
+ * and is used only in messages.
+ */
+static void
+setmaxpkt(Ep *ep, char* s)
+{
+	int32_t spp;	/* samples per packet */
+
+	spp = ep->dev->speed == Highspeed ?
+		(ep->hz * ep->pollival * ep->ntds + 7999) / 8000 :
+		(ep->hz * ep->pollival + 999) / 1000;
+	ep->maxpkt = spp * ep->samplesz;
+	deprint("usb: %s: setmaxpkt: hz %ld poll %ld"
+		" ntds %d %s speed -> spp %ld maxpkt %ld\n", s,
+		ep->hz, ep->pollival, ep->ntds, spname[ep->dev->speed],
+		spp, ep->maxpkt);
+	if(ep->maxpkt <= 1024)
+		return;
+	print("usb: %s: maxpkt %ld > 1024. truncating\n", s, ep->maxpkt);
+	ep->maxpkt = 1024;
+}
+
+/*
+ * Many endpoint ctls. simply update the portable representation
+ * of the endpoint. The actual controller driver will look
+ * at them to setup the endpoints as dictated.
+ *
+ * ep is the endpoint whose ctl file was written, c the channel used
+ * and a/n the request text. The request is parsed with parsecmd and
+ * looked up in epctls[]. Returns the byte count to report to the
+ * writer (n, capped at 1024 by "info"); raises an error for unknown
+ * or ill-formed requests. The parsed command is freed on every path.
+ */
+static int32_t
+epctl(Ep *ep, Chan *c, void *a, int32_t n)
+{
+	Mach *m = machp();
+	int i, l, mode, nb, tt;
+	char *b, *s;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	Ep *nep;
+	Udev *d;
+	static char *Info = "info ";
+
+	d = ep->dev;
+
+	cb = parsecmd(a, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, epctls, nelem(epctls));
+	if(ct == nil)
+		error(Ebadctl);
+	i = ct->index;
+	/* these requests only make sense on the device's endpoint 0 */
+	if(i == CMnew || i == CMspeed || i == CMhub || i == CMpreset)
+		if(ep != ep->ep0)
+			error("allowed only on a setup endpoint");
+	/* other endpoints can only be reconfigured while not in use */
+	if(i != CMclrhalt && i != CMdetach && i != CMdebugep && i != CMname)
+		if(ep != ep->ep0 && ep->inuse != 0)
+			error("must configure before using");
+	switch(i){
+	case CMnew:
+		deprint("usb epctl %s\n", cb->f[0]);
+		nb = strtol(cb->f[1], nil, 0);
+		if(nb < 0 || nb >= Ndeveps)
+			error("bad endpoint number");
+		tt = name2ttype(cb->f[2]);
+		if(tt == Tnone)
+			error("unknown endpoint type");
+		mode = name2mode(cb->f[3]);
+		if(mode < 0)
+			error("unknown i/o mode");
+		newdevep(ep, nb, tt, mode);
+		break;
+	case CMnewdev:
+		deprint("usb epctl %s\n", cb->f[0]);
+		if(ep != ep->ep0 || d->ishub == 0)
+			error("not a hub setup endpoint");
+		l = name2speed(cb->f[1]);
+		if(l == Nospeed)
+			error("speed must be full|low|high");
+		nep = newdev(ep->hp, 0, 0);
+		nep->dev->speed = l;
+		if(nep->dev->speed  != Lowspeed)
+			nep->maxpkt = 64;	/* assume full speed */
+		/* remember which hub and port the new device hangs from */
+		nep->dev->hub = d->nb;
+		nep->dev->port = atoi(cb->f[2]);
+		/* next read request will read
+		 * the name for the new endpoint
+		 */
+		l = sizeof(m->externup->genbuf);
+		snprint(m->externup->genbuf, l, "ep%d.%d", nep->dev->nb, nep->nb);
+		kstrdup((char**)&c->aux, m->externup->genbuf);
+		break;
+	case CMhub:
+		deprint("usb epctl %s\n", cb->f[0]);
+		d->ishub = 1;
+		break;
+	case CMspeed:
+		l = name2speed(cb->f[1]);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l == Nospeed)
+			error("speed must be full|low|high");
+		qlock(ep->ep0);
+		d->speed = l;
+		qunlock(ep->ep0);
+		break;
+	case CMmaxpkt:
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l < 1 || l > 1024)
+			error("maxpkt not in [1:1024]");
+		qlock(ep);
+		ep->maxpkt = l;
+		qunlock(ep);
+		break;
+	case CMntds:
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l < 1 || l > 3)
+			error("ntds not in [1:3]");
+		qlock(ep);
+		ep->ntds = l;
+		qunlock(ep);
+		break;
+	case CMpollival:
+		if(ep->ttype != Tintr && ep->ttype != Tiso)
+			error("not an intr or iso endpoint");
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		/* for iso and high speed intr the value is an exponent: 2^(l-1) */
+		if(ep->ttype == Tiso ||
+		   (ep->ttype == Tintr && ep->dev->speed == Highspeed)){
+			if(l < 1 || l > 16)
+				error("pollival power not in [1:16]");
+			l = pow2(l-1);
+		}else
+			if(l < 1 || l > 255)
+				error("pollival not in [1:255]");
+		qlock(ep);
+		ep->pollival = l;
+		if(ep->ttype == Tiso)
+			setmaxpkt(ep, "pollival");
+		qunlock(ep);
+		break;
+	case CMsamplesz:
+		if(ep->ttype != Tiso)
+			error("not an iso endpoint");
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l <= 0 || l > 8)
+			error("samplesz not in [1:8]");
+		qlock(ep);
+		ep->samplesz = l;
+		setmaxpkt(ep, "samplesz");
+		qunlock(ep);
+		break;
+	case CMhz:
+		if(ep->ttype != Tiso)
+			error("not an iso endpoint");
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l <= 0 || l > 100000)
+			error("hz not in [1:100000]");
+		qlock(ep);
+		ep->hz = l;
+		setmaxpkt(ep, "hz");
+		qunlock(ep);
+		break;
+	case CMclrhalt:
+		qlock(ep);
+		deprint("usb epctl %s\n", cb->f[0]);
+		ep->clrhalt = 1;
+		qunlock(ep);
+		break;
+	case CMinfo:
+		deprint("usb epctl %s\n", cb->f[0]);
+		/* the info text is taken from the raw request, not from cb */
+		l = strlen(Info);
+		s = a;
+		if(n < l+2 || strncmp(Info, s, l) != 0)
+			error(Ebadctl);
+		if(n > 1024)
+			n = 1024;
+		b = smalloc(n);
+		memmove(b, s+l, n-l);
+		b[n-l] = 0;
+		/* drop a trailing newline */
+		if(b[n-l-1] == '\n')
+			b[n-l-1] = 0;
+		qlock(ep);
+		free(ep->info);
+		ep->info = b;
+		qunlock(ep);
+		break;
+	case CMaddress:
+		deprint("usb epctl %s\n", cb->f[0]);
+		ep->dev->state = Denabled;
+		break;
+	case CMdetach:
+		if(ep->dev->isroot != 0)
+			error("can't detach a root hub");
+		deprint("usb epctl %s ep%d.%d\n",
+			cb->f[0], ep->dev->nb, ep->nb);
+		ep->dev->state = Ddetach;
+		/* Release file system ref. for its endpoints */
+		for(i = 0; i < nelem(ep->dev->eps); i++)
+			putep(ep->dev->eps[i]);
+		break;
+	case CMdebugep:
+		if(strcmp(cb->f[1], "on") == 0)
+			ep->debug = 1;
+		else if(strcmp(cb->f[1], "off") == 0)
+			ep->debug = 0;
+		else
+			ep->debug = strtoul(cb->f[1], nil, 0);
+		print("usb: ep%d.%d debug %d\n",
+			ep->dev->nb, ep->nb, ep->debug);
+		break;
+	case CMname:
+		deprint("usb epctl %s %s\n", cb->f[0], cb->f[1]);
+		validname(cb->f[1], 0);
+		kstrdup(&ep->name, cb->f[1]);
+		break;
+	case CMtmout:
+		deprint("usb epctl %s\n", cb->f[0]);
+		if(ep->ttype == Tiso || ep->ttype == Tctl)
+			error("ctl ignored for this endpoint type");
+		ep->tmout = strtoul(cb->f[1], nil, 0);
+		/* a non-zero timeout is never shorter than Xfertmout */
+		if(ep->tmout != 0 && ep->tmout < Xfertmout)
+			ep->tmout = Xfertmout;
+		break;
+	case CMpreset:
+		deprint("usb epctl %s\n", cb->f[0]);
+		if(ep->ttype != Tctl)
+			error("not a control endpoint");
+		if(ep->dev->state != Denabled)
+			error("forbidden on devices not enabled");
+		ep->dev->state = Dreset;
+		break;
+	default:
+		panic("usb: unknown epctl %d", ct->index);
+	}
+	free(cb);
+	poperror();
+	return n;
+}
+
+/*
+ * Carry out a request written to #u/usb/ctl.
+ * "debug on|off|n" sets the global debug level and passes it to
+ * the controller of every known endpoint; "dump" prints the
+ * endpoints and controllers. Returns n.
+ */
+static int32_t
+usbctl(void *a, int32_t n)
+{
+	Mach *m = machp();
+	Cmdtab *cmd;
+	Cmdbuf *cb;
+	char *arg;
+	Ep *ep;
+	int i;
+
+	cb = parsecmd(a, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	cmd = lookupcmd(cb, usbctls, nelem(usbctls));
+	dprint("usb ctl %s\n", cb->f[0]);
+	if(cmd->index == CMdump){
+		dumpeps();
+	}else if(cmd->index == CMdebug){
+		arg = cb->f[1];
+		if(strcmp(arg, "on") == 0)
+			debug = 1;
+		else if(strcmp(arg, "off") == 0)
+			debug = 0;
+		else
+			debug = strtol(arg, nil, 0);
+		print("usb: debug %d\n", debug);
+		for(i = 0; i < epmax; i++){
+			ep = getep(i);
+			if(ep == nil)
+				continue;
+			ep->hp->debug(ep->hp, debug);
+			putep(ep);
+		}
+	}
+	poperror();
+	free(cb);
+	return n;
+}
+
+/*
+ * Write to a ctl file: #u/usb/ctl requests go to usbctl(), endpoint
+ * requests to epctl(). Writes to detached devices are refused, and
+ * so is a write on a channel that still holds an unread reply from
+ * an earlier request (the reply is discarded).
+ */
+static int32_t
+ctlwrite(Chan *c, void *a, int32_t n)
+{
+	Mach *m = machp();
+	int path;
+	Ep *ep;
+
+	path = QID(c->qid);
+	if(path == Qctl)
+		return usbctl(a, n);
+
+	if((ep = getep(qid2epidx(path))) == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(c->aux != nil && isqtype(path, Qepctl)){
+		/* Be sure we don't keep a cloned ep name */
+		free(c->aux);
+		c->aux = nil;
+		error("read, not write, expected");
+	}
+	n = epctl(ep, c, a, n);
+	poperror();
+	putep(ep);
+	return n;
+}
+
+/*
+ * Write to the device. Directories cannot be written and ctl files
+ * are handled by ctlwrite(). Writes to an endpoint data file
+ * require a device that is not detached and an endpoint that is
+ * open and not read-only; control endpoints are offered to the
+ * root hub emulation first, everything else goes to the
+ * controller. Returns n, or what the root hub emulation returned.
+ */
+static int32_t
+usbwrite(Chan *c, void *a, int32_t n, int64_t off)
+{
+	Mach *m = machp();
+	int nr, q;
+	Ep *ep;
+
+	if(c->qid.type == QTDIR)
+		error(Eisdir);
+
+	q = QID(c->qid);
+
+	if(q == Qctl || isqtype(q, Qepctl))
+		return ctlwrite(c, a, n);
+
+	if((ep = getep(qid2epidx(q))) == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(ep->inuse == 0 || ep->mode == OREAD)
+		error(Ebadusefd);
+	if(ep->ttype == Tnone)
+		error("endpoint not configured");
+
+	/* a negative nr means the root hub emulation did not take it */
+	nr = -1;
+	if(ep->ttype == Tctl)
+		nr = rhubwrite(ep, a, n);
+	if(nr >= 0)
+		n = nr;
+	else{
+		ddeprint("\nusbwrite q %#x fid %d cnt %ld off %lld\n",q, c->fid, n, off);
+		ep->hp->epwrite(ep, a, n);
+	}
+	poperror();
+	putep(ep);
+	return n;
+}
+
+/*
+ * Call the shutdown routine of every controller found, complaining
+ * about the ones that do not provide one.
+ */
+void
+usbshutdown(void)
+{
+	Hci **hpp, *hp;
+
+	for(hpp = hcis; hpp < &hcis[Nhcis]; hpp++){
+		hp = *hpp;
+		if(hp == nil)
+			continue;
+		if(hp->shutdown != nil)
+			hp->shutdown(hp);
+		else
+			print("#u: no shutdown function for %s\n", hp->type);
+	}
+}
+
+/*
+ * Device table entry for #u. Operations this driver does not
+ * implement itself use the generic routines (devcreate, devbread,
+ * devbwrite, devremove, devwstat).
+ */
+Dev usbdevtab = {
+	L'u',
+	"usb",
+
+	usbreset,
+	usbinit,
+	usbshutdown,
+	usbattach,
+	usbwalk,
+	usbstat,
+	usbopen,
+	devcreate,
+	usbclose,
+	usbread,
+	devbread,
+	usbwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};

+ 49 - 0
sys/src/9/k10/io.h

@@ -156,6 +156,55 @@ enum {					/* type 0 and type 1 pre-defined header */
 	PciINTP		= 0x3D,		/* interrupt pin */
 };
 
+/* ccrb (base class code) values; controller types */
+enum {
+	Pcibcpci1	= 0,		/* pci 1.0; no class codes defined */
+	Pcibcstore	= 1,		/* mass storage */
+	Pcibcnet	= 2,		/* network */
+	Pcibcdisp	= 3,		/* display */
+	Pcibcmmedia	= 4,		/* multimedia */
+	Pcibcmem	= 5,		/* memory */
+	Pcibcbridge	= 6,		/* bridge */
+	Pcibccomm	= 7,		/* simple comms (e.g., serial) */
+	Pcibcbasesys	= 8,		/* base system */
+	Pcibcinput	= 9,		/* input */
+	Pcibcdock	= 0xa,		/* docking stations */
+	Pcibcproc	= 0xb,		/* processors */
+	Pcibcserial	= 0xc,		/* serial bus (e.g., USB) */
+	Pcibcwireless	= 0xd,		/* wireless */
+	Pcibcintell	= 0xe,		/* intelligent i/o */
+	Pcibcsatcom	= 0xf,		/* satellite comms */
+	Pcibccrypto	= 0x10,		/* encryption/decryption */
+	Pcibcdacq	= 0x11,		/* data acquisition & signal proc. */
+};
+
+/* ccru (sub-class code) values; common cases only */
+enum {
+	/* mass storage */
+	Pciscscsi	= 0,		/* SCSI */
+	Pciscide	= 1,		/* IDE (ATA) */
+	Pciscsata	= 6,		/* SATA */
+
+	/* network */
+	Pciscether	= 0,		/* Ethernet */
+
+	/* display */
+	Pciscvga	= 0,		/* VGA */
+	Pciscxga	= 1,		/* XGA */
+	Pcisc3d		= 2,		/* 3D */
+
+	/* bridges */
+	Pcischostpci	= 0,		/* host/pci */
+	Pciscpcicpci	= 1,		/* pci/pci */
+
+	/* simple comms */
+	Pciscserial	= 0,		/* 16450, etc. */
+	Pciscmultiser	= 1,		/* multiport serial */
+
+	/* serial bus */
+	Pciscusb	= 3,		/* USB */
+};
+
 enum {					/* type 0 pre-defined header */
 	PciCIS		= 0x28,		/* cardbus CIS pointer */
 	PciSVID		= 0x2C,		/* subsystem vendor ID */

+ 5 - 0
sys/src/9/k10/k8cpu

@@ -27,6 +27,7 @@ dev +dev
 	pci
 
 	uart
+	usb
 
 uart +dev
 	uarti8250
@@ -47,6 +48,9 @@ link +dev
 ##	etherbcm	pci ethermii
 #	ethermedium
 	loopbackmedium
+	usbuhci
+	usbohci
+	usbehci         usbehcipc
 	netdevmedium
 
 #	ht
@@ -78,6 +82,7 @@ rootdir
 #	/amd64/bin/auth/factotum factotum
 #	/amd64/bin/ip/ipconfig ipconfig
 	/amd64/bin/ipconfig
+#	/amd64/bin/usb/usbd
 	/amd64/bin/rc
 	/rc/lib/rcmain
 	/amd64/bin/bind

+ 13 - 0
sys/src/9/k10/uncached.h

@@ -0,0 +1,13 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * On the PC, processor accesses, memory caches and DMA are all
+ * coherent, so we don't need to use uncached memory.
+ */

+ 244 - 0
sys/src/9/k10/usbehci.h

@@ -0,0 +1,244 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/* override default macros from ../port/usb.h */
+#undef	dprint
+#undef	ddprint
+#undef	deprint
+#undef	ddeprint
+#define dprint		if(ehcidebug)print
+#define ddprint		if(ehcidebug>1)print
+#define deprint		if(ehcidebug || ep->debug)print
+#define ddeprint	if(ehcidebug>1 || ep->debug>1)print
+
+typedef struct Ctlr Ctlr;
+typedef struct Ecapio Ecapio;
+typedef struct Edbgio Edbgio;
+typedef struct Eopio Eopio;
+typedef struct Isoio Isoio;
+typedef struct Poll Poll;
+typedef struct Qh Qh;
+typedef struct Qtree Qtree;
+
+#pragma incomplete Ctlr;
+#pragma incomplete Ecapio;
+#pragma incomplete Edbgio;
+#pragma incomplete Eopio;
+#pragma incomplete Isoio;
+#pragma incomplete Poll;
+#pragma incomplete Qh;
+#pragma incomplete Qtree;
+
+/*
+ * EHCI interface registers and bits
+ */
+enum
+{
+#ifdef WTF
+	/* Ecapio->parms reg. */
+	Cnports		= 0xF,		/* nport bits */
+	Cdbgportshift	= 20,		/* debug port */
+	Cdbgportmask	= 0xF,
+
+	/* Ecapio->capparms bits */
+	C64		= 1<<0,		/* 64-bits */
+	Cpfl		= 1<<1,	/* program'ble frame list: can be <1024 */
+	Casp		= 1<<2,		/* asynch. sched. park */
+	Ceecpshift	= 8,		/* extended capabilities ptr. */
+	Ceecpmask	= (1<<8) - 1,
+
+	Clegacy		= 1,		/* legacy support cap. id */
+	CLbiossem	= 2,		/* legacy cap. bios sem. */
+	CLossem		= 3,		/* legacy cap. os sem */
+	CLcontrol	= 4,		/* legacy support control & status */
+
+	/* typed links  */
+	Lterm		= 1,
+	Litd		= 0<<1,
+	Lqh		= 1<<1,
+	Lsitd		= 2<<1,
+	Lfstn		= 3<<1,		/* we don't use these */
+
+	/* Cmd reg. */
+	Cstop		= 0x00000,	/* stop running */
+	Crun		= 0x00001,	/* start operation */
+	Chcreset	= 0x00002,	/* host controller reset */
+	Cflsmask	= 0x0000C,	/* frame list size bits */
+	Cfls1024	= 0x00000,	/* frame list size 1024 */
+	Cfls512		= 0x00004,	/* frame list size 512 frames */
+	Cfls256		= 0x00008,	/* frame list size 256 frames */
+	Cpse		= 0x00010,	/* periodic sched. enable */
+	Case		= 0x00020,	/* async sched. enable */
+	Ciasync		= 0x00040,	/* interrupt on async advance doorbell */
+	Citcmask	= 0xff << 16,
+#endif
+	Citc1		= 0x10000,	/* interrupt threshold ctl. 1 µframe */
+	Citc4		= 0x40000,	/* same. 4 µframes */
+	/* ... */
+	Citc8		= 0x80000,	/* same. 8 µframes (can go up to 64) */
+#ifdef WTF
+	/* Sts reg. */
+	Sasyncss	= 0x08000,	/* async schedule status */
+	Speriodss	= 0x04000,	/* periodic schedule status */
+	Srecl		= 0x02000,	/* reclamation (empty async sched.) */
+	Shalted		= 0x01000,	/* h.c. is halted */
+	Sasync		= 0x00020,	/* interrupt on async advance */
+	Sherr		= 0x00010,	/* host system error */
+	Sfrroll		= 0x00008,	/* frame list roll over */
+	Sportchg	= 0x00004,	/* port change detect */
+	Serrintr	= 0x00002,		/* error interrupt */
+	Sintr		= 0x00001,	/* interrupt */
+	Sintrs		= 0x0003F,	/* interrupts status */
+
+	/* Intr reg. */
+	Iusb		= 0x01,		/* intr. on usb */
+	Ierr		= 0x02,		/* intr. on usb error */
+	Iportchg	= 0x04,		/* intr. on port change */
+	Ifrroll		= 0x08,		/* intr. on frlist roll over */
+	Ihcerr		= 0x10,		/* intr. on host error */
+	Iasync		= 0x20,		/* intr. on async advance enable */
+	Iall		= 0x3F,		/* all interrupts */
+
+	/* Config reg. */
+	Callmine	= 1,		/* route all ports to us */
+
+	/* Portsc reg. */
+	Pspresent	= 0x00000001,	/* device present */
+	Psstatuschg	= 0x00000002,	/* Pspresent changed */
+	Psenable	= 0x00000004,	/* device enabled */
+	Pschange	= 0x00000008,	/* Psenable changed */
+	Psresume	= 0x00000040,	/* resume detected */
+	Pssuspend	= 0x00000080,	/* port suspended */
+	Psreset		= 0x00000100,	/* port reset */
+	Pspower		= 0x00001000,	/* port power on */
+	Psowner		= 0x00002000,	/* port owned by companion */
+	Pslinemask	= 0x00000C00,	/* line status bits */
+	Pslow		= 0x00000400,	/* low speed device */
+
+	/* Debug port csw reg. */
+	Cowner	= 0x40000000,		/* port owned by ehci */
+	Cenable	= 0x10000000,		/* debug port enabled */
+	Cdone	= 0x00010000,		/* request is done */
+	Cbusy	= 0x00000400,		/* port in use by a driver */
+	Cerrmask= 0x00000380,		/* error code bits */
+	Chwerr	= 0x00000100,		/* hardware error */
+	Cterr	= 0x00000080,		/* transaction error */
+	Cfailed	= 0x00000040,		/* transaction did fail */
+	Cgo	= 0x00000020,		/* execute the transaction */
+	Cwrite	= 0x00000010,		/* request is a write */
+	Clen	= 0x0000000F,		/* data len */
+
+	/* Debug port pid reg. */
+	Prpidshift	= 16,		/* received pid */
+	Prpidmask	= 0xFF,
+	Pspidshift	= 8,		/* sent pid */
+	Pspidmask	= 0xFF,
+	Ptokshift	= 0,		/* token pid */
+	Ptokmask	= 0xFF,
+
+	Ptoggle		= 0x00008800,	/* to update toggles */
+	Ptogglemask	= 0x0000FF00,
+
+	/* Debug port addr reg. */
+	Adevshift	= 8,		/* device address */
+	Adevmask	= 0x7F,
+	Aepshift	= 0,		/* endpoint number */
+	Aepmask		= 0xF,
+#endif
+};
+
+#ifdef WTF
+/*
+ * Capability registers (hw)
+ */
+struct Ecapio
+{
+	uint32_t	cap;		/* 00 controller capability register */
+	uint32_t	parms;		/* 04 structural parameters register */
+	uint32_t	capparms;	/* 08 capability parameters */
+	uint32_t	portroute;	/* 0c not on the CS5536 */
+};
+
+/*
+ * Debug port registers (hw)
+ */
+struct Edbgio
+{
+	uint32_t	csw;		/* control and status */
+	uint32_t	pid;		/* USB pid */
+	unsigned char	data[8];	/* data buffer */
+	uint32_t	addr;		/* device and endpoint addresses */
+};
+#endif
+
+struct Poll
+{
+	// There is already a Lock in Rendez.
+	//Lock;
+	Rendez;
+	int	must;
+	int	does;
+};
+
+struct Ctlr
+{
+	Rendez;                 /* for waiting to async advance doorbell */
+	// There is already a Lock in Rendez.
+	//Lock;			/* for ilock. qh lists and basic ctlr I/O */
+	QLock	portlck;	/* for port resets/enable... (and doorbell) */
+	int	active;		/* in use or not */
+	Pcidev*	pcidev;
+	Ecapio*	capio;		/* Capability i/o regs */
+	Eopio*	opio;		/* Operational i/o regs */
+
+	int	nframes;	/* 1024, 512, or 256 frames in the list */
+	uint32_t*	frames;		/* periodic frame list (hw) */
+	Qh*	qhs;		/* async Qh circular list for bulk/ctl */
+	Qtree*	tree;		/* tree of Qhs for the periodic list */
+	int	ntree;		/* number of dummy qhs in tree */
+	Qh*	intrqhs;		/* list of (not dummy) qhs in tree  */
+	Isoio*	iso;		/* list of active Iso I/O */
+	uint32_t	load;
+	uint32_t	isoload;
+	int	nintr;		/* number of interrupts attended */
+	int	ntdintr;	/* number of intrs. with something to do */
+	int	nqhintr;	/* number of async td intrs. */
+	int	nisointr;	/* number of periodic td intrs. */
+	int	nreqs;
+	Poll	poll;
+};
+
+/*
+ * PC-specific stuff
+ */
+
+/*
+ * Operational registers (hw)
+ */
+struct Eopio
+{
+	uint32_t	cmd;		/* 00 command */
+	uint32_t	sts;		/* 04 status */
+	uint32_t	intr;		/* 08 interrupt enable */
+	uint32_t	frno;		/* 0c frame index */
+	uint32_t	seg;		/* 10 bits 63:32 of EHCI datastructs (unused) */
+	uint32_t	frbase;		/* 14 frame list base addr, 4096-byte boundary */
+	uint32_t	link;		/* 18 link for async list */
+	unsigned char	d2c[0x40-0x1c];	/* 1c dummy */
+	uint32_t	config;		/* 40 1: all ports default-routed to this HC */
+	uint32_t	portsc[1];	/* 44 Port status and control, one per port */
+};
+
+extern int ehcidebug;
+extern Ecapio *ehcidebugcapio;
+extern int ehcidebugport;
+
+void	ehcilinkage(Hci *hp);
+void	ehcimeminit(Ctlr *ctlr);
+void	ehcirun(Ctlr *ctlr, int on);

+ 299 - 0
sys/src/9/k10/usbehcipc.c

@@ -0,0 +1,299 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * PC-specific code for
+ * USB Enhanced Host Controller Interface (EHCI) driver
+ * High speed USB 2.0.
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+#include	"../port/usb.h"
+#include	"../port/portusbehci.h"
+#include	"usbehci.h"
+
+static Ctlr* ctlrs[Nhcis];
+static int maxehci = Nhcis;
+
+/*
+ * Take the controller away from the BIOS.  Walks the extended
+ * capability list found through capparms looking for the legacy
+ * support capability; when the BIOS semaphore is set, the OS semaphore
+ * is written and the BIOS one is polled (up to 100 times, 10ms apart)
+ * until it clears.  The legacy control word is then zeroed (no SMIs)
+ * and config is cleared.  Only the first owned capability is handled.
+ * (The list walk looks like it could live in a shared helper.)
+ */
+static void
+getehci(Ctlr* ctlr)
+{
+	Pcidev *pd;
+	int off, tries;
+
+	pd = ctlr->pcidev;
+	off = (ctlr->capio->capparms >> Ceecpshift) & Ceecpmask;
+	/* stop on end of list or on an offset that is < 0x40 or has bits outside 0xFC */
+	while(off != 0 && off >= 0x40 && (off & ~0xFC) == 0){
+		if(pcicfgr8(pd, off) == Clegacy
+		&& pcicfgr8(pd, off+CLbiossem) != 0){
+			pcicfgw8(pd, off+CLossem, 1);
+			for(tries = 0; tries < 100; tries++){
+				if(pcicfgr8(pd, off+CLbiossem) == 0)
+					break;
+				delay(10);
+			}
+			if(tries == 100){
+				dprint("ehci %#p: bios timed out\n", ctlr->capio);
+			}
+			pcicfgw32(pd, off+CLcontrol, 0);	/* no SMIs */
+			ctlr->opio->config = 0;
+			coherence();
+			return;
+		}
+		off = pcicfgr8(pd, off+1);
+	}
+}
+
+/*
+ * Bring the controller to a known state, under ilock(ctlr):
+ * stop it, turn off legacy mode, reclaim it from the BIOS, clear the
+ * upper address segment on 64-bit capable parts, issue a host
+ * controller reset (skipped for the controller whose capability
+ * registers are ehcidebugcapio), set the interrupt threshold to one
+ * interrupt per µframe and record the frame list size in ctlr->nframes.
+ */
+static void
+ehcireset(Ctlr *ctlr)
+{
+	Eopio *opio;
+	int i;
+
+	ilock(ctlr);
+	dprint("ehci %#p reset\n", ctlr->capio);
+	opio = ctlr->opio;
+
+	/*
+	 * Turn off legacy mode. Some controllers won't
+	 * interrupt us as expected otherwise.
+	 */
+	ehcirun(ctlr, 0);
+	pcicfgw16(ctlr->pcidev, 0xc0, 0x2000);
+
+	/*
+	 * reclaim from bios
+	 */
+	getehci(ctlr);
+
+	/* clear high 32 bits of address signals if it's 64 bits capable.
+	 * This is probably not needed but it does not hurt and others do it.
+	 */
+	if((ctlr->capio->capparms & C64) != 0){
+		dprint("ehci: 64 bits\n");
+		opio->seg = 0;
+		coherence();
+	}
+
+	if(ehcidebugcapio != ctlr->capio){
+		opio->cmd |= Chcreset;	/* controller reset */
+		coherence();
+		/* the bit reads back as zero once the reset is done; wait up to ~100ms */
+		for(i = 0; i < 100; i++){
+			if((opio->cmd & Chcreset) == 0)
+				break;
+			delay(1);
+		}
+		if(i == 100)
+			print("ehci %#p controller reset timed out\n", ctlr->capio);
+	}
+
+	/* requesting more interrupts per µframe may miss interrupts */
+	opio->cmd &= ~Citcmask;
+	opio->cmd |= 1 << Citcshift;		/* max of 1 intr. per 125 µs */
+	coherence();
+	switch(opio->cmd & Cflsmask){
+	case Cfls1024:
+		ctlr->nframes = 1024;
+		break;
+	case Cfls512:
+		ctlr->nframes = 512;
+		break;
+	case Cfls256:
+		ctlr->nframes = 256;
+		break;
+	default:
+		/* cmd is a 32-bit register: print it with %ud, not %ld */
+		panic("ehci: unknown fls %ud", opio->cmd & Cflsmask);
+	}
+	dprint("ehci: %d frames\n", ctlr->nframes);
+	iunlock(ctlr);
+}
+
+/*
+ * Hci debug hook: the level is global to the EHCI driver,
+ * so the controller argument is not consulted.
+ */
+static void
+setdebug(Hci *hp, int d)
+{
+	(void)hp;
+	ehcidebug = d;
+}
+
+/*
+ * Hci shutdown hook: reset the controller (waiting up to 100 polls
+ * of 1ms for the reset bit to clear), stop it and clear the frame
+ * list base, all under ilock(ctlr).
+ */
+static void
+shutdown(Hci *hp)
+{
+	Ctlr *ctlr;
+	Eopio *regs;
+	int tries;
+
+	ctlr = hp->aux;
+	ilock(ctlr);
+	regs = ctlr->opio;
+	regs->cmd |= Chcreset;		/* controller reset */
+	coherence();
+	tries = 0;
+	while(tries < 100 && (regs->cmd & Chcreset) != 0){
+		delay(1);
+		tries++;
+	}
+	if(tries >= 100)
+		print("ehci %#p controller reset timed out\n", ctlr->capio);
+	delay(100);
+	ehcirun(ctlr, 0);
+	regs->frbase = 0;
+	iunlock(ctlr);
+}
+
+/*
+ * Scan the PCI bus once (later calls return immediately) and record
+ * every EHCI controller found in ctlrs[].  A controller is skipped
+ * when its first BAR is zero or it has no interrupt line.  For each
+ * one kept, a Ctlr is allocated, its registers are mapped with vmap,
+ * and bus mastering / power state are set up.
+ *
+ * A controller that does not fit in ctlrs[], or that lies beyond the
+ * first maxehci, is reported and its Ctlr is released.  ctlrs[] is
+ * never indexed with i == Nhcis.  The vmap mapping of a rejected
+ * controller is left in place, as before.
+ */
+static void
+scanpci(void)
+{
+	static int already = 0;
+	int i;
+	uint32_t io;
+	Ctlr *ctlr;
+	Pcidev *p;
+	Ecapio *capio;
+
+	if(already)
+		return;
+	already = 1;
+	p = nil;
+	while ((p = pcimatch(p, 0, 0)) != nil) {
+		/*
+		 * Find EHCI controllers (Programming Interface = 0x20).
+		 */
+		if(p->ccrb != Pcibcserial || p->ccru != Pciscusb)
+			continue;
+		switch(p->ccrp){
+		case 0x20:
+			io = p->mem[0].bar & ~0x0f;
+			break;
+		default:
+			continue;
+		}
+		//if(0 && p->vid == Vintel && p->did == 0x3b34) {
+		//	print("usbehci: ignoring known bad ctlr %#ux/%#ux\n",
+		//		p->vid, p->did);
+		//	continue;
+		//}
+		if(io == 0){
+			print("usbehci: %x %x: failed to map registers\n",
+				p->vid, p->did);
+			continue;
+		}
+		if(p->intl == 0xff || p->intl == 0) {
+			/* io is 32 bits wide: %#ux, not %#lux */
+			print("usbehci: no irq assigned for port %#ux\n", io);
+			continue;
+		}
+		dprint("usbehci: %#x %#x: port %#ux size %#x irq %d\n",
+			p->vid, p->did, io, p->mem[0].size, p->intl);
+
+		ctlr = malloc(sizeof(Ctlr));
+		if (ctlr == nil)
+			panic("usbehci: out of memory");
+		ctlr->pcidev = p;
+		capio = ctlr->capio = vmap(io, p->mem[0].size);
+		/* operational registers start (cap & 0xff) bytes past the capability registers */
+		ctlr->opio = (Eopio*)((uintptr)capio + (capio->cap & 0xff));
+		pcisetbme(p);
+		pcisetpms(p, 0);
+		for(i = 0; i < Nhcis; i++)
+			if(ctlrs[i] == nil){
+				ctlrs[i] = ctlr;
+				break;
+			}
+		if(i >= Nhcis){
+			/* no slot was taken: ctlrs[i] would be out of bounds */
+			print("ehci: bug: more than %d controllers\n", Nhcis);
+			free(ctlr);
+			continue;
+		}
+
+		/*
+		 * currently, if we enable a second ehci controller on zt
+		 * systems w x58m motherboard, we'll wedge solid after iunlock
+		 * in init for the second one.
+		 */
+		if (i >= maxehci) {
+			print("usbehci: ignoring controllers after first %d, "
+				"at %#ux\n", maxehci, io);
+			ctlrs[i] = nil;
+			free(ctlr);
+		}
+	}
+}
+
+/*
+ * Hci reset hook registered by usbehcilink.  Picks the first inactive
+ * controller found by scanpci() that matches hp->port (any controller
+ * when hp->port is 0), marks it active, fills in the Hci fields from
+ * the PCI device and capability registers, resets the hardware and
+ * links in the generic EHCI code.
+ * Returns 0 on success, -1 when no controller is available.
+ */
+static int
+reset(Hci *hp)
+{
+	int i;
+	//char *s;
+	Ctlr *ctlr;
+	Ecapio *capio;
+	Pcidev *p;
+	static Lock resetlck;
+
+	/* no longer
+	s = getconf("*maxehci");
+	if (s != nil && s[0] >= '0' && s[0] <= '9')
+		maxehci = atoi(s);
+	if(maxehci == 0 || getconf("*nousbehci"))
+		return -1;
+	*/
+
+	ilock(&resetlck);
+	scanpci();
+
+	/*
+	 * Any adapter matches if no hp->port is supplied,
+	 * otherwise the ports must match.
+	 */
+	ctlr = nil;
+	for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){
+		ctlr = ctlrs[i];
+		if(ctlr->active == 0)
+		if(hp->port == 0 || hp->port == (uintptr)ctlr->capio){
+			ctlr->active = 1;
+			break;
+		}
+	}
+	iunlock(&resetlck);
+	/* the loop ran off the end of the list: nothing matched */
+	if(i >= Nhcis || ctlrs[i] == nil)
+		return -1;
+
+	p = ctlr->pcidev;
+	hp->aux = ctlr;
+	hp->port = (uintptr)ctlr->capio;
+	hp->irq = p->intl;
+	hp->tbdf = p->tbdf;
+
+	capio = ctlr->capio;
+	hp->nports = capio->parms & Cnports;
+
+	/*
+	 * parms is the 32-bit HCSPARAMS register: bit 16 port indicators,
+	 * bits 15:12 N_CC, bits 11:8 N_PCC, bit 7 port routing rules,
+	 * bit 4 port power control.  The fields are 32 bits: %ud, not %lud.
+	 */
+	ddprint("ehci: %s, ncc %ud npcc %ud\n",
+		capio->parms & 0x10000 ? "leds" : "no leds",
+		(capio->parms >> 12) & 0xf, (capio->parms >> 8) & 0xf);
+	ddprint("ehci: routing %s, %sport power ctl, %d ports\n",
+		capio->parms & 0x80 ? "explicit" : "automatic",
+		capio->parms & 0x10 ? "" : "no ", hp->nports);
+
+	ehcireset(ctlr);
+	ehcimeminit(ctlr);
+
+	/*
+	 * Linkage to the generic HCI driver.
+	 */
+	ehcilinkage(hp);
+	hp->shutdown = shutdown;
+	hp->debug = setdebug;
+	return 0;
+}
+
+/*
+ * Link-time hook: register this file's reset routine
+ * under the controller type name "ehci".
+ */
+void
+usbehcilink(void)
+{
+	addhcitype("ehci", reset);
+}

+ 2598 - 0
sys/src/9/k10/usbohci.c

@@ -0,0 +1,2598 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * USB Open Host Controller Interface (Ohci) driver
+ *
+ * BUGS:
+ * - Missing isochronous input streams.
+ * - Too many delays and ilocks.
+ * - bandwidth admission control must be done per-frame.
+ * - Buffering could be handled like in uhci, to avoid
+ * needed block allocation and avoid allocs for small Tds.
+ * - must warn of power overruns.
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+#include	"../port/usb.h"
+
+typedef struct Ctlio Ctlio;
+typedef struct Ctlr Ctlr;
+typedef struct Ed Ed;
+typedef struct Edpool Edpool;
+typedef struct Epx Epx;
+typedef struct Hcca Hcca;
+typedef struct Isoio Isoio;
+typedef struct Ohci Ohci;
+typedef struct Qio Qio;
+typedef struct Qtree Qtree;
+typedef struct Td Td;
+typedef struct Tdpool Tdpool;
+
+enum
+{
+	Incr		= 64,		/* for Td and Ed pools */
+
+	Align		= 0x20,		/* OHCI only requires 0x10 */
+					/* use always a power of 2 */
+
+	Abortdelay	= 1,		/* delay after cancelling Tds (ms) */
+	Tdatomic		= 8,		/* max nb. of Tds per bulk I/O op. */
+	Enabledelay	= 100,		/* waiting for a port to enable */
+
+
+	/* Queue states (software) */
+	Qidle		= 0,
+	Qinstall,
+	Qrun,
+	Qdone,
+	Qclose,
+	Qfree,
+
+	/* Ed control bits */
+	Edmpsmask	= 0x7ff,	/* max packet size */
+	Edmpsshift	= 16,
+	Edlow		= 1 << 13,	/* low speed */
+	Edskip		= 1 << 14,	/* skip this ed */
+	Ediso		= 1 << 15,	/* iso Tds used */
+	Edtddir		= 0,		/* get dir from td */
+	Edin		= 2 << 11,	/* direction in */
+	Edout		= 1 << 11,	/* direction out */
+	Eddirmask	= 3 << 11,	/* direction bits */
+	Edhalt		= 1,		/* halted (in head ptr) */
+	Edtoggle	= 2,		/* toggle (in head ptr) 1 == data1 */
+
+	/* Td control bits */
+	Tdround		= 1<<18,	/* (rounding) short packets ok */
+	Tdtoksetup	= 0<<19,	/* setup packet */
+	Tdtokin		= 2<<19,	/* in packet */
+	Tdtokout	= 1<<19,	/* out packet */
+	Tdtokmask	= 3<<19,	/* in/out/setup bits */
+	Tdnoioc		= 7<<21,	/* intr. cnt. value for no interrupt */
+	Tdusetog	= 1<<25,	/* use toggle from Td (1) or Ed (0) */
+	Tddata1		= 1<<24,	/* data toggle (1 == data1) */
+	Tddata0		= 0<<24,
+	Tdfcmask	= 7,		/* frame count (iso) */
+	Tdfcshift	= 24,
+	Tdsfmask	= 0xFFFF,	/* starting frame (iso) */
+	Tderrmask	= 3,		/* error counter */
+	Tderrshift	= 26,
+	Tdccmask	= 0xf,		/* condition code (status) */
+	Tdccshift	= 28,
+	Tdiccmask	= 0xf,		/* condition code (iso, offsets) */
+	Tdiccshift	= 12,
+
+	Ntdframes	= 0x10000,	/* # of different iso frame numbers */
+
+	/* Td errors (condition code) */
+	Tdok		= 0,
+	Tdcrc		= 1,
+	Tdbitstuff	= 2,
+	Tdbadtog	= 3,
+	Tdstalled	= 4,
+	Tdtmout		= 5,
+	Tdpidchk	= 6,
+	Tdbadpid	= 7,
+	Tddataovr	= 8,
+	Tddataund	= 9,
+	Tdbufovr	= 0xC,
+	Tdbufund	= 0xD,
+	Tdnotacc	= 0xE,
+
+	/* control register */
+	Cple		= 0x04,		/* periodic list enable */
+	Cie		= 0x08,		/* iso. list enable */
+	Ccle		= 0x10,		/* ctl list enable */
+	Cble		= 0x20,		/* bulk list enable */
+	Cfsmask		= 3 << 6,	/* functional state... */
+	Cfsreset	= 0 << 6,
+	Cfsresume	= 1 << 6,
+	Cfsoper		= 2 << 6,
+	Cfssuspend	= 3 << 6,
+
+	/* command status */
+	Sblf =	1 << 2,			/* bulk list (load) flag */
+	Sclf =	1 << 1,			/* control list (load) flag */
+	Shcr =	1 << 0,			/* host controller reset */
+
+	/* intr enable */
+	Mie =	1 << 31,
+	Oc =	1 << 30,
+	Rhsc =	1 << 6,
+	Fno =	1 << 5,
+	Ue =	1 << 4,
+	Rd =	1 << 3,
+	Sf =	1 << 2,
+	Wdh =	1 << 1,
+	So =	1 << 0,
+
+	Fmaxpktmask = 0x7fff,
+	Fmaxpktshift = 16,
+	HcRhDescA_POTPGT_MASK =	0xff << 24,
+	HcRhDescA_POTPGT_SHIFT =	24,
+
+	/* Rh status */
+	Lps =	1 << 0,
+	Cgp =	1 << 0,
+	Oci =	1 << 1,
+	Psm =	1 << 8,
+	Nps =	1 << 9,
+	Drwe =	1 << 15,
+	Srwe =	1 << 15,
+	Lpsc =	1 << 16,
+	Ccic =	1 << 17,
+	Crwe =	1 << 31,
+
+	/* port status */
+	Ccs =	0x00001,	/* current connect status */
+	Pes =	0x00002,	/* port enable status */
+	Pss =	0x00004,	/* port suspend status */
+	Poci =	0x00008,	/* over current indicator */
+	Prs =	0x00010,	/* port reset status */
+	Pps =	0x00100,	/* port power status */
+	Lsda =	0x00200,	/* low speed device attached */
+	Csc =	0x10000,	/* connect status change */
+	Pesc =	0x20000,	/* enable status change */
+	Pssc =	0x40000,	/* suspend status change */
+	Ocic =	0x80000,	/* over current ind. change */
+	Prsc =	0x100000,	/* reset status change */
+
+	/* port status write bits */
+	Cpe =	0x001,		/* clear port enable */
+	Spe =	0x002,		/* set port enable */
+	Spr =	0x010,		/* set port reset */
+	Spp =	0x100,		/* set port power */
+	Cpp =	0x200,		/* clear port power */
+
+};
+
+/*
+ * Endpoint descriptor. (first 4 words used by hardware)
+ */
+struct Ed {
+	uint32_t	ctrl;
+	uint32_t	tail;		/* transfer descriptor */
+	uint32_t	head;
+	uint32_t	nexted;
+
+	Ed*	next;		/* sw; in free list or next in list */
+	Td*	tds;		/* in use by current xfer; all for iso */
+	Ep*	ep;		/* debug/align */
+	Ed*	inext;		/* debug/align (dump interrupt eds). */
+};
+
+/*
+ * Endpoint I/O state (software), per direction.
+ */
+struct Qio
+{
+	QLock;			/* for the entire I/O process */
+	Rendez;			/* wait for completion */
+	Ed*	ed;		/* to place Tds on it */
+	int	sched;		/* queue number (intr/iso) */
+	int	toggle;		/* Tddata0/Tddata1 */
+	uint32_t	usbid;		/* device/endpoint address */
+	int	tok;		/* Tdtoksetup, Tdtokin, Tdtokout */
+	int32_t	iotime;		/* last I/O time; to hold interrupt polls */
+	int	debug;		/* for the endpoint */
+	char*	err;		/* error status */
+	int	state;		/* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */
+	int32_t	bw;		/* load (intr/iso) */
+};
+
+struct Ctlio
+{
+	Qio;			/* single Ed for all transfers */
+	unsigned char*	data;		/* read from last ctl req. */
+	int	ndata;		/* number of bytes read */
+};
+
+struct Isoio
+{
+	Qio;
+	int	nframes;	/* number of frames for a full second */
+	Td*	atds;		/* Tds avail for further I/O */
+	int	navail;		/* number of avail Tds */
+	uint32_t	frno;		/* next frame number avail for I/O */
+	uint32_t	left;		/* remainder after rounding Hz to samples/ms */
+	int	nerrs;		/* consecutive errors on iso I/O */
+};
+
+/*
+ * Transfer descriptor. Size must be multiple of 32
+ * First block is used by hardware (aligned to 32).
+ */
+struct Td
+{
+	uint32_t	ctrl;
+	uint32_t	cbp;		/* current buffer pointer */
+	uint32_t	nexttd;
+	uint32_t	be;
+	uint16_t	offsets[8];	/* used by Iso Tds only */
+
+	Td*	next;		/* in free or Ed tds list */
+	Td*	anext;		/* in avail td list (iso) */
+	Ep*	ep;		/* using this Td for I/O */
+	Qio*	io;		/* using this Td for I/O */
+	Block*	bp;		/* data for this Td */
+	uint32_t	nbytes;		/* bytes in this Td */
+	uint32_t	cbp0;		/* initial value for cbp */
+	uint32_t	last;		/* true for last Td in Qio */
+};
+
+/*
+ * Host controller communication area (hardware)
+ */
+struct Hcca
+{
+	uint32_t	intrtable[32];
+	uint16_t	framenumber;
+	uint16_t	pad1;
+	uint32_t	donehead;
+	unsigned char	reserved[116];
+};
+
+/*
+ * I/O registers
+ */
+struct Ohci
+{
+	/* control and status group */
+	uint32_t	revision;		/*00*/
+	uint32_t	control;		/*04*/
+	uint32_t	cmdsts;			/*08*/
+	uint32_t	intrsts;			/*0c*/
+	uint32_t	intrenable;		/*10*/
+	uint32_t	intrdisable;		/*14*/
+
+	/* memory pointer group */
+	uint32_t	hcca;			/*18*/
+	uint32_t	periodcurred;		/*1c*/
+	uint32_t	ctlheaded;		/*20*/
+	uint32_t	ctlcurred;		/*24*/
+	uint32_t	bulkheaded;		/*28*/
+	uint32_t	bulkcurred;		/*2c*/
+	uint32_t	donehead;		/*30*/
+
+	/* frame counter group */
+	uint32_t	fminterval;		/*34*/
+	uint32_t	fmremaining;		/*38*/
+	uint32_t	fmnumber;		/*3c*/
+	uint32_t	periodicstart;		/*40*/
+	uint32_t	lsthreshold;		/*44*/
+
+	/* root hub group */
+	uint32_t	rhdesca;		/*48*/
+	uint32_t	rhdescb;		/*4c*/
+	uint32_t	rhsts;			/*50*/
+	uint32_t	rhportsts[15];		/*54*/
+	uint32_t	pad25[20];		/*90*/
+
+	/* unknown */
+	uint32_t	hostueaddr;		/*e0*/
+	uint32_t	hostuests;		/*e4*/
+	uint32_t	hosttimeoutctrl;		/*e8*/
+	uint32_t	pad59;			/*ec*/
+	uint32_t	pad60;			/*f0*/
+	uint32_t	hostrevision;		/*f4*/
+	uint32_t	pad62[2];
+					/*100*/
+};
+
+/*
+ * Endpoint tree (software)
+ */
+struct Qtree
+{
+	int	nel;
+	int	depth;
+	uint32_t*	bw;
+	Ed**	root;
+};
+
+struct Tdpool
+{
+	Lock;
+	Td*	free;
+	int	nalloc;
+	int	ninuse;
+	int	nfree;
+};
+
+struct Edpool
+{
+	Lock;
+	Ed*	free;
+	int	nalloc;
+	int	ninuse;
+	int	nfree;
+};
+
+struct Ctlr
+{
+	Lock;			/* for ilock; lists and basic ctlr I/O */
+	QLock	resetl;		/* lock controller during USB reset */
+	int	active;
+	Ctlr*	next;
+	int	nports;
+
+	Ohci*	ohci;		/* base I/O address */
+	Hcca*	hcca;		/* intr/done Td lists (used by hardware) */
+	int	overrun;	/* sched. overrun */
+	Ed*	intrhd;		/* list of intr. eds in tree */
+	Qtree*	tree;		/* tree for t Ep i/o */
+	int	ntree;		/* number of dummy Eds in tree */
+	Pcidev*	pcidev;
+};
+
+#define dqprint		if(debug || io && io->debug)print
+#define ddqprint		if(debug>1 || (io && io->debug>1))print
+#define diprint		if(debug || iso && iso->debug)print
+#define ddiprint		if(debug>1 || (iso && iso->debug>1))print
+#define TRUNC(x, sz)	((x) & ((sz)-1))
+
+static int ohciinterrupts[Nttypes];
+static char* iosname[] = { "idle", "install", "run", "done", "close", "FREE" };
+
+static int debug;
+static Edpool edpool;
+static Tdpool tdpool;
+static Ctlr* ctlrs[Nhcis];
+
+/* Never used
+static	char	EnotWritten[] = "usb write unfinished";
+static	char	EnotRead[] = "usb read unfinished";
+static	char	Eunderrun[] = "usb endpoint underrun";
+
+static	QLock	usbhstate;	/ * protects name space state * /
+
+
+static int	schedendpt(Ctlr *ub, Ep *ep);
+static void	unschedendpt(Ctlr *ub, Ep *ep);
+static int32_t	qtd(Ctlr*, Ep*, int, Block*, unsigned char*, unsigned char*, int, uint32_t);
+*/
+
+static char* errmsgs[] =
+{
+[Tdcrc]		"crc error",
+[Tdbitstuff]	"bit stuffing error",
+[Tdbadtog]	"bad toggle",
+[Tdstalled]	Estalled,
+[Tdtmout]	"timeout error",
+[Tdpidchk]	"pid check error",
+[Tdbadpid]	"bad pid",
+[Tddataovr]	"data overrun",
+[Tddataund]	"data underrun",
+[Tdbufovr]	"buffer overrun",
+[Tdbufund]	"buffer underrun",
+[Tdnotacc]	"not accessed"
+};
+
+/*
+ * Turn a 32-bit physical address taken from a hardware
+ * structure into a kernel pointer; 0 maps to nil.
+ */
+static void*
+pa2ptr(uint32_t pa)
+{
+	if(pa != 0)
+		return KADDR(pa);
+	return nil;
+}
+
+/*
+ * Turn a kernel pointer into the 32-bit physical address
+ * stored in hardware structures; nil maps to 0.
+ */
+static uint32_t
+ptr2pa(void *p)
+{
+	if(p != nil)
+		return PADDR(p);
+	return 0;
+}
+
+/*
+ * Wait until the low six bits of the frame number in the HCCA
+ * change, polling every 2ms.  There is no timeout: this only
+ * returns once the frame number moves.
+ */
+static void
+waitSOF(Ctlr *ub)
+{
+	int start;
+
+	start = ub->hcca->framenumber & 0x3f;
+	for(;;){
+		delay(2);
+		if((ub->hcca->framenumber & 0x3f) != start)
+			break;
+	}
+}
+
+/*
+ * Map a Td condition code to its message from errmsgs[].
+ * Codes past the end of the table, and codes the table has
+ * no entry for, yield nil.
+ */
+static char*
+errmsg(int err)
+{
+	char *msg;
+
+	msg = nil;
+	if(err < nelem(errmsgs))
+		msg = errmsgs[err];
+	return msg;
+}
+
+/* Head of the control Ed list, as currently programmed in the controller. */
+static Ed*
+ctlhd(Ctlr *ctlr)
+{
+	Ohci *regs;
+
+	regs = ctlr->ohci;
+	return pa2ptr(regs->ctlheaded);
+}
+
+/* Head of the bulk Ed list, as currently programmed in the controller. */
+static Ed*
+bulkhd(Ctlr *ctlr)
+{
+	Ohci *regs;
+
+	regs = ctlr->ohci;
+	return pa2ptr(regs->bulkheaded);
+}
+
+/*
+ * Make next follow ed, updating both the hardware link (nexted,
+ * a physical address) and the software link (next).
+ * A nil ed is a caller bug: it is reported with the caller's pc and
+ * the call is ignored instead of dereferencing the nil pointer.
+ */
+static void
+edlinked(Ed *ed, Ed *next)
+{
+	if(ed == nil){
+		print("edlinked: nil ed: pc %#p\n", getcallerpc(&ed));
+		return;
+	}
+	ed->nexted = ptr2pa(next);
+	ed->next = next;
+}
+
+/*
+ * Install ed as the head of the control list.  For a non-nil head
+ * the control-list-filled flag is set so it is reloaded on the next pass.
+ */
+static void
+setctlhd(Ctlr *ctlr, Ed *ed)
+{
+	Ohci *regs;
+
+	regs = ctlr->ohci;
+	regs->ctlheaded = ptr2pa(ed);
+	if(ed == nil)
+		return;
+	regs->cmdsts |= Sclf;	/* reload it on next pass */
+}
+
+/*
+ * Install ed as the head of the bulk list.  For a non-nil head
+ * the bulk-list-filled flag is set so it is reloaded on the next pass.
+ */
+static void
+setbulkhd(Ctlr *ctlr, Ed *ed)
+{
+	Ohci *regs;
+
+	regs = ctlr->ohci;
+	regs->bulkheaded = ptr2pa(ed);
+	if(ed == nil)
+		return;
+	regs->cmdsts |= Sblf;	/* reload it on next pass */
+}
+
+/*
+ * Remove ed from the control list.  Control list processing is
+ * disabled and a frame boundary is awaited before the list is edited;
+ * processing is enabled again on every exit path, including the one
+ * where ed is not on the list (otherwise control transfers would stay
+ * stopped for the whole controller).
+ */
+static void
+unlinkctl(Ctlr *ctlr, Ed *ed)
+{
+	Ed *this, *prev, *next;
+
+	ctlr->ohci->control &= ~Ccle;
+	waitSOF(ctlr);
+	this = ctlhd(ctlr);
+	ctlr->ohci->ctlcurred = 0;
+	prev = nil;
+	while(this != nil && this != ed){
+		prev = this;
+		this = this->next;
+	}
+	if(this == nil){
+		print("unlinkctl: not found\n");
+		ctlr->ohci->control |= Ccle;	/* don't leave the list disabled */
+		return;
+	}
+	next = this->next;
+	if(prev == nil)
+		setctlhd(ctlr, next);
+	else
+		edlinked(prev, next);
+	ctlr->ohci->control |= Ccle;
+	edlinked(ed, nil);		/* wipe out next field */
+}
+
+/*
+ * Remove ed from the bulk list.  Bulk list processing is disabled
+ * and a frame boundary is awaited before the list is edited;
+ * processing is enabled again on every exit path, including the one
+ * where ed is not on the list (otherwise bulk transfers would stay
+ * stopped for the whole controller).
+ */
+static void
+unlinkbulk(Ctlr *ctlr, Ed *ed)
+{
+	Ed *this, *prev, *next;
+
+	ctlr->ohci->control &= ~Cble;
+	waitSOF(ctlr);
+	this = bulkhd(ctlr);
+	ctlr->ohci->bulkcurred = 0;
+	prev = nil;
+	while(this != nil && this != ed){
+		prev = this;
+		this = this->next;
+	}
+	if(this == nil){
+		print("unlinkbulk: not found\n");
+		ctlr->ohci->control |= Cble;	/* don't leave the list disabled */
+		return;
+	}
+	next = this->next;
+	if(prev == nil)
+		setbulkhd(ctlr, next);
+	else
+		edlinked(prev, next);
+	ctlr->ohci->control |= Cble;
+	edlinked(ed, nil);		/* wipe out next field */
+}
+
+/*
+ * Replace the address bits of the Ed control word, i.e. the
+ * bits covered by (Epmax<<7)|Devmax, with those from addr.
+ */
+static void
+edsetaddr(Ed *ed, uint32_t addr)
+{
+	uint32_t mask;
+
+	mask = (Epmax<<7)|Devmax;
+	ed->ctrl = (ed->ctrl & ~mask) | (addr & mask);
+}
+
+/*
+static void
+edsettog(Ed *ed, int c)
+{
+	if(c != 0)
+		ed->head |= Edtoggle;
+	else
+		ed->head &= ~Edtoggle;
+}
+*/
+
+/* Non-zero when the toggle bit kept in the Ed head pointer is set (data1). */
+static int
+edtoggle(Ed *ed)
+{
+	uint32_t hd;
+
+	hd = ed->head;
+	return hd & Edtoggle;
+}
+
+/* Non-zero when the halted bit kept in the Ed head pointer is set. */
+static int
+edhalted(Ed *ed)
+{
+	uint32_t hd;
+
+	hd = ed->head;
+	return hd & Edhalt;
+}
+
+/* Extract the max packet size field from the Ed control word. */
+static int
+edmaxpkt(Ed *ed)
+{
+	uint32_t field;
+
+	field = ed->ctrl >> Edmpsshift;
+	return field & Edmpsmask;
+}
+
+/*
+ * Store m (truncated to the width of the field) as the
+ * max packet size in the Ed control word.
+ */
+static void
+edsetmaxpkt(Ed *ed, int m)
+{
+	uint32_t rest, field;
+
+	rest = ed->ctrl & ~(Edmpsmask << Edmpsshift);
+	field = (m&Edmpsmask) << Edmpsshift;
+	ed->ctrl = rest | field;
+}
+
+static int
+tderrs(Td *td)
+{
+	return (td->ctrl >> Tdccshift) & Tdccmask;
+}
+
+/* Token bits of the Td control word, still in place: Tdtoksetup, Tdtokin or Tdtokout. */
+static int
+tdtok(Td *td)
+{
+	uint32_t ctl;
+
+	ctl = td->ctrl;
+	return ctl & Tdtokmask;
+}
+
+/*
+ * Return a zeroed Td taken from the free list, refilling the list
+ * with Incr more Tds from xspanalloc when it is empty.
+ *
+ * Each Td occupies a slot whose size is sizeof(Td) rounded up to a
+ * multiple of Align, so every Td in a chunk starts on an Align
+ * boundary and not just the first one.  With 8-byte pointers
+ * sizeof(Td) is not a multiple of 16, and stepping through the chunk
+ * as a plain Td array would misalign every other element and trip the
+ * assert below.
+ */
+static Td*
+tdalloc(void)
+{
+	Td *td;
+	unsigned char *pool;
+	uintptr tdsz;
+	int i;
+
+	tdsz = (sizeof(Td) + Align-1) & ~(uintptr)(Align-1);
+	lock(&tdpool);
+	if(tdpool.free == nil){
+		ddprint("ohci: tdalloc %d Tds\n", Incr);
+		pool = xspanalloc(Incr*tdsz, Align, 0);
+		if(pool == nil)
+			panic("tdalloc");
+		/* push the slots last to first */
+		for(i=Incr; --i>=0;){
+			td = (Td*)(pool + i*tdsz);
+			td->next = tdpool.free;
+			tdpool.free = td;
+		}
+		tdpool.nalloc += Incr;
+		tdpool.nfree += Incr;
+	}
+	tdpool.ninuse++;
+	tdpool.nfree--;
+	td = tdpool.free;
+	tdpool.free = td->next;
+	memset(td, 0, sizeof(Td));
+	unlock(&tdpool);
+
+	assert(((uintptr)td & 0xF) == 0);
+	return td;
+}
+
+/*
+ * Return td to the free list, releasing its data block.  A nil td is
+ * ignored.  Freed Tds are filled with the byte 7; a Td whose hardware
+ * link still reads as that pattern is being freed twice.  The check is
+ * made before anything in the Td is used, because on a second free
+ * td->bp holds poison too.
+ */
+static void
+tdfree(Td *td)
+{
+	enum {
+		Poisonbyte	= 7,
+		Poisonword	= 0x07070707,	/* a 32-bit field after memset(Poisonbyte) */
+	};
+
+	if(td == 0)
+		return;
+	if(td->nexttd == Poisonword)
+		panic("ohci: tdfree: double free");
+	freeb(td->bp);
+	td->bp = nil;
+	lock(&tdpool);
+	memset(td, Poisonbyte, sizeof(Td));	/* poison */
+	td->next = tdpool.free;
+	tdpool.free = td;
+	tdpool.ninuse--;
+	tdpool.nfree++;
+	unlock(&tdpool);
+}
+
+/*
+ * Return a zeroed Ed taken from the free list, refilling the
+ * list with Incr more Eds from xspanalloc when it is empty.
+ */
+static Ed*
+edalloc(void)
+{
+	Ed *chunk, *fresh;
+	int n;
+
+	lock(&edpool);
+	if(edpool.free == nil){
+		ddprint("ohci: edalloc %d Eds\n", Incr);
+		chunk = xspanalloc(Incr*sizeof(Ed), Align, 0);
+		if(chunk == nil)
+			panic("edalloc");
+		/* push the elements last to first */
+		for(n = Incr-1; n >= 0; n--){
+			chunk[n].next = edpool.free;
+			edpool.free = &chunk[n];
+		}
+		edpool.nfree += Incr;
+		edpool.nalloc += Incr;
+	}
+	fresh = edpool.free;
+	edpool.free = fresh->next;
+	edpool.nfree--;
+	edpool.ninuse++;
+	memset(fresh, 0, sizeof(Ed));
+	unlock(&edpool);
+
+	return fresh;
+}
+
+/*
+ * Return ed to the free list after freeing the Tds still hanging
+ * from it (at most about 2000; more is reported as a bug).  A nil ed
+ * is ignored.  Freed Eds are filled with the byte 9; an Ed whose
+ * hardware link still reads as that pattern is being freed twice.  The
+ * check is made before the Td list is walked, because on a second free
+ * ed->tds holds poison too.
+ */
+static void
+edfree(Ed *ed)
+{
+	enum {
+		Poisonbyte	= 9,
+		Poisonword	= 0x09090909,	/* a 32-bit field after memset(Poisonbyte) */
+	};
+	Td *td, *next;
+	int i;
+
+	if(ed == 0)
+		return;
+	if(ed->nexted == Poisonword)
+		panic("ohci: edfree: double free");
+	i = 0;
+	for(td = ed->tds; td != nil; td = next){
+		next = td->next;
+		tdfree(td);
+		if(i++ > 2000){
+			print("ohci: bug: ed with more than 2000 tds\n");
+			break;
+		}
+	}
+	lock(&edpool);
+	memset(ed, Poisonbyte, sizeof(Ed));	/* poison */
+	ed->next = edpool.free;
+	edpool.free = ed;
+	edpool.ninuse--;
+	edpool.nfree++;
+	unlock(&edpool);
+	ddprint("edfree: ed %#p\n", ed);
+}
+
+/*
+ * return the exponent of the smallest power of 2 >= n,
+ * i.e. the smallest i such that (1 << i) >= n.
+ * Returns 0 for n <= 1.
+ */
+static int
+flog2(int n)
+{
+	int i;
+
+	if(n <= 1)
+		return 0;
+	/* shift n-1 down instead of 1 up, so large n cannot overflow */
+	for(i = 0; ((n - 1) >> i) > 0; i++)
+		;
+	return i;
+}
+
+/*
+ * return the exponent of the largest power of 2 <= n,
+ * i.e. the largest i such that (1 << i) <= n.
+ * Returns 0 for n <= 1.
+ */
+static int
+flog2lower(int n)
+{
+	int i;
+
+	if(n <= 1)
+		return 0;
+	/* shift n down instead of 1 up, so large n cannot overflow */
+	for(i = 0; (n >> (i + 1)) > 0; i++)
+		;
+	return i;
+}
+
+/*
+ * Choose a queue of the tree for an endpoint polled every pollival.
+ * The candidates are the nodes (1<<d)-1 .. (1<<(d+1))-2, where d is
+ * flog2lower(pollival) clamped to qt->depth. The load of a candidate
+ * is the sum of qt->bw over the node and its ancestors up to node 0
+ * (the parent of node j being (j-1)/2).
+ * Returns the least loaded candidate, or -1 when the most loaded
+ * candidate plus bw would reach limit.
+ */
+static int
+pickschedq(Qtree *qt, int pollival, uint32_t bw, uint32_t limit)
+{
+	int level, node, end, up, chosen;
+	uint32_t lightest, heaviest, load;
+
+	level = flog2lower(pollival);
+	if(level > qt->depth)
+		level = qt->depth;
+	end = (1 << (level+1)) - 1;
+	chosen = -1;
+	heaviest = 0;
+	lightest = ~0;
+	node = (1 << level) - 1;
+	while(node < end){
+		/* add up the bandwidth from this node to the root */
+		load = qt->bw[0];
+		up = node;
+		while(up > 0){
+			load += qt->bw[up];
+			up = (up - 1) / 2;
+		}
+		if(load < lightest){
+			lightest = load;
+			chosen = node;
+		}
+		if(load > heaviest)
+			heaviest = load;
+		node++;
+	}
+	if(heaviest + bw >= limit)
+		return -1;
+	return chosen;
+}
+
+/*
+ * Link io's Ed into the periodic schedule.
+ * A queue is picked with pickschedq, io->bw is charged to it, the Ed
+ * is linked right after the queue's root Ed, and the Ed is also put
+ * at the head of ctlr->intrhd.
+ * Returns 0 on success. When no queue is available it returns -1 and
+ * sets io->sched to -1, so that a later unschedq (which returns early
+ * for a negative sched) does not adjust the bandwidth of, or search,
+ * a queue the Ed was never linked into.
+ */
+static int
+schedq(Ctlr *ctlr, Qio *io, int pollival)
+{
+	int q;
+	Ed *ted;
+
+	q = pickschedq(ctlr->tree, pollival, io->bw, ~0);
+	ddqprint("ohci: sched %#p q %d, ival %d, bw %ld\n", io, q, pollival, io->bw);
+	if(q < 0){
+		print("ohci: no room for ed\n");
+		io->sched = -1;
+		return -1;
+	}
+	ctlr->tree->bw[q] += io->bw;
+	ted = ctlr->tree->root[q];
+	io->sched = q;
+	/* set the new Ed's link first, then make the root point to it */
+	edlinked(io->ed, ted->next);
+	edlinked(ted, io->ed);
+	io->ed->inext = ctlr->intrhd;
+	ctlr->intrhd = io->ed;
+	return 0;
+}
+
+/*
+ * Undo schedq for qio: give back the bandwidth charged to queue
+ * qio->sched, unlink qio->ed from that queue's list and remove it
+ * from ctlr->intrhd. Does nothing when qio->sched is negative.
+ * A missing Ed is reported with print, not treated as fatal.
+ */
+static void
+unschedq(Ctlr *ctlr, Qio *qio)
+{
+	int q;
+	Ed *prev, *this, *next;
+	Ed **l;
+
+	q = qio->sched;
+	if(q < 0)
+		return;
+	ctlr->tree->bw[q] -= qio->bw;
+
+	/* find the Ed in the queue, keeping track of its predecessor */
+	prev = ctlr->tree->root[q];
+	this = prev->next;
+	while(this != nil && this != qio->ed){
+		prev = this;
+		this = this->next;
+	}
+	if(this == nil)
+		print("ohci: unschedq %d: not found\n", q);
+	else{
+		next = this->next;
+		edlinked(prev, next);
+	}
+	/* NOTE(review): presumably waits for a frame boundary so the controller no longer uses the Ed; confirm in waitSOF */
+	waitSOF(ctlr);
+	for(l = &ctlr->intrhd; *l != nil; l = &(*l)->inext)
+		if(*l == qio->ed){
+			*l = (*l)->inext;
+			return;
+		}
+	print("ohci: unschedq: ed %#p not found\n", qio->ed);
+}
+
+/*
+ * Append the name of token tok (" setup", " in" or " out") to the
+ * buffer s..e; any other value appends nothing.
+ * Returns the new end of the text.
+ */
+static char*
+seprinttdtok(char *s, char *e, int tok)
+{
+	if(tok == Tdtoksetup)
+		return seprint(s, e, " setup");
+	if(tok == Tdtokin)
+		return seprint(s, e, " in");
+	if(tok == Tdtokout)
+		return seprint(s, e, " out");
+	return s;
+}
+
+
+/*
+ * Format a description of td into the buffer s..e and return the
+ * new end of the text. iso selects which control word fields are
+ * decoded: general Td fields when 0, isochronous ones otherwise
+ * (for which offsets[0] is printed as well).
+ * A nil td prints as "<nil td>".
+ */
+static char*
+seprinttd(char *s, char *e, Td *td, int iso)
+{
+	int i;
+	Block *bp;
+
+	if(td == nil)
+		return seprint(s, e, "<nil td>\n");
+	/* NOTE(review): td->ctrl (and cbp0/cbp/nexttd/be below) are printed with %#p; confirm their width matches a pointer on this port */
+	s = seprint(s, e, "%#p ep %#p ctrl %#p", td, td->ep, td->ctrl);
+	s = seprint(s, e, " cc=%#ulx", (td->ctrl >> Tdccshift) & Tdccmask);
+	if(iso == 0){
+		if((td->ctrl & Tdround) != 0)
+			s = seprint(s, e, " rnd");
+		s = seprinttdtok(s, e, td->ctrl & Tdtokmask);
+		/* the data toggle is shown only when the Td supplies it */
+		if((td->ctrl & Tdusetog) != 0)
+			s = seprint(s, e, " d%d", (td->ctrl & Tddata1) ? 1 : 0);
+		else
+			s = seprint(s, e, " d-");
+		s = seprint(s, e, " ec=%uld", (td->ctrl >> Tderrshift) & Tderrmask);
+	}else{
+		s = seprint(s, e, " fc=%uld", (td->ctrl >> Tdfcshift) & Tdfcmask);
+		s = seprint(s, e, " sf=%uld", td->ctrl & Tdsfmask);
+	}
+	s = seprint(s, e, " cbp0 %#p cbp %#p next %#p be %#p %s",
+		td->cbp0, td->cbp, td->nexttd, td->be, td->last ? "last" : "");
+	s = seprint(s, e, "\n\t\t%ld bytes", td->nbytes);
+	if((bp = td->bp) != nil){
+		s = seprint(s, e, " rp %#p wp %#p ", bp->rp, bp->wp);
+		if(BLEN(bp) > 0)
+			s = seprintdata(s, e, bp->rp, bp->wp - bp->rp);
+	}
+	if(iso == 0)
+		return seprint(s, e, "\n");
+	s = seprint(s, e, "\n\t\t");
+	/* we use only offsets[0] */
+	i = 0;
+	s = seprint(s, e, "[%d] %#ux cc=%#ux sz=%ud\n", i, td->offsets[i],
+		(td->offsets[i] >> Tdiccshift) & Tdiccmask,
+		td->offsets[i] & 0x7FF);
+	return s;
+}
+
+/*
+ * Print one Td, prefixed with the label p.
+ * The text is built in a static buffer shared by all calls, so
+ * concurrent calls can overwrite each other's output.
+ */
+static void
+dumptd(Td *td, char *p, int iso)
+{
+	static char buf[512];	/* Too much */
+	char *s;
+
+	s = seprint(buf, buf+sizeof(buf), "%s: ", p);
+	s = seprinttd(s, buf+sizeof(buf), td, iso);
+	/* if the text does not end in a newline, replace its last character with one */
+	if(s > buf && s[-1] != '\n')
+		s[-1] = '\n';
+	print("\t%s", buf);
+}
+
+/*
+ * Print the Tds of a list starting at td, each labelled with p.
+ * Printing stops after a Td flagged as last, or with "..." once
+ * more than two input Tds have been printed.
+ */
+static void
+dumptds(Td *td, char *p, int iso)
+{
+	int nin;
+	Td *cur;
+
+	nin = 0;
+	cur = td;
+	while(cur != nil){
+		dumptd(cur, p, iso);
+		if(cur->last)
+			break;
+		if(tdtok(cur) == Tdtokin && ++nin > 2){
+			print("\t\t...\n");
+			break;
+		}
+		cur = cur->next;
+	}
+}
+
+/*
+ * Print an Ed: its control flags, data toggle, direction, halt
+ * state, endpoint address, maximum packet size and list pointers,
+ * followed by its Tds unless it is an isochronous Ed.
+ * Nothing is printed if the temporary buffer cannot be allocated.
+ */
+static void
+dumped(Ed *ed)
+{
+	char *buf, *s, *e;
+
+	if(ed == nil){
+		print("<null ed>\n");
+		return;
+	}
+	buf = malloc(512);
+	/* no waserror; may want to use from interrupt context */
+	if(buf == nil)
+		return;
+	e = buf+512;
+	s = seprint(buf, e, "\ted %#p: ctrl %#p", ed, ed->ctrl);
+	if((ed->ctrl & Edskip) != 0)
+		s = seprint(s, e, " skip");
+	if((ed->ctrl & Ediso) != 0)
+		s = seprint(s, e, " iso");
+	if((ed->ctrl & Edlow) != 0)
+		s = seprint(s, e, " low");
+	s = seprint(s, e, " d%d", (ed->head & Edtoggle) ? 1 : 0);
+	if((ed->ctrl & Eddirmask) == Edin)
+		s = seprint(s, e, " in");
+	if((ed->ctrl & Eddirmask) == Edout)
+		s = seprint(s, e, " out");
+	if(edhalted(ed))
+		s = seprint(s, e, " hlt");
+	/* endpoint number is taken from bits 7 and up, device address from the low 7 bits */
+	s = seprint(s, e, " ep%uld.%uld", (ed->ctrl>>7)&Epmax, ed->ctrl&0x7f);
+	s = seprint(s, e, " maxpkt %uld", (ed->ctrl>>Edmpsshift)&Edmpsmask);
+	seprint(s, e, " tail %#p head %#p next %#p\n",ed->tail,ed->head,ed->nexted);
+	print("%s", buf);
+	free(buf);
+	if(ed->tds != nil && (ed->ctrl & Ediso) == 0)
+		dumptds(ed->tds, "td", 0);
+}
+
+/*
+ * Format the state of a Qio (its Ed, toggle, time of last I/O,
+ * error string, usb id, token and state name) into s..e, prefixed
+ * with pref. Returns the new end of the text.
+ */
+static char*
+seprintio(char *s, char *e, Qio *io, char *pref)
+{
+	s = seprint(s, e, "%s qio %#p ed %#p", pref, io, io->ed);
+	s = seprint(s, e, " tog %d iot %ld err %s id %#ulx",
+		io->toggle, io->iotime, io->err, io->usbid);
+	s = seprinttdtok(s, e, io->tok);
+	s = seprint(s, e, " %s\n", iosname[io->state]);
+	return s;
+}
+
+/*
+ * Format the controller specific state of endpoint ep (what hangs
+ * from ep->aux) into s..e according to the endpoint type.
+ * Returns the new end of the text.
+ */
+static char*
+seprintep(char* s, char* e, Ep *ep)
+{
+	Isoio *iso;
+	Qio *io;
+	Ctlio *cio;
+
+	if(ep == nil)
+		return seprint(s, e, "<nil ep>\n");
+	if(ep->aux == nil)
+		return seprint(s, e, "no mdep\n");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		s = seprintio(s, e, cio, "c");
+		s = seprint(s, e, "\trepl %d ndata %d\n", ep->rhrepl, cio->ndata);
+		break;
+	case Tbulk:
+	case Tintr:
+		/* aux is an array of two Qios, indexed by OREAD and OWRITE */
+		io = ep->aux;
+		if(ep->mode != OWRITE)
+			s = seprintio(s, e, &io[OREAD], "r");
+		if(ep->mode != OREAD)
+			s = seprintio(s, e, &io[OWRITE], "w");
+		break;
+	case Tiso:
+		iso = ep->aux;
+		s = seprintio(s, e, iso, "w");
+		s = seprint(s, e, "\tntds %d avail %d frno %uld left %uld next avail %#p\n",
+			iso->nframes, iso->navail, iso->frno, iso->left, iso->atds);
+		break;
+	}
+	return s;
+}
+
+/*
+ * Format the control register value ctl into s..se: "en=" followed
+ * by one letter per enabled list (p, i, c, b) and then the state
+ * selected by the Cfsmask field. Returns the new end of the text.
+ */
+static char*
+seprintctl(char *s, char *se, uint32_t ctl)
+{
+	char *state;
+
+	s = seprint(s, se, "en=");
+	if(ctl & Cple)
+		s = seprint(s, se, "p");
+	if(ctl & Cie)
+		s = seprint(s, se, "i");
+	if(ctl & Ccle)
+		s = seprint(s, se, "c");
+	if(ctl & Cble)
+		s = seprint(s, se, "b");
+
+	switch(ctl & Cfsmask){
+	case Cfsreset:
+		state = " reset";
+		break;
+	case Cfsresume:
+		state = " resume";
+		break;
+	case Cfsoper:
+		state = " run";
+		break;
+	case Cfssuspend:
+		state = " suspend";
+		break;
+	default:
+		state = " ???";
+		break;
+	}
+	return seprint(s, se, "%s", state);
+}
+
+/*
+ * Print the state of the controller behind hp: registers, the
+ * control, bulk and interrupt Ed lists, the Eds linked after the
+ * first queue of the tree, and the Td and Ed pool counters.
+ * The controller is locked while its lists are walked; each pool
+ * is locked while its counters are read.
+ */
+static void
+dump(Hci *hp)
+{
+	Ctlr *ctlr;
+	Ed *ed;
+	char cs[20];
+
+	ctlr = hp->aux;
+	ilock(ctlr);
+	seprintctl(cs, cs+sizeof(cs), ctlr->ohci->control);
+	print("ohci ctlr %#p: frno %#ux ctl %#lux %s sts %#lux intr %#lux\n",
+		ctlr, ctlr->hcca->framenumber, ctlr->ohci->control, cs,
+		ctlr->ohci->cmdsts, ctlr->ohci->intrsts);
+	print("ctlhd %#ulx cur %#ulx bulkhd %#ulx cur %#ulx done %#ulx\n",
+		ctlr->ohci->ctlheaded, ctlr->ohci->ctlcurred,
+		ctlr->ohci->bulkheaded, ctlr->ohci->bulkcurred,
+		ctlr->ohci->donehead);
+	if(ctlhd(ctlr) != nil)
+		print("[ctl]\n");
+	for(ed = ctlhd(ctlr); ed != nil; ed = ed->next)
+		dumped(ed);
+	if(bulkhd(ctlr) != nil)
+		print("[bulk]\n");
+	for(ed = bulkhd(ctlr); ed != nil; ed = ed->next)
+		dumped(ed);
+	if(ctlr->intrhd != nil)
+		print("[intr]\n");
+	for(ed = ctlr->intrhd; ed != nil; ed = ed->inext)
+		dumped(ed);
+	/* NOTE(review): unlike the other section labels this one has no trailing newline; confirm whether that is intended */
+	if(ctlr->tree->root[0]->next != nil)
+		print("[iso]");
+	for(ed = ctlr->tree->root[0]->next; ed != nil; ed = ed->next)
+		dumped(ed);
+	print("%d eds in tree\n", ctlr->ntree);
+	iunlock(ctlr);
+	lock(&tdpool);
+	print("%d tds allocated = %d in use + %d free\n",
+		tdpool.nalloc, tdpool.ninuse, tdpool.nfree);
+	unlock(&tdpool);
+	lock(&edpool);
+	print("%d eds allocated = %d in use + %d free\n",
+		edpool.nalloc, edpool.ninuse, edpool.nfree);
+	unlock(&edpool);
+}
+
+/*
+ * Compute size for the next iso Td and setup its
+ * descriptor for I/O according to the buffer size.
+ * The Td's block must be empty. The size is clamped to ep->maxpkt,
+ * the block is zero filled for that size, and iso->frno is advanced
+ * by ep->pollival for the Td that will follow.
+ */
+static void
+isodtdinit(Ep *ep, Isoio *iso, Td *td)
+{
+	Block *bp;
+	int32_t size;
+	int i;
+
+	bp = td->bp;
+	assert(bp != nil && BLEN(bp) == 0);
+	/* samples for this interval; the remainder is carried in iso->left */
+	size = (ep->hz+iso->left) * ep->pollival / 1000;
+	iso->left = (ep->hz+iso->left) * ep->pollival % 1000;
+	size *= ep->samplesz;
+	if(size > ep->maxpkt){
+		print("ohci: ep%d.%d: size > maxpkt\n",
+			ep->dev->nb, ep->nb);
+		print("size = %uld max = %ld\n", size, ep->maxpkt);
+		size = ep->maxpkt;
+	}
+	td->nbytes = size;
+	memset(bp->wp, 0, size);	/* in case we don't fill it on time */
+	/* the buffer address is split: page part in cbp, low 12 bits in offsets[0] */
+	td->cbp0 = td->cbp = ptr2pa(bp->rp) & ~0xFFF;
+	td->ctrl = TRUNC(iso->frno, Ntdframes);
+	td->offsets[0] = (ptr2pa(bp->rp) & 0xFFF);
+	td->offsets[0] |= (Tdnotacc << Tdiccshift);
+	/* in case the controller checks out the offsets... */
+	for(i = 1; i < nelem(td->offsets); i++)
+		td->offsets[i] = td->offsets[0];
+	td->be = ptr2pa(bp->rp + size - 1);
+	td->ctrl |= (0 << Tdfcshift);	/* frame count is 1 */
+
+	iso->frno = TRUNC(iso->frno + ep->pollival, Ntdframes);
+}
+
+/*
+ * start I/O on the dummy td and setup a new dummy to fill up.
+ * The new dummy is taken from the head of iso->atds (the caller
+ * must make sure there is one), emptied, linked after td and
+ * initialized with isodtdinit; the Ed's tail is then moved to it.
+ */
+static void
+isoadvance(Ep *ep, Isoio *iso, Td *td)
+{
+	Td *dtd;
+
+	dtd = iso->atds;
+	iso->atds = dtd->anext;
+	iso->navail--;
+	dtd->anext = nil;
+	dtd->bp->wp = dtd->bp->rp;
+	dtd->nexttd = 0;
+	td->nexttd = ptr2pa(dtd);
+	isodtdinit(ep, iso, dtd);
+	/* moving the tail is the last step: the new dummy is fully set up by now */
+	iso->ed->tail = ptr2pa(dtd);
+}
+
+/*
+ * Sleep condition for iso writers; a is the Isoio.
+ * True when the stream is closed, has a pending error, or has
+ * more than half of its Tds available.
+ */
+static int
+isocanwrite(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	if(iso->state == Qclose)
+		return 1;
+	if(iso->err != nil)
+		return 1;
+	if(iso->navail > iso->nframes / 2)
+		return 1;
+	return 0;
+}
+
+/*
+ * Service a completed/failed Td from the done queue.
+ * It may be of any transfer type.
+ * The queue is not in completion order.
+ * (It's actually in reverse completion order).
+ *
+ * When an error, a short packet, or a last Td is found
+ * we awake the process waiting for the transfer.
+ * Although later we will process other Tds completed
+ * before, epio won't be able to touch the current Td
+ * until interrupt returns and releases the lock on the
+ * controller.
+ *
+ * Nothing is done unless io is in state Qrun.
+ * The ctrl and n arguments are not used here.
+ */
+static void
+qhinterrupt(Ctlr *ctrl, Ep *ep, Qio *io, Td *td, int n)
+{
+	Block *bp;
+	int mode, err;
+	Ed *ed;
+
+	ed = io->ed;
+	if(io->state != Qrun)
+		return;
+	if(tdtok(td) == Tdtokin)
+		mode = OREAD;
+	else
+		mode = OWRITE;
+	bp = td->bp;
+	err = tderrs(td);
+
+	switch(err){
+	case Tddataovr:			/* Overrun is not an error */
+		break;
+	case Tdok:
+		/* virtualbox doesn't always report underflow on short packets */
+		if(td->cbp == 0)
+			break;
+		/* fall through */
+	case Tddataund:
+		/* short input packets are ok */
+		if(mode == OREAD){
+			if(td->cbp == 0)
+				panic("ohci: short packet but cbp == 0");
+			/*
+			 * td->cbp and td->cbp0 are the real addresses
+			 * corresponding to virtual addresses bp->wp and
+			 * bp->rp respectively.
+			 */
+			bp->wp = bp->rp + (td->cbp - td->cbp0);
+			if(bp->wp < bp->rp)
+				panic("ohci: wp < rp");
+			/*
+			 * It's ok. clear error and flag as last in xfer.
+			 * epio must ignore following Tds.
+			 */
+			td->last = 1;
+			td->ctrl &= ~(Tdccmask << Tdccshift);
+			break;
+		}
+		/* else fall; it's an error */
+	case Tdcrc:
+	case Tdbitstuff:
+	case Tdbadtog:
+	case Tdstalled:
+	case Tdtmout:
+	case Tdpidchk:
+	case Tdbadpid:
+		bp->wp = bp->rp;	/* no bytes in xfer. */
+		io->err = errmsg(err);
+		if(debug || ep->debug){
+			print("tdinterrupt: failed err %d (%s)\n", err, io->err);
+			dumptd(td, "failed", ed->ctrl & Ediso);
+		}
+		td->last = 1;
+		break;
+	default:
+		panic("ohci: td cc %ud unknown", err);
+	}
+
+	if(td->last != 0){
+		/*
+		 * clear td list and halt flag.
+		 */
+		/* only the toggle bit of head is kept; the pointer becomes tail */
+		ed->head = (ed->head & Edtoggle) | ed->tail;
+		ed->tds = pa2ptr(ed->tail);
+		io->state = Qdone;
+		wakeup(io);
+	}
+}
+
+/*
+ * Service a completed iso Td from the done queue: record any
+ * error, put the Td back in the list of available Tds and wake
+ * up the writer when there is room for more samples.
+ * Nothing is done when io is in state Qclose.
+ * The n argument is not used here.
+ *
+ * BUG: Iso input streams are not implemented.
+ */
+static void
+isointerrupt(Ctlr *ctlr, Ep *ep, Qio *io, Td *td, int n)
+{
+	Isoio *iso;
+	Block *bp;
+	Ed *ed;
+	int err, isoerr;
+
+	iso = ep->aux;
+	ed = io->ed;
+	if(io->state == Qclose)
+		return;
+	bp = td->bp;
+	/*
+	 * When we get more than half the frames consecutive errors
+	 * we signal an actual error. Errors in the entire Td are
+	 * more serious and are always signaled.
+	 * Errors like overrun are not really errors. In fact, for
+	 * output, errors cannot be really detected. The driver will
+	 * hopefully notice I/O errors on input endpoints and detach the device.
+	 */
+	err = tderrs(td);
+	isoerr = (td->offsets[0] >> Tdiccshift) & Tdiccmask;
+	if(isoerr == Tdok || isoerr == Tdnotacc)
+		iso->nerrs = 0;
+	else if(iso->nerrs++ > iso->nframes/2)
+		err = Tdstalled;
+	if(err != Tdok && err != Tddataovr){
+		bp->wp = bp->rp;
+		io->err = errmsg(err);
+		if(debug || ep->debug){
+			print("ohci: isointerrupt: ep%d.%d: err %d (%s) frnum 0x%lux\n",
+				ep->dev->nb, ep->nb,
+				err, errmsg(err), ctlr->ohci->fmnumber);
+			dumptd(td, "failed", ed->ctrl & Ediso);
+		}
+	}
+	/* empty the Td and push it on the list of available Tds */
+	td->bp->wp = td->bp->rp;
+	td->nbytes = 0;
+	td->anext = iso->atds;
+	iso->atds = td;
+	iso->navail++;
+	/*
+	 * If almost all Tds are avail the user is not doing I/O at the
+	 * required rate. We put another Td in place to keep the polling rate.
+	 */
+	if(iso->err == nil && iso->navail > iso->nframes - 10)
+		isoadvance(ep, iso, pa2ptr(iso->ed->tail));
+	/*
+	 * If there's enough buffering further I/O can be done.
+	 */
+	if(isocanwrite(iso))
+		wakeup(iso);
+}
+
+/*
+ * Interrupt handler; arg is the Hci. Runs with the controller locked.
+ * It writes Mie to intrdisable, reads the pending and enabled status
+ * bits, services the done queue (at most 1024 Tds) through
+ * isointerrupt or qhinterrupt, writes the serviced bits back to
+ * intrsts, reports scheduling overruns and unrecoverable errors, and
+ * finally writes Mie|Wdh|Ue to intrenable.
+ * The ureg argument is not used.
+ */
+static void
+interrupt(Ureg *ureg, void *arg)
+{
+	Td *td, *ntd;
+	Hci *hp;
+	Ctlr *ctlr;
+	uint32_t status, curred;
+	int i, frno;
+
+	hp = arg;
+	ctlr = hp->aux;
+	ilock(ctlr);
+	ctlr->ohci->intrdisable = Mie;
+	coherence();
+	status = ctlr->ohci->intrsts & ctlr->ohci->intrenable;
+	status &= Oc|Rhsc|Fno|Ue|Rd|Sf|Wdh|So;
+	frno = TRUNC(ctlr->ohci->fmnumber, Ntdframes);
+	if(status & Wdh){
+		/* lsb of donehead has bit to flag other intrs.  */
+		td = pa2ptr(ctlr->hcca->donehead & ~0xF);
+
+		for(i = 0; td != nil && i < 1024; i++){
+			if(0)ddprint("ohci tdinterrupt: td %#p\n", td);
+			/* fetch the link before clearing it in the Td being serviced */
+			ntd = pa2ptr(td->nexttd & ~0xF);
+			td->nexttd = 0;
+			if(td->ep == nil || td->io == nil)
+				panic("ohci: interrupt: ep %#p io %#p",
+					td->ep, td->io);
+			ohciinterrupts[td->ep->ttype]++;
+			if(td->ep->ttype == Tiso)
+				isointerrupt(ctlr, td->ep, td->io, td, frno);
+			else
+				qhinterrupt(ctlr, td->ep, td->io, td, frno);
+			td = ntd;
+		}
+		if(i >= 1024)
+			print("ohci: bug: more than 1024 done Tds?\n");
+		ctlr->hcca->donehead = 0;
+	}
+
+	ctlr->ohci->intrsts = status;
+	/* Wdh was handled above; Sf needs no further work here */
+	status &= ~Wdh;
+	status &= ~Sf;
+	if(status & So){
+		print("ohci: sched overrun: too much load\n");
+		ctlr->overrun++;
+		status &= ~So;
+	}
+	if((status & Ue) != 0){
+		curred = ctlr->ohci->periodcurred;
+		print("ohci: unrecoverable error frame 0x%.8lux ed 0x%.8lux, "
+			"ints %d %d %d %d\n",
+			ctlr->ohci->fmnumber, curred,
+			ohciinterrupts[Tctl], ohciinterrupts[Tintr],
+			ohciinterrupts[Tbulk], ohciinterrupts[Tiso]);
+		if(curred != 0)
+			dumped(pa2ptr(curred));
+		status &= ~Ue;
+	}
+	/* anything still set was enabled but has no handling above */
+	if(status != 0)
+		print("ohci interrupt: unhandled sts 0x%.8lux\n", status);
+	ctlr->ohci->intrenable = Mie | Wdh | Ue;
+	iunlock(ctlr);
+}
+
+/*
+ * The old dummy Td is used to implement the new Td.
+ * A new dummy is linked at the end of the old one and
+ * returned, to link further Tds if needed.
+ *
+ * *dtdp is the current dummy on entry and the new dummy on return;
+ * the function's value is the Td that was set up. A block of count
+ * bytes is allocated for it and, when a is not nil, count bytes are
+ * copied from a into the block. io->toggle is flipped for the next Td.
+ * Panics if count is larger than 2*BIGPGSZ.
+ */
+static Td*
+epgettd(Ep *ep, Qio *io, Td **dtdp, int flags, void *a, int count)
+{
+	Td *td, *dtd;
+	Block *bp;
+
+	/*
+	 * NOTE(review): the page arithmetic below uses BIGPGSZ; the OHCI
+	 * limit on page crossings is presumably in terms of 4K pages.
+	 * Confirm against the OHCI specification and mem.h.
+	 */
+	if(count <= BIGPGSZ)
+		bp = allocb(count);
+	else{
+		if(count > 2*BIGPGSZ)
+			panic("ohci: transfer > two pages");
+		/* maximum of one physical page crossing allowed */
+		bp = allocb(count+BIGPGSZ);
+		bp->rp = (unsigned char*)BIGPGROUND((uintptr)bp->rp);
+		bp->wp = bp->rp;
+	}
+	dtd = *dtdp;
+	td = dtd;
+	td->bp = bp;
+	if(count > 0){
+		td->cbp0 = td->cbp = ptr2pa(bp->wp);
+		td->be = ptr2pa(bp->wp + count - 1);
+		if(a != nil){
+			/* validaddr((uintptr)a, count, 0); DEBUG */
+			memmove(bp->wp, a, count);
+		}
+		bp->wp += count;
+	}
+	td->nbytes = count;
+	td->ctrl = io->tok|Tdusetog|io->toggle|flags;
+	if(io->toggle == Tddata0)
+		io->toggle = Tddata1;
+	else
+		io->toggle = Tddata0;
+	/* the dummy was tagged with its endpoint when it was allocated */
+	assert(td->ep == ep);
+	td->io = io;
+ 	dtd = tdalloc();	/* new dummy */
+	dtd->ep = ep;
+	td->nexttd = ptr2pa(dtd);
+	td->next = dtd;
+	*dtdp = dtd;
+	return td;
+}
+
+/*
+ * Try to get them idle
+ *
+ * Sets Edskip on io's Ed, empties the blocks of its Tds and makes
+ * the Ed's head pointer equal to its tail, keeping the low 4 bits
+ * of head. For non-iso Eds, ed->tds is reset to the tail Td as well.
+ * Does nothing when io has no Ed.
+ */
+static void
+aborttds(Qio *io)
+{
+	Ed *ed;
+	Td *td;
+
+	ed = io->ed;
+	if(ed == nil)
+		return;
+	ed->ctrl |= Edskip;
+	for(td = ed->tds; td != nil; td = td->next)
+		if(td->bp != nil)
+			td->bp->wp = td->bp->rp;
+	ed->head = (ed->head&0xF) | ed->tail;
+	if((ed->ctrl & Ediso) == 0)
+		ed->tds = pa2ptr(ed->tail);
+}
+
+/* Sleep condition for epiowait; a is the Qio. True once it left Qrun. */
+static int
+epiodone(void *a)
+{
+	Qio *io;
+
+	io = a;
+	if(io->state == Qrun)
+		return 0;
+	return 1;
+}
+
+/*
+ * Wait for the I/O started on io to finish.
+ * Sleeps until io leaves state Qrun, for at most tmout when tmout
+ * is not zero. If the sleep is interrupted by an error, or io is
+ * still in Qrun afterwards, the transfer is aborted with aborttds,
+ * io->err is set to "request timed out" and the process sleeps for
+ * Abortdelay. On return io is in state Qidle, unless it was closed.
+ * The n argument is not used.
+ */
+static void
+epiowait(Ctlr *ctlr, Qio *io, int tmout, uint32_t n)
+{
+	Mach *m = machp();
+	Ed *ed;
+	int timedout;
+
+	ed = io->ed;
+	if(0)ddqprint("ohci io %#p sleep on ed %#p state %s\n",
+		io, ed, iosname[io->state]);
+	timedout = 0;
+	if(waserror()){
+		dqprint("ohci io %#p ed %#p timed out\n", io, ed);
+		timedout++;
+	}else{
+		if(tmout == 0)
+			sleep(io, epiodone, io);
+		else
+			tsleep(io, epiodone, io, tmout);
+		poperror();
+	}
+	ilock(ctlr);
+	if(io->state == Qrun)
+		timedout = 1;
+	else if(io->state != Qdone && io->state != Qclose)
+		panic("epio: ed not done and not closed");
+	if(timedout){
+		aborttds(io);
+		io->err = "request timed out";
+		iunlock(ctlr);
+		/* errors raised during this delay are ignored */
+		if(!waserror()){
+			tsleep(&m->externup->sleep, return0, 0, Abortdelay);
+			poperror();
+		}
+		ilock(ctlr);
+	}
+	if(io->state != Qclose)
+		io->state = Qidle;
+	iunlock(ctlr);
+}
+
+/*
+ * Non iso I/O.
+ * To make it work for control transfers, the caller may
+ * lock the Qio for the entire control transfer.
+ *
+ * Transfers count bytes at a between the caller and the endpoint,
+ * in the direction given by io->tok. When mustlock is set the Qio
+ * is qlocked for the duration of the call.
+ * Returns the number of bytes transferred; raises an error when the
+ * Qio was closed or the transfer failed.
+ */
+static int32_t
+epio(Ep *ep, Qio *io, void *a, int32_t count, int mustlock)
+{
+	Mach *m = machp();
+	Ed *ed;
+	Ctlr *ctlr;
+	char buf[80];
+	char *err;
+	unsigned char *c;
+	Td *td, *ltd, *ntd, *td0;
+	int last, ntds, tmout;
+	int32_t tot, n;
+	uint32_t load;
+
+	ed = io->ed;
+	ctlr = ep->hp->aux;
+	io->debug = ep->debug;
+	tmout = ep->tmout;
+	ddeprint("ohci: %s ep%d.%d io %#p count %ld\n",
+		io->tok == Tdtokin ? "in" : "out",
+		ep->dev->nb, ep->nb, io, count);
+	if((debug > 1 || ep->debug > 1) && io->tok != Tdtokin){
+		seprintdata(buf, buf+sizeof(buf), a, count);
+		print("\t%s\n", buf);
+	}
+	if(mustlock){
+		qlock(io);
+		if(waserror()){
+			qunlock(io);
+			nexterror();
+		}
+	}
+	io->err = nil;
+	ilock(ctlr);
+	if(io->state == Qclose){	/* Tds released by cancelio */
+		iunlock(ctlr);
+		error(io->err ? io->err : Eio);
+	}
+	if(io->state != Qidle)
+		panic("epio: qio not idle");
+	io->state = Qinstall;
+
+	/*
+	 * Build the chain of Tds starting at the Ed's current dummy;
+	 * each Td carries at most 2*BIGPGSZ bytes. The loop runs at
+	 * least once, so a zero count still yields one (empty) Td.
+	 */
+	c = a;
+	ltd = td0 = ed->tds;
+	load = tot = 0;
+	do{
+		n = 2*BIGPGSZ;
+		if(count-tot < n)
+			n = count-tot;
+		if(c != nil && io->tok != Tdtokin)
+			td = epgettd(ep, io, &ltd, 0, c+tot, n);
+		else
+			td = epgettd(ep, io, &ltd, 0, nil, n);
+		tot += n;
+		load += ep->load;
+	}while(tot < count);
+	if(td0 == nil || ltd == nil || td0 == ltd)
+		panic("epio: no td");
+	td->last = 1;
+	if(debug > 2 || ep->debug > 2)
+		dumptds(td0, "put td", ep->ttype == Tiso);
+	iunlock(ctlr);
+
+	/* start the I/O by moving the Ed's tail to the new dummy */
+	ilock(ctlr);
+	if(io->state != Qclose){
+		io->iotime = TK2MS(m->ticks);
+		io->state = Qrun;
+		ed->tail = ptr2pa(ltd);
+		if(ep->ttype == Tctl)
+			ctlr->ohci->cmdsts |= Sclf;
+		else if(ep->ttype == Tbulk)
+			ctlr->ohci->cmdsts |= Sblf;
+	}
+	iunlock(ctlr);
+
+	epiowait(ctlr, io, tmout, load);
+	ilock(ctlr);
+	if(debug > 1 || ep->debug > 1)
+		dumptds(td0, "got td", 0);
+	iunlock(ctlr);
+
+	/* collect the data of the Tds that completed and free them all */
+	tot = 0;
+	c = a;
+	ntds = last = 0;
+	for(td = td0; td != ltd; td = ntd){
+		ntds++;
+		/*
+		 * If the Td is flagged as last we must
+		 * ignore any following Td. The block may
+		 * seem to have bytes but interrupt has not seen
+		 * those Tds through the done queue, and they are void.
+		 */
+		if(last == 0 && tderrs(td) == Tdok){
+			n = BLEN(td->bp);
+			tot += n;
+			if(c != nil && tdtok(td) == Tdtokin && n > 0){
+				memmove(c, td->bp->rp, n);
+				c += n;
+			}
+		}
+		last |= td->last;
+		ntd = td->next;
+		tdfree(td);
+	}
+	/* continue with the toggle the Ed ended up with */
+	if(edtoggle(ed) == 0)
+		io->toggle = Tddata0;
+	else
+		io->toggle = Tddata1;
+
+	err = io->err;
+	if(mustlock){
+		qunlock(io);
+		poperror();
+	}
+	ddeprint("ohci: io %#p: %d tds: return %ld err '%s'\n\n",
+		io, ntds, tot, err);
+	if(err != nil)
+		error(err);
+	if(tot < 0)
+		error(Eio);
+	return tot;
+}
+
+/*
+ * The halt condition was cleared on the endpoint: reset the data
+ * toggles of its Qios to Tddata0, each under its own qlock.
+ * Only bulk and interrupt endpoints have toggles to reset here;
+ * ep->clrhalt is cleared for every endpoint type.
+ */
+static void
+clrhalt(Ep *ep)
+{
+	Qio *qios, *wio, *rio;
+
+	ep->clrhalt = 0;
+	if(ep->ttype != Tbulk && ep->ttype != Tintr)
+		return;
+
+	qios = ep->aux;
+	wio = qios+OWRITE;
+	rio = qios+OREAD;
+	if(ep->mode != OREAD){
+		qlock(wio);
+		wio->toggle = Tddata0;
+		deprint("ep clrhalt for io %#p\n", wio);
+		qunlock(wio);
+	}
+	if(ep->mode != OWRITE){
+		qlock(rio);
+		rio->toggle = Tddata0;
+		deprint("ep clrhalt for io %#p\n", rio);
+		qunlock(rio);
+	}
+}
+
+/*
+ * Read up to count bytes from endpoint ep into a.
+ * Control endpoints return the data kept by a previous request
+ * (see epctlio); bulk and interrupt endpoints perform the I/O
+ * through epio. Iso reads are not implemented and panic.
+ * Returns the number of bytes read.
+ */
+static int32_t
+epread(Ep *ep, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Ctlio *cio;
+	Qio *io;
+	char buf[80];
+	uint32_t delta;
+
+	if(ep->aux == nil)
+		panic("epread: not open");
+
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		qlock(cio);
+		if(waserror()){
+			qunlock(cio);
+			nexterror();
+		}
+		ddeprint("epread ctl ndata %d\n", cio->ndata);
+		/* ndata: < 0 no request made, 0 report EOF once, > 0 bytes kept */
+		if(cio->ndata < 0)
+			error("request expected");
+		else if(cio->ndata == 0){
+			cio->ndata = -1;
+			count = 0;
+		}else{
+			if(count > cio->ndata)
+				count = cio->ndata;
+			if(count > 0)
+				memmove(a, cio->data, count);
+			/* BUG for big transfers */
+			free(cio->data);
+			cio->data = nil;
+			cio->ndata = 0;	/* signal EOF next time */
+		}
+		qunlock(cio);
+		poperror();
+		if(debug>1 || ep->debug){
+			seprintdata(buf, buf+sizeof(buf), a, count);
+			print("epread: %s\n", buf);
+		}
+		return count;
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tintr:
+		io = ep->aux;
+		/* do not poll again before half the polling interval has passed */
+		delta = TK2MS(m->ticks) - io[OREAD].iotime + 1;
+		if(delta < ep->pollival / 2)
+			tsleep(&m->externup->sleep, return0, 0, ep->pollival/2 - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tiso:
+		panic("ohci: iso read not implemented");
+		break;
+	default:
+		panic("epread: bad ep ttype %d", ep->ttype);
+	}
+	return -1;
+}
+
+/*
+ * Control transfers are one setup write (data0)
+ * plus zero or more reads/writes (data1, data0, ...)
+ * plus a final write/read with data1 to ack.
+ * For both host to device and device to host we perform
+ * the entire transfer when the user writes the request,
+ * and keep any data read from the device for a later read.
+ * We call epio three times instead of placing all Tds at
+ * the same time because doing so leads to crc/tmout errors
+ * for some devices.
+ * Upon errors on the data phase we must still run the status
+ * phase or the device may cease responding in the future.
+ *
+ * a holds the setup packet (Rsetuplen bytes) followed by any data
+ * to send. For device to host requests the value returned is
+ * Rsetuplen; otherwise it is Rsetuplen plus the bytes written, or
+ * -1 if the data phase failed.
+ */
+static int32_t
+epctlio(Ep *ep, Ctlio *cio, void *a, int32_t count)
+{
+	Mach *m = machp();
+	unsigned char *c;
+	int32_t len;
+
+	ddeprint("epctlio: cio %#p ep%d.%d count %ld\n",
+		cio, ep->dev->nb, ep->nb, count);
+	if(count < Rsetuplen)
+		error("short usb command");
+	qlock(cio);
+	/* drop any data kept from a previous request */
+	free(cio->data);
+	cio->data = nil;
+	cio->ndata = 0;
+	if(waserror()){
+		qunlock(cio);
+		free(cio->data);
+		cio->data = nil;
+		cio->ndata = 0;
+		nexterror();
+	}
+
+	/* set the address if unset and out of configuration state */
+	if(ep->dev->state != Dconfig && ep->dev->state != Dreset)
+		if(cio->usbid == 0){
+			cio->usbid = (ep->nb<<7)|(ep->dev->nb & Devmax);
+			edsetaddr(cio->ed, cio->usbid);
+		}
+	/* adjust maxpkt if the user has learned a different one */
+	if(edmaxpkt(cio->ed) != ep->maxpkt)
+		edsetmaxpkt(cio->ed, ep->maxpkt);
+	/* setup phase */
+	c = a;
+	cio->tok = Tdtoksetup;
+	cio->toggle = Tddata0;
+	if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen)
+		error(Eio);
+
+	a = c + Rsetuplen;
+	count -= Rsetuplen;
+
+	/* data phase, in the direction given by the request type */
+	cio->toggle = Tddata1;
+	if(c[Rtype] & Rd2h){
+		cio->tok = Tdtokin;
+		len = GET2(c+Rcount);
+		if(len <= 0)
+			error("bad length in d2h request");
+		if(len > Maxctllen)
+			error("d2h data too large to fit in ohci");
+		a = cio->data = smalloc(len+1);
+	}else{
+		cio->tok = Tdtokout;
+		len = count;
+	}
+	/* an error here is recorded in len; it must not skip the status phase */
+	if(len > 0)
+		if(waserror())
+			len = -1;
+		else{
+			len = epio(ep, cio, a, len, 0);
+			poperror();
+		}
+	if(c[Rtype] & Rd2h){
+		count = Rsetuplen;
+		cio->ndata = len;
+		cio->tok = Tdtokout;
+	}else{
+		if(len < 0)
+			count = -1;
+		else
+			count = Rsetuplen + len;
+		cio->tok = Tdtokin;
+	}
+	/* status phase: an empty transfer in the opposite direction */
+	cio->toggle = Tddata1;
+	epio(ep, cio, nil, 0, 0);
+	qunlock(cio);
+	poperror();
+	ddeprint("epctlio cio %#p return %ld\n", cio, count);
+	return count;
+}
+
+/*
+ * Put new samples in the dummy Td.
+ * BUG: This does only a transfer per Td. We could do up to 8.
+ *
+ * Copies at most count bytes from b into what is left of the dummy
+ * Td's block. When the Td becomes full it is activated with
+ * isoadvance, under the controller lock.
+ * Returns the number of bytes copied.
+ */
+static int32_t
+putsamples(Ctlr *ctlr, Ep *ep, Isoio *iso, unsigned char *b, int32_t count)
+{
+	Td *td;
+	uint32_t n;
+
+	/* the Ed's tail points to the dummy Td being filled */
+	td = pa2ptr(iso->ed->tail);
+	n = count;
+	if(n > td->nbytes - BLEN(td->bp))
+		n = td->nbytes - BLEN(td->bp);
+	assert(td->bp->wp + n <= td->bp->lim);
+	memmove(td->bp->wp, b, n);
+	td->bp->wp += n;
+	if(BLEN(td->bp) == td->nbytes){	/* full Td: activate it */
+		ilock(ctlr);
+		isoadvance(ep, iso, td);
+		iunlock(ctlr);
+	}
+	return n;
+}
+
+/*
+ * Write count bytes of samples from a to the iso endpoint ep.
+ * The Isoio is qlocked for the whole call. Samples are copied with
+ * putsamples, sleeping (for at most ep->tmout at a time) whenever
+ * isocanwrite says there is no room. An error is raised if the
+ * stream is closed or an I/O error was recorded.
+ * Returns the number of bytes written.
+ */
+static int32_t
+episowrite(Ep *ep, void *a, int32_t count)
+{
+	Mach *m = machp();
+	int32_t tot, nw;
+	char *err;
+	unsigned char *b;
+	Ctlr *ctlr;
+	Isoio *iso;
+
+	ctlr = ep->hp->aux;
+	iso = ep->aux;
+	iso->debug = ep->debug;
+
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	diprint("ohci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	b = a;
+	for(tot = 0; tot < count; tot += nw){
+		/* the controller lock is held here, and dropped around the sleep */
+		while(isocanwrite(iso) == 0){
+			iunlock(ctlr);
+			diprint("ohci: episowrite: %#p sleep\n", iso);
+			if(waserror()){
+				if(iso->err == nil)
+					iso->err = "I/O timed out";
+				ilock(ctlr);
+				break;
+			}
+			tsleep(iso, isocanwrite, iso, ep->tmout);
+			poperror();
+			ilock(ctlr);
+		}
+		/* take the pending error, if any, and clear it */
+		err = iso->err;
+		iso->err = nil;
+		if(iso->state == Qclose || err != nil){
+			iunlock(ctlr);
+			error(err ? err : Eio);
+		}
+		if(iso->state != Qrun)
+			panic("episowrite: iso not running");
+		iunlock(ctlr);		/* We could page fault here */
+		nw = putsamples(ctlr, ep, iso, b+tot, count-tot);
+		ilock(ctlr);
+	}
+	if(iso->state != Qclose)
+		iso->state = Qdone;
+	iunlock(ctlr);
+	err = iso->err;		/* in case it failed early */
+	iso->err = nil;
+	qunlock(iso);
+	poperror();
+	if(err != nil)
+		error(err);
+	diprint("ohci: episowrite: %#p %ld bytes\n", iso, tot);
+	return tot;
+}
+
+/*
+ * Write count bytes from a to endpoint ep, according to its type:
+ * control requests go through epctlio, bulk data is written in
+ * pieces of at most Tdatomic*ep->maxpkt bytes, interrupt data is
+ * written with a single epio, and iso data with episowrite.
+ * Returns the number of bytes written.
+ */
+static int32_t
+epwrite(Ep *ep, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Qio *io;
+	Ctlio *cio;
+	uint32_t delta;
+	unsigned char *b;
+	int32_t tot, nw;
+
+	if(ep->aux == nil)
+		panic("ohci: epwrite: not open");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		return epctlio(ep, cio, a, count);
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		/*
+		 * Put at most Tdatomic Tds (512 bytes) at a time.
+		 * Otherwise some devices produce babble errors.
+		 */
+		b = a;
+		assert(a != nil);
+		/* NOTE(review): the loop advances by what epio returns; confirm epio cannot return 0 for a non-empty write */
+		for(tot = 0; tot < count ; tot += nw){
+			nw = count - tot;
+			if(nw > Tdatomic * ep->maxpkt)
+				nw = Tdatomic * ep->maxpkt;
+			nw = epio(ep, &io[OWRITE], b+tot, nw, 1);
+		}
+		return tot;
+	case Tintr:
+		io = ep->aux;
+		/* do not write again before the polling interval has passed */
+		delta = TK2MS(m->ticks) - io[OWRITE].iotime + 1;
+		if(delta < ep->pollival)
+			tsleep(&m->externup->sleep, return0, 0, ep->pollival - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OWRITE], a, count, 1);
+	case Tiso:
+		return episowrite(ep, a, count);
+	default:
+		panic("ohci: epwrite: bad ep ttype %d", ep->ttype);
+	}
+	return -1;
+}
+
+/*
+ * Allocate an Ed, with a dummy Td, for io and link it into the
+ * controller list that corresponds to the endpoint type: the
+ * control list, the bulk list, or the periodic schedule (schedq).
+ * The Ed is stored in io->ed and also returned. If an error is
+ * raised while setting it up, the Ed is freed and io->ed is cleared.
+ * The c argument is not used.
+ */
+static Ed*
+newed(Ctlr *ctlr, Ep *ep, Qio *io, char *c)
+{
+	Mach *m = machp();
+	Ed *ed;
+	Td *td;
+
+	ed = io->ed = edalloc();	/* no errors raised here, really */
+	td = tdalloc();
+	td->ep = ep;
+	td->io = io;
+	/* head == tail: the Ed starts out with only the dummy Td */
+	ed->tail =  ptr2pa(td);
+	ed->head = ptr2pa(td);
+	ed->tds = td;
+	ed->ep = ep;
+	ed->ctrl = (ep->maxpkt & Edmpsmask) << Edmpsshift;
+	if(ep->ttype == Tiso)
+		ed->ctrl |= Ediso;
+	if(waserror()){
+		edfree(ed);
+		io->ed = nil;
+		nexterror();
+	}
+	/* For setup endpoints we start with the config address */
+	if(ep->ttype != Tctl)
+		edsetaddr(io->ed, io->usbid);
+	if(ep->dev->speed == Lowspeed)
+		ed->ctrl |= Edlow;
+	switch(io->tok){
+	case Tdtokin:
+		ed->ctrl |= Edin;
+		break;
+	case Tdtokout:
+		ed->ctrl |= Edout;
+		break;
+	default:
+		ed->ctrl |= Edtddir;	/* Td will say */
+		break;
+	}
+
+	switch(ep->ttype){
+	case Tctl:
+		ilock(ctlr);
+		edlinked(ed, ctlhd(ctlr));
+		setctlhd(ctlr, ed);
+		iunlock(ctlr);
+		break;
+	case Tbulk:
+		ilock(ctlr);
+		edlinked(ed, bulkhd(ctlr));
+		setbulkhd(ctlr, ed);
+		iunlock(ctlr);
+		break;
+	case Tintr:
+	case Tiso:
+		ilock(ctlr);
+		/* NOTE(review): the result of schedq is ignored; when it fails the Ed is left unlinked */
+		schedq(ctlr, io, ep->pollival);
+		iunlock(ctlr);
+		break;
+	default:
+		panic("ohci: newed: bad ttype");
+	}
+	poperror();
+	return ed;
+}
+
+/*
+ * Set up the Isoio in ep->aux for output: compute its bandwidth
+ * and number of frames (at least 10), allocate nframes-1 Tds with
+ * blocks of ep->maxpkt bytes as available Tds, create the Ed with
+ * newed and initialize its dummy Td.
+ * Raises an error for endpoints not opened write-only, because
+ * iso input is not implemented.
+ */
+static void
+isoopen(Ctlr *ctlr, Ep *ep)
+{
+	Td *td, *edtds;
+	Isoio *iso;
+	int i;
+
+	iso = ep->aux;
+	iso->usbid = (ep->nb<<7)|(ep->dev->nb & Devmax);
+	iso->bw = ep->hz * ep->samplesz;	/* bytes/sec */
+	if(ep->mode != OWRITE){
+		print("ohci: bug: iso input streams not implemented\n");
+		error("ohci iso input streams not implemented");
+	}else
+		iso->tok = Tdtokout;
+
+	iso->left = 0;
+	iso->nerrs = 0;
+	iso->frno = TRUNC(ctlr->ohci->fmnumber + 10, Ntdframes);
+	/* NOTE(review): assumes ep->pollival is not zero; confirm the caller guarantees it */
+	iso->nframes = 1000 / ep->pollival;
+	if(iso->nframes < 10){
+		print("ohci: isoopen: less than 10 frames; using 10.\n");
+		iso->nframes = 10;
+	}
+	iso->navail = iso->nframes;
+	iso->atds = edtds = nil;
+	/* each Td is put on two lists: available (anext) and the Ed's (next) */
+	for(i = 0; i < iso->nframes-1; i++){	/* -1 for dummy */
+		td = tdalloc();
+		td->ep = ep;
+		td->io = iso;
+		td->bp = allocb(ep->maxpkt);
+		td->anext = iso->atds;		/* link as avail */
+		iso->atds = td;
+		td->next = edtds;
+		edtds = td;
+	}
+	newed(ctlr, ep, iso, "iso");		/* allocates a dummy td */
+	iso->ed->tds->bp = allocb(ep->maxpkt);	/* but not its block */
+	iso->ed->tds->next = edtds;
+	isodtdinit(ep, iso, iso->ed->tds);
+}
+
+/*
+ * Allocate the endpoint and set it up for I/O
+ * in the controller. This must follow what's said
+ * in Ep regarding configuration, including perhaps
+ * the saved toggles (saved on a previous close of
+ * the endpoint data file by epclose).
+ *
+ * ep->aux is set to an Isoio, a Ctlio, or an array of two Qios
+ * (indexed by OREAD and OWRITE) depending on the endpoint type.
+ * If an error is raised, ep->aux is freed and cleared.
+ */
+static void
+epopen(Ep *ep)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	Qio *io;
+	Ctlio *cio;
+	uint32_t usbid;
+
+	ctlr = ep->hp->aux;
+	deprint("ohci: epopen ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux != nil)
+		panic("ohci: epopen called with open ep");
+	if(waserror()){
+		free(ep->aux);
+		ep->aux = nil;
+		nexterror();
+	}
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tiso:
+		ep->aux = smalloc(sizeof(Isoio));
+		isoopen(ctlr, ep);
+		break;
+	case Tctl:
+		cio = ep->aux = smalloc(sizeof(Ctlio));
+		cio->debug = ep->debug;
+		cio->ndata = -1;
+		cio->data = nil;
+		cio->tok = -1;	/* invalid; Tds will say */
+		/* no Ed is created for the root hub's endpoint 0 */
+		if(ep->dev->isroot != 0 && ep->nb == 0)	/* root hub */
+			break;
+		newed(ctlr, ep, cio, "epc");
+		break;
+	case Tbulk:
+		ep->pollival = 1;	/* assume this; doesn't really matter */
+		/* and fall... */
+	case Tintr:
+		io = ep->aux = smalloc(sizeof(Qio)*2);
+		io[OREAD].debug = io[OWRITE].debug = ep->debug;
+		usbid = (ep->nb<<7)|(ep->dev->nb & Devmax);
+		/* one Ed per direction the endpoint is open for */
+		if(ep->mode != OREAD){
+			if(ep->toggle[OWRITE] != 0)
+				io[OWRITE].toggle = Tddata1;
+			else
+				io[OWRITE].toggle = Tddata0;
+			io[OWRITE].tok = Tdtokout;
+			io[OWRITE].usbid = usbid;
+			io[OWRITE].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */
+			newed(ctlr, ep, io+OWRITE, "epw");
+		}
+		if(ep->mode != OWRITE){
+			if(ep->toggle[OREAD] != 0)
+				io[OREAD].toggle = Tddata1;
+			else
+				io[OREAD].toggle = Tddata0;
+			io[OREAD].tok = Tdtokin;
+			io[OREAD].usbid = usbid;
+			io[OREAD].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */
+			newed(ctlr, ep, io+OREAD, "epr");
+		}
+		break;
+	}
+	deprint("ohci: epopen done:\n");
+	if(debug || ep->debug)
+		dump(ep->hp);
+	poperror();
+}
+
+/*
+ * Abort the I/O pending on io and release its Ed.
+ * Does nothing when io is nil or already in state Qclose.
+ * Otherwise io is marked Qclose with error Eio, its Tds are aborted,
+ * and after a pause of Abortdelay any sleeper on io is woken; the
+ * Ed is then unlinked according to ep->ttype and freed.
+ */
+static void
+cancelio(Ep *ep, Qio *io)
+{
+	Mach *m = machp();
+	Ed *ed;
+	Ctlr *ctlr;
+
+	ctlr = ep->hp->aux;
+
+	ilock(ctlr);
+	if(io == nil || io->state == Qclose){
+		assert(io == nil || io->ed == nil);
+		iunlock(ctlr);
+		return;
+	}
+	ed = io->ed;
+	io->state = Qclose;
+	io->err = Eio;
+	aborttds(io);
+	iunlock(ctlr);
+	/* pause for Abortdelay; an error raised during the sleep is dropped */
+	if(!waserror()){
+		tsleep(&m->externup->sleep, return0, 0, Abortdelay);
+		poperror();
+	}
+
+	wakeup(io);
+	qlock(io);
+	/* wait for epio if running */
+	qunlock(io);
+
+	/* unlink the Ed using the routine that matches the transfer type */
+	ilock(ctlr);
+	switch(ep->ttype){
+	case Tctl:
+		unlinkctl(ctlr, ed);
+		break;
+	case Tbulk:
+		unlinkbulk(ctlr, ed);
+		break;
+	case Tintr:
+	case Tiso:
+		unschedq(ctlr, io);
+		break;
+	default:
+		panic("ohci cancelio: bad ttype");
+	}
+	iunlock(ctlr);
+	edfree(io->ed);
+	io->ed = nil;
+}
+
+/*
+ * Release the controller state of an open endpoint: cancel the I/O
+ * of every direction in use, remember a DATA1 toggle in ep->toggle
+ * for bulk and interrupt endpoints, and free ep->aux.
+ * Panics if the endpoint is not open or has an unknown type.
+ */
+static void
+epclose(Ep *ep)
+{
+	Qio *qio, *rd, *wr;
+	Ctlio *ctl;
+	Isoio *iso;
+
+	deprint("ohci: epclose ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux == nil)
+		panic("ohci: epclose called with closed ep");
+	switch(ep->ttype){
+	case Tiso:
+		iso = ep->aux;
+		cancelio(ep, iso);
+		break;
+	case Tctl:
+		ctl = ep->aux;
+		cancelio(ep, ctl);
+		/* drop data left over from the last control request */
+		free(ctl->data);
+		ctl->data = nil;
+		break;
+	case Tbulk:
+	case Tintr:
+		qio = ep->aux;
+		if(ep->mode != OWRITE){
+			rd = &qio[OREAD];
+			cancelio(ep, rd);
+			if(rd->toggle == Tddata1)
+				ep->toggle[OREAD] = 1;
+		}
+		if(ep->mode != OREAD){
+			wr = &qio[OWRITE];
+			cancelio(ep, wr);
+			if(wr->toggle == Tddata1)
+				ep->toggle[OWRITE] = 1;
+		}
+		break;
+	default:
+		panic("epclose: bad ttype %d", ep->ttype);
+	}
+
+	deprint("ohci: epclose ep%d.%d: done\n", ep->dev->nb, ep->nb);
+	free(ep->aux);
+	ep->aux = nil;
+}
+
+/*
+ * Reset root hub port `port' (numbered from 1) when on is non-zero;
+ * a zero `on' is a no-op. Raises "port not connected" if the Ccs bit
+ * is clear after writing Spp. Serialized by ctlr->resetl.
+ * Always returns 0.
+ */
+static int
+portreset(Hci *hp, int port, int on)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	Ohci *ohci;
+
+	if(on == 0)
+		return 0;
+
+	ctlr = hp->aux;
+	qlock(&ctlr->resetl);
+	if(waserror()){
+		qunlock(&ctlr->resetl);
+		nexterror();
+	}
+	ilock(ctlr);
+	ohci = ctlr->ohci;
+	ohci->rhportsts[port - 1] = Spp;
+	if((ohci->rhportsts[port - 1] & Ccs) == 0){
+		iunlock(ctlr);
+		error("port not connected");
+	}
+	ohci->rhportsts[port - 1] = Spr;
+	/* poll for Prsc, dropping the ilock on every iteration; no delay or bound */
+	while((ohci->rhportsts[port - 1] & Prsc) == 0){
+		iunlock(ctlr);
+		dprint("ohci: portreset, wait for reset complete\n");
+		ilock(ctlr);
+	}
+	/* write Prsc back (presumably write-one-to-clear -- confirm against the OHCI spec) */
+	ohci->rhportsts[port - 1] = Prsc;
+	iunlock(ctlr);
+	poperror();
+	qunlock(&ctlr->resetl);
+	return 0;
+}
+
+/*
+ * Enable (on != 0) or disable (on == 0) root hub port `port',
+ * numbered from 1, then sleep for Enabledelay.
+ * Serialized by ctlr->resetl. Always returns 0.
+ */
+static int
+portenable(Hci *hp, int port, int on)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	uint32_t cmd;
+
+	ctlr = hp->aux;
+	dprint("ohci: %#p port %d enable=%d\n", ctlr->ohci, port, on);
+	/* value to write to the port status register */
+	cmd = on ? (Spe | Spp) : Cpe;
+	qlock(&ctlr->resetl);
+	if(waserror()){
+		qunlock(&ctlr->resetl);
+		nexterror();
+	}
+	ilock(ctlr);
+	ctlr->ohci->rhportsts[port - 1] = cmd;
+	iunlock(ctlr);
+	tsleep(&m->externup->sleep, return0, 0, Enabledelay);
+	poperror();
+	qunlock(&ctlr->resetl);
+	return 0;
+}
+
+/*
+ * Return the status of root hub port `port' (numbered from 1)
+ * as a set of HP* bits, the way a get port status hub request would.
+ * When the port is not in reset, the Csc and Pesc change bits that
+ * were seen are written back to the port register.
+ */
+static int
+portstatus(Hci *hp, int port)
+{
+	Ctlr *ctlr;
+	uint32_t sts;
+	int hub;
+
+	ctlr = hp->aux;
+	sts = ctlr->ohci->rhportsts[port - 1];	/* single register read */
+	hub = 0;
+	if(sts & Ccs)
+		hub |= HPpresent;
+	if(sts & Pes)
+		hub |= HPenable;
+	if(sts & Pss)
+		hub |= HPsuspend;
+	if(sts & Lsda)
+		hub |= HPslow;
+	if((sts & Prs) != 0)
+		hub |= HPreset;
+	else{
+		/* port is not in reset; these potential writes are ok */
+		if(sts & Csc){
+			hub |= HPstatuschg;
+			ctlr->ohci->rhportsts[port - 1] = Csc;
+		}
+		if(sts & Pesc){
+			hub |= HPchange;
+			ctlr->ohci->rhportsts[port - 1] = Pesc;
+		}
+	}
+	if(hub & (HPstatuschg|HPchange))
+		ddprint("ohci port %d sts %#ulx hub sts %#x\n", port, sts, hub);
+	return hub;
+}
+
+/*
+ * Print the controller registers as 32-bit words, starting at
+ * `revision'. The first three words are always shown; later
+ * ones only when non-zero.
+ */
+static void
+dumpohci(Ctlr *ctlr)
+{
+	uint32_t *regs;
+	int i, nregs;
+
+	regs = &ctlr->ohci->revision;
+	nregs = sizeof(Ohci)/sizeof(uint32_t);
+	print("ohci registers: \n");
+	for(i = 0; i < nregs; i++){
+		if(i >= 3 && regs[i] == 0)
+			continue;
+		print("\t[%#2.2x]\t%#8.8ulx\n", i * 4, regs[i]);
+	}
+	print("\n");
+}
+
+/*
+ * Bring the controller into operation: soft-reset it (preserving
+ * fminterval), install the Hcca and empty control/bulk lists,
+ * enable interrupts and list processing, configure root hub power,
+ * power the ports (resetting those with something connected) and
+ * finally program the largest-packet field of fminterval.
+ */
+static void
+init(Hci *hp)
+{
+	Ctlr *ctlr;
+	Ohci *ohci;
+	int i;
+	uint32_t ival, ctrl, fmi;
+
+	ctlr = hp->aux;
+	dprint("ohci %#p init\n", ctlr->ohci);
+	ohci = ctlr->ohci;
+
+	/* save fminterval across the reset and restore it afterwards */
+	fmi =  ctlr->ohci->fminterval;
+	ctlr->ohci->cmdsts = Shcr;         /* reset the block */
+	while(ctlr->ohci->cmdsts & Shcr)
+		delay(1);  /* wait till reset complete, Ohci says 10us max. */
+	ctlr->ohci->fminterval = fmi;
+
+	/*
+	 * now that soft reset is done we are in suspend state.
+	 * Setup registers which take in suspend state
+	 * (will only be here for 2ms).
+	 */
+
+	ctlr->ohci->hcca = ptr2pa(ctlr->hcca);
+	setctlhd(ctlr, nil);
+	ctlr->ohci->ctlcurred = 0;
+	setbulkhd(ctlr, nil);
+	ctlr->ohci->bulkcurred = 0;
+
+	ohci->intrenable = Mie | Wdh | Ue;
+	ohci->control |= Ccle | Cble | Cple | Cie | Cfsoper;
+
+	/* set frame after operational */
+	ohci->rhdesca = Nps;	/* no power switching */
+	if(ohci->rhdesca & Nps){
+		dprint("ohci: ports are not power switched\n");
+	}else{
+		dprint("ohci: ports are power switched\n");
+		ohci->rhdesca &= ~Psm;
+		ohci->rhsts &= ~Lpsc;
+	}
+	for(i = 0; i < ctlr->nports; i++)	/* paranoia */
+		ohci->rhportsts[i] = 0;		/* this has no effect */
+	delay(50);
+
+	/* power every port; start a reset where Ccs reads as set */
+	for(i = 0; i < ctlr->nports; i++){
+		ohci->rhportsts[i] =  Spp;
+		if((ohci->rhportsts[i] & Ccs) != 0)
+			ohci->rhportsts[i] |= Spr;
+	}
+	delay(100);
+
+	/* force the functional state to Cfsoper if it is not there yet */
+	ctrl = ohci->control;
+	if((ctrl & Cfsmask) != Cfsoper){
+		ctrl = (ctrl & ~Cfsmask) | Cfsoper;
+		ohci->control = ctrl;
+		ohci->rhsts = Lpsc;
+	}
+	/* replace the max packet field of fminterval with 5120 */
+	ival = ohci->fminterval & ~(Fmaxpktmask << Fmaxpktshift);
+	ohci->fminterval = ival | (5120 << Fmaxpktshift);
+
+	if(debug > 1)
+		dumpohci(ctlr);
+}
+
+/*
+ * Walk the PCI devices once (later calls return immediately) and
+ * record every OHCI controller found in the first free slot of
+ * ctlrs[]. Devices without a memory BAR or an assigned irq are
+ * skipped with a message.
+ */
+static void
+scanpci(void)
+{
+	static int already = 0;
+	uint32_t mem;
+	Ctlr *ctlr;
+	Pcidev *p;
+	int slot;
+
+	if(already)
+		return;
+	already = 1;
+	for(p = pcimatch(nil, 0, 0); p != nil; p = pcimatch(p, 0, 0)){
+		/*
+		 * Find Ohci controllers (Programming Interface = 0x10).
+		 */
+		if(p->ccrb != Pcibcserial || p->ccru != Pciscusb ||
+		    p->ccrp != 0x10)
+			continue;
+		mem = p->mem[0].bar & ~0x0F;
+		dprint("ohci: %x/%x port 0x%lux size 0x%x irq %d\n",
+			p->vid, p->did, mem, p->mem[0].size, p->intl);
+		if(mem == 0){
+			print("ohci: failed to map registers\n");
+			continue;
+		}
+		if(p->intl == 0xFF || p->intl == 0) {
+			print("ohci: no irq assigned for port %#lux\n", mem);
+			continue;
+		}
+
+		ctlr = malloc(sizeof(Ctlr));
+		if (ctlr == nil)
+			panic("ohci: out of memory");
+		ctlr->pcidev = p;
+		ctlr->ohci = vmap(mem, p->mem[0].size);
+		dprint("scanpci: ctlr %#p, ohci %#p\n", ctlr, ctlr->ohci);
+		pcisetbme(p);
+		pcisetpms(p, 0);
+
+		/* store the controller in the first unused slot */
+		slot = 0;
+		while(slot < Nhcis && ctlrs[slot] != nil)
+			slot++;
+		if(slot < Nhcis)
+			ctlrs[slot] = ctlr;
+		else
+			print("ohci: bug: no more controllers\n");
+	}
+}
+
+/*
+ * Set the driver debug level. The level is kept in the file-wide
+ * `debug' variable; the hci argument is not used.
+ */
+static void
+usbdebug(Hci *hci, int d)
+{
+	debug = d;
+}
+
+/*
+ * build the periodic scheduling tree:
+ * framesize must be a multiple of the tree size
+ *
+ * A complete binary tree of depth flog2(32) is allocated as an
+ * array of skipped Eds, each linked to its parent ((i-1)/2); the
+ * leaves are then spread over the 32 entries of the Hcca interrupt
+ * table by reversing the bits of the table index.
+ * Panics if memory cannot be allocated.
+ */
+static void
+mkqhtree(Ctlr *ctlr)
+{
+	int i, n, d, o, leaf0, depth;
+	Ed **tree;
+	Qtree *qt;
+
+	depth = flog2(32);
+	n = (1 << (depth+1)) - 1;
+	qt = mallocz(sizeof(*qt), 1);
+	if(qt == nil)
+		panic("usb: can't allocate scheduling tree");
+	qt->nel = n;
+	qt->depth = depth;
+	/* one bandwidth counter per node: size by the element, not the pointer */
+	qt->bw = mallocz(n * sizeof(*qt->bw), 1);
+	qt->root = tree = mallocz(n * sizeof(Ed *), 1);
+	if(qt->bw == nil || qt->root == nil)
+		panic("usb: can't allocate scheduling tree");
+	for(i = 0; i < n; i++){
+		if((tree[i] = edalloc()) == nil)
+			panic("mkqhtree");
+		tree[i]->ctrl = (8 << Edmpsshift);	/* not needed */
+		tree[i]->ctrl |= Edskip;
+
+		/* every node but the root is linked to its parent */
+		if(i > 0)
+			edlinked(tree[i], tree[(i-1)/2]);
+		else
+			edlinked(tree[i], nil);
+	}
+	ctlr->ntree = i;
+	dprint("ohci: tree: %d endpoints allocated\n", i);
+
+	/* distribute leaves evenly round the frame list */
+	leaf0 = n / 2;
+	for(i = 0; i < 32; i++){
+		/* o is i with its low `depth' bits reversed */
+		o = 0;
+		for(d = 0; d < depth; d++){
+			o <<= 1;
+			if(i & (1 << d))
+				o |= 1;
+		}
+		if(leaf0 + o >= n){
+			print("leaf0=%d o=%d i=%d n=%d\n", leaf0, o, i, n);
+			break;
+		}
+		ctlr->hcca->intrtable[i] = ptr2pa(tree[leaf0 + o]);
+	}
+	ctlr->tree = qt;
+}
+
+/*
+ * Allocate the memory the controller works with: prime the Ed and
+ * Td pools, allocate a zeroed Hcca with xspanalloc (alignment
+ * argument 256) and build the periodic scheduling tree.
+ */
+static void
+ohcimeminit(Ctlr *ctlr)
+{
+	edfree(edalloc());	/* allocate pools now */
+	tdfree(tdalloc());
+
+	ctlr->hcca = xspanalloc(sizeof(Hcca), 256, 0);
+	if(ctlr->hcca == nil)
+		panic("usbhreset: no memory for Hcca");
+	memset(ctlr->hcca, 0, sizeof(Hcca));
+
+	mkqhtree(ctlr);
+}
+
+/*
+ * Put the controller in a known state before init: clear the
+ * control register (with 100 unit delays around it) and write
+ * 0x2000 to PCI config offset 0xc0. Runs with the controller ilocked.
+ */
+static void
+ohcireset(Ctlr *ctlr)
+{
+	ilock(ctlr);
+	dprint("ohci %#p reset\n", ctlr->ohci);
+
+	/*
+	 * usually enter here in reset, wait till its through,
+	 * then do our own so we are on known timing conditions.
+	 * Is this needed?
+	 */
+	delay(100);
+	ctlr->ohci->control = 0;
+	delay(100);
+
+	/* legacy support register: turn off lunacy mode */
+	pcicfgw16(ctlr->pcidev, 0xc0, 0x2000);
+
+	iunlock(ctlr);
+}
+
+/*
+ * Quiesce the controller: write Mie to the interrupt disable
+ * register, clear the control register and wait, all under
+ * the controller ilock.
+ */
+static void
+shutdown(Hci *hp)
+{
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+
+	ilock(ctlr);
+	ctlr->ohci->intrdisable = Mie;
+	ctlr->ohci->control = 0;
+	coherence();
+	delay(100);
+	iunlock(ctlr);
+}
+
+/*
+ * Bind hp to an OHCI controller found by scanpci: the first inactive
+ * one, or the inactive one whose register address equals hp->port
+ * when a port is given. The controller is reset, its memory
+ * structures are allocated and the Hci operations are filled in.
+ * Returns 0 on success and -1 when no usable controller is found.
+ */
+static int
+reset(Hci *hp)
+{
+	int i;
+	Ctlr *ctlr;
+	Pcidev *p;
+	static Lock resetlck;
+
+	/*
+	if(getconf("*nousbohci"))
+		return -1;
+	*/
+	ilock(&resetlck);
+	scanpci();
+
+	/*
+	 * Any adapter matches if no hp->port is supplied,
+	 * otherwise the ports must match.
+	 */
+	ctlr = nil;
+	for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){
+		ctlr = ctlrs[i];
+		if(ctlr->active == 0 &&
+		    (hp->port == 0 || hp->port == (uintptr)ctlr->ohci)){
+			ctlr->active = 1;
+			break;
+		}
+	}
+	iunlock(&resetlck);
+	/* check the index first: ctlrs[Nhcis] is past the end of the array */
+	if(i == Nhcis || ctlrs[i] == nil)
+		return -1;
+	/* a control register reading as all ones is treated as unusable */
+	if(ctlr->ohci->control == ~0)
+		return -1;
+
+
+	p = ctlr->pcidev;
+	hp->aux = ctlr;
+	hp->port = (uintptr)ctlr->ohci;
+	hp->irq = p->intl;
+	hp->tbdf = p->tbdf;
+	/* the port count is the low byte of rhdesca */
+	ctlr->nports = hp->nports = ctlr->ohci->rhdesca & 0xff;
+
+	ohcireset(ctlr);
+	ohcimeminit(ctlr);
+
+	/*
+	 * Linkage to the generic HCI driver.
+	 */
+	hp->init = init;
+	hp->dump = dump;
+	hp->interrupt = interrupt;
+	hp->epopen = epopen;
+	hp->epclose = epclose;
+	hp->epread = epread;
+	hp->epwrite = epwrite;
+	hp->seprintep = seprintep;
+	hp->portenable = portenable;
+	hp->portreset = portreset;
+	hp->portstatus = portstatus;
+	hp->shutdown = shutdown;
+	hp->debug = usbdebug;
+	hp->type = "ohci";
+	return 0;
+}
+
+/*
+ * Register this driver with the generic USB code under the
+ * name "ohci", with reset as its entry point.
+ */
+void
+usbohcilink(void)
+{
+	addhcitype("ohci", reset);
+}

+ 2340 - 0
sys/src/9/k10/usbuhci.c

@@ -0,0 +1,2340 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * USB Universal Host Controller Interface (sic) driver.
+ *
+ * BUGS:
+ * - Too many delays and ilocks.
+ * - bandwidth admission control must be done per-frame.
+ * - interrupt endpoints should go on a tree like [oe]hci.
+ * - must warn of power overruns.
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+#include	"../port/usb.h"
+
+typedef struct Ctlio Ctlio;
+typedef struct Ctlr Ctlr;
+typedef struct Isoio Isoio;
+typedef struct Qh Qh;
+typedef struct Qhpool Qhpool;
+typedef struct Qio Qio;
+typedef struct Td Td;
+typedef struct Tdpool Tdpool;
+
+/*
+ * Driver constants: software delays and queue states, followed by
+ * the I/O register offsets (with their bits indented below each
+ * register) and the bit layouts of Td and Qh words.
+ */
+enum
+{
+	Resetdelay	= 100,		/* delay after a controller reset (ms) */
+	Enabledelay	= 100,		/* waiting for a port to enable */
+	Abortdelay	= 5,		/* delay after cancelling Tds (ms) */
+	Incr		= 64,		/* for Td and Qh pools */
+
+	Tdatomic	= 8,		/* max nb. of Tds per bulk I/O op. */
+
+	/* Queue states (software) */
+	Qidle		= 0,
+	Qinstall,
+	Qrun,
+	Qdone,
+	Qclose,
+	Qfree,
+
+	/*
+	 * HW constants
+	 */
+
+	Nframes		= 1024,		/* 2ⁿ for xspanalloc; max 1024 */
+	Align		= 16,		/* for data structures */
+
+	/* Size of small buffer kept within Tds. (software) */
+	/* Keep as a multiple of Align to maintain alignment of Tds in pool */
+	Tdndata		= 1*Align,
+
+	/* i/o space
+	 * Some ports are short (16-bit), some are long (32-bit), some are byte.
+	 * We use ins[bsl] and not vmap.
+	 */
+	Cmd		= 0,
+		Crun		= 0x01,
+		Chcreset	= 0x02,	/* host controller reset */
+		Cgreset		= 0x04,	/* global reset */
+		Cegsm		= 0x08,	/* enter global suspend */
+		Cfgr		= 0x10,	/* forge global resume */
+		Cdbg		= 0x20,	/* single step, debug */
+		Cmaxp		= 0x80,	/* max packet */
+
+	Status		= 2,
+		Susbintr		= 0x01,	/* interrupt */
+		Seintr		= 0x02, /* error interrupt */
+		Sresume		= 0x04, /* resume detect */
+		Shserr		= 0x08, /* host system error */
+		Shcerr		= 0x10, /* host controller error */
+		Shalted		= 0x20, /* controller halted */
+		Sall		= 0x3F,
+
+	Usbintr 		= 4,
+		Itmout		= 0x01, /* timeout or crc */
+		Iresume		= 0x02, /* resume interrupt enable */
+		Ioc		= 0x04, /* interrupt on complete */
+		Ishort		= 0x08, /* short packet interrupt */
+		Iall		= 0x0F,
+	Frnum		= 6,
+	Flbaseadd 	= 8,
+	SOFmod		= 0xC,		/* start of frame modifier register */
+
+	Portsc0		= 0x10,
+		PSpresent	= 0x0001,	/* device present */
+		PSstatuschg	= 0x0002,	/* PSpresent changed */
+		PSenable	= 0x0004,	/* device enabled */
+		PSchange	= 0x0008,	/* PSenable changed */
+		PSresume	= 0x0040,	/* resume detected */
+		PSreserved1	= 0x0080,	/* always read as 1; reserved */
+		PSslow		= 0x0100,	/* device has low speed */
+		PSreset		= 0x0200,	/* port reset */
+		PSsuspend	= 0x1000,	/* port suspended */
+
+	/* Transfer descriptor link */
+	Tdterm		= 0x1,		/* nil (terminate) */
+	Tdlinkqh	= 0x2,			/* link refers to a QH */
+	Tdvf		= 0x4,		/* run linked Tds first (depth-first)*/
+
+	/* Transfer status bits */
+	Tdbitstuff	= 0x00020000,	/* bit stuffing error */
+	Tdcrcto		= 0x00040000,	/* crc or timeout error */
+	Tdnak		= 0x00080000,	/* nak packet received */
+	Tdbabble	= 0x00100000,	/* babble detected */
+	Tddberr		= 0x00200000,	/* data buf. error */
+	Tdstalled	= 0x00400000,	/* serious error to ep. */
+	Tdactive		= 0x00800000,	/* enabled/in use by hw */
+	/* Transfer control bits */
+	Tdioc		= 0x01000000,	/* interrupt on complete */
+	Tdiso		= 0x02000000,	/* isochronous select */
+	Tdlow		= 0x04000000,	/* low speed device */
+	Tderr1		= 0x08000000,	/* bit 0 of error counter */
+	Tderr2		= 0x10000000,	/* bit 1 of error counter */
+	Tdspd		= 0x20000000,	/* short packet detect */
+
+	Tdlen		= 0x000003FF,	/* actual length field */
+
+	Tdfatalerr	= Tdnak|Tdbabble|Tdstalled, /* hw retries others */
+	Tderrors	= Tdfatalerr|Tdbitstuff|Tdcrcto|Tddberr,
+
+	/* Transfer descriptor token bits */
+	Tddata0		= 0,
+	Tddata1		= 0x80000,	/* data toggle (1==DATA1) */
+	Tdtokin		= 0x69,
+	Tdtokout	= 0xE1,
+	Tdtoksetup	= 0x2D,
+
+	Tdmaxpkt	= 0x800,	/* max packet size */
+
+	/* Queue head bits */
+	QHterm		= 1<<0,		/* nil (terminate) */
+	QHlinkqh		= 1<<1,		/* link refers to a QH */
+	QHvf		= 1<<2,		/* vertical first (depth first) */
+};
+
+/*
+ * Software state for one UHCI controller.
+ * The embedded Lock is the one taken by ilock(ctlr).
+ */
+struct Ctlr
+{
+	Lock;			/* for ilock. qh lists and basic ctlr I/O */
+	QLock	portlck;	/* for port resets/enable... */
+	Pcidev*	pcidev;
+	int	active;
+	int	port;		/* I/O address */
+	Qh*	qhs;		/* list of Qhs for this controller */
+	Qh*	qh[Tmax];	/* Dummy Qhs to insert Qhs after */
+	Isoio*	iso;		/* list of active iso I/O */
+	uint32_t*	frames;		/* frame list (used by hw) */
+	uint32_t	load;		/* max load for a single frame */
+	uint32_t	isoload;		/* max iso load for a single frame */
+	int	nintr;		/* number of interrupts attended */
+	int	ntdintr;		/* number of intrs. with something to do */
+	int	nqhintr;		/* number of intrs. for Qhs */
+	int	nisointr;	/* number of intrs. for iso transfers */
+};
+
+/*
+ * I/O state for one direction of a non-iso endpoint,
+ * served through a Qh.
+ */
+struct Qio
+{
+	QLock;			/* for the entire I/O process */
+	Rendez;			/* wait for completion */
+	Qh*	qh;		/* Td list (field const after init) */
+	int	usbid;		/* usb address for endpoint/device */
+	int	toggle;		/* Tddata0/Tddata1 */
+	int	tok;		/* Tdtoksetup, Tdtokin, Tdtokout */
+	uint32_t	iotime;		/* time of last I/O */
+	int	debug;		/* debug flag from the endpoint */
+	char*	err;		/* error string */
+};
+
+/*
+ * I/O state for a control endpoint: a Qio plus the data
+ * kept from the last control request.
+ */
+struct Ctlio
+{
+	Qio;			/* a single Qio for each RPC */
+	unsigned char*	data;		/* read from last ctl req. */
+	int	ndata;		/* number of bytes read */
+};
+
+/*
+ * I/O state for an isochronous endpoint. Tds are reached through
+ * tdps[], one slot per frame; tdu and tdi are the positions of the
+ * user process and of the interrupt handler in that list.
+ */
+struct Isoio
+{
+	QLock;
+	Rendez;			/* wait for space/completion/errors */
+	int	usbid;		/* address used for device/endpoint */
+	int	tok;		/* Tdtokin or Tdtokout */
+	int	state;		/* Qrun -> Qdone -> Qrun... -> Qclose */
+	int	nframes;	/* Nframes/ep->pollival */
+	unsigned char*	data;		/* iso data buffers if not embedded */
+	int	td0frno;	/* frame number for first Td */
+	Td*	tdu;		/* next td for user I/O in tdps */
+	Td*	tdi;		/* next td processed by interrupt */
+	char*	err;		/* error string */
+	int	nerrs;		/* nb of consecutive I/O errors */
+	int32_t	nleft;		/* number of bytes left from last write */
+	int	debug;		/* debug flag from the endpoint */
+	Isoio*	next;		/* in list of active Isoios */
+	Td*	tdps[Nframes];	/* pointer to Td used for i-th frame or nil */
+};
+
+/*
+ * Free list of Tds, grown Incr at a time by tdalloc.
+ * The counters are reported by xdump as alloc = inuse + free.
+ */
+struct Tdpool
+{
+	Lock;
+	Td*	free;		/* head of the free list, linked by Td.next */
+	int	nalloc;		/* Tds ever allocated */
+	int	ninuse;		/* Tds handed out */
+	int	nfree;		/* Tds on the free list */
+};
+
+/*
+ * Free list of Qhs, grown Incr at a time by qhalloc.
+ * The counters are reported by xdump as alloc = inuse + free.
+ */
+struct Qhpool
+{
+	Lock;
+	Qh*	free;		/* head of the free list, linked by Qh.next */
+	int	nalloc;		/* Qhs ever allocated */
+	int	ninuse;		/* Qhs handed out */
+	int	nfree;		/* Qhs on the free list */
+};
+
+/*
+ * HW data structures
+ */
+
+/*
+ * Queue header (known by hw).
+ * 16-byte aligned. first two words used by hw.
+ * They are taken from the pool upon endpoint opening and
+ * queued after the dummy queue header for the endpoint type
+ * in the controller. Actual I/O happens as Tds are linked into it.
+ * The driver does I/O in lock-step.
+ * The user builds a list of Tds and links it into the Qh,
+ * then the Qh goes from Qidle to Qrun and nobody touches it until
+ * it becomes Qdone at interrupt time.
+ * At that point the user collects the Tds and it goes Qidle.
+ * A premature cancel may set the state to Qclose and abort I/O.
+ * The Ctlr lock protects change of state for Qhs in use.
+ * A Qh returned to the pool by qhfree is marked Qfree.
+ */
+struct Qh
+{
+	uint32_t	link;		/* link to next horiz. item (eg. Qh) */
+	uint32_t	elink;		/* link to element (eg. Td; updated by hw) */
+
+	uint32_t	state;		/* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */
+	Qio*	io;		/* for this queue */
+
+	Qh*	next;		/* in active or free list */
+	Td*	tds;		/* Td list in this Qh (initially, elink) */
+	char*	tag;		/* debug and align, mostly */
+	uint32_t	align;
+};
+
+/*
+ * Transfer descriptor.
+ * 16-byte aligned. first two words used by hw. Next 4 by sw.
+ * NOTE(review): the struct starts with four hardware-format words
+ * (link, csw, token, buffer), so "first two words" looks out of
+ * date -- confirm against the UHCI Td layout.
+ * We keep an embedded buffer for small I/O transfers.
+ * They are taken from the pool when buffers are needed for I/O
+ * and linked at the Qh/Isoio for the endpoint and direction requiring it.
+ * The block keeps actual data. They are protected from races by
+ * the queue or the pool keeping it. The owner of the link to the Td
+ * is free to use it and can be the only one using it.
+ */
+struct Td
+{
+	uint32_t	link;		/* Link to next Td or Qh */
+	uint32_t	csw;		/* control and status word (updated by hw) */
+	uint32_t	token;		/* endpt, device, pid */
+	uint32_t	buffer;		/* buffer pointer */
+
+	Td*	next;		/* in qh or Isoio or free list */
+	uint32_t	ndata;		/* bytes available/used at data */
+	unsigned char*	data;		/* pointer to actual data */
+	void*	buff;		/* allocated data, for large transfers */
+
+	unsigned char	sbuff[Tdndata];	/* embedded buffer, for small transfers */
+};
+
+/* I/O port accessors; they need a variable named ctlr in scope */
+#define INB(x)		inb(ctlr->port+(x))
+#define	INS(x)		ins(ctlr->port+(x))
+#define INL(x)		inl(ctlr->port+(x))
+#define OUTB(x, v)	outb(ctlr->port+(x), (v))
+#define	OUTS(x, v)	outs(ctlr->port+(x), (v))
+#define OUTL(x, v)	outl(ctlr->port+(x), (v))
+/* x modulo sz; correct only when sz is a power of two */
+#define TRUNC(x, sz)	((x) & ((sz)-1))
+/* turn a hw link word into a kernel pointer: drop the low 4 flag bits and PCIWINDOW */
+#define PTR(q)		((void*)KADDR((uint32_t)(q) & ~ (0xF|PCIWINDOW)))
+#define QPTR(q)		((Qh*)PTR(q))
+#define TPTR(q)		((Td*)PTR(q))
+/* offset of the status/control register of port p (0-based), 2 bytes apart */
+#define PORT(p)		(Portsc0 + 2*(p))
+/*
+ * Conditional prints. They expand to a bare `if(...)print', so they
+ * need `iso' or `qh' in scope and must not be followed by an else.
+ */
+#define diprint		if(debug || iso->debug)print
+#define ddiprint		if(debug>1 || iso->debug>1)print
+#define dqprint		if(debug || (qh->io && qh->io->debug))print
+#define ddqprint		if(debug>1 || (qh->io && qh->io->debug>1))print
+
+/* controllers known to this driver */
+static Ctlr* ctlrs[Nhcis];
+
+/* Td and Qh pools, shared by all controllers */
+static Tdpool tdpool;
+static Qhpool qhpool;
+/* global debug level tested by the *print macros */
+static int debug;
+
+/* names of the queue states, indexed by Qh.state (Qidle..Qfree) */
+static char* qhsname[] = { "idle", "install", "run", "done", "close", "FREE" };
+
+/*
+ * Write c to the controller command register (16-bit port write).
+ */
+static void
+uhcicmd(Ctlr *ctlr, int c)
+{
+	OUTS(Cmd, c);
+}
+
+/*
+ * Set (on != 0) or clear (on == 0) the run bit in the command
+ * register, then poll the status register up to 100 times, with
+ * delay(1) between polls, until the halted bit agrees with the
+ * request. A timeout is only reported through dprint.
+ */
+static void
+uhcirun(Ctlr *ctlr, int on)
+{
+	int tries, halted, cmd;
+
+	ddprint("uhci %#ux setting run to %d\n", ctlr->port, on);
+
+	cmd = INS(Cmd);
+	if(on)
+		cmd |= Crun;
+	else
+		cmd &= ~Crun;
+	uhcicmd(ctlr, cmd);
+
+	for(tries = 0; tries < 100; tries++){
+		halted = (INS(Status) & Shalted) != 0;
+		/* done when stopping and halted, or starting and not halted */
+		if(halted == (on == 0))
+			break;
+		delay(1);
+	}
+	if(tries == 100)
+		dprint("uhci %#x run cmd timed out\n", ctlr->port);
+	ddprint("uhci %#ux cmd %#ux sts %#ux\n",
+		ctlr->port, INS(Cmd), INS(Status));
+}
+
+/*
+ * Length recorded in the status word of td: the Tdlen field
+ * of csw plus one, wrapped to the width of the field.
+ */
+static int
+tdlen(Td *td)
+{
+	uint32_t len;
+
+	len = td->csw + 1;
+	return len & Tdlen;
+}
+
+/*
+ * Maximum length programmed in the token of td: the value held
+ * above bit 21 plus one, masked with Tdmaxpkt-1.
+ */
+static int
+maxtdlen(Td *td)
+{
+	uint32_t len;
+
+	len = (td->token>>21) + 1;
+	return len & (Tdmaxpkt-1);
+}
+
+/*
+ * Packet id of td: the low byte of its token
+ * (compared by callers with Tdtokin, Tdtokout, Tdtoksetup).
+ */
+static int
+tdtok(Td *td)
+{
+	uint32_t pid;
+
+	pid = td->token & 0xFF;
+	return pid;
+}
+
+/*
+ * Format a description of td into the buffer [s, se): link word and
+ * its flags, control/status word with one letter per bit set, then
+ * token fields, buffer pointers and (when ndata > 0) the data.
+ * A Td whose token is 0 gets only the first part.
+ * Returns the pointer to the end of the formatted text.
+ */
+static char*
+seprinttd(char *s, char *se, Td *td)
+{
+	static struct {
+		uint32_t	bit;
+		char	*flag;
+	} linkflags[] = {
+		{ Tdvf,		"V" },
+		{ Tdterm,	"T" },
+		{ Tdlinkqh,	"Q" },
+	}, ctlflags[] = {
+		{ Tdactive,	"a" },
+		{ Tdiso,	"I" },
+		{ Tdioc,	"i" },
+		{ Tdlow,	"l" },
+	}, errflags[] = {
+		{ Tdstalled,	"s" },
+		{ Tddberr,	"d" },
+		{ Tdbabble,	"b" },
+		{ Tdnak,	"n" },
+		{ Tdcrcto,	"c" },
+		{ Tdbitstuff,	"B" },
+	};
+	char *dir;
+	int i;
+
+	s = seprint(s, se, "%#p link %#ulx", td, td->link);
+	for(i = 0; i < (int)(sizeof(linkflags)/sizeof(linkflags[0])); i++)
+		if((td->link & linkflags[i].bit) != 0)
+			s = seprint(s, se, "%s", linkflags[i].flag);
+
+	s = seprint(s, se, " csw %#ulx ", td->csw);
+	for(i = 0; i < (int)(sizeof(ctlflags)/sizeof(ctlflags[0])); i++)
+		if((td->csw & ctlflags[i].bit) != 0)
+			s = seprint(s, se, "%s", ctlflags[i].flag);
+	/* "z": both error counter bits are zero */
+	if((td->csw & (Tderr1|Tderr2)) == 0)
+		s = seprint(s, se, "z");
+	if(td->csw & Tderrors)
+		s = seprint(s, se, " err %#ulx", td->csw & Tderrors);
+	for(i = 0; i < (int)(sizeof(errflags)/sizeof(errflags[0])); i++)
+		if((td->csw & errflags[i].bit) != 0)
+			s = seprint(s, se, "%s", errflags[i].flag);
+	s = seprint(s, se, " stslen %d", tdlen(td));
+
+	s = seprint(s, se, " token %#ulx", td->token);
+	if(td->token == 0)		/* the BWS loopback Td, ignore rest */
+		return s;
+	s = seprint(s, se, " maxlen %d", maxtdlen(td));
+	s = seprint(s, se, "%s", (td->token & Tddata1) ? " d1" : " d0");
+	s = seprint(s, se, " id %#ulx:", (td->token>>15) & Epmax);
+	s = seprint(s, se, "%#ulx", (td->token>>8) & Devmax);
+	switch(tdtok(td)){
+	case Tdtokin:
+		dir = " in";
+		break;
+	case Tdtokout:
+		dir = " out";
+		break;
+	case Tdtoksetup:
+		dir = " setup";
+		break;
+	default:
+		dir = " BADPID";
+		break;
+	}
+	s = seprint(s, se, "%s", dir);
+	s = seprint(s, se, "\n\t  buffer %#ulx data %#p", td->buffer, td->data);
+	s = seprint(s, se, " ndata %uld sbuff %#p buff %#p",
+		td->ndata, td->sbuff, td->buff);
+	if(td->ndata > 0)
+		s = seprintdata(s, se, td->data, td->ndata);
+	return s;
+}
+
+/*
+ * Print the state of iso. With all == 0 only the Tds at tdu and tdi
+ * are shown; otherwise nframes Tds are shown starting at the one
+ * for frame td0frno, marking tdi with "i->" and tdu with "u->".
+ */
+static void
+isodump(Isoio *iso, int all)
+{
+	char buf[256], *ebuf;
+	Td *td;
+	int n;
+
+	ebuf = buf+sizeof(buf);
+	print("iso %#p %s state %d nframes %d"
+		" td0 %#p tdu %#p tdi %#p data %#p\n",
+		iso, iso->tok == Tdtokin ? "in" : "out",
+		iso->state, iso->nframes, iso->tdps[iso->td0frno],
+		iso->tdu, iso->tdi, iso->data);
+	if(iso->err != nil)
+		print("\terr='%s'\n", iso->err);
+	if(all != 0){
+		td = iso->tdps[iso->td0frno];
+		for(n = 0; n < iso->nframes; n++, td = td->next){
+			seprinttd(buf, ebuf, td);
+			if(td == iso->tdi)
+				print("i->");
+			if(td == iso->tdu)
+				print("u->");
+			print("\t%s\n", buf);
+		}
+		return;
+	}
+	seprinttd(buf, ebuf, iso->tdu);
+	print("\ttdu %s\n", buf);
+	seprinttd(buf, ebuf, iso->tdi);
+	print("\ttdi %s\n", buf);
+}
+
+/*
+ * Report whether software pointer p and hardware link word l refer
+ * to the same object: a terminated link matches only nil.
+ */
+static int
+sameptr(void *p, uint32_t l)
+{
+	return (l & QHterm) ? p == nil : PTR(l) == p;
+}
+
+/*
+ * Print the software list of Tds starting at td, one per line with
+ * prefix pref, noting any Td whose next pointer disagrees with its
+ * hardware link. Stops with "...more tds..." on long lists.
+ */
+static void
+dumptd(Td *td, char *pref)
+{
+	char buf[256], *p, *ebuf;
+	int n;
+
+	ebuf = buf+sizeof(buf);
+	n = 0;
+	while(td != nil){
+		p = seprinttd(buf, ebuf, td);
+		if(!sameptr(td->next, td->link))
+			seprint(p, ebuf, " next %#p != link %#ulx %#p",
+				td->next, td->link, TPTR(td->link));
+		print("%std %s\n", pref, buf);
+		if(n > 20){
+			print("...more tds...\n");
+			break;
+		}
+		n++;
+		td = td->next;
+	}
+}
+
+/*
+ * Print qh with prefix pref: tag, state name, link words and flags,
+ * any disagreement between the software pointers (tds, next) and
+ * the hardware words (elink, link), the software Td list, and the
+ * chain of hardware Td links reachable from elink.
+ */
+static void
+qhdump(Qh *qh, char *pref)
+{
+	char buf[256];
+	char *s;
+	char *se;
+	uint32_t td;
+	int i;
+
+	s = buf;
+	se = buf+sizeof(buf);
+	s = seprint(s, se, "%sqh %s %#p state %s link %#ulx", pref,
+		qh->tag, qh, qhsname[qh->state], qh->link);
+	if(!sameptr(qh->tds, qh->elink))
+		s = seprint(s, se, " [tds %#p != elink %#ulx %#p]",
+			qh->tds, qh->elink, TPTR(qh->elink));
+	if(!sameptr(qh->next, qh->link))
+		s = seprint(s, se, " [next %#p != link %#ulx %#p]",
+			qh->next, qh->link, QPTR(qh->link));
+	if((qh->link & Tdterm) != 0)
+		s = seprint(s, se, "T");
+	if((qh->link & Tdlinkqh) != 0)
+		s = seprint(s, se, "Q");
+	s = seprint(s, se, " elink %#ulx", qh->elink);
+	if((qh->elink & Tdterm) != 0)
+		s = seprint(s, se, "T");
+	if((qh->elink & Tdlinkqh) != 0)
+		s = seprint(s, se, "Q");
+	s = seprint(s, se, " io %#p", qh->io);
+	if(qh->io != nil && qh->io->err != nil)
+		seprint(s, se, " err='%s'", qh->io->err);
+	print("%s\n", buf);
+	dumptd(qh->tds, "\t");
+	if((qh->elink & QHterm) == 0){
+		/* follow the hardware links, stopping at a terminator, a self-link or after about 40 */
+		print("\thw tds:");
+		i = 0;
+		for(td = qh->elink; (td & Tdterm) == 0; td = TPTR(td)->link){
+			print(" %#ulx", td);
+			if(td == TPTR(td)->link)	/* BWS Td */
+				break;
+			if(i++ > 40){
+				print("...");
+				break;
+			}
+		}
+		print("\n");
+	}
+}
+
+/*
+ * Print the state of ctlr: counters, registers, active iso streams
+ * and Qhs. With doilock set the controller is ilocked while dumping
+ * and, for the first controller only, the pool counters are printed.
+ */
+static void
+xdump(Ctlr *ctlr, int doilock)
+{
+	Isoio *iso;
+	Qh *qh;
+	int nqh;
+
+	if(doilock){
+		if(ctlr == ctlrs[0]){
+			lock(&tdpool);
+			print("tds: alloc %d = inuse %d + free %d\n",
+				tdpool.nalloc, tdpool.ninuse, tdpool.nfree);
+			unlock(&tdpool);
+			lock(&qhpool);
+			print("qhs: alloc %d = inuse %d + free %d\n",
+				qhpool.nalloc, qhpool.ninuse, qhpool.nfree);
+			unlock(&qhpool);
+		}
+		ilock(ctlr);
+	}
+	print("uhci port %#x frames %#p nintr %d ntdintr %d",
+		ctlr->port, ctlr->frames, ctlr->nintr, ctlr->ntdintr);
+	print(" nqhintr %d nisointr %d\n", ctlr->nqhintr, ctlr->nisointr);
+	print("cmd %#ux sts %#ux fl %#ulx ps1 %#ux ps2 %#ux frames[0] %#ulx\n",
+		INS(Cmd), INS(Status),
+		INL(Flbaseadd), INS(PORT(0)), INS(PORT(1)),
+		ctlr->frames[0]);
+
+	iso = ctlr->iso;
+	while(iso != nil){
+		isodump(iso, 1);
+		iso = iso->next;
+	}
+
+	/* bound the walk in case the Qh list loops */
+	nqh = 0;
+	qh = ctlr->qhs;
+	while(qh != nil){
+		qhdump(qh, "");
+		if(nqh > 20){
+			print("qhloop\n");
+			break;
+		}
+		nqh++;
+		qh = qh->next;
+	}
+	print("\n");
+	if(doilock)
+		iunlock(ctlr);
+}
+
+/*
+ * Dump the controller behind hp, taking its ilock.
+ */
+static void
+dump(Hci *hp)
+{
+	xdump(hp->aux, 1);
+}
+
+/*
+ * Take a Td from the pool, adding Incr more to the pool first when
+ * it is empty. The Td returned is zeroed and its link is Tdterm.
+ * Panics if the pool cannot be grown.
+ */
+static Td*
+tdalloc(void)
+{
+	Td *td, *chunk;
+	int k;
+
+	lock(&tdpool);
+	if(tdpool.free == nil){
+		ddprint("uhci: tdalloc %d Tds\n", Incr);
+		chunk = xspanalloc(Incr*sizeof(Td), Align, 0);
+		if(chunk == nil)
+			panic("tdalloc");
+		/* push from the last element so chunk[0] ends at the head */
+		for(k = Incr-1; k >= 0; k--){
+			chunk[k].next = tdpool.free;
+			tdpool.free = &chunk[k];
+		}
+		tdpool.nalloc += Incr;
+		tdpool.nfree += Incr;
+	}
+	td = tdpool.free;
+	tdpool.free = td->next;
+	tdpool.nfree--;
+	tdpool.ninuse++;
+	unlock(&tdpool);
+
+	memset(td, 0, sizeof(Td));
+	td->link = Tdterm;
+	assert(((uint64_t)td & 0xF) == 0);
+	return td;
+}
+
+/*
+ * Return td to the pool, freeing the buffer allocated for
+ * large transfers (td->buff). A nil td is ignored.
+ */
+static void
+tdfree(Td *td)
+{
+	if(td != nil){
+		free(td->buff);
+		td->buff = nil;
+
+		lock(&tdpool);
+		td->next = tdpool.free;
+		tdpool.free = td;
+		tdpool.nfree++;
+		tdpool.ninuse--;
+		unlock(&tdpool);
+	}
+}
+
+/*
+ * Insert next right after qh, in both the software list (next) and
+ * the hardware list (link). With next == nil the list is
+ * terminated at qh instead.
+ */
+static void
+qhlinkqh(Qh* qh, Qh* next)
+{
+	if(next != nil){
+		/* next takes over qh's successor before qh points to it */
+		next->link = qh->link;
+		next->next = qh->next;
+		qh->link = PCIWADDR(next)|QHlinkqh;
+	}else
+		qh->link = QHterm;
+	qh->next = next;
+}
+
+/*
+ * Make td the Td list of qh, in both the software pointer (tds)
+ * and the hardware element link; nil empties the queue.
+ */
+static void
+qhlinktd(Qh *qh, Td *td)
+{
+	qh->tds = td;
+	if(td != nil)
+		qh->elink = PCIWADDR(td);
+	else
+		qh->elink = QHvf|QHterm;
+}
+
+/*
+ * Make next the successor of td, in both the software pointer and
+ * the hardware link (depth-first); nil terminates the list at td.
+ */
+static void
+tdlinktd(Td *td, Td *next)
+{
+	td->next = next;
+	if(next != nil)
+		td->link = PCIWADDR(next)|Tdvf;
+	else
+		td->link = Tdterm;
+}
+
+/*
+ * Take a Qh from the pool (growing the pool by Incr when empty),
+ * initialize it as an idle, empty queue for io labelled with a copy
+ * of tag and, when prev is not nil, link it after prev under the
+ * controller ilock. Panics if the pool cannot be grown.
+ */
+static Qh*
+qhalloc(Ctlr *ctlr, Qh *prev, Qio *io, char *tag)
+{
+	Qh *qh, *chunk;
+	int k;
+
+	lock(&qhpool);
+	if(qhpool.free == nil){
+		ddprint("uhci: qhalloc %d Qhs\n", Incr);
+		chunk = xspanalloc(Incr*sizeof(Qh), Align, 0);
+		if(chunk == nil)
+			panic("qhalloc");
+		/* push from the last element so chunk[0] ends at the head */
+		for(k = Incr-1; k >= 0; k--){
+			chunk[k].next = qhpool.free;
+			qhpool.free = &chunk[k];
+		}
+		qhpool.nalloc += Incr;
+		qhpool.nfree += Incr;
+	}
+	qh = qhpool.free;
+	qhpool.free = qh->next;
+	qh->next = nil;
+	qh->link = QHterm;
+	qhpool.nfree--;
+	qhpool.ninuse++;
+	unlock(&qhpool);
+
+	qh->tds = nil;
+	qh->elink = QHterm;
+	qh->state = Qidle;
+	qh->io = io;
+	qh->tag = nil;
+	kstrdup(&qh->tag, tag);
+
+	if(prev != nil){
+		/* make the initialized Qh visible before linking it in */
+		coherence();
+		ilock(ctlr);
+		qhlinkqh(prev, qh);
+		iunlock(ctlr);
+	}
+
+	assert(((uint64_t)qh & 0xF) == 0);
+	return qh;
+}
+
+/*
+ * Unlink qh from the controller's Qh list, free its Tds and its tag
+ * string, and return it to the pool marked Qfree.
+ * A nil qh is ignored. Panics if qh has no predecessor in the list.
+ */
+static void
+qhfree(Ctlr *ctlr, Qh *qh)
+{
+	Td *td;
+	Td *ltd;
+	Qh *q;
+
+	if(qh == nil)
+		return;
+
+	/* find the predecessor and bypass qh in both sw and hw lists */
+	ilock(ctlr);
+	for(q = ctlr->qhs; q != nil; q = q->next)
+		if(q->next == qh)
+			break;
+	if(q == nil)
+		panic("qhfree: nil q");
+	q->next = qh->next;
+	q->link = qh->link;
+	iunlock(ctlr);
+
+	for(td = qh->tds; td != nil; td = ltd){
+		ltd = td->next;
+		tdfree(td);
+	}
+	/* the tag was allocated by kstrdup in qhalloc; release it */
+	free(qh->tag);
+	qh->tag = nil;
+	lock(&qhpool);
+	qh->state = Qfree;	/* paranoia */
+	qh->next = qhpool.free;
+	qh->io = nil;
+	qhpool.free = qh;
+	qhpool.ninuse--;
+	qhpool.nfree++;
+	unlock(&qhpool);
+	ddprint("qhfree: qh %#p\n", qh);
+}
+
+/*
+ * Map the error bits of a Td status word to an error string.
+ * When several bits are set the first match in this order wins:
+ * crc/timeout, babble, data buffer, bit stuffing, stalled.
+ * Zero yields "ok"; anything else yields Eio.
+ */
+static char*
+errmsg(int err)
+{
+	char *msg;
+
+	if(err == 0)
+		msg = "ok";
+	else if(err & Tdcrcto)
+		msg = "crc/timeout error";
+	else if(err & Tdbabble)
+		msg = "babble detected";
+	else if(err & Tddberr)
+		msg = "db error";
+	else if(err & Tdbitstuff)
+		msg = "bit stuffing error";
+	else if(err & Tdstalled)
+		msg = Estalled;
+	else
+		msg = Eio;
+	return msg;
+}
+
+/*
+ * Sleep condition for iso readers; a is an Isoio.
+ * True when the stream is closed, or when it is a running input
+ * stream and the interrupt position differs from the user position.
+ */
+static int
+isocanread(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	if(iso->state == Qclose)
+		return 1;
+	if(iso->state != Qrun || iso->tok != Tdtokin)
+		return 0;
+	return iso->tdi != iso->tdu;
+}
+
+/*
+ * Sleep condition for iso writers; a is an Isoio.
+ * True when the stream is closed, or when it is a running output
+ * stream and the Td after the user position is not the one at the
+ * interrupt position.
+ */
+static int
+isocanwrite(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	if(iso->state == Qclose)
+		return 1;
+	if(iso->state != Qrun || iso->tok != Tdtokout)
+		return 0;
+	return iso->tdu->next != iso->tdi;
+}
+
+/*
+ * Arm td for an isochronous transfer of count bytes on iso:
+ * the token carries count-1, the usb id and the pid, and the
+ * status word is set active with interrupt on complete.
+ */
+static void
+tdisoinit(Isoio *iso, Td *td, int32_t count)
+{
+	uint32_t maxlen, addr;
+
+	maxlen = (count-1)<<21;
+	addr = (iso->usbid & 0x7FF)<<8;
+	td->ndata = count;
+	td->token = maxlen | addr | iso->tok;
+	td->csw = Tderr1|Tdiso|Tdactive|Tdioc;
+}
+
+/*
+ * Process Iso i/o on interrupt. For writes update just error status.
+ * For reads update tds to reflect data and also error status.
+ * When tdi approaches tdu, advance tdu; data may be lost.
+ * (If nframes is << Nframes tdu might be far away but this avoids
+ * races regarding frno.)
+ * If we suffer errors for more than half the frames we stall.
+ */
+static void
+isointerrupt(Ctlr *ctlr, Isoio* iso)
+{
+	Td *tdi;
+	int err;
+	int i;
+	int nframes;
+
+	tdi = iso->tdi;
+	if((tdi->csw & Tdactive) != 0)		/* nothing new done */
+		return;
+	ctlr->nisointr++;
+	ddiprint("isointr: iso %#p: tdi %#p tdu %#p\n", iso, tdi, iso->tdu);
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("isointr: iso state");
+	if(debug > 1 || iso->debug > 1)
+		isodump(iso, 0);
+
+	nframes = iso->nframes / 2;		/* limit how many we look */
+	if(nframes > 64)
+		nframes = 64;
+
+	for(i = 0; i < nframes && (tdi->csw & Tdactive) == 0; i++){
+		tdi->csw &= ~Tdioc;
+		err = tdi->csw & Tderrors;
+		if(err == 0)
+			iso->nerrs = 0;
+		else if(iso->nerrs++ > iso->nframes/2)
+			tdi->csw |= Tdstalled;
+		if((tdi->csw & Tdstalled) != 0){
+			/* only the first error is recorded */
+			if(iso->err == nil){
+				iso->err = errmsg(err);
+				diprint("isointerrupt: tdi %#p error %#ux %s\n",
+					tdi, err, iso->err);
+				diprint("ctlr load %uld\n", ctlr->load);
+			}
+			tdi->ndata = 0;
+		}else
+			tdi->ndata = tdlen(tdi);
+
+		/* tdi is about to catch up with tdu: re-arm tdu and move it on */
+		if(tdi->next == iso->tdu || tdi->next->next == iso->tdu){
+			memset(iso->tdu->data, 0, maxtdlen(iso->tdu));
+			tdisoinit(iso, iso->tdu, maxtdlen(iso->tdu));
+			iso->tdu = iso->tdu->next;
+			iso->nleft = 0;
+		}
+		tdi = tdi->next;
+	}
+	/* report the number of Tds actually handled, not the limit */
+	ddiprint("isointr: %d frames processed\n", i);
+	/* the limit was hit: ask for an interrupt at the next Td */
+	if(i == nframes)
+		tdi->csw |= Tdioc;
+	iso->tdi = tdi;
+	if(isocanwrite(iso) || isocanread(iso)){
+		diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso,
+			iso->tdi, iso->tdu);
+		wakeup(iso);
+	}
+
+}
+
+/*
+ * Process a Qh upon interrupt. There's one per ongoing user I/O.
+ * User process releases resources later, that is not done here.
+ * We may find in this order one or more Tds:
+ * - none/many non active and completed Tds
+ * - none/one (usually(!) not active) and failed Td
+ * - none/many active Tds.
+ * Upon errors the entire transfer is aborted and error reported.
+ * Otherwise, the transfer is complete only when all Tds are done or
+ * when a read with less than maxpkt is found.
+ * Use the software list and not qh->elink to avoid races.
+ * We could use qh->elink to see if there's something new or not.
+ *
+ * Panics unless qh is in state Qrun with a non-empty Td list.
+ * On completion qh goes to Qdone and the sleeper on qh->io is woken.
+ */
+static void
+qhinterrupt(Ctlr *ctlr, Qh *qh)
+{
+	Td *td;
+	int err;
+
+	ctlr->nqhintr++;
+	if(qh->state != Qrun)
+		panic("qhinterrupt: qh state");
+	if(qh->tds == nil)
+		panic("qhinterrupt: no tds");
+	if((qh->tds->csw & Tdactive) == 0)
+		ddqprint("qhinterrupt port %#ux qh %#p p0 %#x p1 %#x\n",
+			ctlr->port, qh, INS(PORT(0)), INS(PORT(1)));
+	for(td = qh->tds; td != nil; td = td->next){
+		/* an active Td means the transfer is still in progress */
+		if(td->csw & Tdactive)
+			return;
+		td->csw &= ~Tdioc;
+		if((td->csw & Tdstalled) != 0){
+			err = td->csw & Tderrors;
+			/* just stalled is end of xfer but not an error */
+			if(err != Tdstalled && qh->io->err == nil){
+				qh->io->err = errmsg(td->csw & Tderrors);
+				dqprint("qhinterrupt: td %#p error %#ux %s\n",
+					td, err, qh->io->err);
+				dqprint("ctlr load %uld\n", ctlr->load);
+			}
+			break;
+		}
+		if((td->csw & Tdnak) != 0){	/* retransmit; not serious */
+			td->csw &= ~Tdnak;
+			if(td->next == nil)
+				td->csw |= Tdioc;
+		}
+		td->ndata = tdlen(td);
+		if(td->ndata < maxtdlen(td)){	/* EOT */
+			/* step past the short Td so its count survives below */
+			td = td->next;
+			break;
+		}
+	}
+
+	/*
+	 * Done. Make void the Tds not used (errors or EOT) and wakeup epio.
+	 */
+	qh->elink = QHterm;
+	for(; td != nil; td = td->next)
+		td->ndata = 0;
+	qh->state = Qdone;
+	wakeup(qh->io);
+}
+
+/*
+ * Interrupt service routine for one controller.
+ * ureg is unused; a is the Hci whose aux field is the Ctlr.
+ * Acknowledges the status bits in Sall (returning early if none is
+ * set, since the irq may be shared) and then polls every iso stream
+ * and every Qh of the controller, all under the controller's ilock.
+ */
+static void
+interrupt(Ureg *ureg, void *a)
+{
+	Hci *hp;
+	Ctlr *ctlr;
+	int frptr;
+	int frno;
+	Qh *qh;
+	Isoio *iso;
+	int sts;
+	int cmd;
+
+	hp = a;
+	ctlr = hp->aux;
+	ilock(ctlr);
+	ctlr->nintr++;
+	sts = INS(Status);
+	if((sts & Sall) == 0){		/* not for us; sharing irq */
+		iunlock(ctlr);
+		return;
+	}
+	OUTS(Status, sts & Sall);
+	cmd = INS(Cmd);
+	/*
+	 * == binds tighter than &, so the mask needs its own parentheses;
+	 * without them the test is cmd & (Crun == 0), which is always 0.
+	 */
+	if((cmd & Crun) == 0){
+		print("uhci %#ux: not running: uhci bug?\n", ctlr->port);
+		/* BUG: should abort everything in this case */
+	}
+	if(debug > 1){
+		frptr = INL(Flbaseadd);
+		frno = INL(Frnum);
+		frno = TRUNC(frno, Nframes);
+		print("cmd %#ux sts %#ux frptr %#ux frno %d\n",
+			cmd, sts, frptr, frno);
+	}
+	ctlr->ntdintr++;
+	/*
+	 * Will we know in USB 3.0 who the interrupt was for?
+	 * Do they still teach indexing in CS?
+	 * This is Intel's doing.
+	 */
+	for(iso = ctlr->iso; iso != nil; iso = iso->next)
+		if(iso->state == Qrun || iso->state == Qdone)
+			isointerrupt(ctlr, iso);
+	for(qh = ctlr->qhs; qh != nil; qh = qh->next)
+		if(qh->state == Qrun)
+			qhinterrupt(ctlr, qh);
+		else if(qh->state == Qclose)
+			qhlinktd(qh, nil);	/* closing: detach its Tds */
+	iunlock(ctlr);
+}
+
+/*
+ * iso->tdu is the next place to put data. When it gets full
+ * it is activated and tdu advanced.
+ *
+ * Copies up to count bytes from b while isocanwrite(iso) holds and
+ * returns the number of bytes consumed, which may be less than count.
+ * iso->nleft tracks how many bytes the partly filled tdu already holds.
+ */
+static int32_t
+putsamples(Isoio *iso, unsigned char *b, int32_t count)
+{
+	int32_t tot;
+	int32_t n;
+
+	for(tot = 0; isocanwrite(iso) && tot < count; tot += n){
+		n = count-tot;
+		/* never copy past the room left in the current Td */
+		if(n > maxtdlen(iso->tdu) - iso->nleft)
+			n = maxtdlen(iso->tdu) - iso->nleft;
+		memmove(iso->tdu->data+iso->nleft, b+tot, n);
+		iso->nleft += n;
+		if(iso->nleft == maxtdlen(iso->tdu)){
+			tdisoinit(iso, iso->tdu, iso->nleft);
+			iso->nleft = 0;
+			iso->tdu = iso->tdu->next;
+		}
+	}
+	return tot;
+}
+
+/*
+ * Queue data for writing and return error status from
+ * last writes done, to maintain buffered data.
+ *
+ * Holds iso's qlock for the whole call.  The controller ilock is
+ * dropped around every sleep and around the copy in putsamples().
+ * Raises an error if the stream is closed or an error was recorded;
+ * otherwise returns the number of bytes queued.
+ */
+static int32_t
+episowrite(Ep *ep, Isoio *iso, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	unsigned char *b;
+	int tot;
+	int nw;
+	char *err;
+
+	iso->debug = ep->debug;
+	diprint("uhci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	ctlr = ep->hp->aux;
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	b = a;
+	for(tot = 0; tot < count; tot += nw){
+		while(isocanwrite(iso) == 0){
+			iunlock(ctlr);
+			diprint("uhci: episowrite: %#p sleep\n", iso);
+			/* an error raised by tsleep ends the wait, lock retaken */
+			if(waserror()){
+				if(iso->err == nil)
+					iso->err = "I/O timed out";
+				ilock(ctlr);
+				break;
+			}
+			tsleep(iso, isocanwrite, iso, ep->tmout);
+			poperror();
+			ilock(ctlr);
+		}
+		/* consume the pending error, if any, before queueing more */
+		err = iso->err;
+		iso->err = nil;
+		if(iso->state == Qclose || err != nil){
+			iunlock(ctlr);
+			error(err ? err : Eio);
+		}
+		if(iso->state != Qrun)
+			panic("episowrite: iso not running");
+		iunlock(ctlr);		/* We could page fault here */
+		nw = putsamples(iso, b+tot, count-tot);
+		ilock(ctlr);
+	}
+	if(iso->state != Qclose)
+		iso->state = Qdone;
+	iunlock(ctlr);
+	err = iso->err;		/* in case it failed early */
+	iso->err = nil;
+	qunlock(iso);
+	poperror();
+	if(err != nil)
+		error(err);
+	diprint("uhci: episowrite: %#p %d bytes\n", iso, tot);
+	return tot;
+}
+
+/*
+ * Available data is kept at tdu and following tds, up to tdi (excluded).
+ *
+ * Waits until isocanread(iso) holds, then copies at most count bytes
+ * into a, re-arming each Td once it has been drained.
+ * Holds iso's qlock for the whole call; the controller ilock is
+ * dropped around the sleep and around the copy to the caller's buffer.
+ * Raises an error if the stream is closed or an error was recorded;
+ * otherwise returns the number of bytes copied.
+ */
+static int32_t
+episoread(Ep *ep, Isoio *iso, void *a, int count)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	unsigned char *b;
+	int nr;
+	int tot;
+	Td *tdu;
+
+	iso->debug = ep->debug;
+	diprint("uhci: episoread: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	b = a;
+	ctlr = ep->hp->aux;
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	iso->err = nil;
+	iso->nerrs = 0;
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	while(isocanread(iso) == 0){
+		iunlock(ctlr);
+		diprint("uhci: episoread: %#p sleep\n", iso);
+		/* an error raised by tsleep ends the wait, lock retaken */
+		if(waserror()){
+			if(iso->err == nil)
+				iso->err = "I/O timed out";
+			ilock(ctlr);
+			break;
+		}
+		tsleep(iso, isocanread, iso, ep->tmout);
+		poperror();
+		ilock(ctlr);
+	}
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qdone;
+	/*
+	 * NOTE(review): when the loop above is left through its waserror
+	 * branch isocanread() was still false, so tdu may equal tdi here;
+	 * confirm this assert cannot trip on an interrupted sleep.
+	 */
+	assert(iso->tdu != iso->tdi);
+
+	for(tot = 0; iso->tdi != iso->tdu && tot < count; tot += nr){
+		tdu = iso->tdu;
+		if(tdu->csw & Tdactive){
+			diprint("uhci: episoread: %#p tdu active\n", iso);
+			break;
+		}
+		nr = tdu->ndata;
+		if(tot + nr > count)
+			nr = count - tot;
+		if(nr == 0)
+			print("uhci: ep%d.%d: too many polls\n",
+				ep->dev->nb, ep->nb);
+		else{
+			iunlock(ctlr);		/* We could page fault here */
+			memmove(b+tot, tdu->data, nr);
+			ilock(ctlr);
+			/* partial read: slide the unread tail to the front */
+			if(nr < tdu->ndata)
+				memmove(tdu->data, tdu->data+nr, tdu->ndata - nr);
+			tdu->ndata -= nr;
+		}
+		if(tdu->ndata == 0){
+			tdisoinit(iso, tdu, ep->maxpkt);
+			iso->tdu = tdu->next;
+		}
+	}
+	iunlock(ctlr);
+	qunlock(iso);
+	poperror();
+	/*
+	 * NOTE(review): iso->err is read after qunlock here, whereas
+	 * episowrite copies it while still holding the qlock; confirm.
+	 */
+	diprint("uhci: episoread: %#p %d bytes err '%s'\n", iso, tot, iso->err);
+	if(iso->err != nil)
+		error(iso->err);
+	return tot;
+}
+
+/*
+ * Return the data toggle that follows tog: Tddata0 becomes Tddata1
+ * and anything else becomes Tddata0.
+ */
+static int
+nexttoggle(int tog)
+{
+	return tog == Tddata0 ? Tddata1 : Tddata0;
+}
+
+/*
+ * Allocate and fill one Td carrying count bytes for io on endpoint ep.
+ * flags are or'ed into the status word; when a is not nil, count bytes
+ * are copied from it into the Td buffer.  Data that fits in Tdndata
+ * bytes uses the buffer embedded in the Td, anything larger gets a
+ * separately allocated one.  Advances io->toggle.
+ * Raises an error if count exceeds ep->maxpkt.
+ */
+static Td*
+epgettd(Ep *ep, Qio *io, int flags, void *a, int count)
+{
+	Td *td;
+	int tok;
+
+	if(ep->maxpkt < count)
+		error("maxpkt too short");
+	td = tdalloc();
+	if(count <= Tdndata)
+		td->data = td->sbuff;
+	else
+		td->data = td->buff = smalloc(ep->maxpkt);
+	td->buffer = PCIWADDR(td->data);
+	td->ndata = count;
+	if(a != nil && count > 0)
+		memmove(td->data, a, count);
+	td->csw = Tderr2|Tderr1|flags;
+	if(ep->dev->speed == Lowspeed)
+		td->csw |= Tdlow;
+	/* this Td uses the current toggle; the next one gets the other */
+	tok = io->tok | io->toggle;
+	io->toggle = nexttoggle(io->toggle);
+	td->token = ((count-1)<<21) | ((io->usbid&0x7FF)<<8) | tok;
+
+	return td;
+}
+
+/*
+ * Try to get them idle: mark qh done, terminate its element link,
+ * and deactivate every Td on its list.  A Td that was still active
+ * is recorded as having moved no data.
+ */
+static void
+aborttds(Qh *qh)
+{
+	Td *td;
+
+	qh->state = Qdone;
+	qh->elink = QHterm;
+	td = qh->tds;
+	while(td != nil){
+		if((td->csw & Tdactive) != 0)
+			td->ndata = 0;
+		td->csw &= ~(Tdactive|Tdioc);
+		td = td->next;
+	}
+}
+
+/*
+ * Sleep condition used by epiowait: a is the Qh, and the wait is
+ * over as soon as it is no longer in state Qrun.
+ */
+static int
+epiodone(void *a)
+{
+	Qh *qh = a;
+
+	if(qh->state == Qrun)
+		return 0;
+	return 1;
+}
+
+/*
+ * Wait for the transfer queued on io->qh to finish.
+ * tmout == 0 sleeps without a time limit, otherwise tsleep is used.
+ * If the sleep raised an error, or the qh is still in Qrun afterwards,
+ * the Tds are aborted and io->err is set to "request timed out".
+ * In every case the Tds are unlinked from the qh, the qh goes back to
+ * Qidle unless it is being closed, and load is subtracted from ctlr->load.
+ */
+static void
+epiowait(Ctlr *ctlr, Qio *io, int tmout, uint32_t load)
+{
+	Mach *m = machp();
+	Qh *qh;
+	int timedout;
+
+	qh = io->qh;
+	ddqprint("uhci io %#p sleep on qh %#p state %uld\n", io, qh, qh->state);
+	timedout = 0;
+	if(waserror()){
+		dqprint("uhci io %#p qh %#p timed out\n", io, qh);
+		timedout++;
+	}else{
+		if(tmout == 0)
+			sleep(io, epiodone, qh);
+		else
+			tsleep(io, epiodone, qh, tmout);
+		poperror();
+	}
+	ilock(ctlr);
+	if(qh->state == Qrun)
+		timedout = 1;
+	else if(qh->state != Qdone && qh->state != Qclose)
+		panic("epio: queue not done and not closed");
+	if(timedout){
+		aborttds(io->qh);
+		io->err = "request timed out";
+		iunlock(ctlr);
+		/* pause after the abort; an error in the sleep is ignored */
+		if(!waserror()){
+			tsleep(&m->externup->sleep, return0, 0, Abortdelay);
+			poperror();
+		}
+		ilock(ctlr);
+	}
+	if(qh->state != Qclose)
+		qh->state = Qidle;
+	qhlinktd(qh, nil);
+	ctlr->load -= load;
+	iunlock(ctlr);
+}
+
+/*
+ * Non iso I/O.
+ * To make it work for control transfers, the caller may
+ * lock the Qio for the entire control transfer.
+ *
+ * Splits count bytes at a into Tds of at most ep->maxpkt bytes,
+ * links them to io's Qh, waits in epiowait() and then gathers the
+ * result.  For input (Tdtokin) the received data is copied back to a.
+ * mustlock != 0 makes this function take io's qlock itself.
+ * Returns the number of bytes transferred, or raises io->err.
+ */
+static int32_t
+epio(Ep *ep, Qio *io, void *a, int32_t count, int mustlock)
+{
+	Mach *m = machp();
+	Td *td, *ltd, *td0, *ntd;
+	Ctlr *ctlr;
+	Qh* qh;
+	int32_t n, tot;
+	char buf[128];
+	unsigned char *c;
+	int saved, ntds, tmout;
+	uint32_t load;
+	char *err;
+
+	qh = io->qh;
+	ctlr = ep->hp->aux;
+	io->debug = ep->debug;
+	tmout = ep->tmout;
+	ddeprint("epio: %s ep%d.%d io %#p count %ld load %uld\n",
+		io->tok == Tdtokin ? "in" : "out",
+		ep->dev->nb, ep->nb, io, count, ctlr->load);
+	if((debug > 1 || ep->debug > 1) && io->tok != Tdtokin){
+		seprintdata(buf, buf+sizeof(buf), a, count);
+		print("uchi epio: user data: %s\n", buf);
+	}
+	if(mustlock){
+		qlock(io);
+		if(waserror()){
+			qunlock(io);
+			nexterror();
+		}
+	}
+	io->err = nil;
+	ilock(ctlr);
+	if(qh->state == Qclose){	/* Tds released by cancelio */
+		iunlock(ctlr);
+		error(io->err ? io->err : Eio);
+	}
+	if(qh->state != Qidle)
+		panic("epio: qh not idle");
+	qh->state = Qinstall;
+	iunlock(ctlr);
+
+	/* build the Td chain; a do-while so count == 0 still yields one Td */
+	c = a;
+	td0 = ltd = nil;
+	load = tot = 0;
+	do{
+		n = ep->maxpkt;
+		if(count-tot < n)
+			n = count-tot;
+		if(c != nil && io->tok != Tdtokin)
+			td = epgettd(ep, io, Tdactive, c+tot, n);
+		else
+			td = epgettd(ep, io, Tdactive|Tdspd, nil, n);
+		if(td0 == nil)
+			td0 = td;
+		else
+			tdlinktd(ltd, td);
+		ltd = td;
+		tot += n;
+		load += ep->load;
+	}while(tot < count);
+	if(td0 == nil || ltd == nil)
+		panic("epio: no td");
+
+	ltd->csw |= Tdioc;	/* the last one interrupts */
+	ddeprint("uhci: load %uld ctlr load %uld\n", load, ctlr->load);
+	ilock(ctlr);
+	if(qh->state != Qclose){
+		io->iotime = TK2MS(m->ticks);
+		qh->state = Qrun;
+		coherence();
+		qhlinktd(qh, td0);
+		ctlr->load += load;
+	}
+	iunlock(ctlr);
+
+	epiowait(ctlr, io, tmout, load);
+
+	if(debug > 1 || ep->debug > 1)
+		dumptd(td0, "epio: got tds: ");
+
+	/* collect what was transferred and release the Tds */
+	tot = 0;
+	c = a;
+	saved = 0;
+	ntds = 0;
+	for(td = td0; td != nil; td = ntd){
+		ntds++;
+		/*
+		 * Use td tok, not io tok, because of setup packets.
+		 * Also, if the Td was stalled or active (previous Td
+		 * was a short packet), we must save the toggle as it is.
+		 */
+		if(td->csw & (Tdstalled|Tdactive)){
+			if(saved++ == 0)
+				io->toggle = td->token & Tddata1;
+		}else{
+			tot += td->ndata;
+			if(c != nil && tdtok(td) == Tdtokin && td->ndata > 0){
+				memmove(c, td->data, td->ndata);
+				c += td->ndata;
+			}
+		}
+		ntd = td->next;
+		tdfree(td);
+	}
+	err = io->err;
+	if(mustlock){
+		qunlock(io);
+		poperror();
+	}
+	ddeprint("epio: io %#p: %d tds: return %ld err '%s'\n",
+		io, ntds, tot, err);
+	if(err != nil)
+		error(err);
+	if(tot < 0)
+		error(Eio);
+	return tot;
+}
+
+/*
+ * halt condition was cleared on the endpoint. update our toggles.
+ * Only bulk and interrupt endpoints keep toggles here; for those,
+ * each direction the endpoint is open for is reset to Tddata0
+ * under that direction's qlock.
+ */
+static void
+clrhalt(Ep *ep)
+{
+	Qio *io;
+
+	ep->clrhalt = 0;
+	if(ep->ttype != Tbulk && ep->ttype != Tintr)
+		return;
+
+	io = ep->aux;
+	if(ep->mode != OREAD){
+		qlock(&io[OWRITE]);
+		io[OWRITE].toggle = Tddata0;
+		deprint("ep clrhalt for io %#p\n", io+OWRITE);
+		qunlock(&io[OWRITE]);
+	}
+	if(ep->mode != OWRITE){
+		qlock(&io[OREAD]);
+		io[OREAD].toggle = Tddata0;
+		deprint("ep clrhalt for io %#p\n", io+OREAD);
+		qunlock(&io[OREAD]);
+	}
+}
+
+/*
+ * Read up to count bytes from endpoint ep into a, dispatching on the
+ * endpoint's transfer type.  Control endpoints return the data kept by
+ * the previous request written to them; bulk and interrupt endpoints
+ * go through epio; iso endpoints through episoread.
+ * Returns the number of bytes read.  Panics if ep is not open.
+ */
+static int32_t
+epread(Ep *ep, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Ctlio *cio;
+	Qio *io;
+	Isoio *iso;
+	char buf[160];
+	uint32_t delta;
+
+	ddeprint("uhci: epread\n");
+	if(ep->aux == nil)
+		panic("epread: not open");
+
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		qlock(cio);
+		if(waserror()){
+			qunlock(cio);
+			nexterror();
+		}
+		ddeprint("epread ctl ndata %d\n", cio->ndata);
+		/* ndata: <0 no request pending, 0 report EOF once, >0 data held */
+		if(cio->ndata < 0)
+			error("request expected");
+		else if(cio->ndata == 0){
+			cio->ndata = -1;
+			count = 0;
+		}else{
+			if(count > cio->ndata)
+				count = cio->ndata;
+			if(count > 0)
+				memmove(a, cio->data, count);
+			/* BUG for big transfers */
+			free(cio->data);
+			cio->data = nil;
+			cio->ndata = 0;	/* signal EOF next time */
+		}
+		qunlock(cio);
+		poperror();
+		if(debug>1 || ep->debug){
+			seprintdata(buf, buf+sizeof(buf), a, count);
+			print("epread: %s\n", buf);
+		}
+		return count;
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tintr:
+		io = ep->aux;
+		/* leave at least half a poll interval since the last read */
+		delta = TK2MS(m->ticks) - io[OREAD].iotime + 1;
+		if(delta < ep->pollival / 2)
+			tsleep(&m->externup->sleep, return0, 0, ep->pollival/2 - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episoread(ep, iso, a, count);
+	default:
+		panic("epread: bad ep ttype %d", ep->ttype);
+	}
+	return -1;
+}
+
+/*
+ * Control transfers are one setup write (data0)
+ * plus zero or more reads/writes (data1, data0, ...)
+ * plus a final write/read with data1 to ack.
+ * For both host to device and device to host we perform
+ * the entire transfer when the user writes the request,
+ * and keep any data read from the device for a later read.
+ * We call epio three times instead of placing all Tds at
+ * the same time because doing so leads to crc/tmout errors
+ * for some devices.
+ * Upon errors on the data phase we must still run the status
+ * phase or the device may cease responding in the future.
+ *
+ * a holds the Rsetuplen-byte setup packet followed by any outgoing
+ * data; count is the total length.  Returns the number of bytes
+ * consumed from a, or -1 if the data phase of a host-to-device
+ * request failed.
+ */
+static int32_t
+epctlio(Ep *ep, Ctlio *cio, void *a, int32_t count)
+{
+	Mach *m = machp();
+	unsigned char *c;
+	int32_t len;
+
+	ddeprint("epctlio: cio %#p ep%d.%d count %ld\n",
+		cio, ep->dev->nb, ep->nb, count);
+	if(count < Rsetuplen)
+		error("short usb comand");
+	qlock(cio);
+	/* drop whatever a previous request left unread */
+	free(cio->data);
+	cio->data = nil;
+	cio->ndata = 0;
+	if(waserror()){
+		qunlock(cio);
+		free(cio->data);
+		cio->data = nil;
+		cio->ndata = 0;
+		nexterror();
+	}
+
+	/* set the address if unset and out of configuration state */
+	if(ep->dev->state != Dconfig && ep->dev->state != Dreset)
+		if(cio->usbid == 0)
+			cio->usbid = ((ep->nb&Epmax)<<7)|(ep->dev->nb&Devmax);
+	/* setup phase */
+	c = a;
+	cio->tok = Tdtoksetup;
+	cio->toggle = Tddata0;
+	if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen)
+		error(Eio);
+	a = c + Rsetuplen;
+	count -= Rsetuplen;
+
+	/* data phase: direction comes from the request type */
+	cio->toggle = Tddata1;
+	if(c[Rtype] & Rd2h){
+		cio->tok = Tdtokin;
+		len = GET2(c+Rcount);
+		if(len <= 0)
+			error("bad length in d2h request");
+		if(len > Maxctllen)
+			error("d2h data too large to fit in uhci");
+		a = cio->data = smalloc(len+1);
+	}else{
+		cio->tok = Tdtokout;
+		len = count;
+	}
+	/* an error here is remembered as len = -1, not propagated */
+	if(len > 0)
+		if(waserror())
+			len = -1;
+		else{
+			len = epio(ep, cio, a, len, 0);
+			poperror();
+		}
+	/* status phase runs in the opposite direction of the data phase */
+	if(c[Rtype] & Rd2h){
+		count = Rsetuplen;
+		cio->ndata = len;
+		cio->tok = Tdtokout;
+	}else{
+		if(len < 0)
+			count = -1;
+		else
+			count = Rsetuplen + len;
+		cio->tok = Tdtokin;
+	}
+	cio->toggle = Tddata1;
+	epio(ep, cio, nil, 0, 0);
+	qunlock(cio);
+	poperror();
+	ddeprint("epctlio cio %#p return %ld\n", cio, count);
+	return count;
+}
+
+/*
+ * Write count bytes from a to endpoint ep, dispatching on the
+ * endpoint's transfer type: control requests go to epctlio, bulk
+ * data is sent through epio in bounded pieces, interrupt data
+ * through epio after honouring the poll interval, and iso data to
+ * episowrite.  Returns the number of bytes written.
+ * Panics if ep is not open.
+ */
+static int32_t
+epwrite(Ep *ep, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Ctlio *cio;
+	Isoio *iso;
+	Qio *io;
+	uint32_t delta;
+	char *b;
+	int tot;
+	int nw;
+
+	ddeprint("uhci: epwrite ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux == nil)
+		panic("uhci: epwrite: not open");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		return epctlio(ep, cio, a, count);
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		/*
+		 * Put at most Tdatomic Tds (512 bytes) at a time.
+		 * Otherwise some devices produce babble errors.
+		 */
+		/*
+		 * NOTE(review): the loop only advances by what epio returns;
+		 * confirm epio cannot return 0 here without raising an error.
+		 */
+		b = a;
+		for(tot = 0; tot < count ; tot += nw){
+			nw = count - tot;
+			if(nw > Tdatomic * ep->maxpkt)
+				nw = Tdatomic * ep->maxpkt;
+			nw = epio(ep, &io[OWRITE], b+tot, nw, 1);
+		}
+		return tot;
+	case Tintr:
+		io = ep->aux;
+		/* leave at least a poll interval since the last write */
+		delta = TK2MS(m->ticks) - io[OWRITE].iotime + 1;
+		if(delta < ep->pollival)
+			tsleep(&m->externup->sleep, return0, 0, ep->pollival - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OWRITE], a, count, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episowrite(ep, iso, a, count);
+	default:
+		panic("uhci: epwrite: bad ep ttype %d", ep->ttype);
+	}
+	return -1;
+}
+
+/*
+ * Set up the isochronous stream kept in ep->aux: allocate one Td
+ * every ep->pollival frames, chain them into a ring, and splice
+ * them into the controller's frame list.
+ * Raises an error for ORDWR endpoints or when fewer than 3 Tds
+ * would result.  On return the stream is in state Qdone and is on
+ * the controller's iso list.
+ */
+static void
+isoopen(Ep *ep)
+{
+	Ctlr *ctlr;
+	Isoio *iso;
+	int frno;
+	int i;
+	Td* td;
+	Td* ltd;
+	int size;
+	int left;
+
+	if(ep->mode == ORDWR)
+		error("iso i/o is half-duplex");
+	ctlr = ep->hp->aux;
+	iso = ep->aux;
+	iso->debug = ep->debug;
+	iso->next = nil;			/* paranoia */
+	if(ep->mode == OREAD)
+		iso->tok = Tdtokin;
+	else
+		iso->tok = Tdtokout;
+	iso->usbid = ((ep->nb & Epmax)<<7)|(ep->dev->nb & Devmax);
+	iso->state = Qidle;
+	iso->nframes = Nframes/ep->pollival;
+	if(iso->nframes < 3)
+		error("uhci isoopen bug");	/* we need at least 3 tds */
+
+	ilock(ctlr);
+	if(ctlr->load + ep->load > 800)
+		print("usb: uhci: bandwidth may be exceeded\n");
+	ctlr->load += ep->load;
+	ctlr->isoload += ep->load;
+	dprint("uhci: load %uld isoload %uld\n", ctlr->load, ctlr->isoload);
+	iunlock(ctlr);
+
+	/*
+	 * From here on this cannot raise errors
+	 * unless we catch them and release here all memory allocated.
+	 */
+	if(ep->maxpkt > Tdndata)
+		iso->data = smalloc(iso->nframes*ep->maxpkt);
+	ilock(ctlr);
+	frno = INS(Frnum) + 10;			/* start 10ms ahead */
+	frno = TRUNC(frno, Nframes);
+	iunlock(ctlr);
+	iso->td0frno = frno;
+	ltd = nil;
+	left = 0;
+	for(i = 0; i < iso->nframes; i++){
+		td = iso->tdps[frno] = tdalloc();
+		if(ep->mode == OREAD)
+			size = ep->maxpkt;
+		else{
+			/* left carries the remainder of the division forward */
+			size = (ep->hz+left) * ep->pollival / 1000;
+			size *= ep->samplesz;
+			left = (ep->hz+left) * ep->pollival % 1000;
+			if(size > ep->maxpkt){
+				print("uhci: ep%d.%d: size > maxpkt\n",
+					ep->dev->nb, ep->nb);
+				print("size = %d max = %ld\n", size, ep->maxpkt);
+				size = ep->maxpkt;
+			}
+		}
+		/* data too big for the Td's own buffer lives in iso->data */
+		if(size > Tdndata)
+			td->data = iso->data + i * ep->maxpkt;
+		else
+			td->data = td->sbuff;
+		td->buffer = PCIWADDR(td->data);
+		tdisoinit(iso, td, size);
+		if(ltd != nil)
+			ltd->next = td;
+		ltd = td;
+		frno = TRUNC(frno+ep->pollival, Nframes);
+	}
+	/* close the ring */
+	ltd->next = iso->tdps[iso->td0frno];
+	iso->tdi = iso->tdps[iso->td0frno];
+	iso->tdu = iso->tdi;	/* read: right now; write: 1s ahead */
+	ilock(ctlr);
+	/* first point every Td at the current frame entry ... */
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		iso->tdps[frno]->link = ctlr->frames[frno];
+		frno = TRUNC(frno+ep->pollival, Nframes);
+	}
+	coherence();
+	/* ... and only then make the frame entries point at the Tds */
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		ctlr->frames[frno] = PCIWADDR(iso->tdps[frno]);
+		frno = TRUNC(frno+ep->pollival, Nframes);
+	}
+	iso->next = ctlr->iso;
+	ctlr->iso = iso;
+	iso->state = Qdone;
+	iunlock(ctlr);
+	if(debug > 1 || iso->debug >1)
+		isodump(iso, 0);
+}
+
+/*
+ * Allocate the endpoint and set it up for I/O
+ * in the controller. This must follow what's said
+ * in Ep regarding configuration, including perhaps
+ * the saved toggles (saved on a previous close of
+ * the endpoint data file by epclose).
+ *
+ * ep->aux receives an Isoio, a Ctlio, or an array of two Qio
+ * (indexed by OREAD and OWRITE) depending on ep->ttype.
+ * Panics if ep is already open; on error ep->aux is freed and reset.
+ */
+static void
+epopen(Ep *ep)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	Qh *cqh;
+	Qio *io;
+	Ctlio *cio;
+	int usbid;
+
+	ctlr = ep->hp->aux;
+	deprint("uhci: epopen ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux != nil)
+		panic("uhci: epopen called with open ep");
+	if(waserror()){
+		free(ep->aux);
+		ep->aux = nil;
+		nexterror();
+	}
+	if(ep->maxpkt > Tdmaxpkt){
+		print("uhci: maxkpkt too large: using %d\n", Tdmaxpkt);
+		ep->maxpkt = Tdmaxpkt;
+	}
+	cqh = ctlr->qh[ep->ttype];
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tiso:
+		ep->aux = smalloc(sizeof(Isoio));
+		isoopen(ep);
+		break;
+	case Tctl:
+		cio = ep->aux = smalloc(sizeof(Ctlio));
+		cio->debug = ep->debug;
+		cio->ndata = -1;
+		cio->data = nil;
+		if(ep->dev->isroot != 0 && ep->nb == 0)	/* root hub */
+			break;
+		cio->qh = qhalloc(ctlr, cqh, cio, "epc");
+		break;
+	case Tbulk:
+	case Tintr:
+		io = ep->aux = smalloc(sizeof(Qio)*2);
+		io[OREAD].debug = io[OWRITE].debug = ep->debug;
+		usbid = ((ep->nb&Epmax)<<7)|(ep->dev->nb &Devmax);
+		/* a Qh is set up only for the directions the mode allows */
+		if(ep->mode != OREAD){
+			if(ep->toggle[OWRITE] != 0)
+				io[OWRITE].toggle = Tddata1;
+			else
+				io[OWRITE].toggle = Tddata0;
+			io[OWRITE].tok = Tdtokout;
+			io[OWRITE].qh = qhalloc(ctlr, cqh, io+OWRITE, "epw");
+			io[OWRITE].usbid = usbid;
+		}
+		if(ep->mode != OWRITE){
+			if(ep->toggle[OREAD] != 0)
+				io[OREAD].toggle = Tddata1;
+			else
+				io[OREAD].toggle = Tddata0;
+			io[OREAD].tok = Tdtokin;
+			io[OREAD].qh = qhalloc(ctlr, cqh, io+OREAD, "epr");
+			io[OREAD].usbid = usbid;
+		}
+		break;
+	}
+	if(debug>1 || ep->debug)
+		dump(ep->hp);
+	deprint("uhci: epopen done\n");
+	poperror();
+}
+
+/*
+ * Abort any transfer in progress on io and release its Qh.
+ * Does nothing when io is nil, has no Qh, or the Qh is already
+ * closing.  Otherwise the Tds are deactivated, the Qh is marked
+ * Qclose, and after a short delay any process inside epio is woken
+ * and waited for (by taking and dropping io's qlock) before the Qh
+ * is freed.
+ */
+static void
+cancelio(Ctlr *ctlr, Qio *io)
+{
+	Mach *m = machp();
+	Qh *qh;
+
+	ilock(ctlr);
+	/* io must be checked before it is dereferenced */
+	if(io == nil || io->qh == nil || io->qh->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	qh = io->qh;
+	dqprint("uhci: cancelio for qh %#p state %s\n",
+		qh, qhsname[qh->state]);
+	aborttds(qh);
+	qh->state = Qclose;
+	iunlock(ctlr);
+	/* pause after the abort; an error in the sleep is ignored */
+	if(!waserror()){
+		tsleep(&m->externup->sleep, return0, 0, Abortdelay);
+		poperror();
+	}
+
+	wakeup(io);
+	qlock(io);
+	/* wait for epio if running */
+	qunlock(io);
+
+	qhfree(ctlr, qh);
+	io->qh = nil;
+}
+
+/*
+ * Stop the isochronous stream iso and release its Tds and buffer.
+ * pollival is the frame spacing the stream was opened with and load
+ * the bandwidth it had reserved.  Does nothing if the stream is
+ * already closed; panics if it is in an unexpected state or cannot
+ * be found on the controller's lists.
+ */
+static void
+cancelisoio(Ctlr *ctlr, Isoio *iso, int pollival, uint32_t load)
+{
+	Mach *m = machp();
+	Isoio **il;
+	uint32_t *lp;
+	int i;
+	int frno;
+	Td *td;
+
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("bad iso state");
+	iso->state = Qclose;
+	if(ctlr->isoload < load)
+		panic("uhci: low isoload");
+	ctlr->isoload -= load;
+	ctlr->load -= load;
+	/* take iso off the controller's software list */
+	for(il = &ctlr->iso; *il != nil; il = &(*il)->next)
+		if(*il == iso)
+			break;
+	if(*il == nil)
+		panic("isocancel: not found");
+	*il = iso->next;
+	/* unlink each Td from the frame it was spliced into */
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		td = iso->tdps[frno];
+		td->csw &= ~(Tdioc|Tdactive);
+		for(lp=&ctlr->frames[frno]; !(*lp & Tdterm);
+					lp = &TPTR(*lp)->link)
+			if(TPTR(*lp) == td)
+				break;
+		if(*lp & Tdterm)
+			panic("cancelisoio: td not found");
+		*lp = td->link;
+		frno = TRUNC(frno+pollival, Nframes);
+	}
+	iunlock(ctlr);
+
+	/*
+	 * wakeup anyone waiting for I/O and
+	 * wait to be sure no I/O is in progress in the controller.
+	 * and then wait to be sure episo-io is no longer running.
+	 */
+	wakeup(iso);
+	diprint("cancelisoio iso %#p waiting for I/O to cease\n", iso);
+	/*
+	 * Guard the sleep as cancelio does: an error raised here must
+	 * not skip the release of the Tds and of the data buffer below.
+	 */
+	if(!waserror()){
+		tsleep(&m->externup->sleep, return0, 0, 5);
+		poperror();
+	}
+	qlock(iso);
+	qunlock(iso);
+	diprint("cancelisoio iso %#p releasing iso\n", iso);
+
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		tdfree(iso->tdps[frno]);
+		iso->tdps[frno] = nil;
+		frno = TRUNC(frno+pollival, Nframes);
+	}
+	free(iso->data);
+	iso->data = nil;
+}
+
+/*
+ * Close endpoint ep: cancel any I/O, release the per-endpoint state
+ * kept in ep->aux, and for bulk/interrupt endpoints remember in
+ * ep->toggle whether each direction was left at Tddata1.
+ * Panics if ep is not open or has an unknown transfer type.
+ */
+static void
+epclose(Ep *ep)
+{
+	Ctlr *ctlr;
+	Ctlio *cio;
+	Isoio *iso;
+	Qio *io;
+
+	ctlr = ep->hp->aux;
+	deprint("uhci: epclose ep%d.%d\n", ep->dev->nb, ep->nb);
+
+	if(ep->aux == nil)
+		panic("uhci: epclose called with closed ep");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		cancelio(ctlr, cio);
+		free(cio->data);
+		cio->data = nil;
+		break;
+	case Tbulk:
+	case Tintr:
+		io = ep->aux;
+		ep->toggle[OREAD] = ep->toggle[OWRITE] = 0;
+		if(ep->mode != OWRITE){
+			cancelio(ctlr, &io[OREAD]);
+			if(io[OREAD].toggle == Tddata1)
+				ep->toggle[OREAD] = 1;
+		}
+		if(ep->mode != OREAD){
+			cancelio(ctlr, &io[OWRITE]);
+			if(io[OWRITE].toggle == Tddata1)
+				ep->toggle[OWRITE] = 1;
+		}
+		break;
+	case Tiso:
+		iso = ep->aux;
+		cancelisoio(ctlr, iso, ep->pollival, ep->load);
+		break;
+	default:
+		panic("epclose: bad ttype %d", ep->ttype);
+	}
+
+	free(ep->aux);
+	ep->aux = nil;
+
+}
+
+/*
+ * Format a one-line-per-direction description of the controller
+ * state of endpoint ep into the buffer [s, e) and return the new
+ * end of the text.  A closed endpoint yields the empty string.
+ * Runs under the controller's ilock.
+ */
+static char*
+seprintep(char *s, char *e, Ep *ep)
+{
+	Ctlio *cio;
+	Qio *io;
+	Isoio *iso;
+	Ctlr *ctlr;
+
+	ctlr = ep->hp->aux;
+	ilock(ctlr);
+	if(ep->aux == nil){
+		*s = 0;
+		iunlock(ctlr);
+		return s;
+	}
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		s = seprint(s,e,"cio %#p qh %#p"
+			" id %#x tog %#x tok %#x err %s\n",
+			cio, cio->qh, cio->usbid, cio->toggle,
+			cio->tok, cio->err);
+		break;
+	case Tbulk:
+	case Tintr:
+		io = ep->aux;
+		if(ep->mode != OWRITE)
+			s = seprint(s,e,"r: qh %#p id %#x tog %#x tok %#x err %s\n",
+				io[OREAD].qh, io[OREAD].usbid, io[OREAD].toggle,
+				io[OREAD].tok, io[OREAD].err);
+		if(ep->mode != OREAD)
+			s = seprint(s,e,"w: qh %#p id %#x tog %#x tok %#x err %s\n",
+				io[OWRITE].qh, io[OWRITE].usbid, io[OWRITE].toggle,
+				io[OWRITE].tok, io[OWRITE].err);
+		break;
+	case Tiso:
+		iso = ep->aux;
+		s = seprint(s,e,"iso %#p id %#x tok %#x tdu %#p tdi %#p err %s\n",
+			iso, iso->usbid, iso->tok, iso->tdu, iso->tdi, iso->err);
+		break;
+	}
+	iunlock(ctlr);
+	return s;
+}
+
+/*
+ * Set (on != 0) or clear the enable bit of root hub port number
+ * port, which is 1-based, then sleep for Enabledelay.
+ * Serialized against other port operations by ctlr->portlck.
+ * Always returns 0.
+ */
+static int
+portenable(Hci *hp, int port, int on)
+{
+	Mach *m = machp();
+	int s;
+	int ioport;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	dprint("uhci: %#x port %d enable=%d\n", ctlr->port, port, on);
+	ioport = PORT(port-1);
+	qlock(&ctlr->portlck);
+	if(waserror()){
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	ilock(ctlr);
+	s = INS(ioport);
+	if(on)
+		OUTS(ioport, s | PSenable);
+	else
+		OUTS(ioport, s & ~PSenable);
+	microdelay(64);
+	iunlock(ctlr);
+	tsleep(&m->externup->sleep, return0, 0, Enabledelay);
+	dprint("uhci %#ux port %d enable=%d: sts %#x\n",
+		ctlr->port, port, on, INS(ioport));
+	qunlock(&ctlr->portlck);
+	poperror();
+	return 0;
+}
+
+/*
+ * Reset root hub port number port (1-based) and leave it enabled.
+ * A request with on == 0 is ignored.  The whole sequence, including
+ * the 50ms delay, runs under the controller's ilock.
+ * Always returns 0.
+ */
+static int
+portreset(Hci *hp, int port, int on)
+{
+	int i, p;
+	Ctlr *ctlr;
+
+	if(on == 0)
+		return 0;
+	ctlr = hp->aux;
+	dprint("uhci: %#ux port %d reset\n", ctlr->port, port);
+	p = PORT(port-1);
+	ilock(ctlr);
+	OUTS(p, PSreset);
+	delay(50);
+	OUTS(p, INS(p) & ~PSreset);
+	OUTS(p, INS(p) | PSenable);
+	microdelay(64);
+	/* bounded busy-wait for the enable bit to show up */
+	for(i=0; i<1000 && (INS(p) & PSenable) == 0; i++)
+		;
+	OUTS(p, (INS(p) & ~PSreset)|PSenable);
+	iunlock(ctlr);
+	dprint("uhci %#ux after port %d reset: sts %#x\n",
+		ctlr->port, port, INS(p));
+	return 0;
+}
+
+/*
+ * Read the status of root hub port number port (1-based), write any
+ * pending change bits back to the register, and return the status
+ * translated to the HP* bits.
+ */
+static int
+portstatus(Hci *hp, int port)
+{
+	Mach *m = machp();
+	int s;
+	int r;
+	int ioport;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	ioport = PORT(port-1);
+	qlock(&ctlr->portlck);
+	/*
+	 * NOTE(review): this handler iunlocks ctlr, which is only right
+	 * if the error was raised between ilock and iunlock below;
+	 * confirm nothing can raise one while the ilock is not held.
+	 */
+	if(waserror()){
+		iunlock(ctlr);
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	ilock(ctlr);
+	s = INS(ioport);
+	if(s & (PSstatuschg | PSchange)){
+		OUTS(ioport, s);
+		ddprint("uhci %#ux port %d status %#x\n", ctlr->port, port, s);
+	}
+	iunlock(ctlr);
+	qunlock(&ctlr->portlck);
+	poperror();
+
+	/*
+	 * We must return status bits as a
+	 * get port status hub request would do.
+	 */
+	r = 0;
+	if(s & PSpresent)
+		r |= HPpresent;
+	if(s & PSenable)
+		r |= HPenable;
+	if(s & PSsuspend)
+		r |= HPsuspend;
+	if(s & PSreset)
+		r |= HPreset;
+	if(s & PSslow)
+		r |= HPslow;
+	if(s & PSstatuschg)
+		r |= HPstatuschg;
+	if(s & PSchange)
+		r |= HPchange;
+	return r;
+}
+
+/*
+ * Walk the PCI bus once and record every UHCI controller found
+ * (serial bus class, USB subclass, programming interface 0) in the
+ * first free slot of ctlrs.  Later calls return immediately.
+ * Controllers whose registers cannot be claimed or that have no
+ * irq assigned are skipped with a message.
+ */
+static void
+scanpci(void)
+{
+	static int already = 0;
+	int io;
+	int i;
+	Ctlr *ctlr;
+	Pcidev *p;
+
+	if(already)
+		return;
+	already = 1;
+	p = nil;
+	while(p = pcimatch(p, 0, 0)){
+		/*
+		 * Find UHCI controllers (Programming Interface = 0).
+		 */
+		if(p->ccrb != Pcibcserial || p->ccru != Pciscusb)
+			continue;
+		switch(p->ccrp){
+		case 0:
+			io = p->mem[4].bar & ~0x0F;
+			break;
+		default:
+			continue;
+		}
+		if(io == 0){
+			print("usbuhci: %#x %#x: failed to map registers\n",
+				p->vid, p->did);
+			continue;
+		}
+		if(ioalloc(io, p->mem[4].size, 0, "usbuhci") < 0){
+			print("usbuhci: port %#ux in use\n", io);
+			continue;
+		}
+		if(p->intl == 0xFF || p->intl == 0){
+			print("usbuhci: no irq assigned for port %#ux\n", io);
+			continue;
+		}
+
+		dprint("uhci: %#x %#x: port %#ux size %#x irq %d\n",
+			p->vid, p->did, io, p->mem[4].size, p->intl);
+
+		ctlr = malloc(sizeof(Ctlr));
+		if (ctlr == nil)
+			panic("uhci: out of memory");
+		ctlr->pcidev = p;
+		ctlr->port = io;
+		for(i = 0; i < Nhcis; i++)
+			if(ctlrs[i] == nil){
+				ctlrs[i] = ctlr;
+				break;
+			}
+		if(i == Nhcis){
+			print("uhci: bug: no more controllers\n");
+			/* nobody will ever reference it: do not leak it */
+			free(ctlr);
+		}
+	}
+}
+
+/*
+ * Build the controller's static schedule: the chain of Qhs for
+ * control, interrupt and bulk transfers plus a trailing dummy Qh,
+ * and a frame list in which every entry points at the first Qh.
+ * Finally program the frame list base and reset the frame number.
+ * Panics if the frame list cannot be allocated.
+ */
+static void
+uhcimeminit(Ctlr *ctlr)
+{
+	Td* td;
+	Qh *qh;
+	int frsize;
+	int i;
+
+	ctlr->qhs = ctlr->qh[Tctl] = qhalloc(ctlr, nil, nil, "CTL");
+	ctlr->qh[Tintr] = qhalloc(ctlr, ctlr->qh[Tctl], nil, "INT");
+	ctlr->qh[Tbulk] = qhalloc(ctlr, ctlr->qh[Tintr], nil, "BLK");
+
+	/* idle Td from dummy Qh at the end. looped back to itself */
+	/* This is a workaround for PIIX4 errata 29773804.pdf */
+	qh = qhalloc(ctlr, ctlr->qh[Tbulk], nil, "BWS");
+	td = tdalloc();
+	td->link = PCIWADDR(td);
+	qhlinktd(qh, td);
+
+	/* loop (hw only) from the last qh back to control xfers.
+	 * this may be done only for some of them. Disable until ehci comes.
+	 */
+	if(0)
+	qh->link = PCIWADDR(ctlr->qhs);
+
+	/* the list is allocated aligned to its own size */
+	frsize = Nframes*sizeof(uint32_t);
+	ctlr->frames = xspanalloc(frsize, frsize, 0);
+	if(ctlr->frames == nil)
+		panic("uhci reset: no memory");
+
+	ctlr->iso = nil;
+	for(i = 0; i < Nframes; i++)
+		ctlr->frames[i] = PCIWADDR(ctlr->qhs)|QHlinkqh;
+	OUTL(Flbaseadd, PCIWADDR(ctlr->frames));
+	OUTS(Frnum, 0);
+	dprint("uhci %#ux flb %#ulx frno %#ux\n", ctlr->port,
+		INL(Flbaseadd), INS(Frnum));
+}
+
+/*
+ * Enable interrupts, start the controller and work out how many
+ * root hub ports it has.  hp->nports comes in holding the default;
+ * further port registers are probed and counted while they look
+ * like real ports.  All ports found are then cleared.
+ */
+static void
+init(Hci *hp)
+{
+	Ctlr *ctlr;
+	int sts;
+	int i;
+
+	ctlr = hp->aux;
+	dprint("uhci %#ux init\n", ctlr->port);
+	coherence();
+	ilock(ctlr);
+	OUTS(Usbintr, Itmout|Iresume|Ioc|Ishort);
+	uhcirun(ctlr, 1);
+	dprint("uhci: init: cmd %#ux sts %#ux sof %#ux",
+		INS(Cmd), INS(Status), INS(SOFmod));
+	dprint(" flb %#ulx frno %#ux psc0 %#ux psc1 %#ux",
+		INL(Flbaseadd), INS(Frnum), INS(PORT(0)), INS(PORT(1)));
+	/* guess other ports */
+	for(i = 2; i < 6; i++){
+		sts = INS(PORT(i));
+		/*
+		 * A real port register is not all ones and has the
+		 * reserved bit set.  Test the masked value against 0:
+		 * comparing it with 1 only works if the mask is bit 0.
+		 */
+		if(sts != 0xFFFF && (sts & PSreserved1) != 0){
+			dprint(" psc%d %#ux", i, sts);
+			hp->nports++;
+		}else
+			break;
+	}
+	for(i = 0; i < hp->nports; i++)
+		OUTS(PORT(i), 0);
+	iunlock(ctlr);
+}
+
+/*
+ * Bring the controller to a known halted state: stop it, turn off
+ * legacy mode, mask its interrupts, and issue a global reset
+ * followed by a controller reset.  The SOF modify register is
+ * saved before the resets and restored afterwards.
+ */
+static void
+uhcireset(Ctlr *ctlr)
+{
+	int i;
+	int sof;
+
+	ilock(ctlr);
+	dprint("uhci %#ux reset\n", ctlr->port);
+
+	/*
+	 * Turn off legacy mode. Some controllers won't
+	 * interrupt us as expected otherwise.
+	 */
+	uhcirun(ctlr, 0);
+	pcicfgw16(ctlr->pcidev, 0xc0, 0x2000);
+
+	OUTS(Usbintr, 0);
+	sof = INB(SOFmod);
+	uhcicmd(ctlr, Cgreset);			/* global reset */
+	delay(Resetdelay);
+	uhcicmd(ctlr, 0);			/* all halt */
+	uhcicmd(ctlr, Chcreset);			/* controller reset */
+	/* wait up to about 100ms for the reset bit to clear */
+	for(i = 0; i < 100; i++){
+		if((INS(Cmd) & Chcreset) == 0)
+			break;
+		delay(1);
+	}
+	if(i == 100)
+		print("uhci %#x controller reset timed out\n", ctlr->port);
+	OUTB(SOFmod, sof);
+	iunlock(ctlr);
+}
+
+/*
+ * Set the driver's debug level to d.  The level is a single
+ * variable shared by all controllers, so hci is not used.
+ */
+static void
+setdebug(Hci *hci, int d)
+{
+	debug = d;
+}
+
+/*
+ * Stop the controller behind hp and wait 100ms, all under the
+ * controller's ilock.
+ */
+static void
+shutdown(Hci *hp)
+{
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+
+	ilock(ctlr);
+	uhcirun(ctlr, 0);
+	delay(100);
+	iunlock(ctlr);
+}
+
+/*
+ * Probe routine registered for the "uhci" HCI type.
+ * Claims the first inactive controller found by scanpci() whose
+ * port matches hp->port (any controller when hp->port is 0),
+ * resets it, builds its schedule and fills in hp's method table.
+ * Returns 0 on success and -1 when no controller is available.
+ */
+static int
+reset(Hci *hp)
+{
+	static Lock resetlck;
+	int i;
+	Ctlr *ctlr;
+	Pcidev *p;
+
+	/*
+	if(getconf("*nousbuhci"))
+		return -1;
+	*/
+
+	ilock(&resetlck);
+	scanpci();
+
+	/*
+	 * Any adapter matches if no hp->port is supplied,
+	 * otherwise the ports must match.
+	 */
+	ctlr = nil;
+	for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){
+		ctlr = ctlrs[i];
+		if(ctlr->active == 0)
+		if(hp->port == 0 || hp->port == ctlr->port){
+			ctlr->active = 1;
+			break;
+		}
+	}
+	iunlock(&resetlck);
+	/* test the index first: ctlrs[Nhcis] is past the end of the table */
+	if(i == Nhcis || ctlrs[i] == nil)
+		return -1;
+
+	p = ctlr->pcidev;
+	hp->aux = ctlr;
+	hp->port = ctlr->port;
+	hp->irq = p->intl;
+	hp->tbdf = p->tbdf;
+	hp->nports = 2;			/* default */
+
+	uhcireset(ctlr);
+	uhcimeminit(ctlr);
+
+	/*
+	 * Linkage to the generic HCI driver.
+	 */
+	hp->init = init;
+	hp->dump = dump;
+	hp->interrupt = interrupt;
+	hp->epopen = epopen;
+	hp->epclose = epclose;
+	hp->epread = epread;
+	hp->epwrite = epwrite;
+	hp->seprintep = seprintep;
+	hp->portenable = portenable;
+	hp->portreset = portreset;
+	hp->portstatus = portstatus;
+	hp->shutdown = shutdown;
+	hp->debug = setdebug;
+	hp->type = "uhci";
+	return 0;
+}
+
+/*
+ * Register this driver's reset routine under the HCI type name "uhci".
+ */
+void
+usbuhcilink(void)
+{
+	addhcitype("uhci", reset);
+}

+ 10 - 0
sys/src/9/port/portfns.h

@@ -403,6 +403,16 @@ Proc*		wakeup(Rendez*);
 int		walk(Chan**, char**, int, int, int*);
 void		wlock(RWlock*);
 void		wunlock(RWlock*);
+/* xalloc */
+void*           xalloc(uint32_t);
+void*           xallocz(uint32_t, int);
+void            xfree(void*);
+void            xhole(uintmem, uint32_t);
+void            xinit(void);
+int             xmerge(void*, void*);
+void*           xspanalloc(uint32_t, int, uint32_t);
+void            xsummary(void);
+/* end xalloc */
 void		yield(void);
 uintptr_t		zgetaddr(Segment*);
 void		zgrow(Segment*);

+ 153 - 0
sys/src/9/port/portusbehci.h

@@ -0,0 +1,153 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * EHCI portable hardware definitions
+ */
+
+typedef struct Ecapio Ecapio;
+typedef struct Edbgio Edbgio;
+
+#pragma incomplete Ecapio;
+#pragma incomplete Edbgio;
+
+/*
+ * EHCI interface registers and bits
+ */
+enum
+{
+	/* Ecapio->parms reg. */
+	Cnports		= 0xF,		/* nport bits */
+	Cdbgportshift	= 20,		/* debug port */
+	Cdbgportmask	= 0xF,
+
+	/* Ecapio->capparms bits */
+	C64		= 1<<0,		/* 64-bits */
+	Cpfl		= 1<<1,	/* program'ble frame list: can be <1024 */
+	Casp		= 1<<2,		/* asynch. sched. park */
+	Ceecpshift	= 8,		/* extended capabilities ptr. */
+	Ceecpmask	= (1<<8) - 1,
+
+	Clegacy		= 1,		/* legacy support cap. id */
+	CLbiossem	= 2,		/* legacy cap. bios sem. */
+	CLossem		= 3,		/* legacy cap. os sem */
+	CLcontrol	= 4,		/* legacy support control & status */
+
+	/* typed links  */
+	Lterm		= 1,
+	Litd		= 0<<1,
+	Lqh		= 1<<1,
+	Lsitd		= 2<<1,
+	Lfstn		= 3<<1,		/* we don't use these */
+
+	/* Cmd reg. */
+	Cstop		= 0x00000,	/* stop running */
+	Crun		= 0x00001,	/* start operation */
+	Chcreset	= 0x00002,	/* host controller reset */
+	Cflsmask	= 0x0000C,	/* frame list size bits */
+	Cfls1024	= 0x00000,	/* frame list size 1024 */
+	Cfls512		= 0x00004,	/* frame list size 512 frames */
+	Cfls256		= 0x00008,	/* frame list size 256 frames */
+	Cpse		= 0x00010,	/* periodic sched. enable */
+	Case		= 0x00020,	/* async sched. enable */
+	Ciasync		= 0x00040,	/* interrupt on async advance doorbell */
+	/* interrupt threshold ctl. in µframes (1-32 in powers of 2) */
+	Citcshift	= 16,
+	Citcmask	= 0xff << Citcshift,
+
+	/* Sts reg. */
+	Sasyncss	= 0x08000,	/* async schedule status */
+	Speriodss	= 0x04000,	/* periodic schedule status */
+	Srecl		= 0x02000,	/* reclamation (empty async sched.) */
+	Shalted		= 0x01000,	/* h.c. is halted */
+	Sasync		= 0x00020,	/* interrupt on async advance */
+	Sherr		= 0x00010,	/* host system error */
+	Sfrroll		= 0x00008,	/* frame list roll over */
+	Sportchg	= 0x00004,	/* port change detect */
+	Serrintr	= 0x00002,		/* error interrupt */
+	Sintr		= 0x00001,	/* interrupt */
+	Sintrs		= 0x0003F,	/* interrupts status */
+
+	/* Intr reg. */
+	Iusb		= 0x01,		/* intr. on usb */
+	Ierr		= 0x02,		/* intr. on usb error */
+	Iportchg	= 0x04,		/* intr. on port change */
+	Ifrroll		= 0x08,		/* intr. on frlist roll over */
+	Ihcerr		= 0x10,		/* intr. on host error */
+	Iasync		= 0x20,		/* intr. on async advance enable */
+	Iall		= 0x3F,		/* all interrupts */
+
+	/* Config reg. */
+	Callmine	= 1,		/* route all ports to us */
+
+	/* Portsc reg. */
+	Pspresent	= 0x00000001,	/* device present */
+	Psstatuschg	= 0x00000002,	/* Pspresent changed */
+	Psenable	= 0x00000004,	/* device enabled */
+	Pschange	= 0x00000008,	/* Psenable changed */
+	Psresume	= 0x00000040,	/* resume detected */
+	Pssuspend	= 0x00000080,	/* port suspended */
+	Psreset		= 0x00000100,	/* port reset */
+	Pspower		= 0x00001000,	/* port power on */
+	Psowner		= 0x00002000,	/* port owned by companion */
+	Pslinemask	= 0x00000C00,	/* line status bits */
+	Pslow		= 0x00000400,	/* low speed device */
+
+	/* Debug port csw reg. */
+	Cowner	= 0x40000000,		/* port owned by ehci */
+	Cenable	= 0x10000000,		/* debug port enabled */
+	Cdone	= 0x00010000,		/* request is done */
+	Cbusy	= 0x00000400,		/* port in use by a driver */
+	Cerrmask= 0x00000380,		/* error code bits */
+	Chwerr	= 0x00000100,		/* hardware error */
+	Cterr	= 0x00000080,		/* transaction error */
+	Cfailed	= 0x00000040,		/* transaction did fail */
+	Cgo	= 0x00000020,		/* execute the transaction */
+	Cwrite	= 0x00000010,		/* request is a write */
+	Clen	= 0x0000000F,		/* data len */
+
+	/* Debug port pid reg. */
+	Prpidshift	= 16,		/* received pid */
+	Prpidmask	= 0xFF,
+	Pspidshift	= 8,		/* sent pid */
+	Pspidmask	= 0xFF,
+	Ptokshift	= 0,		/* token pid */
+	Ptokmask	= 0xFF,
+
+	Ptoggle		= 0x00008800,	/* to update toggles */
+	Ptogglemask	= 0x0000FF00,
+
+	/* Debug port addr reg. */
+	Adevshift	= 8,		/* device address */
+	Adevmask	= 0x7F,
+	Aepshift	= 0,		/* endpoint number */
+	Aepmask		= 0xF,
+};
+
+/*
+ * Capability registers (hw).
+ * The leading number in each field comment is the field's
+ * byte offset (hex) within the structure.
+ */
+struct Ecapio
+{
+	uint32_t	cap;		/* 00 controller capability register */
+	uint32_t	parms;		/* 04 structural parameters register */
+	uint32_t	capparms;	/* 08 capability parameters */
+	uint32_t	portroute;	/* 0c not on the CS5536 */
+};
+
+/*
+ * Debug port registers (hw).
+ * Bits for csw, pid and addr are the "Debug port ... reg."
+ * groups of the enum above.
+ */
+struct Edbgio
+{
+	uint32_t	csw;		/* control and status */
+	uint32_t	pid;		/* USB pid */
+	unsigned char	data[8];	/* data buffer */
+	uint32_t	addr;		/* device and endpoint addresses */
+};

+ 205 - 0
sys/src/9/port/usb.h

@@ -0,0 +1,205 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * common USB definitions.
+ */
+#define dprint		if(debug)print
+#define ddprint		if(debug>1)print
+#define deprint		if(debug || ep->debug)print
+#define ddeprint	if(debug>1 || ep->debug>1)print
+
+#define	GET2(p)		((((p)[1]&0xFF)<<8)|((p)[0]&0xFF))
+#define	PUT2(p,v)	{((p)[0] = (v)); ((p)[1] = (v)>>8);}
+
+typedef struct Udev Udev;	/* USB device */
+typedef struct Ep Ep;		/* Endpoint */
+typedef struct Hci Hci;		/* Host Controller Interface */
+typedef struct Hciimpl Hciimpl;	/* Link to the controller impl. */
+
+enum
+{
+	/* fundamental constants */
+	Ndeveps	= 16,		/* max nb. of endpoints per device */
+
+	/* tunable parameters */
+	Nhcis	= 16,		/* max nb. of HCIs */
+	Neps	= 64,		/* max nb. of endpoints */
+	Maxctllen = 32*1024, /* max allowed sized for ctl. xfers; see Maxdevconf */
+	Xfertmout = 2000,	/* default request time out (ms) */
+
+	/* transfer types. keep this order */
+	Tnone = 0,		/* no transfer type configured */
+	Tctl,			/* wr req + rd/wr data + wr/rd sts */
+	Tiso,			/* stream rd or wr (real time) */
+	Tbulk,			/* stream rd or wr */
+	Tintr,			/* msg rd or wr */
+	Nttypes,		/* number of transfer types */
+
+	Epmax	= 0xF,		/* max ep. addr */
+	Devmax	= 0x7F,		/* max dev. addr */
+
+	/* Speeds */
+	Fullspeed = 0,
+	Lowspeed,
+	Highspeed,
+	Nospeed,
+
+	/* request type */
+	Rh2d = 0<<7,
+	Rd2h = 1<<7,
+	Rstd = 0<<5,
+	Rclass =  1<<5,
+	Rdev = 0,
+	Rep = 2,
+	Rother = 3,
+
+	/* req offsets */
+	Rtype	= 0,
+	Rreq	= 1,
+	Rvalue	= 2,
+	Rindex	= 4,
+	Rcount	= 6,
+	Rsetuplen = 8,
+
+	/* standard requests */
+	Rgetstatus	= 0,
+	Rclearfeature	= 1,
+	Rsetfeature	= 3,
+	Rsetaddr	= 5,
+	Rgetdesc	= 6,
+
+	/* device states */
+	Dconfig	 = 0,		/* configuration in progress */
+	Denabled,		/* address assigned */
+	Ddetach,		/* device is detached */
+	Dreset,			/* its port is being reset */
+
+	/* (root) Hub reply to port status (reported to usbd) */
+	HPpresent	= 0x1,
+	HPenable	= 0x2,
+	HPsuspend	= 0x4,
+	HPovercurrent	= 0x8,
+	HPreset		= 0x10,
+	HPpower		= 0x100,
+	HPslow		= 0x200,
+	HPhigh		= 0x400,
+	HPstatuschg	= 0x10000,
+	HPchange	= 0x20000,
+};
+
+/*
+ * Services provided by the driver.
+ * epopen allocates hardware structures to prepare the endpoint
+ * for I/O. This happens when the user opens the data file.
+ * epclose releases them. This happens when the data file is closed.
+ * epwrite tries to write the given bytes, waiting until all of them
+ * have been written (or failed) before returning; but not for Iso.
+ * epread does the same for reading.
+ * It can be assumed that endpoints are DMEXCL but concurrent
+ * read/writes may be issued and the controller must take care.
+ * For control endpoints, device-to-host requests must be followed by
+ * a read of the expected length if needed.
+ * The port requests are called when usbd issues commands for root
+ * hubs. Port status must return bits as a hub request would do.
+ * Toggle handling and other details are left for the controller driver
+ * to avoid mixing too much the controller and the common device.
+ * While an endpoint is closed, its toggles are saved in the Ep struct.
+ */
+struct Hciimpl
+{
+	void	*aux;				/* for controller info */
+	void	(*init)(Hci*);			/* init. controller */
+	void	(*dump)(Hci*);			/* debug */
+	void	(*interrupt)(Ureg*, void*);	/* service interrupt */
+	void	(*epopen)(Ep*);			/* prepare ep. for I/O */
+	void	(*epclose)(Ep*);		/* terminate I/O on ep. */
+	int32_t	(*epread)(Ep*,void*,int32_t);	/* read data from ep */
+	int32_t	(*epwrite)(Ep*,void*,int32_t);	/* write data to ep */
+	char*	(*seprintep)(char*,char*,Ep*);	/* debug */
+	int	(*portenable)(Hci*, int, int);	/* enable/disable port */
+	int	(*portreset)(Hci*, int, int);	/* set/clear port reset */
+	int	(*portstatus)(Hci*, int);	/* get port status */
+	void	(*shutdown)(Hci*);		/* shutdown for reboot */
+	void	(*debug)(Hci*, int);		/* set/clear debug flag */
+};
+
+/*
+ * Host controller interface, one per controller.
+ * ISAConf and Hciimpl are embedded as unnamed members, so their
+ * fields are accessed directly through the Hci.
+ */
+struct Hci
+{
+	ISAConf;				/* hardware info */
+	int	tbdf;				/* type+busno+devno+funcno */
+	int	ctlrno;				/* controller number */
+	int	nports;				/* number of ports in hub */
+	int	highspeed;
+	Hciimpl;					/* HCI driver  */
+};
+
+/*
+ * USB endpoint.
+ * All endpoints are kept in a global array. The first
+ * block of fields is constant after endpoint creation.
+ * The rest is configuration information given to all controllers.
+ * The first endpoint for a device (known as ep0) represents the
+ * device and is used to configure it and create other endpoints.
+ * Its QLock also protects per-device data in dev.
+ * See Hciimpl for clues regarding how this is used by controllers.
+ */
+struct Ep
+{
+	Ref;			/* one per fid (and per dev ep for ep0s) */
+
+	/* const once inited. */
+	int	idx;		/* index in global eps array */
+	int	nb;		/* endpoint number in device */
+	Hci*	hp;		/* HCI it belongs to */
+	Udev*	dev;		/* device for the endpoint */
+	Ep*	ep0;		/* control endpoint for its device */
+
+	QLock;			/* protect fields below */
+	char*	name;		/* for ep file names at #u/ */
+	int	inuse;		/* endpoint is open */
+	int	mode;		/* OREAD, OWRITE, or ORDWR */
+	int	clrhalt;	/* true if halt was cleared on ep. */
+	int	debug;		/* per endpoint debug flag */
+	char*	info;		/* for humans to read */
+	int32_t	maxpkt;		/* maximum packet size */
+	int	ttype;		/* transfer type */
+	uint32_t	load;		/* in µs, for a transfer of maxpkt bytes */
+	void*	aux;		/* for controller specific info */
+	int	rhrepl;		/* fake root hub replies */
+	int	toggle[2];	/* saved toggles (while ep is not in use) */
+	int32_t	pollival;		/* poll interval ([µ]frames; intr/iso) */
+	int32_t	hz;		/* poll frequency (iso) */
+	int32_t	samplesz;	/* sample size (iso) */
+	int	ntds;		/* nb. of Tds per µframe */
+	int	tmout;		/* 0 or timeout for transfers (ms) */
+};
+
+/*
+ * Per-device configuration and cached list of endpoints.
+ * eps[0]->QLock protects it.
+ * speed takes one of Fullspeed, Lowspeed, Highspeed or Nospeed.
+ */
+struct Udev
+{
+	int	nb;		/* USB device number */
+	int	state;		/* state for the device */
+	int	ishub;		/* hubs can allocate devices */
+	int	isroot;		/* is a root hub */
+	int	speed;		/* Full/Low/High/No -speed */
+	int	hub;		/* dev number for the parent hub */
+	int	port;		/* port number in the parent hub */
+	Ep*	eps[Ndeveps];	/* end points for this device (cached) */
+};
+
+void	addhcitype(char *type, int (*reset)(Hci*));
+
+extern char *usbmodename[];
+extern char Estalled[];
+
+extern char *seprintdata(char*,char*,unsigned char*,int);

+ 3250 - 0
sys/src/9/port/usbehci.c

@@ -0,0 +1,3250 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * USB Enhanced Host Controller Interface (EHCI) driver
+ * High speed USB 2.0.
+ *
+ * Note that all of our unlock routines call coherence.
+ *
+ * BUGS:
+ * - Too many delays and ilocks.
+ * - bandwidth admission control must be done per-frame.
+ * - requires polling (some controllers miss interrupts).
+ * - must warn of power overruns.
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+#include	"../port/usb.h"
+#include	"../port/portusbehci.h"
+#include	"usbehci.h"
+#include	"uncached.h"
+
+#define diprint		if(ehcidebug || iso->debug)print
+#define ddiprint	if(ehcidebug>1 || iso->debug>1)print
+#define dqprint		if(ehcidebug || (qh->io && qh->io->debug))print
+#define ddqprint	if(ehcidebug>1 || (qh->io && qh->io->debug>1))print
+
+#define TRUNC(x, sz)	((x) & ((sz)-1))
+#define LPTR(q)		((uint32_t*)KADDR((q) & ~0x1F))
+
+typedef struct Ctlio Ctlio;
+typedef union Ed Ed;
+typedef struct Edpool Edpool;
+typedef struct Itd Itd;
+typedef struct Qio Qio;
+typedef struct Qtd Qtd;
+typedef struct Sitd Sitd;
+typedef struct Td Td;
+
+/*
+ * Driver constants: software queue states and tunables first,
+ * then the bit layouts of the hardware descriptors
+ * (Itd, Sitd, Td and Qh).
+ */
+enum
+{
+	/* Queue states (software) */
+	Qidle		= 0,
+	Qinstall,
+	Qrun,
+	Qdone,
+	Qclose,
+	Qfree,
+
+	Enabledelay	= 100,		/* waiting for a port to enable */
+	Abortdelay	= 5,		/* delay after cancelling Tds (ms) */
+
+	Incr		= 64,		/* for pools of Tds, Qhs, etc. */
+	Align		= 128,		/* in bytes for all those descriptors */
+
+	/* Keep them as a power of 2, lower than ctlr->nframes */
+	/* Also, keep Nisoframes >= Nintrleafs */
+	Nintrleafs	= 32,		/* nb. of leaf frames in intr. tree */
+	Nisoframes	= 64,		/* nb. of iso frames (in window) */
+
+	/*
+	 * HW constants
+	 */
+
+	/* Itd bits (csw[]) */
+	Itdactive	= 0x80000000,	/* execution enabled */
+	Itddberr	= 0x40000000,	/* data buffer error */
+	Itdbabble	= 0x20000000,	/* babble error */
+	Itdtrerr	= 0x10000000,	/* transaction error */
+	Itdlenshift	= 16,		/* transaction length */
+	Itdlenmask	= 0xFFF,
+	Itdioc		= 0x00008000,	/* interrupt on complete */
+	Itdpgshift	= 12,		/* page select field */
+	Itdoffshift	= 0,		/* transaction offset */
+	/* Itd bits, buffer[] */
+	Itdepshift	= 8,		/* endpoint address (buffer[0]) */
+	Itddevshift	= 0,		/* device address (buffer[0]) */
+	Itdin		= 0x800,	/* is input (buffer[1]) */
+	Itdout		= 0,
+	Itdmaxpktshift	= 0,		/* max packet (buffer[1]) */
+	Itdntdsshift	= 0,		/* nb. of tds per µframe (buffer[2]) */
+
+	Itderrors	= Itddberr|Itdbabble|Itdtrerr,
+
+	/* Sitd bits (epc) */
+	Stdin		= 0x80000000,	/* input direction */
+	Stdportshift	= 24,		/* hub port number */
+	Stdhubshift	= 16,		/* hub address */
+	Stdepshift	= 8,		/* endpoint address */
+	Stddevshift	= 0,		/* device address */
+	/* Sitd bits (mfs) */
+	Stdssmshift	= 0,		/* split start mask */
+	Stdscmshift	= 8,		/* split complete mask */
+	/* Sitd bits (csw) */
+	Stdioc		= 0x80000000,	/* interrupt on complete */
+	Stdpg		= 0x40000000,	/* page select */
+	Stdlenshift	= 16,		/* total bytes to transfer */
+	Stdlenmask	= 0x3FF,
+	Stdactive	= 0x00000080,	/* active */
+	Stderr		= 0x00000040,	/* tr. translator error */
+	Stddberr	= 0x00000020,	/* data buffer error */
+	Stdbabble	= 0x00000010,	/* babble error */
+	Stdtrerr	= 0x00000008,	/* transaction error */
+	Stdmmf		= 0x00000004,	/* missed µframe */
+	Stddcs		= 0x00000002,	/* do complete split */
+
+	Stderrors	= Stderr|Stddberr|Stdbabble|Stdtrerr|Stdmmf,
+
+	/* Sitd bits buffer[1] */
+	Stdtpall	= 0x00000000,	/* all payload here (188 bytes) */
+	Stdtpbegin	= 0x00000008,	/* first payload for fs trans. */
+	Stdtcntmask	= 0x00000007,	/* T-count */
+
+	/* Td bits (csw) */
+	Tddata1		= 0x80000000,	/* data toggle 1 */
+	Tddata0		= 0x00000000,	/* data toggle 0 */
+	Tdlenshift	= 16,		/* total bytes to transfer */
+	Tdlenmask	= 0x7FFF,
+	Tdmaxpkt	= 0x5000,	/* max buffer for a Td */
+	Tdioc		= 0x00008000,	/* interrupt on complete */
+	Tdpgshift	= 12,		/* current page */
+	Tdpgmask	= 7,
+	Tderr1		= 0x00000400,	/* bit 0 of error counter */
+	Tderr2		= 0x00000800,	/* bit 1 of error counter */
+	Tdtokout	= 0x00000000,	/* direction out */
+	Tdtokin		= 0x00000100,	/* direction in */
+	Tdtoksetup	= 0x00000200,	/* setup packet */
+	Tdtok		= 0x00000300,	/* token bits */
+	Tdactive		= 0x00000080,	/* active */
+	Tdhalt		= 0x00000040,	/* halted */
+	Tddberr		= 0x00000020,	/* data buffer error */
+	Tdbabble	= 0x00000010,	/* babble error */
+	Tdtrerr		= 0x00000008,	/* transaction error */
+	Tdmmf		= 0x00000004,	/* missed µframe */
+	Tddcs		= 0x00000002,	/* do complete split */
+	Tdping		= 0x00000001,	/* do ping */
+
+	Tderrors	= Tdhalt|Tddberr|Tdbabble|Tdtrerr|Tdmmf,
+
+	/* Qh bits (eps0) */
+	Qhrlcmask	= 0xF,		/* nak reload count */
+	Qhrlcshift	= 28,		/* nak reload count */
+	Qhnhctl		= 0x08000000,	/* not-high speed ctl */
+	Qhmplmask	= 0x7FF,	/* max packet */
+	Qhmplshift	= 16,
+	Qhhrl		= 0x00008000,	/* head of reclamation list */
+	Qhdtc		= 0x00004000,	/* data toggle ctl. */
+	Qhint		= 0x00000080,	/* inactivate on next transition */
+	Qhspeedmask	= 0x00003000,	/* speed bits */
+	Qhfull		= 0x00000000,	/* full speed */
+	Qhlow		= 0x00001000,	/* low speed */
+	Qhhigh		= 0x00002000,	/* high speed */
+
+	/* Qh bits (eps1) */
+	Qhmultshift	= 30,		/* multiple tds per µframe */
+	Qhmultmask	= 3,
+	Qhportshift	= 23,		/* hub port number */
+	Qhhubshift	= 16,		/* hub address */
+	Qhscmshift	= 8,		/* split completion mask bits */
+	Qhismshift	= 0,		/* interrupt sched. mask bits */
+};
+
+/*
+ * Endpoint tree (software).
+ * bw[] and root[] are indexed by tree node; pickschedq treats
+ * index 0 as the root and (j - 1) / 2 as the parent of node j.
+ */
+struct Qtree
+{
+	int	nel;
+	int	depth;
+	uint32_t*	bw;
+	Qh**	root;
+};
+
+/*
+ * One per endpoint per direction, to control I/O.
+ * The QLock and Rendez are embedded as unnamed members.
+ */
+struct Qio
+{
+	QLock;			/* for the entire I/O process */
+	Rendez;			/* wait for completion */
+	Qh*	qh;		/* Td list (field const after init) */
+	int	usbid;		/* usb address for endpoint/device */
+	int	toggle;		/* Tddata0/Tddata1 */
+	int	tok;		/* Tdtoksetup, Tdtokin, Tdtokout */
+	uint32_t	iotime;		/* last I/O time; to hold interrupt polls */
+	int	debug;		/* debug flag from the endpoint */
+	char*	err;		/* error string */
+	char*	tag;		/* debug (no room in Qh for this) */
+	uint32_t	bw;
+};
+
+/*
+ * I/O state for a control endpoint: a Qio (embedded as an
+ * unnamed member) plus the data read by the last control request.
+ */
+struct Ctlio
+{
+	Qio;			/* a single Qio for each RPC */
+	unsigned char*	data;		/* read from last ctl req. */
+	int	ndata;		/* number of bytes read */
+};
+
+/*
+ * I/O state for an isochronous endpoint.
+ * Each union below holds the same pointer viewed as Itd or Sitd
+ * (tdps also as raw words, as seen by the hardware).
+ */
+struct Isoio
+{
+	QLock;
+	Rendez;			/* wait for space/completion/errors */
+	int	usbid;		/* address used for device/endpoint */
+	int	tok;		/* Tdtokin or Tdtokout */
+	int	state;		/* Qrun -> Qdone -> Qrun... -> Qclose */
+	int	nframes;	/* number of frames ([S]Itds) used */
+	unsigned char*	data;		/* iso data buffers if not embedded */
+	char*	err;		/* error string */
+	int	nerrs;		/* nb of consecutive I/O errors */
+	uint32_t	maxsize;	/* ntds * ep->maxpkt */
+	int32_t	nleft;		/* number of bytes left from last write */
+	int	debug;		/* debug flag from the endpoint */
+	int	hs;		/* is high speed? */
+	Isoio*	next;		/* in list of active Isoios */
+	uint32_t	td0frno;	/* first frame used in ctlr */
+	union{
+		Itd*	tdi;	/* next td processed by interrupt */
+		Sitd*	stdi;
+	};
+	union{
+		Itd*	tdu;	/* next td for user I/O in tdps */
+		Sitd*	stdu;
+	};
+	union{
+		Itd**	itdps;	/* itdps[i]: ptr to Itd for i-th frame or nil */
+		Sitd**	sitdps;	/* sitdps[i]: ptr to Sitd for i-th frame or nil */
+		uint32_t**	tdps;	/* same thing, as seen by hw */
+	};
+};
+
+/*
+ * Pool of free Eds with its counters.
+ * edalloc and edfree take the embedded Lock around every update.
+ */
+struct Edpool
+{
+	Lock;
+	Ed*	free;
+	int	nalloc;
+	int	ninuse;
+	int	nfree;
+};
+
+/*
+ * We use the 64-bit version for Itd, Sitd, Td, and Qh.
+ * If the ehci is 64-bit capable it assumes we are using those
+ * structures even when the system is 32 bits.
+ */
+
+/*
+ * Iso transfer descriptor.  hw: 92 bytes, 108 bytes total
+ * aligned to 32.
+ * Bit layouts for csw[] and buffer[] are the "Itd bits" constants.
+ */
+struct Itd
+{
+	uint32_t	link;		/* to next hw struct */
+	uint32_t	csw[8];		/* sts/length/pg/off. updated by hw */
+	uint32_t	buffer[7];	/* buffer pointers, addrs, maxsz */
+	uint32_t	xbuffer[7];	/* high 32 bits of buffer for 64-bits */
+
+	uint32_t	_pad0;		/* pad to next cache line */
+	/* cache-line boundary here */
+
+	/* software */
+	Itd*	next;
+	uint32_t	ndata;		/* number of bytes in data */
+	uint32_t	mdata;		/* max number of bytes in data */
+	unsigned char*	data;
+};
+
+/*
+ * Split transaction iso transfer descriptor.
+ * hw: 36 bytes, 52 bytes total. aligned to 32.
+ * Bit layouts for epc, mfs, csw and buffer[1] are the
+ * "Sitd bits" constants.
+ */
+struct Sitd
+{
+	uint32_t	link;		/* to next hw struct */
+	uint32_t	epc;		/* static endpoint state. addrs */
+	uint32_t	mfs;		/* static endpoint state. µ-frame sched. */
+	uint32_t	csw;		/* transfer state. updated by hw */
+	uint32_t	buffer[2];	/* buf. ptr/offset. offset updated by hw */
+				/* buf ptr/TP/Tcnt. TP/Tcnt updated by hw */
+	uint32_t	blink;		/* back pointer */
+	/* cache-line boundary after xbuffer[0] */
+	uint32_t	xbuffer[2];	/* high 32 bits of buffer for 64-bits */
+
+	/* software */
+	Sitd*	next;
+	uint32_t	ndata;		/* number of bytes in data */
+	uint32_t	mdata;		/* max number of bytes in data */
+	unsigned char*	data;
+};
+
+/*
+ * Queue element transfer descriptor.
+ * hw: first 52 bytes, total 68+sbuff bytes.  aligned to 32 bytes.
+ * Bit layout for csw is the "Td bits" constants.
+ */
+struct Td
+{
+	uint32_t	nlink;		/* to next Td */
+	uint32_t	alink;		/* alternate link to next Td */
+	uint32_t	csw;		/* cmd/sts. updated by hw */
+	uint32_t	buffer[5];	/* buf ptrs. offset updated by hw */
+	/* cache-line boundary here */
+	uint32_t	xbuffer[5];	/* high 32 bits of buffer for 64-bits */
+
+	/* software */
+	Td*	next;		/* in qh or Isoio or free list */
+	uint32_t	ndata;		/* bytes available/used at data */
+	unsigned char*	data;		/* pointer to actual data */
+	unsigned char*	buff;		/* allocated data buffer or nil */
+	unsigned char	sbuff[1];	/* first byte of embedded buffer */
+};
+
+/*
+ * Queue head. Aligned to 32 bytes.
+ * hw: first 68 bytes, 92 total.
+ * Bit layouts for eps0 and eps1 are the "Qh bits" constants;
+ * csw uses the Td bits.
+ */
+struct Qh
+{
+	uint32_t	link;		/* to next Qh in round robin */
+	uint32_t	eps0;		/* static endpoint state. addrs */
+	uint32_t	eps1;		/* static endpoint state. µ-frame sched. */
+
+	/* updated by hw */
+	uint32_t	tclink;		/* current Td (No Term bit here!) */
+	uint32_t	nlink;		/* to next Td */
+	uint32_t	alink;		/* alternate link to next Td */
+	uint32_t	csw;		/* cmd/sts. updated by hw */
+	/* cache-line boundary after buffer[0] */
+	uint32_t	buffer[5];	/* buf ptrs. offset updated by hw */
+	uint32_t	xbuffer[5];	/* high 32 bits of buffer for 64-bits */
+
+	/* software */
+	Qh*	next;		/* in controller list/tree of Qhs */
+	int	state;		/* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */
+	Qio*	io;		/* for this queue */
+	Td*	tds;		/* for this queue */
+	int	sched;		/* slot for intr. Qhs; -1 when not scheduled */
+	Qh*	inext;		/* next in list of intr. qhs */
+};
+
+/*
+ * We can avoid frame span traversal nodes if we don't span frames.
+ * Just schedule transfers that can fit on the current frame and
+ * wait a little bit otherwise.
+ */
+
+/*
+ * Software. Ehci descriptors provided by pool.
+ * There are so few because we avoid using Fstn.
+ * The align member makes every Ed at least Align bytes long.
+ */
+union Ed
+{
+	Ed*	next;		/* in free list */
+	Qh	qh;
+	Td	td;
+	Itd	itd;
+	Sitd	sitd;
+	unsigned char	align[Align];
+};
+
+int ehcidebug = 0;
+
+static Edpool edpool;
+//static char Ebug[] = "not yet implemented";
+static char* qhsname[] = { "idle", "install", "run", "done", "close", "FREE" };
+
+Ecapio* ehcidebugcapio;
+int ehcidebugport;
+
+/*
+ * Start (on != 0) or halt (on == 0) the controller, then poll the
+ * status register up to 100 times, with delay(1) between polls,
+ * until Shalted reflects the request.  A timeout is only reported.
+ */
+void
+ehcirun(Ctlr *ctlr, int on)
+{
+	int tries, halted;
+	Eopio *opio;
+
+	ddprint("ehci %#p %s\n", ctlr->capio, on ? "starting" : "halting");
+	opio = ctlr->opio;
+	if(on == 0)
+		opio->cmd = Cstop;
+	else
+		opio->cmd |= Crun;
+	coherence();
+	for(tries = 0; tries < 100; tries++){
+		halted = (opio->sts & Shalted) != 0;
+		/* done when halting and halted, or starting and not halted */
+		if((on == 0) == halted)
+			break;
+		delay(1);
+	}
+	if(tries == 100)
+		print("ehci %#p %s cmd timed out\n",
+			ctlr->capio, on ? "run" : "halt");
+	ddprint("ehci %#p cmd %#lux sts %#lux\n",
+		ctlr->capio, opio->cmd, opio->sts);
+}
+
+/*
+ * Take a zeroed Ed from the pool.  When the free list is empty,
+ * Incr more Eds are obtained from xspanalloc, aligned to Align;
+ * panics if that allocation fails.
+ */
+static void*
+edalloc(void)
+{
+	Ed *ed, *chunk;
+	int n;
+
+	lock(&edpool);
+	if(edpool.free == nil){
+		chunk = xspanalloc(Incr*sizeof(Ed), Align, 0);
+		if(chunk == nil)
+			panic("edalloc");
+		/* push last to first, so the free list starts at chunk[0] */
+		for(n = Incr-1; n >= 0; n--){
+			chunk[n].next = edpool.free;
+			edpool.free = &chunk[n];
+		}
+		edpool.nalloc += Incr;
+		edpool.nfree += Incr;
+		dprint("ehci: edalloc: %d eds\n", edpool.nalloc);
+	}
+	ed = edpool.free;
+	edpool.free = ed->next;
+	edpool.ninuse++;
+	edpool.nfree--;
+	unlock(&edpool);
+
+	memset(ed, 0, sizeof(Ed));	/* safety */
+	assert(((uint64_t)ed & 0xF) == 0);
+	return ed;
+}
+
+/*
+ * Put the Ed at a back on the head of the pool's free list.
+ */
+static void
+edfree(void *a)
+{
+	Ed *freed;
+
+	freed = a;
+	lock(&edpool);
+	freed->next = edpool.free;
+	edpool.free = freed;
+	edpool.nfree++;
+	edpool.ninuse--;
+	unlock(&edpool);
+}
+
+/*
+ * Allocate and do some initialization.
+ * Free after releasing buffers used.
+ */
+
+/* Allocate an Itd from the pool with its link terminated. */
+static Itd*
+itdalloc(void)
+{
+	Itd *itd;
+
+	itd = edalloc();
+	itd->link = Lterm;
+	return itd;
+}
+
+/* Return an Itd to the pool. */
+static void
+itdfree(Itd *td)
+{
+	edfree(td);
+}
+
+/* Allocate an Sitd from the pool with link and blink terminated. */
+static Sitd*
+sitdalloc(void)
+{
+	Sitd *sitd;
+
+	sitd = edalloc();
+	sitd->blink = Lterm;
+	sitd->link = Lterm;
+	return sitd;
+}
+
+/* Return an Sitd to the pool. */
+static void
+sitdfree(Sitd *td)
+{
+	edfree(td);
+}
+
+/* Allocate a Td from the pool with nlink and alink terminated. */
+static Td*
+tdalloc(void)
+{
+	Td *qtd;
+
+	qtd = edalloc();
+	qtd->alink = Lterm;
+	qtd->nlink = Lterm;
+	return qtd;
+}
+
+/*
+ * Free td's allocated buffer and return td to the pool.
+ * A nil td is ignored.
+ */
+static void
+tdfree(Td *td)
+{
+	if(td != nil){
+		free(td->buff);
+		edfree(td);
+	}
+}
+
+/*
+ * Make next follow td, in the software list and in nlink;
+ * a nil next terminates the list.  alink is always terminated.
+ */
+static void
+tdlinktd(Td *td, Td *next)
+{
+	td->next = next;
+	td->alink = Lterm;
+	if(next != nil)
+		td->nlink = PADDR(next);
+	else
+		td->nlink = Lterm;
+	coherence();
+}
+
+/*
+ * Make next follow qh, in the software list and in the link word
+ * (typed Lqh); a nil next terminates the list.  Returns qh.
+ */
+static Qh*
+qhlinkqh(Qh *qh, Qh *next)
+{
+	qh->next = next;
+	if(next != nil)
+		qh->link = PADDR(next)|Lqh;
+	else
+		qh->link = Lterm;
+	coherence();
+	return qh;
+}
+
+/*
+ * Replace the device and endpoint address fields of qh->eps0.
+ * addr carries the device address in its low 7 bits and the
+ * endpoint number in the 4 bits above them.
+ */
+static void
+qhsetaddr(Qh *qh, uint32_t addr)
+{
+	uint32_t keep, dev, endp;
+
+	keep = qh->eps0 & ~((Epmax<<8)|Devmax);
+	dev = addr & Devmax;
+	endp = ((addr >> 7) & Epmax) << 8;
+	qh->eps0 = keep | dev | endp;
+	coherence();
+}
+
+/*
+ * Return the exponent of the largest power of 2 <= n,
+ * i.e. floor(log2(n)).  Returns 0 for n <= 1.
+ */
+static int
+flog2lower(int n)
+{
+	int i;
+
+	/*
+	 * Halve n instead of growing 1 << (i+1): that shift
+	 * overflows a signed int once n >= 1<<30.
+	 */
+	for(i = 0; n > 1; n >>= 1)
+		i++;
+	return i;
+}
+
+/*
+ * Pick a node of qt for an endpoint polled every pollival.
+ * The candidates are the nodes at depth flog2lower(pollival),
+ * capped at qt->depth.  Each candidate's load is the sum of bw[]
+ * from the node up to the root; the first least loaded one wins.
+ * Returns -1 if the heaviest load plus bw reaches limit, or if
+ * no candidate was picked.
+ */
+static int
+pickschedq(Qtree *qt, int pollival, uint32_t bw, uint32_t limit)
+{
+	int node, anc, level, first, last, pick;
+	uint32_t lo, hi, sum;
+
+	level = flog2lower(pollival);
+	if(level > qt->depth)
+		level = qt->depth;
+	first = (1 << level) - 1;
+	last = (1 << (level+1)) - 1;
+	pick = -1;
+	hi = 0;
+	lo = ~0;
+	for(node = first; node < last; node++){
+		sum = qt->bw[0];
+		anc = node;
+		while(anc > 0){
+			sum += qt->bw[anc];
+			anc = (anc - 1) / 2;
+		}
+		if(sum < lo){
+			lo = sum;
+			pick = node;
+		}
+		if(sum > hi)
+			hi = sum;
+	}
+	if(hi + bw >= limit)
+		return -1;
+	return pick;
+}
+
+/*
+ * Schedule the interrupt qh in the controller's tree, at the node
+ * chosen by pickschedq for pollival, and add it to ctlr->intrqhs.
+ * Returns 0, or -1 when no node could be picked.
+ */
+static int
+schedq(Ctlr *ctlr, Qh *qh, int pollival)
+{
+	int q;
+	Qh *tqh;
+	uint32_t bw;
+
+	bw = qh->io->bw;
+	/* bw 0 and limit ~0 are passed here, not the endpoint's bw */
+	q = pickschedq(ctlr->tree, pollival, 0, ~0);
+	ddqprint("ehci: sched %#p q %d, ival %d, bw %uld\n",
+		qh->io, q, pollival, bw);
+	if(q < 0){
+		print("ehci: no room for ed\n");
+		return -1;
+	}
+	ctlr->tree->bw[q] += bw;
+	tqh = ctlr->tree->root[q];
+	qh->sched = q;
+	/* qh gets its successor before the tree node is linked to qh */
+	qhlinkqh(qh, tqh->next);
+	qhlinkqh(tqh, qh);
+	coherence();
+	qh->inext = ctlr->intrqhs;
+	ctlr->intrqhs = qh;
+	coherence();
+	return 0;
+}
+
+/*
+ * Undo schedq: give back qh's bandwidth, unlink it from the list
+ * at tree node qh->sched and remove it from ctlr->intrqhs.
+ * Does nothing when qh->sched is negative.
+ */
+static void
+unschedq(Ctlr *ctlr, Qh *qh)
+{
+	int q;
+	Qh *prev, *this, *next;
+	Qh **l;
+	uint32_t bw;
+
+	bw = qh->io->bw;
+	q = qh->sched;
+	if(q < 0)
+		return;
+	ctlr->tree->bw[q] -= bw;
+
+	/* find qh and its predecessor in the node's list */
+	prev = ctlr->tree->root[q];
+	this = prev->next;
+	while(this != nil && this != qh){
+		prev = this;
+		this = this->next;
+	}
+	if(this == nil)
+		print("ehci: unschedq %d: not found\n", q);
+	else{
+		next = this->next;
+		qhlinkqh(prev, next);
+	}
+	/* l points at the link that refers to the current element */
+	for(l = &ctlr->intrqhs; *l != nil; l = &(*l)->inext)
+		if(*l == qh){
+			*l = (*l)->inext;
+			return;
+		}
+	print("ehci: unschedq: qh %#p not found\n", qh);
+}
+
+/* Extract the max packet field from qh->eps0. */
+static uint32_t
+qhmaxpkt(Qh *qh)
+{
+	uint32_t mpl;
+
+	mpl = qh->eps0 >> Qhmplshift;
+	return mpl & Qhmplmask;
+}
+
+/*
+ * Replace the max packet field of qh->eps0 with maxpkt,
+ * truncated to the width of the field.
+ */
+static void
+qhsetmaxpkt(Qh *qh, int maxpkt)
+{
+	uint32_t keep, mpl;
+
+	keep = qh->eps0 & ~(Qhmplmask << Qhmplshift);
+	mpl = (maxpkt & Qhmplmask) << Qhmplshift;
+	qh->eps0 = keep | mpl;
+	coherence();
+}
+
+/*
+ * Initialize the round-robin circular list of ctl/bulk Qhs
+ * if ep is nil. Otherwise, allocate and link a new Qh in the ctlr.
+ *
+ * With a nil ep the Qh is handled as a Tctl one.  Tctl and Tbulk
+ * Qhs go into the circular list at ctlr->qhs; Tintr Qhs are handed
+ * to schedq.  Returns the new Qh.
+ */
+static Qh*
+qhalloc(Ctlr *ctlr, Ep *ep, Qio *io, char* tag)
+{
+	Qh *qh;
+	int ttype;
+
+	qh = edalloc();
+	qh->nlink = Lterm;
+	qh->alink = Lterm;
+	qh->csw = Tdhalt;
+	qh->state = Qidle;
+	qh->sched = -1;
+	qh->io = io;
+	if(ep != nil){
+		qh->eps0 = 0;
+		qhsetmaxpkt(qh, ep->maxpkt);
+		if(ep->dev->speed == Lowspeed)
+			qh->eps0 |= Qhlow;
+		if(ep->dev->speed == Highspeed)
+			qh->eps0 |= Qhhigh;
+		else if(ep->ttype == Tctl)
+			qh->eps0 |= Qhnhctl;
+		qh->eps0 |= Qhdtc | 8 << Qhrlcshift;	/* 8 naks max */
+		coherence();
+		/* NOTE(review): io is dereferenced here, before the nil test below */
+		qhsetaddr(qh, io->usbid);
+		qh->eps1 = (ep->ntds & Qhmultmask) << Qhmultshift;
+		qh->eps1 |= ep->dev->port << Qhportshift;
+		qh->eps1 |= ep->dev->hub << Qhhubshift;
+		qh->eps1 |= 034 << Qhscmshift;
+		if(ep->ttype == Tintr)
+			qh->eps1 |= 1 << Qhismshift;	/* intr. start µf. */
+		coherence();
+		if(io != nil)
+			io->tag = tag;
+	}
+	ilock(ctlr);
+	ttype = Tctl;
+	if(ep != nil)
+		ttype = ep->ttype;
+	switch(ttype){
+	case Tctl:
+	case Tbulk:
+		if(ctlr->qhs == nil){
+			/* first Qh: link it to itself and hand it to the hw */
+			ctlr->qhs = qhlinkqh(qh, qh);
+			qh->eps0 |= Qhhigh | Qhhrl;
+			coherence();
+			ctlr->opio->link = PADDR(qh)|Lqh;
+			coherence();
+		}else{
+			/* insert right after the head of the circular list */
+			qhlinkqh(qh, ctlr->qhs->next);
+			qhlinkqh(ctlr->qhs, qh);
+		}
+		break;
+	case Tintr:
+		/* the result of schedq is not checked */
+		schedq(ctlr, qh, ep->pollival);
+		break;
+	default:
+		print("ehci: qhalloc called for ttype != ctl/bulk\n");
+	}
+	iunlock(ctlr);
+	return qh;
+}
+
+/*
+ * Sleep condition for qhcoherency: true once Ciasync is
+ * clear in the command register.  a is the Ctlr.
+ */
+static int
+qhadvanced(void *a)
+{
+	Ctlr *ctlr;
+
+	ctlr = a;
+	if((ctlr->opio->cmd & Ciasync) != 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * called when a qh is removed, to be sure the hw is not
+ * keeping pointers into it.
+ *
+ * Sets Ciasync in the command register and waits, at most three
+ * times for Abortdelay each, until qhadvanced reports it clear.
+ */
+static void
+qhcoherency(Ctlr *ctlr)
+{
+	/* m is not referenced by name below; presumably macros need it -- confirm */
+	Mach *m = machp();
+	int i;
+
+	qlock(&ctlr->portlck);
+	ctlr->opio->cmd |= Ciasync;	/* ask for intr. on async advance */
+	coherence();
+	for(i = 0; i < 3 && qhadvanced(ctlr) == 0; i++)
+		if(!waserror()){
+			tsleep(ctlr, qhadvanced, ctlr, Abortdelay);
+			poperror();
+		}
+	dprint("ehci: qhcoherency: doorbell %d\n", qhadvanced(ctlr));
+	if(i == 3)
+		print("ehci: async advance doorbell did not ring\n");
+	ctlr->opio->cmd &= ~Ciasync;	/* try to clean */
+	qunlock(&ctlr->portlck);
+}
+
+/*
+ * Unlink qh from the controller, wait in qhcoherency, then free
+ * its Tds and the qh itself.  A qh with a negative sched is
+ * removed from the ctlr->qhs list; otherwise unschedq removes it.
+ * A nil qh is ignored.
+ */
+static void
+qhfree(Ctlr *ctlr, Qh *qh)
+{
+	Td *td, *ltd;
+	Qh *q;
+
+	if(qh == nil)
+		return;
+	ilock(ctlr);
+	if(qh->sched < 0){
+		/*
+		 * find the predecessor of qh.
+		 * NOTE(review): qhalloc links this list circularly, so a
+		 * qh that is not on it would never end this loop -- confirm.
+		 */
+		for(q = ctlr->qhs; q != nil; q = q->next)
+			if(q->next == qh)
+				break;
+		if(q == nil)
+			panic("qhfree: nil q");
+		q->next = qh->next;
+		q->link = qh->link;
+		coherence();
+	}else
+		unschedq(ctlr, qh);
+	iunlock(ctlr);
+
+	qhcoherency(ctlr);
+
+	/* save the next pointer before each Td is freed */
+	for(td = qh->tds; td != nil; td = ltd){
+		ltd = td->next;
+		tdfree(td);
+	}
+
+	edfree(qh);
+}
+
+/*
+ * Make td the list of Tds for qh.
+ * With a nil td, qh is left halted and not active.  Otherwise qh
+ * is halted while its link and buffer words are rewritten, and then
+ * reactivated keeping only its previous toggle and ping bits.
+ */
+static void
+qhlinktd(Qh *qh, Td *td)
+{
+	uint32_t csw;
+	int i;
+
+	csw = qh->csw;
+	qh->tds = td;
+	if(td == nil)
+		qh->csw = (csw & ~Tdactive) | Tdhalt;
+	else{
+		csw &= Tddata1 | Tdping;	/* save */
+		qh->csw = Tdhalt;
+		/* the halt is made visible before the links are changed */
+		coherence();
+		qh->tclink = 0;
+		qh->alink = Lterm;
+		qh->nlink = PADDR(td);
+		for(i = 0; i < nelem(qh->buffer); i++)
+			qh->buffer[i] = 0;
+		coherence();
+		qh->csw = csw & ~(Tdhalt|Tdactive);	/* activate next */
+	}
+	coherence();
+}
+
+/*
+ * Append "name value" for link word l to [s, se), followed by one
+ * letter: T when Lterm is set; otherwise, only if typed is non-zero,
+ * I, Q, S or F according to the type bits (3<<1) of l.
+ * Returns the new end of the formatted text.
+ */
+static char*
+seprintlink(char *s, char *se, char *name, uint32_t l, int typed)
+{
+	char *kind;
+
+	s = seprint(s, se, "%s %ulx", name, l);
+	if(l & Lterm)
+		kind = "T";
+	else if(typed == 0)
+		return s;
+	else
+		switch(l & (3<<1)){
+		case Litd:
+			kind = "I";
+			break;
+		case Lqh:
+			kind = "Q";
+			break;
+		case Lsitd:
+			kind = "S";
+			break;
+		default:
+			kind = "F";
+			break;
+		}
+	return seprint(s, se, "%s", kind);
+}
+
+static char*
+seprintitd(char *s, char *se, Itd *td)	/* format td into [s, se) for debugging; returns what the last seprint returned */
+{
+	int i;
+	uint32_t b0, b1;
+	char flags[6];	/* five flag characters plus the terminating 0 */
+	char *rw;
+
+	if(td == nil)
+		return seprint(s, se, "<nil itd>\n");
+	b0 = td->buffer[0];
+	b1 = td->buffer[1];
+
+	s = seprint(s, se, "itd %#p", td);
+	rw = (b1 & Itdin) ? "in" : "out";
+	s = seprint(s, se, " %s ep %uld dev %uld max %uld mult %uld",
+		rw, (b0>>8)&Epmax, (b0&Devmax),
+		td->buffer[1] & 0x7ff, b1 & 3);
+	s = seprintlink(s, se, " link", td->link, 1);
+	s = seprint(s, se, "\n");
+	for(i = 0; i < nelem(td->csw); i++){	/* one output line per transaction status word */
+		memset(flags, '-', 5);	/* '-' marks a bit that is clear */
+		if((td->csw[i] & Itdactive) != 0)
+			flags[0] = 'a';
+		if((td->csw[i] & Itdioc) != 0)
+			flags[1] = 'i';
+		if((td->csw[i] & Itddberr) != 0)
+			flags[2] = 'd';
+		if((td->csw[i] & Itdbabble) != 0)
+			flags[3] = 'b';
+		if((td->csw[i] & Itdtrerr) != 0)
+			flags[4] = 't';
+		flags[5] = 0;
+		s = seprint(s, se, "\ttd%d %s", i, flags);
+		s = seprint(s, se, " len %uld", (td->csw[i] >> 16) & 0x7ff);
+		s = seprint(s, se, " pg %uld", (td->csw[i] >> 12) & 0x7);
+		s = seprint(s, se, " off %uld\n", td->csw[i] & 0xfff);
+	}
+	s = seprint(s, se, "\tbuffs:");
+	for(i = 0; i < nelem(td->buffer); i++)
+		s = seprint(s, se, " %#lux", td->buffer[i] >> 12);	/* drop the low 12 bits of each buffer word */
+	return seprint(s, se, "\n");
+}
+
+/*
+ * Format Sitd td into [s, se) for debugging and return the new end
+ * of the text.  A nil td is reported as "<nil sitd>".
+ *
+ * The transaction position (tpos) and count (tcnt) are both taken
+ * from buffer[1], which is the word sitdinit() ORs the Stdtp* and
+ * count bits into; buffer[0] only supplies the page and offset.
+ */
+static char*
+seprintsitd(char *s, char *se, Sitd *td)
+{
+	char rw, pg, ss;
+	char flags[8];	/* seven flag characters plus the terminating 0 */
+	static char pc[4] = { 'a', 'b', 'm', 'e' };
+
+	if(td == nil)
+		return seprint(s, se, "<nil sitd>\n");
+	s = seprint(s, se, "sitd %#p", td);
+	rw = (td->epc & Stdin) ? 'r' : 'w';
+	s = seprint(s, se, " %c ep %uld dev %uld",
+		rw, (td->epc>>8)&0xf, td->epc&0x7f);
+	s = seprint(s, se, " max %uld", (td->csw >> 16) & 0x3ff);
+	s = seprint(s, se, " hub %uld", (td->epc >> 16) & 0x7f);
+	s = seprint(s, se, " port %uld\n", (td->epc >> 24) & 0x7f);
+	memset(flags, '-', 7);	/* '-' marks a bit that is clear */
+	if((td->csw & Stdactive) != 0)
+		flags[0] = 'a';
+	if((td->csw & Stdioc) != 0)
+		flags[1] = 'i';
+	if((td->csw & Stderr) != 0)
+		flags[2] = 'e';
+	if((td->csw & Stddberr) != 0)
+		flags[3] = 'd';
+	if((td->csw & Stdbabble) != 0)
+		flags[4] = 'b';
+	if((td->csw & Stdtrerr) != 0)
+		flags[5] = 't';
+	if((td->csw & Stdmmf) != 0)
+		flags[6] = 'n';
+	flags[7] = 0;
+	ss = (td->csw & Stddcs) ? 'c' : 's';
+	pg = (td->csw & Stdpg) ? '1' : '0';
+	s = seprint(s, se, "\t%s %cs pg%c", flags, ss, pg);
+	s = seprint(s, se, " b0 %#lux b1 %#lux off %uld\n",
+		td->buffer[0] >> 12, td->buffer[1] >> 12, td->buffer[0] & 0xfff);
+	/* tpos lives in bits 4:3 of buffer[1], next to tcnt in bits 2:0 */
+	s = seprint(s, se, "\ttpos %c tcnt %uld",
+		pc[(td->buffer[1]>>3)&3], td->buffer[1] & 7);
+	s = seprint(s, se, " ssm %#lux csm %#lux cspm %#lux",
+		td->mfs & 0xff, (td->mfs>>8) & 0xff, (td->csw>>8) & 0xff);
+	s = seprintlink(s, se, " link", td->link, 1);
+	s = seprintlink(s, se, " blink", td->blink, 0);
+	return seprint(s, se, "\n");
+}
+
+static int32_t
+maxtdlen(Td *td)	/* value of the length field (Tdlenshift/Tdlenmask) of td's status word */
+{
+	return (td->csw >> Tdlenshift) & Tdlenmask;
+}
+
+/*
+ * td->ndata minus the length field still recorded in td's status
+ * word; 0 for a td without a data buffer.
+ */
+static int32_t
+tdlen(Td *td)
+{
+	if(td->data != nil)
+		return td->ndata - maxtdlen(td);
+	return 0;
+}
+
+static char*
+seprinttd(char *s, char *se, Td *td, char *tag)	/* format td, prefixed by tag, into [s, se); returns the new end of the text */
+{
+	int i;
+	char t, ss;
+	char flags[9];	/* eight flag characters plus the terminating 0 */
+	static char *tok[4] = { "out", "in", "setup", "BUG" };
+
+	if(td == nil)
+		return seprint(s, se, "%s <nil td>\n", tag);
+	s = seprint(s, se, "%s %#p", tag, td);
+	s = seprintlink(s, se, " nlink", td->nlink, 0);
+	s = seprintlink(s, se, " alink", td->alink, 0);
+	s = seprint(s, se, " %s", tok[(td->csw & Tdtok) >> 8]);
+	if((td->csw & Tdping) != 0)
+		s = seprint(s, se, " png");
+	memset(flags, '-', 8);	/* '-' marks a condition that does not hold */
+	if((td->csw & Tdactive) != 0)
+		flags[0] = 'a';
+	if((td->csw & Tdioc) != 0)
+		flags[1] = 'i';
+	if((td->csw & Tdhalt) != 0)
+		flags[2] = 'h';
+	if((td->csw & Tddberr) != 0)
+		flags[3] = 'd';
+	if((td->csw & Tdbabble) != 0)
+		flags[4] = 'b';
+	if((td->csw & Tdtrerr) != 0)
+		flags[5] = 't';
+	if((td->csw & Tdmmf) != 0)
+		flags[6] = 'n';
+	if((td->csw & (Tderr2|Tderr1)) == 0)	/* 'z': both Tderr bits are zero */
+		flags[7] = 'z';
+	flags[8] = 0;
+	t = (td->csw & Tddata1) ? '1' : '0';
+	ss = (td->csw & Tddcs) ? 'c' : 's';
+	s = seprint(s, se, "\n\td%c %s %cs", t, flags, ss);
+	s = seprint(s, se, " max %uld", maxtdlen(td));
+	s = seprint(s, se, " pg %uld off %#lux\n",
+		(td->csw >> Tdpgshift) & Tdpgmask, td->buffer[0] & 0xFFF);
+	s = seprint(s, se, "\tbuffs:");
+	for(i = 0; i < nelem(td->buffer); i++)
+		s = seprint(s, se, " %#lux", td->buffer[i]>>12);
+	if(td->data != nil)
+		s = seprintdata(s, se, td->data, td->ndata);
+	return seprint(s, se, "\n");
+}
+
+/*
+ * Print the Td list starting at td, one seprinttd() record per Td,
+ * each prefixed by pref.  The walk gives up with "...more tds..."
+ * once the counter has passed 20.
+ */
+static void
+dumptd(Td *td, char *pref)
+{
+	char line[256];
+	int n;
+
+	for(n = 0; td != nil; td = td->next, n++){
+		seprinttd(line, line+sizeof(line), td, pref);
+		print("%s", line);
+		if(n > 20){
+			print("...more tds...\n");
+			break;
+		}
+	}
+}
+
+static void
+qhdump(Qh *qh)	/* print a debugging description of qh: two lines of fields, then its overlay area formatted as a Td */
+{
+	char buf[256];
+	char *s, *se, *tag;
+	Td td;
+	static char *speed[] = {"full", "low", "high", "BUG"};
+
+	if(qh == nil){
+		print("<nil qh>\n");
+		return;
+	}
+	if(qh->io == nil)
+		tag = "qh";
+	else
+		tag = qh->io->tag;
+	se = buf+sizeof(buf);
+	s = seprint(buf, se, "%s %#p", tag, qh);
+	s = seprint(s, se, " ep %uld dev %uld",
+		(qh->eps0>>8)&0xf, qh->eps0&0x7f);
+	s = seprint(s, se, " hub %uld", (qh->eps1 >> 16) & 0x7f);
+	s = seprint(s, se, " port %uld", (qh->eps1 >> 23) & 0x7f);
+	s = seprintlink(s, se, " link", qh->link, 1);
+	seprint(s, se, "  clink %#lux", qh->tclink);
+	print("%s\n", buf);
+	s = seprint(buf, se, "\tnrld %uld", (qh->eps0 >> Qhrlcshift) & Qhrlcmask);	/* buf is reused for the second line */
+	s = seprint(s, se, " nak %uld", (qh->alink >> 1) & 0xf);
+	s = seprint(s, se, " max %uld ", qhmaxpkt(qh));
+	if((qh->eps0 & Qhnhctl) != 0)
+		s = seprint(s, se, "c");
+	if((qh->eps0 & Qhhrl) != 0)
+		s = seprint(s, se, "h");
+	if((qh->eps0 & Qhdtc) != 0)
+		s = seprint(s, se, "d");
+	if((qh->eps0 & Qhint) != 0)
+		s = seprint(s, se, "i");
+	s = seprint(s, se, " %s", speed[(qh->eps0 >> 12) & 3]);
+	s = seprint(s, se, " mult %uld", (qh->eps1 >> Qhmultshift) & Qhmultmask);
+	seprint(s, se, " scm %#lux ism %#lux\n",
+		(qh->eps1 >> 8 & 0xff), qh->eps1 & 0xff);
+	print("%s\n", buf);
+	memset(&td, 0, sizeof(td));
+	memmove(&td, &qh->nlink, 32);	/* overlay area */
+	seprinttd(buf, se, &td, "\tovl");	/* the copy is printed with the Td formatter */
+	print("%s", buf);
+}
+
+/*
+ * Print the state of an isochronous stream for debugging.
+ *
+ * A summary (direction, speed, state, frame counts, pointers and
+ * the pending error string, if any) is always printed.  With
+ * all == 0 only the descriptors at tdi and tdu are shown (Itds for
+ * high speed streams, Sitds otherwise).  With all != 0 every non-nil
+ * entry of the frame table is shown, prefixed by "i->" when it is the
+ * tdi descriptor and by "u->" when it is the tdu descriptor.
+ */
+static void
+isodump(Isoio* iso, int all)
+{
+	Itd *td;
+	Sitd *std;
+	char buf[256];
+	char *se;
+	int i;
+
+	if(iso == nil){
+		print("<nil iso>\n");
+		return;
+	}
+	se = buf+sizeof(buf);
+	print("iso %#p %s %s speed state %d nframes %d maxsz %uld",
+		iso, iso->tok == Tdtokin ? "in" : "out",
+		iso->hs ? "high" : "full",
+		iso->state, iso->nframes, iso->maxsize);
+	print(" td0 %uld tdi %#p tdu %#p data %#p\n",
+		iso->td0frno, iso->tdi, iso->tdu, iso->data);
+	if(iso->err != nil)
+		print("\terr %s\n", iso->err);
+
+	if(all == 0){
+		if(iso->hs != 0){
+			seprintitd(buf, se, iso->tdi);
+			print("\ttdi %s\n", buf);
+			seprintitd(buf, se, iso->tdu);
+			print("\ttdu %s\n", buf);
+		}else{
+			seprintsitd(buf, se, iso->stdi);
+			print("\tstdi %s\n", buf);
+			seprintsitd(buf, se, iso->stdu);
+			print("\tstdu %s\n", buf);
+		}
+		return;
+	}
+
+	for(i = 0; i < Nisoframes; i++){
+		if(iso->tdps[i] == nil)
+			continue;
+		if(iso->hs != 0){
+			td = iso->itdps[i];
+			seprintitd(buf, se, td);
+			if(td == iso->tdi)
+				print("i->");
+			if(td == iso->tdu)
+				print("u->");
+			print("[%d]\t%s", i, buf);
+		}else{
+			std = iso->sitdps[i];
+			seprintsitd(buf, se, std);
+			if(std == iso->stdi)
+				print("i->");
+			if(std == iso->stdu)
+				print("u->");
+			print("[%d]\t%s", i, buf);
+		}
+	}
+}
+
+/*
+ * Print a debugging description of the whole controller behind hp:
+ * counters and operational registers, the port status words, the
+ * ctlr->qhs list (walked through ->next, at most about 100 entries),
+ * the interrupt qhs, every iso stream, and the ed pool counters.
+ * The controller is ilocked while its own state is printed; the
+ * pool counters are read under the edpool lock.
+ */
+static void
+dump(Hci *hp)
+{
+	int i;
+	char *s, *se;
+	char buf[128];
+	Ctlr *ctlr;
+	Eopio *opio;
+	Isoio *iso;
+	Qh *qh;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	ilock(ctlr);
+	print("ehci port %#p frames %#p (%d fr.) nintr %d ntdintr %d",
+		ctlr->capio, ctlr->frames, ctlr->nframes,
+		ctlr->nintr, ctlr->ntdintr);
+	print(" nqhintr %d nisointr %d\n", ctlr->nqhintr, ctlr->nisointr);
+	print("\tcmd %#lux sts %#lux intr %#lux frno %uld",
+		opio->cmd, opio->sts, opio->intr, opio->frno);
+	print(" base %#lux link %#lux fr0 %#lux\n",
+		opio->frbase, opio->link, ctlr->frames[0]);
+	se = buf+sizeof(buf);
+	s = seprint(buf, se, "\t");
+	for(i = 0; i < hp->nports; i++){
+		s = seprint(s, se, "p%d %#lux ", i, opio->portsc[i]);
+		/* break the port list in two lines when there are many ports */
+		if(hp->nports > 4 && i == hp->nports/2 - 1)
+			s = seprint(s, se, "\n\t");
+	}
+	print("%s\n", buf);
+	qh = ctlr->qhs;
+	i = 0;
+	do{
+		qhdump(qh);
+		if(qh == nil)		/* qhdump reported it; nothing to follow */
+			break;
+		qh = qh->next;
+	}while(qh != ctlr->qhs && i++ < 100);
+	if(i > 100)
+		print("...too many Qhs...\n");
+	if(ctlr->intrqhs != nil)
+		print("intr qhs:\n");
+	for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext)
+		qhdump(qh);
+	if(ctlr->iso != nil)
+		print("iso:\n");
+	/* dump each stream in turn, not the head of the list every time */
+	for(iso = ctlr->iso; iso != nil; iso = iso->next)
+		isodump(iso, 0);
+	print("%d eds in tree\n", ctlr->ntree);
+	iunlock(ctlr);
+	lock(&edpool);
+	print("%d eds allocated = %d in use + %d free\n",
+		edpool.nalloc, edpool.ninuse, edpool.nfree);
+	unlock(&edpool);
+}
+
+/*
+ * Map the error bits of a Td status word to an error string.
+ * Bits are tested in a fixed order and the first one set decides;
+ * 0 yields "ok" and an unrecognized value yields Eio.
+ */
+static char*
+errmsg(int err)
+{
+	char *msg;
+
+	if(err == 0)
+		msg = "ok";
+	else if(err & Tddberr)
+		msg = "data buffer error";
+	else if(err & Tdbabble)
+		msg = "babble detected";
+	else if(err & Tdtrerr)
+		msg = "transaction error";
+	else if(err & Tdmmf)
+		msg = "missed µframe";
+	else if(err & Tdhalt)
+		msg = Estalled;	/* [uo]hci report this error */
+	else
+		msg = Eio;
+	return msg;
+}
+
+/*
+ * Map the error bits of an Itd transaction status word to an error
+ * string; the first matching bit decides, 0 yields "ok" and anything
+ * else yields Eio.
+ */
+static char*
+ierrmsg(int err)
+{
+	char *msg;
+
+	if(err == 0)
+		msg = "ok";
+	else if(err & Itddberr)
+		msg = "data buffer error";
+	else if(err & Itdbabble)
+		msg = "babble detected";
+	else if(err & Itdtrerr)
+		msg = "transaction error";
+	else
+		msg = Eio;
+	return msg;
+}
+
+/*
+ * Map the error bits of a Sitd status word to an error string.
+ * Stderr is handled here; every other value is handed to errmsg().
+ */
+static char*
+serrmsg(int err)
+{
+	if(err & Stderr)
+		return "transaction translator error";
+	/* other errors have the same numbers as Td errors */
+	return errmsg(err);
+}
+
+/*
+ * Sleep condition for iso readers.  True when the stream is closed,
+ * or when it is a running input stream whose tdi and tdu pointers
+ * differ (Itd pointers for high speed, Sitd pointers otherwise).
+ */
+static int
+isocanread(void *a)
+{
+	Isoio *iso = a;
+
+	if(iso->state == Qclose)
+		return 1;
+	if(iso->state != Qrun || iso->tok != Tdtokin)
+		return 0;
+	if(iso->hs != 0)
+		return iso->tdi != iso->tdu;
+	return iso->stdi != iso->stdu;
+}
+
+/*
+ * Sleep condition for iso writers.  True when the stream is closed,
+ * or when it is a running output stream and the descriptor after tdu
+ * is not tdi (Itd pointers for high speed, Sitd pointers otherwise).
+ */
+static int
+isocanwrite(void *a)
+{
+	Isoio *iso = a;
+
+	if(iso->state == Qclose)
+		return 1;
+	if(iso->state != Qrun || iso->tok != Tdtokout)
+		return 0;
+	if(iso->hs != 0)
+		return iso->tdu->next != iso->tdi;
+	return iso->stdu->next != iso->stdi;
+}
+
+static void
+itdinit(Isoio *iso, Itd *td)	/* (re)arm td: split its mdata bytes into at most 8 transactions of up to iso->maxsize bytes each */
+{
+	int p, t;
+	uint32_t pa, tsize, size;
+
+	/*
+	 * BUG: This does not put an integral number of samples
+	 * on each µframe unless samples per packet % 8 == 0
+	 * Also, all samples are packed early on each frame.
+	 */
+	p = 0;
+	size = td->ndata = td->mdata;	/* ndata starts out as the full buffer size */
+	pa = PADDR(td->data);
+	for(t = 0; size > 0 && t < 8; t++){
+		tsize = size;
+		if(tsize > iso->maxsize)
+			tsize = iso->maxsize;
+		size -= tsize;
+		assert(p < nelem(td->buffer));
+		td->csw[t] = tsize << Itdlenshift | p << Itdpgshift |
+			(pa & 0xFFF) << Itdoffshift | Itdactive | Itdioc;
+		coherence();
+		if(((pa+tsize) & ~0xFFF) != (pa & ~0xFFF))	/* the next transaction starts in another 4K page */
+			p++;
+		pa += tsize;
+	}
+}
+
+static void
+sitdinit(Isoio *iso, Sitd *td)	/* (re)arm td for its whole buffer: set buffer words first, then the active status word */
+{
+	td->ndata = td->mdata & Stdlenmask;
+	td->buffer[0] = PADDR(td->data);
+	td->buffer[1] = (td->buffer[0] & ~0xFFF) + 0x1000;	/* start of the page following the one holding data */
+	if(iso->tok == Tdtokin || td->ndata <= 188)
+		td->buffer[1] |= Stdtpall;
+	else
+		td->buffer[1] |= Stdtpbegin;
+	if(iso->tok == Tdtokin)
+		td->buffer[1] |= 1;
+	else
+		td->buffer[1] |= ((td->ndata + 187) / 188) & Stdtcntmask;	/* ndata/188 rounded up, masked to the count field */
+	coherence();
+	td->csw = td->ndata << Stdlenshift | Stdactive | Stdioc;	/* written last, after the buffer words are visible */
+	coherence();
+}
+
+/*
+ * Report whether any of the transaction status words of td still
+ * has Itdactive set.
+ */
+static int
+itdactive(Itd *td)
+{
+	int slot;
+
+	slot = 0;
+	while(slot < nelem(td->csw)){
+		if(td->csw[slot] & Itdactive)
+			return 1;
+		slot++;
+	}
+	return 0;
+}
+
+/*
+ * Interrupt-time processing of a high speed iso stream.
+ *
+ * Returns 0 when the Itd at iso->tdi still has active transactions.
+ * Otherwise walks the completed Itds starting at tdi (at most
+ * nframes/2 of them, capped at Nisoframes): clears Itdioc on every
+ * transaction, collects the error bits and, for input streams,
+ * records in ndata the sum of the length fields of all transactions
+ * of that Itd.  An Itd with errors gets ndata = 0.  When tdi is
+ * about to catch up with tdu, the tdu descriptor is zeroed, re-armed
+ * and tdu is advanced.  Finally tdi is stored back and sleepers on
+ * iso are woken if they can make progress.  Returns 1.
+ */
+static int
+isohsinterrupt(Ctlr *ctlr, Isoio *iso)
+{
+	int err, i, nframes, t;
+	uint32_t len;
+	Itd *tdi;
+
+	tdi = iso->tdi;
+	assert(tdi != nil);
+	if(itdactive(tdi))			/* not all tds are done */
+		return 0;
+	ctlr->nisointr++;
+	ddiprint("isohsintr: iso %#p: tdi %#p tdu %#p\n", iso, tdi, iso->tdu);
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("isohsintr: iso state");
+	if(ehcidebug > 1 || iso->debug > 1)
+		isodump(iso, 0);
+
+	nframes = iso->nframes / 2;		/* limit how many we look */
+	if(nframes > Nisoframes)
+		nframes = Nisoframes;
+
+	for(i = 0; i < nframes && itdactive(tdi) == 0; i++){
+		err = 0;
+		len = 0;
+		coherence();
+		/*
+		 * i counts Itds, t counts the transactions within one:
+		 * lengths must be gathered with t, never with i.
+		 */
+		for(t = 0; t < nelem(tdi->csw); t++){
+			tdi->csw[t] &= ~Itdioc;
+			coherence();
+			err |= tdi->csw[t] & Itderrors;
+			len += (tdi->csw[t] >> Itdlenshift) & Itdlenmask;
+		}
+		if(iso->tok == Tdtokin)
+			tdi->ndata = len;
+		/* else, it has the number of bytes transferred */
+
+		if(err == 0)
+			iso->nerrs = 0;
+		else if(iso->nerrs++ > iso->nframes/2){
+			if(iso->err == nil){
+				iso->err = ierrmsg(err);
+				diprint("isohsintr: tdi %#p error %#ux %s\n",
+					tdi, err, iso->err);
+				diprint("ctlr load %uld\n", ctlr->load);
+			}
+			tdi->ndata = 0;
+		}else
+			tdi->ndata = 0;
+		if(tdi->next == iso->tdu || tdi->next->next == iso->tdu){
+			memset(iso->tdu->data, 0, iso->tdu->mdata);
+			itdinit(iso, iso->tdu);
+			iso->tdu = iso->tdu->next;
+			iso->nleft = 0;
+		}
+		tdi = tdi->next;
+		coherence();
+	}
+	ddiprint("isohsintr: %d frames processed\n", nframes);
+	if(i == nframes){
+		/* stopped by the limit: ask for an interrupt on the next Itd */
+		tdi->csw[0] |= Itdioc;
+		coherence();
+	}
+	iso->tdi = tdi;
+	coherence();
+	if(isocanwrite(iso) || isocanread(iso)){
+		diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso,
+			iso->tdi, iso->tdu);
+		wakeup(iso);
+	}
+	return 1;
+}
+
+static int
+isofsinterrupt(Ctlr *ctlr, Isoio *iso)	/* full speed counterpart of isohsinterrupt: process completed Sitds from iso->stdi; 0 if none, else 1 */
+{
+	int err, i, nframes;
+	Sitd *stdi;
+
+	stdi = iso->stdi;
+	assert(stdi != nil);
+	if((stdi->csw & Stdactive) != 0)		/* nothing new done */
+		return 0;
+	ctlr->nisointr++;
+	ddiprint("isofsintr: iso %#p: tdi %#p tdu %#p\n", iso, stdi, iso->stdu);
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("isofsintr: iso state");
+	if(ehcidebug > 1 || iso->debug > 1)
+		isodump(iso, 0);
+
+	nframes = iso->nframes / 2;		/* limit how many we look */
+	if(nframes > Nisoframes)
+		nframes = Nisoframes;
+
+	for(i = 0; i < nframes && (stdi->csw & Stdactive) == 0; i++){
+		stdi->csw &= ~Stdioc;
+		/* write back csw and see if it produces errors */
+		coherence();
+		err = stdi->csw & Stderrors;
+		if(err == 0){
+			iso->nerrs = 0;
+			if(iso->tok == Tdtokin)
+				stdi->ndata = (stdi->csw>>Stdlenshift)&Stdlenmask;
+			/* else len is assumed correct */
+		}else if(iso->nerrs++ > iso->nframes/2){	/* only a long run of consecutive errors is recorded in iso->err */
+			if(iso->err == nil){
+				iso->err = serrmsg(err);
+				diprint("isofsintr: tdi %#p error %#ux %s\n",
+					stdi, err, iso->err);
+				diprint("ctlr load %uld\n", ctlr->load);
+			}
+			stdi->ndata = 0;
+		}else
+			stdi->ndata = 0;
+
+		if(stdi->next == iso->stdu || stdi->next->next == iso->stdu){	/* stdi is about to catch up with stdu */
+			memset(iso->stdu->data, 0, iso->stdu->mdata);
+			coherence();
+			sitdinit(iso, iso->stdu);
+			iso->stdu = iso->stdu->next;
+			iso->nleft = 0;
+		}
+		coherence();
+		stdi = stdi->next;
+	}
+	ddiprint("isofsintr: %d frames processed\n", nframes);
+	if(i == nframes){	/* stopped by the limit: request an interrupt on the next Sitd */
+		stdi->csw |= Stdioc;
+		coherence();
+	}
+	iso->stdi = stdi;
+	coherence();
+	if(isocanwrite(iso) || isocanread(iso)){
+		diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso,
+			iso->stdi, iso->stdu);
+		wakeup(iso);
+	}
+	return 1;
+}
+
+static int
+qhinterrupt(Ctlr *ctlr, Qh *qh)	/* check the Tds of a running qh; 0 if one is still active, else mark the qh Qdone, wake its io and return 1 */
+{
+	Td *td;
+	int err;
+
+	if(qh->state != Qrun)
+		panic("qhinterrupt: qh state");
+	td = qh->tds;
+	if(td == nil)
+		panic("qhinterrupt: no tds");
+	if((td->csw & Tdactive) == 0)
+		ddqprint("qhinterrupt port %#p qh %#p\n", ctlr->capio, qh);
+	for(; td != nil; td = td->next){
+		if(td->csw & Tdactive)
+			return 0;
+		err = td->csw & Tderrors;
+		if(err != 0){
+			if(qh->io->err == nil){	/* only the first error is kept */
+				qh->io->err = errmsg(err);
+				dqprint("qhintr: td %#p csw %#lux error %#ux %s\n",
+					td, td->csw, err, qh->io->err);
+			}
+			break;
+		}
+		td->ndata = tdlen(td);
+		coherence();
+		if(td->ndata < maxtdlen(td)){	/* EOT */
+			td = td->next;
+			break;
+		}
+	}
+	/*
+	 * Done. Make void the Tds not used (errors or EOT) and wakeup epio.
+	 */
+	for(; td != nil; td = td->next)
+		td->ndata = 0;
+	coherence();
+	qh->state = Qdone;
+	coherence();
+	wakeup(qh->io);
+	return 1;
+}
+
+/*
+ * Interrupt service for the controller behind hp; also called
+ * directly by the polling code.
+ *
+ * Reads and acknowledges the status bits in Sintrs; returns 0 if
+ * none is set (shared interrupt).  Reports fatal/halted conditions,
+ * wakes sleepers on ctlr for Sasync, and for Serrintr/Sintr scans
+ * every iso stream, every interrupt qh and the ctlr->qhs list.
+ * Returns the number of streams/qhs that reported work.
+ * Runs with the controller ilocked.
+ */
+static int
+ehciintr(Hci *hp)
+{
+	Ctlr *ctlr;
+	Eopio *opio;
+	Isoio *iso;
+	uint32_t sts;
+	Qh *qh;
+	int i, some;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+
+	/*
+	 * Will we know in USB 3.0 who the interrupt was for?.
+	 * Do they still teach indexing in CS?
+	 * This is Intel's doing.
+	 */
+	ilock(ctlr);
+	ctlr->nintr++;
+	sts = opio->sts & Sintrs;
+	if(sts == 0){		/* not ours; shared intr. */
+		iunlock(ctlr);
+		return 0;
+	}
+	opio->sts = sts;	/* write the bits back to acknowledge them */
+	coherence();
+	if((sts & Sherr) != 0)
+		print("ehci: port %#p fatal host system error\n", ctlr->capio);
+	if((sts & Shalted) != 0)
+		print("ehci: port %#p: halted\n", ctlr->capio);
+	if((sts & Sasync) != 0){
+		dprint("ehci: doorbell\n");
+		wakeup(ctlr);
+	}
+	/*
+	 * We enter always this if, even if it seems the
+	 * interrupt does not report anything done/failed.
+	 * Some controllers don't post interrupts right.
+	 */
+	some = 0;
+	if((sts & (Serrintr|Sintr)) != 0){
+		ctlr->ntdintr++;
+		if(ehcidebug > 1){
+			print("ehci port %#p frames %#p nintr %d ntdintr %d",
+				ctlr->capio, ctlr->frames,
+				ctlr->nintr, ctlr->ntdintr);
+			print(" nqhintr %d nisointr %d\n",
+				ctlr->nqhintr, ctlr->nisointr);
+			print("\tcmd %#lux sts %#lux intr %#lux frno %uld",
+				opio->cmd, opio->sts, opio->intr, opio->frno);
+		}
+
+		/* process the Iso transfers */
+		for(iso = ctlr->iso; iso != nil; iso = iso->next)
+			if(iso->state == Qrun || iso->state == Qdone)
+				if(iso->hs != 0)
+					some += isohsinterrupt(ctlr, iso);
+				else
+					some += isofsinterrupt(ctlr, iso);
+
+		/* process the qhs in the periodic tree */
+		for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext)
+			if(qh->state == Qrun)
+				some += qhinterrupt(ctlr, qh);
+
+		/* process the async Qh circular list */
+		qh = ctlr->qhs;
+		i = 0;
+		do{
+			if (qh == nil)
+				panic("ehciintr: nil qh");
+			if(qh->state == Qrun)
+				some += qhinterrupt(ctlr, qh);
+			qh = qh->next;
+		}while(qh != ctlr->qhs && i++ < 100);
+		if(i > 100)
+			print("ehci: interrupt: qh loop?\n");
+	}
+//	if (some == 0)
+//		panic("ehciintr: no work");
+	iunlock(ctlr);
+	return some;
+}
+
+static void
+interrupt(Ureg *ureg, void* a)	/* interrupt entry point: a is handed to ehciintr(); ureg is not used */
+{
+	ehciintr(a);	/* the count returned by ehciintr is discarded */
+}
+
+static int
+portenable(Hci *hp, int port, int on)	/* set (on != 0) or clear Psenable on the 1-based port; always returns 0 */
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	Eopio *opio;
+	int s;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	s = opio->portsc[port-1];	/* NOTE(review): read before portlck is taken -- confirm that is intended */
+	qlock(&ctlr->portlck);
+	if(waserror()){
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	dprint("ehci %#p port %d enable=%d; sts %#x\n",
+		ctlr->capio, port, on, s);
+	ilock(ctlr);
+	if(s & (Psstatuschg | Pschange))
+		opio->portsc[port-1] = s;	/* write the status word back when a change bit was set */
+	if(on)
+		opio->portsc[port-1] |= Psenable;
+	else
+		opio->portsc[port-1] &= ~Psenable;
+	coherence();
+	microdelay(64);
+	iunlock(ctlr);
+	tsleep(&m->externup->sleep, return0, 0, Enabledelay);	/* return0 as condition: sleep for the full Enabledelay */
+	dprint("ehci %#p port %d enable=%d: sts %#lux\n",
+		ctlr->capio, port, on, opio->portsc[port-1]);
+	qunlock(&ctlr->portlck);
+	poperror();
+	return 0;
+}
+
+/*
+ * If we detect during status that the port is low-speed or
+ * during reset that it's full-speed, the device is not for
+ * ourselves. The companion controller will take care.
+ * Low-speed devices will not be seen by usbd. Full-speed
+ * ones are seen because it's only after reset that we know what
+ * they are (usbd may notice a device not enabled in this case).
+ *
+ * portlend sets Psowner on the 1-based port, writing the change
+ * bits (Pschange, Psstatuschg) as zero; ss names the detected speed
+ * and is used only in the debug message.
+ */
+static void
+portlend(Ctlr *ctlr, int port, char *ss)
+{
+	Eopio *opio;
+	uint32_t s;
+
+	opio = ctlr->opio;
+
+	dprint("ehci %#p port %d: %s speed device: no longer owned\n",
+		ctlr->capio, port, ss);
+	s = opio->portsc[port-1] & ~(Pschange|Psstatuschg);
+	opio->portsc[port-1] = s | Psowner;
+	coherence();
+}
+
+static int
+portreset(Hci *hp, int port, int on)	/* reset the 1-based port when on != 0 (on == 0 does nothing); always returns 0 */
+{
+	Mach *m = machp();
+	uint32_t *portscp;
+	Eopio *opio;
+	Ctlr *ctlr;
+	int i;
+
+	if(on == 0)
+		return 0;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	qlock(&ctlr->portlck);
+	if(waserror()){
+		iunlock(ctlr);	/* NOTE(review): would also run for an error raised before ilock() below -- confirm that cannot happen */
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	portscp = &opio->portsc[port-1];
+	dprint("ehci %#p port %d reset; sts %#lux\n", ctlr->capio, port, *portscp);
+	ilock(ctlr);
+	/* Shalted must be zero, else Psreset will stay set */
+	if (opio->sts & Shalted)
+		iprint("ehci %#p: halted yet trying to reset port\n",
+			ctlr->capio);
+	*portscp = (*portscp & ~Psenable) | Psreset;	/* initiate reset */
+	coherence();
+
+	/*
+	 * usb 2 spec: reset must finish within 20 ms.
+	 * linux says spec says it can take 50 ms. for hubs.
+	 */
+	for(i = 0; *portscp & Psreset && i < 50; i++)	/* up to 50 delay(10) calls, made while ilocked */
+		delay(10);
+	if (*portscp & Psreset)
+		iprint("ehci %#p: port %d didn't reset within %d ms; sts %#lux\n",
+			ctlr->capio, port, i * 10, *portscp);
+	*portscp &= ~Psreset;		/* force appearance of reset done */
+	coherence();
+	delay(10);			/* ehci spec: enable within 2 ms. */
+
+	if((*portscp & Psenable) == 0)	/* not enabled after reset: treated as a full speed device and handed over */
+		portlend(ctlr, port, "full");
+
+	iunlock(ctlr);
+	dprint("ehci %#p after port %d reset; sts %#lux\n",
+		ctlr->capio, port, *portscp);
+	qunlock(&ctlr->portlck);
+	poperror();
+	return 0;
+}
+
+static int
+portstatus(Hci *hp, int port)	/* read the 1-based port's status word, ack change bits, and return it translated to HP* bits */
+{
+	Mach *m = machp();
+	int s, r;
+	Eopio *opio;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	qlock(&ctlr->portlck);
+	if(waserror()){
+		iunlock(ctlr);	/* NOTE(review): would also run for an error raised before ilock() below -- confirm that cannot happen */
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	ilock(ctlr);
+	s = opio->portsc[port-1];
+	if(s & (Psstatuschg | Pschange)){
+		opio->portsc[port-1] = s;	/* write the word back when a change bit is set */
+		coherence();
+		ddprint("ehci %#p port %d status %#x\n", ctlr->capio, port, s);
+	}
+	/*
+	 * If the port is a low speed port we yield ownership now
+	 * to the [uo]hci companion controller and pretend it's not here.
+	 */
+	if((s & Pspresent) != 0 && (s & Pslinemask) == Pslow){
+		portlend(ctlr, port, "low");
+		s &= ~Pspresent;		/* not for us this time */
+	}
+	iunlock(ctlr);
+	qunlock(&ctlr->portlck);
+	poperror();
+
+	/*
+	 * We must return status bits as a
+	 * get port status hub request would do.
+	 */
+	r = 0;
+	if(s & Pspresent)
+		r |= HPpresent|HPhigh;	/* any device still reported present is reported as high speed */
+	if(s & Psenable)
+		r |= HPenable;
+	if(s & Pssuspend)
+		r |= HPsuspend;
+	if(s & Psreset)
+		r |= HPreset;
+	if(s & Psstatuschg)
+		r |= HPstatuschg;
+	if(s & Pschange)
+		r |= HPchange;
+	return r;
+}
+
+/*
+ * Append a one-line description of io, prefixed by pref, to the
+ * buffer [s, e): its address, qh, usb id, i/o time, toggle, token
+ * and error string.  Returns the new end of the text.
+ */
+static char*
+seprintio(char *s, char *e, Qio *io, char *pref)
+{
+	char *p;
+
+	p = seprint(s, e, "%s io %#p qh %#p id %#x", pref, io, io->qh, io->usbid);
+	p = seprint(p, e, " iot %ld", io->iotime);
+	return seprint(p, e, " tog %#x tok %#x err %s", io->toggle, io->tok, io->err);
+}
+
+/*
+ * Describe the I/O state of endpoint ep in the buffer [s, e), with
+ * the controller ilocked.  Endpoints without aux state and iso
+ * endpoints produce an empty string; control endpoints print their
+ * single io plus reply/ndata; bulk and interrupt endpoints print the
+ * read and/or write io according to ep->mode.  Returns the new end
+ * of the text.
+ */
+static char*
+seprintep(char *s, char *e, Ep *ep)
+{
+	Ctlr *ctlr;
+	Ctlio *cio;
+	Qio *rwio;
+
+	ctlr = ep->hp->aux;
+	ilock(ctlr);
+	if(ep->aux == nil || ep->ttype == Tiso)
+		*s = 0;
+	else if(ep->ttype == Tctl){
+		cio = ep->aux;
+		s = seprintio(s, e, cio, "c");
+		s = seprint(s, e, "\trepl %d ndata %d\n", ep->rhrepl, cio->ndata);
+	}else if(ep->ttype == Tbulk || ep->ttype == Tintr){
+		rwio = ep->aux;
+		if(ep->mode != OWRITE)
+			s = seprintio(s, e, &rwio[OREAD], "r");
+		if(ep->mode != OREAD)
+			s = seprintio(s, e, &rwio[OWRITE], "w");
+	}
+	iunlock(ctlr);
+	return s;
+}
+
+/*
+ * halt condition was cleared on the endpoint. update our toggles.
+ *
+ * Clears ep->clrhalt and, for interrupt and bulk endpoints, resets
+ * the toggle of each io in use (per ep->mode) to Tddata0 while
+ * holding that io's qlock.  Other endpoint types are left alone.
+ */
+static void
+clrhalt(Ep *ep)
+{
+	Qio *io;
+
+	ep->clrhalt = 0;
+	coherence();
+	switch(ep->ttype){
+	case Tintr:
+	case Tbulk:
+		io = ep->aux;	/* indexed by OREAD/OWRITE below */
+		if(ep->mode != OREAD){
+			qlock(&io[OWRITE]);
+			io[OWRITE].toggle = Tddata0;
+			deprint("ep clrhalt for io %#p\n", io+OWRITE);
+			qunlock(&io[OWRITE]);
+		}
+		if(ep->mode != OWRITE){
+			qlock(&io[OREAD]);
+			io[OREAD].toggle = Tddata0;
+			deprint("ep clrhalt for io %#p\n", io+OREAD);
+			qunlock(&io[OREAD]);
+		}
+		break;
+	}
+}
+
+/*
+static void
+xdump(char* pref, void *qh)
+{
+	int i;
+	uint32_t *u;
+
+	u = qh;
+	print("%s %#p:", pref, u);
+	for(i = 0; i < 16; i++)
+		if((i%4) == 0)
+			print("\n %#8.8ulx", u[i]);
+		else
+			print(" %#8.8ulx", u[i]);
+	print("\n");
+}
+*/
+
+static int32_t
+episohscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, unsigned char *b, int32_t count)	/* copy up to count bytes from completed Itds at iso->tdu into b; entered and left with ctlr ilocked; returns bytes copied */
+{
+	int nr;
+	int32_t tot;
+	Itd *tdu;
+
+	for(tot = 0; iso->tdi != iso->tdu && tot < count; tot += nr){
+		tdu = iso->tdu;
+		if(itdactive(tdu))
+			break;
+		nr = tdu->ndata;
+		if(tot + nr > count)
+			nr = count - tot;	/* take only what still fits in b */
+		if(nr == 0)
+			print("ehci: ep%d.%d: too many polls\n",
+				ep->dev->nb, ep->nb);
+		else{
+			iunlock(ctlr);		/* We could page fault here */
+			memmove(b+tot, tdu->data, nr);
+			ilock(ctlr);
+			if(nr < tdu->ndata)	/* partial read: slide the unread bytes to the front of the buffer */
+				memmove(tdu->data, tdu->data+nr, tdu->ndata - nr);
+			tdu->ndata -= nr;
+			coherence();
+		}
+		if(tdu->ndata == 0){	/* fully consumed: re-arm the Itd and advance tdu */
+			itdinit(iso, tdu);
+			iso->tdu = tdu->next;
+		}
+	}
+	return tot;
+}
+
+static int32_t
+episofscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, unsigned char *b, int32_t count)	/* copy up to count bytes from completed Sitds at iso->stdu into b; entered and left with ctlr ilocked; returns bytes copied */
+{
+	int nr;
+	int32_t tot;
+	Sitd *stdu;
+
+	for(tot = 0; iso->stdi != iso->stdu && tot < count; tot += nr){
+		stdu = iso->stdu;
+		if(stdu->csw & Stdactive){
+			diprint("ehci: episoread: %#p tdu active\n", iso);
+			break;
+		}
+		nr = stdu->ndata;
+		if(tot + nr > count)
+			nr = count - tot;	/* take only what still fits in b */
+		if(nr == 0)
+			print("ehci: ep%d.%d: too many polls\n",
+				ep->dev->nb, ep->nb);
+		else{
+			iunlock(ctlr);		/* We could page fault here */
+			memmove(b+tot, stdu->data, nr);
+			ilock(ctlr);
+			if(nr < stdu->ndata)	/* partial read: slide the unread bytes to the front of the buffer */
+				memmove(stdu->data, stdu->data+nr,
+					stdu->ndata - nr);
+			stdu->ndata -= nr;
+			coherence();
+		}
+		if(stdu->ndata == 0){	/* fully consumed: re-arm the Sitd and advance stdu */
+			sitdinit(iso, stdu);
+			iso->stdu = stdu->next;
+		}
+	}
+	return tot;
+}
+
+/*
+ * Read up to count bytes from iso endpoint ep into a.
+ *
+ * Holds qlock(iso) for the whole call.  Marks the stream Qrun and
+ * sleeps (ep->tmout per attempt) until isocanread() holds.  If the
+ * sleep is interrupted by an error, iso->err is set (unless already
+ * set) and no data is copied: the error is raised at the end.
+ * Otherwise the stream is marked Qdone and data is copied out of the
+ * completed descriptors.  Raises iso->err (or Eio) if the stream is
+ * closed, and iso->err if an error was recorded; else returns the
+ * number of bytes copied.
+ */
+static int32_t
+episoread(Ep *ep, Isoio *iso, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	unsigned char *b;
+	int32_t tot;
+	int canread;
+
+	iso->debug = ep->debug;
+	diprint("ehci: episoread: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	b = a;
+	ctlr = ep->hp->aux;
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	iso->err = nil;
+	iso->nerrs = 0;
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	coherence();
+	while(isocanread(iso) == 0){
+		iunlock(ctlr);
+		diprint("ehci: episoread: %#p sleep\n", iso);
+		if(waserror()){
+			if(iso->err == nil)
+				iso->err = "I/O timed out";
+			ilock(ctlr);
+			break;
+		}
+		tsleep(iso, isocanread, iso, ep->tmout);
+		poperror();
+		ilock(ctlr);
+	}
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	/*
+	 * The loop above may have been left by an interrupted sleep
+	 * with nothing to read; asserting tdu != tdi would then panic.
+	 * In that case iso->err is set and raised below, so skip the copy.
+	 */
+	canread = isocanread(iso);
+	iso->state = Qdone;
+	coherence();
+	tot = 0;
+	if(canread){
+		assert(iso->tdu != iso->tdi);
+		if(iso->hs != 0)
+			tot = episohscpy(ctlr, ep, iso, b, count);
+		else
+			tot = episofscpy(ctlr, ep, iso, b, count);
+	}
+	iunlock(ctlr);
+	qunlock(iso);
+	poperror();
+	diprint("ehci: episoread: %#p %uld bytes err '%s'\n", iso, tot, iso->err);
+	if(iso->err != nil)
+		error(iso->err);
+	return tot;
+}
+
+/*
+ * iso->tdu is the next place to put data. When it gets full
+ * it is activated and tdu advanced.
+ *
+ * Copies up to count bytes from b while isocanwrite() holds;
+ * iso->nleft is the number of bytes already placed in the current
+ * descriptor.  Returns the number of bytes consumed from b.
+ */
+static int32_t
+putsamples(Isoio *iso, unsigned char *b, int32_t count)
+{
+	int32_t tot, n;
+
+	for(tot = 0; isocanwrite(iso) && tot < count; tot += n){
+		n = count-tot;
+		if(iso->hs != 0){
+			if(n > iso->tdu->mdata - iso->nleft)	/* clamp to the room left in this Itd */
+				n = iso->tdu->mdata - iso->nleft;
+			memmove(iso->tdu->data + iso->nleft, b + tot, n);
+			coherence();
+			iso->nleft += n;
+			if(iso->nleft == iso->tdu->mdata){	/* descriptor full: arm it and move on */
+				itdinit(iso, iso->tdu);
+				iso->nleft = 0;
+				iso->tdu = iso->tdu->next;
+			}
+		}else{
+			if(n > iso->stdu->mdata - iso->nleft)	/* clamp to the room left in this Sitd */
+				n = iso->stdu->mdata - iso->nleft;
+			memmove(iso->stdu->data + iso->nleft, b + tot, n);
+			coherence();
+			iso->nleft += n;
+			if(iso->nleft == iso->stdu->mdata){	/* descriptor full: arm it and move on */
+				sitdinit(iso, iso->stdu);
+				iso->nleft = 0;
+				iso->stdu = iso->stdu->next;
+			}
+		}
+	}
+	return tot;
+}
+
+/*
+ * Queue data for writing and return error status from
+ * last writes done, to maintain buffered data.
+ *
+ * Holds qlock(iso) for the whole call.  Marks the stream Qrun, then
+ * repeatedly waits for isocanwrite() and hands the remaining bytes
+ * to putsamples() until count bytes are queued.  A recorded error
+ * (or Eio on a closed stream) is raised; otherwise the stream is
+ * left Qdone and the byte count is returned.
+ */
+static int32_t
+episowrite(Ep *ep, Isoio *iso, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	unsigned char *b;
+	int tot, nw;
+	char *err;
+
+	iso->debug = ep->debug;
+	diprint("ehci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	ctlr = ep->hp->aux;
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	coherence();
+	b = a;
+	for(tot = 0; tot < count; tot += nw){
+		while(isocanwrite(iso) == 0){
+			iunlock(ctlr);
+			diprint("ehci: episowrite: %#p sleep\n", iso);
+			if(waserror()){	/* sleep interrupted: record an error and leave the wait loop with ctlr ilocked */
+				if(iso->err == nil)
+					iso->err = "I/O timed out";
+				ilock(ctlr);
+				break;
+			}
+			tsleep(iso, isocanwrite, iso, ep->tmout);
+			poperror();
+			ilock(ctlr);
+		}
+		err = iso->err;	/* take the pending error and clear it */
+		iso->err = nil;
+		if(iso->state == Qclose || err != nil){
+			iunlock(ctlr);
+			error(err ? err : Eio);
+		}
+		if(iso->state != Qrun)
+			panic("episowrite: iso not running");
+		iunlock(ctlr);		/* We could page fault here */
+		nw = putsamples(iso, b+tot, count-tot);
+		ilock(ctlr);
+	}
+	if(iso->state != Qclose)
+		iso->state = Qdone;
+	iunlock(ctlr);
+	err = iso->err;		/* in case it failed early */
+	iso->err = nil;
+	qunlock(iso);
+	poperror();
+	if(err != nil)
+		error(err);
+	diprint("ehci: episowrite: %#p %d bytes\n", iso, tot);
+	return tot;
+}
+
+/*
+ * Data toggle to use after a transfer of count bytes with packets
+ * of maxpkt bytes: count/maxpkt packets are assumed (at least one);
+ * an even number leaves toggle as it is, an odd number flips it
+ * between Tddata0 and Tddata1.
+ */
+static int
+nexttoggle(int toggle, int count, int maxpkt)
+{
+	int npkts;
+
+	npkts = count / maxpkt;
+	if(npkts == 0)
+		npkts = 1;
+	if((npkts % 2) != 0)
+		return toggle == Tddata1 ? Tddata0 : Tddata1;
+	return toggle;
+}
+
+static Td*
+epgettd(Qio *io, int flags, void *a, int count, int maxpkt)	/* allocate a Td for count bytes (copied from a if non-nil) and advance io->toggle; panics if count > Tdmaxpkt */
+{
+	Td *td;
+	uint32_t pa;
+	int i;
+
+	if(count > Tdmaxpkt)
+		panic("ehci: epgettd: too many bytes");
+	td = tdalloc();
+	td->csw = flags | io->toggle | io->tok | count << Tdlenshift |
+		Tderr2 | Tderr1;
+
+	/*
+	 * use the space wasted by alignment as an
+	 * embedded buffer if count bytes fit in there.
+	 */
+	assert(Align > sizeof(Td));
+	if(count <= Align - sizeof(Td)){
+		td->data = td->sbuff;
+		td->buff = nil;	/* nil buff: no separately allocated buffer */
+	}else
+		td->data = td->buff = smalloc(Tdmaxpkt);
+
+	pa = PADDR(td->data);
+	for(i = 0; i < nelem(td->buffer); i++){	/* consecutive 4K pages starting at data */
+		td->buffer[i] = pa;
+		if(i > 0)
+			td->buffer[i] &= ~0xFFF;	/* only buffer[0] keeps the offset within the page */
+		pa += 0x1000;
+	}
+	td->ndata = count;
+	if(a != nil && count > 0)
+		memmove(td->data, a, count);
+	coherence();
+	io->toggle = nexttoggle(io->toggle, count, maxpkt);
+	coherence();
+	return td;
+}
+
+/*
+ * Try to get them idle
+ *
+ * Marks qh Qdone and sets Tdhalt on every Td of the qh; Tds that
+ * were still active also get ndata = 0.
+ */
+static void
+aborttds(Qh *qh)
+{
+	Td *td;
+
+	qh->state = Qdone;
+	coherence();
+	if(qh->sched >= 0 && (qh->eps0 & Qhspeedmask) != Qhhigh)	/* only for qhs with sched >= 0 that are not high speed */
+		qh->eps0 |= Qhint;	/* inactivate on next pass */
+	coherence();
+	for(td = qh->tds; td != nil; td = td->next){
+		if(td->csw & Tdactive)
+			td->ndata = 0;
+		td->csw |= Tdhalt;
+		coherence();
+	}
+}
+
+/*
+ * Some controllers do not post the usb/error interrupt after
+ * the work has been done. It seems that we must poll for them.
+ */
+static int
+workpending(void *a)
+{
+	Ctlr *c = a;
+
+	/* sleep condition for ehcipoll: true while requests are outstanding */
+	return c->nreqs > 0;
+}
+
+static void
+ehcipoll(void* a)	/* body of the "ehcipoll" kproc started by pollcheck(): calls ehciintr() for hp while requests are outstanding; never returns */
+{
+	Mach *m = machp();
+	Hci *hp;
+	Ctlr *ctlr;
+	Poll *poll;
+	int i;
+
+	hp = a;
+	ctlr = hp->aux;
+	poll = &ctlr->poll;
+	for(;;){
+		if(ctlr->nreqs == 0){	/* idle: sleep until workpending() holds */
+			if(0)ddprint("ehcipoll %#p sleep\n", ctlr->capio);
+			sleep(poll, workpending, ctlr);
+			if(0)ddprint("ehcipoll %#p awaken\n", ctlr->capio);
+		}
+		for(i = 0; i < 16 && ctlr->nreqs > 0; i++)	/* up to 16 back-to-back passes while they find work */
+			if(ehciintr(hp) == 0)
+				 break;
+		do{	/* then one pass per tsleep of 1 until no request is left */
+			tsleep(&m->externup->sleep, return0, 0, 1);
+			ehciintr(hp);
+		}while(ctlr->nreqs > 0);
+	}
+}
+
+/*
+ * Start the ehcipoll kproc for this controller if polling has been
+ * requested (poll->must) and no poller exists yet (poll->does).
+ * The condition is tested once without the lock as a cheap filter
+ * and again under lock(poll) so only one kproc is ever created.
+ */
+static void
+pollcheck(Hci *hp)
+{
+	Ctlr *ctlr;
+	Poll *poll;
+
+	ctlr = hp->aux;
+	poll = &ctlr->poll;
+
+	if(poll->must != 0 && poll->does == 0){
+		lock(poll);
+		if(poll->must != 0 && poll->does == 0){
+			poll->does++;
+			print("ehci %#p: polling\n", ctlr->capio);
+			kproc("ehcipoll", ehcipoll, hp);
+		}
+		unlock(poll);
+	}
+}
+
+/* sleep condition for epiowait: true once the Qh is no longer in Qrun */
+static int
+epiodone(void *a)
+{
+	return ((Qh*)a)->state != Qrun;
+}
+
+/*
+ * Wait for the request installed on io->qh to finish, then retire it.
+ * tmout == 0 sleeps without a timeout; otherwise tsleep(tmout) is used.
+ * An error raised while sleeping is counted as a timeout.
+ * On timeout the Tds are aborted and io->err is set.
+ * In every case the Tds are unlinked from the Qh, load is subtracted
+ * from ctlr->load and ctlr->nreqs is decremented before returning.
+ */
+static void
+epiowait(Hci *hp, Qio *io, int tmout, uint32_t load)
+{
+	Mach *m = machp();
+	Qh *qh;
+	int timedout;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	qh = io->qh;
+	ddqprint("ehci %#p: io %#p sleep on qh %#p state %s\n",
+		ctlr->capio, io, qh, qhsname[qh->state]);
+	timedout = 0;
+	if(waserror()){
+		dqprint("ehci %#p: io %#p qh %#p timed out\n",
+			ctlr->capio, io, qh);
+		timedout++;
+	}else{
+		if(tmout == 0)
+			sleep(io, epiodone, qh);
+		else
+			tsleep(io, epiodone, qh, tmout);
+		poperror();
+	}
+
+	ilock(ctlr);
+	/* Are we missing interrupts? */
+	if(qh->state == Qrun){
+		/* run the interrupt handler by hand; it is called without the lock held */
+		iunlock(ctlr);
+		ehciintr(hp);
+		ilock(ctlr);
+		if(qh->state == Qdone){
+			/* the manual call completed the request: ask for a poller */
+			dqprint("ehci %#p: polling required\n", ctlr->capio);
+			ctlr->poll.must = 1;
+			pollcheck(hp);
+		}
+	}
+
+	if(qh->state == Qrun){
+//		dqprint("ehci %#p: io %#p qh %#p timed out (no intr?)\n",
+		iprint("ehci %#p: io %#p qh %#p timed out (no intr?)\n",
+			ctlr->capio, io, qh);
+		timedout = 1;
+	}else if(qh->state != Qdone && qh->state != Qclose)
+		panic("ehci: epio: queue state %d", qh->state);
+	if(timedout){
+		aborttds(io->qh);
+		io->err = "request timed out";
+		/* drop the lock while waiting Abortdelay after the abort */
+		iunlock(ctlr);
+		if(!waserror()){
+			tsleep(&m->externup->sleep, return0, 0, Abortdelay);
+			poperror();
+		}
+		ilock(ctlr);
+	}
+	if(qh->state != Qclose)
+		qh->state = Qidle;
+	coherence();
+	qhlinktd(qh, nil);
+	ctlr->load -= load;
+	ctlr->nreqs--;
+	iunlock(ctlr);
+}
+
+/*
+ * Non iso I/O.
+ * To make it work for control transfers, the caller may
+ * lock the Qio for the entire control transfer.
+ */
+static int32_t
+epio(Ep *ep, Qio *io, void *a, int32_t count, int mustlock)
+{
+	Mach *m = machp();
+	int saved, ntds, tmout;
+	int32_t n, tot;
+	uint32_t load;
+	char *err;
+	char buf[128];
+	unsigned char *c;
+	Ctlr *ctlr;
+	Qh* qh;
+	Td *td, *ltd, *td0, *ntd;
+
+	qh = io->qh;
+	ctlr = ep->hp->aux;
+	io->debug = ep->debug;
+	tmout = ep->tmout;
+	ddeprint("epio: %s ep%d.%d io %#p count %ld load %uld\n",
+		io->tok == Tdtokin ? "in" : "out",
+		ep->dev->nb, ep->nb, io, count, ctlr->load);
+	if((ehcidebug > 1 || ep->debug > 1) && io->tok != Tdtokin){
+		seprintdata(buf, buf+sizeof(buf), a, count);
+		print("echi epio: user data: %s\n", buf);
+	}
+	/* when asked to, hold the Qio qlock for the whole transfer */
+	if(mustlock){
+		qlock(io);
+		if(waserror()){
+			qunlock(io);
+			nexterror();
+		}
+	}
+	io->err = nil;
+	ilock(ctlr);
+	if(qh->state == Qclose){	/* Tds released by cancelio */
+		iunlock(ctlr);
+		error(io->err ? io->err : Eio);
+	}
+	if(qh->state != Qidle)
+		panic("epio: qh not idle");
+	qh->state = Qinstall;
+	iunlock(ctlr);
+
+	/*
+	 * Build the Td chain td0..ltd.  Each Td carries at most the
+	 * largest multiple of ep->maxpkt that fits in Tdmaxpkt.
+	 * The loop runs at least once, so count == 0 yields one empty Td.
+	 * User data is copied into the Tds only when the token is not "in".
+	 */
+	c = a;
+	td0 = ltd = nil;
+	load = tot = 0;
+	do{
+		n = (Tdmaxpkt / ep->maxpkt) * ep->maxpkt;
+		if(count-tot < n)
+			n = count-tot;
+		if(c != nil && io->tok != Tdtokin)
+			td = epgettd(io, Tdactive, c+tot, n, ep->maxpkt);
+		else
+			td = epgettd(io, Tdactive, nil, n, ep->maxpkt);
+		if(td0 == nil)
+			td0 = td;
+		else
+			tdlinktd(ltd, td);
+		ltd = td;
+		tot += n;
+		load += ep->load;
+	}while(tot < count);
+	if(td0 == nil || ltd == nil)
+		panic("epio: no td");
+
+	ltd->csw |= Tdioc;		/* the last one interrupts */
+	coherence();
+
+	ddeprint("ehci: load %uld ctlr load %uld\n", load, ctlr->load);
+	if(ehcidebug > 1 || ep->debug > 1)
+		dumptd(td0, "epio: put: ");
+
+	/* hand the chain to the controller unless the Qh was closed meanwhile */
+	ilock(ctlr);
+	if(qh->state != Qclose){
+		io->iotime = TK2MS(m->ticks);
+		qh->state = Qrun;
+		coherence();
+		qhlinktd(qh, td0);
+		ctlr->nreqs++;
+		ctlr->load += load;
+	}
+	iunlock(ctlr);
+
+	if(ctlr->poll.does)
+		wakeup(&ctlr->poll);
+
+	epiowait(ep->hp, io, tmout, load);
+	if(ehcidebug > 1 || ep->debug > 1){
+		dumptd(td0, "epio: got: ");
+		qhdump(qh);
+	}
+
+	/*
+	 * Collect results: sum ndata of the Tds that completed, copy
+	 * received data back to the caller for "in" Tds, and free
+	 * every Td in the chain.
+	 */
+	tot = 0;
+	c = a;
+	saved = 0;
+	ntds = 0;
+	for(td = td0; td != nil; td = ntd){
+		ntds++;
+		/*
+		 * Use td tok, not io tok, because of setup packets.
+		 * Also, we must save the next toggle value from the
+		 * last completed Td (in case of a short packet, or
+		 * fewer than the requested number of packets in the
+		 * Td being transferred).
+		 */
+		if(td->csw & (Tdhalt|Tdactive))
+			saved++;
+		else{
+			if(!saved){
+				io->toggle = td->csw & Tddata1;
+				coherence();
+			}
+			tot += td->ndata;
+			if(c != nil && (td->csw & Tdtok) == Tdtokin && td->ndata > 0){
+				memmove(c, td->data, td->ndata);
+				c += td->ndata;
+			}
+		}
+		ntd = td->next;
+		tdfree(td);
+	}
+	/* read io->err before releasing the qlock taken above */
+	err = io->err;
+	if(mustlock){
+		qunlock(io);
+		poperror();
+	}
+	ddeprint("epio: io %#p: %d tds: return %ld err '%s'\n",
+		io, ntds, tot, err);
+	if(err == Estalled)
+		return 0;	/* that's our convention */
+	if(err != nil)
+		error(err);
+	if(tot < 0)
+		error(Eio);
+	return tot;
+}
+
+/*
+ * Read up to count bytes from endpoint ep into a, dispatching on the
+ * endpoint's transfer type.  Returns the number of bytes read, or -1
+ * if the transfer type is none of those handled below.
+ */
+static int32_t
+epread(Ep *ep, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Ctlio *cio;
+	Qio *io;
+	Isoio *iso;
+	char buf[160];
+	uint32_t delta;
+
+	ddeprint("ehci: epread\n");
+	if(ep->aux == nil)
+		panic("epread: not open");
+
+	pollcheck(ep->hp);
+
+	switch(ep->ttype){
+	case Tctl:
+		/*
+		 * Control reads return data kept in cio by an earlier
+		 * epctlio(); no I/O is done here.
+		 * ndata < 0: no request was written first.
+		 * ndata == 0: report EOF once, then require a new request.
+		 */
+		cio = ep->aux;
+		qlock(cio);
+		if(waserror()){
+			qunlock(cio);
+			nexterror();
+		}
+		ddeprint("epread ctl ndata %d\n", cio->ndata);
+		if(cio->ndata < 0)
+			error("request expected");
+		else if(cio->ndata == 0){
+			cio->ndata = -1;
+			count = 0;
+		}else{
+			if(count > cio->ndata)
+				count = cio->ndata;
+			if(count > 0)
+				memmove(a, cio->data, count);
+			/* BUG for big transfers */
+			free(cio->data);
+			cio->data = nil;
+			cio->ndata = 0;	/* signal EOF next time */
+		}
+		qunlock(cio);
+		poperror();
+		if(ehcidebug>1 || ep->debug){
+			seprintdata(buf, buf+sizeof(buf), a, count);
+			print("epread: %s\n", buf);
+		}
+		return count;
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tintr:
+		/* wait until at least half a poll interval has passed since the last read */
+		io = ep->aux;
+		delta = TK2MS(m->ticks) - io[OREAD].iotime + 1;
+		if(delta < ep->pollival / 2)
+			tsleep(&m->externup->sleep, return0, 0, ep->pollival/2 - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episoread(ep, iso, a, count);
+	}
+	return -1;
+}
+
+/*
+ * Control transfers are one setup write (data0)
+ * plus zero or more reads/writes (data1, data0, ...)
+ * plus a final write/read with data1 to ack.
+ * For both host to device and device to host we perform
+ * the entire transfer when the user writes the request,
+ * and keep any data read from the device for a later read.
+ * We call epio three times instead of placing all Tds at
+ * the same time because doing so leads to crc/tmout errors
+ * for some devices.
+ * Upon errors on the data phase we must still run the status
+ * phase or the device may cease responding in the future.
+ */
+static int32_t
+epctlio(Ep *ep, Ctlio *cio, void *a, int32_t count)
+{
+	Mach *m = machp();
+	unsigned char *c;
+	int32_t len;
+
+	ddeprint("epctlio: cio %#p ep%d.%d count %ld\n",
+		cio, ep->dev->nb, ep->nb, count);
+	if(count < Rsetuplen)
+		error("short usb comand");
+	qlock(cio);
+	/* discard any data left over from a previous request */
+	free(cio->data);
+	cio->data = nil;
+	cio->ndata = 0;
+	if(waserror()){
+		free(cio->data);
+		cio->data = nil;
+		cio->ndata = 0;
+		qunlock(cio);
+		nexterror();
+	}
+
+	/* set the address if unset and out of configuration state */
+	if(ep->dev->state != Dconfig && ep->dev->state != Dreset)
+		if(cio->usbid == 0){
+			cio->usbid = (ep->nb&Epmax) << 7 | ep->dev->nb&Devmax;
+			coherence();
+			qhsetaddr(cio->qh, cio->usbid);
+		}
+	/* adjust maxpkt if the user has learned a different one */
+	if(qhmaxpkt(cio->qh) != ep->maxpkt)
+		qhsetmaxpkt(cio->qh, ep->maxpkt);
+	/* setup stage: the first Rsetuplen bytes of a, sent with data0 */
+	c = a;
+	cio->tok = Tdtoksetup;
+	cio->toggle = Tddata0;
+	coherence();
+	if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen)
+		error(Eio);
+	a = c + Rsetuplen;
+	count -= Rsetuplen;
+
+	/*
+	 * data stage, starting with data1.
+	 * device-to-host: read the length given in the request into a
+	 * fresh buffer kept in cio->data for a later epread().
+	 * host-to-device: write whatever follows the setup bytes.
+	 */
+	cio->toggle = Tddata1;
+	if(c[Rtype] & Rd2h){
+		cio->tok = Tdtokin;
+		len = GET2(c+Rcount);
+		if(len <= 0)
+			error("bad length in d2h request");
+		if(len > Maxctllen)
+			error("d2h data too large to fit in ehci");
+		a = cio->data = smalloc(len+1);
+	}else{
+		cio->tok = Tdtokout;
+		len = count;
+	}
+	coherence();
+	/* an error in the data stage is recorded as len = -1, not raised */
+	if(len > 0)
+		if(waserror())
+			len = -1;
+		else{
+			len = epio(ep, cio, a, len, 0);
+			poperror();
+		}
+	/* status stage goes in the direction opposite to the data stage */
+	if(c[Rtype] & Rd2h){
+		count = Rsetuplen;
+		cio->ndata = len;
+		cio->tok = Tdtokout;
+	}else{
+		if(len < 0)
+			count = -1;
+		else
+			count = Rsetuplen + len;
+		cio->tok = Tdtokin;
+	}
+	cio->toggle = Tddata1;
+	coherence();
+	epio(ep, cio, nil, 0, 0);
+	qunlock(cio);
+	poperror();
+	ddeprint("epctlio cio %#p return %ld\n", cio, count);
+	return count;
+}
+
+/*
+ * Write count bytes from a to endpoint ep, dispatching on the
+ * endpoint's transfer type.  Returns what the per-type routine
+ * returns, or -1 if the transfer type is none of those handled below.
+ */
+static int32_t
+epwrite(Ep *ep, void *a, int32_t count)
+{
+	Mach *m = machp();
+	Qio *io;
+	Ctlio *cio;
+	Isoio *iso;
+	uint32_t delta;
+
+	pollcheck(ep->hp);
+
+	ddeprint("ehci: epwrite ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux == nil)
+		panic("ehci: epwrite: not open");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		return epctlio(ep, cio, a, count);
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OWRITE], a, count, 1);
+	case Tintr:
+		/* wait until a full poll interval has passed since the last write */
+		io = ep->aux;
+		delta = TK2MS(m->ticks) - io[OWRITE].iotime + 1;
+		if(delta < ep->pollival)
+			tsleep(&m->externup->sleep, return0, 0, ep->pollival - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OWRITE], a, count, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episowrite(ep, iso, a, count);
+	}
+	return -1;
+}
+
+/*
+ * Allocate and initialise the iso->nframes split iso Tds (Sitd) used
+ * when isoopen() finds the device is not high speed.  Td i covers
+ * iso->data + i*maxpkt and is stored in iso->sitdps[] every
+ * ep->pollival frames starting at iso->td0frno.  The Tds are chained
+ * through ->next into a ring.
+ */
+static void
+isofsinit(Ep *ep, Isoio *iso)
+{
+	int32_t left;
+	Sitd *td, *ltd;
+	int i;
+	uint32_t frno;
+
+	left = 0;
+	ltd = nil;
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		td = sitdalloc();
+		td->data = iso->data + i * ep->maxpkt;
+		td->epc = ep->dev->port << Stdportshift;
+		td->epc |= ep->dev->hub << Stdhubshift;
+		td->epc |= ep->nb << Stdepshift;
+		td->epc |= ep->dev->nb << Stddevshift;
+		td->mfs = 034 << Stdscmshift | 1 << Stdssmshift;
+		if(ep->mode == OREAD){
+			td->epc |= Stdin;
+			td->mdata = ep->maxpkt;
+		}else{
+			/*
+			 * samples due in this interval; the remainder of
+			 * the division is carried in left to the next Td.
+			 */
+			td->mdata = (ep->hz+left) * ep->pollival / 1000;
+			td->mdata *= ep->samplesz;
+			left = (ep->hz+left) * ep->pollival % 1000;
+			if(td->mdata > ep->maxpkt){
+				print("ehci: ep%d.%d: size > maxpkt\n",
+					ep->dev->nb, ep->nb);
+				print("size = %ld max = %ld\n",
+					td->mdata,ep->maxpkt);
+				td->mdata = ep->maxpkt;
+			}
+		}
+		coherence();
+
+		iso->sitdps[frno] = td;
+		coherence();
+		sitdinit(iso, td);
+		if(ltd != nil)
+			ltd->next = td;
+		ltd = td;
+		frno = TRUNC(frno+ep->pollival, Nisoframes);
+	}
+	/* close the ring: the last Td points back at the first one */
+	ltd->next = iso->sitdps[iso->td0frno];
+	coherence();
+}
+
+/*
+ * Allocate and initialise the iso->nframes high-speed iso Tds (Itd).
+ * Td i covers 8*iso->maxsize bytes of iso->data and is stored in
+ * iso->itdps[] every ival frames starting at iso->td0frno, where ival
+ * is ep->pollival/8 (at least 1).  The Tds are chained through ->next.
+ *
+ * Each Td's buffer pointer list is filled with consecutive 4K pages
+ * starting at the page holding that Td's own data; the endpoint and
+ * device numbers share word 0 with the first page address.
+ */
+static void
+isohsinit(Ep *ep, Isoio *iso)
+{
+	int ival, p;
+	int32_t left;
+	uint32_t frno, i, pa;
+	Itd *ltd, *td;
+
+	iso->hs = 1;
+	ival = 1;
+	if(ep->pollival > 8)
+		ival = ep->pollival/8;
+	left = 0;
+	ltd = nil;
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		td = itdalloc();
+		td->data = iso->data + i * 8 * iso->maxsize;
+		pa = PADDR(td->data) & ~0xFFF;
+		/*
+		 * index by p (the page), not by i (the frame), and stay
+		 * within the buffer array whatever its length is.
+		 */
+		for(p = 0; p < nelem(td->buffer); p++)
+			td->buffer[p] = pa + p * 0x1000;
+		/* page 0 must be this Td's page; its low bits carry ep and dev */
+		td->buffer[0] |= ep->nb << Itdepshift | ep->dev->nb << Itddevshift;
+		if(ep->mode == OREAD)
+			td->buffer[1] |= Itdin;
+		else
+			td->buffer[1] |= Itdout;
+		td->buffer[1] |= ep->maxpkt << Itdmaxpktshift;
+		td->buffer[2] |= ep->ntds << Itdntdsshift;
+
+		if(ep->mode == OREAD)
+			td->mdata = 8 * iso->maxsize;
+		else{
+			/*
+			 * samples due in this interval; the remainder of
+			 * the division is carried in left to the next Td.
+			 */
+			td->mdata = (ep->hz + left) * ep->pollival / 1000;
+			td->mdata *= ep->samplesz;
+			left = (ep->hz + left) * ep->pollival % 1000;
+		}
+		coherence();
+		iso->itdps[frno] = td;
+		coherence();
+		itdinit(iso, td);
+		if(ltd != nil)
+			ltd->next = td;
+		ltd = td;
+		frno = TRUNC(frno + ival, Nisoframes);
+	}
+}
+
+/*
+ * Set up isochronous I/O for ep (ep->aux already points to a zeroed
+ * Isoio): account for the bandwidth, allocate the Td pointer table and
+ * the data buffer, build the Tds (isohsinit or isofsinit by device
+ * speed) and link them into the controller's periodic frame list.
+ * Raises an error for endpoints that are neither OREAD nor OWRITE and
+ * when fewer than 3 Tds would fit in the iso window.
+ */
+static void
+isoopen(Ctlr *ctlr, Ep *ep)
+{
+	int ival;		/* pollival in ms */
+	int tpf;		/* tds per frame */
+	int i, n, w, woff;
+	uint32_t frno;
+	Isoio *iso;
+
+	iso = ep->aux;
+	switch(ep->mode){
+	case OREAD:
+		iso->tok = Tdtokin;
+		break;
+	case OWRITE:
+		iso->tok = Tdtokout;
+		break;
+	default:
+		error("iso i/o is half-duplex");
+	}
+	iso->usbid = ep->nb << 7 | ep->dev->nb & Devmax;
+	iso->state = Qidle;
+	coherence();
+	iso->debug = ep->debug;
+	/*
+	 * ival is the distance in frames between consecutive Tds.
+	 * For high speed pollival is divided by 8 (minimum 1).
+	 */
+	ival = ep->pollival;
+	tpf = 1;
+	if(ep->dev->speed == Highspeed){
+		tpf = 8;
+		if(ival <= 8)
+			ival = 1;
+		else
+			ival /= 8;
+	}
+	assert(ival != 0);
+	iso->nframes = Nisoframes / ival;
+	if(iso->nframes < 3)
+		error("uhci isoopen bug");	/* we need at least 3 tds */
+	iso->maxsize = ep->ntds * ep->maxpkt;
+	if(ctlr->load + ep->load > 800)
+		print("usb: ehci: bandwidth may be exceeded\n");
+	ilock(ctlr);
+	ctlr->load += ep->load;
+	ctlr->isoload += ep->load;
+	ctlr->nreqs++;
+	dprint("ehci: load %uld isoload %uld\n", ctlr->load, ctlr->isoload);
+	diprint("iso nframes %d pollival %uld ival %d maxpkt %uld ntds %d\n",
+		iso->nframes, ep->pollival, ival, ep->maxpkt, ep->ntds);
+	iunlock(ctlr);
+	if(ctlr->poll.does)
+		wakeup(&ctlr->poll);
+
+	/*
+	 * From here on this cannot raise errors
+	 * unless we catch them and release here all memory allocated.
+	 */
+	assert(ep->maxpkt > 0 && ep->ntds > 0 && ep->ntds < 4);
+	assert(ep->maxpkt <= 1024);
+	iso->tdps = smalloc(sizeof(uintptr) * Nisoframes);
+	iso->data = smalloc(iso->nframes * tpf * ep->ntds * ep->maxpkt);
+	iso->td0frno = TRUNC(ctlr->opio->frno + 10, Nisoframes);
+	/* read: now; write: 1s ahead */
+
+	if(ep->dev->speed == Highspeed)
+		isohsinit(ep, iso);
+	else
+		isofsinit(ep, iso);
+	iso->tdu = iso->tdi = iso->itdps[iso->td0frno];
+	iso->stdu = iso->stdi = iso->sitdps[iso->td0frno];
+	coherence();
+
+	/* make each Td's first word continue with what its frame pointed to */
+	ilock(ctlr);
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		*iso->tdps[frno] = ctlr->frames[frno];
+		frno = TRUNC(frno+ival, Nisoframes);
+	}
+
+	/*
+	 * Iso uses a virtual frame window of Nisoframes, and we must
+	 * fill the actual ctlr frame array by placing ctlr->nframes/Nisoframes
+	 * copies of the window in the frame array.
+	 */
+	assert(ctlr->nframes >= Nisoframes && Nisoframes >= iso->nframes);
+	assert(Nisoframes >= Nintrleafs);
+	n = ctlr->nframes / Nisoframes;
+	for(w = 0; w < n; w++){
+		frno = iso->td0frno;
+		woff = w * Nisoframes;
+		for(i = 0; i < iso->nframes ; i++){
+			assert(woff+frno < ctlr->nframes);
+			assert(iso->tdps[frno] != nil);
+			if(ep->dev->speed == Highspeed)
+				ctlr->frames[woff+frno] = PADDR(iso->tdps[frno])
+					|Litd;
+			else
+				ctlr->frames[woff+frno] = PADDR(iso->tdps[frno])
+					|Lsitd;
+			coherence();
+			/*
+			 * step by ival, the same stride used to place the
+			 * Tds above; stepping by ep->pollival skipped most
+			 * high-speed Tds whenever pollival > 1.
+			 */
+			frno = TRUNC(frno+ival, Nisoframes);
+		}
+	}
+	coherence();
+	iso->next = ctlr->iso;
+	ctlr->iso = iso;
+	coherence();
+	iso->state = Qdone;
+	iunlock(ctlr);
+	if(ehcidebug > 1 || iso->debug >1)
+		isodump(iso, 0);
+}
+
+/*
+ * Allocate the endpoint and set it up for I/O
+ * in the controller. This must follow what's said
+ * in Ep regarding configuration, including perhaps
+ * the saved toggles (saved on a previous close of
+ * the endpoint data file by epclose).
+ */
+static void
+epopen(Ep *ep)
+{
+	Mach *m = machp();
+	Ctlr *ctlr;
+	Ctlio *cio;
+	Qio *io;
+	int usbid;
+
+	ctlr = ep->hp->aux;
+	deprint("ehci: epopen ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux != nil)
+		panic("ehci: epopen called with open ep");
+	/* on any error below, release the per-endpoint state and re-raise */
+	if(waserror()){
+		free(ep->aux);
+		ep->aux = nil;
+		nexterror();
+	}
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tiso:
+		ep->aux = smalloc(sizeof(Isoio));
+		isoopen(ctlr, ep);
+		break;
+	case Tctl:
+		cio = ep->aux = smalloc(sizeof(Ctlio));
+		cio->debug = ep->debug;
+		cio->ndata = -1;	/* epread treats this as "no request written yet" */
+		cio->data = nil;
+		if(ep->dev->isroot != 0 && ep->nb == 0)	/* root hub */
+			break;
+		cio->qh = qhalloc(ctlr, ep, cio, "epc");
+		break;
+	case Tbulk:
+		ep->pollival = 1;	/* assume this; doesn't really matter */
+		/* and fall... */
+	case Tintr:
+		/* two Qios, indexed by OREAD and OWRITE; only the used directions get a Qh */
+		io = ep->aux = smalloc(sizeof(Qio)*2);
+		io[OREAD].debug = io[OWRITE].debug = ep->debug;
+		usbid = (ep->nb&Epmax) << 7 | ep->dev->nb &Devmax;
+		assert(ep->pollival != 0);
+		if(ep->mode != OREAD){
+			if(ep->toggle[OWRITE] != 0)
+				io[OWRITE].toggle = Tddata1;
+			else
+				io[OWRITE].toggle = Tddata0;
+			io[OWRITE].tok = Tdtokout;
+			io[OWRITE].usbid = usbid;
+			io[OWRITE].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */
+			io[OWRITE].qh = qhalloc(ctlr, ep, io+OWRITE, "epw");
+		}
+		if(ep->mode != OWRITE){
+			if(ep->toggle[OREAD] != 0)
+				io[OREAD].toggle = Tddata1;
+			else
+				io[OREAD].toggle = Tddata0;
+			io[OREAD].tok = Tdtokin;
+			io[OREAD].usbid = usbid;
+			io[OREAD].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */
+			io[OREAD].qh = qhalloc(ctlr, ep, io+OREAD, "epr");
+		}
+		break;
+	}
+	coherence();
+	if(ehcidebug>1 || ep->debug)
+		dump(ep->hp);
+	deprint("ehci: epopen done\n");
+	poperror();
+}
+
+/*
+ * Cancel any I/O in progress on io and release its Qh.
+ * Does nothing if io is nil, has no Qh, or the Qh is already closed.
+ * Otherwise the Tds are aborted, the Qh is marked Qclose, and after
+ * waiting Abortdelay and for a running epio (which holds the Qio
+ * qlock) to leave, the Qh is freed.
+ */
+static void
+cancelio(Ctlr *ctlr, Qio *io)
+{
+	Mach *m = machp();
+	Qh *qh;
+
+	/* test io before touching io->qh */
+	if(io == nil)
+		return;
+	ilock(ctlr);
+	qh = io->qh;
+	if(qh == nil || qh->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	dqprint("ehci: cancelio for qh %#p state %s\n",
+		qh, qhsname[qh->state]);
+	aborttds(qh);
+	qh->state = Qclose;
+	iunlock(ctlr);
+	if(!waserror()){
+		tsleep(&m->externup->sleep, return0, 0, Abortdelay);
+		poperror();
+	}
+	wakeup(io);
+	qlock(io);
+	/* wait for epio if running */
+	qunlock(io);
+
+	qhfree(ctlr, qh);
+	io->qh = nil;
+}
+
+/*
+ * Tear down isochronous I/O for iso: give back its bandwidth, remove
+ * it from the controller's iso list, unlink every Td from the periodic
+ * frame list, wait for I/O to cease, and free the Tds, the Td pointer
+ * table and the data buffer.  pollival and load are the endpoint's
+ * poll interval and load.  Does nothing if iso is already closed.
+ */
+static void
+cancelisoio(Ctlr *ctlr, Isoio *iso, int pollival, uint32_t load)
+{
+	Mach *m = machp();
+	int frno, i, ival, n, t, w, woff;
+	uint32_t *lp, *tp;
+	Isoio **il;
+	Itd *td;
+	Sitd *std;
+
+	/*
+	 * Tds sit every ival frames.  For high speed that is pollival/8
+	 * (at least 1), the stride isohsinit used to place them; stepping
+	 * by pollival itself would revisit frames and skip Tds.
+	 */
+	ival = pollival;
+	if(iso->hs != 0){
+		if(pollival <= 8)
+			ival = 1;
+		else
+			ival = pollival/8;
+	}
+
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	ctlr->nreqs--;
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("bad iso state");
+	iso->state = Qclose;
+	coherence();
+	if(ctlr->isoload < load)
+		panic("ehci: low isoload");
+	ctlr->isoload -= load;
+	ctlr->load -= load;
+	for(il = &ctlr->iso; *il != nil; il = &(*il)->next)
+		if(*il == iso)
+			break;
+	if(*il == nil)
+		panic("cancleiso: not found");
+	*il = iso->next;
+
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		tp = iso->tdps[frno];
+		/* stop the controller from processing or interrupting on this Td */
+		if(iso->hs != 0){
+			td = iso->itdps[frno];
+			for(t = 0; t < nelem(td->csw); t++)
+				td->csw[t] &= ~(Itdioc|Itdactive);
+		}else{
+			std = iso->sitdps[frno];
+			std->csw &= ~(Stdioc|Stdactive);
+		}
+		coherence();
+		/* find the link that points at this Td and bypass the Td */
+		for(lp = &ctlr->frames[frno]; !(*lp & Lterm);
+		    lp = &LPTR(*lp)[0])
+			if(LPTR(*lp) == tp)
+				break;
+		if(*lp & Lterm)
+			panic("cancelisoio: td not found");
+		*lp = tp[0];
+		/*
+		 * Iso uses a virtual frame window of Nisoframes, and we must
+		 * restore pointers in copies of the window kept at ctlr->frames.
+		 */
+		if(lp == &ctlr->frames[frno]){
+			n = ctlr->nframes / Nisoframes;
+			for(w = 1; w < n; w++){
+				woff = w * Nisoframes;
+				ctlr->frames[woff+frno] = *lp;
+			}
+		}
+		coherence();
+		frno = TRUNC(frno+ival, Nisoframes);
+	}
+	iunlock(ctlr);
+
+	/*
+	 * wakeup anyone waiting for I/O and
+	 * wait to be sure no I/O is in progress in the controller.
+	 * and then wait to be sure episo* is no longer running.
+	 */
+	wakeup(iso);
+	diprint("cancelisoio iso %#p waiting for I/O to cease\n", iso);
+	tsleep(&m->externup->sleep, return0, 0, 5);
+	qlock(iso);
+	qunlock(iso);
+	diprint("cancelisoio iso %#p releasing iso\n", iso);
+
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		if(iso->hs != 0)
+			itdfree(iso->itdps[frno]);
+		else
+			sitdfree(iso->sitdps[frno]);
+		iso->tdps[frno] = nil;
+		frno = TRUNC(frno+ival, Nisoframes);
+	}
+	free(iso->tdps);
+	iso->tdps = nil;
+	free(iso->data);
+	iso->data = nil;
+	coherence();
+}
+
+/*
+ * Close endpoint ep: cancel its I/O according to the transfer type
+ * and free the per-endpoint state in ep->aux.  For bulk and interrupt
+ * endpoints the current data toggles are saved in ep->toggle[] so a
+ * later epopen can restore them.
+ */
+static void
+epclose(Ep *ep)
+{
+	Qio *io;
+	Ctlio *cio;
+	Isoio *iso;
+	Ctlr *ctlr;
+
+	ctlr = ep->hp->aux;
+	deprint("ehci: epclose ep%d.%d\n", ep->dev->nb, ep->nb);
+
+	if(ep->aux == nil)
+		panic("ehci: epclose called with closed ep");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		cancelio(ctlr, cio);
+		free(cio->data);
+		cio->data = nil;
+		break;
+	case Tintr:
+	case Tbulk:
+		io = ep->aux;
+		/* toggle[] holds 1 for data1 and 0 for data0 */
+		ep->toggle[OREAD] = ep->toggle[OWRITE] = 0;
+		if(ep->mode != OWRITE){
+			cancelio(ctlr, &io[OREAD]);
+			if(io[OREAD].toggle == Tddata1)
+				ep->toggle[OREAD] = 1;
+		}
+		if(ep->mode != OREAD){
+			cancelio(ctlr, &io[OWRITE]);
+			if(io[OWRITE].toggle == Tddata1)
+				ep->toggle[OWRITE] = 1;
+		}
+		coherence();
+		break;
+	case Tiso:
+		iso = ep->aux;
+		cancelisoio(ctlr, iso, ep->pollival, ep->load);
+		break;
+	default:
+		panic("epclose: bad ttype");
+	}
+	free(ep->aux);
+	ep->aux = nil;
+}
+
+/*
+ * return the smallest i such that (1 << i) >= n,
+ * that is, ceil(log2(n)); the result is 0 for n <= 1.
+ */
+static int
+flog2(int n)
+{
+	int i;
+
+	/* never evaluate 1 << i for an i that would overflow a signed int */
+	for(i = 0; i < 8*(int)sizeof(int) - 1 && (1 << i) < n; i++)
+		;
+	return i;
+}
+
+/*
+ * build the periodic scheduling tree:
+ * framesize must be a multiple of the tree size
+ */
+static void
+mkqhtree(Ctlr *ctlr)
+{
+	int i, n, d, o, leaf0, depth;
+	uint32_t leafs[Nintrleafs];
+	Qh *qh;
+	Qh **tree;
+	Qtree *qt;
+
+	/* a complete binary tree with Nintrleafs leaves has n nodes */
+	depth = flog2(Nintrleafs);
+	n = (1 << (depth+1)) - 1;
+	qt = mallocz(sizeof(*qt), 1);
+	if(qt == nil)
+		panic("ehci: mkqhtree: no memory");
+	qt->nel = n;
+	qt->depth = depth;
+	/* one bw entry per node: size by the element, not by the pointer */
+	qt->bw = mallocz(n * sizeof(*qt->bw), 1);
+	qt->root = tree = mallocz(n * sizeof(Qh *), 1);
+	if(qt->bw == nil || tree == nil)
+		panic("ehci: mkqhtree: no memory");
+	/* node i links to its parent (i-1)/2; node 0 is the root */
+	for(i = 0; i < n; i++){
+		tree[i] = qh = edalloc();
+		if(qh == nil)
+			panic("ehci: mkqhtree: no memory");
+		qh->nlink = qh->alink = qh->link = Lterm;
+		qh->csw = Tdhalt;
+		qh->state = Qidle;
+		coherence();
+		if(i > 0)
+			qhlinkqh(tree[i], tree[(i-1)/2]);
+	}
+	ctlr->ntree = i;
+	dprint("ehci: tree: %d endpoints allocated\n", i);
+
+	/* distribute leaves evenly round the frame list */
+	leaf0 = n / 2;
+	for(i = 0; i < Nintrleafs; i++){
+		/* o is i with its low depth bits reversed */
+		o = 0;
+		for(d = 0; d < depth; d++){
+			o <<= 1;
+			if(i & (1 << d))
+				o |= 1;
+		}
+		if(leaf0 + o >= n){
+			print("leaf0=%d o=%d i=%d n=%d\n", leaf0, o, i, n);
+			break;
+		}
+		leafs[i] = PADDR(tree[leaf0 + o]) | Lqh;
+	}
+	/* replicate the leaf pointers over the whole frame list */
+	assert((ctlr->nframes % Nintrleafs) == 0);
+	for(i = 0; i < ctlr->nframes; i += Nintrleafs){
+		memmove(ctlr->frames + i, leafs, sizeof leafs);
+		coherence();
+	}
+	ctlr->tree = qt;
+	coherence();
+}
+
+/*
+ * Allocate the controller's periodic frame list, terminate every
+ * entry, tell the controller where it is, and then create the async
+ * list head and the periodic Qh tree.
+ */
+void
+ehcimeminit(Ctlr *ctlr)
+{
+	int i, frsize;
+	Eopio *opio;
+
+	opio = ctlr->opio;
+	frsize = ctlr->nframes * sizeof(uint32_t);
+	assert((frsize & 0xFFF) == 0);		/* must be 4k aligned */
+	/* the list is aligned to its own size */
+	ctlr->frames = xspanalloc(frsize, frsize, 0);
+	if(ctlr->frames == nil)
+		panic("ehci reset: no memory");
+
+	for (i = 0; i < ctlr->nframes; i++)
+		ctlr->frames[i] = Lterm;
+	opio->frbase = PADDR(ctlr->frames);
+	opio->frno = 0;
+	coherence();
+
+	qhalloc(ctlr, nil, nil, nil);	/* init async list */
+	mkqhtree(ctlr);			/* init sync list */
+	edfree(edalloc());		/* try to get some ones pre-allocated */
+
+	dprint("ehci %#p flb %#lux frno %#lux\n",
+		ctlr->capio, opio->frbase, opio->frno);
+}
+
+/*
+ * Hci init entry point (installed by ehcilinkage): enable interrupts
+ * and both schedules, start the controller, route the ports and
+ * power them.  ctlrno counts how many controllers have been
+ * initialised so far.
+ */
+static void
+init(Hci *hp)
+{
+	Ctlr *ctlr;
+	Eopio *opio;
+	int i;
+	static int ctlrno;
+
+	hp->highspeed = 1;
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	dprint("ehci %#p init\n", ctlr->capio);
+
+	ilock(ctlr);
+	/*
+	 * Unless we activate frroll interrupt
+	 * some machines won't post other interrupts.
+	 */
+	/* NOTE(review): no frame-rollover bit appears in the mask below -- confirm */
+	opio->intr = Iusb|Ierr|Iportchg|Ihcerr|Iasync;
+	coherence();
+	opio->cmd |= Cpse;
+	coherence();
+	opio->cmd |= Case;
+	coherence();
+	ehcirun(ctlr, 1);
+	/*
+	 * route all ports by default to only one ehci (the first).
+	 * it's not obvious how multiple ehcis could work and on some
+	 * machines, setting Callmine on all ehcis makes the machine seize up.
+	 */
+	opio->config = (ctlrno == 0? Callmine: 0);
+	coherence();
+
+	for (i = 0; i < hp->nports; i++)
+		opio->portsc[i] = Pspower;
+	iunlock(ctlr);
+	if(ehcidebug > 1)
+		dump(hp);
+	ctlrno++;
+}
+
+/*
+ * Fill in hp's operations with this driver's entry points
+ * and name the controller type.
+ */
+void
+ehcilinkage(Hci *hp)
+{
+	hp->type = "ehci";
+
+	/* controller-wide operations */
+	hp->init = init;
+	hp->interrupt = interrupt;
+	hp->dump = dump;
+//	hp->shutdown = shutdown;
+//	hp->debug = setdebug;
+
+	/* root hub ports */
+	hp->portstatus = portstatus;
+	hp->portreset = portreset;
+	hp->portenable = portenable;
+
+	/* endpoints */
+	hp->epopen = epopen;
+	hp->epclose = epclose;
+	hp->epwrite = epwrite;
+	hp->epread = epread;
+	hp->seprintep = seprintep;
+}

+ 292 - 0
sys/src/9/port/xalloc.c

@@ -0,0 +1,292 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+enum
+{
+	Nhole		= 128,			/* entries in the static Hole array */
+	Magichole	= 0x484F4C45,			/* HOLE */
+};
+
+typedef struct Hole Hole;
+typedef struct Xalloc Xalloc;
+typedef struct Xhdr Xhdr;
+
+/*
+ * One contiguous run of free memory, kept on a linked list.
+ * NOTE(review): addr, size and top are 32 bits wide while xhole()
+ * receives a uintmem address; presumably addresses that do not fit
+ * in 32 bits would be truncated here -- confirm.
+ */
+struct Hole
+{
+	uint32_t	addr;	/* start of the hole; xallocz passes it to KADDR */
+	uint32_t	size;	/* bytes in the hole */
+	uint32_t	top;	/* addr + size */
+	Hole*	link;	/* next entry on the same list */
+};
+
+/* header stored in front of every block handed out by xallocz */
+struct Xhdr
+{
+	uint32_t	size;	/* bytes in the block, header included */
+	uint32_t	magix;	/* Magichole; checked by xfree and xmerge */
+	char	data[];	/* what the caller gets */
+};
+
+/* allocator state; the embedded Lock is taken with ilock by xallocz and xhole */
+struct Xalloc
+{
+	Lock;
+	Hole	hole[Nhole];	/* storage for all Hole entries */
+	Hole*	flist;	/* unused Hole entries */
+	Hole*	table;	/* holes describing free memory */
+};
+
+static Xalloc	xlists;
+
+/*
+ * Allocate size bytes from the first hole that is large enough.
+ * The block is preceded by an Xhdr recording its size and a magic
+ * number.  If zero is set the block is cleared.
+ * Returns nil if no hole is large enough, or if size is so large
+ * that adding the header overhead would wrap around.
+ */
+void*
+xallocz(uint32_t size, int zero)
+{
+	Xhdr *p;
+	Hole *h, **l;
+	uint32_t overhead;
+
+	/* add room for magix & size overhead, round up to nearest vlong */
+	overhead = BY2V + offsetof(Xhdr, data[0]);
+	if(size > ~(uint32_t)0 - overhead)
+		return nil;	/* would wrap and hand out a too-small block */
+	size += overhead;
+	size &= ~(BY2V-1);
+
+	ilock(&xlists);
+	l = &xlists.table;
+	for(h = *l; h; h = h->link) {
+		if(h->size >= size) {
+			/* carve the block off the front of the hole */
+			p = (Xhdr*)KADDR(h->addr);
+			h->addr += size;
+			h->size -= size;
+			if(h->size == 0) {
+				/* hole used up: return its entry to the free list */
+				*l = h->link;
+				h->link = xlists.flist;
+				xlists.flist = h;
+			}
+			iunlock(&xlists);
+			if(zero)
+				memset(p, 0, size);
+			p->magix = Magichole;
+			p->size = size;
+			return p->data;
+		}
+		l = &h->link;
+	}
+	iunlock(&xlists);
+	return nil;
+}
+
+/* allocate size bytes of zeroed memory; nil if xallocz finds none */
+void*
+xalloc(uint32_t size)
+{
+	void *p;
+
+	p = xallocz(size, 1);
+	return p;
+}
+
+/*
+ * Give the size bytes starting at addr to the allocator.
+ * The hole table is kept sorted by address.  The new range is merged
+ * with a hole that ends at addr or one that starts at addr+size when
+ * there is one; otherwise a new entry is taken from the free list.
+ * If no entry is left the memory is dropped and a message printed.
+ */
+void
+xhole(uintmem addr, uint32_t size)
+{
+	uint32_t top;
+	Hole *h, *c, **l;
+
+	if(size == 0)
+		return;
+
+	top = addr + size;
+	ilock(&xlists);
+	l = &xlists.table;
+	for(h = *l; h; h = h->link) {
+		if(h->top == addr) {
+			/* extends h upwards; maybe it now touches the next hole too */
+			h->size += size;
+			h->top = h->addr+h->size;
+			c = h->link;
+			if(c && h->top == c->addr) {
+				h->top += c->size;
+				h->size += c->size;
+				h->link = c->link;
+				c->link = xlists.flist;
+				xlists.flist = c;
+			}
+			iunlock(&xlists);
+			return;
+		}
+		if(h->addr > addr)
+			break;
+		l = &h->link;
+	}
+	/* h is the first hole above addr, if any: extend it downwards */
+	if(h && top == h->addr) {
+		h->addr -= size;
+		h->size += size;
+		iunlock(&xlists);
+		return;
+	}
+
+	if(xlists.flist == nil) {
+		iunlock(&xlists);
+		print("xfree: no free holes, leaked %lud bytes\n", size);
+		return;
+	}
+
+	/* insert a new hole before h, at the position l remembered */
+	h = xlists.flist;
+	xlists.flist = h->link;
+	h->addr = addr;
+	h->top = top;
+	h->size = size;
+	h->link = *l;
+	*l = h;
+	iunlock(&xlists);
+}
+
+/*
+ * Print the number of unused Hole entries and the total number of
+ * bytes in the hole table.  The lists are walked without taking the
+ * allocator lock.
+ */
+void
+xsummary(void)
+{
+	int i;
+	Hole *h;
+
+	i = 0;
+	for(h = xlists.flist; h; h = h->link)
+		i++;
+
+	print("%d holes free", i);
+	i = 0;
+	for(h = xlists.table; h; h = h->link) {
+		/* per-hole listing, compiled in but disabled */
+		if (0) {
+			print("addr %#.8lux top %#.8lux size %lud\n",
+				h->addr, h->top, h->size);
+			delay(10);
+		}
+		i += h->size;
+		/* guard against an entry linked to itself */
+		if (h == h->link) {
+			print("xsummary: infinite loop broken\n");
+			break;
+		}
+	}
+	print(" %d bytes free\n", i);
+}
+
+/*
+ * Allocate size bytes rounded up to an align boundary and, when
+ * span > 2, first rounded to a span boundary with the slack on both
+ * sides handed back through xhole.  align and span are used as bit
+ * masks, so they are expected to be powers of two.
+ * Panics if the underlying xalloc fails; never returns nil.
+ */
+void*
+xspanalloc(uint32_t size, int align, uint32_t span)
+{
+	uint64_t a, v, t;
+
+	a = (uint64_t)xalloc(size+align+span);
+	if(a == 0)
+		panic("xspanalloc: %lud %d %lux", size, align, span);
+
+	if(span > 2) {
+		/*
+		 * build the mask in 64 bits: ~(span-1) computed in 32 bits
+		 * would zero-extend and clear the top half of the address.
+		 */
+		v = (a + span) & ~((uint64_t)span-1);
+		t = v - a;
+		if(t > 0)
+			xhole(PADDR(UINT2PTR(a)), t);
+		t = a + span - v;
+		if(t > 0)
+			xhole(PADDR(UINT2PTR(v+size+align)), t);
+	}
+	else
+		v = a;
+
+	if(align > 1)
+		v = (v + align) & ~((uint64_t)align-1);
+
+	return (void*)v;
+}
+
+/* compiled only when WTF is defined */
+#ifdef WTF
+/*
+ * Put every Hole entry on the free list, then walk conf.mem giving
+ * the first kpages pages to the kernel allocator (via xhole) and
+ * recording whatever is left of each bank in Palloc.mem.
+ */
+void
+xinit(void)
+{
+	int i, n, upages, kpages;
+	//uint32_t maxpages;
+	Confmem *m;
+	Pallocmem *pm;
+	Hole *h, *eh;
+
+	/* chain hole[0..Nhole-1] together as the free list */
+	eh = &xlists.hole[Nhole-1];
+	for(h = xlists.hole; h < eh; h++)
+		h->link = h+1;
+
+	xlists.flist = xlists.hole;
+
+	upages = conf.upages;
+	kpages = conf.npage - upages;
+	pm = Palloc.mem;
+	for(i=0; i<nelem(conf.mem); i++){
+		m = &conf.mem[i];
+		n = m->npage;
+		if(n > kpages)
+			n = kpages;
+		/* don't try to use non-KADDR-able memory for kernel */
+		//maxpages = cankaddr(m->base)/BIGPGSZ;
+		//if(n > maxpages)
+		//	n = maxpages;
+		/* first give to kernel */
+		if(n > 0){
+			m->kbase = (uint64_t)KADDR(m->base);
+			m->klimit = (uint64_t)KADDR(m->base+n*BIGPGSZ);
+			xhole(m->base, n*BIGPGSZ);
+			kpages -= n;
+		}
+		/* if anything left over, give to user */
+		if(n < m->npage){
+			if(pm >= Palloc.mem+nelem(Palloc.mem)){
+				print("xinit: losing %lud pages\n", m->npage-n);
+				continue;
+			}
+			pm->base = m->base+n*BIGPGSZ;
+			pm->npage = m->npage - n;
+			pm++;
+		}
+	}
+//	xsummary();			/* call it from main if desired */
+}
+#endif
+
+/*
+ * Return a block obtained from xallocz to the allocator.
+ * Panics if the header in front of p does not carry Magichole.
+ */
+void
+xfree(void *p)
+{
+	uint64_t base;
+	Xhdr *hdr;
+
+	/* step back from the data to the header that precedes it */
+	base = (uint64_t)p - offsetof(Xhdr, data[0]);
+	hdr = (Xhdr*)base;
+	if(hdr->magix != Magichole) {
+		xsummary();
+		panic("xfree(%#p) %#ux != %#lux", p, Magichole, hdr->magix);
+	}
+	xhole(PADDR(UINT2PTR(base)), hdr->size);
+}
+
+/*
+ * Merge block vq into block vp if vq starts exactly where vp ends.
+ * Returns 1 if the blocks were merged and 0 otherwise.
+ * Panics, after dumping the words around the bad header, if either
+ * block's header does not carry Magichole.
+ */
+int
+xmerge(void *vp, void *vq)
+{
+	Xhdr *p, *q;
+
+	p = (Xhdr*)(((uint64_t)vp - offsetof(Xhdr, data[0])));
+	q = (Xhdr*)(((uint64_t)vq - offsetof(Xhdr, data[0])));
+	if(p->magix != Magichole || q->magix != Magichole) {
+		int i;
+		uint32_t *wd;
+		void *badp;
+
+		xsummary();
+		badp = (p->magix != Magichole? p: q);
+		/* print 24 words, starting 12 before the bad header, which is marked */
+		wd = (uint32_t *)badp - 12;
+		for (i = 24; i-- > 0; ) {
+			print("%#p: %lux", wd, *wd);
+			if (wd == badp)
+				print(" <-");
+			print("\n");
+			wd++;
+		}
+		panic("xmerge(%#p, %#p) bad magic %#lux, %#lux",
+			vp, vq, p->magix, q->magix);
+	}
+	if((unsigned char*)p+p->size == (unsigned char*)q) {
+		p->size += q->size;
+		return 1;
+	}
+	return 0;
+}