Browse Source

Plan 9 from Bell Labs 2009-11-14

David du Colombier 14 years ago
parent
commit
1588b04715

+ 21 - 8
sys/doc/port.ms

@@ -67,9 +67,7 @@ The MIPS compiler
 This compiler generates code for the R2000, R3000, and R4000 machines configured
 to be big-endians.  The compiler generates no R4000-specific instructions
 although the assembler and loader support the new user-mode instructions.
-There is no support for little-endian machines.
-(A little-endian port exists, but is not included in the distribution.
-Contact us if you need it.)
+There are options to generate code for little-endian machines.
 Considering its speed, the Plan 9 compiler generates good code,
 but the commercial
 MIPS compiler with all the stops pulled out consistently beats it
@@ -169,7 +167,8 @@ The PowerPC compiler
 .PP
 The PowerPC compiler supports the 32-bit PowerPC architecture only;
 it does not support either the 64-bit extensions or the POWER compatibility instructions.
-It has been used for production operating system work on the 603, 603e, 604e, 821, 823, and 860.
+It has been used for production operating system work on the 603, 603e, 604e, 821, 823, and 860,
+and experimental work on the 405, 440 and 450.
 On the 8xx floating-point instructions must be emulated.
 Instruction scheduling is not implemented; otherwise the code generated
 is similar to that for the other load-store architectures.
@@ -178,14 +177,15 @@ counter register, several condition code registers, and multiply-accumulate
 instructions, but they are sometimes
 used by assembly language routines in the libraries.
 .SH
-The Acorn ARM compiler
+The ARM compiler
 .PP
 The ARM compiler is fairly solid; it has been used for some production
 operating system work including Inferno and the Plan 9 kernel
-for the iPAQ, which uses a StrongArm SA1.
+for the iPAQ, which uses a StrongArm SA1, and the Sheevaplug.
 The compiler supports the ARMv4 architecture;
-it does not support the Thumb instruction set.
-It has been used on ARM7500FE processors and the Strongarm SA1 core machines.
+it does not support the Thumb instruction sets.
+It has been used on ARM7500FE, ARM926 and Cortex-A8 processors
+and the Strongarm SA1 core machines.
 The compiler generates instructions for the ARM floating-point coprocessor.
 .SH
 The AMD 29000 compiler
@@ -339,6 +339,18 @@ See the file
 .CW /sys/src/9/bitsy/Booting101
 for information about installing Plan 9 on the iPAQ.
 .SH
+The Sheevaplug operating system
+.PP
+This is an ARM CPU-server kernel that emulates floating-point and
+CAS (compare-and-swap) instructions.
+It is derived from a port of native Inferno to the Sheevaplug
+by Salva Peir\f(Jpó\fP and Mechiel Lukkien.
+There are many features of the Marvell Kirkwood system-on-a-chip
+that it does not exploit.
+Initially, there are drivers for Gigabit Ethernet and the console serial port;
+we hope to add USB and NAND or SPI Flash drivers.
+.ig
+.SH
 The file server
 .PP
 The file server runs on only a handful of distinct machines.
@@ -445,3 +457,4 @@ learn about the WORM software.
 Again, see
 .I fsconfig (8)
 for details.
+..

File diff suppressed because it is too large
+ 561 - 557
sys/doc/port.ps


+ 16 - 3
sys/man/8/booting

@@ -136,7 +136,19 @@ will be downloaded by default.
 Once the kernel is loaded,
 it prompts for the Ethernet
 protocol to use to reach the root file server; request the default.
+.SS Sheevaplug CPU Server
+Type
+.IP
+.EX
+setenv bootdelay 2
+setenv bootcmd 'bootp; bootp; tftp 0x800000; go 0x800000'
+saveenv
+.EE
 .PP
+at U-Boot the first time;
+thereafter, the Sheevaplug will automatically boot via BOOTP and TFTP
+when reset or turned on.
+.ig
 .SS File servers
 The CPU servers and terminals run essentially the same program, but
 the Plan 9 file servers run a distinct system.
@@ -145,14 +157,15 @@ The file servers accept only the commands described in
 on their consoles.
 .SS PC File Server
 Boot the PC file server like a regular PC, loading the appropriate file system kernel.
+..
 .SH "SEE ALSO"
 .IR 9load (8),
 .IR boot (8),
-.IR fs (8),
+.\" .IR fs (8),
 .IR init (8),
 .IR plan9.ini (8)
 .SH SOURCE
 Sources for the various boot programs are under
 .BR /sys/src/boot .
-.SH BUGS
-The file server should be able to boot from its own disk.
+.\" .SH BUGS
+.\" The file server should be able to boot from its own disk.

+ 186 - 0
sys/src/9/kw/arch.c

@@ -0,0 +1,186 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include <tos.h>
+#include "ureg.h"
+
+#include "arm.h"
+
+/*
+ * A lot of this stuff doesn't belong here
+ * but this is a convenient dumping ground for
+ * later sorting into the appropriate buckets.
+ */
+
+/*
+ * Fabricate just enough of a Ureg to trace the kernel stack of a
+ * sleeping process: pc and sp come from p->sched (sp advanced by 4)
+ * and the link register r14 is pointed at sched.
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+	ureg->r14 = PTR2UINT(sched);
+	ureg->sp = p->sched.sp+4;
+	ureg->pc = p->sched.pc;
+}
+
+/*
+ * called in sysfile.c.
+ * despite the name, this insists on 4-byte alignment: an address with
+ * either of its low two bits set draws a note and raises Ebadarg.
+ */
+void
+evenaddr(uintptr addr)
+{
+	if((addr & 3) == 0)
+		return;
+	postnote(up, 1, "sys: odd address", NDebug);
+	error(Ebadarg);
+}
+
+/*
+ * go to user space: refresh the accounting fields of the Tos
+ * structure that sits just below USTKTOP.
+ */
+void
+kexit(Ureg*)
+{
+	Tos *t;
+	uvlong now;
+
+	t = (Tos*)(USTKTOP-sizeof(Tos));
+	now = fastticks(nil);
+
+	/* precise time accounting: add the ticks elapsed since up->kentry */
+	t->kcycles += now - up->kentry;
+	t->pcycles = up->pcycles;
+	t->cyclefreq = Frequency;
+	t->pid = up->pid;
+}
+
+/*
+ *  pc held in the current process's debug registers (up->dbgreg),
+ *  i.e. the user pc the last exception happened at
+ */
+uintptr
+userpc(void)
+{
+	Ureg *ur;
+
+	ur = up->dbgreg;
+	return ur->pc;
+}
+
+/* This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	USED(ureg, pureg, uva, n);
+}
+
+/*
+ *  this is the body for all kproc's
+ */
+static void
+linkproc(void)
+{
+	spllo();
+	up->kpfun(up->kparg);
+	pexit("kproc exiting", 0);
+}
+
+/*
+ *  setup stack and initial PC for a new kernel proc.  This is architecture
+ *  dependent because of the starting stack location
+ */
+void
+kprocchild(Proc *p, void (*func)(void*), void *arg)
+{
+	p->sched.pc = PTR2UINT(linkproc);
+	p->sched.sp = PTR2UINT(p->kstack+KSTACK);
+
+	p->kpfun = func;
+	p->kparg = arg;
+}
+
+/*
+ *  pc output by dumpaproc;
+ *  zero when p has no debug registers recorded
+ */
+uintptr
+dbgpc(Proc* p)
+{
+	Ureg *ur;
+
+	ur = p->dbgreg;
+	return ur != nil? ur->pc: 0;
+}
+
+/*
+ *  set mach dependent process state for a new process
+ */
+void
+procsetup(Proc* p)
+{
+	fpusysprocsetup(p);
+}
+
+/*
+ *  Save the mach dependent part of the process state.
+ */
+void
+procsave(Proc* p)
+{
+	fpuprocsave(p);
+}
+
+/*
+ *  Restore the mach dependent part of the process state.
+ *  Kernel processes (p->kp set) need nothing done.
+ */
+void
+procrestore(Proc* p)
+{
+	uvlong now;
+
+	if(!p->kp){
+		now = lcycles();
+		p->pcycles -= now;
+
+		fpuprocrestore(p);
+	}
+}
+
+int
+userureg(Ureg* ureg)
+{
+	return (ureg->psr & PsrMask) == PsrMusr;
+}
+
+/* decrement *p between splhi and splx; returns the new value */
+long
+_xdec(long *p)
+{
+	int pl, nv;
+
+	pl = splhi();
+	*p -= 1;
+	nv = *p;
+	splx(pl);
+	return nv;
+}
+
+/* increment *p between splhi and splx */
+void
+_xinc(long *p)
+{
+	int pl;
+
+	pl = splhi();
+	*p += 1;
+	splx(pl);
+}
+
+/*
+ * compare-and-swap done in software between splhi and splx:
+ * if the 32-bit word at addr equals old, store new there.
+ * returns non-zero iff the store happened.
+ */
+int
+cas32(void* addr, u32int old, u32int new)
+{
+	int pl, swapped;
+	u32int *p;
+
+	p = addr;
+	pl = splhi();
+	swapped = *p == old;
+	if(swapped)
+		*p = new;
+	splx(pl);
+
+	return swapped;
+}

+ 256 - 0
sys/src/9/kw/archkw.c

@@ -0,0 +1,256 @@
+/*
+ * stuff specific to marvell's kirkwood architecture
+ * as seen in the sheevaplug
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+
+#include "../port/netif.h"
+#include "etherif.h"
+// #include "../port/flashif.h"
+
+typedef struct GpioReg GpioReg;
+struct GpioReg {
+	ulong	dataout;
+	ulong	dataoutena;
+	ulong	blinkena;
+	ulong	datainpol;
+	ulong	datain;
+	ulong	intrcause;
+	ulong	intrmask;
+	ulong	intrlevelmask;
+};
+
+typedef struct L2uncache L2uncache;
+typedef struct L2win L2win;
+struct L2uncache {
+	struct L2win {
+		ulong	base;
+		ulong	size;
+	} win[4];
+};
+
+enum {
+	/* L2win->base bits */
+	L2enable	= 1<<0,
+};
+
+typedef struct Addrmap Addrmap;
+typedef struct Addrwin Addrwin;
+struct Addrmap {
+	struct Addrwin {
+		ulong	ctl;
+		ulong	base;
+		ulong	remaplo;
+		ulong	remaphi;
+	} win[8];
+	ulong	dirba;		/* device internal reg's base addr.: Regbase */
+};
+
+enum {
+	/* Addrwin->ctl bits */
+	Winenable	= 1<<0,
+};
+
+/*
+ * u-boot leaves us with this address map:
+ *
+ * 0 targ 4 attr 0xe8 size 256MB addr 0x9::  remap addr 0x9::	pci mem
+ * 1 targ 1 attr 0x2f size   8MB addr 0xf9:: remap addr 0xf9::	nand flash
+ * 2 targ 4 attr 0xe0 size  16MB addr 0xf::  remap addr 0xc::	pci i/o
+ * 3 targ 1 attr 0x1e size  16MB addr 0xf8:: remap addr 0x0	spi flash
+ * 4 targ 1 attr 0x1d size  16MB addr 0xff::			boot rom
+ * 5 targ 1 attr 0x1e size 128MB addr 0xe8::	disabled	spi flash
+ * 6 targ 1 attr 0x1d size 128MB addr 0xf::	disabled	boot rom
+ * 7 targ 3 attr 0x1  size  64K  addr 0xfb::			crypto
+ */
+#define WINTARG(ctl)	(((ctl) >> 4) & 017)
+#define WINATTR(ctl)	(((ctl) >> 8) & 0377)
+#define WIN64KSIZE(ctl)	(((ctl) >> 16) + 1)
+
+enum {
+	Targflash = 1,
+	Attrspi	= 0x1e,
+};
+
+static void
+addrmap(void)
+{
+	int i, sawspi;
+	ulong ctl, targ, attr, size64k;
+	Addrmap *map;
+	Addrwin *win;
+
+	sawspi = 0;
+	map = (Addrmap *)AddrWin;
+	for (i = 0; i < nelem(map->win); i++) {
+		win = &map->win[i];
+		ctl = win->ctl;
+		targ = WINTARG(ctl);
+		attr = WINATTR(ctl);
+		size64k = WIN64KSIZE(ctl);
+		if (targ == Targflash && attr == Attrspi &&
+		    size64k == 128*MB/(64*1024)) {
+			sawspi = 1;
+			if (!(ctl & Winenable)) {
+				win->ctl |= Winenable;
+				coherence();
+				iprint("address map: enabled window %d for spi:\n"
+					"\ttarg %ld attr %#lux size %,ld addr %#lux",
+					i, targ, attr, size64k * 64*1024,
+					win->base);
+				if (i < 4)
+					iprint(" remap addr %#llux",
+						(uvlong)win->remaphi<<32 |
+						win->remaplo);
+				iprint("\n");
+			}
+		}
+	}
+	if (!sawspi)
+		panic("address map: no existing window for spi");
+//	iprint("dirba %#lux\n", map->dirba);
+}
+
+void
+l2cacheon(void)
+{
+	L2uncache *l2p;
+
+	l1cachesoff();
+	cacheuwbinv();
+
+	/* disable caching of i/o registers */
+	l2p = (L2uncache *)Addrl2cache;
+	memset(l2p, 0, sizeof *l2p);
+	/* l2: don't cache upper half of address space */
+	l2p->win[0].base = 0x80000000 | L2enable;	/* 64K multiple */
+	l2p->win[0].size = (32768-1) << 16;		/* 64K multiples */
+	coherence();
+
+	cacheuwbinv();
+
+	/* writeback requires extra care */
+	CPUCSREG->l2cfg |= L2on | L2ecc | L2writethru;
+	coherence();
+
+	cachedinv();
+
+	l2cachecfgon();
+	l1cacheson();
+
+	print("l2 cache enabled write-through\n");
+}
+
+/* called late in main */
+void
+archconfinit(void)
+{
+	m->cpuhz = 1200*1000*1000;
+	m->delayloop = m->cpuhz/6000;  /* only an initial estimate */
+	addrmap();
+}
+
+void
+archkwlink(void)
+{
+}
+
+/*
+ * fill in ether for controller ctlno.
+ * only controller 0 is known: it is given type "kirkwood" and
+ * 1 is returned; any other number yields -1.
+ */
+int
+archether(int ctlno, Ether *ether)
+{
+	if(ctlno == 0){
+		ether->type = "kirkwood";
+		ether->port = ctlno;
+//		ether->mbps = 1000;
+		return 1;
+	}
+	return -1;
+}
+
+/* LED/USB gpios */
+enum
+{
+	SheevaOEValLow	= 1<<29,        /* USB_PWEN low */
+	SheevaOEValHigh	= 1<<17,        /* LED pin high */
+	SheevaOELow	= ~0,
+	SheevaOEHigh	= ~0,
+};
+
+/* called early in main */
+void
+archreset(void)
+{
+	/* watchdog disabled */
+	TIMERREG->ctl = 0;
+
+	/* configure gpios */
+	((GpioReg*)AddrGpio0)->dataout = SheevaOEValLow;
+	((GpioReg*)AddrGpio0)->dataoutena = SheevaOELow;
+
+	((GpioReg*)AddrGpio1)->dataout = SheevaOEValHigh;
+	((GpioReg*)AddrGpio1)->dataoutena = SheevaOEHigh;
+	coherence();
+
+	CPUCSREG->l2cfg &= ~L2on;
+	coherence();
+}
+
+void
+archreboot(void)
+{
+	iprint("reset!\n");
+	delay(100);
+
+	CPUCSREG->rstout = RstoutSoft;
+	CPUCSREG->softreset = ResetSystem;
+	CPUCSREG->cpucsr = Reset;
+	delay(500);
+
+	splhi();
+	iprint("waiting...");
+	for(;;)
+		idlehands();
+}
+
+long
+lcycles(void)
+{
+	return 0;
+}
+
+void
+archconsole(void)
+{
+//	uartconsole(0, "b115200");
+//serialputs("uart0 console @ 115200\n", strlen("uart0 console @ 115200\n"));
+}
+
+#ifdef USE_FLASH
+void
+archflashwp(Flash*, int)
+{
+}
+
+/*
+ * for ../port/devflash.c:/^flashreset
+ * retrieve flash type, virtual base and length and return 0;
+ * return -1 on error (no flash)
+ */
+int
+archflashreset(int bank, Flash *f)
+{
+	if(bank != 0)
+		return -1;
+	f->type = "nand";
+	f->addr = (void*)PHYSNAND;
+	f->size = 0;	/* done by probe */
+	f->width = 1;
+	f->interleave = 0;
+	return 0;
+}
+#endif

+ 167 - 0
sys/src/9/kw/arm.h

@@ -0,0 +1,167 @@
+/*
+ * Program Status Registers
+ */
+#define PsrMusr		0x00000010		/* mode */
+#define PsrMfiq		0x00000011
+#define PsrMirq		0x00000012
+#define PsrMsvc		0x00000013
+#define PsrMabt		0x00000017
+#define PsrMund		0x0000001B
+#define PsrMsys		0x0000001F
+#define PsrMask		0x0000001F
+
+#define PsrDfiq		0x00000040		/* disable FIQ interrupts */
+#define PsrDirq		0x00000080		/* disable IRQ interrupts */
+
+#define PsrV		0x10000000		/* overflow */
+#define PsrC		0x20000000		/* carry/borrow/extend */
+#define PsrZ		0x40000000		/* zero */
+#define PsrN		0x80000000		/* negative/less than */
+
+/*
+ * Coprocessors
+ */
+#define CpSC		15			/* System Control */
+
+/*
+ * opcode 1
+ */
+#define	CpDef		0			/* default */
+#define CpL2		1			/* L2 cache operations */
+
+/*
+ * Primary (CRn) CpSC registers.
+ */
+#define	CpID		0			/* ID and cache type */
+#define	CpCONTROL	1			/* miscellaneous control */
+#define	CpTTB		2			/* Translation Table Base */
+#define	CpDAC		3			/* Domain Access Control */
+#define	CpFSR		5			/* Fault Status */
+#define	CpFAR		6			/* Fault Address */
+#define	CpCACHE		7			/* cache/write buffer control */
+#define	CpTLB		8			/* TLB control */
+#define	CpCLD		9			/* Cache Lockdown */
+#define CpTLD		10			/* TLB Lockdown */
+#define	CpPID		13			/* Process ID */
+#define CpTESTCFG	15			/* test config. (arm926) */
+
+/*
+ * CpID opcode2 fields.
+ */
+#define CpIDid		0			/* main ID */
+#define CpIDct		1			/* cache type */
+
+/*
+ * CpCONTROL
+ */
+#define CpCmmu		0x00000001		/* M: MMU enable */
+#define CpCalign	0x00000002		/* A: alignment fault enable */
+#define CpCdcache	0x00000004		/* C: data cache on */
+#define CpCwb		0x00000008		/* W: write buffer turned on */
+#define CpCi32		0x00000010		/* P: 32-bit program space */
+#define CpCd32		0x00000020		/* D: 32-bit data space */
+#define CpCbe		0x00000080		/* B: big-endian operation */
+#define CpCsystem	0x00000100		/* S: system permission */
+#define CpCrom		0x00000200		/* R: ROM permission */
+#define CpCicache	0x00001000		/* I: instruction cache on */
+#define CpChv		0x00002000		/* V: high vectors */
+
+/*
+ * CpCACHE Secondary (CRm) registers and opcode2 fields.
+ * In ARM-speak, 'flush' means invalidate and 'clean' means writeback.
+ */
+#define CpCACHEintr	0			/* interrupt */
+#define CpCACHEinvi	5			/* instruction */
+#define CpCACHEinvd	6			/* data */
+#define CpCACHEinvu	7			/* unified */
+#define CpCACHEwb	10			/* writeback */
+#define CpCACHEwbi	14			/* writeback+invalidate */
+
+#define CpCACHEall	0			/* entire */
+#define CpCACHEse	1			/* single entry */
+#define CpCACHEsi	2			/* set/index */
+#define CpCACHEtest	3			/* test loop */
+#define CpCACHEwait	4			/* wait */
+
+/*
+ * CpTLB Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpTLBinvi	5			/* instruction */
+#define CpTLBinvd	6			/* data */
+#define CpTLBinvu	7			/* unified */
+
+#define CpTLBinv	0			/* invalidate all */
+#define CpTLBinvse	1			/* invalidate single entry */
+
+/*
+ * CpTESTCFG Secondary (CRm) registers and opcode2 fields; sheeva only.
+ */
+#define CpTCl2cfg	1
+#define CpTCl2flush	9
+#define CpTCl2waylck	10
+#define CpTCl2inv	11
+#define CpTCl2perfctl	12
+#define CpTCl2perfcnt	13
+
+/* CpTCl2cfg */
+#define CpTCl2conf	0
+
+/* CpTCl2flush & CpTCl2inv */
+#define CpTCl2all	0
+#define CpTCl2seva	1
+#define CpTCl2way	2
+#define CpTCl2sepa	3
+#define CpTCl2valow	4
+#define CpTCl2vahigh	5			/* also triggers flush or inv */
+
+/* CpTCl2flush
+#define CpTCecccnt	6			/* ecc error count */
+#define CpTCeccthr	7			/* ecc error threshold */
+
+/* CpTCwaylck */
+#define CpTCwaylock	7
+
+/* CpTCl2inv */
+#define CpTCl2erraddr	7			/* ecc error address */
+
+/* CpTCl2perfctl */
+#define CpTCl2perf0ctl	0
+#define CpTCl2perf1ctl	1
+
+/* CpTCl2perfcnt */
+#define CpTCl2perf0low	0
+#define CpTCl2perf0high	1
+#define CpTCl2perf1low	2
+#define CpTCl2perf1high	3
+
+/*
+ * MMU.
+ */
+
+#define Fault		0x00000000u		/* L[12] */
+
+/* in pre-armv7 only, the 0x10 bit must be on */
+#define Coarse		0x00000011u		/* L1 */
+#define Section		0x00000012u		/* L1 1MB */
+#define Fine		0x00000013u		/* L1 */
+
+#define Large		0x00000001u		/* L2 64KB */
+#define Small		0x00000002u		/* L2 4KB */
+#define Tiny		0x00000003u		/* L2 1KB */
+#define Buffered	0x00000004u		/* L[12] */
+#define Cached		0x00000008u		/* L[12] */
+
+#define Dom0		0
+#define Noaccess	0			/* AP, DAC */
+#define Krw		1			/* AP */
+#define Uro		2			/* AP */
+#define Urw		3			/* AP */
+#define Client		1			/* DAC */
+#define Manager		3			/* DAC */
+
+#define AP(n, v) F((v), ((n)*2)+4, 2)
+#define L1AP(ap) (AP(3, (ap)))
+#define L2AP(ap) (AP(3, (ap))|AP(2, (ap))|AP(1, (ap))|AP(0, (ap))) /* pre-armv7 */
+#define DAC(n, v) F((v), (n)*2, 2)
+
+#define HVECTORS	0xffff0000		/* physical addr of vectors */

+ 64 - 0
sys/src/9/kw/arm.s

@@ -0,0 +1,64 @@
+/*
+ * sheevaplug machine assist, definitions
+ * arm926ej-s processor at 1.2GHz
+ *
+ * loader uses R11 as scratch.
+ */
+#include "mem.h"
+#include "arm.h"
+
+#undef B					/* B is for 'botch' */
+
+#define PADDR(a)	((a) & ~KZERO)
+#define KADDR(a)	(KZERO|(a))
+
+#define L1X(va)		(((((va))>>20) & 0x0fff)<<2)
+
+#define MACHADDR	(L1-MACHSIZE)
+
+#define PTEDRAM		(Dom0|L1AP(Krw)|Section|Cached|Buffered)
+#define PTEIO		(Dom0|L1AP(Krw)|Section)
+
+/* wave at the user; clobbers R1 & R7; needs R12 (SB) set */
+#define WAVE(c) \
+	BARRIERS; \
+	MOVW	$PHYSCONS, R7; \
+	MOVW	$(c), R1; \
+	MOVW	R1, (R7); \
+	BARRIERS
+
+#define DMB	/* NOTE(review): CpCACHEdmbarr is not defined in this commit's arm.h; confirm before using DMB */ \
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEdmbarr
+/*
+ * data synchronisation barrier (formerly drain write buffer).
+ * waits for cache, tlb, branch-prediction, memory; the lot.
+ * on sheeva, also flushes L2 eviction buffer.
+ * zeroes R0.
+ */
+#define DSB	\
+	MOVW	$0, R0; \
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
+/* prefetch flush; zeroes R0 */
+#define ISB	\
+	MOVW	$0, R0; \
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEwait; \
+	MCR	CpSC, CpL2, PC, C(CpTESTCFG), C(CpTCl2inv), CpTCl2seva
+
+/* zeroes R0 */
+#define	BARRIERS	DSB; ISB
+
+/*
+ * invoked with PTE bits in R2, pa in R3, PTE pointed to by R4.
+ * fill PTE pointed to by R4 and increment R4 past it.
+ * increment R3 by a MB.  clobbers R1.
+ * the last line of the body carries no continuation, as in ZEROPTE.
+ */
+#define FILLPTE() \
+	ORR	R3, R2, R1;			/* pte bits in R2, pa in R3 */ \
+	MOVW	R1, (R4); \
+	ADD	$4, R4;				/* bump PTE address */ \
+	ADD	$MiB, R3;			/* bump pa */
+
+/* zero PTE pointed to by R4 and increment R4 past it. assumes R0 is 0. */
+#define ZEROPTE() \
+	MOVW	R0, (R4); \
+	ADD	$4, R4;				/* bump PTE address */

+ 136 - 0
sys/src/9/kw/clock.c

@@ -0,0 +1,136 @@
+/*
+ * sheevaplug clock
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "ureg.h"
+
+enum {
+	Tcycles = CLOCKFREQ / HZ,		/* cycles per clock tick */
+};
+
+static void
+clockintr(Ureg *ureg, void*)
+{
+	m->fastclock++;
+	timerintr(ureg, 0);
+	intrclear(Irqbridge, IRQcputimer0);
+}
+
+void
+clockinit(void)
+{
+	int s;
+	long cyc;
+	TimerReg *tmr = TIMERREG;
+
+	tmr->ctl = 0;
+	intrenable(Irqbridge, IRQcputimer0, clockintr, nil, "clock");
+
+	s = spllo();			/* risky */
+	/* take any deferred clock (& other) interrupts here */
+	splx(s);
+
+	/* adjust m->bootdelay, used by delay()? */
+	m->ticks = 0;
+	m->fastclock = 0;
+
+	tmr->timer0 = Tcycles;
+	tmr->ctl = Tmr0enable;		/* just once */
+
+	s = spllo();			/* risky */
+	/* one iteration seems to take about 40 ns. */
+	for (cyc = Tcycles; cyc > 0 && m->fastclock == 0; cyc--)
+		;
+	splx(s);
+
+	if (m->fastclock == 0) {
+		serialputc('?');
+		if (tmr->timer0 == 0)
+			panic("clock not interrupting");
+		else if (tmr->timer0 == tmr->reload0)
+			panic("clock not ticking");
+		else
+			panic("clock running very slowly");
+	}
+
+	tmr->ctl = 0;
+	tmr->timer0  = Tcycles;
+	tmr->reload0 = Tcycles;
+	tmr->ctl = Tmr0enable | Tmr0periodic;
+}
+
+void
+timerset(uvlong next)
+{
+#ifdef FANCYTIMERS
+	Tn *tn;
+	Tval offset;
+
+	ilock(&timers.tn1lock);
+	tn = (Tn*)Tn1;
+	tn->cr = Tm;
+
+	offset = next + tn->cv;
+	if(offset < timers.tn1minperiod)
+		offset = timers.tn1minperiod;
+	else if(offset > timers.tn1maxperiod)
+		offset = timers.tn1maxperiod;
+
+	tn->lc = offset;
+	tn->cr = Tm|Te;
+	iunlock(&timers.tn1lock);
+#else
+	USED(next);
+#endif
+}
+
+/*
+ * the fast clock is m->fastclock, which clockintr advances once
+ * per interrupt; when hz is non-nil its rate is reported as HZ.
+ */
+uvlong
+fastticks(uvlong *hz)
+{
+	if(hz != nil)
+		*hz = HZ;
+	return m->fastclock;
+}
+
+ulong
+µs(void)
+{
+	return fastticks2us(fastticks(nil));
+}
+
+/*
+ * busy-wait for roughly l microseconds.
+ * delay() spins m->delayloop times per unit of its argument and this
+ * routine scales that by 1/1000, so l microseconds need
+ * l*delayloop/1000 iterations.  the product is formed in 64 bits:
+ * with the initial delayloop estimate of cpuhz/6000, a 32-bit product
+ * wraps once l reaches about 10ms, which used to collapse the wait
+ * to a single iteration.  always spins at least once.
+ */
+void
+microdelay(int l)
+{
+	int i;
+	uvlong n;
+
+	n = 0;
+	if(l > 0)
+		n = (uvlong)l * m->delayloop / 1000;
+	if(n == 0)
+		n = 1;
+	if(n > 0x7fffffff)		/* keep the count within int range */
+		n = 0x7fffffff;
+	l = n;
+	for(i = 0; i < l; i++)
+		;
+}
+
+void
+delay(int l)
+{
+	ulong i, j;
+
+	j = m->delayloop;
+	while(l-- > 0)
+		for(i=0; i < j; i++)
+			;
+}
+
+ulong
+perfticks(void)
+{
+//	return ((Tn*)Tn0)->cv;		// TODO: FANCYTIMERS
+	return (ulong)fastticks(nil);
+}

+ 260 - 0
sys/src/9/kw/dat.h

@@ -0,0 +1,260 @@
+typedef struct Conf	Conf;
+typedef struct Confmem	Confmem;
+typedef struct FPsave	FPsave;
+typedef struct ISAConf	ISAConf;
+typedef struct Label	Label;
+typedef struct Lock	Lock;
+typedef struct MMMU	MMMU;
+typedef struct Mach	Mach;
+typedef struct Notsave	Notsave;
+typedef struct Page	Page;
+typedef struct Pcidev	Pcidev;
+typedef struct PhysUart	PhysUart;
+typedef struct PFPU	PFPU;
+typedef struct PMMU	PMMU;
+typedef struct PNOTIFY	PNOTIFY;
+typedef struct Proc	Proc;
+typedef u32int		PTE;
+typedef struct Uart	Uart;
+typedef struct Ureg	Ureg;
+typedef uvlong		Tval;
+
+#pragma incomplete Pcidev
+#pragma incomplete Ureg
+
+#define MAXSYSARG	5	/* for mount(fd, mpt, flag, arg, srv) */
+
+/*
+ *  parameters for sysproc.c
+ */
+#define AOUT_MAGIC	(E_MAGIC)
+
+struct Lock
+{
+	ulong	key;
+	u32int	sr;
+	uintptr	pc;
+	Proc*	p;
+	Mach*	m;
+	int	isilock;
+};
+
+struct Label
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+/*
+ * emulated floating point
+ */
+struct PFPU
+{
+	ulong	status;
+	ulong	control;
+	ulong	regs[8][3];
+
+	int	fpstate;
+};
+
+/*
+ * PFPU.status
+ */
+enum
+{
+	FPinit,
+	FPactive,
+	FPinactive,
+};
+
+struct Confmem
+{
+	uintptr	base;
+	usize	npage;
+	uintptr	limit;
+	uintptr	kbase;
+	uintptr	klimit;
+};
+
+struct Conf
+{
+	ulong	nmach;		/* processors */
+	ulong	nproc;		/* processes */
+	Confmem	mem[1];		/* physical memory */
+	ulong	npage;		/* total physical pages of memory */
+	usize	upages;		/* user page pool */
+	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	ulong	ialloc;		/* max interrupt time allocation in bytes */
+	ulong	pipeqsize;	/* size in bytes of pipe queues */
+	ulong	nimage;		/* number of page cache image headers */
+	ulong	nswap;		/* number of swap pages */
+	int	nswppo;		/* max # of pageouts per segment pass */
+//	ulong	hz;		/* processor cycle freq */
+//	ulong	mhz;
+};
+
+struct FPsave {
+	int	emptiness;
+};
+struct Notsave {
+	int	emptiness;
+};
+
+/*
+ *  MMU stuff in Mach.
+ */
+struct MMMU
+{
+	PTE*	mmul1;		/* l1 for this processor */
+	int	mmul1lo;
+	int	mmul1hi;
+	int	mmupid;
+};
+
+/*
+ *  MMU stuff in proc
+ */
+#define NCOLOR	1		/* 1 level cache, don't worry about VCE's */
+struct PMMU
+{
+	Page*	mmul2;
+	Page*	mmul2cache;	/* free mmu pages */
+
+	PFPU;			/* hack */
+};
+
+/*
+ *  things saved in the Proc structure during a notify
+ */
+struct PNOTIFY
+{
+	void	emptiness;
+};
+
+#include "../port/portdat.h"
+
+struct Mach
+{
+	int	machno;			/* physical id of processor */
+	uintptr	splpc;			/* pc of last caller to splhi */
+
+	Proc*	proc;			/* current process */
+
+	MMMU;
+	int	flushmmu;		/* flush current proc mmu state */
+
+	ulong	ticks;			/* of the clock since boot time */
+	Label	sched;			/* scheduler wakeup */
+	Lock	alarmlock;		/* access to alarm list */
+	void*	alarm;			/* alarms bound to this clock */
+	int	inclockintr;
+
+	Proc*	readied;		/* for runproc */
+	ulong	schedticks;		/* next forced context switch */
+
+	int	cputype;
+	ulong	delayloop;
+
+	/* stats */
+	int	tlbfault;
+	int	tlbpurge;
+	int	pfault;
+	int	cs;
+	int	syscall;
+	int	load;
+	int	intr;
+	vlong	fastclock;		/* last sampled value */
+	uvlong	inidle;			/* time spent in idlehands() */
+	ulong	spuriousintr;
+	int	lastintr;
+	int	ilockdepth;
+	Perf	perf;			/* performance counters */
+
+//	int	cpumhz;
+	uvlong	cpuhz;			/* speed of cpu */
+	uvlong	cyclefreq;		/* Frequency of user readable cycle counter */
+
+	/* save areas for exceptions */
+	u32int	sfiq[5];
+	u32int	sirq[5];
+	u32int	sund[5];
+	u32int	sabt[5];
+#define fiqstack sfiq
+#define irqstack sirq
+#define abtstack sabt
+#define undstack sund
+
+	int	stack[1];
+};
+
+/*
+ * Fake kmap.
+ */
+typedef void		KMap;
+#define	VA(k)		((uintptr)(k))
+#define	kmap(p)		(KMap*)((p)->pa|kseg0)
+#define	kunmap(k)
+
+struct
+{
+	Lock;
+	int	machs;			/* bitmap of active CPUs */
+	int	exiting;		/* shutdown */
+	int	ispanic;		/* shutdown in response to a panic */
+}active;
+
+enum {
+//	Frequency	= 1200*1000*1000/2,	/* half the processor clock */
+	Frequency	= 1200*1000*1000,	/* the processor clock */
+};
+
+extern register Mach* m;			/* R10 */
+extern register Proc* up;			/* R9 */
+extern uintptr kseg0;
+extern Mach* machaddr[MAXMACH];
+
+enum {
+	Nvec = 8,	/* # of vectors at start of lexception.s */
+};
+
+/*
+ * Layout of physical 0.
+ */
+typedef struct Vectorpage {
+	void	(*vectors[Nvec])(void);
+	uint	vtable[Nvec];
+} Vectorpage;
+
+/*
+ *  a parsed plan9.ini line
+ */
+#define NISAOPT		8
+
+struct ISAConf {
+	char		*type;
+	ulong	port;
+	int	irq;
+	ulong	dma;
+	ulong	mem;
+	ulong	size;
+	ulong	freq;
+
+	int	nopt;
+	char	*opt[NISAOPT];
+};
+
+#define	MACHP(n)	(machaddr[n])
+
+/*
+ * Horrid. But the alternative is 'defined'.
+ */
+#ifdef _DBGC_
+#define DBGFLG		(dbgflg[_DBGC_])
+#else
+#define DBGFLG		(0)
+#endif /* _DBGC_ */
+
+int vflag;
+extern char dbgflg[256];
+
+#define dbgprint	print		/* for now */

+ 226 - 0
sys/src/9/kw/devarch.c

@@ -0,0 +1,226 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+
+#include "../ip/ip.h"
+
+enum {
+	Qdir = 0,
+	Qbase,
+
+	Qmax = 16,
+};
+
+typedef long Rdwrfn(Chan*, void*, long, vlong);
+
+static Rdwrfn *readfn[Qmax];
+static Rdwrfn *writefn[Qmax];
+
+static Dirtab archdir[Qmax] = {
+	".",		{ Qdir, 0, QTDIR },	0,	0555,
+};
+
+Lock archwlock;	/* the lock is only for changing archdir */
+int narchdir = Qbase;
+
+/*
+ * Add a file to the #P listing.  Once added, you can't delete it.
+ * You can't add a file with the same name as one already there,
+ * and you get a pointer to the Dirtab entry so you can do things
+ * like change the Qid version.  Changing the Qid path is disallowed.
+ *
+ * Returns nil if the name does not fit in a Dirtab name, if the
+ * table already holds Qmax entries, or if the name is already present.
+ */
+Dirtab*
+addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn)
+{
+	int i;
+	Dirtab d;
+	Dirtab *dp;
+
+	memset(&d, 0, sizeof d);
+	/* d.name is a fixed-size array; refuse names that would overrun it */
+	if(strlen(name) >= sizeof d.name)
+		return nil;
+	strcpy(d.name, name);
+	d.perm = perm;
+
+	lock(&archwlock);
+	if(narchdir >= Qmax){
+		unlock(&archwlock);
+		return nil;
+	}
+
+	for(i=0; i<narchdir; i++)
+		if(strcmp(archdir[i].name, name) == 0){
+			unlock(&archwlock);
+			return nil;
+		}
+
+	/* the new entry's qid path doubles as its index into readfn/writefn */
+	d.qid.path = narchdir;
+	archdir[narchdir] = d;
+	readfn[narchdir] = rdfn;
+	writefn[narchdir] = wrfn;
+	dp = &archdir[narchdir++];
+	unlock(&archwlock);
+
+	return dp;
+}
+
+static Chan*
+archattach(char* spec)
+{
+	return devattach('P', spec);
+}
+
+Walkqid*
+archwalk(Chan* c, Chan *nc, char** name, int nname)
+{
+	return devwalk(c, nc, name, nname, archdir, narchdir, devgen);
+}
+
+static int
+archstat(Chan* c, uchar* dp, int n)
+{
+	return devstat(c, dp, n, archdir, narchdir, devgen);
+}
+
+static Chan*
+archopen(Chan* c, int omode)
+{
+	return devopen(c, omode, archdir, narchdir, devgen);
+}
+
+static void
+archclose(Chan*)
+{
+}
+
+/*
+ * the directory itself is read with devdirread; any other file is
+ * handed to the read function registered for its qid path.
+ * a file with no read function raises Eperm.
+ */
+static long
+archread(Chan *c, void *a, long n, vlong offset)
+{
+	Rdwrfn *fn;
+
+	if((ulong)c->qid.path == Qdir)
+		return devdirread(c, a, n, archdir, narchdir, devgen);
+
+	fn = nil;
+	if(c->qid.path < narchdir)
+		fn = readfn[c->qid.path];
+	if(fn != nil)
+		return fn(c, a, n, offset);
+	error(Eperm);
+
+	return 0;
+}
+
+/*
+ * hand the write to the function registered for the file's qid path;
+ * a file with no write function raises Eperm.
+ */
+static long
+archwrite(Chan *c, void *a, long n, vlong offset)
+{
+	Rdwrfn *fn;
+
+	fn = nil;
+	if(c->qid.path < narchdir)
+		fn = writefn[c->qid.path];
+	if(fn != nil)
+		return fn(c, a, n, offset);
+	error(Eperm);
+
+	return 0;
+}
+
+void archinit(void);
+
+Dev archdevtab = {
+	'P',
+	"arch",
+
+	devreset,
+	archinit,
+	devshutdown,
+	archattach,
+	archwalk,
+	archstat,
+	archopen,
+	devcreate,
+	archclose,
+	archread,
+	devbread,
+	archwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
+/*
+ * record the AddrDevid register in m->cputype, format the SoC name
+ * selected by its low two bits into buf (size bytes long) and
+ * return buf.
+ */
+char *
+cputype2name(char *buf, int size)
+{
+	static char *socname[] = {
+		"88F6180",
+		"88F619[02]",
+		"88F6281 (arm926ej-s)",
+		"unknown",
+	};
+
+	m->cputype = *(ulong*)AddrDevid;
+	seprint(buf, buf + size, "Marvell %s", socname[m->cputype & 3]);
+	return buf;
+}
+
+static long
+cputyperead(Chan*, void *a, long n, vlong offset)
+{
+	char name[64], str[128];
+
+	cputype2name(name, sizeof name);
+	snprint(str, sizeof str, "ARM %s %llud\n", name, m->cpuhz / 1000000);
+	return readstr(offset, a, n, str);
+}
+
+/*
+ * read function registered for the "timebase" file: the value
+ * stored by cycles(), printed as 16 hexadecimal digits.
+ */
+static long
+tbread(Chan*, void *a, long n, vlong offset)
+{
+	char str[16+1];		/* 16 digits and the terminating NUL */
+	uvlong tb;
+
+	cycles(&tb);
+
+	snprint(str, sizeof(str), "%16.16llux", tb);
+	return readstr(offset, a, n, str);
+}
+
+/*
+ * read function meant for an "nsec" file (its registration in
+ * archinit is commented out): the value stored by cycles(), scaled
+ * by 1000/700 and printed as 16 hexadecimal digits.
+ */
+static long
+nsread(Chan*, void *a, long n, vlong offset)
+{
+	char str[16+1];		/* 16 digits and the terminating NUL */
+	uvlong tb;
+
+	cycles(&tb);
+
+	snprint(str, sizeof(str), "%16.16llux", (tb/700)* 1000);
+	return readstr(offset, a, n, str);
+}
+
+uvlong
+fastns(void)
+{
+//	return gettbl();
+	return 0;
+}
+
+void
+archinit(void)
+{
+	addarchfile("cputype", 0444, cputyperead, nil);
+	addarchfile("timebase",0444, tbread, nil);
+//	addarchfile("nsec", 0444, nsread, nil);
+}

+ 534 - 0
sys/src/9/kw/devether.c

@@ -0,0 +1,534 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+
+#include "../port/netif.h"
+#include "etherif.h"
+
+/* this file writes chan->devno and f->iq; map them onto the fields dev and in */
+#define devno	dev
+#define iq	in
+
+/* defined elsewhere; etherreset treats a result <= 0 as "no controller here" */
+extern int archether(int ctlno, Ether *ether);
+
+/* configured controllers indexed by controller number; 0 if none */
+static Ether *etherxx[MaxEther];
+
+/*
+ * Attach to the controller numbered by spec (controller 0 when spec
+ * is empty).  Raises Ebadarg for a malformed or out-of-range number
+ * and Enodev when no controller is configured in that slot.
+ * Returns the new Chan with its dev field set to the controller number.
+ */
+Chan*
+etherattach(char* spec)
+{
+	int ctlrno;
+	char *p;
+	Chan *chan;
+
+	ctlrno = 0;
+	if(spec && *spec){
+		ctlrno = strtoul(spec, &p, 0);
+		/* reject an empty number or trailing junk */
+		if((ctlrno == 0 && p == spec) || *p != 0)
+			error(Ebadarg);
+		if(ctlrno < 0 || ctlrno >= MaxEther)
+			error(Ebadarg);
+	}
+	if(etherxx[ctlrno] == 0)
+		error(Enodev);
+
+	chan = devattach('l', spec);
+	/* free the Chan if the controller's attach routine raises an error */
+	if(waserror()){
+		chanfree(chan);
+		nexterror();
+	}
+	chan->devno = ctlrno;
+	if(etherxx[ctlrno]->attach)
+		etherxx[ctlrno]->attach(etherxx[ctlrno]);
+	poperror();
+	return chan;
+}
+
+/* walk: delegate to netifwalk on the controller selected by chan's dev field */
+static Walkqid*
+etherwalk(Chan* chan, Chan* nchan, char** name, int nname)
+{
+	return netifwalk(etherxx[chan->devno], chan, nchan, name, nname);
+}
+
+/* stat: delegate to netifstat on the controller selected by chan's dev field */
+static int
+etherstat(Chan* chan, uchar* dp, int n)
+{
+	return netifstat(etherxx[chan->devno], chan, dp, n);
+}
+
+/* open: delegate to netifopen on the controller selected by chan's dev field */
+static Chan*
+etheropen(Chan* chan, int omode)
+{
+	return netifopen(etherxx[chan->devno], chan, omode);
+}
+
+/* create: does nothing and raises no error */
+static void
+ethercreate(Chan*, char*, int, ulong)
+{
+}
+
+/* close: delegate to netifclose on the controller selected by chan's dev field */
+static void
+etherclose(Chan* chan)
+{
+	netifclose(etherxx[chan->devno], chan);
+}
+
+/*
+ * Read from an ether file.  For non-directory files on controllers
+ * that supply an ifstat routine, a read of the Nifstatqid file is
+ * answered by ifstat itself, and a read of the Nstatqid file first
+ * calls ifstat with a zero count (presumably so the driver can
+ * refresh its counters -- TODO confirm) before falling through.
+ * Everything else is handled by netifread.
+ * The vlong offset is narrowed to ulong.
+ */
+static long
+etherread(Chan* chan, void* buf, long n, vlong off)
+{
+	Ether *ether;
+	ulong offset = off;
+
+	ether = etherxx[chan->devno];
+	if((chan->qid.type & QTDIR) == 0 && ether->ifstat){
+		/*
+		 * With some controllers it is necessary to reach
+		 * into the chip to extract statistics.
+		 */
+		if(NETTYPE(chan->qid.path) == Nifstatqid)
+			return ether->ifstat(ether, buf, n, offset);
+		else if(NETTYPE(chan->qid.path) == Nstatqid)
+			ether->ifstat(ether, buf, 0, offset);
+	}
+
+	return netifread(ether, chan, buf, n, offset);
+}
+
+/* block read: delegate to netifbread on the controller selected by chan's dev field */
+static Block*
+etherbread(Chan* chan, long n, ulong offset)
+{
+	return netifbread(etherxx[chan->devno], chan, n, offset);
+}
+
+/* wstat: delegate to netifwstat on the controller selected by chan's dev field */
+static int
+etherwstat(Chan* chan, uchar* dp, int n)
+{
+	return netifwstat(etherxx[chan->devno], chan, dp, n);
+}
+
+/*
+ * Queue a 64-byte trace record for a headers-only connection:
+ * up to the first 58 bytes of the packet, then the packet length
+ * (2 bytes, high byte first) and the current time in milliseconds
+ * (4 bytes, high byte first).  Nothing is queued when the
+ * connection's queue has no window or no block can be allocated.
+ */
+static void
+etherrtrace(Netfile* f, Etherpkt* pkt, int len)
+{
+	enum { Hdrlen = 58, Reclen = 64 };
+	int ms, ncopy;
+	Block *b;
+
+	if(qwindow(f->iq) <= 0)
+		return;
+	ncopy = len > Hdrlen ? Hdrlen : len;
+	b = iallocb(Reclen);
+	if(b == nil)
+		return;
+
+	memmove(b->wp, pkt->d, ncopy);
+	ms = TK2MS(MACHP(0)->ticks);
+	b->wp[Hdrlen+0] = len>>8;
+	b->wp[Hdrlen+1] = len;
+	b->wp[Hdrlen+2] = ms>>24;
+	b->wp[Hdrlen+3] = ms>>16;
+	b->wp[Hdrlen+4] = ms>>8;
+	b->wp[Hdrlen+5] = ms;
+	b->wp += Reclen;
+	qpass(f->iq, b);
+}
+
+/*
+ * Deliver packet bp to every open connection on ether that wants it.
+ * fromwire != 0 means the caller has no further use for bp, so bp is
+ * either queued on a connection or freed here and 0 is returned.
+ * With fromwire == 0 the connections get copies and bp is returned,
+ * still owned by the caller.
+ */
+Block*
+etheriq(Ether* ether, Block* bp, int fromwire)
+{
+	Etherpkt *pkt;
+	ushort type;
+	int len, multi, tome, fromme;
+	Netfile **ep, *f, **fp, *fx;
+	Block *xbp;
+
+	ether->inpackets++;
+
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	type = (pkt->type[0]<<8)|pkt->type[1];
+	fx = 0;		/* connection that will receive bp itself, if any */
+	ep = &ether->f[Ntypes];
+
+	multi = pkt->d[0] & 1;
+	/* check for valid multicast addresses */
+	if(multi && memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) != 0 && ether->prom == 0){
+		if(!activemulti(ether, pkt->d, sizeof(pkt->d))){
+			if(fromwire){
+				freeb(bp);
+				bp = 0;
+			}
+			return bp;
+		}
+	}
+
+	/* is it for me? */
+	tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0;
+
+	/*
+	 * Multiplex the packet to all the connections which want it.
+	 * If the packet is not to be used subsequently (fromwire != 0),
+	 * attempt to simply pass it into one of the connections, thereby
+	 * saving a copy of the data (usual case hopefully).
+	 */
+	for(fp = ether->f; fp < ep; fp++){
+		if(f = *fp)
+		if(f->type == type || f->type < 0)
+		if(tome || multi || f->prom){
+			/* Don't want to hear bridged packets */
+			if(f->bridge && !fromwire && !fromme)
+				continue;
+			if(!f->headersonly){
+				/* first taker gets bp itself (passed below); the rest get copies */
+				if(fromwire && fx == 0)
+					fx = f;
+				else if(xbp = iallocb(len)){
+					memmove(xbp->wp, pkt, len);
+					xbp->wp += len;
+					if(qpass(f->iq, xbp) < 0)
+						ether->soverflows++;
+				}
+				else
+					ether->soverflows++;
+			}
+			else
+				etherrtrace(f, pkt, len);
+		}
+	}
+
+	if(fx){
+		if(qpass(fx->iq, bp) < 0)
+			ether->soverflows++;
+		return 0;
+	}
+	if(fromwire){
+		freeb(bp);
+		return 0;
+	}
+
+	return bp;
+}
+
+/*
+ * Send bp out on ether and return its length.  Packets addressed to
+ * this interface, broadcasts, and all packets when the interface is
+ * promiscuous are also offered to etheriq (at splhi).  A packet
+ * addressed to this interface is not queued for transmission; it is
+ * freed here instead.
+ */
+static int
+etheroq(Ether* ether, Block* bp)
+{
+	int len, loopback, s;
+	Etherpkt *pkt;
+
+	ether->outpackets++;
+
+	/*
+	 * Check if the packet has to be placed back onto the input queue,
+	 * i.e. if it's a loopback or broadcast packet or the interface is
+	 * in promiscuous mode.
+	 * If it's a loopback packet indicate to etheriq that the data isn't
+	 * needed and return, etheriq will pass-on or free the block.
+	 * To enable bridging to work, only packets that were originated
+	 * by this interface are fed back.
+	 */
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);		/* taken now: bp is queued or freed below */
+	loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	if(loopback || memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) == 0 || ether->prom){
+		s = splhi();
+		etheriq(ether, bp, 0);
+		splx(s);
+	}
+
+	if(!loopback){
+		qbwrite(ether->oq, bp);
+		if(ether->transmit != nil)
+			ether->transmit(ether);
+	} else
+		freeb(bp);
+
+	return len;
+}
+
+/*
+ * Write to an ether file.
+ *
+ * Non-data files: the request is first offered to netifwrite; if
+ * that returns a negative value the text is parsed as a control
+ * message.  "nonblocking [n]" sets the output queue's non-blocking
+ * state (default 1); anything else goes to the controller's ctl
+ * routine if it has one, otherwise Ebadctl is raised.
+ *
+ * Data file: n must lie between the controller's minmtu and maxmtu
+ * (Etoosmall/Etoobig otherwise).  The data is copied into a new
+ * block, the source address is overwritten with the interface
+ * address, and the block is handed to etheroq.
+ */
+static long
+etherwrite(Chan* chan, void* buf, long n, vlong)
+{
+	Ether *ether;
+	Block *bp;
+	int nn, onoff;
+	Cmdbuf *cb;
+
+	ether = etherxx[chan->devno];
+	if(NETTYPE(chan->qid.path) != Ndataqid) {
+		nn = netifwrite(ether, chan, buf, n);
+		if(nn >= 0)
+			return nn;
+		cb = parsecmd(buf, n);
+		/* an empty message has no fields; don't hand a nil f[0] to strcmp */
+		if(cb->nf > 0 && strcmp(cb->f[0], "nonblocking") == 0){
+			if(cb->nf <= 1)
+				onoff = 1;
+			else
+				onoff = atoi(cb->f[1]);
+			qnoblock(ether->oq, onoff);
+			free(cb);
+			return n;
+		}
+		free(cb);
+		if(ether->ctl!=nil)
+			return ether->ctl(ether,buf,n);
+
+		error(Ebadctl);
+	}
+
+	if(n > ether->maxmtu)
+		error(Etoobig);
+	if(n < ether->minmtu)
+		error(Etoosmall);
+
+	bp = allocb(n);
+	/* copying from buf may raise an error; don't leak the block */
+	if(waserror()){
+		freeb(bp);
+		nexterror();
+	}
+	memmove(bp->rp, buf, n);
+	memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen);
+	poperror();
+	bp->wp += n;
+
+	return etheroq(ether, bp);
+}
+
+/*
+ * Block write.  For non-data files the block's contents are passed
+ * to etherwrite and the block is freed, on the error path as well.
+ * For the data file the block length is checked against the mtu
+ * limits (the block is freed before the error is raised) and the
+ * block itself is handed to etheroq.
+ */
+static long
+etherbwrite(Chan* chan, Block* bp, ulong)
+{
+	Ether *ether;
+	long n;
+
+	n = BLEN(bp);
+	if(NETTYPE(chan->qid.path) != Ndataqid){
+		if(waserror()) {
+			freeb(bp);
+			nexterror();
+		}
+		n = etherwrite(chan, bp->rp, n, 0);
+		poperror();
+		freeb(bp);
+		return n;
+	}
+	ether = etherxx[chan->devno];
+
+	if(n > ether->maxmtu){
+		freeb(bp);
+		error(Etoobig);
+	}
+	if(n < ether->minmtu){
+		freeb(bp);
+		error(Etoosmall);
+	}
+
+	return etheroq(ether, bp);
+}
+
+/*
+ * Registered card types.  addethercard fills at most MaxEther
+ * entries, so the extra slot keeps a zero type that ends
+ * etherreset's scan.
+ */
+static struct {
+	char*	type;			/* name compared against ether->type */
+	int	(*reset)(Ether*);	/* non-zero return makes etherreset give up on the controller */
+} cards[MaxEther+1];
+
+/*
+ * Register card type t with reset routine r in the next free slot
+ * of cards[].  Panics once MaxEther types have been registered.
+ */
+void
+addethercard(char* t, int (*r)(Ether*))
+{
+	static int ncard;
+	int slot;
+
+	if(ncard == MaxEther)
+		panic("too many ether cards");
+	slot = ncard++;
+	cards[slot].type = t;
+	cards[slot].reset = r;
+}
+
+/*
+ * Convert the textual ethernet address in from to Eaddrlen bytes in
+ * to.  Each byte is two characters parsed as hexadecimal, optionally
+ * followed by ':'.  Returns 0, or -1 if the string ends early.
+ */
+int
+parseether(uchar *to, char *from)
+{
+	char hex[3];
+	int i;
+
+	hex[2] = 0;
+	for(i = 0; i < Eaddrlen; i++){
+		if(from[0] == 0 || from[1] == 0)
+			return -1;
+		hex[0] = from[0];
+		hex[1] = from[1];
+		from += 2;
+		to[i] = strtoul(hex, 0, 16);
+		if(*from == ':')
+			from++;
+	}
+	return 0;
+}
+
+/*
+ * Configure the ethernet controllers.  For each controller number,
+ * archether supplies the configuration (a result <= 0 means none);
+ * the first registered card whose type matches is reset.  On a
+ * successful reset the interrupt is enabled, a description line is
+ * printed, the netif and output queue are set up, and the Ether is
+ * recorded in etherxx[].  An Ether that was not installed is reused
+ * for the next controller number and freed at the end.
+ */
+static void
+etherreset(void)
+{
+	Ether *ether;
+	int i, n, ctlrno;
+	char name[KNAMELEN], buf[128], *p, *e;
+
+	for(ether = 0, ctlrno = 0; ctlrno < MaxEther; ctlrno++){
+		if(ether == 0)
+			ether = malloc(sizeof(Ether));
+		if(ether == 0){
+			/* keep the controllers already configured; give up on the rest */
+			print("etherreset: no memory for ether%d\n", ctlrno);
+			return;
+		}
+		memset(ether, 0, sizeof(Ether));
+		ether->ctlrno = ctlrno;
+		ether->mbps = 10;
+		ether->minmtu = ETHERMINTU;
+		ether->maxmtu = ETHERMAXTU;
+
+		if(archether(ctlrno, ether) <= 0)
+			continue;
+
+		for(n = 0; cards[n].type; n++){
+			if(cistrcmp(cards[n].type, ether->type))
+				continue;
+			for(i = 0; i < ether->nopt; i++){
+				if(cistrncmp(ether->opt[i], "ea=", 3) == 0){
+					if(parseether(ether->ea, &ether->opt[i][3]) == -1)
+						memset(ether->ea, 0, Eaddrlen);
+				}else if(cistrcmp(ether->opt[i], "fullduplex") == 0 ||
+					cistrcmp(ether->opt[i], "10BASE-TFD") == 0)
+					ether->fullduplex = 1;
+				else if(cistrcmp(ether->opt[i], "100BASE-TXFD") == 0)
+					ether->mbps = 100;
+			}
+			/* a non-zero reset result means the card could not be set up */
+			if(cards[n].reset(ether))
+				break;
+			snprint(name, sizeof(name), "ether%d", ctlrno);
+
+			if(ether->interrupt != nil)
+				intrenable(Irqlo, ether->irq, ether->interrupt,
+					ether, name);
+
+			/* build the description with bounded prints so buf cannot overflow */
+			p = buf;
+			e = buf + sizeof(buf);
+			p = seprint(p, e, "#l%d: %s: %dMbps port %#lux irq %d",
+				ctlrno, ether->type, ether->mbps, ether->port,
+				ether->irq);
+			if(ether->mem)
+				p = seprint(p, e, " addr %#lux", PADDR(ether->mem));
+			if(ether->size)
+				p = seprint(p, e, " size 0x%luX", ether->size);
+			p = seprint(p, e, ": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
+				ether->ea[0], ether->ea[1], ether->ea[2],
+				ether->ea[3], ether->ea[4], ether->ea[5]);
+			seprint(p, e, "\n");
+			print("%s", buf);
+
+			/* faster links get a larger limit */
+			if(ether->mbps >= 1000)
+				netifinit(ether, name, Ntypes, 4*1024*1024);
+			else if(ether->mbps >= 100)
+				netifinit(ether, name, Ntypes, 1024*1024);
+			else
+				netifinit(ether, name, Ntypes, 65*1024);
+			if(ether->oq == 0)
+				ether->oq = qopen(ether->limit, Qmsg, 0, 0);
+			if(ether->oq == 0)
+				panic("etherreset %s", name);
+			ether->alen = Eaddrlen;
+			memmove(ether->addr, ether->ea, Eaddrlen);
+			memset(ether->bcast, 0xFF, Eaddrlen);
+
+			/* installed: the next controller needs a fresh Ether */
+			etherxx[ctlrno] = ether;
+			ether = 0;
+			break;
+		}
+	}
+	if(ether)
+		free(ether);
+}
+
+/*
+ * Call the shutdown routine of every configured controller,
+ * printing a notice for controllers that have none.
+ */
+static void
+ethershutdown(void)
+{
+	Ether *ether;
+	int i;
+
+	for(i = 0; i < MaxEther; i++){
+		ether = etherxx[i];
+		if(ether == nil)
+			continue;
+		if(ether->shutdown != nil)
+			(*ether->shutdown)(ether);
+		else
+			print("#l%d: no shutdown function\n", i);
+	}
+}
+
+
+/* called from clock.c once per second; currently does nothing */
+void
+etherclock(void)
+{
+}
+
+#define POLY 0xedb88320
+
+/*
+ * Bit-at-a-time 32-bit crc over len bytes at p, low bit of each
+ * byte first, using polynomial POLY and initial value 0xffffffff.
+ * The register is returned as is.  Slow.
+ */
+ulong
+ethercrc(uchar *p, int len)
+{
+	int bit;
+	ulong crc, byte;
+
+	crc = 0xffffffff;
+	while(len-- > 0){
+		byte = *p++;
+		for(bit = 0; bit < 8; bit++){
+			if((crc ^ byte) & 1)
+				crc = (crc>>1) ^ POLY;
+			else
+				crc >>= 1;
+			byte >>= 1;
+		}
+	}
+	return crc;
+}
+
+/* print a one-phrase summary of output queue oq */
+void
+dumpoq(Queue *oq)
+{
+	if(oq == nil){
+		print("no outq! ");
+		return;
+	}
+	if(qisclosed(oq)){
+		print("outq closed ");
+		return;
+	}
+	if(qfull(oq)){
+		print("outq full ");
+		return;
+	}
+	print("outq %d ", qlen(oq));
+}
+
+/*
+ * Print netif's name, limit, speed and link state, then its packet
+ * counts and the sum of its error counters.
+ */
+void
+dumpnetif(Netif *netif)
+{
+	print("netif %s ", netif->name);
+	print("limit %d mbps %d link %d ",
+		netif->limit, netif->mbps, netif->link);
+	print("inpkts %lld outpkts %lld errs %d\n",
+		netif->inpackets, netif->outpackets,
+		netif->crcs + netif->oerrs + netif->frames + netif->overflows +
+		netif->buffs + netif->soverflows);
+}
+
+/*
+ * Device table entry for the ether device: character 'l', name "ether".
+ * Init and remove use the generic dev* routines.
+ */
+Dev etherdevtab = {
+	'l',
+	"ether",
+
+	etherreset,
+	devinit,
+	ethershutdown,
+	etherattach,
+	etherwalk,
+	etherstat,
+	etheropen,
+	ethercreate,
+	etherclose,
+	etherread,
+	etherbread,
+	etherwrite,
+	etherbwrite,
+	devremove,
+	etherwstat,
+};

+ 1024 - 0
sys/src/9/kw/devflash.c

@@ -0,0 +1,1024 @@
+/*
+ * flash memory file system
+ *
+ * implements partitions
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+enum {
+	Nflash = 2,		/* number of entries in flashes[] */
+	Maxwchunk= 1024,	/* max. chunk written by one call to flash->alg->write */
+};
+
+
+/*
+ *  Flashes are either 8 or 16 bits wide.  On some installations (e.g., the
+ *  bitsy), they are interleaved: address 0 is in the first chip, address 2
+ *  in the second, address 4 in the first, etc.
+ *  We define Funit as the unit that matches the width of a single flash chip,
+ *  so Funit is either `uchar' or `ushort' (I haven't seen 32-bit wide flashes),
+ *  and we define Fword as the unit that matches a set of interleaved Funits.
+ *  We access interleaved flashes simultaneously, by doing single reads and
+ *  writes to both.  The macro `mirror' takes a command and replicates it for
+ *  this purpose.
+ *  The Blast board has a non-interleaved 16-bit wide flash.  When doing
+ *  writes to it, we must swap bytes.
+ */
+
+typedef struct FlashAlg FlashAlg;
+typedef struct Flash Flash;
+typedef struct FlashRegion FlashRegion;
+
+/* width of a single flash chip */
+typedef	ushort		Funit;
+
+/*
+ * mirror(x) repeats x in bits 16-31.
+ * NOTE(review): Fword below is a 16-bit ushort, so the upper copy is
+ * dropped on stores through flash->p[], and a value read through an
+ * Fword can never equal mirror(bit) in tests such as
+ * (x & mirror(ISEs_ready)) == mirror(ISEs_ready).  Confirm whether
+ * mirror should be the identity in this configuration.
+ */
+#define	mirror(x)	((x)<<16|(x))
+#define	reg(x)		(x)
+
+/* unit used for every access through flash->p[] */
+typedef	ushort		Fword;
+
+/* byte offset >> Wshift is an index into flash->p[] */
+#define	Wshift		1
+#define fromendian(x)	(x)
+
+/*
+ *  hardware info about a device: one entry of DevConf.ports
+ */
+typedef struct {
+	ulong	port;
+	int	size;
+} Devport;
+
+/* capacity of DevConf.opt[] */
+#define	NCONFOPT	8
+
+/* configuration of one device; embedded anonymously in Flash */
+struct DevConf
+{
+	RWlock;			/* write: configure/unconfigure/suspend; read: normal access */
+	ulong	mem;		/* mapped memory address */
+	Devport	*ports;		/* ports[0]: mapped i/o regs, access size */
+	int	nports;		/* always 1 for the bitsy */
+	int	size;
+	int	itype;		/* type of interrupt */
+	ulong	intnum;		/* interrupt number */
+	char	*type;		/* card type, mallocated */
+	int	nopt;		/* number of options */
+	char	*opt[NCONFOPT];	/* options */
+};
+
+/*
+ * this defines a contiguous set of erase blocks of one size;
+ * cfiquery fills these in with end = addr + n*size
+ */
+struct FlashRegion
+{
+	ulong	addr;		/* start of region */
+	ulong	end;		/* end of region + 1 */
+	ulong	n;		/* number of blocks */
+	ulong	size;		/* size of each block */
+};
+
+/*
+ * this defines a particular access algorithm;
+ * flashinit selects one by matching id against flash->algid
+ */
+struct FlashAlg
+{
+	int	id;
+	char	*name;
+	void	(*identify)(Flash*);		/* identify device */
+	void	(*erase)(Flash*, ulong);	/* erase a region */
+	void	(*write)(Flash*, void*, long, ulong);	/* write a region */
+};
+
+/*
+ * state of one flash device.
+ * NOTE(review): DevConf already embeds an anonymous RWlock; confirm
+ * that the second anonymous RWlock here is intended.
+ */
+struct Flash
+{
+	DevConf;			/* contains size */
+	RWlock;
+	Fword		*p;		/* flashinit points this at mem */
+	ushort		algid;		/* access algorithm */
+	FlashAlg	*alg;
+	ushort		manid;		/* manufacturer id */
+	ushort		devid;		/* device id */
+	int		wbsize;		/* size of write buffer */
+	ulong		nr;		/* number of regions */
+	uchar		bootprotect;
+	ulong		offset;		/* beginning offset of this flash */
+	FlashRegion	r[32];
+};
+
+static void	ise_id(Flash*);
+static void	ise_erase(Flash*, ulong);
+static void	ise_write(Flash*, void*, long, ulong);
+
+static void	afs_id(Flash*);
+static void	afs_erase(Flash*, ulong);
+static void	afs_write(Flash*, void*, long, ulong);
+
+static ulong	blockstart(Flash*, ulong);
+static ulong	blockend(Flash*, ulong);
+
+/* known access algorithms; the first field is matched against flash->algid */
+FlashAlg falg[] =
+{
+	{ 1,	"Intel/Sharp Extended",	ise_id, ise_erase, ise_write	},
+	{ 2,	"AMD/Fujitsu Standard",	afs_id, afs_erase, afs_write	},
+};
+
+/* the flash devices, set up in order by flashinit */
+Flash flashes[Nflash];
+
+/*
+ *  common flash interface
+ */
+
+/*
+ * Return the low byte of the word at index off, read between a
+ * write of 0x98 and a write of 0xFF to index 0x55.
+ */
+static uchar
+cfigetc(Flash *flash, int off)
+{
+	uchar rv;
+
+	flash->p[reg(0x55)] = mirror(0x98);
+	rv = flash->p[reg(off)];
+	flash->p[reg(0x55)] = mirror(0xFF);
+	return rv;
+}
+
+/* 16-bit value from two cfigetc bytes: low byte at off, high byte at off+1 */
+static ushort
+cfigets(Flash *flash, int off)
+{
+	uchar hi, lo;
+
+	hi = cfigetc(flash, off+1);
+	lo = cfigetc(flash, off);
+	return (hi<<8) | lo;
+}
+
+/* 32-bit value from four cfigetc bytes: lowest byte at off, highest at off+3 */
+static ulong
+cfigetl(Flash *flash, int off)
+{
+	uchar b3, b2, b1, b0;
+
+	b3 = cfigetc(flash, off+3);
+	b2 = cfigetc(flash, off+2);
+	b1 = cfigetc(flash, off+1);
+	b0 = cfigetc(flash, off);
+	return (b3<<24) | (b2<<16) | (b1<<8) | b0;
+}
+
+/*
+ * Fill in flash from the device's query data: check for the bytes
+ * 'Q' 'R' 'Y' at 0x10-0x12, then read the algorithm id (0x13), the
+ * device size (2^n, n at 0x27), the write buffer size (2^n, n at
+ * 0x2a), the number of erase regions (0x2c) and one 32-bit
+ * descriptor per region starting at 0x2d.  Prints a message and
+ * leaves flash untouched if the signature is wrong.
+ */
+static void
+cfiquery(Flash *flash)
+{
+	uchar q, r, y;
+	ulong x, addr;
+
+	q = cfigetc(flash, 0x10);
+	r = cfigetc(flash, 0x11);
+	y = cfigetc(flash, 0x12);
+	if(q != 'Q' || r != 'R' || y != 'Y'){
+		print("cfi query failed: %ux %ux %ux\n", q, r, y);
+		return;
+	}
+	flash->algid = cfigetc(flash, 0x13);
+	flash->size = (sizeof(Fword)/sizeof(Funit)) * (1<<(cfigetc(flash, 0x27)));
+	flash->wbsize = (sizeof(Fword)/sizeof(Funit)) * (1<<(cfigetc(flash, 0x2a)));
+	flash->nr = cfigetc(flash, 0x2c);
+	/* never index past the end of flash->r[] */
+	if(flash->nr > nelem(flash->r)){
+		print("cfi reports > %d regions\n", nelem(flash->r));
+		flash->nr = nelem(flash->r);
+	}
+	addr = 0;
+	/* q is reused as the region index; regions are laid end to end from 0 */
+	for(q = 0; q < flash->nr; q++){
+		x = cfigetl(flash, q * 4 + 0x2d);
+		/* high half: block size in units of 256; low half: block count - 1 */
+		flash->r[q].size = (sizeof(Fword)/sizeof(Funit)) * 256 * (x>>16);
+		flash->r[q].n = (x&0xffff)+1;
+		flash->r[q].addr = addr;
+		addr += flash->r[q].size*flash->r[q].n;
+		flash->r[q].end = addr;
+	}
+}
+
+/*
+ *  flash device interface
+ */
+
+enum
+{
+	/* qid types: the low 8 bits of a qid path (see FTYPE) */
+	Qtopdir,	/* top directory, holds "flash" */
+	Q2nddir,	/* "flash" directory, holds the partition files */
+	Qfctl,		/* a partition's ctl file */
+	Qfdata,		/* a partition's data file */
+
+	Maxpart= 8,	/* size of part[] */
+};
+
+
+/* a named partition of a flash; the slot is free while name is nil */
+typedef struct FPart FPart;
+struct FPart
+{
+	Flash	*flash;
+	char		*name;		/* name of the data file */
+	char		*ctlname;	/* name of the ctl file: name followed by "ctl" */
+	ulong	start;		/* offsets relative to the start of the flash */
+	ulong	end;
+};
+static FPart	part[Maxpart];
+
+/* a qid path is the partition index shifted up 8 bits, or'ed with the qid type */
+#define FQID(p,q)	((p)<<8|(q))
+#define FTYPE(q)	((q) & 0xff)
+#define FPART(q)	(&part[(q) >>8])
+
+/*
+ * Directory generator.  The top directory has the single entry
+ * "flash"; the second level has two entries per partition slot:
+ * even i is the ctl file, odd i the data file of part[i>>1].
+ * Returns 1 with dp filled in, 0 for an unused slot, and -1 past
+ * the end.
+ */
+static int
+gen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
+{
+	Qid q;
+	FPart *fp;
+
+	q.vers = 0;
+
+	/* top level directory contains the name of the network */
+	if(c->qid.path == Qtopdir){
+		switch(i){
+		case DEVDOTDOT:
+			q.path = Qtopdir;
+			q.type = QTDIR;
+			devdir(c, q, "#F", 0, eve, DMDIR|0555, dp);
+			break;
+		case 0:
+			q.path = Q2nddir;
+			q.type = QTDIR;
+			devdir(c, q, "flash", 0, eve, DMDIR|0555, dp);
+			break;
+		default:
+			return -1;
+		}
+		return 1;
+	}
+
+	/* second level contains all partitions and their control files */
+	switch(i) {
+	case DEVDOTDOT:
+		q.path = Qtopdir;
+		q.type = QTDIR;
+		devdir(c, q, "#F", 0, eve, DMDIR|0555, dp);
+		break;
+	default:
+		if(i >= 2*Maxpart)
+			return -1;
+		fp = &part[i>>1];
+		if(fp->name == nil)
+			return 0;
+		if(i & 1){
+			q.path = FQID(i>>1, Qfdata);
+			q.type = QTFILE;
+			devdir(c, q, fp->name, fp->end-fp->start, eve, 0660, dp);
+		} else {
+			q.path = FQID(i>>1, Qfctl);
+			q.type = QTFILE;
+			devdir(c, q, fp->ctlname, 0, eve, 0660, dp);
+		}
+		break;
+	}
+	return 1;
+}
+
+/*
+ * Return the flash whose range [offset, offset+size) contains addr,
+ * or nil if there is none.
+ */
+static Flash *
+findflash(ulong addr)
+{
+	Flash *f;
+	int i;
+
+	for(i = 0; i < Nflash; i++){
+		f = &flashes[i];
+		if(addr < f->offset)
+			continue;
+		if(addr < f->offset + f->size)
+			return f;
+	}
+	return nil;
+}
+
+/* return the partition called name, or nil if there is none */
+static FPart*
+findpart(char *name)
+{
+	FPart *fp;
+
+	for(fp = part; fp < &part[Maxpart]; fp++)
+		if(fp->name != nil && strcmp(name, fp->name) == 0)
+			return fp;
+	return nil;
+}
+
+/*
+ * Create partition name covering [start, end).  With fp == nil the
+ * addresses are absolute and are made relative to the flash that
+ * contains start; otherwise they are relative to partition fp and
+ * must fall inside it.  Both ends must lie on erase block boundaries
+ * (end may also be the end of the flash).  Raises an error for bad
+ * ranges, a name already in use, or a full partition table.
+ * name is copied, so the caller keeps ownership of its string.
+ */
+static void
+addpart(FPart *fp, char *name, ulong start, ulong end)
+{
+	int i;
+	char ctlname[64];
+	Flash *flash;
+
+	if (start > end)
+		error(Ebadarg);
+	if(fp == nil){
+		flash = findflash(start);
+		if (flash == nil || end > flash->offset + flash->size)
+			error(Ebadarg);
+		start -= flash->offset;
+		end -= flash->offset;
+	} else {
+		start += fp->start;
+		end += fp->start;
+		if(start >= fp->end || end > fp->end){
+			error(Ebadarg);
+		}
+		flash = fp->flash;
+	}
+	if(blockstart(flash, start) != start)
+		error("must start on erase boundary");
+	if(blockstart(flash, end) != end && end != flash->size)
+		error("must end on erase boundary");
+
+	fp = findpart(name);
+	if(fp != nil)
+		error(Eexist);
+	/* take the first free slot */
+	for(i = 0; i < Maxpart; i++)
+		if(part[i].name == nil)
+			break;
+	if(i == Maxpart)
+		error("no more partitions");
+	fp = &part[i];
+	kstrdup(&fp->name, name);
+	snprint(ctlname, sizeof ctlname, "%sctl", name);
+	kstrdup(&fp->ctlname, ctlname);
+	fp->flash = flash;
+	fp->start = start;
+	fp->end = end;
+}
+
+/*
+ * Remove partition fp.  The name pointers are cleared before the
+ * strings are freed, so the slot reads as free first.
+ */
+static void
+rempart(FPart *fp)
+{
+	char *p, *cp;
+
+	p = fp->name;
+	fp->name = nil;
+	cp = fp->ctlname;
+	fp->ctlname = nil;
+	free(p);
+	free(cp);
+}
+
+/*
+ * Initialise the flash devices: query each one, pick and run its
+ * access algorithm's identify routine, turn boot protection on, and
+ * create a whole-device partition "flashN".  Devices are laid out
+ * end to end, so each one's offset is the sum of the sizes before it.
+ *
+ * The driver is not finished: the error() call below is the first
+ * statement and fires before any of this work is attempted.
+ */
+void
+flashinit(void)
+{
+	int i, ctlrno;
+	char fname[16];		/* "flash" plus a controller number */
+	ulong offset;
+	Flash *flash;
+
+	error("flash driver not ready for use yet");
+
+	offset = 0;
+	for (ctlrno = 0; ctlrno < Nflash; ctlrno++){
+		flash = flashes + ctlrno;
+//		if(plan9config("flash", ctlrno, flash) == 0)
+//			continue;
+		flash->p = (Fword*)flash->mem;
+		cfiquery(flash);
+		for(i = 0; i < nelem(falg); i++)
+			if(flash->algid == falg[i].id){
+				flash->alg = &falg[i];
+				(*flash->alg->identify)(flash);
+				break;
+			}
+		flash->bootprotect = 1;
+		flash->offset = offset;
+		/* addpart copies the name, so a stack buffer is enough */
+		snprint(fname, sizeof fname, "flash%d", ctlrno);
+		addpart(nil, fname, offset, offset + flash->size);
+		offset += flash->size;
+	}
+}
+
+/* attach: plain devattach with device character 'F' */
+static Chan*
+flashattach(char* spec)
+{
+	return devattach('F', spec);
+}
+
+/* walk: devwalk with no static table; entries come from gen */
+static Walkqid*
+flashwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, nil, 0, gen);
+}
+
+/* stat: devstat with no static table; entries come from gen */
+static int
+flashstat(Chan *c, uchar *db, int n)
+{
+	return devstat(c, db, n, nil, 0, gen);
+}
+
+/* open: raises Eperm unless the current user is eve, then defers to devopen */
+static Chan*
+flashopen(Chan* c, int omode)
+{
+	omode = openmode(omode);
+	if(strcmp(up->user, eve)!=0)
+		error(Eperm);
+	return devopen(c, omode, nil, 0, gen);
+}
+
+/* close: nothing to do */
+static void
+flashclose(Chan*)
+{
+}
+
+/*
+ * Read a partition's ctl file.  The first line gives the flash
+ * offset, partition start, partition size, write buffer size,
+ * manufacturer id and device id.  Each following line describes the
+ * part of one erase region that the partition covers: start address,
+ * number of blocks, block size.
+ */
+static long
+flashctlread(FPart *fp, void* a, long n, vlong off)
+{
+	char *buf, *p, *e;
+	int i;
+	ulong addr, end;
+	Flash *flash;
+
+	flash = fp->flash;
+	buf = smalloc(READSTR);
+	e = buf + READSTR;
+	p = seprint(buf, e, "0x%-9lux 0x%-9lux 0x%-9lux 0x%-9x 0x%-9ux 0x%-9ux\n",
+		flash->offset, fp->start, fp->end-fp->start, flash->wbsize,
+		 flash->manid, flash->devid);
+	addr = fp->start;
+	for(i = 0; i < flash->nr && addr < fp->end; i++)
+		if(flash->r[i].addr <= addr && flash->r[i].end > addr){
+			/* clip the region to the end of the partition */
+			if(fp->end <= flash->r[i].end)
+				end = fp->end;
+			else
+				end = flash->r[i].end;
+			p = seprint(p, e, "0x%-9lux 0x%-9lux 0x%-9lux\n", addr,
+				(end-addr)/flash->r[i].size, flash->r[i].size);
+			addr = end;
+		}
+	n = readstr(off, a, n, buf);
+	free(buf);
+	return n;
+}
+
+/*
+ * Read n bytes at offset off of partition fp into a, holding the
+ * flash's read lock.  The read is clipped to the end of the
+ * partition; a read starting at or beyond the end returns 0.
+ * Raises an error if the partition has been removed or the caller
+ * is not eve.
+ */
+static long
+flashdataread(FPart *fp, void* a, long n, vlong off)
+{
+	Flash *flash;
+
+	flash = fp->flash;
+	rlock(flash);
+	if(waserror()){
+		runlock(flash);
+		nexterror();
+	}
+	if(fp->name == nil)
+		error("partition vanished");
+	if(!iseve())
+		error(Eperm);
+	off += fp->start;
+	if(off >= fp->end)
+		n = 0;		/* at or past the end: nothing to read */
+	else if(off+n >= fp->end)
+		n = fp->end - off;
+	if(n > 0)
+		memmove(a, ((uchar*)flash->mem)+off, n);
+	runlock(flash);
+	poperror();
+
+	return n;
+}
+
+/*
+ * Read: directories go to devdirread; ctl and data files go to
+ * flashctlread and flashdataread for the partition encoded in the
+ * qid path.  Any other qid type raises Eperm.
+ */
+static long
+flashread(Chan* c, void* a, long n, vlong off)
+{
+	int t;
+
+	if(c->qid.type == QTDIR)
+		return devdirread(c, a, n, nil, 0, gen);
+	t = FTYPE(c->qid.path);
+	switch(t){
+	default:
+		error(Eperm);
+	case Qfctl:
+		n = flashctlread(FPART(c->qid.path), a, n, off);
+		break;
+	case Qfdata:
+		n = flashdataread(FPART(c->qid.path), a, n, off);
+		break;
+	}
+	return n;
+}
+
+/*
+ * Raise an error if addr may not be modified: when the containing
+ * flash has bootprotect set, addresses in the first block of its
+ * first erase region are off limits, as is everything if the flash
+ * has no regions.  Raises Ebadarg if no flash contains addr.
+ */
+static void
+bootprotect(ulong addr)
+{
+	FlashRegion *r;
+	Flash *flash;
+
+	flash = findflash(addr);
+	if (flash == nil)
+		error(Ebadarg);
+	if(flash->bootprotect == 0)
+		return;
+	if(flash->nr == 0)
+		error("writing over boot loader disallowed");
+	r = flash->r;
+	if(addr >= r->addr && addr < r->addr + r->size)
+		error("writing over boot loader disallowed");
+}
+
+/*
+ * Return the start address of the erase block containing addr,
+ * or -1 (as a ulong) if addr lies in no region of flash.
+ */
+static ulong
+blockstart(Flash *flash, ulong addr)
+{
+	FlashRegion *r;
+	ulong i;
+
+	for(i = 0; i < flash->nr; i++){
+		r = &flash->r[i];
+		if(addr < r->addr || addr >= r->end)
+			continue;
+		/* round addr down to a multiple of the block size within the region */
+		return addr - (addr - r->addr) % r->size;
+	}
+	return -1;
+}
+
+/*
+ * Return the address just past the erase block containing addr,
+ * or -1 (as a ulong) if addr lies in no region of flash.
+ */
+static ulong
+blockend(Flash *flash, ulong addr)
+{
+	FlashRegion *r;
+	ulong i;
+
+	for(i = 0; i < flash->nr; i++){
+		r = &flash->r[i];
+		if(addr < r->addr || addr >= r->end)
+			continue;
+		/* start of the containing block, plus one block */
+		return addr - (addr - r->addr) % r->size + r->size;
+	}
+
+	return -1;
+}
+
+/*
+ * Handle a write to a partition's ctl file, holding the flash's
+ * write lock.  Commands:
+ *	erase		erase every block of the partition
+ *	erase offset	erase the block starting at offset
+ *	add name start end	create a partition inside this one
+ *	remove		remove this partition
+ *	protectboot [off]	turn boot protection on, or off with "off"
+ * Anything else, including an empty command, raises Ebadarg.
+ * Returns n.
+ */
+static long
+flashctlwrite(FPart *fp, char *p, long n)
+{
+	Cmdbuf *cmd;
+	ulong off;
+	Flash *flash;
+
+	if(fp == nil)
+		panic("flashctlwrite");
+
+	flash = fp->flash;
+	cmd = parsecmd(p, n);
+	wlock(flash);
+	if(waserror()){
+		wunlock(flash);
+		free(cmd);	/* don't leak the parsed command on errors */
+		nexterror();
+	}
+	/* an empty command has no f[0] to compare */
+	if(cmd->nf < 1)
+		error(Ebadarg);
+	if(strcmp(cmd->f[0], "erase") == 0){
+		switch(cmd->nf){
+		case 2:
+			/* erase a single block in the partition */
+			off = atoi(cmd->f[1]);
+			off += fp->start;
+			if(off >= fp->end)
+				error("region not in partition");
+			if(off != blockstart(flash, off))
+				error("erase must be a block boundary");
+			bootprotect(off);
+			(*flash->alg->erase)(flash, off);
+			break;
+		case 1:
+			/* erase the whole partition */
+			bootprotect(fp->start);
+			for(off = fp->start; off < fp->end; off = blockend(flash, off))
+				(*flash->alg->erase)(flash, off);
+			break;
+		default:
+			error(Ebadarg);
+		}
+	} else if(strcmp(cmd->f[0], "add") == 0){
+		if(cmd->nf != 4)
+			error(Ebadarg);
+		addpart(fp, cmd->f[1], strtoul(cmd->f[2], nil, 0),
+			strtoul(cmd->f[3], nil, 0));
+	} else if(strcmp(cmd->f[0], "remove") == 0){
+		rempart(fp);
+	} else if(strcmp(cmd->f[0], "protectboot") == 0){
+		if(cmd->nf == 1 || strcmp(cmd->f[1], "off") != 0)
+			flash->bootprotect = 1;
+		else
+			flash->bootprotect = 0;
+	} else
+		error(Ebadarg);
+	poperror();
+	wunlock(flash);
+	free(cmd);
+
+	return n;
+}
+
+/*
+ * Write n bytes from p at offset off of partition fp, holding the
+ * flash's write lock.  The write must lie entirely inside the
+ * partition and must not start in a boot-protected block.  The data
+ * is copied to a kernel buffer, padded at both ends to 4-byte
+ * boundaries, written in chunks that stay within one erase block and
+ * within Maxwchunk bytes, and finally compared with the flash
+ * contents.  Returns the original n.
+ *
+ * NOTE(review): flashwrite passes a vlong offset into the long
+ * parameter off.
+ */
+static long
+flashdatawrite(FPart *fp, uchar *p, long n, long off)
+{
+	int m, on;
+	long ooff;
+	uchar *buf, *end;
+	Flash *flash;
+
+	if(fp == nil)
+		panic("flashdatawrite");
+
+	flash = fp->flash;
+	buf = nil;
+	wlock(flash);
+	if(waserror()){
+		wunlock(flash);
+		if(buf != nil)
+			free(buf);
+		nexterror();
+	}
+
+	if(fp->name == nil)
+		error("partition vanished");
+	if(!iseve())
+		error(Eperm);
+
+	/* can't cross partition boundaries */
+	off += fp->start;
+	if(off >= fp->end || off+n > fp->end || n <= 0)
+		error(Ebadarg);
+
+	/* make sure we're not writing the boot sector */
+	bootprotect(off);
+
+	on = n;
+
+	/*
+	 *  get the data into kernel memory to avoid faults during writing.
+	 *  if write is not on a quad boundary or not a multiple of 4 bytes,
+	 *  extend with data already in flash.
+	 *
+	 *  NOTE(review): flash->p[] yields a 16-bit Fword, so each ulong
+	 *  store below takes only 16 bits from flash, and the first read
+	 *  uses off before it is rounded down -- confirm this matches the
+	 *  4-byte padding described above.
+	 */
+	buf = smalloc(n+8);
+	m = off & 3;
+	if(m){
+		*(ulong*)buf = flash->p[off>>Wshift];
+		n += m;
+		off -= m;
+	}
+	if(n & 3){
+		n -= n & 3;
+		*(ulong*)(&buf[n]) = flash->p[(off+n)>>Wshift];
+		n += 4;
+	}
+	memmove(&buf[m], p, on);
+
+	/* (*flash->alg->write) can't cross blocks */
+	ooff = off;
+	p = buf;
+	for(end = p + n; p < end; p += m){
+		m = blockend(flash, off) - off;
+		if(m > end - p)
+			m = end - p;
+		if(m > Maxwchunk)
+			m = Maxwchunk;
+		(*flash->alg->write)(flash, p, m, off);
+		off += m;
+	}
+
+	/* make sure write succeeded */
+	if(memcmp(buf, &flash->p[ooff>>Wshift], n) != 0)
+		error("written bytes don't match");
+
+	wunlock(flash);
+	free(buf);
+	poperror();
+
+	return on;
+}
+
+/*
+ * Write: only eve may write, and never to a directory (Eperm).
+ * Ctl and data files go to flashctlwrite and flashdatawrite for the
+ * partition encoded in the qid path; any other qid type panics.
+ */
+static long
+flashwrite(Chan* c, void* a, long n, vlong off)
+{
+	int t;
+
+	if(c->qid.type == QTDIR)
+		error(Eperm);
+
+	if(!iseve())
+		error(Eperm);
+
+	t = FTYPE(c->qid.path);
+	switch(t){
+	default:
+		panic("flashwrite");
+	case Qfctl:
+		n = flashctlwrite(FPART(c->qid.path), a, n);
+		break;
+	case Qfdata:
+		n = flashdatawrite(FPART(c->qid.path), a, n, off);
+		break;
+	}
+	return n;
+}
+
+/*
+ * Device table entry for the flash device: character 'F', name "flash".
+ * Operations with no flash-specific routine use the generic dev* ones.
+ */
+Dev flashdevtab = {
+	'F',
+	"flash",
+
+	devreset,
+	flashinit,
+	devshutdown,
+	flashattach,
+	flashwalk,
+	flashstat,
+	flashopen,
+	devcreate,
+	flashclose,
+	flashread,
+	devbread,
+	flashwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
+/* status bits tested by the ise_* (ISE*) and afs_* (AFS*) routines */
+enum
+{
+	/* status register */
+	ISEs_lockerr=		1<<1,
+	ISEs_powererr=		1<<3,
+	ISEs_progerr=		1<<4,
+	ISEs_eraseerr=		1<<5,
+	ISEs_ready=		1<<7,
+	ISEs_err= (ISEs_lockerr|ISEs_powererr|ISEs_progerr|ISEs_eraseerr),
+
+	/* extended status register */
+	ISExs_bufavail=		1<<7,
+
+	AFSs_ready=		1<<7,
+};
+
+/* intel/sharp extended command set */
+
+/* issue the reset command (0xff) */
+static void
+ise_reset(Flash* flash)
+{
+	flash->p[reg(0xaa)] = mirror(0xff);	/* reset */
+}
+
+/*
+ * Read the manufacturer and device ids into flash->manid and
+ * flash->devid, resetting the chip before and after.
+ */
+static void
+ise_id(Flash* flash)
+{
+	ise_reset(flash);
+	flash->p[reg(0xaaa)] = mirror(0x90);	/* uncover vendor info */
+	flash->manid = fromendian(flash->p[reg(0x0)]);
+	flash->devid = fromendian(flash->p[reg(0x1)]);
+	ise_reset(flash);
+}
+
+/* issue command 0x50 (presumably "clear status register" -- TODO confirm) */
+static void
+ise_clearerror(Flash* flash)
+{
+	flash->p[reg(0x200)] = mirror(0x50);
+
+}
+
+/*
+ * Raise an error describing the first error bit set in status, for
+ * the chip numbered bank: lock, then power, then program/erase.
+ * Returns normally if none of those bits is set.
+ */
+static void
+ise_error(int bank, ulong status)
+{
+	char err[64];
+
+	if(status & (ISEs_lockerr)){
+		sprint(err, "flash%d: block locked %lux", bank, status);
+		error(err);
+	}
+	if(status & (ISEs_powererr)){
+		sprint(err, "flash%d: low prog voltage %lux", bank, status);
+		error(err);
+	}
+	if(status & (ISEs_progerr|ISEs_eraseerr)){
+		sprint(err, "flash%d: i/o error %lux", bank, status);
+		error(err);
+	}
+}
+
+/*
+ * Erase the block at byte address addr: issue commands 0x20 and
+ * 0xd0, poll the status for the ready bit for up to 1500 ms, then
+ * report any error bits through ise_error and reset the chip.
+ */
+static void
+ise_erase(Flash *flash, ulong addr)
+{
+	ulong start, x;
+
+	addr >>= Wshift;	/* byte address -> index into flash->p[] */
+
+	flash->p[addr] = mirror(0x20);
+	flash->p[addr] = mirror(0xd0);
+	start = m->ticks;
+	do {
+		x = fromendian(flash->p[addr]);
+		if((x & mirror(ISEs_ready)) == mirror(ISEs_ready))
+			break;
+	} while(TK2MS(m->ticks-start) < 1500);
+
+	ise_clearerror(flash);
+	/* low and high halves of x are checked as banks 0 and 1 */
+	ise_error(0, x);
+	ise_error(1, x>>16);
+
+	ise_reset(flash);
+}
+
+/*
+ *  the flash spec claims writing goes faster if we use
+ *  the write buffer.  We fill the write buffer and then
+ *  issue the write request.  After the write request,
+ *  subsequent reads will yield the status register.
+ *
+ *  returns the status, even on timeouts.
+ *
+ *  NOTE: I tried starting back to back buffered writes
+ *	without reading the status in between, as the
+ *	flowchart in the intel data sheet suggests.
+ *	However, it always responded with an illegal
+ *	command sequence, so I must be missing something.
+ *	If someone learns better, please email me, though
+ *	I doubt it will be much faster. -  presotto@bell-labs.com
+ */
+/*
+ * Write n Fwords from p to index off through the write buffer of the
+ * block at index baddr.  Returns n on success and -1 on a timeout or
+ * when an error bit is set; *status receives the last status read.
+ */
+static long
+ise_wbwrite(Flash *flash, Fword *p, int n, ulong off, ulong baddr, ulong *status)
+{
+	Fword x;
+	ulong start;
+	int i, s;
+
+	/* put flash into write buffer mode */
+	start = m->ticks;
+	for(;;) {
+		s = splhi();
+		/* request write buffer mode */
+		flash->p[baddr] = mirror(0xe8);
+
+		/* look at extended status reg for status */
+		/* on success we leave the loop still at splhi; splx follows the program command */
+		if((flash->p[baddr] & mirror(1<<7)) == mirror(1<<7))
+			break;
+		splx(s);
+
+		/* didn't work, keep trying for 2 secs */
+		if(TK2MS(m->ticks-start) > 2000){
+			/* set up to read status */
+			flash->p[baddr] = mirror(0x70);
+			*status = fromendian(flash->p[baddr]);
+			pprint("write buffered cmd timed out\n");
+			return -1;
+		}
+	}
+
+	/* fill write buffer */
+	flash->p[baddr] = mirror(n-1);
+	for(i = 0; i < n; i++)
+		flash->p[off+i] = *p++;
+
+	/* program from buffer */
+	flash->p[baddr] = mirror(0xd0);
+	splx(s);
+
+	/* wait till the programming is done */
+	start = m->ticks;
+	for(;;) {
+		x = flash->p[baddr];	/* read status register */
+		*status = fromendian(x);
+		if((x & mirror(ISEs_ready)) == mirror(ISEs_ready))
+			break;
+		if(TK2MS(m->ticks-start) > 2000){
+			pprint("read status timed out\n");
+			return -1;
+		}
+	}
+	if(x & mirror(ISEs_err))
+		return -1;
+
+	return n;
+}
+
+/*
+ * Write n bytes from a to byte offset off using buffered writes.
+ * Raises "flash needs erase" if the data would need a bit set that
+ * is currently clear.  The last status is reported through
+ * ise_error, and the chip is reset on both the normal and the error
+ * path.
+ *
+ * NOTE(review): x is set only by ise_wbwrite; if n>>Wshift is 0 the
+ * loop never runs and x is passed to ise_error uninitialized.
+ */
+static void
+ise_write(Flash *flash, void *a, long n, ulong off)
+{
+	Fword *p, *end;
+	int i, wbsize;
+	ulong x, baddr;
+
+ 	/* everything in terms of Fwords */
+	wbsize = flash->wbsize >> Wshift;
+	baddr = blockstart(flash, off) >> Wshift;
+	off >>= Wshift;
+	n >>= Wshift;
+	p = a;
+
+	/* first see if write will succeed */
+	for(i = 0; i < n; i++)
+		if((p[i] & flash->p[off+i]) != p[i])
+			error("flash needs erase");
+
+	if(waserror()){
+		ise_reset(flash);
+		nexterror();
+	}
+
+	/*
+	 *  use the first write to reach
+ 	 *  a write buffer boundary.  the intel manual
+	 *  says writes starting at wb boundaries
+	 *  maximize speed.
+	 */
+	i = wbsize - (off & (wbsize-1));
+	for(end = p + n; p < end;){
+		if(i > end - p)
+			i = end - p;
+
+		if(ise_wbwrite(flash, p, i, off, baddr, &x) < 0)
+			break;
+
+		off += i;
+		p += i;
+		i = wbsize;
+	}
+
+	ise_clearerror(flash);
+	ise_error(0, x);
+	ise_error(1, x>>16);
+
+	ise_reset(flash);
+	poperror();
+}
+
+/*
+ * amd/fujitsu standard command set
+ *	I don't have an amd chipset to work with
+ *	so I'm loathe to write this yet.  If someone
+ *	else does, please send it to me and I'll
+ *	incorporate it -- presotto@bell-labs.com
+ */
+/* issue the reset command (0xf0) */
+static void
+afs_reset(Flash *flash)
+{
+	flash->p[reg(0xaa)] = mirror(0xf0);	/* reset */
+}
+
+/*
+ * Read the manufacturer id (index 0) and the device id (index 2),
+ * issuing the 0xaa/0x55/0x90 command sequence before each read and
+ * a reset after it.
+ */
+static void
+afs_id(Flash *flash)
+{
+	afs_reset(flash);
+	flash->p[reg(0xaa)] = mirror(0xf0);	/* reset */
+	flash->p[reg(0xaaa)] = mirror(0xaa);	/* query vendor block */
+	flash->p[reg(0x554)] = mirror(0x55);
+	flash->p[reg(0xaaa)] = mirror(0x90);
+	flash->manid = fromendian(flash->p[reg(0x00)]);
+	afs_reset(flash);
+	flash->p[reg(0xaaa)] = mirror(0xaa);	/* query vendor block */
+	flash->p[reg(0x554)] = mirror(0x55);
+	flash->p[reg(0xaaa)] = mirror(0x90);
+	flash->devid = fromendian(flash->p[reg(0x02)]);
+	afs_reset(flash);
+}
+
+/*
+ * Erase the block at byte address addr: issue the six-write command
+ * sequence ending with 0x30 at the block, then poll the block for
+ * the ready bit for up to 1500 ms.  The final status is not checked.
+ */
+static void
+afs_erase(Flash *flash, ulong addr)
+{
+	ulong start, x;
+
+	addr >>= Wshift;	/* byte address -> index into flash->p[] */
+	afs_reset(flash);
+	flash->p[reg(0x555)] = mirror(0xaa);
+	flash->p[reg(0x2aa)] = mirror(0x55);
+	flash->p[reg(0x555)] = mirror(0x80);
+	flash->p[reg(0x555)] = mirror(0xaa);
+	flash->p[reg(0x2aa)] = mirror(0x55);
+	flash->p[reg(addr)] = mirror(0x30);
+
+	start = m->ticks;
+	do {
+		x = flash->p[reg(addr)];
+		if((x & mirror(AFSs_ready)) == mirror(AFSs_ready))
+			break;
+	} while(TK2MS(m->ticks-start) < 1500);
+}
+
+/*
+ * Write n bytes from a to byte offset offs, one Fword at a time.
+ * Raises "flash needs erase" if the data would need a bit set that
+ * is currently clear, and "timed out" if a word does not read back
+ * as written within 1500 ms.  The chip is reset on the error path.
+ */
+static void
+afs_write(Flash *flash, void *a, long n, ulong offs)
+{
+	Fword *p;
+	int i;
+	ulong x, start;
+
+	/* everything in terms of Fwords */
+	offs >>= Wshift;
+	n >>= Wshift;
+	p = a;
+
+	/* first see if write will succeed */
+	for(i = 0; i < n; i++)
+		if((p[i] & flash->p[offs + i]) != p[i])
+			error("flash needs erase");
+
+	if(waserror()){
+		afs_reset(flash);
+		nexterror();
+	}
+
+	for(i = 0; i < n; i++){
+		/* command sequence 0xaa, 0x55, 0xa0, then the data word itself */
+		flash->p[reg(0x555)] = mirror(0xaa);
+		flash->p[reg(0x2aa)] = mirror(0x55);
+		flash->p[reg(0x555)] = mirror(0xa0);
+		flash->p[reg(offs + i)] = mirror(p[i]);
+
+		/* done when the word reads back as written */
+		start = m->ticks;
+		do {
+			x = flash->p[reg(offs + i)];
+			if(x == p[i])
+				break;
+		} while(TK2MS(m->ticks-start) < 1500);
+
+		if(x != p[i])
+			error("timed out");
+	}
+	poperror();
+}

+ 92 - 0
sys/src/9/kw/devtab.c

@@ -0,0 +1,92 @@
+/*
+ * Stub.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+extern Dev* devtab[];
+
+/*
+ * Call the reset routine of every driver in the nil-terminated devtab,
+ * in table order.  A driver without a reset routine is taken as a sign
+ * of memory corruption and panics.
+ */
+void
+devtabreset(void)
+{
+	int n;
+	Dev *d;
+
+	n = 0;
+	while((d = devtab[n]) != nil){
+		if(d->reset == nil)
+			panic("corrupt memory: nil devtab[%d]->reset", n);
+		d->reset();
+		n++;
+	}
+}
+
+/*
+ * Call the init routine of every driver in the nil-terminated devtab,
+ * in table order.
+ */
+void
+devtabinit(void)
+{
+	Dev **dp;
+
+	for(dp = devtab; *dp != nil; dp++)
+		(*dp)->init();
+}
+
+/*
+ * Call the shutdown routine of every driver in devtab,
+ * last table entry first.
+ */
+void
+devtabshutdown(void)
+{
+	int n;
+
+	/* count the entries, then walk back down */
+	n = 0;
+	while(devtab[n] != nil)
+		n++;
+	while(--n >= 0)
+		devtab[n]->shutdown();
+}
+
+
+/*
+ * Find the driver whose device character is dc.
+ * Returns nil when there is none and user is non-zero;
+ * panics when there is none and user is zero.
+ */
+Dev*
+devtabget(int dc, int user)
+{
+	Dev **dp;
+
+	for(dp = devtab; *dp != nil; dp++)
+		if((*dp)->dc == dc)
+			return *dp;
+
+	if(!user)
+		panic("devtabget %C\n", dc);
+
+	return nil;
+}
+
+/*
+ * Read handler that lists the configured drivers, one "#c name" line
+ * per devtab entry.  The text is built in a READSTR-sized scratch
+ * buffer and the part selected by off and n is copied to buf with
+ * readstr.  Returns the count from readstr; raises Enomem when the
+ * scratch buffer cannot be allocated.
+ */
+long
+devtabread(Chan*, void* buf, long n, vlong off)
+{
+	int i;
+	Dev *dev;
+	char *alloc, *e, *p;
+
+	alloc = malloc(READSTR);
+	if(alloc == nil)
+		error(Enomem);
+
+	p = alloc;
+	e = p + READSTR;
+	for(i = 0; devtab[i] != nil; i++){
+		dev = devtab[i];
+		p = seprint(p, e, "#%C %s\n", dev->dc, dev->name);
+	}
+
+	/* make sure the scratch buffer is freed if readstr raises an error */
+	if(waserror()){
+		free(alloc);
+		nexterror();
+	}
+	n = readstr(off, buf, n, alloc);
+	free(alloc);
+	poperror();
+
+	return n;
+}

+ 1455 - 0
sys/src/9/kw/devusb.c

@@ -0,0 +1,1455 @@
+/*
+ * USB device driver.
+ *
+ * This is in charge of providing access to actual HCIs
+ * and providing I/O to the various endpoints of devices.
+ * A separate user program (usbd) is in charge of
+ * enumerating the bus, setting up endpoints and
+ * starting devices (also user programs).
+ *
+ * The interface provided is a violation of the standard:
+ * you're welcome.
+ *
+ * The interface consists of a root directory with several files
+ * plus a directory (epN.M) with two files per endpoint.
+ * A device is represented by its first endpoint, which
+ * is a control endpoint automatically allocated for each device.
+ * Device control endpoints may be used to create new endpoints.
+ * Devices corresponding to hubs may also allocate new devices,
+ * perhaps also hubs. Initially, a hub device is allocated for
+ * each controller present, to represent its root hub. Those can
+ * never be removed.
+ *
+ * All endpoints refer to the first endpoint (epN.0) of the device,
+ * which keeps per-device information, and also to the HCI used
+ * to reach them, although all endpoints also cache that information.
+ *
+ * epN.M/data files permit I/O and are considered DMEXCL.
+ * epN.M/ctl files provide status info and accept control requests.
+ *
+ * Endpoints may be given file names to be listed also at #u,
+ * for those drivers that have nothing to do after configuring the
+ * device and its endpoints.
+ *
+ * Drivers for different controllers are kept at usb[oue]hci.c
+ * It's likely we could factor out much from controllers into
+ * a generic controller driver, the problem is that details
+ * regarding how to handle toggles, tokens, Tds, etc. will
+ * get in the way. Thus, code is probably easier the way it is.
+ *
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+#include	"usb.h"
+
+typedef struct Hcitype Hcitype;
+
+/*
+ * Qid path numbers, ctl command indexes and hub feature selectors.
+ * The three groups of Q and CM constants each restart at 0 because
+ * they index different things: qid paths, the per-endpoint file kind,
+ * usbctls[] and epctls[].
+ */
+enum
+{
+	/* Qid numbers */
+	Qdir = 0,		/* #u */
+	Qusbdir,			/* #u/usb */
+	Qctl,			/* #u/usb/ctl - control requests */
+
+	Qep0dir,			/* #u/usb/ep0.0 - endpoint 0 dir */
+	Qep0io,			/* #u/usb/ep0.0/data - endpoint 0 I/O */
+	Qep0ctl,		/* #u/usb/ep0.0/ctl - endpoint 0 ctl. */
+	Qep0dummy,		/* give 4 qids to each endpoint */
+
+	Qepdir = 0,		/* (qid-qep0dir)&3 is one of these */
+	Qepio,			/* to identify which file for the endpoint */
+	Qepctl,
+
+	/* ... */
+
+	/* Usb ctls. */
+	CMdebug = 0,		/* debug on|off */
+	CMdump,			/* dump (data structures for debug) */
+	CMreset,		/* reset the bus; start over */
+
+	/* Ep. ctls */
+	CMnew = 0,		/* new nb ctl|bulk|intr|iso r|w|rw (endpoint) */
+	CMnewdev,		/* newdev full|low|high portnb (allocate new devices) */
+	CMhub,			/* hub (set the device as a hub) */
+	CMspeed,		/* speed full|low|high|no */
+	CMmaxpkt,		/* maxpkt size */
+	CMntds,			/* ntds nb (max nb. of tds per µframe) */
+	CMclrhalt,		/* clrhalt (halt was cleared on endpoint) */
+	CMpollival,		/* pollival interval (interrupt/iso) */
+	CMhz,			/* hz n (samples/sec; iso) */
+	CMsamplesz,		/* samplesz n (sample size; iso) */
+	CMinfo,			/* info infostr (free-form endpoint info for humans) */
+	CMdetach,		/* detach (abort I/O forever on this ep). */
+	CMaddress,		/* address (address is assigned) */
+	CMdebugep,		/* debug n (set/clear debug for this ep) */
+	CMname,			/* name str (show up as #u/name as well) */
+
+	/* Hub feature selectors */
+	Rportenable	= 1,
+	Rportreset	= 4,
+
+};
+
+/*
+ * A kind of host controller interface registered with addhcitype:
+ * its name and the routine used to probe/reset a controller of that kind.
+ */
+struct Hcitype
+{
+	char*	type;		/* name, e.g. matched against "uhci" in hciprobe */
+	int	(*reset)(Hci*);	/* a negative return makes hciprobe give up */
+};
+
+/* the qid path as an int; compared against the Q* constants above */
+#define QID(q)	((int)(q).path)
+
+/* error strings raised by this driver; Estalled is not static */
+static char Edetach[] = "device is detached";
+static char Enotconf[] = "endpoint not configured";
+char Estalled[] = "endpoint stalled";
+
+/*
+ * Commands accepted on #u/usb/ctl, looked up by usbctl().
+ * NOTE(review): the third field is presumably the required number
+ * of fields including the command word; confirm against Cmdtab.
+ */
+static Cmdtab usbctls[] =
+{
+	{CMdebug,	"debug",	2},
+	{CMdump,	"dump",		1},
+	{CMreset,	"reset",	1},
+};
+
+/*
+ * Commands accepted on an endpoint's ctl file, looked up by epctl().
+ * NOTE(review): the third field is presumably the required number of
+ * fields including the command word, with 0 (used for "info") meaning
+ * any number; confirm against Cmdtab.
+ */
+static Cmdtab epctls[] =
+{
+	{CMnew,		"new",		4},
+	{CMnewdev,	"newdev",	3},
+	{CMhub,		"hub",		1},
+	{CMspeed,	"speed",	2},
+	{CMmaxpkt,	"maxpkt",	2},
+	{CMntds,	"ntds",		2},
+	{CMpollival,	"pollival",	2},
+	{CMsamplesz,	"samplesz",	2},
+	{CMhz,		"hz",		2},
+	{CMinfo,		"info",		0},
+	{CMdetach,	"detach",	1},
+	{CMaddress,	"address",	1},
+	{CMdebugep,	"debug",	2},
+	{CMclrhalt,	"clrhalt",	1},
+	{CMname,	"name",		2},
+};
+
+/* fixed files listed first in #u/usb by usbgen; endpoint dirs follow them */
+static Dirtab usbdir[] =
+{
+	"ctl",		{Qctl},		0,	0666,
+};
+
+/* endpoint I/O mode names, indexed by open mode; parsed by name2mode */
+char *usbmodename[] =
+{
+	[OREAD]	"r",
+	[OWRITE]	"w",
+	[ORDWR]	"rw",
+};
+
+/* transfer type names, indexed by the T* constants; parsed by name2ttype */
+static char *ttname[] =
+{
+	[Tnone]	"none",
+	[Tctl]	"control",
+	[Tiso]	"iso",
+	[Tintr]	"interrupt",
+	[Tbulk]	"bulk",
+};
+
+/* device speed names, indexed by the speed constants; parsed by name2speed */
+static char *spname[] =
+{
+	[Fullspeed]	"full",
+	[Lowspeed]	"low",
+	[Highspeed]	"high",
+	[Nospeed]	"no",
+};
+
+static int	debug;		/* global debug level, set through the "debug" ctl */
+static Hcitype	hcitypes[Nhcis];	/* controller kinds registered by addhcitype */
+static Hci*	hcis[Nhcis];	/* controllers found by usbreset; entries may be nil */
+static QLock	epslck;		/* add, del, lookup endpoints */
+static Ep*	eps[Neps];	/* all endpoints known */
+static int	epmax;		/* 1 + last endpoint index used  */
+static int	usbidgen;	/* device address generator */
+
+/*
+ * Format a short preview of the n-byte buffer d into [s, se):
+ * its address and length, at most the first ten bytes in hex, and
+ * "..." when the buffer is longer than that.  Returns what seprint
+ * returns for the last piece written.
+ * (Is there something like this in a library? should it be?)
+ */
+char*
+seprintdata(char *s, char *se, uchar *d, int n)
+{
+	enum { Maxshown = 10 };
+	int k, shown;
+
+	shown = n > Maxshown ? Maxshown : n;
+	s = seprint(s, se, " %#p[%d]: ", d, n);
+	for(k = 0; k < shown; k++)
+		s = seprint(s, se, " %2.2ux", d[k]);
+	if(n > shown)
+		s = seprint(s, se, "...");
+	return s;
+}
+
+/*
+ * Map a speed name to its index in spname[];
+ * Nospeed when the name is not known.
+ */
+static int
+name2speed(char *name)
+{
+	int sp;
+
+	sp = 0;
+	while(sp < nelem(spname)){
+		if(strcmp(name, spname[sp]) == 0)
+			return sp;
+		sp++;
+	}
+	return Nospeed;
+}
+
+/*
+ * Map a transfer type to a T* constant.  The argument is either one
+ * of the names in ttname[] or a number, in which case number+1 must
+ * be one of Tctl, Tiso, Tbulk or Tintr.  Anything else yields Tnone.
+ */
+static int
+name2ttype(char *name)
+{
+	int k, tt;
+
+	k = 0;
+	while(k < nelem(ttname)){
+		if(strcmp(name, ttname[k]) == 0)
+			return k;
+		k++;
+	}
+	/* may be a std. USB ep. type */
+	k = strtol(name, nil, 0);
+	tt = k+1;
+	if(tt == Tctl || tt == Tiso || tt == Tbulk || tt == Tintr)
+		return tt;
+	return Tnone;
+}
+
+/*
+ * Map "r", "w" or "rw" to its index in usbmodename[];
+ * -1 when the name is not known.
+ */
+static int
+name2mode(char *mode)
+{
+	int k;
+
+	k = 0;
+	while(k < nelem(usbmodename)){
+		if(strcmp(mode, usbmodename[k]) == 0)
+			return k;
+		k++;
+	}
+	return -1;
+}
+
+/*
+ * Convert a qid path to an index into eps[]; each endpoint owns
+ * four consecutive qids starting at Qep0dir.  Returns -1 when the
+ * index is out of range or the slot is empty.
+ */
+static int
+qid2epidx(int q)
+{
+	int idx;
+
+	idx = (q-Qep0dir)/4;
+	if(idx >= 0 && idx < epmax && eps[idx] != nil)
+		return idx;
+	return -1;
+}
+
+/*
+ * Report whether qid path q is an endpoint qid of the given kind
+ * (Qepdir, Qepio or Qepctl).  Paths below Qep0dir never match.
+ */
+static int
+isqtype(int q, int type)
+{
+	if(q >= Qep0dir)
+		return ((q - Qep0dir) & 3) == type;
+	return 0;
+}
+
+/*
+ * Register a host controller kind: its name t and reset routine r
+ * go in the next free slot of hcitypes[].  Panics when the table is full.
+ */
+void
+addhcitype(char* t, int (*r)(Hci*))
+{
+	static int ntype;
+	Hcitype *ht;
+
+	if(ntype == Nhcis)
+		panic("too many USB host interface types");
+	ht = &hcitypes[ntype++];
+	ht->type = t;
+	ht->reset = r;
+}
+
+/*
+ * Format a one-line description of endpoint ep into [s, se) and
+ * return the new end of the string.  With all set, extra debugging
+ * fields are included (device and endpoint numbers, load, reference
+ * count, index, name and, for a setup endpoint, the controller number
+ * and the list of the device's endpoints).  The endpoint's info
+ * string, if any, follows on its own line.  ep is qlocked while its
+ * fields are read.
+ */
+static char*
+seprintep(char *s, char *se, Ep *ep, int all)
+{
+	static char* dsnames[] = { "config", "enabled", "detached" };
+	Udev *d;
+	int i;
+	int di;
+
+	d = ep->dev;
+
+	qlock(ep);
+	if(waserror()){
+		qunlock(ep);
+		nexterror();
+	}
+	di = ep->dev->nb;
+	if(all)
+		s = seprint(s, se, "dev %d ep %d ", di, ep->nb);
+	s = seprint(s, se, "%s", dsnames[ep->dev->state]);
+	s = seprint(s, se, " %s", ttname[ep->ttype]);
+	/* usbmodename[] only has entries for these three modes */
+	assert(ep->mode == OREAD || ep->mode == OWRITE || ep->mode == ORDWR);
+	s = seprint(s, se, " %s", usbmodename[ep->mode]);
+	s = seprint(s, se, " speed %s", spname[d->speed]);
+	s = seprint(s, se, " maxpkt %ld", ep->maxpkt);
+	s = seprint(s, se, " pollival %ld", ep->pollival);
+	s = seprint(s, se, " samplesz %ld", ep->samplesz);
+	s = seprint(s, se, " hz %ld", ep->hz);
+	s = seprint(s, se, " hub %d", ep->dev->hub);
+	s = seprint(s, se, " port %d", ep->dev->port);
+	if(ep->inuse)
+		s = seprint(s, se, " busy");
+	else
+		s = seprint(s, se, " idle");
+	if(all){
+		s = seprint(s, se, " load %uld", ep->load);
+		s = seprint(s, se, " ref %ld addr %#p", ep->ref, ep);
+		s = seprint(s, se, " idx %d", ep->idx);
+		if(ep->name != nil)
+			s = seprint(s, se, " name '%s'", ep->name);
+		if(ep == ep->ep0){
+			s = seprint(s, se, " ctlrno %#x", ep->hp->ctlrno);
+			s = seprint(s, se, " eps:");
+			for(i = 0; i < nelem(d->eps); i++)
+				if(d->eps[i] != nil)
+					s = seprint(s, se, " ep%d.%d", di, i);
+		}
+	}
+	if(ep->info != nil)
+		s = seprint(s, se, "\n%s\n", ep->info);
+	else
+		s = seprint(s, se, "\n");
+	qunlock(ep);
+	poperror();
+	return s;
+}
+
+/*
+ * Allocate an endpoint for controller hp, enter it in the first free
+ * slot of eps[] and give it default values (maxpkt 8, ntds 1, and
+ * samplesz, pollival and hz void).  The new endpoint has one reference.
+ * Returns nil, after printing a diagnostic, when there is no memory
+ * or no free slot.
+ */
+static Ep*
+epalloc(Hci *hp)
+{
+	Ep *ep;
+	int i;
+
+	ep = mallocz(sizeof(Ep), 1);
+	if(ep == nil){
+		/* fail the same way as when eps[] is full instead of using nil */
+		print("usb: epalloc: no memory\n");
+		return nil;
+	}
+	ep->ref = 1;
+	qlock(&epslck);
+	for(i = 0; i < Neps; i++)
+		if(eps[i] == nil)
+			break;
+	if(i == Neps){
+		qunlock(&epslck);
+		free(ep);
+		print("usb: bug: too few endpoints.\n");
+		return nil;
+	}
+	ep->idx = i;
+	if(epmax <= i)
+		epmax = i+1;
+	eps[i] = ep;
+	ep->hp = hp;
+	ep->maxpkt = 8;
+	ep->ntds = 1;
+	ep->samplesz = ep->pollival = ep->hz = 0; /* make them void */
+	qunlock(&epslck);
+	return ep;
+}
+
+/*
+ * Look up endpoint number i in eps[] and return it with a new
+ * reference, or nil when there is no such endpoint.  The caller
+ * releases the reference with putep.
+ */
+static Ep*
+getep(int i)
+{
+	Ep *ep;
+
+	/* cheap check without the lock; the slot is looked at again under it */
+	if(i >= 0 && i < epmax && eps[i] != nil){
+		qlock(&epslck);
+		ep = eps[i];
+		if(ep != nil)
+			incref(ep);
+		qunlock(&epslck);
+		return ep;
+	}
+	return nil;
+}
+
+/*
+ * Drop a reference to ep; nil is accepted and ignored.  When the
+ * last reference goes, the endpoint is removed from eps[] and from
+ * its device's endpoint table, the reference it held on the setup
+ * endpoint (ep0) is released, and its memory is freed.
+ * NOTE(review): the Udev (ep->dev) is not freed here, not even when
+ * ep is the setup endpoint; confirm whether it is released elsewhere.
+ */
+static void
+putep(Ep *ep)
+{
+	Udev *d;
+
+	if(ep != nil && decref(ep) == 0){
+		d = ep->dev;
+		/* NOTE(review): d is used here before the d != nil check below */
+		deprint("usb: ep%d.%d %#p released\n", d->nb, ep->nb, ep);
+		qlock(&epslck);
+		eps[ep->idx] = nil;
+		if(ep->idx == epmax-1)
+			epmax--;
+		/* give the device address back if it was the last one handed out */
+		if(ep == ep->ep0 && ep->dev != nil && ep->dev->nb == usbidgen)
+			usbidgen--;
+		qunlock(&epslck);
+		if(d != nil){
+			qlock(ep->ep0);
+			d->eps[ep->nb] = nil;
+			qunlock(ep->ep0);
+		}
+		/* a non-setup endpoint holds a reference on its ep0 (see newdevep) */
+		if(ep->ep0 != ep){
+			putep(ep->ep0);
+			ep->ep0 = nil;
+		}
+		free(ep->info);
+		free(ep->name);
+		free(ep);
+	}
+}
+
+/*
+ * Debugging aid for the "dump" ctl: print every known endpoint,
+ * first in the portable format of seprintep and then in the
+ * controller's own format, and then ask each controller to dump itself.
+ * NOTE(review): buf is static and not protected by a lock here.
+ */
+static void
+dumpeps(void)
+{
+	int i;
+	static char buf[512];
+	char *s;
+	char *e;
+	Ep *ep;
+
+	print("usb dump eps: epmax %d Neps %d (ref=1+ for dump):\n", epmax, Neps);
+	for(i = 0; i < epmax; i++){
+		s = buf;
+		e = buf+sizeof(buf);
+		ep = getep(i);
+		if(ep != nil){
+			/* release the reference taken by getep if printing fails */
+			if(waserror()){
+				putep(ep);
+				nexterror();
+			}
+			s = seprint(s, e, "ep%d.%d ", ep->dev->nb, ep->nb);
+			seprintep(s, e, ep, 1);
+			print("%s", buf);
+			ep->hp->seprintep(buf, e, ep);
+			print("%s", buf);
+			poperror();
+			putep(ep);
+		}
+	}
+	print("usb dump hcis:\n");
+	for(i = 0; i < Nhcis; i++)
+		if(hcis[i] != nil)
+			hcis[i]->dump(hcis[i]);
+}
+
+/*
+ * Hand out the next device address from usbidgen.  A complaint is
+ * printed when the address reaches 0x7F, but it is returned anyway.
+ */
+static int
+newusbid(Hci *)
+{
+	int addr;
+
+	qlock(&epslck);
+	usbidgen++;
+	addr = usbidgen;
+	if(addr >= 0x7F)
+		print("#u: too many device addresses; reuse them more\n");
+	qunlock(&epslck);
+	return addr;
+}
+
+/*
+ * Create endpoint 0 for a new device
+ * on controller hp and return it.  The device gets a fresh address
+ * from newusbid, starts in state Dconfig, and runs at high speed if
+ * the controller is high speed, full speed otherwise.  ishub and
+ * isroot are recorded in the device.
+ * Raises an error when there is no memory or no free endpoint slot,
+ * instead of dereferencing a nil pointer; the "newdev" ctl request
+ * reaches this code from user level.
+ * NOTE(review): usbinit also calls this; confirm an error raised
+ * there is acceptable (the old code would have faulted instead).
+ */
+static Ep*
+newdev(Hci *hp, int ishub, int isroot)
+{
+	Ep *ep;
+	Udev *d;
+
+	/* allocate the device first so that a failure leaves eps[] untouched */
+	d = mallocz(sizeof(Udev), 1);
+	if(d == nil)
+		error(Enomem);
+	ep = epalloc(hp);
+	if(ep == nil){
+		free(d);
+		error("too many endpoints");
+	}
+	ep->dev = d;
+	d->nb = newusbid(hp);
+	d->eps[0] = ep;
+	ep->nb = 0;
+	ep->toggle[0] = ep->toggle[1] = 0;
+	d->ishub = ishub;
+	d->isroot = isroot;
+	if(hp->highspeed != 0)
+		d->speed = Highspeed;
+	else
+		d->speed = Fullspeed;
+	d->state = Dconfig;		/* address not yet set */
+	ep->ep0 = ep;			/* no ref counted here */
+	ep->ttype = Tctl;
+	ep->mode = ORDWR;
+	dprint("newdev %#p ep%d.%d %#p\n", d, d->nb, ep->nb, ep);
+	return ep;
+}
+
+/*
+ * Create a new endpoint for the device
+ * accessed via the given endpoint 0.
+ * i is the endpoint number within the device, tt its transfer type
+ * and mode its I/O mode.  The new endpoint takes a reference on ep
+ * (released in putep) and inherits its debug setting.  Interrupt and
+ * iso endpoints get a default pollival of 10; iso endpoints also get
+ * samplesz 4 and hz 44100.
+ * Returns the new endpoint.  Raises an error when endpoint i already
+ * exists or no endpoint can be allocated.
+ */
+static Ep*
+newdevep(Ep *ep, int i, int tt, int mode)
+{
+	Ep *nep;
+	Udev *d;
+
+	d = ep->dev;
+	if(d->eps[i] != nil)
+		error("endpoint already in use");
+	nep = epalloc(ep->hp);
+	if(nep == nil)
+		error("too many endpoints");
+	incref(ep);
+	d->eps[i] = nep;
+	nep->nb = i;
+	nep->toggle[0] = nep->toggle[1] = 0;
+	nep->ep0 = ep;
+	nep->dev = ep->dev;
+	nep->mode = mode;
+	nep->ttype = tt;
+	nep->debug = ep->debug;
+	if(tt == Tintr || tt == Tiso)	/* assign defaults */
+		nep->pollival = 10;
+	if(tt == Tiso){
+		nep->samplesz = 4;
+		nep->hz = 44100;
+	}
+	deprint("newdevep ep%d.%d %#p\n", d->nb, nep->nb, nep);
+	return nep;		/* the endpoint just created, not the setup endpoint */
+}
+
+/*
+ * Permission bits for an endpoint's data file given its I/O mode:
+ * read-only, write-only or read-write for owner and group,
+ * always with DMEXCL.
+ */
+static int
+epdataperm(int mode)
+{
+	if(mode == OREAD)
+		return 0440|DMEXCL;
+	if(mode == OWRITE)
+		return 0220|DMEXCL;
+	return 0660|DMEXCL;
+}
+
+/*
+ * Directory generator passed to devwalk, devstat, devopen and
+ * devdirread.  Fills dp with entry number s of the directory that
+ * c->qid belongs to.  Returns 1 when dp was filled in, 0 when slot s
+ * is empty and should be skipped, and -1 when there are no more entries.
+ *
+ * Layout generated:
+ *	#u		"usb", then one file per named endpoint
+ *	#u/usb		the usbdir[] files, then one epN.M dir per endpoint
+ *	#u/usb/epN.M	"data" and "ctl"
+ * When c is a file rather than a directory, s is forced so that the
+ * entry generated is the one for that file.
+ */
+static int
+usbgen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
+{
+	Qid q;
+	Dirtab *dir;
+	int perm;
+	char *se;
+	Ep *ep;
+	int nb;
+	int mode;
+
+	if(0)ddprint("usbgen q %#x s %d...", QID(c->qid), s);
+	if(s == DEVDOTDOT){
+		/* parent of #u and #u/usb is #u; of anything deeper, #u/usb */
+		if(QID(c->qid) <= Qusbdir){
+			mkqid(&q, Qdir, 0, QTDIR);
+			devdir(c, q, "#u", 0, eve, 0555, dp);
+		}else{
+			mkqid(&q, Qusbdir, 0, QTDIR);
+			devdir(c, q, "usb", 0, eve, 0555, dp);
+		}
+		if(0)ddprint("ok\n");
+		return 1;
+	}
+
+	switch(QID(c->qid)){
+	case Qdir:				/* list #u */
+		if(s == 0){
+			mkqid(&q, Qusbdir, 0, QTDIR);
+			devdir(c, q, "usb", 0, eve, 0555, dp);
+			if(0)ddprint("ok\n");
+			return 1;
+		}
+		s--;
+		if(s < 0 || s >= epmax)
+			goto Fail;
+		/* only endpoints given a name show up directly under #u */
+		ep = getep(s);
+		if(ep == nil || ep->name == nil){
+			if(ep != nil)
+				putep(ep);
+			if(0)ddprint("skip\n");
+			return 0;
+		}
+		if(waserror()){
+			putep(ep);
+			nexterror();
+		}
+		mkqid(&q, Qep0io+s*4, 0, QTFILE);
+		devdir(c, q, ep->name, 0, eve, epdataperm(ep->mode), dp);
+		putep(ep);
+		poperror();
+		if(0)ddprint("ok\n");
+		return 1;
+
+	case Qusbdir:				/* list #u/usb */
+	Usbdir:
+		if(s < nelem(usbdir)){
+			dir = &usbdir[s];
+			mkqid(&q, dir->qid.path, 0, QTFILE);
+			devdir(c, q, dir->name, dir->length, eve, dir->perm, dp);
+			if(0)ddprint("ok\n");
+			return 1;
+		}
+		s -= nelem(usbdir);
+		if(s < 0 || s >= epmax)
+			goto Fail;
+		ep = getep(s);
+		if(ep == nil){
+			if(0)ddprint("skip\n");
+			return 0;
+		}
+		if(waserror()){
+			putep(ep);
+			nexterror();
+		}
+		se = up->genbuf+sizeof(up->genbuf);
+		seprint(up->genbuf, se, "ep%d.%d", ep->dev->nb, ep->nb);
+		mkqid(&q, Qep0dir+4*s, 0, QTDIR);
+		putep(ep);
+		poperror();
+		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+		if(0)ddprint("ok\n");
+		return 1;
+
+	case Qctl:
+		/* c is #u/usb/ctl itself: generate its own entry */
+		s = 0;
+		goto Usbdir;
+
+	default:				/* list #u/usb/epN.M */
+		nb = qid2epidx(QID(c->qid));
+		ep = getep(nb);
+		if(ep == nil)
+			goto Fail;
+		mode = ep->mode;
+		putep(ep);
+		if(isqtype(QID(c->qid), Qepdir)){
+		Epdir:
+			switch(s){
+			case 0:
+				mkqid(&q, Qep0io+nb*4, 0, QTFILE);
+				perm = epdataperm(mode);
+				devdir(c, q, "data", 0, eve, perm, dp);
+				break;
+			case 1:
+				mkqid(&q, Qep0ctl+nb*4, 0, QTFILE);
+				devdir(c, q, "ctl", 0, eve, 0664, dp);
+				break;
+			default:
+				goto Fail;
+			}
+		}else if(isqtype(QID(c->qid), Qepctl)){
+			/* c is the ctl file: generate its own entry */
+			s = 1;
+			goto Epdir;
+		}else{
+			/* c is the data file: generate its own entry */
+			s = 0;
+			goto Epdir;
+		}
+		if(0)ddprint("ok\n");
+		return 1;
+	}
+Fail:
+	if(0)ddprint("fail\n");
+	return -1;
+	
+}
+
+/*
+ * Try to set up controller number ctlrno using host controller kind
+ * cardno (an index into hcitypes[]).  With cardno < 0 the kind is
+ * searched for by name.  On success the controller's interrupt is
+ * enabled, a line announcing it is printed and the new Hci is
+ * returned; otherwise nil is returned.
+ * NOTE(review): the result of mallocz is not checked.
+ */
+static Hci*
+hciprobe(int cardno, int ctlrno)
+{
+	Hci *hp;
+	char *type;
+	char name[64];
+	static int epnb = 1;	/* guess the endpoint nb. for the controller */
+
+	ddprint("hciprobe %d %d\n", cardno, ctlrno);
+	hp = mallocz(sizeof(Hci), 1);
+	hp->ctlrno = ctlrno;
+	hp->tbdf = BUSUNKNOWN;
+
+	/*
+	 * NOTE(review): nothing visible here sets hp->type after the
+	 * mallocz above, so if that call clears the memory this search
+	 * always looks for "uhci"; confirm.
+	 */
+	if(cardno < 0)
+		for(cardno = 0; cardno < Nhcis; cardno++){
+			if(hcitypes[cardno].type == nil)
+				break;
+			type = hp->type;
+			if(type==nil || *type==0)
+				type = "uhci";
+			if(cistrcmp(hcitypes[cardno].type, type) == 0)
+				break;
+		}
+
+	if(cardno >= Nhcis || hcitypes[cardno].type == nil){
+		free(hp);
+		return nil;
+	}
+	dprint("%s...", hcitypes[cardno].type);
+	if(hcitypes[cardno].reset(hp) < 0){
+		free(hp);
+		return nil;
+	}
+
+	snprint(name, sizeof(name), "usb%s", hcitypes[cardno].type);
+	intrenable(Irqlo, hp->irq, hp->interrupt, hp, name);
+	print("#u/usb/ep%d.0: %s: port 0x%luX irq %d\n",
+		epnb++, hcitypes[cardno].type, hp->port, hp->irq);
+	return hp;
+}
+
+/*
+ * Device reset routine: find the controllers and fill in hcis[].
+ * A first pass probes every slot with the kind chosen by name; a
+ * second pass tries the registered kinds in order for the slots that
+ * are still empty, moving to the next kind each time a probe fails.
+ */
+static void
+usbreset(void)
+{
+	int cardno, ctlrno;
+	Hci *hp;
+
+	dprint("usbreset\n");
+
+	for(ctlrno = 0; ctlrno < Nhcis; ctlrno++){
+		hp = hciprobe(-1, ctlrno);
+		if(hp != nil)
+			hcis[ctlrno] = hp;
+	}
+	cardno = 0;
+	ctlrno = 0;
+	while(cardno < Nhcis && ctlrno < Nhcis && hcitypes[cardno].type != nil){
+		if(hcis[ctlrno] != nil){
+			ctlrno++;
+			continue;
+		}
+		hp = hciprobe(cardno, ctlrno);
+		if(hp == nil)
+			cardno++;
+		/* the slot is consumed even when the probe failed */
+		hcis[ctlrno] = hp;
+		ctlrno++;
+	}
+	if(hcis[Nhcis-1] != nil)
+		print("usbreset: bug: Nhcis too small\n");
+}
+
+/*
+ * Device init routine: initialise every controller found by usbreset
+ * and create the device that represents its root hub.  The root hub
+ * is enabled at once, uses a maxpkt of 64, and has its number of
+ * ports as info string.
+ */
+static void
+usbinit(void)
+{
+	Hci *hp;
+	Ep *rhub;
+	char info[40];
+	int i;
+
+	dprint("usbinit\n");
+	for(i = 0; i < Nhcis; i++){
+		hp = hcis[i];
+		if(hp == nil)
+			continue;
+		if(hp->init != nil)
+			hp->init(hp);
+		rhub = newdev(hp, 1, 1);		/* new root hub */
+		rhub->dev->state = Denabled;	/* although addr == 0 */
+		rhub->maxpkt = 64;
+		snprint(info, sizeof(info), "ports %d", hp->nports);
+		kstrdup(&rhub->info, info);
+	}
+}
+
+/* attach to the device: hand spec to devattach for device character 'u' */
+static Chan*
+usbattach(char *spec)
+{
+	return devattach(L'u', spec);
+}
+
+/* walk: there is no static directory table, usbgen generates the entries */
+static Walkqid*
+usbwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, nil, 0, usbgen);
+}
+
+/* stat: there is no static directory table, usbgen generates the entries */
+static int
+usbstat(Chan *c, uchar *db, int n)
+{
+	return devstat(c, db, n, nil, 0, usbgen);
+}
+
+/*
+ * µs for the given transfer, for bandwidth allocation.
+ * This is a very rough worst case for what 5.11.3
+ * of the usb 2.0 spec says.
+ * Also, we are using maxpkt and not actual transfer sizes.
+ * Only when we are sure we
+ * are not exceeding b/w might we consider adjusting it.
+ * An unknown speed is reported and yields a load of 0.
+ */
+static ulong
+usbload(int speed, int maxpkt)
+{
+	enum{ Hostns = 1000, Hubns = 333 };
+	ulong ns;
+	ulong bits;
+
+	bits = 10UL * maxpkt;
+	if(speed == Highspeed)
+		ns = 55*8*2 + 2 * (3 + bits) + Hostns;
+	else if(speed == Fullspeed)
+		ns = 9107 + 84 * (4 + bits) + Hostns;
+	else if(speed == Lowspeed)
+		ns = 64107 + 2 * Hubns + 667 * (3 + bits) + Hostns;
+	else{
+		print("usbload: bad speed %d\n", speed);
+		ns = 0;		/* let it run */
+	}
+	return ns / 1000UL;	/* in µs */
+}
+
+/*
+ * Open a file of the device.  Directories and ctl files are handled
+ * by devopen.  Opening a data file claims the endpoint (only one
+ * opener at a time), checks the open mode against the endpoint's
+ * mode, computes the endpoint's load if not yet known and asks the
+ * controller to open it.  The reference taken here with getep is kept
+ * until usbclose.  Raises Eio for an unknown endpoint, Einuse when
+ * the endpoint is already open, Eperm on a mode mismatch and
+ * Enotconf when the endpoint has no transfer type.
+ */
+static Chan*
+usbopen(Chan *c, int omode)
+{
+	int q;
+	Ep *ep;
+	int mode;
+
+	mode = openmode(omode);
+	q = QID(c->qid);
+
+	if(q >= Qep0dir && qid2epidx(q) < 0)
+		error(Eio);
+	if(q < Qep0dir || isqtype(q, Qepctl) || isqtype(q, Qepdir))
+		return devopen(c, omode, nil, 0, usbgen);
+
+	ep = getep(qid2epidx(q));
+	if(ep == nil)
+		error(Eio);
+	deprint("usbopen q %#x fid %d omode %d\n", q, c->fid, mode);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	qlock(ep);
+	if(ep->inuse){
+		qunlock(ep);
+		error(Einuse);
+	}
+	ep->inuse = 1;
+	qunlock(ep);
+	/* from here on a failure must also give the endpoint back */
+	if(waserror()){
+		ep->inuse = 0;
+		nexterror();
+	}
+	if(mode != OREAD && ep->mode == OREAD)
+		error(Eperm);
+	if(mode != OWRITE && ep->mode == OWRITE)
+		error(Eperm);
+	if(ep->ttype == Tnone)
+		error(Enotconf);
+	ep->clrhalt = 0;
+	ep->rhrepl = -1;
+	if(ep->load == 0)
+		ep->load = usbload(ep->dev->speed, ep->maxpkt);
+	ep->hp->epopen(ep);
+
+	poperror();	/* ep->inuse */
+	poperror();	/* don't putep(): ref kept for fid using the ep. */
+
+	c->mode = mode;
+	c->flag |= COPEN;
+	c->offset = 0;
+	c->aux = nil;	/* paranoia */
+	return c;
+}
+
+/*
+ * If ep is in use, ask its controller to close it and mark it idle.
+ * ep is qlocked meanwhile; the lock is released even if the
+ * controller raises an error.
+ */
+static void
+epclose(Ep *ep)
+{
+	qlock(ep);
+	if(waserror()){
+		qunlock(ep);
+		nexterror();
+	}
+	if(ep->inuse){
+		ep->hp->epclose(ep);
+		ep->inuse = 0;
+	}
+	qunlock(ep);
+	poperror();
+}
+
+/*
+ * Close a file of the device.  Only data files need work: if the
+ * channel was open, the endpoint is closed and the reference kept
+ * since usbopen is released.
+ */
+static void
+usbclose(Chan *c)
+{
+	int q;
+	Ep *ep;
+
+	q = QID(c->qid);
+	if(q < Qep0dir || isqtype(q, Qepctl) || isqtype(q, Qepdir))
+		return;
+
+	ep = getep(qid2epidx(q));
+	if(ep == nil)
+		return;
+	deprint("usbclose q %#x fid %d ref %ld\n", q, c->fid, ep->ref);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(c->flag & COPEN){
+		free(c->aux);
+		c->aux = nil;
+		epclose(ep);
+		putep(ep);	/* release ref kept since usbopen */
+		c->flag &= ~COPEN;
+	}
+	poperror();
+	putep(ep);	/* release ref taken by getep above */
+}
+
+/*
+ * Read from a ctl file.  #u/usb/ctl yields one line per known
+ * endpoint.  An endpoint's ctl file yields that endpoint's
+ * description or, right after a "newdev" request on the same channel,
+ * the name of the endpoint just created (kept in c->aux, which is
+ * consumed by this read).  The text is built in a READSTR-sized
+ * buffer and returned through readstr.
+ */
+static long
+ctlread(Chan *c, void *a, long n, vlong offset)
+{
+	int q;
+	char *s;
+	char *us;
+	char *se;
+	Ep *ep;
+	int i;
+
+	q = QID(c->qid);
+	us = s = smalloc(READSTR);
+	se = s + READSTR;
+	if(waserror()){
+		free(us);
+		nexterror();
+	}
+	if(q == Qctl)
+		for(i = 0; i < epmax; i++){
+			ep = getep(i);
+			if(ep != nil){
+				if(waserror()){
+					putep(ep);
+					nexterror();
+				}
+				s = seprint(s, se, "ep%d.%d ", ep->dev->nb, ep->nb);
+				s = seprintep(s, se, ep, 0);
+				poperror();
+			}
+			/* also reached with ep == nil, which putep ignores */
+			putep(ep);
+		}
+	else{
+		ep = getep(qid2epidx(q));
+		if(ep == nil)
+			error(Eio);
+		if(waserror()){
+			putep(ep);
+			nexterror();
+		}
+		if(c->aux != nil){
+			/* After a new endpoint request we read
+			 * the new endpoint name back.
+			 */
+			strecpy(s, se, c->aux);
+			free(c->aux);
+			c->aux = nil;
+		}else
+			seprintep(s, se, ep, 0);
+		poperror();
+		putep(ep);
+	}
+	n = readstr(offset, a, n, us);
+	poperror();
+	free(us);
+	return n;
+}
+
+/*
+ * Fake root hub emulation.
+ * Read side: deliver the reply left in ep->rhrepl by the last
+ * rhubwrite as a 2-byte value at the start of a, with the rest of the
+ * n bytes zeroed, and mark the reply as consumed.
+ * Returns n, or -1 when ep is not the setup endpoint of a root hub,
+ * n is less than 2, or no reply is pending; the caller then performs
+ * a real transfer.
+ */
+static long
+rhubread(Ep *ep, void *a, long n)
+{
+	char *b;
+
+	if(ep->dev->isroot == 0 || ep->nb != 0 || n < 2 || ep->rhrepl < 0)
+		return -1;
+
+	memset(a, 0, n);
+	b = a;
+	PUT2(b, ep->rhrepl);
+	ep->rhrepl = -1;
+	return n;
+}
+
+/*
+ * Write side of the fake root hub: interpret the setup packet in a
+ * as a port request and carry it out on the controller, leaving the
+ * reply in ep->rhrepl for the next rhubread.
+ * Returns n, or -1 when ep is not the setup endpoint of a root hub,
+ * in which case the caller performs a real transfer.  Raises an error
+ * for packets that are not Rsetuplen long, are not class requests to
+ * "other", or name a port the controller does not have.
+ */
+static long
+rhubwrite(Ep *ep, void *a, long n)
+{
+	uchar *s;
+	Hci *hp;
+	int req;
+	int feature;
+	int port;
+	int on;
+
+	if(ep->dev == nil || ep->dev->isroot == 0 || ep->nb != 0)
+		return -1;
+	if(n != Rsetuplen)
+		error("root hub is a toy hub");
+	ep->rhrepl = -1;
+	s = a;
+	if(s[Rtype] != (Rh2d|Rclass|Rother) && s[Rtype] != (Rd2h|Rclass|Rother))
+		error("root hub is a toy hub");
+	hp = ep->hp;
+	req = s[Rreq];
+	feature = GET2(s+Rvalue);
+	port = GET2(s+Rindex);
+	if(port < 1 || port > hp->nports)
+		error("bad hub port number");
+	on = req == Rsetfeature;
+	if(feature == Rportenable)
+		ep->rhrepl = hp->portenable(hp, port, on);
+	else if(feature == Rportreset)
+		ep->rhrepl = hp->portreset(hp, port, on);
+	else if(feature == Rgetstatus)
+		ep->rhrepl = hp->portstatus(hp, port);
+	else
+		ep->rhrepl = 0;
+	return n;
+}
+
+/*
+ * Read from a file of the device.  Directories go to devdirread and
+ * ctl files to ctlread.  For a data file the endpoint must be open,
+ * not write-only and not detached; control endpoints are first
+ * offered to the root hub emulation, and everything else is read
+ * through the controller.  Returns the number of bytes read.
+ */
+static long
+usbread(Chan *c, void *a, long n, vlong offset)
+{
+	int q;
+	Ep *ep;
+	int nr;
+
+	q = QID(c->qid);
+
+	if(c->qid.type == QTDIR)
+		return devdirread(c, a, n, nil, 0, usbgen);
+
+	if(q == Qctl || isqtype(q, Qepctl))
+		return ctlread(c, a, n, offset);
+
+	ep = getep(qid2epidx(q));
+	if(ep == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(ep->mode == OWRITE || ep->inuse == 0)
+		error(Ebadusefd);
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tctl:
+		/* rhubread answers only for a root hub's setup endpoint */
+		nr = rhubread(ep, a, n);
+		if(nr >= 0){
+			n = nr;
+			break;
+		}
+		/* else fall */
+	default:
+		ddeprint("\nusbread q %#x fid %d cnt %ld off %lld\n",q,c->fid,n,offset);
+		n = ep->hp->epread(ep, a, n);
+		break;
+	}
+	poperror();
+	putep(ep);
+	return n;
+}
+
+/* 2 to the power n; 1 when n is zero or negative */
+static long
+pow2(int n)
+{
+	long v;
+
+	v = 1;
+	while(n > 0){
+		v += v;
+		n--;
+	}
+	return v;
+}
+
+/*
+ * Recompute ep->maxpkt from the endpoint's hz, pollival, samplesz
+ * and, at high speed, ntds: samples per packet (rounded up) times the
+ * sample size, limited to 1024.  s names the ctl request that caused
+ * the change and is used only in messages.
+ */
+static void
+setmaxpkt(Ep *ep, char* s)
+{
+	long spp;	/* samples per packet */
+
+	if(ep->dev->speed != Highspeed)
+		spp = (ep->hz * ep->pollival + 999) / 1000;
+	else
+		spp = (ep->hz * ep->pollival * ep->ntds + 7999) / 8000;
+	ep->maxpkt = spp * ep->samplesz;
+	deprint("usb: %s: setmaxpkt: hz %ld poll %ld"
+		" ntds %d %s speed -> spp %ld maxpkt %ld\n", s,
+		ep->hz, ep->pollival, ep->ntds, spname[ep->dev->speed],
+		spp, ep->maxpkt);
+	if(ep->maxpkt <= 1024)
+		return;
+	print("usb: %s: maxpkt %ld > 1024. truncating\n", s, ep->maxpkt);
+	ep->maxpkt = 1024;
+}
+
+/*
+ * Many endpoint ctls. simply update the portable representation
+ * of the endpoint. The actual controller driver will look
+ * at them to setup the endpoints as dictated.
+ *
+ * Handle one control request, the n bytes at a, written to the ctl
+ * file of ep through channel c.  Returns the number of bytes
+ * consumed.  Raises Ebadctl for an unknown request and other errors
+ * for requests not allowed on this endpoint or with bad arguments.
+ * The parsed command buffer is freed on every path.
+ */
+static long
+epctl(Ep *ep, Chan *c, void *a, long n)
+{
+	static char *Info = "info ";
+	Ep *nep;
+	Udev *d;
+	int l;
+	char *s;
+	char *b;
+	int tt;
+	int i;
+	int mode;
+	int nb;
+	Cmdtab *ct;
+	Cmdbuf *cb;
+
+	d = ep->dev;
+
+	cb = parsecmd(a, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, epctls, nelem(epctls));
+	if(ct == nil)
+		error(Ebadctl);
+	i = ct->index;
+	if(i == CMnew || i == CMspeed || i == CMhub)
+		if(ep != ep->ep0)
+			error("allowed only on a setup endpoint");
+	/* configuration requests are refused once a non-setup endpoint is open */
+	if(i != CMclrhalt && i != CMdetach && i != CMdebugep && i != CMname)
+		if(ep != ep->ep0 && ep->inuse != 0)
+			error("must configure before using");
+	switch(i){
+	case CMnew:
+		deprint("usb epctl %s\n", cb->f[0]);
+		nb = strtol(cb->f[1], nil, 0);
+		if(nb < 0 || nb >= Ndeveps)
+			error("bad endpoint number");
+		tt = name2ttype(cb->f[2]);
+		if(tt == Tnone)
+			error("unknown endpoint type");
+		mode = name2mode(cb->f[3]);
+		if(mode < 0)
+			error("unknown i/o mode");
+		newdevep(ep, nb, tt, mode);
+		break;
+	case CMnewdev:
+		deprint("usb epctl %s\n", cb->f[0]);
+		if(ep != ep->ep0 || d->ishub == 0)
+			error("not a hub setup endpoint");
+		l = name2speed(cb->f[1]);
+		if(l == Nospeed)
+			error("speed must be full|low|high");
+		nep = newdev(ep->hp, 0, 0);
+		nep->dev->speed = l;
+		if(nep->dev->speed  != Lowspeed)
+			nep->maxpkt = 64;	/* assume full speed */
+		nep->dev->hub = d->nb;
+		nep->dev->port = atoi(cb->f[2]);
+		/* next read request will read
+		 * the name for the new endpoint
+		 */
+		l = sizeof(up->genbuf);
+		snprint(up->genbuf, l, "ep%d.%d", nep->dev->nb, nep->nb);
+		kstrdup(&c->aux, up->genbuf);
+		break;
+	case CMhub:
+		deprint("usb epctl %s\n", cb->f[0]);
+		d->ishub = 1;
+		break;
+	case CMspeed:
+		l = name2speed(cb->f[1]);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l == Nospeed)
+			error("speed must be full|low|high");
+		qlock(ep->ep0);
+		d->speed = l;
+		qunlock(ep->ep0);
+		break;
+	case CMmaxpkt:
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l < 1 || l > 1024)
+			error("maxpkt not in [1:1024]");
+		qlock(ep);
+		ep->maxpkt = l;
+		qunlock(ep);
+		break;
+	case CMntds:
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l < 1 || l > 3)
+			error("ntds not in [1:3]");
+		qlock(ep);
+		ep->ntds = l;
+		qunlock(ep);
+		break;
+	case CMpollival:
+		if(ep->ttype != Tintr && ep->ttype != Tiso)
+			error("not an intr or iso endpoint");
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		/*
+		 * for iso and high speed interrupt endpoints the argument
+		 * is an exponent: the interval stored is 2^(l-1)
+		 */
+		if(ep->ttype == Tiso ||
+		   (ep->ttype == Tintr && ep->dev->speed == Highspeed)){
+			if(l < 1 || l > 16)
+				error("pollival power not in [1:16]");
+			l = pow2(l-1);
+		}else
+			if(l < 1 || l > 255)
+				error("pollival not in [1:255]");
+		qlock(ep);
+		ep->pollival = l;
+		if(ep->ttype == Tiso)
+			setmaxpkt(ep, "pollival");
+		qunlock(ep);
+		break;
+	case CMsamplesz:
+		if(ep->ttype != Tiso)
+			error("not an iso endpoint");
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l <= 0 || l > 8)
+			error("samplesz not in [1:8]");
+		qlock(ep);
+		ep->samplesz = l;
+		setmaxpkt(ep, "samplesz");
+		qunlock(ep);
+		break;
+	case CMhz:
+		if(ep->ttype != Tiso)
+			error("not an iso endpoint");
+		l = strtoul(cb->f[1], nil, 0);
+		deprint("usb epctl %s %d\n", cb->f[0], l);
+		if(l <= 0 || l > 100000)
+			error("hz not in [1:100000]");
+		qlock(ep);
+		ep->hz = l;
+		setmaxpkt(ep, "hz");
+		qunlock(ep);
+		break;
+	case CMclrhalt:
+		qlock(ep);
+		deprint("usb epctl %s\n", cb->f[0]);
+		ep->clrhalt = 1;
+		qunlock(ep);
+		break;
+	case CMinfo:
+		deprint("usb epctl %s\n", cb->f[0]);
+		/*
+		 * the info string is taken from the raw request, not from
+		 * the parsed fields: everything after "info ", less a
+		 * trailing newline
+		 */
+		l = strlen(Info);
+		s = a;
+		if(n < l+2 || strncmp(Info, s, l) != 0)
+			error(Ebadctl);
+		/* NOTE(review): this also lowers the count returned to the caller */
+		if(n > 1024)
+			n = 1024;
+		b = smalloc(n);
+		memmove(b, s+l, n-l);
+		b[n-l] = 0;
+		if(b[n-l-1] == '\n')
+			b[n-l-1] = 0;
+		qlock(ep);
+		free(ep->info);
+		ep->info = b;
+		qunlock(ep);
+		break;
+	case CMaddress:
+		deprint("usb epctl %s\n", cb->f[0]);
+		ep->dev->state = Denabled;
+		break;
+	case CMdetach:
+		if(ep->dev->isroot != 0)
+			error("can't detach a root hub");
+		deprint("usb epctl %s ep%d.%d\n",
+			cb->f[0], ep->dev->nb, ep->nb);
+		ep->dev->state = Ddetach;
+		/* Release file system ref. for its endpoints */
+		for(i = 0; i < nelem(ep->dev->eps); i++)
+			putep(ep->dev->eps[i]);
+		break;
+	case CMdebugep:
+		if(strcmp(cb->f[1], "on") == 0)
+			ep->debug = 1;
+		else if(strcmp(cb->f[1], "off") == 0)
+			ep->debug = 0;
+		else
+			ep->debug = strtoul(cb->f[1], nil, 0);
+		print("usb: ep%d.%d debug %d\n",
+			ep->dev->nb, ep->nb, ep->debug);
+		break;
+	case CMname:
+		deprint("usb epctl %s %s\n", cb->f[0], cb->f[1]);
+		validname(cb->f[1], 0);
+		kstrdup(&ep->name, cb->f[1]);
+		break;
+	default:
+		panic("usb: unknown epctl %d", ct->index);
+	}
+	free(cb);
+	poperror();
+	return n;
+}
+
+/*
+ * Handle a request, the n bytes at a, written to #u/usb/ctl:
+ * "debug" sets the global debug level and passes it to the controller
+ * of every known endpoint, "dump" prints the data structures, and
+ * "reset" is not implemented and raises an error.  Returns n.
+ * NOTE(review): ct is used without a nil check, unlike in epctl;
+ * this assumes lookupcmd raises an error for an unknown command.
+ */
+static long
+usbctl(void *a, long n)
+{
+	Cmdtab *ct;
+	Cmdbuf *cb;
+	Ep *ep;
+	int i;
+
+	cb = parsecmd(a, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, usbctls, nelem(usbctls));
+	dprint("usb ctl %s\n", cb->f[0]);
+	switch(ct->index){
+	case CMdebug:
+		if(strcmp(cb->f[1], "on") == 0)
+			debug = 1;
+		else if(strcmp(cb->f[1], "off") == 0)
+			debug = 0;
+		else
+			debug = strtol(cb->f[1], nil, 0);
+		print("usb: debug %d\n", debug);
+		for(i = 0; i < epmax; i++)
+			if((ep = getep(i)) != nil){
+				ep->hp->debug(ep->hp, debug);
+				putep(ep);
+			}
+		break;
+	case CMreset:
+		print("devusb: CMreset not implemented\n");
+		error("not implemented");
+		/*
+		 * XXX: I'm not sure this is a good idea.
+		 * Usbd should not be restarted at all.
+		 * for(all eps)
+		 *	closeep(ep);
+		 * do a global reset once more
+		 * recreate root hub devices in place.
+		 */
+		break;
+	case CMdump:
+		dumpeps();
+		break;
+	}
+	free(cb);
+	poperror();
+	return n;
+}
+
+/*
+ * Write to a ctl file: #u/usb/ctl goes to usbctl, an endpoint's ctl
+ * file to epctl.  Writing to a detached device raises Edetach.  If
+ * the name of a newly created endpoint is still waiting to be read
+ * on this channel, it is discarded and the write is refused.
+ */
+static long
+ctlwrite(Chan *c, void *a, long n)
+{
+	int q;
+	Ep *ep;
+
+	q = QID(c->qid);
+	if(q == Qctl)
+		return usbctl(a, n);
+
+	ep = getep(qid2epidx(q));
+	if(ep == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(isqtype(q, Qepctl) && c->aux != nil){
+		/* Be sure we don't keep a cloned ep name */
+		free(c->aux);
+		c->aux = nil;
+		error("read, not write, expected");
+	}
+	n = epctl(ep, c, a, n);
+	putep(ep);
+	poperror();
+	return n;
+}
+
+/*
+ * Write to a file of the device.  Directories raise Eisdir and ctl
+ * files go to ctlwrite.  For a data file the endpoint must be open,
+ * not read-only and not detached; control endpoints are first offered
+ * to the root hub emulation, and everything else is written through
+ * the controller.  Returns n (or the count from the root hub
+ * emulation); the value returned by the controller's epwrite is not used.
+ */
+static long
+usbwrite(Chan *c, void *a, long n, vlong off)
+{
+	int q;
+	Ep *ep;
+	int nr;
+
+	if(c->qid.type == QTDIR)
+		error(Eisdir);
+
+	q = QID(c->qid);
+
+	if(q == Qctl || isqtype(q, Qepctl))
+		return ctlwrite(c, a, n);
+
+	ep = getep(qid2epidx(q));
+	if(ep == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(ep->mode == OREAD || ep->inuse == 0)
+		error(Ebadusefd);
+
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tctl:
+		/* rhubwrite answers only for a root hub's setup endpoint */
+		nr = rhubwrite(ep, a, n);
+		if(nr >= 0){
+			n = nr;
+			break;
+		}
+		/* else fall */
+	default:
+		ddeprint("\nusbwrite q %#x fid %d cnt %ld off %lld\n",q, c->fid, n, off);
+		ep->hp->epwrite(ep, a, n);
+	}
+	putep(ep);
+	poperror();
+	return n;
+}
+
+/*
+ * Device shutdown routine: call the shutdown function of every
+ * controller in hcis[].  A controller without one is reported and
+ * skipped.
+ */
+void
+usbshutdown(void)
+{
+	Hci *hci;
+	int i;
+
+	for(i = 0; i < Nhcis; i++){
+		hci = hcis[i];
+		if(hci == nil)
+			continue;
+		if(hci->shutdown == nil) {
+			/* this is device #u; the message used to say #l */
+			print("#u: hci %d: no shutdown function\n", i);
+			continue;
+		}
+		(*hci->shutdown)(hci);
+	}
+}
+
+/*
+ * Device table entry for #u.  Operations this driver does not
+ * implement itself use the generic dev* routines.
+ * NOTE(review): the initializers are positional and presumably follow
+ * the field order of Dev; confirm against its declaration.
+ */
+Dev usbdevtab = {
+	L'u',
+	"usb",
+
+	usbreset,
+	usbinit,
+	usbshutdown,
+	usbattach,
+	usbwalk,
+	usbstat,
+	usbopen,
+	devcreate,
+	usbclose,
+	usbread,
+	devbread,
+	usbwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};

+ 55 - 0
sys/src/9/kw/etherif.h

@@ -0,0 +1,55 @@
+/* sizing constants for the ether interface code (not referenced in this header) */
+enum
+{
+	MaxEther	= 2,
+	Ntypes		= 8,
+};
+
+/*
+ * Per-interface ethernet state.  Embeds RWlock, ISAConf and Netif
+ * as unnamed members; holds the driver entry points (filled in by
+ * the driver's reset routine), the driver's private ctlr pointer,
+ * the output queue and a set of statistics counters.
+ */
+typedef struct Ether Ether;
+struct Ether {
+	RWlock;				/* TO DO */
+	ISAConf;			/* hardware info */
+	int	ctlrno;
+//	int	tbdf;			/* type+busno+devno+funcno */
+	int	minmtu;
+	int	maxmtu;
+	uchar	ea[Eaddrlen];
+	void	*address;
+	int	tbusy;
+	int	encry;
+
+	void	(*attach)(Ether*);	/* filled in by reset routine */
+	void	(*closed)(Ether*);
+	void	(*detach)(Ether*);
+	void	(*transmit)(Ether*);
+	void	(*interrupt)(Ureg*, void*);
+	long	(*ifstat)(Ether*, void*, long, ulong);
+	long	(*ctl)(Ether*, void*, long); /* custom ctl messages */
+	void	(*power)(Ether*, int);	/* power on/off */
+	void	(*shutdown)(Ether*);	/* shutdown hardware before reboot */
+	void	*ctlr;
+	int	pcmslot;		/* PCMCIA */
+	int	fullduplex;		/* non-zero if full duplex */
+
+	Queue*	oq;
+
+	/* statistics */
+	ulong	interrupts;
+	ulong	dmarxintr;
+	ulong	dmatxintr;
+	ulong	promisc;
+	ulong	pktsdropped;
+	ulong	pktsmisaligned;
+	ulong	resets;			/* after initialisation */
+	ulong	bcasts;			/* broadcast pkts rcv'd */
+	ulong	mcasts;			/* multicast pkts rcv'd */
+
+	Netif;
+};
+
+/* routines defined outside this header for use by ether drivers */
+extern Block* etheriq(Ether*, Block*, int);
+extern void addethercard(char*, int(*)(Ether*));
+extern ulong ethercrc(uchar*, int);
+extern int parseether(uchar*, char*);
+
+/*
+ * ring-index helpers for a ring of l entries:
+ * NEXT wraps l-1 to 0, PREV wraps 0 to l-1.
+ */
+#define NEXT(x, l)	(((x)+1)%(l))
+#define PREV(x, l)	(((x) == 0) ? (l)-1: (x)-1)

+ 1495 - 0
sys/src/9/kw/etherkw.c

@@ -0,0 +1,1495 @@
+/*
+ * marvell kirkwood ethernet (88e1116) driver
+ * (as found in the sheevaplug).
+ * from /public/doc/marvell/sheeva/88f61xx.kirkwood.pdf
+ *
+ * features that could be implemented:
+ * - ip4, tcp, udp checksum offloading
+ * - multicast filtering
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+
+#include "etherif.h"
+#include "ethermii.h"
+#include "../ip/ip.h"
+
+/* mask of the low v bits; v must be less than the width of ulong */
+#define MASK(v)	((1UL<<(v)) - 1)
+
+// #undef	assert
+// #define assert(expr)
+
+/*
+ * debugging prints, compiled out via if(0).  Each expands to an
+ * unbraced `if', so take care when using one directly before `else'.
+ */
+#define	MIIDBG	if(0)iprint
+#define diprint	if(0)iprint
+
+/*
+ * driver sizing: register base, descriptor ring sizes, size of the
+ * receive buffer pool, and alignment required of descriptors and buffers.
+ */
+enum {
+	Gberegs		= Regbase + 0x72000,
+
+	Nrx		= 512,
+	Ntx		= 512,
+	Nrxblks		= 1024,
+	Rxblklen	= 2+1522, /* ethernet uses first 2 bytes as padding */
+
+	Maxrxintrsec	= 20*1000,	/* max. rx intrs. / sec */
+	Etherstuck	= 90,	/* must send or receive a packet in this many sec.s */
+
+	Descralign	= 16,
+	Bufalign	= 8,
+
+	Qno		= 0,		/* do everything on queue zero */
+};
+
+typedef struct Ctlr Ctlr;
+typedef struct Gbereg Gbereg;
+typedef struct Mibstats Mibstats;
+typedef struct Rx Rx;
+typedef struct Tx Tx;
+
+/*
+ * pool of free receive Blocks, kept as a list linked through
+ * Block.next; the embedded Lock is taken with ilock by
+ * rxallocb, rxfreeb and ctlrinit.
+ */
+static struct {
+	Lock;
+	Block	*head;
+} freeblocks;
+
+/*
+ * hardware receive buffer descriptor.
+ * cs carries the RCS* status bits, including RCSdmaown which marks
+ * the descriptor as owned by the hardware.  The driver writes the
+ * buffer size with Bufsize() and reads the received byte count
+ * from the top 16 bits of countsize.
+ */
+struct Rx {
+	ulong	cs;
+	ulong	countsize;	/* bytes, buffer size */
+	ulong	buf;		/* phys. addr. of packet buffer */
+	ulong	next;		/* phys. addr. of next Rx */
+};
+
+/*
+ * hardware transmit buffer descriptor.
+ * cs carries the TCS* bits, including TCSdmaown which marks the
+ * descriptor as owned by the hardware.  The driver stores the
+ * packet length in the top 16 bits of countchk.
+ */
+struct Tx {
+	ulong	cs;
+	ulong	countchk;	/* bytes, checksum */
+	ulong	buf;		/* phys. addr. of packet buffer */
+	ulong	next;		/* phys. addr. of next Tx */
+};
+
+/*
+ * mac mib counters.  layout fixed by hw; part of Gberegs.
+ * Also embedded in Ctlr, where getmibstats accumulates the
+ * hardware values (the hardware counters clear on read).
+ */
+struct Mibstats {
+	uvlong	rxby;			/* good bytes rcv'd */
+	ulong	badrxby;		/* bad bytes rcv'd */
+	ulong	mactxerr;		/* tx err pkts */
+	ulong	rxpkt;			/* good pkts rcv'd */
+	ulong	badrxpkt;		/* bad pkts rcv'd */
+	ulong	rxbcastpkt;		/* b'cast pkts rcv'd */
+	ulong	rxmcastpkt;		/* m'cast pkts rcv'd */
+
+	ulong	rx64;			/* pkts <= 64 bytes */
+	ulong	rx65_127;		/* pkts 65—127 bytes */
+	ulong	rx128_255;		/* pkts 128—255 bytes */
+	ulong	rx256_511;		/* pkts 256—511 bytes */
+	ulong	rx512_1023;		/* pkts 512—1023 bytes */
+	ulong	rx1024_max;		/* pkts >= 1024 bytes */
+
+	uvlong	txby;			/* good bytes sent */
+	ulong	txpkt;			/* good pkts sent */
+	/* half-duplex: pkts dropped due to excessive collisions */
+	ulong	txcollpktdrop;
+	ulong	txmcastpkt;		/* m'cast pkts sent */
+	ulong	txbcastpkt;		/* b'cast pkts sent */
+
+	ulong	badmacctlpkts;		/* bad mac ctl pkts */
+	ulong	txflctl;		/* flow-control pkts sent */
+	ulong	rxflctl;		/* good flow-control pkts rcv'd */
+	ulong	badrxflctl;		/* bad flow-control pkts rcv'd */
+
+	ulong	rxundersized;		/* runts */
+	ulong	rxfrags;		/* fragments rcv'd */
+	ulong	rxtoobig;		/* oversized pkts rcv'd */
+	ulong	rxjabber;		/* jabber pkts rcv'd */
+	ulong	rxerr;			/* rx error events */
+	ulong	crcerr;			/* crc error events */
+	ulong	collisions;		/* collision events */
+	ulong	latecoll;		/* late collisions */
+};
+
+/*
+ * per-controller driver state: register pointer, receive and
+ * transmit descriptor rings with the Blocks attached to each
+ * descriptor, mii state, and software statistics.  The unnamed
+ * Lock is taken with ilock by transmit, promiscuous, multicast,
+ * shutdown and ifstat; initlock guards one-time initialisation
+ * in attach.
+ */
+struct Ctlr {
+	Lock;
+	Gbereg	*reg;
+
+	Lock	initlock;
+	int	init;
+
+	Rx	*rx;		/* receive descriptors */
+	Block	*rxb[Nrx];	/* blocks belonging to the descriptors */
+	int	rxhead;		/* descr ethernet will write to next */
+	int	rxtail;		/* next descr that might need a buffer */
+	Rendez	rrendez;	/* interrupt wakes up read process */
+	int	haveinput;
+
+	Tx	*tx;
+	Block	*txb[Ntx];
+	int	txhead;		/* next descr we can use for new packet */
+	int	txtail;		/* next descr to reclaim on tx complete */
+
+	Mii	*mii;
+	int	port;
+
+	/* stats */
+	ulong	intrs;
+	ulong	newintrs;
+	ulong	txunderrun;
+	ulong	txringfull;
+	ulong	rxdiscard;
+	ulong	rxoverrun;
+	ulong	nofirstlast;
+
+	Mibstats;
+};
+
+/* bit for queue q, as written to the rx (rqc) and tx (tqc) queue command registers */
+#define	Rxqon(q)	(1<<(q))
+#define	Txqon(q)	(1<<(q))
+
+/*
+ * register and descriptor bit definitions, grouped by register.
+ * Function-like macros build multi-bit fields; each expansion is
+ * fully parenthesized so it can be combined with any operator.
+ */
+enum {
+	/* sdma config, sdc */
+	Burst1		= 0,
+	Burst2,
+	Burst4,
+	Burst8,
+	Burst16,
+	SDCrifb		= 1<<0,		/* rx intr on pkt boundaries */
+#define SDCrxburst(v)	((v)<<1)
+	SDCrxnobyteswap	= 1<<4,
+	SDCtxnobyteswap	= 1<<5,
+	SDCswap64byte	= 1<<6,
+#define SDCtxburst(v)	((v)<<22)
+	/* rx intr ipg (inter packet gap): bit 15 of v goes to bit 25, low 15 bits to 7.. */
+#define SDCipgintrx(v)	(((((v)>>15) & 1)<<25) | (((v) & MASK(15))<<7))
+
+	/* portcfg */
+	PCFGupromisc		= 1<<0,
+#define Rxqdefault(q)	((q)<<1)
+#define Rxqarp(q)	((q)<<4)
+	PCFGbcrejectnoiparp	= 1<<7,
+	PCFGbcrejectip		= 1<<8,
+	PCFGbcrejectarp		= 1<<9,
+	PCFGamnotxes		= 1<<12, /* auto mode, no summary update on tx */
+	PCFGtcpq	= 1<<14,
+	PCFGudpq	= 1<<15,
+#define	Rxqtcp(q)	((q)<<16)
+#define	Rxqudp(q)	((q)<<19)
+#define	Rxqbpdu(q)	((q)<<22)
+	PCFGrxcs	= 1<<25,	/* rx tcp checksum mode with header */
+
+	/* portcfgx */
+	PCFGXspanq	= 1<<1,
+	PCFGXcrcoff	= 1<<2,		/* no ethernet crc */
+
+	/* port serial control0, psc0 */
+	PSC0porton		= 1<<0,
+	PSC0forcelinkup		= 1<<1,
+	PSC0an_dplxoff		= 1<<2,	/* an_ = auto. negotiate */
+	PSC0an_flctloff		= 1<<3,
+	PSC0an_pauseadv		= 1<<4,
+	PSC0nofrclinkdown	= 1<<10,
+	PSC0an_spdoff		= 1<<13,
+	PSC0dteadv		= 1<<14,
+
+	/* max. input pkt size */
+#define PSC0mru(v)	((v)<<17)
+	PSC0mrumask	= PSC0mru(MASK(3)),
+	PSC0mru1518	= 0,
+	PSC0mru1522,
+	PSC0mru1552,
+	PSC0mru9022,
+	PSC0mru9192,
+	PSC0mru9700,
+
+	PSC0fd_frc		= 1<<21,	/* force full duplex */
+	PSC0flctlfrc		= 1<<22,
+	PSC0gmiispd_gbfrc	= 1<<23,
+	PSC0miispdfrc100mbps	= 1<<24,
+
+	/* port status 0, ps0 */
+	PS0linkup	= 1<<1,
+	PS0fd		= 1<<2,			/* full duplex */
+	PS0flctl	= 1<<3,
+	PS0gmii_gb	= 1<<4,
+	PS0mii100mbps	= 1<<5,
+	PS0txbusy	= 1<<7,
+	PS0txfifoempty	= 1<<10,
+	PS0rxfifo1empty	= 1<<11,
+	PS0rxfifo2empty	= 1<<12,
+
+	/* port serial control 1, psc1 */
+	PSC1loopback	= 1<<1,
+	PSC1mii		= 0<<2,
+	PSC1rgmii	= 1<<3,
+	PSC1portreset	= 1<<4,
+	PSC1clockbypass	= 1<<5,
+	PSC1iban	= 1<<6,
+	PSC1iban_bypass	= 1<<7,
+	PSC1iban_restart= 1<<8,
+	PSC1_gbonly	= 1<<11,
+	PSC1encolonbp	= 1<<15, /* "collision during back-pressure mib counting" */
+	PSC1coldomlimmask= MASK(6)<<16,
+#define PSC1coldomlim(v) (((v) & MASK(6))<<16)
+	PSC1miiallowoddpreamble	= 1<<22,
+
+	/* port status 1, ps1 */
+	PS1rxpause	= 1<<0,
+	PS1txpause	= 1<<1,
+	PS1pressure	= 1<<2,
+	PS1syncfail10ms	= 1<<3,
+	PS1an_done	= 1<<4,
+	PS1inbandan_bypassed	= 1<<5,
+	PS1serdesplllocked	= 1<<6,
+	PS1syncok	= 1<<7,
+	PS1nosquelch	= 1<<8,
+
+	/* irq */
+	Irx		= 1<<0,
+	Iextend		= 1<<1,
+#define Irxbufferq(q)	(1<<((q)+2))
+	Irxerr		= 1<<10,
+#define Irxerrq(q)	(1<<((q)+11))
+#define Itxendq(q)	(1<<((q)+19))
+	Isum		= 1<<31,
+
+	/* irq extended, irqe */
+#define	IEtxbufferq(q)	(1<<((q)+0))
+#define	IEtxerrq(q)	(1<<((q)+8))
+	IEphystschg	= 1<<16,
+	IEptp		= 1<<17,
+	IErxoverrun	= 1<<18,
+	IEtxunderrun	= 1<<19,
+	IElinkchg	= 1<<20,
+	IEintaddrerr	= 1<<23,
+	IEprbserr	= 1<<25,
+	IEsum		= 1<<31,
+
+	/* tx fifo urgent threshold (tx interrupt coalescing), pxtfut */
+#define TFUTipginttx(v)	(((v) & MASK(16))<<4)
+
+	/* minimal frame size, mfs */
+	MFS40by	= 10<<2,
+	MFS44by	= 11<<2,
+	MFS48by	= 12<<2,
+	MFS52by	= 13<<2,
+	MFS56by	= 14<<2,
+	MFS60by	= 15<<2,
+	MFS64by	= 16<<2,
+
+	/* receive descriptor */
+#define Bufsize(v)	((v)<<3)
+
+	/* receive descriptor status */
+	RCSmacerr	= 1<<0,
+	RCSmacmask	= 3<<1,
+	RCSmacce	= 0<<1,
+	RCSmacor	= 1<<1,
+	RCSmacmf	= 2<<1,
+	RCSl4chkshift	= 3,
+	RCSl4chkmask	= MASK(16),
+	RCSvlan		= 1<<17,
+	RCSbpdu		= 1<<18,
+	RCSl4mask	= 3<<21,
+	RCSl4tcp4	= 0<<21,
+	RCSl4udp4	= 1<<21,
+	RCSl4other	= 2<<21,
+	RCSl4rsvd	= 3<<21,
+	RCSl2ev2	= 1<<23,
+	RCSl3ip4	= 1<<24,
+	RCSip4headok	= 1<<25,
+	RCSlast		= 1<<26,
+	RCSfirst	= 1<<27,
+	RCSunknownaddr	= 1<<28,
+	RCSenableintr	= 1<<29,
+	RCSl4chkok	= 1<<30,
+	RCSdmaown	= 1<<31,
+
+	/* transmit descriptor status */
+	TCSmacerr	= 1<<0,
+	TCSmacmask	= 3<<1,
+	TCSmaclc	= 0<<1,
+	TCSmacur	= 1<<1,
+	TCSmacrl	= 2<<1,
+	TCSllc		= 1<<9,
+	TCSl4chkmode	= 1<<10,
+	TCSipv4hdlenshift= 11,
+	TCSvlan		= 1<<15,
+	TCSl4type	= 1<<16,
+	TCSgl4chk	= 1<<17,
+	TCSgip4chk	= 1<<18,
+	TCSpadding	= 1<<19,
+	TCSlast		= 1<<20,
+	TCSfirst	= 1<<21,
+	TCSenableintr	= 1<<23,
+	TCSautomode	= 1<<30,
+	TCSdmaown	= 1<<31,
+};
+
+/*
+ * fields of the smi (serial management interface) register.
+ * NOTE(review): Physmiopmask (3<<26) overlaps PhysmiReadok (1<<27);
+ * it is not used in the code visible here — confirm the opcode
+ * field width against the data sheet before using it.
+ */
+enum {
+	/* SMI regs */
+	/* used as a count of polling iterations: the delay(1) calls are commented out */
+	PhysmiTimeout	= 10000,
+	Physmidataoff	= 0,		/* Data */
+	Physmidatamask	= 0xffff<<Physmidataoff,
+
+	Physmiaddroff 	= 16,		/* PHY device addr */
+	Physmiaddrmask	= 0x1f << Physmiaddroff,
+
+	Physmiop	= 26,
+	Physmiopmask	= 3<<Physmiop,
+	PhysmiopWr	= 0<<Physmiop,
+	PhysmiopRd	= 1<<Physmiop,
+
+	PhysmiReadok	= 1<<27,
+	PhysmiBusy	= 1<<28,
+
+	SmiRegaddroff	= 21,		/* PHY device register addr */
+	SmiRegaddrmask	= 0x1f << SmiRegaddroff,
+};
+
+/*
+ * layout of the gigabit ethernet controller's registers, accessed
+ * through Ctlr.reg.  The _padN members fill the gaps between
+ * register groups; PAD is defined elsewhere and presumably yields
+ * the number of ulongs between the two byte offsets given —
+ * TODO confirm.  Member order and sizes must not change.
+ */
+struct Gbereg {
+	ulong	phy;			/* PHY address */
+	ulong	smi;			/* serial mgmt. interface */
+	ulong	euda;			/* ether default address */
+	ulong	eudid;			/* ether default id */
+	ulong	_pad0[PAD(0x80, 0xc)];
+
+	ulong	euirq;			/* interrupt cause */
+	ulong	euirqmask;		/* interrupt mask */
+	ulong	_pad1[PAD(0x94, 0x84)];
+
+	ulong	euea;			/* error address */
+	ulong	euiae;			/* internal error address */
+	ulong	_pad2[PAD(0xb0, 0x98)];
+
+	ulong	euc;			/* control */
+	ulong	_pad3[PAD(0x200, 0xb0)];
+
+	struct {
+		ulong	base;		/* window base */
+		ulong	size;		/* window size */
+	} base[6];
+	ulong	_pad4[PAD(0x280, 0x22c)];
+
+	ulong	harr[4];		/* high address remap */
+	ulong	bare;			/* base address enable */
+	ulong	epap;			/* port access protect */
+	ulong	_pad5[PAD(0x400, 0x294)];
+
+	ulong	portcfg;		/* port configuration */
+	ulong	portcfgx;		/* port config. extend */
+	ulong	mii;			/* mii serial parameters */
+	ulong	_pad6;
+	ulong	evlane;			/* vlan ether type */
+	ulong	macal;			/* mac address low */
+	ulong	macah;			/* mac address high */
+	ulong	sdc;			/* sdma config. */
+	ulong	dscp[7];		/* ip diff. serv. code point -> pri */
+	ulong	psc0;			/* port serial control 0 */
+	ulong	vpt2p;			/* vlan priority tag -> pri */
+	ulong	ps0;			/* ether port status 0 */
+	ulong	tqc;			/* transmit queue command */
+	ulong	psc1;			/* port serial control 1 */
+	ulong	ps1;			/* ether port status 1 */
+	ulong	mvhdr;			/* marvell header */
+	ulong	_pad8[2];
+
+	/* interrupts */
+	ulong	irq;			/* interrupt cause; some rw0c bits */
+	ulong	irqe;			/* " " extended; some rw0c bits */
+	ulong	irqmask;		/* interrupt mask (actually enable) */
+	ulong	irqemask;		/* " " extended */
+
+	ulong	_pad9;
+	ulong	pxtfut;			/* port tx fifo urgent threshold */
+	ulong	_pad10;
+	ulong	pxmfs;			/* port rx minimum frame size */
+	ulong	_pad11;
+
+	/*
+	 * # of input frames discarded by addr filtering or lack of resources;
+	 * zeroed upon read.
+	 */
+	ulong	pxdfc;			/* port rx discard frame counter */
+	ulong	pxofc;			/* port overrun frame counter */
+	ulong	_pad12[2];
+	ulong	piae;			/* port internal address error */
+	ulong	_pad13[PAD(0x4bc, 0x494)];
+	ulong	etherprio;		/* ether type priority */
+	ulong	_pad14[PAD(0x4dc, 0x4bc)];
+	ulong	tqfpc;			/* tx queue fixed priority config. */
+	ulong	pttbrc;			/* port tx token-bucket rate config. */
+	ulong	tqc1;			/* tx queue command 1 */
+	ulong	pmtu;			/* port maximum transmit unit */
+	ulong	pmtbs;			/* port maximum token bucket size */
+	ulong	_pad15[PAD(0x600, 0x4ec)];
+
+	struct {
+		ulong	_pad[3];
+		ulong	r;		/* phys. addr.: cur. rx desc. ptrs */
+	} crdp[8];
+	ulong	rqc;			/* rx queue command */
+	ulong	tcsdp;			/* phys. addr.: cur. tx desc. ptr */
+	ulong	_pad16[PAD(0x6c0, 0x684)];
+
+	ulong	tcqdp[8];		/* phys. addr.: cur. tx q. desc. ptr */
+	ulong	_pad17[PAD(0x700, 0x6dc)];
+
+	struct {
+		ulong	tbctr;		/* queue tx token-bucket counter */
+		ulong	tbcfg;		/* tx queue token-bucket config. */
+		ulong	acfg;		/* tx queue arbiter config. */
+		ulong	_pad;
+	} tq[8];
+	ulong	pttbc;			/* port tx token-bucket counter */
+	ulong	_pad18[PAD(0x7a8, 0x780)];
+
+	ulong	ipg2;			/* tx queue ipg */
+	ulong	_pad19[3];
+	ulong	ipg3;
+	ulong	_pad20;
+	ulong	htlp;			/* high token in low packet */
+	ulong	htap;			/* high token in async packet */
+	ulong	ltap;			/* low token in async packet */
+	ulong	_pad21;
+	ulong	ts;			/* tx speed */
+	ulong	_pad22[PAD(0x1000, 0x7d0)];
+
+	/* mac mib counters: statistics */
+	Mibstats;
+	ulong	_pad23[PAD(0x1400, 0x107c)];
+
+	/* multicast filtering */
+	ulong	dfsmt[64];	/* dest addr filter special m'cast table */
+	ulong	dfomt[64];	/* dest addr filter other m'cast table */
+
+	/* unicast filtering */
+	ulong	dfut[4];		/* dest addr filter unicast table */
+};
+
+/*
+ * time, in seconds derived from MACHP(0)->ticks, of the last observed
+ * ethernet activity; set by etheractive, examined by ethercheck.
+ * zero means no activity has been recorded yet.
+ */
+vlong etherstart;
+
+
+/* defined with the statistics code further down; needed by ctlrinit */
+static void getmibstats(Ctlr *);
+
+/*
+ * Block free routine for receive buffers: instead of releasing the
+ * memory, reset the block and push it back onto the freeblocks list.
+ */
+static void
+rxfreeb(Block *b)
+{
+	uchar *start;
+
+	/* freeb(b) will have previously decremented b->ref to 0; raise to 1 */
+	_xinc(&b->ref);
+
+	/* empty the block, leaving Rxblklen bytes of Bufalign-aligned space */
+	start = (uchar*)((uintptr)(b->lim - Rxblklen) & ~(Bufalign - 1));
+	b->rp = start;
+	b->wp = start;
+	assert(((uintptr)b->rp & (Bufalign - 1)) == 0);
+	b->free = rxfreeb;
+
+	/* push onto the head of the free list */
+	ilock(&freeblocks);
+	b->next = freeblocks.head;
+	freeblocks.head = b;
+	iunlock(&freeblocks);
+}
+
+/*
+ * Take a receive buffer from the freeblocks list.
+ * Returns nil if the list is empty.
+ */
+static Block *
+rxallocb(void)
+{
+	Block *blk;
+
+	ilock(&freeblocks);
+	if((blk = freeblocks.head) != nil) {
+		/* unlink and make sure it comes back to us when freed */
+		freeblocks.head = blk->next;
+		blk->next = nil;
+		blk->free = rxfreeb;
+	}
+	iunlock(&freeblocks);
+	return blk;
+}
+
+/*
+ * Make sure reception is running: supply a current descriptor
+ * pointer if the register is zero, and enable our queue if no
+ * queue is enabled.
+ */
+static void
+rxkick(Ctlr *ctlr)
+{
+	Gbereg *regs;
+
+	regs = ctlr->reg;
+	if (regs->crdp[Qno].r == 0)
+		regs->crdp[Qno].r = PADDR(&ctlr->rx[ctlr->rxhead]);
+	/* all queues are stopped?  then restart ours */
+	if ((regs->rqc & 0xff) == 0)
+		regs->rqc = Rxqon(Qno);
+}
+
+/*
+ * Make sure transmission is running: supply a current descriptor
+ * pointer if the register is zero, and enable our queue if no
+ * queue is enabled.
+ */
+static void
+txkick(Ctlr *ctlr)
+{
+	Gbereg *regs;
+
+	regs = ctlr->reg;
+	if (regs->tcqdp[Qno] == 0)
+		regs->tcqdp[Qno] = PADDR(&ctlr->tx[ctlr->txhead]);
+	/* all q's stopped?  then restart ours */
+	if ((regs->tqc & 0xff) == 0)
+		regs->tqc = Txqon(Qno);
+}
+
+/*
+ * Attach fresh buffers to receive descriptors, starting at rxtail,
+ * until a descriptor that still has a buffer is reached or the
+ * free list is empty; then make sure reception is running.
+ */
+static void
+rxreplenish(Ctlr *ctlr)
+{
+	Rx *r;
+	Block *b;
+
+	while(ctlr->rxb[ctlr->rxtail] == nil) {
+		b = rxallocb();
+		if(b == nil) {
+			iprint("etherkw: rxreplenish out of buffers\n");
+			break;
+		}
+
+		ctlr->rxb[ctlr->rxtail] = b;
+
+		/* set up receive descriptor */
+		r = &ctlr->rx[ctlr->rxtail];
+		assert(((uintptr)r & (Descralign - 1)) == 0);
+		r->countsize = Bufsize(Rxblklen);
+		r->buf = PADDR(b->rp);
+		/* push size and buffer address out before handing over ownership */
+		cachedwbse(r, sizeof *r);
+
+		/* and fire */
+		r->cs = RCSdmaown | RCSenableintr;
+		cachedwbse(&r->cs, BY2SE);
+
+		ctlr->rxtail = NEXT(ctlr->rxtail, Nrx);
+	}
+	rxkick(ctlr);
+}
+
+/*
+ * Debugging aid: print at most the first 64 bytes at bp in hex,
+ * followed by "...".  Everything goes through iprint so that the
+ * trailer cannot be separated from, or reordered against, the bytes.
+ */
+static void
+dump(uchar *bp, long max)
+{
+	if (max > 64)
+		max = 64;
+	for (; max > 0; max--, bp++)
+		iprint("%02.2ux ", *bp);
+	iprint("...\n");
+}
+
+/* note the time (in seconds of MACHP(0)->ticks) of the latest ethernet activity */
+static void
+etheractive(void)
+{
+	etherstart = TK2MS(MACHP(0)->ticks)/1000;
+}
+
+/*
+ * Complain if more than Etherstuck seconds have passed since the
+ * last recorded activity.  Silent until activity has been recorded once.
+ */
+static void
+ethercheck(void)
+{
+	if (etherstart == 0)
+		return;
+	if (TK2MS(MACHP(0)->ticks)/1000 - etherstart > Etherstuck)
+		iprint("ethernet stuck\n");
+}
+
+/*
+ * Pass received packets up to etheriq.  Called from rcvproc.
+ * Walks the receive ring from rxhead, handling at most Nrx-2
+ * descriptors per call and stopping at the first one still owned
+ * by the hardware.  Packets that do not fit in a single descriptor
+ * or that carry a mac error are dropped.
+ */
+static void
+receive(Ether *ether)
+{
+	int i;
+	ulong n;
+	Block *b;
+	Ctlr *ctlr = ether->ctlr;
+	Rx *r;
+
+	ethercheck();
+	for (i = Nrx-2; i > 0; i--) {
+		r = &ctlr->rx[ctlr->rxhead];
+		assert(((uintptr)r & (Descralign - 1)) == 0);
+		cachedinvse(r, sizeof *r);
+		if(r->cs & RCSdmaown)
+			break;
+
+		/* detach the buffer from the ring; rxreplenish will supply another */
+		b = ctlr->rxb[ctlr->rxhead];
+		if (b == nil)
+			panic("etherkw: nil ctlr->rxb[ctlr->rxhead] "
+				"in receive");
+		ctlr->rxb[ctlr->rxhead] = nil;
+		ctlr->rxhead = NEXT(ctlr->rxhead, Nrx);
+
+		/* only whole packets (first and last in one descriptor) are accepted */
+		if((r->cs & (RCSfirst|RCSlast)) != (RCSfirst|RCSlast)) {
+			ctlr->nofirstlast++;
+			freeb(b);
+			continue;
+		}
+		if(r->cs & RCSmacerr) {
+			freeb(b);
+			continue;
+		}
+
+		/* received byte count is in the top half of countsize */
+		n = r->countsize >> 16;
+		assert(n >= 2 && n < 2048);
+
+		cachedinvse(b->rp, n);
+		b->wp = b->rp + n;
+		/*
+		 * skip hardware padding to align ipv4 address in memory
+		 * (mv-s104860-u0 §8.3.4.1)
+		 */
+		b->rp += 2;
+		etheriq(ether, b, 1);
+		etheractive();
+		/* top up the ring part-way through a long run */
+		if (i % (Nrx / 2) == 0)
+			rxreplenish(ctlr);
+	}
+	rxreplenish(ctlr);
+}
+
+/*
+ * free transmitted packets: walk from txtail towards txhead,
+ * releasing the Block of each descriptor the hardware has given
+ * back, and stop at the first one it still owns.
+ */
+static void
+txreplenish(Ctlr *ctlr)
+{
+	int i;
+
+	while((i = ctlr->txtail) != ctlr->txhead) {
+		cachedinvse(&ctlr->tx[i].cs, BY2SE);
+		if(ctlr->tx[i].cs & TCSdmaown)
+			break;
+		if(ctlr->txb[i] == nil)
+			panic("no block for sent packet?!");
+		freeb(ctlr->txb[i]);
+		ctlr->txb[i] = nil;
+		ctlr->txtail = NEXT(i, Ntx);
+		etheractive();
+	}
+}
+
+/*
+ * transmit strategy: fill the output ring as far as possible,
+ * perhaps leaving a few spare; kick off the output and take
+ * an interrupt only when the transmit queue is empty.
+ *
+ * Runs with the controller lock held.  First reclaims completed
+ * descriptors, then queues at most Ntx/2 - 2 packets taken from
+ * ether->oq.  Packets shorter than minmtu or longer than maxmtu
+ * are dropped.  If anything was queued, the transmitter is kicked
+ * and the transmit interrupts are (re-)enabled.
+ */
+static void
+transmit(Ether *ether)
+{
+	int i, kick, len;
+	Block *b;
+	Ctlr *ctlr = ether->ctlr;
+	Gbereg *reg = ctlr->reg;
+	Tx *t;
+
+	ethercheck();
+	ilock(ctlr);
+	txreplenish(ctlr);			/* reap old packets */
+
+	/* queue new packets; don't use more than half the tx descs. */
+	kick = 0;
+	for (i = Ntx/2 - 2; i > 0; i--) {
+		t = &ctlr->tx[ctlr->txhead];
+		assert(((uintptr)t & (Descralign - 1)) == 0);
+		cachedinvse(t, sizeof *t);
+		if(t->cs & TCSdmaown) {		/* free descriptor? */
+			ctlr->txringfull++;
+			break;
+		}
+
+		b = qget(ether->oq);		/* outgoing packet? */
+		if (b == nil)
+			break;
+		len = BLEN(b);
+		if(len < ether->minmtu || len > ether->maxmtu) {
+			freeb(b);
+			continue;
+		}
+		/* remember the block so txreplenish can free it once sent */
+		ctlr->txb[ctlr->txhead] = b;
+
+		/* set up the transmit descriptor */
+		t->buf = PADDR(b->rp);
+		t->countchk = len << 16;
+		cachedwbse(t, sizeof *t);
+
+		/* and fire */
+		t->cs = TCSpadding | TCSfirst | TCSlast | TCSdmaown |
+			TCSenableintr;
+		cachedwbse(&t->cs, BY2SE);
+
+		kick++;
+		ctlr->txhead = NEXT(ctlr->txhead, Ntx);
+	}
+	if (kick) {
+		txkick(ctlr);
+
+		/* the interrupt handler masks these again when the ring empties */
+		reg->irqmask  |= Itxendq(Qno);
+		reg->irqemask |= IEtxerrq(Qno) | IEtxunderrun;
+	}
+	iunlock(ctlr);
+}
+
+/*
+ * Debugging aid: print the ring indices, the current descriptor
+ * pointer registers, every rxb slot and every receive descriptor,
+ * then pause for a second.
+ */
+static void
+dumprxdescs(Ctlr *ctlr)
+{
+	int n;
+	Rx *r;
+	Gbereg *reg;
+
+	reg = ctlr->reg;
+	iprint("\nrxhead %d rxtail %d; txcdp %#p rxcdp %#p\n",
+		ctlr->rxhead, ctlr->rxtail, reg->tcqdp[Qno], reg->crdp[Qno].r);
+	for (n = 0; n < Nrx; n++)
+		iprint("rxb %d @ %#p: %#p\n", n, &ctlr->rxb[n], ctlr->rxb[n]);
+	for (n = 0; n < Nrx; n++) {
+		r = &ctlr->rx[n];
+		iprint("rx %d @ %#p: cs %#lux countsize %lud buf %#lux next %#lux\n",
+			n, r, r->cs, r->countsize >> 3, r->buf, r->next);
+	}
+	delay(1000);
+}
+
+/* sleep condition for rcvproc: true once the interrupt handler has flagged input */
+static int
+gotinput(void* ctlr)
+{
+	Ctlr *c;
+
+	c = ctlr;
+	return c->haveinput != 0;
+}
+
+/*
+ * kproc body: sleep until the interrupt handler flags input,
+ * clear the flag, and process the receive ring.  Never returns.
+ */
+static void
+rcvproc(void* arg)
+{
+	Ether *ether = arg;
+	Ctlr *ctlr = ether->ctlr;
+
+	for(;;){
+		sleep(&ctlr->rrendez, gotinput, ctlr);
+		ctlr->haveinput = 0;
+		receive(ether);
+	}
+}
+
+/*
+ * Interrupt handler.  Reads and clears both cause registers, then:
+ * wakes rcvproc on receive (or kicks the receiver otherwise),
+ * refills the transmit ring on transmit-end, counts errors from
+ * the extended cause register, panics on receive descriptor
+ * errors, and tracks link changes.  Causes that match none of
+ * these are reported.
+ *
+ * NOTE(review): the cause registers are cleared by writing 0 after
+ * they have been read; a cause raised between the read and the
+ * write would presumably be lost — confirm against the data sheet.
+ */
+static void
+interrupt(Ureg*, void *arg)
+{
+	ulong irq, irqe, handled;
+	Ether *ether = arg;
+	Ctlr *ctlr = ether->ctlr;
+	Gbereg *reg = ctlr->reg;
+	/* a link change was seen but autonegotiation had not finished; shared by all controllers */
+	static int linkchg = 0;
+
+	handled = 0;
+	irq = reg->irq;
+	irqe = reg->irqe;
+	reg->irq = 0;				/* extinguish intr causes */
+	reg->irqe = 0;				/* " " " */
+	ethercheck();
+
+	if(irq & Irxbufferq(Qno)) {
+		/*
+		 * letting a kproc process the input takes far less real time
+		 * than doing it all at interrupt level.
+		 */
+		ctlr->haveinput = 1;
+		wakeup(&ctlr->rrendez);
+		handled++;
+	} else
+		rxkick(ctlr);
+
+	if(irq & Itxendq(Qno)) {		/* transmit ring empty? */
+		reg->irqmask  &= ~Itxendq(Qno);	/* prevent more interrupts */
+		reg->irqemask &= ~(IEtxerrq(Qno) | IEtxunderrun);
+		transmit(ether);
+		handled++;
+	}
+
+	if(irqe & IEsum) {
+		/*
+		 * IElinkchg appears to only be set when unplugging.
+		 * autonegotiation is likely not done yet, so linkup not valid,
+		 * thus we note the link change here, and check for
+		 * that and autonegotiation done below.
+		 */
+		if(irqe & IEphystschg) {
+			ether->link = (reg->ps0 & PS0linkup) != 0;
+			linkchg = 1;
+		}
+		if(irqe & IEtxerrq(Qno))
+			ether->oerrs++;
+		if(irqe & IErxoverrun)
+			ether->overflows++;
+		if(irqe & IEtxunderrun)
+			ctlr->txunderrun++;
+		if(irqe & (IEphystschg | IEtxerrq(Qno) | IErxoverrun |
+		    IEtxunderrun))
+			handled++;
+	}
+	if (irq & Isum) {
+		if (irq & Irxerrq(Qno)) {
+			ether->buffs++;		/* approx. error */
+			/* null descriptor pointer or descriptor owned by cpu */
+			panic("etherkw: rx err on queue 0");
+		}
+		if (irq & Irxerr) {
+			ether->buffs++;		/* approx. error */
+			/* null descriptor pointer or descriptor owned by cpu */
+			panic("etherkw: rx err");
+		}
+		if(irq & (Irxerr | Irxerrq(Qno)))
+			handled++;
+	}
+
+	/* a noted link change is resolved once autonegotiation is done */
+	if(linkchg && (reg->ps1 & PS1an_done)) {
+		handled++;
+		ether->link = (reg->ps0 & PS0linkup) != 0;
+		linkchg = 0;
+	}
+	ctlr->newintrs++;
+
+	if (!handled) {
+		/* ignore the summary bit and tx-buffer cause when deciding what is unknown */
+		irq  &= ~Isum;
+		irqe &= ~IEtxbufferq(Qno);
+		if (irq == 0 && irqe == 0) {
+			/* seems to be triggered by continuous output */
+			// iprint("etherkw: spurious interrupt\n");
+		} else
+			iprint("etherkw: interrupt cause unknown; "
+				"irq %#lux irqe %#lux\n", irq, irqe);
+	}
+	intrclear(Irqlo, IRQ0gbe0sum);
+}
+
+/* promiscuous and multicast requests; either one turns on PCFGupromisc */
+static int prom, mcast;
+
+/*
+ * Turn promiscuous reception on or off.  The hardware bit stays
+ * set while multicast reception has been requested.
+ */
+void
+promiscuous(void *arg, int on)
+{
+	Ether *ether;
+	Ctlr *ctlr;
+	Gbereg *reg;
+
+	ether = arg;
+	ctlr = ether->ctlr;
+	reg = ctlr->reg;
+
+	ilock(ctlr);
+	prom = on;
+	ether->prom = on;
+	if(prom || mcast)
+		reg->portcfg |= PCFGupromisc;
+	else
+		reg->portcfg &= ~PCFGupromisc;
+	iunlock(ctlr);
+}
+
+/*
+ * Request reception of multicast address addr.  There is no
+ * multicast filtering: the first request to turn multicast on
+ * puts the port into promiscuous mode, and mcast is never cleared
+ * afterwards.  addr is unused.
+ */
+void
+multicast(void *arg, uchar *addr, int on)
+{
+	Ether *e = arg;
+	Ctlr *ctlr = e->ctlr;
+	Gbereg *reg = ctlr->reg;
+
+	USED(addr);
+	ilock(ctlr);
+	/* update the shared flag under the lock, as promiscuous does for prom */
+	mcast |= on;
+	if(prom || mcast)
+		reg->portcfg |= PCFGupromisc;		/* overkill */
+	else
+		reg->portcfg &= ~PCFGupromisc;
+	iunlock(ctlr);
+}
+
+static void quiesce(Gbereg *reg);
+
+/*
+ * Stop the controller: quiesce the queues, zero the current
+ * descriptor pointers, turn the port off and put it in reset,
+ * then wait 100ms.
+ */
+static void
+shutdown(Ether *ether)
+{
+	Ctlr *ctlr;
+	Gbereg *reg;
+
+	ctlr = ether->ctlr;
+	reg = ctlr->reg;
+
+	ilock(ctlr);
+	quiesce(reg);
+	reg->tcqdp[Qno]  = 0;
+	reg->crdp[Qno].r = 0;
+	/* clearing all of psc0 also clears PSC0porton */
+	reg->psc0 = 0;
+	reg->psc1 |= PSC1portreset;
+	iunlock(ctlr);
+	delay(100);
+}
+
+/* indices of the driver-specific ctl messages */
+enum {
+	CMjumbo,
+};
+
+/* ctl message table for lookupcmd: index, name, field count (ctl reads f[1] for "jumbo") */
+static Cmdtab ctlmsg[] = {
+	CMjumbo,	"jumbo",	2,
+};
+
+/*
+ * Handle a driver-specific ctl message.  The only message is
+ * "jumbo on" / "jumbo off"; "on" is currently refused with an error.
+ * Returns n on success and raises an error otherwise.
+ */
+long
+ctl(Ether *e, void *p, long n)
+{
+	char *arg;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	Ctlr *ctlr;
+	Gbereg *reg;
+
+	ctlr = e->ctlr;
+	reg = ctlr->reg;
+
+	cb = parsecmd(p, n);
+	if(waserror()) {
+		free(cb);
+		nexterror();
+	}
+
+	ct = lookupcmd(cb, ctlmsg, nelem(ctlmsg));
+	switch(ct->index) {
+	default:
+		error(Ebadctl);
+		break;
+	case CMjumbo:
+		arg = cb->f[1];
+		if(strcmp(arg, "on") == 0) {
+			/* incoming packet queue doesn't expect jumbo frames */
+			error("jumbo disabled");
+			/* not reached while jumbo frames are refused */
+			reg->psc0 = (reg->psc0 & ~PSC0mrumask) |
+				PSC0mru(PSC0mru9022);
+			e->maxmtu = 9022;
+		} else if(strcmp(arg, "off") == 0) {
+			reg->psc0 = (reg->psc0 & ~PSC0mrumask) |
+				PSC0mru(PSC0mru1522);
+			e->maxmtu = ETHERMAXTU;
+		} else
+			error(Ebadctl);
+		break;
+	}
+	free(cb);
+	poperror();
+	return n;
+}
+
+/*
+ * phy/mii goo
+ */
+
+/*
+ * Poll the smi register until waitbit is clear.
+ * Returns 0 when it clears, or -1 after PhysmiTimeout polls have
+ * been used up.
+ */
+static int
+smibusywait(Gbereg *reg, ulong waitbit)
+{
+	ulong left, smi;
+
+	for (left = PhysmiTimeout; ; left--) {
+		/* read smi register */
+		smi = reg->smi;
+		if (left == 0) {
+			MIIDBG("SMI busy timeout\n");
+			return -1;
+		}
+		if ((smi & waitbit) == 0)
+			return 0;
+	}
+}
+
+/*
+ * Read register ra of the phy at address pa through the smi register.
+ * The special pair pa == ra == 0xEE returns the low byte of the
+ * controller's phy address register instead.
+ * Returns the 16-bit register value, or -1 if pa or ra is out of
+ * range or the read does not complete within PhysmiTimeout polls.
+ */
+static int
+miird(Mii *mii, int pa, int ra)
+{
+	ulong smi_reg, timeout;
+	Ctlr *ctlr;
+	Gbereg *reg;
+
+	ctlr = (Ctlr*)mii->ctlr;
+	reg = ctlr->reg;
+
+	/* check to read params */
+	if (pa == 0xEE && ra == 0xEE)
+		return reg->phy & 0xff;
+
+	/* check params */
+	if (((pa<<Physmiaddroff) & ~Physmiaddrmask) ||
+	    ((ra<<SmiRegaddroff) & ~SmiRegaddrmask))
+		return -1;
+
+	/* best effort: a busy timeout is not treated as an error here */
+	smibusywait(reg, PhysmiBusy);
+
+	/* fill the phy address and register offset and read opcode */
+	reg->smi = pa << Physmiaddroff | ra << SmiRegaddroff | PhysmiopRd;
+
+	/* wait til read value is ready */
+//	if (smibusywait(reg, PhysmiReadok) < 0)
+//		return -1;
+	timeout = PhysmiTimeout;
+	do {
+		smi_reg = reg->smi;
+		if (timeout-- == 0) {
+			MIIDBG("SMI read-valid timeout\n");
+			return -1;
+		}
+//		delay(1);
+	} while (!(smi_reg & PhysmiReadok));
+
+	/* Wait for the data to update in the SMI register */
+	for (timeout = 0; timeout < PhysmiTimeout; timeout++)
+		;
+	return reg->smi & Physmidatamask;
+}
+
+/*
+ * Write the low 16 bits of v to register ra of the phy at address
+ * pa through the smi register.
+ * Returns 0 once the write has been issued, or -1 if pa or ra is
+ * out of range.  Does not wait for the write to complete.
+ */
+static int
+miiwr(Mii *mii, int pa, int ra, int v)
+{
+	Ctlr *ctlr;
+	Gbereg *reg;
+	ulong smi_reg;
+
+	ctlr = (Ctlr*)mii->ctlr;
+	reg = ctlr->reg;
+
+	/* check params */
+	if (((pa<<Physmiaddroff) & ~Physmiaddrmask) ||
+	    ((ra<<SmiRegaddroff) & ~SmiRegaddrmask))
+		return -1;
+
+	/* best effort: a busy timeout is not treated as an error here */
+	smibusywait(reg, PhysmiBusy);
+
+	/*
+	 * fill the data, phy address and register offset, with the
+	 * write opcode.  v is confined to the data field so that stray
+	 * high bits (e.g. a failed miird result of -1 written back)
+	 * cannot spill into the address, opcode or status bits.
+	 */
+	smi_reg = ((ulong)v << Physmidataoff) & Physmidatamask;
+	smi_reg |= pa << Physmiaddroff | ra << SmiRegaddroff;
+	smi_reg &= ~PhysmiopRd;
+	reg->smi = smi_reg;
+	return 0;
+}
+
+/*
+ * Allocate and probe the mii for this controller, and set
+ * ether->mbps from the phy's speed.  If miistatus fails, the phy
+ * is reset and autonegotiation restarted, waiting up to about
+ * 6 seconds (600 polls of 10ms) for it to complete.
+ * Returns 0 on success.  Returns -1 on allocation failure, if no
+ * phy is found, or if autonegotiation cannot be started; in the
+ * latter two cases the Mii is freed and ctlr->mii is left nil.
+ */
+static int
+kirkwoodmii(Ether *ether)
+{
+	int i;
+	Ctlr *ctlr;
+	MiiPhy *phy;
+
+	MIIDBG("mii\n");
+	ctlr = ether->ctlr;
+	if((ctlr->mii = malloc(sizeof(Mii))) == nil)
+		return -1;
+	ctlr->mii->ctlr = ctlr;
+	ctlr->mii->mir = miird;
+	ctlr->mii->miw = miiwr;
+
+	if(mii(ctlr->mii, ~0) == 0 || (phy = ctlr->mii->curphy) == nil){
+		free(ctlr->mii);
+		ctlr->mii = nil;
+		iprint("etherkw: init mii failure\n");
+		return -1;
+	}
+
+	MIIDBG("oui %X phyno %d\n", phy->oui, phy->phyno);
+	if(miistatus(ctlr->mii) < 0){
+		miireset(ctlr->mii);
+		MIIDBG("miireset\n");
+		if(miiane(ctlr->mii, ~0, 0, ~0) < 0){
+			iprint("miiane failed\n");
+			/* don't leak the Mii; same cleanup as the probe failure above */
+			free(ctlr->mii);
+			ctlr->mii = nil;
+			return -1;
+		}
+		MIIDBG("miistatus\n");
+		miistatus(ctlr->mii);
+		if(miird(ctlr->mii, phy->phyno, Bmsr) & BmsrLs){
+			/* link is up: wait for autonegotiation to complete */
+			for(i = 0; ; i++){
+				if(i > 600){
+					iprint("etherkw: autonegotiation failed\n");
+					break;
+				}
+				if(miird(ctlr->mii, phy->phyno, Bmsr) & BmsrAnc)
+					break;
+				delay(10);
+			}
+			if(miistatus(ctlr->mii) < 0)
+				iprint("miistatus failed\n");
+		}else{
+			iprint("etherkw: no link\n");
+			phy->speed = 10;	/* simple default */
+		}
+	}
+
+	ether->mbps = phy->speed;
+//	iprint("etherkw: mii: fd=%d speed=%d tfc=%d rfc=%d\n",
+//		ctlr->port, phy->fd, phy->speed, phy->tfc, phy->rfc);
+	MIIDBG("mii done\n");
+	return 0;
+}
+
+/*
+ * Phy-specific set-up using register numbers and values taken on
+ * trust ("magic numbers"): writes register 22 before and after each
+ * group of accesses, adjusts register 10 for the leds and sets bits
+ * 4 and 5 of register 21 for the RGMII delays.
+ * Returns 0, or -1 if the phy address cannot be read.
+ * NOTE(review): the results of the miird calls on registers 10 and
+ * 21 are not checked for failure before being written back.
+ */
+static int
+miiphyinit(Mii *mii)			/* magic numbers 'r' us */
+{
+	ulong reg, devadr;
+
+	/* select mii phy */
+	devadr = miird(mii, 0xEE, 0xEE);
+//	print("devadr %lux\n", devadr);
+	if (devadr == -1) {
+		print("etherkw: can't read PHY dev address\n");
+		return -1;
+	}
+
+	/* leds link & activity */
+	miiwr(mii, devadr, 22, 0x3);
+	reg = miird(mii, devadr, 10);
+	reg &= ~0xf;
+	reg |= 0x1;
+	miiwr(mii, devadr, 10, reg);
+	miiwr(mii, devadr, 22, 0);
+
+	/* enable RGMII delay on Tx and Rx for CPU port */
+	miiwr(mii, devadr, 22, 2);
+	reg = miird(mii, devadr, 21);
+	reg |= (1<<5) | (1<<4);
+	miiwr(mii, devadr, 21, reg);
+	miiwr(mii, devadr, 22, 0);
+	return 0;
+}
+
+/*
+ * initialisation
+ */
+
+/*
+ * Stop all active transmit and receive queues and spin until
+ * both queue command registers show none enabled.  There is no
+ * timeout.
+ */
+static void
+quiesce(Gbereg *reg)
+{
+	ulong cmd;
+
+	/* writing the enable bits shifted up by 8 stops those channels */
+	cmd = reg->tqc;
+	if (cmd & 0xFF)
+		reg->tqc = cmd << 8;
+	cmd = reg->rqc;
+	if (cmd & 0xFF)
+		reg->rqc = cmd << 8;
+
+	/* wait for all queues to stop */
+	for (;;)
+		if ((reg->tqc & 0xFF) == 0 && (reg->rqc & 0xFF) == 0)
+			break;
+}
+
+/*
+ * Quiesce the queues, disable the port and clear the rgmii and
+ * port-reset bits in psc1, then spin briefly.
+ */
+static void
+portreset(Gbereg *reg)
+{
+	ulong i;
+
+	quiesce(reg);
+	reg->psc0 &= ~PSC0porton;		/* disable port */
+	reg->psc1 &= ~(PSC1rgmii|PSC1portreset); /* set port & MII active */
+	for (i = 0; i < 4000; i++)		/* magic delay */
+		;
+}
+
+/* store the low 16 bits of v at p, most significant byte first */
+static void
+p16(uchar *p, ulong v)
+{
+	p[0] = v>>8;
+	p[1] = v;
+}
+
+/* store the low 32 bits of v at p, most significant byte first */
+static void
+p32(uchar *p, ulong v)
+{
+	p[0] = v>>24;
+	p[1] = v>>16;
+	p[2] = v>>8;
+	p[3] = v;
+}
+
+/* low bit of a unicast filter table entry, as composed in archetheraddr */
+enum {
+	Pass = 1,
+};
+
+/*
+ * set ether->ea from hw mac address,
+ * configure unicast filtering to accept it.
+ *
+ * The filter table entry is selected by the low nibble of the last
+ * address byte: four one-byte entries per dfut register.  The entry
+ * is set to queue<<1 | Pass.
+ *
+ * NOTE(review): the mask below keeps only the selected entry's old
+ * bits and zeroes the other three entries in the same register; a
+ * read-modify-write would normally use ~(0xff << regoff) to do the
+ * opposite.  Confirm which is intended before changing it.
+ */
+void
+archetheraddr(Ether *ether, Gbereg *reg, int queue)
+{
+	ulong nibble, ucreg, tbloff, regoff;
+
+	/* macah holds the first four address bytes, macal the last two */
+	p32(ether->ea,   reg->macah);
+	p16(ether->ea+4, reg->macal);
+
+	/* accept frames on ea */
+	nibble = ether->ea[5] & 0xf;
+	tbloff = nibble / 4;
+	regoff = nibble % 4;
+
+	regoff *= 8;
+	ucreg = reg->dfut[tbloff];
+	ucreg &= 0xff << regoff;
+	ucreg |= (queue << 1 | Pass) << regoff;
+	reg->dfut[tbloff] = ucreg;
+}
+
+/*
+ * One-time controller initialisation, called from attach:
+ * fill the receive buffer pool, build the receive and transmit
+ * descriptor rings, clear the hardware mib counters, program
+ * interrupt coalescing, interrupt masks, descriptor pointers and
+ * port configuration, enable the receive queue, and start the
+ * receive kproc.
+ */
+static void
+ctlrinit(Ether *ether)
+{
+	int i;
+	Block *b;
+	Ctlr *ctlr = ether->ctlr;
+	Gbereg *reg = ctlr->reg;
+	Rx *r;
+	Tx *t;
+	static char name[KNAMELEN];
+	static Ctlr fakectlr;		/* bigger than 4K; keep off the stack */
+
+	/* populate the pool of receive buffers; make do with fewer if memory is short */
+	ilock(&freeblocks);
+	for(i = 0; i < Nrxblks; i++) {
+		b = iallocb(Rxblklen+Bufalign-1);
+		if(b == nil) {
+			iprint("etherkw: no memory for rx buffers\n");
+			break;
+		}
+		assert(b->ref == 1);
+		b->wp = b->rp = (uchar*)
+			((uintptr)(b->lim - Rxblklen) & ~(Bufalign - 1));
+		assert(((uintptr)b->rp & (Bufalign - 1)) == 0);
+		b->free = rxfreeb;
+		b->next = freeblocks.head;
+		freeblocks.head = b;
+	}
+	iunlock(&freeblocks);
+
+	/* receive ring: descriptors linked in a circle, no buffers yet */
+	ctlr->rx = xspanalloc(Nrx * sizeof(Rx), Descralign, 0);
+	if(ctlr->rx == nil)
+		panic("etherkw: no memory for rx ring");
+	for(i = 0; i < Nrx; i++) {
+		r = &ctlr->rx[i];
+		assert(((uintptr)r & (Descralign - 1)) == 0);
+		r->cs = 0;	/* not owned by hardware until r->buf is set */
+		r->buf = 0;
+		r->next = PADDR(&ctlr->rx[NEXT(i, Nrx)]);
+		ctlr->rxb[i] = nil;
+	}
+	ctlr->rxtail = ctlr->rxhead = 0;
+	cachedwb();
+	rxreplenish(ctlr);
+
+	/* transmit ring: descriptors linked in a circle, all free */
+	ctlr->tx = xspanalloc(Ntx * sizeof(Tx), Descralign, 0);
+	if(ctlr->tx == nil)
+		panic("etherkw: no memory for tx ring");
+	for(i = 0; i < Ntx; i++) {
+		t = &ctlr->tx[i];
+		assert(((uintptr)t & (Descralign - 1)) == 0);
+		t->cs = 0;
+		t->buf = 0;
+		t->next = PADDR(&ctlr->tx[NEXT(i, Ntx)]);
+		ctlr->txb[i] = nil;
+	}
+	ctlr->txtail = ctlr->txhead = 0;
+	cachedwb();
+
+	/*
+	 * clear stats by reading them into fake ctlr.
+	 * getmibstats reads through ctlr->reg, so the fake controller
+	 * must point at the real registers for the read to happen.
+	 */
+	fakectlr.reg = reg;
+	getmibstats(&fakectlr);
+
+	reg->pxmfs = MFS64by;
+
+	/*
+	 * ipg's (inter packet gaps) for interrupt coalescing,
+	 * values in units of 64 clock cycles.  A full-sized
+	 * packet (1514 bytes) takes just over 12µs to transmit.
+	 */
+	if (CLOCKFREQ/(Maxrxintrsec*64) >= (1<<16))
+		panic("rx coalescing value %d too big for short",
+			CLOCKFREQ/(Maxrxintrsec*64));
+	reg->sdc = SDCrifb | SDCrxburst(Burst16) | SDCtxburst(Burst16) |
+		SDCrxnobyteswap | SDCtxnobyteswap |
+		SDCipgintrx(CLOCKFREQ/(Maxrxintrsec*64));
+	reg->pxtfut = 0;	// TFUTipginttx(CLOCKFREQ/(Maxrxintrsec*64));
+
+	/* allow just these interrupts */
+	reg->irqmask = Irxbufferq(Qno) | Irxerr | Itxendq(Qno);
+	reg->irqemask = IEtxerrq(Qno) | IEphystschg | IErxoverrun | IEtxunderrun;
+
+	reg->irq = 0;
+	reg->irqe = 0;
+	reg->euirqmask = 0;
+	reg->euirq = 0;
+
+	/* only queue Qno gets descriptor pointers; the rest are zeroed */
+	reg->tcqdp[Qno]  = PADDR(&ctlr->tx[ctlr->txhead]);
+	for (i = 1; i < nelem(reg->tcqdp); i++)
+		reg->tcqdp[i] = 0;
+	reg->crdp[Qno].r = PADDR(&ctlr->rx[ctlr->rxhead]);
+	for (i = 1; i < nelem(reg->crdp); i++)
+		reg->crdp[i].r = 0;
+
+	reg->portcfg = Rxqdefault(Qno) | Rxqarp(Qno);
+	reg->portcfgx = 0;
+
+	reg->psc1 = PSC1rgmii | PSC1encolonbp | PSC1coldomlim(0x23);
+	/* why 1522? 1518 should be enough */
+	reg->psc0 = PSC0porton | PSC0an_flctloff |
+		PSC0an_pauseadv | PSC0nofrclinkdown | PSC0mru(PSC0mru1522);
+
+	ether->link = (reg->ps0 & PS0linkup) != 0;
+
+	/* set ethernet MTU for leaky bucket mechanism to 0 (disabled) */
+	reg->pmtu = 0;
+	reg->rqc = Rxqon(Qno);
+
+	etheractive();
+
+	snprint(name, sizeof name, "#l%drproc", ether->ctlrno);
+	kproc(name, rcvproc, ether);
+}
+
+/*
+ * Attach entry point: initialise the controller the first time
+ * through; later calls do nothing.  initlock serialises callers.
+ */
+static void
+attach(Ether* ether)
+{
+	Ctlr *ctlr;
+
+	ctlr = ether->ctlr;
+	lock(&ctlr->initlock);
+	if(!ctlr->init) {
+		ctlrinit(ether);
+		ctlr->init = 1;
+	}
+	unlock(&ctlr->initlock);
+}
+
+/*
+ * statistics goo
+ */
+
+/*
+ * Add the current value of each MIB counter register in ctlr->reg
+ * to the matching running total kept in ctlr.  Per the note below
+ * the registers clear when read, so every call both harvests and
+ * resets the hardware counters.  The caller supplies any locking.
+ */
+static void
+getmibstats(Ctlr *ctlr)
+{
+	Gbereg *reg = ctlr->reg;
+
+	/*
+	 * rxbyteslo & txbylo seem to return the same as the *hi-variant.
+	 * the docs claim [rt]xby 64 bit.  can we do an atomic 64 bit read?
+	 */
+
+	/* mib registers clear on read, store them */
+	ctlr->rxby	+= reg->rxby;
+	ctlr->badrxby	+= reg->badrxby;
+	ctlr->mactxerr	+= reg->mactxerr;
+	ctlr->rxpkt	+= reg->rxpkt;
+	ctlr->badrxpkt	+= reg->badrxpkt;
+	ctlr->rxbcastpkt+= reg->rxbcastpkt;
+	ctlr->rxmcastpkt+= reg->rxmcastpkt;
+	ctlr->rx64	+= reg->rx64;
+	ctlr->rx65_127	+= reg->rx65_127;
+	ctlr->rx128_255	+= reg->rx128_255;
+	ctlr->rx256_511	+= reg->rx256_511;
+	ctlr->rx512_1023+= reg->rx512_1023;
+	ctlr->rx1024_max+= reg->rx1024_max;
+	ctlr->txby	+= reg->txby;
+	ctlr->txpkt	+= reg->txpkt;
+	ctlr->txcollpktdrop+= reg->txcollpktdrop;
+	ctlr->txmcastpkt+= reg->txmcastpkt;
+	ctlr->txbcastpkt+= reg->txbcastpkt;
+	ctlr->badmacctlpkts+= reg->badmacctlpkts;
+	ctlr->txflctl	+= reg->txflctl;
+	ctlr->rxflctl	+= reg->rxflctl;
+	ctlr->badrxflctl+= reg->badrxflctl;
+	ctlr->rxundersized+= reg->rxundersized;
+	ctlr->rxfrags	+= reg->rxfrags;
+	ctlr->rxtoobig	+= reg->rxtoobig;
+	ctlr->rxjabber	+= reg->rxjabber;
+	ctlr->rxerr	+= reg->rxerr;
+	ctlr->crcerr	+= reg->crcerr;
+	ctlr->collisions+= reg->collisions;
+	ctlr->latecoll	+= reg->latecoll;
+}
+
+/*
+ * Produce the textual statistics report for this interface.
+ *
+ * The report is formatted into a READSTR-byte scratch buffer while
+ * holding ilock(ctlr): the MIB counters are harvested with
+ * getmibstats, the discard/overrun registers are folded into their
+ * totals, and newintrs is folded into intrs and then zeroed.
+ * The slice of the report selected by off and n is copied to a
+ * by readstr, whose result is returned.
+ *
+ * The scratch allocation is checked: formatting through a nil
+ * pointer would scribble on address 0, so give up loudly instead,
+ * as the rest of this driver does when it runs out of memory.
+ */
+long
+ifstat(Ether *ether, void *a, long n, ulong off)
+{
+	Ctlr *ctlr = ether->ctlr;
+	Gbereg *reg = ctlr->reg;
+	char *buf, *p, *e;
+
+	buf = p = malloc(READSTR);
+	if(buf == nil)
+		panic("etherkw: no memory for ifstat");
+	e = p + READSTR;
+
+	ilock(ctlr);
+
+	getmibstats(ctlr);
+
+	ctlr->intrs += ctlr->newintrs;
+	p = seprint(p, e, "interrupts: %lud\n", ctlr->intrs);
+	p = seprint(p, e, "new interrupts: %lud\n", ctlr->newintrs);
+	ctlr->newintrs = 0;
+	p = seprint(p, e, "tx underrun: %lud\n", ctlr->txunderrun);
+	p = seprint(p, e, "tx ring full: %lud\n", ctlr->txringfull);
+
+	/* fold the port discard/overrun counter registers into the totals */
+	ctlr->rxdiscard += reg->pxdfc;
+	ctlr->rxoverrun += reg->pxofc;
+	p = seprint(p, e, "rx discarded frames: %lud\n", ctlr->rxdiscard);
+	p = seprint(p, e, "rx overrun frames: %lud\n", ctlr->rxoverrun);
+	p = seprint(p, e, "no first+last flag: %lud\n", ctlr->nofirstlast);
+
+	/* duplex and flow control come straight from port status 0 */
+	p = seprint(p, e, "duplex: %s\n", (reg->ps0 & PS0fd)? "full": "half");
+	p = seprint(p, e, "flow control: %s\n", (reg->ps0 & PS0flctl)? "on": "off");
+	/* p = seprint(p, e, "speed: %d mbps\n", ); */
+
+	p = seprint(p, e, "received bytes: %llud\n", ctlr->rxby);
+	p = seprint(p, e, "bad received bytes: %lud\n", ctlr->badrxby);
+	p = seprint(p, e, "internal mac transmit errors: %lud\n", ctlr->mactxerr);
+	p = seprint(p, e, "total received frames: %lud\n", ctlr->rxpkt);
+	p = seprint(p, e, "received broadcast frames: %lud\n", ctlr->rxbcastpkt);
+	p = seprint(p, e, "received multicast frames: %lud\n", ctlr->rxmcastpkt);
+	p = seprint(p, e, "bad received frames: %lud\n", ctlr->badrxpkt);
+	p = seprint(p, e, "received frames 0-64: %lud\n", ctlr->rx64);
+	p = seprint(p, e, "received frames 65-127: %lud\n", ctlr->rx65_127);
+	p = seprint(p, e, "received frames 128-255: %lud\n", ctlr->rx128_255);
+	p = seprint(p, e, "received frames 256-511: %lud\n", ctlr->rx256_511);
+	p = seprint(p, e, "received frames 512-1023: %lud\n", ctlr->rx512_1023);
+	p = seprint(p, e, "received frames 1024-max: %lud\n", ctlr->rx1024_max);
+	p = seprint(p, e, "transmitted bytes: %llud\n", ctlr->txby);
+	p = seprint(p, e, "total transmitted frames: %lud\n", ctlr->txpkt);
+	p = seprint(p, e, "transmitted broadcast frames: %lud\n", ctlr->txbcastpkt);
+	p = seprint(p, e, "transmitted multicast frames: %lud\n", ctlr->txmcastpkt);
+	p = seprint(p, e, "transmit frames dropped by collision: %lud\n", ctlr->txcollpktdrop);
+	p = seprint(p, e, "misaligned buffers: %lud\n", ether->pktsmisaligned);
+
+	p = seprint(p, e, "bad mac control frames: %lud\n", ctlr->badmacctlpkts);
+	p = seprint(p, e, "transmitted flow control messages: %lud\n", ctlr->txflctl);
+	p = seprint(p, e, "received flow control messages: %lud\n", ctlr->rxflctl);
+	p = seprint(p, e, "bad received flow control messages: %lud\n", ctlr->badrxflctl);
+	p = seprint(p, e, "received undersized packets: %lud\n", ctlr->rxundersized);
+	p = seprint(p, e, "received fragments: %lud\n", ctlr->rxfrags);
+	p = seprint(p, e, "received oversized packets: %lud\n", ctlr->rxtoobig);
+	p = seprint(p, e, "received jabber packets: %lud\n", ctlr->rxjabber);
+	p = seprint(p, e, "mac receive errors: %lud\n", ctlr->rxerr);
+	p = seprint(p, e, "crc errors: %lud\n", ctlr->crcerr);
+	p = seprint(p, e, "collisions: %lud\n", ctlr->collisions);
+	p = seprint(p, e, "late collisions: %lud\n", ctlr->latecoll);
+	USED(p);
+	iunlock(ctlr);
+
+	n = readstr(off, a, n, buf);
+	free(buf);
+	return n;
+}
+
+
+/*
+ * Probe/reset entry point handed to addethercard.
+ *
+ * Allocates and zeroes the Ctlr, binds it to the register block for
+ * controller 0 (any other ctlrno panics), resets the port, records
+ * the phy address, port and irq in ether, finds and initialises the
+ * PHY, sets the station address and fills in ether's method pointers.
+ *
+ * Returns 0 on success.  Returns -1, after freeing the Ctlr and
+ * clearing ether->ctlr, if kirkwoodmii fails.  Panics if the Ctlr
+ * cannot be allocated rather than passing a nil pointer to memset.
+ */
+static int
+reset(Ether *ether)
+{
+	Ctlr *ctlr;
+
+	ctlr = malloc(sizeof *ctlr);
+	if(ctlr == nil)
+		panic("etherkw: no memory for ctlr");
+	memset(ctlr, 0, sizeof *ctlr);
+	ether->ctlr = ctlr;
+	switch(ether->ctlrno) {
+	case 0:
+		ctlr->reg = (Gbereg*)Gberegs;
+		break;
+	default:
+		panic("etherkirdwood: bad ether ctlr #%d", ether->ctlrno);
+	}
+
+	/* io cfg 0: 1.8v gbe */
+//	*(ulong *)0xf10100e0 |= 1 << 7 | 1 << 15;
+
+	portreset(ctlr->reg);
+
+	/* Set phy address of the port */
+	ctlr->port = ether->ctlrno;
+	ctlr->reg->phy = ether->ctlrno;
+	ether->port = (uintptr)ctlr->reg;
+	ether->irq = IRQ0gbe0sum;
+
+	if(kirkwoodmii(ether) < 0){
+		free(ctlr);
+		ether->ctlr = nil;
+		return -1;
+	}
+	miiphyinit(ctlr->mii);
+	archetheraddr(ether, ctlr->reg, 0);	/* 0 is the rx queue */
+
+	ether->attach = attach;
+	ether->transmit = transmit;
+	ether->interrupt = interrupt;
+	ether->ifstat = ifstat;
+	ether->shutdown = shutdown;
+	ether->ctl = ctl;
+
+	ether->arg = ether;
+	ether->promiscuous = promiscuous;
+	ether->multicast = multicast;
+
+	return 0;
+}
+
+/*
+ * Link-time hook: register this driver's reset routine with the
+ * ethernet layer under the card name "kirkwood".
+ */
+void
+etherkwlink(void)
+{
+	addethercard("kirkwood", reset);
+}

+ 235 - 0
sys/src/9/kw/ethermii.c

@@ -0,0 +1,235 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+
+#include "etherif.h"
+#include "ethermii.h"
+
+/*
+ * Probe the management bus for PHYs.
+ *
+ * mask selects which PHY numbers (bit n = PHY n, 0..NMiiPhy-1) to
+ * look at.  Returns the subset of mask for which a PHY is known,
+ * either from an earlier probe or found now.  Newly found PHYs get
+ * a MiiPhy allocated and recorded in mii->phy[], mii->mask and
+ * mii->nphy; the first one found becomes mii->curphy.
+ */
+int
+mii(Mii* mii, int mask)
+{
+	MiiPhy *miiphy;
+	int bit, oui, phyno, r, rmask;
+
+	/*
+	 * Probe through mii for PHYs in mask;
+	 * return the mask of those found in the current probe.
+	 * If the PHY has not already been probed, update
+	 * the Mii information.
+	 */
+	rmask = 0;
+	for(phyno = 0; phyno < NMiiPhy; phyno++){
+		bit = 1<<phyno;
+		if(!(mask & bit))
+			continue;
+		/* already known from a previous probe */
+		if(mii->mask & bit){
+			rmask |= bit;
+			continue;
+		}
+		/* a read of -1 is taken to mean nothing answered */
+		if(mii->mir(mii, phyno, Bmsr) == -1)
+			continue;
+		/* build the id from the low 14 bits of Phyidr1 and the top of Phyidr2 */
+		r = mii->mir(mii, phyno, Phyidr1);
+		oui = (r & 0x3FFF)<<6;
+		r = mii->mir(mii, phyno, Phyidr2);
+		oui |= r>>10;
+		if(oui == 0xFFFFF || oui == 0)
+			continue;
+
+		/* allocation failure silently skips this PHY */
+		if((miiphy = malloc(sizeof(MiiPhy))) == nil)
+			continue;
+
+		miiphy->mii = mii;
+		miiphy->oui = oui;
+		miiphy->phyno = phyno;
+
+		/* ~0 marks these as not yet chosen; miiane tests for it */
+		miiphy->anar = ~0;
+		miiphy->fc = ~0;
+		miiphy->mscr = ~0;
+
+		mii->phy[phyno] = miiphy;
+		if(mii->curphy == nil)
+			mii->curphy = miiphy;
+		mii->mask |= bit;
+		mii->nphy++;
+
+		rmask |= bit;
+	}
+	return rmask;
+}
+
+/*
+ * Read register r of the current PHY through the controller's
+ * mir callback.  Returns -1 without touching the bus if mii, its
+ * ctlr or its current PHY is missing.
+ */
+int
+miimir(Mii* mii, int r)
+{
+	MiiPhy *phy;
+
+	if(mii == nil || mii->ctlr == nil)
+		return -1;
+	phy = mii->curphy;
+	if(phy == nil)
+		return -1;
+	return mii->mir(mii, phy->phyno, r);
+}
+
+/*
+ * Write data to register r of the current PHY through the
+ * controller's miw callback.  Returns -1 without touching the bus
+ * if mii, its ctlr or its current PHY is missing.
+ */
+int
+miimiw(Mii* mii, int r, int data)
+{
+	MiiPhy *phy;
+
+	if(mii == nil || mii->ctlr == nil)
+		return -1;
+	phy = mii->curphy;
+	if(phy == nil)
+		return -1;
+	return mii->miw(mii, phy->phyno, r, data);
+}
+
+/*
+ * Set the reset bit (BmcrR) in the current PHY's Bmcr, keeping the
+ * other bits as read, then wait one microsecond.
+ * Returns 0, or -1 if mii, its ctlr or its current PHY is missing.
+ */
+int
+miireset(Mii* mii)
+{
+	int phyno, ctl;
+
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+
+	phyno = mii->curphy->phyno;
+	ctl = mii->mir(mii, phyno, Bmcr) | BmcrR;
+	mii->miw(mii, phyno, Bmcr, ctl);
+	microdelay(1);
+
+	return 0;
+}
+
+/*
+ * Program the current PHY's advertisement registers and (re)start
+ * auto-negotiation.
+ *
+ * a: 10/100 abilities to advertise (Ana* bits), or ~0 to reuse the
+ *    value remembered in the MiiPhy or, failing that, derive it
+ *    from the capabilities reported in Bmsr.
+ * p: pause bits (AnaAP|AnaP) to advertise, or ~0 to reuse the
+ *    remembered value, if any.
+ * e: 1000BASE-T bits (Mscr1000*) to advertise, or ~0 to reuse the
+ *    remembered value or derive it from Esr.  Only consulted when
+ *    Bmsr reports extended status (BmsrEs).
+ *
+ * The choices are remembered in curphy->anar, ->fc and ->mscr.
+ * Returns 0 on success, -1 if there is no usable mii/PHY or the
+ * PHY does not report auto-negotiation ability (BmsrAna).
+ */
+int
+miiane(Mii* mii, int a, int p, int e)
+{
+	int anar, bmsr, mscr, r, phyno;
+
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+	phyno = mii->curphy->phyno;
+
+	bmsr = mii->mir(mii, phyno, Bmsr);
+	if(!(bmsr & BmsrAna))
+		return -1;
+
+	if(a != ~0)
+		anar = (AnaTXFD|AnaTXHD|Ana10FD|Ana10HD) & a;
+	else if(mii->curphy->anar != ~0)
+		anar = mii->curphy->anar;
+	else{
+		/* start from the register, replace the ability bits with what Bmsr says */
+		anar = mii->mir(mii, phyno, Anar);
+		anar &= ~(AnaAP|AnaP|AnaT4|AnaTXFD|AnaTXHD|Ana10FD|Ana10HD);
+		if(bmsr & Bmsr10THD)
+			anar |= Ana10HD;
+		if(bmsr & Bmsr10TFD)
+			anar |= Ana10FD;
+		if(bmsr & Bmsr100TXHD)
+			anar |= AnaTXHD;
+		if(bmsr & Bmsr100TXFD)
+			anar |= AnaTXFD;
+	}
+	/* remembered before the pause bits are or'ed in below */
+	mii->curphy->anar = anar;
+
+	if(p != ~0)
+		anar |= (AnaAP|AnaP) & p;
+	else if(mii->curphy->fc != ~0)
+		anar |= mii->curphy->fc;
+	mii->curphy->fc = (AnaAP|AnaP) & anar;
+
+	if(bmsr & BmsrEs){
+		mscr = mii->mir(mii, phyno, Mscr);
+		mscr &= ~(Mscr1000TFD|Mscr1000THD);
+		if(e != ~0)
+			mscr |= (Mscr1000TFD|Mscr1000THD) & e;
+		else if(mii->curphy->mscr != ~0)
+			mscr = mii->curphy->mscr;
+		else{
+			r = mii->mir(mii, phyno, Esr);
+			if(r & Esr1000THD)
+				mscr |= Mscr1000THD;
+			if(r & Esr1000TFD)
+				mscr |= Mscr1000TFD;
+		}
+		mii->curphy->mscr = mscr;
+		mii->miw(mii, phyno, Mscr, mscr);
+	}
+	mii->miw(mii, phyno, Anar, anar);
+
+	/* leave Bmcr alone while its reset bit is still set */
+	r = mii->mir(mii, phyno, Bmcr);
+	if(!(r & BmcrR)){
+		r |= BmcrAne|BmcrRan;
+		mii->miw(mii, phyno, Bmcr, r);
+	}
+
+	return 0;
+}
+
+/*
+ * Work out the negotiated link state of the current PHY and store
+ * it in the MiiPhy: link, speed (10/100/1000), fd (full duplex),
+ * and rfc/tfc (receive/transmit flow control).
+ *
+ * Returns 0 with phy->link = 1 when the link is up and a common
+ * speed was found.  Returns -1 if there is no usable mii/PHY, the
+ * auto-negotiation check fails, the link is down (phy->link is set
+ * to 0 in that case only), or no common speed is found.
+ */
+int
+miistatus(Mii* mii)
+{
+	MiiPhy *phy;
+	int anlpar, bmsr, p, r, phyno;
+
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+	phy = mii->curphy;
+	phyno = phy->phyno;
+
+	/*
+	 * Check Auto-Negotiation is complete and link is up.
+	 * (Read status twice as the Ls bit is sticky).
+	 */
+	/*
+	 * NOTE(review): this test only fails when both BmsrAnc and
+	 * BmsrAna are clear, so a PHY that is merely able to
+	 * auto-negotiate passes it; confirm that is intended.
+	 */
+	bmsr = mii->mir(mii, phyno, Bmsr);
+	if(!(bmsr & (BmsrAnc|BmsrAna))) {
+		// print("miistatus: auto-neg incomplete\n");
+		return -1;
+	}
+
+	bmsr = mii->mir(mii, phyno, Bmsr);
+	if(!(bmsr & BmsrLs)){
+		// print("miistatus: link down\n");
+		phy->link = 0;
+		return -1;
+	}
+
+	phy->speed = phy->fd = phy->rfc = phy->tfc = 0;
+	/* gigabit: what we advertised (mscr) against what the partner reports (Mssr) */
+	if(phy->mscr){
+		r = mii->mir(mii, phyno, Mssr);
+		if((phy->mscr & Mscr1000TFD) && (r & Mssr1000TFD)){
+			phy->speed = 1000;
+			phy->fd = 1;
+		}
+		else if((phy->mscr & Mscr1000THD) && (r & Mssr1000THD))
+			phy->speed = 1000;
+	}
+
+	/* otherwise take the best mode common to our Anar and the partner's Anlpar */
+	anlpar = mii->mir(mii, phyno, Anlpar);
+	if(phy->speed == 0){
+		r = phy->anar & anlpar;
+		if(r & AnaTXFD){
+			phy->speed = 100;
+			phy->fd = 1;
+		}
+		else if(r & AnaTXHD)
+			phy->speed = 100;
+		else if(r & Ana10FD){
+			phy->speed = 10;
+			phy->fd = 1;
+		}
+		else if(r & Ana10HD)
+			phy->speed = 10;
+	}
+	if(phy->speed == 0) {
+		// print("miistatus: phy speed 0\n");
+		return -1;
+	}
+
+	/* flow control is only resolved for full duplex, from our and the partner's pause bits */
+	if(phy->fd){
+		p = phy->fc;
+		r = anlpar & (AnaAP|AnaP);
+		if(p == AnaAP && r == (AnaAP|AnaP))
+			phy->tfc = 1;
+		else if(p == (AnaAP|AnaP) && r == AnaAP)
+			phy->rfc = 1;
+		else if((p & AnaP) && (r & AnaP))
+			phy->rfc = phy->tfc = 1;
+	}
+
+	phy->link = 1;
+
+	return 0;
+}

+ 116 - 0
sys/src/9/kw/ethermii.h

@@ -0,0 +1,116 @@
+typedef struct Mii Mii;
+typedef struct MiiPhy MiiPhy;
+
+enum {					/* registers */
+	Bmcr		= 0x00,		/* Basic Mode Control */
+	Bmsr		= 0x01,		/* Basic Mode Status */
+	Phyidr1		= 0x02,		/* PHY Identifier #1 */
+	Phyidr2		= 0x03,		/* PHY Identifier #2 */
+	Anar		= 0x04,		/* Auto-Negotiation Advertisement */
+	Anlpar		= 0x05,		/* AN Link Partner Ability */
+	Aner		= 0x06,		/* AN Expansion */
+	Annptr		= 0x07,		/* AN Next Page TX */
+	Annprr		= 0x08,		/* AN Next Page RX */
+	Mscr		= 0x09,		/* MASTER-SLAVE Control */
+	Mssr		= 0x0A,		/* MASTER-SLAVE Status */
+	Esr		= 0x0F,		/* Extended Status */
+
+	NMiiPhyr	= 32,		/* presumably registers per PHY -- TODO confirm */
+	NMiiPhy		= 32,		/* PHY numbers probed by mii(); size of Mii.phy[] */
+};
+
+enum {					/* Bmcr */
+	BmcrSs1		= 0x0040,	/* Speed Select[1] */
+	BmcrCte		= 0x0080,	/* Collision Test Enable */
+	BmcrDm		= 0x0100,	/* Duplex Mode */
+	BmcrRan		= 0x0200,	/* Restart Auto-Negotiation */
+	BmcrI		= 0x0400,	/* Isolate */
+	BmcrPd		= 0x0800,	/* Power Down */
+	BmcrAne		= 0x1000,	/* Auto-Negotiation Enable */
+	BmcrSs0		= 0x2000,	/* Speed Select[0] */
+	BmcrLe		= 0x4000,	/* Loopback Enable */
+	BmcrR		= 0x8000,	/* Reset */
+};
+
+enum {					/* Bmsr */
+	BmsrEc		= 0x0001,	/* Extended Capability */
+	BmsrJd		= 0x0002,	/* Jabber Detect */
+	BmsrLs		= 0x0004,	/* Link Status */
+	BmsrAna		= 0x0008,	/* Auto-Negotiation Ability */
+	BmsrRf		= 0x0010,	/* Remote Fault */
+	BmsrAnc		= 0x0020,	/* Auto-Negotiation Complete */
+	BmsrPs		= 0x0040,	/* Preamble Suppression Capable */
+	BmsrEs		= 0x0100,	/* Extended Status */
+	Bmsr100T2HD	= 0x0200,	/* 100BASE-T2 HD Capable */
+	Bmsr100T2FD	= 0x0400,	/* 100BASE-T2 FD Capable */
+	Bmsr10THD	= 0x0800,	/* 10BASE-T HD Capable */
+	Bmsr10TFD	= 0x1000,	/* 10BASE-T FD Capable */
+	Bmsr100TXHD	= 0x2000,	/* 100BASE-TX HD Capable */
+	Bmsr100TXFD	= 0x4000,	/* 100BASE-TX FD Capable */
+	Bmsr100T4	= 0x8000,	/* 100BASE-T4 Capable */
+};
+
+enum {					/* Anar/Anlpar */
+	Ana10HD		= 0x0020,	/* Advertise 10BASE-T */
+	Ana10FD		= 0x0040,	/* Advertise 10BASE-T FD */
+	AnaTXHD		= 0x0080,	/* Advertise 100BASE-TX */
+	AnaTXFD		= 0x0100,	/* Advertise 100BASE-TX FD */
+	AnaT4		= 0x0200,	/* Advertise 100BASE-T4 */
+	AnaP		= 0x0400,	/* Pause */
+	AnaAP		= 0x0800,	/* Asymmetrical Pause */
+	AnaRf		= 0x2000,	/* Remote Fault */
+	AnaAck		= 0x4000,	/* Acknowledge */
+	AnaNp		= 0x8000,	/* Next Page Indication */
+};
+
+enum {					/* Mscr */
+	Mscr1000THD	= 0x0100,	/* Advertise 1000BASE-T HD */
+	Mscr1000TFD	= 0x0200,	/* Advertise 1000BASE-T FD */
+};
+
+enum {					/* Mssr */
+	Mssr1000THD	= 0x0400,	/* Link Partner 1000BASE-T HD able */
+	Mssr1000TFD	= 0x0800,	/* Link Partner 1000BASE-T FD able */
+};
+
+enum {					/* Esr */
+	Esr1000THD	= 0x1000,	/* 1000BASE-T HD Capable */
+	Esr1000TFD	= 0x2000,	/* 1000BASE-T FD Capable */
+	Esr1000XHD	= 0x4000,	/* 1000BASE-X HD Capable */
+	Esr1000XFD	= 0x8000,	/* 1000BASE-X FD Capable */
+};
+
+/*
+ * One management bus and the PHYs found on it.
+ */
+typedef struct Mii {
+	Lock;
+	int	nphy;			/* number of PHYs recorded by mii() */
+	int	mask;			/* bit n set: PHY n has been found */
+	MiiPhy*	phy[NMiiPhy];		/* indexed by PHY number */
+	MiiPhy*	curphy;			/* PHY used by the mii* routines; first one found */
+
+	void*	ctlr;			/* owning controller; must be non-nil for mii* calls */
+	int	(*mir)(Mii*, int, int);	/* read: (mii, phyno, register) */
+	int	(*miw)(Mii*, int, int, int);	/* write: (mii, phyno, register, data) */
+} Mii;
+
+/*
+ * Per-PHY state.  anar, fc and mscr start as ~0 (not yet chosen)
+ * and are filled in by miiane; the last five fields are results
+ * computed by miistatus.
+ */
+typedef struct MiiPhy {
+	Mii*	mii;			/* bus this PHY sits on */
+	int	oui;			/* id built from Phyidr1/Phyidr2 */
+	int	phyno;			/* PHY number on the bus */
+
+	int	anar;			/* advertised 10/100 abilities */
+	int	fc;			/* advertised pause bits */
+	int	mscr;			/* advertised 1000BASE-T bits */
+
+	int	link;			/* 1 if link up */
+	int	speed;			/* 10, 100 or 1000; 0 if unknown */
+	int	fd;			/* full duplex */
+	int	rfc;			/* receive flow control */
+	int	tfc;			/* transmit flow control */
+};
+
+extern int mii(Mii*, int);
+extern int miiane(Mii*, int, int, int);
+extern int miimir(Mii*, int);
+extern int miimiw(Mii*, int, int);
+extern int miireset(Mii*);
+extern int miistatus(Mii*);

+ 180 - 0
sys/src/9/kw/fns.h

@@ -0,0 +1,180 @@
+#define checkmmu(a, b)
+#define countpagerefs(a, b)
+
+#include "../port/portfns.h"
+
+extern int led(int, int);
+extern void ledexit(int);
+extern void delay(int);
+extern void _uartputs(char*, int);
+extern int _uartprint(char*, ...);
+extern void uartkirkwoodconsole(void);
+extern void serialputs(char *, int);
+extern void serialputc(int c);
+
+#pragma	varargck argpos	_uartprint 1
+
+extern void archreboot(void);
+extern void archconfinit(void);
+extern void archreset(void);
+extern void barriers(void);
+extern void cachedinv(void);
+extern void cachedinvse(void*, int);
+extern void cachedwb(void);
+extern void cachedwbinv(void);
+extern void cachedwbinvse(void*, int);
+extern void cachedwbse(void*, int);
+extern void cacheiinv(void);
+extern void cacheuwbinv(void);
+extern uintptr cankaddr(uintptr pa);
+int	cmpswap(long*, long, long);
+
+/*
+ * cachedwb seems like overkill, but just calling barriers isn't enough.
+ */
+#define coherence cachedwb
+// #define coherence barriers
+
+extern u32int controlget(void);
+extern u32int cpctget(void);
+extern u32int cpidget(void);
+extern char *cputype2name(char *, int);
+extern void cpuidprint(void);
+#define cycles(ip) *(ip) = lcycles()
+extern u32int dacget(void);
+extern void dacput(u32int);
+extern void dcflushall(void);
+extern u32int farget(void);
+extern u32int fsrget(void);
+extern void icflushall(void);
+extern void l1cachesoff(void);
+extern void l1cacheson(void);
+extern void l2cachecfgon(void);
+extern void l2cacheon(void);
+extern void lastresortprint(char *buf, long bp);
+extern void mmuinvalidate(void);		/* 'mmu' or 'tlb'? */
+extern void mmuinvalidateaddr(u32int);		/* 'mmu' or 'tlb'? */
+extern u32int pidget(void);
+extern void pidput(u32int);
+void	procrestore(Proc *);
+void	procsave(Proc*);
+void	procsetup(Proc*);
+extern void _reset(void);
+extern void setr13(int, u32int*);
+#define tas _tas
+extern int _tas(ulong *);
+extern u32int ttbget(void);
+extern void ttbput(u32int);
+
+Dev*		devtabget(int, int);
+void		devtabinit(void);
+void		devtabreset(void);
+long		devtabread(Chan*, void*, long, vlong);
+void		devtabshutdown(void);
+
+extern void intrclear(int sort, int v);
+extern void intrenable(int sort, int v, void (*f)(Ureg*, void*), void *a, char *name);
+extern void intrdisable(int sort, int v, void (*f)(Ureg*, void*), void* a, char *name);
+extern void vectors(void);
+extern void vtable(void);
+
+/*
+ * Things called in main.
+ */
+extern void clockinit(void);
+extern void i8250console(void);
+extern void links(void);
+extern void mmuinit(void);
+extern void touser(uintptr);
+extern void trapinit(void);
+
+extern int fpiarm(Ureg*);
+extern int fpudevprocio(Proc*, void*, long, uintptr, int);
+extern void fpuinit(void);
+extern void fpunoted(void);
+extern void fpunotify(Ureg*);
+extern void fpuprocrestore(Proc*);
+extern void fpuprocsave(Proc*);
+extern void fpusysprocsetup(Proc*);
+extern void fpusysrfork(Ureg*);
+extern void fpusysrforkchild(Proc*, Proc*);
+extern int fpuemu(Ureg*);
+
+/*
+ * Miscellaneous machine dependent stuff.
+ */
+extern char* getenv(char*, char*, int);
+char*	getconf(char*);
+uintptr mmukmap(uintptr, uintptr, usize);
+uintptr mmukunmap(uintptr, uintptr, usize);
+extern void* mmuuncache(void*, usize);
+extern void* ucalloc(usize);
+extern void ucfree(void*);
+
+/*
+ * Things called from port.
+ */
+extern void delay(int);				/* only scheddump() */
+extern int islo(void);
+extern void microdelay(int);			/* only edf.c */
+extern void evenaddr(uintptr);
+extern void idlehands(void);
+extern void setkernur(Ureg*, Proc*);		/* only devproc.c */
+extern void spldone(void);
+extern int splfhi(void);
+extern int splflo(void);
+extern void sysprocsetup(Proc*);
+
+/*
+ * PCI
+ */
+ulong	pcibarsize(Pcidev*, int);
+void	pcibussize(Pcidev*, ulong*, ulong*);
+int	pcicfgr8(Pcidev*, int);
+int	pcicfgr16(Pcidev*, int);
+int	pcicfgr32(Pcidev*, int);
+void	pcicfgw8(Pcidev*, int, int);
+void	pcicfgw16(Pcidev*, int, int);
+void	pcicfgw32(Pcidev*, int, int);
+void	pciclrbme(Pcidev*);
+void	pciclrioe(Pcidev*);
+void	pciclrmwi(Pcidev*);
+int	pcigetpms(Pcidev*);
+void	pcihinv(Pcidev*);
+uchar	pciipin(Pcidev*, uchar);
+Pcidev* pcimatch(Pcidev*, int, int);
+Pcidev* pcimatchtbdf(int);
+void	pcireset(void);
+int	pciscan(int, Pcidev**);
+void	pcisetbme(Pcidev*);
+void	pcisetioe(Pcidev*);
+void	pcisetmwi(Pcidev*);
+int	pcisetpms(Pcidev*, int);
+int	cas32(void*, u32int, u32int);
+int	tas32(void*);
+
+/*
+ * Compare-and-swap and test-and-set wrappers over cas32 and tas32.
+ * CASU and CASV expand identically, casting the expected and new
+ * values to u32int; CASW passes its arguments through uncast.
+ */
+#define CASU(p, e, n)	cas32((p), (u32int)(e), (u32int)(n))
+#define CASV(p, e, n)	cas32((p), (u32int)(e), (u32int)(n))
+#define CASW(addr, exp, new)	cas32((addr), (exp), (new))
+#define TAS(addr)	tas32(addr)
+
+extern void forkret(void);
+extern int userureg(Ureg*);
+void*	vmap(uintptr, usize);
+void	vunmap(void*, usize);
+
+extern void kexit(Ureg*);
+
+#define	getpgcolor(a)	0
+#define	kmapinval()
+
+#define PTR2UINT(p)	((uintptr)(p))
+#define UINT2PTR(i)	((void*)(i))
+
+/*
+ * Claim the next slot in up->errlab by bumping up->nerrlab, then
+ * call setlabel on that slot; the macro's value is setlabel's result.
+ */
+#define	waserror()	(up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
+
+/*
+ * These are not good enough.
+ */
+#define KADDR(pa)	UINT2PTR(KZERO|((uintptr)(pa)))
+#define PADDR(va)	PTR2UINT(((uintptr)(va)) & ~KSEGM)

+ 300 - 0
sys/src/9/kw/fpi.c

@@ -0,0 +1,300 @@
+/*
+ * Floating Point Interpreter.
+ * shamelessly stolen from an original by ark.
+ */
+#include "fpi.h"
+
+/*
+ * Round the mantissa of i in place, discarding the guard bits.
+ * Rounds up when the guard bits exceed half of LsBit, or equal
+ * half and the kept value is odd (LsBit set), i.e. ties go to even.
+ * A carry out of l propagates into h; if h then overflows into
+ * CarryBit the mantissa is shifted right one bit and e is bumped.
+ */
+void
+fpiround(Internal *i)
+{
+	unsigned long guard;
+
+	guard = i->l & GuardMask;
+	i->l &= ~GuardMask;
+	if(guard > (LsBit>>1) || (guard == (LsBit>>1) && (i->l & LsBit))){
+		i->l += LsBit;
+		if(i->l & CarryBit){
+			i->l &= ~CarryBit;
+			i->h++;
+			if(i->h & CarryBit){
+				/* move h's low bit into the top of l before shifting both right */
+				if (i->h & 0x01)
+					i->l |= CarryBit;
+				i->l >>= 1;
+				i->h >>= 1;
+				i->e++;
+			}
+		}
+	}
+}
+
+/*
+ * Give x the same exponent as y by shifting x's mantissa right
+ * y->e - x->e places, modifying x in place.  Bits shifted out of
+ * l are folded into l's low bit so a non-zero remainder is not
+ * lost.  The callers in this file arrange that x->e <= y->e; if
+ * the difference is not positive only x->e changes.
+ */
+static void
+matchexponents(Internal *x, Internal *y)
+{
+	int count;
+
+	count = y->e - x->e;
+	x->e = y->e;
+	/* everything shifts out: keep only a "was non-zero" bit */
+	if(count >= 2*FractBits){
+		x->l = x->l || x->h;
+		x->h = 0;
+		return;
+	}
+	/* whole-word shift: h moves down into l, old l collapses to a sticky bit */
+	if(count >= FractBits){
+		count -= FractBits;
+		x->l = x->h|(x->l != 0);
+		x->h = 0;
+	}
+	while(count > 0){
+		count--;
+		/* h's low bit enters the top of l */
+		if(x->h & 0x01)
+			x->l |= CarryBit;
+		/* preserve the bit about to fall off the bottom of l */
+		if(x->l & 0x01)
+			x->l |= 2;
+		x->l >>= 1;
+		x->h >>= 1;
+	}
+}
+
+/*
+ * Shift the two-word mantissa of i left one bit and decrement the
+ * exponent to match.  The bit that lands on CarryBit in l is moved
+ * into bit 0 of h.
+ */
+static void
+shift(Internal *i)
+{
+	i->e -= 1;
+	i->l = i->l << 1;
+	i->h = i->h << 1;
+	if((i->l & CarryBit) == 0)
+		return;
+	/* carry from the low word into the high word */
+	i->h |= 0x01;
+	i->l &= ~CarryBit;
+}
+
+/*
+ * Shift i left, one bit at a time, until HiddenBit is set in h.
+ * Does not terminate on its own for an all-zero mantissa; the
+ * callers in this file test IsZero first.
+ */
+static void
+normalise(Internal *i)
+{
+	for(;;){
+		if(i->h & HiddenBit)
+			break;
+		shift(i);
+	}
+}
+
+/*
+ * Bring a freshly computed result back into representable range.
+ * While the exponent is below 1 the mantissa is shifted right and
+ * the exponent raised (after clamping e to -2*FractBits so the
+ * loop is bounded); the low bit of l is kept sticky.  A result
+ * whose exponent reaches ExpInfinity becomes infinity.
+ */
+static void
+renormalise(Internal *i)
+{
+	if(i->e < -2 * FractBits)
+		i->e = -2 * FractBits;
+	while(i->e < 1){
+		i->e++;
+		/* h's low bit enters the top of l */
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->h >>= 1;
+		/* shift l but keep its old low bit as a sticky bit */
+		i->l = (i->l>>1)|(i->l & 0x01);
+	}
+	if(i->e >= ExpInfinity)
+		SetInfinity(i);
+}
+
+/*
+ * Normalise x in place unless it is zero or has an exponent in the
+ * infinity/NaN range, in which case it is left untouched.
+ */
+void
+fpinormalise(Internal *x)
+{
+	if(IsWeird(x) || IsZero(x))
+		return;
+	normalise(x);
+}
+
+/*
+ * Add the magnitudes of x and y into i, giving i the sign of x.
+ * If either operand has an infinity/NaN exponent the result is a
+ * quiet NaN when either is a NaN, otherwise infinity.
+ * The operand with the smaller exponent is shifted in place by
+ * matchexponents, so the caller's x or y may be modified.
+ */
+void
+fpiadd(Internal *x, Internal *y, Internal *i)
+{
+	Internal *t;
+
+	i->s = x->s;
+	if(IsWeird(x) || IsWeird(y)){
+		if(IsNaN(x) || IsNaN(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	/* make x the operand with the smaller exponent */
+	if(x->e > y->e){
+		t = x;
+		x = y;
+		y = t;
+	}
+	matchexponents(x, y);
+	i->e = x->e;
+	i->h = x->h + y->h;
+	i->l = x->l + y->l;
+	/* carry from the low word into the high word */
+	if(i->l & CarryBit){
+		i->h++;
+		i->l &= ~CarryBit;
+	}
+	/* sum overflowed past the hidden bit: shift right one, keeping l's low bit sticky */
+	if(i->h & (HiddenBit<<1)){
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->l = (i->l>>1)|(i->l & 0x01);
+		i->h >>= 1;
+		i->e++;
+	}
+	if(IsWeird(i))
+		SetInfinity(i);
+}
+
+/*
+ * Subtract the smaller magnitude of x and y from the larger and
+ * store the difference in i, with the sign of the larger operand.
+ * A NaN as the larger operand gives a quiet NaN; infinity minus
+ * infinity gives a quiet NaN; infinity minus anything else gives
+ * infinity.  The smaller operand is shifted in place by
+ * matchexponents, so the caller's x or y may be modified.
+ */
+void
+fpisub(Internal *x, Internal *y, Internal *i)
+{
+	Internal *t;
+
+	/* make y the operand with the larger (e, h, l) */
+	if(y->e < x->e
+	   || (y->e == x->e && (y->h < x->h || (y->h == x->h && y->l < x->l)))){
+		t = x;
+		x = y;
+		y = t;
+	}
+	i->s = y->s;
+	if(IsNaN(y)){
+		SetQNaN(i);
+		return;
+	}
+	if(IsInfinity(y)){
+		if(IsInfinity(x))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	matchexponents(x, y);
+	i->e = y->e;
+	i->h = y->h - x->h;
+	i->l = y->l - x->l;
+	/* borrow from the high word */
+	if(i->l < 0){
+		i->l += CarryBit;
+		i->h--;
+	}
+	/* shift the leading bit back up, but never take the exponent below 1 */
+	if(i->h == 0 && i->l == 0)
+		SetZero(i);
+	else while(i->e > 1 && (i->h & HiddenBit) == 0)
+		shift(i);
+}
+
+/*
+ * Helpers for fpimul.  Each FractBits-wide mantissa word is split
+ * into two CHUNK-bit halves (HI, LO); M multiplies chunk x of a by
+ * chunk y of b; SPILL extracts what overflows a chunk; C glues two
+ * chunks back into one word.
+ */
+#define	CHUNK		(FractBits/2)
+#define	CMASK		((1<<CHUNK)-1)
+#define	HI(x)		((short)((x)>>CHUNK) & CMASK)
+#define	LO(x)		((short)(x) & CMASK)
+#define	SPILL(x)	((x)>>CHUNK)
+#define	M(x, y)		((long)a[x]*(long)b[y])
+#define	C(h, l)		(((long)((h) & CMASK)<<CHUNK)|((l) & CMASK))
+
+/*
+ * Multiply x by y into i; the sign is the xor of the operand signs.
+ * With an infinity/NaN operand the result is a quiet NaN if either
+ * operand is a NaN or a zero, otherwise infinity.  A zero operand
+ * otherwise gives zero.  x and y are normalised in place, so the
+ * caller's operands may be modified.
+ */
+void
+fpimul(Internal *x, Internal *y, Internal *i)
+{
+	long a[4], b[4], c[7], f[4];
+
+	i->s = x->s^y->s;
+	if(IsWeird(x) || IsWeird(y)){
+		if(IsNaN(x) || IsNaN(y) || IsZero(x) || IsZero(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	else if(IsZero(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->e = x->e + y->e - (ExpBias - 1);
+
+	/* chunks, most significant first */
+	a[0] = HI(x->h); b[0] = HI(y->h);
+	a[1] = LO(x->h); b[1] = LO(y->h);
+	a[2] = HI(x->l); b[2] = HI(y->l);
+	a[3] = LO(x->l); b[3] = LO(y->l);
+
+	/* column sums of the partial products, least significant first, each taking the spill of the one below */
+	c[6] =                               M(3, 3);
+	c[5] =                     M(2, 3) + M(3, 2) + SPILL(c[6]);
+	c[4] =           M(1, 3) + M(2, 2) + M(3, 1) + SPILL(c[5]);
+	c[3] = M(0, 3) + M(1, 2) + M(2, 1) + M(3, 0) + SPILL(c[4]);
+	c[2] = M(0, 2) + M(1, 1) + M(2, 0)           + SPILL(c[3]);
+	c[1] = M(0, 1) + M(1, 0)                     + SPILL(c[2]);
+	c[0] = M(0, 0)                               + SPILL(c[1]);
+
+	/* reassemble into four words, f[0] most significant */
+	f[0] = c[0];
+	f[1] = C(c[1], c[2]);
+	f[2] = C(c[3], c[4]);
+	f[3] = C(c[5], c[6]);
+
+	/* product has no hidden bit yet: shift the whole thing left one place */
+	if((f[0] & HiddenBit) == 0){
+		f[0] <<= 1;
+		f[1] <<= 1;
+		f[2] <<= 1;
+		f[3] <<= 1;
+		if(f[1] & CarryBit){
+			f[0] |= 1;
+			f[1] &= ~CarryBit;
+		}
+		if(f[2] & CarryBit){
+			f[1] |= 1;
+			f[2] &= ~CarryBit;
+		}
+		if(f[3] & CarryBit){
+			f[2] |= 1;
+			f[3] &= ~CarryBit;
+		}
+		i->e--;
+	}
+	/* keep the top two words; any non-zero remainder becomes a sticky bit */
+	i->h = f[0];
+	i->l = f[1];
+	if(f[2] || f[3])
+		i->l |= 1;
+	renormalise(i);
+}
+
+/*
+ * Divide y by x into i (note the operand order: x is the divisor);
+ * the sign is the xor of the operand signs.
+ * NaN operands, infinity/infinity and zero/zero give a quiet NaN.
+ * A zero divisor or infinite dividend gives infinity; an infinite
+ * divisor or zero dividend gives zero.
+ * x and y are normalised, and y is consumed as the running
+ * remainder, so the caller's operands are modified.
+ */
+void
+fpidiv(Internal *x, Internal *y, Internal *i)
+{
+	i->s = x->s^y->s;
+	if(IsNaN(x) || IsNaN(y)
+	   || (IsInfinity(x) && IsInfinity(y)) || (IsZero(x) && IsZero(y))){
+		SetQNaN(i);
+		return;
+	}
+	else if(IsZero(x) || IsInfinity(y)){
+		SetInfinity(i);
+		return;
+	}
+	else if(IsInfinity(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->h = 0;
+	i->l = 0;
+	i->e = y->e - x->e + (ExpBias + 2*FractBits - 1);
+	/* one quotient bit per pass, until the quotient's hidden bit is set */
+	do{
+		if(y->h > x->h || (y->h == x->h && y->l >= x->l)){
+			i->l |= 0x01;
+			y->h -= x->h;
+			y->l -= x->l;
+			/* borrow from the high word */
+			if(y->l < 0){
+				y->l += CarryBit;
+				y->h--;
+			}
+		}
+		shift(y);
+		shift(i);
+	}while ((i->h & HiddenBit) == 0);
+	/* non-zero remainder becomes a sticky bit */
+	if(y->h || y->l)
+		i->l |= 0x01;
+	renormalise(i);
+}
+
+/*
+ * Compare x with y.
+ * Returns 0 when both are NaNs.  When both are infinities, or the
+ * exponent and mantissa words are identical, returns y->s - x->s.
+ * Otherwise, if x has the smaller (e, h, l) the result is 1 when
+ * y's sign is set and -1 when it is clear; if x has the larger,
+ * the result is -1 when x's sign is set and 1 when it is clear.
+ */
+int
+fpicmp(Internal *x, Internal *y)
+{
+	int smaller;
+
+	if(IsNaN(x) && IsNaN(y))
+		return 0;
+	if((IsInfinity(x) && IsInfinity(y))
+	   || (x->e == y->e && x->h == y->h && x->l == y->l))
+		return y->s - x->s;
+
+	/* compare exponent first, then high word, then low word */
+	if(x->e != y->e)
+		smaller = x->e < y->e;
+	else if(x->h != y->h)
+		smaller = x->h < y->h;
+	else
+		smaller = x->l < y->l;
+
+	if(smaller)
+		return y->s ? 1: -1;
+	return x->s ? -1: 1;
+}

+ 61 - 0
sys/src/9/kw/fpi.h

@@ -0,0 +1,61 @@
+typedef long Word;
+typedef unsigned long Single;
+typedef struct {
+	unsigned long l;
+	unsigned long h;
+} Double;
+
+/*
+ * Layout of the two mantissa words of an Internal, and the
+ * exponent parameters of the single and double formats.
+ */
+enum {
+	FractBits	= 28,		/* mantissa bits held in each word */
+	CarryBit	= 0x10000000,	/* bit just above the mantissa bits of a word */
+	HiddenBit	= 0x08000000,	/* top mantissa bit of the high word */
+	MsBit		= HiddenBit,
+	NGuardBits	= 3,		/* guard bits at the bottom of the low word */
+	GuardMask	= 0x07,
+	LsBit		= (1<<NGuardBits),	/* lowest bit kept after rounding */
+
+	SingleExpBias	= 127,
+	SingleExpMax	= 255,
+	DoubleExpBias	= 1023,
+	DoubleExpMax	= 2047,
+
+	/* internal arithmetic uses the double-precision exponent range */
+	ExpBias		= DoubleExpBias,
+	ExpInfinity	= DoubleExpMax,
+};
+
+/*
+ * Unpacked floating-point value used by the fpi routines: sign,
+ * biased exponent, and a mantissa split across two words.  The
+ * bit pictures show the hidden bit (H), fraction bits (F) and
+ * guard bits (G).
+ */
+typedef struct {
+	unsigned char s;		/* sign; non-zero means negative */
+	short e;			/* exponent; >= ExpInfinity means infinity or NaN */
+	long l;				/* 0000FFFFFFFFFFFFFFFFFFFFFFFFFGGG */
+	long h;				/* 0000HFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+} Internal;
+
+/*
+ * Classification and assignment helpers for Internal values.
+ * "Weird" means the exponent is in the infinity/NaN range; an
+ * infinity has only the hidden bit set in the mantissa, a NaN has
+ * any other mantissa bit set.  Zero is exponent 1 with an all-zero
+ * mantissa.  All of these evaluate their argument more than once.
+ */
+#define IsWeird(n)	((n)->e >= ExpInfinity)
+#define	IsInfinity(n)	(IsWeird(n) && (n)->h == HiddenBit && (n)->l == 0)
+#define	SetInfinity(n)	((n)->e = ExpInfinity, (n)->h = HiddenBit, (n)->l = 0)
+#define IsNaN(n)	(IsWeird(n) && (((n)->h & ~HiddenBit) || (n)->l))
+#define	SetQNaN(n)	((n)->s = 0, (n)->e = ExpInfinity, 		\
+			 (n)->h = HiddenBit|(LsBit<<1), (n)->l = 0)
+#define IsZero(n)	((n)->e == 1 && (n)->h == 0 && (n)->l == 0)
+#define SetZero(n)	((n)->e = 1, (n)->h = 0, (n)->l = 0)
+
+/*
+ * fpi.c
+ */
+extern void fpiround(Internal *);
+extern void fpiadd(Internal *, Internal *, Internal *);
+extern void fpisub(Internal *, Internal *, Internal *);
+extern void fpimul(Internal *, Internal *, Internal *);
+extern void fpidiv(Internal *, Internal *, Internal *);
+extern int fpicmp(Internal *, Internal *);
+extern void fpinormalise(Internal*);
+
+/*
+ * fpimem.c
+ */
+extern void fpis2i(Internal *, void *);
+extern void fpid2i(Internal *, void *);
+extern void fpiw2i(Internal *, void *);
+extern void fpii2s(void *, Internal *);
+extern void fpii2d(void *, Internal *);
+extern void fpii2w(Word *, Internal *);

+ 576 - 0
sys/src/9/kw/fpiarm.c

@@ -0,0 +1,576 @@
+/*
+ * this doesn't attempt to implement ARM floating-point properties
+ * that aren't visible in the Inferno environment.
+ * all arithmetic is done in double precision.
+ * the FP trap status isn't updated.
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+
+#include	"ureg.h"
+
+#include	"arm.h"
+#include	"fpi.h"
+
+/* undef this if correct kernel r13 isn't in Ureg;
+ * check calculation in fpiarm below
+ */
+
+
+/* REG: general register x of the trapped context, located by its byte offset (roff) in the Ureg */
+#define	REG(ur, x) (*(long*)(((char*)(ur))+roff[(x)]))
+/* FR: emulated FP register x (only the low 3 bits of x are used), viewed as an Internal */
+#define	FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&7])
+
+typedef struct FP2 FP2;
+typedef struct FP1 FP1;
+
+/* entry of optab2: a two-operand operation; operands are passed by value, result through the pointer */
+struct FP2 {
+	char*	name;			/* mnemonic, used only in debug prints */
+	void	(*f)(Internal, Internal, Internal*);
+};
+
+/* entry of optab1: a one-operand operation (source, destination) */
+struct FP1 {
+	char*	name;			/* mnemonic, used only in debug prints */
+	void	(*f)(Internal*, Internal*);
+};
+
+/* condition flag bits as they sit in ur->psr (see fcmp and condok) */
+enum {
+	N = 1<<31,
+	Z = 1<<30,
+	C = 1<<29,
+	V = 1<<28,
+	REGPC = 15,		/* register number that fpemu treats as the PC */
+};
+
+enum {
+	fpemudebug = 0,		/* set non-zero to print each emulated instruction */
+};
+
+/* OFR: byte offset of field X within a Ureg */
+#undef OFR
+#define	OFR(X)	((ulong)&((Ureg*)0)->X)
+
+/* byte offsets in Ureg of registers 0-15, indexed by register number (used by REG) */
+static	int	roff[] = {
+	OFR(r0), OFR(r1), OFR(r2), OFR(r3),
+	OFR(r4), OFR(r5), OFR(r6), OFR(r7),
+	OFR(r8), OFR(r9), OFR(r10), OFR(r11),
+	OFR(r12), OFR(r13), OFR(r14), OFR(pc),
+};
+
+/*
+ * the eight constants an instruction can name in place of Fm
+ * (selected in fpemu when bit 3 of the instruction is set).
+ * entry 0 (zero) is also used by fpiarm to initialise the registers.
+ */
+static Internal fpconst[8] = {	/* indexed by op&7 */
+	/* s, e, l, h */
+	{0, 0x1, 0x00000000, 0x00000000}, /* 0.0 */
+	{0, 0x3FF, 0x00000000, 0x08000000},	/* 1.0 */
+	{0, 0x400, 0x00000000, 0x08000000},	/* 2.0 */
+	{0, 0x400, 0x00000000, 0x0C000000},	/* 3.0 */
+	{0, 0x401, 0x00000000, 0x08000000},	/* 4.0 */
+	{0, 0x401, 0x00000000, 0x0A000000},	/* 5.0 */
+	{0, 0x3FE, 0x00000000, 0x08000000},	/* 0.5 */
+	{0, 0x402, 0x00000000, 0x0A000000},	/* 10.0 */
+};
+
+/*
+ * arm binary operations
+ */
+
+/*
+ * ADDF.  operands of equal sign are handed to fpiadd,
+ * operands of differing sign to fpisub.
+ */
+static void
+fadd(Internal m, Internal n, Internal *d)
+{
+	if(m.s == n.s)
+		fpiadd(&m, &n, d);
+	else
+		fpisub(&m, &n, d);
+}
+
+/*
+ * SUBF.  flip the sign of the local copy of m, then proceed
+ * exactly as for an addition of (-m) and n.
+ */
+static void
+fsub(Internal m, Internal n, Internal *d)
+{
+	m.s = m.s ^ 1;
+	if(m.s == n.s)
+		fpiadd(&m, &n, d);
+	else
+		fpisub(&m, &n, d);
+}
+
+/*
+ * RSUBF (reverse subtract).  flip the sign of the local copy of n,
+ * then add (-n) and m, with n as the first operand.
+ */
+static void
+fsubr(Internal m, Internal n, Internal *d)
+{
+	n.s = n.s ^ 1;
+	if(n.s == m.s)
+		fpiadd(&n, &m, d);
+	else
+		fpisub(&n, &m, d);
+}
+
+/* MULF (and FMF): by-value adapter that passes m and n to fpimul, result in *d */
+static void
+fmul(Internal m, Internal n, Internal *d)
+{
+	fpimul(&m, &n, d);
+}
+
+/*
+ * DIVF (and FDV): passes m then n to fpidiv, result in *d.
+ * fpemu calls this with m = Fm and n = Fn.
+ */
+static void
+fdiv(Internal m, Internal n, Internal *d)
+{
+	fpidiv(&m, &n, d);
+}
+
+/* RDIVF (and FRD): as fdiv, but with the two operands handed to fpidiv in the opposite order */
+static void
+fdivr(Internal m, Internal n, Internal *d)
+{
+	fpidiv(&n, &m, d);
+}
+
+/*
+ * arm unary operations
+ */
+
+/* MOVF: copy *m to *d unchanged */
+static void
+fmov(Internal *m, Internal *d)
+{
+	*d = *m;
+}
+
+/* NEGF: copy *m to *d with the sign bit inverted */
+static void
+fmovn(Internal *m, Internal *d)
+{
+	*d = *m;
+	d->s ^= 1;
+}
+
+/* ABSF: copy *m to *d with the sign cleared */
+static void
+fabsf(Internal *m, Internal *d)
+{
+	*d = *m;
+	d->s = 0;
+}
+
+/*
+ * RNDF: round *m to an integral value in *d.
+ * 0.5 (fpconst[6]) is first combined with m, using fsub when m is
+ * negative and fadd otherwise; the fraction bits below the binary
+ * point are then cleared.  infinities and NaNs are returned as
+ * produced by that first step.
+ */
+static void
+frnd(Internal *m, Internal *d)
+{
+	short e;
+
+	(m->s? fsub: fadd)(fpconst[6], *m, d);
+	if(IsWeird(d))
+		return;
+	fpiround(d);
+	e = (d->e - ExpBias) + 1;	/* number of bits above the binary point */
+	if(e <= 0)
+		SetZero(d);		/* no integer bits at all; sign is kept */
+	else if(e > FractBits){
+		/* all of h is integer; clear the fractional low bits of l, if any */
+		if(e < 2*FractBits)
+			d->l &= ~((1<<(2*FractBits - e))-1);
+	}else{
+		/* binary point lies within (or at the bottom of) h: l is all fraction */
+		d->l = 0;
+		if(e < FractBits)
+			d->h &= ~((1<<(FractBits-e))-1);
+	}
+}
+
+/*
+ * dispatch tables indexed by bits 20-23 of the instruction (see fpemu).
+ * entries that are absent, or whose function is 0, are reported by
+ * fpemu as unimplemented.
+ */
+static	FP1	optab1[16] = {	/* Fd := OP Fm */
+[0]	{"MOVF",	fmov},
+[1]	{"NEGF",	fmovn},
+[2]	{"ABSF",	fabsf},
+[3]	{"RNDF",	frnd},
+[4]	{"SQTF",	/*fsqt*/0},
+/* LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN all `deprecated' */
+/* URD and NRM aren't implemented */
+};
+
+static	FP2	optab2[16] = {	/* Fd := Fn OP Fm */
+[0]	{"ADDF",	fadd},
+[1]	{"MULF",	fmul},
+[2]	{"SUBF",	fsub},
+[3]	{"RSUBF",	fsubr},
+[4]	{"DIVF",	fdiv},
+[5]	{"RDIVF",	fdivr},
+/* POW, RPW deprecated */
+[8]	{"REMF",	/*frem*/0},
+[9]	{"FMF",	fmul},	/* fast multiply */
+[10]	{"FDV",	fdiv},	/* fast divide */
+[11]	{"FRD",	fdivr},	/* fast reverse divide */
+/* POL deprecated */
+};
+
+/*
+ * compare n with m and return the flag bits to be placed in the PSR:
+ * V|C if either operand is an infinity or NaN, otherwise C, C|Z or N
+ * according to whether fpicmp reports > 0, == 0 or < 0 for the
+ * rounded copies of the operands.
+ */
+static ulong
+fcmp(Internal *n, Internal *m)
+{
+	Internal a, b;
+	int r;
+
+	/* BUG: should trap if not masked */
+	if(IsWeird(m) || IsWeird(n))
+		return V|C;
+
+	a = *n;
+	b = *m;
+	fpiround(&a);
+	fpiround(&b);
+	r = fpicmp(&a, &b);
+	if(r < 0)
+		return N;
+	if(r == 0)
+		return C|Z;
+	return C;
+}
+
+/*
+ * load FP register d from memory at ea using conversion routine f
+ * (fpis2i or fpid2i); n is the operand size in bytes (4 or 8).
+ * ea is computed from user registers, so it is checked with validaddr
+ * before being dereferenced, as the other emulation routines in this
+ * file do for the addresses they touch.
+ */
+static void
+fld(void (*f)(Internal*, void*), int d, ulong ea, int n, PFPU *ufp)
+{
+	void *mem;
+
+	validaddr(ea, n, 0);
+	mem = (void*)ea;
+	(*f)(&FR(ufp, d), mem);
+	if(fpemudebug)
+		print("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d);
+}
+
+/*
+ * store FP register s to memory at ea using conversion routine f
+ * (fpii2s or fpii2d); n is the operand size in bytes (4 or 8).
+ * the conversion is applied to a copy because the fpii2* routines
+ * modify their Internal argument.
+ * ea is computed from user registers, so it is checked for write
+ * access with validaddr before anything is stored through it.
+ */
+static void
+fst(void (*f)(void*, Internal*), ulong ea, int s, int n, PFPU *ufp)
+{
+	Internal tmp;
+	void *mem;
+
+	validaddr(ea, n, 1);
+	mem = (void*)ea;
+	tmp = FR(ufp, s);
+	if(fpemudebug)
+		print("MOV%c	F%d,#%lux\n", n==8? 'D': 'F', s, ea);
+	(*f)(mem, &tmp);
+}
+
+/*
+ * evaluate condition code c (0-15) against the flag bits in cc.
+ * returns non-zero if the condition holds, zero otherwise.
+ */
+static int
+condok(int cc, int c)
+{
+	int z, cf, nv;
+
+	z = cc & Z;
+	cf = cc & C;
+	nv = cc & (N|V);
+	switch(c){
+	case 0:		/* Z set */
+		return z;
+	case 1:		/* Z clear */
+		return z == 0;
+	case 2:		/* C set */
+		return cf;
+	case 3:		/* C clear */
+		return cf == 0;
+	case 4:		/* N set */
+		return cc & N;
+	case 5:		/* N clear */
+		return (cc & N) == 0;
+	case 6:		/* V set */
+		return cc & V;
+	case 7:		/* V clear */
+		return (cc & V) == 0;
+	case 8:		/* C set and Z clear */
+		return cf && z == 0;
+	case 9:		/* C clear or Z set */
+		return cf == 0 || z;
+	case 10:	/* N and V agree */
+		return nv == (N|V) || nv == 0;
+	case 11:	/* N and V differ */
+		return nv == N || nv == V;
+	case 12:	/* Z clear, and N and V agree */
+		return z == 0 && (nv == (N|V) || nv == 0);
+	case 13:	/* Z set, or N and V differ */
+		return z || nv == N || nv == V;
+	case 14:	/* always */
+		return 1;
+	case 15:	/* never (reserved) */
+		return 0;
+	}
+	return 0;	/* not reached */
+}
+
+/*
+ * report an instruction the emulator does not handle: format a
+ * message naming pc and the opcode and raise it with error().
+ */
+static void
+unimp(ulong pc, ulong op)
+{
+	char buf[60];
+
+	snprint(buf, sizeof(buf), "sys: fp: pc=%lux unimp fp 0x%.8lux", pc, op);
+	if(fpemudebug)
+		print("FPE: %s\n", buf);
+	error(buf);
+	/* no return */
+}
+
+/*
+ * decode and execute one floating-point coprocessor instruction.
+ * pc is used only for diagnostics, op is the instruction word, ur the
+ * trapped register set and ufp the process's emulated FP state.
+ * three groups are handled in turn: load/store (LDF, STF), register
+ * transfers and compares (bit 4 set), and monadic/dyadic arithmetic.
+ * anything else is passed to unimp, which does not return.
+ */
+static void
+fpemu(ulong pc, ulong op, Ureg *ur, PFPU *ufp)
+{
+	int rn, rd, tag, o;
+	long off;
+	ulong ea;
+	Internal tmp, *fm, *fn;
+
+	/* note: would update fault status here if we noted numeric exceptions */
+
+	/*
+	 * LDF, STF; 10.1.1
+	 */
+	if(((op>>25)&7) == 6){
+		if(op & (1<<22))
+			unimp(pc, op);	/* packed or extended */
+		rn = (op>>16)&0xF;
+		off = (op&0xFF)<<2;		/* 8-bit offset, in words */
+		if((op & (1<<23)) == 0)		/* bit 23 clear: subtract the offset */
+			off = -off;
+		ea = REG(ur, rn);
+		if(rn == REGPC)
+			ea += 8;
+		if(op & (1<<24))		/* bit 24 set: apply offset before the access */
+			ea += off;
+		rd = (op>>12)&7;
+		if(op & (1<<20)){		/* bit 20 set: load; bit 15 selects double */
+			if(op & (1<<15))
+				fld(fpid2i, rd, ea, 8, ufp);
+			else
+				fld(fpis2i, rd, ea, 4, ufp);
+		}else{
+			if(op & (1<<15))
+				fst(fpii2d, ea, rd, 8, ufp);
+			else
+				fst(fpii2s, ea, rd, 4, ufp);
+		}
+		if((op & (1<<24)) == 0)		/* bit 24 clear: apply offset after the access */
+			ea += off;
+		if(op & (1<<21))		/* bit 21 set: write the address back to Rn */
+			REG(ur, rn) = ea;
+		return;
+	}
+
+	/*
+	 * CPRT/transfer, 10.3
+	 */
+	if(op & (1<<4)){
+		rd = (op>>12) & 0xF;
+
+		/*
+		 * compare, 10.3.1
+		 */
+		if(rd == 15 && op & (1<<20)){
+			rn = (op>>16)&7;
+			fn = &FR(ufp, rn);
+			if(op & (1<<3)){
+				fm = &fpconst[op&7];
+				if(fpemudebug)
+					tag = 'C';
+			}else{
+				fm = &FR(ufp, op&7);
+				if(fpemudebug)
+					tag = 'F';
+			}
+			switch((op>>21)&7){
+			default:
+				unimp(pc, op);
+			case 4:	/* CMF: Fn :: Fm */
+			case 6:	/* CMFE: Fn :: Fm (with exception) */
+				ur->psr &= ~(N|C|Z|V);
+				ur->psr |= fcmp(fn, fm);
+				break;
+			case 5:	/* CNF: Fn :: -Fm */
+			case 7:	/* CNFE: Fn :: -Fm (with exception) */
+				tmp = *fm;	/* negate a copy; Fm itself is unchanged */
+				tmp.s ^= 1;
+				ur->psr &= ~(N|C|Z|V);
+				ur->psr |= fcmp(fn, &tmp);
+				break;
+			}
+			if(fpemudebug)
+				print("CMPF	%c%d,F%ld =%#ux\n",
+					tag, rn, op&7, ur->psr>>28);
+			return;
+		}
+
+		/*
+		 * other transfer, 10.3
+		 */
+		switch((op>>20)&0xF){
+		default:
+			unimp(pc, op);
+		case 0:	/* FLT */
+			rn = (op>>16) & 7;
+			fpiw2i(&FR(ufp, rn), &REG(ur, rd));
+			if(fpemudebug)
+				print("MOVW[FD]	R%d, F%d\n", rd, rn);
+			break;
+		case 1:	/* FIX */
+			if(op & (1<<3))
+				unimp(pc, op);
+			rn = op & 7;
+			tmp = FR(ufp, rn);	/* fpii2w modifies its argument, so convert a copy */
+			fpii2w(&REG(ur, rd), &tmp);
+			if(fpemudebug)
+				print("MOV[FD]W	F%d, R%d =%ld\n", rn, rd, REG(ur, rd));
+			break;
+		case 2:	/* FPSR := Rd */
+			ufp->status = REG(ur, rd);
+			if(fpemudebug)
+				print("MOVW	R%d, FPSR\n", rd);
+			break;
+		case 3:	/* Rd := FPSR */
+			REG(ur, rd) = ufp->status;
+			if(fpemudebug)
+				print("MOVW	FPSR, R%d\n", rd);
+			break;
+		case 4:	/* FPCR := Rd */
+			ufp->control = REG(ur, rd);
+			if(fpemudebug)
+				print("MOVW	R%d, FPCR\n", rd);
+			break;
+		case 5:	/* Rd := FPCR */
+			REG(ur, rd) = ufp->control;
+			if(fpemudebug)
+				print("MOVW	FPCR, R%d\n", rd);
+			break;
+		}
+		return;
+	}
+
+	/*
+	 * arithmetic
+	 */
+
+	if(op & (1<<3)){	/* constant */
+		fm = &fpconst[op&7];
+		if(fpemudebug)
+			tag = 'C';
+	}else{
+		fm = &FR(ufp, op&7);
+		if(fpemudebug)
+			tag = 'F';
+	}
+	rd = (op>>12)&7;
+	o = (op>>20)&0xF;	/* index into optab1 or optab2 */
+	if(op & (1<<15)){	/* monadic */
+		FP1 *fp;
+		fp = &optab1[o];
+		if(fp->f == nil)
+			unimp(pc, op);
+		if(fpemudebug)
+			print("%s	%c%ld,F%d\n", fp->name, tag, op&7, rd);
+		(*fp->f)(fm, &FR(ufp, rd));
+	} else {
+		FP2 *fp;
+		fp = &optab2[o];
+		if(fp->f == nil)
+			unimp(pc, op);
+		rn = (op>>16)&7;
+		if(fpemudebug)
+			print("%s	%c%ld,F%d,F%d\n", fp->name, tag, op&7, rn, rd);
+		/* operands are passed by value, so Fd may safely be Fm or Fn */
+		(*fp->f)(*fm, FR(ufp, rn), &FR(ufp, rd));
+	}
+}
+
+/*
+ * emulate a compare-and-swap instruction.
+ * three 3-bit register fields are taken from op: bits 16-18 name the
+ * register holding the expected value, bits 0-2 the register holding
+ * the new value, and bits 12-14 the register that holds the target
+ * address on entry and receives the result (1 if the word at that
+ * address equalled the expected value and was replaced, else 0).
+ * registers are fetched by indexing the Ureg as an array of ulong.
+ * NOTE(review): that indexing assumes r0-r7 are the first eight words
+ * of Ureg - confirm against ureg.h.
+ */
+void
+casemu(ulong pc, ulong op, Ureg *ur)
+{
+	ulong *rp, ro, rn, *rd;
+
+	USED(pc);
+
+	rp = (ulong*)ur;
+	ro = rp[op>>16 & 0x7];
+	rn = rp[op>>0 & 0x7];
+	rd = rp + (op>>12 & 0x7);
+	rp = (ulong*)*rd;		/* rp now points at the target word */
+	validaddr((ulong)rp, 4, 1);
+	splhi();			/* compare and store run between splhi and spllo */
+	if(*rd = (*rp == ro))
+		*rp = rn;
+	spllo();
+}
+
+/* set by ldrex, tested and cleared by strex; a single flag shared by all callers */
+int ldrexvalid;
+
+/*
+ * emulate LDREX: load the word addressed by the register named in
+ * bits 16-18 of op into the register named in bits 12-14, and set
+ * ldrexvalid.  registers are fetched by indexing the Ureg as an
+ * array of ulong, using 3-bit register fields.
+ */
+void
+ldrex(ulong pc, ulong op, Ureg *ur)
+{
+	ulong *rp, *rd, *addr;
+
+	USED(pc);
+
+	rp = (ulong*)ur;
+	rd = rp + (op>>16 & 0x7);
+	addr = (ulong*)*rd;
+	validaddr((ulong)addr, 4, 0);
+	ldrexvalid = 1;
+	rp[op>>12 & 0x7] = *addr;
+	if(fpemudebug)
+		print("ldrex, r%ld = [r%ld]@0x%8.8p = 0x%8.8lux",
+			op>>12 & 0x7, op>>16 & 0x7, addr, rp[op>>12 & 0x7]);
+}
+
+/*
+ * emulate STREX: if ldrexvalid is set, store the register named in
+ * bits 0-2 of op to the address held in the register named in bits
+ * 16-18, clear ldrexvalid and write 0 to the register named in bits
+ * 12-14; otherwise store nothing and write 1 to that register.
+ * the test and store run between splhi and spllo.
+ * NOTE(review): the "strex invalid" debug message prints the address
+ * register number (bits 16-18), not the status register that is set
+ * to 1 - confirm whether that is intended.
+ */
+void
+strex(ulong pc, ulong op, Ureg *ur)
+{
+	ulong *rp, rn, *rd, *addr;
+
+	USED(pc);
+
+	rp = (ulong*)ur;
+	rd = rp + (op>>16 & 0x7);
+	rn = rp[op>>0 & 0x7];
+	addr = (ulong*)*rd;
+	validaddr((ulong)addr, 4, 1);
+	splhi();
+	if(ldrexvalid){
+		if(fpemudebug)
+			print("strex valid, [r%ld]@0x%8.8p = r%ld = 0x%8.8lux",
+				op>>16 & 0x7, addr, op>>0 & 0x7, rn);
+		*addr = rn;
+		ldrexvalid = 0;
+		rp[op>>12 & 0x7] = 0;
+	}else{
+		if(fpemudebug)
+			print("strex invalid, r%ld = 1", op>>16 & 0x7);
+		rp[op>>12 & 0x7] = 1;
+	}
+	spllo();
+}
+
+/*
+ * non-FP instructions that are also emulated here.  fpiarm scans this
+ * table in order and calls f for the first entry with
+ * (op & mask) == opc; the entry with a nil f ends the table.
+ */
+struct {
+	ulong	opc;
+	ulong	mask;
+	void	(*f)(ulong, ulong, Ureg*);
+} specialopc[] = {
+	{ 0x01900f9f, 0x0ff00fff, ldrex },
+	{ 0x01800f90, 0x0ff00ff0, strex },
+	{ 0x0ed00100, 0x0ef08100, casemu },
+	{ 0x00000000, 0x00000000, nil }
+};
+
+/*
+ * emulate the run of instructions starting at ur->pc, advancing
+ * ur->pc past each one, and stop at the first instruction that is
+ * neither in specialopc nor an FP coprocessor instruction.
+ * returns the number of instructions stepped over; this count
+ * includes specialopc instructions and FP instructions skipped
+ * because their condition was false.
+ * on first use by a process the FP state is initialised: control 0,
+ * status as below, and all eight registers zero.
+ */
+int
+fpiarm(Ureg *ur)
+{
+	ulong op, o;
+	PFPU *ufp;
+	int i, n;
+
+	if(up == nil)
+		panic("fpiarm not in a process");
+	ufp = &up->PFPU;
+	/* because all the state is in the proc structure,
+	 * it need not be saved/restored
+	 */
+	if(up->fpstate != FPactive){
+//		assert(sizeof(Internal) == sizeof(ufp->regs[0]));
+		up->fpstate = FPactive;
+		ufp->control = 0;
+		ufp->status = (0x01<<28)|(1<<12);	/* software emulation, alternative C flag */
+		for(n = 0; n < 8; n++)
+			FR(ufp, n) = fpconst[0];
+	}
+	for(n=0; ;n++){
+		validaddr(ur->pc, 4, 0);
+		op = *(ulong*)(ur->pc);
+		if(fpemudebug)
+			print("%#ux: %#8.8lux ", ur->pc, op);
+		o = (op>>24) & 0xF;
+		if(condok(ur->psr, op>>28)){
+			for(i = 0; specialopc[i].f; i++)
+				if((op & specialopc[i].mask) == specialopc[i].opc)
+					break;
+			if(specialopc[i].f)
+				specialopc[i].f(ur->pc, op, ur);
+			/* FP instructions have bits 8-11 == 1 and bits 24-27 == 0xE, 0xC or 0xD */
+			else if((op & 0xF00) != 0x100 || o != 0xE && (o&~1) != 0xC)
+				break;
+			else
+				fpemu(ur->pc, op, ur, ufp);
+		}else if((op & 0xF00) != 0x100 || o != 0xE && (o&~1) != 0xC)
+			break;		/* condition false and not an FP instruction: stop */
+		ur->pc += 4;
+	}
+	if(fpemudebug) print("\n");
+	return n;
+}

+ 136 - 0
sys/src/9/kw/fpimem.c

@@ -0,0 +1,136 @@
+#include "fpi.h"
+
+/*
+ * the following routines depend on memory format, not the machine
+ */
+
+/*
+ * convert the single-precision value at v to Internal form in *i.
+ * +0 and -0 become a signed Internal zero.  an exponent field of
+ * SingleExpMax (infinity or NaN) is mapped to ExpInfinity so that
+ * IsWeird, IsInfinity and IsNaN recognise the value, just as they do
+ * for values loaded by fpid2i; the fraction is kept, so a zero
+ * fraction gives infinity and a non-zero one a NaN.
+ * all other values have their exponent rebiased from SingleExpBias
+ * to ExpBias.
+ * NOTE(review): single denormals (exponent field 0, fraction non-zero)
+ * still get HiddenBit set, because the rebiased exponent is non-zero;
+ * that case is left as it was.
+ */
+void
+fpis2i(Internal *i, void *v)
+{
+	Single *s = v;
+	unsigned long sexp;
+
+	i->s = (*s & 0x80000000) ? 1: 0;
+	if((*s & ~0x80000000) == 0){
+		SetZero(i);
+		return;
+	}
+	sexp = (*s>>23) & 0x00FF;
+	i->h = (*s & 0x007FFFFF)<<(1+NGuardBits);
+	i->l = 0;
+	if(sexp == SingleExpMax){
+		/* infinity or NaN: use the Internal encoding of a weird value */
+		i->e = ExpInfinity;
+		i->h |= HiddenBit;
+		return;
+	}
+	i->e = sexp - SingleExpBias + ExpBias;
+	if(i->e)
+		i->h |= HiddenBit;
+	else
+		i->e++;
+}
+
+/*
+ * convert the double-precision value at v to Internal form in *i.
+ * the 52 fraction bits are split between i->h and i->l with the guard
+ * bits of i->l left zero.  a non-zero exponent field gets the hidden
+ * bit; a zero exponent field (zero or denormal) is given exponent 1
+ * and no hidden bit.
+ */
+void
+fpid2i(Internal *i, void *v)
+{
+	Double *d = v;
+
+	i->s = (d->h & 0x80000000) ? 1: 0;
+	i->e = (d->h>>20) & 0x07FF;
+	i->h = ((d->h & 0x000FFFFF)<<(4+NGuardBits))|((d->l>>25) & 0x7F);
+	i->l = (d->l & 0x01FFFFFF)<<NGuardBits;
+	if(i->e == 0){
+		i->e++;
+		return;
+	}
+	i->h |= HiddenBit;
+}
+
+/*
+ * convert the signed integer at v to Internal form in *i.
+ * the magnitude is taken as an unsigned value so that the most
+ * negative Word, whose negation does not fit in a Word, is shifted
+ * into the fraction without sign extension.
+ * e counts the significant bits of the magnitude; the leading one is
+ * placed at HiddenBit and any bits that do not fit in i->h go to the
+ * top of i->l.
+ */
+void
+fpiw2i(Internal *i, void *v)
+{
+	Word word = *(Word*)v;
+	unsigned long mag, w;
+	short e;
+
+	if(word < 0){
+		i->s = 1;
+		mag = -(unsigned long)word;
+	}
+	else{
+		i->s = 0;
+		mag = word;
+	}
+	if(mag == 0){
+		SetZero(i);
+		return;
+	}
+	for (e = 0, w = mag; w; w >>= 1, e++)
+		;
+	if(e > FractBits){
+		i->h = mag>>(e - FractBits);
+		i->l = (mag & ((1<<(e - FractBits)) - 1))<<(2*FractBits - e);
+	}
+	else {
+		i->h = mag<<(FractBits - e);
+		i->l = 0;
+	}
+	i->e = (e - 1) + ExpBias;
+}
+
+/*
+ * convert *i to single precision and store it at v.
+ * *i is modified in the process (it is rounded by fpiround and its
+ * hidden bit or exponent adjusted), so callers pass a scratch copy.
+ * results whose exponent is at or below the single-precision minimum
+ * are stored as a signed zero; exponents at or above the single
+ * maximum are stored as a signed infinity, with the fraction dropped.
+ * otherwise the top 23 fraction bits of i->h are stored; i->l and the
+ * lower bits of i->h are discarded.
+ */
+void
+fpii2s(void *v, Internal *i)
+{
+	short e;
+	Single *s = (Single*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;
+	*s = i->s ? 0x80000000: 0;
+	e = i->e;
+	if(e < ExpBias){
+		if(e <= (ExpBias - SingleExpBias))
+			return;			/* too small: leave the signed zero */
+		e = SingleExpBias - (ExpBias - e);
+	}
+	else  if(e >= (ExpBias + (SingleExpMax-SingleExpBias))){
+		*s |= SingleExpMax<<23;		/* too large (or weird): infinity */
+		return;
+	}
+	else
+		e = SingleExpBias + (e - ExpBias);
+	*s |= (e<<23)|(i->h>>(1+NGuardBits));
+}
+
+/*
+ * convert *i to double precision and store it at v.
+ * *i is modified in the process (rounded by fpiround, then its
+ * fields are shifted in place), so callers pass a scratch copy.
+ * the guard bits are dropped and the remaining 52 fraction bits are
+ * repacked: 20 into d->h below the exponent, 32 into d->l.
+ */
+void
+fpii2d(void *v, Internal *i)
+{
+	Double *d = (Double*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;		/* no hidden bit: exponent field is one less */
+	/* shift the 56-bit h:l pair right by NGuardBits */
+	i->l = ((i->h & GuardMask)<<25)|(i->l>>NGuardBits);
+	i->h >>= NGuardBits;
+	d->h = i->s ? 0x80000000: 0;
+	d->h |= (i->e<<20)|((i->h & 0x00FFFFFF)>>4);
+	d->l = (i->h<<28)|i->l;
+}
+
+/*
+ * convert *i to a signed integer in *word, discarding any fraction
+ * bits below the binary point after fpiround has been applied.
+ * *i is modified by fpiround, so callers pass a scratch copy.
+ * magnitudes below 1 give 0; magnitudes needing more than 31 integer
+ * bits give 0x7FFFFFFF, which is then negated for negative values.
+ * NOTE(review): as a consequence -2^31 converts to -(2^31-1), not to
+ * the most negative Word - confirm whether that is intended.
+ */
+void
+fpii2w(Word *word, Internal *i)
+{
+	Word w;
+	short e;
+
+	fpiround(i);
+	e = (i->e - ExpBias) + 1;	/* number of bits above the binary point */
+	if(e <= 0)
+		w = 0;
+	else if(e > 31)
+		w = 0x7FFFFFFF;
+	else if(e > FractBits)
+		w = (i->h<<(e - FractBits))|(i->l>>(2*FractBits - e));
+	else
+		w = i->h>>(FractBits-e);
+	if(i->s)
+		w = -w;
+	*word = w;
+}

+ 25 - 0
sys/src/9/kw/init9.s

@@ -0,0 +1,25 @@
+/*
+ * This is the same as the C programme:
+ *
+ *	void
+ *	main(char* argv0)
+ *	{
+ *		startboot(argv0, &argv0);
+ *	}
+ *
+ * It is in assembler because SB needs to be
+ * set and doing this in C drags in too many
+ * other routines.
+ */
+TEXT main(SB), 1, $8
+	MOVW	$setR12(SB), R12		/* load the SB */
+	MOVW	$boot(SB), R0			/* first argument: address of boot */
+
+	ADD	$12, R13, R1			/* pointer to 0(FP) */
+
+	MOVW	R0, 4(R13)			/* pass argc, argv */
+	MOVW	R1, 8(R13)
+
+	BL	startboot(SB)
+_loop:						/* spin here if startboot returns */
+	B	_loop

+ 417 - 0
sys/src/9/kw/io.h

@@ -0,0 +1,417 @@
+/* bus type codes: the `t' argument of MKBUS and the result of BUSTYPE */
+enum {
+	BusCBUS		= 0,		/* Corollary CBUS */
+	BusCBUSII,			/* Corollary CBUS II */
+	BusEISA,			/* Extended ISA */
+	BusFUTURE,			/* IEEE Futurebus */
+	BusINTERN,			/* Internal bus */
+	BusISA,				/* Industry Standard Architecture */
+	BusMBI,				/* Multibus I */
+	BusMBII,			/* Multibus II */
+	BusMCA,				/* Micro Channel Architecture */
+	BusMPI,				/* MPI */
+	BusMPSA,			/* MPSA */
+	BusNUBUS,			/* Apple Macintosh NuBus */
+	BusPCI,				/* Peripheral Component Interconnect */
+	BusPCMCIA,			/* PC Memory Card International Association */
+	BusTC,				/* DEC TurboChannel */
+	BusVL,				/* VESA Local bus */
+	BusVME,				/* VMEbus */
+	BusXPRESS,			/* Express System Bus */
+	BUSUNKNOWN = -1
+};
+
+/*
+ * a `tbdf' packs bus type (bits 24 and up), bus number (bits 16-23),
+ * device number (bits 11-15) and function number (bits 8-10) into one
+ * integer.  MKBUS builds one; the BUS* macros extract the fields.
+ */
+#define MKBUS(t,b,d,f)	(((t)<<24)|(((b)&0xFF)<<16)|(((d)&0x1F)<<11)|(((f)&0x07)<<8))
+#define BUSFNO(tbdf)	(((tbdf)>>8)&0x07)
+#define BUSDNO(tbdf)	(((tbdf)>>11)&0x1F)
+#define BUSBNO(tbdf)	(((tbdf)>>16)&0xFF)
+#define BUSTYPE(tbdf)	((tbdf)>>24)
+#define BUSBDF(tbdf)	((tbdf)&0x00FFFF00)	/* bus, device and function without the type */
+
+/*
+ * PCI support code.
+ */
+/* byte offsets of the fields common to the start of every configuration header */
+enum {					/* type 0 & type 1 pre-defined header */
+	PciVID		= 0x00,		/* vendor ID */
+	PciDID		= 0x02,		/* device ID */
+	PciPCR		= 0x04,		/* command */
+	PciPSR		= 0x06,		/* status */
+	PciRID		= 0x08,		/* revision ID */
+	PciCCRp		= 0x09,		/* programming interface class code */
+	PciCCRu		= 0x0A,		/* sub-class code */
+	PciCCRb		= 0x0B,		/* base class code */
+	PciCLS		= 0x0C,		/* cache line size */
+	PciLTR		= 0x0D,		/* latency timer */
+	PciHDT		= 0x0E,		/* header type */
+	PciBST		= 0x0F,		/* BIST */
+};
+
+/* ccrb (base class code) values; controller types */
+/* values of the base class code byte (offset PciCCRb; Pcidev.ccrb) */
+enum {
+	Pcibcpci1	= 0,		/* pci 1.0; no class codes defined */
+	Pcibcstore	= 1,		/* mass storage */
+	Pcibcnet	= 2,		/* network */
+	Pcibcdisp	= 3,		/* display */
+	Pcibcmmedia	= 4,		/* multimedia */
+	Pcibcmem	= 5,		/* memory */
+	Pcibcbridge	= 6,		/* bridge */
+	Pcibccomm	= 7,		/* simple comms (e.g., serial) */
+	Pcibcbasesys	= 8,		/* base system */
+	Pcibcinput	= 9,		/* input */
+	Pcibcdock	= 0xa,		/* docking stations */
+	Pcibcproc	= 0xb,		/* processors */
+	Pcibcserial	= 0xc,		/* serial bus (e.g., USB) */
+	Pcibcwireless	= 0xd,		/* wireless */
+	Pcibcintell	= 0xe,		/* intelligent i/o */
+	Pcibcsatcom	= 0xf,		/* satellite comms */
+	Pcibccrypto	= 0x10,		/* encryption/decryption */
+	Pcibcdacq	= 0x11,		/* data acquisition & signal proc. */
+};
+
+/* ccru (sub-class code) values; common cases only */
+/*
+ * values of the sub-class code byte (offset PciCCRu; Pcidev.ccru).
+ * the numbering restarts for each base class, so a value is only
+ * meaningful together with its base class code.
+ */
+enum {
+	/* mass storage */
+	Pciscscsi	= 0,		/* SCSI */
+	Pciscide	= 1,		/* IDE (ATA) */
+
+	/* network */
+	Pciscether	= 0,		/* Ethernet */
+
+	/* display */
+	Pciscvga	= 0,		/* VGA */
+	Pciscxga	= 1,		/* XGA */
+	Pcisc3d		= 2,		/* 3D */
+
+	/* bridges */
+	Pcischostpci	= 0,		/* host/pci */
+	Pciscpcicpci	= 1,		/* pci/pci */
+
+	/* simple comms */
+	Pciscserial	= 0,		/* 16450, etc. */
+	Pciscmultiser	= 1,		/* multiport serial */
+
+	/* serial bus */
+	Pciscusb	= 3,		/* USB */
+};
+
+enum {					/* type 0 pre-defined header */
+	PciCIS		= 0x28,		/* cardbus CIS pointer */
+	PciSVID		= 0x2C,		/* subsystem vendor ID */
+	PciSID		= 0x2E,		/* subsystem ID */
+	PciEBAR0	= 0x30,		/* expansion ROM base address */
+	PciMGNT		= 0x3E,		/* burst period length */
+	PciMLT		= 0x3F,		/* maximum latency between bursts */
+};
+
+/*
+ * byte offsets of the bridge-specific fields of a type 1 header.
+ * the expansion ROM base address of a type 1 header is at 0x38;
+ * 0x28 is the prefetchable base upper 32 bits (PciPUBR).
+ */
+enum {					/* type 1 pre-defined header */
+	PciPBN		= 0x18,		/* primary bus number */
+	PciSBN		= 0x19,		/* secondary bus number */
+	PciUBN		= 0x1A,		/* subordinate bus number */
+	PciSLTR		= 0x1B,		/* secondary latency timer */
+	PciIBR		= 0x1C,		/* I/O base */
+	PciILR		= 0x1D,		/* I/O limit */
+	PciSPSR		= 0x1E,		/* secondary status */
+	PciMBR		= 0x20,		/* memory base */
+	PciMLR		= 0x22,		/* memory limit */
+	PciPMBR		= 0x24,		/* prefetchable memory base */
+	PciPMLR		= 0x26,		/* prefetchable memory limit */
+	PciPUBR		= 0x28,		/* prefetchable base upper 32 bits */
+	PciPULR		= 0x2C,		/* prefetchable limit upper 32 bits */
+	PciIUBR		= 0x30,		/* I/O base upper 16 bits */
+	PciIULR		= 0x32,		/* I/O limit upper 16 bits */
+	PciEBAR1	= 0x38,		/* expansion ROM base address */
+	PciBCR		= 0x3E,		/* bridge control register */
+};
+
+/* byte offsets of the fields of a type 2 header; the CB prefix marks these names */
+enum {					/* type 2 pre-defined header */
+	PciCBExCA	= 0x10,
+	PciCBSPSR	= 0x16,
+	PciCBPBN	= 0x18,		/* primary bus number */
+	PciCBSBN	= 0x19,		/* secondary bus number */
+	PciCBUBN	= 0x1A,		/* subordinate bus number */
+	PciCBSLTR	= 0x1B,		/* secondary latency timer */
+	PciCBMBR0	= 0x1C,
+	PciCBMLR0	= 0x20,
+	PciCBMBR1	= 0x24,
+	PciCBMLR1	= 0x28,
+	PciCBIBR0	= 0x2C,		/* I/O base */
+	PciCBILR0	= 0x30,		/* I/O limit */
+	PciCBIBR1	= 0x34,		/* I/O base */
+	PciCBILR1	= 0x38,		/* I/O limit */
+	PciCBSVID	= 0x40,		/* subsystem vendor ID */
+	PciCBSID	= 0x42,		/* subsystem ID */
+	PciCBLMBAR	= 0x44,		/* legacy mode base address */
+};
+
+/*
+ * associates a device with one of its base address registers and a size.
+ * NOTE(review): Pcidev is used here before the typedef further down
+ * this file - presumably it is also declared in an earlier header; confirm.
+ */
+typedef struct Pcisiz Pcisiz;
+struct Pcisiz
+{
+	Pcidev*	dev;
+	int	siz;
+	int	bar;
+};
+
+/*
+ * per-device record.  the short field names follow the
+ * configuration-header offset names above (vid/PciVID, pcr/PciPCR,
+ * ccrb/PciCCRb, and so on).
+ */
+typedef struct Pcidev Pcidev;
+struct Pcidev
+{
+	int	tbdf;			/* type+bus+device+function */
+	ushort	vid;			/* vendor ID */
+	ushort	did;			/* device ID */
+
+	ushort	pcr;			/* command */
+
+	uchar	rid;			/* revision ID */
+	uchar	ccrp;			/* programming interface class code */
+	uchar	ccru;			/* sub-class code */
+	uchar	ccrb;			/* base class code */
+	uchar	cls;			/* cache line size */
+	uchar	ltr;			/* latency timer */
+
+	struct {
+		ulong	bar;		/* base address */
+		int	size;
+	} mem[6];
+
+	struct {
+		ulong	bar;	
+		int	size;
+	} rom;
+	uchar	intl;			/* interrupt line */
+
+	Pcidev*	list;
+	Pcidev*	link;			/* next device on this bno */
+
+	Pcidev*	bridge;			/* down a bus */
+	struct {
+		ulong	bar;
+		int	size;
+	} ioa, mema;
+
+	int	pmrb;			/* power management register block */
+};
+
+/* bus address of kernel virtual address va: its physical address plus the window offset (0 here) */
+#define PCIWINDOW	0
+#define PCIWADDR(va)	(PADDR(va)+PCIWINDOW)
+#define ISAWINDOW	0
+#define ISAWADDR(va)	(PADDR(va)+ISAWINDOW)
+
+/*
+ * Sheevaplug stuff
+ */
+
+/*
+ * weird padding macro: the number of ulongs lying strictly between a
+ * register at byte offset `last' and the next one at byte offset `next'
+ */
+#define PAD(next, last)	(((next) - sizeof(ulong) - (last)) / sizeof(ulong))
+
+/* addresses of the on-chip register blocks, all expressed relative to Regbase */
+enum {
+	Regbase		= 0xf1000000,	/* PHYSIO in mem.h */
+	AddrSDramc	= Regbase+0x01400,
+	AddrSDramd	= Regbase+0x01500,
+
+	AddrMpp		= Regbase+0x10000,
+	AddrDevid	= Regbase+0x10034,
+	AddrClockctl	= Regbase+0x1004c,
+	AddrEfuse	= Regbase+0x1008c,
+	AddrIocfg0	= Regbase+0x100e0,
+	AddrGpio0	= Regbase+0x10100,
+	AddrGpio1	= Regbase+0x10140,
+	AddrRtc		= Regbase+0x10300,
+	AddrNandf       = Regbase+0x10418,
+	AddrSpi		= Regbase+0x10600,
+	AddrUart0	= Regbase+0x12000,
+	AddrUart1	= Regbase+0x12100,
+
+	AddrWin		= Regbase+0x20000,
+	AddrCpucsr	= Regbase+0x20100,	/* CpucsReg, via CPUCSREG */
+	AddrIntr	= Regbase+0x20200,	/* IntrReg, via INTRREG */
+	AddrTimer	= Regbase+0x20300,	/* TimerReg, via TIMERREG */
+	Addrl2cache	= Regbase+0x20a00,  /* uncacheable addresses for L2 */
+
+	Addrpci		= Regbase+0x40000,
+	Addrpcibase	= Regbase+0x41800,
+
+	Addrusb		= Regbase+0x50000,
+	AddrSdio	= Regbase+0x90000,
+};
+
+/*
+ * unlike the Pci* header offsets earlier in this file, these are
+ * absolute register addresses (based on Addrpci and Addrpcibase).
+ */
+enum {
+	/* registers */
+	PciBAR0		= Addrpcibase + 4,	/* base address */
+	PciBAR1		= Addrpcibase + 8,
+
+	PciCP		= Addrpci + 0x64,	/* capabilities pointer */
+
+	PciINTL		= Addrpci + 0x3c,	/* interrupt line */
+	PciINTP		= PciINTL + 1,	/* interrupt pin */
+};
+/* bits of the CpucsReg rstout, softreset and cpucsr registers */
+enum {
+	/* rstout bits */
+	RstoutPex	= 1<<0,
+	RstoutWatchdog	= 1<<1,
+	RstoutSoft	= 1<<2,
+
+	/* softreset bits */
+	ResetSystem	= 1<<0,
+
+	/* cpucsr bits */
+	Reset		= 1<<1,
+};
+
+/*
+ * interrupt stuff
+ */
+
+/*
+ * which cause register an interrupt number refers to.
+ * NOTE(review): presumably these select among the low, high and bridge
+ * numbering ranges of the enum that follows - confirm against trap.c.
+ */
+enum {
+	Irqlo, Irqhi, Irqbridge,
+};
+
+/*
+ * interrupt bit numbers.  the numbering restarts at 0 for each of the
+ * three groups (low cause register, high cause register, bridge), so
+ * a value is only meaningful together with the register it belongs to.
+ */
+enum {
+	/* main interrupt cause low register bit #s (LE) */
+	IRQ0hisum,		/* summary of main intr high cause reg */
+	IRQ0bridge,
+	IRQ0h2cdoorbell,
+	IRQ0c2hdoorbell,
+	_IRQ0reserved0,
+	IRQ0xor0chan0,
+	IRQ0xor0chan1,
+	IRQ0xor1chan0,
+	IRQ0xor1chan1,
+	IRQ0pex0int,		/* pex = pci-express */
+	_IRQ0reserved1,
+	IRQ0gbe0sum,
+	IRQ0gbe0rx,
+	IRQ0gbe0tx,
+	IRQ0gbe0misc,
+	IRQ0gbe1sum,
+	IRQ0gbe1rx,
+	IRQ0gbe1tx,
+	IRQ0gbe1misc,
+	IRQ0usb0,
+	_IRQ0reserved2,
+	IRQ0sata,
+	IRQ0crypto,
+	IRQ0spi,
+	IRQ0audio,
+	_IRQ0reserved3,
+	IRQ0ts0,
+	_IRQ0reserved4,
+	IRQ0sdio,
+	IRQ0twsi,
+	IRQ0avb,
+	IRQ0tdm,
+
+	/* main interrupt cause high register bit #s (LE) */
+	_IRQ1reserved0 = 0,
+	IRQ1uart0,
+	IRQ1uart1,
+	IRQ1gpiolo0,
+	IRQ1gpiolo1,
+	IRQ1gpiolo2,
+	IRQ1gpiolo3,
+	IRQ1gpiohi0,
+	IRQ1gpiohi1,
+	IRQ1gpiohi2,
+	IRQ1gpiohi3,
+	IRQ1xor0err,
+	IRQ1xor1err,
+	IRQ1pex0err,
+	_IRQ1reserved1,
+	IRQ1gbe0err,
+	IRQ1gbe1err,
+	IRQ1usberr,
+	IRQ1cryptoerr,
+	IRQ1audioerr,
+	_IRQ1reserved2,
+	_IRQ1reserved3,
+	IRQ1rtc,
+
+	/* bridged-interrupt causes */
+	IRQcpuself = 0,
+	IRQcputimer0,
+	IRQcputimer1,
+	IRQcputimerwd,
+};
+
+/*
+ * interrupt controller
+ */
+/* the interrupt controller registers at AddrIntr: two identical groups, lo then hi */
+#define INTRREG		((IntrReg*)AddrIntr)
+typedef struct IntrReg IntrReg;
+struct IntrReg
+{
+	struct {
+		ulong	irq;		/* main intr cause reg (ro) */
+		ulong	irqmask;
+		ulong	fiqmask;
+		ulong	epmask;
+	} lo, hi;
+};
+
+
+/*
+ * CPU control & status (archkirkwood.c and trap.c)
+ */
+/* the register block at AddrCpucsr; field order and padding fix each register's offset */
+#define CPUCSREG	((CpucsReg*)AddrCpucsr)
+
+typedef struct CpucsReg CpucsReg;
+struct CpucsReg
+{
+	ulong	cpucfg;		/* see `cpucfg bits' */
+	ulong	cpucsr;		/* see `cpucsr bits' */
+	ulong	rstout;		/* see `rstout bits' */
+	ulong	softreset;	/* see `softreset bits' */
+	ulong	irq;
+	ulong	irqmask;
+	ulong	mempm;
+	ulong	clockgate;
+	ulong	biu;
+	ulong	pad0;
+	ulong	l2cfg;		/* turn l2 cache on or off, set coherency */
+	ulong	pad1[2];
+	ulong	l2tm0;
+	ulong	l2tm1;
+	ulong	pad2[2];
+	ulong	l2pm;
+	ulong	ram0;
+	ulong	ram1;
+	ulong	ram2;
+	ulong	ram3;
+};
+
+enum {
+	/* cpucfg bits */
+	Cfgvecinithi	= 1<<1,	/* boot at 0xffff0000, not 0; default 1 */
+	/*
+	 * NOTE(review): 3<<1 sets bits 1 and 2 and so overlaps
+	 * Cfgvecinithi; possibly 1<<3 was meant - confirm against
+	 * the hardware manual.
+	 */
+	Cfgbigendreset	= 3<<1,	/* init. as big-endian at reset; default 0 */
+
+	/* l2cfg bits */
+	L2ecc		= 1<<2,
+	L2on		= 1<<3,
+	L2writethru	= 1<<4,		/* else write-back */
+};
+
+/*
+ * clocks (clock.c and archkirkwood.c)
+ */
+
+/* bits of TimerReg.ctl: an enable and a periodic bit for timer 0, timer 1 and the watchdog */
+enum {
+	/* timer ctl bits */
+	Tmr0enable	= 1<<0,
+	Tmr0periodic	= 1<<1,
+	Tmr1enable	= 1<<2,
+	Tmr1periodic	= 1<<3,
+	TmrWDenable	= 1<<4,
+	TmrWDperiodic	= 1<<5,
+};
+
+/* the timer register block at AddrTimer: a control word, then reload/count pairs */
+#define TIMERREG	((TimerReg*)AddrTimer)
+
+typedef struct TimerReg TimerReg;
+struct TimerReg
+{
+	ulong	ctl;		/* see `timer ctl bits' */
+	ulong	pad[3];
+	ulong	reload0;
+	ulong	timer0;
+	ulong	reload1;
+	ulong	timer1;
+	ulong	reloadwd;	/* watchdog */
+	ulong	timerwd;
+};

+ 618 - 0
sys/src/9/kw/l.s

@@ -0,0 +1,618 @@
+/*
+ * sheevaplug machine assist
+ * arm926ej-s processor at 1.2GHz
+ *
+ * loader uses R11 as scratch.
+ */
+#include "arm.s"
+
+/*
+ * MCR and MRC are counter-intuitively named.
+ *	MCR	coproc, opcode1, Rd, CRn, CRm[, opcode2]	# arm -> coproc
+ *	MRC	coproc, opcode1, Rd, CRn, CRm[, opcode2]	# coproc -> arm
+ */
+
+/*
+ * Entered here from Das U-Boot with MMU disabled.
+ * Until the MMU is enabled it is OK to call functions provided
+ * they are within ±32MiB relative and do not require any
+ * local variables or more than one argument (i.e. there is
+ * no stack).
+ */
+TEXT _start(SB), 1, $-4
+	MOVW	$setR12(SB), R12		/* load the SB */
+_main:
+	/* SVC mode, interrupts disabled */
+	MOVW	$(PsrDirq|PsrDfiq|PsrMsvc), R1
+	MOVW	R1, CPSR
+
+	/*
+	 * disable the MMU & caches,
+	 * switch to system permission & 32-bit addresses.
+	 */
+	MOVW	$(CpCsystem|CpCd32|CpCi32), R1
+	MCR     CpSC, 0, R1, C(CpCONTROL), C(0)
+	BARRIERS
+
+	/*
+	 * disable the Sheevaplug's L2 cache, invalidate all caches
+	 */
+
+	/* flush caches */
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
+	BARRIERS
+
+	/* drain L1 write buffer, also drains L2 eviction buffer on sheeva */
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
+	BARRIERS
+
+	/* invalidate l2 cache */
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2flush), CpTCl2all
+	BARRIERS
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all
+	BARRIERS
+
+	/* disable l2 cache.  do this while l1 caches are off */
+	MRC	CpSC, CpL2, R1, C(CpTESTCFG), C(CpTCl2cfg), CpTCl2conf
+	BIC	$(1<<22 | 1<<28 | 1<<29), R1 /* l2 off, no wr alloc, no streaming */
+	MCR	CpSC, CpL2, R1, C(CpTESTCFG), C(CpTCl2cfg), CpTCl2conf
+	BARRIERS
+
+	/* invalidate l2 cache */
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all
+	BARRIERS
+
+	/* flush caches */
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
+	BARRIERS
+
+WAVE('\r')
+	/* clear Mach: zero every word from MACHADDR up to the start of L1 */
+	MOVW	$PADDR(MACHADDR), R4		/* address of Mach */
+_machZ:
+	MOVW	R0, (R4)			/* R0 is 0 from the flush above, assuming WAVE leaves it alone */
+	ADD	$4, R4				/* bump to the next word */
+	CMP.S	$PADDR(L1+L1X(0)), R4
+	BNE	_machZ
+
+	/*
+	 * set up the MMU page table
+	 */
+
+	/* clear all PTEs first, to provide a default */
+WAVE('\n')
+	MOVW	$PADDR(L1+L1X(0)), R4		/* address of PTE for 0 */
+_ptenv0:
+	ZEROPTE()
+	CMP.S	$PADDR(L1+16*KiB), R4
+	BNE	_ptenv0
+
+	/* double map of PHYSDRAM, KZERO to PHYSDRAM for first few MBs */
+	MOVW	$PTEDRAM, R2			/* PTE bits */
+	MOVW	$PHYSDRAM, R3			/* pa */
+	MOVW	$PADDR(L1+L1X(PHYSDRAM)), R4  /* address of PTE for PHYSDRAM */
+	MOVW	$16, R5				/* number of PTEs to fill */
+_ptdbl:
+	FILLPTE()
+	SUB.S	$1, R5
+	BNE	_ptdbl
+
+	/*
+	 * back up and fill in PTEs for memory at KZERO
+	 * there is 1 bank of 512MB of SDRAM at PHYSDRAM
+	 */
+	MOVW	$PTEDRAM, R2			/* PTE bits */
+	MOVW	$PHYSDRAM, R3
+	MOVW	$PADDR(L1+L1X(KZERO)), R4	/* start with PTE for KZERO */
+	MOVW	$512, R5			/* inner loop count */
+_ptekrw:					/* set PTEs for 512MiB */
+	FILLPTE()
+	SUB.S	$1, R5
+	BNE	_ptekrw
+
+	/*
+	 * back up and fill in PTE for MMIO
+	 */
+	MOVW	$PTEIO, R2			/* PTE bits */
+	MOVW	$PHYSIO, R3
+	MOVW	$PADDR(L1+L1X(VIRTIO)), R4	/* start with PTE for VIRTIO */
+	FILLPTE()
+
+	/* mmu.c sets up the vectors later */
+
+WAVE('P')
+	/* set the domain access control */
+	MOVW	$Client, R0
+	BL	dacput(SB)
+
+	/* set the translation table base */
+	MOVW	$PADDR(L1), R0
+	BL	ttbput(SB)
+
+	MOVW	$0, R0
+	BL	pidput(SB)		/* paranoia */
+
+	/* the little dance to turn the MMU & caches on */
+WAVE('l')
+	BL	cacheuwbinv(SB)
+	BL	mmuinvalidate(SB)
+	BL	mmuenable(SB)
+	BL	cacheuwbinv(SB)
+
+WAVE('a')
+	/* warp the PC into the virtual map */
+	MOVW	$KZERO, R0
+	BL	_r15warp(SB)
+
+	/* undo double map of 0, KZERO */
+	MOVW	$PADDR(L1+L1X(0)), R4		/* address of PTE for 0 */
+	MOVW	$0, R0
+	MOVW	$16, R5
+_ptudbl:
+	MOVW	R0, (R4)
+	ADD	$4, R4				/* bump PTE address */
+	ADD	$MiB, R0			/* bump pa */
+	SUB.S	$1, R5
+	BNE	_ptudbl
+	BARRIERS
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvd), CpTLBinvse
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+
+WAVE('n')
+WAVE(' ')
+	/* pass Mach to main and set up the stack */
+	MOVW	$(MACHADDR), R0			/* Mach */
+	MOVW	R0, R13
+	ADD	$(MACHSIZE), R13		/* stack pointer */
+	SUB	$4, R13				/* space for link register */
+
+	BL	main(SB)			/* void main(Mach*) */
+	/* fall through */
+
+
+/*
+ * not used.
+ * turns the caches off, re-creates an identity map of the first
+ * 16 MiB, drops back to physical addresses with the MMU off, plants
+ * a jump to PHYSBOOTROM at address 0 and branches to it.
+ */
+TEXT _reset(SB), 1, $-4
+	/* turn the caches off */
+	MOVW	$(PsrDirq|PsrDfiq|PsrMsvc), R0
+	MOVW	R0, CPSR
+	BL	cacheuwbinv(SB)
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$(CpCwb|CpCicache|CpCdcache|CpCalign), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+WAVE('R')
+
+	/* redo double map of 0, KZERO */
+	MOVW	$(L1+L1X(0)), R4		/* address of PTE for 0 */
+	MOVW	$PTEDRAM, R2			/* PTE bits */
+	MOVW	$0, R3
+	MOVW	$16, R5
+_ptrdbl:
+	ORR	R3, R2, R1		/* first identity-map 0 to 0, etc. */
+	MOVW	R1, (R4)
+	ADD	$4, R4				/* bump PTE address */
+	ADD	$MiB, R3			/* bump pa */
+	SUB.S	$1, R5
+	BNE	_ptrdbl
+
+	BARRIERS
+WAVE('e')
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvd), CpTLBinv
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+
+	/* back to 29- or 26-bit addressing, mainly for SB */
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$(CpCd32|CpCi32), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+
+	/* turn the MMU off */
+	MOVW	$PHYSDRAM, R0
+	BL	_r15warp(SB)
+	BL	mmuinvalidate(SB)
+	BL	mmudisable(SB)
+
+WAVE('s')
+	/* set new reset vector */
+	MOVW	$0, R2
+	MOVW	$0xe59ff018, R3			/* MOVW 0x18(R15), R15 */
+	MOVW	R3, (R2)
+WAVE('e')
+
+	MOVW	$PHYSBOOTROM, R3
+	MOVW	R3, 0x20(R2)			/* where $0xe59ff018 jumps to */
+	BARRIERS
+WAVE('t')
+WAVE('\r')
+WAVE('\n')
+
+	/* ...and jump to it */
+	MOVW	R2, R15				/* software reboot */
+_limbo:						/* should not get here... */
+	B	_limbo				/* ... and can't get out */
+	BL	_div(SB)			/* hack to load _div, etc. */
+
+/*
+ * clear the top 4 bits of the return address in R14 and OR in R0, so
+ * that RET resumes the caller at the same offset in the region named
+ * by R0 (callers pass KZERO or PHYSDRAM).
+ */
+TEXT _r15warp(SB), 1, $-4
+	BIC	$0xf0000000, R14
+	ORR	R0, R14
+	RET
+
+/*
+ * write the character in R3 to the console device at PHYSCONS,
+ * after polling bit 5 of the word at offset 20 until it is set.
+ * clobbers R1, R6
+ */
+TEXT myputc(SB), 1, $-4
+	MOVW	$PHYSCONS, R6
+_busy:
+	MOVW	20(R6), R1
+	BIC.S	$~(1<<5), R1			/* (x->lsr & LSRthre) == 0? */
+	BEQ	_busy
+	MOVW	R3, (R6)			/* print */
+	BARRIERS
+	RET
+
+/* set the CpCdcache, CpCicache and CpCwb bits of the control register, with IRQ and FIQ disabled meanwhile */
+TEXT l1cacheson(SB), 1, $-4
+	MOVW	CPSR, R5
+	ORR	$(PsrDirq|PsrDfiq), R5, R4
+	MOVW	R4, CPSR			/* splhi */
+
+	BARRIERS
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	ORR	$(CpCdcache|CpCicache|CpCwb), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+
+	MOVW	R5, CPSR			/* splx */
+	RET
+
+/*
+ * write back and invalidate the caches (cacheuwbinv), then clear the
+ * CpCdcache, CpCicache and CpCwb bits of the control register, with
+ * IRQ and FIQ disabled meanwhile.  R7 preserves the link register
+ * across the call.
+ */
+TEXT l1cachesoff(SB), 1, $-4
+	MOVW	R14, R7				/* save link */
+
+	MOVW	CPSR, R5
+	ORR	$(PsrDirq|PsrDfiq), R5, R4
+	MOVW	R4, CPSR			/* splhi */
+	BARRIERS
+
+	BL	cacheuwbinv(SB)
+
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$(CpCdcache|CpCicache|CpCwb), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+
+	MOVW	R5, CPSR			/* splx */
+	MOVW	R7, R14				/* restore link */
+	RET
+
+/* write back the whole L1 data cache, then flush all of L2, then drain via _wait */
+TEXT cachedwb(SB), 1, $-4			/* D writeback */
+	BARRIERS
+_dwb:
+	MRC	CpSC, 0, R15, C(CpCACHE), C(CpCACHEwb), CpCACHEtest
+	BNE	_dwb				/* repeat the test operation until it reports done */
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2flush), CpTCl2all
+	B	_wait
+
+/*
+ * write back the L1 and L2 lines covering [address, address+size),
+ * stepping 32 bytes at a time from the address rounded down to a
+ * multiple of 32, then drain via _wait.
+ */
+TEXT cachedwbse(SB), 1, $-4			/* D writeback SE */
+	MOVW	R0, R2				/* first arg: address */
+	BARRIERS
+	MOVW	4(FP), R1			/* second arg: size */
+//	CMP.S	$(4*1024), R1
+//	BGT	_dwb
+	ADD	R2, R1				/* R1 = end address */
+	BIC	$31, R2				/* round start down to a 32-byte boundary */
+_dwbse:
+	MCR	CpSC, 0, R2, C(CpCACHE), C(CpCACHEwb), CpCACHEse
+	BARRIERS
+	MCR	CpSC, CpL2, R2, C(CpTESTCFG), C(CpTCl2flush), CpTCl2seva
+	BARRIERS
+	ADD	$32, R2
+	CMP.S	R2, R1
+	BGT	_dwbse
+	B	_wait
+
+/* write back and invalidate the whole L1 data cache, then flush and invalidate all of L2, then drain via _wait */
+TEXT cachedwbinv(SB), 1, $-4			/* D writeback+invalidate */
+	BARRIERS
+_dwbinv:
+	MRC	CpSC, 0, PC, C(CpCACHE), C(CpCACHEwbi), CpCACHEtest
+	BNE	_dwbinv				/* repeat the test operation until it reports done */
+	MCR	CpSC, CpL2, PC, C(CpTESTCFG), C(CpTCl2flush), CpTCl2all
+	BARRIERS
+	MCR	CpSC, CpL2, PC, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all
+	B	_wait
+
+TEXT cachedwbinvse(SB), 1, $-4			/* D writeback+invalidate SE */
+	MOVW	R0, R2				/* first arg: address */
+	BARRIERS
+	MOVW	4(FP), R1			/* second arg: size */
+//	CMP.S	$(4*1024), R1
+//	BGT	_dwbinv
+	ADD	R2, R1				/* R1 = address + size: end of range */
+	BIC	$31, R2				/* round start down to a multiple of 32 */
+_dwbinvse:
+	MCR	CpSC, 0, R2, C(CpCACHE), C(CpCACHEwbi), CpCACHEse	/* L1: write back + invalidate the entry at R2 */
+	BARRIERS
+	MCR	CpSC, CpL2, R2, C(CpTESTCFG), C(CpTCl2flush), CpTCl2seva	/* L2: flush the same address */
+	BARRIERS
+	MCR	CpSC, CpL2, R2, C(CpTESTCFG), C(CpTCl2inv), CpTCl2seva	/* L2: then invalidate it */
+	BARRIERS
+	ADD	$32, R2				/* step 32 bytes (presumably one cache line) */
+	CMP.S	R2, R1
+	BGT	_dwbinvse			/* loop until R2 reaches the end in R1 */
+	B	_wait				/* drain write buffer; _wait returns to our caller */
+
+_wait:						/* drain write buffer; shared tail reached by B, so RET returns to that routine's caller */
+	MOVW	$0, R0
+	BARRIERS
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
+	BARRIERS
+	RET
+
+TEXT cachedinvse(SB), 1, $-4			/* D invalidate SE */
+	MOVW	R0, R2				/* first arg: address */
+	BARRIERS
+	MOVW	4(FP), R1			/* second arg: size */
+//	CMP.S	$(4*1024), R1
+//	BGT	_dinv
+	ADD	R2, R1				/* R1 = address + size: end of range */
+	BIC	$31, R2				/* round start down to a multiple of 32 */
+_dinvse:
+	MCR	CpSC, 0, R2, C(CpCACHE), C(CpCACHEinvd), CpCACHEse	/* L1: invalidate the entry at R2 (no writeback) */
+	BARRIERS
+	MCR	CpSC, CpL2, R2, C(CpTESTCFG), C(CpTCl2inv), CpTCl2seva	/* L2: invalidate the same address */
+	BARRIERS
+	ADD	$32, R2				/* step 32 bytes (presumably one cache line) */
+	CMP.S	R2, R1
+	BGT	_dinvse				/* loop until R2 reaches the end in R1 */
+	RET					/* unlike the writeback variants, does not go through _wait */
+
+TEXT cacheuwbinv(SB), 1, $-4			/* D+I writeback+invalidate */
+	MOVW	CPSR, R3			/* splhi */
+	ORR	$(PsrDirq), R3, R1		/* NOTE(review): masks IRQ only; l1cacheson/off also mask FIQ */
+	MOVW	R1, CPSR
+	BARRIERS
+
+_uwbinv:					/* D writeback+invalidate */
+	MRC	CpSC, 0, PC, C(CpCACHE), C(CpCACHEwbi), CpCACHEtest
+	BNE	_uwbinv				/* repeat until the op leaves Z set */
+	MCR	CpSC, CpL2, PC, C(CpTESTCFG), C(CpTCl2flush), CpTCl2all	/* L2: flush all */
+	BARRIERS
+	MCR	CpSC, CpL2, PC, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all	/* L2: invalidate all */
+	BARRIERS
+
+	MOVW	$0, R0				/* I invalidate */
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+	BARRIERS
+
+	MOVW	$0, R0				/* drain write buffer */
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
+	BARRIERS
+
+	MOVW	R3, CPSR			/* splx */
+	RET
+
+TEXT cacheiinv(SB), 1, $-4			/* I invalidate */
+	BARRIERS
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall	/* L1 I-cache: invalidate all */
+	BARRIERS
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all	/* L2: invalidate all as well */
+	BARRIERS
+	RET
+
+TEXT cachedinv(SB), 1, $-4			/* D invalidate */
+	BARRIERS
+_dinv:						/* only referenced from a commented-out branch in cachedinvse */
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEall	/* L1 D-cache: invalidate all (no writeback) */
+	BARRIERS
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all	/* L2: invalidate all */
+	BARRIERS
+	RET
+
+/* enable l2 cache in config coproc. reg.  do this while l1 caches are off.  clobbers R1 */
+TEXT l2cachecfgon(SB), 1, $-4
+	BARRIERS
+	MRC	CpSC, CpL2, R1, C(CpTESTCFG), C(CpTCl2cfg), CpTCl2conf	/* read L2 config */
+	ORR	$(1<<22 | 1<<24), R1		/* l2 on, prefetch off */
+	MCR	CpSC, CpL2, R1, C(CpTESTCFG), C(CpTCl2cfg), CpTCl2conf	/* write it back */
+	BARRIERS
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all	/* invalidate all of L2; R0 is whatever the caller left there */
+	BARRIERS
+	RET
+
+TEXT icflushall(SB), 1, $-4			/* whole-cache ops: wbi, invi, then L2 flush; R0 passed through unchanged */
+	BARRIERS
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall
+	BARRIERS
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall	/* I invalidate, all */
+	BARRIERS
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2flush), CpTCl2all	/* L2: flush all */
+	BARRIERS
+	RET
+
+TEXT dcflushall(SB), 1, $-4			/* whole-cache ops: wb, invd, then L2 flush; R0 passed through unchanged */
+	BARRIERS
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEall
+	BARRIERS
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEall	/* D invalidate, all */
+	BARRIERS
+	MCR	CpSC, CpL2, R0, C(CpTESTCFG), C(CpTCl2flush), CpTCl2all	/* L2: flush all */
+	BARRIERS
+	RET
+
+/*
+ *  enable mmu, i and d caches, and high vector.
+ *  also sets the write-buffer and system bits and clears the rom bit
+ *  of the control register.  clobbers R0.
+ */
+TEXT mmuenable(SB), 1, $-4
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)	/* read control register */
+	ORR	$(CpChv|CpCmmu|CpCdcache|CpCicache|CpCwb|CpCsystem), R0
+	BIC	$(CpCrom), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)	/* write it back */
+	BARRIERS
+	RET
+
+TEXT mmudisable(SB), 1, $-4			/* clear high-vector, mmu, cache and write-buffer bits; clobbers R0 */
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)	/* read control register */
+	BIC	$(CpChv|CpCmmu|CpCdcache|CpCicache|CpCwb), R0	/* system and rom bits are left as they were */
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)	/* write it back */
+	BARRIERS
+	RET
+
+TEXT mmuinvalidate(SB), 1, $-4			/* invalidate all */
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+	RET
+
+TEXT mmuinvalidateaddr(SB), 1, $-4		/* invalidate single entry */
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse
+	BARRIERS
+	RET
+
+TEXT cpidget(SB), 1, $-4			/* main ID */
+	MRC	CpSC, 0, R0, C(CpID), C(0), CpIDid
+	RET
+
+TEXT cpctget(SB), 1, $-4			/* cache type */
+	MRC	CpSC, 0, R0, C(CpID), C(0), CpIDct
+	RET
+
+TEXT controlget(SB), 1, $-4			/* control */
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	RET
+
+TEXT ttbget(SB), 1, $-4				/* translation table base */
+	MRC	CpSC, 0, R0, C(CpTTB), C(0)
+	RET
+
+TEXT ttbput(SB), 1, $-4				/* translation table base */
+	MCR	CpSC, 0, R0, C(CpTTB), C(0)
+	BARRIERS
+	RET
+
+TEXT dacget(SB), 1, $-4				/* domain access control */
+	MRC	CpSC, 0, R0, C(CpDAC), C(0)
+	RET
+
+TEXT dacput(SB), 1, $-4				/* domain access control */
+	MCR	CpSC, 0, R0, C(CpDAC), C(0)
+	BARRIERS
+	RET
+
+TEXT fsrget(SB), 1, $-4				/* fault status */
+	MRC	CpSC, 0, R0, C(CpFSR), C(0)
+	RET
+
+TEXT farget(SB), 1, $-4				/* fault address */
+	MRC	CpSC, 0, R0, C(CpFAR), C(0x0)
+	RET
+
+TEXT pidget(SB), 1, $-4				/* address translation pid */
+	MRC	CpSC, 0, R0, C(CpPID), C(0x0)
+	RET
+
+TEXT pidput(SB), 1, $-4				/* address translation pid */
+	MCR	CpSC, 0, R0, C(CpPID), C(0x0)
+	BARRIERS
+	RET
+
+TEXT splhi(SB), 1, $-4				/* mask IRQ; returns the previous CPSR in R0 */
+	MOVW	$(MACHADDR+4), R2		/* save caller pc in Mach */
+	MOVW	R14, 0(R2)
+
+	MOVW	CPSR, R0			/* turn off interrupts */
+	ORR	$(PsrDirq), R0, R1		/* R0 keeps the old value for the caller */
+	MOVW	R1, CPSR
+	RET
+
+TEXT spllo(SB), 1, $-4				/* unmask IRQ; returns the previous CPSR in R0 */
+	MOVW	CPSR, R0
+	BIC	$(PsrDirq), R0, R1		/* R0 keeps the old value for the caller */
+	MOVW	R1, CPSR
+	RET
+
+TEXT splx(SB), 1, $-4				/* load CPSR from the argument in R0; returns the previous CPSR */
+	MOVW	$(MACHADDR+0x04), R2		/* save caller pc in Mach */
+	MOVW	R14, 0(R2)
+
+	MOVW	R0, R1				/* reset interrupt level */
+	MOVW	CPSR, R0			/* old value becomes the return value */
+	MOVW	R1, CPSR
+	RET
+
+TEXT splxpc(SB), 1, $-4				/* for iunlock: same as splx but does not record the caller pc in Mach */
+	MOVW	R0, R1
+	MOVW	CPSR, R0			/* old value becomes the return value */
+	MOVW	R1, CPSR
+	RET
+
+TEXT spldone(SB), 1, $0
+	RET
+
+TEXT islo(SB), 1, $-4				/* returns non-zero (PsrDirq) iff the IRQ-disable bit is clear */
+	MOVW	CPSR, R0
+	AND	$(PsrDirq), R0			/* isolate the IRQ-disable bit */
+	EOR	$(PsrDirq), R0			/* invert it: set -> 0, clear -> PsrDirq */
+	RET
+
+TEXT splfhi(SB), $-4				/* mask both FIQ and IRQ; returns the previous CPSR in R0 */
+	MOVW	CPSR, R0
+	ORR	$(PsrDfiq|PsrDirq), R0, R1
+	MOVW	R1, CPSR
+	RET
+
+TEXT splflo(SB), $-4				/* unmask FIQ only (IRQ bit untouched); returns the previous CPSR in R0 */
+	MOVW	CPSR, R0
+	BIC	$(PsrDfiq), R0, R1
+	MOVW	R1, CPSR
+	RET
+
+TEXT tas32(SB), 1, $-4				/* test-and-set: swap 0xDEADDEAD into the word at R0, return its old value */
+	MOVW	R0, R1				/* R1 = address of the lock word */
+	MOVW	$0xDEADDEAD, R0
+	MOVW	R0, R3				/* keep a copy to compare against */
+	SWPW	R0, (R1)			/* swap R0 with the lock word */
+	CMP.S	R0, R3
+	BEQ	_tasout				/* old value was 0xDEADDEAD: return it */
+	EOR	R3, R3			/* R3 = 0 */
+	CMP.S	R0, R3
+	BEQ	_tasout				/* old value was 0: return 0 */
+	MOVW	$1, R15			/* abort: lock != 0 && lock != $0xDEADDEAD */
+_tasout:
+	RET
+
+TEXT setlabel(SB), 1, $-4			/* record sp and return pc in the label at R0; returns 0 */
+	MOVW	R13, 0(R0)		/* sp */
+	MOVW	R14, 4(R0)		/* pc */
+	MOVW	$0, R0
+	RET
+
+TEXT gotolabel(SB), 1, $-4			/* resume at the label at R0, with 1 in R0 at the saved pc */
+	MOVW	0(R0), R13		/* sp */
+	MOVW	4(R0), R14		/* pc */
+	MOVW	$1, R0
+	RET				/* returns through the R14 just loaded, not to our caller */
+
+TEXT getcallerpc(SB), 1, $-4			/* returns the word at the top of the stack */
+	MOVW	0(R13), R0			/* presumably the caller's saved return address; confirm against the calling convention */
+	RET
+
+TEXT _idlehands(SB), 1, $-4			/* issue the wait-for-interrupt op with IRQ and FIQ masked */
+	MOVW	CPSR, R3			/* keep caller's PSR for the splx below */
+	ORR	$(PsrDirq|PsrDfiq), R3, R1	/* splhi */
+	MOVW	R1, CPSR
+
+	BARRIERS
+	MOVW	$0, R0				/* wait for interrupt */
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEintr), CpCACHEwait
+	BARRIERS
+
+	MOVW	R3, CPSR			/* splx */
+	RET
+
+TEXT barriers(SB), 1, $-4
+	BARRIERS
+	RET

+ 183 - 0
sys/src/9/kw/lexception.s

@@ -0,0 +1,183 @@
+/*
+ * arm exception handlers
+ */
+#include "mem.h"
+#include "arm.h"
+
+#undef B					/* B is for 'botch' */
+
+/*
+ *  exception vectors, copied by trapinit() to somewhere useful.
+ *
+ *  every entry is the same instruction: load the PC from the word
+ *  0x18 bytes past R15.  on ARM, R15 reads as the address of the
+ *  current instruction plus 8, so the word fetched is 0x20 bytes
+ *  (8 words) beyond the vector itself.  the 8 handler addresses
+ *  must therefore sit immediately after these 8 instructions,
+ *  in the same order, as vtable does below.
+ */
+TEXT vectors(SB), 1, $-4
+	MOVW	0x18(R15), R15		/* reset */
+	MOVW	0x18(R15), R15		/* undefined instr. */
+	MOVW	0x18(R15), R15		/* SWI & SMC */
+	MOVW	0x18(R15), R15		/* prefetch abort */
+	MOVW	0x18(R15), R15		/* data abort */
+	MOVW	0x18(R15), R15		/* reserved */
+	MOVW	0x18(R15), R15		/* IRQ */
+	MOVW	0x18(R15), R15		/* FIQ */
+
+TEXT vtable(SB), 1, $-4			/* handler addresses, one per vector above, same order */
+	WORD	$_vsvc(SB)		/* reset, in svc mode already; NOTE(review): _vrst is not used here */
+	WORD	$_vund(SB)		/* undefined, switch to svc mode */
+	WORD	$_vsvc(SB)		/* swi, in svc mode already */
+	WORD	$_vpabt(SB)		/* prefetch abort, switch to svc mode */
+	WORD	$_vdabt(SB)		/* data abort, switch to svc mode */
+	WORD	$_vsvc(SB)		/* reserved */
+	WORD	$_virq(SB)		/* IRQ, switch to svc mode */
+	WORD	$_vfiq(SB)		/* FIQ, switch to svc mode */
+
+TEXT _vrst(SB), 1, $-4
+	BL	_reset(SB)
+
+TEXT _vsvc(SB), 1, $-4			/* SWI: build a ureg on the stack, call syscall, unwind */
+	MOVW.W	R14, -4(R13)		/* ureg->pc = interrupted PC */
+	MOVW	SPSR, R14		/* ureg->psr = SPSR */
+	MOVW.W	R14, -4(R13)		/* ... */
+	MOVW	$PsrMsvc, R14		/* ureg->type = PsrMsvc */
+	MOVW.W	R14, -4(R13)		/* ... */
+
+//	MOVM.DB.W.S [R0-R14], (R13)	/* save user level registers, at end r13 points to ureg */
+	MOVM.DB.S [R0-R14], (R13)	/* save user level registers */
+	SUB	$(15*4), R13		/* r13 now points to ureg */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	MOVW	$(KSEG0+16*KiB-MACHSIZE), R10	/* m */
+	MOVW	8(R10), R9		/* up */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$8, R13			/* space for argument+link */
+
+	BL	syscall(SB)
+
+	ADD	$(8+4*15), R13		/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+	MOVM.DB.S (R13), [R0-R14]	/* restore registers */
+	ADD	$8, R13			/* pop past ureg->{type+psr} */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+TEXT _vund(SB), 1, $-4			/* undefined */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMund, R0		/* r0 = type */
+	B	_vswitch
+
+TEXT _vpabt(SB), 1, $-4			/* prefetch abort */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMabt, R0		/* r0 = type */
+	B	_vswitch
+
+TEXT _vdabt(SB), 1, $-4			/* data abort */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$(PsrMabt+1), R0	/* r0 = type; +1 tells it apart from prefetch abort, which passes PsrMabt */
+	B	_vswitch
+
+TEXT _virq(SB), 1, $-4			/* IRQ */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMirq, R0		/* r0 = type */
+	B	_vswitch
+
+	/*
+	 *  come here with type in R0 and R13 pointing above saved [r0-r4].
+	 *  we'll switch to SVC mode and then call trap.
+	 */
+_vswitch:
+	MOVW	SPSR, R1		/* save SPSR for ureg */
+	MOVW	R14, R2			/* save interrupted pc for ureg */
+	MOVW	R13, R3			/* save pointer to where the original [R0-R4] are */
+
+	/*
+	 * switch processor to svc mode.  this switches the banked registers
+	 * (r13 [sp] and r14 [link]) to those of svc mode.
+	 */
+	MOVW	CPSR, R14
+	BIC	$PsrMask, R14
+	ORR	$(PsrDirq|PsrDfiq|PsrMsvc), R14
+	MOVW	R14, CPSR		/* switch! */
+
+	AND.S	$0xf, R1, R4		/* interrupted code kernel or user? */
+	BEQ	_userexcep		/* low 4 bits of the saved PSR are zero: take the user path */
+
+	/* here for trap from SVC mode */
+	MOVM.DB.W [R0-R2], (R13)	/* set ureg->{type, psr, pc}; r13 points to ureg->type  */
+	MOVM.IA	  (R3), [R0-R4]		/* restore [R0-R4] from previous mode's stack */
+
+	/*
+	 * In order to get a predictable value in R13 after the stores,
+	 * separate the store-multiple from the stack-pointer adjustment.
+	 * We'll assume that the old value of R13 should be stored on the stack.
+	 */
+	/* save kernel level registers, at end r13 points to ureg */
+//	MOVM.DB.W [R0-R14], (R13)
+	MOVM.DB	[R0-R14], (R13)
+	SUB	$(15*4), R13		/* SP now points to saved R0 */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+//	BL	printr0(SB)
+	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
+	MOVW	$0xdeaddead, R11	/* marker */
+
+	BL	trap(SB)
+
+	ADD	$(4*2+4*15), R13	/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+
+	MOVM.DB (R13), [R0-R14]		/* restore registers */
+
+	ADD	$(4*2), R13		/* pop past ureg->{type+psr} to pc */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+	/* here for trap from USER mode */
+_userexcep:
+	MOVM.DB.W [R0-R2], (R13)	/* set ureg->{type, psr, pc}; r13 points to ureg->type  */
+	MOVM.IA	  (R3), [R0-R4]		/* restore [R0-R4] from previous mode's stack */
+
+//	MOVM.DB.W.S [R0-R14], (R13)	/* save user level registers, at end r13 points to ureg */
+	MOVM.DB.S [R0-R14], (R13)	/* save user level registers (.S form, as in _vsvc) */
+	SUB	$(15*4), R13		/* r13 now points to ureg */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	MOVW	$(KSEG0+16*KiB-MACHSIZE), R10	/* m */
+	MOVW	8(R10), R9		/* up */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
+
+	BL	trap(SB)
+
+	ADD	$(4*2+4*15), R13	/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+	MOVM.DB.S (R13), [R0-R14]	/* restore registers */
+	ADD	$(4*2), R13		/* pop past ureg->{type+psr} */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+TEXT _vfiq(SB), 1, $-4			/* FIQ */
+	RFE				/* FIQ is special, ignore it for now */
+
+/*
+ *  set the stack value for the mode passed in R0.
+ *  the new stack pointer is the second argument, 4(FP).
+ *  returns the R13 that mode had before, in R0.
+ *  interrupt-disable bits are left as the caller had them while switched.
+ */
+TEXT setr13(SB), 1, $-4
+	MOVW	4(FP), R1		/* fetch new sp before changing mode */
+
+	MOVW	CPSR, R2		/* remember our own mode */
+	BIC	$PsrMask, R2, R3
+	ORR	R0, R3
+	MOVW	R3, CPSR		/* enter the requested mode */
+
+	MOVW	R13, R0			/* old banked sp is the return value */
+	MOVW	R1, R13			/* install the new one */
+
+	MOVW	R2, CPSR		/* back to the original mode */
+	RET

+ 38 - 0
sys/src/9/kw/lproc.s

@@ -0,0 +1,38 @@
+#include "mem.h"
+#include "arm.h"
+
+/*
+ *  This is the first jump from kernel to user mode.
+ *  Fake a return from interrupt.
+ *
+ *  Enter with R0 containing the user stack pointer.
+ *  UTZERO + 0x20 is always the entry point.
+ *
+ *  Does not return: the final RFE loads the PC with the word
+ *  pushed just before it, after SPSR has been set to PsrMusr.
+ */
+TEXT touser(SB), 1, $-4
+	/* store the user stack pointer into the USR_r13 */
+	MOVM.DB.W [R0], (R13)		/* push R0 on the current stack */
+	MOVM.S.IA.W (R13), [R13]	/* pop it back through the .S form of the load */
+
+	/* set up a PSR for user level */
+	MOVW	$(PsrMusr), R0
+	MOVW	R0, SPSR
+
+	/* save the PC on the stack */
+	MOVW	$(UTZERO+0x20), R0
+	MOVM.DB.W [R0], (R13)
+
+	/* return from interrupt */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+/*
+ *  here to jump to a newly forked process.
+ *  expects R13 to point at a ureg; the unwind below is the same
+ *  sequence as the tail of _vsvc in lexception.s, minus the
+ *  8 bytes of argument+link space.
+ */
+TEXT forkret(SB), 1, $-4
+	ADD	$(4*15), R13		/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+	MOVM.DB.S (R13), [R0-R14]	/* restore registers */
+	ADD	$8, R13			/* pop past ureg->{type+psr} */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */

+ 605 - 0
sys/src/9/kw/main.c

@@ -0,0 +1,605 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "init.h"
+#include "arm.h"
+#include <pool.h>
+
+#include "reboot.h"
+
+uintptr kseg0 = KZERO;
+Mach* machaddr[MAXMACH];
+
+/*
+ * Option arguments from the command line.
+ * oargv[0] is the boot file.
+ * Optionsinit() is called from multiboot()
+ * or some other machine-dependent place
+ * to set it all up.
+ */
+static int oargc;
+static char* oargv[20];
+static char oargb[128];
+static int oargblen;
+static char oenv[4096];
+
+static uintptr sp;		/* XXX - must go - user stack of init proc */
+
+int vflag;
+char debug[256];
+
+/*
+ * Set up the boot environment and argument vector.
+ *
+ * oenv is filled with NUL-separated name=value strings: read from
+ * flash when USE_FLASH is defined, otherwise a single hard-wired
+ * ethaddr entry.  The command line s is then copied into oargb,
+ * the value of the "bootargs" environment variable (if any) is
+ * appended after a space, and the result is tokenized in place
+ * into oargv/oargc.  oargblen records the length of the first
+ * token-joined string as measured before tokenizing.
+ */
+static void
+optionsinit(char* s)
+{
+	char *o, *eargb;
+#ifdef USE_FLASH
+	uintptr va;
+	char *p;
+
+	va = 0xf0000000;
+	if(mmukmap(va, PHYSFLASH, 1*MiB) != 0){
+		o = oenv;
+		for(p = (char*)(va+256*KiB+4); *p != 0; p += strlen(p)+1)
+			o = strecpy(o, oenv+sizeof(oenv), p)+1;
+		mmukunmap(va, PHYSFLASH, 1*MiB);
+	}
+#else
+	strcpy(oenv, "ethaddr=00:50:43:01:c4:9e");	// TODO
+#endif
+
+	eargb = oargb+sizeof(oargb);
+	o = strecpy(oargb, eargb, s)+1;		/* o: first byte after s's NUL */
+	/*
+	 * getenv's third argument is the capacity of the destination,
+	 * so pass the space remaining in oargb, not the space already
+	 * used (which would truncate bootargs to the length of s).
+	 * If s filled oargb there is no room: leave its terminator alone.
+	 */
+	if(o < eargb && getenv("bootargs", o, eargb - o) != nil)
+		*(o-1) = ' ';			/* join s and bootargs into one string */
+
+	oargblen = strlen(oargb);
+	oargc = tokenize(oargb, oargv, nelem(oargv)-1);
+	oargv[oargc] = nil;
+}
+
+/*
+ * Look up name in oenv, a sequence of NUL-terminated "name=value"
+ * strings ended by an empty string.  On a match, copy the value
+ * into buf (capacity n bytes, via strecpy) and return buf.
+ * Return nil if name is not present, or as soon as an entry
+ * without an '=' is met.
+ */
+char*
+getenv(char* name, char* buf, int n)
+{
+	char *entry, *eq, *k, *nm;
+
+	for(entry = oenv; *entry != 0; entry += strlen(entry)+1){
+		eq = strchr(entry, '=');
+		if(eq == nil)
+			break;
+
+		/* walk the key and name in step while they agree */
+		k = entry;
+		nm = name;
+		while(k < eq && *k == *nm){
+			k++;
+			nm++;
+		}
+
+		/* both must be exhausted together for an exact match */
+		if(k == eq && *nm == 0){
+			strecpy(buf, buf+n, eq+1);
+			return buf;
+		}
+	}
+
+	return nil;
+}
+
+#include <io.h>
+
+typedef struct Spiregs Spiregs;
+struct Spiregs {
+	ulong	ictl;		/* interface ctl */
+	ulong	icfg;		/* interface config */
+	ulong	out;		/* data out */
+	ulong	in;		/* data in */
+	ulong	ic;		/* interrupt cause */
+	ulong	im;		/* interrupt mask */
+	ulong	_pad[2];
+	ulong	dwrcfg;		/* direct write config */
+	ulong	dwrhdr;		/* direct write header */
+};
+
+enum {
+	/* ictl bits */
+	Csnact	= 1<<0,		/* serial memory activated */
+
+	/* icfg bits */
+	Bytelen	= 1<<5,		/* 2^(this_bit) bytes per transfer */
+	Dirrdcmd= 1<<10,	/* flag: fast read */
+};
+
+/*
+ * Print the address bp followed by max bytes starting there,
+ * each as two hex digits, using iprint.
+ */
+static void
+dumpbytes(uchar *bp, long max)
+{
+	long i;
+
+	iprint("%#p: ", bp);
+	for (i = 0; i < max; i++)
+		iprint("%02.2ux ", bp[i]);
+	iprint("...\n");
+}
+
+vlong	probeaddr(uintptr);
+
+// linux sez environment is in nand, 128K at offset 0x40000
+static void
+spiprobe(void)		/* turn on l2 and set up the spi controller for direct memory reads of the flash */
+{
+	Spiregs *rp = (Spiregs *)AddrSpi;
+
+	l2cacheon();
+
+	rp->ictl |= Csnact;
+	coherence();
+	rp->icfg |= Dirrdcmd | 3<<8;	/* fast reads, 4-byte addresses */
+	rp->icfg &= ~Bytelen;		/* one-byte reads */
+	coherence();
+
+	print("spi flash at %#ux: memory reads enabled\n", PHYSSPIFLASH);
+#ifdef AMBITIOUS
+	/* debugging aid: scan the flash window for "ethaddr" and dump 64 bytes from where the scan stops */
+	uchar *p, *ep, *np;
+
+	p = (uchar *)PHYSSPIFLASH;
+	ep = p + FLASHSIZE - 64;
+iprint("scan: ");
+	for (; p < ep - 1; p++) {
+iprint("%#p of %,ld bytes...", p, ep - p);
+		np = memchr(p, 'e', ep - p);
+		if (np == nil)
+			break;
+		p = np;
+		if (*p == 'e' && memcmp(p, "ethaddr", 7) == 0)
+			break;
+	}
+	dumpbytes(p, 64);
+#endif
+}
+
+void	archconsole(void);
+
+/*
+ * this low-level printing stuff is ugly,
+ * but there appears to be no other way to
+ * print until after #t is populated.
+ */
+
+#define wave(c) { \
+	coherence(); \
+	while ((*(ulong *)(PHYSCONS+4*5) & (1<<5)) == 0) /* (x->lsr&LSRthre)==0? */ \
+		; \
+	*(ulong *)PHYSCONS = (c); \
+	coherence(); \
+}
+
+/*
+ * entered from l.s with mmu enabled.
+ *
+ * we may have to realign the data segment; apparently 5l -H0 -R4096
+ * does not pad the text segment.  on the other hand, we may have been
+ * loaded by another kernel.
+ *
+ * be careful not to touch the data segment until we know it's aligned.
+ *
+ * vfy is an initialised static, so it lives in the data segment:
+ * reading back 0xcafebabe is the test that data is where the
+ * linker expects it.  the wave() calls emit single characters
+ * straight to the uart as progress markers.
+ */
+void
+main(Mach* mach)
+{
+	extern char bdata[], edata[], end[], etext[];
+	static ulong vfy = 0xcafebabe;
+
+	m = mach;
+	if (vfy != 0xcafebabe)
+		memmove(bdata, etext, edata - bdata);	/* data sits right after text: move it into place */
+	if (vfy != 0xcafebabe) {			/* still wrong after the move */
+		wave('?');
+		panic("misaligned data segment");
+	}
+	memset(edata, 0, end - edata);		/* zero bss */
+	vfy = 0;
+
+wave('9');
+	machinit();
+	archreset();
+	mmuinit();
+
+	optionsinit("/boot/boot boot");
+	quotefmtinstall();
+	archconsole();
+wave('\n');
+
+	confinit();
+	xinit();
+wave('\r');
+
+	/*
+	 * Printinit will cause the first malloc call.
+	 * (printinit->qopen->malloc) unless any of the
+	 * above (like clockintr) do an irqenable, which
+	 * will call malloc.
+	 * If the system dies here it's probably due
+	 * to malloc(->xalloc) not being initialised
+	 * correctly, or the data segment is misaligned
+	 * (it's amazing how far you can get with
+	 * things like that completely broken).
+	 *
+	 * (Should be) boilerplate from here on.
+	 */
+	trapinit();
+	clockinit();
+
+	printinit();
+	/* only now can we print */
+	uartkirkwoodconsole();
+	archconfinit();
+	cpuidprint();
+	timersinit();
+
+	procinit0();
+	initseg();
+	links();
+	chandevreset();
+
+	spiprobe();
+
+	pageinit();
+	swapinit();
+	userinit();
+	schedinit();
+}
+
+void
+cpuidprint(void)
+{
+	char name[64];
+
+	cputype2name(name, sizeof name);
+	print("cpu%d: %lldMHz ARM %s\n", m->machno, m->cpuhz/1000000, name);
+}
+
+void
+machinit(void)
+{
+	memset(m, 0, sizeof(Mach));
+	m->machno = 0;
+	machaddr[m->machno] = m;
+
+	m->ticks = 1;
+	m->perf.period = 1;
+
+	conf.nmach = 1;
+
+	active.machs = 1;
+	active.exiting = 0;
+
+	up = nil;
+}
+
+static void
+shutdown(int ispanic)	/* remove this cpu from active.machs, then wait up to ~5s for the others and the console */
+{
+	int ms, once;
+
+	lock(&active);
+	if(ispanic)
+		active.ispanic = ispanic;
+	else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0)
+		active.ispanic = 0;
+	once = active.machs & (1<<m->machno);	/* non-zero only the first time this cpu gets here */
+	active.machs &= ~(1<<m->machno);
+	active.exiting = 1;
+	unlock(&active);
+
+	if(once)
+		iprint("cpu%d: exiting\n", m->machno);
+	spllo();
+	for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){
+		delay(TK2MS(2));
+		if(active.machs == 0 && consactive() == 0)	/* every cpu is out and consactive() reports 0 */
+			break;
+	}
+	delay(1000);
+}
+
+/*
+ *  exit kernel either on a panic or user request
+ */
+void
+exit(int code)
+{
+	shutdown(code);
+	splhi();
+	archreboot();
+}
+
+/*
+ * the new kernel is already loaded at address `code'
+ * of size `size' and entry point `entry'.
+ *
+ * copies the rebootcode trampoline to REBOOTADDR and calls it with
+ * the physical addresses of entry and code plus size.  that call is
+ * not expected to return; if it does, fall back to archreboot().
+ */
+void
+reboot(void *entry, void *code, ulong size)
+{
+	void (*f)(ulong, ulong, ulong);
+
+	iprint("starting reboot...");
+//	writeconf();
+	shutdown(0);
+
+	/*
+	 * should be the only processor running now
+	 */
+
+	print("shutting down...\n");
+	delay(200);
+
+	splhi();
+
+	/* turn off buffered serial console */
+	serialoq = nil;
+
+	/* shutdown devices */
+	devtabshutdown();
+
+	/* setup reboot trampoline function */
+	f = (void*)REBOOTADDR;
+	memmove(f, rebootcode, sizeof(rebootcode));
+	coherence();
+	dcflushall();		/* the trampoline was written as data ... */
+	icflushall();		/* ... and is about to be fetched as instructions */
+
+	print("rebooting...");
+	iprint("entry %#lux code %#lux size %ld\n",
+		PADDR(entry), PADDR(code), size);
+	delay(100);		/* wait for uart to quiesce */
+
+	/* off we go - never to return */
+	coherence();
+	dcflushall();
+	icflushall();
+	(*f)(PADDR(entry), PADDR(code), size);
+
+	iprint("loaded kernel returned!\n");
+	delay(500);
+	archreboot();
+}
+
+/*
+ *  starting place for first process
+ */
+void
+init0(void)
+{
+	char buf[2*KNAMELEN];
+
+	assert(up != nil);
+	up->nerrlab = 0;
+	coherence();
+	spllo();
+
+	/*
+	 * These are o.k. because rootinit is null.
+	 * Then early kproc's will have a root and dot.
+	 */
+	up->slash = namec("#/", Atodir, 0, 0);
+	pathclose(up->slash->path);
+	up->slash->path = newpath("/");
+	up->dot = cclone(up->slash);
+
+	devtabinit();
+
+	if(!waserror()){
+		snprint(buf, sizeof(buf), "%s %s", "ARM", conffile);
+		ksetenv("terminal", buf, 0);
+		ksetenv("cputype", "arm", 0);
+		if(cpuserver)
+			ksetenv("service", "cpu", 0);
+		else
+			ksetenv("service", "terminal", 0);
+
+		/* sheevaplug configuration */
+		ksetenv("nvram", "/boot/nvram", 0);
+		ksetenv("nvroff", "0", 0);
+		ksetenv("nvrlen", "512", 0);
+		ksetenv("nobootprompt", "tcp", 0);
+
+		poperror();
+	}
+	kproc("alarm", alarmkproc, 0);
+
+	touser(sp);
+}
+
+static void
+bootargs(uintptr base)
+{
+	int i;
+	ulong ssize;
+	char **av, *p;
+
+	/*
+	 * Push the boot args onto the stack.
+	 * The initial value of the user stack must be such
+	 * that the total used is larger than the maximum size
+	 * of the argument list checked in syscall.
+	 */
+	i = oargblen+1;
+	p = UINT2PTR(STACKALIGN(base + PGSIZE - sizeof(up->s.args) - i));
+	memmove(p, oargb, i);
+
+	/*
+	 * Now push argc and the argv pointers.
+	 * This isn't strictly correct as the code jumped to by
+	 * touser in init9.s calls startboot (port/initcode.c) which
+	 * expects arguments
+	 * 	startboot(char *argv0, char **argv)
+	 * not the usual (int argc, char* argv[]), but argv0 is
+	 * unused so it doesn't matter (at the moment...).
+	 */
+	av = (char**)(p - (oargc+2)*sizeof(char*));
+	ssize = base + PGSIZE - PTR2UINT(av);
+	*av++ = (char*)oargc;
+	for(i = 0; i < oargc; i++)
+		*av++ = (oargv[i] - oargb) + (p - base) + (USTKTOP - BY2PG);
+	*av = nil;
+
+	/*
+	 * Leave space for the return PC of the
+	 * caller of initcode.
+	 */
+	sp = USTKTOP - ssize - sizeof(void*);
+}
+
+/*
+ *  create the first process
+ */
+void
+userinit(void)
+{
+	Proc *p;
+	Segment *s;
+	KMap *k;
+	Page *pg;
+
+	/* no processes yet */
+	up = nil;
+
+	p = newproc();
+	p->pgrp = newpgrp();
+	p->egrp = smalloc(sizeof(Egrp));
+	p->egrp->ref = 1;
+	p->fgrp = dupfgrp(nil);
+	p->rgrp = newrgrp();
+	p->procmode = 0640;
+
+	kstrdup(&eve, "");
+	kstrdup(&p->text, "*init*");
+	kstrdup(&p->user, eve);
+
+	/*
+	 * Kernel Stack
+	 */
+	p->sched.pc = PTR2UINT(init0);
+	p->sched.sp = PTR2UINT(p->kstack+KSTACK-sizeof(up->s.args)-sizeof(uintptr));
+	p->sched.sp = STACKALIGN(p->sched.sp);
+
+	/*
+	 * User Stack
+	 *
+	 * Technically, newpage can't be called here because it
+	 * should only be called when in a user context as it may
+	 * try to sleep if there are no pages available, but that
+	 * shouldn't be the case here.
+	 */
+	s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
+	p->seg[SSEG] = s;
+	pg = newpage(1, 0, USTKTOP-BY2PG);
+	segpage(s, pg);
+	k = kmap(pg);
+	bootargs(VA(k));
+	kunmap(k);
+
+	/*
+	 * Text
+	 */
+	s = newseg(SG_TEXT, UTZERO, 1);
+	s->flushme++;
+	p->seg[TSEG] = s;
+	pg = newpage(1, 0, UTZERO);
+	memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
+	segpage(s, pg);
+	k = kmap(s->map[0]->pages[0]);
+	memmove(UINT2PTR(VA(k)), initcode, sizeof initcode);
+	kunmap(k);
+
+	ready(p);
+}
+
+Conf conf;			/* XXX - must go - gag */
+
+Confmem sheevamem[] = {
+	/*
+	 * Memory available to Plan 9:
+	 */
+	{ .base = 0x00000000, .limit = 512*1024*1024, },
+};
+
+void
+confinit(void)		/* fill in conf: memory banks, page counts, process/swap/image limits, pool sizes */
+{
+	int i;
+	ulong kpages;
+	uintptr pa;
+
+	/*
+	 * Copy the physical memory configuration to Conf.mem.
+	 * The physical memory configuration will be used later
+	 * to check against what the Pico Array wants.
+	 * NOTE(review): the Pico Array remark looks inherited from
+	 * another port; nothing in this file refers to one.
+	 */
+	if(nelem(sheevamem) > nelem(conf.mem)){
+		iprint("memory configuration botch\n");
+		exit(1);
+	}
+	memmove(conf.mem, sheevamem, sizeof(sheevamem));
+
+	conf.npage = 0;
+	pa = PADDR(PGROUND(PTR2UINT(end)));	/* first page-aligned physical address past the kernel image */
+
+	/*
+	 *  we assume that the kernel is at the beginning of one of the
+	 *  contiguous chunks of memory and fits therein.
+	 */
+	for(i=0; i<nelem(conf.mem); i++){
+		/* take kernel out of allocatable space */
+		if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
+			conf.mem[i].base = pa;
+
+		conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+		conf.npage += conf.mem[i].npage;
+	}
+
+	conf.upages = (conf.npage*90)/100;	/* 90% of pages go to user space */
+	conf.ialloc = ((conf.npage-conf.upages)/2)*BY2PG;	/* half of the remainder, in bytes */
+
+	/* only one processor */
+	conf.nmach = 1;
+
+	/* set up other configuration parameters */
+	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;	/* 100 plus 5 per MB of memory */
+	if(cpuserver)
+		conf.nproc *= 3;
+	if(conf.nproc > 2000)
+		conf.nproc = 2000;
+	conf.nswap = conf.npage*3;
+	conf.nswppo = 4096;
+	conf.nimage = 200;
+
+	conf.copymode = 0;		/* copy on write */
+
+	/*
+	 * Guess how much is taken by the large permanent
+	 * datastructures. Mntcache and Mntrpc are not accounted for
+	 * (probably ~300KB).
+	 */
+	kpages = conf.npage - conf.upages;
+	kpages *= BY2PG;		/* from here on kpages is a byte count */
+	kpages -= conf.upages*sizeof(Page)
+		+ conf.nproc*sizeof(Proc)
+		+ conf.nimage*sizeof(Image)
+		+ conf.nswap
+		+ conf.nswppo*sizeof(Page);
+	mainmem->maxsize = kpages;
+	if(!cpuserver)
+		/*
+		 * give terminals lots of image memory, too; the dynamic
+		 * allocation will balance the load properly, hopefully.
+		 * be careful with 32-bit overflow.
+		 */
+		imagmem->maxsize = kpages;
+}
+
+char*
+getconf(char *)
+{
+	return nil;
+}
+
+int
+cmpswap(long *addr, long old, long new)
+{
+	return cas32(addr, old, new);
+}

+ 137 - 0
sys/src/9/kw/mem.h

@@ -0,0 +1,137 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+#define KiB		1024u			/* Kibi 0x0000000000000400 */
+#define MiB		1048576u		/* Mebi 0x0000000000100000 */
+#define GiB		1073741824u		/* Gibi 000000000040000000 */
+
+#define HOWMANY(x, y)	(((x)+((y)-1))/(y))
+#define ROUNDUP(x, y)	(HOWMANY((x), (y))*(y))	/* ceiling */
+#define ROUNDDN(x, y)	(((x)/(y))*(y))		/* floor */
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
+
+/*
+ * Not sure where these macros should go.
+ * This probably isn't right but will do for now.
+ * The macro names are problematic too.
+ */
+/*
+ * In B(o), 'o' is the bit offset in the register.
+ * For multi-bit fields use F(v, o, w) where 'v' is the value
+ * of the bit-field of width 'w' with LSb at bit offset 'o'.
+ */
+#define B(o)		(1<<(o))
+#define F(v, o, w)	(((v) & ((1<<(w))-1))<<(o))
+
+#define FCLR(d, o, w)	((d) & ~(((1<<(w))-1)<<(o)))
+#define FEXT(d, o, w)	(((d)>>(o)) & ((1<<(w))-1))
+#define FINS(d, o, w, v) (FCLR((d), (o), (w))|F((v), (o), (w)))
+#define FSET(d, o, w)	((d)|(((1<<(w))-1)<<(o)))
+
+#define FMASK(o, w)	(((1<<(w))-1)<<(o))
+
+/*
+ * Sizes
+ */
+#define	PGSIZE		(4*KiB)			/* bytes per page */
+#define	PGSHIFT		12			/* log2(PGSIZE) */
+#define	PGROUND(s)	ROUNDUP(s, PGSIZE)
+/* round s up to a multiple of sz; sz must be a power of 2.  sz is parenthesized so an expression argument expands correctly */
+#define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))
+
+#define	MAXMACH		1			/* max # cpus system can run */
+#define	MACHSIZE	(PGSIZE)
+
+#define KSTKSIZE	(8*KiB)
+#define STACKALIGN(sp)	((sp) & ~3)		/* bug: assure with alloc */
+
+/*
+ * Address spaces.
+ * KTZERO is used by kprof and dumpstack (if any).
+ *
+ * KZERO is mapped to physical 0.
+ * u-boot claims to take 0 - 8MB.
+ *
+ * This should leave 12K from KZERO to L1-MACHSIZE.
+ * cpu0's Mach struct is at L1 - MACHSIZE(4K) to L1 (12K to 16K above KZERO).
+ * PTEs are stored from L1 to L1+32K (16K to 48K above KZERO).
+ * KTZERO may be anywhere after KZERO + 48K.
+ * vectors are at 0.
+ */
+
+#define	KSEG0		0x60000000		/* kernel segment */
+#define	KSEGM		0xE0000000		/* mask to check segment */
+#define	KZERO		KSEG0			/* kernel address space */
+#define L1		(KZERO+16*KiB)		/* tt ptes: 16KiB aligned */
+// #define KTZERO	(KZERO+0x8000)		/* kernel text start */
+#define	KTZERO		(KZERO+0x800000)	/* kernel text start */
+
+#define	UZERO		0			/* user segment */
+#define	UTZERO		(UZERO+BY2PG)		/* user text start */
+#define	USTKTOP		KZERO			/* user segment end +1 */
+#define	USTKSIZE	(8*1024*1024)		/* user stack size */
+#define	TSTKTOP		(USTKTOP-USTKSIZE)	/* sysexec temporary stack */
+#define	TSTKSIZ	 	256
+
+#define	REBOOTADDR	KADDR(0x100)	/* reboot code - physical address */
+/*
+ * Time.
+ * Does this need to be here? Used in assembler?
+ */
+#define	HZ		100			/* clock frequency */
+#define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+#define	TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+
+/*
+ * More accurate time
+ */
+#define CLOCKFREQ	(200*1000*1000)		/* TCLK on sheeva: 200MHz */
+//#define MS2TMR(t)	((ulong)(((uvlong)(t)*CLOCKFREQ)/1000))
+//#define US2TMR(t)	((ulong)(((uvlong)(t)*CLOCKFREQ)/1000000))
+
+/*
+ * Legacy...
+ */
+#define BLOCKALIGN	32			/* only used in allocb.c */
+#define KSTACK		KSTKSIZE
+
+/*
+ * Sizes
+ */
+#define BI2BY		8			/* bits per byte */
+#define BY2PG		PGSIZE
+#define BY2SE		4
+#define BY2WD		4
+#define BY2V		8			/* only used in xalloc.c */
+
+#define	PTEMAPMEM	(1024*1024)
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
+#define	SEGMAPSIZE	1984
+#define	SSEGMAPSIZE	16
+#define	PPN(x)		((x)&~(BY2PG-1))
+
+/*
+ * With a little work these move to port.
+ */
+#define	PTEVALID	(1<<0)
+#define	PTERONLY	0
+#define	PTEWRITE	(1<<1)
+#define	PTEUNCACHED	(1<<2)
+#define PTEKERNEL	(1<<3)
+
+/*
+ * Physical machine information from here on.
+ */
+#define PHYSDRAM	0
+#define PHYSNAND	0xd8000000
+#define FLASHSIZE	(128*MiB)
+#define PHYSSPIFLASH	0xe8000000
+// #define PHYSENV		(PHYSFLASH+256*KiB)
+// #define ENVSIZE		(64*KiB)
+#define PHYSBOOTROM	0xffff0000		/* boot rom */
+#define PHYSIO		0xf1000000		/* Regbase in io.h */
+#define PHYSCONS	0xf1012000		/* uart */
+
+//#define PHYSNAND	0xf9000000
+
+#define VIRTIO		PHYSIO

+ 132 - 0
sys/src/9/kw/mkfile

@@ -0,0 +1,132 @@
+CONF=plug
+CONFLIST=plug
+
+# allegedly u-boot uses the bottom 8MB (up to 0x800000)
+# so avoid that
+loadaddr=0x60800000
+
+objtype=arm
+</$objtype/mkfile
+p=9
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+	alarm.$O\
+	alloc.$O\
+	allocb.$O\
+	auth.$O\
+	cache.$O\
+	chan.$O\
+	dev.$O\
+	devtab.$O\
+	edf.$O\
+	fault.$O\
+	latin1.$O\
+	mul64fract.$O\
+	rebootcmd.$O\
+	page.$O\
+	parse.$O\
+	pgrp.$O\
+	portclock.$O\
+	print.$O\
+	proc.$O\
+	qio.$O\
+	qlock.$O\
+	segment.$O\
+	swap.$O\
+	sysfile.$O\
+	sysproc.$O\
+	taslock.$O\
+	tod.$O\
+	xalloc.$O\
+
+OBJ=\
+	l.$O\
+	lexception.$O\
+	lproc.$O\
+	arch.$O\
+	clock.$O\
+	fpi.$O\
+	fpiarm.$O\
+	fpimem.$O\
+	main.$O\
+	mmu.$O\
+	random.$O\
+	trap.$O\
+	$CONF.root.$O\
+	$CONF.rootc.$O\
+	$DEVS\
+	$PORT\
+
+LIB=\
+	/$objtype/lib/libmemlayer.a\
+	/$objtype/lib/libmemdraw.a\
+	/$objtype/lib/libdraw.a\
+	/$objtype/lib/libip.a\
+	/$objtype/lib/libc.a\
+	/$objtype/lib/libsec.a\
+
+9:V: $p$CONF s$p$CONF
+
+$p$CONF:DQ:	$CONF.c $OBJ $LIB mkfile
+	$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+	echo '# linking raw kernel'
+	$LD -o $target -H0 -R4096 -T$loadaddr -l $OBJ $CONF.$O $LIB
+
+s$p$CONF:DQ:	$CONF.$O $OBJ $LIB
+	echo '# linking kernel with symbols'
+	$LD -o $target -R4096 -T$loadaddr -l $OBJ $CONF.$O $LIB
+	size $target
+
+$p$CONF.gz:D:	$p$CONF
+	gzip -9 <$p$CONF >$target
+
+$OBJ: $HFILES
+
+install:V: /$objtype/$p$CONF
+
+/$objtype/$p$CONF:D: $p$CONF s$p$CONF
+	{ cp -x $p$CONF s$p$CONF /$objtype } &
+	{ 9fs lookout &&  cp -x $p$CONF s$p$CONF /n/lookout/$objtype } &
+#	{ 9fs piestand && cp -x $p$CONF s$p$CONF /n/piestand/$objtype } &
+	wait
+	touch $target
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+# CFLAGS= -I. -I../port $CFLAGS	# hack to compile private sysproc.c (e.g.)
+
+# header dependencies; targets must be spelled name.$O (letter O) to match the objects in OBJ and DEVS
+clock.$O:	/$objtype/include/ureg.h
+devether.$O:	/$objtype/include/ureg.h
+fpiarm.$O:	/$objtype/include/ureg.h
+main.$O:	/$objtype/include/ureg.h errstr.h init.h reboot.h
+mmu.$O:		/$objtype/include/ureg.h
+trap.$O:	/$objtype/include/ureg.h
+
+devether.$O:	etherif.h ../port/netif.h
+etherkw.$O:	etherif.h ../port/netif.h
+
+l.$O lexception.$O lproc.$O: arm.s arm.h mem.h
+
+init.h:D:	../port/initcode.c init9.s
+	$CC ../port/initcode.c
+	$AS init9.s
+	$LD -l -R1 -s -o init.out init9.$O initcode.$O /$objtype/lib/libc.a
+	{echo 'uchar initcode[]={'
+	 xd -1x <init.out |
+		sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > init.h
+
+reboot.h:D:	rebootcode.s arm.s arm.h mem.h
+	$AS rebootcode.s
+	# -lc is only for memmove.  -T arg is PADDR(REBOOTADDR)
+	$LD -l -a -s -T0x100 -R4 -o reboot.out rebootcode.$O -lc >reboot.list
+	{echo 'uchar rebootcode[]={'
+	 xd -1x reboot.out |
+		sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > reboot.h
+errstr.h:D:	../port/mkerrstr ../port/error.h
+	rc ../port/mkerrstr > errstr.h

+ 465 - 0
sys/src/9/kw/mmu.c

@@ -0,0 +1,465 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "arm.h"
+
+#define L1X(va)		FEXT((va), 20, 12)
+#define L2X(va)		FEXT((va), 12, 8)
+
+enum {
+	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
+	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
+};
+
+#define ISHOLE(pte)	((pte) == 0)
+
+/*
+ * Print the level 1 page table at virtual address l1 as a list of
+ * ranges of consecutive non-empty entries.  Each line gives the
+ * virtual range, and the physical address and descriptor type taken
+ * from the first entry of the range.
+ */
+void
+mmudump(PTE *l1)
+{
+	int i, rngtype;
+	uintptr startva, startpa;
+	uvlong va, endva;
+	PTE pte;
+
+	print("\n");
+	endva = startva = startpa = 0;
+	rngtype = 0;
+	va = 0;
+	/* one L1 entry per MB of virtual space; endva != 0 means a range is open */
+	for (i = 0; i < 4096; i++, va += MB) {
+		pte = l1[i];
+		if (!ISHOLE(pte)) {
+			if (endva == 0) {	/* first entry of a new range */
+				startva = va;
+				startpa = pte & ~(MB - 1);
+				rngtype = pte & (Fine|Section|Coarse);
+			}
+			endva = va + MB;	/* extend the open range */
+			continue;
+		}
+		if (endva == 0)		/* hole, and nothing to close */
+			continue;
+		print("l1 maps va (%#lux-%#llux) -> pa %#lux type %#ux\n",
+			startva, endva-1, startpa, rngtype);
+		endva = 0;
+	}
+	if (endva != 0)			/* range still open at end of table */
+		print("l1 maps va (%#lux-%#llux) -> pa %#lux type %#ux\n",
+			startva, endva-1, startpa, rngtype);
+}
+
+/*
+ * Add the device and flash mappings to the level 1 page table whose
+ * physical address ttbget() returns, point the high-vectors entry at
+ * a small level 2 table that maps one page onto the start of dram,
+ * and record the L1 table in m->mmul1.
+ */
+void
+mmuinit(void)
+{
+	PTE *l1, *l2;
+	uintptr pa, fpa;
+
+	pa = ttbget();
+	l1 = KADDR(pa);
+
+	/* identity-map i/o registers */
+	l1[L1X(VIRTIO)] = PHYSIO|Dom0|L1AP(Krw)|Section;
+
+	/* identity-map nand flash */
+	for (fpa = PHYSNAND; fpa < PHYSNAND + FLASHSIZE; fpa += MiB)
+		l1[L1X(fpa)] = fpa|Dom0|L1AP(Krw)|Section;
+	/* NOTE(review): hard-coded 8MB window, mapped cached+buffered; purpose not stated here */
+	for (fpa = 0xf9000000; fpa < 0xf9000000 + 8*MB; fpa += MiB)
+		l1[L1X(fpa)] = fpa|Dom0|L1AP(Krw)|Section|Cached|Buffered;
+
+	/* identity-map spi flash */
+	for (fpa = PHYSSPIFLASH; fpa < PHYSSPIFLASH + FLASHSIZE; fpa += MiB)
+		l1[L1X(fpa)] = fpa|Dom0|L1AP(Krw)|Section|Cached|Buffered;
+	/* NOTE(review): hard-coded 16MB window, mapped cached+buffered; purpose not stated here */
+	for (fpa = 0xf8000000; fpa < 0xf8000000 + 16*MB; fpa += MiB)
+		l1[L1X(fpa)] = fpa|Dom0|L1AP(Krw)|Section|Cached|Buffered;
+
+	/* map high vectors to start of dram, but only 4K, not 1MB */
+	pa -= MACHSIZE+2*1024;		/* the l2 table lives below the ttb */
+	l2 = KADDR(pa);
+	memset(l2, 0, 1024);		/* 1024 bytes: one l2 table */
+	/* vectors step on u-boot, but so do page tables */
+	l2[L2X(HVECTORS)] = PHYSDRAM|L2AP(Krw)|Small;
+	l1[L1X(HVECTORS)] = pa|Dom0|Coarse;	/* vectors -> ttb-machsize-2k */
+
+	mmuinvalidate();
+	cacheuwbinv();
+
+	m->mmul1 = l1;
+//	mmudump(l1);			/* DEBUG */
+}
+
+/*
+ * Detach all of proc's level 2 page-table pages from this
+ * processor's level 1 table and move them to the front of
+ * proc->mmul2cache.  If clear is non-zero each page is zeroed first.
+ */
+static void
+mmul2empty(Proc* proc, int clear)
+{
+	PTE *l1;
+	Page **tail, *pg;
+
+	l1 = m->mmul1;
+	tail = &proc->mmul2;
+	pg = *tail;
+	while(pg != nil){
+		if(clear)
+			memset(UINT2PTR(pg->va), 0, BY2PG);
+		l1[pg->daddr] = Fault;		/* daddr holds the l1 index */
+		tail = &pg->next;
+		pg = pg->next;
+	}
+	/* tail is now the last link: append the old cache, then swap lists */
+	*tail = proc->mmul2cache;
+	proc->mmul2cache = proc->mmul2;
+	proc->mmul2 = nil;
+}
+
+/*
+ * Remove the user mappings from this processor's level 1 table by
+ * zeroing the entries from L1lo up to, but not including, L1hi.
+ * The disabled (notdef) variant tried to clear only part of that
+ * range, using m->mmul1lo and m->mmul1hi; it is marked as buggy and
+ * is not compiled.
+ */
+static void
+mmul1empty(void)
+{
+#ifdef notdef
+there's a bug in here
+	PTE *l1;
+
+	/* clean out any user mappings still in l1 */
+	if(m->mmul1lo > L1lo){
+		if(m->mmul1lo == 1)
+			m->mmul1[L1lo] = Fault;
+		else
+			memset(&m->mmul1[L1lo], 0, m->mmul1lo*sizeof(PTE));
+		m->mmul1lo = L1lo;
+	}
+	if(m->mmul1hi < L1hi){
+		l1 = &m->mmul1[m->mmul1hi];
+		if((L1hi - m->mmul1hi) == 1)
+			*l1 = Fault;
+		else
+			memset(l1, 0, (L1hi - m->mmul1hi)*sizeof(PTE));
+		m->mmul1hi = L1hi;
+	}
+#else
+	memset(&m->mmul1[L1lo], 0, (L1hi - L1lo)*sizeof(PTE));
+#endif /* notdef */
+}
+
+/*
+ * Make this processor's level 1 table describe proc's address space.
+ * Nothing is done if proc's pid is already recorded in m->mmupid and
+ * proc has not asked for a flush (newtlb).  Otherwise the caches are
+ * written back and invalidated, the user part of the L1 table is
+ * emptied and refilled from proc->mmul2, the table is written back
+ * to memory, and the tlb is invalidated.
+ */
+void
+mmuswitch(Proc* proc)
+{
+	int x;
+	PTE *l1;
+	Page *page;
+
+	/* do kprocs get here and if so, do they need to? */
+	if(m->mmupid == proc->pid && !proc->newtlb)
+		return;
+	m->mmupid = proc->pid;
+
+	/* write back dirty and invalidate caches */
+	cacheuwbinv();
+
+	/* a requested flush discards (and zeroes) the proc's l2 tables */
+	if(proc->newtlb){
+		mmul2empty(proc, 1);
+		proc->newtlb = 0;
+	}
+
+	mmul1empty();
+
+	/* move in new map */
+	l1 = m->mmul1;
+	for(page = proc->mmul2; page != nil; page = page->next){
+		x = page->daddr;
+		l1[x] = PPN(page->pa)|Dom0|Coarse;
+		/* know here that L1lo < x < L1hi */
+		/* move whichever of mmul1lo/mmul1hi is nearer to x */
+		if(x+1 - m->mmul1lo < m->mmul1hi - x)
+			m->mmul1lo = x+1;
+		else
+			m->mmul1hi = x;
+	}
+
+	/* make sure map is in memory */
+	/* could be smarter about how much? */
+	cachedwbse(&l1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
+
+	/* lose any possible stale tlb entries */
+	mmuinvalidate();
+
+//	mmudump(l1);
+	//print("mmuswitch l1lo %d l1hi %d %d\n",
+	//	m->mmul1lo, m->mmul1hi, proc->kp);
+}
+
+/*
+ * Force the current process's mappings to be rebuilt: mark it as
+ * needing a flush and run mmuswitch on it at high priority level.
+ */
+void
+flushmmu(void)
+{
+	int pl;
+
+	pl = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(pl);
+}
+
+/*
+ * Give back all of proc's level 2 page-table pages to the page
+ * allocator and clear the user part of this processor's level 1
+ * table.  Panics if any of the pages still has other references.
+ */
+void
+mmurelease(Proc* proc)
+{
+	Page *pg, *nextpg;
+
+	/* write back dirty and invalidate caches */
+	cacheuwbinv();
+
+	/* collect every l2 page on the cache list, then free that list */
+	mmul2empty(proc, 0);
+	pg = proc->mmul2cache;
+	while(pg != nil){
+		nextpg = pg->next;	/* pagechainhead reuses the link */
+		pg->ref--;
+		if(pg->ref != 0)
+			panic("mmurelease: page->ref %d", pg->ref);
+		pagechainhead(pg);
+		pg = nextpg;
+	}
+	/* pages were freed and someone is waiting for memory */
+	if(proc->mmul2cache != nil && palloc.r.p != nil)
+		wakeup(&palloc.r);
+	proc->mmul2cache = nil;
+
+	mmul1empty();
+
+	/* make sure map is in memory */
+	/* could be smarter about how much? */
+	cachedwbse(&m->mmul1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
+
+	/* lose any possible stale tlb entries */
+	mmuinvalidate();
+}
+
+/*
+ * Enter a mapping for user virtual address va in the current
+ * process's page tables.  pa carries the physical page number plus
+ * the PTEWRITE and PTEUNCACHED flag bits tested below; page is the
+ * Page being mapped and is used only for its cachectl state.
+ * A level 2 table is allocated (or taken from up->mmul2cache) if the
+ * level 1 entry for va is empty.
+ */
+void
+putmmu(uintptr va, uintptr pa, Page* page)
+{
+	int x;
+	Page *pg;
+	PTE *l1, *pte;
+
+	x = L1X(va);
+	l1 = &m->mmul1[x];
+	//print("putmmu(%#p, %#p, %#p) ", va, pa, page->pa);
+	//print("mmul1 %#p l1 %#p *l1 %#ux x %d pid %d\n",
+	//	m->mmul1, l1, *l1, x, up->pid);
+	if(*l1 == Fault){
+		/* wasteful - l2 pages only have 256 entries - fix */
+		if(up->mmul2cache == nil){
+			/* auxpg since we don't need much? memset if so */
+			pg = newpage(1, 0, 0);
+			pg->va = VA(kmap(pg));
+		}
+		else{
+			/* reuse a cached l2 page; it must be cleared first */
+			pg = up->mmul2cache;
+			up->mmul2cache = pg->next;
+			memset(UINT2PTR(pg->va), 0, BY2PG);
+		}
+		/* remember which l1 slot this table fills; link it to the proc */
+		pg->daddr = x;
+		pg->next = up->mmul2;
+		up->mmul2 = pg;
+
+		*l1 = PPN(pg->pa)|Dom0|Coarse;
+		//print("l1 %#p *l1 %#ux x %d pid %d\n", l1, *l1, x, up->pid);
+
+		/* same mmul1lo/mmul1hi bookkeeping as in mmuswitch */
+		if(x >= m->mmul1lo && x < m->mmul1hi){
+			if(x+1 - m->mmul1lo < m->mmul1hi - x)
+				m->mmul1lo = x+1;
+			else
+				m->mmul1hi = x;
+		}
+	}
+	pte = UINT2PTR(KADDR(PPN(*l1)));
+	//print("pte %#p index %ld %#ux\n", pte, L2X(va), *(pte+L2X(va)));
+
+	/* protection bits are
+	 *	PTERONLY|PTEVALID;
+	 *	PTEWRITE|PTEVALID;
+	 *	PTEWRITE|PTEUNCACHED|PTEVALID;
+	 */
+	x = Small;		/* x is reused for the l2 entry's attribute bits */
+	if(!(pa & PTEUNCACHED))
+		x |= Cached|Buffered;
+	if(pa & PTEWRITE)
+		x |= L2AP(Urw);
+	else
+		x |= L2AP(Uro);
+	pte[L2X(va)] = PPN(pa)|x;
+
+	/* clear out the current entry */
+	mmuinvalidateaddr(PPN(va));
+
+	/*  write back dirty entries - we need this because the pio() in
+	 *  fault.c is writing via a different virt addr and won't clean
+	 *  its changes out of the dcache.  Page coloring doesn't work
+	 *  on this mmu because the virtual cache is set associative
+	 *  rather than direct mapped.
+	 */
+	cachedwbinv();
+	if(page->cachectl[0] == PG_TXTFLUSH){
+		/* pio() sets PG_TXTFLUSH whenever a text pg has been written */
+		cacheiinv();
+		page->cachectl[0] = PG_NOFLUSH;
+	}
+	//print("putmmu %#p %#p %#p\n", va, pa, PPN(pa)|x);
+}
+
+/*
+ * Simple helper for ucalloc(): turn off caching and buffering for
+ * the 1MiB Section at v, which must already be valid in the MMU.
+ * Returns v, or nil if v is not mapped by a Section entry.
+ */
+void*
+mmuuncache(void* v, usize size)
+{
+	PTE *l1e;
+	uintptr va;
+
+	va = PTR2UINT(v);
+	assert(!(va & (1*MiB-1)) && size == 1*MiB);
+
+	l1e = &m->mmul1[L1X(va)];
+	if((*l1e & (Fine|Section|Coarse)) != Section)
+		return nil;
+
+	*l1e &= ~(Cached|Buffered);
+	mmuinvalidateaddr(va);
+	cachedwbinvse(l1e, 4);		/* push the changed entry out to memory */
+
+	return v;
+}
+
+/*
+ * Stub: map exactly one 1MiB Section, va -> pa, kernel read/write.
+ * Returns va, or 0 if the level 1 entry for va is already in use.
+ */
+uintptr
+mmukmap(uintptr va, uintptr pa, usize size)
+{
+	PTE *l1e;
+
+	assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);
+
+	l1e = &m->mmul1[L1X(va)];
+	if(*l1e != Fault)
+		return 0;
+
+	*l1e = pa|Dom0|L1AP(Krw)|Section;
+	mmuinvalidateaddr(va);
+	cachedwbinvse(l1e, 4);		/* push the new entry out to memory */
+
+	return va;
+}
+
+/*
+ * Stub: undo a mapping made by mmukmap(va, pa, size).
+ * Returns va, or 0 if the level 1 entry for va is not exactly the
+ * Section entry mmukmap would have written for pa.
+ */
+uintptr
+mmukunmap(uintptr va, uintptr pa, usize size)
+{
+	PTE *l1e;
+
+	assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);
+
+	l1e = &m->mmul1[L1X(va)];
+	if(*l1e != (pa|Dom0|L1AP(Krw)|Section))
+		return 0;
+
+	*l1e = Fault;
+	mmuinvalidateaddr(va);
+	cachedwbinvse(l1e, 4);		/* push the cleared entry out to memory */
+
+	return va;
+}
+
+/*
+ * Return the number of bytes that can be accessed via KADDR(pa).
+ * If pa is not a valid argument to KADDR, return 0.
+ * Only a single 512MiB bank starting at PHYSDRAM is considered.
+ */
+uintptr
+cankaddr(uintptr pa)
+{
+	uintptr base, top;
+
+	base = PHYSDRAM;
+	top = base + 512*MiB;
+	if(pa >= base && pa < top)
+		return top - pa;
+	return 0;
+}
+
+/*
+ * from 386
+ *
+ * Return a kernel virtual address for size bytes of physical space
+ * starting at pa; panics if mmukmap does not give the expected result.
+ *
+ * NOTE(review): the mmukmap in this file asserts size == 1*MiB and
+ * returns va rather than the end of the mapped physical range, so the
+ * pae-size != pa check below looks inconsistent with it; confirm
+ * before relying on vmap for anything but the early-return case.
+ */
+void*
+vmap(uintptr pa, usize size)
+{
+	uintptr pae, va;
+	usize o, osize;
+
+	/*
+	 * XXX - replace with new vm stuff.
+	 * Crock after crock - the first 4MB is mapped with 2MB pages
+	 * so catch that and return good values because the current mmukmap
+	 * will fail.
+	 */
+	if(pa+size < 4*MiB)
+		return UINT2PTR(kseg0|pa);
+
+	/* page-align pa, growing size to cover the same bytes */
+	osize = size;
+	o = pa & (BY2PG-1);
+	pa -= o;
+	size += o;
+	size = ROUNDUP(size, PGSIZE);
+
+	va = kseg0|pa;
+	pae = mmukmap(va, pa, size);
+	if(pae == 0 || pae-size != pa)
+		panic("vmap(%#p, %ld) called from %#p: mmukmap fails %#p",
+			pa+o, osize, getcallerpc(&pa), pae);
+
+	return UINT2PTR(va+o);
+}
+
+/*
+ * from 386
+ *
+ * Counterpart of vmap; currently does nothing with its arguments.
+ */
+void
+vunmap(void* v, usize size)
+{
+	/*
+	 * XXX - replace with new vm stuff.
+	 * Can't do this until do real vmap for all space that
+	 * might be used, e.g. stuff below 1MB which is currently
+	 * mapped automagically at boot but that isn't used (or
+	 * at least shouldn't be used) by the kernel.
+	upafree(PADDR(v), size);
+	 */
+	USED(v, size);
+}
+
+/*
+ * Notes.
+ * Everything is in domain 0;
+ * domain 0 access bits in the DAC register are set
+ * to Client, which means access is controlled by the
+ * permission values set in the PTE.
+ *
+ * L1 access control for the kernel is set to 1 (RW,
+ * no user mode access);
+ * L2 access control for the kernel is set to 1 (ditto)
+ * for all 4 AP sets;
+ * L1 user mode access is never set;
+ * L2 access control for user mode is set to either
+ * 2 (RO) or 3 (RW) depending on whether text or data,
+ * for all 4 AP sets.
+ * (To get kernel RO set AP to 0 and S bit in control
+ * register c1).
+ * Coarse L1 descriptors are used for user space. Each points to
+ * an L2 page-table that has 256 entries (see L2X) and so needs
+ * 1024 bytes per table.
+ * Small (4KiB) pages are mapped by the L2 entries. Each L2
+ * page-table is currently allocated a whole 4096-byte page.
+ *
+ * 4KiB. That's the size of 1) a page, 2) the
+ * size allocated for an L2 page-table page (note only 1KiB
+ * is needed per L2 page - to be dealt with later) and
+ * 3) the size of the area in L1 needed to hold the PTEs
+ * to map 1GiB of user space (0 -> 0x3fffffff, 1024 entries).
+ */

BIN
sys/src/9/kw/nvram


+ 58 - 0
sys/src/9/kw/plug

@@ -0,0 +1,58 @@
+# marvell sheeva plug
+dev
+	root
+	cons
+	env
+	pipe
+	proc
+	mnt
+	srv
+	dup
+	arch
+	ssl
+	tls
+	cap
+	kprof
+#	flash
+#	pci		pci
+
+	ether		netif
+	ip		arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno
+
+	uart
+#	usb
+
+link
+	etherkw		ethermii
+	archkw
+	ethermedium
+	loopbackmedium
+	netdevmedium
+#	usbehci
+ip
+	tcp
+	udp
+	ipifc
+	icmp
+	icmp6
+	ipmux
+
+misc
+	rdb
+	softfpu
+	syscall
+	uartkw
+
+boot cpu
+	tcp
+
+bootdir
+	bootplug.out boot
+	/arm/bin/ip/ipconfig ipconfig
+	/arm/bin/auth/factotum factotum
+#	/arm/bin/usb/usbd
+	nvram
+
+port
+	int cpuserver = 1;
+	int i8250freq = 3686000;

+ 138 - 0
sys/src/9/kw/random.c

@@ -0,0 +1,138 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+
+/*
+ * State of the random byte generator: a circular buffer filled by
+ * randomclock() and drained by randomread().
+ */
+struct Rb
+{
+	QLock;			/* held by randomread while draining */
+	Rendez	producer;	/* genrandom sleeps here while the buffer is full */
+	Rendez	consumer;	/* randomread sleeps here while the buffer is empty */
+	ulong	randomcount;	/* bumped by genrandom, sampled and zeroed by randomclock */
+	uchar	buf[128];	/* the circular buffer */
+	uchar	*ep;		/* end of buf */
+	uchar	*rp;		/* next byte to read */
+	uchar	*wp;		/* next byte to write */
+	uchar	next;		/* samples folded into bits so far; a byte is stored at 8/2 */
+	uchar	wakeme;		/* set while a reader waits on consumer */
+	ushort	bits;		/* accumulator, shifted 2 bits per sample */
+	ulong	randn;		/* state of the multiplier used to stir output bytes */
+} rb;
+
+/*
+ * Sleep condition: non-zero while the circular buffer has room,
+ * i.e. the write pointer is not immediately behind the read pointer
+ * (directly, or across the wrap at the end of the buffer).
+ */
+static int
+rbnotfull(void*)
+{
+	int gap;
+
+	gap = rb.rp - rb.wp;
+	if(gap == 1 || gap == (1 - sizeof(rb.buf)))
+		return 0;
+	return 1;
+}
+
+/* Sleep condition: non-zero while the circular buffer holds unread bytes. */
+static int
+rbnotempty(void*)
+{
+	return rb.wp != rb.rp;
+}
+
+/*
+ * Kernel process that spins incrementing rb.randomcount, which
+ * randomclock() samples from the clock interrupt.  After each burst
+ * of counting it yields to higher-priority work and sleeps while the
+ * buffer is full.
+ */
+static void
+genrandom(void*)
+{
+	up->basepri = PriNormal;
+	up->priority = up->basepri;
+
+	for(;;){
+		/* count until the counter passes 100000 */
+		while(++rb.randomcount <= 100000)
+			;
+		if(anyhigher())
+			sched();
+		if(!rbnotfull(0))
+			sleep(&rb.producer, rbnotfull, 0);
+	}
+}
+
+/*
+ *  produce random bits in a circular buffer:
+ *  on each call fold the current counter into rb.bits, and after
+ *  every 8/2 calls xor the accumulated bits into the next buffer byte.
+ */
+static void
+randomclock(void)
+{
+	uchar *nwp;
+
+	/* nothing counted since last time, or no room: skip this call */
+	if(rb.randomcount == 0 || !rbnotfull(0))
+		return;
+
+	rb.bits = (rb.bits<<2) ^ rb.randomcount;
+	rb.randomcount = 0;
+
+	if(++rb.next != 8/2)
+		return;
+	rb.next = 0;
+
+	/* store a byte and advance the write pointer, wrapping at the end */
+	*rb.wp ^= rb.bits;
+	nwp = rb.wp+1;
+	if(nwp == rb.ep)
+		nwp = rb.buf;
+	rb.wp = nwp;
+
+	if(rb.wakeme)
+		wakeup(&rb.consumer);
+}
+
+/*
+ * Start the random byte generator: hook randomclock into the clock,
+ * set up an empty circular buffer and start the genrandom kproc.
+ */
+void
+randominit(void)
+{
+	addclock0link(randomclock, 1000/HZ);
+	rb.ep = rb.buf + sizeof(rb.buf);
+	rb.rp = rb.wp = rb.buf;		/* rp == wp means empty */
+	kproc("genrandom", genrandom, 0);
+}
+
+/*
+ *  consume random bytes from a circular buffer
+ *
+ *  Fills xp with n bytes, sleeping whenever the buffer is empty,
+ *  and returns n.  Readers are serialised by the QLock in rb.
+ */
+ulong
+randomread(void *xp, ulong n)
+{
+	uchar *e, *p;
+	ulong x;
+
+	p = xp;
+
+	/* on error (e.g. out of the sleep below) drop the lock and pass it on */
+	if(waserror()){
+		qunlock(&rb);
+		nexterror();
+	}
+
+	qlock(&rb);
+	for(e = p + n; p < e; ){
+		if(rb.wp == rb.rp){
+			/* empty: ask to be woken, prod the producer, wait */
+			rb.wakeme = 1;
+			wakeup(&rb.producer);
+			sleep(&rb.consumer, rbnotempty, 0);
+			rb.wakeme = 0;
+			continue;
+		}
+
+		/*
+		 *  beating clocks will be predictable if
+		 *  they are synchronized.  Use a cheap pseudo
+		 *  random number generator to obscure any cycles.
+		 */
+		x = rb.randn*1103515245 ^ *rb.rp;
+		*p++ = rb.randn = x;
+
+		/* advance the read pointer, wrapping at the end */
+		if(rb.rp+1 == rb.ep)
+			rb.rp = rb.buf;
+		else
+			rb.rp = rb.rp+1;
+	}
+	qunlock(&rb);
+	poperror();
+
+	/* there is room again */
+	wakeup(&rb.producer);
+
+	return n;
+}

+ 104 - 0
sys/src/9/kw/reboot.h

@@ -0,0 +1,104 @@
+uchar rebootcode[]={
+0x18,0xc6,0x9f,0xe5,0x04,0x00,0x8d,0xe5,0x00,0x80,0xa8,0xe1,0x08,0x90,0x9d,0xe5,
+0x0c,0xa0,0x9d,0xe5,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0xf4,0x75,0x9f,0xe5,0x52,0x10,0xa1,0xe3,
+0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x8f,0x00,0x00,0xeb,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0xbc,0x75,0x9f,0xe5,0x65,0x10,0xa1,0xe3,0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x0e,0x72,0xa7,0xe3,0x00,0x00,0xa0,0xe3,0xaf,0x00,0x00,0xeb,0x07,0xc0,0xcc,0xe1,
+0x07,0xd0,0xcd,0xe1,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x74,0x75,0x9f,0xe5,0x62,0x10,0xa1,0xe3,
+0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0xac,0x00,0x00,0xeb,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x3c,0x75,0x9f,0xe5,0x6f,0x10,0xa1,0xe3,0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x94,0x00,0x00,0xeb,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x04,0x75,0x9f,0xe5,0x6f,0x10,0xa1,0xe3,
+0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x09,0x40,0xa4,0xe1,0x0a,0x50,0xa5,0xe1,
+0x08,0x60,0xa6,0xe1,0x06,0xd0,0xad,0xe1,0x24,0xd0,0x4d,0xe2,0x30,0xe0,0x0d,0xe5,
+0x30,0xd0,0x4d,0xe2,0x2c,0x60,0x8d,0xe5,0x28,0x50,0x8d,0xe5,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0xac,0x74,0x9f,0xe5,0x74,0x10,0xa1,0xe3,0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x00,0x60,0x8d,0xe5,0x04,0x60,0x8d,0xe5,0x06,0x00,0xa0,0xe1,0x08,0x40,0x8d,0xe5,
+0x0c,0x50,0x8d,0xe5,0x1b,0x01,0x00,0xeb,0x2c,0x60,0x9d,0xe5,0x28,0x50,0x9d,0xe5,
+0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,
+0x3b,0xff,0x2f,0xee,0x58,0x74,0x9f,0xe5,0x2d,0x10,0xa1,0xe3,0x00,0x10,0x87,0xe5,
+0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,
+0x3b,0xff,0x2f,0xee,0x6d,0x00,0x00,0xeb,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,
+0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x20,0x74,0x9f,0xe5,
+0x3e,0x10,0xa1,0xe3,0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,
+0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0xec,0x73,0x9f,0xe5,0x0d,0x10,0xa1,0xe3,0x00,0x10,0x87,0xe5,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,
+0x3b,0xff,0x2f,0xee,0xb8,0x73,0x9f,0xe5,0x0a,0x10,0xa1,0xe3,0x00,0x10,0x87,0xe5,
+0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,
+0x3b,0xff,0x2f,0xee,0x06,0x60,0x86,0xe1,0x00,0xf0,0x86,0xe2,0xd3,0x00,0xa0,0xe3,
+0x00,0xf0,0x29,0xe1,0x8c,0x73,0x9f,0xe5,0x00,0xe0,0x87,0xe5,0x3f,0x00,0x00,0xeb,
+0x10,0x0f,0x11,0xee,0x80,0xb3,0x9f,0xe5,0x0b,0x00,0xc0,0xe1,0x10,0x0f,0x01,0xee,
+0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,
+0x3b,0xff,0x2f,0xee,0x64,0x43,0x9f,0xe5,0x64,0x23,0x9f,0xe5,0x00,0x30,0xa3,0xe3,
+0x02,0x5c,0xa5,0xe3,0x03,0x10,0x82,0xe1,0x00,0x10,0x84,0xe5,0x04,0x40,0x84,0xe2,
+0x01,0x36,0x83,0xe2,0x01,0x50,0x55,0xe2,0xf9,0xff,0xff,0x1a,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x00,0x00,0xa0,0xe3,0x16,0x0f,0x08,0xee,0x17,0x0f,0x08,0xee,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x10,0x0f,0x11,0xee,0x30,0x00,0xc0,0xe3,0x10,0x0f,0x01,0xee,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0xe0,0x72,0x9f,0xe5,0x00,0xe0,0x97,0xe5,0x00,0xf0,0x8e,0xe2,0x0f,0xe2,0xce,0xe3,
+0x00,0xe0,0x8e,0xe1,0x00,0xf0,0x8e,0xe2,0x10,0x0f,0x11,0xee,0xd4,0xb2,0x9f,0xe5,
+0x0b,0x00,0xc0,0xe1,0x10,0x0f,0x01,0xee,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,
+0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x00,0xf0,0x8e,0xe2,
+0x00,0x00,0xa0,0xe3,0x17,0x0f,0x08,0xee,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,
+0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x00,0xf0,0x8e,0xe2,
+0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,
+0x3b,0xff,0x2f,0xee,0x00,0x30,0x0f,0xe1,0x80,0x10,0x83,0xe3,0x01,0xf0,0x29,0xe1,
+0x7e,0xff,0x17,0xee,0xfd,0xff,0xff,0x1a,0x19,0xff,0x2f,0xee,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x1b,0xff,0x2f,0xee,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,0x00,0x00,0xa0,0xe3,0x15,0x0f,0x07,0xee,
+0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,
+0x3b,0xff,0x2f,0xee,0x00,0x00,0xa0,0xe3,0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,
+0x9a,0x0f,0x07,0xee,0x00,0x00,0xa0,0xe3,0x95,0x0f,0x07,0xee,0x3b,0xff,0x2f,0xee,
+0x03,0xf0,0x29,0xe1,0x00,0xf0,0x8e,0xe2,0x04,0x00,0x8d,0xe5,0x08,0x20,0x9d,0xe5,
+0x0c,0x30,0x9d,0xe5,0x03,0x10,0x80,0xe0,0x02,0x00,0x50,0xe1,0x3a,0x00,0x00,0x9a,
+0x03,0x20,0x82,0xe0,0x04,0x00,0x53,0xe3,0x15,0x00,0x00,0xba,0x03,0x30,0x11,0xe2,
+0x03,0x00,0x00,0x0a,0x01,0x30,0x72,0xe5,0x01,0x30,0x61,0xe5,0x03,0x30,0x11,0xe2,
+0xfb,0xff,0xff,0x1a,0x03,0x30,0x12,0xe2,0x15,0x00,0x00,0x1a,0x1f,0x30,0x80,0xe2,
+0x03,0x00,0x51,0xe1,0x04,0x00,0x00,0x9a,0xf0,0x00,0x32,0xe9,0xf0,0x00,0x21,0xe9,
+0xf0,0x00,0x32,0xe9,0xf0,0x00,0x21,0xe9,0xf8,0xff,0xff,0xea,0x03,0x30,0x80,0xe2,
+0x03,0x00,0x51,0xe1,0x02,0x00,0x00,0x9a,0x04,0x40,0x32,0xe5,0x04,0x40,0x21,0xe5,
+0xfa,0xff,0xff,0xea,0x01,0x00,0x50,0xe1,0x03,0x00,0x00,0x0a,0x01,0x30,0x72,0xe5,
+0x01,0x30,0x61,0xe5,0x01,0x00,0x50,0xe1,0xfb,0xff,0xff,0x1a,0x04,0x00,0x9d,0xe5,
+0x00,0xf0,0x8e,0xe2,0x02,0x00,0x53,0xe3,0x08,0x40,0xa4,0xb3,0x18,0x50,0xa5,0xb3,
+0x01,0xb0,0xab,0xb3,0x10,0x40,0xa4,0x03,0x10,0x50,0xa5,0x03,0x02,0xb0,0xab,0x03,
+0x18,0x40,0xa4,0xc3,0x08,0x50,0xa5,0xc3,0x03,0xb0,0xab,0xc3,0x08,0x30,0x80,0xe2,
+0x03,0x00,0x51,0xe1,0xea,0xff,0xff,0x9a,0x03,0x20,0xc2,0xe3,0x00,0x60,0x92,0xe5,
+0x03,0x00,0x51,0xe1,0x06,0x00,0x00,0x9a,0x16,0x85,0xa8,0xe1,0xc0,0x00,0x32,0xe9,
+0x37,0x84,0x88,0xe1,0x17,0x75,0xa7,0xe1,0x36,0x74,0x87,0xe1,0x80,0x01,0x21,0xe9,
+0xf6,0xff,0xff,0xea,0x0b,0x20,0x82,0xe0,0xdd,0xff,0xff,0xea,0x04,0x00,0x53,0xe3,
+0x15,0x00,0x00,0xba,0x03,0x30,0x10,0xe2,0x03,0x00,0x00,0x0a,0x01,0x30,0xd2,0xe4,
+0x01,0x30,0xc0,0xe4,0x03,0x30,0x10,0xe2,0xfb,0xff,0xff,0x1a,0x03,0x30,0x12,0xe2,
+0x12,0x00,0x00,0x1a,0x1f,0x30,0x41,0xe2,0x03,0x00,0x50,0xe1,0x04,0x00,0x00,0x2a,
+0xf0,0x00,0xb2,0xe8,0xf0,0x00,0xa0,0xe8,0xf0,0x00,0xb2,0xe8,0xf0,0x00,0xa0,0xe8,
+0xf8,0xff,0xff,0xea,0x03,0x30,0x41,0xe2,0x03,0x00,0x50,0xe1,0x02,0x00,0x00,0x2a,
+0x04,0x40,0x92,0xe4,0x04,0x40,0x80,0xe4,0xfa,0xff,0xff,0xea,0x00,0x00,0x51,0xe1,
+0xc9,0xff,0xff,0x0a,0x01,0x30,0xd2,0xe4,0x01,0x30,0xc0,0xe4,0xfa,0xff,0xff,0xea,
+0x02,0x00,0x53,0xe3,0x08,0x40,0xa4,0xb3,0x18,0x50,0xa5,0xb3,0x03,0xb0,0xab,0xb3,
+0x10,0x40,0xa4,0x03,0x10,0x50,0xa5,0x03,0x02,0xb0,0xab,0x03,0x18,0x40,0xa4,0xc3,
+0x08,0x50,0xa5,0xc3,0x01,0xb0,0xab,0xc3,0x08,0x30,0x41,0xe2,0x03,0x00,0x50,0xe1,
+0xed,0xff,0xff,0x2a,0x03,0x20,0xc2,0xe3,0x04,0x80,0x92,0xe4,0x03,0x00,0x50,0xe1,
+0x06,0x00,0x00,0x2a,0x38,0x64,0xa6,0xe1,0x80,0x01,0xb2,0xe8,0x17,0x65,0x86,0xe1,
+0x37,0x74,0xa7,0xe1,0x18,0x75,0x87,0xe1,0xc0,0x00,0xa0,0xe8,0xf6,0xff,0xff,0xea,
+0x0b,0x20,0x42,0xe0,0xe0,0xff,0xff,0xea,0x86,0xff,0xff,0xea,0xfe,0xff,0xff,0xea,
+0x3c,0x17,0x00,0x00,0x00,0x20,0x01,0xf1,0xfc,0x00,0x00,0x60,0x0e,0x10,0x00,0x00,
+0x00,0x40,0x00,0x60,0x1e,0x04,0x00,0x00,0x0d,0x30,0x00,0x00,0x00,0x00,0x00,0x00,
+
+
+};

+ 175 - 0
sys/src/9/kw/rebootcode.s

@@ -0,0 +1,175 @@
+/*
+ * sheevaplug reboot code
+ *
+ * R11 is used by the loader as a temporary, so avoid it.
+ */
+#include "arm.s"
+
+/*
+ * Turn off MMU, then copy the new kernel to its correct location
+ * in physical memory.  Then jump to the start of the kernel.
+ */
+
+/*
+ * main(PADDR(entry), PADDR(code), size);
+ *
+ * Turns caches and MMU off, copies size bytes from code to entry
+ * with memmove, flushes the caches and jumps to entry.
+ * The WAVE arguments along the way spell "Reboot->" followed by
+ * CR and LF (WAVE is defined elsewhere; presumably it emits the
+ * character as a progress marker).
+ */
+TEXT	main(SB), 1, $-4
+	MOVW	$setR12(SB), R12
+
+	MOVW	R0, p1+0(FP)		/* destination, passed in R0 */
+
+	/* copy in arguments from frame */
+	MOVW	R0, R8			/* entry point */
+	MOVW	p2+4(FP), R9		/* source */
+	MOVW	n+8(FP), R10		/* byte count */
+
+WAVE('R')
+	BL	cachesoff(SB)
+	/* now back in 29- or 26-bit addressing, mainly for SB */
+
+	/* turn the MMU off */
+WAVE('e')
+	MOVW	$KSEGM, R7
+	MOVW	$PHYSDRAM, R0
+	BL	_r15warp(SB)		/* continue at the PHYSDRAM alias of the next instruction */
+
+	BIC	R7, R12			/* SB */
+	BIC	R7, R13			/* SP */
+	/* don't care about R14 */
+
+WAVE('b')
+	BL	mmuinvalidate(SB)
+WAVE('o')
+	BL	mmudisable(SB)
+
+WAVE('o')
+	MOVW	R9, R4			/* restore regs across function calls */
+	MOVW	R10, R5
+	MOVW	R8, R6
+
+	/* set up a new stack for local vars and memmove args */
+	MOVW	R6, SP			/* tiny trampoline stack */
+	SUB	$(0x20 + 4), SP		/* back up before a.out header */
+
+	MOVW	R14, -48(SP)		/* store return addr */
+	SUB	$48, SP			/* allocate stack frame */
+
+	MOVW	R6, 44(SP)		/* save dest/entry */
+	MOVW	R5, 40(SP)		/* save count */
+
+WAVE('t')
+
+	/* memmove(dest, src, size): first argument goes in R0 as well */
+	MOVW	R6, 0(SP)
+	MOVW	R6, 4(SP)		/* push dest */
+	MOVW	R6, R0
+	MOVW	R4, 8(SP)		/* push src */
+	MOVW	R5, 12(SP)		/* push size */
+	BL	memmove(SB)
+
+	MOVW	44(SP), R6		/* restore R6 (dest/entry) */
+	MOVW	40(SP), R5		/* restore R5 (count) */
+WAVE('-')
+	/*
+	 * flush caches
+	 */
+	BL	cacheuwbinv(SB)
+
+WAVE('>')
+WAVE('\r');
+WAVE('\n');
+/*
+ * jump to kernel entry point.  Note the true kernel entry point is
+ * the virtual address KZERO|R6, but this must wait until
+ * the MMU is enabled by the kernel in l.s
+ */
+	ORR	R6, R6			/* NOP: avoid link bug */
+	B	(R6)
+
+/*
+ * turn the caches off, double map 0 & KZERO, invalidate TLBs, revert to
+ * tiny addresses.  upon return, it will be safe to turn off the mmu.
+ *
+ * The link register is parked in the word just below this code
+ * (KADDR(0x100-4)) across the call to cacheuwbinv and reloaded
+ * from there before returning.
+ */
+TEXT cachesoff(SB), 1, $-4
+	MOVW	$(PsrDirq|PsrDfiq|PsrMsvc), R0
+	MOVW	R0, CPSR
+	MOVW	$KADDR(0x100-4), R7		/* just before this code */
+	MOVW	R14, (R7)			/* save link */
+
+	BL	cacheuwbinv(SB)
+
+	/* clear the cache, write-buffer and alignment bits in the control register */
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$(CpCwb|CpCicache|CpCdcache|CpCalign), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+
+	/* redo double map of 0, KZERO */
+	MOVW	$(L1+L1X(PHYSDRAM)), R4		/* address of PTE for 0 */
+	MOVW	$PTEDRAM, R2			/* PTE bits */
+//	MOVW	$PTEIO, R2			/* PTE bits */
+	MOVW	$PHYSDRAM, R3
+	MOVW	$512, R5			/* 512 entries of 1MiB each */
+_ptrdbl:
+	ORR	R3, R2, R1		/* first identity-map 0 to 0, etc. */
+	MOVW	R1, (R4)
+	ADD	$4, R4				/* bump PTE address */
+	ADD	$MiB, R3			/* bump pa */
+	SUB.S	$1, R5
+	BNE	_ptrdbl
+
+	BARRIERS
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvd), CpTLBinv
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+
+	/* back to 29- or 26-bit addressing, mainly for SB */
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$(CpCd32|CpCi32), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+
+	MOVW	$KADDR(0x100-4), R7		/* just before this code */
+	MOVW	(R7), R14			/* restore link */
+	RET
+
+/*
+ * Return to the caller at a relocated address: the top four bits of
+ * the return address are cleared and the value in R0 is or'ed in.
+ */
+TEXT _r15warp(SB), 1, $-4
+	BIC	$0xf0000000, R14
+	ORR	R0, R14
+	RET
+
+/*
+ * Clear the CpChv, CpCmmu, CpCdcache, CpCicache and CpCwb bits in
+ * the system control register (by their names: high vectors, mmu,
+ * both caches and the write buffer).
+ */
+TEXT mmudisable(SB), 1, $-4
+	MRC	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$(CpChv|CpCmmu|CpCdcache|CpCicache|CpCwb), R0
+	MCR     CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+	RET
+
+/* reboot-time copy of mmuinvalidate: one unified-tlb invalidate operation */
+TEXT mmuinvalidate(SB), 1, $-4			/* invalidate all */
+	MOVW	$0, R0
+	MCR	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+	RET
+
+/*
+ * reboot-time copy of cacheuwbinv.  With irqs disabled: write back
+ * and invalidate the data cache, flush and invalidate the L2 cache,
+ * invalidate the instruction cache and drain the write buffer; then
+ * restore the saved CPSR.  Uses R0, R1 and R3.
+ */
+TEXT cacheuwbinv(SB), 1, $-4			/* D+I writeback+invalidate */
+	BARRIERS
+	MOVW	CPSR, R3			/* splhi */
+	ORR	$(PsrDirq), R3, R1
+	MOVW	R1, CPSR
+
+_uwbinv:					/* D writeback+invalidate */
+	/* repeat the test-and-clean operation until it sets Z (cache clean) */
+	MRC	CpSC, 0, PC, C(CpCACHE), C(CpCACHEwbi), CpCACHEtest
+	BNE	_uwbinv
+
+	MCR	CpSC, CpL2, PC, C(CpTESTCFG), C(CpTCl2flush), CpTCl2all
+	BARRIERS
+	MCR	CpSC, CpL2, PC, C(CpTESTCFG), C(CpTCl2inv), CpTCl2all
+	BARRIERS
+
+	MOVW	$0, R0				/* I invalidate */
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+	BARRIERS
+
+	MOVW	$0, R0				/* drain write buffer */
+	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
+	BARRIERS
+
+	MOVW	R3, CPSR			/* splx */
+	RET

+ 119 - 0
sys/src/9/kw/softfpu.c

@@ -0,0 +1,119 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Stub in this port: ignores all its arguments and returns 0.
+ */
+int
+fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write)
+{
+	/*
+	 * Called from procdevtab.read and procdevtab.write
+	 * allow user process access to the FPU registers.
+	 * This is the only FPU routine which is called directly
+	 * from the port code; it would be nice to have dynamic
+	 * creation of entries in the device file trees...
+	 */
+	USED(proc, a, n, offset, write);
+
+	return 0;
+}
+
+/*
+ * Empty stub in this port; the comment in the body describes the
+ * hook's role, not work done here.
+ */
+void
+fpunotify(Ureg*)
+{
+	/*
+	 * Called when a note is about to be delivered to a
+	 * user process, usually at the end of a system call.
+	 * Note handlers are not allowed to use the FPU so
+	 * the state is marked (after saving if necessary) and
+	 * checked in the Device Not Available handler.
+	 */
+}
+
+/*
+ * Empty stub in this port; the comment in the body describes the
+ * hook's role, not work done here.
+ */
+void
+fpunoted(void)
+{
+	/*
+	 * Called from sysnoted() via the machine-dependent
+	 * noted() routine.
+	 * Clear the flag set above in fpunotify().
+	 */
+}
+
+/*
+ * Empty stub in this port; the comment in the body describes the
+ * hook's role, not work done here.
+ */
+void
+fpusysrfork(Ureg*)
+{
+	/*
+	 * Called early in the non-interruptible path of
+	 * sysrfork() via the machine-dependent syscall() routine.
+	 * Save the state so that it can be easily copied
+	 * to the child process later.
+	 */
+}
+
+/*
+ * Empty stub in this port; the comment in the body describes the
+ * hook's role, not work done here.
+ */
+void
+fpusysrforkchild(Proc*, Proc*)
+{
+	/*
+	 * Called later in sysrfork() via the machine-dependent
+	 * sysrforkchild() routine.
+	 * Copy the parent FPU state to the child.
+	 */
+}
+
+/*
+ * Empty stub in this port; the comment in the body describes the
+ * hook's role, not work done here.
+ */
+void
+fpuprocsave(Proc*)
+{
+	/*
+	 * Called from sched() and sleep() via the machine-dependent
+	 * procsave() routine.
+	 * About to go in to the scheduler.
+	 * If the process wasn't using the FPU
+	 * there's nothing to do.
+	 */
+}
+
+/*
+ * Empty stub in this port; the comment in the body describes the
+ * hook's role, not work done here.
+ */
+void
+fpuprocrestore(Proc*)
+{
+	/*
+	 * The process has been rescheduled and is about to run.
+	 * Nothing to do here right now. If the process tries to use
+	 * the FPU again it will cause a Device Not Available
+	 * exception and the state will then be restored.
+	 */
+}
+
+/*
+ * Empty stub in this port; the comment in the body describes the
+ * hook's role, not work done here.
+ */
+void
+fpusysprocsetup(Proc*)
+{
+	/*
+	 * Disable the FPU.
+	 * Called from sysexec() via sysprocsetup() to
+	 * set the FPU for the new process.
+	 */
+}
+
+/* Empty stub: there is nothing to initialise in this port. */
+void
+fpuinit(void)
+{
+}
+
+/*
+ * Run the floating-point emulator fpiarm() on the trapped state in
+ * ureg, with interrupts enabled, and return its result.
+ * If the emulator raises an error, post up->errstr to the current
+ * process as an NDebug note and return 1.
+ * Returns at high priority level on both paths.
+ */
+int
+fpuemu(Ureg* ureg)
+{
+	int nfp;
+
+	if(waserror()){
+		splhi();
+		postnote(up, 1, up->errstr, NDebug);
+		return 1;
+	}
+	spllo();
+	nfp = fpiarm(ureg);
+	splhi();
+	poperror();
+
+	return nfp;
+}

+ 331 - 0
sys/src/9/kw/syscall.c

@@ -0,0 +1,331 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../port/systab.h"
+
+#include <tos.h>
+#include "ureg.h"
+
+#include "arm.h"
+
+/*
+ * Frame built on the user stack by notify() when a note is
+ * delivered, and read back by noted().
+ */
+typedef struct {
+	uintptr	ip;		/* set to 0 by notify and noted */
+	Ureg*	arg0;		/* points at ureg below */
+	char*	arg1;		/* points at msg below */
+	char	msg[ERRMAX];	/* copy of the note text */
+	Ureg*	old;		/* previous up->ureg, restored by noted */
+	Ureg	ureg;		/* user registers at the time of the note */
+} NFrame;
+
+/*
+ *   Return user to state before notify()
+ *
+ *   cur is the Ureg of the noted system call; arg0 is its argument
+ *   (NCONT, NRSTR, NSAVE, NDFLT).  The registers saved by notify()
+ *   in the NFrame at up->ureg are copied back into cur.  That frame
+ *   lives in user memory, so it is validated and the privileged psr
+ *   bits in it are ignored.  The process is killed on any
+ *   inconsistency and for NDFLT or an unknown arg0.
+ */
+static void
+noted(Ureg* cur, uintptr arg0)
+{
+	NFrame *nf;
+	Ureg *nur;
+
+	qlock(&up->debug);
+	if(arg0 != NRSTR && !up->notified){
+		qunlock(&up->debug);
+		pprint("call to noted() when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+	fpunoted();
+
+	nf = up->ureg;
+
+	/* sanity clause */
+	if(!okaddr(PTR2UINT(nf), sizeof(NFrame), 0)){
+		pprint("bad ureg in noted %#p\n", nf);
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	/*
+	 * don't let user change system flags: the mode and
+	 * interrupt-disable bits always come from the current psr;
+	 * only the remaining bits are taken from the user's frame.
+	 */
+	nur = &nf->ureg;
+	nur->psr &= ~(PsrMask|PsrDfiq|PsrDirq);
+	nur->psr |= cur->psr & (PsrMask|PsrDfiq|PsrDirq);
+
+	memmove(cur, nur, sizeof(Ureg));
+
+	switch((int)arg0){
+	case NCONT:
+	case NRSTR:
+		if(!okaddr(nur->pc, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		/* pop this note frame */
+		up->ureg = nf->old;
+		qunlock(&up->debug);
+		break;
+	case NSAVE:
+		if(!okaddr(nur->pc, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+
+		/* keep the frame on the user stack and resume just below it */
+		splhi();
+		nf->arg1 = nf->msg;
+		nf->arg0 = &nf->ureg;
+		nf->ip = 0;
+		cur->sp = PTR2UINT(nf);
+		break;
+	default:
+		pprint("unknown noted arg %#p\n", arg0);
+		up->lastnote.flag = NDebug;
+		/*FALLTHROUGH*/
+	case NDFLT:
+		if(up->lastnote.flag == NDebug){
+			qunlock(&up->debug);
+			pprint("suicide: %s\n", up->lastnote.msg);
+		}
+		else
+			qunlock(&up->debug);
+		pexit(up->lastnote.msg, up->lastnote.flag != NDebug);
+	}
+}
+
+/*
+ *  Call user, if necessary, with note.
+ *  Pass user the Ureg struct and the note on his stack.
+ *
+ *  Returns 1 if ureg was rewritten to enter the user's note handler,
+ *  0 if there was nothing to deliver or a handler is already running.
+ *  Does not return if the note kills the process.
+ */
+int
+notify(Ureg* ureg)
+{
+	int l;
+	Note *n;
+	u32int s;
+	uintptr sp;
+	NFrame *nf;
+
+	if(up->procctl)
+		procctl(up);
+	if(up->nnote == 0)
+		return 0;
+
+	fpunotify(ureg);
+
+	s = spllo();
+	qlock(&up->debug);
+
+	up->notepending = 0;
+	n = &up->note[0];
+	/* append the faulting pc to kernel-generated ("sys:") notes */
+	if(strncmp(n->msg, "sys:", 4) == 0){
+		l = strlen(n->msg);
+		if(l > ERRMAX-23)	/* " pc=0x0123456789abcdef\0" */
+			l = ERRMAX-23;
+		snprint(n->msg + l, sizeof n->msg - l, " pc=%#ux", ureg->pc);
+	}
+
+	/* a non-user note with no handler, or while in a handler, is fatal */
+	if(n->flag != NUser && (up->notified || up->notify == 0)){
+		if(n->flag == NDebug)
+			pprint("suicide: %s\n", n->msg);
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag != NDebug);
+	}
+
+	/* already in a handler: leave the note queued */
+	if(up->notified){
+		qunlock(&up->debug);
+		splhi();
+		return 0;
+	}
+		
+	if(up->notify == nil){
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag != NDebug);
+	}
+	if(!okaddr(PTR2UINT(up->notify), 1, 0)){
+		pprint("suicide: notify function address %#p\n", up->notify);
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	/* build an NFrame just below the user's stack pointer */
+	sp = ureg->sp - sizeof(NFrame);
+	if(!okaddr(sp, sizeof(NFrame), 1)){
+		pprint("suicide: notify stack address %#p\n", sp);
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	nf = UINT2PTR(sp);
+	memmove(&nf->ureg, ureg, sizeof(Ureg));
+	nf->old = up->ureg;
+	up->ureg = nf;
+	memmove(nf->msg, up->note[0].msg, ERRMAX);
+	nf->arg1 = nf->msg;
+	nf->arg0 = &nf->ureg;
+	nf->ip = 0;
+
+	/* resume the user in the handler, on the new frame */
+	ureg->sp = sp;
+	ureg->pc = PTR2UINT(up->notify);
+	up->notified = 1;
+	/* dequeue the note, remembering it in lastnote */
+	up->nnote--;
+	memmove(&up->lastnote, &up->note[0], sizeof(Note));
+	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+	qunlock(&up->debug);
+	splx(s);
+
+	return 1;
+}
+
+/*
+ * System call entry, called with the user's registers in ureg.
+ * The call number is taken from r0 and the arguments from the user
+ * stack; the result (or -1 on error) is stored back into r0.
+ * Pending notes are delivered before returning to user mode
+ * through kexit().
+ */
+void
+syscall(Ureg* ureg)
+{
+	char *e;
+	u32int s;
+	ulong sp;
+	long ret;
+	int i, scallnr;
+
+	if(!userureg(ureg))
+		panic("syscall: pc %#ux r14 %#ux psr %#ux",
+			ureg->pc, ureg->r14, ureg->psr);
+
+	cycles(&up->kentry);
+
+	m->syscall++;
+	up->insyscall = 1;
+	up->pc = ureg->pc;
+	up->dbgreg = ureg;
+
+	if(up->procctl == Proc_tracesyscall){
+		up->procctl = Proc_stopme;
+		procctl(up);
+	}
+
+	scallnr = ureg->r0;
+	up->scallnr = scallnr;
+	if(scallnr == RFORK)
+		fpusysrfork(ureg);
+	spllo();
+
+	sp = ureg->sp;
+	up->nerrlab = 0;
+	ret = -1;
+	if(!waserror()){
+		/*
+		 * scallnr is signed and comes straight from the user:
+		 * reject negative numbers and unassigned table slots
+		 * as well as numbers past the end of the table.
+		 */
+		if(scallnr < 0 || scallnr >= nsyscall || systab[scallnr] == nil){
+			pprint("bad sys call number %d pc %#ux\n",
+				scallnr, ureg->pc);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+
+		/* only check the argument area if it is not within the top stack page */
+		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+		up->s = *((Sargs*)(sp+BY2WD));
+		up->psstate = sysctab[scallnr];
+
+	/*	iprint("%s: syscall %s\n", up->text, sysctab[scallnr]?sysctab[scallnr]:"huh?"); */
+
+		ret = systab[scallnr](up->s.args);
+		poperror();
+	}else{
+		/* failure: save the error buffer for errstr */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+	}
+	/* every waserror() must have been matched by a poperror() */
+	if(up->nerrlab){
+		print("bad errstack [%d]: %d extra\n", scallnr, up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			print("sp=%#p pc=%#p\n",
+				up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+
+	/*
+	 *  Put return value in frame.  On the x86 the syscall is
+	 *  just another trap and the return value from syscall is
+	 *  ignored.  On other machines the return value is put into
+	 *  the results register by caller of syscall.
+	 */
+	ureg->r0 = ret;
+
+	if(up->procctl == Proc_tracesyscall){
+		up->procctl = Proc_stopme;
+		s = splhi();
+		procctl(up);
+		splx(s);
+	}
+
+	up->insyscall = 0;
+	up->psstate = 0;
+
+	/* noted rewrites ureg from the saved note frame */
+	if(scallnr == NOTED)
+		noted(ureg, *(ulong*)(sp+BY2WD));
+
+	splhi();
+	if(scallnr != RFORK && (up->procctl || up->nnote))
+		notify(ureg);
+
+	/* if we delayed sched because we held a lock, sched now */
+	if(up->delaysched){
+		sched();
+		splhi();
+	}
+	kexit(ureg);
+}
+
+/*
+ * Set up the user registers for a freshly exec'ed image: push nargs
+ * just below the ssize bytes at the top of the user stack, and set
+ * the saved stack pointer (r13) and pc in up->dbgreg.
+ * Returns the user address of the Tos structure at the stack top.
+ */
+long	/* void* */
+execregs(ulong entry, ulong ssize, ulong nargs)
+{
+	ulong *sp;
+	Ureg *ureg;
+
+	sp = (ulong*)(USTKTOP - ssize);
+	*--sp = nargs;
+
+	ureg = up->dbgreg;
+//	memset(ureg, 0, 15*sizeof(ulong));
+	ureg->r13 = (ulong)sp;
+	ureg->pc = entry;
+//print("%lud: EXECREGS pc %#ux sp %#ux nargs %ld\n", up->pid, ureg->pc, ureg->r13, nargs);
+
+	/*
+	 * return the address of kernel/user shared data
+	 * (e.g. clock stuff)
+	 */
+	return USTKTOP-sizeof(Tos);
+}
+
+/* Per-process setup hook; here it only forwards to fpusysprocsetup(). */
+void
+sysprocsetup(Proc* p)
+{
+	fpusysprocsetup(p);
+}
+
+/*
+ *  Craft a return frame which will cause the child to pop out of
+ *  the scheduler in user mode with the return register zero.  Set
+ *  pc to point to a l.s return function.
+ *
+ *  p is the child; ureg is the parent's user register set, which is
+ *  copied to the top of the child's kernel stack.
+ */
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+	Ureg *cureg;
+
+//print("%lud setting up for forking child %lud\n", up->pid, p->pid);
+	/* reserve room for a Ureg at the top of the child's kernel stack */
+	p->sched.sp = (ulong)p->kstack+KSTACK-sizeof(Ureg);
+	p->sched.pc = (ulong)forkret;
+
+	cureg = (Ureg*)(p->sched.sp);
+	memmove(cureg, ureg, sizeof(Ureg));
+
+	/* syscall returns 0 for child */
+	cureg->r0 = 0;
+
+	/* Things from bottom of syscall which were never executed */
+	p->psstate = 0;
+	p->insyscall = 0;
+}

+ 629 - 0
sys/src/9/kw/trap.c

@@ -0,0 +1,629 @@
+/*
+ * sheevaplug traps, exceptions, interrupts, system calls.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "../port/error.h"
+
+#include "arm.h"
+
+enum {
+	Ntimevec = 20			/* # of time buckets for each intr */
+};
+
+extern int notify(Ureg*);
+
+extern int ldrexvalid;
+
+/*
+ * one handler registered on a vector; handlers sharing a vector are
+ * chained through next.
+ * NOTE(review): nothing else in this file's visible code uses Vctl;
+ * the Handler tables further down do the dispatching.
+ */
+typedef struct Vctl Vctl;
+typedef struct Vctl {
+	Vctl*	next;		/* handlers on this vector */
+	char	*name;		/* of driver, xallocated */
+	void	(*f)(Ureg*, void*);	/* handler to call */
+	void*	a;		/* argument to call it with */
+} Vctl;
+
+static Lock vctllock;
+static Vctl* vctl[32];
+
+uvlong ninterrupt;
+uvlong ninterruptticks;
+ulong intrtimes[256][Ntimevec];
+
+/*
+ * one interrupt handler slot; a slot is free when r is nil.
+ */
+typedef struct Handler Handler;
+struct Handler {
+	void	(*r)(Ureg*, void*);	/* routine to call, nil if unused */
+	void	*a;			/* second argument passed to r */
+	char	name[KNAMELEN];		/* name given when registered */
+};
+
+static Handler irqlo[32];
+static Handler irqhi[32];
+static Handler irqbridge[32];
+static Lock irqlock;
+static int probing, trapped;
+
+/*
+ * description of one interrupt controller (a "sort" in the
+ * intr* functions): its registers and its table of handlers.
+ */
+typedef struct Irq Irq;
+struct Irq {
+	ulong	*irq;		/* cause register */
+	ulong	*irqmask;	/* mask register; a set bit enables that interrupt */
+	Handler	*irqvec;	/* handler slots, indexed by bit number */
+	int	nirqvec;	/* number of slots in irqvec */
+	char	*name;		/* printed in diagnostics */
+};
+/* the interrupt controllers, indexed by sort (Irqlo, Irqhi, Irqbridge) */
+static Irq irqs[] = {
+[Irqlo]	{&INTRREG->lo.irq, &INTRREG->lo.irqmask, irqlo,	nelem(irqlo), "lo"},
+[Irqhi]	{&INTRREG->hi.irq, &INTRREG->hi.irqmask, irqhi,	nelem(irqhi), "hi"},
+[Irqbridge] {&CPUCSREG->irq, &CPUCSREG->irqmask, irqbridge, nelem(irqbridge), "bridge"},
+};
+
+/*
+ *  keep histogram of interrupt service times.
+ *  charges the ticks elapsed since m->perf.intrts to interrupt time
+ *  (and takes them off the idle count when no process is running),
+ *  then counts the interval in intrtimes[vno], in 100µsec buckets;
+ *  the last bucket collects everything longer.
+ */
+void
+intrtime(Mach*, int vno)
+{
+	ulong diff, x, quantum;
+
+	if (m == nil)
+		return;
+	x = perfticks();
+	diff = x - m->perf.intrts;
+	m->perf.intrts = x;
+
+	m->perf.inintr += diff;
+	if(up == nil && m->perf.inidle > diff)
+		m->perf.inidle -= diff;
+
+	/*
+	 * ticks per 100µsec quantum.  this is zero when cpuhz is not
+	 * set yet, and also for any cpuhz below 1MHz, so test the
+	 * divisor itself rather than cpuhz to avoid dividing by zero.
+	 */
+	quantum = (m->cpuhz/1000000)*100;
+	if(quantum == 0)
+		return;
+	diff /= quantum;
+	if(diff >= Ntimevec)
+		diff = Ntimevec-1;
+	intrtimes[vno][diff]++;
+}
+
+/* placeholder: formats nothing, the buffer [s, se) is left untouched */
+void
+intrfmtcounts(char *s, char *se)
+{
+	USED(s, se);
+}
+
+/* placeholder: does nothing */
+static void
+dumpcounts(void)
+{
+}
+
+/*
+ *  acknowledge interrupt v of controller sort by writing a word
+ *  with only bit v clear to its cause register.
+ */
+void
+intrclear(int sort, int v)
+{
+	ulong *cause;
+
+	cause = irqs[sort].irq;
+	*cause = ~(1 << v);
+}
+
+/*
+ *  disable interrupt v of controller sort by clearing its mask bit.
+ */
+void
+intrmask(int sort, int v)
+{
+	ulong *mask;
+
+	mask = irqs[sort].irqmask;
+	*mask = *mask & ~(1 << v);
+}
+
+/*
+ *  enable interrupt v of controller sort by setting its mask bit.
+ */
+void
+intrunmask(int sort, int v)
+{
+	ulong *mask;
+
+	mask = irqs[sort].irqmask;
+	*mask = *mask | 1 << v;
+}
+
+/*
+ *  disable every irq source by zeroing the lo, hi and bridge
+ *  mask registers.
+ */
+static void
+maskallints(void)
+{
+	/* no fiq or ep in use */
+	INTRREG->lo.irqmask = 0;
+	INTRREG->hi.irqmask = 0;
+	CPUCSREG->irqmask = 0;
+}
+
+/*
+ *  install f(ureg, a) in slot h under the given name.
+ *  a slot that already has a handler is silently left as it is.
+ */
+void
+intrset(Handler *h, void (*f)(Ureg*, void*), void *a, char *name)
+{
+	if(h->r == nil) {
+		h->r = f;
+		h->a = a;
+		/* name is truncated to fit and always terminated */
+		strncpy(h->name, name, KNAMELEN-1);
+		h->name[KNAMELEN-1] = 0;
+	}
+}
+
+/* mark slot h free: no routine, no argument, empty name */
+void
+intrunset(Handler *h)
+{
+	h->r = nil;
+	h->a = nil;
+	h->name[0] = 0;
+}
+
+/*
+ *  free slot h, but only if it currently holds exactly this
+ *  routine, argument and name; otherwise leave it alone.
+ */
+void
+intrdel(Handler *h, void (*f)(Ureg*, void*), void *a, char *name)
+{
+	if(h->r == f && h->a == a && strcmp(h->name, name) == 0)
+		intrunset(h);
+}
+
+/*
+ *  register f(ureg, a) as the handler for interrupt v of controller
+ *  sort and unmask that interrupt, all under irqlock.
+ *  sort and v are used as array indices without range checks.
+ */
+void
+intrenable(int sort, int v, void (*f)(Ureg*, void*), void *a, char *name)
+{
+//iprint("enabling intr %d vec %d for %s\n", sort, v, name);
+	ilock(&irqlock);
+	intrset(&irqs[sort].irqvec[v], f, a, name);
+	intrunmask(sort, v);
+	iunlock(&irqlock);
+}
+
+/*
+ *  undo intrenable: drop the handler for interrupt v of controller
+ *  sort (if f, a and name match what was registered) and mask the
+ *  interrupt, all under irqlock.
+ *  NOTE(review): the interrupt is masked even when intrdel found a
+ *  different handler in the slot and removed nothing.
+ */
+void
+intrdisable(int sort, int v, void (*f)(Ureg*, void*), void* a, char *name)
+{
+	ilock(&irqlock);
+	intrdel(&irqs[sort].irqvec[v], f, a, name);
+	intrmask(sort, v);
+	iunlock(&irqlock);
+}
+
+/*
+ *  called by trap to handle interrupts.
+ *  runs the registered handler for every pending, unmasked interrupt
+ *  of controller sort; bits left without a handler are reported as
+ *  spurious.
+ */
+static void
+intrs(Ureg *ur, int sort)
+{
+	int i, s;
+	ulong ibits;
+	Handler *h;
+	Irq irq;
+
+	irq = irqs[sort];
+	/* pending = cause bits that are also enabled in the mask */
+	ibits = *irq.irq;
+	ibits &= *irq.irqmask;
+
+	for(i = 0; i < irq.nirqvec && ibits; i++)
+		if(ibits & (1<<i)){
+			h = &irq.irqvec[i];
+			if(h->r != nil){
+				h->r(ur, h->a);
+				splhi();
+				intrtime(m, sort*32 + i);
+				/* remembered so that trap may call sched() */
+				if (sort == Irqbridge && i == IRQcputimer0)
+					m->inclockintr = 1;
+				ibits &= ~(1<<i);	/* serviced */
+			}
+		}
+	/* whatever is still set had no handler */
+	if(ibits != 0) {
+		iprint("spurious irq%s interrupt: %8.8lux\n", irq.name, ibits);
+		/*
+		 * NOTE(review): intrclear() acknowledges a bit by writing a
+		 * word with only that bit clear; this instead ANDs the cause
+		 * register with the spurious bits themselves.  confirm
+		 * against the hardware manual that this is intended.
+		 */
+		s = splfhi();
+		*irq.irq &= ibits;
+		splx(s);
+	}
+}
+
+/*
+ *  handler registered by trapinit on Irqlo/IRQ0hisum:
+ *  services the interrupts of the hi controller.
+ */
+void
+intrhi(Ureg *ureg, void*)
+{
+	intrs(ureg, Irqhi);
+}
+
+/*
+ *  handler registered by trapinit on Irqlo/IRQ0bridge:
+ *  services the bridge interrupts, then acknowledges the
+ *  summary bit in the lo controller.
+ */
+void
+intrbridge(Ureg *ureg, void*)
+{
+	intrs(ureg, Irqbridge);
+	intrclear(Irqlo, IRQ0bridge);
+}
+
+/*
+ *  set up exception handling: per-mode stacks, the vector page,
+ *  empty handler tables and quiescent interrupt controllers, then
+ *  hook the hi and bridge summary interrupts into the lo controller.
+ */
+void
+trapinit(void)
+{
+	int i;
+	CpucsReg *cpu;
+	IntrReg *intr;
+	Vectorpage *page0 = (Vectorpage*)HVECTORS;
+
+	/* hand each exception mode the end of its stack array in Mach */
+	setr13(PsrMfiq, m->fiqstack + nelem(m->fiqstack));
+	setr13(PsrMirq, m->irqstack + nelem(m->irqstack));
+	setr13(PsrMabt, m->abtstack + nelem(m->abtstack));
+	setr13(PsrMund, m->undstack + nelem(m->undstack));
+
+	/* copy the vectors and their table to HVECTORS */
+	memmove(page0->vectors, vectors, sizeof page0->vectors);
+	memmove(page0->vtable,  vtable,  sizeof page0->vtable);
+	cacheuwbinv();
+
+	cpu = CPUCSREG;
+	cpu->cpucfg &= ~Cfgvecinithi;
+
+	/* start with no handlers registered */
+	for(i = 0; i < nelem(irqlo); i++)
+		intrunset(&irqlo[i]);
+	for(i = 0; i < nelem(irqhi); i++)
+		intrunset(&irqhi[i]);
+	for(i = 0; i < nelem(irqbridge); i++)
+		intrunset(&irqbridge[i]);
+
+	/* disable all interrupts */
+	intr = INTRREG;
+	intr->lo.fiqmask = intr->hi.fiqmask = 0;
+	intr->lo.irqmask = intr->hi.irqmask = 0;
+	intr->lo.epmask =  intr->hi.epmask = 0;
+	cpu->irqmask = 0;
+
+	/* clear interrupts */
+	intr->lo.irq = intr->hi.irq = ~0;
+	cpu->irq = ~0;
+
+	/* hi and bridge interrupts arrive as summary bits of lo */
+	intrenable(Irqlo, IRQ0hisum, intrhi, nil, "hi");
+	intrenable(Irqlo, IRQ0bridge, intrbridge, nil, "bridge");
+}
+
+/*
+ * names for psr modes and trap types, indexed by value & PsrMask;
+ * entries not listed are nil.  PsrMabt+1 is the type trap() uses
+ * for data aborts.
+ */
+static char *trapnames[PsrMask+1] = {
+	[ PsrMusr ] "user mode",
+	[ PsrMfiq ] "fiq interrupt",
+	[ PsrMirq ] "irq interrupt",
+	[ PsrMsvc ] "svc/swi exception",
+	[ PsrMabt ] "prefetch abort/data abort",
+	[ PsrMabt+1 ] "data abort",
+	[ PsrMund ] "undefined instruction",
+	[ PsrMsys ] "sys trap",
+};
+
+/*
+ *  printable name for the mode bits of psr (or for a trap type);
+ *  never returns nil.
+ */
+static char *
+trapname(int psr)
+{
+	char *name;
+
+	name = trapnames[psr & PsrMask];
+	if(name != nil)
+		return name;
+	return "unknown trap number in psr";
+}
+
+/*
+ *  called by trap to handle access faults.
+ *  passes the faulting address va to fault(); if that fails, a
+ *  kernel-mode fault panics and a user-mode fault posts a
+ *  "sys: trap: fault" note to the process.
+ *  read is non-zero for a read access, zero for a write.
+ */
+static void
+faultarm(Ureg *ureg, uintptr va, int user, int read)
+{
+	int n, insyscall;
+	char buf[ERRMAX];
+
+	if(up == nil) {
+		dumpregs(ureg);
+		panic("fault: nil up in faultarm, accessing %#p", va);
+	}
+	/* set insyscall while in fault(), restoring the old value after */
+	insyscall = up->insyscall;
+	up->insyscall = 1;
+	n = fault(va, read);
+	if(n < 0){
+		if(!user){
+			dumpregs(ureg);
+			panic("fault: kernel accessing %#p", va);
+		}
+		/* don't dump registers; programs suicide all the time */
+		snprint(buf, sizeof buf, "sys: trap: fault %s va=%#p",
+			read? "read": "write", va);
+		postnote(up, 1, buf, NDebug);
+	}
+	up->insyscall = insyscall;
+}
+
+/*
+ *  classify the instruction word inst:
+ *  returns 1 if it writes memory, 0 otherwise
+ */
+int
+writetomem(ulong inst)
+{
+	/* a swap always writes memory */
+	if((inst & 0x0FC00000) == 0x01000000)
+		return 1;
+
+	/* otherwise bit 20 decides: set for a load, clear for a store */
+	return (inst & (1<<20)) == 0;
+}
+
+/*
+ *  handle the exception described by ureg; ureg->type selects among
+ *  irq interrupts, prefetch aborts, data aborts (PsrMabt+1) and
+ *  undefined instructions.  panics on kernel-mode errors; user-mode
+ *  errors are turned into notes for the current process.  on the way
+ *  out it may reschedule after a clock interrupt and, when returning
+ *  to user mode, delivers pending notes.
+ */
+void
+trap(Ureg *ureg)
+{
+	int user, x, rv, rem;
+	ulong inst;
+	u32int fsr;
+	uintptr va;
+	char buf[ERRMAX];
+
+	/* bytes between ureg and the base of the stack we are running on */
+	if(up != nil)
+		rem = (char*)ureg - up->kstack;
+	else
+		rem = (char*)ureg - ((char*)m + sizeof(Mach));
+	if(rem < 256) {
+		dumpstack();
+		panic("trap %d bytes remaining, up %#p ureg %#p at pc %#ux",
+			rem, up, ureg, ureg->pc);
+	}
+
+	user = (ureg->psr & PsrMask) == PsrMusr;
+
+	/* back the saved pc up: 8 bytes for a data abort, 4 otherwise */
+	if(ureg->type == PsrMabt+1)
+		ureg->pc -= 8;
+	else
+		ureg->pc -= 4;
+
+	m->inclockintr = 0;
+	switch(ureg->type) {
+	default:
+		panic("unknown trap %d", ureg->type);
+		break;
+	case PsrMirq:
+		ldrexvalid = 0;
+		// splflo();		/* allow fast interrupts */
+		intrs(ureg, Irqlo);
+		m->intr++;
+		break;
+	case PsrMabt:			/* prefetch fault */
+		ldrexvalid = 0;
+		faultarm(ureg, ureg->pc, user, 1);
+		break;
+	case PsrMabt+1:			/* data fault */
+		ldrexvalid = 0;
+		va = farget();
+		inst = *(ulong*)(ureg->pc);
+		fsr = fsrget() & 0xf;
+		/* probeaddr() is testing an address: note the fault and go on */
+		if (probing && !user) {
+			if (trapped++ > 0)
+				panic("trap: recursive probe %#lux", va);
+			ureg->pc += 4;	/* continue at next instruction */
+			break;
+		}
+		switch(fsr){
+		case 0x0:
+			panic("vector exception at %#ux", ureg->pc);
+			break;
+		case 0x1:
+		case 0x3:
+			if(user){
+				snprint(buf, sizeof buf,
+					"sys: alignment: pc %#ux va %#p\n",
+					ureg->pc, va);
+				postnote(up, 1, buf, NDebug);
+			} else
+				panic("kernel alignment: pc %#ux va %#p", ureg->pc, va);
+			break;
+		case 0x2:
+			panic("terminal exception at %#ux", ureg->pc);
+			break;
+		case 0x4:
+		case 0x6:
+		case 0x8:
+		case 0xa:
+		case 0xc:
+		case 0xe:
+			/* was "%#px", which printed a stray 'x' after the address */
+			panic("external abort %#ux pc %#ux addr %#p",
+				fsr, ureg->pc, va);
+			break;
+		case 0x5:		/* translation fault, no section entry */
+		case 0x7:		/* translation fault, no page entry */
+			faultarm(ureg, va, user, !writetomem(inst));
+			break;
+		case 0x9:
+		case 0xb:
+			/* domain fault, accessing something we shouldn't */
+			if(user){
+				snprint(buf, sizeof buf,
+					"sys: access violation: pc %#ux va %#p\n",
+					ureg->pc, va);
+				postnote(up, 1, buf, NDebug);
+			} else
+				panic("kernel access violation: pc %#ux va %#p",
+					ureg->pc, va);
+			break;
+		case 0xd:
+		case 0xf:
+			/* permission error, copy on write or real permission error */
+			faultarm(ureg, va, user, !writetomem(inst));
+			break;
+		}
+		break;
+	case PsrMund:	/* undefined instruction */
+		if(user){
+			/* look for floating point instructions to interpret */
+			x = spllo();
+			rv = fpiarm(ureg);
+			splx(x);
+			if(rv == 0){
+				ldrexvalid = 0;
+				snprint(buf, sizeof buf,
+					"undefined instruction: pc %#ux\n",
+					ureg->pc);
+				postnote(up, 1, buf, NDebug);
+			}
+		}else{
+			iprint("undefined instruction: pc %#ux inst %#ux\n",
+				ureg->pc, ((u32int*)ureg->pc)[-2]);
+			panic("undefined instruction");
+		}
+		break;
+	}
+	splhi();
+
+	/* delaysched set because we held a lock or because our quantum ended */
+	if(up && up->delaysched && m->inclockintr){
+		ldrexvalid = 0;
+		sched();
+		splhi();
+	}
+
+	if(user){
+		if(up->procctl || up->nnote)
+			notify(ureg);
+		kexit(ureg);
+	}
+}
+
+/* true if v lies at or above KZERO */
+int
+isvalidaddr(void *v)
+{
+	return (uintptr)v >= KZERO;
+}
+
+/*
+ *  print up to n longs starting at v, four to a line, labelled with
+ *  msg.  stops early, printing " invalid", at the first address
+ *  that fails isvalidaddr.
+ */
+void
+dumplongs(char *msg, ulong *v, int n)
+{
+	int i, l;
+
+	l = 0;		/* values printed on the current line */
+	iprint("%s at %.8p: ", msg, v);
+	for(i=0; i<n; i++){
+		if(l >= 4){
+			iprint("\n    %.8p: ", v);
+			l = 0;
+		}
+		if(isvalidaddr(v)){
+			iprint(" %.8lux", *v++);
+			l++;
+		}else{
+			iprint(" invalid");
+			break;
+		}
+	}
+	iprint("\n");
+}
+
+/*
+ *  print a ktrace command line with the pc, sp and link from ureg.
+ *  when built with AMBITIOUS, also scan the current stack and print
+ *  words that look like return addresses into kernel text.
+ */
+static void
+dumpstackwithureg(Ureg *ureg)
+{
+	iprint("ktrace /kernel/path %#.8ux %#.8ux %#.8ux # pc, sp, link\n",
+		ureg->pc, ureg->sp, ureg->r14);
+	delay(2000);
+#ifdef AMBITIOUS
+	uintptr l, i, v, estack;
+	u32int *p;
+
+	/* find the end of the stack that the local l lives on */
+	i = 0;
+	if(up != nil && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
+		estack = (uintptr)up->kstack+KSTACK;
+	else if((uintptr)&l >= (uintptr)m->stack
+	     && (uintptr)&l <= (uintptr)m+MACHSIZE)
+		estack = (uintptr)m+MACHSIZE;
+	else{
+		if(up != nil)
+			iprint("&up->kstack %#p &l %#p\n", up->kstack, &l);
+		else
+			iprint("&m %#p &l %#p\n", m, &l);
+		return;
+	}
+	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
+		v = *(uintptr*)l;
+		/* candidate: word-aligned value inside kernel text */
+		if(KTZERO < v && v < (uintptr)etext && !(v & 3)){
+			/* ...whose preceding instruction matches the pattern below */
+			v -= sizeof(u32int);
+			p = (u32int*)v;
+			if((*p & 0x0f000000) == 0x0b000000){	/* magic */
+				iprint("%#8.8lux=%#8.8lux ", l, v);
+				i++;
+			}
+		}
+		/* four entries per output line */
+		if(i == 4){
+			i = 0;
+			iprint("\n");
+		}
+	}
+	if(i)
+		iprint("\n");
+#endif
+}
+
+/*
+ * Fill in enough of Ureg to get a stack trace, and call a function.
+ * Used by debugging interface rdb.
+ * Only pc and sp are set; every other field of the Ureg is left
+ * uninitialised.
+ * NOTE(review): dumpstackwithureg also prints r14, which is not set here.
+ */
+void
+callwithureg(void (*fn)(Ureg*))
+{
+	Ureg ureg;
+
+	ureg.pc = getcallerpc(&fn);
+	ureg.sp = PTR2UINT(&fn);
+	fn(&ureg);
+}
+
+/* print a stack trace for the caller, via dumpstackwithureg */
+void
+dumpstack(void)
+{
+	callwithureg(dumpstackwithureg);
+}
+
+/*
+ *  print the trap type, the registers saved in ureg, the bounds of
+ *  the current stack and the 16 longs following ureg, then a stack
+ *  trace.  runs at splhi and restores the previous level on return.
+ */
+void
+dumpregs(Ureg* ureg)
+{
+	int s;
+
+	if (ureg == nil) {
+		iprint("trap: no user process\n");
+		return;
+	}
+	s = splhi();
+	iprint("trap: %s", trapname(ureg->type));
+	/* ureg cannot be nil here; the test below is redundant */
+	if(ureg != nil && (ureg->psr & PsrMask) != PsrMsvc)
+		iprint(" in %s", trapname(ureg->psr));
+	iprint("\n");
+	iprint("psr %8.8ux type %2.2ux pc %8.8ux link %8.8ux\n",
+		ureg->psr, ureg->type, ureg->pc, ureg->link);
+	iprint("R14 %8.8ux R13 %8.8ux R12 %8.8ux R11 %8.8ux R10 %8.8ux\n",
+		ureg->r14, ureg->r13, ureg->r12, ureg->r11, ureg->r10);
+	iprint("R9  %8.8ux R8  %8.8ux R7  %8.8ux R6  %8.8ux R5  %8.8ux\n",
+		ureg->r9, ureg->r8, ureg->r7, ureg->r6, ureg->r5);
+	iprint("R4  %8.8ux R3  %8.8ux R2  %8.8ux R1  %8.8ux R0  %8.8ux\n",
+		ureg->r4, ureg->r3, ureg->r2, ureg->r1, ureg->r0);
+	iprint("stack is at %#p\n", ureg);
+	iprint("pc %#ux link %#ux\n", ureg->pc, ureg->link);
+
+	/* with a current process, the bounds printed are those of up->kstack */
+	if(up)
+		iprint("user stack: %#p-%#p\n", up->kstack, up->kstack+KSTACK-4);
+	else
+		iprint("kernel stack: %8.8lux-%8.8lux\n",
+			(ulong)(m+1), (ulong)m+BY2PG-4);
+	dumplongs("stack", (ulong *)(ureg + 1), 16);
+	delay(2000);
+	dumpstack();
+	splx(s);
+}
+
+/* C entry point that simply calls _idlehands, which is defined elsewhere */
+void
+idlehands(void)
+{
+	extern void _idlehands(void);
+
+	_idlehands();
+}
+
+/*
+ *  try to read the ulong at addr.  returns its value, or -1 if the
+ *  read faulted.  while probing is set, trap() skips a faulting
+ *  kernel-mode data access and counts it in trapped instead of
+ *  panicking.  serialised by fltlck.
+ */
+vlong
+probeaddr(uintptr addr)
+{
+	vlong v;
+	static Lock fltlck;
+
+	ilock(&fltlck);
+	trapped = 0;
+	probing = 1;
+	coherence();
+
+	v = *(ulong *)addr;	/* this may cause a fault */
+	USED(probing);
+	coherence();
+
+	probing = 0;
+	coherence();
+	/* the read faulted, so v does not hold a value read from addr */
+	if (trapped)
+		v = -1;
+	iunlock(&fltlck);
+	return v;
+}

+ 343 - 0
sys/src/9/kw/uartkw.c

@@ -0,0 +1,343 @@
+/*
+ * marvell kirkwood uart (supposed to be a 16550)
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+// #include "../port/uart.h"
+
+/*
+ * register bit definitions, grouped by name prefix.
+ * NOTE(review): the prefix-to-register mapping noted below is taken
+ * from how the constants are used in this file (IER* with ier, IRR*
+ * with iir, FCR* with fcr, LSR* with lsr); LCR* is unused here and
+ * presumably belongs to lcr.
+ */
+enum {
+	UartFREQ =	0, // xxx
+
+	/* ier */
+	IERrx		= 1<<0,
+	IERtx		= 1<<1,
+
+	/* iir */
+	IRRintrmask	= (1<<4)-1,
+	IRRnointr	= 1,
+	IRRthrempty	= 2,
+	IRRrxdata	= 4,
+	IRRrxstatus	= 6,
+	IRRtimeout	= 12,
+
+	IRRfifomask	= 3<<6,
+	IRRfifoenable	= 3<<6,
+
+	/* fcr */
+	FCRenable	= 1<<0,
+	FCRrxreset	= 1<<1,
+	FCRtxreset	= 1<<2,
+	/* reserved */
+	FCRrxtriggermask	= 3<<6,
+	FCRrxtrigger1	= 0<<6,
+	FCRrxtrigger4	= 1<<6,
+	FCRrxtrigger8	= 2<<6,
+	FCRrxtrigger14	= 3<<6,
+
+	/* lcr (presumably) */
+	LCRbpcmask	= 3<<0,
+	LCRbpc5		= 0<<0,
+	LCRbpc6		= 1<<0,
+	LCRbpc7		= 2<<0,
+	LCRbpc8		= 3<<0,
+	LCRstop2b	= 1<<2,
+	LCRparity	= 1<<3,
+	LCRparityeven	= 1<<4,
+	LCRbreak	= 1<<6,
+	LCRdivlatch	= 1<<7,
+
+	/* lsr */
+	LSRrx		= 1<<0,
+	LSRrunerr	= 1<<1,
+	LSRparerr	= 1<<2,
+	LSRframeerr	= 1<<3,
+	LSRbi		= 1<<4,
+	LSRthre		= 1<<5,
+	LSRtxempty	= 1<<6,
+	LSRfifoerr	= 1<<7,
+};
+
+extern PhysUart kwphysuart;
+
+#define UART0REG	((UartReg*)AddrUart0)
+
+/*
+ * layout of the uart's registers, one ulong per register.
+ * registers that share an address are overlaid with unions.
+ */
+typedef struct UartReg UartReg;
+struct UartReg
+{
+	union {
+		ulong	thr;	/* written to send a byte */
+		ulong	dll;
+		ulong	rbr;	/* read to fetch a received byte */
+	};
+	union {
+		ulong	ier;
+		ulong	dlh;
+	};
+	union {
+		ulong	iir;
+		ulong	fcr;
+	};
+	ulong	lcr;
+	ulong	mcr;
+	ulong	lsr;	/* line status: LSR* bits */
+	ulong	scr;
+};
+
+/* per-uart controller state; Uart.regs points at one of these */
+typedef struct Ctlr Ctlr;
+struct Ctlr {
+	UartReg*regs;		/* the device's registers */
+	int	irq;		/* its interrupt number within Irqhi */
+	Lock;
+};
+
+/* the controllers known to this driver: just uart 0 */
+static Ctlr kirkwoodctlr[] = {
+{
+	.regs   = UART0REG,
+	.irq    = IRQ1uart0, },
+};
+
+/* the uarts handed to the port uart code by kw_pnp: just eia0, the console */
+static Uart kirkwooduart[] = {
+{
+	.regs	= &kirkwoodctlr[0],
+	.name	= "eia0",
+	.freq	= UartFREQ,
+	.phys	= &kwphysuart,
+	.special= 0,
+	.console= 1,
+	.next	= nil, },
+};
+
+/*
+ *  drain the receiver: while the line status shows data, count any
+ *  overrun, parity and framing errors, fetch the byte, and pass it
+ *  to uartrecv unless it came with a break, framing or parity error.
+ */
+static void
+kw_read(Uart *uart)
+{
+	Ctlr *ctlr = uart->regs;
+	UartReg *regs = ctlr->regs;
+	ulong lsr;
+	char c;
+
+	while ((lsr = regs->lsr) & LSRrx) {
+		if(lsr&LSRrunerr)
+			uart->oerr++;
+		if(lsr&LSRparerr)
+			uart->perr++;
+		if(lsr&LSRframeerr)
+			uart->ferr++;
+		/* always read the byte, even if it is then discarded */
+		c = regs->rbr;
+		if((lsr & (LSRbi|LSRframeerr|LSRparerr)) == 0)
+			uartrecv(uart, c);
+	}
+}
+
+/*
+ *  interrupt handler; arg is the Uart given to intrenable.
+ *  restarts output and/or drains input according to iir, then
+ *  acknowledges the interrupt in the Irqhi controller.
+ *  NOTE(review): the IRR* values are tested as bit masks although
+ *  IRRrxstatus (6) and IRRtimeout (12) overlap IRRthrempty (2) and
+ *  IRRrxdata (4); confirm that this is intended.
+ */
+static void
+kw_intr(Ureg*, void *arg)
+{
+	Uart *uart = arg;
+	Ctlr *ctlr = uart->regs;
+	UartReg *regs = ctlr->regs;
+	ulong v;
+
+	v = regs->iir;
+	if(v & IRRthrempty)
+		uartkick(uart);
+	if(v & IRRrxdata)
+		kw_read(uart);
+
+	intrclear(Irqhi, ctlr->irq);
+}
+
+/* PhysUart pnp entry: returns the static table of uarts */
+static Uart*
+kw_pnp(void)
+{
+	return kirkwooduart;
+}
+
+/*
+ *  PhysUart enable entry: writes FCRenable|FCRrxtrigger4 to fcr,
+ *  enables receive and transmit interrupts in ier, installs kw_intr
+ *  and asserts dtr and rts through the phys methods.
+ *  ie is ignored: the interrupt handler is always installed.
+ */
+static void
+kw_enable(Uart* uart, int ie)
+{
+	Ctlr *ctlr = uart->regs;
+	UartReg *regs = ctlr->regs;
+
+	USED(ie);
+	regs->fcr = FCRenable|FCRrxtrigger4;
+	regs->ier = IERrx|IERtx;
+	intrenable(Irqhi, ctlr->irq, kw_intr, uart, uart->name);
+
+	(*uart->phys->dtr)(uart, 1);
+	(*uart->phys->rts)(uart, 1);
+}
+
+/*
+ *  PhysUart disable entry: calls the dtr, rts and fifo methods with 0
+ *  and removes the interrupt handler installed by kw_enable.
+ *  the device's ier and fcr registers are not touched here.
+ */
+static void
+kw_disable(Uart* uart)
+{
+	Ctlr *ctlr = uart->regs;
+
+	(*uart->phys->dtr)(uart, 0);
+	(*uart->phys->rts)(uart, 0);
+	(*uart->phys->fifo)(uart, 0);
+
+	intrdisable(Irqhi, ctlr->irq, kw_intr, uart, uart->name);
+}
+
+/*
+ *  PhysUart kick entry: (re)start output.  sends at most 16 bytes
+ *  from the uart's output buffer, stopping early when the
+ *  transmitter is not ready or no more output can be staged.
+ *  does nothing while cts is off or output is blocked.
+ */
+static void
+kw_kick(Uart* uart)
+{
+	Ctlr *ctlr = uart->regs;
+	UartReg *regs = ctlr->regs;
+	int i;
+
+	if(uart->cts == 0 || uart->blocked)
+		return;
+
+	for(i = 0; i < 16; i++) {
+		/* refill [op, oe) via uartstageoutput once it runs dry */
+		if((regs->lsr & LSRthre) == 0 ||
+		    uart->op >= uart->oe && uartstageoutput(uart) == 0)
+			break;
+		regs->thr = *uart->op++;
+	}
+}
+
+/* PhysUart dobreak entry: not implemented, does nothing */
+static void
+kw_break(Uart* uart, int ms)
+{
+	USED(uart, ms);
+}
+
+/* PhysUart baud entry: not implemented; ignores baud and returns 0 */
+static int
+kw_baud(Uart* uart, int baud)
+{
+	USED(uart, baud);
+	return 0;
+}
+
+/* PhysUart bits entry: not implemented; ignores bits and returns 0 */
+static int
+kw_bits(Uart* uart, int bits)
+{
+	USED(uart, bits);
+	return 0;
+}
+
+/* PhysUart stop entry: not implemented; ignores stop and returns 0 */
+static int
+kw_stop(Uart* uart, int stop)
+{
+	USED(uart, stop);
+	return 0;
+}
+
+/* PhysUart parity entry: not implemented; ignores parity and returns 0 */
+static int
+kw_parity(Uart* uart, int parity)
+{
+	USED(uart, parity);
+	return 0;
+}
+
+/* PhysUart modemctl entry: not implemented, does nothing */
+static void
+kw_modemctl(Uart* uart, int on)
+{
+	USED(uart, on);
+}
+
+/* PhysUart rts entry: not implemented, does nothing */
+static void
+kw_rts(Uart* uart, int on)
+{
+	USED(uart, on);
+}
+
+/* PhysUart dtr entry: not implemented, does nothing */
+static void
+kw_dtr(Uart* uart, int on)
+{
+	USED(uart, on);
+}
+
+/* PhysUart status entry: not implemented; leaves buf alone and returns 0 */
+static long
+kw_status(Uart* uart, void* buf, long n, long offset)
+{
+	USED(uart, buf, n, offset);
+	return 0;
+}
+
+/* PhysUart fifo entry: not implemented, does nothing */
+static void
+kw_fifo(Uart* uart, int level)
+{
+	USED(uart, level);
+}
+
+/*
+ *  polled input: spin until the line status shows a received
+ *  byte, then return it.
+ */
+static int
+kw_getc(Uart *uart)
+{
+	Ctlr *c;
+	UartReg *r;
+
+	c = uart->regs;
+	r = c->regs;
+	while(!(r->lsr & LSRrx))
+		;
+	return r->rbr;
+}
+
+/*
+ *  polled output: spin until the transmit holding register is
+ *  empty, then send c.
+ */
+static void
+kw_putc(Uart *uart, int c)
+{
+	Ctlr *cp;
+	UartReg *r;
+
+	cp = uart->regs;
+	r = cp->regs;
+	while(!(r->lsr & LSRthre))
+		;
+	r->thr = c;
+}
+
+/*
+ * method table through which the port uart code drives this device.
+ * dobreak, baud, bits, stop, parity, modemctl, rts, dtr, status and
+ * fifo are stubs in this file.
+ */
+PhysUart kwphysuart = {
+	.name		= "kirkwood",
+	.pnp		= kw_pnp,
+	.enable		= kw_enable,
+	.disable	= kw_disable,
+	.kick		= kw_kick,
+	.dobreak	= kw_break,
+	.baud		= kw_baud,
+	.bits		= kw_bits,
+	.stop		= kw_stop,
+	.parity		= kw_parity,
+	.modemctl	= kw_modemctl,
+	.rts		= kw_rts,
+	.dtr		= kw_dtr,
+	.status		= kw_status,
+	.fifo		= kw_fifo,
+	.getc		= kw_getc,
+	.putc		= kw_putc,
+};
+
+/*
+ *  enable uart 0, pass it the usual line settings and make it the
+ *  system console (consuart).
+ *  note that baud, bits, parity and stop are stubs in this driver,
+ *  so those parts of the control string do not reach the hardware
+ *  from here.
+ */
+void
+uartkirkwoodconsole(void)
+{
+	Uart *uart;
+
+	uart = &kirkwooduart[0];
+	(*uart->phys->enable)(uart, 0);
+	uartctl(uart, "b115200 l8 pn s1 i1");
+	uart->console = 1;
+	consuart = uart;
+//serialputs("uart0 kirkwood\n", strlen("uart0 kirkwood\n"));
+}
+
+/*
+ *  write c straight to uart 0 with interrupts off, bypassing the
+ *  Uart machinery.  waits for the transmitter only a bounded number
+ *  of polls (cpuhz, or 1000000 if that is not set) and then writes
+ *  the character regardless.
+ */
+void
+serialputc(int c)
+{
+	int cnt, s;
+
+	s = splhi();
+	cnt = m->cpuhz;
+	if (cnt <= 0)			/* cpuhz not set yet? */
+		cnt = 1000000;
+	while((UART0REG->lsr & LSRthre) == 0 && --cnt > 0)
+		;
+	UART0REG->thr = c;
+	delay(1);
+	splx(s);
+}
+
+/*
+ *  write len bytes from p with serialputc, sending a carriage
+ *  return before each newline.
+ */
+void
+serialputs(char *p, int len)
+{
+	int i;
+
+	for(i = 0; i < len; i++){
+		if(p[i] == '\n')
+			serialputc('\r');
+		serialputc(p[i]);
+	}
+}

+ 26 - 0
sys/src/9/kw/ureg.h

@@ -0,0 +1,26 @@
+/*
+ * the register set passed to trap and syscall code.
+ * r13/sp and r14/link are two names for the same words.
+ */
+typedef struct Ureg {
+	u32int	r0;
+	u32int	r1;
+	u32int	r2;
+	u32int	r3;
+	u32int	r4;
+	u32int	r5;
+	u32int	r6;
+	u32int	r7;
+	u32int	r8;
+	u32int	r9;				/* up */
+	u32int	r10;				/* m */
+	u32int	r11;				/* loader temporary */
+	u32int	r12;				/* SB */
+	union {
+		u32int	r13;
+		u32int	sp;
+	};
+	union {
+		u32int	r14;
+		u32int	link;
+	};
+	u32int	type;				/* of exception */
+	u32int	psr;
+	u32int	pc;				/* interrupted addr */
+} Ureg;

+ 191 - 0
sys/src/9/kw/usb.h

@@ -0,0 +1,191 @@
+/*
+ * common USB definitions.
+ */
+typedef struct Udev Udev;	/* USB device */
+typedef struct Ep Ep;		/* Endpoint */
+typedef struct Hci Hci;		/* Host Controller Interface */
+typedef struct Hciimpl Hciimpl;	/* Link to the controller impl. */
+
+enum
+{
+	/* fundamental constants */
+	Ndeveps	= 16,		/* max nb. of endpoints per device */
+
+	/* tunable parameters */
+	Nhcis	= 16,		/* max nb. of HCIs */
+	Neps	= 64,		/* max nb. of endpoints */
+	Maxctllen = 8*1024,	/* max allowed size for ctl. xfers */
+
+	/* transfer types. keep this order */
+	Tnone = 0,		/* no transfer type configured */
+	Tctl,			/* wr req + rd/wr data + wr/rd sts */
+	Tiso,			/* stream rd or wr (real time) */
+	Tbulk,			/* stream rd or wr */
+	Tintr,			/* msg rd or wr */
+	Nttypes,		/* number of transfer types */
+
+	Epmax	= 0xF,		/* max ep. addr */
+	Devmax	= 0x7F,		/* max dev. addr */
+
+	/* Speeds */
+	Fullspeed = 0,
+	Lowspeed,
+	Highspeed,
+	Nospeed,
+
+	/* request type */
+	Rh2d = 0<<7,
+	Rd2h = 1<<7,
+	Rstd = 0<<5,
+	Rclass =  1<<5,
+	Rdev = 0,
+	Rep = 2,
+	Rother = 3,
+
+	/* req offsets */
+	Rtype	= 0,
+	Rreq	= 1,
+	Rvalue	= 2,
+	Rindex	= 4,
+	Rcount	= 6,
+	Rsetuplen = 8,
+
+	/* standard requests */
+	Rgetstatus	= 0,
+	Rclearfeature	= 1,
+	Rsetfeature	= 3,
+	Rsetaddr	= 5,
+	Rgetdesc	= 6,
+
+	/* device states */
+	Dconfig	 = 0,		/* configuration in progress */
+	Denabled,		/* address assigned */
+	Ddetach,		/* device is detached */
+
+	/* (root) Hub reply to port status (reported to usbd) */
+	HPpresent	= 0x1,
+	HPenable	= 0x2,
+	HPsuspend	= 0x4,
+	HPovercurrent	= 0x8,
+	HPreset		= 0x10,
+	HPpower		= 0x100,
+	HPslow		= 0x200,
+	HPhigh		= 0x400,
+	HPstatuschg	= 0x10000,
+	HPchange	= 0x20000,
+};
+
+/*
+ * Services provided by the driver.
+ * epopen allocates hardware structures to prepare the endpoint
+ * for I/O. This happens when the user opens the data file.
+ * epclose releases them. This happens when the data file is closed.
+ * epwrite tries to write the given bytes, waiting until all of them
+ * have been written (or failed) before returning; but not for Iso.
+ * epread does the same for reading.
+ * It can be assumed that endpoints are DMEXCL but concurrent
+ * read/writes may be issued and the controller must take care.
+ * For control endpoints, device-to-host requests must be followed by
+ * a read of the expected length if needed.
+ * The port requests are called when usbd issues commands for root
+ * hubs. Port status must return bits as a hub request would do.
+ * Toggle handling and other details are left to the controller driver
+ * to avoid mixing the controller and the common device too much.
+ * While an endpoint is closed, its toggles are saved in the Ep struct.
+ */
+/* entry points and private data supplied by a host controller driver */
+struct Hciimpl
+{
+	void	*aux;				/* for controller info */
+	void	(*init)(Hci*);			/* init. controller */
+	void	(*dump)(Hci*);			/* debug */
+	void	(*interrupt)(Ureg*, void*);	/* service interrupt */
+	void	(*epopen)(Ep*);			/* prepare ep. for I/O */
+	void	(*epclose)(Ep*);		/* terminate I/O on ep. */
+	long	(*epread)(Ep*,void*,long);	/* read data from ep */
+	long	(*epwrite)(Ep*,void*,long);	/* write data to ep */
+	char*	(*seprintep)(char*,char*,Ep*);	/* debug */
+	int	(*portenable)(Hci*, int, int);	/* enable/disable port */
+	int	(*portreset)(Hci*, int, int);	/* set/clear port reset */
+	int	(*portstatus)(Hci*, int);	/* get port status */
+	void	(*shutdown)(Hci*);		/* shutdown for reboot */
+	void	(*debug)(Hci*, int);		/* set/clear debug flag */
+};
+
+/*
+ * one host controller: its hardware configuration plus the driver's
+ * Hciimpl, both embedded as unnamed members.
+ */
+struct Hci
+{
+	ISAConf;				/* hardware info */
+	int	tbdf;				/* type+busno+devno+funcno */
+	int	ctlrno;				/* controller number */
+	int	nports;				/* number of ports in hub */
+	int	highspeed;
+	Hciimpl;				/* HCI driver  */
+};
+
+/*
+ * USB endpoint.
+ * All endpoints are kept in a global array. The first
+ * block of fields is constant after endpoint creation.
+ * The rest is configuration information given to all controllers.
+ * The first endpoint for a device (known as ep0) represents the
+ * device and is used to configure it and create other endpoints.
+ * Its QLock also protects per-device data in dev.
+ * See Hciimpl for clues regarding how this is used by controllers.
+ */
+struct Ep
+{
+	Ref;			/* one per fid (and per dev ep for ep0s) */
+
+	/* const once inited. */
+	int	idx;		/* index in global eps array */
+	int	nb;		/* endpoint number in device */
+	Hci*	hp;		/* HCI it belongs to */
+	Udev*	dev;		/* device for the endpoint */
+	Ep*	ep0;		/* control endpoint for its device */
+
+	QLock;			/* protect fields below */
+	char*	name;		/* for ep file names at #u/ */
+	int	inuse;		/* endpoint is open */
+	int	mode;		/* OREAD, OWRITE, or ORDWR */
+	int	clrhalt;	/* true if halt was cleared on ep. */
+	int	debug;		/* per endpoint debug flag */
+	char*	info;		/* for humans to read */
+	long	maxpkt;		/* maximum packet size */
+	int	ttype;		/* transfer type */
+	ulong	load;		/* in µs, for a transfer of maxpkt bytes */
+	void*	aux;		/* for controller specific info */
+	int	rhrepl;		/* fake root hub replies */
+	int	toggle[2];	/* saved toggles (while ep is not in use) */
+	long	pollival;		/* poll interval ([µ]frames; intr/iso) */
+	long	hz;		/* poll frequency (iso) */
+	long	samplesz;	/* sample size (iso) */
+	int	ntds;		/* nb. of Tds per µframe */
+};
+
+/*
+ * Per-device configuration and cached list of endpoints.
+ * eps[0]->QLock protects it.
+ */
+struct Udev
+{
+	int	nb;		/* USB device number */
+	int	state;		/* state for the device: Dconfig, Denabled or Ddetach */
+	int	ishub;		/* hubs can allocate devices */
+	int	isroot;		/* is a root hub */
+	int	speed;		/* Full/Low/High/No -speed */
+	int	hub;		/* dev number for the parent hub */
+	int	port;		/* port number in the parent hub */
+	Ep*	eps[Ndeveps];	/* end points for this device (cached) */
+};
+
+void	addhcitype(char *type, int (*reset)(Hci*));
+#define dprint		if(debug)print
+#define ddprint		if(debug>1)print
+#define deprint		if(debug || ep->debug)print
+#define ddeprint	if(debug>1 || ep->debug>1)print
+#define	GET2(p)		((((p)[1]&0xFF)<<8)|((p)[0]&0xFF))
+#define	PUT2(p,v)	{((p)[0] = (v)); ((p)[1] = (v)>>8);}
+
+extern char *usbmodename[];
+extern char Estalled[];
+
+extern char *seprintdata(char*,char*,uchar*,int);

+ 3348 - 0
sys/src/9/kw/usbehci.c

@@ -0,0 +1,3348 @@
+/*
+ * USB Enhanced Host Controller Interface (EHCI) driver
+ * High speed USB 2.0.
+ *
+ * BUGS:
+ * - Too many delays and ilocks.
+ * - bandwidth admission control must be done per-frame.
+ * - requires polling (some controllers miss interrupts).
+ * - must warn of power overruns.
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+#include	"usb.h"
+#include	"usbehci.h"
+
+typedef struct Ctlio Ctlio;	/* Qio plus data read by the last ctl request */
+typedef struct Ctlr Ctlr;	/* per-controller state */
+typedef struct Itd Itd;		/* iso transfer descriptor */
+typedef struct Sitd Sitd;	/* split transaction iso transfer descriptor */
+typedef struct Qtd Qtd;
+typedef struct Td Td;		/* queue element transfer descriptor */
+typedef struct Qh Qh;		/* queue head */
+typedef struct Fstn Fstn;	/* frame span traversal node; the driver avoids using these */
+typedef union Ed Ed;		/* any descriptor, as kept in the pool */
+typedef struct Edpool Edpool;	/* pool of Eds */
+typedef struct Qio Qio;		/* one per endpoint per direction */
+typedef struct Qtree Qtree;	/* endpoint tree (software) */
+typedef struct Isoio Isoio;	/* iso endpoint I/O state */
+typedef struct Poll Poll;
+
+/*
+ * EHCI interface registers and bits,
+ * preceded by the software-only constants used by the driver.
+ */
+enum
+{
+	/* Queue states (software) */
+	Qidle		= 0,
+	Qinstall,
+	Qrun,
+	Qdone,
+	Qclose,
+	Qfree,
+
+	Enabledelay	= 100,		/* waiting for a port to enable */
+	Abortdelay	= 5,		/* delay after cancelling Tds (ms) */
+	Ctltmout	= 2000,		/* timeout for a ctl. request (ms) */
+	Bulktmout	= 2000,		/* timeout for a bulk xfer. (ms) */
+	Isotmout	= 2000,		/* timeout for an iso. request (ms) */
+
+	Incr		= 64,		/* for pools of Tds, Qhs, etc. */
+	Align		= 128,		/* in bytes for all those descriptors */
+
+	/* Keep them as a power of 2, lower than ctlr->nframes */
+	/* Also, keep Nisoframes >= Nintrleafs */
+	Nintrleafs	= 32,		/* nb. of leaf frames in intr. tree */
+	Nisoframes	= 64,		/* nb. of iso frames (in window) */
+
+	/*
+	 * HW constants
+	 */
+
+	/* Itd bits (csw[]) */
+	Itdactive	= 0x80000000,	/* execution enabled */
+	Itddberr	= 0x40000000,	/* data buffer error */
+	Itdbabble	= 0x20000000,	/* babble error */
+	Itdtrerr	= 0x10000000,	/* transaction error */
+	Itdlenshift	= 16,		/* transaction length */
+	Itdlenmask	= 0xFFF,
+	Itdioc		= 0x00008000,	/* interrupt on complete */
+	Itdpgshift	= 12,		/* page select field */
+	Itdoffshift	= 0,		/* transaction offset */
+	/* Itd bits, buffer[] */
+	Itdepshift	= 8,		/* endpoint address (buffer[0]) */
+	Itddevshift	= 0,		/* device address (buffer[0]) */
+	Itdin		= 0x800,	/* is input (buffer[1]) */
+	Itdout		= 0,
+	Itdmaxpktshift	= 0,		/* max packet (buffer[1]) */
+	Itdntdsshift	= 0,		/* nb. of tds per µframe (buffer[2]) */
+
+	Itderrors	= Itddberr|Itdbabble|Itdtrerr,
+
+	/* Sitd bits (epc) */
+	Stdin		= 0x80000000,	/* input direction */
+	Stdportshift	= 24,		/* hub port number */
+	Stdhubshift	= 16,		/* hub address */
+	Stdepshift	= 8,		/* endpoint address */
+	Stddevshift	= 0,		/* device address */
+	/* Sitd bits (mfs) */
+	Stdssmshift	= 0,		/* split start mask */
+	Stdscmshift	= 8,		/* split complete mask */
+	/* Sitd bits (csw) */
+	Stdioc		= 0x80000000,	/* interrupt on complete */
+	Stdpg		= 0x40000000,	/* page select */
+	Stdlenshift	= 16,		/* total bytes to transfer */
+	Stdlenmask	= 0x3FF,
+	Stdactive	= 0x00000080,	/* active */
+	Stderr		= 0x00000040,	/* tr. translator error */
+	Stddberr	= 0x00000020,	/* data buffer error */
+	Stdbabble	= 0x00000010,	/* babble error */
+	Stdtrerr	= 0x00000008,	/* transaction error */
+	Stdmmf		= 0x00000004,	/* missed µframe */
+	Stddcs		= 0x00000002,	/* do complete split */
+
+	Stderrors	= Stderr|Stddberr|Stdbabble|Stdtrerr|Stdmmf,
+
+	/* Sitd bits buffer[1] */
+	Stdtpall	= 0x00000000,	/* all payload here (188 bytes) */
+	Stdtpbegin	= 0x00000008,	/* first payload for fs trans. */
+	Stdtcntmask	= 0x00000007,	/* T-count */
+
+	/* Td bits (csw) */
+	Tddata1		= 0x80000000,	/* data toggle 1 */
+	Tddata0		= 0x00000000,	/* data toggle 0 */
+	Tdlenshift	= 16,		/* total bytes to transfer */
+	Tdlenmask	= 0x7FFF,
+	Tdmaxpkt	= 0x5000,	/* max buffer for a Td (5 pages of 4K) */
+	Tdioc		= 0x00008000,	/* interrupt on complete */
+	Tdpgshift	= 12,		/* current page */
+	Tdpgmask	= 7,
+	Tderr1		= 0x00000400,	/* bit 0 of error counter */
+	Tderr2		= 0x00000800,	/* bit 1 of error counter */
+	Tdtokout	= 0x00000000,	/* direction out */
+	Tdtokin		= 0x00000100,	/* direction in */
+	Tdtoksetup	= 0x00000200,	/* setup packet */
+	Tdtok		= 0x00000300,	/* token bits */
+	Tdactive		= 0x00000080,	/* active */
+	Tdhalt		= 0x00000040,	/* halted */
+	Tddberr		= 0x00000020,	/* data buffer error */
+	Tdbabble	= 0x00000010,	/* babble error */
+	Tdtrerr		= 0x00000008,	/* transaction error */
+	Tdmmf		= 0x00000004,	/* missed µframe */
+	Tddcs		= 0x00000002,	/* do complete split */
+	Tdping		= 0x00000001,	/* do ping */
+
+	Tderrors	= Tdhalt|Tddberr|Tdbabble|Tdtrerr|Tdmmf,
+
+	/* Qh bits (eps0) */
+	Qhrlcmask	= 0xF,		/* nak reload count */
+	Qhrlcshift	= 28,		/* nak reload count */
+	Qhnhctl		= 0x08000000,	/* not-high speed ctl */
+	Qhmplmask	= 0x7FF,	/* max packet */
+	Qhmplshift	= 16,
+	Qhhrl		= 0x00008000,	/* head of reclamation list */
+	Qhdtc		= 0x00004000,	/* data toggle ctl. */
+	Qhint		= 0x00000080,	/* inactivate on next transition */
+	Qhspeedmask	= 0x00003000,	/* speed bits */
+	Qhfull		= 0x00000000,	/* full speed */
+	Qhlow		= 0x00001000,	/* low speed */
+	Qhhigh		= 0x00002000,	/* high speed */
+
+	/* Qh bits (eps1) */
+	Qhmultshift	= 30,		/* multiple tds per µframe */
+	Qhmultmask	= 3,
+	Qhportshift	= 23,		/* hub port number */
+	Qhhubshift	= 16,		/* hub address */
+	Qhscmshift	= 8,		/* split completion mask bits */
+	Qhismshift	= 0,		/* interrupt sched. mask bits */
+};
+
+/*
+ * Endpoint tree (software)
+ * Nodes are numbered so that the parent of node j is (j-1)/2
+ * and node 0 is the root (see pickschedq).
+ */
+struct Qtree
+{
+	int	nel;		/* NOTE(review): presumably nb. of nodes in bw[]/root[] -- confirm */
+	int	depth;		/* deepest level pickschedq will schedule at */
+	ulong*	bw;		/* bw[q]: bandwidth charged to node q by schedq */
+	Qh**	root;		/* root[q]: Qh heading the list for node q */
+};
+
+/*
+ * One per endpoint per direction, to control I/O.
+ * Qh.io points back here from the queue head.
+ */
+struct Qio
+{
+	QLock;			/* for the entire I/O process */
+	Rendez;			/* wait for completion */
+	Qh*	qh;		/* Td list (field const after init) */
+	int	usbid;		/* usb address for endpoint/device */
+	int	toggle;		/* Tddata0/Tddata1 */
+	int	tok;		/* Tdtoksetup, Tdtokin, Tdtokout */
+	ulong	iotime;		/* last I/O time; to hold interrupt polls */
+	int	debug;		/* debug flag from the endpoint */
+	char*	err;		/* error string */
+	char*	tag;		/* debug (no room in Qh for this) */
+	ulong	bw;		/* added to/removed from the tree slot by schedq/unschedq */
+};
+
+struct Ctlio			/* I/O state for a control endpoint */
+{
+	Qio;			/* a single Qio for each RPC */
+	uchar*	data;		/* read from last ctl req. */
+	int	ndata;		/* number of bytes read */
+};
+
+struct Isoio			/* I/O state for an isochronous endpoint */
+{
+	QLock;
+	Rendez;			/* wait for space/completion/errors */
+	int	usbid;		/* address used for device/endpoint */
+	int	tok;		/* Tdtokin or Tdtokout */
+	int	state;		/* Qrun -> Qdone -> Qrun... -> Qclose */
+	int	nframes;	/* number of frames ([S]Itds) used */
+	uchar*	data;		/* iso data buffers if not embedded */
+	char*	err;		/* error string */
+	int	nerrs;		/* nb of consecutive I/O errors */
+	ulong	maxsize;		/* ntds * ep->maxpkt */
+	long	nleft;		/* number of bytes left from last write */
+	int	debug;		/* debug flag from the endpoint */
+	int	hs;		/* is high speed? selects the Itd or Sitd union members below */
+	Isoio*	next;		/* in list of active Isoios */
+	ulong	td0frno;	/* first frame used in ctlr */
+	union{
+		Itd*	tdi;	/* next td processed by interrupt */
+		Sitd*	stdi;
+	};
+	union{
+		Itd*	tdu;	/* next td for user I/O in tdps */
+		Sitd*	stdu;
+	};
+	union{
+		Itd**	itdps;	/* itdps[i]: ptr to Itd for i-th frame or nil */
+		Sitd**	sitdps;	/* sitdps[i]: ptr to Sitd for i-th frame or nil */
+		ulong**	tdps;	/* same thing, as seen by hw */
+	};
+};
+
+struct Poll			/* embedded in Ctlr as `poll' */
+{
+	Lock;
+	Rendez;
+	int must;		/* NOTE(review): presumably "polling is required" -- confirm against users */
+	int does;		/* NOTE(review): presumably "a poller is running" -- confirm against users */
+};
+
+struct Ctlr			/* per-controller state; reached through Hci.aux */
+{
+	Rendez;			/* for waiting to async advance doorbell */
+	Lock;			/* for ilock. qh lists and basic ctlr I/O */
+	QLock	portlck;	/* for port resets/enable... (and doorbell) */
+	int	active;		/* in use or not */
+	Ecapio*	capio;		/* Capability i/o regs */
+	Eopio*	opio;		/* Operational i/o regs */
+
+	int	nframes;	/* 1024, 512, or 256 frames in the list */
+	ulong*	frames;		/* periodic frame list (hw) */
+	Qh*	qhs;		/* async Qh circular list for bulk/ctl */
+	Qtree*	tree;		/* tree of Qhs for the periodic list */
+	int	ntree;		/* number of dummy qhs in tree */
+	Qh*	intrqhs;		/* list of (not dummy) qhs in tree, linked by Qh.inext */
+	Isoio*	iso;		/* list of active Iso I/O */
+	ulong	load;		/* printed on iso errors; NOTE(review): presumably total scheduled load */
+	ulong	isoload;
+	int	nintr;		/* number of interrupts attended */
+	int	ntdintr;		/* number of intrs. with something to do */
+	int	nqhintr;		/* number of async td intrs. */
+	int	nisointr;	/* number of periodic td intrs. */
+	int	nreqs;
+	Poll	poll;
+};
+
+struct Edpool			/* pool of descriptors; see edalloc and edfree */
+{
+	Lock;			/* protects the free list and the counters */
+	Ed*	free;		/* free list, linked through Ed.next */
+	int	nalloc;		/* total Eds ever added to the pool */
+	int	ninuse;		/* Eds handed out by edalloc and not yet freed */
+	int	nfree;		/* Eds currently in the free list */
+};
+
+/*
+ * We use the 64-bit version for Itd, Sitd, Td, and Qh.
+ * If the ehci is 64-bit capable it assumes we are using those
+ * structures even when the system is 32 bits.
+ */
+
+/*
+ * Iso transfer descriptor. hw: 92 bytes; 108 bytes total
+ * (with 4-byte ulongs and pointers). aligned to 32.
+ */
+struct Itd
+{
+	ulong	link;		/* to next hw struct */
+	ulong	csw[8];		/* sts/length/pg/off. updated by hw */
+	ulong	buffer[7];	/* buffer pointers, addrs, maxsz */
+	ulong	xbuffer[7];	/* high 32 bits of buffer for 64-bits */
+
+	/* software */
+	Itd*	next;
+	ulong	ndata;		/* number of bytes in data */
+	ulong	mdata;		/* max number of bytes in data */
+	uchar*	data;
+};
+
+/*
+ * Split transaction iso transfer descriptor.
+ * hw: 36 bytes, 52 bytes total (with 4-byte ulongs and pointers).
+ * aligned to 32.
+ */
+struct Sitd
+{
+	ulong	link;		/* to next hw struct */
+	ulong	epc;		/* static endpoint state. addrs */
+	ulong	mfs;		/* static endpoint state. µ-frame sched. */
+	ulong	csw;		/* transfer state. updated by hw */
+	ulong	buffer[2];	/* [0]: buf. ptr/offset. offset updated by hw */
+				/* [1]: buf ptr/TP/Tcnt. TP/Tcnt updated by hw */
+	ulong	blink;		/* back pointer */
+	ulong	xbuffer[2];	/* high 32 bits of buffer for 64-bits */
+
+	/* software */
+	Sitd*	next;
+	ulong	ndata;		/* number of bytes in data */
+	ulong	mdata;		/* max number of bytes in data */
+	uchar*	data;
+};
+
+/*
+ * Queue element transfer descriptor.
+ * hw: first 52 bytes; total 68+sbuff bytes aligned to 32 bytes.
+ * Fields after xbuffer are software only.
+ */
+struct Td
+{
+	ulong	nlink;		/* to next Td */
+	ulong	alink;		/* alternate link to next Td */
+	ulong	csw;		/* cmd/sts. updated by hw */
+	ulong	buffer[5];	/* buf ptrs. offset updated by hw */
+	ulong	xbuffer[5];	/* high 32 bits of buffer for 64-bits */
+
+	Td*	next;		/* in qh or Isoio or free list */
+	ulong	ndata;		/* bytes available/used at data */
+	uchar*	data;		/* pointer to actual data */
+	uchar*	buff;		/* allocated data buffer or nil; released by tdfree */
+	uchar	sbuff[1];	/* first byte of embedded buffer */
+};
+
+/*
+ * Queue head. Aligned to 32 bytes.
+ * hw uses the first 68 bytes, 92 total.
+ * The 32 bytes from nlink through buffer[] are laid out like the
+ * start of a Td; qhdump copies them into a Td to print them.
+ */
+struct Qh
+{
+	ulong	link;		/* to next Qh in round robin */
+	ulong	eps0;		/* static endpoint state. addrs */
+	ulong	eps1;		/* static endpoint state. µ-frame sched. */
+
+	/* updated by hw */
+	ulong	clink;		/* current Td (No Term bit here!) */
+	ulong	nlink;		/* to next Td */
+	ulong	alink;		/* alternate link to next Td */
+	ulong	csw;		/* cmd/sts. updated by hw */
+	ulong	buffer[5];	/* buf ptrs. offset updated by hw */
+	ulong	xbuffer[5];	/* high 32 bits of buffer for 64-bits */
+
+	/* software */
+	Qh*	next;		/* in controller list/tree of Qhs */
+	int	state;		/* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */
+	Qio*	io;		/* for this queue */
+	Td*	tds;		/* for this queue */
+	int	sched;		/* slot for intr. Qhs; -1 when not in the tree */
+	Qh*	inext;		/* next in list of intr. qhs */
+};
+
+/*
+ * We can avoid frame span traversal nodes if we don't span frames.
+ * Just schedule transfer that can fit on the current frame and
+ * wait a little bit otherwise.
+ */
+
+/*
+ * Software. Ehci descriptors provided by pool.
+ * There are so few because we avoid using Fstn.
+ * The align member pads every Ed to at least Align bytes.
+ */
+union Ed
+{
+	Ed*	next;		/* in free list */
+	Qh	qh;
+	Td	td;
+	Itd	itd;
+	Sitd	sitd;
+	uchar	align[Align];
+};
+
+#define diprint		if(debug || iso->debug)print	/* needs `iso' in scope */
+#define ddiprint		if(debug>1 || iso->debug>1)print	/* needs `iso' in scope */
+#define dqprint		if(debug || (qh->io && qh->io->debug))print	/* needs `qh' in scope; qh->io may be nil */
+#define ddqprint		if(debug>1 || (qh->io && qh->io->debug>1))print	/* needs `qh' in scope */
+#define TRUNC(x, sz)	((x) & ((sz)-1))	/* x modulo sz when sz is a power of 2 */
+#define LPTR(q)		((ulong*)KADDR((q) & ~0x1F))	/* kernel address of a hw link, low 5 bits dropped */
+
+static int debug;		/* global debug level tested by the *print macros */
+static Edpool edpool;		/* single descriptor pool used by edalloc/edfree */
+static Ctlr* ctlrs[Nhcis];
+static char Ebug[] = "not yet implemented";
+static char* qhsname[] = { "idle", "install", "run", "done", "close", "FREE" };	/* same order as Qidle..Qfree */
+
+
+/*
+ * Start (on != 0) or stop (on == 0) the controller, then poll the
+ * status register, for at most 1000 delay(1) steps, until its Shalted
+ * bit agrees with the request.  A timeout is only reported.
+ */
+static void
+ehcirun(Ctlr *ctlr, int on)
+{
+	int tries, halted;
+	Eopio *regs;
+
+	ddprint("ehci %#p %s\n", ctlr->capio, on ? "starting" : "halting");
+	regs = ctlr->opio;
+	if(on == 0)
+		regs->cmd = Cstop;
+	else
+		regs->cmd |= Crun;
+	tries = 0;
+	while(tries < 1000){			/* was 100 */
+		/* read the status register once per iteration */
+		halted = (regs->sts & Shalted) != 0;
+		if((on == 0 && halted) || (on != 0 && !halted))
+			break;
+		delay(1);
+		tries++;
+	}
+	if(tries >= 1000)
+		print("ehci %#p %s cmd timed out\n",
+			ctlr->capio, on ? "run" : "halt");
+	ddprint("ehci %#p cmd %#ulx sts %#ulx\n", ctlr->capio, regs->cmd, regs->sts);
+}
+
+/*
+ * Take one Ed from the shared pool.  When the free list is empty
+ * the pool grows by Incr descriptors obtained from xspanalloc
+ * with alignment Align.  The Ed is zeroed before it is returned.
+ */
+static void*
+edalloc(void)
+{
+	Ed *e, *chunk;
+	int n;
+
+	lock(&edpool);
+	if(edpool.free == nil){
+		chunk = xspanalloc(Incr*sizeof(Ed), Align, 0);
+		if(chunk == nil)
+			panic("edalloc");
+		/* push from the last element down, so chunk[0] ends up first */
+		n = Incr;
+		while(n-- > 0){
+			chunk[n].next = edpool.free;
+			edpool.free = &chunk[n];
+		}
+		edpool.nalloc += Incr;
+		edpool.nfree += Incr;
+		dprint("ehci: edalloc: %d eds\n", edpool.nalloc);
+	}
+	e = edpool.free;
+	edpool.free = e->next;
+	edpool.ninuse++;
+	edpool.nfree--;
+	unlock(&edpool);
+
+	memset(e, 0, sizeof(Ed));	/* safety */
+	assert(((ulong)e & 0xF) == 0);
+	return e;
+}
+
+/*
+ * Put an Ed back at the head of the pool's free list
+ * and adjust the pool counters.
+ */
+static void
+edfree(void *a)
+{
+	Ed *e;
+
+	e = a;
+	lock(&edpool);
+	edpool.nfree++;
+	edpool.ninuse--;
+	e->next = edpool.free;
+	edpool.free = e;
+	unlock(&edpool);
+}
+
+/*
+ * Allocate and do some initialization.
+ * Free after releasing buffers used.
+ */
+
+/*
+ * Get a zeroed Itd from the pool with its hw link terminated.
+ */
+static Itd*
+itdalloc(void)
+{
+	Itd *itd;
+
+	itd = edalloc();
+	itd->link = Lterm;
+	return itd;
+}
+
+static void
+itdfree(Itd *td)	/* return an Itd to the pool */
+{
+	edfree(td);	/* td->data is not released here */
+}
+
+/*
+ * Get a zeroed Sitd from the pool with both its forward
+ * and back links terminated.
+ */
+static Sitd*
+sitdalloc(void)
+{
+	Sitd *sitd;
+
+	sitd = edalloc();
+	sitd->blink = Lterm;
+	sitd->link = sitd->blink;
+	return sitd;
+}
+
+static void
+sitdfree(Sitd *td)	/* return an Sitd to the pool */
+{
+	edfree(td);	/* td->data is not released here */
+}
+
+/*
+ * Get a zeroed Td from the pool with both the alternate
+ * and the next link terminated.
+ */
+static Td*
+tdalloc(void)
+{
+	Td *t;
+
+	t = edalloc();
+	t->alink = Lterm;
+	t->nlink = t->alink;
+	return t;
+}
+
+/*
+ * Release a Td: free its allocated buffer (td->buff) and give
+ * the descriptor back to the pool.  A nil td is ignored.
+ */
+static void
+tdfree(Td *td)
+{
+	if(td != nil){
+		free(td->buff);
+		edfree(td);
+	}
+}
+
+/*
+ * Make next follow td, in both the software list and the hw
+ * next link.  A nil next terminates the hw link.  The alternate
+ * link is always terminated.
+ */
+static void
+tdlinktd(Td *td, Td *next)
+{
+	td->alink = Lterm;
+	td->nlink = next == nil ? Lterm : PADDR(next);
+	td->next = next;
+}
+
+/*
+ * Make next follow qh, in both the software list (qh->next) and
+ * the hw horizontal link.  A nil next terminates the hw link with
+ * Lterm instead of handing the controller PADDR(nil)|Lqh, which
+ * would look like a valid queue head pointer; unschedq can pass
+ * nil when it unlinks the last Qh of a list.
+ * Returns qh.
+ */
+static Qh*
+qhlinkqh(Qh *qh, Qh *next)
+{
+	qh->next = next;
+	if(next == nil)
+		qh->link = Lterm;
+	else
+		qh->link = PADDR(next)|Lqh;
+	return qh;
+}
+
+/*
+ * Store a usb address in qh->eps0: the device number is taken
+ * from the bits of addr covered by Devmax and the endpoint number
+ * from the bits above bit 6; other eps0 bits are preserved.
+ */
+static void
+qhsetaddr(Qh *qh, ulong addr)
+{
+	ulong epnb, devnb;
+
+	devnb = addr & Devmax;
+	epnb = (addr >> 7) & Epmax;
+	qh->eps0 = (qh->eps0 & ~((Epmax<<8)|Devmax)) | (epnb << 8) | devnb;
+}
+
+/*
+ * return floor(log2(n)): the exponent of the largest power
+ * of 2 that is <= n.  Returns 0 for any n <= 1.
+ */
+static int
+flog2lower(int n)
+{
+	int i;
+
+	/* shift n down rather than 1 up, so a large n cannot overflow */
+	for(i = 0; n > 1; n >>= 1)
+		i++;
+	return i;
+}
+
+/*
+ * Choose a node of the interrupt tree for an endpoint polled
+ * every pollival frames.  Candidates are the nodes at depth
+ * min(flog2lower(pollival), qt->depth).  The load of a candidate
+ * is the sum of bw[] on its path up to and including the root
+ * (the parent of node j is (j-1)/2).
+ * Returns the least loaded candidate, or -1 if the most loaded
+ * candidate plus bw would reach limit.
+ */
+static int
+pickschedq(Qtree *qt, int pollival, ulong bw, ulong limit)
+{
+	int node, up, depth, first, end, pick;
+	ulong lo, hi, sum;
+
+	depth = flog2lower(pollival);
+	if(depth > qt->depth)
+		depth = qt->depth;
+	first = (1 << depth) - 1;
+	end = (1 << (depth+1)) - 1;
+	pick = -1;
+	hi = 0;
+	lo = ~0;
+	for(node = first; node < end; node++){
+		sum = qt->bw[0];
+		up = node;
+		while(up > 0){
+			sum += qt->bw[up];
+			up = (up - 1) / 2;
+		}
+		if(sum < lo){
+			lo = sum;
+			pick = node;
+		}
+		if(sum > hi)
+			hi = sum;
+	}
+	if(hi + bw >= limit)
+		return -1;
+	return pick;
+}
+
+/*
+ * Insert an interrupt Qh in the periodic tree: pick a slot for
+ * pollival, charge qh->io->bw to it, link qh right after the slot's
+ * head Qh and push qh on ctlr->intrqhs.
+ * Returns 0, or -1 if no slot was found.
+ */
+static int
+schedq(Ctlr *ctlr, Qh *qh, int pollival)
+{
+	int slot;
+	ulong bw;
+	Qh *head;
+	Qtree *tree;
+
+	tree = ctlr->tree;
+	bw = qh->io->bw;
+	slot = pickschedq(tree, pollival, 0, ~0);
+	ddqprint("ehci: sched %#p q %d, ival %d, bw %uld\n",
+		qh->io, slot, pollival, bw);
+	if(slot < 0){
+		print("ehci: no room for ed\n");
+		return -1;
+	}
+	tree->bw[slot] += bw;
+	qh->sched = slot;
+	head = tree->root[slot];
+	/* point qh at the rest of the list before making it reachable */
+	qhlinkqh(qh, head->next);
+	qhlinkqh(head, qh);
+	qh->inext = ctlr->intrqhs;
+	ctlr->intrqhs = qh;
+	return 0;
+}
+
+/*
+ * Undo schedq: give back the bandwidth of qh's slot, unlink qh
+ * from the slot's list and from ctlr->intrqhs.  Does nothing if
+ * qh was never scheduled (qh->sched < 0); complains if qh is
+ * missing from either list.
+ */
+static void
+unschedq(Ctlr *ctlr, Qh *qh)
+{
+	int slot;
+	ulong bw;
+	Qh *before, *cur;
+	Qh **lp;
+
+	bw = qh->io->bw;
+	slot = qh->sched;
+	if(slot < 0)
+		return;
+	ctlr->tree->bw[slot] -= bw;
+
+	before = ctlr->tree->root[slot];
+	for(cur = before->next; cur != nil && cur != qh; cur = cur->next)
+		before = cur;
+	if(cur != nil)
+		qhlinkqh(before, cur->next);
+	else
+		print("ehci: unschedq %d: not found\n", slot);
+	lp = &ctlr->intrqhs;
+	while(*lp != nil){
+		if(*lp == qh){
+			*lp = qh->inext;
+			return;
+		}
+		lp = &(*lp)->inext;
+	}
+	print("ehci: unschedq: qh %#p not found\n", qh);
+}
+
+/*
+ * Extract the max packet size field from qh->eps0.
+ */
+static ulong
+qhmaxpkt(Qh *qh)
+{
+	ulong field;
+
+	field = qh->eps0 >> Qhmplshift;
+	return field & Qhmplmask;
+}
+
+/*
+ * Replace the max packet size field of qh->eps0 with maxpkt
+ * (truncated to Qhmplmask); other bits are preserved.
+ */
+static void
+qhsetmaxpkt(Qh *qh, int maxpkt)
+{
+	ulong mpl;
+
+	mpl = (maxpkt & Qhmplmask) << Qhmplshift;
+	qh->eps0 = (qh->eps0 & ~(Qhmplmask << Qhmplshift)) | mpl;
+}
+
+/*
+ * Initialize the round-robin circular list of ctl/bulk Qhs
+ * if ep is nil. Otherwise, allocate and link a new Qh in the ctlr.
+ *
+ * With a nil ep the Qh is treated as Tctl.  Ctl and bulk Qhs go
+ * into the circular ctlr->qhs list (the first one becomes the
+ * head of the reclamation list); intr Qhs go into the periodic
+ * tree through schedq.  Returns the new Qh.
+ * NOTE(review): the result of schedq is ignored, and io is
+ * dereferenced (io->usbid) before the io != nil test below.
+ */
+static Qh*
+qhalloc(Ctlr *ctlr, Ep *ep, Qio *io, char* tag)
+{
+	Qh *qh;
+	int ttype;
+
+	qh = edalloc();
+	qh->nlink = Lterm;
+	qh->alink = Lterm;
+	qh->csw = Tdhalt;	/* halted: nothing to run yet */
+	qh->state = Qidle;
+	qh->sched = -1;		/* not in the periodic tree */
+	qh->io = io;
+	if(ep != nil){
+		qh->eps0 = 0;
+		qhsetmaxpkt(qh, ep->maxpkt);
+		if(ep->dev->speed == Lowspeed)
+			qh->eps0 |= Qhlow;
+		if(ep->dev->speed == Highspeed)
+			qh->eps0 |= Qhhigh;
+		else if(ep->ttype == Tctl)
+			qh->eps0 |= Qhnhctl;	/* ctl endpoint on a device that is not high speed */
+		qh->eps0 |= Qhdtc;
+		qh->eps0 |= (8 << Qhrlcshift);	/* 8 naks max */
+		qhsetaddr(qh, io->usbid);
+		qh->eps1 = (ep->ntds & Qhmultmask) << Qhmultshift;
+		qh->eps1 |= ep->dev->port << Qhportshift;
+		qh->eps1 |= ep->dev->hub << Qhhubshift;
+		qh->eps1 |= 034 << Qhscmshift;	/* split completion mask: bits 2, 3 and 4 */
+		if(ep->ttype == Tintr)
+			qh->eps1 |= (1 << Qhismshift); /* intr. start µf. */
+		if(io != nil)
+			io->tag = tag;
+	}
+	ilock(ctlr);
+	ttype = Tctl;
+	if(ep != nil)
+		ttype = ep->ttype;
+	switch(ttype){
+	case Tctl:
+	case Tbulk:
+		if(ctlr->qhs == nil){
+			ctlr->qhs = qhlinkqh(qh, qh);	/* first Qh: a circular list of one */
+			ctlr->opio->link = PADDR(qh)|Lqh;
+			qh->eps0 |= Qhhigh | Qhhrl;
+		}else{
+			qhlinkqh(qh, ctlr->qhs->next);	/* insert right after the list head */
+			qhlinkqh(ctlr->qhs, qh);
+		}
+		break;
+	case Tintr:
+		schedq(ctlr, qh, ep->pollival);
+		break;
+	default:
+		print("ehci: qhalloc called for ttype != ctl/bulk\n");
+	}
+	iunlock(ctlr);
+	return qh;
+}
+
+/*
+ * Sleep condition for qhcoherency: true once the Ciasync bit
+ * is no longer set in the command register.  a is the Ctlr.
+ */
+static int
+qhadvanced(void *a)
+{
+	Ctlr *c;
+
+	c = a;
+	if((c->opio->cmd & Ciasync) != 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * called when a qh is removed, to be sure the hw is not
+ * keeping pointers into it.
+ * Sets Ciasync in the command register and waits, in up to three
+ * tsleeps of Abortdelay ms, for qhadvanced to report it cleared.
+ * Holds ctlr->portlck meanwhile.
+ */
+static void
+qhcoherency(Ctlr *ctlr)
+{
+	int i;
+
+	qlock(&ctlr->portlck);
+	ctlr->opio->cmd |= Ciasync;	/* ask for intr. on async advance */
+	for(i = 0; i < 3 && qhadvanced(ctlr) == 0; i++)
+		if(!waserror()){	/* an error raised in tsleep just ends this attempt */
+			tsleep(ctlr, qhadvanced, ctlr, Abortdelay);
+			poperror();
+		}
+	dprint("ehci: qhcoherency: doorbell %d\n", qhadvanced(ctlr));
+	if(i == 3)	/* all three attempts were used */
+		print("ehci: async advance doorbell did not ring\n");
+	ctlr->opio->cmd &= ~Ciasync;	/* try to clean */
+	qunlock(&ctlr->portlck);
+}
+
+static void
+qhfree(Ctlr *ctlr, Qh *qh)	/* unlink qh from the controller, then free its Tds and the Qh itself */
+{
+	Td *td;
+	Td *ltd;
+	Qh *q;
+
+	if(qh == nil)
+		return;
+	ilock(ctlr);
+	if(qh->sched < 0){	/* not in the periodic tree: look in the async list */
+		/* NOTE(review): in a circular list this loop only ends if qh is found -- confirm callers */
+		for(q = ctlr->qhs; q != nil; q = q->next)
+			if(q->next == qh)
+				break;
+		if(q == nil)
+			panic("qhfree: nil q");
+		q->next = qh->next;	/* the predecessor takes over both links of qh */
+		q->link = qh->link;
+	}else
+		unschedq(ctlr, qh);
+	iunlock(ctlr);
+
+	qhcoherency(ctlr);	/* wait for the hw to drop pointers into qh before freeing */
+
+	for(td = qh->tds; td != nil; td = ltd){
+		ltd = td->next;	/* saved first: tdfree recycles td */
+		tdfree(td);
+	}
+
+	edfree(qh);
+}
+
+static void
+qhlinktd(Qh *qh, Td *td)	/* install td as the Td list of qh; a nil td halts the queue */
+{
+	ulong csw;
+	int i;
+
+	if(td == nil){
+		qh->tds = nil;
+		qh->csw |= Tdhalt;
+		qh->csw &= ~Tdactive;
+	}else{
+		qh->tds = td;
+		csw = qh->csw & (Tddata1|Tdping);	/* save */
+		qh->csw = Tdhalt;	/* keep the queue halted while the overlay is rewritten */
+		qh->clink = 0;
+		qh->alink = Lterm;
+		qh->nlink = PADDR(td);
+		for(i = 0; i < nelem(qh->buffer); i++)
+			qh->buffer[i] = 0;
+		qh->csw = csw & ~(Tdhalt|Tdactive);	/* activate next */
+	}
+}
+
+/*
+ * Format a hw link word as "name value" followed by "T" if it is
+ * terminated or, when typed is non-zero, by a letter for the type
+ * of the descriptor it points to (I, Q, S or F).
+ * Returns the new end of the text.
+ */
+static char*
+seprintlink(char *s, char *se, char *name, ulong l, int typed)
+{
+	char *kind;
+
+	s = seprint(s, se, "%s %ulx", name, l);
+	if((l & Lterm) != 0)
+		return seprint(s, se, "T");
+	if(typed == 0)
+		return s;
+	switch(l & (3<<1)){
+	case Litd:
+		kind = "I";
+		break;
+	case Lqh:
+		kind = "Q";
+		break;
+	case Lsitd:
+		kind = "S";
+		break;
+	default:
+		kind = "F";
+		break;
+	}
+	return seprint(s, se, "%s", kind);
+}
+
+/*
+ * Format an Itd at s, never writing past se: one header line
+ * (direction, endpoint, device, max packet, mult and link), one
+ * line per csw[] slot and one line with the buffer page numbers.
+ * Returns the new end of the text.  A nil td prints "<nil itd>".
+ *
+ * The mult field is taken from buffer[2] and the transaction
+ * length is masked with Itdlenmask, matching the field layout
+ * given with the Itd constants.
+ */
+static char*
+seprintitd(char *s, char *se, Itd *td)
+{
+	int i;
+	char flags[6];
+	ulong b0;
+	ulong b1;
+	ulong b2;
+	char *rw;
+
+	if(td == nil)
+		return seprint(s, se, "<nil itd>\n");
+	b0 = td->buffer[0];
+	b1 = td->buffer[1];
+	b2 = td->buffer[2];
+
+	s = seprint(s, se, "itd %#p", td);
+	rw = (b1 & Itdin) ? "in" : "out";
+	s = seprint(s, se, " %s ep %uld dev %uld max %uld mult %uld",
+		rw, (b0>>8)&Epmax, (b0&Devmax),
+		b1 & 0x7ff, (b2 >> Itdntdsshift) & 3);
+	s = seprintlink(s, se, " link", td->link, 1);
+	s = seprint(s, se, "\n");
+	for(i = 0; i < nelem(td->csw); i++){
+		/* a: active, i: ioc, d: data buffer err, b: babble, t: transaction err */
+		memset(flags, '-', 5);
+		if((td->csw[i] & Itdactive) != 0)
+			flags[0] = 'a';
+		if((td->csw[i] & Itdioc) != 0)
+			flags[1] = 'i';
+		if((td->csw[i] & Itddberr) != 0)
+			flags[2] = 'd';
+		if((td->csw[i] & Itdbabble) != 0)
+			flags[3] = 'b';
+		if((td->csw[i] & Itdtrerr) != 0)
+			flags[4] = 't';
+		flags[5] = 0;
+		s = seprint(s, se, "\ttd%d %s", i, flags);
+		s = seprint(s, se, " len %uld",
+			(td->csw[i] >> Itdlenshift) & Itdlenmask);
+		s = seprint(s, se, " pg %uld", (td->csw[i] >> Itdpgshift) & 0x7);
+		s = seprint(s, se, " off %uld\n", td->csw[i] & 0xfff);
+	}
+	s = seprint(s, se, "\tbuffs:");
+	for(i = 0; i < nelem(td->buffer); i++)
+		s = seprint(s, se, " %#ulx", td->buffer[i] >> 12);
+	return seprint(s, se, "\n");
+}
+
+/*
+ * Format an Sitd at s, never writing past se: addresses, status
+ * flags, buffer pages, transaction position/count, the schedule
+ * masks and both links.  Returns the new end of the text.
+ * A nil td prints "<nil sitd>".
+ *
+ * The transaction position is read from buffer[1], the word that
+ * sitdinit stores Stdtpall/Stdtpbegin and the T-count into.
+ */
+static char*
+seprintsitd(char *s, char *se, Sitd *td)
+{
+	static char pc[4] = { 'a', 'b', 'm', 'e' };
+	char rw;
+	char pg;
+	char ss;
+	char flags[8];
+
+	if(td == nil)
+		return seprint(s, se, "<nil sitd>\n");
+	s = seprint(s, se, "sitd %#p", td);
+	rw = (td->epc & Stdin) ? 'r' : 'w';
+	s = seprint(s, se, " %c ep %uld dev %uld",
+		rw, (td->epc>>8)&0xf, td->epc&0x7f);
+	s = seprint(s, se, " max %uld", (td->csw >> 16) & 0x3ff);
+	s = seprint(s, se, " hub %uld", (td->epc >> 16) & 0x7f);
+	s = seprint(s, se, " port %uld\n", (td->epc >> 24) & 0x7f);
+	/* a: active, i: ioc, e: translator err, d: data buffer err, */
+	/* b: babble, t: transaction err, n: missed µframe */
+	memset(flags, '-', 7);
+	if((td->csw & Stdactive) != 0)
+		flags[0] = 'a';
+	if((td->csw & Stdioc) != 0)
+		flags[1] = 'i';
+	if((td->csw & Stderr) != 0)
+		flags[2] = 'e';
+	if((td->csw & Stddberr) != 0)
+		flags[3] = 'd';
+	if((td->csw & Stdbabble) != 0)
+		flags[4] = 'b';
+	if((td->csw & Stdtrerr) != 0)
+		flags[5] = 't';
+	if((td->csw & Stdmmf) != 0)
+		flags[6] = 'n';
+	flags[7] = 0;
+	ss = (td->csw & Stddcs) ? 'c' : 's';
+	pg = (td->csw & Stdpg) ? '1' : '0';
+	s = seprint(s, se, "\t%s %cs pg%c", flags, ss, pg);
+	s = seprint(s, se, " b0 %#ulx b1 %#ulx off %uld\n",
+		td->buffer[0] >> 12, td->buffer[1] >> 12, td->buffer[0] & 0xfff);
+	s = seprint(s, se, "\ttpos %c tcnt %uld",
+		pc[(td->buffer[1]>>3)&3], td->buffer[1] & Stdtcntmask);
+	s = seprint(s, se, " ssm %#ulx csm %#ulx cspm %#ulx",
+		td->mfs & 0xff, (td->mfs>>8) & 0xff, (td->csw>>8) & 0xff);
+	s = seprintlink(s, se, " link", td->link, 1);
+	s = seprintlink(s, se, " blink", td->blink, 0);
+	return seprint(s, se, "\n");
+}
+
+/*
+ * Value of the "total bytes to transfer" field of td->csw.
+ */
+static long
+maxtdlen(Td *td)
+{
+	ulong len;
+
+	len = td->csw >> Tdlenshift;
+	return len & Tdlenmask;
+}
+
+/*
+ * td->ndata minus the byte count still recorded in td->csw;
+ * 0 for a Td without data.
+ */
+static long
+tdlen(Td *td)
+{
+	if(td->data != nil)
+		return td->ndata - maxtdlen(td);
+	return 0;
+}
+
+/*
+ * Format a Td at s, never writing past se: tag, links, token,
+ * status flags, length, page/offset, buffer pages and, if there
+ * is any, the data.  Returns the new end of the text.
+ */
+static char*
+seprinttd(char *s, char *se, Td *td, char *tag)
+{
+	static char *tok[4] = { "out", "in", "setup", "BUG" };
+	static ulong bits[7] = {
+		Tdactive, Tdioc, Tdhalt, Tddberr, Tdbabble, Tdtrerr, Tdmmf
+	};
+	static char letters[] = "aihdbtn";	/* one per entry of bits[] */
+	char flags[9];
+	char toggle;
+	char split;
+	int i;
+
+	s = seprint(s, se, "%s %#p", tag, td);
+	s = seprintlink(s, se, " nlink", td->nlink, 0);
+	s = seprintlink(s, se, " alink", td->alink, 0);
+	s = seprint(s, se, " %s", tok[(td->csw & Tdtok) >> 8]);
+	if((td->csw & Tdping) != 0)
+		s = seprint(s, se, " png");
+	for(i = 0; i < nelem(bits); i++)
+		flags[i] = (td->csw & bits[i]) != 0 ? letters[i] : '-';
+	/* z: both bits of the error counter are zero */
+	flags[7] = (td->csw & (Tderr2|Tderr1)) == 0 ? 'z' : '-';
+	flags[8] = 0;
+	toggle = (td->csw & Tddata1) ? '1' : '0';
+	split = (td->csw & Tddcs) ? 'c' : 's';
+	s = seprint(s, se, "\n\td%c %s %cs", toggle, flags, split);
+	s = seprint(s, se, " max %uld", maxtdlen(td));
+	s = seprint(s, se, " pg %uld off %#ulx\n",
+		(td->csw >> Tdpgshift) & Tdpgmask, td->buffer[0] & 0xFFF);
+	s = seprint(s, se, "\tbuffs:");
+	for(i = 0; i < nelem(td->buffer); i++)
+		s = seprint(s, se, " %#ulx", td->buffer[i]>>12);
+	if(td->data != nil)
+		s = seprintdata(s, se, td->data, td->ndata);
+	return seprint(s, se, "\n");
+}
+
+/*
+ * Print the list of Tds starting at td, each prefixed by pref.
+ * Gives up with "...more tds..." on long lists.
+ */
+static void
+dumptd(Td *td, char *pref)
+{
+	char buf[256];
+	int n;
+
+	for(n = 0; td != nil; td = td->next){
+		seprinttd(buf, buf+sizeof(buf), td, pref);
+		print("%s", buf);
+		if(n++ > 20){
+			print("...more tds...\n");
+			break;
+		}
+	}
+}
+
+/*
+ * Print a Qh: a line with addresses and links, a line with the
+ * endpoint characteristics, and the transfer overlay printed as
+ * if it were a Td.  A nil qh prints "<nil qh>".
+ */
+static void
+qhdump(Qh *qh)
+{
+	static char *speed[] = {"full", "low", "high", "BUG"};
+	char buf[256];
+	char fl[5];
+	char *s, *se, *tag;
+	int n;
+	Td ovl;
+
+	if(qh == nil){
+		print("<nil qh>\n");
+		return;
+	}
+	tag = "qh";
+	if(qh->io != nil)
+		tag = qh->io->tag;
+	se = buf+sizeof(buf);
+
+	/* first line: who and where */
+	s = seprint(buf, se, "%s %#p", tag, qh);
+	s = seprint(s, se, " ep %uld dev %uld",
+		(qh->eps0>>8)&0xf, qh->eps0&0x7f);
+	s = seprint(s, se, " hub %uld", (qh->eps1 >> 16) & 0x7f);
+	s = seprint(s, se, " port %uld", (qh->eps1 >> 23) & 0x7f);
+	s = seprintlink(s, se, " link", qh->link, 1);
+	seprint(s, se, "  clink %#ulx", qh->clink);
+	print("%s\n", buf);
+
+	/* second line: endpoint characteristics */
+	s = seprint(buf, se, "\tnrld %uld", (qh->eps0 >> Qhrlcshift) & Qhrlcmask);
+	s = seprint(s, se, " nak %uld", (qh->alink >> 1) & 0xf);
+	s = seprint(s, se, " max %uld ", qhmaxpkt(qh));
+	n = 0;
+	if((qh->eps0 & Qhnhctl) != 0)
+		fl[n++] = 'c';
+	if((qh->eps0 & Qhhrl) != 0)
+		fl[n++] = 'h';
+	if((qh->eps0 & Qhdtc) != 0)
+		fl[n++] = 'd';
+	if((qh->eps0 & Qhint) != 0)
+		fl[n++] = 'i';
+	fl[n] = 0;
+	s = seprint(s, se, "%s", fl);
+	s = seprint(s, se, " %s", speed[(qh->eps0 >> 12) & 3]);
+	s = seprint(s, se, " mult %uld", (qh->eps1 >> Qhmultshift) & Qhmultmask);
+	seprint(s, se, " scm %#ulx ism %#ulx\n",
+		(qh->eps1 >> 8 & 0xff), qh->eps1 & 0xff);
+	print("%s\n", buf);
+
+	/* the overlay: 32 bytes starting at nlink, laid out like a Td */
+	memset(&ovl, 0, sizeof(ovl));
+	memmove(&ovl, &qh->nlink, 32);	/* overlay area */
+	seprinttd(buf, se, &ovl, "\tovl");
+	print("%s", buf);
+}
+
+/*
+ * Print the state of an iso endpoint.  With all == 0 only the
+ * descriptors at the interrupt (tdi) and user (tdu) positions are
+ * printed; otherwise every non-nil descriptor in the frame window
+ * is printed, with "i->" marking the interrupt position and "u->"
+ * the user position.  A nil iso prints "<nil iso>".
+ * The error string, if any, is reported once.
+ */
+static void
+isodump(Isoio* iso, int all)
+{
+	Itd *td, *tdi, *tdu;
+	Sitd *std, *stdi, *stdu;
+	char buf[256];
+	int i;
+
+	if(iso == nil){
+		print("<nil iso>\n");
+		return;
+	}
+	print("iso %#p %s %s speed state %d nframes %d maxsz %uld",
+		iso, iso->tok == Tdtokin ? "in" : "out",
+		iso->hs ? "high" : "full",
+		iso->state, iso->nframes, iso->maxsize);
+	print(" td0 %uld tdi %#p tdu %#p data %#p\n",
+		iso->td0frno, iso->tdi, iso->tdu, iso->data);
+	if(iso->err != nil)
+		print("\terr='%s'\n", iso->err);
+	if(all == 0){
+		if(iso->hs != 0){
+			tdi = iso->tdi;
+			seprintitd(buf, buf+sizeof(buf), tdi);
+			print("\ttdi %s\n", buf);
+			tdu = iso->tdu;
+			seprintitd(buf, buf+sizeof(buf), tdu);
+			print("\ttdu %s\n", buf);
+		}else{
+			stdi = iso->stdi;
+			seprintsitd(buf, buf+sizeof(buf), stdi);
+			print("\tstdi %s\n", buf);
+			stdu = iso->stdu;
+			seprintsitd(buf, buf+sizeof(buf), stdu);
+			print("\tstdu %s\n", buf);
+		}
+		return;
+	}
+	for(i = 0; i < Nisoframes; i++){
+		if(iso->tdps[i] == nil)
+			continue;
+		if(iso->hs != 0){
+			td = iso->itdps[i];
+			seprintitd(buf, buf+sizeof(buf), td);
+			if(td == iso->tdi)
+				print("i->");
+			if(td == iso->tdu)
+				print("u->");
+			print("[%d]\t%s", i, buf);
+		}else{
+			std = iso->sitdps[i];
+			seprintsitd(buf, buf+sizeof(buf), std);
+			if(std == iso->stdi)
+				print("i->");
+			if(std == iso->stdu)
+				print("u->");
+			print("[%d]\t%s", i, buf);
+		}
+	}
+}
+
+/*
+ * Print the state of the controller behind hp: counters,
+ * operational registers, port status, the async Qh list (at most
+ * about 100 entries), the interrupt Qhs, every active iso
+ * endpoint and the descriptor pool counters.
+ * The controller lock is held while its lists are walked.
+ */
+static void
+dump(Hci *hp)
+{
+	Ctlr *ctlr;
+	Isoio *iso;
+	Eopio *opio;
+	int i;
+	char buf[128];
+	char *s;
+	char *se;
+	Qh *qh;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	ilock(ctlr);
+	print("ehci port %#p frames %#p (%d fr.) nintr %d ntdintr %d",
+		ctlr->capio, ctlr->frames, ctlr->nframes,
+		ctlr->nintr, ctlr->ntdintr);
+	print(" nqhintr %d nisointr %d\n", ctlr->nqhintr, ctlr->nisointr);
+	print("\tcmd %#ulx sts %#ulx intr %#ulx frno %uld",
+		opio->cmd, opio->sts, opio->intr, opio->frno);
+	print(" base %#ulx link %#ulx fr0 %#ulx\n",
+		opio->frbase, opio->link, ctlr->frames[0]);
+	se = buf+sizeof(buf);
+	s = seprint(buf, se, "\t");
+	for(i = 0; i < hp->nports; i++){
+		s = seprint(s, se, "p%d %#ulx ", i, opio->portsc[i]);
+		/* with many ports, break the line half way */
+		if(hp->nports > 4 && i == hp->nports/2 - 1)
+			s = seprint(s, se, "\n\t");
+	}
+	print("%s\n", buf);
+	qh = ctlr->qhs;
+	i = 0;
+	do{
+		qhdump(qh);
+		qh = qh->next;
+	}while(qh != ctlr->qhs && i++ < 100);
+	if(i > 100)
+		print("...too many Qhs...\n");
+	if(ctlr->intrqhs != nil)
+		print("intr qhs:\n");
+	for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext)
+		qhdump(qh);
+	if(ctlr->iso != nil)
+		print("iso:\n");
+	/* dump each active endpoint, not the head of the list every time */
+	for(iso = ctlr->iso; iso != nil; iso = iso->next)
+		isodump(iso, 0);
+	print("%d eds in tree\n", ctlr->ntree);
+	iunlock(ctlr);
+	lock(&edpool);
+	print("%d eds allocated = %d in use + %d free\n",
+		edpool.nalloc, edpool.ninuse, edpool.nfree);
+	unlock(&edpool);
+}
+
+/*
+ * Map the error bits of a Td status word to a message.
+ * The first matching bit, in the order tested, wins;
+ * unknown non-zero values yield Eio.
+ */
+static char*
+errmsg(int err)
+{
+	char *m;
+
+	if(err == 0)
+		m = "ok";
+	else if(err & Tddberr)
+		m = "data buffer error";
+	else if(err & Tdbabble)
+		m = "babble detected";
+	else if(err & Tdtrerr)
+		m = "transaction error";
+	else if(err & Tdmmf)
+		m = "missed µframe";
+	else if(err & Tdhalt)
+		m = Estalled;	/* [uo]hci report this error */
+	else
+		m = Eio;
+	return m;
+}
+
+/*
+ * Map the error bits of an Itd status word to a message.
+ * The first matching bit, in the order tested, wins;
+ * unknown non-zero values yield Eio.
+ */
+static char*
+ierrmsg(int err)
+{
+	char *m;
+
+	if(err == 0)
+		m = "ok";
+	else if(err & Itddberr)
+		m = "data buffer error";
+	else if(err & Itdbabble)
+		m = "babble detected";
+	else if(err & Itdtrerr)
+		m = "transaction error";
+	else
+		m = Eio;
+	return m;
+}
+
+/*
+ * Map the error bits of an Sitd status word to a message.
+ * Stderr (the "tr. translator error" bit) is specific to Sitds;
+ * everything else is handed to errmsg.
+ */
+static char*
+serrmsg(int err)
+{
+	if(err & Stderr)
+		return "transaction translator error";
+	/* other errors have same numbers than Td errors */
+	return errmsg(err);
+}
+
+/*
+ * Wakeup condition for readers; a is the Isoio.  True when the
+ * endpoint is closing, or when it is a running input endpoint
+ * whose interrupt position differs from the user position.
+ */
+static int
+isocanread(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	if(iso->state == Qclose)
+		return 1;
+	if(iso->state != Qrun || iso->tok != Tdtokin)
+		return 0;
+	if(iso->hs != 0)
+		return iso->tdi != iso->tdu;
+	return iso->stdi != iso->stdu;
+}
+
+/*
+ * Wakeup condition for writers; a is the Isoio.  True when the
+ * endpoint is closing, or when it is a running output endpoint
+ * and the descriptor after the user position is not the one at
+ * the interrupt position.
+ */
+static int
+isocanwrite(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	if(iso->state == Qclose)
+		return 1;
+	if(iso->state != Qrun || iso->tok != Tdtokout)
+		return 0;
+	if(iso->hs != 0)
+		return iso->tdu->next != iso->tdi;
+	return iso->stdu->next != iso->stdi;
+}
+
+static void
+itdinit(Isoio *iso, Itd *td)	/* fill td->csw[] to transfer td->mdata bytes starting at td->data */
+{
+	ulong pa;
+	int p;
+	int t;
+	ulong tsize;
+	ulong size;
+
+	/*
+	 * BUG: This does not put an integral number of samples
+	 * on each µframe unless samples per packet % 8 == 0
+	 * Also, all samples are packed early on each frame.
+	 */
+	p = 0;	/* page select value for the current slot */
+	size = td->ndata = td->mdata;
+	pa = PADDR(td->data);
+	for(t = 0; size > 0 && t < 8; t++){	/* one slice of at most iso->maxsize bytes per csw slot */
+		tsize = size;
+		if(tsize > iso->maxsize)
+			tsize = iso->maxsize;
+		size -= tsize;
+		td->csw[t] = tsize << Itdlenshift;
+		assert(p < nelem(td->buffer));
+		td->csw[t] |= p << Itdpgshift;
+		td->csw[t] |= (pa & 0xFFF) << Itdoffshift;
+		td->csw[t] |= Itdactive|Itdioc;
+		if(((pa+tsize) & ~0xFFF) != (pa & ~0xFFF))	/* the next slice starts on another 4K page */
+			p++;
+		pa += tsize;
+	}
+}
+
+static void
+sitdinit(Isoio *iso, Sitd *td)	/* set up td->csw and td->buffer[] to transfer td->mdata bytes at td->data */
+{
+	td->ndata = td->mdata & Stdlenmask;
+	td->csw = (td->ndata << Stdlenshift) | Stdactive | Stdioc;
+	td->buffer[0] = PADDR(td->data);
+	td->buffer[1] = (td->buffer[0] & ~0xFFF) + 0x1000;	/* the 4K page following the data */
+	if(iso->tok == Tdtokin || td->ndata <= 188)
+		td->buffer[1] |= Stdtpall;
+	else
+		td->buffer[1] |= Stdtpbegin;
+	if(iso->tok == Tdtokin)
+		td->buffer[1] |= 1;	/* T-count of 1 for input */
+	else
+		td->buffer[1] |= ((td->ndata + 187 ) / 188) & Stdtcntmask;	/* nb. of 188-byte pieces, rounded up */
+}
+
+/*
+ * True if any of the transactions of td is still marked active.
+ */
+static int
+itdactive(Itd *td)
+{
+	ulong *cp, *end;
+
+	end = td->csw + nelem(td->csw);
+	for(cp = td->csw; cp < end; cp++)
+		if((*cp & Itdactive) != 0)
+			return 1;
+	return 0;
+}
+
+/*
+ * Interrupt processing for a high-speed iso endpoint.
+ * Starting at iso->tdi, collect status for up to nframes tds that
+ * the controller has finished, record the bytes received (input)
+ * or zero ndata on error, and advance iso->tdi past them.
+ * If the interrupt pointer is about to catch up with iso->tdu,
+ * the user td is zero filled, re-armed and skipped.
+ * Returns 0 if the first td is still active, 1 otherwise.
+ * Called by ehciintr with ctlr ilocked.
+ */
+static int
+isohsinterrupt(Ctlr *ctlr, Isoio *iso)
+{
+	Itd *tdi;
+	int err;
+	int i;
+	int t;
+	int nframes;
+
+	tdi = iso->tdi;
+	assert(tdi != nil);
+	if(itdactive(tdi))	/* not all tds are done */
+		return 0;
+	ctlr->nisointr++;
+	ddiprint("isohsintr: iso %#p: tdi %#p tdu %#p\n", iso, tdi, iso->tdu);
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("isohsintr: iso state");
+	if(debug > 1 || iso->debug > 1)
+		isodump(iso, 0);
+
+	nframes = iso->nframes / 2;		/* limit how many we look */
+	if(nframes > Nisoframes)
+		nframes = Nisoframes;
+
+	for(i = 0; i < nframes && itdactive(tdi) == 0; i++){
+		/*
+		 * For input, ndata is rebuilt for each td from the
+		 * lengths reported in its slots; for output it already
+		 * has the number of bytes transferred.
+		 */
+		if(iso->tok == Tdtokin)
+			tdi->ndata = 0;
+		err = 0;
+		/* csw[] is indexed by slot (t), never by frame (i) */
+		for(t = 0; t < nelem(tdi->csw); t++){
+			tdi->csw[t] &= ~Itdioc;
+			err |= tdi->csw[t] & Itderrors;
+			if(iso->tok == Tdtokin)
+				tdi->ndata += (tdi->csw[t] >> Itdlenshift)
+					& Itdlenmask;
+		}
+		if(err == 0)
+			iso->nerrs = 0;
+		else if(iso->nerrs++ > iso->nframes/2){
+			/* too many consecutive failures: report the error */
+			if(iso->err == nil){
+				iso->err = ierrmsg(err);
+				diprint("isohsintr: tdi %#p error %#ux %s\n",
+					tdi, err, iso->err);
+				diprint("ctlr load %uld\n", ctlr->load);
+			}
+			tdi->ndata = 0;
+		}else
+			tdi->ndata = 0;
+		if(tdi->next == iso->tdu || tdi->next->next == iso->tdu){
+			memset(iso->tdu->data, 0, iso->tdu->mdata);
+			itdinit(iso, iso->tdu);
+			iso->tdu = iso->tdu->next;
+			iso->nleft = 0;
+		}
+		tdi = tdi->next;
+	}
+	ddiprint("isohsintr: %d frames processed\n", nframes);
+	/* stopped by the limit: make the next td interrupt again */
+	if(i == nframes)
+		tdi->csw[0] |= Itdioc;
+	iso->tdi = tdi;
+	if(isocanwrite(iso) || isocanread(iso)){
+		diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso,
+			iso->tdi, iso->tdu);
+		wakeup(iso);
+	}
+	return 1;
+}
+
+/*
+ * Interrupt processing for a full-speed (split) iso endpoint.
+ * Starting at iso->stdi, collect status for up to nframes tds that
+ * are no longer active, record the length reported (input) or zero
+ * ndata on error, and advance iso->stdi past them.
+ * If the interrupt pointer is about to catch up with iso->stdu,
+ * the user td is zero filled, re-armed and skipped.
+ * Returns 0 if the first td is still active, 1 otherwise.
+ */
+static int
+isofsinterrupt(Ctlr *ctlr, Isoio *iso)
+{
+	Sitd *stdi;
+	int err;
+	int i;
+	int nframes;
+
+	stdi = iso->stdi;
+	assert(stdi != nil);
+	if((stdi->csw & Stdactive) != 0)		/* nothing new done */
+		return 0;
+	ctlr->nisointr++;
+	ddiprint("isofsintr: iso %#p: tdi %#p tdu %#p\n", iso, stdi, iso->stdu);
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("isofsintr: iso state");
+	if(debug > 1 || iso->debug > 1)
+		isodump(iso, 0);
+
+	nframes = iso->nframes / 2;		/* limit how many we look */
+	if(nframes > Nisoframes)
+		nframes = Nisoframes;
+
+	for(i = 0; i < nframes && (stdi->csw & Stdactive) == 0; i++){
+		stdi->csw &= ~Stdioc;
+		err = stdi->csw & Stderrors;
+		if(err == 0){
+			iso->nerrs = 0;
+			if(iso->tok == Tdtokin)
+				stdi->ndata = (stdi->csw>>Stdlenshift)&Stdlenmask;
+			/* else len is assumed correct */
+		}else if(iso->nerrs++ > iso->nframes/2){
+			/* too many consecutive failures: report the error */
+			if(iso->err == nil){
+				iso->err = serrmsg(err);
+				diprint("isofsintr: tdi %#p error %#ux %s\n",
+					stdi, err, iso->err);
+				diprint("ctlr load %uld\n", ctlr->load);
+			}
+			stdi->ndata = 0;
+		}else
+			stdi->ndata = 0;
+
+		/* about to reach the user td: zero, re-arm and skip it */
+		if(stdi->next == iso->stdu || stdi->next->next == iso->stdu){
+			memset(iso->stdu->data, 0, iso->stdu->mdata);
+			sitdinit(iso, iso->stdu);
+			iso->stdu = iso->stdu->next;
+			iso->nleft = 0;
+		}
+		stdi = stdi->next;
+	}
+	ddiprint("isofsintr: %d frames processed\n", nframes);
+	/* stopped by the limit: make the next td interrupt again */
+	if(i == nframes)
+		stdi->csw |= Stdioc;
+	iso->stdi = stdi;
+	if(isocanwrite(iso) || isocanread(iso)){
+		diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso,
+			iso->stdi, iso->stdu);
+		wakeup(iso);
+	}
+	return 1;
+}
+
+/*
+ * Interrupt processing for a running queue head.
+ * Walk its tds: return 0 as soon as one is still active; stop at
+ * the first td with errors (recording the message in qh->io->err)
+ * or at the first short td (end of transfer). The tds not consumed
+ * get ndata 0, the qh becomes Qdone and the sleeper on qh->io is
+ * awakened. Returns 1 in that case.
+ */
+static int
+qhinterrupt(Ctlr *ctlr, Qh *qh)
+{
+	Td *td;
+	int err;
+	char buf[256];
+
+	if(qh->state != Qrun)
+		panic("qhinterrupt: qh state");
+	if(qh->tds == nil)
+		panic("qhinterrupt: no tds");
+	if((qh->tds->csw & Tdactive) == 0)
+		ddqprint("qhinterrupt port %#p qh %#p\n",ctlr->capio, qh);
+
+	td = qh->tds;
+	while(td != nil){
+		if(td->csw & Tdactive)
+			return 0;
+		if((td->csw & Tderrors) != 0){
+			err = td->csw & Tderrors;
+			if(debug || qh->io->debug){
+				seprinttd(buf, buf+sizeof(buf), td, "intr-fail-td");
+				print("qh %#p io %#p\n\t%s\n", qh, qh->io, buf);
+			}
+			if(qh->io->err == nil){
+				qh->io->err = errmsg(td->csw & Tderrors);
+				dqprint("qhintr: td %#p csw %#ulx error %#ux %s\n",
+					td, td->csw, err, qh->io->err);
+			}
+			break;
+		}
+		td->ndata = tdlen(td);
+		if(td->ndata < maxtdlen(td)){	/* EOT */
+			td = td->next;
+			break;
+		}
+		td = td->next;
+	}
+
+	/*
+	 * Finished. Whatever is left (after an error or EOT)
+	 * carries no data; then let epio know.
+	 */
+	while(td != nil){
+		td->ndata = 0;
+		td = td->next;
+	}
+	qh->state = Qdone;
+	wakeup(qh->io);
+	return 1;
+}
+
+/*
+ * Controller interrupt service, also called directly when polling.
+ * Acknowledges the pending status bits and, if a transfer or error
+ * interrupt is reported, scans every iso endpoint, every qh in the
+ * periodic list and the circular async qh list for finished work.
+ * Returns 0 if the interrupt was not ours or nothing was found done;
+ * otherwise the number of iso endpoints and qhs that were serviced.
+ */
+static int
+ehciintr(Hci *hp)
+{
+	Ctlr *ctlr;
+	Eopio *opio;
+	Isoio *iso;
+	ulong sts;
+	Qh *qh;
+	int i;
+	int some;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+
+	/*
+	 * Will we know in USB 3.0 who the interrupt was for?.
+	 * Do they still teach indexing in CS?
+	 * This is Intel's doing.
+	 */
+	ilock(ctlr);
+	ctlr->nintr++;
+	sts = opio->sts & Sintrs;
+	if(sts == 0){		/* not ours; shared intr. */
+		iunlock(ctlr);
+		return 0;
+	}
+	opio->sts = sts;	/* write back the bits we saw */
+	if((sts & Sherr) != 0)
+		print("ehci: port %#p fatal host system error\n", ctlr->capio);
+	if((sts & Shalted) != 0)
+		print("ehci: port %#p: halted\n", ctlr->capio);
+	if((sts & Sasync) != 0){
+		dprint("ehci: doorbell\n");
+		wakeup(ctlr);
+	}
+	/*
+	 * We enter always this if, even if it seems the
+	 * interrupt does not report anything done/failed.
+	 * Some controllers don't post interrupts right.
+	 */
+	some = 0;
+	if((sts & (Serrintr|Sintr)) != 0){
+		ctlr->ntdintr++;
+		if(debug > 1){
+			print("ehci port %#p frames %#p nintr %d ntdintr %d",
+				ctlr->capio, ctlr->frames,
+				ctlr->nintr, ctlr->ntdintr);
+			print(" nqhintr %d nisointr %d\n",
+				ctlr->nqhintr, ctlr->nisointr);
+			print("\tcmd %#ulx sts %#ulx intr %#ulx frno %uld",
+				opio->cmd, opio->sts, opio->intr, opio->frno);
+		}
+
+		/* process the Iso transfers */
+		for(iso = ctlr->iso; iso != nil; iso = iso->next){
+			if(iso->state != Qrun && iso->state != Qdone)
+				continue;
+			if(iso->hs != 0)
+				some += isohsinterrupt(ctlr, iso);
+			else
+				some += isofsinterrupt(ctlr, iso);
+		}
+
+		/* process the qhs in the periodic tree */
+		for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext)
+			if(qh->state == Qrun)
+				some += qhinterrupt(ctlr, qh);
+
+		/* process the async Qh circular list */
+		qh = ctlr->qhs;
+		i = 0;
+		do{
+			if(qh->state == Qrun)
+				some += qhinterrupt(ctlr, qh);
+			qh = qh->next;
+		}while(qh != ctlr->qhs && i++ < 100);
+		/* i passes 100 only if the list did not close in time */
+		if(i > 100)
+			print("ehci: interrupt: qh loop?\n");
+	}
+	iunlock(ctlr);
+	return some;
+}
+
+/*
+ * Interrupt entry point: the argument is the Hci; the value
+ * returned by ehciintr is not used here.
+ */
+static void
+interrupt(Ureg*, void* a)
+{
+	Hci *hp;
+
+	hp = a;
+	ehciintr(hp);
+}
+
+/*
+ * Enable (on != 0) or disable the given port (numbered from 1),
+ * then sleep Enabledelay ms. Serialized by ctlr->portlck.
+ * Always returns 0; errors raised while sleeping are propagated
+ * after releasing portlck.
+ */
+static int
+portenable(Hci *hp, int port, int on)
+{
+	Ctlr *ctlr;
+	Eopio *opio;
+	int s;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	/* NOTE(review): status is sampled before any lock is taken */
+	s = opio->portsc[port-1];
+	qlock(&ctlr->portlck);
+	if(waserror()){
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	dprint("ehci %#p port %d enable=%d; sts %#x\n",
+		ctlr->capio, port, on, s);
+	ilock(ctlr);
+	/* change bits were set: write the sampled status back */
+	if(s & (Psstatuschg | Pschange))
+		opio->portsc[port-1] = s;
+	if(on)
+		opio->portsc[port-1] |= Psenable;
+	else
+		opio->portsc[port-1] &= ~Psenable;
+	microdelay(64);
+	iunlock(ctlr);
+	tsleep(&up->sleep, return0, 0, Enabledelay);
+	dprint("ehci %#p port %d enable=%d: sts %#ulx\n",
+		ctlr->capio, port, on, opio->portsc[port-1]);
+	qunlock(&ctlr->portlck);
+	poperror();
+	return 0;
+}
+
+/*
+ * A device found to be low-speed during status, or full-speed
+ * during reset, is not ours: the companion controller will
+ * handle it. usbd never sees low-speed devices. It does see
+ * full-speed ones, because their speed is known only after
+ * reset (usbd may notice a device not enabled in that case).
+ *
+ * Hand the port over by setting Psowner, leaving the change
+ * bits clear in the value written. ss names the speed for
+ * the debug message.
+ */
+static void
+portlend(Ctlr *ctlr, int port, char *ss)
+{
+	Eopio *opio;
+	ulong s;
+
+	opio = ctlr->opio;
+	dprint("ehci %#p port %d: %s speed device: no longer owned\n",
+		ctlr->capio, port, ss);
+	s = (opio->portsc[port-1] & ~(Pschange|Psstatuschg)) | Psowner;
+	opio->portsc[port-1] = s;
+}
+
+/*
+ * Reset the given port (numbered from 1). Does nothing when on is 0.
+ * Psreset is asserted and polled for up to 10 intervals of 10 ms,
+ * then cleared. A port that is not enabled afterwards is handed
+ * to the companion controller as a full-speed device.
+ * Always returns 0.
+ */
+static int
+portreset(Hci *hp, int port, int on)
+{
+	ulong s;
+	Eopio *opio;
+	Ctlr *ctlr;
+	int i;
+
+	if(on == 0)
+		return 0;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	qlock(&ctlr->portlck);
+	/*
+	 * NOTE(review): this handler releases the ilock, although it is
+	 * installed before ilock is taken — confirm nothing can raise
+	 * an error while the ilock is not held.
+	 */
+	if(waserror()){
+		iunlock(ctlr);
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	s = opio->portsc[port-1];
+	dprint("ehci %#p port %d reset; sts %#ulx\n", ctlr->capio, port, s);
+	ilock(ctlr);
+	s &= ~(Psenable|Psreset);
+	opio->portsc[port-1] = s|Psreset;
+	/* wait for the reset bit to clear, 100 ms at most */
+	for(i = 0; i < 10; i++){
+		delay(10);
+		if((opio->portsc[port-1] & Psreset) == 0)
+			break;
+	}
+	opio->portsc[port-1] &= ~Psreset;
+	delay(10);
+	if((opio->portsc[port-1] & Psenable) == 0)
+		portlend(ctlr, port, "full");
+
+	iunlock(ctlr);
+	dprint("ehci %#p after port %d reset; sts %#ulx\n",
+		ctlr->capio, port, opio->portsc[port-1]);
+	qunlock(&ctlr->portlck);
+	poperror();
+	return 0;
+}
+
+/*
+ * Return the status of the given port (numbered from 1) as a set
+ * of HP* bits. Pending change bits are written back to the port
+ * register. A present low-speed device is handed to the companion
+ * controller and reported as not present.
+ */
+static int
+portstatus(Hci *hp, int port)
+{
+	int s;
+	int r;
+	Eopio *opio;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	qlock(&ctlr->portlck);
+	/*
+	 * NOTE(review): this handler releases the ilock, although it is
+	 * installed before ilock is taken — confirm nothing can raise
+	 * an error while the ilock is not held.
+	 */
+	if(waserror()){
+		iunlock(ctlr);
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	ilock(ctlr);
+	s = opio->portsc[port-1];
+	if(s & (Psstatuschg | Pschange)){
+		opio->portsc[port-1] = s;
+		ddprint("ehci %#p port %d status %#x\n", ctlr->capio, port, s);
+	}
+	/*
+	 * If the port is a low speed port we yield ownership now
+	 * to the [uo]hci companion controller and pretend it's not here.
+	 */
+	if((s & Pspresent) != 0 && (s & Pslinemask) == Pslow){
+		portlend(ctlr, port, "low");
+		s &= ~Pspresent;			/* not for us this time */
+	}
+	iunlock(ctlr);
+	qunlock(&ctlr->portlck);
+	poperror();
+
+	/*
+	 * We must return status bits as a
+	 * get port status hub request would do.
+	 */
+	r = 0;
+	/* any device still reported present here is reported high-speed */
+	if(s & Pspresent)
+		r |= HPpresent|HPhigh;
+	if(s & Psenable)
+		r |= HPenable;
+	if(s & Pssuspend)
+		r |= HPsuspend;
+	if(s & Psreset)
+		r |= HPreset;
+	if(s & Psstatuschg)
+		r |= HPstatuschg;
+	if(s & Pschange)
+		r |= HPchange;
+	return r;
+}
+
+/*
+ * Format a one-line description of io, prefixed by pref, into
+ * the buffer s..e. Returns the position seprint left off at.
+ */
+static char*
+seprintio(char *s, char *e, Qio *io, char *pref)
+{
+	char *p;
+
+	p = seprint(s, e, "%s io %#p qh %#p id %#x", pref, io, io->qh, io->usbid);
+	p = seprint(p, e, " iot %ld", io->iotime);
+	return seprint(p, e, " tog %#x tok %#x err %s", io->toggle, io->tok, io->err);
+}
+
+/*
+ * Format the controller state of endpoint ep into s..e, with the
+ * controller ilocked. An endpoint that is not open (ep->aux nil)
+ * or is iso yields an empty string. Returns the end of the text.
+ */
+static char*
+seprintep(char *s, char *e, Ep *ep)
+{
+	Qio *io;
+	Ctlio *cio;
+	Ctlr *ctlr;
+
+	ctlr = ep->hp->aux;
+	ilock(ctlr);
+	if(ep->aux == nil){
+		*s = 0;
+		iunlock(ctlr);
+		return s;
+	}
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		s = seprintio(s, e, cio, "c");
+		s = seprint(s, e, "\trepl %d ndata %d\n", ep->rhrepl, cio->ndata);
+		break;
+	case Tbulk:
+	case Tintr:
+		/* ep->aux is an array of Qio indexed by OREAD/OWRITE */
+		io = ep->aux;
+		if(ep->mode != OWRITE)
+			s = seprintio(s, e, &io[OREAD], "r");
+		if(ep->mode != OREAD)
+			s = seprintio(s, e, &io[OWRITE], "w");
+		break;
+	case Tiso:
+		*s = 0;
+		break;
+	}
+	iunlock(ctlr);
+	return s;
+}
+
+/*
+ * The halt condition on the endpoint was cleared: forget the
+ * request and restart the data toggles at data0. Only bulk and
+ * interrupt endpoints keep toggles here; each direction the
+ * endpoint is open for is updated under its own qlock.
+ */
+static void
+clrhalt(Ep *ep)
+{
+	Qio *io;
+
+	ep->clrhalt = 0;
+	if(ep->ttype != Tintr && ep->ttype != Tbulk)
+		return;
+
+	io = ep->aux;
+	if(ep->mode != OREAD){
+		qlock(&io[OWRITE]);
+		io[OWRITE].toggle = Tddata0;
+		deprint("ep clrhalt for io %#p\n", io+OWRITE);
+		qunlock(&io[OWRITE]);
+	}
+	if(ep->mode != OWRITE){
+		qlock(&io[OREAD]);
+		io[OREAD].toggle = Tddata0;
+		deprint("ep clrhalt for io %#p\n", io+OREAD);
+		qunlock(&io[OREAD]);
+	}
+}
+
+/*
+ * Debug aid: print pref and the address, then the 16 ulongs
+ * found there, four per line.
+ */
+static void
+xdump(char* pref, void *qh)
+{
+	int i;
+	ulong *u;
+
+	u = qh;
+	print("%s %#p:", pref, u);
+	for(i = 0; i < 16; i++)
+		print("%s %#8.8ulx", (i%4) == 0? "\n": "", u[i]);
+	print("\n");
+}
+
+/*
+ * Copy up to count bytes of received high-speed iso data into b,
+ * consuming tds from iso->tdu until it reaches iso->tdi, finds an
+ * active td, or count is satisfied. A td emptied by the copy is
+ * re-armed and tdu advances. Returns the number of bytes copied.
+ * Called with ctlr ilocked; the lock is released around the copy
+ * into b and taken again afterwards.
+ */
+static long
+episohscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, uchar *b, long count)
+{
+	int nr;
+	long tot;
+	Itd *tdu;
+
+	for(tot = 0; iso->tdi != iso->tdu && tot < count; tot += nr){
+		tdu = iso->tdu;
+		if(itdactive(tdu))
+			break;
+		nr = tdu->ndata;
+		if(tot + nr > count)
+			nr = count - tot;
+		if(nr == 0)
+			print("ehci: ep%d.%d: too many polls\n",
+				ep->dev->nb, ep->nb);
+		else{
+			iunlock(ctlr);		/* We could page fault here */
+			memmove(b+tot, tdu->data, nr);
+			ilock(ctlr);
+			/* partial read: keep the rest at the start of the td */
+			if(nr < tdu->ndata)
+				memmove(tdu->data, tdu->data+nr, tdu->ndata - nr);
+			tdu->ndata -= nr;
+		}
+		if(tdu->ndata == 0){
+			itdinit(iso, tdu);
+			iso->tdu = tdu->next;
+		}
+	}
+	return tot;
+}
+
+/*
+ * Copy up to count bytes of received full-speed iso data into b,
+ * consuming tds from iso->stdu until it reaches iso->stdi, finds
+ * an active td, or count is satisfied. A td emptied by the copy is
+ * re-armed and stdu advances. Returns the number of bytes copied.
+ * Called with ctlr ilocked; the lock is released around the copy
+ * into b and taken again afterwards.
+ */
+static long
+episofscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, uchar *b, long count)
+{
+	int nr;
+	long tot;
+	Sitd *stdu;
+
+	for(tot = 0; iso->stdi != iso->stdu && tot < count; tot += nr){
+		stdu = iso->stdu;
+		if(stdu->csw & Stdactive){
+			diprint("ehci: episoread: %#p tdu active\n", iso);
+			break;
+		}
+		nr = stdu->ndata;
+		if(tot + nr > count)
+			nr = count - tot;
+		if(nr == 0)
+			print("ehci: ep%d.%d: too many polls\n",
+				ep->dev->nb, ep->nb);
+		else{
+			iunlock(ctlr);		/* We could page fault here */
+			memmove(b+tot, stdu->data, nr);
+			ilock(ctlr);
+			/* partial read: keep the rest at the start of the td */
+			if(nr < stdu->ndata)
+				memmove(stdu->data,stdu->data+nr,stdu->ndata - nr);
+			stdu->ndata -= nr;
+		}
+		if(stdu->ndata == 0){
+			sitdinit(iso, stdu);
+			iso->stdu = stdu->next;
+		}
+	}
+	return tot;
+}
+
+/*
+ * Read up to count bytes from an iso endpoint into a.
+ * Sleeps (Isotmout ms at a time) until isocanread holds, then
+ * copies what the interrupt routines collected. Returns the number
+ * of bytes copied; raises iso->err (or Eio on a closed endpoint).
+ * Serialized by the iso qlock.
+ */
+static long
+episoread(Ep *ep, Isoio *iso, void *a, long count)
+{
+	Ctlr *ctlr;
+	uchar *b;
+	long tot;
+
+	iso->debug = ep->debug;
+	diprint("ehci: episoread: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	b = a;
+	ctlr = ep->hp->aux;
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	iso->err = nil;
+	iso->nerrs = 0;
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	while(isocanread(iso) == 0){
+		iunlock(ctlr);
+		diprint("ehci: episoread: %#p sleep\n", iso);
+		/* an error raised while sleeping ends the wait */
+		if(waserror()){
+			if(iso->err == nil)
+				iso->err = "I/O timed out";
+			ilock(ctlr);
+			break;
+		}
+		tsleep(iso, isocanread, iso, Isotmout);
+		poperror();
+		ilock(ctlr);
+	}
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qdone;
+	/*
+	 * NOTE(review): if the wait above was left through the error
+	 * path, nothing guarantees tdu != tdi here — confirm.
+	 */
+	assert(iso->tdu != iso->tdi);
+
+	if(iso->hs != 0)
+		tot = episohscpy(ctlr, ep, iso, b, count);
+	else
+		tot = episofscpy(ctlr, ep, iso, b, count);
+	iunlock(ctlr);
+	qunlock(iso);
+	poperror();
+	diprint("ehci: episoread: %#p %uld bytes err '%s'\n", iso, tot, iso->err);
+	if(iso->err != nil)
+		error(iso->err);
+	return tot;
+}
+
+/*
+ * iso->tdu is the next place to put data. When it gets full
+ * it is activated and tdu advanced.
+ *
+ * Copies from b while isocanwrite holds and fewer than count bytes
+ * were taken; iso->nleft is the number of bytes already placed in
+ * the current td. Returns the number of bytes consumed.
+ */
+static long
+putsamples(Isoio *iso, uchar *b, long count)
+{
+	long tot;
+	long n;
+
+	for(tot = 0; isocanwrite(iso) && tot < count; tot += n){
+		n = count-tot;
+		if(iso->hs != 0){
+			/* no more than what fits in the current td */
+			if(n > iso->tdu->mdata - iso->nleft)
+				n = iso->tdu->mdata - iso->nleft;
+			memmove(iso->tdu->data+iso->nleft, b+tot, n);
+			iso->nleft += n;
+			if(iso->nleft == iso->tdu->mdata){
+				itdinit(iso, iso->tdu);
+				iso->nleft = 0;
+				iso->tdu = iso->tdu->next;
+			}
+		}else{
+			/* same thing, for split tds */
+			if(n > iso->stdu->mdata - iso->nleft)
+				n = iso->stdu->mdata - iso->nleft;
+			memmove(iso->stdu->data+iso->nleft, b+tot, n);
+			iso->nleft += n;
+			if(iso->nleft == iso->stdu->mdata){
+				sitdinit(iso, iso->stdu);
+				iso->nleft = 0;
+				iso->stdu = iso->stdu->next;
+			}
+		}
+	}
+	return tot;
+}
+
+/*
+ * Queue data for writing and return error status from
+ * last writes done, to maintain buffered data.
+ *
+ * Sleeps (Isotmout ms at a time) whenever isocanwrite does not
+ * hold. Returns the number of bytes queued; raises the pending
+ * error, or Eio if the endpoint was closed.
+ * Serialized by the iso qlock.
+ */
+static long
+episowrite(Ep *ep, Isoio *iso, void *a, long count)
+{
+	Ctlr *ctlr;
+	uchar *b;
+	int tot;
+	int nw;
+	char *err;
+
+	iso->debug = ep->debug;
+	diprint("ehci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	ctlr = ep->hp->aux;
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	b = a;
+	for(tot = 0; tot < count; tot += nw){
+		while(isocanwrite(iso) == 0){
+			iunlock(ctlr);
+			diprint("ehci: episowrite: %#p sleep\n", iso);
+			/* an error raised while sleeping ends the wait */
+			if(waserror()){
+				if(iso->err == nil)
+					iso->err = "I/O timed out";
+				ilock(ctlr);
+				break;
+			}
+			tsleep(iso, isocanwrite, iso, Isotmout);
+			poperror();
+			ilock(ctlr);
+		}
+		/* take the pending error, if any, and clear it */
+		err = iso->err;
+		iso->err = nil;
+		if(iso->state == Qclose || err != nil){
+			iunlock(ctlr);
+			error(err ? err : Eio);
+		}
+		if(iso->state != Qrun)
+			panic("episowrite: iso not running");
+		iunlock(ctlr);		/* We could page fault here */
+		nw = putsamples(iso, b+tot, count-tot);
+		ilock(ctlr);
+	}
+	if(iso->state != Qclose)
+		iso->state = Qdone;
+	iunlock(ctlr);
+	err = iso->err;		/* in case it failed early */
+	iso->err = nil;
+	qunlock(iso);
+	poperror();
+	if(err != nil)
+		error(err);
+	diprint("ehci: episowrite: %#p %d bytes\n", iso, tot);
+	return tot;
+}
+
+/*
+ * Data toggle to use after sending count bytes in packets of
+ * maxpkt bytes starting with toggle. The packet count is
+ * count/maxpkt, taken as 1 when that is 0; an even count keeps
+ * the toggle and an odd one flips it.
+ */
+static int
+nexttoggle(int toggle, int count, int maxpkt)
+{
+	int npkts;
+
+	npkts = count / maxpkt;
+	if(npkts == 0)
+		npkts = 1;
+	if((npkts % 2) != 0)
+		return toggle == Tddata1? Tddata0: Tddata1;
+	return toggle;
+}
+
+/*
+ * Allocate and fill a td for count bytes using the token and
+ * toggle recorded in io, plus flags. If a is not nil, count bytes
+ * are copied from it into the td's buffer. io->toggle is advanced
+ * past this td. Panics if count exceeds Tdmaxpkt.
+ */
+static Td*
+epgettd(Qio *io, int flags, void *a, int count, int maxpkt)
+{
+	Td *td;
+	ulong pa;
+	int i;
+	if(count > Tdmaxpkt)
+		panic("ehci: epgettd: too many bytes");
+	td = tdalloc();
+	td->csw = flags;
+	td->csw |= io->toggle | io->tok | (count << Tdlenshift);
+	td->csw |= Tderr2|Tderr1;
+
+	/*
+	 * use the space wasted by alignment as an
+	 * embedded buffer if count bytes fit in there.
+	 */
+	assert(Align > sizeof(Td));
+	if(count <= Align - sizeof(Td))
+		td->data = td->sbuff;
+	else
+		td->data = td->buff = smalloc(Tdmaxpkt);
+
+	/*
+	 * buffer[0] is the address of the data; the others are
+	 * the addresses of the 4K pages that follow it.
+	 */
+	pa = PADDR(td->data);
+	for(i = 0; i < nelem(td->buffer); i++){
+		td->buffer[i] = pa;
+		if(i > 0)
+			td->buffer[i] &= ~0xFFF;
+		pa += 0x1000;
+	}
+	td->ndata = count;
+	if(a != nil && count > 0)
+		memmove(td->data, a, count);
+	io->toggle = nexttoggle(io->toggle, count, maxpkt);
+	return td;
+}
+
+/*
+ * Try to get the tds of qh idle: the qh becomes Qdone and every
+ * td is halted; those still active are left with no data.
+ */
+static void
+aborttds(Qh *qh)
+{
+	Td *td;
+
+	qh->state = Qdone;
+	if(qh->sched >= 0 && (qh->eps0&Qhspeedmask) != Qhhigh)
+		qh->eps0 |= Qhint;	/* inactivate on next pass */
+	td = qh->tds;
+	while(td != nil){
+		if((td->csw & Tdactive) != 0)
+			td->ndata = 0;
+		td->csw |= Tdhalt;
+		td = td->next;
+	}
+}
+
+/*
+ * Some controllers do not post the usb/error interrupt after
+ * the work has been done. It seems that we must poll for them.
+ *
+ * Sleep condition for the poller: there are requests outstanding.
+ */
+static int
+workpending(void *a)
+{
+	return ((Ctlr*)a)->nreqs > 0;
+}
+
+/*
+ * Body of the polling kproc started by pollcheck; never returns.
+ * Sleeps while there are no requests; otherwise calls ehciintr
+ * directly, first in a burst of up to 16 calls and then once
+ * after every 1 ms sleep until no requests remain.
+ */
+static void
+ehcipoll(void* a)
+{
+	Hci *hp;
+	Ctlr *ctlr;
+	Poll *poll;
+	int i;
+
+	hp = a;
+	ctlr = hp->aux;
+	poll = &ctlr->poll;
+	for(;;){
+		if(ctlr->nreqs == 0){
+			if(0)ddprint("ehcipoll %#p sleep\n", ctlr->capio);
+			sleep(poll, workpending, ctlr);
+			if(0)ddprint("ehcipoll %#p awaken\n", ctlr->capio);
+		}
+		/* stop the burst as soon as ehciintr finds nothing done */
+		for(i = 0; i < 16 && ctlr->nreqs > 0; i++)
+			if(ehciintr(hp) == 0)
+				 break;
+		do{
+			tsleep(&up->sleep, return0, 0, 1);
+			ehciintr(hp);
+		}while(ctlr->nreqs > 0);
+	}
+}
+
+/*
+ * Start the polling kproc if polling was found necessary
+ * (poll->must) and nobody is doing it yet (poll->does).
+ * The condition is checked again with poll locked so that
+ * only one kproc is started.
+ */
+static void
+pollcheck(Hci *hp)
+{
+	Ctlr *ctlr;
+	Poll *poll;
+
+	ctlr = hp->aux;
+	poll = &ctlr->poll;
+
+	if(poll->must == 0 || poll->does != 0)
+		return;
+	lock(poll);
+	if(poll->must != 0 && poll->does == 0){
+		poll->does++;
+		print("ehci %#p: polling\n", ctlr->capio);
+		kproc("ehcipoll", ehcipoll, hp);
+	}
+	unlock(poll);
+}
+
+/*
+ * Sleep condition for epiowait: the qh is no longer running.
+ */
+static int
+epiodone(void *a)
+{
+	return ((Qh*)a)->state != Qrun;
+}
+
+/*
+ * Wait for the request installed on io->qh to finish, sleeping
+ * at most tmout ms (forever if tmout is 0). If the qh is still
+ * running afterwards the interrupt routine is called by hand and,
+ * if that completes it, polling is enabled for the controller.
+ * A request that did not finish is aborted and io->err set.
+ * On return the tds are unlinked from the qh, the qh is idle
+ * (unless closed) and load and nreqs are no longer accounted.
+ */
+static void
+epiowait(Hci *hp, Qio *io, int tmout, ulong load)
+{
+	Qh *qh;
+	int timedout;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	qh = io->qh;
+	ddqprint("ehci io %#p sleep on qh %#p state %s\n",
+		io, qh, qhsname[qh->state]);
+	timedout = 0;
+	/* an error raised while sleeping counts as a timeout */
+	if(waserror()){
+		dqprint("ehci io %#p qh %#p timed out\n", io, qh);
+		timedout++;
+	}else{
+		if(tmout == 0)
+			sleep(io, epiodone, qh);
+		else
+			tsleep(io, epiodone, qh, tmout);
+		poperror();
+	}
+
+	ilock(ctlr);
+	/* Are we missing interrupts? */
+	if(qh->state == Qrun){
+		iunlock(ctlr);
+		ehciintr(hp);
+		ilock(ctlr);
+		if(qh->state == Qdone){
+			dqprint("ehci %#p: polling required\n", ctlr->capio);
+			ctlr->poll.must = 1;
+			pollcheck(hp);
+		}
+	}
+
+	if(qh->state == Qrun){
+		dqprint("ehci io %#p qh %#p timed out (no intr?)\n", io, qh);
+		timedout = 1;
+	}else if(qh->state != Qdone && qh->state != Qclose)
+		panic("ehci: epio: queue state %d", qh->state);
+	if(timedout){
+		aborttds(io->qh);
+		io->err = "request timed out";
+		iunlock(ctlr);
+		/* wait Abortdelay ms after the abort; errors are ignored */
+		if(!waserror()){
+			tsleep(&up->sleep, return0, 0, Abortdelay);
+			poperror();
+		}
+		ilock(ctlr);
+	}
+	if(qh->state != Qclose)
+		qh->state = Qidle;
+	qhlinktd(qh, nil);
+	ctlr->load -= load;
+	ctlr->nreqs--;
+	iunlock(ctlr);
+}
+
+/*
+ * Non iso I/O.
+ * To make it work for control transfers, the caller may
+ * lock the Qio for the entire control transfer.
+ * If tmout is not 0 it is a timeout value in ms.
+ *
+ * The request is split into a chain of tds, linked to the qh,
+ * and waited for in epiowait; data read is then copied to a.
+ * If mustlock is set, io is qlocked for the duration.
+ * Returns the number of bytes transferred, or 0 if the endpoint
+ * stalled; other errors are raised.
+ */
+static long
+epio(Ep *ep, Qio *io, void *a, long count, int tmout, int mustlock)
+{
+	Td *td;
+	Td *ltd;
+	Td *td0;
+	Td *ntd;
+	Ctlr *ctlr;
+	Qh* qh;
+	long n;
+	long tot;
+	char buf[128];
+	uchar *c;
+	int saved;
+	int ntds;
+	ulong load;
+	char *err;
+
+	qh = io->qh;
+	ctlr = ep->hp->aux;
+	io->debug = ep->debug;
+	ddeprint("epio: %s ep%d.%d io %#p count %ld load %uld\n",
+		io->tok == Tdtokin ? "in" : "out",
+		ep->dev->nb, ep->nb, io, count, ctlr->load);
+	if((debug > 1 || ep->debug > 1) && io->tok != Tdtokin){
+		seprintdata(buf, buf+sizeof(buf), a, count);
+		print("ehci epio: user data: %s\n", buf);
+	}
+	if(mustlock){
+		qlock(io);
+		if(waserror()){
+			qunlock(io);
+			nexterror();
+		}
+	}
+	io->err = nil;
+	ilock(ctlr);
+	if(qh->state == Qclose){	/* Tds released by cancelio */
+		iunlock(ctlr);
+		error(io->err ? io->err : Eio);
+	}
+	if(qh->state != Qidle)
+		panic("epio: qh not idle");
+	qh->state = Qinstall;
+	iunlock(ctlr);
+
+	/*
+	 * Build the td chain. At least one td is made, even for
+	 * count 0; each carries a multiple of maxpkt bytes except
+	 * perhaps the last one.
+	 */
+	c = a;
+	td0 = ltd = nil;
+	load = tot = 0;
+	do{
+		n = (Tdmaxpkt / ep->maxpkt) * ep->maxpkt;
+		if(count-tot < n)
+			n = count-tot;
+		if(io->tok != Tdtokin)
+			td = epgettd(io, Tdactive, c+tot, n, ep->maxpkt);
+		else
+			td = epgettd(io, Tdactive, nil, n, ep->maxpkt);
+		if(td0 == nil)
+			td0 = td;
+		else
+			tdlinktd(ltd, td);
+		ltd = td;
+		tot += n;
+		load += ep->load;
+	}while(tot < count);
+	if(td0 == nil || ltd == nil)
+		panic("epio: no td");
+
+	ltd->csw |= Tdioc;	/* the last one interrupts */
+
+	ddeprint("ehci: load %uld ctlr load %uld\n", load, ctlr->load);
+	if(debug > 1 || ep->debug > 1)
+		dumptd(td0, "epio: put: ");
+
+	/* hand the chain to the controller, unless closed meanwhile */
+	ilock(ctlr);
+	if(qh->state != Qclose){
+		io->iotime = TK2MS(MACHP(0)->ticks);
+		qh->state = Qrun;
+		qhlinktd(qh, td0);
+		ctlr->nreqs++;
+		ctlr->load += load;
+	}
+	iunlock(ctlr);
+
+	if(ctlr->poll.does)
+		wakeup(&ctlr->poll);
+
+	epiowait(ep->hp, io, tmout, load);
+	if(debug > 1 || ep->debug > 1){
+		dumptd(td0, "epio: got: ");
+		qhdump(qh);
+	}
+
+	/* collect the results and release the tds */
+	tot = 0;
+	c = a;
+	saved = 0;
+	ntds = 0;
+	for(td = td0; td != nil; td = ntd){
+		ntds++;
+		/*
+		 * Use td tok, not io tok, because of setup packets.
+		 * Also, if the Td was stalled or active (previous Td
+		 * was a short packet), we must save the toggle as it is.
+		 */
+		if(td->csw & (Tdhalt|Tdactive)){
+			if(saved++ == 0)
+				io->toggle = td->csw & Tddata1;
+		}else{
+			tot += td->ndata;
+			if((td->csw & Tdtok) == Tdtokin && td->ndata > 0){
+				memmove(c, td->data, td->ndata);
+				c += td->ndata;
+			}
+		}
+		ntd = td->next;
+		tdfree(td);
+	}
+	err = io->err;
+	if(mustlock){
+		qunlock(io);
+		poperror();
+	}
+	ddeprint("epio: io %#p: %d tds: return %ld err '%s'\n",
+		io, ntds, tot, err);
+	if(err == Estalled)
+		return 0;	/* that's our convention */
+	if(err != nil)
+		error(err);
+	if(tot < 0)
+		error(Eio);
+	return tot;
+}
+
+/*
+ * Read from an open endpoint, according to its transfer type.
+ * Control endpoints return the data kept by the previous request
+ * written (see epctlio); the others perform I/O now.
+ * Returns the number of bytes read, or -1 for an unknown type.
+ */
+static long
+epread(Ep *ep, void *a, long count)
+{
+	Ctlio *cio;
+	Qio *io;
+	Isoio *iso;
+	char buf[160];
+	ulong delta;
+
+	ddeprint("ehci: epread\n");
+	if(ep->aux == nil)
+		panic("epread: not open");
+
+	pollcheck(ep->hp);
+
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		qlock(cio);
+		if(waserror()){
+			qunlock(cio);
+			nexterror();
+		}
+		ddeprint("epread ctl ndata %d\n", cio->ndata);
+		/* ndata: < 0 no request made; 0 report EOF once; > 0 data kept */
+		if(cio->ndata < 0)
+			error("request expected");
+		else if(cio->ndata == 0){
+			cio->ndata = -1;
+			count = 0;
+		}else{
+			if(count > cio->ndata)
+				count = cio->ndata;
+			if(count > 0)
+				memmove(a, cio->data, count);
+			/* BUG for big transfers */
+			free(cio->data);
+			cio->data = nil;
+			cio->ndata = 0;	/* signal EOF next time */
+		}
+		qunlock(cio);
+		poperror();
+		if(debug>1 || ep->debug){
+			seprintdata(buf, buf+sizeof(buf), a, count);
+			print("epread: %s\n", buf);
+		}
+		return count;
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, Bulktmout, 1);
+	case Tintr:
+		io = ep->aux;
+		/* keep reads at least half a poll interval apart */
+		delta = TK2MS(MACHP(0)->ticks) - io[OREAD].iotime + 1;
+		if(delta < ep->pollival / 2)
+			tsleep(&up->sleep, return0, 0, ep->pollival/2 - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 0, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episoread(ep, iso, a, count);
+	}
+	return -1;
+}
+
+/*
+ * Control transfers are one setup write (data0)
+ * plus zero or more reads/writes (data1, data0, ...)
+ * plus a final write/read with data1 to ack.
+ * For both host to device and device to host we perform
+ * the entire transfer when the user writes the request,
+ * and keep any data read from the device for a later read.
+ * We call epio three times instead of placing all Tds at
+ * the same time because doing so leads to crc/tmout errors
+ * for some devices.
+ * Upon errors on the data phase we must still run the status
+ * phase or the device may cease responding in the future.
+ *
+ * a holds the setup packet (Rsetuplen bytes) followed by any
+ * data to send. Returns the number of bytes consumed from a
+ * (Rsetuplen for device to host requests), or -1 if the data
+ * phase of a host to device request failed.
+ */
+static long
+epctlio(Ep *ep, Ctlio *cio, void *a, long count)
+{
+	uchar *c;
+	long len;
+
+	ddeprint("epctlio: cio %#p ep%d.%d count %ld\n",
+		cio, ep->dev->nb, ep->nb, count);
+	if(count < Rsetuplen)
+		error("short usb comand");
+	qlock(cio);
+	/* discard data kept from a previous request */
+	free(cio->data);
+	cio->data = nil;
+	cio->ndata = 0;
+	if(waserror()){
+		qunlock(cio);
+		free(cio->data);
+		cio->data = nil;
+		cio->ndata = 0;
+		nexterror();
+	}
+
+	/* set the address if unset and out of configuration state */
+	if(ep->dev->state != Dconfig && cio->usbid == 0){
+		cio->usbid = ((ep->nb&Epmax)<<7)|(ep->dev->nb&Devmax);
+		qhsetaddr(cio->qh, cio->usbid);
+	}
+	/* adjust maxpkt if the user has learned a different one */
+	if(qhmaxpkt(cio->qh) != ep->maxpkt)
+		qhsetmaxpkt(cio->qh, ep->maxpkt);
+	/* setup phase */
+	c = a;
+	cio->tok = Tdtoksetup;
+	cio->toggle = Tddata0;
+	if(epio(ep, cio, a, Rsetuplen, Ctltmout, 0) < Rsetuplen)
+		error(Eio);
+	a = c + Rsetuplen;
+	count -= Rsetuplen;
+
+	/* data phase: direction and length come from the setup packet */
+	cio->toggle = Tddata1;
+	if(c[Rtype] & Rd2h){
+		cio->tok = Tdtokin;
+		len = GET2(c+Rcount);
+		if(len <= 0)
+			error("bad length in d2h request");
+		if(len > Maxctllen)
+			error("d2h data too large to fit in ehci");
+		a = cio->data = smalloc(len+1);
+	}else{
+		cio->tok = Tdtokout;
+		len = count;
+	}
+	/* a failed data phase is recorded as len -1; status still runs */
+	if(len > 0)
+		if(waserror())
+			len = -1;
+		else{
+			len = epio(ep, cio, a, len, Ctltmout, 0);
+			poperror();
+		}
+	/* status phase goes in the direction opposite to the data */
+	if(c[Rtype] & Rd2h){
+		count = Rsetuplen;
+		cio->ndata = len;
+		cio->tok = Tdtokout;
+	}else{
+		if(len < 0)
+			count = -1;
+		else
+			count = Rsetuplen + len;
+		cio->tok = Tdtokin;
+	}
+	cio->toggle = Tddata1;
+	epio(ep, cio, nil, 0, Ctltmout, 0);
+	qunlock(cio);
+	poperror();
+	ddeprint("epctlio cio %#p return %ld\n", cio, count);
+	return count;
+}
+
+/*
+ * Write to an open endpoint, according to its transfer type.
+ * A write on a control endpoint performs a whole control transfer.
+ * Returns the number of bytes written, or -1 for an unknown type.
+ */
+static long
+epwrite(Ep *ep, void *a, long count)
+{
+	Qio *io;
+	Ctlio *cio;
+	Isoio *iso;
+	ulong delta;
+
+	pollcheck(ep->hp);
+
+	ddeprint("ehci: epwrite ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux == nil)
+		panic("ehci: epwrite: not open");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		return epctlio(ep, cio, a, count);
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OWRITE], a, count, Bulktmout, 1);
+	case Tintr:
+		io = ep->aux;
+		/* keep writes at least a poll interval apart */
+		delta = TK2MS(MACHP(0)->ticks) - io[OWRITE].iotime + 1;
+		if(delta < ep->pollival)
+			tsleep(&up->sleep, return0, 0, ep->pollival - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OWRITE], a, count, 0, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episowrite(ep, iso, a, count);
+	}
+	return -1;
+}
+
+/*
+ * Allocate and initialize the split tds of a full-speed iso
+ * endpoint: iso->nframes tds, placed in iso->sitdps every
+ * ep->pollival frames starting at iso->td0frno, each using
+ * its own ep->maxpkt bytes of iso->data, linked into a ring.
+ */
+static void
+isofsinit(Ep *ep, Isoio *iso)
+{
+	long left;
+	Sitd *td;
+	Sitd *ltd;
+	int i;
+	ulong frno;
+
+	left = 0;
+	ltd = nil;
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		td = iso->sitdps[frno] = sitdalloc();
+		td->data = iso->data + i * ep->maxpkt;
+		td->epc = ep->dev->port << Stdportshift;
+		td->epc |= ep->dev->hub << Stdhubshift;
+		td->epc |= ep->nb << Stdepshift;
+		td->epc |= ep->dev->nb << Stddevshift;
+		td->mfs = (034 << Stdscmshift) | (1 << Stdssmshift);
+		if(ep->mode == OREAD){
+			td->epc |= Stdin;
+			td->mdata = ep->maxpkt;
+		}else{
+			/*
+			 * samples due in this interval; the remainder
+			 * of the division is carried over in left.
+			 */
+			td->mdata = (ep->hz+left) * ep->pollival / 1000;
+			td->mdata *= ep->samplesz;
+			left = (ep->hz+left) * ep->pollival % 1000;
+			if(td->mdata > ep->maxpkt){
+				print("ehci: ep%d.%d: size > maxpkt\n",
+					ep->dev->nb, ep->nb);
+				print("size = %ld max = %ld\n",
+					td->mdata,ep->maxpkt);
+				td->mdata = ep->maxpkt;
+			}
+		}
+
+		sitdinit(iso, td);
+		if(ltd != nil)
+			ltd->next = td;
+		ltd = td;
+		frno = TRUNC(frno+ep->pollival, Nisoframes);
+	}
+	/* close the ring */
+	ltd->next = iso->sitdps[iso->td0frno];
+}
+
+/*
+ * Allocate and initialize the tds of a high-speed iso endpoint:
+ * iso->nframes tds, placed in iso->itdps every ival frames
+ * starting at iso->td0frno, each using its own 8*iso->maxsize
+ * bytes of iso->data, linked into a ring as isofsinit does
+ * (the interrupt and write paths follow ->next circularly).
+ */
+static void
+isohsinit(Ep *ep, Isoio *iso)
+{
+	long left;
+	Itd *td;
+	Itd *ltd;
+	ulong i;
+	ulong pa;
+	int p;
+	ulong frno;
+	int ival;
+
+	iso->hs = 1;
+	ival = 1;
+	if(ep->pollival > 8)
+		ival = ep->pollival/8;
+	left = 0;
+	ltd = nil;
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		td = iso->itdps[frno] = itdalloc();
+		td->data = iso->data + i * 8  * iso->maxsize;
+		/*
+		 * Buffer pages are consecutive 4K pages starting at
+		 * the one holding this td's data: itdinit computes
+		 * page indexes and offsets relative to td->data.
+		 * The low bits of the first three entries carry the
+		 * endpoint parameters.
+		 */
+		pa = PADDR(td->data) & ~0xFFF;
+		for(p = 0; p < nelem(td->buffer); p++)
+			td->buffer[p] = pa + p * 0x1000;
+		td->buffer[0] |= ep->nb << Itdepshift;
+		td->buffer[0] |= ep->dev->nb << Itddevshift;
+		if(ep->mode == OREAD)
+			td->buffer[1] |= Itdin;
+		else
+			td->buffer[1] |= Itdout;
+		td->buffer[1] |= ep->maxpkt << Itdmaxpktshift;
+		td->buffer[2] |= ep->ntds << Itdntdsshift;
+
+		if(ep->mode == OREAD)
+			td->mdata = 8 * iso->maxsize;
+		else{
+			/*
+			 * samples due in this interval; the remainder
+			 * of the division is carried over in left.
+			 */
+			td->mdata = (ep->hz + left) * ep->pollival / 1000;
+			td->mdata *= ep->samplesz;
+			left = (ep->hz + left) * ep->pollival % 1000;
+		}
+		itdinit(iso, td);
+		if(ltd != nil)
+			ltd->next = td;
+		ltd = td;
+		frno = TRUNC(frno + ival, Nisoframes);
+	}
+	/* close the ring */
+	ltd->next = iso->itdps[iso->td0frno];
+}
+
+/*
+ * Set up an iso endpoint (ep->aux is its Isoio) for I/O:
+ * account for its bandwidth, allocate its tds and data, and
+ * link the tds into the controller frame array. The endpoint is
+ * left in Qdone state on the controller's iso list.
+ * Raises an error for endpoints open for both reading and
+ * writing and when the bandwidth available is exceeded.
+ */
+static void
+isoopen(Ctlr *ctlr, Ep *ep)
+{
+	Isoio *iso;
+	int ival;	/* pollival in ms */
+	int n;
+	ulong frno;
+	int i;
+	int w;
+	int woff;
+	int tpf;		/* tds per frame */
+
+	iso = ep->aux;
+	switch(ep->mode){
+	case OREAD:
+		iso->tok = Tdtokin;
+		break;
+	case OWRITE:
+		iso->tok = Tdtokout;
+		break;
+	default:
+		error("iso i/o is half-duplex");
+	}
+	iso->usbid = (ep->nb<<7)|(ep->dev->nb & Devmax);
+	iso->state = Qidle;
+	iso->debug = ep->debug;
+	/* for high speed, pollival is converted to frames by dividing by 8 */
+	ival = ep->pollival;
+	tpf = 1;
+	if(ep->dev->speed == Highspeed){
+		tpf = 8;
+		if(ival <= 8)
+			ival = 1;
+		else
+			ival /= 8;
+	}
+	iso->nframes = Nisoframes / ival;
+	if(iso->nframes < 3)
+		error("uhci isoopen bug");	/* we need at least 3 tds */
+	iso->maxsize = ep->ntds * ep->maxpkt;
+	ilock(ctlr);
+	if(ctlr->load + ep->load > 800){
+		iunlock(ctlr);
+		error("bandwidth exceeded");
+	}
+	ctlr->load += ep->load;
+	ctlr->isoload += ep->load;
+	ctlr->nreqs++;
+	dprint("ehci: load %uld isoload %uld\n", ctlr->load, ctlr->isoload);
+	diprint("iso nframes %d pollival %uld ival %d maxpkt %uld ntds %d\n",
+		iso->nframes, ep->pollival, ival, ep->maxpkt, ep->ntds);
+	iunlock(ctlr);
+	if(ctlr->poll.does)
+		wakeup(&ctlr->poll);
+
+	/*
+	 * From here on this cannot raise errors
+	 * unless we catch them and release here all memory allocated.
+	 */
+	assert(ep->maxpkt > 0 && ep->ntds > 0 && ep->ntds < 4);
+	assert(ep->maxpkt <= 1024);
+	iso->tdps = smalloc(sizeof(uintptr) * Nisoframes);
+	iso->data = smalloc(iso->nframes * tpf * ep->ntds * ep->maxpkt);
+	iso->td0frno = TRUNC(ctlr->opio->frno + 10, Nisoframes);
+	/* read: now; write: 1s ahead */
+
+	if(ep->dev->speed == Highspeed)
+		isohsinit(ep, iso);
+	else
+		isofsinit(ep, iso);
+	/* presumably the i/s variants alias each other — TODO confirm */
+	iso->tdu = iso->tdi = iso->itdps[iso->td0frno];
+	iso->stdu = iso->stdi = iso->sitdps[iso->td0frno];
+
+	/* each td inherits the link found in its frame entry */
+	ilock(ctlr);
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		*iso->tdps[frno] = ctlr->frames[frno];
+		frno = TRUNC(frno+ival, Nisoframes);
+	}
+
+	/*
+	 * Iso uses a virtual frame window of Nisoframes, and we must
+	 * fill the actual ctlr frame array by placing ctlr->nframes/Nisoframes
+	 * copies of the window in the frame array.
+	 */
+	assert(ctlr->nframes >= Nisoframes && Nisoframes >= iso->nframes);
+	assert(Nisoframes >= Nintrleafs);
+	n = ctlr->nframes / Nisoframes;
+	for(w = 0; w < n; w++){
+		frno = iso->td0frno;
+		woff = w * Nisoframes;
+		for(i = 0; i < iso->nframes ; i++){
+			assert(woff+frno < ctlr->nframes);
+			assert(iso->tdps[frno] != nil);
+			if(ep->dev->speed == Highspeed)
+				ctlr->frames[woff+frno] = PADDR(iso->tdps[frno])|Litd;
+			else
+				ctlr->frames[woff+frno] = PADDR(iso->tdps[frno])|Lsitd;
+			/*
+			 * Step in frames (ival), as used when the tds
+			 * were placed; pollival is in other units for
+			 * high-speed endpoints.
+			 */
+			frno = TRUNC(frno+ival, Nisoframes);
+		}
+	}
+	iso->next = ctlr->iso;
+	ctlr->iso = iso;
+	iso->state = Qdone;
+	iunlock(ctlr);
+	if(debug > 1 || iso->debug >1)
+		isodump(iso, 0);
+}
+
+/*
+ * Create the controller-side I/O state for an endpoint and hang
+ * it off ep->aux, following the configuration recorded in Ep,
+ * including the data toggles that a previous epclose may have
+ * saved.  If anything below raises an error, ep->aux is freed
+ * and cleared before the error is passed on.
+ */
+static void
+epopen(Ep *ep)
+{
+	Ctlr *ctlr;
+	Ctlio *cio;
+	Qio *io, *rio, *wio;
+	int id;
+
+	ctlr = ep->hp->aux;
+	deprint("ehci: epopen ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux != nil)
+		panic("ehci: epopen called with open ep");
+	if(waserror()){
+		free(ep->aux);
+		ep->aux = nil;
+		nexterror();
+	}
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tiso:
+		ep->aux = smalloc(sizeof(Isoio));
+		isoopen(ctlr, ep);
+		break;
+	case Tctl:
+		/* set ep->aux first so the error path above can free it */
+		cio = smalloc(sizeof(Ctlio));
+		ep->aux = cio;
+		cio->debug = ep->debug;
+		cio->ndata = -1;
+		cio->data = nil;
+		/* root hub: no queue head is allocated */
+		if(ep->nb != 0 || ep->dev->isroot == 0)
+			cio->qh = qhalloc(ctlr, ep, cio, "epc");
+		break;
+	case Tbulk:
+		ep->pollival = 1;	/* assume this; doesn't really matter */
+		/* and fall... */
+	case Tintr:
+		/* one Qio per direction, indexed by OREAD and OWRITE */
+		io = smalloc(sizeof(Qio)*2);
+		ep->aux = io;
+		io[OWRITE].debug = ep->debug;
+		io[OREAD].debug = ep->debug;
+		id = (ep->nb&Epmax)<<7 | (ep->dev->nb&Devmax);
+		if(ep->mode != OREAD){
+			wio = &io[OWRITE];
+			wio->toggle = ep->toggle[OWRITE] != 0? Tddata1: Tddata0;
+			wio->tok = Tdtokout;
+			wio->usbid = id;
+			wio->bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */
+			wio->qh = qhalloc(ctlr, ep, wio, "epw");
+		}
+		if(ep->mode != OWRITE){
+			rio = &io[OREAD];
+			rio->toggle = ep->toggle[OREAD] != 0? Tddata1: Tddata0;
+			rio->tok = Tdtokin;
+			rio->usbid = id;
+			rio->bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */
+			rio->qh = qhalloc(ctlr, ep, rio, "epr");
+		}
+		break;
+	}
+	if(debug>1 || ep->debug)
+		dump(ep->hp);
+	deprint("ehci: epopen done\n");
+	poperror();
+}
+
+/*
+ * Abort whatever is pending on io's queue head, mark the qh
+ * closed, wake and wait out anyone still holding io's qlock, and
+ * then free the qh.  Does nothing when io is nil, has no qh, or
+ * the qh is already in state Qclose.
+ */
+static void
+cancelio(Ctlr *ctlr, Qio *io)
+{
+	Qh *qh;
+
+	/* test io before it is dereferenced to fetch its qh */
+	if(io == nil)
+		return;
+	ilock(ctlr);
+	qh = io->qh;
+	if(qh == nil || qh->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	dqprint("ehci: cancelio for qh %#p state %s\n",
+		qh, qhsname[qh->state]);
+	aborttds(qh);
+	qh->state = Qclose;
+	iunlock(ctlr);
+	/* pause for Abortdelay; an error raised by the sleep is ignored */
+	if(!waserror()){
+		tsleep(&up->sleep, return0, 0, Abortdelay);
+		poperror();
+	}
+	wakeup(io);
+	qlock(io);
+	/* wait for epio if running */
+	qunlock(io);
+
+	qhfree(ctlr, qh);
+	io->qh = nil;
+}
+
+/*
+ * Tear down isochronous I/O for iso.  Gives back its load,
+ * removes it from the ctlr->iso list, clears the ioc and active
+ * bits of its tds and unlinks them from the frame list, waits for
+ * I/O in progress to cease, and finally frees the tds, the td
+ * pointer array and the data buffer.  pollival and load are the
+ * endpoint's values (epclose passes ep->pollival and ep->load).
+ * Does nothing if iso is already in state Qclose.
+ */
+static void
+cancelisoio(Ctlr *ctlr, Isoio *iso, int pollival, ulong load)
+{
+	Isoio **il;
+	ulong *lp;
+	int i;
+	int frno;
+	int w;
+	int n;
+	int woff;
+	ulong *tp;
+	Itd *td;
+	Sitd *std;
+	int t;
+
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	ctlr->nreqs--;
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("bad iso state");
+	iso->state = Qclose;
+	if(ctlr->isoload < load)
+		panic("ehci: low isoload");
+	ctlr->isoload -= load;
+	ctlr->load -= load;
+	for(il = &ctlr->iso; *il != nil; il = &(*il)->next)
+		if(*il == iso)
+			break;
+	if(*il == nil)
+		panic("cancleiso: not found");
+	*il = iso->next;
+
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		tp = iso->tdps[frno];
+		if(iso->hs != 0){
+			td = iso->itdps[frno];
+			/* every csw entry of the itd, not just csw[1] */
+			for(t = 0; t < nelem(td->csw); t++)
+				td->csw[t] &= ~(Itdioc|Itdactive);
+		}else{
+			std = iso->sitdps[frno];
+			std->csw &= ~(Stdioc|Stdactive);
+		}
+		/* follow the links from the frame slot until one points at tp */
+		for(lp=&ctlr->frames[frno]; !(*lp & Lterm); lp = &LPTR(*lp)[0])
+			if(LPTR(*lp) == tp)
+				break;
+		if(*lp & Lterm)
+			panic("cancelisoio: td not found");
+		*lp = tp[0];
+		/*
+		 * Iso uses a virtual frame window of Nisoframes, and we must
+		 * restore pointers in copies of the window kept at ctlr->frames.
+		 */
+		if(lp == &ctlr->frames[frno]){
+			n = ctlr->nframes / Nisoframes;
+			for(w = 1; w < n; w++){
+				woff = w * Nisoframes;
+				ctlr->frames[woff+frno] = *lp;
+			}
+		}
+		frno = TRUNC(frno+pollival, Nisoframes);
+	}
+	iunlock(ctlr);
+
+	/*
+	 * wakeup anyone waiting for I/O and
+	 * wait to be sure no I/O is in progress in the controller.
+	 * and then wait to be sure episo* is no longer running.
+	 */
+	wakeup(iso);
+	diprint("cancelisoio iso %#p waiting for I/O to cease\n", iso);
+	tsleep(&up->sleep, return0, 0, 5);
+	qlock(iso);
+	qunlock(iso);
+	diprint("cancelisoio iso %#p releasing iso\n", iso);
+
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		if(iso->hs != 0)
+			itdfree(iso->itdps[frno]);
+		else
+			sitdfree(iso->sitdps[frno]);
+		iso->tdps[frno] = nil;
+		frno = TRUNC(frno+pollival, Nisoframes);
+	}
+	free(iso->tdps);
+	iso->tdps = nil;
+	free(iso->data);
+	iso->data = nil;
+}
+
+/*
+ * Release the controller-side state that epopen attached to
+ * ep->aux.  Pending I/O is cancelled first; for interrupt and
+ * bulk endpoints the current data toggles are saved in
+ * ep->toggle.  Panics if the endpoint is not open or has an
+ * unexpected transfer type.
+ */
+static void
+epclose(Ep *ep)
+{
+	Qio *io;
+	Ctlio *cio;
+	Isoio *iso;
+	Ctlr *ctlr;
+
+	ctlr = ep->hp->aux;
+	deprint("ehci: epclose ep%d.%d\n", ep->dev->nb, ep->nb);
+
+	if(ep->aux == nil)
+		panic("ehci: epclose called with closed ep");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		cancelio(ctlr, cio);
+		free(cio->data);
+		cio->data = nil;
+		break;
+	case Tintr:
+	case Tbulk:
+		io = ep->aux;
+		/* toggles default to 0 and become 1 only if the Qio ended on Tddata1 */
+		ep->toggle[OREAD] = ep->toggle[OWRITE] = 0;
+		if(ep->mode != OWRITE){
+			cancelio(ctlr, &io[OREAD]);
+			if(io[OREAD].toggle == Tddata1)
+				ep->toggle[OREAD] = 1;
+		}
+		if(ep->mode != OREAD){
+			cancelio(ctlr, &io[OWRITE]);
+			if(io[OWRITE].toggle == Tddata1)
+				ep->toggle[OWRITE] = 1;
+		}
+		break;
+	case Tiso:
+		iso = ep->aux;
+		cancelisoio(ctlr, iso, ep->pollival, ep->load);
+		break;
+	default:
+		panic("epclose: bad ttype");
+	}
+	free(ep->aux);
+	ep->aux = nil;
+}
+
+/*
+ * Record the controller in ctlrs[0].  Despite the name nothing
+ * is scanned: the capability and operational register blocks are
+ * at fixed offsets from Addrusb.  Only the first call does any
+ * work; later calls return at once.
+ */
+static void
+scanpci(void)		/* actually just use fixed addresses on sheeva */
+{
+	int i = 0;
+	Ctlr *ctlr;
+	static int already = 0;
+
+	if(already)
+		return;
+	already = 1;
+
+	/*
+	 * Refuse to store past the end of ctlrs (presumably Nhcis
+	 * entries, as the loop in reset assumes).
+	 */
+	if(i >= Nhcis){
+		print("ehci: bug: no more controllers\n");
+		return;
+	}
+	ctlr = mallocz(sizeof(Ctlr), 1);
+	if(ctlr == nil)
+		panic("ehci: scanpci: no memory");
+	/* the sheeva's usb 2.0 otg uses a superset of the ehci registers */
+	ctlr->capio = (Ecapio *)(Addrusb + 0x100);
+	ctlr->opio  = (Eopio *) (Addrusb + 0x140);
+	dprint("usbehci: port %#p\n", ctlr->capio);
+	ctlrs[i] = ctlr;
+}
+
+/*
+ * return the smallest i such that (1 << i) >= n,
+ * i.e. the base 2 logarithm of n rounded up; 0 when n <= 1.
+ */
+static int
+flog2(int n)
+{
+	int i;
+
+	if(n <= 1)
+		return 0;
+	/* shift an unsigned long so that n > 2^30 cannot overflow an int */
+	for(i = 0; (1UL << i) < (unsigned long)n; i++)
+		;
+	return i;
+}
+
+/*
+ * build the periodic scheduling tree:
+ * framesize must be a multiple of the tree size
+ *
+ * The tree has depth flog2(Nintrleafs) and n = 2^(depth+1)-1
+ * queue heads kept in an array; node i is linked with node
+ * (i-1)/2 by qhlinkqh.  The last level starts at index n/2.
+ * The frame list is then filled with repeated copies of the
+ * Nintrleafs leaf pointers, and the tree is saved in ctlr->tree.
+ * Panics when memory runs out.
+ */
+static void
+mkqhtree(Ctlr *ctlr)
+{
+	int i, n, d, o, leaf0, depth;
+	Qh **tree;
+	Qtree *qt;
+	Qh *qh;
+	ulong leafs[Nintrleafs];
+
+	depth = flog2(Nintrleafs);
+	n = (1 << (depth+1)) - 1;
+	qt = mallocz(sizeof(*qt), 1);
+	if(qt == nil)
+		panic("ehci: mkqhtree: no memory");
+	qt->nel = n;
+	qt->depth = depth;
+	/* size of one bw element, not of the pointer that holds the array */
+	qt->bw = mallocz(n * sizeof(*qt->bw), 1);
+	qt->root = tree = mallocz(n * sizeof(Qh *), 1);
+	if(qt->bw == nil || tree == nil)
+		panic("ehci: mkqhtree: no memory");
+	for(i = 0; i < n; i++){
+		qh = tree[i] = edalloc();
+		if(qh == nil)
+			panic("ehci: mkqhtree: no memory");
+		qh->nlink = qh->alink = qh->link = Lterm;
+		qh->csw = Tdhalt;
+		qh->state = Qidle;
+		if(i > 0)
+			qhlinkqh(tree[i], tree[(i-1)/2]);
+	}
+	ctlr->ntree = i;
+	dprint("ehci: tree: %d endpoints allocated\n", i);
+
+	/* distribute leaves evenly round the frame list */
+	leaf0 = n / 2;
+	for(i = 0; i < Nintrleafs; i++){
+		/* o is i with its low depth bits in reverse order */
+		o = 0;
+		for(d = 0; d < depth; d++){
+			o <<= 1;
+			if(i & (1 << d))
+				o |= 1;
+		}
+		/*
+		 * NOTE(review): breaking out here would leave the rest of
+		 * leafs unset before it is copied below; with leaf0 = n/2
+		 * and o < 2^depth the test looks unreachable -- confirm.
+		 */
+		if(leaf0 + o >= n){
+			print("leaf0=%d o=%d i=%d n=%d\n", leaf0, o, i, n);
+			break;
+		}
+		leafs[i] = PADDR(tree[leaf0 + o]) | Lqh;
+	}
+	assert((ctlr->nframes % Nintrleafs) == 0);
+	for(i = 0; i < ctlr->nframes; i += Nintrleafs)
+		memmove(ctlr->frames + i, leafs, sizeof(leafs));
+	ctlr->tree = qt;
+}
+
+/*
+ * Allocate the controller's frame list, mark every entry Lterm,
+ * give its address to the controller and reset the frame index;
+ * then set up the async list and the periodic tree.
+ * ctlr->nframes must already be set.
+ */
+static void
+ehcimeminit(Ctlr *ctlr)
+{
+	Eopio *regs;
+	ulong *fp, *efp;
+	int nbytes;
+
+	regs = ctlr->opio;
+	nbytes = ctlr->nframes * sizeof(ulong);
+	assert((nbytes & 0xFFF) == 0);		/* must be 4k aligned */
+	ctlr->frames = xspanalloc(nbytes, nbytes, 0);
+	if(ctlr->frames == nil)
+		panic("ehci reset: no memory");
+
+	/* start with an empty schedule in every frame */
+	efp = ctlr->frames + ctlr->nframes;
+	for(fp = ctlr->frames; fp < efp; fp++)
+		*fp = Lterm;
+	regs->frbase = PADDR(ctlr->frames);
+	regs->frno = 0;
+
+	qhalloc(ctlr, nil, nil, nil);	/* init async list */
+	mkqhtree(ctlr);			/* init sync list */
+	edfree(edalloc());		/* try to get some ones pre-allocated */
+
+	dprint("ehci %#p flb %#ulx frno %#ulx\n",
+		ctlr->capio, regs->frbase, regs->frno);
+}
+
+/*
+ * Start the controller behind hp: mark the Hci as high speed,
+ * set the interrupt enable mask, route all ports to this
+ * controller, enable the periodic and async schedules, run the
+ * controller, and write Pspower to each port's status register.
+ * Installed as hp->init by reset.
+ */
+static void
+init(Hci *hp)
+{
+	Ctlr *ctlr;
+	Eopio *opio;
+	int i;
+
+	hp->highspeed = 1;
+	ctlr = hp->aux;
+	opio = ctlr->opio;
+	dprint("ehci %#p init\n", ctlr->capio);
+
+	ilock(ctlr);
+	/*
+	 * Unless we activate frroll interrupt
+	 * some machines won't post other interrupts.
+	 *
+	 * NOTE(review): the mask written below does not include
+	 * Ifrroll, so the frroll interrupt is not enabled here;
+	 * confirm whether the comment or the mask is out of date.
+	 */
+	opio->intr = Iusb|Ierr|Iportchg|Ihcerr|Iasync;
+	opio->config = Callmine;	/* reclaim all ports */
+	opio->cmd |= Cpse;
+	opio->cmd |= Case;
+	ehcirun(ctlr, 1);
+
+	/* plain assignment: any other bits in portsc are written as 0 */
+	for (i = 0; i < hp->nports; i++)
+		opio->portsc[i] = Pspower;
+	iunlock(ctlr);
+
+	if(debug > 1)
+		dump(hp);
+
+}
+
+/*
+ * Reset the host controller and learn its frame list size.
+ * Sets Chcreset and polls up to 100 times, 1 ms apart, for the
+ * bit to clear; a timeout is only reported.  Then sets the
+ * interrupt threshold bits and sets ctlr->nframes from the frame
+ * list size bits of the command register.  Panics on an unknown
+ * frame list size.
+ */
+static void
+ehcireset(Ctlr *ctlr)
+{
+	Eopio *opio;
+	int i;
+
+	ilock(ctlr);
+	dprint("ehci %#p reset\n", ctlr->capio);
+
+	/*
+	 * Turn off legacy mode. Some controllers won't
+	 * interrupt us as expected otherwise.
+	 *
+	 * NOTE(review): nothing here touches legacy mode; the only
+	 * call is ehcirun(ctlr, 0), which presumably stops the
+	 * controller before the reset -- confirm and reword.
+	 */
+	ehcirun(ctlr, 0);
+
+	/* clear high 32 bits of address signals if it's 64 bits capable.
+	 * This is probably not needed but it does not hurt and others do it.
+	 */
+	if((ctlr->capio->capparms & C64) != 0){
+		dprint("ehci: 64 bits\n");
+		ctlr->opio->seg = 0;
+	}
+
+	opio = ctlr->opio;
+	opio->cmd |= Chcreset;	/* controller reset */
+	/* wait for Chcreset to read back as 0, for at most 100 ms */
+	for(i = 0; i < 100; i++){
+		if((opio->cmd & Chcreset) == 0)
+			break;
+		delay(1);
+	}
+	if(i == 100)
+		print("ehci %#p controller reset timed out\n", ctlr->capio);
+
+	/* requesting more interrupts per µframe may miss interrupts */
+	opio->cmd |= Citc8;		/* 1 intr. per ms */
+	/* the frame list size is read back from the command register */
+	switch(opio->cmd & Cflsmask){
+	case Cfls1024:
+		ctlr->nframes = 1024;
+		break;
+	case Cfls512:
+		ctlr->nframes = 512;
+		break;
+	case Cfls256:
+		ctlr->nframes = 256;
+		break;
+	default:
+		panic("ehci: unknown fls %#lux", opio->cmd & Cflsmask);
+	}
+	dprint("ehci: %d frames\n", ctlr->nframes);
+	iunlock(ctlr);
+}
+
+/*
+ * Set the driver-wide debug level to d; the Hci argument is
+ * unused.  Installed as hp->debug by reset.
+ */
+static void
+setdebug(Hci*, int d)
+{
+	debug = d;
+}
+
+/*
+ * Quiesce the controller behind hp: request a controller reset,
+ * poll up to 100 times, 1 ms apart, for it to complete, wait a
+ * further 100 ms, then call ehcirun(ctlr, 0) and clear the frame
+ * list base register.  Installed as hp->shutdown by reset.
+ */
+static void
+shutdown(Hci *hp)
+{
+	Ctlr *ctlr;
+	Eopio *regs;
+	int tries;
+
+	ctlr = hp->aux;
+	ilock(ctlr);
+	regs = ctlr->opio;
+	regs->cmd |= Chcreset;		/* controller reset */
+	tries = 0;
+	while(tries < 100 && (regs->cmd & Chcreset) != 0){
+		delay(1);
+		tries++;
+	}
+	if(tries >= 100)
+		print("ehci %#p controller reset timed out\n", ctlr->capio);
+	delay(100);
+	ehcirun(ctlr, 0);
+	regs->frbase = 0;
+	iunlock(ctlr);
+}
+
+/*
+ * Bind hp to the first controller in ctlrs that is not yet
+ * active and whose register address matches hp->port (any
+ * address matches when hp->port is 0), reset it, set up its
+ * memory, and fill in hp's entry points.
+ * Returns 0 on success, -1 if no controller could be claimed.
+ */
+static int
+reset(Hci *hp)
+{
+	static Lock resetlck;
+	int i;
+	Ctlr *ctlr;
+	Ecapio *capio;
+
+	ilock(&resetlck);
+	scanpci();
+
+	/*
+	 * Any adapter matches if no hp->port is supplied,
+	 * otherwise the ports must match.
+	 */
+	ctlr = nil;
+	for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){
+		ctlr = ctlrs[i];
+		if(ctlr->active == 0)
+		if(hp->port == 0 || hp->port == (uintptr)ctlr->capio){
+			ctlr->active = 1;
+			break;
+		}
+	}
+	iunlock(&resetlck);
+	/* test the index first so that ctlrs[Nhcis] is never read */
+	if(i >= Nhcis || ctlrs[i] == nil)
+		return -1;
+
+	hp->aux = ctlr;
+	hp->port = (uintptr)ctlr->capio;
+	hp->irq = IRQ0usb0;
+	hp->tbdf = 0;
+
+	capio = ctlr->capio;
+	hp->nports = capio->parms & Cnports;
+
+	ddprint("echi: %s, ncc %lud npcc %lud\n",
+		capio->parms & 0x10000 ? "leds" : "no leds",
+		(capio->parms >> 12) & 0xf, (capio->parms >> 8) & 0xf);
+	ddprint("ehci: routing %s, %sport power ctl, %d ports\n",
+		capio->parms & 0x40 ? "explicit" : "automatic",
+		capio->parms & 0x10 ? "" : "no ", hp->nports);
+
+	ehcireset(ctlr);
+	ehcimeminit(ctlr);
+
+	/*
+	 * Linkage to the generic HCI driver.
+	 */
+	hp->init = init;
+	hp->dump = dump;
+	hp->interrupt = interrupt;
+	hp->epopen = epopen;
+	hp->epclose = epclose;
+	hp->epread = epread;
+	hp->epwrite = epwrite;
+	hp->seprintep = seprintep;
+	hp->portenable = portenable;
+	hp->portreset = portreset;
+	hp->portstatus = portstatus;
+	hp->shutdown = shutdown;
+	hp->debug = setdebug;
+	hp->type = "ehci";
+	return 0;
+}
+
+/*
+ * Register this driver's reset routine under the type name "ehci".
+ */
+void
+usbehcilink(void)
+{
+	addhcitype("ehci", reset);
+}

+ 145 - 0
sys/src/9/kw/usbehci.h

@@ -0,0 +1,145 @@
+typedef struct Ecapio Ecapio;
+typedef struct Eopio Eopio;
+typedef struct Edbgio Edbgio;
+
+/*
+ * EHCI interface registers and bits
+ */
+enum
+{
+	Cnports		= 0xF,		/* nport bits in Ecapio parms. */
+	Cdbgportshift	= 20,		/* debug port in Ecapio parms. */
+	Cdbgportmask	= 0xF,
+	C64		= 1,		/* 64-bits, in Ecapio capparms. */
+
+	/* typed links  */
+	Lterm		= 1,
+	Litd		= 0<<1,
+	Lqh		= 1<<1,
+	Lsitd		= 2<<1,
+	Lfstn		= 3<<1,		/* we don't use these */
+
+	/* Cmd reg. */
+	Cstop		= 0x00000,	/* stop running */
+	Crun		= 0x00001,	/* start operation */
+	Chcreset	= 0x00002,	/* host controller reset */
+	Cflsmask	= 0x0000C,	/* frame list size bits */
+	Cfls1024	= 0x00000,	/* frame list size 1024 */
+	Cfls512		= 0x00004,	/* frame list size 512 frames */
+	Cfls256		= 0x00008,	/* frame list size 256 frames */
+	Cpse		= 0x00010,	/* periodic sched. enable */
+	Case		= 0x00020,	/* async sched. enable */
+	Ciasync		= 0x00040,	/* interrupt on async advance doorbell */
+	Citc1		= 0x10000,	/* interrupt threshold ctl. 1 µframe */
+	Citc4		= 0x40000,	/* same. 4 µframes */
+	/* ... */
+	Citc8		= 0x80000,	/* same. 8 µframes (can go up to 64) */
+
+	/* Sts reg. */
+	Sasyncss	= 0x08000,	/* async schedule status */
+	Speriodss	= 0x04000,	/* periodic schedule status */
+	Srecl		= 0x02000,	/* reclamation (empty async sched.) */
+	Shalted		= 0x01000,	/* h.c. is halted */
+	Sasync		= 0x00020,	/* interrupt on async advance */
+	Sherr		= 0x00010,	/* host system error */
+	Sfrroll		= 0x00008,	/* frame list roll over */
+	Sportchg	= 0x00004,	/* port change detect */
+	Serrintr	= 0x00002,	/* error interrupt */
+	Sintr		= 0x00001,	/* interrupt */
+	Sintrs		= 0x0003F,	/* interrupts status */
+
+	/* Intr reg. */
+	Iusb		= 0x01,		/* intr. on usb */
+	Ierr		= 0x02,		/* intr. on usb error */
+	Iportchg	= 0x04,		/* intr. on port change */
+	Ifrroll		= 0x08,		/* intr. on frlist roll over */
+	Ihcerr		= 0x10,		/* intr. on host error */
+	Iasync		= 0x20,		/* intr. on async advance enable */
+	Iall		= 0x3F,		/* all interrupts */
+
+	/* Config reg. */
+	Callmine	= 1,		/* route all ports to us */
+
+	/* Portsc reg. */
+	Pspresent	= 0x00000001,	/* device present */
+	Psstatuschg	= 0x00000002,	/* Pspresent changed */
+	Psenable	= 0x00000004,	/* device enabled */
+	Pschange	= 0x00000008,	/* Psenable changed */
+	Psresume	= 0x00000040,	/* resume detected */
+	Pssuspend	= 0x00000080,	/* port suspended */
+	Psreset		= 0x00000100,	/* port reset */
+	Pspower		= 0x00001000,	/* port power on */
+	Psowner		= 0x00002000,	/* port owned by companion */
+	Pslinemask	= 0x00000C00,	/* line status bits */
+	Pslow		= 0x00000400,	/* low speed device */
+
+	/* Debug port csw reg. */
+	Cowner	= 0x40000000,		/* port owned by ehci */
+	Cenable	= 0x20000000,		/* debug port enabled */
+	Cdone	= 0x00010000,		/* request is done */
+	Cbusy	= 0x00000400,		/* port in use by a driver */
+	Cerrmask= 0x00000380,		/* error code bits */
+	Chwerr	= 0x00000100,		/* hardware error */
+	Cterr	= 0x00000080,		/* transaction error */
+	Cfailed	= 0x00000040,		/* transaction did fail */
+	Cgo	= 0x00000020,		/* execute the transaction */
+	Cwrite	= 0x00000010,		/* request is a write */
+	Clen	= 0x0000000F,		/* data len */
+
+	/* Debug port pid reg. */
+	Prpidshift	= 16,		/* received pid */
+	Prpidmask	= 0xFF,
+	Pspidshift	= 8,		/* sent pid */
+	Pspidmask	= 0xFF,
+	Ptokshift	= 0,		/* token pid */
+	Ptokmask	= 0xFF,
+
+	Ptoggle		= 0x00008800,	/* to update toggles */
+	Ptogglemask	= 0x00FFFF00,
+
+	/* Debug port addr reg. */
+	Adevshift	= 8,		/* device address */
+	Adevmask	= 0x7F,
+	Aepshift	= 0,		/* endpoint number */
+	Aepmask		= 0xF,
+};
+
+/*
+ * Capability registers (hw)
+ * The leading hex number in each comment is the register's byte offset.
+ */
+struct Ecapio
+{
+	ulong	cap;		/* 00 controller capability register */
+	ulong	parms;		/* 04 structural parameters register */
+	ulong	capparms;	/* 08 capability parameters */
+	ulong	portroute;	/* 0c not on the CS5536 */
+};
+
+/*
+ * Operational registers (hw)
+ * The leading hex number in each comment is the register's byte offset.
+ */
+struct Eopio
+{
+	ulong	cmd;		/* 00 command */
+	ulong	sts;		/* 04 status */
+	ulong	intr;		/* 08 interrupt enable */
+	ulong	frno;		/* 0c frame index */
+	ulong	seg;		/* 10 bits 63:32 of EHCI datastructs (unused) */
+	ulong	frbase;		/* 14 frame list base addr, 4096-byte boundary */
+	ulong	link;		/* 18 link for async list */
+	uchar	d2c[0x40-0x1c];	/* 1c dummy */
+	ulong	config;		/* 40 1: all ports default-routed to this HC */
+	/* declared with one element; the driver indexes it up to the port count */
+	ulong	portsc[1];	/* 44 Port status and control, one per port */
+};
+
+/*
+ * Debug port registers (hw)
+ * Offsets below assume a 4-byte ulong, as the Eopio offsets do.
+ */
+struct Edbgio
+{
+	ulong	csw;		/* 00 control and status */
+	ulong	pid;		/* 04 USB pid */
+	uchar	data[8];	/* 08 data buffer */
+	ulong	addr;		/* 10 device and endpoint addresses */
+};
+

+ 57 - 0
sys/src/9/kw/words

@@ -0,0 +1,57 @@
+global scale sheevaplug
+
+marvell 88f6281 (feroceon kirkwood) SoC
+arm926ej-s rev 1 [56251311] (armv5tejl) 1.2GHz cpu
+i & d caches 16K each, associativity 4, 32-byte lines, 128 sets
+512MB of dram at physical address 0
+512MB of flash
+16550 uart for console
+see http://www.marvell.com/files/products/embedded_processors/kirkwood/\
+	FS_88F6180_9x_6281_OpenSource.pdf, stored locally as
+	/public/doc/marvell/sheeva/88f61xx.kirkwood.pdf
+
+this plan 9 port is based on the port of native inferno to the
+sheevaplug by Salva Peiró (saoret.one@gmail.com) and Mechiel Lukkien
+(mechiel@ueber.net).
+
+# type this once at u-boot; thereafter the plug will pxe boot:
+setenv bootdelay 2
+setenv bootcmd 'bootp; bootp; tftp 0x800000; go 0x800000'
+saveenv
+
+	physical mem map
+hex addr	size	what
+----
+0		512MB	sdram
+
+80000000	512MB	pcie mem	# default
+90000000	256MB	pcie mem	# u-boot
+d0000000	1MB	internal address space	# default
+d8000000	128MB	nand flash	# actually 512MB addressed through this
+e8000000	128MB	spi serial flash
+f0000000	128MB	boot rom	# default
+f0000000	16MB	pcie io		# mapped to 0xc0000000 by u-boot
+
+f1000000	1MB 	internal regs
+f1000000	64K	dram regs
+f1010000	64K	uart, flashes, rtc, gpio, etc.
+f1040000	64K	pci-e regs
+f1050000	64K	usb otg regs (ehci-like)
+f1070000	64K	gbe regs
+f1080000	64K	sata regs
+f1090000	64K	sdio regs
+
+f8000000	128MB	boot device	# default, mapped to 0 by u-boot
+f8000000	16MB	spi flash	# mapped by u-boot
+f9000000	8MB	nand flash
+fb000000	64KB	crypto engine
+ff000000	16MB	boot rom	# u-boot
+
+	virtual mem map
+hex addr	size	what
+----
+0		512MB	user process address space
+
+60000000		kzero, mapped to 0
+c0000000	64KB	pcie i/o	# default
+...			as per physical map

Some files were not shown because too many files changed in this diff