
Plan 9 from Bell Labs 2012-05-02

David du Colombier, 12 years ago
commit c0318f7435
59 changed files with 18774 additions and 1 deletion
  1. sys/src/9/mkfile (+3 -0)
  2. sys/src/9/teg2/_announce (+58 -0)
  3. sys/src/9/teg2/arch.c (+165 -0)
  4. sys/src/9/teg2/archtegra.c (+869 -0)
  5. sys/src/9/teg2/arm.h (+311 -0)
  6. sys/src/9/teg2/arm.s (+135 -0)
  7. sys/src/9/teg2/atom.s (+56 -0)
  8. sys/src/9/teg2/cache-l2-pl310.c (+456 -0)
  9. sys/src/9/teg2/cache.v7.s (+240 -0)
  10. sys/src/9/teg2/caches-v7.c (+106 -0)
  11. sys/src/9/teg2/caches.c (+198 -0)
  12. sys/src/9/teg2/clock-tegra.c (+138 -0)
  13. sys/src/9/teg2/clock.c (+623 -0)
  14. sys/src/9/teg2/coproc.c (+200 -0)
  15. sys/src/9/teg2/dat.h (+478 -0)
  16. sys/src/9/teg2/devarch.c (+192 -0)
  17. sys/src/9/teg2/devcons.c (+1366 -0)
  18. sys/src/9/teg2/devether.c (+528 -0)
  19. sys/src/9/teg2/devuart.c (+796 -0)
  20. sys/src/9/teg2/ether8169.c (+1675 -0)
  21. sys/src/9/teg2/etherif.h (+42 -0)
  22. sys/src/9/teg2/ethermii.c (+235 -0)
  23. sys/src/9/teg2/ethermii.h (+116 -0)
  24. sys/src/9/teg2/fns.h (+233 -0)
  25. sys/src/9/teg2/fpi.c (+300 -0)
  26. sys/src/9/teg2/fpi.h (+61 -0)
  27. sys/src/9/teg2/fpiarm.c (+502 -0)
  28. sys/src/9/teg2/fpimem.c (+136 -0)
  29. sys/src/9/teg2/init9.s (+25 -0)
  30. sys/src/9/teg2/io.h (+219 -0)
  31. sys/src/9/teg2/kbd.c (+410 -0)
  32. sys/src/9/teg2/l.s (+869 -0)
  33. sys/src/9/teg2/lexception.s (+325 -0)
  34. sys/src/9/teg2/lproc.s (+38 -0)
  35. sys/src/9/teg2/main.c (+985 -0)
  36. sys/src/9/teg2/mem.h (+150 -0)
  37. sys/src/9/teg2/mkfile (+155 -0)
  38. sys/src/9/teg2/mmu.c (+750 -0)
  39. sys/src/9/teg2/notes/assumes-hz-under-1000 (+4 -0)
  40. sys/src/9/teg2/notes/bug.rfe (+41 -0)
  41. sys/src/9/teg2/notes/byte-order (+59 -0)
  42. sys/src/9/teg2/notes/clks (+19 -0)
  43. sys/src/9/teg2/notes/movm.w (+22 -0)
  44. sys/src/9/teg2/notes/pci (+29 -0)
  45. sys/src/9/teg2/notes/pci.2.buses (+78 -0)
  46. sys/src/9/teg2/nvram (binary)
  47. sys/src/9/teg2/pci.c (+853 -0)
  48. sys/src/9/teg2/random.c (+138 -0)
  49. sys/src/9/teg2/rebootcode.s (+208 -0)
  50. sys/src/9/teg2/softfpu.c (+129 -0)
  51. sys/src/9/teg2/syscall.c (+366 -0)
  52. sys/src/9/teg2/trap.c (+1068 -0)
  53. sys/src/9/teg2/ts (+91 -0)
  54. sys/src/9/teg2/uarti8250.c (+821 -0)
  55. sys/src/9/teg2/usbehci.h (+104 -0)
  56. sys/src/9/teg2/v7-arch.c (+51 -0)
  57. sys/src/9/teg2/vfp3.c (+489 -0)
  58. sys/src/9/teg2/words (+60 -0)
  59. sys/src/cmd/ip/ipconfig/ipconfig.h (+0 -1)

+ 3 - 0
sys/src/9/mkfile

@@ -1,9 +1,12 @@
 ARCH=\
 	alphapc\
 	bitsy\
+	kw\
 	mtx\
+	omap\
 	pc\
 	ppc\
+	teg2\
 	
 all:V:
 	for(i in $ARCH)@{

+ 58 - 0
sys/src/9/teg2/_announce

@@ -0,0 +1,58 @@
+This is a preliminary Plan 9 port to the Compulab Trimslice,
+containing a Tegra 2 SoC: a dual-core, (truly) dual-issue 1GHz
+Cortex-A9 v7a-architecture ARM system, *and* it comes in a case.  VFP
+3 floating-point hardware is present, but 5l doesn't yet generate
+those instructions.  This is the first multiprocessor ARM port we've
+done, and much of the code should be reusable in future ports.  There
+are still things to be done but it can run both processors and is
+believed to have adequate kernel support for VFP 3 floating-point.
+
+
+What's implemented.
+
+Two cpus running concurrently with level 1 and 2 caches enabled.
+
+Realtek 8168 Ethernet.  A slightly dimmer 8169.  Has to be jabbed with
+an electric cattle prod by software about once per day when it wedges.
+
+Profiling.  Charles Forsyth fixed various bugs to make user-mode
+profiling on ARMs work for the first time ever.
+
+
+What's not (yet) implemented.
+
+USB.  It probably just needs initialisation.
+
+NOR flash.
+
+Video.
+
+VFP3 floating point.  The go 5l generates VFP 3 floating-point
+instructions (among other changes).  Attempts to transplant just that
+code into our 5l failed to generate correct code.  Eventually someone
+will get this to work, and then we'll be able to use the hardware
+floating-point.  Even with only software emulation of floating-point,
+astro runs in under 3 seconds.
+
+In-line 64-bit arithmetic in 5[cl].
+
+And the really horrid peripherals: NAND flash and MMC.
+
+
+Known problems.
+
+kprof.  kprof profiling doesn't work correctly, charging all CPU time
+to _start.
+
+Reboot.  After an fshalt -r reboot (or two) with cpu1 enabled,
+accesses to pci registers (notably 0x80015000) in the newly-loaded
+kernel often hang.  One of three watchdogs' reset should jolt the
+system back to life and force a reboot through u-boot when this
+happens.  Sometimes the ethernet goes dead instead ("waiting for
+dhcp..." forever); this could be a different symptom of pci illness.
+
+Also following a reboot, cpu1's local (not tegra SoC shared) timers
+don't interrupt.  Since the local watchdogs don't seem to actually
+interrupt nor generate resets when used in anger (as opposed to
+boot-time check-out), their loss is merely a mystery.  The local timer
+not interrupting is more worrying.

+ 165 - 0
sys/src/9/teg2/arch.c

@@ -0,0 +1,165 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include <tos.h>
+#include "ureg.h"
+
+#include "arm.h"
+
+/*
+ * A lot of this stuff doesn't belong here
+ * but this is a convenient dumping ground for
+ * later sorting into the appropriate buckets.
+ */
+
+/* Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+	ureg->pc = p->sched.pc;
+	ureg->sp = p->sched.sp+4;
+	ureg->r14 = PTR2UINT(sched);
+}
+
+/*
+ * called in sysfile.c
+ */
+void
+evenaddr(uintptr addr)
+{
+	if(addr & 3){
+		postnote(up, 1, "sys: odd address", NDebug);
+		error(Ebadarg);
+	}
+}
+
+/* go to user space */
+void
+kexit(Ureg*)
+{
+	uvlong t;
+	Tos *tos;
+
+	/* precise time accounting, kernel exit */
+	tos = (Tos*)(USTKTOP-sizeof(Tos));
+	cycles(&t);
+	tos->kcycles += t - up->kentry;
+	tos->pcycles = up->pcycles;
+	tos->cyclefreq = m->cpuhz;
+	tos->pid = up->pid;
+
+	/* make visible immediately to user phase */
+	l1cache->wbse(tos, sizeof *tos);
+}
+
+/*
+ *  return the userpc the last exception happened at
+ */
+uintptr
+userpc(void)
+{
+	Ureg *ureg = up->dbgreg;
+	return ureg->pc;
+}
+
+/* This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	USED(ureg, pureg, uva, n);
+}
+
+/*
+ *  this is the body for all kproc's
+ */
+static void
+linkproc(void)
+{
+	spllo();
+	up->kpfun(up->kparg);
+	pexit("kproc exiting", 0);
+}
+
+/*
+ *  setup stack and initial PC for a new kernel proc.  This is architecture
+ *  dependent because of the starting stack location
+ */
+void
+kprocchild(Proc *p, void (*func)(void*), void *arg)
+{
+	p->sched.pc = PTR2UINT(linkproc);
+	p->sched.sp = PTR2UINT(p->kstack+KSTACK);
+
+	p->kpfun = func;
+	p->kparg = arg;
+}
+
+/*
+ *  pc output by dumpaproc
+ */
+uintptr
+dbgpc(Proc* p)
+{
+	Ureg *ureg;
+
+	ureg = p->dbgreg;
+	if(ureg == 0)
+		return 0;
+
+	return ureg->pc;
+}
+
+/*
+ *  set mach dependent process state for a new process
+ */
+void
+procsetup(Proc* p)
+{
+	fpusysprocsetup(p);
+}
+
+/*
+ *  Save the mach dependent part of the process state.
+ */
+void
+procsave(Proc* p)
+{
+	uvlong t;
+
+	cycles(&t);
+	p->pcycles += t;
+
+	fpuprocsave(p);
+	l1cache->wbse(p, sizeof *p);		/* is this needed? */
+	l1cache->wb();				/* is this needed? */
+}
+
+void
+procrestore(Proc* p)
+{
+	uvlong t;
+
+	if(p->kp)
+		return;
+	cycles(&t);
+	p->pcycles -= t;
+	wakewfi();		/* in case there's another runnable proc */
+
+	/* let it fault in at first use */
+//	fpuprocrestore(p);
+	l1cache->wb();			/* system is more stable with this */
+}
+
+int
+userureg(Ureg* ureg)
+{
+	return (ureg->psr & PsrMask) == PsrMusr;
+}

+ 869 - 0
sys/src/9/teg2/archtegra.c

@@ -0,0 +1,869 @@
+/*
+ * nvidia tegra 2 architecture-specific stuff
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "arm.h"
+
+#include "../port/netif.h"
+#include "etherif.h"
+#include "../port/flashif.h"
+#include "../port/usb.h"
+#include "../port/portusbehci.h"
+#include "usbehci.h"
+
+enum {
+	/* hardware limits imposed by register contents or layouts */
+	Maxcpus		= 4,
+	Maxflowcpus	= 2,
+
+	Debug	= 0,
+};
+
+typedef struct Clkrst Clkrst;
+typedef struct Diag Diag;
+typedef struct Flow Flow;
+typedef struct Scu Scu;
+typedef struct Power Power;
+
+struct Clkrst {
+	ulong	rstsrc;
+	ulong	rstdevl;
+	ulong	rstdevh;
+	ulong	rstdevu;
+
+	ulong	clkoutl;
+	ulong	clkouth;
+	ulong	clkoutu;
+
+	uchar	_pad0[0x24-0x1c];
+	ulong	supcclkdiv;		/* super cclk divider */
+	ulong	_pad1;
+	ulong	supsclkdiv;		/* super sclk divider */
+
+	uchar	_pad4[0x4c-0x30];
+	ulong	clkcpu;
+
+	uchar	_pad3[0xe0-0x50];
+	ulong	pllxbase;		/* pllx controls CPU clock speed */
+	ulong	pllxmisc;
+	ulong	pllebase;		/* plle is dedicated to pcie */
+	ulong	pllemisc;
+
+	uchar	_pad2[0x340-0xf0];
+	ulong	cpuset;
+	ulong	cpuclr;
+};
+
+enum {
+	/* rstsrc bits */
+	Wdcpurst =	1<<0,
+	Wdcoprst =	1<<1,
+	Wdsysrst =	1<<2,
+	Wdsel =		1<<4,		/* tmr1 or tmr2? */
+	Wdena =		1<<5,
+
+	/* devl bits */
+	Sysreset =	1<<2,
+
+	/* clkcpu bits */
+	Cpu1stop =	1<<9,
+	Cpu0stop =	1<<8,
+
+	/* cpu* bits */
+	Cpu1dbgreset =	1<<13,
+	Cpu0dbgreset =	1<<12,
+	Cpu1wdreset =	1<<9,
+	Cpu0wdreset =	1<<8,
+	Cpu1dereset =	1<<5,
+	Cpu0dereset =	1<<4,
+	Cpu1reset =	1<<1,
+	Cpu0reset =	1<<0,
+};
+
+struct Power {
+	ulong	ctl;			/* mainly for rtc clock signals */
+	ulong	secregdis;
+	ulong	swrst;
+
+	ulong	wakevmask;
+	ulong	waklvl;
+	ulong	waksts;
+	ulong	swwaksts;
+
+	ulong	dpdpadsovr;		/* deep power down pads override */
+	ulong	dpdsample;
+	ulong	dpden;
+
+	ulong	gatetimroff;
+	ulong	gatetimron;
+	ulong	toggle;
+	ulong	unclamp;
+	ulong	gatests;		/* ro */
+
+	ulong	goodtmr;
+	ulong	blinktmr;
+
+	ulong	noiopwr;
+	ulong	detect;
+	ulong	detlatch;
+
+	ulong	scratch[24];
+	ulong	secscratch[6];
+
+	ulong	cpupwrgoodtmr;
+	ulong	cpupwrofftmr;
+
+	ulong	pgmask[2];
+
+	ulong	autowaklvl;
+	ulong	autowaklvlmask;
+	ulong	wakdelay;
+
+	ulong	detval;
+	ulong	ddr;
+	ulong	usbdebdel;	/* usb de-bounce delay */
+	ulong	usbao;
+	ulong	cryptoop;
+	ulong	pllpwb0ovr;
+	ulong	scratch24[42-24+1];
+	ulong	boundoutmirr[3];
+	ulong	sys33ven;
+	ulong	boundoutmirracc;
+	ulong	gate;
+};
+
+enum {
+	/* toggle bits */
+	Start	= 1<<8,
+	/* partition ids */
+	Partpcie= 3,
+	Partl2	= 4,
+};
+
+struct Scu {
+	ulong	ctl;
+	ulong	cfg;			/* ro */
+	ulong	cpupwrsts;
+	ulong	inval;
+
+	uchar	_pad0[0x40-0x10];
+	ulong	filtstart;
+	ulong	filtend;
+
+	uchar	_pad1[0x50-0x48];
+	ulong	accctl;			/* initially 0 */
+	ulong	nsaccctl;
+};
+
+enum {
+	/* ctl bits */
+	Scuenable =	1<<0,
+	Filter =	1<<1,
+	Scuparity =	1<<2,
+	Specfill =	1<<3,		/* only for PL310 */
+	Allport0 =	1<<4,
+	Standby =	1<<5,
+	Icstandby =	1<<6,
+};
+
+struct Flow {
+	ulong	haltcpu0;
+	ulong	haltcop;
+	ulong	cpu0;
+	ulong	cop;
+	ulong	xrq;
+	ulong	haltcpu1;
+	ulong	cpu1;
+};
+
+enum {
+	/* haltcpu* bits */
+	Stop =	2<<29,
+
+	/* cpu* bits */
+	Event =			1<<14,	/* w1c */
+	Waitwfebitsshift =	4,
+	Waitwfebitsmask =	MASK(2),
+	Eventenable =		1<<1,
+	Cpuenable =		1<<0,
+};
+
+struct Diag {
+	Cacheline c0;
+	Lock;
+	long	cnt;
+	long	sync;
+	Cacheline c1;
+};
+
+extern ulong testmem;
+
+/*
+ * number of cpus available.  contrast with conf.nmach, which is number
+ * of running cpus.
+ */
+int navailcpus;
+Isolated l1ptstable;
+
+Soc soc = {
+	.clkrst	= 0x60006000,		/* clock & reset signals */
+	.power	= 0x7000e400,
+	.exceptvec = PHYSEVP,		/* undocumented magic */
+	.sema	= 0x60001000,
+	.l2cache= PHYSL2BAG,		/* pl310 bag on the side */
+	.flow	= 0x60007000,
+
+	/* 4 non-gic controllers */
+//	.intr	= { 0x60004000, 0x60004100, 0x60004200, 0x60004300, },
+
+	/* private memory region */
+	.scu	= 0x50040000,
+	/* we got this address from the `cortex-a series programmer's guide'. */
+	.intr	= 0x50040100,		/* per-cpu interface */
+	.glbtmr	= 0x50040200,
+	.loctmr	= 0x50040600,
+	.intrdist=0x50041000,
+
+	.uart	= { 0x70006000, 0x70006040,
+		    0x70006200, 0x70006300, 0x70006400, },
+
+	.rtc	= 0x7000e000,
+	.tmr	= { 0x60005000, 0x60005008, 0x60005050, 0x60005058, },
+	.µs	= 0x60005010,
+
+	.pci	= 0x80000000,
+	.ether	= 0xa0024000,
+
+	.nand	= 0x70008000,
+	.nor	= 0x70009000,		/* also VIRTNOR */
+
+	.ehci	= P2VAHB(0xc5000000),	/* 1st of 3 */
+	.ide	= P2VAHB(0xc3000000),
+
+	.gpio	= { 0x6000d000, 0x6000d080, 0x6000d100, 0x6000d180,
+			    0x6000d200, 0x6000d280, 0x6000d300, },
+	.spi	= { 0x7000d400, 0x7000d600, 0x7000d800, 0x7000da00, },
+ 	.twsi	= 0x7000c000,
+	.mmc	= { P2VAHB(0xc8000000), P2VAHB(0xc8000200),
+		    P2VAHB(0xc8000400), P2VAHB(0xc8000600), },
+};
+
+static volatile Diag diag;
+static int missed;
+
+void
+dumpcpuclks(void)		/* run CPU at full speed */
+{
+	Clkrst *clk = (Clkrst *)soc.clkrst;
+
+	iprint("pllx base %#lux misc %#lux\n", clk->pllxbase, clk->pllxmisc);
+	iprint("plle base %#lux misc %#lux\n", clk->pllebase, clk->pllemisc);
+	iprint("super cclk divider %#lux\n", clk->supcclkdiv);
+	iprint("super sclk divider %#lux\n", clk->supsclkdiv);
+}
+
+static char *
+devidstr(ulong)
+{
+	return "ARM Cortex-A9";
+}
+
+void
+archtegralink(void)
+{
+}
+
+/* convert AddrDevid register to a string in buf and return buf */
+char *
+cputype2name(char *buf, int size)
+{
+	ulong r;
+
+	r = cpidget();			/* main id register */
+	assert((r >> 24) == 'A');
+	seprint(buf, buf + size, "Cortex-A9 r%ldp%ld",
+		(r >> 20) & MASK(4), r & MASK(4));
+	return buf;
+}
+
+static void
+errata(void)
+{
+	ulong reg, r, p;
+
+	/* apply cortex-a9 errata workarounds */
+	r = cpidget();			/* main id register */
+	assert((r >> 24) == 'A');
+	p = r & MASK(4);		/* minor revision */
+	r >>= 20;
+	r &= MASK(4);			/* major revision */
+
+	/* this is an undocumented `diagnostic register' that linux knows */
+	reg = cprdsc(0, CpDTLB, 0, 1);
+	if (r < 2 || r == 2 && p <= 2)
+		reg |= 1<<4;			/* 742230 */
+	if (r == 2 && p <= 2)
+		reg |= 1<<6 | 1<<12 | 1<<22;	/* 743622, 2×742231 */
+	if (r < 3)
+		reg |= 1<<11;			/* 751472 */
+	cpwrsc(0, CpDTLB, 0, 1, reg);
+}
+
+void
+archconfinit(void)
+{
+	char *p;
+	ulong hz;
+
+	assert(m != nil);
+	m->cpuhz = 1000 * Mhz;			/* trimslice speed */
+	p = getconf("*cpumhz");
+	if (p) {
+		hz = atoi(p) * Mhz;
+		if (hz >= 100*Mhz && hz <= 3600UL*Mhz)
+			m->cpuhz = hz;
+	}
+	m->delayloop = m->cpuhz/2000;		/* initial estimate */
+	errata();
+}
+
+int
+archether(unsigned ctlrno, Ether *ether)
+{
+	switch(ctlrno) {
+	case 0:
+		ether->type = "rtl8169";		/* pci-e ether */
+		ether->ctlrno = ctlrno;
+		ether->irq = Pcieirq;			/* non-msi pci-e intr */
+		ether->nopt = 0;
+		ether->mbps = 1000;
+		return 1;
+	}
+	return -1;
+}
+
+void
+dumpscustate(void)
+{
+	Scu *scu = (Scu *)soc.scu;
+
+	print("cpu%d scu: accctl %#lux\n", m->machno, scu->accctl);
+	print("cpu%d scu: smp cpu bit map %#lo for %ld cpus; ", m->machno,
+		(scu->cfg >> 4) & MASK(4), (scu->cfg & MASK(2)) + 1);
+	print("cpus' power %#lux\n", scu->cpupwrsts);
+}
+
+void
+scuon(void)
+{
+	Scu *scu = (Scu *)soc.scu;
+
+	if (scu->ctl & Scuenable)
+		return;
+	scu->inval = MASK(16);
+	coherence();
+	scu->ctl = Scuparity | Scuenable | Specfill;
+	coherence();
+}
+
+int
+getncpus(void)
+{
+	int n;
+	char *p;
+	Scu *scu;
+
+	if (navailcpus == 0) {
+		scu = (Scu *)soc.scu;
+		navailcpus = (scu->cfg & MASK(2)) + 1;
+		if (navailcpus > MAXMACH)
+			navailcpus = MAXMACH;
+
+		p = getconf("*ncpu");
+		if (p && *p) {
+			n = atoi(p);
+			if (n > 0 && n < navailcpus)
+				navailcpus = n;
+		}
+	}
+	return navailcpus;
+}
+
+void
+cpuidprint(void)
+{
+	char name[64];
+
+	cputype2name(name, sizeof name);
+	delay(50);				/* let uart catch up */
+	iprint("cpu%d: %lldMHz ARM %s %s-endian\n",
+		m->machno, m->cpuhz / Mhz, name,
+		getpsr() & PsrBigend? "big": "little");
+}
+
+static void
+clockson(void)
+{
+	Clkrst *clk = (Clkrst *)soc.clkrst;
+
+	/* enable all by clearing resets */
+	clk->rstdevl = clk->rstdevh = clk->rstdevu = 0;
+	coherence();
+	clk->clkoutl = clk->clkouth = clk->clkoutu = ~0; /* enable all clocks */
+	coherence();
+
+	clk->rstsrc = Wdcpurst | Wdcoprst | Wdsysrst | Wdena;
+	coherence();
+}
+
+/* we could be shutting down ourself (if cpu == m->machno), so take care. */
+void
+stopcpu(uint cpu)
+{
+	Flow *flow = (Flow *)soc.flow;
+	Clkrst *clk = (Clkrst *)soc.clkrst;
+
+	if (cpu == 0) {
+		iprint("stopcpu: may not stop cpu0\n");
+		return;
+	}
+
+	machoff(cpu);
+	lock(&active);
+	active.stopped |= 1 << cpu;
+	unlock(&active);
+	l1cache->wb();
+
+	/* shut down arm7 avp coproc so it can't cause mischief. */
+	/* could try watchdog without stopping avp. */
+	flow->haltcop = Stop;
+	coherence();
+	flow->cop = 0;					/* no Cpuenable */
+	coherence();
+	delay(10);
+
+	assert(cpu < Maxflowcpus);
+	*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = Stop;
+	coherence();
+	*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;	/* no Cpuenable */
+	coherence();
+	delay(10);
+
+	/* cold reset */
+	assert(cpu < Maxcpus);
+	clk->cpuset = (Cpu0reset | Cpu0dbgreset | Cpu0dereset) << cpu;
+	coherence();
+	delay(1);
+
+	l1cache->wb();
+}
+
+static void
+synccpus(volatile long *cntp, int n)
+{
+	ainc(cntp);
+	while (*cntp < n)
+		;
+	/* all cpus should now be here */
+}
+
+static void
+pass1(int pass, volatile Diag *dp)
+{
+	int i;
+
+	if(m->machno == 0)
+		iprint(" %d", pass);
+	for (i = 1000*1000; --i > 0; ) {
+		ainc(&dp->cnt);
+		adec(&dp->cnt);
+	}
+
+	synccpus(&dp->sync, navailcpus);
+	/* all cpus are now here */
+
+	ilock(dp);
+	if(dp->cnt != 0)
+		panic("cpu%d: diag: failed w count %ld", m->machno, dp->cnt);
+	iunlock(dp);
+
+	synccpus(&dp->sync, 2 * navailcpus);
+	/* all cpus are now here */
+	adec(&dp->sync);
+	adec(&dp->sync);
+}
+
+/*
+ * try to confirm coherence of l1 caches.
+ * assume that all available cpus will be started.
+ */
+void
+l1diag(void)
+{
+	int pass;
+	volatile Diag *dp;
+
+	if (!Debug)
+		return;
+
+	l1cache->wb();
+
+	/*
+	 * synchronise and print
+	 */
+	dp = &diag;
+	ilock(dp);
+	if (m->machno == 0)
+		iprint("l1: waiting for %d cpus... ", navailcpus);
+	iunlock(dp);
+
+	synccpus(&dp->sync, navailcpus);
+
+	ilock(dp);
+	if (m->machno == 0)
+		iprint("cache coherency pass");
+	iunlock(dp);
+
+	synccpus(&dp->sync, 2 * navailcpus);
+	adec(&dp->sync);
+	adec(&dp->sync);
+
+	/*
+	 * cpus contend
+	 */
+	for (pass = 0; pass < 3; pass++)
+		pass1(pass, dp);
+
+	/*
+	 * synchronise and check sanity
+	 */
+	synccpus(&dp->sync, navailcpus);
+
+	if(dp->sync < navailcpus || dp->sync >= 2 * navailcpus)
+		panic("cpu%d: diag: failed w dp->sync %ld", m->machno,
+			dp->sync);
+	if(dp->cnt != 0)
+		panic("cpu%d: diag: failed w dp->cnt %ld", m->machno,
+			dp->cnt);
+
+	ilock(dp);
+	iprint(" cpu%d ok", m->machno);
+	iunlock(dp);
+
+	synccpus(&dp->sync, 2 * navailcpus);
+	adec(&dp->sync);
+	adec(&dp->sync);
+	l1cache->wb();
+
+	/*
+	 * all done, print
+	 */
+	ilock(dp);
+	if (m->machno == 0)
+		iprint("\n");
+	iunlock(dp);
+}
+
+static void
+unfreeze(uint cpu)
+{
+	Clkrst *clk = (Clkrst *)soc.clkrst;
+	Flow *flow = (Flow *)soc.flow;
+
+	assert(cpu < Maxcpus);
+
+	clk->clkcpu &= ~(Cpu0stop << cpu);
+	coherence();
+	/* out of reset */
+	clk->cpuclr = (Cpu0reset | Cpu0wdreset | Cpu0dbgreset | Cpu0dereset) <<
+		cpu;
+	coherence();
+
+	assert(cpu < Maxflowcpus);
+	*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;
+	coherence();
+	*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = 0; /* normal operat'n */
+	coherence();
+}
+
+/*
+ * this is all a bit magic.  the soc.exceptvec register is effectively
+ * undocumented.  we had to look at linux and experiment, alas.  this is the
+ * sort of thing that should be standardised as part of the cortex mpcore spec.
+ * even intel document their equivalent procedure.
+ */
+int
+startcpu(uint cpu)
+{
+	int i, r;
+	ulong oldvec, rstaddr;
+	ulong *evp = (ulong *)soc.exceptvec;	/* magic */
+
+	r = 0;
+	if (getncpus() < 2 || cpu == m->machno ||
+	    cpu >= MAXMACH || cpu >= navailcpus)
+		return -1;
+
+	oldvec = *evp;
+	l1cache->wb();			/* start next cpu w same view of ram */
+	*evp = rstaddr = PADDR(_vrst);	/* will start cpu executing at _vrst */
+	coherence();
+	l1cache->wb();
+	unfreeze(cpu);
+
+	for (i = 2000; i > 0 && *evp == rstaddr; i--)
+		delay(1);
+	if (i <= 0 || *evp != cpu) {
+		iprint("cpu%d: didn't start!\n", cpu);
+		stopcpu(cpu);		/* make sure it's stopped */
+		r = -1;
+	}
+	*evp = oldvec;
+	return r;
+}
+
+static void
+cksecure(void)
+{
+	ulong db;
+	extern ulong getdebug(void);
+
+	if (getscr() & 1)
+		panic("cpu%d: running non-secure", m->machno);
+	db = getdebug();
+	if (db)
+		iprint("cpu%d: debug enable reg %#lux\n", m->machno, db);
+}
+
+ulong
+smpon(void)
+{
+	ulong aux;
+
+	/* cortex-a9 model-specific configuration */
+	aux = getauxctl();
+	putauxctl(aux | CpACsmp | CpACmaintbcast);
+	return aux;
+}
+
+void
+cortexa9cachecfg(void)
+{
+	/* cortex-a9 model-specific configuration */
+	putauxctl(getauxctl() | CpACparity | CpAClwr0line | CpACl2pref);
+}
+
+/*
+ * called on a cpu other than 0 from cpureset in l.s,
+ * from _vrst in lexception.s.
+ * mmu and l1 (and system-wide l2) caches and coherency (smpon) are on,
+ * but interrupts are disabled.
+ * our mmu is using an exact copy of cpu0's l1 page table
+ * as it was after userinit ran.
+ */
+void
+cpustart(void)
+{
+	int ms;
+	ulong *evp;
+	Power *pwr;
+
+	up = nil;
+	if (active.machs & (1<<m->machno)) {
+		serialputc('?');
+		serialputc('r');
+		panic("cpu%d: resetting after start", m->machno);
+	}
+	assert(m->machno != 0);
+
+	errata();
+	cortexa9cachecfg();
+	memdiag(&testmem);
+
+	machinit();			/* bumps nmach, adds bit to machs */
+	machoff(m->machno);		/* not ready to go yet */
+
+	/* clock signals and scu are system-wide and already on */
+	clockshutdown();		/* kill any watch-dog timer */
+
+	trapinit();
+	clockinit();			/* sets loop delay */
+	timersinit();
+	cpuidprint();
+
+	/*
+	 * notify cpu0 that we're up so it can proceed to l1diag.
+	 */
+	evp = (ulong *)soc.exceptvec;	/* magic */
+	*evp = m->machno;
+	coherence();
+
+	l1diag();		/* contend with other cpus to verify sanity */
+
+	/*
+	 * pwr->noiopwr == 0
+	 * pwr->detect == 0x1ff (default, all disabled)
+	 */
+	pwr = (Power *)soc.power;
+	assert(pwr->gatests == MASK(7)); /* everything has power */
+
+	/*
+	 * 8169 has to initialise before we get past this, thus cpu0
+	 * has to schedule processes first.
+	 */
+	if (Debug)
+		iprint("cpu%d: waiting for 8169\n", m->machno);
+	for (ms = 0; !l1ptstable.word && ms < 5000; ms += 10) {
+		delay(10);
+		cachedinvse(&l1ptstable.word, sizeof l1ptstable.word);
+	}
+	if (!l1ptstable.word)
+		iprint("cpu%d: 8169 unreasonably slow; proceeding\n", m->machno);
+	/* now safe to copy cpu0's l1 pt in mmuinit */
+
+	mmuinit();			/* update our l1 pt from cpu0's */
+	fpon();
+	machon(m->machno);		/* now ready to go and be scheduled */
+
+	if (Debug)
+		iprint("cpu%d: scheding\n", m->machno);
+	schedinit();
+	panic("cpu%d: schedinit returned", m->machno);
+}
+
+/* mainly used to break out of wfi */
+void
+sgintr(Ureg *ureg, void *)
+{
+	iprint("cpu%d: got sgi\n", m->machno);
+	/* try to prod cpu1 into life when it gets stuck */
+	if (m->machno != 0)
+		clockprod(ureg);
+}
+
+void
+archreset(void)
+{
+	static int beenhere;
+
+	if (beenhere)
+		return;
+	beenhere = 1;
+
+	/* conservative temporary values until archconfinit runs */
+	m->cpuhz = 1000 * Mhz;			/* trimslice speed */
+	m->delayloop = m->cpuhz/2000;		/* initial estimate */
+
+	prcachecfg();
+
+	clockson();
+	/* all partitions were powered up by u-boot, so needn't do anything */
+	archconfinit();
+//	resetusb();
+	fpon();
+
+	if (irqtooearly)
+		panic("archreset: too early for irqenable");
+	irqenable(Cpu0irq, sgintr, nil, "cpu0");
+	irqenable(Cpu1irq, sgintr, nil, "cpu1");
+	/* ... */
+}
+
+void
+archreboot(void)
+{
+	Clkrst *clk = (Clkrst *)soc.clkrst;
+
+	assert(m->machno == 0);
+	iprint("archreboot: reset!\n");
+	delay(20);
+
+	clk->rstdevl |= Sysreset;
+	coherence();
+	delay(500);
+
+	/* shouldn't get here */
+	splhi();
+	iprint("awaiting reset");
+	for(;;) {
+		delay(1000);
+		print(".");
+	}
+}
+
+void
+kbdinit(void)
+{
+}
+
+static void
+missing(ulong addr, char *name)
+{
+	static int firstmiss = 1;
+
+	if (addr == 0) {
+		iprint("address zero for %s\n", name);
+		return;
+	}
+	if (probeaddr(addr) >= 0)
+		return;
+	missed++;
+	if (firstmiss) {
+		iprint("missing:");
+		firstmiss = 0;
+	} else
+		iprint(",\n\t");
+	iprint(" %s at %#lux", name, addr);
+}
+
+/* verify that all the necessary device registers are accessible */
+void
+chkmissing(void)
+{
+	delay(10);
+	missing(KZERO, "dram");
+	missing(soc.intr, "intr ctlr");
+	missing(soc.intrdist, "intr distrib");
+	missing(soc.tmr[0], "tegra timer1");
+	missing(soc.uart[0], "console uart");
+	missing(soc.pci, "pcie");
+	missing(soc.ether, "ether8169");
+	missing(soc.µs, "µs counter");
+	if (missed)
+		iprint("\n");
+	delay(10);
+}
+
+void
+archflashwp(Flash*, int)
+{
+}
+
+/*
+ * for ../port/devflash.c:/^flashreset
+ * retrieve flash type, virtual base and length and return 0;
+ * return -1 on error (no flash)
+ */
+int
+archflashreset(int bank, Flash *f)
+{
+	if(bank != 0)
+		return -1;
+panic("archflashreset: rewrite for nor & nand flash on ts");
+	/*
+	 * this is set up for the igepv2 board.
+	 */
+	f->type = "onenand";
+	f->addr = (void*)VIRTNOR;		/* mapped here by archreset */
+	f->size = 0;				/* done by probe */
+	f->width = 1;
+	f->interleave = 0;
+	return 0;
+}

+ 311 - 0
sys/src/9/teg2/arm.h

@@ -0,0 +1,311 @@
+/*
+ * arm-specific definitions for cortex-a8 and -a9
+ * these are used in C and assembler
+ *
+ * `cortex' refers to the cortex-a8 or -a9.
+ */
+
+#define NREGS		15	/* general-purpose regs, R0 through R14 */
+
+/*
+ * Program Status Registers
+ */
+#define PsrMusr		0x00000010		/* mode */
+#define PsrMfiq		0x00000011
+#define PsrMirq		0x00000012
+#define PsrMsvc		0x00000013	/* `protected mode for OS' */
+#define PsrMmon		0x00000016	/* `secure monitor' (trustzone hyper) */
+#define PsrMabt		0x00000017
+#define PsrMund		0x0000001B
+#define PsrMsys		0x0000001F	/* `privileged user mode for OS' (trustzone) */
+#define PsrMask		0x0000001F
+
+#define PsrThumb	0x00000020		/* beware hammers */
+#define PsrDfiq		0x00000040		/* disable FIQ interrupts */
+#define PsrDirq		0x00000080		/* disable IRQ interrupts */
+#define PsrDasabt	0x00000100		/* disable asynch aborts */
+#define PsrBigend	0x00000200
+
+#define PsrJaz		0x01000000		/* java mode */
+
+#define PsrV		0x10000000		/* overflow */
+#define PsrC		0x20000000		/* carry/borrow/extend */
+#define PsrZ		0x40000000		/* zero */
+#define PsrN		0x80000000		/* negative/less than */
+
+#define PsrMbz		(PsrJaz|PsrThumb|PsrBigend) /* these bits must be 0 */
+
+/*
+ * MCR and MRC are anti-mnemonic.
+ *	MTCP	coproc, opcode1, Rd, CRn, CRm[, opcode2]	# arm -> coproc
+ *	MFCP	coproc, opcode1, Rd, CRn, CRm[, opcode2]	# coproc -> arm
+ */
+
+#define MTCP	MCR
+#define MFCP	MRC
+
+/* instruction decoding */
+#define ISCPOP(op)	((op) == 0xE || ((op) & ~1) == 0xC)
+#define ISFPAOP(cp, op)	((cp) == CpOFPA && ISCPOP(op))
+#define ISVFPOP(cp, op)	(((cp) == CpDFP || (cp) == CpFP) && ISCPOP(op))
+
+/*
+ * Coprocessors
+ *	MCR	coproc, opcode1, Rd, CRn, CRm[, opcode2]	# arm -> coproc
+ *	MRC	coproc, opcode1, Rd, CRn, CRm[, opcode2]	# coproc -> arm
+ */
+#define CpOFPA		1			/* ancient 7500 FPA */
+#define CpFP		10			/* float FP, VFP cfg. */
+#define CpDFP		11			/* double FP */
+#define CpSC		15			/* System Control */
+
+/*
+ * Primary (CRn) CpSC registers.
+ */
+#define	CpID		0			/* ID and cache type */
+#define	CpCONTROL	1			/* miscellaneous control */
+#define	CpTTB		2			/* Translation Table Base(s) */
+#define	CpDAC		3			/* Domain Access Control */
+#define	CpFSR		5			/* Fault Status */
+#define	CpFAR		6			/* Fault Address */
+#define	CpCACHE		7			/* cache/write buffer control */
+#define	CpTLB		8			/* TLB control */
+#define	CpCLD		9			/* L2 Cache Lockdown, op1==1 */
+#define CpTLD		10			/* TLB Lockdown, with op2 */
+#define CpVECS		12			/* vector bases, op1==0, Crm==0, op2s (cortex) */
+#define	CpPID		13			/* Process ID */
+#define CpDTLB		15			/* TLB, L1 cache stuff (cortex) */
+
+/*
+ * CpTTB op1==0, Crm==0 opcode2 values.
+ */
+#define CpTTB0		0			/* secure ttb */
+#define CpTTB1		1			/* non-secure ttb (v7) */
+#define CpTTBctl	2			/* v7 */
+
+/*
+ * CpFSR op1==0, Crm==0 opcode 2 values.
+ */
+#define CpDFSR		0			/* data fault status */
+#define CpIFSR		1			/* instruction fault status */
+
+/*
+ * CpFAR op1==0, Crm==0 opcode 2 values.
+ */
+#define CpDFAR		0			/* data fault address */
+#define CpIFAR		2			/* instruction fault address */
+
+/*
+ * CpID Secondary (CRm) registers.
+ */
+#define CpIDidct	0
+
+/*
+ * CpID CpIDidct op1==0 opcode2 fields.
+ */
+#define CpIDid		0			/* main ID */
+#define CpIDct		1			/* cache type */
+#define CpIDtlb		3			/* tlb type (cortex) */
+#define CpIDmpid	5			/* multiprocessor id (cortex) */
+
+/* CpIDid op1 values */
+#define CpIDcsize	1			/* cache size (cortex) */
+#define CpIDcssel	2			/* cache size select (cortex) */
+
+/*
+ * CpID CpIDidct op1==CpIDcsize opcode2 fields.
+ */
+#define CpIDcasize	0			/* cache size */
+#define CpIDclvlid	1			/* cache-level id */
+
+/*
+ * CpCONTROL op2 codes, op1==0, Crm==0.
+ */
+#define CpMainctl	0		/* sctlr */
+#define CpAuxctl	1
+#define CpCPaccess	2
+
+/*
+ * CpCONTROL: op1==0, CRm==0, op2==CpMainctl.
+ * main control register.
+ * cortex/armv7 has more ops and CRm values.
+ */
+#define CpCmmu		0x00000001	/* M: MMU enable */
+#define CpCalign	0x00000002	/* A: alignment fault enable */
+#define CpCdcache	0x00000004	/* C: data cache on */
+#define CpBigend	(1<<7)
+#define CpCsw		(1<<10)		/* SW: SWP(B) enable (deprecated in v7) */
+#define CpCpredict	0x00000800	/* Z: branch prediction (armv7) */
+#define CpCicache	0x00001000	/* I: instruction cache on */
+#define CpChv		0x00002000	/* V: high vectors */
+#define CpCrr		(1<<14)	/* RR: round robin vs random cache replacement */
+#define CpCha		(1<<17)		/* HA: hw access flag enable */
+#define CpCdz		(1<<19)		/* DZ: divide by zero fault enable (not cortex-a9) */
+#define CpCfi		(1<<21)		/* FI: fast intrs */
+#define CpCve		(1<<24)		/* VE: intr vectors enable */
+#define CpCee		(1<<25)		/* EE: exception endianness: big */
+#define CpCnmfi		(1<<27)		/* NMFI: non-maskable fast intrs. (RO) */
+#define CpCtre		(1<<28)		/* TRE: TEX remap enable */
+#define CpCafe		(1<<29)		/* AFE: access flag (ttb) enable */
+#define CpCte		(1<<30)		/* TE: thumb exceptions */
+
+#define CpCsbz (1<<31 | CpCte | CpCafe | CpCtre | 1<<26 | CpCee | CpCve | \
+	CpCfi | 3<<19 | CpCha | 1<<15 | 3<<8 | CpBigend) /* must be 0 (armv7) */
+#define CpCsbo (3<<22 | 1<<18 | 1<<16 | CpChv | CpCsw | 017<<3)	/* must be 1 (armv7) */
+
+/*
+ * CpCONTROL: op1==0, CRm==0, op2==CpAuxctl.
+ * Auxiliary control register on cortex-a9.
+ * these differ from even the cortex-a8 bits.
+ */
+#define CpACparity		(1<<9)
+#define CpACca1way		(1<<8)	/* cache in a single way */
+#define CpACcaexcl		(1<<7)	/* exclusive cache */
+#define CpACsmp			(1<<6)	/* SMP l1 caches coherence; needed for ldrex/strex */
+#define CpAClwr0line		(1<<3)	/* write full cache line of 0s; see Fullline0 */
+#define CpACl1pref		(1<<2)	/* l1 prefetch enable */
+#define CpACl2pref		(1<<1)	/* l2 prefetch enable */
+#define CpACmaintbcast		(1<<0)	/* broadcast cache & tlb maint. ops */
+
+/*
+ * CpCONTROL Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpCONTROLscr	1
+
+#define CpSCRscr	0			/* secure configuration */
+
+/*
+ * CpCACHE Secondary (CRm) registers and opcode2 fields.  op1==0.
+ * In ARM-speak, 'flush' means invalidate and 'clean' means writeback.
+ */
+#define CpCACHEintr	0			/* interrupt (op2==4) */
+#define CpCACHEisi	1			/* inner-sharable I cache (v7) */
+#define CpCACHEpaddr	4			/* 0: phys. addr (cortex) */
+#define CpCACHEinvi	5			/* instruction, branch table */
+#define CpCACHEinvd	6			/* data or unified */
+// #define CpCACHEinvu	7			/* unified (not on cortex) */
+#define CpCACHEva2pa	8			/* va -> pa translation (cortex) */
+#define CpCACHEwb	10			/* writeback */
+#define CpCACHEinvdse	11			/* data or unified by mva */
+#define CpCACHEwbi	14			/* writeback+invalidate */
+
+#define CpCACHEall	0			/* entire (not for invd nor wb(i) on cortex) */
+#define CpCACHEse	1			/* single entry */
+#define CpCACHEsi	2			/* set/index (set/way) */
+#define CpCACHEtest	3			/* test loop */
+#define CpCACHEwait	4			/* wait (prefetch flush on cortex) */
+#define CpCACHEdmbarr	5			/* wb only (cortex) */
+#define CpCACHEflushbtc	6			/* flush branch-target cache (cortex) */
+#define CpCACHEflushbtse 7			/* ⋯ or just one entry in it (cortex) */
+
+/*
+ * CpTLB Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpTLBinvi	5			/* instruction */
+#define CpTLBinvd	6			/* data */
+#define CpTLBinvu	7			/* unified */
+
+#define CpTLBinv	0			/* invalidate all */
+#define CpTLBinvse	1			/* invalidate single entry */
+#define CpTBLasid	2			/* by ASID (cortex) */
+
+/*
+ * CpCLD Secondary (CRm) registers and opcode2 fields for op1==0. (cortex)
+ */
+#define CpCLDena	12			/* enables */
+#define CpCLDcyc	13			/* cycle counter */
+#define CpCLDuser	14			/* user enable */
+
+#define CpCLDenapmnc	0
+#define CpCLDenacyc	1
+
+/*
+ * CpCLD Secondary (CRm) registers and opcode2 fields for op1==1.
+ */
+#define CpCLDl2		0			/* l2 cache */
+
+#define CpCLDl2aux	2			/* auxiliary control */
+
+/*
+ * l2 cache aux. control
+ */
+#define CpCl2ecc	(1<<28)			/* use ecc, not parity */
+#define CpCl2noldforw	(1<<27)			/* no ld forwarding */
+#define CpCl2nowrcomb	(1<<25)			/* no write combining */
+#define CpCl2nowralldel	(1<<24)			/* no write allocate delay */
+#define CpCl2nowrallcomb (1<<23)		/* no write allocate combine */
+#define CpCl2nowralloc	(1<<22)			/* no write allocate */
+#define CpCl2eccparity	(1<<21)			/* enable ecc or parity */
+#define CpCl2inner	(1<<16)			/* inner cacheability */
+/* other bits are tag ram & data ram latencies */
+
+/*
+ * CpTLD Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpTLDlock	0			/* TLB lockdown registers */
+#define CpTLDpreload	1			/* TLB preload */
+
+#define CpTLDi		0			/* TLB instr. lockdown reg. */
+#define CpTLDd		1			/* " data " " */
+
+/*
+ * CpVECS Secondary (CRm) registers and opcode2 fields.
+ */
+#define CpVECSbase	0
+
+#define CpVECSnorm	0			/* (non-)secure base addr */
+#define CpVECSmon	1			/* secure monitor base addr */
+
+/*
+ * MMU page table entries.
+ * memory must be cached, buffered, sharable and wralloc to participate in
+ * automatic L1 cache coherency.
+ */
+#define Mbz		(0<<4)			/* L1 page tables: must be 0 */
+#define Noexecsect	(1<<4)			/* L1 sections: no execute */
+#define Fault		0x00000000		/* L[12] pte: unmapped */
+
+#define Coarse		(Mbz|1)			/* L1: page table */
+#define Section		(Mbz|2)			/* L1 1MB */
+/*
+ * next 2 bits (L1wralloc & L1sharable) and Buffered and Cached must be
+ * set in l1 ptes for LDREX/STREX to work.
+ */
+#define L1wralloc	(1<<12)			/* L1 TEX */
+#define L1sharable	(1<<16)
+#define L1nonglobal	(1<<17)			/* tied to asid */
+#define Nonsecuresect	(1<<19)			/* L1 sections */
+
+#define Large		0x00000001		/* L2 64KB */
+#define Noexecsmall	1			/* L2: no execute */
+#define Small		0x00000002		/* L2 4KB */
+/*
+ * next 4 bits (Buffered, Cached, L2wralloc & L2sharable) must be set in
+ * l2 ptes for memory containing locks because LDREX/STREX require them.
+ */
+#define Buffered	0x00000004		/* L[12]: 0 write-thru, 1 -back */
+#define Cached		0x00000008		/* L[12] */
+#define L2wralloc	(1<<6)			/* L2 TEX (small pages) */
+#define L2apro		(1<<9)			/* L2 AP: read only */
+#define L2sharable	(1<<10)
+#define L2nonglobal	(1<<11)			/* tied to asid */
+#define Dom0		0
+
+/* attributes for memory containing locks */
+#define L1ptedramattrs	(Cached | Buffered | L1wralloc | L1sharable)
+#define L2ptedramattrs	(Cached | Buffered | L2wralloc | L2sharable)
+
+#define Noaccess	0			/* AP, DAC */
+#define Krw		1			/* AP */
+/* armv7 deprecates AP[2] == 1 & AP[1:0] == 2 (Uro), prefers 3 (new in v7) */
+#define Uro		2			/* AP */
+#define Urw		3			/* AP */
+#define Client		1			/* DAC */
+#define Manager		3			/* DAC */
+
+#define AP(n, v)	F((v), ((n)*2)+4, 2)
+#define L1AP(ap)	(AP(3, (ap)))
+#define L2AP(ap)	(AP(0, (ap)))		/* armv7 */
+#define DAC(n, v)	F((v), (n)*2, 2)
+
+#define HVECTORS	0xffff0000

+ 135 - 0
sys/src/9/teg2/arm.s

@@ -0,0 +1,135 @@
+/*
+ * nvidia tegra 2 machine assist, definitions
+ * dual-core cortex-a9 processor
+ *
+ * R9 and R10 are used for `extern register' variables.
+ * R11 is used by the loader as a temporary, so avoid it.
+ */
+
+#include "mem.h"
+#include "arm.h"
+
+#undef B					/* B is for 'botch' */
+
+#define KADDR(pa)	(KZERO    | ((pa) & ~KSEGM))
+#define PADDR(va)	(PHYSDRAM | ((va) & ~KSEGM))
+
+#define L1X(va)		(((((va))>>20) & 0x0fff)<<2)
+
+#define MACHADDR	(L1-MACHSIZE)		/* only room for cpu0's */
+
+/* L1 pte values */
+#define PTEDRAM	(Dom0|L1AP(Krw)|Section|L1ptedramattrs)
+#define PTEIO	(Dom0|L1AP(Krw)|Section)
+
+#define DOUBLEMAPMBS	 512	/* megabytes of low dram to double-map */
+
+/* steps on R0 */
+#define DELAY(label, mloops) \
+	MOVW	$((mloops)*1000000), R0; \
+label: \
+	SUB.S	$1, R0; \
+	BNE	label
+
+/* print a byte on the serial console; clobbers R0 & R6; needs R12 (SB) set */
+#define PUTC(c) \
+	BARRIERS; \
+	MOVW	$(c), R0; \
+	MOVW	$PHYSCONS, R6; \
+	MOVW	R0, (R6); \
+	BARRIERS
+
+/*
+ * new instructions
+ */
+
+#define SMC	WORD	$0xe1600070	/* low 4-bits are call # (trustzone) */
+/* flush branch-target cache */
+#define FLBTC  MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc
+/* flush one entry of the branch-target cache, va in R0 (cortex) */
+#define FLBTSE MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtse
+
+/* arm v7 arch defines these */
+#define DSB	WORD	$0xf57ff04f	/* data synch. barrier; last f = SY */
+#define DMB	WORD	$0xf57ff05f	/* data mem. barrier; last f = SY */
+#define ISB	WORD	$0xf57ff06f	/* instr. sync. barrier; last f = SY */
+
+#define WFI	WORD	$0xe320f003	/* wait for interrupt */
+#define NOOP	WORD	$0xe320f000
+
+#define CLZ(s, d) WORD	$(0xe16f0f10 | (d) << 12 | (s))	/* count leading 0s */
+
+#define SETEND(o) WORD	$(0xf1010000 | (o) << 9)  /* o==0, little-endian */
+
+#define CPSIE	WORD	$0xf1080080	/* intr enable: zeroes I bit */
+#define CPSID	WORD	$0xf10c00c0	/* intr disable: sets I,F bits */
+#define CPSAE	WORD	$0xf1080100	/* async abt enable: zeroes A bit */
+#define CPSMODE(m) WORD $(0xf1020000 | (m)) /* switch to mode m (PsrM*) */
+
+#define	CLREX	WORD	$0xf57ff01f
+#define	LDREX(fp,t)   WORD $(0xe<<28|0x01900f9f | (fp)<<16 | (t)<<12)
+/* `The order of operands is from left to right in dataflow order' - asm man */
+#define	STREX(f,tp,r) WORD $(0xe<<28|0x01800f90 | (tp)<<16 | (r)<<12 | (f)<<0)
+
+/* floating point */
+#define VMRS(fp, cpu) WORD $(0xeef00a10 | (fp)<<16 | (cpu)<<12) /* FP → arm */
+#define VMSR(cpu, fp) WORD $(0xeee00a10 | (fp)<<16 | (cpu)<<12) /* arm → FP */
+
+/*
+ * a popular code sequence used to write a pte for va is:
+ *
+ *	MOVW	R(n), TTB[LnX(va)]
+ *	// clean the cache line
+ *	DSB
+ *	// invalidate tlb entry for va
+ *	FLBTC
+ *	DSB
+ * 	PFF (now ISB)
+ */
+#define	BARRIERS	FLBTC; DSB; ISB
+
+/*
+ * invoked with PTE bits in R2, pa in R3, PTE pointed to by R4.
+ * fill PTE pointed to by R4 and increment R4 past it.
+ * increment R3 by a MB.  clobbers R1.
+ */
+#define FILLPTE() \
+	ORR	R3, R2, R1;			/* pte bits in R2, pa in R3 */ \
+	MOVW	R1, (R4); \
+	ADD	$4, R4;				/* bump PTE address */ \
+	ADD	$MiB, R3;			/* bump pa */ \
+
+/* zero PTE pointed to by R4 and increment R4 past it. assumes R0 is 0. */
+#define ZEROPTE() \
+	MOVW	R0, (R4); \
+	ADD	$4, R4;				/* bump PTE address */
+
+/*
+ * set kernel SB for zero segment (instead of usual KZERO segment).
+ * NB: the next line puts rubbish in R12:
+ *	MOVW	$setR12-KZERO(SB), R12
+ */
+#define SETZSB \
+	MOVW	$setR12(SB), R12;		/* load kernel's SB */ \
+	SUB	$KZERO, R12; \
+	ADD	$PHYSDRAM, R12
+
+/*
+ * note that 5a's RFE is not the v6/7 arch. instruction (0xf8900a00),
+ * which loads CPSR from the word after the PC at (R13), but rather
+ * the pre-v6 simulation `MOVM.IA.S.W (R13), [R15]' (0xe8fd8000 since
+ * MOVM is LDM in this case), which loads CPSR not from memory but
+ * from SPSR due to `.S'.
+ */
+#define RFEV7(r)    WORD $(0xf8900a00 | (r) << 16)
+#define RFEV7W(r)   WORD $(0xf8900a00 | (r) << 16 | 0x00200000)	/* RFE.W */
+#define RFEV7DB(r)  WORD $(0xf9100a00 | (r) << 16)		/* RFE.DB */
+#define RFEV7DBW(r) WORD $(0xf9100a00 | (r) << 16 | 0x00200000)	/* RFE.DB.W */
+
+#define CKPSR(psr, tmp, bad)
+#define CKCPSR(psrtmp, tmp, bad)
+
+/* return with cpu id in r and condition codes set from "r == 0" */
+#define CPUID(r) \
+	MFCP	CpSC, 0, r, C(CpID), C(CpIDidct), CpIDmpid; \
+	AND.S	$(MAXMACH-1), r			/* mask out non-cpu-id bits */

+ 56 - 0
sys/src/9/teg2/atom.s

@@ -0,0 +1,56 @@
+#include "arm.s"
+
+/*
+ * int cas(ulong *p, ulong ov, ulong nv);
+ */
+
+TEXT	cas+0(SB),0,$12		/* r0 holds p */
+TEXT	casp+0(SB),0,$12	/* r0 holds p */
+	MOVW	ov+4(FP), R1
+	MOVW	nv+8(FP), R2
+spincas:
+	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
+	CMP.S	R3, R1
+	BNE	fail
+	STREX(2,0,4)	/*	STREX	0(R0),R2,R4	*/
+	CMP.S	$0, R4
+	BNE	spincas
+	MOVW	$1, R0
+	BARRIERS
+	RET
+fail:
+	CLREX
+	MOVW	$0, R0
+	RET
+
+TEXT _xinc(SB), $0	/* void	_xinc(long *); */
+TEXT ainc(SB), $0	/* long ainc(long *); */
+spinainc:
+	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
+	ADD	$1,R3
+	STREX(3,0,4)	/*	STREX	0(R0),R3,R4	*/
+	CMP.S	$0, R4
+	BNE	spinainc
+	MOVW	R3, R0
+	RET
+
+TEXT _xdec(SB), $0	/* long _xdec(long *); */
+TEXT adec(SB), $0	/* long adec(long *); */
+spinadec:
+	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
+	SUB	$1,R3
+	STREX(3,0,4)	/*	STREX	0(R0),R3,R4	*/
+	CMP.S	$0, R4
+	BNE	spinadec
+	MOVW	R3, R0
+	RET
+
+TEXT loadlinked(SB), $0	/* long loadlinked(long *); */
+	LDREX(0,0)	/*	LDREX	0(R0),R0	*/
+	RET
+
+TEXT storecond(SB), $0	/* int storecond(long *, long); */
+	MOVW	ov+4(FP), R3
+	STREX(3,0,0)	/*	STREX	0(R0),R3,R0	*/
+	RSB	$1, R0
+	RET
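
A minimal usage sketch (not part of this commit): higher-level atomic operations can be layered on the cas() primitive declared above. The addatomic() function and the cnt counter below are invented for illustration, in the kernel's own C dialect:

	static ulong cnt;			/* hypothetical shared counter */

	void
	addatomic(ulong n)
	{
		ulong old;

		do
			old = cnt;
		while(!cas(&cnt, old, old+n));	/* retry if another cpu changed cnt */
	}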

+ 456 - 0
sys/src/9/teg2/cache-l2-pl310.c

@@ -0,0 +1,456 @@
+/*
+ * PL310 level 2 cache (non-architectural bag on the side)
+ *
+ * guaranteed to work incorrectly with default settings; must set Sharovr.
+ *
+ * clean & invalidate (wbinv) is buggy, so we work around erratum 588369
+ * by disabling write-back and cache line-fill before, and restoring after.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "arm.h"
+
+#define NWAYS(l2p)	((l2p)->auxctl & Assoc16way? 16: 8)
+#define L2P		((L2pl310 *)soc.l2cache)
+
+enum {
+	L2size		= 1024 * 1024,	/* according to the tegra 2 manual */
+	Wayszgran	= 16 * KiB,	/* granularity of way sizes */
+};
+
+typedef struct L2pl310 L2pl310;
+typedef struct Pl310op Pl310op;
+
+struct Pl310op {
+	ulong	pa;
+	ulong	_pad;
+	ulong	indexway;
+	ulong	way;
+};
+
+struct L2pl310 {
+	ulong	id;
+	ulong	type;
+	uchar	_pad0[0x100 - 0x8];
+	ulong	ctl;
+	ulong	auxctl;
+
+	uchar	_pad1[0x730 - 0x108];	/* boring regs */
+	ulong	sync;
+	uchar	_pad2[0x740 - 0x734];
+	ulong	r3p0sync;		/* workaround for r3p0 bug */
+	uchar	_pad3[0x770 - 0x744];
+	Pl310op	inv;			/* inv.indexway doesn't exist */
+	uchar	_pad4[0x7b0 - 0x780];
+	Pl310op	clean;
+	uchar	_pad5[0x7f0 - 0x7c0];
+	Pl310op	cleaninv;
+	uchar	_pad6[0xc00 - 0x800];
+	ulong	filtstart;
+	ulong	filtend;
+	uchar	_pad7[0xf40 - 0xc08];
+	ulong	debug;
+	/* ... */
+};
+
+enum {
+	/* ctl bits */
+	L2enable = 1,
+
+	/* auxctl bits */
+	Ipref	= 1<<29,		/* prefetch enables */
+	Dpref	= 1<<28,
+	Mbo	= 1<<25,
+	Sharovr	= 1<<22, /* shared attribute override (i.e., work right!) */
+	Parity	= 1<<21,
+	Waycfgshift= 17,
+	Waycfgmask = (1<<3) - 1,
+	Assoc16way = 1<<16,
+	/*
+	 * optim'n to 0 cache lines; must be enabled in a9(?!).
+	 * set CpAClwr0line on all cpus 1st.
+	 */
+	Fullline0= 1<<0,
+
+	/* debug bits */
+	Wt	= 1<<1,			/* write-through, not write-back */
+	Nolinefill= 1<<0,
+
+	Basecfg = Wt | Nolinefill,
+};
+
+static Lock l2lock;
+static int disallowed;			/* by user: *l2off= in plan9.ini */
+static int l2ison;
+static int bg_op_running;
+static ulong waysmask;
+
+static Cacheimpl l2cacheimpl;
+
+static void
+awaitbgop(void)
+{
+	while (bg_op_running)
+		;
+}
+
+static void
+getlock(void)
+{
+	awaitbgop();		/* wait at normal PL first */
+	ilock(&l2lock);
+	awaitbgop();		/* wait under lock */
+}
+
+static void
+l2pl310sync(void)
+{
+	L2P->sync = 0;
+	coherence();
+}
+
+/* call this first to set sets/ways configuration */
+void
+l2pl310init(void)
+{
+	int waysz, nways;
+	ulong new;
+	L2pl310 *l2p = L2P;
+	static int configed;
+
+	if (getconf("*l2off") != nil) {
+//		iprint("l2 cache (pl310) disabled\n");
+		disallowed = 1;
+		return;
+	}
+	if (l2ison || configed)
+		return;
+	l2cache = &l2cacheimpl;
+	cachedwb();
+
+	/*
+	 * default config is:
+	 * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
+	 * but the tegra 2 manual says there's 1MB available.
+	 * ways or way-size may be fixed by hardware; the only way to tell
+	 * is to try to change the setting and read it back.
+	 */
+	l2pl310sync();
+	l2cache->inv();
+
+	/* figure out number of ways */
+	l2pl310sync();
+	nways = NWAYS(l2p);
+	if (!(l2p->auxctl & Assoc16way)) {
+		l2p->auxctl |= Assoc16way;
+		coherence();
+		l2pl310sync();
+		nways = NWAYS(l2p);
+//		iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways);
+	}
+	waysmask = MASK(nways);
+
+	/* figure out way size (and thus number of sets) */
+	waysz = L2size / nways;
+	new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) |
+		(log2(waysz / Wayszgran) + 1) << Waycfgshift;
+	l2p->auxctl = new;
+	coherence();
+	l2pl310sync();
+	l2cache->inv();
+
+//	iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways,
+//		waysz / CACHELINESZ, waysz);
+	if (l2p->auxctl != new)
+		iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
+			new, l2p->auxctl);
+	configed++;
+}
+
+void
+l2pl310info(Memcache *cp)
+{
+	int pow2;
+	ulong waysz;
+	L2pl310 *l2p = L2P;
+
+	memset(cp, 0, sizeof *cp);
+	if (!l2ison)
+		return;
+
+	l2pl310init();
+	assert((l2p->id >> 24) == 'A');
+	cp->level = 2;
+	cp->type = Unified;
+	cp->external = Extcache;
+	cp->setsways = Cara | Cawa | Cawt | Cawb;
+	cp->l1ip = 3<<14;				/* PIPT */
+	cp->setsh = cp->waysh = 0;			/* bag on the side */
+
+	cp->linelen = CACHELINESZ;
+	cp->log2linelen = log2(CACHELINESZ);
+
+	cp->nways = NWAYS(l2p);
+	pow2 = ((l2p->auxctl >> Waycfgshift) & Waycfgmask) - 1;
+	if (pow2 < 0)
+		pow2 = 0;
+	waysz = (1 << pow2) * Wayszgran;
+	cp->nsets = waysz / CACHELINESZ;
+}
+
+void
+l2pl310on(void)
+{
+	ulong ctl;
+	L2pl310 *l2p = L2P;
+
+	if (getconf("*l2off") != nil) {
+//		iprint("l2 cache (pl310) disabled\n");
+		disallowed = 1;
+		return;
+	}
+	if (l2ison)
+		return;
+
+	l2pl310init();
+	l2cache->inv();
+
+	/*
+	 * drain l1.  can't turn it off (which would make locks not work)
+	 * because doing so makes references below to the l2 registers wedge
+	 * the system.
+	 */
+	cacheuwbinv();
+	cacheiinv();
+
+	/*
+	 * this is only called once, on cpu0 at startup,
+	 * so we don't need locks here.
+	 * must do all configuration before enabling l2 cache.
+	 */
+	l2p->filtend = 0;
+	coherence();
+	l2p->filtstart = 0;		/* no enable bit */
+	l2p->debug = 0;			/* write-back, line fills allowed */
+	coherence();
+
+	ctl = l2p->auxctl;
+	/* don't change number of sets & ways, but reset all else. */
+	ctl &= Waycfgmask << Waycfgshift | Assoc16way;
+	ctl |= Sharovr;		/* actually work correctly for a change */
+	ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
+	l2p->auxctl = ctl;
+	coherence();
+
+	l2p->ctl |= L2enable;
+	coherence();
+
+	l2ison = 1;
+
+//	iprint("l2 cache (pl310) now on\n");
+}
+
+void
+l2pl310off(void)
+{
+	if (!l2ison)
+		return;
+	l2cache->wbinv();
+	getlock();
+	L2P->ctl &= ~L2enable;
+	coherence();
+	l2ison = 0;
+	iunlock(&l2lock);
+}
+
+
+static void
+applyrange(ulong *reg, void *ava, int len)
+{
+	uintptr va, endva;
+
+	if (disallowed || !l2ison)
+		return;
+	if (len < 0)
+		panic("l2cache*se called with negative length");
+	endva = (uintptr)ava + len;
+	for (va = (uintptr)ava & ~(CACHELINESZ-1); va < endva;
+	     va += CACHELINESZ)
+		*reg = PADDR(va);
+	l2pl310sync();
+}
+
+void
+l2pl310invse(void *va, int bytes)
+{
+	uintptr start, end;
+	L2pl310 *l2p = L2P;
+
+	/*
+	 * if start & end addresses are not on cache-line boundaries,
+	 * flush first & last cachelines before invalidating.
+	 */
+	start = (uintptr)va;
+	end = start + bytes;
+	getlock();
+	if (start % CACHELINESZ != 0) {
+//		iprint("l2pl310invse: unaligned start %#p from %#p\n", start,
+//			getcallerpc(&va));
+		applyrange(&l2p->clean.pa, va, 1);
+	}
+	if (end % CACHELINESZ != 0) {
+//		iprint("l2pl310invse: unaligned end %#p from %#p\n", end,
+//			getcallerpc(&va));
+		applyrange(&l2p->clean.pa, (char *)va + bytes, 1);
+	}
+
+	applyrange(&l2p->inv.pa, va, bytes);
+	iunlock(&l2lock);
+}
+
+void
+l2pl310wbse(void *va, int bytes)
+{
+	getlock();
+	applyrange(&L2P->clean.pa, va, bytes);
+	iunlock(&l2lock);
+}
+
+/*
+ * assume that ldrex/strex (thus locks) won't work when Wt in is effect,
+ * so don't manipulate locks between setting and clearing Wt.
+ */
+void
+l2pl310wbinvse(void *va, int bytes)
+{
+	int odb;
+	L2pl310 *l2p = L2P;
+
+	if (!l2ison)
+		return;
+	getlock();
+	applyrange(&l2p->clean.pa, va, bytes);	/* paranoia */
+
+	odb = l2p->debug;
+	l2p->debug |= Wt | Nolinefill;		/* erratum workaround */
+	coherence();
+
+	applyrange(&l2p->cleaninv.pa, va, bytes);
+
+	l2p->debug = odb;
+	iunlock(&l2lock);
+}
+
+
+/*
+ * we want to wait for completion at normal PL.
+ * if waiting is interrupted, interrupt code that calls
+ * these ops could deadlock on a uniprocessor, so we only
+ * give up l2lock before waiting on multiprocessors.
+ * in this port, only cpu 0 gets interrupts other than local timer ones.
+ */
+
+void
+l2pl310inv(void)
+{
+	L2pl310 *l2p = L2P;
+
+	if (disallowed)
+		return;
+
+	getlock();
+	bg_op_running = 1;
+	l2p->inv.way = waysmask;
+	coherence();
+	if (conf.nmach > 1)
+		iunlock(&l2lock);
+
+	while (l2p->inv.way & waysmask)
+		;
+
+	if (conf.nmach > 1)
+		ilock(&l2lock);
+	l2pl310sync();
+	bg_op_running = 0;
+	iunlock(&l2lock);
+}
+
+/*
+ * maximum time seen is 2542µs, typical is 625µs.
+ */
+void
+l2pl310wb(void)
+{
+	L2pl310 *l2p = L2P;
+
+	if (disallowed || !l2ison)
+		return;
+
+	getlock();
+	bg_op_running = 1;
+	l2p->clean.way = waysmask;
+	coherence();
+	if (conf.nmach > 1)
+		iunlock(&l2lock);
+
+	while (l2p->clean.way & waysmask)
+		;
+
+	if (conf.nmach > 1)
+		ilock(&l2lock);
+	l2pl310sync();
+	bg_op_running = 0;
+	iunlock(&l2lock);
+}
+
+void
+l2pl310wbinv(void)
+{
+	int odb;
+	L2pl310 *l2p = L2P;
+
+	if (disallowed || !l2ison)
+		return;
+
+	l2pl310wb();			/* paranoia */
+
+	getlock();
+	bg_op_running = 1;
+	odb = l2p->debug;
+	l2p->debug |= Wt | Nolinefill;	/* erratum workaround */
+	coherence();
+
+	l2p->cleaninv.way = waysmask;
+	coherence();
+	if (conf.nmach > 1)
+		iunlock(&l2lock);
+
+	while (l2p->cleaninv.way & waysmask)
+		;
+
+	if (conf.nmach > 1)
+		ilock(&l2lock);
+	l2pl310sync();
+	l2p->debug = odb;
+	bg_op_running = 0;
+	iunlock(&l2lock);
+}
+
+static Cacheimpl l2cacheimpl = {
+	.info	= l2pl310info,
+	.on	= l2pl310on,
+	.off	= l2pl310off,
+
+	.inv	= l2pl310inv,
+	.wb	= l2pl310wb,
+	.wbinv	= l2pl310wbinv,
+
+	.invse	= l2pl310invse,
+	.wbse	= l2pl310wbse,
+	.wbinvse= l2pl310wbinvse,
+};

+ 240 - 0
sys/src/9/teg2/cache.v7.s

@@ -0,0 +1,240 @@
+/*
+ * cortex arm arch v7 cache flushing and invalidation
+ * included by l.s and rebootcode.s
+ */
+
+TEXT cacheiinv(SB), $-4				/* I invalidate */
+	MOVW	$0, R0
+	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall /* ok on cortex */
+	ISB
+	RET
+
+/*
+ * set/way operators, passed a suitable set/way value in R0.
+ */
+TEXT cachedwb_sw(SB), $-4
+	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEsi
+	RET
+
+TEXT cachedwbinv_sw(SB), $-4
+	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEsi
+	RET
+
+TEXT cachedinv_sw(SB), $-4
+	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEsi
+	RET
+
+	/* set cache size select */
+TEXT setcachelvl(SB), $-4
+	MTCP	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0
+	ISB
+	RET
+
+	/* return cache sizes */
+TEXT getwayssets(SB), $-4
+	MFCP	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0
+	RET
+
+/*
+ * l1 cache operations.
+ * l1 and l2 ops are intended to be called from C, thus need save no
+ * caller's regs, only those we need to preserve across calls.
+ */
+
+TEXT cachedwb(SB), $-4
+	MOVW.W	R14, -8(R13)
+	MOVW	$cachedwb_sw(SB), R0
+	MOVW	$1, R8
+	BL	wholecache(SB)
+	MOVW.P	8(R13), R15
+
+TEXT cachedwbinv(SB), $-4
+	MOVW.W	R14, -8(R13)
+	MOVW	$cachedwbinv_sw(SB), R0
+	MOVW	$1, R8
+	BL	wholecache(SB)
+	MOVW.P	8(R13), R15
+
+TEXT cachedinv(SB), $-4
+	MOVW.W	R14, -8(R13)
+	MOVW	$cachedinv_sw(SB), R0
+	MOVW	$1, R8
+	BL	wholecache(SB)
+	MOVW.P	8(R13), R15
+
+TEXT cacheuwbinv(SB), $-4
+	MOVM.DB.W [R14], (R13)	/* save lr on stack */
+	MOVW	CPSR, R1
+	CPSID			/* splhi */
+
+	MOVM.DB.W [R1], (R13)	/* save R1 on stack */
+
+	BL	cachedwbinv(SB)
+	BL	cacheiinv(SB)
+
+	MOVM.IA.W (R13), [R1]	/* restore R1 (saved CPSR) */
+	MOVW	R1, CPSR
+	MOVM.IA.W (R13), [R14]	/* restore lr */
+	RET
+
+/*
+ * architectural l2 cache operations
+ */
+
+TEXT _l2cacheuwb(SB), $-4
+	MOVW.W	R14, -8(R13)
+	MOVW	$cachedwb_sw(SB), R0
+	MOVW	$2, R8
+	BL	wholecache(SB)
+	MOVW.P	8(R13), R15	/* return */
+
+TEXT _l2cacheuwbinv(SB), $-4
+	MOVW.W	R14, -8(R13)
+	MOVW	CPSR, R1
+	CPSID			/* splhi */
+
+	MOVM.DB.W [R1], (R13)	/* save R1 on stack */
+
+	MOVW	$cachedwbinv_sw(SB), R0
+	MOVW	$2, R8
+	BL	wholecache(SB)
+
+	BL	_l2cacheuinv(SB)
+
+	MOVM.IA.W (R13), [R1]	/* restore R1 (saved CPSR) */
+	MOVW	R1, CPSR
+	MOVW.P	8(R13), R15	/* return */
+
+TEXT _l2cacheuinv(SB), $-4
+	MOVW.W	R14, -8(R13)
+	MOVW	$cachedinv_sw(SB), R0
+	MOVW	$2, R8
+	BL	wholecache(SB)
+	MOVW.P	8(R13), R15	/* return */
+
+/*
+ * callers are assumed to be the above l1 and l2 ops.
+ * R0 is the function to call in the innermost loop.
+ * R8 is the cache level (1-origin: 1 or 2).
+ *
+ * R0	func to call at entry
+ * R1	func to call after entry
+ * R2	nsets
+ * R3	way shift (computed from R8)
+ * R4	set shift (computed from R8)
+ * R5	nways
+ * R6	set scratch
+ * R7	way scratch
+ * R8	cache level, 0-origin
+ * R9	extern reg up
+ * R10	extern reg m
+ *
+ * initial translation by 5c, then massaged by hand.
+ */
+TEXT wholecache+0(SB), $-4
+	MOVW	CPSR, R2
+	MOVM.DB.W [R2,R14], (SP) /* save regs on stack */
+
+	MOVW	R0, R1		/* save argument for inner loop in R1 */
+	SUB	$1, R8		/* convert cache level to zero origin */
+
+	/* we might not have the MMU on yet, so map R1 (func) to R14's space */
+	MOVW	R14, R0		/* get R14's segment ... */
+	AND	$KSEGM, R0
+	BIC	$KSEGM,	R1	/* strip segment from func address */
+	ORR	R0, R1		/* combine them */
+
+	/* get cache sizes */
+	SLL	$1, R8, R0	/* R0 = (cache - 1) << 1 */
+	MTCP	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0 /* set cache select */
+	ISB
+	MFCP	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0 /* get cache sizes */
+
+	/* compute # of ways and sets for this cache level */
+	SRA	$3, R0, R5	/* R5 (ways) = R0 >> 3 */
+	AND	$((1<<10)-1), R5 /* R5 = (R0 >> 3) & MASK(10) */
+	ADD	$1, R5		/* R5 (ways) = ((R0 >> 3) & MASK(10)) + 1 */
+
+	SRA	$13, R0, R2	/* R2 = R0 >> 13 */
+	AND	$((1<<15)-1), R2 /* R2 = (R0 >> 13) & MASK(15) */
+	ADD	$1, R2		/* R2 (sets) = ((R0 >> 13) & MASK(15)) + 1 */
+
+	/* precompute set/way shifts for inner loop */
+	MOVW	$(CACHECONF+0), R3	/* +0 = l1waysh */
+	MOVW	$(CACHECONF+4), R4	/* +4 = l1setsh */
+	CMP	$0, R8		/* cache == 1? */
+	ADD.NE	$(4*2), R3	/* no, assume l2: +8 = l2waysh */
+	ADD.NE	$(4*2), R4	/* +12 = l2setsh */
+
+	MOVW	R14, R0		/* get R14's segment ... */
+	AND	$KSEGM, R0
+
+	BIC	$KSEGM,	R3	/* strip segment from address */
+	ORR	R0, R3		/* combine them */
+	BIC	$KSEGM,	R4	/* strip segment from address */
+	ORR	R0, R4		/* combine them */
+	MOVW	(R3), R3
+	MOVW	(R4), R4
+
+	CMP	$0, R3		/* sanity checks */
+	BEQ	wbuggery
+	CMP	$0, R4
+	BEQ	sbuggery
+
+	CPSID			/* splhi to make entire op atomic */
+	BARRIERS
+
+	/* iterate over ways */
+	MOVW	$0, R7		/* R7: way */
+outer:
+	/* iterate over sets */
+	MOVW	$0, R6		/* R6: set */
+inner:
+	/* compute set/way register contents */
+	SLL	R3, R7, R0 	/* R0 = way << R3 (L?WAYSH) */
+	ORR	R8<<1, R0	/* R0 = way << L?WAYSH | (cache - 1) << 1 */
+	ORR	R6<<R4, R0 	/* R0 = way<<L?WAYSH | (cache-1)<<1 |set<<R4 */
+
+	BL	(R1)		/* call set/way operation with R0 arg. */
+
+	ADD	$1, R6		/* set++ */
+	CMP	R2, R6		/* set >= sets? */
+	BLT	inner		/* no, do next set */
+
+	ADD	$1, R7		/* way++ */
+	CMP	R5, R7		/* way >= ways? */
+	BLT	outer		/* no, do next way */
+
+	MOVM.IA.W (SP), [R2,R14] /* restore regs */
+	BARRIERS
+	MOVW	R2, CPSR	/* splx */
+
+	RET
+
+wbuggery:
+	PUTC('?')
+	PUTC('c')
+	PUTC('w')
+	B	topanic
+sbuggery:
+	PUTC('?')
+	PUTC('c')
+	PUTC('s')
+topanic:
+	MOVW	$.string<>+0(SB), R0
+	BIC	$KSEGM,	R0	/* strip segment from address */
+	MOVW	R14, R1		/* get R14's segment ... */
+	AND	$KSEGM, R1
+	ORR	R1, R0		/* combine them */
+	SUB	$12, R13	/* not that it matters, since we're panicking */
+	MOVW	R14, 8(R13)
+	BL	panic(SB)	/* panic("msg %#p", LR) */
+bugloop:
+	WFI
+	B	bugloop
+
+	DATA	.string<>+0(SB)/8,$"bad cach"
+	DATA	.string<>+8(SB)/8,$"e params"
+	DATA	.string<>+16(SB)/8,$"\073 pc %\043p"
+	DATA	.string<>+24(SB)/1,$"\z"
+	GLOBL	.string<>+0(SB),$25

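For reference, the value that wholecache's inner loop assembles in R0 is the standard ARMv7 set/way operand: way in the top bits, set just above the line-offset bits, and (level-1)<<1 in bits 3:1. A small host-side sketch of that composition (illustrative only; the shift amounts and example numbers are assumptions, presumably matching the waysh/setsh values kept at CACHECONF and computed in caches-v7.c below):

	#include <stdio.h>
	#include <stdint.h>

	/* compose a DCISW/DCCSW/DCCISW operand as the inner loop above does */
	static uint32_t
	setwayval(unsigned way, unsigned waysh, unsigned set, unsigned setsh,
		unsigned level)			/* level is 1-origin, as at entry */
	{
		return (uint32_t)way << waysh | (uint32_t)set << setsh |
			(uint32_t)(level - 1) << 1;
	}

	int
	main(void)
	{
		/* e.g. an 8-way cache (waysh = 32-3 = 29) with 32-byte lines (setsh = 5) */
		printf("%#x\n", (unsigned)setwayval(3, 29, 17, 5, 1));	/* 0x60000220 */
		return 0;
	}
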
+ 106 - 0
sys/src/9/teg2/caches-v7.c

@@ -0,0 +1,106 @@
+/*
+ * caches defined by arm v7 architecture
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "arm.h"
+
+static char *
+l1iptype(uint type)
+{
+	static char *types[] = {
+		"reserved",
+		"asid-tagged VIVT",
+		"VIPT",
+		"PIPT",
+	};
+
+	if (type >= nelem(types) || types[type] == nil)
+		return "GOK";
+	return types[type];
+}
+
+static char *catype[] = {
+	"none,",
+	"i,",
+	"d,",
+	"split i&d,",
+	"unified,",
+	"gok,",
+	"gok,",
+	"gok,",
+};
+
+void
+cacheinfo(int level, Memcache *cp, int ext, int type)
+{
+	ulong setsways;
+
+	memset(cp, 0, sizeof *cp);
+	if (type == Nocache)
+		return;
+	cp->level = level;
+	cp->type = type;
+	cp->external = ext;
+	if (level == 2) {			/* external PL310 */
+		allcache->info(cp);
+		setsways = cp->setsways;
+	} else {
+		/* select internal cache level */
+		cpwrsc(CpIDcssel, CpID, CpIDid, 0, (level - 1) << 1);
+
+		setsways = cprdsc(CpIDcsize, CpID, CpIDid, 0);
+		cp->l1ip = cpctget();
+		cp->nways = ((setsways >> 3)  & MASK(10)) + 1;
+		cp->nsets = ((setsways >> 13) & MASK(15)) + 1;
+		cp->log2linelen = (setsways & MASK(2)) + 2 + 2;
+	}
+	cp->linelen = 1 << cp->log2linelen;
+	cp->setsways = setsways;
+	cp->setsh = cp->log2linelen;
+	cp->waysh = 32 - log2(cp->nways);
+}
+
+void
+allcacheinfo(Memcache *mc)
+{
+	int n;
+	ulong lvl;
+
+	lvl = cprdsc(CpIDcsize, CpID, CpIDidct, CpIDclvlid);
+	n = 1;
+	for (lvl &= MASK(21); lvl; lvl >>= 3)
+		cacheinfo(n, &mc[n], Intcache, lvl & MASK(3));
+//	cacheinfo(2, &mc[2], Extcache, Unified);		/* PL310 */
+}
+
+void
+prcachecfg(void)
+{
+	int cache;
+	Memcache *mc;
+
+	for (cache = 1; cache < 8 && cachel[cache].type; cache++) {
+		mc = &cachel[cache];
+		iprint("l%d: %s %-10s %2d ways %4d sets %d bytes/line; can W[",
+			mc->level, mc->external? "ext": "int", catype[mc->type],
+			mc->nways, mc->nsets, mc->linelen);
+		if (mc->linelen != CACHELINESZ)
+			iprint(" *should* be %d", CACHELINESZ);
+		if (mc->setsways & Cawt)
+			iprint("T");
+		if (mc->setsways & Cawb)
+			iprint("B");
+		if (mc->setsways & Cawa)
+			iprint("A");
+		iprint("]");
+		if (cache == 1)
+			iprint("; l1-i %s", l1iptype((mc->l1ip >> 14) & MASK(2)));
+		iprint("\n");
+	}
+}

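The field extraction in cacheinfo() can be checked against a concrete CCSIDR value. A small host-side sketch, using 0x701fe019 as a plausible (hypothetical here) reading for a 32KB, 4-way, 32-byte-line data cache:

	#include <stdio.h>
	#include <stdint.h>

	/* decode the ARMv7 CCSIDR fields the same way cacheinfo() does */
	int
	main(void)
	{
		uint32_t ccsidr = 0x701fe019;			/* example value */
		unsigned linelen = 1u << ((ccsidr & 3) + 4);	/* LineSize field + 4 = log2(bytes/line) */
		unsigned ways = ((ccsidr >> 3) & 0x3ff) + 1;
		unsigned sets = ((ccsidr >> 13) & 0x7fff) + 1;

		printf("%u ways, %u sets, %u bytes/line -> %u KB\n",
			ways, sets, linelen, ways*sets*linelen/1024);
		return 0;
	}
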
+ 198 - 0
sys/src/9/teg2/caches.c

@@ -0,0 +1,198 @@
+/*
+ * operations on all memory data or unified caches, a no-op cache
+ * implementation, and an implementation providing only l1 cache ops.
+ * i-caches are not handled here.
+ *
+ * there are only three cache operations that we care about:
+ * force cache contents to memory (before dma out or shutdown),
+ * ignore cache contents in favour of memory (initialisation, after dma in),
+ * both (update page tables and force cpu to read new contents).
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+
+static Cacheimpl allcaches, nullcaches, l1caches;
+
+void
+cachesinfo(Memcache *cp)
+{
+	memset(cp, 0, sizeof *cp);
+	cp->setsways = Cara | Cawa | Cawt | Cawb;
+	cp->l1ip = 3<<14;				/* PIPT */
+	cp->log2linelen = log2(CACHELINESZ);
+}
+
+void
+allcacheson(void)
+{
+	l2pl310init();
+	allcache = &allcaches;
+	nocache = &nullcaches;
+	l1cache = &l1caches;
+}
+
+void
+cachesoff(void)
+{
+	l2cache->off();
+}
+
+void
+cachesinvse(void *va, int bytes)
+{
+	int s;
+
+	s = splhi();
+	l2cache->invse(va, bytes);
+	cachedinvse(va, bytes);
+	splx(s);
+}
+
+void
+cacheswbse(void *va, int bytes)
+{
+	int s;
+
+	s = splhi();
+	cachedwbse(va, bytes);
+	l2cache->wbse(va, bytes);
+	splx(s);
+}
+
+void
+cacheswbinvse(void *va, int bytes)
+{
+	int s;
+
+	s = splhi();
+	cachedwbse(va, bytes);
+	l2cache->wbinvse(va, bytes);
+	cachedwbinvse(va, bytes);
+	splx(s);
+}
+
+
+void
+cachesinv(void)
+{
+	int s;
+
+	s = splhi();
+	l2cache->inv();
+	cachedinv();
+	splx(s);
+}
+
+void
+cacheswb(void)
+{
+	int s;
+
+	s = splhi();
+	cachedwb();
+	l2cache->wb();
+	splx(s);
+}
+
+void
+cacheswbinv(void)
+{
+	int s;
+
+	s = splhi();
+	cachedwb();
+	l2cache->wbinv();
+	cachedwbinv();
+	splx(s);
+}
+
+static Cacheimpl allcaches = {
+	.info	= cachesinfo,
+	.on	= allcacheson,
+	.off	= cachesoff,
+
+	.inv	= cachesinv,
+	.wb	= cacheswb,
+	.wbinv	= cacheswbinv,
+
+	.invse	= cachesinvse,
+	.wbse	= cacheswbse,
+	.wbinvse= cacheswbinvse,
+};
+
+
+/*
+ * null cache ops
+ */
+
+void
+nullinfo(Memcache *cp)
+{
+	memset(cp, 0, sizeof *cp);
+	cp->log2linelen = 2;
+}
+
+void
+nullon(void)
+{
+	nocache = &nullcaches;
+}
+
+void
+nullop(void)
+{
+}
+
+void
+nullse(void *, int)
+{
+}
+
+static Cacheimpl nullcaches = {
+	.info	= nullinfo,
+	.on	= nullon,
+	.off	= nullop,
+
+	.inv	= nullop,
+	.wb	= nullop,
+	.wbinv	= nullop,
+
+	.invse	= nullse,
+	.wbse	= nullse,
+	.wbinvse= nullse,
+};
+
+/*
+ * l1-only ops
+ */
+
+void
+l1cachesinfo(Memcache *)
+{
+}
+
+void
+l1cacheson(void)
+{
+	l1cache = &l1caches;
+}
+
+static Cacheimpl l1caches = {
+	.info	= l1cachesinfo,
+	.on	= l1cacheson,
+	.off	= nullop,
+
+	.inv	= cachedinv,
+	.wb	= cachedwb,
+	.wbinv	= cachedwbinv,
+
+	.invse	= cachedinvse,
+	.wbse	= cachedwbse,
+	.wbinvse= cachedwbinvse,
+};

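As a usage note, the composite ops above are what a driver would reach for around DMA, per the comment at the top of this file. A hypothetical sketch (not part of this commit; the function names and buffer are invented, and the usual kernel headers are assumed):

	#include "u.h"
	#include "../port/lib.h"
	#include "mem.h"
	#include "dat.h"
	#include "fns.h"

	/* push dirty cached data to memory before a device reads the buffer */
	void
	dmaoutflush(void *buf, int len)
	{
		allcache->wbse(buf, len);	/* l1 then l2 write-back */
	}

	/* discard stale cached data after a device has written the buffer */
	void
	dmainflush(void *buf, int len)
	{
		allcache->invse(buf, len);	/* l2 then l1 invalidate */
	}
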
+ 138 - 0
sys/src/9/teg2/clock-tegra.c

@@ -0,0 +1,138 @@
+/*
+ * tegra 2 SoC clocks; excludes cortex-a timers.
+ *
+ * SoC provides these shared clocks:
+ * 4 29-bit count-down `timers' @ 1MHz,
+ * 1 32-bit count-up time-stamp counter @ 1MHz,
+ * and a real-time clock @ 32KHz.
+ * the tegra watchdog (tegra 2 ref man §5.4.1) is tied to timers, not rtc.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "arm.h"
+
+typedef struct Shrdtmr Shrdtmr;
+typedef struct µscnt µscnt;
+
+/* tegra2 shared-intr timer registers */
+struct Shrdtmr {		/* 29-bit count-down timer (4); unused */
+	ulong	trigger;
+	ulong	prescnt;
+};
+
+enum {
+	/* trigger bits */
+	Enable =	1u<<31,
+	Periodintr =	1<<30,
+	Countmask =	MASK(29),
+
+	/* prescnt bits */
+	Intrclr =	1<<30,
+	/* Countmask is ro */
+};
+
+struct µscnt {		/* tegra2 shared 32-bit count-up µs counter (1) */
+	ulong	cntr;
+	/*
+	 * oscillator clock fraction - 1; initially 0xb (11) from u-boot
+	 * for 12MHz periphclk.
+	 */
+	ulong	cfg;
+	uchar	_pad0[0x3c - 0x8];
+	ulong	freeze;
+};
+
+enum {
+	/* cfg bits */
+	Dividendshift =	8,
+	Dividendmask =	MASK(8),
+	Divisorshift =	0,
+	Divisormask =	MASK(8),
+};
+
+void
+tegclockintr(void)
+{
+	int junk;
+	Shrdtmr *tmr;
+
+	/* appease the tegra dog */
+	tmr = (Shrdtmr *)soc.tmr[0];
+	junk = tmr->trigger;
+	USED(junk);
+}
+
+/*
+ * if on cpu0, shut down the shared tegra2 watchdog timer.
+ */
+void
+tegclockshutdown(void)
+{
+	Shrdtmr *tmr;
+
+	if (m->machno == 0) {
+		tmr = (Shrdtmr *)soc.tmr[0];
+		tmr->prescnt = tmr->trigger = 0;
+		coherence();
+	}
+}
+
+void
+tegwdogintr(Ureg *, void *v)
+{
+	int junk;
+	Shrdtmr *tmr;
+
+	tmr = (Shrdtmr *)v;
+	tmr->prescnt |= Intrclr;
+	coherence();
+	/* the lousy documentation says we also have to read trigger */
+	junk = tmr->trigger;
+	USED(junk);
+}
+
+/* start tegra2 shared watch dog */
+void
+tegclock0init(void)
+{
+	Shrdtmr *tmr;
+
+	tmr = (Shrdtmr *)soc.tmr[0];
+	irqenable(Tn0irq, tegwdogintr, tmr, "tegra watchdog");
+
+	/*
+	 * tegra watchdog only fires on the second missed interrupt, thus /2.
+	 */
+	tmr->trigger = (Dogsectimeout * Mhz / 2 - 1) | Periodintr | Enable;
+	coherence();
+}
+
+/*
+ * µscnt is a freerunning timer (cycle counter); it needs no
+ * initialisation, wraps and does not dispatch interrupts.
+ */
+void
+tegclockinit(void)
+{
+	ulong old;
+	µscnt *µs = (µscnt *)soc.µs;
+
+	/* verify µs counter sanity */
+	assert(µs->cfg == 0xb);			/* set by u-boot */
+	old = µs->cntr;
+	delay(1);
+	assert(old != µs->cntr);
+}
+
+ulong
+perfticks(void)			/* MHz rate, assumed by timing loops */
+{
+	ulong v;
+
+	/* keep it non-zero to prevent m->fastclock ever going to zero. */
+	v = ((µscnt *)soc.µs)->cntr;
+	return v == 0? 1: v;
+}

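The 0xb that tegclockinit() insists on is just the divisor that turns the 12MHz oscillator into a 1MHz count. A small host-side sketch of the arithmetic, assuming the usual dividend/divisor reading of the cfg register (the formula is an assumption, not taken from this commit):

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint32_t cfg = 0xb;			/* value u-boot leaves behind */
		unsigned dividend = (cfg >> 8) & 0xff;	/* 0 */
		unsigned divisor = cfg & 0xff;		/* 11 */
		unsigned osc = 12*1000*1000;		/* 12MHz periphclk */

		/* 12MHz * 1/12 = 1MHz, i.e. one count per µs */
		printf("%u Hz\n", osc * (dividend + 1) / (divisor + 1));
		return 0;
	}
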
+ 623 - 0
sys/src/9/teg2/clock.c

@@ -0,0 +1,623 @@
+/*
+ * cortex-a clocks; excludes tegra 2 SoC clocks
+ *
+ * cortex-a processors include private `global' and local timers
+ * at soc.scu + 0x200 (global) and + 0x600 (local).
+ * the global timer is a single count-up timer shared by all cores
+ * but with per-cpu comparator and auto-increment registers.
+ * a local count-down timer can be used as a watchdog.
+ *
+ * v7 arch provides a 32-bit count-up cycle counter (at about 1GHz in our case)
+ * but it's unsuitable as our source of fastticks, because it stops advancing
+ * when the cpu is suspended by WFI.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "arm.h"
+
+enum {
+	Debug		= 0,
+
+	Basetickfreq	= Mhz,			/* soc.µs rate in Hz */
+	/* the local timers seem to run at half the expected rate */
+	Clockfreqbase	= 250*Mhz / 2,	/* private timer rate (PERIPHCLK/2) */
+	Tcycles		= Clockfreqbase / HZ,	/* cycles per clock tick */
+
+	MinPeriod	= Tcycles / 100,
+	MaxPeriod	= Tcycles,
+
+	Dogtimeout	= Dogsectimeout * Clockfreqbase,
+};
+
+typedef struct Ltimer Ltimer;
+typedef struct Pglbtmr Pglbtmr;
+typedef struct Ploctmr Ploctmr;
+
+/*
+ * cortex-a private-intr local timer registers.  all cpus see their
+ * own local timers at the same base address.
+ */
+struct Ltimer {
+	ulong	load;		/* new value + 1 */
+	ulong	cnt;		/* counts down */
+	ulong	ctl;
+	ulong	isr;
+
+	/* watchdog only */
+	ulong	wdrst;
+	ulong	wddis;		/* wo */
+
+	ulong	_pad0[2];
+};
+struct Ploctmr {
+	Ltimer	loc;
+	Ltimer	wd;
+};
+
+enum {
+	/* ctl bits */
+	Tmrena	= 1<<0,		/* timer enabled */
+	Wdogena = Tmrena,	/* watchdog enabled */
+	Xreload	= 1<<1,		/* reload on intr; periodic interrupts */
+	Tintena	= 1<<2,		/* enable irq 29 at cnt==0 (30 for watchdog) */
+	Wdog	= 1<<3,		/* watchdog, not timer, mode */
+	Xsclrshift = 8,
+	Xsclrmask = MASK(8),
+
+	/* isr bits */
+	Xisrclk	= 1<<0,		/* write to clear */
+
+	/* wdrst bits */
+	Wdrst	= 1<<0,
+
+	/* wddis values */
+	Wdon	= 1,
+	Wdoff1	= 0x12345678,	/* send these two to switch to timer mode */
+	Wdoff2	= 0x87654321,
+};
+
+/* cortex-a private-intr global timer registers */
+struct Pglbtmr {
+	ulong	cnt[2];		/* counts up; little-endian uvlong */
+	ulong	ctl;
+	ulong	isr;
+	ulong	cmp[2];		/* little-endian uvlong */
+	ulong	inc;
+};
+
+enum {
+	/* unique ctl bits (otherwise see X* above) */
+	Gcmp	= 1<<1,
+//	Gtintena= 1<<2,		/* enable irq 27 */
+	Gincr	= 1<<3,
+};
+
+/*
+ * until 5[cal] inline vlong ops, avoid them where possible;
+ * they are currently slow function calls.
+ */
+typedef union Counter Counter;
+union Counter {
+	uvlong	uvl;
+	struct {			/* little-endian */
+		ulong	low;
+		ulong	high;
+	};
+};
+
+static int fired;
+static int ticking[MAXMACH];
+
+/* no lock is needed to update our local timer.  splhi keeps it tight. */
+static void
+setltimer(Ltimer *tn, ulong ticks)
+{
+	int s;
+
+	assert(ticks <= Clockfreqbase);
+	s = splhi();
+	tn->load = ticks - 1;
+	coherence();
+	tn->ctl = Tmrena | Tintena | Xreload;
+	coherence();
+	splx(s);
+}
+
+static void
+ckstuck(int cpu, long myticks, long histicks)
+{
+	if (labs(histicks - myticks) > HZ) {
+//		iprint("cpu%d: clock ticks %ld (vs myticks %ld cpu0 %ld); "
+//			"apparently stopped\n",
+//			cpu, histicks, myticks, MACHP(0)->ticks);
+		if (!ticking[cpu])
+			panic("cpu%d: clock not interrupting", cpu);
+	}
+}
+
+static void
+mpclocksanity(void)
+{
+	int cpu, mycpu;
+	long myticks, histicks;
+
+	if (conf.nmach <= 1 || active.exiting || navailcpus == 0)
+		return;
+
+	mycpu = m->machno;
+	myticks = m->ticks;
+	if (myticks == HZ)
+		ticking[mycpu] = 1;
+
+	if (myticks < 5*HZ)
+		return;
+
+	for (cpu = 0; cpu < navailcpus; cpu++) {
+		if (cpu == mycpu)
+			continue;
+		histicks = MACHP(cpu)->ticks;
+		if (myticks == 5*HZ || histicks > 1)
+			ckstuck(cpu, myticks, histicks);
+	}
+}
+
+static void
+clockintr(Ureg* ureg, void *arg)
+{
+	Ltimer *wd, *tn;
+	Ploctmr *lt;
+
+	lt = (Ploctmr *)arg;
+	tn = &lt->loc;
+	tn->isr = Xisrclk;
+	coherence();
+
+	timerintr(ureg, 0);
+
+#ifdef watchdog_not_bloody_useless
+	/* appease the dogs */
+	wd = &lt->wd;
+	if (wd->cnt == 0 &&
+	    (wd->ctl & (Wdog | Wdogena | Tintena)) == (Wdog | Wdogena))
+		panic("cpu%d: zero watchdog count but no system reset",
+			m->machno);
+	wd->load = Dogtimeout - 1;
+	coherence();
+#endif
+	SET(wd); USED(wd);
+	tegclockintr();
+
+	mpclocksanity();
+}
+
+void
+clockprod(Ureg *ureg)
+{
+	Ltimer *tn;
+
+	timerintr(ureg, 0);
+	tegclockintr();
+	if (m->machno != 0) {		/* cpu1 gets stuck */
+		tn = &((Ploctmr *)soc.loctmr)->loc;
+		setltimer(tn, Tcycles);
+	}
+}
+
+static void
+clockreset(Ltimer *tn)
+{
+	if (probeaddr((uintptr)tn) < 0)
+		panic("no clock at %#p", tn);
+	tn->ctl = 0;
+	coherence();
+}
+
+void
+watchdogoff(Ltimer *wd)
+{
+	wd->ctl &= ~Wdogena;
+	coherence();
+	wd->wddis = Wdoff1;
+	coherence();
+	wd->wddis = Wdoff2;
+	coherence();
+}
+
+/* clear any pending watchdog intrs or causes */
+void
+wdogclrintr(Ltimer *wd)
+{
+#ifdef watchdog_not_bloody_useless
+	wd->isr = Xisrclk;
+	coherence();
+	wd->wdrst = Wdrst;
+	coherence();
+#endif
+	USED(wd);
+}
+
+/*
+ * stop clock interrupts on this cpu and disable the local watchdog timer,
+ * and, if on cpu0, shutdown the shared tegra2 watchdog timer.
+ */
+void
+clockshutdown(void)
+{
+	Ploctmr *lt;
+
+	lt = (Ploctmr *)soc.loctmr;
+	clockreset(&lt->loc);
+	watchdogoff(&lt->wd);
+
+	tegclockshutdown();
+}
+
+enum {
+	Instrs		= 10*Mhz,
+};
+
+/* we assume that perfticks are microseconds */
+static long
+issue1loop(void)
+{
+	register int i;
+	long st;
+
+	i = Instrs;
+	st = perfticks();
+	do {
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i;
+	} while(--i >= 0);
+	return perfticks() - st;
+}
+
+static long
+issue2loop(void)
+{
+	register int i, j;
+	long st;
+
+	i = Instrs / 2;			/* j gets half the decrements */
+	j = 0;
+	st = perfticks();
+	do {
+		     --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+		--i; --j; --i; --j; --i; --j; --i; --j; --i; --j;
+	} while(--i >= 0);
+	return perfticks() - st;
+}
+
+/* estimate instructions/s. */
+static void
+guessmips(long (*loop)(void), char *lab)
+{
+	int s;
+	long tcks;
+
+	do {
+		s = splhi();
+		tcks = loop();
+		splx(s);
+		if (tcks < 0)
+			iprint("again...");
+	} while (tcks < 0);
+	/*
+	 * Instrs instructions took tcks ticks @ Basetickfreq Hz.
+	 * round the result.
+	 */
+	s = (((vlong)Basetickfreq * Instrs) / tcks + 500000) / 1000000;
+	if (Debug)
+		iprint("%ud mips (%s-issue)", s, lab);
+	USED(s);
+}
+
+void
+wdogintr(Ureg *, void *ltmr)
+{
+#ifdef watchdog_not_bloody_useless
+	Ltimer *wd;
+
+	wd = ltmr;
+	fired++;
+	wdogclrintr(wd);
+#endif
+	USED(ltmr);
+}
+
+static void
+ckcounting(Ltimer *lt)
+{
+	ulong old;
+
+	old = lt->cnt;
+	if (old == lt->cnt)
+		delay(1);
+	if (old == lt->cnt)
+		panic("cpu%d: watchdog timer not counting down", m->machno);
+}
+
+/* test fire with interrupt to see that it's working */
+static void
+ckwatchdog(Ltimer *wd)
+{
+#ifdef watchdog_not_bloody_useless
+	int s;
+
+	fired = 0;
+	wd->load = Tcycles - 1;
+	coherence();
+	/* Tintena is supposed to be ignored in watchdog mode */
+	wd->ctl |= Wdogena | Tintena;
+	coherence();
+
+	ckcounting(wd);
+
+	s = spllo();
+	delay(2 * 1000/HZ);
+	splx(s);
+	if (!fired)
+		/* useless local watchdog */
+		iprint("cpu%d: local watchdog failed to interrupt\n", m->machno);
+	/* clean up */
+	wd->ctl &= ~Wdogena;
+	coherence();
+#endif
+	USED(wd);
+}
+
+static void
+startwatchdog(void)
+{
+#ifdef watchdog_not_bloody_useless
+	Ltimer *wd;
+	Ploctmr *lt;
+
+	lt = (Ploctmr *)soc.loctmr;
+	wd = &lt->wd;
+	watchdogoff(wd);
+	wdogclrintr(wd);
+	irqenable(Wdtmrirq, wdogintr, wd, "watchdog");
+
+	ckwatchdog(wd);
+
+	/* set up for normal use, causing reset */
+	wd->ctl &= ~Tintena;			/* reset, don't interrupt */
+	coherence();
+	wd->ctl |= Wdog;
+	coherence();
+	wd->load = Dogtimeout - 1;
+	coherence();
+	wd->ctl |= Wdogena;
+	coherence();
+
+	ckcounting(wd);
+#endif
+}
+
+static void
+clock0init(Ltimer *tn)
+{
+	int s;
+	ulong old, fticks;
+
+	/*
+	 * calibrate fastclock
+	 */
+	s = splhi();
+	tn->load = ~0ul >> 1;
+	coherence();
+	tn->ctl = Tmrena;
+	coherence();
+
+	old = perfticks();
+	fticks = tn->cnt;
+	delay(1);
+	fticks = abs(tn->cnt - fticks);
+	old = perfticks() - old;
+	splx(s);
+	if (Debug)
+		iprint("cpu%d: fastclock %ld/%ldµs = %ld fastticks/µs (MHz)\n",
+			m->machno, fticks, old, (fticks + old/2 - 1) / old);
+	USED(fticks, old);
+
+	if (Debug)
+		iprint("cpu%d: ", m->machno);
+	guessmips(issue1loop, "single");
+	if (Debug)
+		iprint(", ");
+	guessmips(issue2loop, "dual");
+	if (Debug)
+		iprint("\n");
+
+	/*
+	 * m->delayloop should be the number of delay loop iterations
+	 * needed to consume 1 ms.  2 is instr'ns in the delay loop.
+	 */
+	m->delayloop = m->cpuhz / (1000 * 2);
+//	iprint("cpu%d: m->delayloop = %lud\n", m->machno, m->delayloop);
+
+	tegclock0init();
+}
+
+/*
+ * the local timer is the interrupting timer and does not
+ * participate in measuring time.  It is initially set to HZ.
+ */
+void
+clockinit(void)
+{
+	ulong old;
+	Ltimer *tn;
+	Ploctmr *lt;
+
+	clockshutdown();
+
+	/* turn my cycle counter on */
+	cpwrsc(0, CpCLD, CpCLDena, CpCLDenacyc, 1<<31);
+
+	/* turn all my counters on and clear my cycle counter */
+	cpwrsc(0, CpCLD, CpCLDena, CpCLDenapmnc, 1<<2 | 1);
+
+	/* let users read my cycle counter directly */
+	cpwrsc(0, CpCLD, CpCLDuser, CpCLDenapmnc, 1);
+
+	/* verify µs counter sanity */
+	tegclockinit();
+
+	lt = (Ploctmr *)soc.loctmr;
+	tn = &lt->loc;
+	if (m->machno == 0)
+		irqenable(Loctmrirq, clockintr, lt, "clock");
+	else
+		intcunmask(Loctmrirq);
+
+	/*
+	 * verify sanity of local timer
+	 */
+	tn->load = Clockfreqbase / 1000;
+	tn->isr = Xisrclk;
+	coherence();
+	tn->ctl = Tmrena;
+	coherence();
+
+	old = tn->cnt;
+	delay(5);
+	/* m->ticks won't be incremented here because timersinit hasn't run. */
+	if (tn->cnt == old)
+		panic("cpu%d: clock not ticking at all", m->machno);
+	else if ((long)tn->cnt > 0)
+		panic("cpu%d: clock ticking slowly", m->machno);
+
+	if (m->machno == 0)
+		clock0init(tn);
+
+	/* if pci gets stuck, maybe one of the many watchdogs will nuke us. */
+	startwatchdog();
+
+	/*
+	 *  desynchronize the processor clocks so that they don't all
+	 *  try to resched at the same time.
+	 */
+	delay(m->machno*2);
+	setltimer(tn, Tcycles);
+}
+
+/* our fastticks are at 1MHz (Basetickfreq), so the conversion is trivial. */
+ulong
+µs(void)
+{
+	return fastticks2us(fastticks(nil));
+}
+
+/* Tval is supposed to be in fastticks units. */
+void
+timerset(Tval next)
+{
+	int s;
+	long offset;
+	Ltimer *tn;
+
+	tn = &((Ploctmr *)soc.loctmr)->loc;
+	s = splhi();
+	offset = fastticks2us(next - fastticks(nil));
+	/* offset is now in µs (MHz); convert to Clockfreqbase Hz. */
+	offset *= Clockfreqbase / Mhz;
+	if(offset < MinPeriod)
+		offset = MinPeriod;
+	else if(offset > MaxPeriod)
+		offset = MaxPeriod;
+
+	setltimer(tn, offset);
+	splx(s);
+}
+
+static ulong
+cpucycles(void)	/* cpu clock rate, except when waiting for intr (unused) */
+{
+	ulong v;
+
+	/* reads 32-bit cycle counter (counting up) */
+//	v = cprdsc(0, CpCLD, CpCLDcyc, 0);
+	v = getcyc();				/* fast asm */
+	/* keep it non-negative; prevent m->fastclock ever going to 0 */
+	return v == 0? 1: v;
+}
+
+long
+lcycles(void)
+{
+	return perfticks();
+}
+
+uvlong
+fastticks(uvlong *hz)
+{
+	vlong fastticks;
+	Counter now;
+
+	if(hz)
+		*hz = Basetickfreq;
+	/* avoid reentry on interrupt or trap, to prevent recursion */
+	ilock(&m->clklck);
+	fastticks = m->fastclock;
+	if (m->ticks > HZ/10 && fastticks == 0)
+		panic("fastticks: zero m->fastclock; ticks %lud fastclock %#llux",
+			m->ticks, fastticks);
+
+	now.uvl = fastticks;
+	now.low = perfticks();
+	if(now.uvl < fastticks)		/* low bits must have wrapped */
+		now.high++;
+	m->fastclock = now.uvl;
+	iunlock(&m->clklck);
+	return now.uvl;
+}
+
+void
+microdelay(int l)
+{
+	for (l = l * (vlong)m->delayloop / 1000; --l >= 0; )
+		;
+}
+
+void
+delay(int l)
+{
+	int i, d;
+
+	d = m->delayloop;
+	while(--l >= 0)
+		for (i = d; --i >= 0; )
+			;
+}

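The trick in fastticks() is worth spelling out: the 32-bit perfticks value is spliced into the low word of the last saved 64-bit clock, and the high word is bumped whenever the low word appears to go backwards. A host-side sketch of just that logic (illustrative; extend32 and the sample values are made up):

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t fastclock;		/* stands in for m->fastclock */

	static uint64_t
	extend32(uint32_t now32)
	{
		uint64_t now;

		now = (fastclock & ~(uint64_t)0xffffffff) | now32;
		if(now < fastclock)		/* low word wrapped */
			now += (uint64_t)1 << 32;
		fastclock = now;
		return now;
	}

	int
	main(void)
	{
		fastclock = 0xfffffff0;
		printf("%llx\n", (unsigned long long)extend32(0xfffffff8));
		printf("%llx\n", (unsigned long long)extend32(0x10));	/* wrapped */
		return 0;
	}
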
+ 200 - 0
sys/src/9/teg2/coproc.c

@@ -0,0 +1,200 @@
+/*
+ * arm co-processors
+ * mainly to cope with arm hard-wiring register numbers into instructions.
+ *
+ * CP15 (system control) is the one that gets used the most in practice.
+ * these routines must be callable from KZERO space or the 0 segment.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "arm.h"
+
+enum {
+	/* alternates:	0xe12fff1e	BX (R14); last e is R14 */
+	/*		0xe28ef000	B 0(R14); second e is R14 (ken) */
+	Retinst	= 0xe1a0f00e,		/* MOV R14, R15 */
+
+	Opmask	= MASK(3),
+	Regmask	= MASK(4),
+};
+
+typedef ulong (*Pufv)(void);
+typedef void  (*Pvfu)(ulong);
+
+static void
+setupcpop(ulong instr[2], ulong opcode, int cp, int op1, int crn, int crm,
+	int op2)
+{
+	ulong instrsz[2];
+
+	op1 &= Opmask;
+	op2 &= Opmask;
+	crn &= Regmask;
+	crm &= Regmask;
+	cp  &= Regmask;
+	instr[0] = opcode | op1 << 21 | crn << 16 | cp << 8 | op2 << 5 | crm;
+	instr[1] = Retinst;
+
+	cachedwbse(instr, sizeof instrsz);
+	cacheiinv();
+}
+
+ulong
+cprd(int cp, int op1, int crn, int crm, int op2)
+{
+	int s, r;
+	volatile ulong instr[2];
+	Pufv fp;
+
+	s = splhi();
+	/*
+	 * MRC.  return value will be in R0, which is convenient.
+	 * Rt will be R0.
+	 */
+	setupcpop(instr, 0xee100010, cp, op1, crn, crm, op2);
+	fp = (Pufv)instr;
+	r = fp();
+	splx(s);
+	return r;
+}
+
+void
+cpwr(int cp, int op1, int crn, int crm, int op2, ulong val)
+{
+	int s;
+	volatile ulong instr[2];
+	Pvfu fp;
+
+	s = splhi();
+	setupcpop(instr, 0xee000010, cp, op1, crn, crm, op2); /* MCR, Rt is R0 */
+	fp = (Pvfu)instr;
+	fp(val);
+	coherence();
+	splx(s);
+}
+
+ulong
+cprdsc(int op1, int crn, int crm, int op2)
+{
+	return cprd(CpSC, op1, crn, crm, op2);
+}
+
+void
+cpwrsc(int op1, int crn, int crm, int op2, ulong val)
+{
+	cpwr(CpSC, op1, crn, crm, op2, val);
+}
+
+/* floating point */
+
+/* fp coproc control */
+static void
+setupfpctlop(ulong instr[2], int opcode, int fpctlreg)
+{
+	ulong instrsz[2];
+
+	fpctlreg &= Nfpctlregs - 1;
+	instr[0] = opcode | fpctlreg << 16 | 0 << 12 | CpFP << 8;
+	instr[1] = Retinst;
+
+	cachedwbse(instr, sizeof instrsz);
+	cacheiinv();
+}
+
+ulong
+fprd(int fpreg)
+{
+	int s, r;
+	volatile ulong instr[2];
+	Pufv fp;
+
+	if (!m->fpon) {
+		dumpstack();
+		panic("fprd: cpu%d fpu off", m->machno);
+	}
+	s = splhi();
+	/*
+	 * VMRS.  return value will be in R0, which is convenient.
+	 * Rt will be R0.
+	 */
+	setupfpctlop(instr, 0xeef00010, fpreg);
+	fp = (Pufv)instr;
+	r = fp();
+	splx(s);
+	return r;
+}
+
+void
+fpwr(int fpreg, ulong val)
+{
+	int s;
+	volatile ulong instr[2];
+	Pvfu fp;
+
+	/* fpu might be off and this VMSR might enable it */
+	s = splhi();
+	setupfpctlop(instr, 0xeee00010, fpreg);		/* VMSR, Rt is R0 */
+	fp = (Pvfu)instr;
+	fp(val);
+	coherence();
+	splx(s);
+}
+
+/* fp register access; don't bother with single precision */
+static void
+setupfpop(ulong instr[2], int opcode, int fpreg)
+{
+	ulong instrsz[2];
+
+	instr[0] = opcode | 0 << 16 | (fpreg & (16 - 1)) << 12;
+	if (fpreg >= 16)
+		instr[0] |= 1 << 22;		/* high bit of dfp reg # */
+	instr[1] = Retinst;
+
+	cachedwbse(instr, sizeof instrsz);
+	cacheiinv();
+}
+
+ulong
+fpsavereg(int fpreg, uvlong *fpp)
+{
+	int s, r;
+	volatile ulong instr[2];
+	ulong (*fp)(uvlong *);
+
+	if (!m->fpon)
+		panic("fpsavereg: cpu%d fpu off", m->machno);
+	s = splhi();
+	/*
+	 * VSTR.  pointer will be in R0, which is convenient.
+	 * Rt will be R0.
+	 */
+	setupfpop(instr, 0xed000000 | CpDFP << 8, fpreg);
+	fp = (ulong (*)(uvlong *))instr;
+	r = fp(fpp);
+	splx(s);
+	coherence();
+	return r;			/* not too meaningful */
+}
+
+void
+fprestreg(int fpreg, uvlong val)
+{
+	int s;
+	volatile ulong instr[2];
+	void (*fp)(uvlong *);
+
+	if (!m->fpon)
+		panic("fprestreg: cpu%d fpu off", m->machno);
+	s = splhi();
+	setupfpop(instr, 0xed100000 | CpDFP << 8, fpreg); /* VLDR, Rt is R0 */
+	fp = (void (*)(uvlong *))instr;
+	fp(&val);
+	coherence();
+	splx(s);
+}

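The constants fed to setupcpop() are just the fixed parts of the MRC/MCR encodings with Rt forced to R0. A host-side sketch that rebuilds the instruction word the same way and checks it against the well-known encoding of MRC p15, 0, R0, c0, c0, 0 (read MIDR), 0xee100f10:

	#include <stdio.h>
	#include <stdint.h>

	/* same field placement as setupcpop(): op1, CRn, coproc, op2, CRm */
	static uint32_t
	mrc(int cp, int op1, int crn, int crm, int op2)
	{
		return 0xee100010 | (op1 & 7) << 21 | (crn & 15) << 16 |
			(cp & 15) << 8 | (op2 & 7) << 5 | (crm & 15);
	}

	int
	main(void)
	{
		printf("%#x\n", (unsigned)mrc(15, 0, 0, 0, 0));	/* 0xee100f10 */
		return 0;
	}
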
+ 478 - 0
sys/src/9/teg2/dat.h

@@ -0,0 +1,478 @@
+/*
+ * Time.
+ *
+ * HZ should divide 1000 evenly, ideally.
+ * 100, 125, 200, 250 and 333 are okay.
+ */
+#define	HZ		100			/* clock frequency */
+#define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+#define	TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+
+enum {
+	Mhz	= 1000 * 1000,
+	Dogsectimeout = 4,		/* must be ≤ 34 s. to fit in a ulong */
+};
+
+/*
+ * More accurate time
+ */
+#define MS2TMR(t)	((ulong)(((uvlong)(t) * m->cpuhz)/1000))
+#define US2TMR(t)	((ulong)(((uvlong)(t) * m->cpuhz)/1000000))
+
+#define CONSOLE 0
+
+typedef struct Conf	Conf;
+typedef struct Confmem	Confmem;
+typedef struct FPsave	FPsave;
+typedef struct ISAConf	ISAConf;
+typedef struct Isolated Isolated;
+typedef struct Label	Label;
+typedef struct Lock	Lock;
+typedef struct Lowmemcache Lowmemcache;
+typedef struct Memcache	Memcache;
+typedef struct MMMU	MMMU;
+typedef struct Mach	Mach;
+typedef u32int Mreg;				/* Msr - bloody UART */
+typedef struct Notsave	Notsave;
+typedef struct Page	Page;
+typedef struct Pcisiz Pcisiz;
+typedef struct Pcidev Pcidev;
+typedef struct PhysUart	PhysUart;
+typedef struct PMMU	PMMU;
+typedef struct Proc	Proc;
+typedef u32int		PTE;
+typedef struct Soc	Soc;
+typedef struct Uart	Uart;
+typedef struct Ureg	Ureg;
+typedef uvlong		Tval;
+
+#pragma incomplete Pcidev
+#pragma incomplete Ureg
+
+#define MAXSYSARG	5	/* for mount(fd, mpt, flag, arg, srv) */
+
+/*
+ *  parameters for sysproc.c
+ */
+#define AOUT_MAGIC	(E_MAGIC)
+
+struct Lock
+{
+	ulong	key;
+	u32int	sr;
+	uintptr	pc;
+	Proc*	p;
+	Mach*	m;
+	int	isilock;
+};
+
+struct Label
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+enum {
+	Maxfpregs	= 32,	/* could be 16 or 32, see Mach.fpnregs */
+	Nfpctlregs	= 16,
+};
+
+/*
+ * emulated or vfp3 floating point
+ */
+struct FPsave
+{
+	ulong	status;
+	ulong	control;
+	/*
+	 * vfp3 with ieee fp regs; uvlong is sufficient for hardware but
+	 * each must be able to hold an Internal from fpi.h for sw emulation.
+	 */
+	ulong	regs[Maxfpregs][3];
+
+	int	fpstate;
+	uintptr	pc;		/* of failed fp instr. */
+};
+
+/*
+ * FPsave.status
+ */
+enum
+{
+	FPinit,
+	FPactive,
+	FPinactive,
+
+	/* bit or'd with the state */
+	FPillegal= 0x100,
+};
+
+struct Confmem
+{
+	uintptr	base;
+	usize	npage;
+	uintptr	limit;
+	uintptr	kbase;
+	uintptr	klimit;
+};
+
+struct Conf
+{
+	ulong	nmach;		/* processors */
+	ulong	nproc;		/* processes */
+	Confmem	mem[1];		/* physical memory */
+	ulong	npage;		/* total physical pages of memory */
+	usize	upages;		/* user page pool */
+	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	ulong	ialloc;		/* max interrupt time allocation in bytes */
+	ulong	pipeqsize;	/* size in bytes of pipe queues */
+	ulong	nimage;		/* number of page cache image headers */
+	ulong	nswap;		/* number of swap pages */
+	int	nswppo;		/* max # of pageouts per segment pass */
+	ulong	hz;		/* processor cycle freq */
+	ulong	mhz;
+	int	monitor;	/* flag */
+};
+
+/*
+ *  things saved in the Proc structure during a notify
+ */
+struct Notsave {
+	int	emptiness;
+};
+
+/*
+ *  MMU stuff in Mach.
+ */
+struct MMMU
+{
+	PTE*	mmul1;		/* l1 for this processor */
+	int	mmul1lo;
+	int	mmul1hi;
+	int	mmupid;
+};
+
+/*
+ *  MMU stuff in proc
+ */
+#define NCOLOR	1		/* 1 level cache, don't worry about VCE's */
+struct PMMU
+{
+	Page*	mmul2;
+	Page*	mmul2cache;	/* free mmu pages */
+};
+
+#include "../port/portdat.h"
+
+struct Mach
+{
+	/* offsets known to asm */
+	int	machno;			/* physical id of processor */
+	uintptr	splpc;			/* pc of last caller to splhi */
+
+	Proc*	proc;			/* current process */
+
+	MMMU;
+	/* end of offsets known to asm */
+	int	flushmmu;		/* flush current proc mmu state */
+
+	ulong	ticks;			/* of the clock since boot time */
+	Label	sched;			/* scheduler wakeup */
+	Lock	alarmlock;		/* access to alarm list */
+	void*	alarm;			/* alarms bound to this clock */
+	int	inclockintr;
+
+	Proc*	readied;		/* for runproc */
+	ulong	schedticks;		/* next forced context switch */
+
+	int	cputype;
+	ulong	delayloop;
+
+	/* stats */
+	int	tlbfault;
+	int	tlbpurge;
+	int	pfault;
+	int	cs;
+	int	syscall;
+	int	load;
+	int	intr;
+	uvlong	fastclock;		/* last sampled value */
+	uvlong	inidle;			/* time spent in idlehands() */
+	ulong	spuriousintr;
+	int	lastintr;
+	int	ilockdepth;
+	Perf	perf;			/* performance counters */
+
+	int	probing;		/* probeaddr() state */
+	int	trapped;
+	Lock	probelock;
+	int	inidlehands;
+
+	int	cpumhz;
+	uvlong	cpuhz;			/* speed of cpu */
+	uvlong	cyclefreq;		/* Frequency of user readable cycle counter */
+	Lock	clklck;
+
+	/* vfp3 fpu */
+	int	havefp;
+	int	havefpvalid;
+	int	fpon;
+	int	fpconfiged;
+	int	fpnregs;
+	ulong	fpscr;			/* sw copy */
+	int	fppid;			/* pid of last fault */
+	uintptr	fppc;			/* addr of last fault */
+	int	fpcnt;			/* how many consecutive at that addr */
+
+	/* save areas for exceptions, hold R0-R4 */
+	u32int	sfiq[5];
+	u32int	sirq[5];
+	u32int	sund[5];
+	u32int	sabt[5];
+	u32int	smon[5];		/* probably not needed */
+	u32int	ssys[5];
+
+	int	stack[1];
+};
+
+/*
+ * Fake kmap.
+ */
+typedef void		KMap;
+#define	VA(k)		((uintptr)(k))
+#define	kmap(p)		(KMap*)((p)->pa|kseg0)
+#define	kunmap(k)
+
+struct
+{
+	Lock;
+	int	machs;			/* bitmap of active CPUs */
+	int	wfi;			/* bitmap of CPUs in WFI state */
+	int	stopped;		/* bitmap of CPUs stopped */
+	int	exiting;		/* shutdown */
+	int	ispanic;		/* shutdown in response to a panic */
+	int	thunderbirdsarego;	/* lets the added processors continue to schedinit */
+}active;
+
+extern register Mach* m;			/* R10 */
+extern register Proc* up;			/* R9 */
+
+/* an object guaranteed to be in its own cache line */
+typedef uchar Cacheline[CACHELINESZ];
+struct Isolated {
+	Cacheline c0;
+	ulong	word;
+	Cacheline c1;
+};
+
+extern Memcache cachel[];		/* arm arch v7 supports 1-7 */
+extern ulong intrcount[MAXMACH];
+extern int irqtooearly;
+extern uintptr kseg0;
+extern Isolated l1ptstable;
+extern uchar *l2pages;
+extern Mach* machaddr[MAXMACH];
+extern ulong memsize;
+extern int navailcpus;
+extern int normalprint;
+
+/*
+ *  a parsed plan9.ini line
+ */
+#define NISAOPT		8
+
+struct ISAConf {
+	char	*type;
+	ulong	port;
+	int	irq;
+	ulong	dma;
+	ulong	mem;
+	ulong	size;
+	ulong	freq;
+
+	int	nopt;
+	char	*opt[NISAOPT];
+};
+
+#define	MACHP(n) machaddr[n]
+
+/*
+ * Horrid. But the alternative is 'defined'.
+ */
+#ifdef _DBGC_
+#define DBGFLG		(dbgflg[_DBGC_])
+#else
+#define DBGFLG		(0)
+#endif /* _DBGC_ */
+
+int vflag;
+extern char dbgflg[256];
+
+#define dbgprint	print		/* for now */
+
+/*
+ *  hardware info about a device
+ */
+typedef struct {
+	ulong	port;
+	int	size;
+} Devport;
+
+struct DevConf
+{
+	ulong	intnum;			/* interrupt number */
+	char	*type;			/* card type, malloced */
+	int	nports;			/* Number of ports */
+	Devport	*ports;			/* The ports themselves */
+};
+
+/* characteristics of a given arm cache level */
+struct Memcache {
+	uint	waysh;		/* shifts for set/way register */
+	uint	setsh;
+
+	uint	log2linelen;
+
+	uint	level;		/* 1 is nearest processor, 2 further away */
+	uint	type;
+	uint	external;	/* flag */
+	uint	l1ip;		/* l1 I policy */
+
+	uint	nways;		/* associativity */
+	uint	nsets;
+	uint	linelen;	/* bytes per cache line */
+	uint	setsways;
+};
+enum Cachetype {
+	Nocache,
+	Ionly,
+	Donly,
+	Splitid,
+	Unified,
+};
+enum {
+	Intcache,
+	Extcache,
+};
+
+/*
+ * characteristics of cache level, kept at low, fixed address (CACHECONF).
+ * all offsets are known to cache.v7.s.
+ */
+struct Lowmemcache {
+	uint	l1waysh;		/* shifts for set/way register */
+	uint	l1setsh;
+	uint	l2waysh;
+	uint	l2setsh;
+};
+
+/*
+ * cache capabilities.  write-back vs write-through is controlled
+ * by the Buffered bit in PTEs.
+ *
+ * see cache.v7.s and Memcache in dat.h
+ */
+enum {
+	Cawt	= 1 << 31,
+	Cawb	= 1 << 30,
+	Cara	= 1 << 29,
+	Cawa	= 1 << 28,
+};
+
+/* non-architectural L2 cache */
+typedef struct Cacheimpl Cacheimpl;
+struct Cacheimpl {
+	void	(*info)(Memcache *);
+	void	(*on)(void);
+	void	(*off)(void);
+
+	void	(*inv)(void);
+	void	(*wb)(void);
+	void	(*wbinv)(void);
+
+	void	(*invse)(void *, int);
+	void	(*wbse)(void *, int);
+	void	(*wbinvse)(void *, int);
+};
+/* extern */ Cacheimpl *l2cache, *allcache, *nocache, *l1cache;
+
+enum Dmamode {
+	Const,
+	Postincr,
+	Index,
+	Index2,
+};
+
+/* pmu = power management unit */
+enum Irqs {
+	/*
+	 * 1st 32 gic irqs reserved for cpu; private interrupts.
+	 *  0—15 are software-generated by other cpus;
+	 * 16—31 are private peripheral intrs.
+	 */
+	Cpu0irq		= 0,
+	Cpu1irq,
+	/* ... */
+	Cpu15irq	= 15,
+	Glbtmrirq	= 27,
+	Loctmrirq	= 29,
+	Wdtmrirq	= 30,
+
+	/* shared interrupts */
+	Ctlr0base	= (1+0)*32,		/* primary ctlr */
+	Tn0irq		= Ctlr0base + 0,	/* tegra timers */
+	Tn1irq		= Ctlr0base + 1,
+	Rtcirq		= Ctlr0base + 2,
+
+	Ctlr1base	= (1+1)*32,		/* secondary ctlr */
+	Uartirq		= Ctlr1base + 4,
+	Tn2irq		= Ctlr1base + 9,	/* tegra timers */
+	Tn3irq		= Ctlr1base + 10,
+	/* +24 is cpu0_pmu_intr, +25 is cpu1_pmu_intr */
+
+	Ctlr2base	= (1+2)*32,		/* ternary ctlr */
+	Extpmuirq	= Ctlr2base + 22,
+
+	Ctlr3base	= (1+3)*32,		/* quad ctlr */
+	Pcieirq		= Ctlr3base + 2,
+};
+
+struct Soc {			/* addr's of SoC controllers */
+	uintptr clkrst;
+	uintptr	power;
+	uintptr	exceptvec;
+	uintptr	sema;
+	uintptr	l2cache;
+	uintptr	flow;
+
+	/* private memory region */
+	uintptr	scu;
+	uintptr	intr;		/* `cpu interface' */
+	/* private-peripheral-interrupt cortex-a clocks */
+	uintptr	glbtmr;
+	uintptr	loctmr;
+
+	uintptr	intrdist;
+
+	uintptr	uart[5];
+
+	/* shared-peripheral-interrupt tegra2 clocks */
+	uintptr	rtc;		/* real-time clock */
+	uintptr	tmr[4];
+	uintptr	µs;
+
+	uintptr	pci;
+	uintptr	ether;
+
+	uintptr	ehci;
+	uintptr	ide;
+
+	uintptr	nand;
+	uintptr	nor;
+
+	uintptr	spi[4];
+	uintptr	twsi;
+	uintptr	mmc[4];
+	uintptr	gpio[7];
+} soc;
+extern Soc soc;

+ 192 - 0
sys/src/9/teg2/devarch.c

@@ -0,0 +1,192 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+
+#include "../ip/ip.h"
+
+enum {
+	Qdir = 0,
+	Qbase,
+
+	Qmax = 16,
+};
+
+typedef long Rdwrfn(Chan*, void*, long, vlong);
+
+static Rdwrfn *readfn[Qmax];
+static Rdwrfn *writefn[Qmax];
+
+static Dirtab archdir[Qmax] = {
+	".",		{ Qdir, 0, QTDIR },	0,	0555,
+};
+
+Lock archwlock;	/* the lock is only for changing archdir */
+int narchdir = Qbase;
+
+/*
+ * Add a file to the #P listing.  Once added, you can't delete it.
+ * You can't add a file with the same name as one already there,
+ * and you get a pointer to the Dirtab entry so you can do things
+ * like change the Qid version.  Changing the Qid path is disallowed.
+ */
+Dirtab*
+addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn)
+{
+	int i;
+	Dirtab d;
+	Dirtab *dp;
+
+	memset(&d, 0, sizeof d);
+	strcpy(d.name, name);
+	d.perm = perm;
+
+	lock(&archwlock);
+	if(narchdir >= Qmax){
+		unlock(&archwlock);
+		return nil;
+	}
+
+	for(i=0; i<narchdir; i++)
+		if(strcmp(archdir[i].name, name) == 0){
+			unlock(&archwlock);
+			return nil;
+		}
+
+	d.qid.path = narchdir;
+	archdir[narchdir] = d;
+	readfn[narchdir] = rdfn;
+	writefn[narchdir] = wrfn;
+	dp = &archdir[narchdir++];
+	unlock(&archwlock);
+
+	return dp;
+}
+
+static Chan*
+archattach(char* spec)
+{
+	return devattach('P', spec);
+}
+
+Walkqid*
+archwalk(Chan* c, Chan *nc, char** name, int nname)
+{
+	return devwalk(c, nc, name, nname, archdir, narchdir, devgen);
+}
+
+static int
+archstat(Chan* c, uchar* dp, int n)
+{
+	return devstat(c, dp, n, archdir, narchdir, devgen);
+}
+
+static Chan*
+archopen(Chan* c, int omode)
+{
+	return devopen(c, omode, archdir, narchdir, devgen);
+}
+
+static void
+archclose(Chan*)
+{
+}
+
+static long
+archread(Chan *c, void *a, long n, vlong offset)
+{
+	Rdwrfn *fn;
+
+	switch((ulong)c->qid.path){
+	case Qdir:
+		return devdirread(c, a, n, archdir, narchdir, devgen);
+
+	default:
+		if(c->qid.path < narchdir && (fn = readfn[c->qid.path]))
+			return fn(c, a, n, offset);
+		error(Eperm);
+		break;
+	}
+
+	return 0;
+}
+
+static long
+archwrite(Chan *c, void *a, long n, vlong offset)
+{
+	Rdwrfn *fn;
+
+	if(c->qid.path < narchdir && (fn = writefn[c->qid.path]))
+		return fn(c, a, n, offset);
+	error(Eperm);
+
+	return 0;
+}
+
+void archinit(void);
+
+Dev archdevtab = {
+	'P',
+	"arch",
+
+	devreset,
+	archinit,
+	devshutdown,
+	archattach,
+	archwalk,
+	archstat,
+	archopen,
+	devcreate,
+	archclose,
+	archread,
+	devbread,
+	archwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
+static long
+cputyperead(Chan*, void *a, long n, vlong offset)
+{
+	char name[64], str[128];
+
+	cputype2name(name, sizeof name);
+	snprint(str, sizeof str, "ARM %s %llud\n", name, m->cpuhz / Mhz);
+	return readstr(offset, a, n, str);
+}
+
+static long
+tbread(Chan*, void *a, long n, vlong offset)
+{
+	char str[32];
+	uvlong tb;
+
+	cycles(&tb);
+
+	snprint(str, sizeof(str), "%16.16llux", tb);
+	return readstr(offset, a, n, str);
+}
+
+static long
+nsread(Chan*, void *a, long n, vlong offset)
+{
+	char str[32];
+	uvlong tb;
+
+	cycles(&tb);
+
+	snprint(str, sizeof(str), "%16.16llux", (tb/700)* 1000);
+	return readstr(offset, a, n, str);
+}
+
+void
+archinit(void)
+{
+	addarchfile("cputype", 0444, cputyperead, nil);
+	addarchfile("timebase",0444, tbread, nil);
+//	addarchfile("nsec", 0444, nsread, nil);
+}

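For completeness, registering a new #P file from elsewhere in the kernel takes only a read function and one addarchfile() call. A hypothetical sketch (not in this commit; "boardrev" and its contents are invented, and the helpers are the same ones cputyperead() uses):

	#include "u.h"
	#include "../port/lib.h"
	#include "mem.h"
	#include "dat.h"
	#include "fns.h"

	static long
	boardrevread(Chan*, void *a, long n, vlong offset)
	{
		char str[32];

		snprint(str, sizeof str, "trimslice rev %d\n", 1);
		return readstr(offset, a, n, str);
	}

	void
	boardrevlink(void)
	{
		addarchfile("boardrev", 0444, boardrevread, nil);
	}
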
+ 1366 - 0
sys/src/9/teg2/devcons.c

@@ -0,0 +1,1366 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"pool.h"
+
+#include	<authsrv.h>
+
+void	(*consdebug)(void) = nil;
+void	(*screenputs)(char*, int) = nil;
+
+Queue*	kbdq;			/* unprocessed console input */
+Queue*	lineq;			/* processed console input */
+Queue*	serialoq;		/* serial console output */
+Queue*	kprintoq;		/* console output, for /dev/kprint */
+ulong	kprintinuse;		/* test and set whether /dev/kprint is open */
+int	iprintscreenputs = 1;
+
+int	panicking;
+
+static struct
+{
+	QLock;
+
+	int	raw;		/* true if we shouldn't process input */
+	Ref	ctl;		/* number of opens to the control file */
+	int	x;		/* index into line */
+	char	line[1024];	/* current input line */
+
+	int	count;
+	int	ctlpoff;
+
+	/* a place to save up characters at interrupt time before dumping them in the queue */
+	Lock	lockputc;
+	char	istage[1024];
+	char	*iw;
+	char	*ir;
+	char	*ie;
+} kbd = {
+	.iw	= kbd.istage,
+	.ir	= kbd.istage,
+	.ie	= kbd.istage + sizeof(kbd.istage),
+};
+
+char	*sysname;
+vlong	fasthz;
+
+static void	seedrand(void);
+static int	readtime(ulong, char*, int);
+static int	readbintime(char*, int);
+static int	writetime(char*, int);
+static int	writebintime(char*, int);
+
+enum
+{
+	CMhalt,
+	CMreboot,
+	CMpanic,
+};
+
+Cmdtab rebootmsg[] =
+{
+	CMhalt,		"halt",		1,
+	CMreboot,	"reboot",	0,
+	CMpanic,	"panic",	0,
+};
+
+void
+printinit(void)
+{
+	lineq = qopen(2*1024, 0, nil, nil);
+	if(lineq == nil)
+		panic("printinit");
+	qnoblock(lineq, 1);
+}
+
+int
+consactive(void)
+{
+	if(serialoq)
+		return qlen(serialoq) > 0;
+	return 0;
+}
+
+void
+prflush(void)
+{
+	ulong now;
+
+	now = m->ticks;
+	while(consactive())
+		if(m->ticks - now >= HZ)
+			break;
+}
+
+/*
+ * Log console output so it can be retrieved via /dev/kmesg.
+ * This is good for catching boot-time messages after the fact.
+ */
+struct {
+	Lock lk;
+	char buf[KMESGSIZE];
+	uint n;
+} kmesg;
+
+static void
+kmesgputs(char *str, int n)
+{
+	uint nn, d;
+
+	ilock(&kmesg.lk);
+	/* take the tail of huge writes */
+	if(n > sizeof kmesg.buf){
+		d = n - sizeof kmesg.buf;
+		str += d;
+		n -= d;
+	}
+
+	/* slide the buffer down to make room */
+	nn = kmesg.n;
+	if(nn + n >= sizeof kmesg.buf){
+		d = nn + n - sizeof kmesg.buf;
+		if(d)
+			memmove(kmesg.buf, kmesg.buf+d, sizeof kmesg.buf-d);
+		nn -= d;
+	}
+
+	/* copy the data in */
+	memmove(kmesg.buf+nn, str, n);
+	nn += n;
+	kmesg.n = nn;
+	iunlock(&kmesg.lk);
+}
+
+/*
+ *   Print a string on the console.  Convert \n to \r\n for serial
+ *   line consoles.  Locking of the queues is left up to the screen
+ *   or uart code.  Multi-line messages to serial consoles may get
+ *   interspersed with other messages.
+ */
+static void
+putstrn0(char *str, int n, int usewrite)
+{
+	int m;
+	char *t;
+
+	if(!islo())
+		usewrite = 0;
+
+	/*
+	 *  how many different output devices do we need?
+	 */
+	kmesgputs(str, n);
+
+	/*
+	 *  if someone is reading /dev/kprint,
+	 *  put the message there.
+	 *  if not and there's an attached bit mapped display,
+	 *  put the message there.
+	 *
+	 *  if there's a serial line being used as a console,
+	 *  put the message there.
+	 */
+	if(kprintoq != nil && !qisclosed(kprintoq)){
+		if(usewrite)
+			qwrite(kprintoq, str, n);
+		else
+			qiwrite(kprintoq, str, n);
+	}else if(screenputs != nil)
+		screenputs(str, n);
+
+	if(serialoq == nil){
+		uartputs(str, n);
+		return;
+	}
+
+	while(n > 0) {
+		t = memchr(str, '\n', n);
+		if(t && !kbd.raw) {
+			m = t-str;
+			if(usewrite){
+				qwrite(serialoq, str, m);
+				qwrite(serialoq, "\r\n", 2);
+			} else {
+				qiwrite(serialoq, str, m);
+				qiwrite(serialoq, "\r\n", 2);
+			}
+			n -= m+1;
+			str = t+1;
+		} else {
+			if(usewrite)
+				qwrite(serialoq, str, n);
+			else
+				qiwrite(serialoq, str, n);
+			break;
+		}
+	}
+}
+
+void
+putstrn(char *str, int n)
+{
+	putstrn0(str, n, 0);
+}
+
+int noprint;
+
+int
+print(char *fmt, ...)
+{
+	int n;
+	va_list arg;
+	char buf[PRINTSIZE];
+
+	if(noprint)
+		return -1;
+
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+
+	if(!normalprint) {
+		if(0) iprint("\nprint called too early from %#lux\n",
+			getcallerpc(&fmt));
+		iprint("%.*s", n, buf);
+	} else
+		putstrn(buf, n);
+
+	return n;
+}
+
+/*
+ * Want to interlock iprints to avoid interlaced output on 
+ * multiprocessor, but don't want to deadlock if one processor
+ * dies during print and another has something important to say.
+ * Make a good faith effort.
+ */
+static Lock iprintlock;
+static int
+iprintcanlock(Lock *l)
+{
+	int i;
+	
+	for(i=0; i<1000; i++){
+		if(canlock(l))
+			return 1;
+		if(l->m == MACHP(m->machno))
+			return 0;
+		microdelay(100);
+	}
+	return 0;
+}
+
+int
+iprint(char *fmt, ...)
+{
+	int n, s, locked;
+	va_list arg;
+	char buf[PRINTSIZE];
+
+	s = splhi();
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+	locked = iprintcanlock(&iprintlock);
+	if(screenputs != nil && iprintscreenputs)
+		screenputs(buf, n);
+	if(consuart == nil || consuart->phys == nil ||
+	    consuart->phys->putc == nil)
+		_uartputs(buf, n);
+	else
+		uartputs(buf, n);
+	if(locked)
+		unlock(&iprintlock);
+	splx(s);
+
+	return n;
+}
+
+void
+panic(char *fmt, ...)
+{
+	int n, s;
+	va_list arg;
+	char buf[PRINTSIZE];
+
+	kprintoq = nil;	/* don't try to write to /dev/kprint */
+
+	if(panicking)
+		for(;;);
+	panicking = 1;
+
+	s = splhi();
+	delay(2000);
+	strcpy(buf, "\npanic: ");
+	va_start(arg, fmt);
+	n = vseprint(buf+strlen(buf), buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+	iprint("%s\n", buf);
+	if(consdebug)
+		(*consdebug)();
+	splx(s);
+	prflush();
+	USED(n);
+//	buf[n] = '\n';
+//	putstrn(buf, n+1);		/* redundant */
+//	dumpstack();
+
+	delay(2000);
+	exit(1);
+}
+
+/* libmp at least contains a few calls to sysfatal; simulate with panic */
+void
+sysfatal(char *fmt, ...)
+{
+	char err[256];
+	va_list arg;
+
+	va_start(arg, fmt);
+	vseprint(err, err + sizeof err, fmt, arg);
+	va_end(arg);
+	panic("sysfatal: %s", err);
+}
+
+void
+_assert(char *fmt)
+{
+	panic("assert failed at %#p: %s", getcallerpc(&fmt), fmt);
+}
+
+int
+pprint(char *fmt, ...)
+{
+	int n;
+	Chan *c;
+	va_list arg;
+	char buf[2*PRINTSIZE];
+
+	if(up == nil || up->fgrp == nil)
+		return 0;
+
+	c = up->fgrp->fd[2];
+	if(c==0 || (c->mode!=OWRITE && c->mode!=ORDWR))
+		return 0;
+	n = snprint(buf, sizeof buf, "%s %lud: ", up->text, up->pid);
+	va_start(arg, fmt);
+	n = vseprint(buf+n, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+
+	if(waserror())
+		return 0;
+	devtab[c->type]->write(c, buf, n, c->offset);
+	poperror();
+
+	lock(c);
+	c->offset += n;
+	unlock(c);
+
+	return n;
+}
+
+static void
+echoscreen(char *buf, int n)
+{
+	char *e, *p;
+	char ebuf[128];
+	int x;
+
+	p = ebuf;
+	e = ebuf + sizeof(ebuf) - 4;
+	while(n-- > 0){
+		if(p >= e){
+			screenputs(ebuf, p - ebuf);
+			p = ebuf;
+		}
+		x = *buf++;
+		if(x == 0x15){
+			*p++ = '^';
+			*p++ = 'U';
+			*p++ = '\n';
+		} else
+			*p++ = x;
+	}
+	if(p != ebuf)
+		screenputs(ebuf, p - ebuf);
+}
+
+static void
+echoserialoq(char *buf, int n)
+{
+	int x;
+	char *e, *p;
+	char ebuf[128];
+
+	p = ebuf;
+	e = ebuf + sizeof(ebuf) - 4;
+	while(n-- > 0){
+		if(p >= e){
+			qiwrite(serialoq, ebuf, p - ebuf);
+			p = ebuf;
+		}
+		x = *buf++;
+		if(x == '\n'){
+			*p++ = '\r';
+			*p++ = '\n';
+		} else if(x == 0x15){
+			*p++ = '^';
+			*p++ = 'U';
+			*p++ = '\n';
+		} else
+			*p++ = x;
+	}
+	if(p != ebuf)
+		qiwrite(serialoq, ebuf, p - ebuf);
+}
+
+static void
+echo(char *buf, int n)
+{
+	static int ctrlt, pid;
+	int x;
+	char *e, *p;
+
+	if(n == 0)
+		return;
+
+	e = buf+n;
+	for(p = buf; p < e; p++){
+		switch(*p){
+		case 0x10:	/* ^P */
+			if(cpuserver && !kbd.ctlpoff){
+				active.exiting = 1;
+				return;
+			}
+			break;
+		case 0x14:	/* ^T */
+			ctrlt++;
+			if(ctrlt > 2)
+				ctrlt = 2;
+			continue;
+		}
+
+		if(ctrlt != 2)
+			continue;
+
+		/* ^T escapes */
+		ctrlt = 0;
+		switch(*p){
+		case 'S':
+			x = splhi();
+			dumpstack();
+			procdump();
+			splx(x);
+			return;
+		case 's':
+			dumpstack();
+			return;
+		case 'x':
+			xsummary();
+			ixsummary();
+			mallocsummary();
+		//	memorysummary();
+			pagersummary();
+			return;
+		case 'd':
+			if(consdebug == nil)
+				consdebug = rdb;
+			else
+				consdebug = nil;
+			print("consdebug now %#p\n", consdebug);
+			return;
+		case 'D':
+			if(consdebug == nil)
+				consdebug = rdb;
+			consdebug();
+			return;
+		case 'p':
+			x = spllo();
+			procdump();
+			splx(x);
+			return;
+		case 'q':
+			scheddump();
+			return;
+		case 'k':
+			killbig("^t ^t k");
+			return;
+		case 'r':
+			exit(0);
+			return;
+		}
+	}
+
+	qproduce(kbdq, buf, n);
+	if(kbd.raw)
+		return;
+	kmesgputs(buf, n);
+	if(screenputs != nil)
+		echoscreen(buf, n);
+	if(serialoq)
+		echoserialoq(buf, n);
+}
+
+/*
+ *  Called by a uart interrupt for console input.
+ *
+ *  turn '\r' into '\n' before putting it into the queue.
+ */
+int
+kbdcr2nl(Queue*, int ch)
+{
+	char *next;
+
+	ilock(&kbd.lockputc);		/* just a mutex */
+	if(ch == '\r' && !kbd.raw)
+		ch = '\n';
+	next = kbd.iw+1;
+	if(next >= kbd.ie)
+		next = kbd.istage;
+	if(next != kbd.ir){
+		*kbd.iw = ch;
+		kbd.iw = next;
+	}
+	iunlock(&kbd.lockputc);
+	return 0;
+}
+
+/*
+ *  Put character, possibly a rune, into read queue at interrupt time.
+ *  Called at interrupt time to process a character.
+ */
+int
+kbdputc(Queue*, int ch)
+{
+	int i, n;
+	char buf[3];
+	Rune r;
+	char *next;
+
+	if(kbd.ir == nil)
+		return 0;		/* in case we're not inited yet */
+	
+	ilock(&kbd.lockputc);		/* just a mutex */
+	r = ch;
+	n = runetochar(buf, &r);
+	for(i = 0; i < n; i++){
+		next = kbd.iw+1;
+		if(next >= kbd.ie)
+			next = kbd.istage;
+		if(next == kbd.ir)
+			break;
+		*kbd.iw = buf[i];
+		kbd.iw = next;
+	}
+	iunlock(&kbd.lockputc);
+	return 0;
+}
+
+/*
+ *  we save up input characters till clock time to reduce
+ *  per character interrupt overhead.
+ */
+static void
+kbdputcclock(void)
+{
+	char *iw;
+
+	/* this amortizes cost of qproduce */
+	if(kbd.iw != kbd.ir){
+		iw = kbd.iw;
+		if(iw < kbd.ir){
+			echo(kbd.ir, kbd.ie-kbd.ir);
+			kbd.ir = kbd.istage;
+		}
+		if(kbd.ir != iw){
+			echo(kbd.ir, iw-kbd.ir);
+			kbd.ir = iw;
+		}
+	}
+}
+
+enum{
+	Qdir,
+	Qbintime,
+	Qcons,
+	Qconsctl,
+	Qcputime,
+	Qdrivers,
+	Qkmesg,
+	Qkprint,
+	Qhostdomain,
+	Qhostowner,
+	Qnull,
+	Qosversion,
+	Qpgrpid,
+	Qpid,
+	Qppid,
+	Qrandom,
+	Qreboot,
+	Qswap,
+	Qsysname,
+	Qsysstat,
+	Qtime,
+	Quser,
+	Qzero,
+	Qconfig,
+};
+
+enum
+{
+	VLNUMSIZE=	22,
+};
+
+static Dirtab consdir[]={
+	".",	{Qdir, 0, QTDIR},	0,		DMDIR|0555,
+	"bintime",	{Qbintime},	24,		0664,
+	"cons",		{Qcons},	0,		0660,
+	"consctl",	{Qconsctl},	0,		0220,
+	"cputime",	{Qcputime},	6*NUMSIZE,	0444,
+	"drivers",	{Qdrivers},	0,		0444,
+	"hostdomain",	{Qhostdomain},	DOMLEN,		0664,
+	"hostowner",	{Qhostowner},	0,		0664,
+	"kmesg",	{Qkmesg},	0,		0440,
+	"kprint",	{Qkprint, 0, QTEXCL},	0,	DMEXCL|0440,
+	"null",		{Qnull},	0,		0666,
+	"osversion",	{Qosversion},	0,		0444,
+	"pgrpid",	{Qpgrpid},	NUMSIZE,	0444,
+	"pid",		{Qpid},		NUMSIZE,	0444,
+	"ppid",		{Qppid},	NUMSIZE,	0444,
+	"random",	{Qrandom},	0,		0444,
+	"reboot",	{Qreboot},	0,		0664,
+	"swap",		{Qswap},	0,		0664,
+	"sysname",	{Qsysname},	0,		0664,
+	"sysstat",	{Qsysstat},	0,		0666,
+	"time",		{Qtime},	NUMSIZE+3*VLNUMSIZE,	0664,
+	"user",		{Quser},	0,		0666,
+	"zero",		{Qzero},	0,		0444,
+	"config",	{Qconfig},	0,		0444,
+};
+
+int
+readnum(ulong off, char *buf, ulong n, ulong val, int size)
+{
+	char tmp[64];
+
+	snprint(tmp, sizeof(tmp), "%*lud", size-1, val);
+	tmp[size-1] = ' ';
+	if(off >= size)
+		return 0;
+	if(off+n > size)
+		n = size-off;
+	memmove(buf, tmp+off, n);
+	return n;
+}
+
+int
+readstr(ulong off, char *buf, ulong n, char *str)
+{
+	int size;
+
+	size = strlen(str);
+	if(off >= size)
+		return 0;
+	if(off+n > size)
+		n = size-off;
+	memmove(buf, str+off, n);
+	return n;
+}
+
+static void
+consinit(void)
+{
+	todinit();
+	randominit();
+	/*
+	 * at 115200 baud, the 1024 char buffer takes 56 ms to process,
+	 * processing it every 22 ms should be fine
+	 */
+	addclock0link(kbdputcclock, 22);
+}
+
+static Chan*
+consattach(char *spec)
+{
+	return devattach('c', spec);
+}
+
+static Walkqid*
+conswalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name,nname, consdir, nelem(consdir), devgen);
+}
+
+static int
+consstat(Chan *c, uchar *dp, int n)
+{
+	return devstat(c, dp, n, consdir, nelem(consdir), devgen);
+}
+
+static Chan*
+consopen(Chan *c, int omode)
+{
+	c->aux = nil;
+	c = devopen(c, omode, consdir, nelem(consdir), devgen);
+	switch((ulong)c->qid.path){
+	case Qconsctl:
+		incref(&kbd.ctl);
+		break;
+
+	case Qkprint:
+		if(tas(&kprintinuse) != 0){
+			c->flag &= ~COPEN;
+			error(Einuse);
+		}
+		if(kprintoq == nil){
+			kprintoq = qopen(8*1024, Qcoalesce, 0, 0);
+			if(kprintoq == nil){
+				c->flag &= ~COPEN;
+				error(Enomem);
+			}
+			qnoblock(kprintoq, 1);
+		}else
+			qreopen(kprintoq);
+		c->iounit = qiomaxatomic;
+		break;
+	}
+	return c;
+}
+
+static void
+consclose(Chan *c)
+{
+	switch((ulong)c->qid.path){
+	/* last close of control file turns off raw */
+	case Qconsctl:
+		if(c->flag&COPEN){
+			if(decref(&kbd.ctl) == 0)
+				kbd.raw = 0;
+		}
+		break;
+
+	/* close of kprint allows other opens */
+	case Qkprint:
+		if(c->flag & COPEN){
+			kprintinuse = 0;
+			qhangup(kprintoq, nil);
+		}
+		break;
+	}
+}
+
+static long
+consread(Chan *c, void *buf, long n, vlong off)
+{
+	ulong l;
+	Mach *mp;
+	char *b, *bp, ch;
+	char tmp[256];		/* must be >= 18*NUMSIZE (Qswap) */
+	int i, k, id, send;
+	vlong offset = off;
+	extern char configfile[];
+
+	if(n <= 0)
+		return n;
+
+	switch((ulong)c->qid.path){
+	case Qdir:
+		return devdirread(c, buf, n, consdir, nelem(consdir), devgen);
+
+	case Qcons:
+		qlock(&kbd);
+		if(waserror()) {
+			qunlock(&kbd);
+			nexterror();
+		}
+		while(!qcanread(lineq)){
+			if(qread(kbdq, &ch, 1) == 0)
+				continue;
+			send = 0;
+			if(ch == 0){
+				/* flush output on rawoff -> rawon */
+				if(kbd.x > 0)
+					send = !qcanread(kbdq);
+			}else if(kbd.raw){
+				kbd.line[kbd.x++] = ch;
+				send = !qcanread(kbdq);
+			}else{
+				switch(ch){
+				case '\b':
+					if(kbd.x > 0)
+						kbd.x--;
+					break;
+				case 0x15:	/* ^U */
+					kbd.x = 0;
+					break;
+				case '\n':
+				case 0x04:	/* ^D */
+					send = 1;
+				default:
+					if(ch != 0x04)
+						kbd.line[kbd.x++] = ch;
+					break;
+				}
+			}
+			if(send || kbd.x == sizeof kbd.line){
+				qwrite(lineq, kbd.line, kbd.x);
+				kbd.x = 0;
+			}
+		}
+		n = qread(lineq, buf, n);
+		qunlock(&kbd);
+		poperror();
+		return n;
+
+	case Qcputime:
+		k = offset;
+		if(k >= 6*NUMSIZE)
+			return 0;
+		if(k+n > 6*NUMSIZE)
+			n = 6*NUMSIZE - k;
+		/* easiest to format in a separate buffer and copy out */
+		for(i=0; i<6 && NUMSIZE*i<k+n; i++){
+			l = up->time[i];
+			if(i == TReal)
+				l = MACHP(0)->ticks - l;
+			l = TK2MS(l);
+			readnum(0, tmp+NUMSIZE*i, NUMSIZE, l, NUMSIZE);
+		}
+		memmove(buf, tmp+k, n);
+		return n;
+
+	case Qkmesg:
+		/*
+		 * This is unlocked to avoid tying up a process
+		 * that's writing to the buffer.  kmesg.n never 
+		 * gets smaller, so worst case the reader will
+		 * see a slurred buffer.
+		 */
+		if(off >= kmesg.n)
+			n = 0;
+		else{
+			if(off+n > kmesg.n)
+				n = kmesg.n - off;
+			memmove(buf, kmesg.buf+off, n);
+		}
+		return n;
+		
+	case Qkprint:
+		return qread(kprintoq, buf, n);
+
+	case Qpgrpid:
+		return readnum((ulong)offset, buf, n, up->pgrp->pgrpid, NUMSIZE);
+
+	case Qpid:
+		return readnum((ulong)offset, buf, n, up->pid, NUMSIZE);
+
+	case Qppid:
+		return readnum((ulong)offset, buf, n, up->parentpid, NUMSIZE);
+
+	case Qtime:
+		return readtime((ulong)offset, buf, n);
+
+	case Qbintime:
+		return readbintime(buf, n);
+
+	case Qhostowner:
+		return readstr((ulong)offset, buf, n, eve);
+
+	case Qhostdomain:
+		return readstr((ulong)offset, buf, n, hostdomain);
+
+	case Quser:
+		return readstr((ulong)offset, buf, n, up->user);
+
+	case Qnull:
+		return 0;
+
+	case Qconfig:
+		return readstr((ulong)offset, buf, n, configfile);
+
+	case Qsysstat:
+		b = smalloc(conf.nmach*(NUMSIZE*11+1) + 1);	/* +1 for NUL */
+		bp = b;
+		for(id = 0; id < 32; id++) {
+			if(active.machs & (1<<id)) {
+				mp = MACHP(id);
+				readnum(0, bp, NUMSIZE, id, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->cs, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->intr, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->syscall, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->pfault, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->tlbfault, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->tlbpurge, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->load, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE,
+					(mp->perf.avg_inidle*100)/mp->perf.period,
+					NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE,
+					(mp->perf.avg_inintr*100)/mp->perf.period,
+					NUMSIZE);
+				bp += NUMSIZE;
+				*bp++ = '\n';
+			}
+		}
+		if(waserror()){
+			free(b);
+			nexterror();
+		}
+		n = readstr((ulong)offset, buf, n, b);
+		free(b);
+		poperror();
+		return n;
+
+	case Qswap:
+		snprint(tmp, sizeof tmp,
+			"%lud memory\n"
+			"%d pagesize\n"
+			"%lud kernel\n"
+			"%lud/%lud user\n"
+			"%lud/%lud swap\n"
+			"%lud/%lud kernel malloc\n"
+			"%lud/%lud kernel draw\n",
+			conf.npage*BY2PG,
+			BY2PG,
+			conf.npage-conf.upages,
+			palloc.user-palloc.freecount, palloc.user,
+			conf.nswap-swapalloc.free, conf.nswap,
+			mainmem->cursize, mainmem->maxsize,
+			imagmem->cursize, imagmem->maxsize);
+
+		return readstr((ulong)offset, buf, n, tmp);
+
+	case Qsysname:
+		if(sysname == nil)
+			return 0;
+		return readstr((ulong)offset, buf, n, sysname);
+
+	case Qrandom:
+		return randomread(buf, n);
+
+	case Qdrivers:
+		b = malloc(READSTR);
+		if(b == nil)
+			error(Enomem);
+		k = 0;
+		for(i = 0; devtab[i] != nil; i++)
+			k += snprint(b+k, READSTR-k, "#%C %s\n",
+				devtab[i]->dc, devtab[i]->name);
+		if(waserror()){
+			free(b);
+			nexterror();
+		}
+		n = readstr((ulong)offset, buf, n, b);
+		free(b);
+		poperror();
+		return n;
+
+	case Qzero:
+		memset(buf, 0, n);
+		return n;
+
+	case Qosversion:
+		snprint(tmp, sizeof tmp, "2000");
+		n = readstr((ulong)offset, buf, n, tmp);
+		return n;
+
+	default:
+		print("consread %#llux\n", c->qid.path);
+		error(Egreg);
+	}
+	return -1;		/* never reached */
+}
+
+static long
+conswrite(Chan *c, void *va, long n, vlong off)
+{
+	char buf[256], ch;
+	long l, bp;
+	char *a;
+	Mach *mp;
+	int id, fd;
+	Chan *swc;
+	ulong offset;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+
+	a = va;
+	offset = off;
+
+	switch((ulong)c->qid.path){
+	case Qcons:
+		/*
+		 * Can't page fault in putstrn, so copy the data locally.
+		 */
+		l = n;
+		while(l > 0){
+			bp = l;
+			if(bp > sizeof buf)
+				bp = sizeof buf;
+			memmove(buf, a, bp);
+			putstrn0(buf, bp, 1);
+			a += bp;
+			l -= bp;
+		}
+		break;
+
+	case Qconsctl:
+		if(n >= sizeof(buf))
+			n = sizeof(buf)-1;
+		strncpy(buf, a, n);
+		buf[n] = 0;
+		for(a = buf; a;){
+			if(strncmp(a, "rawon", 5) == 0){
+				kbd.raw = 1;
+				/* clumsy hack - wake up reader */
+				ch = 0;
+				qwrite(kbdq, &ch, 1);
+			} else if(strncmp(a, "rawoff", 6) == 0){
+				kbd.raw = 0;
+			} else if(strncmp(a, "ctlpon", 6) == 0){
+				kbd.ctlpoff = 0;
+			} else if(strncmp(a, "ctlpoff", 7) == 0){
+				kbd.ctlpoff = 1;
+			}
+			if(a = strchr(a, ' '))
+				a++;
+		}
+		break;
+
+	case Qtime:
+		if(!iseve())
+			error(Eperm);
+		return writetime(a, n);
+
+	case Qbintime:
+		if(!iseve())
+			error(Eperm);
+		return writebintime(a, n);
+
+	case Qhostowner:
+		return hostownerwrite(a, n);
+
+	case Qhostdomain:
+		return hostdomainwrite(a, n);
+
+	case Quser:
+		return userwrite(a, n);
+
+	case Qnull:
+		break;
+
+	case Qconfig:
+		error(Eperm);
+		break;
+
+	case Qreboot:
+		if(!iseve())
+			error(Eperm);
+		cb = parsecmd(a, n);
+
+		if(waserror()) {
+			free(cb);
+			nexterror();
+		}
+		ct = lookupcmd(cb, rebootmsg, nelem(rebootmsg));
+		switch(ct->index) {
+		case CMhalt:
+			reboot(nil, 0, 0);
+			break;
+		case CMreboot:
+			rebootcmd(cb->nf-1, cb->f+1);
+			break;
+		case CMpanic:
+			*(ulong*)0=0;
+			panic("/dev/reboot");
+		}
+		poperror();
+		free(cb);
+		break;
+
+	case Qsysstat:
+		for(id = 0; id < 32; id++) {
+			if(active.machs & (1<<id)) {
+				mp = MACHP(id);
+				mp->cs = 0;
+				mp->intr = 0;
+				mp->syscall = 0;
+				mp->pfault = 0;
+				mp->tlbfault = 0;
+				mp->tlbpurge = 0;
+			}
+		}
+		break;
+
+	case Qswap:
+		if(n >= sizeof buf)
+			error(Egreg);
+		memmove(buf, va, n);	/* so we can NUL-terminate */
+		buf[n] = 0;
+		/* start a pager if not already started */
+		if(strncmp(buf, "start", 5) == 0){
+			kickpager();
+			break;
+		}
+		if(!iseve())
+			error(Eperm);
+		if(buf[0]<'0' || '9'<buf[0])
+			error(Ebadarg);
+		fd = strtoul(buf, 0, 0);
+		swc = fdtochan(fd, -1, 1, 1);
+		setswapchan(swc);
+		break;
+
+	case Qsysname:
+		if(offset != 0)
+			error(Ebadarg);
+		if(n <= 0 || n >= sizeof buf)
+			error(Ebadarg);
+		strncpy(buf, a, n);
+		buf[n] = 0;
+		if(buf[n-1] == '\n')
+			buf[n-1] = 0;
+		kstrdup(&sysname, buf);
+		break;
+
+	default:
+		print("conswrite: %#llux\n", c->qid.path);
+		error(Egreg);
+	}
+	return n;
+}
+
+Dev consdevtab = {
+	'c',
+	"cons",
+
+	devreset,
+	consinit,
+	devshutdown,
+	consattach,
+	conswalk,
+	consstat,
+	consopen,
+	devcreate,
+	consclose,
+	consread,
+	devbread,
+	conswrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
+static	ulong	randn;
+
+static void
+seedrand(void)
+{
+	if(!waserror()){
+		randomread((void*)&randn, sizeof(randn));
+		poperror();
+	}
+}
+
+int
+nrand(int n)
+{
+	if(randn == 0)
+		seedrand();
+	randn = randn*1103515245 + 12345 + MACHP(0)->ticks;
+	return (randn>>16) % n;
+}
+
+int
+rand(void)
+{
+	nrand(1);
+	return randn;
+}
+
+static uvlong uvorder = 0x0001020304050607ULL;
+
+static uchar*
+le2vlong(vlong *to, uchar *f)
+{
+	uchar *t, *o;
+	int i;
+
+	t = (uchar*)to;
+	o = (uchar*)&uvorder;
+	for(i = 0; i < sizeof(vlong); i++)
+		t[o[i]] = f[i];
+	return f+sizeof(vlong);
+}
+
+static uchar*
+vlong2le(uchar *t, vlong from)
+{
+	uchar *f, *o;
+	int i;
+
+	f = (uchar*)&from;
+	o = (uchar*)&uvorder;
+	for(i = 0; i < sizeof(vlong); i++)
+		t[i] = f[o[i]];
+	return t+sizeof(vlong);
+}
+
+static long order = 0x00010203;
+
+static uchar*
+le2long(long *to, uchar *f)
+{
+	uchar *t, *o;
+	int i;
+
+	t = (uchar*)to;
+	o = (uchar*)&order;
+	for(i = 0; i < sizeof(long); i++)
+		t[o[i]] = f[i];
+	return f+sizeof(long);
+}
+
+static uchar*
+long2le(uchar *t, long from)
+{
+	uchar *f, *o;
+	int i;
+
+	f = (uchar*)&from;
+	o = (uchar*)&order;
+	for(i = 0; i < sizeof(long); i++)
+		t[i] = f[o[i]];
+	return t+sizeof(long);
+}
+
+char *Ebadtimectl = "bad time control";
+
+/*
+ *  like the old #c/time but with added info.  Return
+ *
+ *	secs	nanosecs	fastticks	fasthz
+ */
+static int
+readtime(ulong off, char *buf, int n)
+{
+	vlong	nsec, ticks;
+	long sec;
+	char str[7*NUMSIZE];
+
+	nsec = todget(&ticks);
+	if(fasthz == 0LL)
+		fastticks((uvlong*)&fasthz);
+	sec = nsec/1000000000ULL;
+	snprint(str, sizeof(str), "%*lud %*llud %*llud %*llud ",
+		NUMSIZE-1, sec,
+		VLNUMSIZE-1, nsec,
+		VLNUMSIZE-1, ticks,
+		VLNUMSIZE-1, fasthz);
+	return readstr(off, buf, n, str);
+}
+
+/*
+ *  set the time in seconds
+ */
+static int
+writetime(char *buf, int n)
+{
+	char b[13];
+	long i;
+	vlong now;
+
+	if(n >= sizeof(b))
+		error(Ebadtimectl);
+	strncpy(b, buf, n);
+	b[n] = 0;
+	i = strtol(b, 0, 0);
+	if(i <= 0)
+		error(Ebadtimectl);
+	now = i*1000000000LL;
+	todset(now, 0, 0);
+	return n;
+}
+
+/*
+ *  read binary time info.  all numbers are little endian.
+ *  ticks and nsec are synchronized.
+ */
+static int
+readbintime(char *buf, int n)
+{
+	int i;
+	vlong nsec, ticks;
+	uchar *b = (uchar*)buf;
+
+	i = 0;
+	if(fasthz == 0LL)
+		fastticks((uvlong*)&fasthz);
+	nsec = todget(&ticks);
+	if(n >= 3*sizeof(uvlong)){
+		vlong2le(b+2*sizeof(uvlong), fasthz);
+		i += sizeof(uvlong);
+	}
+	if(n >= 2*sizeof(uvlong)){
+		vlong2le(b+sizeof(uvlong), ticks);
+		i += sizeof(uvlong);
+	}
+	if(n >= 8){
+		vlong2le(b, nsec);
+		i += sizeof(vlong);
+	}
+	return i;
+}
+
+/*
+ *  set any of the following
+ *	- time in nsec
+ *	- nsec trim applied over some seconds
+ *	- clock frequency
+ */
+static int
+writebintime(char *buf, int n)
+{
+	uchar *p;
+	vlong delta;
+	long period;
+
+	n--;
+	p = (uchar*)buf + 1;
+	switch(*buf){
+	case 'n':
+		if(n < sizeof(vlong))
+			error(Ebadtimectl);
+		le2vlong(&delta, p);
+		todset(delta, 0, 0);
+		break;
+	case 'd':
+		if(n < sizeof(vlong)+sizeof(long))
+			error(Ebadtimectl);
+		p = le2vlong(&delta, p);
+		le2long(&period, p);
+		todset(-1, delta, period);
+		break;
+	case 'f':
+		if(n < sizeof(uvlong))
+			error(Ebadtimectl);
+		le2vlong(&fasthz, p);
+		if(fasthz <= 0)
+			error(Ebadtimectl);
+		todsetfreq(fasthz);
+		break;
+	}
+	return n;
+}

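readbintime and writebintime above fix the #c/bintime record layout: up to three 64-bit little-endian values, nsec at offset 0, ticks at offset 8 and fasthz at offset 16, with the uvorder and order constants acting as byte-index tables so the conversion code is independent of host byte order.  As a minimal sketch, not part of this change, here is a user program that decodes such a record, assuming #c is bound at /dev as usual; getle is an illustrative helper, not kernel code.

	#include <u.h>
	#include <libc.h>

	/* assemble a 64-bit little-endian value, least significant byte first */
	static uvlong
	getle(uchar *p)
	{
		int i;
		uvlong v;

		v = 0;
		for(i = 7; i >= 0; i--)
			v = v<<8 | p[i];
		return v;
	}

	void
	main(void)
	{
		int fd;
		uchar buf[3*8];

		fd = open("/dev/bintime", OREAD);
		if(fd < 0 || read(fd, buf, sizeof buf) != sizeof buf)
			sysfatal("bintime: %r");
		print("nsec %llud ticks %llud fasthz %llud\n",
			getle(buf), getle(buf+8), getle(buf+16));
		exits(nil);
	}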
+ 528 - 0
sys/src/9/teg2/devether.c

@@ -0,0 +1,528 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+
+#include "../port/netif.h"
+#include "etherif.h"
+
+static Ether *etherxx[MaxEther];
+
+Chan*
+etherattach(char* spec)
+{
+	int ctlrno;
+	char *p;
+	Chan *chan;
+
+	ctlrno = 0;
+	if(spec && *spec){
+		ctlrno = strtoul(spec, &p, 0);
+		if((ctlrno == 0 && p == spec) || *p != 0)
+			error(Ebadarg);
+		if(ctlrno < 0 || ctlrno >= MaxEther)
+			error(Ebadarg);
+	}
+	if(etherxx[ctlrno] == 0)
+		error(Enodev);
+
+	chan = devattach('l', spec);
+	if(waserror()){
+		chanfree(chan);
+		nexterror();
+	}
+	chan->dev = ctlrno;
+	if(etherxx[ctlrno]->attach)
+		etherxx[ctlrno]->attach(etherxx[ctlrno]);
+	poperror();
+	return chan;
+}
+
+static Walkqid*
+etherwalk(Chan* chan, Chan* nchan, char** name, int nname)
+{
+	return netifwalk(etherxx[chan->dev], chan, nchan, name, nname);
+}
+
+static int
+etherstat(Chan* chan, uchar* dp, int n)
+{
+	return netifstat(etherxx[chan->dev], chan, dp, n);
+}
+
+static Chan*
+etheropen(Chan* chan, int omode)
+{
+	return netifopen(etherxx[chan->dev], chan, omode);
+}
+
+static void
+ethercreate(Chan*, char*, int, ulong)
+{
+}
+
+static void
+etherclose(Chan* chan)
+{
+	netifclose(etherxx[chan->dev], chan);
+}
+
+static long
+etherread(Chan* chan, void* buf, long n, vlong off)
+{
+	Ether *ether;
+	ulong offset = off;
+
+	ether = etherxx[chan->dev];
+	if((chan->qid.type & QTDIR) == 0 && ether->ifstat){
+		/*
+		 * With some controllers it is necessary to reach
+		 * into the chip to extract statistics.
+		 */
+		if(NETTYPE(chan->qid.path) == Nifstatqid)
+			return ether->ifstat(ether, buf, n, offset);
+		else if(NETTYPE(chan->qid.path) == Nstatqid)
+			ether->ifstat(ether, buf, 0, offset);
+	}
+
+	return netifread(ether, chan, buf, n, offset);
+}
+
+static Block*
+etherbread(Chan* chan, long n, ulong offset)
+{
+	return netifbread(etherxx[chan->dev], chan, n, offset);
+}
+
+static int
+etherwstat(Chan* chan, uchar* dp, int n)
+{
+	return netifwstat(etherxx[chan->dev], chan, dp, n);
+}
+
+static void
+etherrtrace(Netfile* f, Etherpkt* pkt, int len)
+{
+	int i, n;
+	Block *bp;
+
+	if(qwindow(f->in) <= 0)
+		return;
+	if(len > 58)
+		n = 58;
+	else
+		n = len;
+	bp = iallocb(64);
+	if(bp == nil)
+		return;
+	memmove(bp->wp, pkt->d, n);
+	i = TK2MS(MACHP(0)->ticks);
+	bp->wp[58] = len>>8;
+	bp->wp[59] = len;
+	bp->wp[60] = i>>24;
+	bp->wp[61] = i>>16;
+	bp->wp[62] = i>>8;
+	bp->wp[63] = i;
+	bp->wp += 64;
+	qpass(f->in, bp);
+}
+
+Block*
+etheriq(Ether* ether, Block* bp, int fromwire)
+{
+	Etherpkt *pkt;
+	ushort type;
+	int len, multi, tome, fromme;
+	Netfile **ep, *f, **fp, *fx;
+	Block *xbp;
+
+	ether->inpackets++;
+
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	type = (pkt->type[0]<<8)|pkt->type[1];
+	fx = 0;
+	ep = &ether->f[Ntypes];
+
+	multi = pkt->d[0] & 1;
+	/* check for valid multicast addresses */
+	if(multi && memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) != 0 &&
+	    ether->prom == 0){
+		if(!activemulti(ether, pkt->d, sizeof(pkt->d))){
+			if(fromwire){
+				freeb(bp);
+				bp = 0;
+			}
+			return bp;
+		}
+	}
+	/* is it for me? */
+	tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0;
+
+	/*
+	 * Multiplex the packet to all the connections which want it.
+	 * If the packet is not to be used subsequently (fromwire != 0),
+	 * attempt to simply pass it into one of the connections, thereby
+	 * saving a copy of the data (usual case hopefully).
+	 */
+	for(fp = ether->f; fp < ep; fp++){
+		if((f = *fp) != nil && (f->type == type || f->type < 0) &&
+		    (tome || multi || f->prom)){
+			/* Don't want to hear bridged packets */
+			if(f->bridge && !fromwire && !fromme)
+				continue;
+			if(!f->headersonly){
+				if(fromwire && fx == 0)
+					fx = f;
+				else if(xbp = iallocb(len)){
+					memmove(xbp->wp, pkt, len);
+					xbp->wp += len;
+					if(qpass(f->in, xbp) < 0)
+						ether->soverflows++;
+				}
+				else
+					ether->soverflows++;
+			}
+			else
+				etherrtrace(f, pkt, len);
+		}
+	}
+
+	if(fx){
+		if(qpass(fx->in, bp) < 0)
+			ether->soverflows++;
+		return 0;
+	}
+	if(fromwire){
+		freeb(bp);
+		return 0;
+	}
+	return bp;
+}
+
+static int
+etheroq(Ether* ether, Block* bp)
+{
+	int len, loopback, s;
+	Etherpkt *pkt;
+
+	ether->outpackets++;
+
+	/*
+	 * Check if the packet has to be placed back onto the input queue,
+	 * i.e. if it's a loopback or broadcast packet or the interface is
+	 * in promiscuous mode.
+	 * If it's a loopback packet indicate to etheriq that the data isn't
+	 * needed and return, etheriq will pass-on or free the block.
+	 * To enable bridging to work, only packets that were originated
+	 * by this interface are fed back.
+	 */
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	if(loopback || memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) == 0 || ether->prom){
+		s = splhi();
+		etheriq(ether, bp, 0);
+		splx(s);
+	}
+
+	if(!loopback){
+		qbwrite(ether->oq, bp);
+		if(ether->transmit != nil)
+			ether->transmit(ether);
+	} else
+		freeb(bp);
+
+	return len;
+}
+
+static long
+etherwrite(Chan* chan, void* buf, long n, vlong)
+{
+	Ether *ether;
+	Block *bp;
+	int nn, onoff;
+	Cmdbuf *cb;
+
+	ether = etherxx[chan->dev];
+	if(NETTYPE(chan->qid.path) != Ndataqid) {
+		nn = netifwrite(ether, chan, buf, n);
+		if(nn >= 0)
+			return nn;
+		cb = parsecmd(buf, n);
+		if(cb->f[0] && strcmp(cb->f[0], "nonblocking") == 0){
+			if(cb->nf <= 1)
+				onoff = 1;
+			else
+				onoff = atoi(cb->f[1]);
+			qnoblock(ether->oq, onoff);
+			free(cb);
+			return n;
+		}
+		free(cb);
+		if(ether->ctl != nil)
+			return ether->ctl(ether, buf, n);
+
+		error(Ebadctl);
+	}
+
+	if(n > ether->maxmtu)
+		error(Etoobig);
+	if(n < ether->minmtu)
+		error(Etoosmall);
+
+	bp = allocb(n);
+	if(waserror()){
+		freeb(bp);
+		nexterror();
+	}
+	memmove(bp->rp, buf, n);
+	memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen);
+	poperror();
+	bp->wp += n;
+
+	return etheroq(ether, bp);
+}
+
+static long
+etherbwrite(Chan* chan, Block* bp, ulong)
+{
+	Ether *ether;
+	long n;
+
+	n = BLEN(bp);
+	if(NETTYPE(chan->qid.path) != Ndataqid){
+		if(waserror()) {
+			freeb(bp);
+			nexterror();
+		}
+		n = etherwrite(chan, bp->rp, n, 0);
+		poperror();
+		freeb(bp);
+		return n;
+	}
+	ether = etherxx[chan->dev];
+
+	if(n > ether->maxmtu){
+		freeb(bp);
+		error(Etoobig);
+	}
+	if(n < ether->minmtu){
+		freeb(bp);
+		error(Etoosmall);
+	}
+
+	return etheroq(ether, bp);
+}
+
+static struct {
+	char*	type;
+	int	(*reset)(Ether*);
+} cards[MaxEther+1];
+
+void
+addethercard(char* t, int (*r)(Ether*))
+{
+	static int ncard;
+
+	if(ncard == MaxEther)
+		panic("too many ether cards");
+	cards[ncard].type = t;
+	cards[ncard].reset = r;
+	ncard++;
+}
+
+int
+parseether(uchar *to, char *from)
+{
+	char nip[4];
+	char *p;
+	int i;
+
+	p = from;
+	for(i = 0; i < Eaddrlen; i++){
+		if(*p == 0)
+			return -1;
+		nip[0] = *p++;
+		if(*p == 0)
+			return -1;
+		nip[1] = *p++;
+		nip[2] = 0;
+		to[i] = strtoul(nip, 0, 16);
+		if(*p == ':')
+			p++;
+	}
+	return 0;
+}
+
+static void
+etherreset(void)
+{
+	Ether *ether;
+	int i, n, ctlrno;
+	char name[KNAMELEN], buf[128];
+
+	for(ether = 0, ctlrno = 0; ctlrno < MaxEther; ctlrno++){
+		if(ether == 0)
+			ether = malloc(sizeof(Ether));
+		memset(ether, 0, sizeof(Ether));
+		ether->ctlrno = ctlrno;
+		ether->mbps = 10;
+		ether->minmtu = ETHERMINTU;
+		ether->maxmtu = ETHERMAXTU;
+
+		if(archether(ctlrno, ether) <= 0)
+			continue;
+
+		if(isaconfig("ether", ctlrno, ether) == 0){
+			free(ether);
+			ether = 0;	/* force a fresh allocation next pass */
+//			return nil;
+			continue;
+		}
+		for(n = 0; cards[n].type; n++){
+			if(cistrcmp(cards[n].type, ether->type))
+				continue;
+			for(i = 0; i < ether->nopt; i++)
+				if(cistrncmp(ether->opt[i], "ea=", 3) == 0){
+					if(parseether(ether->ea,
+					    &ether->opt[i][3]) == -1)
+						memset(ether->ea, 0, Eaddrlen);
+				} else if(cistrcmp(ether->opt[i],
+				    "100BASE-TXFD") == 0)
+					ether->mbps = 100;
+			if(cards[n].reset(ether))
+				break;
+			snprint(name, sizeof(name), "ether%d", ctlrno);
+
+			if(ether->interrupt != nil && ether->irq >= 0)
+				intrenable(ether->irq, ether->interrupt,
+					ether, 0, name);
+
+			i = snprint(buf, sizeof buf,
+				"#l%d: %s: %dMbps port %#lux irq %d",
+				ctlrno, ether->type, ether->mbps, ether->port,
+				ether->irq);
+			if(ether->mem)
+				i += snprint(buf+i, sizeof buf - i,
+					" addr %#lux", PADDR(ether->mem));
+			if(ether->size)
+				i += snprint(buf+i, sizeof buf - i,
+					" size %#luX", ether->size);
+			i += snprint(buf+i, sizeof buf - i,
+				": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
+				ether->ea[0], ether->ea[1], ether->ea[2],
+				ether->ea[3], ether->ea[4], ether->ea[5]);
+			snprint(buf+i, sizeof buf - i, "\n");
+			iprint("%s", buf);  /* it may be too early for print */
+
+			if(ether->mbps >= 1000)
+				netifinit(ether, name, Ntypes, 4*1024*1024);
+			else if(ether->mbps >= 100)
+				netifinit(ether, name, Ntypes, 1024*1024);
+			else
+				netifinit(ether, name, Ntypes, 65*1024);
+			if(ether->oq == 0)
+				ether->oq = qopen(ether->limit, Qmsg, 0, 0);
+			if(ether->oq == 0)
+				panic("etherreset %s", name);
+			ether->alen = Eaddrlen;
+			memmove(ether->addr, ether->ea, Eaddrlen);
+			memset(ether->bcast, 0xFF, Eaddrlen);
+
+			etherxx[ctlrno] = ether;
+			ether = 0;
+			break;
+		}
+	}
+	if(ether)
+		free(ether);
+}
+
+static void
+ethershutdown(void)
+{
+	Ether *ether;
+	int i;
+
+	for(i = 0; i < MaxEther; i++){
+		ether = etherxx[i];
+		if(ether == nil)
+			continue;
+		if(ether->shutdown == nil) {
+			print("#l%d: no shutdown function\n", i);
+			continue;
+		}
+		(*ether->shutdown)(ether);
+	}
+}
+
+
+#define POLY 0xedb88320
+
+/* really slow 32 bit crc for ethers */
+ulong
+ethercrc(uchar *p, int len)
+{
+	int i, j;
+	ulong crc, b;
+
+	crc = 0xffffffff;
+	for(i = 0; i < len; i++){
+		b = *p++;
+		for(j = 0; j < 8; j++){
+			crc = (crc>>1) ^ (((crc^b) & 1) ? POLY : 0);
+			b >>= 1;
+		}
+	}
+	return crc;
+}
+
+void
+dumpoq(Queue *oq)
+{
+	if (oq == nil)
+		print("no outq! ");
+	else if (qisclosed(oq))
+		print("outq closed ");
+	else if (qfull(oq))
+		print("outq full ");
+	else
+		print("outq %d ", qlen(oq));
+}
+
+void
+dumpnetif(Netif *netif)
+{
+	print("netif %s ", netif->name);
+	print("limit %d mbps %d link %d ",
+		netif->limit, netif->mbps, netif->link);
+	print("inpkts %lld outpkts %lld errs %d\n",
+		netif->inpackets, netif->outpackets,
+		netif->crcs + netif->oerrs + netif->frames + netif->overflows +
+		netif->buffs + netif->soverflows);
+}
+
+Dev etherdevtab = {
+	'l',
+	"ether",
+
+	etherreset,
+	devinit,
+	ethershutdown,
+	etherattach,
+	etherwalk,
+	etherstat,
+	etheropen,
+	ethercreate,
+	etherclose,
+	etherread,
+	etherbread,
+	etherwrite,
+	etherbwrite,
+	devremove,
+	etherwstat,
+};

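etherrtrace above emits fixed 64-byte records to headers-only snoopers: the first 58 bytes of the frame (fewer are copied when the frame is shorter; the rest of that area is not cleared), the original frame length big-endian in bytes 58-59, and a millisecond timestamp, TK2MS(MACHP(0)->ticks), big-endian in bytes 60-63.  A sketch of a decoder for one such record follows; it is not part of this change, and Rec and decoderec are illustrative names only.

	#include <u.h>
	#include <libc.h>

	typedef struct Rec Rec;
	struct Rec {
		uchar	hdr[58];	/* start of the frame */
		int	len;		/* original frame length */
		ulong	ms;		/* capture time in ms since boot */
	};

	/* b points at one 64-byte trace record */
	static void
	decoderec(uchar *b, Rec *r)
	{
		memmove(r->hdr, b, 58);
		r->len = b[58]<<8 | b[59];
		r->ms = (ulong)b[60]<<24 | b[61]<<16 | b[62]<<8 | b[63];
	}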
+ 796 - 0
sys/src/9/teg2/devuart.c

@@ -0,0 +1,796 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+#include	"../port/netif.h"
+
+enum
+{
+	/* soft flow control chars */
+	CTLS= 023,
+	CTLQ= 021,
+};
+
+extern Dev uartdevtab;
+extern PhysUart* physuart[];
+
+static Uart* uartlist;
+static Uart** uart;
+static int uartnuart;
+static Dirtab *uartdir;
+static int uartndir;
+static Timer *uarttimer;
+
+struct Uartalloc {
+	Lock;
+	Uart *elist;	/* list of enabled interfaces */
+} uartalloc;
+
+static void	uartclock(void);
+static void	uartflow(void*);
+
+/*
+ *  enable/disable uart and add/remove to list of enabled uarts
+ */
+//static
+Uart*
+uartenable(Uart *p)
+{
+	Uart **l;
+
+	if (up == nil)
+		return p;		/* too soon; try again later */
+//		return nil;
+
+	if(p->iq == nil){
+		if((p->iq = qopen(8*1024, 0, uartflow, p)) == nil)
+			return nil;
+	}
+	else
+		qreopen(p->iq);
+	if(p->oq == nil){
+		if((p->oq = qopen(8*1024, 0, uartkick, p)) == nil){
+			qfree(p->iq);
+			p->iq = nil;
+			return nil;
+		}
+	}
+	else
+		qreopen(p->oq);
+
+	p->ir = p->istage;
+	p->iw = p->istage;
+	p->ie = &p->istage[Stagesize];
+	p->op = p->ostage;
+	p->oe = p->ostage;
+
+	p->hup_dsr = p->hup_dcd = 0;
+	p->dsr = p->dcd = 0;
+
+	/* assume we can send */
+	p->cts = 1;
+	p->ctsbackoff = 0;
+
+	if (up) {
+		if(p->bits == 0)
+			uartctl(p, "l8");
+		if(p->stop == 0)
+			uartctl(p, "s1");
+		if(p->parity == 0)
+			uartctl(p, "pn");
+		if(p->baud == 0)
+			uartctl(p, "b9600");
+		(*p->phys->enable)(p, 1);
+	}
+
+	/*
+	 * use ilock because uartclock can otherwise interrupt here
+	 * and would hang on an attempt to lock uartalloc.
+	 */
+	ilock(&uartalloc);
+	for(l = &uartalloc.elist; *l; l = &(*l)->elist){
+		if(*l == p)
+			break;
+	}
+	if(*l == 0){
+		p->elist = uartalloc.elist;
+		uartalloc.elist = p;
+	}
+	p->enabled = 1;
+	iunlock(&uartalloc);
+
+	return p;
+}
+
+static void
+uartdisable(Uart *p)
+{
+	Uart **l;
+
+	(*p->phys->disable)(p);
+
+	ilock(&uartalloc);
+	for(l = &uartalloc.elist; *l; l = &(*l)->elist){
+		if(*l == p){
+			*l = p->elist;
+			break;
+		}
+	}
+	p->enabled = 0;
+	iunlock(&uartalloc);
+}
+
+void
+uartmouse(Uart* p, int (*putc)(Queue*, int), int setb1200)
+{
+	qlock(p);
+	if(p->opens++ == 0 && uartenable(p) == nil){
+		qunlock(p);
+		error(Enodev);
+	}
+	if(setb1200)
+		uartctl(p, "b1200");
+	p->putc = putc;
+	p->special = 1;
+	qunlock(p);
+}
+
+void
+uartsetmouseputc(Uart* p, int (*putc)(Queue*, int))
+{
+	qlock(p);
+	if(p->opens == 0 || p->special == 0){
+		qunlock(p);
+		error(Enodev);
+	}
+	p->putc = putc;
+	qunlock(p);
+}
+
+static void
+setlength(int i)
+{
+	Uart *p;
+
+	if(i > 0){
+		p = uart[i];
+		if(p && p->opens && p->iq)
+			uartdir[1+3*i].length = qlen(p->iq);
+	} else for(i = 0; i < uartnuart; i++){
+		p = uart[i];
+		if(p && p->opens && p->iq)
+			uartdir[1+3*i].length = qlen(p->iq);
+	}
+}
+
+/*
+ *  set up the '#t' directory
+ */
+static void
+uartreset(void)
+{
+	int i;
+	Dirtab *dp;
+	Uart *p, *tail;
+
+	tail = nil;
+	for(i = 0; physuart[i] != nil; i++){
+		if(physuart[i]->pnp == nil)
+			continue;
+		if((p = physuart[i]->pnp()) == nil)
+			continue;
+		if(uartlist != nil)
+			tail->next = p;
+		else
+			uartlist = p;
+		for(tail = p; tail->next != nil; tail = tail->next)
+			uartnuart++;
+		uartnuart++;
+	}
+
+	if(uartnuart)
+		uart = xalloc(uartnuart*sizeof(Uart*));
+
+	uartndir = 1 + 3*uartnuart;
+	uartdir = xalloc(uartndir * sizeof(Dirtab));
+	if (uart == nil || uartdir == nil)
+		panic("uartreset: no memory");
+	dp = uartdir;
+	strcpy(dp->name, ".");
+	mkqid(&dp->qid, 0, 0, QTDIR);
+	dp->length = 0;
+	dp->perm = DMDIR|0555;
+	dp++;
+	p = uartlist;
+	for(i = 0; i < uartnuart; i++){
+		/* 3 directory entries per port */
+		snprint(dp->name, sizeof dp->name, "eia%d", i);
+		dp->qid.path = NETQID(i, Ndataqid);
+		dp->perm = 0660;
+		dp++;
+		snprint(dp->name, sizeof dp->name, "eia%dctl", i);
+		dp->qid.path = NETQID(i, Nctlqid);
+		dp->perm = 0660;
+		dp++;
+		snprint(dp->name, sizeof dp->name, "eia%dstatus", i);
+		dp->qid.path = NETQID(i, Nstatqid);
+		dp->perm = 0444;
+		dp++;
+
+		uart[i] = p;
+		p->dev = i;
+		if(p->console || p->special){
+			if(uartenable(p) != nil){
+				if(p->console && up){
+					kbdq = p->iq;
+					serialoq = p->oq;
+					p->putc = kbdcr2nl;
+				}
+				p->opens++;
+			}
+		}
+		p = p->next;
+	}
+
+	if(uartnuart){
+		/*
+		 * at 115200 baud, the 1024 char buffer takes 56 ms to process,
+		 * processing it every 22 ms should be fine.
+		 */
+		uarttimer = addclock0link(uartclock, 22);
+	}
+}
+
+
+static Chan*
+uartattach(char *spec)
+{
+	return devattach('t', spec);
+}
+
+static Walkqid*
+uartwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, uartdir, uartndir, devgen);
+}
+
+static int
+uartstat(Chan *c, uchar *dp, int n)
+{
+	if(NETTYPE(c->qid.path) == Ndataqid)
+		setlength(NETID(c->qid.path));
+	return devstat(c, dp, n, uartdir, uartndir, devgen);
+}
+
+static Chan*
+uartopen(Chan *c, int omode)
+{
+	Uart *p;
+
+	c = devopen(c, omode, uartdir, uartndir, devgen);
+
+	switch(NETTYPE(c->qid.path)){
+	case Nctlqid:
+	case Ndataqid:
+		p = uart[NETID(c->qid.path)];
+		qlock(p);
+		if(p->opens++ == 0 && uartenable(p) == nil){
+			qunlock(p);
+			c->flag &= ~COPEN;
+			error(Enodev);
+		}
+		qunlock(p);
+		break;
+	}
+
+	c->iounit = qiomaxatomic;
+	return c;
+}
+
+static int
+uartdrained(void* arg)
+{
+	Uart *p;
+
+	p = arg;
+	return qlen(p->oq) == 0 && p->op == p->oe;
+}
+
+static void
+uartdrainoutput(Uart *p)
+{
+	if(!p->enabled || up == nil)
+		return;
+
+	p->drain = 1;
+	if(waserror()){
+		p->drain = 0;
+		nexterror();
+	}
+	sleep(&p->r, uartdrained, p);
+	poperror();
+}
+
+static void
+uartclose(Chan *c)
+{
+	Uart *p;
+
+	if(c->qid.type & QTDIR)
+		return;
+	if((c->flag & COPEN) == 0)
+		return;
+	switch(NETTYPE(c->qid.path)){
+	case Ndataqid:
+	case Nctlqid:
+		p = uart[NETID(c->qid.path)];
+		qlock(p);
+		if(--(p->opens) == 0){
+			qclose(p->iq);
+			ilock(&p->rlock);
+			p->ir = p->iw = p->istage;
+			iunlock(&p->rlock);
+
+			/*
+			 */
+			qhangup(p->oq, nil);
+			if(!waserror()){
+				uartdrainoutput(p);
+				poperror();
+			}
+			qclose(p->oq);
+			uartdisable(p);
+			p->dcd = p->dsr = p->dohup = 0;
+		}
+		qunlock(p);
+		break;
+	}
+}
+
+static long
+uartread(Chan *c, void *buf, long n, vlong off)
+{
+	Uart *p;
+	ulong offset = off;
+
+	if(c->qid.type & QTDIR){
+		setlength(-1);
+		return devdirread(c, buf, n, uartdir, uartndir, devgen);
+	}
+
+	p = uart[NETID(c->qid.path)];
+	switch(NETTYPE(c->qid.path)){
+	case Ndataqid:
+		return qread(p->iq, buf, n);
+	case Nctlqid:
+		return readnum(offset, buf, n, NETID(c->qid.path), NUMSIZE);
+	case Nstatqid:
+		return (*p->phys->status)(p, buf, n, offset);
+	}
+
+	return 0;
+}
+
+int
+uartctl(Uart *p, char *cmd)
+{
+	char *f[16];
+	int i, n, nf;
+
+	nf = tokenize(cmd, f, nelem(f));
+	for(i = 0; i < nf; i++){
+		if(strncmp(f[i], "break", 5) == 0){
+			(*p->phys->dobreak)(p, 0);
+			continue;
+		}
+
+		n = atoi(f[i]+1);
+		switch(*f[i]){
+		case 'B':
+		case 'b':
+			uartdrainoutput(p);
+			if((*p->phys->baud)(p, n) < 0)
+				return -1;
+			break;
+		case 'C':
+		case 'c':
+			p->hup_dcd = n;
+			break;
+		case 'D':
+		case 'd':
+			uartdrainoutput(p);
+			(*p->phys->dtr)(p, n);
+			break;
+		case 'E':
+		case 'e':
+			p->hup_dsr = n;
+			break;
+		case 'f':
+		case 'F':
+			if(p->oq != nil)
+				qflush(p->oq);
+			break;
+		case 'H':
+		case 'h':
+			if(p->iq != nil)
+				qhangup(p->iq, 0);
+			if(p->oq != nil)
+				qhangup(p->oq, 0);
+			break;
+		case 'i':
+		case 'I':
+			uartdrainoutput(p);
+			(*p->phys->fifo)(p, n);
+			break;
+		case 'K':
+		case 'k':
+			uartdrainoutput(p);
+			(*p->phys->dobreak)(p, n);
+			break;
+		case 'L':
+		case 'l':
+			uartdrainoutput(p);
+			if((*p->phys->bits)(p, n) < 0)
+				return -1;
+			break;
+		case 'm':
+		case 'M':
+			uartdrainoutput(p);
+			(*p->phys->modemctl)(p, n);
+			break;
+		case 'n':
+		case 'N':
+			if(p->oq != nil)
+				qnoblock(p->oq, n);
+			break;
+		case 'P':
+		case 'p':
+			uartdrainoutput(p);
+			if((*p->phys->parity)(p, *(f[i]+1)) < 0)
+				return -1;
+			break;
+		case 'Q':
+		case 'q':
+			if(p->iq != nil)
+				qsetlimit(p->iq, n);
+			if(p->oq != nil)
+				qsetlimit(p->oq, n);
+			break;
+		case 'R':
+		case 'r':
+			uartdrainoutput(p);
+			(*p->phys->rts)(p, n);
+			break;
+		case 'S':
+		case 's':
+			uartdrainoutput(p);
+			if((*p->phys->stop)(p, n) < 0)
+				return -1;
+			break;
+		case 'W':
+		case 'w':
+			if(uarttimer == nil || n < 1)
+				return -1;
+			uarttimer->tns = (vlong)n * 100000LL;
+			break;
+		case 'X':
+		case 'x':
+			if(p->enabled){
+				ilock(&p->tlock);
+				p->xonoff = n;
+				iunlock(&p->tlock);
+			}
+			break;
+		}
+	}
+	return 0;
+}
+
+static long
+uartwrite(Chan *c, void *buf, long n, vlong)
+{
+	Uart *p;
+	char *cmd;
+
+	if(c->qid.type & QTDIR)
+		error(Eperm);
+
+	p = uart[NETID(c->qid.path)];
+
+	switch(NETTYPE(c->qid.path)){
+	case Ndataqid:
+		qlock(p);
+		if(waserror()){
+			qunlock(p);
+			nexterror();
+		}
+
+		n = qwrite(p->oq, buf, n);
+
+		qunlock(p);
+		poperror();
+		break;
+	case Nctlqid:
+		cmd = malloc(n+1);
+		cmd = malloc(n+1);
+		if(cmd == nil)
+			error(Enomem);
+		memmove(cmd, buf, n);
+		qlock(p);
+		if(waserror()){
+			qunlock(p);
+			free(cmd);
+			nexterror();
+		}
+
+		/* let output drain */
+		if(uartctl(p, cmd) < 0)
+			error(Ebadarg);
+
+		qunlock(p);
+		poperror();
+		free(cmd);
+		break;
+	}
+
+	return n;
+}
+
+static int
+uartwstat(Chan *c, uchar *dp, int n)
+{
+	Dir d;
+	Dirtab *dt;
+
+	if(!iseve())
+		error(Eperm);
+	if(QTDIR & c->qid.type)
+		error(Eperm);
+	if(NETTYPE(c->qid.path) == Nstatqid)
+		error(Eperm);
+
+	dt = &uartdir[1 + 3 * NETID(c->qid.path)];
+	n = convM2D(dp, n, &d, nil);
+	if(n == 0)
+		error(Eshortstat);
+	if(d.mode != ~0UL)
+		dt[0].perm = dt[1].perm = d.mode;
+	return n;
+}
+
+void
+uartpower(int on)
+{
+	Uart *p;
+
+	for(p = uartlist; p != nil; p = p->next) {
+		if(p->phys->power)
+			(*p->phys->power)(p, on);
+	}
+}
+
+Dev uartdevtab = {
+	't',
+	"uart",
+
+	uartreset,
+	devinit,
+	devshutdown,
+	uartattach,
+	uartwalk,
+	uartstat,
+	uartopen,
+	devcreate,
+	uartclose,
+	uartread,
+	devbread,
+	uartwrite,
+	devbwrite,
+	devremove,
+	uartwstat,
+	uartpower,
+};
+
+/*
+ *  restart input if it's off
+ */
+static void
+uartflow(void *v)
+{
+	Uart *p;
+
+	p = v;
+	if(p->modem)
+		(*p->phys->rts)(p, 1);
+}
+
+/*
+ *  put some bytes into the local queue to avoid calling
+ *  qconsume for every character
+ */
+int
+uartstageoutput(Uart *p)
+{
+	int n;
+
+	n = qconsume(p->oq, p->ostage, Stagesize);
+	if(n <= 0)
+//		n = 0;			/* experiment */
+		return 0;
+	p->op = p->ostage;
+	p->oe = p->ostage + n;
+	return n;
+}
+
+/*
+ *  restart output
+ */
+void
+uartkick(void *v)
+{
+	Uart *p = v;
+
+	if(p->blocked)
+		return;
+
+	ilock(&p->tlock);
+	(*p->phys->kick)(p);
+	iunlock(&p->tlock);
+
+	if(p->drain && uartdrained(p)){
+		p->drain = 0;
+		wakeup(&p->r);
+	}
+}
+
+/*
+ * Move data from the interrupt staging area to
+ * the input Queue.
+ */
+static void
+uartstageinput(Uart *p)
+{
+	int n;
+	uchar *ir, *iw;
+
+	while(p->ir != p->iw){
+		ir = p->ir;
+		if(p->ir > p->iw){
+			iw = p->ie;
+			p->ir = p->istage;
+		}
+		else{
+			iw = p->iw;
+			p->ir = p->iw;
+		}
+		if((n = qproduce(p->iq, ir, iw - ir)) < 0){
+			p->serr++;
+			(*p->phys->rts)(p, 0);
+		}
+		else if(n == 0)
+			p->berr++;
+	}
+}
+
+/*
+ *  receive a character at interrupt time
+ */
+void
+uartrecv(Uart *p,  char ch)
+{
+	uchar *next;
+
+	/* software flow control */
+	if(p->xonoff){
+		if(ch == CTLS){
+			p->blocked = 1;
+		}else if(ch == CTLQ){
+			p->blocked = 0;
+			p->ctsbackoff = 2; /* clock gets output going again */
+		}
+	}
+
+	/* receive the character */
+	if(p->putc)
+		p->putc(p->iq, ch);
+	else if (p->iw) {		/* maybe the line isn't enabled yet */
+		ilock(&p->rlock);
+		next = p->iw + 1;
+		if(next == p->ie)
+			next = p->istage;
+		if(next == p->ir)
+			uartstageinput(p);
+		if(next != p->ir){
+			*p->iw = ch;
+			p->iw = next;
+		}
+		iunlock(&p->rlock);
+	}
+}
+
+/*
+ *  we save up input characters till clock time to reduce
+ *  per character interrupt overhead.
+ */
+static void
+uartclock(void)
+{
+	Uart *p;
+
+	ilock(&uartalloc);
+	for(p = uartalloc.elist; p; p = p->elist){
+
+		/* this hopefully amortizes cost of qproduce to many chars */
+		if(p->iw != p->ir){
+			ilock(&p->rlock);
+			uartstageinput(p);
+			iunlock(&p->rlock);
+		}
+
+		/* hang up if requested */
+		if(p->dohup){
+			qhangup(p->iq, 0);
+			qhangup(p->oq, 0);
+			p->dohup = 0;
+		}
+
+		/* this adds hysteresis to hardware/software flow control */
+		if(p->ctsbackoff){
+			ilock(&p->tlock);
+			if(p->ctsbackoff){
+				if(--(p->ctsbackoff) == 0)
+					(*p->phys->kick)(p);
+			}
+			iunlock(&p->tlock);
+		}
+		uartkick(p);		/* keep it moving */
+	}
+	iunlock(&uartalloc);
+}
+
+/*
+ * polling console input, output
+ */
+
+Uart* consuart;
+
+int
+uartgetc(void)
+{
+	if(consuart == nil || consuart->phys->getc == nil)
+		return -1;
+	return consuart->phys->getc(consuart);
+}
+
+void
+uartputc(int c)
+{
+	char c2;
+
+	if(consuart == nil || consuart->phys->putc == nil) {
+		c2 = c;
+		_uartputs(&c2, 1);
+		return;
+	}
+	consuart->phys->putc(consuart, c);
+}
+
+void
+uartputs(char *s, int n)
+{
+	char *e;
+
+	if(consuart == nil || consuart->phys->putc == nil) {
+		_uartputs(s, n);
+		return;
+	}
+
+	e = s+n;
+	for(; s<e; s++){
+		if(*s == '\n')
+			consuart->phys->putc(consuart, '\r');
+		consuart->phys->putc(consuart, *s);
+	}
+}

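uartrecv and uartstageinput above share a single-producer, single-consumer staging ring: the interrupt side advances iw, the clock side drains from ir, ie marks the end of istage, and when the ring is about to fill the kernel drains it early (or, with a putc hook, bypasses it entirely).  The same discipline reduced to a stand-alone sketch is shown below; it is not part of this change, the names Ring, rinit, rput and rdrain are illustrative, and the sketch simply drops a byte when the ring is full rather than forcing a drain.

	#include <u.h>
	#include <libc.h>

	enum { Ringsize = 2048 };	/* illustrative size */

	typedef struct Ring Ring;
	struct Ring {
		uchar	buf[Ringsize];
		uchar	*ir;		/* next byte to drain (consumer) */
		uchar	*iw;		/* next free slot (producer) */
		uchar	*ie;		/* one past the end of buf */
	};

	static void
	rinit(Ring *r)
	{
		r->ir = r->iw = r->buf;
		r->ie = r->buf + Ringsize;
	}

	/* producer side, e.g. called at interrupt time */
	static void
	rput(Ring *r, uchar c)
	{
		uchar *next;

		next = r->iw + 1;
		if(next == r->ie)
			next = r->buf;
		if(next == r->ir)
			return;		/* ring full: drop the byte */
		*r->iw = c;
		r->iw = next;
	}

	/* consumer side, e.g. called at clock time; returns bytes copied */
	static int
	rdrain(Ring *r, uchar *to, int max)
	{
		int n;

		n = 0;
		while(r->ir != r->iw && n < max){
			to[n++] = *r->ir;
			if(++r->ir == r->ie)
				r->ir = r->buf;
		}
		return n;
	}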
+ 1675 - 0
sys/src/9/teg2/ether8169.c

@@ -0,0 +1,1675 @@
+/*
+ * Realtek RTL8110/8168/8169 Gigabit Ethernet Controllers.
+ * There are some magic register values used which are not described in
+ * any datasheet or driver but seem to be necessary.
+ * There are slight differences between the chips in the series so some
+ * tweaks may be needed.
+ *
+ * we use l1 and l2 cache ops; data must reach ram for dma.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+
+#include "etherif.h"
+#include "ethermii.h"
+
+typedef struct Ctlr Ctlr;
+typedef struct D D;			/* Transmit/Receive Descriptor */
+typedef struct Dtcc Dtcc;
+
+enum {
+	Debug = 0,  /* beware: > 1 interferes with correct operation */
+};
+
+enum {					/* registers */
+	Idr0		= 0x00,		/* MAC address */
+	Mar0		= 0x08,		/* Multicast address */
+	Dtccr		= 0x10,		/* Dump Tally Counter Command */
+	Tnpds		= 0x20,		/* Transmit Normal Priority Descriptors */
+	Thpds		= 0x28,		/* Transmit High Priority Descriptors */
+	Flash		= 0x30,		/* Flash Memory Read/Write */
+	Erbcr		= 0x34,		/* Early Receive Byte Count */
+	Ersr		= 0x36,		/* Early Receive Status */
+	Cr		= 0x37,		/* Command Register */
+	Tppoll		= 0x38,		/* Transmit Priority Polling */
+	Imr		= 0x3C,		/* Interrupt Mask */
+	Isr		= 0x3E,		/* Interrupt Status */
+	Tcr		= 0x40,		/* Transmit Configuration */
+	Rcr		= 0x44,		/* Receive Configuration */
+	Tctr		= 0x48,		/* Timer Count */
+	Mpc		= 0x4C,		/* Missed Packet Counter */
+	Cr9346		= 0x50,		/* 9346 Command Register */
+	Config0		= 0x51,		/* Configuration Register 0 */
+	Config1		= 0x52,		/* Configuration Register 1 */
+	Config2		= 0x53,		/* Configuration Register 2 */
+	Config3		= 0x54,		/* Configuration Register 3 */
+	Config4		= 0x55,		/* Configuration Register 4 */
+	Config5		= 0x56,		/* Configuration Register 5 */
+	Timerint	= 0x58,		/* Timer Interrupt */
+	Mulint		= 0x5C,		/* Multiple Interrupt Select */
+	Phyar		= 0x60,		/* PHY Access */
+	Tbicsr0		= 0x64,		/* TBI Control and Status */
+	Tbianar		= 0x68,		/* TBI Auto-Negotiation Advertisement */
+	Tbilpar		= 0x6A,		/* TBI Auto-Negotiation Link Partner */
+	Phystatus	= 0x6C,		/* PHY Status */
+
+	Rms		= 0xDA,		/* Receive Packet Maximum Size */
+	Cplusc		= 0xE0,		/* C+ Command */
+	Coal		= 0xE2,		/* Interrupt Mitigation (Coalesce) */
+	Rdsar		= 0xE4,		/* Receive Descriptor Start Address */
+	Etx		= 0xEC,		/* 8169: Early Tx Threshold; 32-byte units */
+	Mtps		= 0xEC,		/* 8168: Maximum Transmit Packet Size */
+};
+
+enum {					/* Dtccr */
+	Cmd		= 0x00000008,	/* Command */
+};
+
+enum {					/* Cr */
+	Te		= 0x04,		/* Transmitter Enable */
+	Re		= 0x08,		/* Receiver Enable */
+	Rst		= 0x10,		/* Software Reset */
+};
+
+enum {					/* Tppoll */
+	Fswint		= 0x01,		/* Forced Software Interrupt */
+	Npq		= 0x40,		/* Normal Priority Queue polling */
+	Hpq		= 0x80,		/* High Priority Queue polling */
+};
+
+enum {					/* Imr/Isr */
+	Rok		= 0x0001,	/* Receive OK */
+	Rer		= 0x0002,	/* Receive Error */
+	Tok		= 0x0004,	/* Transmit OK */
+	Ter		= 0x0008,	/* Transmit Error */
+	Rdu		= 0x0010,	/* Receive Descriptor Unavailable */
+	Punlc		= 0x0020,	/* Packet Underrun or Link Change */
+	Fovw		= 0x0040,	/* Receive FIFO Overflow */
+	Tdu		= 0x0080,	/* Transmit Descriptor Unavailable */
+	Swint		= 0x0100,	/* Software Interrupt */
+	Timeout		= 0x4000,	/* Timer */
+	Serr		= 0x8000,	/* System Error */
+};
+
+enum {					/* Tcr */
+	MtxdmaSHIFT	= 8,		/* Max. DMA Burst Size */
+	MtxdmaMASK	= 0x00000700,
+	Mtxdmaunlimited	= 0x00000700,
+	Acrc		= 0x00010000,	/* Append CRC (not) */
+	Lbk0		= 0x00020000,	/* Loopback Test 0 */
+	Lbk1		= 0x00040000,	/* Loopback Test 1 */
+	Ifg2		= 0x00080000,	/* Interframe Gap 2 */
+	HwveridSHIFT	= 23,		/* Hardware Version ID */
+	HwveridMASK	= 0x7C800000,
+	Macv01		= 0x00000000,	/* RTL8169 */
+	Macv02		= 0x00800000,	/* RTL8169S/8110S */
+	Macv03		= 0x04000000,	/* RTL8169S/8110S */
+	Macv04		= 0x10000000,	/* RTL8169SB/8110SB */
+	Macv05		= 0x18000000,	/* RTL8169SC/8110SC */
+	Macv07		= 0x24800000,	/* RTL8102e */
+//	Macv8103e	= 0x24C00000,
+	Macv25		= 0x28000000,	/* RTL8168D */
+//	Macv8168dp	= 0x28800000,
+//	Macv8168e	= 0x2C000000,
+	Macv11		= 0x30000000,	/* RTL8168B/8111B */
+	Macv14		= 0x30800000,	/* RTL8100E */
+	Macv13		= 0x34000000,	/* RTL8101E */
+	Macv07a		= 0x34800000,	/* RTL8102e */
+	Macv12		= 0x38000000,	/* RTL8169B/8111B */
+//	Macv8168spin3	= 0x38400000,
+	Macv15		= 0x38800000,	/* RTL8100E */
+	Macv12a		= 0x3c000000,	/* RTL8169C/8111C */
+//	Macv19		= 0x3c000000,	/* dup Macv12a: RTL8111c-gr */
+//	Macv8168cspin2	= 0x3c400000,
+//	Macv8168cp	= 0x3c800000,
+//	Macv8139	= 0x60000000,
+//	Macv8139a	= 0x70000000,
+//	Macv8139ag	= 0x70800000,
+//	Macv8139b	= 0x78000000,
+//	Macv8130	= 0x7C000000,
+//	Macv8139c	= 0x74000000,
+//	Macv8139d	= 0x74400000,
+//	Macv8139cplus	= 0x74800000,
+//	Macv8101	= 0x74c00000,
+//	Macv8100	= 0x78800000,
+//	Macv8169_8110sbl= 0x7cc00000,
+//	Macv8169_8110sce= 0x98000000,
+	Ifg0		= 0x01000000,	/* Interframe Gap 0 */
+	Ifg1		= 0x02000000,	/* Interframe Gap 1 */
+};
+
+enum {					/* Rcr */
+	Aap		= 0x00000001,	/* Accept All Packets */
+	Apm		= 0x00000002,	/* Accept Physical Match */
+	Am		= 0x00000004,	/* Accept Multicast */
+	Ab		= 0x00000008,	/* Accept Broadcast */
+	Ar		= 0x00000010,	/* Accept Runt */
+	Aer		= 0x00000020,	/* Accept Error */
+	Sel9356		= 0x00000040,	/* 9356 EEPROM used */
+	MrxdmaSHIFT	= 8,		/* Max. DMA Burst Size */
+	MrxdmaMASK	= 0x00000700,
+	Mrxdmaunlimited	= 0x00000700,
+	RxfthSHIFT	= 13,		/* Receive Buffer Length */
+	RxfthMASK	= 0x0000E000,
+	Rxfth256	= 0x00008000,
+	Rxfthnone	= 0x0000E000,
+	Rer8		= 0x00010000,	/* Accept Error Packets > 8 bytes */
+	MulERINT	= 0x01000000,	/* Multiple Early Interrupt Select */
+};
+
+enum {					/* Cr9346 */
+	Eedo		= 0x01,		/* */
+	Eedi		= 0x02,		/* */
+	Eesk		= 0x04,		/* */
+	Eecs		= 0x08,		/* */
+	Eem0		= 0x40,		/* Operating Mode */
+	Eem1		= 0x80,
+};
+
+enum {					/* Phyar */
+	DataMASK	= 0x0000FFFF,	/* 16-bit GMII/MII Register Data */
+	DataSHIFT	= 0,
+	RegaddrMASK	= 0x001F0000,	/* 5-bit GMII/MII Register Address */
+	RegaddrSHIFT	= 16,
+	Flag		= 0x80000000,	/* */
+};
+
+enum {					/* Phystatus */
+	Fd		= 0x01,		/* Full Duplex */
+	Linksts		= 0x02,		/* Link Status */
+	Speed10		= 0x04,		/* */
+	Speed100	= 0x08,		/* */
+	Speed1000	= 0x10,		/* */
+	Rxflow		= 0x20,		/* */
+	Txflow		= 0x40,		/* */
+	Entbi		= 0x80,		/* */
+};
+
+enum {					/* Cplusc */
+	Init1		= 0x0001,	/* 8168 */
+	Mulrw		= 0x0008,	/* PCI Multiple R/W Enable */
+	Dac		= 0x0010,	/* PCI Dual Address Cycle Enable */
+	Rxchksum	= 0x0020,	/* Receive Checksum Offload Enable */
+	Rxvlan		= 0x0040,	/* Receive VLAN De-tagging Enable */
+	Pktcntoff	= 0x0080,	/* 8168, 8101 */
+	Endian		= 0x0200,	/* Endian Mode */
+};
+
+struct D {
+	u32int	control;
+	u32int	vlan;
+	u32int	addrlo;
+	u32int	addrhi;
+};
+
+enum {					/* Transmit Descriptor control */
+	TxflMASK	= 0x0000FFFF,	/* Transmit Frame Length */
+	TxflSHIFT	= 0,
+	Tcps		= 0x00010000,	/* TCP Checksum Offload */
+	Udpcs		= 0x00020000,	/* UDP Checksum Offload */
+	Ipcs		= 0x00040000,	/* IP Checksum Offload */
+	Lgsen		= 0x08000000,	/* TSO; WARNING: contains lark's vomit */
+};
+
+enum {					/* Receive Descriptor control */
+	RxflMASK	= 0x00001FFF,	/* Receive Frame Length */
+	Tcpf		= 0x00004000,	/* TCP Checksum Failure */
+	Udpf		= 0x00008000,	/* UDP Checksum Failure */
+	Ipf		= 0x00010000,	/* IP Checksum Failure */
+	Pid0		= 0x00020000,	/* Protocol ID0 */
+	Pid1		= 0x00040000,	/* Protocol ID1 */
+	Crce		= 0x00080000,	/* CRC Error */
+	Runt		= 0x00100000,	/* Runt Packet */
+	Res		= 0x00200000,	/* Receive Error Summary */
+	Rwt		= 0x00400000,	/* Receive Watchdog Timer Expired */
+	Fovf		= 0x00800000,	/* FIFO Overflow */
+	Bovf		= 0x01000000,	/* Buffer Overflow */
+	Bar		= 0x02000000,	/* Broadcast Address Received */
+	Pam		= 0x04000000,	/* Physical Address Matched */
+	Mar		= 0x08000000,	/* Multicast Address Received */
+};
+
+enum {					/* General Descriptor control */
+	Ls		= 0x10000000,	/* Last Segment Descriptor */
+	Fs		= 0x20000000,	/* First Segment Descriptor */
+	Eor		= 0x40000000,	/* End of Descriptor Ring */
+	Own		= 0x80000000,	/* Ownership: belongs to hw */
+};
+
+/*
+ */
+enum {					/* Ring sizes  (<= 1024) */
+	Ntd		= 1024,		/* Transmit Ring */
+	/* at 1Gb/s, it only takes 12 ms. to fill a 1024-buffer ring */
+	Nrd		= 1024,		/* Receive Ring */
+	Nrb		= 4096,
+
+	Mtu		= ETHERMAXTU,
+	Mps		= ROUNDUP(ETHERMAXTU+4, 128),
+//	Mps		= Mtu + 8 + 14,	/* if(mtu>ETHERMAXTU) */
+};
+
+struct Dtcc {
+	u64int	txok;
+	u64int	rxok;
+	u64int	txer;
+	u32int	rxer;
+	u16int	misspkt;
+	u16int	fae;
+	u32int	tx1col;
+	u32int	txmcol;
+	u64int	rxokph;
+	u64int	rxokbrd;
+	u32int	rxokmu;
+	u16int	txabt;
+	u16int	txundrn;
+};
+
+enum {						/* Variants */
+	Rtl8100e	= (0x8136<<16)|0x10EC,	/* RTL810[01]E: pci -e */
+	Rtl8169c	= (0x0116<<16)|0x16EC,	/* RTL8169C+ (USR997902) */
+	Rtl8169sc	= (0x8167<<16)|0x10EC,	/* RTL8169SC */
+	Rtl8168b	= (0x8168<<16)|0x10EC,	/* RTL8168B: pci-e */
+	Rtl8169		= (0x8169<<16)|0x10EC,	/* RTL8169 */
+	/*
+	 * trimslice is 10ec/8168 (8168b) Macv25 (8168D) but
+	 * compulab says 8111dl.
+	 *	oui 0x732 (aaeon) phyno 1, macv = 0x28000000 phyv = 0x0002
+	 */
+};
+
+struct Ctlr {
+	void*	nic;
+	int	port;
+	Pcidev*	pcidev;
+	Ctlr*	next;
+	Ether*	ether;			/* point back */
+	int	active;
+
+	QLock	alock;			/* attach */
+	Lock	ilock;			/* init */
+	int	init;			/*  */
+
+	int	pciv;			/*  */
+	int	macv;			/* MAC version */
+	int	phyv;			/* PHY version */
+	int	pcie;			/* flag: pci-express device? */
+
+	uvlong	mchash;			/* multicast hash */
+
+	Mii*	mii;
+
+//	Lock	tlock;			/* transmit */
+	Rendez	trendez;
+	D*	td;			/* descriptor ring */
+	Block**	tb;			/* transmit buffers */
+	int	ntd;
+
+	int	tdh;			/* head - producer index (host) */
+	int	tdt;			/* tail - consumer index (NIC) */
+	int	ntdfree;
+	int	ntq;
+
+	int	nrb;
+
+//	Lock	rlock;			/* receive */
+	Rendez	rrendez;
+	D*	rd;			/* descriptor ring */
+	Block**	rb;			/* receive buffers */
+	int	nrd;
+
+	int	rdh;			/* head - producer index (NIC) */
+	int	rdt;			/* tail - consumer index (host) */
+	int	nrdfree;
+
+	Lock	reglock;
+	int	tcr;			/* transmit configuration register */
+	int	rcr;			/* receive configuration register */
+	int	imr;
+	int	isr;			/* sw copy for kprocs */
+
+	QLock	slock;			/* statistics */
+	Dtcc*	dtcc;
+	uint	txdu;
+	uint	tcpf;
+	uint	udpf;
+	uint	ipf;
+	uint	fovf;
+	uint	ierrs;
+	uint	rer;
+	uint	rdu;
+	uint	punlc;
+	uint	fovw;
+	uint	mcast;
+	uint	frag;			/* partial packets; rb was too small */
+};
+
+static Ctlr* rtl8169ctlrhead;
+static Ctlr* rtl8169ctlrtail;
+
+static Lock rblock;			/* free receive Blocks */
+static Block* rbpool;
+
+#define csr8r(c, r)	(*((uchar *) ((c)->nic)+(r)))
+#define csr16r(c, r)	(*((u16int *)((c)->nic)+((r)/2)))
+#define csr32p(c, r)	((u32int *)  ((c)->nic)+((r)/4))
+#define csr32r(c, r)	(*csr32p(c, r))
+
+#define csr8w(c, r, b)	(*((uchar *) ((c)->nic)+(r))     = (b), coherence())
+#define csr16w(c, r, w)	(*((u16int *)((c)->nic)+((r)/2)) = (w), coherence())
+#define csr32w(c, r, v)	(*csr32p(c, r) = (v), coherence())
+
+static int
+rtl8169miimir(Mii* mii, int pa, int ra)
+{
+	uint r;
+	int timeo;
+	Ctlr *ctlr;
+
+	if(pa != 1)
+		return -1;
+	ctlr = mii->ctlr;
+	r = (ra<<16) & RegaddrMASK;
+	csr32w(ctlr, Phyar, r);
+	delay(1);
+	for(timeo = 0; timeo < 2000; timeo++){
+		if((r = csr32r(ctlr, Phyar)) & Flag)
+			break;
+		microdelay(100);
+	}
+	if(!(r & Flag))
+		return -1;
+
+	return (r & DataMASK)>>DataSHIFT;
+}
+
+static int
+rtl8169miimiw(Mii* mii, int pa, int ra, int data)
+{
+	uint r;
+	int timeo;
+	Ctlr *ctlr;
+
+	if(pa != 1)
+		return -1;
+	ctlr = mii->ctlr;
+	r = Flag|((ra<<16) & RegaddrMASK)|((data<<DataSHIFT) & DataMASK);
+	csr32w(ctlr, Phyar, r);
+	delay(1);
+	for(timeo = 0; timeo < 2000; timeo++){
+		if(!((r = csr32r(ctlr, Phyar)) & Flag))
+			break;
+		microdelay(100);
+	}
+	if(r & Flag)
+		return -1;
+
+	return 0;
+}
+
+static int
+rtl8169mii(Ctlr* ctlr)
+{
+	MiiPhy *phy;
+
+	/*
+	 * Link management.
+	 */
+	if((ctlr->mii = malloc(sizeof(Mii))) == nil)
+		return -1;
+	ctlr->mii->mir = rtl8169miimir;
+	ctlr->mii->miw = rtl8169miimiw;
+	ctlr->mii->ctlr = ctlr;
+
+	/*
+	 * Get rev number out of Phyidr2 so can config properly.
+	 * There's probably more special stuff for Macv0[234] needed here.
+	 */
+	ilock(&ctlr->reglock);
+	ctlr->phyv = rtl8169miimir(ctlr->mii, 1, Phyidr2) & 0x0F;
+	if(ctlr->macv == Macv02){
+		csr8w(ctlr, 0x82, 1);				/* magic */
+		rtl8169miimiw(ctlr->mii, 1, 0x0B, 0x0000);	/* magic */
+	}
+
+	if(mii(ctlr->mii, (1<<1)) == 0 || (phy = ctlr->mii->curphy) == nil){
+		iunlock(&ctlr->reglock);
+		free(ctlr->mii);
+		ctlr->mii = nil;
+		return -1;
+	}
+	print("rtl8169: oui %#ux phyno %d, macv = %#8.8ux phyv = %#4.4ux\n",
+		phy->oui, phy->phyno, ctlr->macv, ctlr->phyv);
+
+	miiane(ctlr->mii, ~0, ~0, ~0);
+	iunlock(&ctlr->reglock);
+
+	return 0;
+}
+
+static Block*
+rballoc(void)
+{
+	Block *bp;
+
+	ilock(&rblock);
+	if((bp = rbpool) != nil){
+		rbpool = bp->next;
+		bp->next = nil;
+		_xinc(&bp->ref);	/* prevent bp from being freed */
+	}
+	iunlock(&rblock);
+	return bp;
+}
+
+static void
+rbfree(Block *bp)
+{
+	bp->wp = bp->rp = bp->lim - Mps;
+	bp->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
+
+	ilock(&rblock);
+	bp->next = rbpool;
+	rbpool = bp;
+	iunlock(&rblock);
+}
+
+static void
+rtl8169promiscuous(void* arg, int on)
+{
+	Ether *edev;
+	Ctlr * ctlr;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+	ilock(&ctlr->ilock);
+	ilock(&ctlr->reglock);
+
+	if(on)
+		ctlr->rcr |= Aap;
+	else
+		ctlr->rcr &= ~Aap;
+	csr32w(ctlr, Rcr, ctlr->rcr);
+	iunlock(&ctlr->reglock);
+	iunlock(&ctlr->ilock);
+}
+
+enum {
+	/* everyone else uses 0x04c11db7, but they both produce the same crc */
+	Etherpolybe = 0x04c11db6,
+	Bytemask = (1<<8) - 1,
+};
+
+static ulong
+ethercrcbe(uchar *addr, long len)
+{
+	int i, j;
+	ulong c, crc, carry;
+
+	crc = ~0UL;
+	for (i = 0; i < len; i++) {
+		c = addr[i];
+		for (j = 0; j < 8; j++) {
+			carry = ((crc & (1UL << 31))? 1: 0) ^ (c & 1);
+			crc <<= 1;
+			c >>= 1;
+			if (carry)
+				crc = (crc ^ Etherpolybe) | carry;
+		}
+	}
+	return crc;
+}
+
+static ulong
+swabl(ulong l)
+{
+	return l>>24 | (l>>8) & (Bytemask<<8) |
+		(l<<8) & (Bytemask<<16) | l<<24;
+}
+
+static void
+rtl8169multicast(void* ether, uchar *eaddr, int add)
+{
+	Ether *edev;
+	Ctlr *ctlr;
+
+	if (!add)
+		return;	/* ok to keep receiving on old mcast addrs */
+
+	edev = ether;
+	ctlr = edev->ctlr;
+	ilock(&ctlr->ilock);
+	ilock(&ctlr->reglock);
+
+	ctlr->mchash |= 1ULL << (ethercrcbe(eaddr, Eaddrlen) >> 26);
+
+	ctlr->rcr |= Am;
+	csr32w(ctlr, Rcr, ctlr->rcr);
+
+	/* pci-e variants reverse the order of the hash byte registers */
+	if (ctlr->pcie) {
+		csr32w(ctlr, Mar0,   swabl(ctlr->mchash>>32));
+		csr32w(ctlr, Mar0+4, swabl(ctlr->mchash));
+	} else {
+		csr32w(ctlr, Mar0,   ctlr->mchash);
+		csr32w(ctlr, Mar0+4, ctlr->mchash>>32);
+	}
+
+	iunlock(&ctlr->reglock);
+	iunlock(&ctlr->ilock);
+}
+
+static long
+rtl8169ifstat(Ether* edev, void* a, long n, ulong offset)
+{
+	char *p;
+	Ctlr *ctlr;
+	Dtcc *dtcc;
+	int i, l, r, timeo;
+
+	ctlr = edev->ctlr;
+	qlock(&ctlr->slock);
+
+	p = nil;
+	if(waserror()){
+		qunlock(&ctlr->slock);
+		free(p);
+		nexterror();
+	}
+
+	/* copy hw statistics into ctlr->dtcc */
+	dtcc = ctlr->dtcc;
+	allcache->invse(dtcc, sizeof *dtcc);
+	ilock(&ctlr->reglock);
+	csr32w(ctlr, Dtccr+4, 0);
+	csr32w(ctlr, Dtccr, PCIWADDR(dtcc)|Cmd);	/* initiate dma? */
+	for(timeo = 0; timeo < 1000; timeo++){
+		if(!(csr32r(ctlr, Dtccr) & Cmd))
+			break;
+		delay(1);
+	}
+	iunlock(&ctlr->reglock);
+	if(csr32r(ctlr, Dtccr) & Cmd)
+		error(Eio);
+
+	edev->oerrs = dtcc->txer;
+	edev->crcs = dtcc->rxer;
+	edev->frames = dtcc->fae;
+	edev->buffs = dtcc->misspkt;
+	edev->overflows = ctlr->txdu + ctlr->rdu;
+
+	if(n == 0){
+		qunlock(&ctlr->slock);
+		poperror();
+		return 0;
+	}
+
+	if((p = malloc(READSTR)) == nil)
+		error(Enomem);
+
+	l = snprint(p, READSTR, "TxOk: %llud\n", dtcc->txok);
+	l += snprint(p+l, READSTR-l, "RxOk: %llud\n", dtcc->rxok);
+	l += snprint(p+l, READSTR-l, "TxEr: %llud\n", dtcc->txer);
+	l += snprint(p+l, READSTR-l, "RxEr: %ud\n", dtcc->rxer);
+	l += snprint(p+l, READSTR-l, "MissPkt: %ud\n", dtcc->misspkt);
+	l += snprint(p+l, READSTR-l, "FAE: %ud\n", dtcc->fae);
+	l += snprint(p+l, READSTR-l, "Tx1Col: %ud\n", dtcc->tx1col);
+	l += snprint(p+l, READSTR-l, "TxMCol: %ud\n", dtcc->txmcol);
+	l += snprint(p+l, READSTR-l, "RxOkPh: %llud\n", dtcc->rxokph);
+	l += snprint(p+l, READSTR-l, "RxOkBrd: %llud\n", dtcc->rxokbrd);
+	l += snprint(p+l, READSTR-l, "RxOkMu: %ud\n", dtcc->rxokmu);
+	l += snprint(p+l, READSTR-l, "TxAbt: %ud\n", dtcc->txabt);
+	l += snprint(p+l, READSTR-l, "TxUndrn: %ud\n", dtcc->txundrn);
+
+	l += snprint(p+l, READSTR-l, "txdu: %ud\n", ctlr->txdu);
+	l += snprint(p+l, READSTR-l, "tcpf: %ud\n", ctlr->tcpf);
+	l += snprint(p+l, READSTR-l, "udpf: %ud\n", ctlr->udpf);
+	l += snprint(p+l, READSTR-l, "ipf: %ud\n", ctlr->ipf);
+	l += snprint(p+l, READSTR-l, "fovf: %ud\n", ctlr->fovf);
+	l += snprint(p+l, READSTR-l, "ierrs: %ud\n", ctlr->ierrs);
+	l += snprint(p+l, READSTR-l, "rer: %ud\n", ctlr->rer);
+	l += snprint(p+l, READSTR-l, "rdu: %ud\n", ctlr->rdu);
+	l += snprint(p+l, READSTR-l, "punlc: %ud\n", ctlr->punlc);
+	l += snprint(p+l, READSTR-l, "fovw: %ud\n", ctlr->fovw);
+
+	l += snprint(p+l, READSTR-l, "tcr: %#8.8ux\n", ctlr->tcr);
+	l += snprint(p+l, READSTR-l, "rcr: %#8.8ux\n", ctlr->rcr);
+	l += snprint(p+l, READSTR-l, "multicast: %ud\n", ctlr->mcast);
+
+	if(ctlr->mii != nil && ctlr->mii->curphy != nil){
+		l += snprint(p+l, READSTR, "phy:   ");
+		for(i = 0; i < NMiiPhyr; i++){
+			if(i && ((i & 0x07) == 0))
+				l += snprint(p+l, READSTR-l, "\n       ");
+			r = miimir(ctlr->mii, i);
+			l += snprint(p+l, READSTR-l, " %4.4ux", r);
+		}
+		snprint(p+l, READSTR-l, "\n");
+	}
+
+	n = readstr(offset, a, n, p);
+
+	qunlock(&ctlr->slock);
+	poperror();
+	free(p);
+
+	return n;
+}
+
+static void
+rtl8169halt(Ctlr* ctlr)
+{
+	ilock(&ctlr->reglock);
+	csr32w(ctlr, Timerint, 0);
+	csr8w(ctlr, Cr, 0);
+	csr16w(ctlr, Imr, 0);
+	csr16w(ctlr, Isr, ~0);
+	iunlock(&ctlr->reglock);
+}
+
+static int
+rtl8169reset(Ctlr* ctlr)
+{
+	u32int r;
+	int timeo;
+
+	/*
+	 * Soft reset the controller.
+	 */
+	ilock(&ctlr->reglock);
+	csr8w(ctlr, Cr, Rst);
+	for(r = timeo = 0; timeo < 1000; timeo++){
+		r = csr8r(ctlr, Cr);
+		if(!(r & Rst))
+			break;
+		delay(1);
+	}
+	iunlock(&ctlr->reglock);
+
+	rtl8169halt(ctlr);
+
+	if(r & Rst)
+		return -1;
+	return 0;
+}
+
+static void
+rtl8169shutdown(Ether *ether)
+{
+	rtl8169reset(ether->ctlr);
+}
+
+static int
+rtl8169replenish(Ether *edev)
+{
+	int rdt;
+	Block *bp;
+	Ctlr *ctlr;
+	D *d;
+
+	ctlr = edev->ctlr;
+	if (ctlr->nrd == 0) {
+		iprint("rtl8169replenish: not yet initialised\n");
+		return -1;
+	}
+	rdt = ctlr->rdt;
+	assert(ctlr->rb);
+	assert(ctlr->rd);
+	while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){
+		d = &ctlr->rd[rdt];
+		if (d == nil)
+			panic("rtl8169replenish: nil ctlr->rd[%d]", rdt);
+		if (d->control & Own) {	/* ctlr owns it? shouldn't happen */
+			iprint("replenish: descriptor owned by hw\n");
+			break;
+		}
+		if(ctlr->rb[rdt] == nil){
+			bp = rballoc();
+			if(bp == nil){
+				iprint("rtl8169: no available buffers\n");
+				break;
+			}
+			ctlr->rb[rdt] = bp;
+			d->addrhi = 0;
+			coherence();
+			d->addrlo = PCIWADDR(bp->rp);
+			coherence();
+		} else
+			iprint("8169: replenish: rx overrun\n");
+		d->control = (d->control & ~RxflMASK) | Mps | Own;
+		coherence();
+
+		rdt = NEXT(rdt, ctlr->nrd);
+		ctlr->nrdfree++;
+	}
+	ctlr->rdt = rdt;
+	coherence();
+	return 0;
+}
+
+static void
+ckrderrs(Ctlr *ctlr, Block *bp, ulong control)
+{
+	if(control & Fovf)
+		ctlr->fovf++;
+	if(control & Mar)
+		ctlr->mcast++;
+
+	switch(control & (Pid1|Pid0)){
+	case Pid0:
+		if(control & Tcpf){
+			iprint("8169: bad tcp checksum\n");
+			ctlr->tcpf++;
+			break;
+		}
+		bp->flag |= Btcpck;
+		break;
+	case Pid1:
+		if(control & Udpf){
+			iprint("8169: bad udp checksum\n");
+			ctlr->udpf++;
+			break;
+		}
+		bp->flag |= Budpck;
+		break;
+	case Pid1|Pid0:
+		if(control & Ipf){
+			iprint("8169: bad ip checksum\n");
+			ctlr->ipf++;
+			break;
+		}
+		bp->flag |= Bipck;
+		break;
+	}
+}
+
+static void
+badpkt(Ether *edev, int rdh, ulong control)
+{
+	Ctlr *ctlr;
+
+	ctlr = edev->ctlr;
+	/* Res is only valid if Fs is set */
+	if(control & Res)
+		iprint("8169: rcv error; d->control %#.8lux\n", control);
+	else if (control == 0) {		/* buggered? */
+		if (edev->link)
+			iprint("8169: rcv: d->control==0 (wtf?)\n");
+	} else {
+		ctlr->frag++;
+		iprint("8169: rcv'd frag; d->control %#.8lux\n", control);
+	}
+	if (ctlr->rb[rdh])
+		freeb(ctlr->rb[rdh]);
+}
+
+void
+qpkt(Ether *edev, int rdh, ulong control)
+{
+	int len;
+	Block *bp;
+	Ctlr *ctlr;
+
+	ctlr = edev->ctlr;
+	len = (control & RxflMASK) - 4;
+	if ((uint)len > Mps)
+		if (len < 0)
+			panic("8169: received pkt non-existent");
+		else if (len > Mps)
+			panic("8169: received pkt too big");
+	bp = ctlr->rb[rdh];
+	bp->wp = bp->rp + len;
+	bp->next = nil;
+
+	allcache->invse(bp->rp, len);	/* clear any stale cached packet */
+	ckrderrs(ctlr, bp, control);
+	etheriq(edev, bp, 1);
+
+	if(Debug > 1)
+		iprint("R%d ", len);
+}
+
+static int
+pktstoread(void* v)
+{
+	Ctlr *ctlr = v;
+
+	return ctlr->isr & (Fovw|Rdu|Rer|Rok) &&
+		!(ctlr->rd[ctlr->rdh].control & Own);
+}
+
+static void
+rproc(void* arg)
+{
+	int rdh;
+	ulong control;
+	Ctlr *ctlr;
+	D *rd;
+	Ether *edev;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+	for(;;){
+		/* wait for next interrupt */
+		ilock(&ctlr->reglock);
+		ctlr->imr |= Fovw|Rdu|Rer|Rok;
+		csr16w(ctlr, Imr, ctlr->imr);
+		iunlock(&ctlr->reglock);
+
+		sleep(&ctlr->rrendez, pktstoread, ctlr);
+
+		/* clear saved isr bits */
+		ilock(&ctlr->reglock);
+		ctlr->isr &= ~(Fovw|Rdu|Rer|Rok);
+		iunlock(&ctlr->reglock);
+
+		rdh = ctlr->rdh;
+		for (rd = &ctlr->rd[rdh]; !(rd->control & Own);
+		     rd = &ctlr->rd[rdh]){
+			control = rd->control;
+			if((control & (Fs|Ls|Res)) == (Fs|Ls))
+				qpkt(edev, rdh, control);
+			else
+				badpkt(edev, rdh, control);
+			ctlr->rb[rdh] = nil;
+			coherence();
+			rd->control &= Eor;
+			coherence();
+
+			ctlr->nrdfree--;
+			rdh = NEXT(rdh, ctlr->nrd);
+			if(ctlr->nrdfree < ctlr->nrd/2) {
+				/* replenish reads ctlr->rdh */
+				ctlr->rdh = rdh;
+				rtl8169replenish(edev);
+				/* if replenish called restart, rdh is reset */
+				rdh = ctlr->rdh;
+			}
+		}
+		ctlr->rdh = rdh;
+	}
+}
+
+static int
+pktstosend(void* v)
+{
+	Ether *edev = v;
+	Ctlr *ctlr = edev->ctlr;
+
+	return ctlr->isr & (Ter|Tok) &&
+		!(ctlr->td[ctlr->tdh].control & Own) && edev->link;
+}
+
+static void
+tproc(void* arg)
+{
+	int x, len;
+	Block *bp;
+	Ctlr *ctlr;
+	D *d;
+	Ether *edev;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+	for(;;){
+		/* wait for next interrupt */
+		ilock(&ctlr->reglock);
+		ctlr->imr |= Ter|Tok;
+		csr16w(ctlr, Imr, ctlr->imr);
+		iunlock(&ctlr->reglock);
+
+		sleep(&ctlr->trendez, pktstosend, edev);
+
+		/* clear saved isr bits */
+		ilock(&ctlr->reglock);
+		ctlr->isr &= ~(Ter|Tok);
+		iunlock(&ctlr->reglock);
+
+		/* reclaim transmitted Blocks */
+		for(x = ctlr->tdh; ctlr->ntq > 0; x = NEXT(x, ctlr->ntd)){
+			d = &ctlr->td[x];
+			if(d == nil || d->control & Own)
+				break;
+
+			/*
+			 * Free it up.
+			 * Need to clean the descriptor here? Not really.
+			 * Simple freeb for now (no chain and freeblist).
+			 * Use ntq count for now.
+			 */
+			freeb(ctlr->tb[x]);
+			ctlr->tb[x] = nil;
+			d->control &= Eor;
+			coherence();
+
+			ctlr->ntq--;
+		}
+		ctlr->tdh = x;
+
+		if (ctlr->ntq > 0)
+			csr8w(ctlr, Tppoll, Npq); /* kick xmiter to keep it going */
+		/* copy as much of my output q as possible into output ring */
+		x = ctlr->tdt;
+		while(ctlr->ntq < (ctlr->ntd-1)){
+			if((bp = qget(edev->oq)) == nil)
+				break;
+
+			/* make sure the whole packet is in ram */
+			len = BLEN(bp);
+			allcache->wbse(bp->rp, len);
+
+			d = &ctlr->td[x];
+			assert(d);
+			assert(!(d->control & Own));
+			d->addrhi = 0;
+			d->addrlo = PCIWADDR(bp->rp);
+			ctlr->tb[x] = bp;
+			coherence();
+			d->control = (d->control & ~TxflMASK) |
+				Own | Fs | Ls | len;
+			coherence();
+
+			if(Debug > 1)
+				iprint("T%d ", len);
+
+			x = NEXT(x, ctlr->ntd);
+			ctlr->ntq++;
+
+			ctlr->tdt = x;
+			coherence();
+			csr8w(ctlr, Tppoll, Npq);	/* kick xmiter again */
+		}
+		if(x != ctlr->tdt){		/* added new packet(s)? */
+			ctlr->tdt = x;
+			coherence();
+			csr8w(ctlr, Tppoll, Npq);
+		}
+		else if(ctlr->ntq >= (ctlr->ntd-1))
+			ctlr->txdu++;
+	}
+}
+
+static int
+rtl8169init(Ether* edev)
+{
+	u32int r;
+	Ctlr *ctlr;
+	ushort cplusc;
+
+	ctlr = edev->ctlr;
+	ilock(&ctlr->ilock);
+	rtl8169reset(ctlr);
+
+	ilock(&ctlr->reglock);
+	switch(ctlr->pciv){
+	case Rtl8169sc:
+		csr8w(ctlr, Cr, 0);
+		break;
+	case Rtl8168b:
+	case Rtl8169c:
+		/* 8168b manual says set c+ reg first, then command */
+		csr16w(ctlr, Cplusc, 0x2000);		/* magic */
+		csr8w(ctlr, Cr, 0);
+		break;
+	}
+
+	/*
+	 * MAC Address is not settable on some (all?) chips.
+	 * Must put chip into config register write enable mode.
+	 */
+	csr8w(ctlr, Cr9346, Eem1|Eem0);
+
+	/*
+	 * Transmitter.
+	 */
+	memset(ctlr->td, 0, sizeof(D)*ctlr->ntd);
+	ctlr->tdh = ctlr->tdt = 0;
+	ctlr->ntq = 0;
+	ctlr->td[ctlr->ntd-1].control = Eor;
+
+	/*
+	 * Receiver.
+	 * Need to do something here about the multicast filter.
+	 */
+	memset(ctlr->rd, 0, sizeof(D)*ctlr->nrd);
+	ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0;
+	ctlr->rd[ctlr->nrd-1].control = Eor;
+
+	rtl8169replenish(edev);
+
+	switch(ctlr->pciv){
+	default:
+		ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Apm;
+		break;
+	case Rtl8168b:
+	case Rtl8169c:
+		ctlr->rcr = Rxfthnone|6<<MrxdmaSHIFT|Ab|Apm; /* DMA max 1024 */
+		break;
+	}
+
+	/*
+	 * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst
+	 * settings in Tcr/Rcr; the (1<<14) is magic.
+	 */
+	cplusc = csr16r(ctlr, Cplusc) & ~(1<<14);
+	switch(ctlr->pciv){
+	case Rtl8168b:
+	case Rtl8169c:
+		cplusc |= Pktcntoff | Init1;
+		break;
+	}
+	cplusc |= /*Rxchksum|*/Mulrw;
+	switch(ctlr->macv){
+	default:
+		panic("ether8169: unknown macv %#08ux for vid %#ux did %#ux",
+			ctlr->macv, ctlr->pcidev->vid, ctlr->pcidev->did);
+	case Macv01:
+		break;
+	case Macv02:
+	case Macv03:
+		cplusc |= 1<<14;			/* magic */
+		break;
+	case Macv05:
+		/*
+		 * This is interpreted from clearly bogus code
+		 * in the manufacturer-supplied driver; it could
+		 * be wrong. Untested.
+		 */
+		r = csr8r(ctlr, Config2) & 0x07;
+		if(r == 0x01)				/* 66MHz PCI */
+			csr32w(ctlr, 0x7C, 0x0007FFFF);	/* magic */
+		else
+			csr32w(ctlr, 0x7C, 0x0007FF00);	/* magic */
+		pciclrmwi(ctlr->pcidev);
+		break;
+	case Macv13:
+		/*
+		 * This is interpreted from clearly bogus code
+		 * in the manufacturer-supplied driver; it could
+		 * be wrong. Untested.
+		 */
+		pcicfgw8(ctlr->pcidev, 0x68, 0x00);	/* magic */
+		pcicfgw8(ctlr->pcidev, 0x69, 0x08);	/* magic */
+		break;
+	case Macv04:
+	case Macv07:
+	case Macv07a:
+	case Macv11:
+	case Macv12:
+	case Macv12a:
+	case Macv14:
+	case Macv15:
+	case Macv25:
+		break;
+	}
+
+	/*
+	 * Enable receiver/transmitter.
+	 * Need to do this first or some of the settings below
+	 * won't take.
+	 */
+	switch(ctlr->pciv){
+	default:
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		break;
+	case Rtl8169sc:
+	case Rtl8168b:
+		break;
+	}
+	ctlr->mchash = 0;
+	csr32w(ctlr, Mar0,   0);
+	csr32w(ctlr, Mar0+4, 0);
+
+	/*
+	 * Interrupts.
+	 * Disable Tdu for now, the transmit routine will tidy.
+	 * Tdu means the NIC ran out of descriptors to send (i.e., the
+	 * output ring is empty), so it doesn't really need to ever be on.
+	 *
+	 * The timer runs at the PCI(-E) clock frequency, 125MHz for PCI-E,
+	 * presumably 66MHz for PCI.  Thus the units for PCI-E controllers
+	 * (e.g., 8168) are 8ns, and only the buggy 8168 seems to need to use
+	 * timeouts to keep from stalling.
+	 */
+	csr32w(ctlr, Tctr, 0);
+	/* Tok makes the whole system run faster */
+	ctlr->imr = Serr|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok;
+	switch(ctlr->pciv){
+	case Rtl8169sc:
+	case Rtl8168b:
+		/* alleged workaround for rx fifo overflow on 8168[bd] */
+		ctlr->imr &= ~Rdu;
+		break;
+	}
+	csr16w(ctlr, Imr, ctlr->imr);
+
+	/*
+	 * Clear missed-packet counter;
+	 * clear early transmit threshold value;
+	 * set the descriptor ring base addresses;
+	 * set the maximum receive packet size;
+	 * no early-receive interrupts.
+	 *
+	 * note: the maximum rx size is a filter.  the size of the buffer
+	 * in the descriptor ring is still honored.  we will toss >Mtu
+	 * packets because they've been fragmented into multiple
+	 * rx buffers.
+	 */
+	csr32w(ctlr, Mpc, 0);
+	if (ctlr->pcie)
+		csr8w(ctlr, Mtps, Mps / 128);
+	else
+		csr8w(ctlr, Etx, 0x3f);		/* max; no early transmission */
+	csr32w(ctlr, Tnpds+4, 0);
+	csr32w(ctlr, Tnpds, PCIWADDR(ctlr->td));
+	csr32w(ctlr, Rdsar+4, 0);
+	csr32w(ctlr, Rdsar, PCIWADDR(ctlr->rd));
+	csr16w(ctlr, Rms, 2048);		/* was Mps; see above comment */
+	r = csr16r(ctlr, Mulint) & 0xF000;	/* no early rx interrupts */
+	csr16w(ctlr, Mulint, r);
+	csr16w(ctlr, Cplusc, cplusc);
+	csr16w(ctlr, Coal, 0);
+
+	/*
+	 * Set configuration.
+	 */
+	switch(ctlr->pciv){
+	case Rtl8169sc:
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		break;
+	case Rtl8168b:
+	case Rtl8169c:
+		csr16w(ctlr, Cplusc, 0x2000);		/* magic */
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|6<<MtxdmaSHIFT); /* DMA max 1024 */
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		break;
+	}
+	ctlr->tcr = csr32r(ctlr, Tcr);
+	csr8w(ctlr, Cr9346, 0);
+
+	iunlock(&ctlr->reglock);
+	iunlock(&ctlr->ilock);
+
+//	rtl8169mii(ctlr);
+
+	return 0;
+}
+
+static void
+rtl8169attach(Ether* edev)
+{
+	int timeo, s, i;
+	char name[KNAMELEN];
+	Block *bp;
+	Ctlr *ctlr;
+
+	ctlr = edev->ctlr;
+	s = splhi();
+	qlock(&ctlr->alock);
+	if(ctlr->init || waserror()) {
+		qunlock(&ctlr->alock);
+		splx(s);
+		return;
+	}
+	ctlr->td = ucallocalign(sizeof(D)*Ntd, 256, 0);
+	ctlr->tb = malloc(Ntd*sizeof(Block*));
+	ctlr->ntd = Ntd;
+
+	ctlr->rd = ucallocalign(sizeof(D)*Nrd, 256, 0);
+	ctlr->rb = malloc(Nrd*sizeof(Block*));
+	ctlr->nrd = Nrd;
+
+	ctlr->dtcc = mallocalign(sizeof(Dtcc), 64, 0, 0);
+	if(waserror()){
+		free(ctlr->td);
+		free(ctlr->tb);
+		free(ctlr->rd);
+		free(ctlr->rb);
+		free(ctlr->dtcc);
+		nexterror();
+	}
+	if(ctlr->td == nil || ctlr->tb == nil || ctlr->rd == nil ||
+	   ctlr->rb == nil || ctlr->dtcc == nil)
+		error(Enomem);
+
+	/* allocate private receive-buffer pool */
+	ctlr->nrb = Nrb;
+	for(i = 0; i < Nrb; i++){
+		if((bp = allocb(Mps)) == nil)
+			error(Enomem);
+		bp->free = rbfree;
+		freeb(bp);
+	}
+
+	rtl8169init(edev);
+	ctlr->init = 1;
+	qunlock(&ctlr->alock);
+	splx(s);
+	poperror();				/* free */
+	poperror();				/* qunlock */
+
+	/* signal secondary cpus that l1 ptes are stable */
+	l1ptstable.word = 1;
+	allcache->wbse(&l1ptstable, sizeof l1ptstable);
+
+	s = spllo();
+	/* Don't wait long for link to be ready. */
+	for(timeo = 0; timeo < 50 && miistatus(ctlr->mii) != 0; timeo++)
+//		tsleep(&up->sleep, return0, 0, 100); /* fewer miistatus msgs */
+		delay(100);
+
+	while (!edev->link)
+		tsleep(&up->sleep, return0, 0, 10);
+	splx(s);
+
+	snprint(name, KNAMELEN, "#l%drproc", edev->ctlrno);
+	kproc(name, rproc, edev);
+
+	snprint(name, KNAMELEN, "#l%dtproc", edev->ctlrno);
+	kproc(name, tproc, edev);
+}
+
+/* call with ctlr->reglock held */
+static void
+rtl8169link(Ether* edev)
+{
+	uint r;
+	int limit;
+	Ctlr *ctlr;
+
+	ctlr = edev->ctlr;
+
+	if(!((r = csr8r(ctlr, Phystatus)) & Linksts)){
+		if (edev->link) {
+			edev->link = 0;
+			csr8w(ctlr, Cr, Re);
+			iprint("#l%d: link down\n", edev->ctlrno);
+		}
+		return;
+	}
+	if (edev->link == 0) {
+		edev->link = 1;
+		csr8w(ctlr, Cr, Te|Re);
+		iprint("#l%d: link up\n", edev->ctlrno);
+	}
+	limit = 256*1024;
+	if(r & Speed10){
+		edev->mbps = 10;
+		limit = 65*1024;
+	} else if(r & Speed100)
+		edev->mbps = 100;
+	else if(r & Speed1000)
+		edev->mbps = 1000;
+
+	if(edev->oq != nil)
+		qsetlimit(edev->oq, limit);
+}
+
+static void
+rtl8169transmit(Ether* edev)
+{
+	Ctlr *ctlr;
+
+	ctlr = edev->ctlr;
+	if (ctlr == nil || ctlr->ntd == 0) {
+		iprint("rtl8169transmit: not yet initialised\n");
+		return;
+	}
+	wakeup(&ctlr->trendez);
+}
+
+/*
+ * the controller has lost its mind, so reset it.
+ * call with ctlr->reglock held.
+ */
+static void
+restart(Ether *edev, char *why)
+{
+	int i, s, del;
+	Ctlr *ctlr;
+	static int inrestart;
+	static Lock rstrtlck;
+
+	/* keep other cpus out */
+	s = splhi();
+	if (inrestart) {
+		splx(s);
+		return;
+	}
+	ilock(&rstrtlck);
+
+	ctlr = edev->ctlr;
+	if (ctlr == nil || !ctlr->init) {
+		iunlock(&rstrtlck);
+		splx(s);
+		return;
+	}
+
+	if (Debug)
+		iprint("#l%d: restart due to %s\n", edev->ctlrno, why);
+	inrestart = 1;
+
+	/* process any pkts in the rings */
+	wakeup(&ctlr->rrendez);
+	coherence();
+	rtl8169transmit(edev);
+	/* allow time to drain 1024-buffer ring */
+	for (del = 0; del < 13 && ctlr->ntq > 0; del++)
+		delay(1);
+
+	iunlock(&ctlr->reglock);
+	rtl8169reset(ctlr);
+	/* free any remaining unprocessed input buffers */
+	for (i = 0; i < ctlr->nrd; i++) {
+		freeb(ctlr->rb[i]);
+		ctlr->rb[i] = nil;
+	}
+	rtl8169init(edev);
+	ilock(&ctlr->reglock);
+
+	rtl8169link(edev);
+	rtl8169transmit(edev);		/* drain any output queue */
+	wakeup(&ctlr->rrendez);
+
+	inrestart = 0;
+
+	iunlock(&rstrtlck);
+	splx(s);
+}
+
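+/* tally receive-error interrupt bits; on serious errors, restart the controller and return ~0 */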
+static ulong
+rcvdiag(Ether *edev, ulong isr)
+{
+	Ctlr *ctlr;
+
+	ctlr = edev->ctlr;
+	if(!(isr & (Punlc|Rok)))
+		ctlr->ierrs++;
+	if(isr & Rer)
+		ctlr->rer++;
+	if(isr & Rdu)
+		ctlr->rdu++;
+	if(isr & Punlc)
+		ctlr->punlc++;
+	if(isr & Fovw)
+		ctlr->fovw++;
+	if (isr & (Fovw|Rdu|Rer)) {
+		if (isr & ~(Tdu|Tok|Rok))		/* harmless */
+			iprint("#l%d: isr %8.8#lux\n", edev->ctlrno, isr);
+		restart(edev, "rcv error");
+		isr = ~0;
+	}
+	return isr;
+}
+
+void
+rtl8169interrupt(Ureg*, void* arg)
+{
+	Ctlr *ctlr;
+	Ether *edev;
+	u32int isr;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+	ilock(&ctlr->reglock);
+
+	while((isr = csr16r(ctlr, Isr)) != 0 && isr != 0xFFFF){
+		ctlr->isr |= isr;		/* merge bits for [rt]proc */
+		csr16w(ctlr, Isr, isr);		/* dismiss? */
+		if((isr & ctlr->imr) == 0)
+			break;
+		if(isr & Fovw && ctlr->pciv == Rtl8168b) {
+			/*
+			 * Fovw means we got behind; relatively common on 8168.
+			 * this is a big hammer, but it gets things going again.
+			 */
+			ctlr->fovw++;
+			restart(edev, "rx fifo overrun");
+			break;
+		}
+		if(isr & (Fovw|Punlc|Rdu|Rer|Rok)) {
+			ctlr->imr &= ~(Fovw|Rdu|Rer|Rok);
+			csr16w(ctlr, Imr, ctlr->imr);
+			wakeup(&ctlr->rrendez);
+
+			if (isr & (Fovw|Punlc|Rdu|Rer)) {
+				isr = rcvdiag(edev, isr);
+				if (isr == ~0)
+					break;		/* restarted */
+			}
+			isr &= ~(Fovw|Rdu|Rer|Rok);
+		}
+		if(isr & (Ter|Tok)){
+			ctlr->imr &= ~(Ter|Tok);
+			csr16w(ctlr, Imr, ctlr->imr);
+			wakeup(&ctlr->trendez);
+
+			if (isr & Ter)
+				iprint("xmit err; isr %8.8#ux\n", isr);
+			isr &= ~(Ter|Tok);
+		}
+
+		if(isr & Punlc){
+			rtl8169link(edev);
+			isr &= ~Punlc;
+		}
+
+		/*
+		 * Some of the reserved bits get set sometimes...
+		 */
+		if(isr & (Serr|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok))
+			panic("rtl8169interrupt: imr %#4.4ux isr %#4.4ux",
+				csr16r(ctlr, Imr), isr);
+	}
+	if (edev->link && ctlr->ntq > 0)
+		csr8w(ctlr, Tppoll, Npq); /* kick xmiter to keep it going */
+	iunlock(&ctlr->reglock);
+	/*
+	 * extinguish pci-e controller interrupt source.
+	 * should be done more cleanly.
+	 */
+	if (ctlr->pcie)
+		pcieintrdone();
+}
+
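+/* read the hardware (mac) version from Tcr; -1 if it's not one we recognise */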
+int
+vetmacv(Ctlr *ctlr, uint *macv)
+{
+	*macv = csr32r(ctlr, Tcr) & HwveridMASK;
+	switch(*macv){
+	default:
+		return -1;
+	case Macv01:
+	case Macv02:
+	case Macv03:
+	case Macv04:
+	case Macv05:
+	case Macv07:
+	case Macv07a:
+	case Macv11:
+	case Macv12:
+	case Macv12a:
+	case Macv13:
+	case Macv14:
+	case Macv15:
+	case Macv25:
+		break;
+	}
+	return 0;
+}
+
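+/* find supported 8169-family controllers on pci and add each to the ctlr list */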
+static void
+rtl8169pci(void)
+{
+	Pcidev *p;
+	Ctlr *ctlr;
+	int i, pcie;
+	uint macv, bar;
+	void *mem;
+
+	p = nil;
+	while(p = pcimatch(p, 0, 0)){
+		if(p->ccrb != 0x02 || p->ccru != 0)
+			continue;
+
+		pcie = 0;
+		switch(i = ((p->did<<16)|p->vid)){
+		default:
+			continue;
+		case Rtl8100e:			/* RTL810[01]E ? */
+		case Rtl8168b:			/* RTL8168B */
+			pcie = 1;
+			break;
+		case Rtl8169c:			/* RTL8169C */
+		case Rtl8169sc:			/* RTL8169SC */
+		case Rtl8169:			/* RTL8169 */
+			break;
+		case (0xC107<<16)|0x1259:	/* Corega CG-LAPCIGT */
+			i = Rtl8169;
+			break;
+		}
+
+		bar = p->mem[2].bar & ~0x0F;
+		assert(bar != 0);
+		assert(!(p->mem[2].bar & Barioaddr));
+		if(0) iprint("rtl8169: %d-bit register accesses\n",
+			((p->mem[2].bar >> Barwidthshift) & Barwidthmask) ==
+			 Barwidth32? 32: 64);
+		mem = (void *)bar;	/* don't need to vmap on trimslice */
+		if(mem == 0){
+			print("rtl8169: can't map %#ux\n", bar);
+			continue;
+		}
+		ctlr = malloc(sizeof(Ctlr));
+		if(ctlr == nil)
+			error(Enomem);
+		ctlr->nic = mem;
+		ctlr->port = bar;
+		ctlr->pcidev = p;
+		ctlr->pciv = i;
+		ctlr->pcie = pcie;
+
+		if(vetmacv(ctlr, &macv) == -1){
+			free(ctlr);
+			print("rtl8169: unknown mac %.4ux %.8ux\n", p->did, macv);
+			continue;
+		}
+
+		if(pcigetpms(p) > 0){
+			pcisetpms(p, 0);
+
+			for(i = 0; i < 6; i++)
+				pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar);
+			pcicfgw8(p, PciINTL, p->intl);
+			pcicfgw8(p, PciLTR, p->ltr);
+			pcicfgw8(p, PciCLS, p->cls);
+			pcicfgw16(p, PciPCR, p->pcr);
+		}
+
+		if(rtl8169reset(ctlr)){
+			free(ctlr);
+			continue;
+		}
+
+		/*
+		 * Extract the chip hardware version,
+		 * needed to configure each properly.
+		 */
+		ctlr->macv = macv;
+
+		rtl8169mii(ctlr);
+		pcisetbme(p);
+
+		if(rtl8169ctlrhead != nil)
+			rtl8169ctlrtail->next = ctlr;
+		else
+			rtl8169ctlrhead = ctlr;
+		rtl8169ctlrtail = ctlr;
+	}
+}
+
+static int
+rtl8169pnp(Ether* edev)
+{
+	u32int r;
+	Ctlr *ctlr;
+	uchar ea[Eaddrlen];
+	static int once;
+
+	if(once == 0){
+		once = 1;
+		rtl8169pci();
+	}
+
+	/*
+	 * Any adapter matches if no edev->port is supplied,
+	 * otherwise the ports must match.
+	 */
+	for(ctlr = rtl8169ctlrhead; ctlr != nil; ctlr = ctlr->next){
+		if(ctlr->active)
+			continue;
+		if(edev->port == 0 || edev->port == ctlr->port){
+			ctlr->active = 1;
+			break;
+		}
+	}
+	if(ctlr == nil)
+		return -1;
+
+	edev->ctlr = ctlr;
+	ctlr->ether = edev;
+	edev->port = ctlr->port;
+//	edev->irq = ctlr->pcidev->intl;	/* incorrect on trimslice */
+	edev->irq = Pcieirq;		/* trimslice: non-msi pci-e intr */
+	edev->tbdf = ctlr->pcidev->tbdf;
+	edev->mbps = 1000;
+	edev->maxmtu = Mtu;
+
+	/*
+	 * Check if the adapter's station address is to be overridden.
+	 * If not, read it from the device and set in edev->ea.
+	 */
+	memset(ea, 0, Eaddrlen);
+	if(memcmp(ea, edev->ea, Eaddrlen) == 0){
+		r = csr32r(ctlr, Idr0);
+		edev->ea[0] = r;
+		edev->ea[1] = r>>8;
+		edev->ea[2] = r>>16;
+		edev->ea[3] = r>>24;
+		r = csr32r(ctlr, Idr0+4);
+		edev->ea[4] = r;
+		edev->ea[5] = r>>8;
+	}
+
+	edev->attach = rtl8169attach;
+	edev->transmit = rtl8169transmit;
+	edev->interrupt = rtl8169interrupt;
+	edev->ifstat = rtl8169ifstat;
+
+	edev->arg = edev;
+	edev->promiscuous = rtl8169promiscuous;
+	edev->multicast = rtl8169multicast;
+	edev->shutdown = rtl8169shutdown;
+
+	ilock(&ctlr->reglock);
+	rtl8169link(edev);
+	iunlock(&ctlr->reglock);
+	return 0;
+}
+
+void
+ether8169link(void)
+{
+	addethercard("rtl8169", rtl8169pnp);
+}

+ 42 - 0
sys/src/9/teg2/etherif.h

@@ -0,0 +1,42 @@
+enum
+{
+	MaxEther	= 4,
+	Ntypes		= 8,
+};
+
+typedef struct Ether Ether;
+struct Ether {
+	RWlock;
+	ISAConf;			/* hardware info */
+
+	int	ctlrno;
+	ulong	tbdf;
+	int	minmtu;
+	int 	maxmtu;
+
+	Netif;
+
+	void	(*attach)(Ether*);	/* filled in by reset routine */
+	void	(*detach)(Ether*);
+	void	(*transmit)(Ether*);
+	void	(*interrupt)(Ureg*, void*);
+	long	(*ifstat)(Ether*, void*, long, ulong);
+	long 	(*ctl)(Ether*, void*, long); /* custom ctl messages */
+	void	(*power)(Ether*, int);	/* power on/off */
+	void	(*shutdown)(Ether*);	/* shutdown hardware before reboot */
+
+	void*	ctlr;
+	uchar	ea[Eaddrlen];
+	void*	address;
+	int	irq;
+
+	Queue*	oq;
+};
+
+extern Block* etheriq(Ether*, Block*, int);
+extern void addethercard(char*, int(*)(Ether*));
+extern ulong ethercrc(uchar*, int);
+extern int parseether(uchar*, char*);
+
+#define NEXT(x, l)	(((x)+1)%(l))
+#define PREV(x, l)	(((x) == 0) ? (l)-1: (x)-1)

+ 235 - 0
sys/src/9/teg2/ethermii.c

@@ -0,0 +1,235 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+
+#include "etherif.h"
+#include "ethermii.h"
+
+int
+mii(Mii* mii, int mask)
+{
+	MiiPhy *miiphy;
+	int bit, oui, phyno, r, rmask;
+
+	/*
+	 * Probe through mii for PHYs in mask;
+	 * return the mask of those found in the current probe.
+	 * If the PHY has not already been probed, update
+	 * the Mii information.
+	 */
+	rmask = 0;
+	for(phyno = 0; phyno < NMiiPhy; phyno++){
+		bit = 1<<phyno;
+		if(!(mask & bit))
+			continue;
+		if(mii->mask & bit){
+			rmask |= bit;
+			continue;
+		}
+		if(mii->mir(mii, phyno, Bmsr) == -1)
+			continue;
+		r = mii->mir(mii, phyno, Phyidr1);
+		oui = (r & 0x3FFF)<<6;
+		r = mii->mir(mii, phyno, Phyidr2);
+		oui |= r>>10;
+		if(oui == 0xFFFFF || oui == 0)
+			continue;
+
+		if((miiphy = malloc(sizeof(MiiPhy))) == nil)
+			continue;
+
+		miiphy->mii = mii;
+		miiphy->oui = oui;
+		miiphy->phyno = phyno;
+
+		miiphy->anar = ~0;
+		miiphy->fc = ~0;
+		miiphy->mscr = ~0;
+
+		mii->phy[phyno] = miiphy;
+		if(mii->curphy == nil)
+			mii->curphy = miiphy;
+		mii->mask |= bit;
+		mii->nphy++;
+
+		rmask |= bit;
+	}
+	return rmask;
+}
+
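+/* read register r of the currently-selected phy; -1 on error */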
+int
+miimir(Mii* mii, int r)
+{
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+	return mii->mir(mii, mii->curphy->phyno, r);
+}
+
+int
+miimiw(Mii* mii, int r, int data)
+{
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+	return mii->miw(mii, mii->curphy->phyno, r, data);
+}
+
+int
+miireset(Mii* mii)
+{
+	int bmcr;
+
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+	bmcr = mii->mir(mii, mii->curphy->phyno, Bmcr);
+	bmcr |= BmcrR;
+	mii->miw(mii, mii->curphy->phyno, Bmcr, bmcr);
+	microdelay(1);
+
+	return 0;
+}
+
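+/* (re)start auto-negotiation, advertising abilities a, pause p and extended (1000Mb/s) e; ~0 means use existing or probed defaults */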
+int
+miiane(Mii* mii, int a, int p, int e)
+{
+	int anar, bmsr, mscr, r, phyno;
+
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+	phyno = mii->curphy->phyno;
+
+	bmsr = mii->mir(mii, phyno, Bmsr);
+	if(!(bmsr & BmsrAna))
+		return -1;
+
+	if(a != ~0)
+		anar = (AnaTXFD|AnaTXHD|Ana10FD|Ana10HD) & a;
+	else if(mii->curphy->anar != ~0)
+		anar = mii->curphy->anar;
+	else{
+		anar = mii->mir(mii, phyno, Anar);
+		anar &= ~(AnaAP|AnaP|AnaT4|AnaTXFD|AnaTXHD|Ana10FD|Ana10HD);
+		if(bmsr & Bmsr10THD)
+			anar |= Ana10HD;
+		if(bmsr & Bmsr10TFD)
+			anar |= Ana10FD;
+		if(bmsr & Bmsr100TXHD)
+			anar |= AnaTXHD;
+		if(bmsr & Bmsr100TXFD)
+			anar |= AnaTXFD;
+	}
+	mii->curphy->anar = anar;
+
+	if(p != ~0)
+		anar |= (AnaAP|AnaP) & p;
+	else if(mii->curphy->fc != ~0)
+		anar |= mii->curphy->fc;
+	mii->curphy->fc = (AnaAP|AnaP) & anar;
+
+	if(bmsr & BmsrEs){
+		mscr = mii->mir(mii, phyno, Mscr);
+		mscr &= ~(Mscr1000TFD|Mscr1000THD);
+		if(e != ~0)
+			mscr |= (Mscr1000TFD|Mscr1000THD) & e;
+		else if(mii->curphy->mscr != ~0)
+			mscr = mii->curphy->mscr;
+		else{
+			r = mii->mir(mii, phyno, Esr);
+			if(r & Esr1000THD)
+				mscr |= Mscr1000THD;
+			if(r & Esr1000TFD)
+				mscr |= Mscr1000TFD;
+		}
+		mii->curphy->mscr = mscr;
+		mii->miw(mii, phyno, Mscr, mscr);
+	}
+	mii->miw(mii, phyno, Anar, anar);
+
+	r = mii->mir(mii, phyno, Bmcr);
+	if(!(r & BmcrR)){
+		r |= BmcrAne|BmcrRan;
+		mii->miw(mii, phyno, Bmcr, r);
+	}
+
+	return 0;
+}
+
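+/* refresh the current phy's link, speed, duplex and flow-control state; -1 if there's no usable link */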
+int
+miistatus(Mii* mii)
+{
+	MiiPhy *phy;
+	int anlpar, bmsr, p, r, phyno;
+
+	if(mii == nil || mii->ctlr == nil || mii->curphy == nil)
+		return -1;
+	phy = mii->curphy;
+	phyno = phy->phyno;
+
+	/*
+	 * Check Auto-Negotiation is complete and link is up.
+	 * (Read status twice as the Ls bit is sticky).
+	 */
+	bmsr = mii->mir(mii, phyno, Bmsr);
+	if(!(bmsr & (BmsrAnc|BmsrAna))) {
+		// print("miistatus: auto-neg incomplete\n");
+		return -1;
+	}
+
+	bmsr = mii->mir(mii, phyno, Bmsr);
+	if(!(bmsr & BmsrLs)){
+		// print("miistatus: link down\n");
+		phy->link = 0;
+		return -1;
+	}
+
+	phy->speed = phy->fd = phy->rfc = phy->tfc = 0;
+	if(phy->mscr){
+		r = mii->mir(mii, phyno, Mssr);
+		if((phy->mscr & Mscr1000TFD) && (r & Mssr1000TFD)){
+			phy->speed = 1000;
+			phy->fd = 1;
+		}
+		else if((phy->mscr & Mscr1000THD) && (r & Mssr1000THD))
+			phy->speed = 1000;
+	}
+
+	anlpar = mii->mir(mii, phyno, Anlpar);
+	if(phy->speed == 0){
+		r = phy->anar & anlpar;
+		if(r & AnaTXFD){
+			phy->speed = 100;
+			phy->fd = 1;
+		}
+		else if(r & AnaTXHD)
+			phy->speed = 100;
+		else if(r & Ana10FD){
+			phy->speed = 10;
+			phy->fd = 1;
+		}
+		else if(r & Ana10HD)
+			phy->speed = 10;
+	}
+	if(phy->speed == 0) {
+		// print("miistatus: phy speed 0\n");
+		return -1;
+	}
+
+	if(phy->fd){
+		p = phy->fc;
+		r = anlpar & (AnaAP|AnaP);
+		if(p == AnaAP && r == (AnaAP|AnaP))
+			phy->tfc = 1;
+		else if(p == (AnaAP|AnaP) && r == AnaAP)
+			phy->rfc = 1;
+		else if((p & AnaP) && (r & AnaP))
+			phy->rfc = phy->tfc = 1;
+	}
+
+	phy->link = 1;
+
+	return 0;
+}

+ 116 - 0
sys/src/9/teg2/ethermii.h

@@ -0,0 +1,116 @@
+typedef struct Mii Mii;
+typedef struct MiiPhy MiiPhy;
+
+enum {					/* registers */
+	Bmcr		= 0x00,		/* Basic Mode Control */
+	Bmsr		= 0x01,		/* Basic Mode Status */
+	Phyidr1		= 0x02,		/* PHY Identifier #1 */
+	Phyidr2		= 0x03,		/* PHY Identifier #2 */
+	Anar		= 0x04,		/* Auto-Negotiation Advertisement */
+	Anlpar		= 0x05,		/* AN Link Partner Ability */
+	Aner		= 0x06,		/* AN Expansion */
+	Annptr		= 0x07,		/* AN Next Page TX */
+	Annprr		= 0x08,		/* AN Next Page RX */
+	Mscr		= 0x09,		/* MASTER-SLAVE Control */
+	Mssr		= 0x0A,		/* MASTER-SLAVE Status */
+	Esr		= 0x0F,		/* Extended Status */
+
+	NMiiPhyr	= 32,
+	NMiiPhy		= 32,
+};
+
+enum {					/* Bmcr */
+	BmcrSs1		= 0x0040,	/* Speed Select[1] */
+	BmcrCte		= 0x0080,	/* Collision Test Enable */
+	BmcrDm		= 0x0100,	/* Duplex Mode */
+	BmcrRan		= 0x0200,	/* Restart Auto-Negotiation */
+	BmcrI		= 0x0400,	/* Isolate */
+	BmcrPd		= 0x0800,	/* Power Down */
+	BmcrAne		= 0x1000,	/* Auto-Negotiation Enable */
+	BmcrSs0		= 0x2000,	/* Speed Select[0] */
+	BmcrLe		= 0x4000,	/* Loopback Enable */
+	BmcrR		= 0x8000,	/* Reset */
+};
+
+enum {					/* Bmsr */
+	BmsrEc		= 0x0001,	/* Extended Capability */
+	BmsrJd		= 0x0002,	/* Jabber Detect */
+	BmsrLs		= 0x0004,	/* Link Status */
+	BmsrAna		= 0x0008,	/* Auto-Negotiation Ability */
+	BmsrRf		= 0x0010,	/* Remote Fault */
+	BmsrAnc		= 0x0020,	/* Auto-Negotiation Complete */
+	BmsrPs		= 0x0040,	/* Preamble Suppression Capable */
+	BmsrEs		= 0x0100,	/* Extended Status */
+	Bmsr100T2HD	= 0x0200,	/* 100BASE-T2 HD Capable */
+	Bmsr100T2FD	= 0x0400,	/* 100BASE-T2 FD Capable */
+	Bmsr10THD	= 0x0800,	/* 10BASE-T HD Capable */
+	Bmsr10TFD	= 0x1000,	/* 10BASE-T FD Capable */
+	Bmsr100TXHD	= 0x2000,	/* 100BASE-TX HD Capable */
+	Bmsr100TXFD	= 0x4000,	/* 100BASE-TX FD Capable */
+	Bmsr100T4	= 0x8000,	/* 100BASE-T4 Capable */
+};
+
+enum {					/* Anar/Anlpar */
+	Ana10HD		= 0x0020,	/* Advertise 10BASE-T */
+	Ana10FD		= 0x0040,	/* Advertise 10BASE-T FD */
+	AnaTXHD		= 0x0080,	/* Advertise 100BASE-TX */
+	AnaTXFD		= 0x0100,	/* Advertise 100BASE-TX FD */
+	AnaT4		= 0x0200,	/* Advertise 100BASE-T4 */
+	AnaP		= 0x0400,	/* Pause */
+	AnaAP		= 0x0800,	/* Asymmetrical Pause */
+	AnaRf		= 0x2000,	/* Remote Fault */
+	AnaAck		= 0x4000,	/* Acknowledge */
+	AnaNp		= 0x8000,	/* Next Page Indication */
+};
+
+enum {					/* Mscr */
+	Mscr1000THD	= 0x0100,	/* Advertise 1000BASE-T HD */
+	Mscr1000TFD	= 0x0200,	/* Advertise 1000BASE-T FD */
+};
+
+enum {					/* Mssr */
+	Mssr1000THD	= 0x0400,	/* Link Partner 1000BASE-T HD able */
+	Mssr1000TFD	= 0x0800,	/* Link Partner 1000BASE-T FD able */
+};
+
+enum {					/* Esr */
+	Esr1000THD	= 0x1000,	/* 1000BASE-T HD Capable */
+	Esr1000TFD	= 0x2000,	/* 1000BASE-T FD Capable */
+	Esr1000XHD	= 0x4000,	/* 1000BASE-X HD Capable */
+	Esr1000XFD	= 0x8000,	/* 1000BASE-X FD Capable */
+};
+
+typedef struct Mii {
+	Lock;
+	int	nphy;
+	int	mask;
+	MiiPhy*	phy[NMiiPhy];
+	MiiPhy*	curphy;
+
+	void*	ctlr;
+	int	(*mir)(Mii*, int, int);
+	int	(*miw)(Mii*, int, int, int);
+} Mii;
+
+typedef struct MiiPhy {
+	Mii*	mii;
+	int	oui;
+	int	phyno;
+
+	int	anar;
+	int	fc;
+	int	mscr;
+
+	int	link;
+	int	speed;
+	int	fd;
+	int	rfc;
+	int	tfc;
+} MiiPhy;
+
+extern int mii(Mii*, int);
+extern int miiane(Mii*, int, int, int);
+extern int miimir(Mii*, int);
+extern int miimiw(Mii*, int, int);
+extern int miireset(Mii*);
+extern int miistatus(Mii*);

+ 233 - 0
sys/src/9/teg2/fns.h

@@ -0,0 +1,233 @@
+#define checkmmu(a, b)
+#define countpagerefs(a, b)
+
+#include "../port/portfns.h"
+
+typedef struct Ether Ether;
+struct Ether;
+
+extern int led(int, int);
+extern void ledexit(int);
+extern void delay(int);
+extern void _uartputs(char*, int);
+extern int _uartprint(char*, ...);
+
+#pragma	varargck argpos	_uartprint 1
+
+extern long ainc(long *);
+extern long adec(long *);
+extern void allcacheinfo(Memcache *);
+extern void allcacheson(void);
+extern int archether(unsigned, Ether *);
+extern void archreboot(void);
+extern void archreset(void);
+extern void cachedinv(void);
+extern void cachedinvse(void*, int);
+extern void cachedwb(void);
+extern void cachedwbinv(void);
+extern void cachedwbinvse(void*, int);
+extern void cachedwbse(void*, int);
+extern void cacheiinv(void);
+extern void cacheuwbinv(void);
+extern uintptr cankaddr(uintptr pa);
+extern void chkmissing(void);
+extern void clockprod(Ureg *);
+extern void clockshutdown(void);
+extern int clz(ulong);
+extern int cmpswap(long*, long, long);
+extern void coherence(void);
+extern void configscreengpio(void);
+extern u32int controlget(void);
+extern void cortexa9cachecfg(void);
+extern u32int cpctget(void);
+extern u32int cpidget(void);
+extern ulong cprd(int cp, int op1, int crn, int crm, int op2);
+extern ulong cprdsc(int op1, int crn, int crm, int op2);
+extern void cpuidprint(void);
+extern char *cputype2name(char *buf, int size);
+extern void cpwr(int cp, int op1, int crn, int crm, int op2, ulong val);
+extern void cpwrsc(int op1, int crn, int crm, int op2, ulong val);
+#define cycles(vlp) *(vlp) = (ulong)lcycles()
+extern u32int dacget(void);
+extern void dacput(u32int);
+extern void dmainit(void);
+extern int dmastart(void *, int, void *, int, uint, Rendez *, int *);
+extern void dmatest(void);
+extern void dump(void *vaddr, int words);
+extern u32int farget(void);
+extern void fpclear(void);
+extern void fpoff(void);
+extern void fpon(void);
+extern ulong fprd(int fpreg);
+extern void fprestreg(int fpreg, uvlong val);
+extern void fpsave(FPsave *);
+extern ulong fpsavereg(int fpreg, uvlong *fpp);
+extern void fpwr(int fpreg, ulong val);
+extern u32int fsrget(void);
+extern ulong getauxctl(void);
+extern ulong getclvlid(void);
+extern ulong getcyc(void);
+extern int getncpus(void);
+extern u32int getpsr(void);
+extern u32int getscr(void);
+extern ulong getwayssets(void);
+extern void intcmask(uint);
+extern void intcunmask(uint);
+extern void intrcpu(int);
+extern void intrcpushutdown(void);
+extern void intrshutdown(void);
+extern void intrsoff(void);
+extern int isaconfig(char*, int, ISAConf*);
+extern int isdmadone(int);
+extern int ispow2(uvlong);
+extern void kbdenable(void);
+extern void l1diag(void);
+extern void l2pl310init(void);
+extern int log2(ulong);
+extern void machoff(uint cpu);
+extern void machon(uint cpu);
+extern void memdiag(ulong *);
+extern void mmuidmap(uintptr phys, int mbs);
+extern void mmuinvalidate(void);		/* 'mmu' or 'tlb'? */
+extern void mmuinvalidateaddr(u32int);		/* 'mmu' or 'tlb'? */
+extern void mousectl(Cmdbuf *cb);
+extern ulong pcibarsize(Pcidev*, int);
+extern void pcibussize(Pcidev*, ulong*, ulong*);
+extern int pcicfgr8(Pcidev*, int);
+extern int pcicfgr16(Pcidev*, int);
+extern int pcicfgr32(Pcidev*, int);
+extern void pcicfgw8(Pcidev*, int, int);
+extern void pcicfgw16(Pcidev*, int, int);
+extern void pcicfgw32(Pcidev*, int, int);
+extern void pciclrbme(Pcidev*);
+extern void pciclrioe(Pcidev*);
+extern void pciclrmwi(Pcidev*);
+extern void pcieintrdone(void);
+extern int pcigetpms(Pcidev*);
+extern void pcihinv(Pcidev*);
+extern uchar pciipin(Pcidev*, uchar);
+extern Pcidev* pcimatch(Pcidev*, int, int);
+extern Pcidev* pcimatchtbdf(int);
+extern void pcireset(void);
+extern void pcisetbme(Pcidev*);
+extern void pcisetioe(Pcidev*);
+extern void pcisetmwi(Pcidev*);
+extern int pcisetpms(Pcidev*, int);
+extern u32int pidget(void);
+extern void pidput(u32int);
+extern void prcachecfg(void);
+extern vlong probeaddr(uintptr);
+extern void procrestore(Proc *);
+extern void procsave(Proc*);
+extern void procsetup(Proc*);
+extern void putauxctl(ulong);
+extern void _reset(void);
+extern void screenclockson(void);
+extern void screeninit(void);
+extern void serialputc(int c);
+extern void serialputs(char* s, int n);
+extern void setcachelvl(int);
+extern void setsp(uintptr);
+extern void setr13(int, u32int*);
+extern ulong smpon(void);
+extern int startcpu(uint);
+extern void stopcpu(uint);
+extern int tas(void *);
+extern void tegclock0init(void);
+extern void tegclockinit(void);
+extern void tegclockintr(void);
+extern void tegclockshutdown(void);
+extern void tegwdogintr(Ureg *, void *);
+extern u32int ttbget(void);
+extern void ttbput(u32int);
+extern void _vrst(void);
+extern void wakewfi(void);
+extern void watchdoginit(void);
+extern void wfi(void);
+
+extern int irqenable(uint, void (*)(Ureg*, void*), void*, char*);
+extern int irqdisable(uint, void (*)(Ureg*, void*), void*, char*);
+#define intrenable(i, f, a, b, n)	irqenable((i), (f), (a), (n))
+#define intrdisable(i, f, a, b, n)	irqdisable((i), (f), (a), (n))
+extern void vectors(void);
+extern void vtable(void);
+
+/*
+ * Things called in main.
+ */
+extern void archconfinit(void);
+extern void clockinit(void);
+extern int i8250console(void);
+extern void links(void);
+extern void mmuinit(void);
+extern void touser(uintptr);
+extern void trapinit(void);
+
+
+extern int fpiarm(Ureg*);
+extern int fpudevprocio(Proc*, void*, long, uintptr, int);
+extern void fpuinit(void);
+extern void fpunoted(void);
+extern void fpunotify(Ureg*);
+extern void fpuprocrestore(Proc*);
+extern void fpuprocsave(Proc*);
+extern void fpusysprocsetup(Proc*);
+extern void fpusysrfork(Ureg*);
+extern void fpusysrforkchild(Proc*, Ureg*, Proc*);
+extern int fpuemu(Ureg*);
+
+/*
+ * Miscellaneous machine dependent stuff.
+ */
+extern int cas(int *, int, int);
+extern char* getenv(char*, char*, int);
+char*	getconf(char*);
+uintptr mmukmap(uintptr, uintptr, usize);
+uintptr mmukunmap(uintptr, uintptr, usize);
+extern void* mmuuncache(void*, usize);
+extern void* ucalloc(usize);
+extern Block* ucallocb(int);
+extern void* ucallocalign(usize size, int align, int span);
+extern void ucfree(void*);
+extern void ucfreeb(Block*);
+
+/*
+ * Things called from port.
+ */
+extern void delay(int);				/* only scheddump() */
+extern int islo(void);
+extern void microdelay(int);			/* only edf.c */
+extern void evenaddr(uintptr);
+extern void idlehands(void);
+extern void setkernur(Ureg*, Proc*);		/* only devproc.c */
+extern void syscallfmt(int syscallno, ulong pc, va_list list);
+extern void sysretfmt(int syscallno, va_list list, long ret, uvlong start, uvlong stop);
+extern void* sysexecregs(uintptr, ulong, int);
+extern void sysprocsetup(Proc*);
+
+/* libc */
+long labs(long);
+
+/*
+ * PCI stuff.
+ */
+
+extern void forkret(void);
+extern int userureg(Ureg*);
+void*	vmap(uintptr, usize);
+void vunmap(void*, usize);
+
+extern void kexit(Ureg*);
+
+#define	getpgcolor(a)	0
+#define	kmapinval()
+
+#define PTR2UINT(p)	((uintptr)(p))
+#define UINT2PTR(i)	((void*)(i))
+
+#define	waserror()	(up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
+
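+/* convert between kernel virtual (KZERO) and physical (PHYSDRAM) addresses by substituting the segment bits */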
+#define KADDR(pa)	UINT2PTR(KZERO    | ((uintptr)(pa) & ~KSEGM))
+#define PADDR(va)	PTR2UINT(PHYSDRAM | ((uintptr)(va) & ~KSEGM))
+
+#define MASK(v)	((1UL << (v)) - 1)	/* mask `v' bits wide */

+ 300 - 0
sys/src/9/teg2/fpi.c

@@ -0,0 +1,300 @@
+/*
+ * Floating Point Interpreter.
+ * shamelessly stolen from an original by ark.
+ */
+#include "fpi.h"
+
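+/* round to nearest, ties to even, on the guard bits */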
+void
+fpiround(Internal *i)
+{
+	unsigned long guard;
+
+	guard = i->l & GuardMask;
+	i->l &= ~GuardMask;
+	if(guard > (LsBit>>1) || (guard == (LsBit>>1) && (i->l & LsBit))){
+		i->l += LsBit;
+		if(i->l & CarryBit){
+			i->l &= ~CarryBit;
+			i->h++;
+			if(i->h & CarryBit){
+				if (i->h & 0x01)
+					i->l |= CarryBit;
+				i->l >>= 1;
+				i->h >>= 1;
+				i->e++;
+			}
+		}
+	}
+}
+
+static void
+matchexponents(Internal *x, Internal *y)
+{
+	int count;
+
+	count = y->e - x->e;
+	x->e = y->e;
+	if(count >= 2*FractBits){
+		x->l = x->l || x->h;
+		x->h = 0;
+		return;
+	}
+	if(count >= FractBits){
+		count -= FractBits;
+		x->l = x->h|(x->l != 0);
+		x->h = 0;
+	}
+	while(count > 0){
+		count--;
+		if(x->h & 0x01)
+			x->l |= CarryBit;
+		if(x->l & 0x01)
+			x->l |= 2;
+		x->l >>= 1;
+		x->h >>= 1;
+	}
+}
+
+static void
+shift(Internal *i)
+{
+	i->e--;
+	i->h <<= 1;
+	i->l <<= 1;
+	if(i->l & CarryBit){
+		i->l &= ~CarryBit;
+		i->h |= 0x01;
+	}
+}
+
+static void
+normalise(Internal *i)
+{
+	while((i->h & HiddenBit) == 0)
+		shift(i);
+}
+
+static void
+renormalise(Internal *i)
+{
+	if(i->e < -2 * FractBits)
+		i->e = -2 * FractBits;
+	while(i->e < 1){
+		i->e++;
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->h >>= 1;
+		i->l = (i->l>>1)|(i->l & 0x01);
+	}
+	if(i->e >= ExpInfinity)
+		SetInfinity(i);
+}
+
+void
+fpinormalise(Internal *x)
+{
+	if(!IsWeird(x) && !IsZero(x))
+		normalise(x);
+}
+
+void
+fpiadd(Internal *x, Internal *y, Internal *i)
+{
+	Internal *t;
+
+	i->s = x->s;
+	if(IsWeird(x) || IsWeird(y)){
+		if(IsNaN(x) || IsNaN(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	if(x->e > y->e){
+		t = x;
+		x = y;
+		y = t;
+	}
+	matchexponents(x, y);
+	i->e = x->e;
+	i->h = x->h + y->h;
+	i->l = x->l + y->l;
+	if(i->l & CarryBit){
+		i->h++;
+		i->l &= ~CarryBit;
+	}
+	if(i->h & (HiddenBit<<1)){
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->l = (i->l>>1)|(i->l & 0x01);
+		i->h >>= 1;
+		i->e++;
+	}
+	if(IsWeird(i))
+		SetInfinity(i);
+}
+
+void
+fpisub(Internal *x, Internal *y, Internal *i)
+{
+	Internal *t;
+
+	if(y->e < x->e
+	   || (y->e == x->e && (y->h < x->h || (y->h == x->h && y->l < x->l)))){
+		t = x;
+		x = y;
+		y = t;
+	}
+	i->s = y->s;
+	if(IsNaN(y)){
+		SetQNaN(i);
+		return;
+	}
+	if(IsInfinity(y)){
+		if(IsInfinity(x))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	matchexponents(x, y);
+	i->e = y->e;
+	i->h = y->h - x->h;
+	i->l = y->l - x->l;
+	if(i->l < 0){
+		i->l += CarryBit;
+		i->h--;
+	}
+	if(i->h == 0 && i->l == 0)
+		SetZero(i);
+	else while(i->e > 1 && (i->h & HiddenBit) == 0)
+		shift(i);
+}
+
+#define	CHUNK		(FractBits/2)
+#define	CMASK		((1<<CHUNK)-1)
+#define	HI(x)		((short)((x)>>CHUNK) & CMASK)
+#define	LO(x)		((short)(x) & CMASK)
+#define	SPILL(x)	((x)>>CHUNK)
+#define	M(x, y)		((long)a[x]*(long)b[y])
+#define	C(h, l)		(((long)((h) & CMASK)<<CHUNK)|((l) & CMASK))
+
+void
+fpimul(Internal *x, Internal *y, Internal *i)
+{
+	long a[4], b[4], c[7], f[4];
+
+	i->s = x->s^y->s;
+	if(IsWeird(x) || IsWeird(y)){
+		if(IsNaN(x) || IsNaN(y) || IsZero(x) || IsZero(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	else if(IsZero(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->e = x->e + y->e - (ExpBias - 1);
+
+	a[0] = HI(x->h); b[0] = HI(y->h);
+	a[1] = LO(x->h); b[1] = LO(y->h);
+	a[2] = HI(x->l); b[2] = HI(y->l);
+	a[3] = LO(x->l); b[3] = LO(y->l);
+
+	c[6] =                               M(3, 3);
+	c[5] =                     M(2, 3) + M(3, 2) + SPILL(c[6]);
+	c[4] =           M(1, 3) + M(2, 2) + M(3, 1) + SPILL(c[5]);
+	c[3] = M(0, 3) + M(1, 2) + M(2, 1) + M(3, 0) + SPILL(c[4]);
+	c[2] = M(0, 2) + M(1, 1) + M(2, 0)           + SPILL(c[3]);
+	c[1] = M(0, 1) + M(1, 0)                     + SPILL(c[2]);
+	c[0] = M(0, 0)                               + SPILL(c[1]);
+
+	f[0] = c[0];
+	f[1] = C(c[1], c[2]);
+	f[2] = C(c[3], c[4]);
+	f[3] = C(c[5], c[6]);
+
+	if((f[0] & HiddenBit) == 0){
+		f[0] <<= 1;
+		f[1] <<= 1;
+		f[2] <<= 1;
+		f[3] <<= 1;
+		if(f[1] & CarryBit){
+			f[0] |= 1;
+			f[1] &= ~CarryBit;
+		}
+		if(f[2] & CarryBit){
+			f[1] |= 1;
+			f[2] &= ~CarryBit;
+		}
+		if(f[3] & CarryBit){
+			f[2] |= 1;
+			f[3] &= ~CarryBit;
+		}
+		i->e--;
+	}
+	i->h = f[0];
+	i->l = f[1];
+	if(f[2] || f[3])
+		i->l |= 1;
+	renormalise(i);
+}
+
+void
+fpidiv(Internal *x, Internal *y, Internal *i)
+{
+	i->s = x->s^y->s;
+	if(IsNaN(x) || IsNaN(y)
+	   || (IsInfinity(x) && IsInfinity(y)) || (IsZero(x) && IsZero(y))){
+		SetQNaN(i);
+		return;
+	}
+	else if(IsZero(x) || IsInfinity(y)){
+		SetInfinity(i);
+		return;
+	}
+	else if(IsInfinity(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->h = 0;
+	i->l = 0;
+	i->e = y->e - x->e + (ExpBias + 2*FractBits - 1);
+	do{
+		if(y->h > x->h || (y->h == x->h && y->l >= x->l)){
+			i->l |= 0x01;
+			y->h -= x->h;
+			y->l -= x->l;
+			if(y->l < 0){
+				y->l += CarryBit;
+				y->h--;
+			}
+		}
+		shift(y);
+		shift(i);
+	}while ((i->h & HiddenBit) == 0);
+	if(y->h || y->l)
+		i->l |= 0x01;
+	renormalise(i);
+}
+
+int
+fpicmp(Internal *x, Internal *y)
+{
+	if(IsNaN(x) && IsNaN(y))
+		return 0;
+	if(IsInfinity(x) && IsInfinity(y))
+		return y->s - x->s;
+	if(x->e == y->e && x->h == y->h && x->l == y->l)
+		return y->s - x->s;
+	if(x->e < y->e
+	   || (x->e == y->e && (x->h < y->h || (x->h == y->h && x->l < y->l))))
+		return y->s ? 1: -1;
+	return x->s ? -1: 1;
+}

+ 61 - 0
sys/src/9/teg2/fpi.h

@@ -0,0 +1,61 @@
+typedef long Word;
+typedef unsigned long Single;
+typedef struct {
+	unsigned long l;
+	unsigned long h;
+} Double;
+
+enum {
+	FractBits	= 28,
+	CarryBit	= 0x10000000,
+	HiddenBit	= 0x08000000,
+	MsBit		= HiddenBit,
+	NGuardBits	= 3,
+	GuardMask	= 0x07,
+	LsBit		= (1<<NGuardBits),
+
+	SingleExpBias	= 127,
+	SingleExpMax	= 255,
+	DoubleExpBias	= 1023,
+	DoubleExpMax	= 2047,
+
+	ExpBias		= DoubleExpBias,
+	ExpInfinity	= DoubleExpMax,
+};
+
+typedef struct {
+	unsigned char s;
+	short e;
+	long l;				/* 0000FFFFFFFFFFFFFFFFFFFFFFFFFGGG */
+	long h;				/* 0000HFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+} Internal;
+
+#define IsWeird(n)	((n)->e >= ExpInfinity)
+#define	IsInfinity(n)	(IsWeird(n) && (n)->h == HiddenBit && (n)->l == 0)
+#define	SetInfinity(n)	((n)->e = ExpInfinity, (n)->h = HiddenBit, (n)->l = 0)
+#define IsNaN(n)	(IsWeird(n) && (((n)->h & ~HiddenBit) || (n)->l))
+#define	SetQNaN(n)	((n)->s = 0, (n)->e = ExpInfinity, 		\
+			 (n)->h = HiddenBit|(LsBit<<1), (n)->l = 0)
+#define IsZero(n)	((n)->e == 1 && (n)->h == 0 && (n)->l == 0)
+#define SetZero(n)	((n)->e = 1, (n)->h = 0, (n)->l = 0)
+
+/*
+ * fpi.c
+ */
+extern void fpiround(Internal *);
+extern void fpiadd(Internal *, Internal *, Internal *);
+extern void fpisub(Internal *, Internal *, Internal *);
+extern void fpimul(Internal *, Internal *, Internal *);
+extern void fpidiv(Internal *, Internal *, Internal *);
+extern int fpicmp(Internal *, Internal *);
+extern void fpinormalise(Internal*);
+
+/*
+ * fpimem.c
+ */
+extern void fpis2i(Internal *, void *);
+extern void fpid2i(Internal *, void *);
+extern void fpiw2i(Internal *, void *);
+extern void fpii2s(void *, Internal *);
+extern void fpii2d(void *, Internal *);
+extern void fpii2w(Word *, Internal *);

+ 502 - 0
sys/src/9/teg2/fpiarm.c

@@ -0,0 +1,502 @@
+/*
+ * this doesn't attempt to implement ARM floating-point properties
+ * that aren't visible in the Inferno environment.
+ * all arithmetic is done in double precision.
+ * the FP trap status isn't updated.
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+
+#include	"ureg.h"
+
+#include	"arm.h"
+#include	"fpi.h"
+
+/* undef this if correct kernel r13 isn't in Ureg;
+ * check calculation in fpiarm below
+ */
+
+#define ARM7500
+
+#define	REG(ur, x) (*(long*)(((char*)(ur))+roff[(x)]))
+#ifdef ARM7500
+#define	FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&7])
+#else
+#define	FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&(Nfpregs - 1)])
+#endif
+
+typedef struct FP2 FP2;
+typedef struct FP1 FP1;
+
+struct FP2 {
+	char*	name;
+	void	(*f)(Internal, Internal, Internal*);
+};
+
+struct FP1 {
+	char*	name;
+	void	(*f)(Internal*, Internal*);
+};
+
+enum {
+	N = 1<<31,
+	Z = 1<<30,
+	C = 1<<29,
+	V = 1<<28,
+	REGPC = 15,
+};
+
+enum {
+	fpemudebug = 0,
+};
+
+#undef OFR
+#define	OFR(X)	((ulong)&((Ureg*)0)->X)
+
+static	int	roff[] = {
+	OFR(r0), OFR(r1), OFR(r2), OFR(r3),
+	OFR(r4), OFR(r5), OFR(r6), OFR(r7),
+	OFR(r8), OFR(r9), OFR(r10), OFR(r11),
+	OFR(r12), OFR(r13), OFR(r14), OFR(pc),
+};
+
+static Internal fpconst[8] = {		/* indexed by op&7 (ARM 7500 FPA) */
+	/* s, e, l, h */
+	{0, 0x1, 0x00000000, 0x00000000}, /* 0.0 */
+	{0, 0x3FF, 0x00000000, 0x08000000},	/* 1.0 */
+	{0, 0x400, 0x00000000, 0x08000000},	/* 2.0 */
+	{0, 0x400, 0x00000000, 0x0C000000},	/* 3.0 */
+	{0, 0x401, 0x00000000, 0x08000000},	/* 4.0 */
+	{0, 0x401, 0x00000000, 0x0A000000},	/* 5.0 */
+	{0, 0x3FE, 0x00000000, 0x08000000},	/* 0.5 */
+	{0, 0x402, 0x00000000, 0x0A000000},	/* 10.0 */
+};
+
+/*
+ * arm binary operations
+ */
+
+static void
+fadd(Internal m, Internal n, Internal *d)
+{
+	(m.s == n.s? fpiadd: fpisub)(&m, &n, d);
+}
+
+static void
+fsub(Internal m, Internal n, Internal *d)
+{
+	m.s ^= 1;
+	(m.s == n.s? fpiadd: fpisub)(&m, &n, d);
+}
+
+static void
+fsubr(Internal m, Internal n, Internal *d)
+{
+	n.s ^= 1;
+	(n.s == m.s? fpiadd: fpisub)(&n, &m, d);
+}
+
+static void
+fmul(Internal m, Internal n, Internal *d)
+{
+	fpimul(&m, &n, d);
+}
+
+static void
+fdiv(Internal m, Internal n, Internal *d)
+{
+	fpidiv(&m, &n, d);
+}
+
+static void
+fdivr(Internal m, Internal n, Internal *d)
+{
+	fpidiv(&n, &m, d);
+}
+
+/*
+ * arm unary operations
+ */
+
+static void
+fmov(Internal *m, Internal *d)
+{
+	*d = *m;
+}
+
+static void
+fmovn(Internal *m, Internal *d)
+{
+	*d = *m;
+	d->s ^= 1;
+}
+
+static void
+fabsf(Internal *m, Internal *d)
+{
+	*d = *m;
+	d->s = 0;
+}
+
+static void
+frnd(Internal *m, Internal *d)
+{
+	short e;
+
+	(m->s? fsub: fadd)(fpconst[6], *m, d);
+	if(IsWeird(d))
+		return;
+	fpiround(d);
+	e = (d->e - ExpBias) + 1;
+	if(e <= 0)
+		SetZero(d);
+	else if(e > FractBits){
+		if(e < 2*FractBits)
+			d->l &= ~((1<<(2*FractBits - e))-1);
+	}else{
+		d->l = 0;
+		if(e < FractBits)
+			d->h &= ~((1<<(FractBits-e))-1);
+	}
+}
+
+/*
+ * ARM 7500 FPA opcodes
+ */
+
+static	FP1	optab1[16] = {	/* Fd := OP Fm */
+[0]	{"MOVF",	fmov},
+[1]	{"NEGF",	fmovn},
+[2]	{"ABSF",	fabsf},
+[3]	{"RNDF",	frnd},
+[4]	{"SQTF",	/*fsqt*/0},
+/* LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN all `deprecated' */
+/* URD and NRM aren't implemented */
+};
+
+static	FP2	optab2[16] = {	/* Fd := Fn OP Fm */
+[0]	{"ADDF",	fadd},
+[1]	{"MULF",	fmul},
+[2]	{"SUBF",	fsub},
+[3]	{"RSUBF",	fsubr},
+[4]	{"DIVF",	fdiv},
+[5]	{"RDIVF",	fdivr},
+/* POW, RPW deprecated */
+[8]	{"REMF",	/*frem*/0},
+[9]	{"FMF",	fmul},	/* fast multiply */
+[10]	{"FDV",	fdiv},	/* fast divide */
+[11]	{"FRD",	fdivr},	/* fast reverse divide */
+/* POL deprecated */
+};
+
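+/* compute ARM NZCV flag bits for comparing Fn with Fm; unordered operands yield V|C */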
+static ulong
+fcmp(Internal *n, Internal *m)
+{
+	int i;
+	Internal rm, rn;
+
+	if(IsWeird(m) || IsWeird(n)){
+		/* BUG: should trap if not masked */
+		return V|C;
+	}
+	rn = *n;
+	rm = *m;
+	fpiround(&rn);
+	fpiround(&rm);
+	i = fpicmp(&rn, &rm);
+	if(i > 0)
+		return C;
+	else if(i == 0)
+		return C|Z;
+	else
+		return N;
+}
+
+static void
+fld(void (*f)(Internal*, void*), int d, ulong ea, int n, FPsave *ufp)
+{
+	void *mem;
+
+	mem = (void*)ea;
+	(*f)(&FR(ufp, d), mem);
+	if(fpemudebug)
+		print("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d);
+}
+
+static void
+fst(void (*f)(void*, Internal*), ulong ea, int s, int n, FPsave *ufp)
+{
+	Internal tmp;
+	void *mem;
+
+	mem = (void*)ea;
+	tmp = FR(ufp, s);
+	if(fpemudebug)
+		print("MOV%c	F%d,#%lux\n", n==8? 'D': 'F', s, ea);
+	(*f)(mem, &tmp);
+}
+
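+/* true if ARM condition code c holds given psr flag bits cc */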
+static int
+condok(int cc, int c)
+{
+	switch(c){
+	case 0:	/* Z set */
+		return cc&Z;
+	case 1:	/* Z clear */
+		return (cc&Z) == 0;
+	case 2:	/* C set */
+		return cc&C;
+	case 3:	/* C clear */
+		return (cc&C) == 0;
+	case 4:	/* N set */
+		return cc&N;
+	case 5:	/* N clear */
+		return (cc&N) == 0;
+	case 6:	/* V set */
+		return cc&V;
+	case 7:	/* V clear */
+		return (cc&V) == 0;
+	case 8:	/* C set and Z clear */
+		return cc&C && (cc&Z) == 0;
+	case 9:	/* C clear or Z set */
+		return (cc&C) == 0 || cc&Z;
+	case 10:	/* N set and V set, or N clear and V clear */
+		return (~cc&(N|V))==0 || (cc&(N|V)) == 0;
+	case 11:	/* N set and V clear, or N clear and V set */
+		return (cc&(N|V))==N || (cc&(N|V))==V;
+	case 12:	/* Z clear, and either N set and V set or N clear and V clear */
+		return (cc&Z) == 0 && ((~cc&(N|V))==0 || (cc&(N|V))==0);
+	case 13:	/* Z set, or N set and V clear or N clear and V set */
+		return (cc&Z) || (cc&(N|V))==N || (cc&(N|V))==V;
+	case 14:	/* always */
+		return 1;
+	case 15:	/* never (reserved) */
+		return 0;
+	}
+	return 0;	/* not reached */
+}
+
+static void
+unimp(ulong pc, ulong op)
+{
+	char buf[60];
+
+	snprint(buf, sizeof(buf), "sys: fp: pc=%lux unimp fp 0x%.8lux", pc, op);
+	if(fpemudebug)
+		print("FPE: %s\n", buf);
+	error(buf);
+	/* no return */
+}
+
+static void
+fpemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
+{
+	int rn, rd, tag, o;
+	long off;
+	ulong ea;
+	Internal tmp, *fm, *fn;
+
+	/* note: would update fault status here if we noted numeric exceptions */
+
+	/*
+	 * LDF, STF; 10.1.1
+	 */
+	if(((op>>25)&7) == 6){
+		if(op & (1<<22))
+			unimp(pc, op);	/* packed or extended */
+		rn = (op>>16)&0xF;
+		off = (op&0xFF)<<2;
+		if((op & (1<<23)) == 0)
+			off = -off;
+		ea = REG(ur, rn);
+		if(rn == REGPC)
+			ea += 8;
+		if(op & (1<<24))
+			ea += off;
+		rd = (op>>12)&7;
+		if(op & (1<<20)){
+			if(op & (1<<15))
+				fld(fpid2i, rd, ea, 8, ufp);
+			else
+				fld(fpis2i, rd, ea, 4, ufp);
+		}else{
+			if(op & (1<<15))
+				fst(fpii2d, ea, rd, 8, ufp);
+			else
+				fst(fpii2s, ea, rd, 4, ufp);
+		}
+		if((op & (1<<24)) == 0)
+			ea += off;
+		if(op & (1<<21))
+			REG(ur, rn) = ea;
+		return;
+	}
+
+	/*
+	 * CPRT/transfer, 10.3
+	 */
+	if(op & (1<<4)){
+		rd = (op>>12) & 0xF;
+
+		/*
+		 * compare, 10.3.1
+		 */
+		if(rd == 15 && op & (1<<20)){
+			rn = (op>>16)&7;
+			fn = &FR(ufp, rn);
+			if(op & (1<<3)){
+				fm = &fpconst[op&7];
+				if(fpemudebug)
+					tag = 'C';
+			}else{
+				fm = &FR(ufp, op&7);
+				if(fpemudebug)
+					tag = 'F';
+			}
+			switch((op>>21)&7){
+			default:
+				unimp(pc, op);
+			case 4:	/* CMF: Fn :: Fm */
+			case 6:	/* CMFE: Fn :: Fm (with exception) */
+				ur->psr &= ~(N|C|Z|V);
+				ur->psr |= fcmp(fn, fm);
+				break;
+			case 5:	/* CNF: Fn :: -Fm */
+			case 7:	/* CNFE: Fn :: -Fm (with exception) */
+				tmp = *fm;
+				tmp.s ^= 1;
+				ur->psr &= ~(N|C|Z|V);
+				ur->psr |= fcmp(fn, &tmp);
+				break;
+			}
+			if(fpemudebug)
+				print("CMPF	%c%d,F%ld =%#lux\n",
+					tag, rn, op&7, ur->psr>>28);
+			return;
+		}
+
+		/*
+		 * other transfer, 10.3
+		 */
+		switch((op>>20)&0xF){
+		default:
+			unimp(pc, op);
+		case 0:	/* FLT */
+			rn = (op>>16) & 7;
+			fpiw2i(&FR(ufp, rn), &REG(ur, rd));
+			if(fpemudebug)
+				print("MOVW[FD]	R%d, F%d\n", rd, rn);
+			break;
+		case 1:	/* FIX */
+			if(op & (1<<3))
+				unimp(pc, op);
+			rn = op & 7;
+			tmp = FR(ufp, rn);
+			fpii2w(&REG(ur, rd), &tmp);
+			if(fpemudebug)
+				print("MOV[FD]W	F%d, R%d =%ld\n", rn, rd, REG(ur, rd));
+			break;
+		case 2:	/* FPSR := Rd */
+			ufp->status = REG(ur, rd);
+			if(fpemudebug)
+				print("MOVW	R%d, FPSR\n", rd);
+			break;
+		case 3:	/* Rd := FPSR */
+			REG(ur, rd) = ufp->status;
+			if(fpemudebug)
+				print("MOVW	FPSR, R%d\n", rd);
+			break;
+		case 4:	/* FPCR := Rd */
+			ufp->control = REG(ur, rd);
+			if(fpemudebug)
+				print("MOVW	R%d, FPCR\n", rd);
+			break;
+		case 5:	/* Rd := FPCR */
+			REG(ur, rd) = ufp->control;
+			if(fpemudebug)
+				print("MOVW	FPCR, R%d\n", rd);
+			break;
+		}
+		return;
+	}
+
+	/*
+	 * arithmetic
+	 */
+
+	if(op & (1<<3)){	/* constant */
+		fm = &fpconst[op&7];
+		if(fpemudebug)
+			tag = 'C';
+	}else{
+		fm = &FR(ufp, op&7);
+		if(fpemudebug)
+			tag = 'F';
+	}
+	rd = (op>>12)&7;
+	o = (op>>20)&0xF;
+	if(op & (1<<15)){	/* monadic */
+		FP1 *fp;
+		fp = &optab1[o];
+		if(fp->f == nil)
+			unimp(pc, op);
+		if(fpemudebug)
+			print("%s	%c%ld,F%d\n", fp->name, tag, op&7, rd);
+		(*fp->f)(fm, &FR(ufp, rd));
+	} else {
+		FP2 *fp;
+		fp = &optab2[o];
+		if(fp->f == nil)
+			unimp(pc, op);
+		rn = (op>>16)&7;
+		if(fpemudebug)
+			print("%s	%c%ld,F%d,F%d\n", fp->name, tag, op&7, rn, rd);
+		(*fp->f)(*fm, FR(ufp, rn), &FR(ufp, rd));
+	}
+}
+
+/*
+ * returns the number of FP instructions emulated
+ */
+int
+fpiarm(Ureg *ur)
+{
+	ulong op, o, cp;
+	FPsave *ufp;
+	int n;
+
+	if(up == nil)
+		panic("fpiarm not in a process");
+	ufp = &up->fpsave;
+	/*
+	 * because all the emulated fp state is in the proc structure,
+	 * it need not be saved/restored
+	 */
+	if(up->fpstate != FPactive){
+		assert(sizeof(Internal) <= sizeof(ufp->regs[0]));
+		up->fpstate = FPactive;
+		ufp->control = 0;
+		ufp->status = (0x01<<28)|(1<<12); /* sw emulation, alt. C flag */
+		for(n = 0; n < 8; n++)
+			FR(ufp, n) = fpconst[0];
+	}
+	for(n=0; ;n++){
+		validaddr(ur->pc, 4, 0);
+		op = *(ulong*)(ur->pc);
+		if(fpemudebug)
+			print("%#lux: %#8.8lux ", ur->pc, op);
+		o = (op>>24) & 0xF;
+		cp = (op>>8) & 0xF;
+		if(!ISFPAOP(cp, o))
+			break;
+		if(condok(ur->psr, op>>28))
+			fpemu(ur->pc, op, ur, ufp);
+		ur->pc += 4;		/* pretend cpu executed the instr */
+	}
+	if(fpemudebug)
+		print("\n");
+	return n;
+}

+ 136 - 0
sys/src/9/teg2/fpimem.c

@@ -0,0 +1,136 @@
+#include "fpi.h"
+
+/*
+ * the following routines depend on memory format, not the machine
+ */
+
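+/* unpack an ieee single-precision value at v into internal format */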
+void
+fpis2i(Internal *i, void *v)
+{
+	Single *s = v;
+
+	i->s = (*s & 0x80000000) ? 1: 0;
+	if((*s & ~0x80000000) == 0){
+		SetZero(i);
+		return;
+	}
+	i->e = ((*s>>23) & 0x00FF) - SingleExpBias + ExpBias;
+	i->h = (*s & 0x007FFFFF)<<(1+NGuardBits);
+	i->l = 0;
+	if(i->e)
+		i->h |= HiddenBit;
+	else
+		i->e++;
+}
+
+void
+fpid2i(Internal *i, void *v)
+{
+	Double *d = v;
+
+	i->s = (d->h & 0x80000000) ? 1: 0;
+	i->e = (d->h>>20) & 0x07FF;
+	i->h = ((d->h & 0x000FFFFF)<<(4+NGuardBits))|((d->l>>25) & 0x7F);
+	i->l = (d->l & 0x01FFFFFF)<<NGuardBits;
+	if(i->e)
+		i->h |= HiddenBit;
+	else
+		i->e++;
+}
+
+void
+fpiw2i(Internal *i, void *v)
+{
+	Word w, word = *(Word*)v;
+	short e;
+
+	if(word < 0){
+		i->s = 1;
+		word = -word;
+	}
+	else
+		i->s = 0;
+	if(word == 0){
+		SetZero(i);
+		return;
+	}
+	if(word > 0){
+		for (e = 0, w = word; w; w >>= 1, e++)
+			;
+	} else
+		e = 32;
+	if(e > FractBits){
+		i->h = word>>(e - FractBits);
+		i->l = (word & ((1<<(e - FractBits)) - 1))<<(2*FractBits - e);
+	}
+	else {
+		i->h = word<<(FractBits - e);
+		i->l = 0;
+	}
+	i->e = (e - 1) + ExpBias;
+}
+
+void
+fpii2s(void *v, Internal *i)
+{
+	short e;
+	Single *s = (Single*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;
+	*s = i->s ? 0x80000000: 0;
+	e = i->e;
+	if(e < ExpBias){
+		if(e <= (ExpBias - SingleExpBias))
+			return;
+		e = SingleExpBias - (ExpBias - e);
+	}
+	else  if(e >= (ExpBias + (SingleExpMax-SingleExpBias))){
+		*s |= SingleExpMax<<23;
+		return;
+	}
+	else
+		e = SingleExpBias + (e - ExpBias);
+	*s |= (e<<23)|(i->h>>(1+NGuardBits));
+}
+
+void
+fpii2d(void *v, Internal *i)
+{
+	Double *d = (Double*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;
+	i->l = ((i->h & GuardMask)<<25)|(i->l>>NGuardBits);
+	i->h >>= NGuardBits;
+	d->h = i->s ? 0x80000000: 0;
+	d->h |= (i->e<<20)|((i->h & 0x00FFFFFF)>>4);
+	d->l = (i->h<<28)|i->l;
+}
+
+void
+fpii2w(Word *word, Internal *i)
+{
+	Word w;
+	short e;
+
+	fpiround(i);
+	e = (i->e - ExpBias) + 1;
+	if(e <= 0)
+		w = 0;
+	else if(e > 31)
+		w = 0x7FFFFFFF;
+	else if(e > FractBits)
+		w = (i->h<<(e - FractBits))|(i->l>>(2*FractBits - e));
+	else
+		w = i->h>>(FractBits-e);
+	if(i->s)
+		w = -w;
+	*word = w;
+}

+ 25 - 0
sys/src/9/teg2/init9.s

@@ -0,0 +1,25 @@
+/*
+ * This is the same as the C programme:
+ *
+ *	void
+ *	main(char* argv0)
+ *	{
+ *		startboot(argv0, &argv0);
+ *	}
+ *
+ * It is in assembler because SB needs to be
+ * set and doing this in C drags in too many
+ * other routines.
+ */
+TEXT main(SB), 1, $8
+	MOVW	$setR12(SB), R12		/* load the SB */
+	MOVW	$boot(SB), R0
+
+	ADD	$12, R13, R1			/* pointer to 0(FP) */
+
+	MOVW	R0, 4(R13)			/* pass argc, argv */
+	MOVW	R1, 8(R13)
+
+	BL	startboot(SB)
+_loop:
+	B	_loop

+ 219 - 0
sys/src/9/teg2/io.h

@@ -0,0 +1,219 @@
+#pragma varargck	type	"T"	int
+#pragma varargck	type	"T"	uint
+
+/*
+ * PCI
+ */
+
+enum {
+	BusCBUS		= 0,		/* Corollary CBUS */
+	BusCBUSII,			/* Corollary CBUS II */
+	BusEISA,			/* Extended ISA */
+	BusFUTURE,			/* IEEE Futurebus */
+	BusINTERN,			/* Internal bus */
+	BusISA,				/* Industry Standard Architecture */
+	BusMBI,				/* Multibus I */
+	BusMBII,			/* Multibus II */
+	BusMCA,				/* Micro Channel Architecture */
+	BusMPI,				/* MPI */
+	BusMPSA,			/* MPSA */
+	BusNUBUS,			/* Apple Macintosh NuBus */
+	BusPCI,				/* Peripheral Component Interconnect */
+	BusPCMCIA,			/* PC Memory Card International Association */
+	BusTC,				/* DEC TurboChannel */
+	BusVL,				/* VESA Local bus */
+	BusVME,				/* VMEbus */
+	BusXPRESS,			/* Express System Bus */
+};
+
+#define MKBUS(t,b,d,f)	(((t)<<24)|(((b)&0xFF)<<16)|(((d)&0x1F)<<11)|(((f)&0x07)<<8))
+#define BUSFNO(tbdf)	(((tbdf)>>8)&0x07)
+#define BUSDNO(tbdf)	(((tbdf)>>11)&0x1F)
+#define BUSBNO(tbdf)	(((tbdf)>>16)&0xFF)
+#define BUSTYPE(tbdf)	((tbdf)>>24)
+#define BUSBDF(tbdf)	((tbdf)&0x00FFFF00)
+#define BUSUNKNOWN	(-1)
+
+enum {					/* type 0 & type 1 pre-defined header */
+	PciVID		= 0x00,		/* vendor ID */
+	PciDID		= 0x02,		/* device ID */
+	PciPCR		= 0x04,		/* command */
+	PciPSR		= 0x06,		/* status */
+	PciRID		= 0x08,		/* revision ID */
+	PciCCRp		= 0x09,		/* programming interface class code */
+	PciCCRu		= 0x0A,		/* sub-class code */
+	PciCCRb		= 0x0B,		/* base class code */
+	PciCLS		= 0x0C,		/* cache line size */
+	PciLTR		= 0x0D,		/* latency timer */
+	PciHDT		= 0x0E,		/* header type */
+	PciBST		= 0x0F,		/* BIST */
+
+	PciBAR0		= 0x10,		/* base address */
+	PciBAR1		= 0x14,
+
+	PciINTL		= 0x3C,		/* interrupt line */
+	PciINTP		= 0x3D,		/* interrupt pin */
+};
+
+/* ccrb (base class code) values; controller types */
+enum {
+	Pcibcpci1	= 0,		/* pci 1.0; no class codes defined */
+	Pcibcstore	= 1,		/* mass storage */
+	Pcibcnet	= 2,		/* network */
+	Pcibcdisp	= 3,		/* display */
+	Pcibcmmedia	= 4,		/* multimedia */
+	Pcibcmem	= 5,		/* memory */
+	Pcibcbridge	= 6,		/* bridge */
+	Pcibccomm	= 7,		/* simple comms (e.g., serial) */
+	Pcibcbasesys	= 8,		/* base system */
+	Pcibcinput	= 9,		/* input */
+	Pcibcdock	= 0xa,		/* docking stations */
+	Pcibcproc	= 0xb,		/* processors */
+	Pcibcserial	= 0xc,		/* serial bus (e.g., USB) */
+	Pcibcwireless	= 0xd,		/* wireless */
+	Pcibcintell	= 0xe,		/* intelligent i/o */
+	Pcibcsatcom	= 0xf,		/* satellite comms */
+	Pcibccrypto	= 0x10,		/* encryption/decryption */
+	Pcibcdacq	= 0x11,		/* data acquisition & signal proc. */
+};
+
+/* ccru (sub-class code) values; common cases only */
+enum {
+	/* mass storage */
+	Pciscscsi	= 0,		/* SCSI */
+	Pciscide	= 1,		/* IDE (ATA) */
+	Pciscsata	= 6,		/* SATA */
+
+	/* network */
+	Pciscether	= 0,		/* Ethernet */
+
+	/* display */
+	Pciscvga	= 0,		/* VGA */
+	Pciscxga	= 1,		/* XGA */
+	Pcisc3d		= 2,		/* 3D */
+
+	/* bridges */
+	Pcischostpci	= 0,		/* host/pci */
+	Pciscpcicpci	= 1,		/* pci/pci */
+
+	/* simple comms */
+	Pciscserial	= 0,		/* 16450, etc. */
+	Pciscmultiser	= 1,		/* multiport serial */
+
+	/* serial bus */
+	Pciscusb	= 3,		/* USB */
+};
+
+enum {					/* type 0 pre-defined header */
+	PciCIS		= 0x28,		/* cardbus CIS pointer */
+	PciSVID		= 0x2C,		/* subsystem vendor ID */
+	PciSID		= 0x2E,		/* subsystem ID */
+	PciEBAR0	= 0x30,		/* expansion ROM base address */
+	PciMGNT		= 0x3E,		/* burst period length */
+	PciMLT		= 0x3F,		/* maximum latency between bursts */
+};
+
+enum {					/* type 1 pre-defined header */
+	PciPBN		= 0x18,		/* primary bus number */
+	PciSBN		= 0x19,		/* secondary bus number */
+	PciUBN		= 0x1A,		/* subordinate bus number */
+	PciSLTR		= 0x1B,		/* secondary latency timer */
+	PciIBR		= 0x1C,		/* I/O base */
+	PciILR		= 0x1D,		/* I/O limit */
+	PciSPSR		= 0x1E,		/* secondary status */
+	PciMBR		= 0x20,		/* memory base */
+	PciMLR		= 0x22,		/* memory limit */
+	PciPMBR		= 0x24,		/* prefetchable memory base */
+	PciPMLR		= 0x26,		/* prefetchable memory limit */
+	PciPUBR		= 0x28,		/* prefetchable base upper 32 bits */
+	PciPULR		= 0x2C,		/* prefetchable limit upper 32 bits */
+	PciIUBR		= 0x30,		/* I/O base upper 16 bits */
+	PciIULR		= 0x32,		/* I/O limit upper 16 bits */
+	PciEBAR1	= 0x28,		/* expansion ROM base address */
+	PciBCR		= 0x3E,		/* bridge control register */
+};
+
+enum {					/* type 2 pre-defined header */
+	PciCBExCA	= 0x10,
+	PciCBSPSR	= 0x16,
+	PciCBPBN	= 0x18,		/* primary bus number */
+	PciCBSBN	= 0x19,		/* secondary bus number */
+	PciCBUBN	= 0x1A,		/* subordinate bus number */
+	PciCBSLTR	= 0x1B,		/* secondary latency timer */
+	PciCBMBR0	= 0x1C,
+	PciCBMLR0	= 0x20,
+	PciCBMBR1	= 0x24,
+	PciCBMLR1	= 0x28,
+	PciCBIBR0	= 0x2C,		/* I/O base */
+	PciCBILR0	= 0x30,		/* I/O limit */
+	PciCBIBR1	= 0x34,		/* I/O base */
+	PciCBILR1	= 0x38,		/* I/O limit */
+	PciCBSVID	= 0x40,		/* subsystem vendor ID */
+	PciCBSID	= 0x42,		/* subsystem ID */
+	PciCBLMBAR	= 0x44,		/* legacy mode base address */
+};
+
+enum {
+	/* bar bits */
+	Barioaddr	= 1<<0,		/* vs. memory addr */
+	Barwidthshift	= 1,
+	Barwidthmask	= MASK(2),
+	Barwidth32	= 0,
+	Barwidth64	= 2,
+	Barprefetch	= 1<<3,
+};
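The Bar* bits decode the low bits of a raw base address register. A minimal sketch of how a config-space scanner might classify one (illustrative only, not code from this port):

	/* classify a raw BAR value read from config space */
	static int
	barkind(ulong v)
	{
		if(v & Barioaddr)
			return 1;		/* i/o space */
		if(((v>>Barwidthshift) & Barwidthmask) == Barwidth64)
			return 64;		/* 64-bit memory; next BAR holds the high half */
		return 32;			/* 32-bit memory */
	}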
+
+struct Pcisiz
+{
+	Pcidev*	dev;
+	int	siz;
+	int	bar;
+};
+
+struct Pcidev
+{
+	int	tbdf;			/* type+bus+device+function */
+	ushort	vid;			/* vendor ID */
+	ushort	did;			/* device ID */
+
+	ushort	pcr;
+
+	uchar	rid;
+	uchar	ccrp;
+	uchar	ccru;
+	uchar	ccrb;
+	uchar	cls;
+	uchar	ltr;
+
+	struct {
+		ulong	bar;		/* base address */
+		int	size;
+	} mem[6];
+
+	struct {
+		ulong	bar;	
+		int	size;
+	} rom;
+	uchar	intl;			/* interrupt line */
+
+	Pcidev*	list;
+	Pcidev*	link;			/* next device on this bno */
+
+	Pcidev*	bridge;			/* down a bus */
+
+	int	pmrb;			/* power management register block */
+};
+
+enum {
+	/* vendor ids */
+	Vatiamd	= 0x1002,
+	Vintel	= 0x8086,
+	Vjmicron= 0x197b,
+	Vmarvell= 0x1b4b,
+	Vmyricom= 0x14c1,
+	Vnvidia	= 0x10de,
+	Vrealtek= 0x10ec,
+};
+
+#define PCIWINDOW	0
+#define PCIWADDR(va)	(PADDR(va)+PCIWINDOW)
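The vendor and class enumerations above are what drivers key on when they walk the bus. A hedged sketch, assuming the usual Plan 9 pcimatch()/Pcidev interfaces that this port's pci.c is expected to provide, of finding the Realtek ethernet controller that ether8169.c drives:

	/* illustrative only: report Realtek ethernet controllers */
	static void
	showrtl(void)
	{
		Pcidev *p;

		for(p = nil; (p = pcimatch(p, Vrealtek, 0)) != nil; )
			if(p->ccrb == Pcibcnet && p->ccru == Pciscether)
				print("ether at bus %d: bar0 %#lux size %d intl %d\n",
					BUSBNO(p->tbdf), p->mem[0].bar,
					p->mem[0].size, p->intl);
	}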

+ 410 - 0
sys/src/9/teg2/kbd.c

@@ -0,0 +1,410 @@
+/*
+ * simulated keyboard input for systems with none (except via uart or usb)
+ *
+ * gutted version of ps2 version from ../pc
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+enum {
+	Spec=		0xF800,		/* Unicode private space */
+	PF=		Spec|0x20,	/* num pad function key */
+	View=		Spec|0x00,	/* view (shift window up) */
+	KF=		0xF000,		/* function key (begin Unicode private space) */
+	Shift=		Spec|0x60,
+	Break=		Spec|0x61,
+	Ctrl=		Spec|0x62,
+	Latin=		Spec|0x63,
+	Caps=		Spec|0x64,
+	Num=		Spec|0x65,
+	Middle=		Spec|0x66,
+	Altgr=		Spec|0x67,
+	Kmouse=		Spec|0x100,
+	No=		0x00,		/* peter */
+
+	Home=		KF|13,
+	Up=		KF|14,
+	Pgup=		KF|15,
+	Print=		KF|16,
+	Left=		KF|17,
+	Right=		KF|18,
+	End=		KF|24,
+	Down=		View,
+	Pgdown=		KF|19,
+	Ins=		KF|20,
+	Del=		0x7F,
+	Scroll=		KF|21,
+
+	Nscan=	128,
+
+	Int=	0,			/* kbscans indices */
+	Ext,
+	Nscans,
+};
+
+/*
+ * The codes at 0x79 and 0x7b are produced by the PFU Happy Hacking keyboard.
+ * A 'standard' keyboard doesn't produce anything above 0x58.
+ */
+Rune kbtab[Nscan] = 
+{
+[0x00]	No,	0x1b,	'1',	'2',	'3',	'4',	'5',	'6',
+[0x08]	'7',	'8',	'9',	'0',	'-',	'=',	'\b',	'\t',
+[0x10]	'q',	'w',	'e',	'r',	't',	'y',	'u',	'i',
+[0x18]	'o',	'p',	'[',	']',	'\n',	Ctrl,	'a',	's',
+[0x20]	'd',	'f',	'g',	'h',	'j',	'k',	'l',	';',
+[0x28]	'\'',	'`',	Shift,	'\\',	'z',	'x',	'c',	'v',
+[0x30]	'b',	'n',	'm',	',',	'.',	'/',	Shift,	'*',
+[0x38]	Latin,	' ',	Ctrl,	KF|1,	KF|2,	KF|3,	KF|4,	KF|5,
+[0x40]	KF|6,	KF|7,	KF|8,	KF|9,	KF|10,	Num,	Scroll,	'7',
+[0x48]	'8',	'9',	'-',	'4',	'5',	'6',	'+',	'1',
+[0x50]	'2',	'3',	'0',	'.',	No,	No,	No,	KF|11,
+[0x58]	KF|12,	No,	No,	No,	No,	No,	No,	No,
+[0x60]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x68]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x70]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x78]	No,	View,	No,	Up,	No,	No,	No,	No,
+};
+
+Rune kbtabshift[Nscan] =
+{
+[0x00]	No,	0x1b,	'!',	'@',	'#',	'$',	'%',	'^',
+[0x08]	'&',	'*',	'(',	')',	'_',	'+',	'\b',	'\t',
+[0x10]	'Q',	'W',	'E',	'R',	'T',	'Y',	'U',	'I',
+[0x18]	'O',	'P',	'{',	'}',	'\n',	Ctrl,	'A',	'S',
+[0x20]	'D',	'F',	'G',	'H',	'J',	'K',	'L',	':',
+[0x28]	'"',	'~',	Shift,	'|',	'Z',	'X',	'C',	'V',
+[0x30]	'B',	'N',	'M',	'<',	'>',	'?',	Shift,	'*',
+[0x38]	Latin,	' ',	Ctrl,	KF|1,	KF|2,	KF|3,	KF|4,	KF|5,
+[0x40]	KF|6,	KF|7,	KF|8,	KF|9,	KF|10,	Num,	Scroll,	'7',
+[0x48]	'8',	'9',	'-',	'4',	'5',	'6',	'+',	'1',
+[0x50]	'2',	'3',	'0',	'.',	No,	No,	No,	KF|11,
+[0x58]	KF|12,	No,	No,	No,	No,	No,	No,	No,
+[0x60]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x68]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x70]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x78]	No,	Up,	No,	Up,	No,	No,	No,	No,
+};
+
+Rune kbtabesc1[Nscan] =
+{
+[0x00]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x08]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x10]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x18]	No,	No,	No,	No,	'\n',	Ctrl,	No,	No,
+[0x20]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x28]	No,	No,	Shift,	No,	No,	No,	No,	No,
+[0x30]	No,	No,	No,	No,	No,	'/',	No,	Print,
+[0x38]	Altgr,	No,	No,	No,	No,	No,	No,	No,
+[0x40]	No,	No,	No,	No,	No,	No,	Break,	Home,
+[0x48]	Up,	Pgup,	No,	Left,	No,	Right,	No,	End,
+[0x50]	Down,	Pgdown,	Ins,	Del,	No,	No,	No,	No,
+[0x58]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x60]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x68]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x70]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x78]	No,	Up,	No,	No,	No,	No,	No,	No,
+};
+
+Rune kbtabaltgr[Nscan] =
+{
+[0x00]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x08]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x10]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x18]	No,	No,	No,	No,	'\n',	Ctrl,	No,	No,
+[0x20]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x28]	No,	No,	Shift,	No,	No,	No,	No,	No,
+[0x30]	No,	No,	No,	No,	No,	'/',	No,	Print,
+[0x38]	Altgr,	No,	No,	No,	No,	No,	No,	No,
+[0x40]	No,	No,	No,	No,	No,	No,	Break,	Home,
+[0x48]	Up,	Pgup,	No,	Left,	No,	Right,	No,	End,
+[0x50]	Down,	Pgdown,	Ins,	Del,	No,	No,	No,	No,
+[0x58]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x60]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x68]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x70]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x78]	No,	Up,	No,	No,	No,	No,	No,	No,
+};
+
+Rune kbtabctrl[Nscan] =
+{
+[0x00]	No,	'', 	'', 	'', 	'', 	'', 	'', 	'', 
+[0x08]	'', 	'', 	'', 	'', 	'
', 	'', 	'\b',	'\t',
+[0x10]	'', 	'', 	'', 	'', 	'', 	'', 	'', 	'\t',
+[0x18]	'', 	'', 	'', 	'', 	'\n',	Ctrl,	'', 	'', 
+[0x20]	'', 	'', 	'', 	'\b',	'\n',	'', 	'', 	'', 
+[0x28]	'', 	No, 	Shift,	'', 	'', 	'', 	'', 	'', 
+[0x30]	'', 	'', 	'
', 	'', 	'', 	'', 	Shift,	'\n',
+[0x38]	Latin,	No, 	Ctrl,	'', 	'', 	'', 	'', 	'', 
+[0x40]	'', 	'', 	'', 	'
', 	'', 	'', 	'', 	'', 
+[0x48]	'', 	'', 	'
', 	'', 	'', 	'', 	'', 	'', 
+[0x50]	'', 	'', 	'', 	'', 	No,	No,	No,	'', 
+[0x58]	'', 	No,	No,	No,	No,	No,	No,	No,
+[0x60]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x68]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x70]	No,	No,	No,	No,	No,	No,	No,	No,
+[0x78]	No,	'', 	No,	'\b',	No,	No,	No,	No,
+};
+
+int mouseshifted;
+void (*kbdmouse)(int);
+
+static int kdebug;
+
+typedef struct Kbscan Kbscan;
+struct Kbscan {
+	int	esc1;
+	int	esc2;
+	int	alt;
+	int	altgr;
+	int	caps;
+	int	ctl;
+	int	num;
+	int	shift;
+	int	collecting;
+	int	nk;
+	Rune	kc[5];
+	int	buttons;
+};
+
+Kbscan kbscans[Nscans];	/* kernel and external scan code state */
+
+/*
+ * Scan code processing
+ */
+void
+kbdputsc(int c, int external)
+{
+	int i, keyup;
+	Kbscan *kbscan;
+
+	if(external)
+		kbscan = &kbscans[Ext];
+	else
+		kbscan = &kbscans[Int];
+
+	if(kdebug)
+		print("sc %x ms %d\n", c, mouseshifted);
+	/*
+	 *  e0's is the first of a 2 character sequence, e1 the first
+	 *  of a 3 character sequence (on the safari)
+	 */
+	if(c == 0xe0){
+		kbscan->esc1 = 1;
+		return;
+	} else if(c == 0xe1){
+		kbscan->esc2 = 2;
+		return;
+	}
+
+	keyup = c & 0x80;
+	c &= 0x7f;
+	if(c >= nelem(kbtab)){
+		c |= keyup;
+		if(c != 0xFF)	/* these come fairly often: CAPSLOCK U Y */
+			print("unknown key %ux\n", c);
+		return;
+	}
+
+	if(kbscan->esc1){
+		c = kbtabesc1[c];
+		kbscan->esc1 = 0;
+	} else if(kbscan->esc2){
+		kbscan->esc2--;
+		return;
+	} else if(kbscan->shift)
+		c = kbtabshift[c];
+	else if(kbscan->altgr)
+		c = kbtabaltgr[c];
+	else if(kbscan->ctl)
+		c = kbtabctrl[c];
+	else
+		c = kbtab[c];
+
+	if(kbscan->caps && c<='z' && c>='a')
+		c += 'A' - 'a';
+
+	/*
+	 *  keyup only important for shifts
+	 */
+	if(keyup){
+		switch(c){
+		case Latin:
+			kbscan->alt = 0;
+			break;
+		case Shift:
+			kbscan->shift = 0;
+			mouseshifted = 0;
+			if(kdebug)
+				print("shiftclr\n");
+			break;
+		case Ctrl:
+			kbscan->ctl = 0;
+			break;
+		case Altgr:
+			kbscan->altgr = 0;
+			break;
+		case Kmouse|1:
+		case Kmouse|2:
+		case Kmouse|3:
+		case Kmouse|4:
+		case Kmouse|5:
+			kbscan->buttons &= ~(1<<(c-Kmouse-1));
+			if(kbdmouse)
+				kbdmouse(kbscan->buttons);
+			break;
+		}
+		return;
+	}
+
+	/*
+	 *  normal character
+	 */
+	if(!(c & (Spec|KF))){
+		if(kbscan->ctl)
+			if(kbscan->alt && c == Del)
+				exit(0);
+		if(!kbscan->collecting){
+			kbdputc(kbdq, c);
+			return;
+		}
+		kbscan->kc[kbscan->nk++] = c;
+		c = latin1(kbscan->kc, kbscan->nk);
+		if(c < -1)	/* need more keystrokes */
+			return;
+		if(c != -1)	/* valid sequence */
+			kbdputc(kbdq, c);
+		else	/* dump characters */
+			for(i=0; i<kbscan->nk; i++)
+				kbdputc(kbdq, kbscan->kc[i]);
+		kbscan->nk = 0;
+		kbscan->collecting = 0;
+		return;
+	} else {
+		switch(c){
+		case Caps:
+			kbscan->caps ^= 1;
+			return;
+		case Num:
+			kbscan->num ^= 1;
+			return;
+		case Shift:
+			kbscan->shift = 1;
+			if(kdebug)
+				print("shift\n");
+			mouseshifted = 1;
+			return;
+		case Latin:
+			kbscan->alt = 1;
+			/*
+			 * VMware and Qemu use Ctl-Alt as the key combination
+			 * to make the VM give up keyboard and mouse focus.
+			 * This has the unfortunate side effect that when you
+			 * come back into focus, Plan 9 thinks you want to type
+			 * a compose sequence (you just typed alt). 
+			 *
+			 * As a clumsy hack around this, we look for ctl-alt
+			 * and don't treat it as the start of a compose sequence.
+			 */
+			if(!kbscan->ctl){
+				kbscan->collecting = 1;
+				kbscan->nk = 0;
+			}
+			return;
+		case Ctrl:
+			kbscan->ctl = 1;
+			return;
+		case Altgr:
+			kbscan->altgr = 1;
+			return;
+		case Kmouse|1:
+		case Kmouse|2:
+		case Kmouse|3:
+		case Kmouse|4:
+		case Kmouse|5:
+			kbscan->buttons |= 1<<(c-Kmouse-1);
+			if(kbdmouse)
+				kbdmouse(kbscan->buttons);
+			return;
+		case KF|11:
+			print("kbd debug on, F12 turns it off\n");
+			kdebug = 1;
+			break;
+		case KF|12:
+			kdebug = 0;
+			break;
+		}
+	}
+	kbdputc(kbdq, c);
+}
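kbdputsc() is the only way scan codes enter this machinery; whatever actually receives them (a uart or usb keyboard driver) hands them over one byte at a time. A sketch of a hypothetical caller (the function and its source of bytes are invented for illustration):

	/* feed one PC set-1 scan code from an external source; key-up
	 * codes have bit 7 set.  Ext keeps shift/ctl state separate
	 * from the internal table. */
	static void
	externkbd(uchar b)
	{
		kbdputsc(b, Ext);
	}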
+
+void
+kbdenable(void)
+{
+#ifdef notdef
+	kbdq = qopen(4*1024, 0, 0, 0);
+	if(kbdq == nil)
+		panic("kbdinit");
+	qnoblock(kbdq, 1);
+#endif
+	kbscans[Int].num = 0;
+}
+
+void
+kbdputmap(ushort m, ushort scanc, Rune r)
+{
+	if(scanc >= Nscan)
+		error(Ebadarg);
+	switch(m) {
+	default:
+		error(Ebadarg);
+	case 0:
+		kbtab[scanc] = r;
+		break;
+	case 1:
+		kbtabshift[scanc] = r;
+		break;
+	case 2:
+		kbtabesc1[scanc] = r;
+		break;
+	case 3:
+		kbtabaltgr[scanc] = r;
+		break;
+	case 4:	
+		kbtabctrl[scanc] = r;
+		break;
+	}
+}
+
+int
+kbdgetmap(uint offset, int *t, int *sc, Rune *r)
+{
+	if ((int)offset < 0)
+		error(Ebadarg);
+	*t = offset/Nscan;
+	*sc = offset%Nscan;
+	switch(*t) {
+	default:
+		return 0;
+	case 0:
+		*r = kbtab[*sc];
+		return 1;
+	case 1:
+		*r = kbtabshift[*sc];
+		return 1;
+	case 2:
+		*r = kbtabesc1[*sc];
+		return 1;
+	case 3:
+		*r = kbtabaltgr[*sc];
+		return 1;
+	case 4:
+		*r = kbtabctrl[*sc];
+		return 1;
+	}
+}
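kbdgetmap() presents the five tables as one flat array of Nscan-sized chunks, which is how a /dev/kbmap-style read loop would consume it. A sketch (the surrounding function is hypothetical):

	/* dump every (table, scancode, rune) triple until kbdgetmap
	 * runs off the end of the five tables */
	static void
	dumpkbmap(void)
	{
		int t, sc;
		uint off;
		Rune r;

		for(off = 0; kbdgetmap(off, &t, &sc, &r); off++)
			print("%d %d %d\n", t, sc, r);
	}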

+ 869 - 0
sys/src/9/teg2/l.s

@@ -0,0 +1,869 @@
+/*
+ * tegra 2 SoC machine assist
+ * dual arm cortex-a9 processors
+ *
+ * ARM v7 arch. ref. man. §B1.3.3 says that we don't need barriers
+ * around writes to CPSR.
+ *
+ * LDREX/STREX use an exclusive monitor, which is part of the data cache unit
+ * for the L1 cache, so they won't work right if the L1 cache is disabled.
+ */
+
+#include "arm.s"
+
+#define MAXMB	(KiB-1)			/* last MB has vectors */
+#define TMPSTACK (DRAMSIZE - 64*MiB)	/* used only during cpu startup */
+/* tas/cas strex debugging limits; started at 10000 */
+#define MAXSC 100000
+
+GLOBL	testmem(SB), $4
+
+/*
+ * Entered here from Das U-Boot or another Plan 9 kernel with MMU disabled.
+ * Until the MMU is enabled it is OK to call functions provided
+ * they are within ±32MiB relative and do not require any
+ * local variables or more than one argument (i.e. there is
+ * no stack).
+ */
+TEXT _start(SB), 1, $-4
+	CPSMODE(PsrMsvc)
+	CPSID					/* interrupts off */
+	CPSAE
+	SETEND(0)				/* little-endian */
+	BARRIERS
+	CLREX
+	SETZSB
+
+	MOVW	CPSR, R0
+	ORR	$PsrDfiq, R0
+	MOVW	R0, CPSR
+
+	/* invalidate i-cache and branch-target cache */
+	MTCP	CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+	BARRIERS
+
+	/* put cpus other than 0 to sleep until cpu 0 is ready */
+	CPUID(R1)
+	BEQ	cpuinit
+
+	/* not cpu 0 */
+PUTC('Z')
+PUTC('Z')
+	BARRIERS
+dowfi:
+	WFI
+	MOVW	cpus_proceed(SB), R1
+	CMP	$0, R1
+	BEQ	dowfi
+	BL	cpureset(SB)
+	B	dowfi
+
+cpuinit:
+	DELAY(printloopret, 1)
+PUTC('\r')
+	DELAY(printloopnl, 1)
+PUTC('\n')
+
+	DELAY(printloops, 1)
+PUTC('P')
+	/* disable the PL310 L2 cache on cpu0 */
+	MOVW	$(PHYSL2BAG+0x100), R1
+	MOVW	$0, R2
+	MOVW	R2, (R1)
+	BARRIERS
+	/* invalidate it */
+	MOVW	$((1<<16)-1), R2
+	MOVW	R2, 0x77c(R1)
+	BARRIERS
+
+	/*
+	 * disable my MMU & caches
+	 */
+	MFCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+	ORR	$CpCsbo, R1
+	BIC	$(CpCsbz|CpCmmu|CpCdcache|CpCicache|CpCpredict), R1
+	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+	BARRIERS
+
+	/* cortex-a9 model-specific initial configuration */
+	MOVW	$0, R1
+	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
+	BARRIERS
+
+PUTC('l')
+	DELAY(printloop3, 1)
+
+	MOVW	$testmem-KZERO(SB), R0
+	BL	memdiag(SB)
+
+PUTC('a')
+	/* clear Mach for cpu 0 */
+	MOVW	$PADDR(MACHADDR), R4		/* address of Mach for cpu 0 */
+	MOVW	$0, R0
+_machZ:
+	MOVW	R0, (R4)
+	ADD	$4, R4
+	CMP.S	$PADDR(L1+L1X(0)), R4	/* end at top-level page table */
+	BNE	_machZ
+
+	/*
+	 * set up the MMU page table for cpu 0
+	 */
+
+PUTC('n')
+	/* clear all PTEs first, to provide a default */
+//	MOVW	$PADDR(L1+L1X(0)), R4		/* address of PTE for 0 */
+_ptenv0:
+	ZEROPTE()
+	CMP.S	$PADDR(L1+16*KiB), R4
+	BNE	_ptenv0
+
+	DELAY(printloop4, 2)
+PUTC(' ')
+	/*
+	 * set up double map of PHYSDRAM, KZERO to PHYSDRAM for first few MBs,
+	 * but only if KZERO and PHYSDRAM differ.
+	 */
+	MOVW	$PTEDRAM, R2			/* PTE bits */
+	MOVW	$PHYSDRAM, R3			/* pa */
+	CMP	$KZERO, R3
+	BEQ	no2map
+	MOVW	$PADDR(L1+L1X(PHYSDRAM)), R4  /* address of PTE for PHYSDRAM */
+	MOVW	$DOUBLEMAPMBS, R5
+_ptdbl:
+	FILLPTE()
+	SUB.S	$1, R5
+	BNE	_ptdbl
+no2map:
+
+	/*
+	 * back up and fill in PTEs for memory at KZERO.
+	 * trimslice has 1 bank of 1GB at PHYSDRAM.
+	 * Map the maximum.
+	 */
+PUTC('9')
+	MOVW	$PTEDRAM, R2			/* PTE bits */
+	MOVW	$PHYSDRAM, R3
+	MOVW	$PADDR(L1+L1X(KZERO)), R4	/* start with PTE for KZERO */
+	MOVW	$MAXMB, R5			/* inner loop count (MBs) */
+_ptekrw:					/* set PTEs */
+	FILLPTE()
+	SUB.S	$1, R5				/* decrement inner loop count */
+	BNE	_ptekrw
+
+	/*
+	 * back up and fill in PTEs for MMIO
+	 */
+PUTC(' ')
+	MOVW	$PTEIO, R2			/* PTE bits */
+	MOVW	$PHYSIO, R3
+	MOVW	$PADDR(L1+L1X(VIRTIO)), R4	/* start with PTE for VIRTIO */
+_ptenv2:
+	FILLPTE()
+	CMP.S	$PADDR(L1+L1X(PHYSIOEND)), R4
+	BNE	_ptenv2
+
+	/* mmu.c sets up the trap vectors later */
+
+	MOVW	$(PHYSDRAM | TMPSTACK), SP
+
+	/*
+	 * learn l1 cache characteristics (on cpu 0 only).
+	 */
+
+	MOVW	$(1-1), R0			/* l1 */
+	SLL	$1, R0				/* R0 = (cache - 1) << 1 */
+	MTCP	CpSC, CpIDcssel, R0, C(CpID), C(CpIDid), 0 /* select l1 cache */
+	BARRIERS
+	MFCP	CpSC, CpIDcsize, R0, C(CpID), C(CpIDid), 0 /* get sets & ways */
+	MOVW	$CACHECONF, R8
+
+	/* get log2linelen into l1setsh */
+	MOVW	R0, R1
+	AND	$3, R1
+	ADD	$4, R1
+	/* l1 & l2 must have same cache line size, thus same set shift */
+	MOVW	R1, 4(R8)		/*  +4 = l1setsh */
+	MOVW	R1, 12(R8)		/* +12 = l2setsh */
+
+	/* get nways in R1 */
+	SRA	$3, R0, R1
+	AND	$((1<<10)-1), R1
+	ADD	$1, R1
+
+	/* get log2(nways) in R2 (assume nways is 2^n) */
+	MOVW	$(BI2BY*BY2WD - 1), R2
+	CLZ(1, 1)
+	SUB.S	R1, R2			/* R2 = 31 - clz(nways) */
+	ADD.EQ	$1, R2
+//	MOVW	R2, R3			/* print log2(nways): 2 */
+
+	MOVW	$32, R1
+	SUB	R2, R1			/* R1 = 32 - log2(nways) */
+	MOVW	R1, 0(R8)		/* +0 = l1waysh */
+
+	BARRIERS
+
+	MOVW	$testmem-KZERO(SB), R0
+	BL	memdiag(SB)
+
+	/*
+	 * the mpcore manual says invalidate d-cache, scu, pl310 in that order,
+	 * but says nothing about when to disable them.
+	 *
+	 * invalidate my caches before enabling
+	 */
+	BL	cachedinv(SB)
+	MTCP	CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+	BARRIERS
+
+PUTC('f')
+	/*
+	 * the mpcore manual says enable scu, d-cache, pl310, smp mode
+	 * in that order.  we have to reverse the last two; see main().
+	 */
+	BL	scuon(SB)
+
+	/*
+	 * turn my L1 cache on; need it for tas below.
+	 */
+	MFCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+	ORR	$(CpCdcache|CpCicache|CpCalign|CpCpredict), R1
+	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
+	BARRIERS
+
+	/* cortex-a9 model-specific configuration */
+	MOVW	$CpACl1pref, R1
+	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
+	BARRIERS
+
+	/* we're supposed to wait until l1 & l2 are on before calling smpon */
+
+PUTC('r')
+	/* set the domain access control */
+	MOVW	$Client, R0
+	BL	dacput(SB)
+
+	DELAY(printloop5, 2)
+PUTC('o')
+	BL	mmuinvalidate(SB)
+
+	MOVW	$0, R0
+	BL	pidput(SB)
+
+	/* set the translation table base */
+	MOVW	$PADDR(L1), R0
+	BL	ttbput(SB)
+
+PUTC('m')
+	/*
+	 * the little dance to turn the MMU on
+	 */
+	BL	cacheuwbinv(SB)
+	BL	mmuinvalidate(SB)
+	BL	mmuenable(SB)
+
+PUTC(' ')
+	/* warp the PC into the virtual map */
+	MOVW	$KZERO, R0
+	BL	_r15warp(SB)
+	/*
+	 * cpu 0 is now running at KZERO+something!
+	 */
+
+	BARRIERS
+	MOVW	$setR12(SB), R12		/* reload kernel SB */
+	MOVW	$(KZERO | TMPSTACK), SP
+
+	BL	cacheuwbinv(SB)
+
+PUTC('B')
+	MOVW	$PHYSDRAM, R3			/* pa */
+	CMP	$KZERO, R3
+	BEQ	no2unmap
+	/* undo double map of PHYSDRAM, KZERO & first few MBs */
+	MOVW	$(L1+L1X(PHYSDRAM)), R4		/* addr. of PTE for PHYSDRAM */
+	MOVW	$0, R0
+	MOVW	$DOUBLEMAPMBS, R5
+_ptudbl:
+	ZEROPTE()
+	SUB.S	$1, R5
+	BNE	_ptudbl
+no2unmap:
+
+	BL	cachedwb(SB)
+	BL	mmuinvalidate(SB)
+
+	/*
+	 * call main in C
+	 * pass Mach to main and set up the stack in it
+	 */
+	MOVW	$MACHADDR, R0			/* cpu 0 Mach */
+	MOVW	R0, R(MACH)			/* m = MACHADDR */
+	ADD	$(MACHSIZE-4), R0, SP		/* leave space for link register */
+PUTC('e')
+	BL	main(SB)			/* main(m) */
+limbo:
+	BL	idlehands(SB)
+	B	limbo
+
+	BL	_div(SB)			/* hack to load _div, etc. */
+
+
+/*
+ * called on cpu(s) other than 0, to start them, from _vrst
+ * (reset vector) in lexception.s, with interrupts disabled
+ * and in SVC mode, running in the zero segment (pc is in lower 256MB).
+ * SB is set for the zero segment.
+ */
+TEXT cpureset(SB), 1, $-4
+	CLREX
+	MOVW	CPSR, R0
+	ORR	$PsrDfiq, R0
+	MOVW	R0, CPSR
+
+	MOVW	$(PHYSDRAM | TMPSTACK), SP	/* stack for cache ops */
+
+	/* paranoia: turn my mmu and caches off. */
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	ORR	$CpCsbo, R0
+	BIC	$(CpCsbz|CpCmmu|CpCdcache|CpCicache|CpCpredict), R0
+	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	BARRIERS
+
+	/* cortex-a9 model-specific initial configuration */
+	MOVW	$0, R1
+	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl
+	ISB
+
+	/* invalidate my caches before enabling */
+	BL	cachedinv(SB)
+	MTCP	CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+	BARRIERS
+
+	/*
+	 * turn my L1 cache on; need it (and mmu) for tas below.
+	 * need branch prediction to make delay() timing right.
+	 */
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	ORR	$(CpCdcache|CpCicache|CpCalign|CpCpredict), R0
+	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	BARRIERS
+
+	/* enable l1 caches coherency, at minimum for ldrex/strex. */
+	BL	smpon(SB)
+	BARRIERS
+
+	/*
+	 * we used to write to PHYSEVP here; now we do it in C, which offers
+	 * more assurance that we're up and won't go off the rails.
+	 */
+
+	/* set the domain access control */
+	MOVW	$Client, R0
+	BL	dacput(SB)
+
+	BL	setmach(SB)
+
+	/*
+	 * redo double map of PHYSDRAM, KZERO in this cpu's ptes.
+	 * mmuinit will undo this later.
+	 */
+
+	MOVW	$PHYSDRAM, R3
+	CMP	$KZERO, R3
+	BEQ	noun2map
+
+	/* launchinit set m->mmul1 to a copy of cpu0's l1 page table */
+	MOVW	12(R(MACH)), R0		/* m->mmul1 (virtual addr) */
+	BL	k2paddr(SB)		/* R0 = PADDR(m->mmul1) */
+	ADD	$L1X(PHYSDRAM), R0, R4	/* R4 = address of PHYSDRAM's PTE */
+
+	MOVW	$PTEDRAM, R2		/* PTE bits */
+	MOVW	$DOUBLEMAPMBS, R5
+_ptrdbl:
+	ORR	R3, R2, R1		/* first identity-map 0 to 0, etc. */
+	MOVW	R1, (R4)
+	ADD	$4, R4			/* bump PTE address */
+	ADD	$MiB, R3		/* bump pa */
+	SUB.S	$1, R5
+	BNE	_ptrdbl
+noun2map:
+
+	MOVW	$0, R0
+	BL	pidput(SB)
+
+	/* set the translation table base to PADDR(m->mmul1) */
+	MOVW	12(R(MACH)), R0		/* m->mmul1 */
+	BL	k2paddr(SB)		/* R0 = PADDR(m->mmul1) */
+	BL	ttbput(SB)
+
+	/*
+	 * the little dance to turn the MMU on
+	 */
+	BL	cacheuwbinv(SB)
+	BL	mmuinvalidate(SB)
+	BL	mmuenable(SB)
+
+	/*
+	 * mmu is now on, with l1 pt at m->mmul1.
+	 */
+
+	/* warp the PC into the virtual map */
+	MOVW	$KZERO, R0
+	BL	_r15warp(SB)
+
+	/*
+	 * now running at KZERO+something!
+	 */
+
+	BARRIERS
+	MOVW	$setR12(SB), R12	/* reload kernel's SB */
+	MOVW	$(KZERO | TMPSTACK), SP	/* stack for cache ops*/
+	BL	setmach(SB)
+	ADD	$(MACHSIZE-4), R(MACH), SP /* leave space for link register */
+	BL	cpustart(SB)
+
+
+/*
+ * converts virtual address in R0 to a physical address.
+ */
+TEXT k2paddr(SB), 1, $-4
+	BIC	$KSEGM, R0
+	ADD	$PHYSDRAM, R0
+	RET
+
+/*
+ * converts physical address in R0 to a virtual address.
+ */
+TEXT p2kaddr(SB), 1, $-4
+	BIC	$KSEGM, R0
+	ORR	$KZERO, R0
+	RET
+
+/*
+ * converts address in R0 to the current segment, as defined by the PC.
+ * clobbers R1.
+ */
+TEXT addr2pcseg(SB), 1, $-4
+	BIC	$KSEGM, R0
+	MOVW	PC, R1
+	AND	$KSEGM, R1		/* segment PC is in */
+	ORR	R1, R0
+	RET
+
+/* sets R(MACH), preserves other registers */
+TEXT setmach(SB), 1, $-4
+	MOVM.DB.W [R14], (R13)
+	MOVM.DB.W [R0-R2], (R13)
+
+	CPUID(R2)
+	SLL	$2, R2			/* convert to word index */
+
+	MOVW	$machaddr(SB), R0
+	BL	addr2pcseg(SB)
+	ADD	R2, R0			/* R0 = &machaddr[cpuid] */
+	MOVW	(R0), R0		/* R0 = machaddr[cpuid] */
+	CMP	$0, R0
+	MOVW.EQ	$MACHADDR, R0		/* paranoia: use MACHADDR if 0 */
+	BL	addr2pcseg(SB)
+	MOVW	R0, R(MACH)		/* m = machaddr[cpuid] */
+
+	MOVM.IA.W (R13), [R0-R2]
+	MOVM.IA.W (R13), [R14]
+	RET
+
+
+/*
+ * memory diagnostic
+ * tests word at (R0); modifies R7 and R8
+ */
+TEXT memdiag(SB), 1, $-4
+	MOVW	$0xabcdef89, R7
+	MOVW	R7, (R0)
+	MOVW	(R0), R8
+	CMP	R7, R8
+	BNE	mbuggery		/* broken memory */
+
+	BARRIERS
+	MOVW	(R0), R8
+	CMP	R7, R8
+	BNE	mbuggery		/* broken memory */
+
+	MOVW	$0, R7
+	MOVW	R7, (R0)
+	BARRIERS
+	RET
+
+/* modifies R0, R3-R6 */
+TEXT printhex(SB), 1, $-4
+	MOVW	R0, R3
+	PUTC('0')
+	PUTC('x')
+	MOVW	$(32-4), R5	/* bits to shift right */
+nextdig:
+	SRA	R5, R3, R4
+	AND	$0xf, R4
+	ADD	$'0', R4
+	CMP.S	$'9', R4
+	BLE	nothex		/* if R4 <= 9, jump */
+	ADD	$('a'-('9'+1)), R4
+nothex:
+	PUTC(R4)
+	SUB.S	$4, R5
+	BGE	nextdig
+
+	PUTC('\r')
+	PUTC('\n')
+	DELAY(proct, 50)
+	RET
+
+mbuggery:
+	PUTC('?')
+	PUTC('m')
+mtopanic:
+	MOVW	$membmsg(SB), R0
+	MOVW	R14, R1		/* get R14's segment ... */
+	AND	$KSEGM, R1
+	BIC	$KSEGM,	R0	/* strip segment from address */
+	ORR	R1, R0		/* combine them */
+	BL	panic(SB)
+mbugloop:
+	WFI
+	B	mbugloop
+
+	DATA	membmsg+0(SB)/8,$"memory b"
+	DATA	membmsg+8(SB)/6,$"roken\z"
+	GLOBL	membmsg(SB), $14
+
+TEXT _r15warp(SB), 1, $-4
+	BIC	$KSEGM, R14			/* link reg, will become PC */
+	ORR	R0, R14
+	BIC	$KSEGM, SP
+	ORR	R0, SP
+	RET
+
+/*
+ * `single-element' cache operations.
+ * in arm arch v7, they operate on all architected cache levels, so separate
+ * l2 functions are usually unnecessary.
+ */
+
+TEXT cachedwbse(SB), $-4			/* D writeback SE */
+	MOVW	R0, R2
+
+	MOVW	CPSR, R3
+	CPSID					/* splhi */
+
+	BARRIERS			/* force outstanding stores to cache */
+	MOVW	R2, R0
+	MOVW	4(FP), R1
+	ADD	R0, R1				/* R1 is end address */
+	BIC	$(CACHELINESZ-1), R0		/* cache line start */
+_dwbse:
+	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEse
+	ADD	$CACHELINESZ, R0
+	CMP.S	R0, R1
+	BGT	_dwbse
+	B	_wait
+
+TEXT cachedwbinvse(SB), $-4			/* D writeback+invalidate SE */
+	MOVW	R0, R2
+
+	MOVW	CPSR, R3
+	CPSID					/* splhi */
+
+	BARRIERS			/* force outstanding stores to cache */
+	MOVW	R2, R0
+	MOVW	4(FP), R1
+	ADD	R0, R1				/* R1 is end address */
+	BIC	$(CACHELINESZ-1), R0		/* cache line start */
+_dwbinvse:
+	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEse
+	ADD	$CACHELINESZ, R0
+	CMP.S	R0, R1
+	BGT	_dwbinvse
+_wait:						/* drain write buffer */
+	BARRIERS
+
+	MOVW	R3, CPSR			/* splx */
+	RET
+
+TEXT cachedinvse(SB), $-4			/* D invalidate SE */
+	MOVW	R0, R2
+
+	MOVW	CPSR, R3
+	CPSID					/* splhi */
+
+	BARRIERS			/* force outstanding stores to cache */
+	MOVW	R2, R0
+	MOVW	4(FP), R1
+	ADD	R0, R1				/* R1 is end address */
+
+	/*
+	 * if start & end addresses are not on cache-line boundaries,
+	 * flush first & last cache lines before invalidating.
+	 */
+	AND.S	$(CACHELINESZ-1), R0, R4
+	BEQ	stok
+	BIC	$(CACHELINESZ-1), R0, R4	/* cache line start */
+	MTCP	CpSC, 0, R4, C(CpCACHE), C(CpCACHEwb), CpCACHEse
+stok:
+	AND.S	$(CACHELINESZ-1), R1, R4
+	BEQ	endok
+	BIC	$(CACHELINESZ-1), R1, R4	/* cache line start */
+	MTCP	CpSC, 0, R4, C(CpCACHE), C(CpCACHEwb), CpCACHEse
+endok:
+	BIC	$(CACHELINESZ-1), R0		/* cache line start */
+_dinvse:
+	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEse
+	ADD	$CACHELINESZ, R0
+	CMP.S	R0, R1
+	BGT	_dinvse
+	B	_wait
+
+/*
+ *  enable mmu and high vectors
+ */
+TEXT mmuenable(SB), 1, $-4
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	ORR	$CpCmmu, R0
+	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	BARRIERS
+	RET
+
+TEXT mmudisable(SB), 1, $-4
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	BIC	$CpCmmu, R0
+	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	BARRIERS
+	RET
+
+/*
+ * If one of these MCR instructions crashes or hangs the machine,
+ * check your Level 1 page table (at TTB) closely.
+ */
+TEXT mmuinvalidate(SB), $-4			/* invalidate all */
+	MOVW	CPSR, R2
+	CPSID					/* interrupts off */
+	BARRIERS
+	MTCP	CpSC, 0, PC, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+	MOVW	R2, CPSR			/* interrupts restored */
+	RET
+
+TEXT mmuinvalidateaddr(SB), $-4			/* invalidate single entry */
+	MTCP	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse
+	BARRIERS
+	RET
+
+TEXT cpidget(SB), 1, $-4			/* main ID */
+	MFCP	CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDid
+	RET
+
+TEXT cpctget(SB), 1, $-4			/* cache type */
+	MFCP	CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDct
+	RET
+
+TEXT controlget(SB), 1, $-4			/* system control (sctlr) */
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
+	RET
+
+TEXT ttbget(SB), 1, $-4				/* translation table base */
+	MFCP	CpSC, 0, R0, C(CpTTB), C(0), CpTTB0
+	RET
+
+TEXT ttbput(SB), 1, $-4				/* translation table base */
+	MOVW	CPSR, R2
+	CPSID
+	MOVW	R0, R1
+	BARRIERS		/* finish prior accesses before changing ttb */
+	MTCP	CpSC, 0, R1, C(CpTTB), C(0), CpTTB0
+	MTCP	CpSC, 0, R1, C(CpTTB), C(0), CpTTB1	/* non-secure too */
+	MOVW	$0, R0
+	MTCP	CpSC, 0, R0, C(CpTTB), C(0), CpTTBctl
+	BARRIERS
+	MOVW	R2, CPSR
+	RET
+
+TEXT dacget(SB), 1, $-4				/* domain access control */
+	MFCP	CpSC, 0, R0, C(CpDAC), C(0)
+	RET
+
+TEXT dacput(SB), 1, $-4				/* domain access control */
+	MOVW	R0, R1
+	BARRIERS
+	MTCP	CpSC, 0, R1, C(CpDAC), C(0)
+	ISB
+	RET
+
+TEXT fsrget(SB), 1, $-4				/* fault status */
+	MFCP	CpSC, 0, R0, C(CpFSR), C(0), CpDFSR
+	RET
+
+TEXT farget(SB), 1, $-4				/* fault address */
+	MFCP	CpSC, 0, R0, C(CpFAR), C(0), CpDFAR
+	RET
+
+TEXT getpsr(SB), 1, $-4
+	MOVW	CPSR, R0
+	RET
+
+TEXT getscr(SB), 1, $-4				/* secure configuration */
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(CpCONTROLscr), CpSCRscr
+	RET
+
+TEXT pidget(SB), 1, $-4				/* address translation pid */
+	MFCP	CpSC, 0, R0, C(CpPID), C(0x0)
+	RET
+
+TEXT pidput(SB), 1, $-4				/* address translation pid */
+	MTCP	CpSC, 0, R0, C(CpPID), C(0), 0	/* pid, v7a deprecated */
+	MTCP	CpSC, 0, R0, C(CpPID), C(0), 1	/* context id, errata 754322 */
+	ISB
+	RET
+
+/*
+ * access to yet more coprocessor registers
+ */
+
+TEXT getauxctl(SB), 1, $-4		/* get cortex-a9 aux. ctl. */
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpAuxctl
+	RET
+
+TEXT putauxctl(SB), 1, $-4		/* put cortex-a9 aux. ctl. */
+	BARRIERS
+	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpAuxctl
+	BARRIERS
+	RET
+
+TEXT getclvlid(SB), 1, $-4
+	MFCP	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), CpIDclvlid
+	RET
+
+TEXT getcyc(SB), 1, $-4
+	MFCP	CpSC, 0, R0, C(CpCLD), C(CpCLDcyc), 0
+	RET
+
+TEXT getdebug(SB), 1, $-4		/* get cortex-a9 debug enable register */
+	MFCP	CpSC, 0, R0, C(1), C(1), 1
+	RET
+
+TEXT getpc(SB), 1, $-4
+	MOVW	PC, R0
+	RET
+
+TEXT getsb(SB), 1, $-4
+	MOVW	R12, R0
+	RET
+
+TEXT setsp(SB), 1, $-4
+	MOVW	R0, SP
+	RET
+
+
+TEXT splhi(SB), 1, $-4
+	MOVW	CPSR, R0			/* return old CPSR */
+	CPSID					/* turn off interrupts */
+	CMP.S	$0, R(MACH)
+	MOVW.NE	R14, 4(R(MACH))			/* save caller pc in m->splpc */
+	RET
+
+TEXT spllo(SB), 1, $-4			/* start marker for devkprof.c */
+	MOVW	CPSR, R0			/* return old CPSR */
+	MOVW	$0, R1
+	CMP.S	R1, R(MACH)
+	MOVW.NE	R1, 4(R(MACH))			/* clear m->splpc */
+	CPSIE
+	RET
+
+TEXT splx(SB), 1, $-4
+	MOVW	CPSR, R3			/* must return old CPSR */
+	CPSID
+
+	CMP.S	$0, R(MACH)
+	MOVW.NE	R14, 4(R(MACH))			/* save caller pc in m->splpc */
+	MOVW	R0, CPSR			/* reset interrupt level */
+	MOVW	R3, R0				/* must return old CPSR */
+	RET
+
+TEXT spldone(SB), 1, $0				/* end marker for devkprof.c */
+	RET
+
+TEXT islo(SB), 1, $-4
+	MOVW	CPSR, R0
+	AND	$(PsrDirq), R0
+	EOR	$(PsrDirq), R0
+	RET
+
+TEXT clz(SB), $-4
+	CLZ(0, 0)			/* 0 is R0 */
+	RET
+
+TEXT setlabel(SB), 1, $-4
+	MOVW	SP, 0(R0)
+	MOVW	R14, 4(R0)		/* pc */
+	MOVW	$0, R0
+	RET
+
+TEXT gotolabel(SB), 1, $-4
+	MOVW	0(R0), SP
+	MOVW	4(R0), R14		/* pc */
+	MOVW	$1, R0
+	RET
+
+TEXT getcallerpc(SB), 1, $-4
+	MOVW	0(SP), R0
+	RET
+
+TEXT wfi(SB), $-4
+	MOVW	CPSR, R1
+	/*
+	 * an interrupt should break us out of wfi.  masking interrupts
+	 * slows interrupt response slightly but prevents recursion.
+	 */
+//	CPSIE
+	CPSID
+
+	BARRIERS
+	WFI
+
+	MOVW	R1, CPSR
+	RET
+
+TEXT coherence(SB), $-4
+	BARRIERS
+	RET
+
+GLOBL cpus_proceed+0(SB), $4
+
+#include "cache.v7.s"
+
+TEXT	tas(SB), $-4			/* _tas(ulong *) */
+	/* returns old (R0) after modifying (R0) */
+	MOVW	R0,R5
+	DMB
+
+	MOVW	$1,R2		/* new value of (R0) */
+	MOVW	$MAXSC, R8
+tas1:
+	LDREX(5,7)		/* LDREX 0(R5),R7 */
+	CMP.S	$0, R7		/* old value non-zero (lock taken)? */
+	BNE	lockbusy	/* we lose */
+	SUB.S	$1, R8
+	BEQ	lockloop2
+	STREX(2,5,4)		/* STREX R2,(R5),R4 */
+	CMP.S	$0, R4
+	BNE	tas1		/* strex failed? try again */
+	DMB
+	B	tas0
+lockloop2:
+	PUTC('?')
+	PUTC('l')
+	PUTC('t')
+	BL	abort(SB)
+lockbusy:
+	CLREX
+tas0:
+	MOVW	R7, R0		/* return old value */
+	RET
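tas() is the primitive underneath the port's Lock implementation; the MAXSC counter just aborts noisily if STREX never succeeds. The caller's side, roughly as port/taslock.c uses it (a simplified sketch; the real loop also does profiling and deadlock detection):

	void
	locksketch(Lock *l)
	{
		while(tas(&l->key) != 0)
			while(l->key != 0)
				;	/* spin read-only until it looks free, then retry */
	}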

+ 325 - 0
sys/src/9/teg2/lexception.s

@@ -0,0 +1,325 @@
+/*
+ * arm exception handlers
+ */
+#include "arm.s"
+
+#undef B					/* B is for 'botch' */
+
+/*
+ *  exception vectors, copied by trapinit() to somewhere useful
+ */
+TEXT vectors(SB), 1, $-4
+	MOVW	0x18(R15), R15		/* reset */
+	MOVW	0x18(R15), R15		/* undefined instr. */
+	MOVW	0x18(R15), R15		/* SWI & SMC */
+	MOVW	0x18(R15), R15		/* prefetch abort */
+	MOVW	0x18(R15), R15		/* data abort */
+	MOVW	0x18(R15), R15		/* hypervisor call */
+	MOVW	0x18(R15), R15		/* IRQ */
+	MOVW	0x18(R15), R15		/* FIQ */
+
+TEXT vtable(SB), 1, $-4
+	WORD	$_vrst-KZERO(SB)	/* reset, in svc mode already */
+	WORD	$_vund(SB)		/* undefined, switch to svc mode */
+	WORD	$_vsvc(SB)		/* swi, in svc mode already */
+	WORD	$_vpabt(SB)		/* prefetch abort, switch to svc mode */
+	WORD	$_vdabt(SB)		/* data abort, switch to svc mode */
+	WORD	$_vhype(SB)		/* hypervisor call */
+	WORD	$_virq(SB)		/* IRQ, switch to svc mode */
+	WORD	$_vfiq(SB)		/* FIQ, switch to svc mode */
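Each vector instruction loads the PC from 0x18(R15); since the ARM pipeline makes R15 read as the instruction's address plus 8, that fetches the word 0x20 bytes further on, i.e. the matching vtable entry, so the two 8-word tables must be copied out together, back to back. A sketch of what trapinit()/mmu.c is expected to do with them (HVECTORS, the 0xffff0000 high-vector page, and the cache call are assumptions here, not code from this port):

	extern ulong vectors[], vtable[];

	static void
	copyvecs(void)
	{
		ulong *vpage;

		vpage = (ulong*)HVECTORS;			/* assumed high-vector page */
		memmove(vpage, vectors, 8*sizeof(ulong));	/* 8 load-pc instructions */
		memmove(vpage+8, vtable, 8*sizeof(ulong));	/* 8 handler addresses */
		cachedwbse(vpage, 16*sizeof(ulong));		/* make the copies visible */
	}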
+
+/*
+ * reset - start additional cpus
+ */
+TEXT _vrst(SB), 1, $-4
+	/* running in the zero segment (pc is lower 256MB) */
+	CPSMODE(PsrMsvc)		/* should be redundant */
+	CPSID
+	CPSAE
+	SETEND(0)			/* force little-endian */
+	BARRIERS
+	SETZSB
+	MOVW	$PsrMsvc, SPSR
+	MOVW	$0, R14
+
+	/* invalidate i-cache and branch-target cache */
+	MTCP	CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
+	BARRIERS
+
+	BL	cpureset(SB)
+spin:
+	B	spin
+
+/*
+ * system call
+ */
+TEXT _vsvc(SB), 1, $-4			/* SWI */
+	CLREX
+	BARRIERS
+	/* stack is m->stack */
+	MOVW.W	R14, -4(R13)		/* ureg->pc = interrupted PC */
+	MOVW	SPSR, R14		/* ureg->psr = SPSR */
+	MOVW.W	R14, -4(R13)		/* ... */
+	MOVW	$PsrMsvc, R14		/* ureg->type = PsrMsvc */
+	MOVW.W	R14, -4(R13)		/* ... */
+
+	/* avoid the ambiguity described in notes/movm.w. */
+	MOVM.DB.S [R0-R14], (R13)	/* save user level registers */
+	SUB	$(NREGS*4), R13		/* r13 now points to ureg */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	/*
+	 * set up m and up registers since user registers could contain anything
+	 */
+	CPUID(R1)
+	SLL	$2, R1			/* convert to word index */
+	MOVW	$machaddr(SB), R2
+	ADD	R1, R2
+	MOVW	(R2), R(MACH)		/* m = machaddr[cpuid] */
+	CMP	$0, R(MACH)
+	MOVW.EQ	$MACHADDR, R0		/* paranoia: use MACHADDR if 0 */
+	MOVW	8(R(MACH)), R(USER)	/* up = m->proc */
+
+	MOVW	((NREGS+1)*4)(R13), R2	/* saved SPSR (user mode) */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$8, R13			/* space for argument+link */
+
+	BL	syscall(SB)
+	/*
+	 * caller saves on plan 9, so registers other than 9, 10, 13 & 14
+	 * may have been trashed when we get here.
+	 */
+
+	MOVW	$setR12(SB), R12	/* reload kernel's SB */
+
+	ADD	$(8+4*NREGS), R13	/* make r13 point to ureg->type */
+
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+/*
+ * return from user-mode exception.
+ * expects new SPSR in R0.  R13 must point to ureg->type.
+ */
+_rfue:
+TEXT rfue(SB), 1, $-4
+	MOVW	R0, SPSR		/* ... */
+
+	/*
+	 * order on stack is type, psr, pc, but RFEV7 needs pc, psr.
+	 * step on type and previous word to hold temporary values.
+	 * we could instead change the order in which psr & pc are pushed.
+	 */
+	MOVW	4(R13), R1		/* psr */
+	MOVW	8(R13), R2		/* pc */
+	MOVW	R2, 4(R13)		/* pc */
+	MOVW	R1, 8(R13)		/* psr */
+
+	MOVM.DB.S (R13), [R0-R14]	/* restore user registers */
+	ADD	$4, R13			/* pop type, sp -> pc */
+	RFEV7W(13)
+
+
+TEXT _vund(SB), 1, $-4			/* undefined */
+	/* sp is m->sund */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMund, R0
+	B	_vswitch
+
+TEXT _vpabt(SB), 1, $-4			/* prefetch abort */
+	/* sp is m->sabt */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMabt, R0		/* r0 = type */
+	B	_vswitch
+
+TEXT _vdabt(SB), 1, $-4			/* data abort */
+	/* sp is m->sabt */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$(PsrMabt+1), R0	/* r0 = type */
+	B	_vswitch
+
+TEXT _virq(SB), 1, $-4			/* IRQ */
+	/* sp is m->sirq */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMirq, R0		/* r0 = type */
+	B	_vswitch
+
+	/*
+	 *  come here with type in R0 and R13 pointing above saved [r0-r4].
+	 *  we'll switch to SVC mode and then call trap.
+	 */
+_vswitch:
+// TEXT _vswtch(SB), 1, $-4		/* make symbol visible to debuggers */
+	CLREX
+	BARRIERS
+	MOVW	SPSR, R1		/* save SPSR for ureg */
+	/*
+	 * R12 needs to be set before using PsrMbz, so BIGENDCHECK code has
+	 * been moved below.
+	 */
+	MOVW	R14, R2			/* save interrupted pc for ureg */
+	MOVW	R13, R3			/* save pointer to where the original [R0-R4] are */
+
+	/*
+	 * switch processor to svc mode.  this switches the banked registers
+	 * (r13 [sp] and r14 [link]) to those of svc mode (so we must be sure
+	 * to never get here already in svc mode).
+	 */
+	CPSMODE(PsrMsvc)		/* switch! */
+	CPSID
+
+	AND.S	$0xf, R1, R4		/* interrupted code kernel or user? */
+	BEQ	_userexcep
+
+	/*
+	 * here for trap from SVC mode
+	 */
+
+	/* push ureg->{type, psr, pc} onto Msvc stack.
+	 * r13 points to ureg->type after.
+	 */
+	MOVM.DB.W [R0-R2], (R13)
+	MOVM.IA	  (R3), [R0-R4]		/* restore [R0-R4] from previous mode's stack */
+
+	/*
+	 * avoid the ambiguity described in notes/movm.w.
+	 * In order to get a predictable value in R13 after the stores,
+	 * separate the store-multiple from the stack-pointer adjustment.
+	 * We'll assume that the old value of R13 should be stored on the stack.
+	 */
+	/* save kernel level registers, at end r13 points to ureg */
+	MOVM.DB	[R0-R14], (R13)
+	SUB	$(NREGS*4), R13		/* SP now points to saved R0 */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+	/* previous mode was svc, so the saved spsr should be sane. */
+	MOVW	((NREGS+1)*4)(R13), R1
+
+	MOVM.IA	(R13), [R0-R8]		/* restore a few user registers */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
+	MOVW	$0xdeaddead, R11	/* marker */
+
+	BL	trap(SB)		/* trap(ureg) */
+	/*
+	 * caller saves on plan 9, so registers other than 9, 10, 13 & 14
+	 * may have been trashed when we get here.
+	 */
+
+	MOVW	$setR12(SB), R12	/* reload kernel's SB */
+
+	ADD	$(4*2+4*NREGS), R13	/* make r13 point to ureg->type */
+
+	/*
+	 * if we interrupted a previous trap's handler and are now
+	 * returning to it, we need to propagate the current R(MACH) (R10)
+	 * by overriding the saved one on the stack, since we may have
+	 * been rescheduled and be on a different processor now than
+	 * at entry.
+	 */
+	MOVW	R(MACH), (-(NREGS-MACH)*4)(R13) /* restore current cpu's MACH */
+
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+
+	/* return from kernel-mode exception */
+	MOVW	R0, SPSR		/* ... */
+
+	/*
+	 * order on stack is type, psr, pc, but RFEV7 needs pc, psr.
+	 * step on type and previous word to hold temporary values.
+	 * we could instead change the order in which psr & pc are pushed.
+	 */
+	MOVW	4(R13), R1		/* psr */
+	MOVW	8(R13), R2		/* pc */
+	MOVW	R2, 4(R13)		/* pc */
+	MOVW	R1, 8(R13)		/* psr */
+
+	/* restore kernel regs other than SP; we're using it */
+	SUB	$(NREGS*4), R13
+	MOVM.IA.W (R13), [R0-R12]
+	ADD	$4, R13			/* skip saved kernel SP */
+	MOVM.IA.W (R13), [R14]
+	ADD	$4, R13			/* pop type, sp -> pc */
+	BARRIERS
+	RFEV7W(13)
+
+	/*
+	 * here for trap from USER mode
+	 */
+_userexcep:
+	MOVM.DB.W [R0-R2], (R13)	/* set ureg->{type, psr, pc}; r13 points to ureg->type  */
+	MOVM.IA	  (R3), [R0-R4]		/* restore [R0-R4] from previous mode's stack */
+
+	/* avoid the ambiguity described in notes/movm.w. */
+	MOVM.DB.S [R0-R14], (R13)	/* save kernel level registers */
+	SUB	$(NREGS*4), R13		/* r13 now points to ureg */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	/*
+	 * set up m and up registers since user registers could contain anything
+	 */
+	CPUID(R1)
+	SLL	$2, R1			/* convert to word index */
+	MOVW	$machaddr(SB), R2
+	ADD	R1, R2
+	MOVW	(R2), R(MACH)		/* m = machaddr[cpuid] */
+	CMP	$0, R(MACH)
+	MOVW.EQ	$MACHADDR, R0		/* paranoia: use MACHADDR if 0 */
+	MOVW	8(R(MACH)), R(USER)	/* up = m->proc */
+
+	MOVW	((NREGS+1)*4)(R13), R2	/* saved SPSR */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
+
+	BL	trap(SB)		/* trap(ureg) */
+	/*
+	 * caller saves on plan 9, so registers other than 9, 10, 13 & 14
+	 * may have been trashed when we get here.
+	 */
+
+	ADD	$(4*2+4*NREGS), R13	/* make r13 point to ureg->type */
+
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	B	_rfue
+
+
+TEXT _vfiq(SB), 1, $-4			/* FIQ */
+	PUTC('?')
+	PUTC('f')
+	PUTC('i')
+	PUTC('q')
+	RFE				/* FIQ is special, ignore it for now */
+
+TEXT _vhype(SB), 1, $-4
+	PUTC('?')
+	PUTC('h')
+	PUTC('y')
+	PUTC('p')
+	RFE
+
+/*
+ *  set the stack value for the mode passed in R0
+ */
+TEXT setr13(SB), 1, $-4
+	MOVW	4(FP), R1
+
+	MOVW	CPSR, R2
+	BIC	$(PsrMask|PsrMbz), R2, R3
+	ORR	$(PsrDirq|PsrDfiq), R3
+	ORR	R0, R3
+
+	MOVW	R3, CPSR		/* switch to new mode */
+
+	MOVW	R13, R0			/* return old sp */
+	MOVW	R1, R13			/* install new one */
+
+	MOVW	R2, CPSR		/* switch back to old mode */
+	RET

+ 38 - 0
sys/src/9/teg2/lproc.s

@@ -0,0 +1,38 @@
+#include "arm.s"
+
+/*
+ *  This is the first jump from kernel to user mode.
+ *  Fake a return from interrupt.
+ *
+ *  Enter with R0 containing the user stack pointer.
+ *  UTZERO + 0x20 is always the entry point.
+ *
+ */
+TEXT touser(SB), 1, $-4
+	/* store the user stack pointer into the USR_r13 */
+	MOVM.DB.W [R0], (R13)
+	/* avoid the ambiguity described in notes/movm.w. */
+	MOVM.S	(R13), [R13]
+	ADD	$4, R13			/* pop new user SP */
+
+	/* set up a PSR for user level */
+	MOVW	$(PsrMusr), R0
+	MOVW	R0, SPSR
+
+	/* push new user PSR */
+	MOVM.DB.W [R0], (R13)
+
+	/* push the new user PC on the stack */
+	MOVW	$(UTZERO+0x20), R0
+	MOVM.DB.W [R0], (R13)
+
+	RFEV7W(13)
+
+/*
+ *  here to jump to a newly forked process
+ */
+TEXT forkret(SB), 1, $-4
+	ADD	$(4*NREGS), R13		/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	B	rfue(SB)

+ 985 - 0
sys/src/9/teg2/main.c

@@ -0,0 +1,985 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "init.h"
+#include <pool.h>
+
+#include "arm.h"
+#include "reboot.h"
+
+/*
+ * Where configuration info is left for the loaded programme.
+ * This will turn into a structure as more is done by the boot loader
+ * (e.g. why parse the .ini file twice?).
+ * There are 3584 bytes available at CONFADDR.
+ */
+#define BOOTARGS	((char*)CONFADDR)
+#define	BOOTARGSLEN	(16*KiB)		/* limit in devenv.c */
+#define	MAXCONF		64
+#define MAXCONFLINE	160
+
+enum {
+	Minmem	= 256*MB,			/* conservative default */
+};
+
+#define isascii(c) ((uchar)(c) > 0 && (uchar)(c) < 0177)
+
+extern char bdata[], edata[], end[], etext[];
+
+uintptr kseg0 = KZERO;
+Mach* machaddr[MAXMACH];
+uchar *l2pages;
+
+Memcache cachel[8];		/* arm arch v7 supports 1-7 */
+/*
+ * these are used by the cache.v7.s routines.
+ */
+Lowmemcache *cacheconf;
+
+/*
+ * Option arguments from the command line.
+ * oargv[0] is the boot file.
+ * Optionsinit() is called from multiboot()
+ * or some other machine-dependent place
+ * to set it all up.
+ */
+static int oargc;
+static char* oargv[20];
+static char oargb[128];
+static int oargblen;
+static char oenv[4096];
+
+static uintptr sp;		/* XXX - must go - user stack of init proc */
+
+int vflag;
+int normalprint;
+char debug[256];
+
+static Lock testlock;
+
+/* store plan9.ini contents here at least until we stash them in #ec */
+static char confname[MAXCONF][KNAMELEN];
+static char confval[MAXCONF][MAXCONFLINE];
+static int nconf;
+
+static int
+findconf(char *name)
+{
+	int i;
+
+	for(i = 0; i < nconf; i++)
+		if(cistrcmp(confname[i], name) == 0)
+			return i;
+	return -1;
+}
+
+char*
+getconf(char *name)
+{
+	int i;
+
+	i = findconf(name);
+	if(i >= 0)
+		return confval[i];
+	return nil;
+}
+
+void
+addconf(char *name, char *val)
+{
+	int i;
+
+	i = findconf(name);
+	if(i < 0){
+		if(val == nil || nconf >= MAXCONF)
+			return;
+		i = nconf++;
+		strecpy(confname[i], confname[i]+sizeof(confname[i]), name);
+	}
+//	confval[i] = val;
+	strecpy(confval[i], confval[i]+sizeof(confval[i]), val);
+}
+
+static void
+writeconf(void)
+{
+	char *p, *q;
+	int n;
+
+	p = getconfenv();
+
+	if(waserror()) {
+		free(p);
+		nexterror();
+	}
+
+	/* convert to name=value\n format */
+	for(q=p; *q; q++) {
+		q += strlen(q);
+		*q = '=';
+		q += strlen(q);
+		*q = '\n';
+	}
+	n = q - p + 1;
+	if(n >= BOOTARGSLEN)
+		error("kernel configuration too large");
+	memmove(BOOTARGS, p, n);
+	memset(BOOTARGS + n, '\n', BOOTARGSLEN - n);
+	poperror();
+	free(p);
+}
+
+/*
+ * assumes that we have loaded our /cfg/pxe/mac file at CONFADDR
+ * (usually 0x1000) with tftp in u-boot.  no longer uses malloc, so
+ * can be called early.
+ */
+static void
+plan9iniinit(void)
+{
+	char *k, *v, *next;
+
+	k = (char *)CONFADDR;
+	if(!isascii(*k))
+		return;
+
+	for(; k && *k != '\0'; k = next) {
+		if (!isascii(*k))		/* sanity check */
+			break;
+		next = strchr(k, '\n');
+		if (next)
+			*next++ = '\0';
+
+		if (*k == '\0' || *k == '\n' || *k == '#')
+			continue;
+		v = strchr(k, '=');
+		if(v == nil)
+			continue;		/* mal-formed line */
+		*v++ = '\0';
+
+		addconf(k, v);
+	}
+}
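plan9iniinit() only needs plain name=value lines with # comments at CONFADDR; for example (contents purely illustrative):

	# hypothetical /cfg/pxe/mac file tftp'd to CONFADDR by u-boot
	console=0
	*maxmem=0x40000000
	ether0=type=rtl8169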
+
+static void
+optionsinit(char* s)
+{
+	char *o;
+
+	strcpy(oenv, "");
+	o = strecpy(oargb, oargb+sizeof(oargb), s)+1;
+	if(getenv("bootargs", o, o - oargb) != nil)
+		*(o-1) = ' ';
+
+	oargblen = strlen(oargb);
+	oargc = tokenize(oargb, oargv, nelem(oargv)-1);
+	oargv[oargc] = nil;
+}
+
+char*
+getenv(char* name, char* buf, int n)
+{
+	char *e, *p, *q;
+
+	p = oenv;
+	while(*p != 0){
+		if((e = strchr(p, '=')) == nil)
+			break;
+		for(q = name; p < e; p++){
+			if(*p != *q)
+				break;
+			q++;
+		}
+		if(p == e && *q == 0){
+			strecpy(buf, buf+n, e+1);
+			return buf;
+		}
+		p += strlen(p)+1;
+	}
+
+	return nil;
+}
+
+/* enable scheduling of this cpu */
+void
+machon(uint cpu)
+{
+	ulong cpubit;
+
+	cpubit = 1 << cpu;
+	lock(&active);
+	if ((active.machs & cpubit) == 0) {	/* currently off? */
+		conf.nmach++;
+		active.machs |= cpubit;
+	}
+	unlock(&active);
+}
+
+/* disable scheduling of this cpu */
+void
+machoff(uint cpu)
+{
+	ulong cpubit;
+
+	cpubit = 1 << cpu;
+	lock(&active);
+	if (active.machs & cpubit) {		/* currently on? */
+		conf.nmach--;
+		active.machs &= ~cpubit;
+	}
+	unlock(&active);
+}
+
+void
+machinit(void)
+{
+	Mach *m0;
+
+	if (m == 0) {
+		serialputc('?');
+		serialputc('m');
+		serialputc('0');
+	}
+	if(machaddr[m->machno] != m) {
+		serialputc('?');
+		serialputc('m');
+		serialputc('m');
+	}
+
+	if (canlock(&testlock)) {
+		serialputc('?');
+		serialputc('l');
+		panic("cpu%d: locks don't work", m->machno);
+	}
+
+	m->ticks = 1;
+	m->perf.period = 1;
+	m0 = MACHP(0);
+	if (m->machno != 0) {
+		/* synchronise with cpu 0 */
+		m->ticks = m0->ticks;
+		m->fastclock = m0->fastclock;
+		m->cpuhz = m0->cpuhz;
+		m->delayloop = m0->delayloop;
+	}
+	if (m->machno != 0 &&
+	    (m->fastclock == 0 || m->cpuhz == 0 || m->delayloop == 0))
+		panic("buggered cpu 0 Mach");
+
+	machon(m->machno);
+	fpoff();
+}
+
+/* l.s has already zeroed Mach, which now contains our stack. */
+void
+mach0init(void)
+{
+	if (m == 0) {
+		serialputc('?');
+		serialputc('m');
+	}
+	conf.nmach = 0;
+
+	m->machno = 0;
+	machaddr[0] = m;
+
+	lock(&testlock);		/* hold this forever */
+	machinit();
+
+	active.exiting = 0;
+	l1cache->wbse(&active, sizeof active);
+	up = nil;
+}
+
+/*
+ *  count CPU's, set up their mach structures and l1 ptes.
+ *  we're running on cpu 0 and our data structures were
+ *  statically allocated.
+ */
+void
+launchinit(void)
+{
+	int mach;
+	Mach *mm;
+	PTE *l1;
+
+	for(mach = 1; mach < MAXMACH; mach++){
+		machaddr[mach] = mm = mallocalign(MACHSIZE, MACHSIZE, 0, 0);
+		l1 = mallocalign(L1SIZE, L1SIZE, 0, 0);
+		if(mm == nil || l1 == nil)
+			panic("launchinit");
+		memset(mm, 0, MACHSIZE);
+		mm->machno = mach;
+
+		memmove(l1, (void *)L1, L1SIZE);  /* clone cpu0's l1 table */
+		l1cache->wbse(l1, L1SIZE);
+
+		mm->mmul1 = l1;
+		l1cache->wbse(mm, MACHSIZE);
+	}
+	l1cache->wbse(machaddr, sizeof machaddr);
+	conf.nmach = 1;
+}
+
+void
+dump(void *vaddr, int words)
+{
+	ulong *addr;
+
+	addr = vaddr;
+	while (words-- > 0)
+		iprint("%.8lux%c", *addr++, words % 8 == 0? '\n': ' ');
+}
+
+static void
+cacheinit(void)
+{
+	allcacheinfo(cachel);
+	cacheconf = (Lowmemcache *)CACHECONF;
+	cacheconf->l1waysh = cachel[1].waysh;
+	cacheconf->l1setsh = cachel[1].setsh;
+	/* on the tegra 2, l2 is unarchitected */
+	cacheconf->l2waysh = cachel[2].waysh;
+	cacheconf->l2setsh = cachel[2].setsh;
+
+	l2pl310init();
+	allcacheson();
+	allcache->wb();
+}
+
+void
+l2pageinit(void)
+{
+	l2pages = KADDR(PHYSDRAM + DRAMSIZE - RESRVDHIMEM);
+}
+
+/*
+ * at entry, l.s has set m for cpu0 and printed "Plan 9 from Be"
+ * but has not zeroed bss.
+ */
+void
+main(void)
+{
+	int cpu;
+	static ulong vfy = 0xcafebabe;
+
+	up = nil;
+	if (vfy != 0xcafebabe) {
+		serialputc('?');
+		serialputc('d');
+		panic("data segment misaligned");
+	}
+
+	memset(edata, 0, end - edata);
+
+	/*
+	 * we can't lock until smpon has run, but we're supposed to wait
+	 * until l1 & l2 are on.  too bad.  l1 is on, l2 will soon be.
+	 */
+	smpon();
+	iprint("ll Labs ");
+	cacheinit();
+
+	/*
+	 * data segment is aligned, bss is zeroed, caches' characteristics
+	 * are known.  begin initialisation.
+	 */
+	mach0init();
+	l2pageinit();
+	mmuinit();
+
+	optionsinit("/boot/boot boot");
+	quotefmtinstall();
+
+	/* want plan9.ini to be able to affect memory sizing in confinit */
+	plan9iniinit();		/* before we step on plan9.ini in low memory */
+
+	/* l2 looks for *l2off= in plan9.ini */
+	l2cache->on();		/* l2->on requires locks to work, thus smpon */
+	l2cache->info(&cachel[2]);
+	allcache->on();
+
+	cortexa9cachecfg();
+
+	trapinit();		/* so confinit can probe memory to size it */
+	confinit();		/* figures out amount of memory */
+	/* xinit prints (if it can), so finish up the banner here. */
+	delay(100);
+	navailcpus = getncpus();
+	iprint("(mp arm; %d cpus)\n\n", navailcpus);
+	delay(100);
+
+	for (cpu = 1; cpu < navailcpus; cpu++)
+		stopcpu(cpu);
+
+	xinit();
+	irqtooearly = 0;	/* now that xinit and trapinit have run */
+
+	mainmem->flags |= POOL_ANTAGONISM /* | POOL_PARANOIA */ ;
+
+	/*
+	 * Printinit will cause the first malloc call.
+	 * (printinit->qopen->malloc) unless any of the
+	 * above (like clockinit) do an irqenable, which
+	 * will call malloc.
+	 * If the system dies here it's probably due
+	 * to malloc(->xalloc) not being initialised
+	 * correctly, or the data segment is misaligned
+	 * (it's amazing how far you can get with
+	 * things like that completely broken).
+	 *
+	 * (Should be) boilerplate from here on.
+	 */
+
+	archreset();			/* cfg clock signals, print cache cfg */
+	clockinit();			/* start clocks */
+	timersinit();
+
+	delay(50);			/* let uart catch up */
+	printinit();
+	kbdenable();
+
+	cpuidprint();
+	chkmissing();
+
+	procinit0();
+	initseg();
+
+//	dmainit();
+	links();
+	conf.monitor = 1;
+//	screeninit();
+
+	iprint("pcireset...");
+	pcireset();			/* this tends to hang after a reboot */
+	iprint("ok\n");
+
+	chandevreset();			/* most devices are discovered here */
+//	i8250console();			/* too early; see init0 */
+
+	pageinit();			/* prints "1020M memory: ⋯ */
+	swapinit();
+	userinit();
+
+	/*
+	 * starting a cpu will eventually result in it calling schedinit,
+	 * so everything necessary to run user processes should be set up
+	 * before starting secondary cpus.
+	 */
+	launchinit();
+	/* SMP & FW are already on when we get here; u-boot set them? */
+	for (cpu = 1; cpu < navailcpus; cpu++)
+		if (startcpu(cpu) < 0)
+			panic("cpu%d didn't start", cpu);
+	l1diag();
+
+	schedinit();
+	panic("cpu%d: schedinit returned", m->machno);
+}
+
+static void
+shutdown(int ispanic)
+{
+	int ms, once;
+
+	lock(&active);
+	if(ispanic)
+		active.ispanic = ispanic;
+	else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0)
+		active.ispanic = 0;
+	once = active.machs & (1<<m->machno);
+	/*
+	 * setting exiting will make hzclock() on each processor call exit(0),
+	 * which calls shutdown(0) and idles non-bootstrap cpus and returns
+	 * on bootstrap processors (to permit a reboot).  clearing our bit
+	 * in machs avoids calling exit(0) from hzclock() on this processor.
+	 */
+	active.machs &= ~(1<<m->machno);
+	active.exiting = 1;
+	unlock(&active);
+
+	if(once) {
+		delay(m->machno*1000);		/* stagger them */
+		iprint("cpu%d: exiting\n", m->machno);
+	}
+	spllo();
+	if (m->machno == 0)
+		ms = 5*1000;
+	else
+		ms = 2*1000;
+	for(; ms > 0; ms -= TK2MS(2)){
+		delay(TK2MS(2));
+		if(active.machs == 0 && consactive() == 0)
+			break;
+	}
+	delay(500);
+}
+
+/*
+ *  exit kernel either on a panic or user request
+ */
+void
+exit(int code)
+{
+	shutdown(code);
+	splhi();
+	if (m->machno == 0)
+		archreboot();
+	else {
+		intrcpushutdown();
+		stopcpu(m->machno);
+		for (;;)
+			idlehands();
+	}
+}
+
+int
+isaconfig(char *class, int ctlrno, ISAConf *isa)
+{
+	char cc[32], *p;
+	int i;
+
+	snprint(cc, sizeof cc, "%s%d", class, ctlrno);
+	p = getconf(cc);
+	if(p == nil)
+		return 0;
+
+	isa->type = "";
+	isa->nopt = tokenize(p, isa->opt, NISAOPT);
+	for(i = 0; i < isa->nopt; i++){
+		p = isa->opt[i];
+		if(cistrncmp(p, "type=", 5) == 0)
+			isa->type = p + 5;
+		else if(cistrncmp(p, "port=", 5) == 0)
+			isa->port = strtoul(p+5, &p, 0);
+		else if(cistrncmp(p, "irq=", 4) == 0)
+			isa->irq = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "dma=", 4) == 0)
+			isa->dma = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "mem=", 4) == 0)
+			isa->mem = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "size=", 5) == 0)
+			isa->size = strtoul(p+5, &p, 0);
+		else if(cistrncmp(p, "freq=", 5) == 0)
+			isa->freq = strtoul(p+5, &p, 0);
+	}
+	return 1;
+}
+
+/*
+ * the new kernel is already loaded at address `code'
+ * of size `size' and entry point `entry'.
+ */
+void
+reboot(void *entry, void *code, ulong size)
+{
+	int cpu, nmach, want, ms;
+	void (*f)(ulong, ulong, ulong);
+
+	nmach = conf.nmach;
+	writeconf();
+
+	/*
+	 * the boot processor is cpu0.  execute this function on it
+	 * so that the new kernel has the same cpu0.
+	 */
+	if (m->machno != 0) {
+		procwired(up, 0);
+		sched();
+	}
+	if (m->machno != 0)
+		print("on cpu%d (not 0)!\n", m->machno);
+
+	/*
+	 * the other cpus could be holding locks that will never get
+	 * released (e.g., in the print path) if we put them into
+	 * reset now, so force them to shutdown gracefully first.
+	 */
+	for (want = 0, cpu = 1; cpu < navailcpus; cpu++)
+		want |= 1 << cpu;
+	active.stopped = 0;
+	shutdown(0);
+	for (ms = 15*1000; ms > 0 && active.stopped != want; ms -= 10)
+		delay(10);
+	delay(20);
+	if (active.stopped != want) {
+		for (cpu = 1; cpu < nmach; cpu++)
+			stopcpu(cpu);		/* make really sure */
+		delay(20);
+	}
+
+	/*
+	 * should be the only processor running now
+	 */
+	pcireset();
+//	print("reboot entry %#lux code %#lux size %ld\n",
+//		PADDR(entry), PADDR(code), size);
+
+	/* turn off buffered serial console */
+	serialoq = nil;
+	kprintoq = nil;
+	screenputs = nil;
+
+	/* shutdown devices */
+	chandevshutdown();
+
+	/* call off the dog */
+	clockshutdown();
+
+	splhi();
+	intrshutdown();
+
+	/* setup reboot trampoline function */
+	f = (void*)REBOOTADDR;
+	memmove(f, rebootcode, sizeof(rebootcode));
+	cachedwb();
+	l2cache->wbinv();
+	l2cache->off();
+	cacheuwbinv();
+
+	/* off we go - never to return */
+	(*f)(PADDR(entry), PADDR(code), size);
+
+	iprint("loaded kernel returned!\n");
+	archreboot();
+}
+
+/*
+ *  starting place for first process
+ */
+void
+init0(void)
+{
+	int i;
+	char buf[2*KNAMELEN];
+
+	up->nerrlab = 0;
+	coherence();
+	spllo();
+
+	/*
+	 * These are o.k. because rootinit is null.
+	 * Then early kproc's will have a root and dot.
+	 */
+	up->slash = namec("#/", Atodir, 0, 0);
+	pathclose(up->slash->path);
+	up->slash->path = newpath("/");
+	up->dot = cclone(up->slash);
+
+	chandevinit();
+	i8250console();		/* might be redundant, but harmless */
+	if(kbdq == nil)
+		panic("init0: nil kbdq");
+	if(serialoq == nil)
+		panic("init0: nil serialoq");
+	normalprint = 1;
+
+	if(!waserror()){
+		snprint(buf, sizeof(buf), "%s %s", "ARM", conffile);
+		ksetenv("terminal", buf, 0);
+		ksetenv("cputype", "arm", 0);
+		if(cpuserver)
+			ksetenv("service", "cpu", 0);
+		else
+			ksetenv("service", "terminal", 0);
+
+		/* convert plan9.ini variables to #e and #ec */
+		for(i = 0; i < nconf; i++) {
+			ksetenv(confname[i], confval[i], 0);
+			ksetenv(confname[i], confval[i], 1);
+		}
+		poperror();
+	}
+	kproc("alarm", alarmkproc, 0);
+//	kproc("startcpusproc", startcpusproc, nil);
+
+	touser(sp);
+}
+
+static void
+bootargs(uintptr base)
+{
+	int i;
+	ulong ssize;
+	char **av, *p;
+
+	/*
+	 * Push the boot args onto the stack.
+	 * The initial value of the user stack must be such
+	 * that the total used is larger than the maximum size
+	 * of the argument list checked in syscall.
+	 */
+	i = oargblen+1;
+	p = UINT2PTR(STACKALIGN(base + BY2PG - sizeof(up->s.args) - i));
+	memmove(p, oargb, i);
+
+	/*
+	 * Now push argc and the argv pointers.
+	 * This isn't strictly correct as the code jumped to by
+	 * touser in init9.s calls startboot (port/initcode.c) which
+	 * expects arguments
+	 * 	startboot(char *argv0, char **argv)
+	 * not the usual (int argc, char* argv[]), but argv0 is
+	 * unused so it doesn't matter (at the moment...).
+	 */
+	av = (char**)(p - (oargc+2)*sizeof(char*));
+	ssize = base + BY2PG - PTR2UINT(av);
+	*av++ = (char*)oargc;
+	for(i = 0; i < oargc; i++)
+		*av++ = (oargv[i] - oargb) + (p - base) + (USTKTOP - BY2PG);
+	*av = nil;
+
+	/*
+	 * Leave space for the return PC of the
+	 * caller of initcode.
+	 */
+	sp = USTKTOP - ssize - sizeof(void*);
+}
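For reference, the arithmetic above leaves the top page of the new user stack laid out as follows (a restatement of the code, highest addresses first; nothing here is additional code):

	/*
	 *	USTKTOP
	 *	argument strings (oargblen+1 bytes, at p)
	 *	nil terminator for argv[]
	 *	argv[0..oargc-1], rebased to user addresses
	 *	argc					<- av
	 *	free word for initcode's return PC	<- sp handed to touser()
	 */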
+
+/*
+ *  create the first process
+ */
+void
+userinit(void)
+{
+	Proc *p;
+	Segment *s;
+	KMap *k;
+	Page *pg;
+
+	/* no processes yet */
+	up = nil;
+
+	p = newproc();
+	p->pgrp = newpgrp();
+	p->egrp = smalloc(sizeof(Egrp));
+	p->egrp->ref = 1;
+	p->fgrp = dupfgrp(nil);
+	p->rgrp = newrgrp();
+	p->procmode = 0640;
+
+	kstrdup(&eve, "");
+	kstrdup(&p->text, "*init*");
+	kstrdup(&p->user, eve);
+
+	/*
+	 * Kernel Stack
+	 */
+	p->sched.pc = PTR2UINT(init0);
+	p->sched.sp = PTR2UINT(p->kstack+KSTACK-sizeof(up->s.args)-sizeof(uintptr));
+	p->sched.sp = STACKALIGN(p->sched.sp);
+
+	/*
+	 * User Stack
+	 *
+	 * Technically, newpage can't be called here because it
+	 * should only be called when in a user context as it may
+	 * try to sleep if there are no pages available, but that
+	 * shouldn't be the case here.
+	 */
+	s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
+	s->flushme++;
+	p->seg[SSEG] = s;
+	pg = newpage(1, 0, USTKTOP-BY2PG);
+	segpage(s, pg);
+	k = kmap(pg);
+	bootargs(VA(k));
+	kunmap(k);
+
+	/*
+	 * Text
+	 */
+	s = newseg(SG_TEXT, UTZERO, 1);
+	p->seg[TSEG] = s;
+	pg = newpage(1, 0, UTZERO);
+	memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
+	segpage(s, pg);
+	k = kmap(s->map[0]->pages[0]);
+	memmove(UINT2PTR(VA(k)), initcode, sizeof initcode);
+	kunmap(k);
+
+	ready(p);
+}
+
+Conf conf;			/* XXX - must go - gag */
+
+Confmem tsmem[nelem(conf.mem)] = {
+	/*
+	 * Memory available to Plan 9:
+	 */
+	{ .base = PHYSDRAM, .limit = PHYSDRAM + Minmem, },
+};
+ulong memsize = DRAMSIZE;
+
+static int
+gotmem(uintptr sz)
+{
+	uintptr addr;
+
+	/* back off a little from the end */
+	addr = (uintptr)KADDR(PHYSDRAM + sz - BY2WD);
+	if (probeaddr(addr) >= 0) {	/* didn't trap? memory present */
+		memsize = sz;
+		return 0;
+	}
+	return -1;
+}
+
+void
+confinit(void)
+{
+	int i;
+	ulong kpages;
+	uintptr pa;
+	char *p;
+
+	/*
+	 * Copy the physical memory configuration to Conf.mem.
+	 */
+	if(nelem(tsmem) > nelem(conf.mem)){
+		iprint("memory configuration botch\n");
+		exit(1);
+	}
+	if(0 && (p = getconf("*maxmem")) != nil) {
+		memsize = strtoul(p, 0, 0) - PHYSDRAM;
+		if (memsize < 16*MB)		/* sanity */
+			memsize = 16*MB;
+	}
+
+	/*
+	 * see if all that memory exists; if not, find out how much does.
+	 * trapinit must have been called first.
+	 */
+	if (gotmem(memsize - RESRVDHIMEM) < 0)
+		panic("can't find 1GB of memory");
+
+	tsmem[0].limit = PHYSDRAM + memsize;
+	memmove(conf.mem, tsmem, sizeof(tsmem));
+
+	conf.npage = 0;
+	pa = PADDR(PGROUND(PTR2UINT(end)));
+
+	/*
+	 *  we assume that the kernel is at the beginning of one of the
+	 *  contiguous chunks of memory and fits therein.
+	 */
+	for(i=0; i<nelem(conf.mem); i++){
+		/* take kernel out of allocatable space */
+		if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
+			conf.mem[i].base = pa;
+
+		conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+		conf.npage += conf.mem[i].npage;
+	}
+
+	conf.upages = (conf.npage*80)/100;
+	conf.ialloc = ((conf.npage-conf.upages)/2)*BY2PG;
+
+	/* set up other configuration parameters */
+	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+	if(cpuserver)
+		conf.nproc *= 3;
+	if(conf.nproc > 2000)
+		conf.nproc = 2000;
+	conf.nswap = conf.npage*3;
+	conf.nswppo = 4096;
+	conf.nimage = 200;
+
+	/*
+	 * it's simpler on mp systems to take page-faults early,
+	 * on reference, rather than later, on write, which might
+	 * require tlb shootdowns.
+	 */
+	conf.copymode = 1;		/* copy on reference */
+
+	/*
+	 * Guess how much is taken by the large permanent
+	 * datastructures. Mntcache and Mntrpc are not accounted for
+	 * (probably ~300KB).
+	 */
+	kpages = conf.npage - conf.upages;
+	kpages *= BY2PG;
+	kpages -= conf.upages*sizeof(Page)
+		+ conf.nproc*sizeof(Proc)
+		+ conf.nimage*sizeof(Image)
+		+ conf.nswap
+		+ conf.nswppo*sizeof(Page);
+	mainmem->maxsize = kpages;
+	if(!cpuserver)
+		/*
+		 * give terminals lots of image memory, too; the dynamic
+		 * allocation will balance the load properly, hopefully.
+		 * be careful with 32-bit overflow.
+		 */
+		imagmem->maxsize = kpages;
+
+//	archconfinit();
+}
+
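+/*
+ * atomic compare-and-swap for the port code; cas (atom.s) returns
+ * non-zero iff the swap happened.
+ */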
+int
+cmpswap(long *addr, long old, long new)
+{
+	return cas((int *)addr, old, new);
+}
+
+void
+advertwfi(void)			/* advertise my wfi status */
+{
+	ilock(&active);
+	active.wfi |= 1 << m->machno;
+	iunlock(&active);
+}
+
+void
+unadvertwfi(void)		/* do not advertise my wfi status */
+{
+	ilock(&active);
+	active.wfi &= ~(1 << m->machno);
+	iunlock(&active);
+}
+
+void
+idlehands(void)
+{
+#ifdef use_ipi
+	int advertised;
+
+	/* don't go into wfi until my local timer is ticking */
+	if (m->ticks <= 1)
+		return;
+
+	advertised = 0;
+	m->inidlehands++;
+	/* avoid recursion via ilock, advertise iff this cpu is initialised */
+	if (m->inidlehands == 1 && m->syscall > 0) {
+		advertwfi();
+		advertised = 1;
+	}
+
+	wfi();
+
+	if (advertised)
+		unadvertwfi();
+	m->inidlehands--;
+#endif
+}
+
+void
+wakewfi(void)
+{
+#ifdef use_ipi
+	uint cpu;
+
+	/*
+	 * find any cpu other than me currently in wfi.
+	 * need not be exact.
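+	 * 31 - clz(x) is the bit index of the highest set bit, so this
+	 * picks the highest-numbered cpu currently advertising wfi, if any.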
+	 */
+	cpu = BI2BY*BY2WD - 1 - clz(active.wfi & ~(1 << m->machno));
+	if (cpu < MAXMACH)
+		intrcpu(cpu);
+#endif
+}

+ 150 - 0
sys/src/9/teg2/mem.h

@@ -0,0 +1,150 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+#define KiB		1024u			/* Kibi 0x0000000000000400 */
+#define MiB		1048576u		/* Mebi 0x0000000000100000 */
+#define GiB		1073741824u		/* Gibi 0x0000000040000000 */
+
+#define HOWMANY(x, y)	(((x)+((y)-1))/(y))
+#define ROUNDUP(x, y)	(HOWMANY((x), (y))*(y))	/* ceiling */
+#define ROUNDDN(x, y)	(((x)/(y))*(y))		/* floor */
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
+
+/*
+ * Not sure where these macros should go.
+ * This probably isn't right but will do for now.
+ * The macro names are problematic too.
+ */
+/*
+ * In B(o), 'o' is the bit offset in the register.
+ * For multi-bit fields use F(v, o, w) where 'v' is the value
+ * of the bit-field of width 'w' with LSb at bit offset 'o'.
+ */
+#define B(o)		(1<<(o))
+#define F(v, o, w)	(((v) & ((1<<(w))-1))<<(o))
+
+#define FCLR(d, o, w)	((d) & ~(((1<<(w))-1)<<(o)))
+#define FEXT(d, o, w)	(((d)>>(o)) & ((1<<(w))-1))
+#define FINS(d, o, w, v) (FCLR((d), (o), (w))|F((v), (o), (w)))
+#define FSET(d, o, w)	((d)|(((1<<(w))-1)<<(o)))
+
+#define FMASK(o, w)	(((1<<(w))-1)<<(o))
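+
+/*
+ * illustrative uses, worked by hand (the 12-bit field at offset 20 is
+ * the same one mmu.c's L1X() extracts):
+ *	FEXT(0xc0410000, 20, 12) == 0xc04
+ *	FINS(0, 20, 12, 0xc04) == 0xc0400000
+ *	FMASK(20, 12) == 0xfff00000
+ *	B(31) == 0x80000000
+ */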
+
+/*
+ * Sizes
+ */
+#define	BY2PG		(4*KiB)			/* bytes per page */
+#define	PGSHIFT		12			/* log(BY2PG) */
+#define	PGROUND(s)	ROUNDUP(s, BY2PG)
+#define	ROUND(s, sz)	(((s)+(sz-1))&~(sz-1))
+
+/* max # of cpus system can run.  tegra2 cpu ids are two bits wide. */
+#define	MAXMACH		4
+#define	MACHSIZE	BY2PG
+#define L1SIZE		(4 * BY2PG)
+
+#define KSTKSIZE	(16*KiB)		/* was 8K */
+#define STACKALIGN(sp)	((sp) & ~7)		/* bug: assure with alloc */
+
+/*
+ * Magic registers
+ */
+
+#define	USER		9		/* R9 is up-> */
+#define	MACH		10		/* R10 is m-> */
+
+/*
+ * Address spaces.
+ * KTZERO is used by kprof and dumpstack (if any).
+ *
+ * KZERO (0xc0000000) is mapped to physical 0 (start of dram).
+ * u-boot claims to occupy the first 4 MB of dram, but we're willing to
+ * step on it once we're loaded.
+ *
+ * L2 PTEs are stored in 4K before cpu0's Mach (8K to 12K above KZERO).
+ * cpu0's Mach struct is at L1 - MACHSIZE(4K) to L1 (12K to 16K above KZERO).
+ * L1 PTEs are stored from L1 to L1+32K (16K to 48K above KZERO).
+ * plan9.ini is loaded at CONFADDR (4MB).
+ * KTZERO may be anywhere after that.
+ */
+#define	KSEG0		0xC0000000		/* kernel segment */
+/* mask to check segment; good for 1GB dram */
+#define	KSEGM		0xC0000000
+#define	KZERO		KSEG0			/* kernel address space */
+#define L1		(KZERO+16*KiB)		/* cpu0 l1 page table; 16KiB aligned */
+#define CONFADDR	(KZERO+0x400000)	/* unparsed plan9.ini */
+#define CACHECONF	(CONFADDR+48*KiB)
+/* KTZERO must match loadaddr in mkfile */
+#define	KTZERO		(KZERO+0x410000)	/* kernel text start */
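+/*
+ * worked out (illustrative): with KZERO at 0xC0000000, L1 is 0xC0004000,
+ * CONFADDR is 0xC0400000 and KTZERO (= loadaddr in the mkfile) is
+ * 0xC0410000.
+ */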
+
+#define	L2pages		(2*MiB)	/* high memory reserved for l2 page tables */
+#define RESRVDHIMEM	(64*KiB + MiB + L2pages) /* avoid HVECTORS, l2 pages */
+/* we assume that we have 1 GB of ram, which is true for all trimslices. */
+#define DRAMSIZE	GiB
+
+#define	UZERO		0			/* user segment */
+#define	UTZERO		(UZERO+BY2PG)		/* user text start */
+#define UTROUND(t)	ROUNDUP((t), BY2PG)
+/*
+ * moved USTKTOP down to 1GB to keep MMIO space out of user space.
+ * moved it down another MB to utterly avoid KADDR(stack_base) mapping
+ * to high exception vectors.  see confinit().
+ */
+#define	USTKTOP		(0x40000000 - 64*KiB - MiB) /* user segment end +1 */
+#define	USTKSIZE	(8*1024*1024)		/* user stack size */
+#define	TSTKTOP		(USTKTOP-USTKSIZE)	/* sysexec temporary stack */
+#define	TSTKSIZ	 	256
+
+/* address at which to copy and execute rebootcode */
+#define	REBOOTADDR	KADDR(0x100)
+
+/*
+ * Legacy...
+ */
+#define BLOCKALIGN	CACHELINESZ		/* only used in allocb.c */
+#define KSTACK		KSTKSIZE
+
+/*
+ * Sizes
+ */
+#define BI2BY		8			/* bits per byte */
+#define BY2SE		4
+#define BY2WD		4
+#define BY2V		8			/* only used in xalloc.c */
+
+#define CACHELINESZ	32			/* bytes per cache line */
+#define	PTEMAPMEM	(1024*1024)
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
+#define	SEGMAPSIZE	1984			/* magic 16*124 */
+#define	SSEGMAPSIZE	16			/* magic */
+#define	PPN(x)		((x)&~(BY2PG-1))	/* pure page number? */
+
+/*
+ * With a little work these move to port.
+ */
+#define	PTEVALID	(1<<0)
+#define	PTERONLY	0
+#define	PTEWRITE	(1<<1)
+#define	PTEUNCACHED	(1<<2)
+#define PTEKERNEL	(1<<3)
+
+/*
+ * Physical machine information from here on.
+ */
+
+#define PHYSDRAM	0
+
+#define PHYSIO		0x50000000	/* cpu */
+#define VIRTIO		PHYSIO
+#define PHYSL2BAG	0x50043000	/* l2 cache bag-on-the-side */
+#define PHYSEVP		0x6000f100	/* undocumented `exception vector' */
+#define PHYSCONS	0x70006000	/* uart console */
+#define PHYSIOEND	0xc0000000	/* end of ahb mem & pcie */
+
+#define PHYSAHB		0xc0000000	/* ahb bus */
+#define VIRTAHB		0xb0000000
+#define P2VAHB(pa) ((pa) - PHYSAHB + VIRTAHB)
+
+#define PHYSNOR		0xd0000000
+#define VIRTNOR		0x40000000

+ 155 - 0
sys/src/9/teg2/mkfile

@@ -0,0 +1,155 @@
+CONF=ts
+CONFLIST=ts
+EXTRACOPIES=
+
+# allegedly u-boot uses the bottom 4MB (up to 0x400000) so avoid that,
+# and leave 64K for plan9.ini.  loadaddr must match KTZERO in mem.h
+# and CONFADDR must be 0x10000 lower.
+loadaddr=0xc0410000
+
+objtype=arm
+</$objtype/mkfile
+p=9
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+	alarm.$O\
+	alloc.$O\
+	allocb.$O\
+	auth.$O\
+	cache.$O\
+	chan.$O\
+	dev.$O\
+	edf.$O\
+	fault.$O\
+	latin1.$O\
+	mul64fract.$O\
+	rebootcmd.$O\
+	page.$O\
+	parse.$O\
+	pgrp.$O\
+	portclock.$O\
+	print.$O\
+	proc.$O\
+	qio.$O\
+	qlock.$O\
+	segment.$O\
+	swap.$O\
+	sysfile.$O\
+	sysproc.$O\
+	taslock.$O\
+	tod.$O\
+	xalloc.$O\
+
+OBJ=\
+	l.$O\
+	lexception.$O\
+	lproc.$O\
+	arch.$O\
+	atom.$O\
+	clock.$O\
+	clock-tegra.$O\
+	kbd.$O\
+	main.$O\
+	mmu.$O\
+	random.$O\
+	trap.$O\
+	$CONF.root.$O\
+	$CONF.rootc.$O\
+	$DEVS\
+	$PORT\
+
+HFILES=\
+	arm.h\
+	dat.h\
+	../port/error.h\
+	errstr.h\
+	fns.h\
+	io.h\
+	../port/lib.h\
+	mem.h\
+	../port/portdat.h\
+	../port/portfns.h\
+	/$objtype/include/u.h\
+
+LIB=\
+	/$objtype/lib/libmemlayer.a\
+	/$objtype/lib/libmemdraw.a\
+	/$objtype/lib/libdraw.a\
+	/$objtype/lib/libip.a\
+	/$objtype/lib/libsec.a\
+	/$objtype/lib/libmp.a\
+	/$objtype/lib/libc.a\
+
+9:V: $p$CONF s$p$CONF
+
+$p$CONF:DQ:	$CONF.c $OBJ $LIB mkfile
+	$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+	echo '# linking raw kernel'	# H6: no headers, data segment aligned
+	$LD -o $target -H6 -R4096 -T$loadaddr -l $OBJ $CONF.$O $LIB
+
+s$p$CONF:DQ:	$CONF.$O $OBJ $LIB
+	echo '# linking 9 kernel with symbols'
+#	$LD -o $target -R4096 -T$loadaddr -l -a $OBJ $CONF.$O $LIB >$target.list
+	$LD -o $target -R4096 -T$loadaddr -l $OBJ $CONF.$O $LIB
+	size $target
+
+$p$CONF.gz:D:	$p$CONF
+	gzip -9 <$p$CONF >$target
+
+$OBJ: $HFILES
+
+install:V: /$objtype/$p$CONF
+
+/$objtype/$p$CONF:D: $p$CONF s$p$CONF
+	cp -x $p$CONF s$p$CONF /$objtype/ &
+	for(i in $EXTRACOPIES)
+		{ 9fs $i && cp $p$CONF s$p$CONF /n/$i/$objtype && echo -n $i... & }
+	wait
+	echo
+	touch $target
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+CFLAGS= -I. -I../port $CFLAGS	# hack to compile private sysproc.c (e.g.)
+
+arch.$O clock.$O fpiarm.$O main.$O mmu.$O screen.$O sdscsi.$O syscall.$O \
+	trap.$O: /$objtype/include/ureg.h
+
+archtegra.$O devether.$O ether9221.$O: etherif.h ../port/netif.h
+archtegra.$O devflash.$O flashtegra.$O flashigep.$O: ../port/flashif.h
+ecc.$O flashtegra.$O flashigep.$O: ../port/nandecc.h io.h
+fpi.$O fpiarm.$O fpimem.$O: fpi.h
+l.$O lexception.$O lproc.$O mmu.$O: arm.s mem.h
+l.$O rebootcode.$O: cache.v7.s
+main.$O: errstr.h init.h reboot.h
+devdss.$O devmouse.$O mouse.$O screen.$O: screen.h
+devusb.$O: ../port/usb.h
+usbehci.$O usbohci.$O usbuhci.$O: ../port/usb.h usbehci.h uncached.h
+
+init.h:D:	../port/initcode.c init9.s
+	$CC ../port/initcode.c
+	$AS init9.s
+	$LD -l -R1 -s -o init.out init9.$O initcode.$O /$objtype/lib/libc.a
+	{echo 'uchar initcode[]={'
+	 xd -1x <init.out |
+		sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > init.h
+
+reboot.h:D:	rebootcode.s cache.v7.s arm.s arm.h mem.h
+	$AS rebootcode.s
+	# -lc is only for memmove.  -T arg is PADDR(REBOOTADDR)
+#	$LD -l -a -s -T0x100 -R4 -o reboot.out rebootcode.$O -lc >reboot.list
+	$LD -l -s -T0x100 -R4 -o reboot.out rebootcode.$O -lc
+	{echo 'uchar rebootcode[]={'
+	 xd -1x reboot.out |
+		sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > reboot.h
+errstr.h:D:	../port/mkerrstr ../port/error.h
+	rc ../port/mkerrstr > errstr.h
+
+$CONF.clean:
+	rm -rf $p$CONF s$p$CONF errstr.h reboot.h $CONF.c boot$CONF.c

+ 750 - 0
sys/src/9/teg2/mmu.c

@@ -0,0 +1,750 @@
+/*
+ * arm arch v7 mmu
+ *
+ * we initially thought that we needn't flush the l2 cache since external
+ * devices needn't see page tables.  sadly, reality does not agree with
+ * the manuals.
+ *
+ * we use l1 and l2 cache ops here because they are empirically needed.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "arm.h"
+
+#define L1X(va)		FEXT((va), 20, 12)
+#define L2X(va)		FEXT((va), 12, 8)
+
+enum {
+	Debug		= 0,
+
+	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
+#ifdef SMALL_ARM				/* well under 1GB of RAM? */
+	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
+#else
+	/*
+	 * on trimslice, top of 1GB ram can't be addressible, as high
+	 * on trimslice, top of 1GB ram can't be addressable, as high
+	 * moved USTKTOP down another MB to utterly avoid KADDR(stack_base)
+	 * mapping to high exception vectors.  USTKTOP is thus
+	 * (0x40000000 - 64*KiB - MiB), which in kernel virtual space is
+	 * (0x100000000ull - 64*KiB - MiB), but we need the whole user
+	 * virtual address space to be unmapped in a new process.
+	 */
+	L1hi		= DRAMSIZE/MiB,
+#endif
+};
+
+#define ISHOLE(type)	((type) == 0)
+
+typedef struct Range Range;
+struct Range {
+	uintptr	startva;
+	uvlong	endva;
+	uintptr	startpa;
+	uvlong	endpa;
+	ulong	attrs;
+	int	type;			/* L1 Section or Coarse? */
+};
+
+static void mmul1empty(void);
+
+static char *
+typename(int type)
+{
+	static char numb[20];
+
+	switch(type) {
+	case Coarse:
+		return "4KB-page table(s)";
+	case Section:
+		return "1MB section(s)";
+	default:
+		snprint(numb, sizeof numb, "type %d", type);
+		return numb;
+	}
+}
+
+static void
+prl1range(Range *rp)
+{
+	int attrs;
+
+	iprint("l1 maps va (%#8.8lux-%#llux) -> ", rp->startva, rp->endva-1);
+	if (rp->startva == rp->startpa)
+		iprint("identity-mapped");
+	else
+		iprint("pa %#8.8lux", rp->startpa);
+	iprint(" attrs ");
+	attrs = rp->attrs;
+	if (attrs) {
+		if (attrs & Cached)
+			iprint("C");
+		if (attrs & Buffered)
+			iprint("B");
+		if (attrs & L1sharable)
+			iprint("S1");
+		if (attrs & L1wralloc)
+			iprint("A1");
+	} else
+		iprint("\"\"");
+	iprint(" %s\n", typename(rp->type));
+	delay(100);
+	rp->endva = 0;
+}
+
+static void
+l2dump(Range *rp, PTE pte)
+{
+	USED(rp, pte);
+}
+
+/* dump level 1 page table at virtual addr l1 */
+void
+mmudump(PTE *l1)
+{
+	int i, type, attrs;
+	uintptr pa;
+	uvlong va;
+	PTE pte;
+	Range rng;
+
+	/* dump first level of ptes */
+	iprint("cpu%d l1 pt @ %#p:\n", m->machno, PADDR(l1));
+	memset(&rng, 0, sizeof rng);
+	for (va = i = 0; i < 4096; i++, va += MB) {
+		pte = l1[i];
+		type = pte & (Section|Coarse);
+		if (type == Section)
+			pa = pte & ~(MB - 1);
+		else
+			pa = pte & ~(KiB - 1);
+		attrs = 0;
+		if (!ISHOLE(type) && type == Section)
+			attrs = pte & L1ptedramattrs;
+
+		/* if a range is open but this pte isn't part, close & open */
+		if (!ISHOLE(type) &&
+		    (pa != rng.endpa || type != rng.type || attrs != rng.attrs))
+			if (rng.endva != 0) {	/* range is open? close it */
+				prl1range(&rng);
+				rng.type = 0;
+				rng.attrs = 0;
+			}
+
+		if (ISHOLE(type)) {		/* end of any open range? */
+			if (rng.endva != 0)	/* range is open? close it */
+				prl1range(&rng);
+		} else {			/* continuation or new range */
+			if (rng.endva == 0) {	/* no open range? start one */
+				rng.startva = va;
+				rng.startpa = pa;
+				rng.type = type;
+				rng.attrs = attrs;
+			}
+			rng.endva = va + MB;	/* continue the open range */
+			rng.endpa = pa + MB;
+		}
+		if (type == Coarse)
+			l2dump(&rng, pte);
+	}
+	if (rng.endva != 0)			/* close any open range */
+		prl1range(&rng);
+	iprint("\n");
+}
+
+/*
+ * map `mbs' megabytes from virt to phys, uncached.
+ * device registers are sharable, except the private memory region:
+ * 2 4K pages, at 0x50040000 on the tegra2.
+ */
+void
+mmumap(uintptr virt, uintptr phys, int mbs)
+{
+	uint off;
+	PTE *l1;
+
+	phys &= ~(MB-1);
+	virt &= ~(MB-1);
+	l1 = KADDR(ttbget());
+	for (off = 0; mbs-- > 0; off += MB)
+		l1[L1X(virt + off)] = (phys + off) | Dom0 | L1AP(Krw) |
+			Section | L1sharable;
+	allcache->wbse(l1, L1SIZE);
+	mmuinvalidate();
+}
+
+/* identity map `mbs' megabytes from phys */
+void
+mmuidmap(uintptr phys, int mbs)
+{
+	mmumap(phys, phys, mbs);
+}
+
+PTE *
+newl2page(void)
+{
+	PTE *p;
+
+	if ((uintptr)l2pages >= HVECTORS - BY2PG)
+		panic("l2pages");
+	p = (PTE *)l2pages;
+	l2pages += BY2PG;
+	return p;
+}
+
+/*
+ * replace an L1 section pte with an L2 page table and an L1 coarse pte,
+ * with the same attributes as the original pte and covering the same
+ * region of memory.
+ */
+static void
+expand(uintptr va)
+{
+	int x;
+	uintptr tva, pa;
+	PTE oldpte;
+	PTE *l1, *l2;
+
+	va &= ~(MB-1);
+	x = L1X(va);
+	l1 = &m->mmul1[x];
+	oldpte = *l1;
+	if (oldpte == Fault || (oldpte & (Coarse|Section)) != Section)
+		return;			/* make idempotent */
+
+	/* wasteful - l2 pages only have 256 entries - fix */
+	/*
+	 * it may be very early, before any memory allocators are
+	 * configured, so do a crude allocation from the top of memory.
+	 */
+	l2 = newl2page();
+	memset(l2, 0, BY2PG);
+
+	/* write new L1 l2 entry back into L1 descriptors */
+	*l1 = PPN(PADDR(l2))|Dom0|Coarse;
+
+	/* fill l2 page with l2 ptes with equiv attrs; copy AP bits */
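+	/*
+	 * section AP bits sit at 10-11 and 15; small-page AP bits sit
+	 * at 4-5 and 9, hence the >>6 shift below.
+	 */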
+	x = Small | oldpte & (Cached|Buffered) | (oldpte & (1<<15 | 3<<10)) >> 6;
+	if (oldpte & L1sharable)
+		x |= L2sharable;
+	if (oldpte & L1wralloc)
+		x |= L2wralloc;
+	pa = oldpte & ~(MiB - 1);
+	for(tva = va; tva < va + MiB; tva += BY2PG, pa += BY2PG)
+		l2[L2X(tva)] = PPN(pa) | x;
+
+	/* force l2 page to memory */
+	allcache->wbse(l2, BY2PG);
+
+	/* clear out the current entry */
+	mmuinvalidateaddr(PPN(va));
+
+	allcache->wbinvse(l1, sizeof *l1);
+	if ((*l1 & (Coarse|Section)) != Coarse)
+		panic("expand %#p", va);
+}
+
+/*
+ * cpu0's l1 page table has likely changed since we copied it in
+ * launchinit, notably to allocate uncached sections for ucalloc.
+ * so copy it again from cpu0's.
+ */
+void
+mmuninit(void)
+{
+	int s;
+	PTE *l1, *newl1;
+
+	s = splhi();
+	l1 = m->mmul1;
+	newl1 = mallocalign(L1SIZE, L1SIZE, 0, 0);
+	assert(newl1);
+
+	allcache->wbinvse((PTE *)L1, L1SIZE);	/* get cpu0's up-to-date copy */
+	memmove(newl1, (PTE *)L1, L1SIZE);
+	allcache->wbse(newl1, L1SIZE);
+
+	mmuinvalidate();
+	coherence();
+
+	ttbput(PADDR(newl1));		/* switch */
+	coherence();
+	mmuinvalidate();
+	coherence();
+	m->mmul1 = newl1;
+	coherence();
+
+	mmul1empty();
+	coherence();
+	mmuinvalidate();
+	coherence();
+
+//	mmudump(m->mmul1);		/* DEBUG */
+	splx(s);
+	free(l1);
+}
+
+/* l1 is base of my l1 descriptor table */
+static PTE *
+l2pteaddr(PTE *l1, uintptr va)
+{
+	uintptr l2pa;
+	PTE pte;
+	PTE *l2;
+
+	expand(va);
+	pte = l1[L1X(va)];
+	if ((pte & (Coarse|Section)) != Coarse)
+		panic("l2pteaddr l1 pte %#8.8ux @ %#p not Coarse",
+			pte, &l1[L1X(va)]);
+	l2pa = pte & ~(KiB - 1);
+	l2 = (PTE *)KADDR(l2pa);
+	return &l2[L2X(va)];
+}
+
+void
+mmuinit(void)
+{
+	ulong va;
+	uintptr pa;
+	PTE *l1, *l2;
+
+	if (m->machno != 0) {
+		mmuninit();
+		return;
+	}
+
+	pa = ttbget();
+	l1 = KADDR(pa);
+
+	/* identity map most of the io space */
+	mmuidmap(PHYSIO, (PHYSIOEND - PHYSIO + MB - 1) / MB);
+	/* move the rest to more convenient addresses */
+	mmumap(VIRTNOR, PHYSNOR, 256);	/* 0x40000000 v -> 0xd0000000 p */
+	mmumap(VIRTAHB, PHYSAHB, 256);	/* 0xb0000000 v -> 0xc0000000 p */
+
+	/* map high vectors to start of dram, but only 4K, not 1MB */
+	pa -= MACHSIZE+BY2PG;		/* page tables must be page aligned */
+	l2 = KADDR(pa);
+	memset(l2, 0, 1024);
+
+	m->mmul1 = l1;		/* used by expand in l2pteaddr */
+
+	/* map private mem region (8K at soc.scu) without sharable bits */
+	va = soc.scu;
+	*l2pteaddr(l1, va) &= ~L2sharable;
+	va += BY2PG;
+	*l2pteaddr(l1, va) &= ~L2sharable;
+
+	/*
+	 * below (and above!) the vectors in virtual space may be dram.
+	 * populate the rest of l2 for the last MB.
+	 */
+	for (va = -MiB; va != 0; va += BY2PG)
+		l2[L2X(va)] = PADDR(va) | L2AP(Krw) | Small | L2ptedramattrs;
+	/* map high vectors page to 0; must match attributes of KZERO->0 map */
+	l2[L2X(HVECTORS)] = PHYSDRAM | L2AP(Krw) | Small | L2ptedramattrs;
+	coherence();
+	l1[L1X(HVECTORS)] = pa | Dom0 | Coarse;	/* l1 -> ttb-machsize-4k */
+
+	/* make kernel text unwritable */
+	for(va = KTZERO; va < (ulong)etext; va += BY2PG)
+		*l2pteaddr(l1, va) |= L2apro;
+
+	allcache->wbinv();
+	mmuinvalidate();
+
+	m->mmul1 = l1;
+	coherence();
+	mmul1empty();
+	coherence();
+//	mmudump(l1);			/* DEBUG */
+}
+
+static void
+mmul2empty(Proc* proc, int clear)
+{
+	PTE *l1;
+	Page **l2, *page;
+
+	l1 = m->mmul1;
+	l2 = &proc->mmul2;
+	for(page = *l2; page != nil; page = page->next){
+		if(clear)
+			memset(UINT2PTR(page->va), 0, BY2PG);
+		l1[page->daddr] = Fault;
+		allcache->wbse(l1, sizeof *l1);
+		l2 = &page->next;
+	}
+	*l2 = proc->mmul2cache;
+	proc->mmul2cache = proc->mmul2;
+	proc->mmul2 = nil;
+}
+
+static void
+mmul1empty(void)
+{
+#ifdef notdef
+/* there's a bug in here */
+	PTE *l1;
+
+	/* clean out any user mappings still in l1 */
+	if(m->mmul1lo > L1lo){
+		if(m->mmul1lo == 1)
+			m->mmul1[L1lo] = Fault;
+		else
+			memset(&m->mmul1[L1lo], 0, m->mmul1lo*sizeof(PTE));
+		m->mmul1lo = L1lo;
+	}
+	if(m->mmul1hi < L1hi){
+		l1 = &m->mmul1[m->mmul1hi];
+		if((L1hi - m->mmul1hi) == 1)
+			*l1 = Fault;
+		else
+			memset(l1, 0, (L1hi - m->mmul1hi)*sizeof(PTE));
+		m->mmul1hi = L1hi;
+	}
+#else
+	memset(&m->mmul1[L1lo], 0, (L1hi - L1lo)*sizeof(PTE));
+#endif /* notdef */
+	allcache->wbse(&m->mmul1[L1lo], (L1hi - L1lo)*sizeof(PTE));
+}
+
+void
+mmuswitch(Proc* proc)
+{
+	int x;
+	PTE *l1;
+	Page *page;
+
+	/* do kprocs get here and if so, do they need to? */
+	if(m->mmupid == proc->pid && !proc->newtlb)
+		return;
+	m->mmupid = proc->pid;
+
+	/* write back dirty and invalidate caches */
+	l1cache->wbinv();
+
+	if(proc->newtlb){
+		mmul2empty(proc, 1);
+		proc->newtlb = 0;
+	}
+
+	mmul1empty();
+
+	/* move in new map */
+	l1 = m->mmul1;
+	for(page = proc->mmul2; page != nil; page = page->next){
+		x = page->daddr;
+		l1[x] = PPN(page->pa)|Dom0|Coarse;
+		/* know here that L1lo < x < L1hi */
+		if(x+1 - m->mmul1lo < m->mmul1hi - x)
+			m->mmul1lo = x+1;
+		else
+			m->mmul1hi = x;
+	}
+
+	/* make sure map is in memory */
+	/* could be smarter about how much? */
+	allcache->wbse(&l1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
+
+	/* lose any possible stale tlb entries */
+	mmuinvalidate();
+
+	//print("mmuswitch l1lo %d l1hi %d %d\n",
+	//	m->mmul1lo, m->mmul1hi, proc->kp);
+
+	wakewfi();		/* in case there's another runnable proc */
+}
+
+void
+flushmmu(void)
+{
+	int s;
+
+	s = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(s);
+}
+
+void
+mmurelease(Proc* proc)
+{
+	Page *page, *next;
+
+	/* write back dirty and invalidate caches */
+	l1cache->wbinv();
+
+	mmul2empty(proc, 0);
+	for(page = proc->mmul2cache; page != nil; page = next){
+		next = page->next;
+		if(--page->ref)
+			panic("mmurelease: page->ref %d", page->ref);
+		pagechainhead(page);
+	}
+	if(proc->mmul2cache && palloc.r.p)
+		wakeup(&palloc.r);
+	proc->mmul2cache = nil;
+
+	mmul1empty();
+
+	/* make sure map is in memory */
+	/* could be smarter about how much? */
+	allcache->wbse(&m->mmul1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
+
+	/* lose any possible stale tlb entries */
+	mmuinvalidate();
+}
+
+void
+putmmu(uintptr va, uintptr pa, Page* page)
+{
+	int x;
+	Page *pg;
+	PTE *l1, *pte;
+
+	x = L1X(va);
+	l1 = &m->mmul1[x];
+	if (Debug) {
+		iprint("putmmu(%#p, %#p, %#p) ", va, pa, page->pa);
+		iprint("mmul1 %#p l1 %#p *l1 %#ux x %d pid %ld\n",
+			m->mmul1, l1, *l1, x, up->pid);
+		if (*l1)
+			panic("putmmu: old l1 pte non-zero; stuck?");
+	}
+	if(*l1 == Fault){
+		/* wasteful - l2 pages only have 256 entries - fix */
+		if(up->mmul2cache == nil){
+			/* auxpg since we don't need much? memset if so */
+			pg = newpage(1, 0, 0);
+			pg->va = VA(kmap(pg));
+		}
+		else{
+			pg = up->mmul2cache;
+			up->mmul2cache = pg->next;
+			memset(UINT2PTR(pg->va), 0, BY2PG);
+		}
+		pg->daddr = x;
+		pg->next = up->mmul2;
+		up->mmul2 = pg;
+
+		/* force l2 page to memory */
+		allcache->wbse((void *)pg->va, BY2PG);
+
+		*l1 = PPN(pg->pa)|Dom0|Coarse;
+		allcache->wbse(l1, sizeof *l1);
+
+		if (Debug)
+			iprint("l1 %#p *l1 %#ux x %d pid %ld\n", l1, *l1, x, up->pid);
+
+		if(x >= m->mmul1lo && x < m->mmul1hi){
+			if(x+1 - m->mmul1lo < m->mmul1hi - x)
+				m->mmul1lo = x+1;
+			else
+				m->mmul1hi = x;
+		}
+	}
+	pte = UINT2PTR(KADDR(PPN(*l1)));
+	if (Debug) {
+		iprint("pte %#p index %ld was %#ux\n", pte, L2X(va), *(pte+L2X(va)));
+		if (*(pte+L2X(va)))
+			panic("putmmu: old l2 pte non-zero; stuck?");
+	}
+
+	/* protection bits are
+	 *	PTERONLY|PTEVALID;
+	 *	PTEWRITE|PTEVALID;
+	 *	PTEWRITE|PTEUNCACHED|PTEVALID;
+	 */
+	x = Small;
+	if(!(pa & PTEUNCACHED))
+		x |= L2ptedramattrs;
+	if(pa & PTEWRITE)
+		x |= L2AP(Urw);
+	else
+		x |= L2AP(Uro);
+	pte[L2X(va)] = PPN(pa)|x;
+	allcache->wbse(&pte[L2X(va)], sizeof pte[0]);
+
+	/* clear out the current entry */
+	mmuinvalidateaddr(PPN(va));
+
+	/*  write back dirty entries - we need this because the pio() in
+	 *  fault.c is writing via a different virt addr and won't clean
+	 *  its changes out of the dcache.  Page coloring doesn't work
+	 *  on this mmu because the virtual cache is set associative
+	 *  rather than direct mapped.
+	 */
+	l1cache->wb();
+
+	if(page->cachectl[0] == PG_TXTFLUSH){
+		/* pio() sets PG_TXTFLUSH whenever a text pg has been written */
+		cacheiinv();
+		page->cachectl[0] = PG_NOFLUSH;
+	}
+	if (Debug)
+		iprint("putmmu %#p %#p %#p\n", va, pa, PPN(pa)|x);
+}
+
+void*
+mmuuncache(void* v, usize size)
+{
+	int x;
+	PTE *pte;
+	uintptr va;
+
+	/*
+	 * Simple helper for ucalloc().
+	 * Uncache a Section, must already be
+	 * valid in the MMU.
+	 */
+	va = PTR2UINT(v);
+	assert(!(va & (1*MiB-1)) && size == 1*MiB);
+
+	x = L1X(va);
+	pte = &m->mmul1[x];
+	if((*pte & (Section|Coarse)) != Section)
+		return nil;
+	*pte &= ~L1ptedramattrs;
+	*pte |= L1sharable;
+	mmuinvalidateaddr(va);
+	allcache->wbse(pte, 4);
+
+	return v;
+}
+
+uintptr
+mmukmap(uintptr va, uintptr pa, usize size)
+{
+	int x;
+	PTE *pte;
+
+	/*
+	 * Stub.
+	 */
+	assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);
+
+	x = L1X(va);
+	pte = &m->mmul1[x];
+	if(*pte != Fault)
+		return 0;
+	*pte = pa|Dom0|L1AP(Krw)|Section;
+	mmuinvalidateaddr(va);
+	allcache->wbse(pte, 4);
+
+	return va;
+}
+
+uintptr
+mmukunmap(uintptr va, uintptr pa, usize size)
+{
+	int x;
+	PTE *pte;
+
+	/*
+	 * Stub.
+	 */
+	assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);
+
+	x = L1X(va);
+	pte = &m->mmul1[x];
+	if(*pte != (pa|Dom0|L1AP(Krw)|Section))
+		return 0;
+	*pte = Fault;
+	mmuinvalidateaddr(va);
+	allcache->wbse(pte, 4);
+
+	return va;
+}
+
+/*
+ * Return the number of bytes that can be accessed via KADDR(pa).
+ * If pa is not a valid argument to KADDR, return 0.
+ */
+uintptr
+cankaddr(uintptr pa)
+{
+	if((PHYSDRAM == 0 || pa >= PHYSDRAM) && pa < PHYSDRAM+memsize)
+		return PHYSDRAM+memsize - pa;
+	return 0;
+}
+
+/* from 386 */
+void*
+vmap(uintptr pa, usize size)
+{
+	uintptr pae, va;
+	usize o, osize;
+
+	/*
+	 * XXX - replace with new vm stuff.
+	 * Crock after crock - the first 4MB is mapped with 2MB pages
+	 * so catch that and return good values because the current mmukmap
+	 * will fail.
+	 */
+	if(pa+size < 4*MiB)
+		return UINT2PTR(kseg0|pa);
+
+	osize = size;
+	o = pa & (BY2PG-1);
+	pa -= o;
+	size += o;
+	size = ROUNDUP(size, BY2PG);
+
+	va = kseg0|pa;
+	pae = mmukmap(va, pa, size);
+	if(pae == 0 || pae-size != pa)
+		panic("vmap(%#p, %ld) called from %#p: mmukmap fails %#p",
+			pa+o, osize, getcallerpc(&pa), pae);
+
+	return UINT2PTR(va+o);
+}
+
+/* from 386 */
+void
+vunmap(void* v, usize size)
+{
+	/*
+	 * XXX - replace with new vm stuff.
+	 * Can't do this until do real vmap for all space that
+	 * might be used, e.g. stuff below 1MB which is currently
+	 * mapped automagically at boot but that isn't used (or
+	 * at least shouldn't be used) by the kernel.
+	upafree(PADDR(v), size);
+	 */
+	USED(v, size);
+}
+
+/*
+ * Notes.
+ * Everything is in domain 0;
+ * domain 0 access bits in the DAC register are set
+ * to Client, which means access is controlled by the
+ * permission values set in the PTE.
+ *
+ * L1 access control for the kernel is set to 1 (RW,
+ * no user mode access);
+ * L2 access control for the kernel is set to 1 (ditto)
+ * for all 4 AP sets;
+ * L1 user mode access is never set;
+ * L2 access control for user mode is set to either
+ * 2 (RO) or 3 (RW) depending on whether text or data,
+ * for all 4 AP sets.
+ * (To get kernel RO set AP to 0 and S bit in control
+ * register c1).
+ * Coarse L1 page-tables are used. They have 256 entries
+ * and so consume 1024 bytes per table.
+ * Small L2 page-tables are used. They have 1024 entries
+ * and so consume 4096 bytes per table.
+ *
+ * 4KiB. That's the size of 1) a page, 2) the
+ * size allocated for an L2 page-table page (note only 1KiB
+ * is needed per L2 page - to be dealt with later) and
+ * 3) the size of the area in L1 needed to hold the PTEs
+ * to map 1GiB of user space (0 -> 0x3fffffff, 1024 entries).
+ */

+ 4 - 0
sys/src/9/teg2/notes/assumes-hz-under-1000

@@ -0,0 +1,4 @@
+./dat.h:9: #define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+./random.c:87: 	addclock0link(randomclock, 1000/HZ);
+../port/portclock.c:255: 		ms = 1000/HZ;
+../port/portfns.h:335: #define		TK2MS(x) ((x)*(1000/HZ))

+ 41 - 0
sys/src/9/teg2/notes/bug.rfe

@@ -0,0 +1,41 @@
+/*
+ * return from user-mode exception.
+ * expects new SPSR in R0.  R13 must point to ureg->type.
+ */
+_rfue:
+TEXT rfue(SB), 1, $-4
+//	CPSID
+//	BIC	$PsrMbz, R0		/* force little-endian upon return */
+	MOVW	R0, SPSR		/* ... */
+
+	/*
+	 * order on stack is type, psr, pc, but RFEV7 needs pc, psr.
+	 * step on type and previous word to hold temporary values.
+	 * we could instead change the order in which psr & pc are pushed.
+	 */
+	MOVW	4(R13), R1		/* psr */
+	MOVW	8(R13), R2		/* pc */
+	MOVW	R2, 4(R13)		/* pc */
+	MOVW	R1, 8(R13)		/* psr */
+
+	MOVM.DB.S (R13), [R0-R14]	/* restore user registers */
+	ADD	$4, R13			/* pop type, sp -> pc */
+
+#ifdef OLDWAY
+	ADD	$(2*4), R13		/* pop past ureg->{type+psr} to pc */
+	/*
+	 * this used to work on arm arch v[567] and still works on cpu 0.
+	 * for some reason it sometimes sets PsrBigend on cpu 1.
+	 * Ureg's tail was:
+	 *
+	 * typedef struct Ureg {
+	 * 	⋯
+	 * 	ulong	type;	/* of exception */
+	 * 	ulong	psr;
+	 * 	ulong	pc;	/* interrupted addr */
+	 * } Ureg;
+	 */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+#endif
+//	SETEND(0)
+	RFEV7W(13)

+ 59 - 0
sys/src/9/teg2/notes/byte-order

@@ -0,0 +1,59 @@
+static void
+forcele(void)
+{
+#ifdef BIGENDCHECK
+	union {
+		ulong 	ul;
+		uchar	uc[sizeof(ulong)];
+	} u;
+
+	u.ul = 0;
+	coherence();
+	u.uc[0] = 1;
+	coherence();
+	if (u.ul == 1)
+		return;
+
+	emerge('?');
+	emerge('e');
+	if ((u.ul & MASK(8)) == 0) {
+		emerge('B');
+		panic("rdbaseticks: cpu%d is big-endian", m->machno);
+	} else {
+		emerge('W');
+		panic("rdbaseticks: cpu%d is whacked-endian", m->machno);
+	}
+#endif
+}
+
+void
+ckbigendian(char *state)
+{
+	int wrong;
+
+	wrong = 0;
+	if (getpsr() & PsrBigend) {
+		setendlittle();
+		wrong++;
+		wave('?');
+		wave('e');
+		wave('p');
+		if (state == nil)
+			state = "running";
+		iprint("cpu%d: %s in big-endian mode\n", m->machno, state);
+	}
+	if (controlget() & CpCee) {
+		wrong++;
+		wave('?');
+		wave('e');
+		wave('e');
+		if (state == nil)
+			state = "running";
+		iprint("cpu%d: %s with big-endian exceptions\n", m->machno, state);
+	}
+	if (wrong) {
+		dumpstack();
+		delay(3000);
+		panic("cpu%d: big-endian", m->machno);
+	}
+}

+ 19 - 0
sys/src/9/teg2/notes/clks

@@ -0,0 +1,19 @@
+see §5.4.40 (p.142) pllx_* (2 regs)
+
+out of u-boot, these are the settings:
+---
+pllx	base 0x4003e80c:
+		enabled, no locked
+		divp == 0 (post divider == 2^0 == 1)
+		divn == 1000 (feedback divider)
+		divm == 12 (input divider)
+	misc 0x100: pllx_cpcon == 1		[ should be 12 ]
+super cclk divider 0x80000000:
+	enabled
+	dividend == 0 (thus 1)
+	divisor == 0 (thus 1)
+super sclk divider 0x0:
+	disabled
+	dividend == 0 (thus 1)
+	divisor == 0 (thus 1)
+---

+ 22 - 0
sys/src/9/teg2/notes/movm.w

@@ -0,0 +1,22 @@
+gorka writes:
+---
+I have userspace on the gumstix [xscale, not omap].  The problem that
+got me in trouble was that in lexception.s (or l.s),
+
+	MOVM.DB.W [R0-R14], (R13)
+
+works differently for this architecture (and probably for others, as
+it is unclear how it should behave by reading the arm specs).  This
+happens only for kernel faults as the others (syscall, user faults)
+use MOVM.DB.W.S which uses the banked user registers.
+
+The problem is that in this arch the value of R13 saved is the value
+after R13 itself has been modified, whereas in the others (bitsy,
+pico...), it was the value before.  Adding 4*15 to the stack before
+the RFE solves the problem.
+---
+
+In fact, the 2005 ARM arch. ref. man. (ARM DDI 0100I) says, under STM (1),
+that if Rn appears in the set of registers (and isn't the first one)
+and .W is specified, the stored value of Rn is unpredictable.
+The arm v7-ar arch. ref. man. says such usage is obsolete.

+ 29 - 0
sys/src/9/teg2/notes/pci

@@ -0,0 +1,29 @@
+
+Plan 9 from Bell Labs
+
+127 holes free 213327872 bytes free
+l1: int split i&d, 4 ways 256 sets 32 bytes/line; can WB; can write-allocate; l1 I policy VIPT
+l2: ext unified,   8 ways 512 sets 32 bytes/line; can WT; can WB; can write-allocate
+fp: arm arch VFPv3+ with null subarch
+1000 mips (single-issue), 1980 mips (dual-issue)
+cpu0: 1000MHz ARM Cortex-A9
+pci: 0x80000000: nvidia, rev 0xa0 class 0x060000 misc 0x00010008
+		cfg sp	 ecfg sp  downstream pref.  !pref.
+axi bar sz	00000100 00000100 00000010 00010000 00010000 00000000 
+axi bar start	80004000 80104000 80400000 a0000000 90000000 00000000
+fcpi bar	fdff0000 fe100000 fdfc0000 00a00001 00900001 00000000
+cache bar	00000000 00000000 00000000 00000000
+		00000000 fc000000
+msi bar		00000000 00000000 00000000
+		00000000 00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00008e05 00000000 00000001 00000004 a0024001
+00000000 00007fff 0000003f 00000000 00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000 00000840 00000000 00103020 00000000
+3f3f003f 00000332 00000000 00100000 00000009 00000000 00000009 00000001
+00000000 00000000 00000001 
+panic: external abort 0x8 pc 0xc0486b40 addr 0x8000392c
+cpu0: exiting
+archreboot: reset!
+
+

+ 78 - 0
sys/src/9/teg2/notes/pci.2.buses

@@ -0,0 +1,78 @@
+
+Plan 9 from Bell Labs
+
+127 holes free 213327872 bytes free
+l1: int split i&d, 4 ways 256 sets 32 bytes/line; can WB; can write-allocate; l1 I policy VIPT
+l2: ext unified,   8 ways 512 sets 32 bytes/line; can WT; can WB; can write-allocate
+fp: arm arch VFPv3+ with null subarch
+1000 mips (single-issue), 1980 mips (dual-issue)
+cpu0: 1000MHz ARM Cortex-A9
+pci: 0x80000000: nvidia, rev 0xa0 class 0x060000 misc 0x00010008
+pci->ioaddrs  0x20000101
+pci->ioaddrhi 0x80408040
+pci->memaddrs 0x8ff09000
+scanning pci bus 0...tbdf 0xc000000 probe 0x80104000 failed
+tbdf 0xc000800 probe 0x80104800 failed
+tbdf 0xc001000 probe 0x80105000 failed
+tbdf 0xc001800 probe 0x80105800 failed
+tbdf 0xc002000 probe 0x80106000 failed
+tbdf 0xc002800 probe 0x80106800 failed
+tbdf 0xc003000 probe 0x80107000 failed
+tbdf 0xc003800 probe 0x80107800 failed
+tbdf 0xc004000 probe 0x80108000 failed
+tbdf 0xc004800 probe 0x80108800 failed
+tbdf 0xc005000 probe 0x80109000 failed
+tbdf 0xc005800 probe 0x80109800 failed
+tbdf 0xc006000 probe 0x8010a000 failed
+tbdf 0xc006800 probe 0x8010a800 failed
+tbdf 0xc007000 probe 0x8010b000 failed
+tbdf 0xc007800 probe 0x8010b800 failed
+scanning pci bus 1...tbdf 0xc010000 probe 0x80114000 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114010 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114014 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x80114018 ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x8011401c ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114020 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010000 probe 0x80114024 ok
+tbdf 0xc010800 probe 0x80114800 failed
+tbdf 0xc011000 probe 0x80115000 failed
+tbdf 0xc011800 probe 0x80115800 failed
+tbdf 0xc012000 probe 0x80116000 failed
+tbdf 0xc012800 probe 0x80116800 failed
+tbdf 0xc013000 probe 0x80117000 failed
+tbdf 0xc013800 probe 0x80117800 failed
+tbdf 0xc014000 probe 0x80118000 failed
+tbdf 0xc014800 probe 0x80118800 failed
+tbdf 0xc015000 probe 0x80119000 failed
+tbdf 0xc015800 probe 0x80119800 failed
+tbdf 0xc016000 probe 0x8011a000 failed
+tbdf 0xc016800 probe 0x8011a800 failed
+tbdf 0xc017000 probe 0x8011b000 failed
+tbdf 0xc017800 probe 0x8011b800 failed
+scanning pci bus 2...tbdf 0xc020000 probe 0x80124000 failed
+tbdf 0xc020800 probe 0x80124800 
+

BIN
sys/src/9/teg2/nvram


+ 853 - 0
sys/src/9/teg2/pci.c

@@ -0,0 +1,853 @@
+/*
+ * PCI support code.
+ * Needs a massive rewrite.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#define DBG	if(0) pcilog
+
+typedef struct Pci Pci;
+
+struct
+{
+	char	output[PCICONSSIZE];
+	int	ptr;
+}PCICONS;
+
+int
+pcilog(char *fmt, ...)
+{
+	int n;
+	va_list arg;
+	char buf[PRINTSIZE];
+
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+
+	memmove(PCICONS.output+PCICONS.ptr, buf, n);
+	PCICONS.ptr += n;
+	return n;
+}
+
+enum
+{
+	MaxFNO		= 7,
+	MaxUBN		= 255,
+};
+
+enum
+{					/* command register */
+	IOen		= (1<<0),
+	MEMen		= (1<<1),
+	MASen		= (1<<2),
+	MemWrInv	= (1<<4),
+	PErrEn		= (1<<6),
+	SErrEn		= (1<<8),
+};
+
+typedef struct {
+	ulong	cap;
+	ulong	ctl;
+} Capctl;
+typedef struct {
+	Capctl	dev;
+	Capctl	link;
+	Capctl	slot;
+} Devlinkslot;
+
+/* capability list id 0x10 is pci-e */
+struct Pci {
+	/* pci-compatible config */
+	/* what io.h calls type 0 & type 1 pre-defined header */
+	ulong	id;
+	ulong	cs;
+	ulong	revclass;
+	ulong	misc;	/* cache line size, latency timer, header type, bist */
+	ulong	bar[2];		/* always 0 on tegra 2 */
+
+	/* types 1 & 2 pre-defined header */
+	ulong	bus;
+	ulong	ioaddrs;
+	ulong	memaddrs;
+	ulong	prefmem;
+	ulong	prefbasehi;
+	ulong	preflimhi;
+	/* type 2 pre-defined header only */
+	ulong	ioaddrhi;
+	ulong	cfgcapoff;	/* offset in cfg. space to cap. list (0x40) */
+	ulong	rom;
+	ulong	intr;		/* PciINT[LP] */
+	/* subsystem capability regs */
+	ulong	subsysid;
+	ulong	subsyscap;
+	/* */
+
+	Capctl	pwrmgmt;
+
+	/* msi */
+	ulong	msictlcap;
+	ulong	msimsgaddr[2];	/* little-endian */
+	ulong	msimsgdata;
+
+	/* pci-e cap. */
+	uchar	_pad0[0x80-0x60];
+	ulong	pciecap;
+	Devlinkslot port0;
+	ulong	rootctl;
+	ulong	rootsts;
+	Devlinkslot port1;
+
+	/* 0xbc */
+	
+};
+
+enum {
+	/* offsets from soc.pci */
+	Port0		= 0,
+	Port1		= 0x1000,
+	Pads		= 0x3000,
+	Afi		= 0x3800,
+	Aficfg		= Afi + 0xac,
+	Cfgspace	= 0x4000,
+	Ecfgspace	= 0x104000,
+
+	/* cs bits */
+	Iospace		= 1<<0,
+	Memspace	= 1<<1,
+	Busmaster	= 1<<2,
+
+	/* Aficfg bits */
+	Fpcion		= 1<<0,
+};
+
+struct Pcictlr {
+	union {
+		uchar	_padpci[0x1000];
+		Pci;
+	} ports[2];
+	uchar	_padpads[0x1000];
+	uchar	pads[0x800];
+	uchar	afi[0x800];
+	ulong	cfg[0x1000];
+	ulong	extcfg[0x1000];
+};
+
+static Lock pcicfglock;
+static Lock pcicfginitlock;
+static int pcicfgmode = -1;
+static int pcimaxbno = 1;  /* was 7; only 2 pci buses; touching 3rd hangs */
+static int pcimaxdno;
+static Pcidev* pciroot;
+static Pcidev* pcilist;
+static Pcidev* pcitail;
+
+static int pcicfgrw8(int, int, int, int);
+static int pcicfgrw16(int, int, int, int);
+static int pcicfgrw32(int, int, int, int);
+
+static char* bustypes[] = {
+	"CBUSI",
+	"CBUSII",
+	"EISA",
+	"FUTURE",
+	"INTERN",
+	"ISA",
+	"MBI",
+	"MBII",
+	"MCA",
+	"MPI",
+	"MPSA",
+	"NUBUS",
+	"PCI",
+	"PCMCIA",
+	"TC",
+	"VL",
+	"VME",
+	"XPRESS",
+};
+
+static int
+tbdffmt(Fmt* fmt)
+{
+	char *p;
+	int l, r;
+	uint type, tbdf;
+
+	if((p = malloc(READSTR)) == nil)
+		return fmtstrcpy(fmt, "(tbdfconv)");
+
+	switch(fmt->r){
+	case 'T':
+		tbdf = va_arg(fmt->args, int);
+		if(tbdf == BUSUNKNOWN)
+			snprint(p, READSTR, "unknown");
+		else{
+			type = BUSTYPE(tbdf);
+			if(type < nelem(bustypes))
+				l = snprint(p, READSTR, bustypes[type]);
+			else
+				l = snprint(p, READSTR, "%d", type);
+			snprint(p+l, READSTR-l, ".%d.%d.%d",
+				BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf));
+		}
+		break;
+
+	default:
+		snprint(p, READSTR, "(tbdfconv)");
+		break;
+	}
+	r = fmtstrcpy(fmt, p);
+	free(p);
+
+	return r;
+}
+
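+/*
+ * size a BAR the standard way: save it, write all ones, read back the
+ * address mask, restore the saved value and return the two's complement
+ * of the masked read-back.
+ */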
+ulong
+pcibarsize(Pcidev *p, int rno)
+{
+	ulong v, size;
+
+	v = pcicfgrw32(p->tbdf, rno, 0, 1);
+	pcicfgrw32(p->tbdf, rno, 0xFFFFFFF0, 0);
+	size = pcicfgrw32(p->tbdf, rno, 0, 1);
+	if(v & 1)
+		size |= 0xFFFF0000;
+	pcicfgrw32(p->tbdf, rno, v, 0);
+
+	return -(size & ~0x0F);
+}
+
+static int
+pcilscan(int bno, Pcidev** list)
+{
+	Pcidev *p, *head, *tail;
+	int dno, fno, i, hdt, l, maxfno, maxubn, rno, sbn, tbdf, ubn;
+
+	maxubn = bno;
+	head = nil;
+	tail = nil;
+	for(dno = 0; dno <= pcimaxdno; dno++){
+		maxfno = 0;
+		for(fno = 0; fno <= maxfno; fno++){
+			/*
+			 * For this possible device, form the
+			 * bus+device+function triplet needed to address it
+			 * and try to read the vendor and device ID.
+			 * If successful, allocate a device struct and
+			 * start to fill it in with some useful information
+			 * from the device's configuration space.
+			 */
+			tbdf = MKBUS(BusPCI, bno, dno, fno);
+			l = pcicfgrw32(tbdf, PciVID, 0, 1);
+			if(l == 0xFFFFFFFF || l == 0)
+				continue;
+			p = malloc(sizeof(*p));
+			if(p == nil)
+				panic("pcilscan: no memory");
+			p->tbdf = tbdf;
+			p->vid = l;
+			p->did = l>>16;
+
+			if(pcilist != nil)
+				pcitail->list = p;
+			else
+				pcilist = p;
+			pcitail = p;
+
+			p->pcr = pcicfgr16(p, PciPCR);
+			p->rid = pcicfgr8(p, PciRID);
+			p->ccrp = pcicfgr8(p, PciCCRp);
+			p->ccru = pcicfgr8(p, PciCCRu);
+			p->ccrb = pcicfgr8(p, PciCCRb);
+			p->cls = pcicfgr8(p, PciCLS);
+			p->ltr = pcicfgr8(p, PciLTR);
+
+			p->intl = pcicfgr8(p, PciINTL);
+
+			/*
+			 * If the device is a multi-function device adjust the
+			 * loop count so all possible functions are checked.
+			 */
+			hdt = pcicfgr8(p, PciHDT);
+			if(hdt & 0x80)
+				maxfno = MaxFNO;
+
+			/*
+			 * If appropriate, read the base address registers
+			 * and work out the sizes.
+			 */
+			switch(p->ccrb) {
+			case 0x03:		/* display controller */
+				/* fall through */
+			case 0x01:		/* mass storage controller */
+			case 0x02:		/* network controller */
+			case 0x04:		/* multimedia device */
+			case 0x07:		/* simple comm. controllers */
+			case 0x08:		/* base system peripherals */
+			case 0x09:		/* input devices */
+			case 0x0A:		/* docking stations */
+			case 0x0B:		/* processors */
+			case 0x0C:		/* serial bus controllers */
+				if((hdt & 0x7F) != 0)
+					break;
+				rno = PciBAR0 - 4;
+				for(i = 0; i < nelem(p->mem); i++) {
+					rno += 4;
+					p->mem[i].bar = pcicfgr32(p, rno);
+					p->mem[i].size = pcibarsize(p, rno);
+				}
+				break;
+
+			case 0x00:
+			case 0x05:		/* memory controller */
+			case 0x06:		/* bridge device */
+			default:
+				break;
+			}
+
+			if(head != nil)
+				tail->link = p;
+			else
+				head = p;
+			tail = p;
+		}
+	}
+
+	*list = head;
+	for(p = head; p != nil; p = p->link){
+		/*
+		 * Find PCI-PCI bridges and recursively descend the tree.
+		 */
+		if(p->ccrb != 0x06 || p->ccru != 0x04)
+			continue;
+
+		/*
+		 * If the secondary or subordinate bus number is not
+		 * initialised try to do what the PCI BIOS should have
+		 * done and fill in the numbers as the tree is descended.
+		 * On the way down the subordinate bus number is set to
+		 * the maximum as it's not known how many buses are behind
+		 * this one; the final value is set on the way back up.
+		 */
+		sbn = pcicfgr8(p, PciSBN);
+		ubn = pcicfgr8(p, PciUBN);
+
+		if(sbn == 0 || ubn == 0) {
+			sbn = maxubn+1;
+			/*
+			 * Make sure memory, I/O and master enables are
+			 * off, set the primary, secondary and subordinate
+			 * bus numbers and clear the secondary status before
+			 * attempting to scan the secondary bus.
+			 *
+			 * Initialisation of the bridge should be done here.
+			 */
+			pcicfgw32(p, PciPCR, 0xFFFF0000);
+			l = (MaxUBN<<16)|(sbn<<8)|bno;
+			pcicfgw32(p, PciPBN, l);
+			pcicfgw16(p, PciSPSR, 0xFFFF);
+			maxubn = pcilscan(sbn, &p->bridge);
+			l = (maxubn<<16)|(sbn<<8)|bno;
+
+			pcicfgw32(p, PciPBN, l);
+		}
+		else {
+			if(ubn > maxubn)
+				maxubn = ubn;
+			pcilscan(sbn, &p->bridge);
+		}
+	}
+
+	return maxubn;
+}
+
+extern void rtl8169interrupt(Ureg*, void* arg);
+
+/* not used yet */
+static void
+pciintr(Ureg *ureg, void *p)
+{
+	rtl8169interrupt(ureg, p);		/* HACK */
+}
+
+static void
+pcicfginit(void)
+{
+	char *p;
+	Pci *pci = (Pci *)soc.pci;
+	Pcidev **list;
+	int bno, n;
+
+	lock(&pcicfginitlock);
+	if(pcicfgmode != -1) {
+		unlock(&pcicfginitlock);
+		return;
+	}
+
+	/*
+	 * TrimSlice # pci 0 1
+	 * Scanning PCI devices on bus 0 1
+	 * BusDevFun  VendorId   DeviceId   Device Class       Sub-Class
+	 * _____________________________________________________________
+	 * 00.00.00   0x10de     0x0bf0     Bridge device           0x04
+	 * 01.00.00   0x10ec     0x8168     Network controller      0x00
+	 *
+	 * thus pci bus 0 has a bridge with, perhaps, an ide/sata ctlr behind,
+	 * and pci bus 1 has the realtek 8169 on it:
+	 *
+	 * TrimSlice # pci 1 long
+	 * Scanning PCI devices on bus 1
+	 *
+	 * Found PCI device 01.00.00:
+	 *   vendor ID =                   0x10ec
+	 *   device ID =                   0x8168
+	 *   command register =            0x0007
+	 *   status register =             0x0010
+	 *   revision ID =                 0x03
+	 *   class code =                  0x02 (Network controller)
+	 *   sub class code =              0x00
+	 *   programming interface =       0x00
+	 *   cache line =                  0x08
+	 *   base address 0 =              0x80400001		config
+	 *   base address 1 =              0x00000000		(ext. config)
+	 *   base address 2 =              0xa000000c		"downstream"
+	 *   base address 3 =              0x00000000		(prefetchable)
+	 *   base address 4 =              0xa000400c		not "
+	 *   base address 5 =              0x00000000		(unused)
+	 */
+	n = pci->id >> 16;
+	if (((pci->id & MASK(16)) != Vnvidia || (n != 0xbf0 && n != 0xbf1)) &&
+	     (pci->id & MASK(16)) != Vrealtek) {
+		print("no pci controller at %#p\n", pci);
+		unlock(&pcicfginitlock);
+		return;
+	}
+	if (0)
+		iprint("pci: %#p: nvidia, rev %#ux class %#6.6lux misc %#8.8lux\n",
+			pci, (uchar)pci->revclass, pci->revclass >> 8,
+			pci->misc);
+
+	pci->cs &= Iospace;
+	pci->cs |= Memspace | Busmaster;
+	coherence();
+
+	pcicfgmode = 1;
+//	pcimaxdno = 31;
+	pcimaxdno = 15;			/* for trimslice */
+
+	fmtinstall('T', tbdffmt);
+
+	if(p = getconf("*pcimaxbno")){
+		n = strtoul(p, 0, 0);
+		if(n < pcimaxbno)
+			pcimaxbno = n;
+	}
+	if(p = getconf("*pcimaxdno")){
+		n = strtoul(p, 0, 0);
+		if(n < pcimaxdno)
+			pcimaxdno = n;
+	}
+
+	list = &pciroot;
+	/* was bno = 0; trimslice needs to start at 1 */
+	for(bno = 1; bno <= pcimaxbno; bno++) {
+		bno = pcilscan(bno, list);
+		while(*list)
+			list = &(*list)->link;
+	}
+	unlock(&pcicfginitlock);
+
+	if(getconf("*pcihinv"))
+		pcihinv(nil);
+}
+
+enum {
+	Afiintrcode	= 0xb8,
+};
+
+void
+pcieintrdone(void)				/* dismiss pci-e intr */
+{
+	ulong *afi;
+
+	afi = (ulong *)(soc.pci + Afi);
+	afi[Afiintrcode/sizeof *afi] = 0;	/* magic */
+	coherence();
+}
+
+/*
+ * whole config space for tbdf should be at (return address - rno).
+ */
+static void *
+tegracfgaddr(int tbdf, int rno)
+{
+	uintptr addr;
+
+	addr = soc.pci + (rno < 256? Cfgspace: Ecfgspace) + BUSBDF(tbdf) + rno;
+//	if (BUSBNO(tbdf) == 1)
+//		addr += Port1;
+	return (void *)addr;
+}
+
+static int
+pcicfgrw8(int tbdf, int rno, int data, int read)
+{
+	int x;
+	void *addr;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	x = -1;
+	if(BUSDNO(tbdf) > pcimaxdno)
+		return x;
+
+	addr = tegracfgaddr(tbdf, rno);
+
+	lock(&pcicfglock);
+	if(read)
+		x = *(uchar *)addr;
+	else
+		*(uchar *)addr = data;
+	unlock(&pcicfglock);
+
+	return x;
+}
+
+int
+pcicfgr8(Pcidev* pcidev, int rno)
+{
+	return pcicfgrw8(pcidev->tbdf, rno, 0, 1);
+}
+
+void
+pcicfgw8(Pcidev* pcidev, int rno, int data)
+{
+	pcicfgrw8(pcidev->tbdf, rno, data, 0);
+}
+
+static int
+pcicfgrw16(int tbdf, int rno, int data, int read)
+{
+	int x;
+	void *addr;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	x = -1;
+	if(BUSDNO(tbdf) > pcimaxdno)
+		return x;
+
+	addr = tegracfgaddr(tbdf, rno);
+
+	lock(&pcicfglock);
+	if(read)
+		x = *(ushort *)addr;
+	else
+		*(ushort *)addr = data;
+	unlock(&pcicfglock);
+
+	return x;
+}
+
+int
+pcicfgr16(Pcidev* pcidev, int rno)
+{
+	return pcicfgrw16(pcidev->tbdf, rno, 0, 1);
+}
+
+void
+pcicfgw16(Pcidev* pcidev, int rno, int data)
+{
+	pcicfgrw16(pcidev->tbdf, rno, data, 0);
+}
+
+static int
+pcicfgrw32(int tbdf, int rno, int data, int read)
+{
+	int x;
+	vlong v;
+	void *addr;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	x = -1;
+	if(BUSDNO(tbdf) > pcimaxdno)
+		return x;
+
+	addr = tegracfgaddr(tbdf, rno);
+	v = probeaddr((uintptr)addr);
+	if (v < 0)
+		return -1;
+
+	lock(&pcicfglock);
+	if(read)
+		x = *(ulong *)addr;
+	else
+		*(ulong *)addr = data;
+	unlock(&pcicfglock);
+
+	return x;
+}
+
+int
+pcicfgr32(Pcidev* pcidev, int rno)
+{
+	return pcicfgrw32(pcidev->tbdf, rno, 0, 1);
+}
+
+void
+pcicfgw32(Pcidev* pcidev, int rno, int data)
+{
+	pcicfgrw32(pcidev->tbdf, rno, data, 0);
+}
+
+Pcidev*
+pcimatch(Pcidev* prev, int vid, int did)
+{
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	if(prev == nil)
+		prev = pcilist;
+	else
+		prev = prev->list;
+
+	while(prev != nil){
+		if((vid == 0 || prev->vid == vid)
+		&& (did == 0 || prev->did == did))
+			break;
+		prev = prev->list;
+	}
+	return prev;
+}
+
+Pcidev*
+pcimatchtbdf(int tbdf)
+{
+	Pcidev *pcidev;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	for(pcidev = pcilist; pcidev != nil; pcidev = pcidev->list) {
+		if(pcidev->tbdf == tbdf)
+			break;
+	}
+	return pcidev;
+}
+
+static void
+pcilhinv(Pcidev* p)
+{
+	int i;
+	Pcidev *t;
+
+	if(p == nil) {
+		putstrn(PCICONS.output, PCICONS.ptr);
+		p = pciroot;
+		print("bus dev type vid  did intl memory\n");
+	}
+	for(t = p; t != nil; t = t->link) {
+		print("%d  %2d/%d %.2ux %.2ux %.2ux %.4ux %.4ux %3d  ",
+			BUSBNO(t->tbdf), BUSDNO(t->tbdf), BUSFNO(t->tbdf),
+			t->ccrb, t->ccru, t->ccrp, t->vid, t->did, t->intl);
+
+		for(i = 0; i < nelem(p->mem); i++) {
+			if(t->mem[i].size == 0)
+				continue;
+			print("%d:%.8lux %d ", i,
+				t->mem[i].bar, t->mem[i].size);
+		}
+		if(t->bridge)
+			print("->%d", BUSBNO(t->bridge->tbdf));
+		print("\n");
+	}
+	while(p != nil) {
+		if(p->bridge != nil)
+			pcilhinv(p->bridge);
+		p = p->link;
+	}
+}
+
+void
+pcihinv(Pcidev* p)
+{
+	if(pcicfgmode == -1)
+		pcicfginit();
+	lock(&pcicfginitlock);
+	pcilhinv(p);
+	unlock(&pcicfginitlock);
+}
+
+void
+pcireset(void)
+{
+	Pcidev *p;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	for(p = pcilist; p != nil; p = p->list) {
+		/* don't mess with the bridges */
+		if(p->ccrb == 0x06)
+			continue;
+		pciclrbme(p);
+	}
+}
+
+void
+pcisetioe(Pcidev* p)
+{
+	p->pcr |= IOen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pciclrioe(Pcidev* p)
+{
+	p->pcr &= ~IOen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pcisetbme(Pcidev* p)
+{
+	p->pcr |= MASen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pciclrbme(Pcidev* p)
+{
+	p->pcr &= ~MASen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pcisetmwi(Pcidev* p)
+{
+	p->pcr |= MemWrInv;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pciclrmwi(Pcidev* p)
+{
+	p->pcr &= ~MemWrInv;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+static int
+pcigetpmrb(Pcidev* p)
+{
+	int ptr;
+
+	if(p->pmrb != 0)
+		return p->pmrb;
+	p->pmrb = -1;
+
+	/*
+	 * If there are no extended capabilities implemented
+	 * (bit 4 in the status register), assume there's no standard
+	 * power management method.
+	 * Find the capabilities pointer based on PCI header type.
+	 */
+	if(!(pcicfgr16(p, PciPSR) & 0x0010))
+		return -1;
+	switch(pcicfgr8(p, PciHDT)){
+	default:
+		return -1;
+	case 0:					/* all other */
+	case 1:					/* PCI to PCI bridge */
+		ptr = 0x34;
+		break;
+	case 2:					/* CardBus bridge */
+		ptr = 0x14;
+		break;
+	}
+	ptr = pcicfgr32(p, ptr);
+
+	while(ptr != 0){
+		/*
+		 * Check for validity.
+		 * Can't be in standard header and must be double
+		 * word aligned.
+		 */
+		if(ptr < 0x40 || (ptr & ~0xFC))
+			return -1;
+		if(pcicfgr8(p, ptr) == 0x01){
+			p->pmrb = ptr;
+			return ptr;
+		}
+
+		ptr = pcicfgr8(p, ptr+1);
+	}
+
+	return -1;
+}
+
+int
+pcigetpms(Pcidev* p)
+{
+	int pmcsr, ptr;
+
+	if((ptr = pcigetpmrb(p)) == -1)
+		return -1;
+
+	/*
+	 * Power Management Register Block:
+	 *  offset 0:	Capability ID
+	 *	   1:	next item pointer
+	 *	   2:	capabilities
+	 *	   4:	control/status
+	 *	   6:	bridge support extensions
+	 *	   7:	data
+	 */
+	pmcsr = pcicfgr16(p, ptr+4);
+
+	return pmcsr & 0x0003;
+}
+
+int
+pcisetpms(Pcidev* p, int state)
+{
+	int ostate, pmc, pmcsr, ptr;
+
+	if((ptr = pcigetpmrb(p)) == -1)
+		return -1;
+
+	pmc = pcicfgr16(p, ptr+2);
+	pmcsr = pcicfgr16(p, ptr+4);
+	ostate = pmcsr & 0x0003;
+	pmcsr &= ~0x0003;
+
+	switch(state){
+	default:
+		return -1;
+	case 0:
+		break;
+	case 1:
+		if(!(pmc & 0x0200))
+			return -1;
+		break;
+	case 2:
+		if(!(pmc & 0x0400))
+			return -1;
+		break;
+	case 3:
+		break;
+	}
+	pmcsr |= state;
+	pcicfgw16(p, ptr+4, pmcsr);
+
+	return ostate;
+}

+ 138 - 0
sys/src/9/teg2/random.c

@@ -0,0 +1,138 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+
+struct Rb
+{
+	QLock;
+	Rendez	producer;
+	Rendez	consumer;
+	ulong	randomcount;
+	uchar	buf[128];
+	uchar	*ep;
+	uchar	*rp;
+	uchar	*wp;
+	uchar	next;
+	uchar	wakeme;
+	ushort	bits;
+	ulong	randn;
+} rb;
+
+static int
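+/*
+ * the ring is full when the write pointer is one byte behind the read
+ * pointer (mod sizeof rb.buf); rbnotfull is the negation of that.
+ */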
+rbnotfull(void*)
+{
+	int i;
+
+	i = rb.rp - rb.wp;
+	return i != 1 && i != (1 - sizeof(rb.buf));
+}
+
+static int
+rbnotempty(void*)
+{
+	return rb.wp != rb.rp;
+}
+
+static void
+genrandom(void*)
+{
+	up->basepri = PriNormal;
+	up->priority = up->basepri;
+
+	for(;;){
+		for(;;)
+			if(++rb.randomcount > 100000)
+				break;
+		if(anyhigher())
+			sched();
+		if(!rbnotfull(0))
+			sleep(&rb.producer, rbnotfull, 0);
+	}
+}
+
+/*
+ *  produce random bits in a circular buffer
+ */
+static void
+randomclock(void)
+{
+	if(rb.randomcount == 0 || !rbnotfull(0))
+		return;
+
+	rb.bits = (rb.bits<<2) ^ rb.randomcount;
+	rb.randomcount = 0;
+
+	rb.next++;
+	if(rb.next != 8/2)
+		return;
+	rb.next = 0;
+
+	*rb.wp ^= rb.bits;
+	if(rb.wp+1 == rb.ep)
+		rb.wp = rb.buf;
+	else
+		rb.wp = rb.wp+1;
+
+	if(rb.wakeme)
+		wakeup(&rb.consumer);
+}
+
+void
+randominit(void)
+{
+	addclock0link(randomclock, 1000/HZ);
+	rb.ep = rb.buf + sizeof(rb.buf);
+	rb.rp = rb.wp = rb.buf;
+	kproc("genrandom", genrandom, 0);
+}
+
+/*
+ *  consume random bytes from a circular buffer
+ */
+ulong
+randomread(void *xp, ulong n)
+{
+	uchar *e, *p;
+	ulong x;
+
+	p = xp;
+
+	if(waserror()){
+		qunlock(&rb);
+		nexterror();
+	}
+
+	qlock(&rb);
+	for(e = p + n; p < e; ){
+		if(rb.wp == rb.rp){
+			rb.wakeme = 1;
+			wakeup(&rb.producer);
+			sleep(&rb.consumer, rbnotempty, 0);
+			rb.wakeme = 0;
+			continue;
+		}
+
+		/*
+		 *  beating clocks will be predictable if
+		 *  they are synchronized.  Use a cheap pseudo
+		 *  random number generator to obscure any cycles.
+		 */
+		x = rb.randn*1103515245 ^ *rb.rp;
+		*p++ = rb.randn = x;
+
+		if(rb.rp+1 == rb.ep)
+			rb.rp = rb.buf;
+		else
+			rb.rp = rb.rp+1;
+	}
+	qunlock(&rb);
+	poperror();
+
+	wakeup(&rb.producer);
+
+	return n;
+}

+ 208 - 0
sys/src/9/teg2/rebootcode.s

@@ -0,0 +1,208 @@
+/*
+ * arm v7 reboot code
+ *
+ * must fit in 11K to avoid stepping on PTEs; see mem.h.
+ * cache parameters are at CACHECONF.
+ */
+#include "arm.s"
+
+/*
+ * All caches but L1 should be off before calling this.
+ * Turn off MMU, then copy the new kernel to its correct location
+ * in physical memory.  Then jump to the start of the kernel.
+ */
+
+/* main(PADDR(entry), PADDR(code), size); */
+TEXT	main(SB), 1, $-4
+	MOVW	$setR12(SB), R12
+	MOVW	R0, p1+0(FP)		/* destination, passed in R0 */
+	CPSID				/* splhi */
+
+PUTC('R')
+	BL	cachesoff(SB)
+	/* now back in 29- or 26-bit addressing, mainly for SB */
+	/* double mapping of PHYSDRAM & KZERO now in effect */
+
+PUTC('e')
+	/* before turning MMU off, switch to PHYSDRAM-based addresses */
+	DMB
+
+	MOVW	$KSEGM, R7		/* clear segment bits */
+	MOVW	$PHYSDRAM, R0		/* set dram base bits */
+	BIC	R7, R12			/* adjust SB */
+	ORR	R0, R12
+
+	BL	_r15warp(SB)
+	/* don't care about saving R14; we're not returning */
+
+	/*
+	 * now running in PHYSDRAM segment, not KZERO.
+	 */
+
+PUTC('b')
+	/* invalidate mmu mappings */
+	MOVW	$KZERO, R0			/* some valid virtual address */
+	MTCP	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+
+PUTC('o')
+	/*
+	 * turn the MMU off
+	 */
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$CpCmmu, R0
+	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BARRIERS
+
+PUTC('o')
+	/* copy in arguments from stack frame before moving stack */
+	MOVW	p2+4(FP), R4		/* phys source */
+	MOVW	n+8(FP), R5		/* byte count */
+	MOVW	p1+0(FP), R6		/* phys destination */
+
+	/* set up a new stack for local vars and memmove args */
+	MOVW	R6, SP			/* tiny trampoline stack */
+	SUB	$(0x20 + 4), SP		/* back up before a.out header */
+
+//	MOVW	R14, -48(SP)		/* store return addr */
+	SUB	$48, SP			/* allocate stack frame */
+
+	MOVW	R5, 40(SP)		/* save count */
+	MOVW	R6, 44(SP)		/* save dest/entry */
+
+	/* copy the new kernel into place */
+	DELAY(printloop2, 2)
+PUTC('t')
+	MOVW	40(SP), R5		/* restore count */
+	MOVW	44(SP), R6		/* restore dest/entry */
+	MOVW	R6, 0(SP)		/* normally saved LR goes here */
+	MOVW	R6, 4(SP)		/* push dest */
+	MOVW	R6, R0
+	MOVW	R4, 8(SP)		/* push src */
+	MOVW	R5, 12(SP)		/* push size */
+	BL	memmove(SB)
+
+PUTC('-')
+PUTC('>')
+	DELAY(printloopret, 1)
+PUTC('\r')
+	DELAY(printloopnl, 1)
+PUTC('\n')
+/*
+ * jump to kernel entry point.  Note the true kernel entry point is
+ * the virtual address KZERO|R6, but this must wait until
+ * the MMU is enabled by the kernel in l.s
+ */
+	MOVW	44(SP), R6		/* restore R6 (dest/entry) */
+	ORR	R6, R6			/* NOP: avoid link bug */
+	B	(R6)
+PUTC('?')
+PUTC('?')
+	B	0(PC)
+
+/*
+ * turn the caches off, double map PHYSDRAM & KZERO, invalidate TLBs, revert
+ * to tiny addresses.  upon return, it will be safe to turn off the mmu.
+ */
+TEXT cachesoff(SB), 1, $-4
+	MOVM.DB.W [R14,R1-R10], (R13)		/* save regs on stack */
+	CPSID
+	BARRIERS
+
+	SUB	$12, SP				/* paranoia */
+	BL	cacheuwbinv(SB)
+	ADD	$12, SP				/* paranoia */
+
+	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0)
+	BIC	$(CpCicache|CpCdcache), R0
+	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0)	/* caches off */
+	BARRIERS
+
+	/*
+	 * caches are off
+	 */
+
+	/* invalidate stale TLBs before changing them */
+	MOVW	$KZERO, R0			/* some valid virtual address */
+	MTCP	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+
+	/* redo double map of PHYSDRAM, KZERO */
+	MOVW	$PHYSDRAM, R3
+	CMP	$KZERO, R3
+	BEQ	noun2map
+	MOVW	$(L1+L1X(PHYSDRAM)), R4		/* address of PHYSDRAM's PTE */
+	MOVW	$PTEDRAM, R2			/* PTE bits */
+	MOVW	$DOUBLEMAPMBS, R5
+_ptrdbl:
+	ORR	R3, R2, R1		/* first identity-map 0 to 0, etc. */
+	MOVW	R1, (R4)
+	ADD	$4, R4				/* bump PTE address */
+	ADD	$MiB, R3			/* bump pa */
+	SUB.S	$1, R5
+	BNE	_ptrdbl
+noun2map:
+
+	/*
+	 * flush stale TLB entries
+	 */
+
+	BARRIERS
+	MOVW	$KZERO, R0			/* some valid virtual address */
+	MTCP	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv
+	BARRIERS
+
+	/* switch back to PHYSDRAM addressing, mainly for SB */
+	MOVW	$KSEGM, R7		/* clear segment bits */
+	MOVW	$PHYSDRAM, R0		/* set dram base bits */
+	BIC	R7, R12			/* adjust SB */
+	ORR	R0, R12
+	BIC	R7, SP
+	ORR	R0, SP
+
+	MOVM.IA.W (R13), [R14,R1-R10]		/* restore regs from stack */
+
+	MOVW	$KSEGM, R0		/* clear segment bits */
+	BIC	R0, R14			/* adjust link */
+	MOVW	$PHYSDRAM, R0		/* set dram base bits */
+	ORR	R0, R14
+
+	RET
+
+TEXT _r15warp(SB), 1, $-4
+	BIC	R7, R14			/* link */
+	ORR	R0, R14
+
+	BIC	R7, R13			/* SP */
+	ORR	R0, R13
+	RET
+
+TEXT panic(SB), 1, $-4		/* stub */
+PUTC('?')
+PUTC('!')
+	RET
+TEXT pczeroseg(SB), 1, $-4	/* stub */
+	RET
+
+#include "cache.v7.s"
+
+/* modifies R0, R3-R6 */
+TEXT printhex(SB), 1, $-4
+	MOVW	R0, R3
+	MOVW	$(32-4), R5	/* bits to shift right */
+nextdig:
+	SRA	R5, R3, R4
+	AND	$0xf, R4
+	ADD	$'0', R4
+	CMP.S	$'9', R4
+	BLE	nothex		/* if R4 <= 9, jump */
+	ADD	$('a'-('9'+1)), R4
+nothex:
+	PUTC(R4)
+	SUB.S	$4, R5
+	BGE	nextdig
+
+	PUTC('\r')
+	PUTC('\n')
+	DELAY(proct, 50)
+	RET

+ 129 - 0
sys/src/9/teg2/softfpu.c

@@ -0,0 +1,129 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+int
+fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write)
+{
+	/*
+	 * Called from procdevtab.read and procdevtab.write
+	 * to allow a user process access to the FPU registers.
+	 * This is the only FPU routine which is called directly
+	 * from the port code; it would be nice to have dynamic
+	 * creation of entries in the device file trees...
+	 */
+	USED(proc, a, n, offset, write);
+
+	return 0;
+}
+
+void
+fpunotify(Ureg*)
+{
+	/*
+	 * Called when a note is about to be delivered to a
+	 * user process, usually at the end of a system call.
+	 * Note handlers are not allowed to use the FPU so
+	 * the state is marked (after saving if necessary) and
+	 * checked in the Device Not Available handler.
+	 */
+}
+
+void
+fpunoted(void)
+{
+	/*
+	 * Called from sysnoted() via the machine-dependent
+	 * noted() routine.
+	 * Clear the flag set above in fpunotify().
+	 */
+}
+
+void
+fpusysrfork(Ureg*)
+{
+	/*
+	 * Called early in the non-interruptible path of
+	 * sysrfork() via the machine-dependent syscall() routine.
+	 * Save the state so that it can be easily copied
+	 * to the child process later.
+	 */
+}
+
+void
+fpusysrforkchild(Proc*, Ureg *, Proc*)
+{
+	/*
+	 * Called later in sysrfork() via the machine-dependent
+	 * sysrforkchild() routine.
+	 * Copy the parent FPU state to the child.
+	 */
+}
+
+void
+fpuprocsave(Proc*)
+{
+	/*
+	 * Called from sched() and sleep() via the machine-dependent
+	 * procsave() routine.
+	 * About to go into the scheduler.
+	 * If the process wasn't using the FPU
+	 * there's nothing to do.
+	 */
+}
+
+void
+fpuprocrestore(Proc*)
+{
+	/*
+	 * The process has been rescheduled and is about to run.
+	 * Nothing to do here right now. If the process tries to use
+	 * the FPU again it will cause a Device Not Available
+	 * exception and the state will then be restored.
+	 */
+}
+
+void
+fpusysprocsetup(Proc*)
+{
+	/*
+	 * Disable the FPU.
+	 * Called from sysexec() via sysprocsetup() to
+	 * set the FPU for the new process.
+	 */
+}
+
+void
+fpuinit(void)
+{
+}
+
+int
+fpuemu(Ureg* ureg)
+{
+	int nfp;
+
+	if(waserror()){
+		splhi();
+		postnote(up, 1, up->errstr, NDebug);
+		return 1;
+	}
+	spllo();
+	nfp = fpiarm(ureg);
+	splhi();
+	poperror();
+
+	return nfp;
+}
+
+void
+fpon(void)
+{
+}
+
+void
+fpoff(void)
+{
+}

+ 366 - 0
sys/src/9/teg2/syscall.c

@@ -0,0 +1,366 @@
+/* we use l1 and l2 cache ops to help stability. */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../port/systab.h"
+
+#include <tos.h>
+#include "ureg.h"
+
+#include "arm.h"
+
+enum {
+	Psrsysbits = PsrMask | PsrDfiq | PsrDirq | PsrDasabt | PsrMbz,
+};
+
+typedef struct {
+	uintptr	ip;
+	Ureg*	arg0;
+	char*	arg1;
+	char	msg[ERRMAX];
+	Ureg*	old;
+	Ureg	ureg;
+} NFrame;
+
+/*
+ *   Return user to state before notify()
+ */
+static void
+noted(Ureg* cur, uintptr arg0)
+{
+	NFrame *nf;
+	Ureg *nur;
+
+	qlock(&up->debug);
+	if(arg0 != NRSTR && !up->notified){
+		qunlock(&up->debug);
+		pprint("call to noted() when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+	fpunoted();
+
+	nf = up->ureg;
+
+	/* sanity clause */
+	if(!okaddr(PTR2UINT(nf), sizeof(NFrame), 0)){
+		qunlock(&up->debug);
+		pprint("bad ureg in noted %#p\n", nf);
+		pexit("Suicide", 0);
+	}
+
+	/* don't let user change system flags */
+	nur = &nf->ureg;
+	nur->psr &= Psrsysbits;
+	nur->psr |= cur->psr & ~Psrsysbits;
+
+	memmove(cur, nur, sizeof(Ureg));
+
+	switch((int)arg0){
+	case NCONT:
+	case NRSTR:
+		if(!okaddr(nur->pc, BY2WD, 0) || !okaddr(nur->sp, BY2WD, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		up->ureg = nf->old;
+		qunlock(&up->debug);
+		break;
+	case NSAVE:
+		if(!okaddr(nur->pc, BY2WD, 0) || !okaddr(nur->sp, BY2WD, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+
+		splhi();
+		nf->arg1 = nf->msg;
+		nf->arg0 = &nf->ureg;
+		nf->ip = 0;
+		cur->sp = PTR2UINT(nf);
+		break;
+	default:
+		pprint("unknown noted arg %#p\n", arg0);
+		up->lastnote.flag = NDebug;
+		/*FALLTHROUGH*/
+	case NDFLT:
+		if(up->lastnote.flag == NDebug){ 
+			qunlock(&up->debug);
+			pprint("suicide: %s\n", up->lastnote.msg);
+		}
+		else
+			qunlock(&up->debug);
+		pexit(up->lastnote.msg, up->lastnote.flag != NDebug);
+	}
+}
+
+/*
+ *  Call user, if necessary, with note.
+ *  Pass user the Ureg struct and the note on his stack.
+ */
+int
+notify(Ureg* ureg)
+{
+	int l;
+	Note *n;
+	u32int s;
+	uintptr sp;
+	NFrame *nf;
+
+	if(up->procctl)
+		procctl(up);
+	if(up->nnote == 0)
+		return 0;
+
+	fpunotify(ureg);
+
+	s = spllo();
+	qlock(&up->debug);
+
+	up->notepending = 0;
+	n = &up->note[0];
+	if(strncmp(n->msg, "sys:", 4) == 0){
+		l = strlen(n->msg);
+		if(l > ERRMAX-23)	/* " pc=0x0123456789abcdef\0" */
+			l = ERRMAX-23;
+		snprint(n->msg + l, sizeof n->msg - l, " pc=%#lux", ureg->pc);
+	}
+
+	if(n->flag != NUser && (up->notified || up->notify == 0)){
+		if(n->flag == NDebug)
+			pprint("suicide: %s\n", n->msg);
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag != NDebug);
+	}
+
+	if(up->notified){
+		qunlock(&up->debug);
+		splhi();
+		return 0;
+	}
+		
+	if(up->notify == nil){
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag != NDebug);
+	}
+	if(!okaddr(PTR2UINT(up->notify), 1, 0)){
+		pprint("suicide: notify function address %#p\n", up->notify);
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	sp = ureg->sp - sizeof(NFrame);
+	if(!okaddr(sp, sizeof(NFrame), 1)){
+		qunlock(&up->debug);
+		pprint("suicide: notify stack address %#p\n", sp);
+		pexit("Suicide", 0);
+	}
+
+	nf = UINT2PTR(sp);
+	memmove(&nf->ureg, ureg, sizeof(Ureg));
+	nf->old = up->ureg;
+	up->ureg = nf;
+	memmove(nf->msg, up->note[0].msg, ERRMAX);
+	nf->arg1 = nf->msg;
+	nf->arg0 = &nf->ureg;
+	nf->ip = 0;
+
+	ureg->sp = sp;
+	ureg->pc = PTR2UINT(up->notify);
+
+	up->notified = 1;
+	up->nnote--;
+	memmove(&up->lastnote, &up->note[0], sizeof(Note));
+	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+	qunlock(&up->debug);
+	splx(s);
+
+	l1cache->wb();				/* is this needed? */
+	return 1;
+}
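
For context, the user-space half of this handshake (see notify(2)) registers a
handler that is entered with exactly the Ureg pointer and message that notify()
pushes above, and finishes by calling noted().  A rough sketch, with the handler
body as an assumption:

#include <u.h>
#include <libc.h>

static void
handler(void *ureg, char *note)		/* entered via nf->arg0, nf->arg1 */
{
	USED(ureg);
	if(strcmp(note, "interrupt") == 0)
		noted(NCONT);		/* resume at the saved pc */
	noted(NDFLT);			/* default action, typically exit */
}

void
main(void)
{
	notify(handler);
	/* ... normal work; notes now arrive via handler() ... */
	exits(nil);
}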
+
+void
+syscall(Ureg* ureg)
+{
+	char *e;
+	u32int s;
+	ulong sp;
+	long ret;
+	int i, scallnr;
+	vlong startns, stopns;
+
+	if(!userureg(ureg))
+		panic("syscall: from kernel: pc %#lux r14 %#lux psr %#lux",
+			ureg->pc, ureg->r14, ureg->psr);
+
+	cycles(&up->kentry);
+
+	m->syscall++;
+	up->insyscall = 1;
+	up->pc = ureg->pc;
+	up->dbgreg = ureg;
+
+	scallnr = ureg->r0;
+	up->scallnr = scallnr;
+	if(scallnr == RFORK)
+		fpusysrfork(ureg);
+	spllo();
+	sp = ureg->sp;
+
+	if(up->procctl == Proc_tracesyscall){
+		/*
+		 * Redundant validaddr.  Do we care?
+		 * Tracing syscalls is not exactly a fast path...
+		 * Beware, validaddr currently does a pexit rather
+		 * than an error if there's a problem; that might
+		 * change in the future.
+		 */
+		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+		syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
+		up->procctl = Proc_stopme;
+		procctl(up);
+		if (up->syscalltrace) 
+			free(up->syscalltrace);
+		up->syscalltrace = nil;
+	}
+
+	up->nerrlab = 0;
+	ret = -1;
+	startns = todget(nil);
+
+	l1cache->wb();			/* system is more stable with this */
+	if(!waserror()){
+		if(scallnr >= nsyscall){
+			pprint("bad sys call number %d pc %#lux\n",
+				scallnr, ureg->pc);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+
+		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+		up->s = *((Sargs*)(sp+BY2WD));
+		up->psstate = sysctab[scallnr];
+
+	/*	iprint("%s: syscall %s\n", up->text, sysctab[scallnr]?sysctab[scallnr]:"huh?"); */
+
+		ret = systab[scallnr](up->s.args);
+		poperror();
+	}else{
+		/* failure: save the error buffer for errstr */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+	}
+	if(up->nerrlab){
+		print("bad errstack [%d]: %d extra\n", scallnr, up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			print("sp=%#p pc=%#p\n",
+				up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+
+	/*
+	 *  Put return value in frame.  On the x86 the syscall is
+	 *  just another trap and the return value from syscall is
+	 *  ignored.  On other machines the return value is put into
+	 *  the results register by caller of syscall.
+	 */
+	ureg->r0 = ret;
+
+	if(up->procctl == Proc_tracesyscall){
+		stopns = todget(nil);
+		up->procctl = Proc_stopme;
+		sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
+		s = splhi();
+		procctl(up);
+		splx(s);
+		if(up->syscalltrace)
+			free(up->syscalltrace);
+		up->syscalltrace = nil;
+	}
+
+	up->insyscall = 0;
+	up->psstate = 0;
+
+	if(scallnr == NOTED)
+		noted(ureg, *(ulong*)(sp+BY2WD));
+
+	splhi();
+	if(scallnr != RFORK && (up->procctl || up->nnote))
+		notify(ureg);
+
+	l1cache->wb();			/* system is more stable with this */
+
+	/* if we delayed sched because we held a lock, sched now */
+	if(up->delaysched){
+		sched();
+		splhi();
+	}
+	kexit(ureg);
+}
+
+long
+execregs(ulong entry, ulong ssize, ulong nargs)
+{
+	ulong *sp;
+	Ureg *ureg;
+
+	sp = (ulong*)(USTKTOP - ssize);
+	*--sp = nargs;
+
+	ureg = up->dbgreg;
+//	memset(ureg, 0, 15*sizeof(ulong));
+	ureg->r13 = (ulong)sp;
+	ureg->pc = entry;
+//print("%lud: EXECREGS pc %#ux sp %#ux nargs %ld\n", up->pid, ureg->pc, ureg->r13, nargs);
+	allcache->wbse(ureg, sizeof *ureg);		/* is this needed? */
+
+	/*
+	 * return the address of kernel/user shared data
+	 * (e.g. clock stuff)
+	 */
+	return USTKTOP-sizeof(Tos);
+}
+
+void
+sysprocsetup(Proc* p)
+{
+	fpusysprocsetup(p);
+}
+
+/* 
+ *  Craft a return frame which will cause the child to pop out of
+ *  the scheduler in user mode with the return register zero.  Set
+ *  pc to point to a l.s return function.
+ */
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+	Ureg *cureg;
+
+	p->sched.sp = (ulong)p->kstack+KSTACK-sizeof(Ureg);
+	p->sched.pc = (ulong)forkret;
+
+	cureg = (Ureg*)(p->sched.sp);
+	memmove(cureg, ureg, sizeof(Ureg));
+
+	/* syscall returns 0 for child */
+	cureg->r0 = 0;
+
+	/* Things from bottom of syscall which were never executed */
+	p->psstate = 0;
+	p->insyscall = 0;
+
+	fpusysrforkchild(p, cureg, up);
+}

+ 1068 - 0
sys/src/9/teg2/trap.c

@@ -0,0 +1,1068 @@
+/*
+ * arm mpcore generic interrupt controller (gic) v1
+ * traps, exceptions, interrupts, system calls.
+ *
+ * there are two pieces: the interrupt distributor and the cpu interface.
+ *
+ * memset or memmove on any of the distributor registers generates an
+ * exception like this one:
+ *	panic: external abort 0x28 pc 0xc048bf68 addr 0x50041800
+ *
+ * we use l1 and l2 cache ops to force vectors to be visible everywhere.
+ *
+ * apparently irqs 0-15 (SGIs) are always enabled.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ureg.h"
+#include "arm.h"
+
+#define ISSGI(irq)	((uint)(irq) < Nsgi)
+
+enum {
+	Debug = 0,
+
+	Nvec = 8,		/* # of vectors at start of lexception.s */
+	Bi2long = BI2BY * sizeof(long),
+	Nirqs = 1024,
+	Nsgi =	16,		/* software-generated (inter-processor) intrs */
+	Nppi =	32,		/* sgis + other private peripheral intrs */
+};
+
+typedef struct Intrcpuregs Intrcpuregs;
+typedef struct Intrdistregs Intrdistregs;
+
+/*
+ * almost this entire register set is buggered.
+ * the distributor is supposed to be per-system, not per-cpu,
+ * yet some registers are banked per-cpu, as marked.
+ */
+struct Intrdistregs {			/* distributor */
+	ulong	ctl;
+	ulong	ctlrtype;
+	ulong	distid;
+	uchar	_pad0[0x80 - 0xc];
+
+	/* botch: *[0] are banked per-cpu from here */
+	/* bit maps */
+	ulong	grp[32];		/* in group 1 (non-secure) */
+	ulong	setena[32];		/* forward to cpu interfaces */
+	ulong	clrena[32];
+	ulong	setpend[32];
+	ulong	clrpend[32];
+	ulong	setact[32];		/* active? */
+	ulong	clract[32];
+	/* botch: *[0] are banked per-cpu until here */
+
+	uchar	pri[1020];	/* botch: pri[0] - pri[7] are banked per-cpu */
+	ulong	_rsrvd1;
+	/* botch: targ[0] through targ[7] are banked per-cpu and RO */
+	uchar	targ[1020];	/* byte bit maps: cpu targets indexed by intr */
+	ulong	_rsrvd2;
+	/* botch: cfg[1] is banked per-cpu */
+	ulong	cfg[64];		/* bit pairs: edge? 1-N? */
+	ulong	_pad1[64];
+	ulong	nsac[64];		/* bit pairs (v2 only) */
+
+	/* software-generated intrs (a.k.a. sgi) */
+	ulong	swgen;			/* intr targets */
+	uchar	_pad2[0xf10 - 0xf04];
+	uchar	clrsgipend[16];		/* bit map (v2 only) */
+	uchar	setsgipend[16];		/* bit map (v2 only) */
+};
+
+enum {
+	/* ctl bits */
+	Forw2cpuif =	1,
+
+	/* ctlrtype bits */
+	Cpunoshft =	5,
+	Cpunomask =	MASK(3),
+	Intrlines =	MASK(5),
+
+	/* cfg bits */
+	Level =		0<<1,
+	Edge =		1<<1,		/* edge-, not level-sensitive */
+	Toall =		0<<0,
+	To1 =		1<<0,		/* vs. to all */
+
+	/* swgen bits */
+	Totargets =	0,
+	Tonotme =	1<<24,
+	Tome =		2<<24,
+};
+
+/* each cpu sees its own registers at the same base address (soc.intr) */
+struct Intrcpuregs {
+	ulong	ctl;
+	ulong	primask;
+
+	ulong	binpt;			/* group pri vs subpri split */
+	ulong	ack;
+	ulong	end;
+	ulong	runpri;
+	ulong	hipripend;
+
+	/* aliased regs (secure, for group 1) */
+	ulong	alibinpt;
+	ulong	aliack;			/* (v2 only) */
+	ulong	aliend;			/* (v2 only) */
+	ulong	alihipripend;		/* (v2 only) */
+
+	uchar	_pad0[0xd0 - 0x2c];
+	ulong	actpri[4];		/* (v2 only) */
+	ulong	nsactpri[4];		/* (v2 only) */
+
+	uchar	_pad1[0xfc - 0xf0];
+	ulong	ifid;			/* ro */
+
+	uchar	_pad2[0x1000 - 0x100];
+	ulong	deact;			/* wo (v2 only) */
+};
+
+enum {
+	/* ctl bits */
+	Enable =	1,
+	Eoinodeact =	1<<9,		/* (v2 only) */
+
+	/* (ali) ack/end/hipriend/deact bits */
+	Intrmask =	MASK(10),
+	Cpuidshift =	10,
+	Cpuidmask =	MASK(3),
+
+	/* ifid bits */
+	Archversshift =	16,
+	Archversmask =	MASK(4),
+};
+
+typedef struct Vctl Vctl;
+typedef struct Vctl {
+	Vctl*	next;		/* handlers on this vector */
+	char	*name;		/* of driver, xallocated */
+	void	(*f)(Ureg*, void*);	/* handler to call */
+	void*	a;		/* argument to call it with */
+} Vctl;
+
+static Lock vctllock;
+static Vctl* vctl[Nirqs];
+
+/*
+ *   Layout at virtual address 0.
+ */
+typedef struct Vpage0 {
+	void	(*vectors[Nvec])(void);
+	u32int	vtable[Nvec];
+} Vpage0;
+
+enum
+{
+	Ntimevec = 20		/* number of time buckets for each intr */
+};
+ulong intrtimes[Nirqs][Ntimevec];
+
+uvlong ninterrupt;
+uvlong ninterruptticks;
+int irqtooearly = 1;
+
+static ulong shadena[32];	/* copy of enable bits, saved by intcmaskall */
+static Lock distlock, nintrlock;
+
+extern int notify(Ureg*);
+
+static void dumpstackwithureg(Ureg *ureg);
+
+void
+printrs(int base, ulong word)
+{
+	int bit;
+
+	for (bit = 0; word; bit++, word >>= 1)
+		if (word & 1)
+			iprint(" %d", base + bit);
+}
+
+void
+dumpintrs(char *what, ulong *bits)
+{
+	int i, first, some;
+	ulong word;
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	first = 1;
+	some = 0;
+	USED(idp);
+	for (i = 0; i < nelem(idp->setpend); i++) {
+		word = bits[i];
+		if (word) {
+			if (first) {
+				first = 0;
+				iprint("%s", what);
+			}
+			some = 1;
+			printrs(i * Bi2long, word);
+		}
+	}
+	if (!some)
+		iprint("%s none", what);
+	iprint("\n");
+}
+
+void
+dumpintrpend(void)
+{
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	iprint("\ncpu%d gic regs:\n", m->machno);
+	dumpintrs("group 1", idp->grp);
+	dumpintrs("enabled", idp->setena);
+	dumpintrs("pending", idp->setpend);
+	dumpintrs("active ", idp->setact);
+}
+
+/*
+ *  keep histogram of interrupt service times
+ */
+void
+intrtime(Mach*, int vno)
+{
+	ulong diff;
+	ulong x;
+
+	x = perfticks();
+	diff = x - m->perf.intrts;
+	m->perf.intrts = x;
+
+	m->perf.inintr += diff;
+	if(up == nil && m->perf.inidle > diff)
+		m->perf.inidle -= diff;
+
+	if (m->cpumhz == 0)
+		return;			/* don't divide by zero */
+	diff /= m->cpumhz*100;		/* quantum = 100µsec */
+	if(diff >= Ntimevec)
+		diff = Ntimevec-1;
+	if ((uint)vno >= Nirqs)
+		vno = Nirqs-1;
+	intrtimes[vno][diff]++;
+}
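
To make the bucketing concrete: diff is divided by cpumhz*100 cycles, i.e. one
bucket per 100µs of service time.  On a 1000MHz core (cpumhz = 1000), a handler
that ran for 250000 cycles (250µs) lands in intrtimes[vno][2], and anything of
2ms or longer is clamped into the last of the Ntimevec (20) buckets.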
+
+static ulong
+intack(Intrcpuregs *icp)
+{
+	return icp->ack & Intrmask;
+}
+
+static void
+intdismiss(Intrcpuregs *icp, ulong ack)
+{
+	icp->end = ack;
+	coherence();
+}
+
+static int
+irqinuse(uint irq)
+{
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	return idp->setena[irq / Bi2long] & (1 << (irq % Bi2long));
+}
+
+void
+intcunmask(uint irq)
+{
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	ilock(&distlock);
+	idp->setena[irq / Bi2long] = 1 << (irq % Bi2long);
+	iunlock(&distlock);
+}
+
+void
+intcmask(uint irq)
+{
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	ilock(&distlock);
+	idp->clrena[irq / Bi2long] = 1 << (irq % Bi2long);
+	iunlock(&distlock);
+}
+
+static void
+intcmaskall(Intrdistregs *idp)		/* mask all intrs for all cpus */
+{
+	int i;
+
+	for (i = 0; i < nelem(idp->setena); i++)
+		shadena[i] = idp->setena[i];
+	for (i = 0; i < nelem(idp->clrena); i++)
+		idp->clrena[i] = ~0;
+	coherence();
+}
+
+static void
+intcunmaskall(Intrdistregs *idp)	/* unused */
+{
+	int i;
+
+	for (i = 0; i < nelem(idp->setena); i++)
+		idp->setena[i] = shadena[i];
+	coherence();
+}
+
+static ulong
+permintrs(Intrdistregs *idp, int base, int r)
+{
+	ulong perms;
+
+	idp->clrena[r] = ~0;		/* disable all */
+	coherence();
+	perms = idp->clrena[r];
+	if (perms) {
+		iprint("perm intrs:");
+		printrs(base, perms);
+		iprint("\n");
+	}
+	return perms;
+}
+
+static void
+intrcfg(Intrdistregs *idp)
+{
+	int i, cpumask;
+	ulong pat;
+
+	/* set up all interrupts as level-sensitive, to one cpu (0) */
+	pat = 0;
+	for (i = 0; i < Bi2long; i += 2)
+		pat |= (Level | To1) << i;
+
+	if (m->machno == 0) {			/* system-wide & cpu0 cfg */
+		for (i = 0; i < nelem(idp->grp); i++)
+			idp->grp[i] = 0;		/* secure */
+		for (i = 0; i < nelem(idp->pri); i++)
+			idp->pri[i] = 0;		/* highest priority */
+		/* set up all interrupts as level-sensitive, to one cpu (0) */
+		for (i = 0; i < nelem(idp->cfg); i++)
+			idp->cfg[i] = pat;
+		/* first Nppi are read-only for SGIs and PPIs */
+		cpumask = 1<<0;				/* just cpu 0 */
+		navailcpus = getncpus();
+		for (i = Nppi; i < sizeof idp->targ; i++)
+			idp->targ[i] = cpumask;
+		coherence();
+
+		intcmaskall(idp);
+		for (i = 0; i < nelem(idp->clrena); i++) {
+			// permintrs(idp, i * Bi2long, i);
+			idp->clrpend[i] = idp->clract[i] = idp->clrena[i] = ~0;
+		}
+	} else {				/* per-cpu config */
+		idp->grp[0] = 0;		/* secure */
+		for (i = 0; i < 8; i++)
+			idp->pri[i] = 0;	/* highest priority */
+		/* idp->targ[0 through Nppi-1] are supposed to be read-only */
+		for (i = 0; i < Nppi; i++)
+			idp->targ[i] = 1<<m->machno;
+		idp->cfg[1] = pat;
+		coherence();
+
+		// permintrs(idp, i * Bi2long, i);
+		idp->clrpend[0] = idp->clract[0] = idp->clrena[0] = ~0;
+		/* on cpu1, irq Extpmuirq (118) is always pending here */
+	}
+	coherence();
+}
+
+void
+intrto(int cpu, int irq)
+{
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	/* first Nppi are read-only for SGIs and the like */
+	ilock(&distlock);
+	idp->targ[irq] = 1 << cpu;
+	iunlock(&distlock);
+}
+
+void
+intrsto(int cpu)			/* unused */
+{
+	int i;
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	/* first Nppi are read-only for SGIs and the like */
+	for (i = Nppi; i < sizeof idp->targ; i++)
+		intrto(cpu, i);
+	USED(idp);
+}
+
+void
+intrcpu(int cpu)
+{
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	ilock(&distlock);
+	idp->swgen = Totargets | 1 << (cpu + 16) | m->machno;
+	iunlock(&distlock);
+}
+
+/*
+ *  set up for exceptions
+ */
+void
+trapinit(void)
+{
+	int s;
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+	Intrcpuregs *icp = (Intrcpuregs *)soc.intr;
+	Vpage0 *vpage0;
+	enum { Vecsize = sizeof vpage0->vectors + sizeof vpage0->vtable, };
+
+	/*
+	 * set up the exception vectors, high and low.
+	 *
+	 * we can't use cache ops on HVECTORS address, since they
+	 * work on virtual addresses, and only those that have a
+	 * physical address == PADDR(virtual).
+	 */
+	if (m->machno == 0) {
+		vpage0 = (Vpage0*)HVECTORS;
+		memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors));
+		memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable));
+
+		vpage0 = (Vpage0*)KADDR(0);
+		memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors));
+		memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable));
+
+		allcache->wbse(vpage0, Vecsize);
+		cacheiinv();
+	}
+
+	/*
+	 * set up the stack pointers for the exception modes for this cpu.
+	 * they point to small `save areas' in Mach, not actual stacks.
+	 */
+	s = splhi();			/* make these modes ignore intrs too */
+	setr13(PsrMfiq, m->sfiq);
+	setr13(PsrMirq, m->sirq);
+	setr13(PsrMmon, m->smon);
+	setr13(PsrMabt, m->sabt);
+	setr13(PsrMund, m->sund);
+	setr13(PsrMsys, m->ssys);
+	splx(s);
+
+	assert((idp->distid & MASK(12)) == 0x43b);	/* made by arm */
+	assert((icp->ifid   & MASK(12)) == 0x43b);	/* made by arm */
+
+	ilock(&distlock);
+	idp->ctl = 0;
+	icp->ctl = 0;
+	coherence();
+
+	intrcfg(idp);			/* some per-cpu cfg here */
+
+	icp->ctl = Enable;
+	icp->primask = (uchar)~0;	/* let all priorities through */
+	coherence();
+
+	idp->ctl = Forw2cpuif;
+	iunlock(&distlock);
+}
+
+void
+intrsoff(void)
+{
+	ilock(&distlock);
+	intcmaskall((Intrdistregs *)soc.intrdist);
+	iunlock(&distlock);
+}
+
+void
+intrcpushutdown(void)
+{
+	Intrcpuregs *icp = (Intrcpuregs *)soc.intr;
+
+	icp->ctl = 0;
+	icp->primask = 0;	/* let no priorities through */
+	coherence();
+}
+
+/* called from cpu0 after other cpus are shutdown */
+void
+intrshutdown(void)
+{
+	Intrdistregs *idp = (Intrdistregs *)soc.intrdist;
+
+	intrsoff();
+	idp->ctl = 0;
+	intrcpushutdown();
+}
+
+/*
+ *  enable an irq interrupt
+ *  note that the same private interrupt may be enabled on multiple cpus
+ */
+int
+irqenable(uint irq, void (*f)(Ureg*, void*), void* a, char *name)
+{
+	Vctl *v;
+
+	if(irq >= nelem(vctl))
+		panic("irqenable irq %d", irq);
+
+	if (irqtooearly) {
+		iprint("irqenable for %d %s called too early\n", irq, name);
+		return -1;
+	}
+	/*
+	 * if in use, could be a private interrupt on a secondary cpu,
+	 * so don't add anything to the vector chain.  irqs should
+	 * otherwise be one-to-one with devices.
+	 */
+	if(!ISSGI(irq) && irqinuse(irq)) {
+		lock(&vctllock);
+		if (vctl[irq] == nil) {
+			dumpintrpend();
+			panic("non-sgi irq %d in use yet no Vctl allocated", irq);
+		}
+		unlock(&vctllock);
+	}
+	/* could be 1st use of this irq or could be an sgi (always in use) */
+	else if (vctl[irq] == nil) {
+		v = malloc(sizeof(Vctl));
+		if (v == nil)
+			panic("irqenable: malloc Vctl");
+		v->f = f;
+		v->a = a;
+		v->name = malloc(strlen(name)+1);
+		if (v->name == nil)
+			panic("irqenable: malloc name");
+		strcpy(v->name, name);
+
+		lock(&vctllock);
+		if (vctl[irq] != nil) {
+			/* allocation race: someone else did it first */
+			free(v->name);
+			free(v);
+		} else {
+			v->next = vctl[irq];
+			vctl[irq] = v;
+		}
+		unlock(&vctllock);
+	}
+	intcunmask(irq);
+	return 0;
+}
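
A hedged sketch of how a driver hooks its interrupt through this interface; the
irq number, handler and device name are hypothetical, not defined in this port:

enum { Mydevirq = 122 };		/* hypothetical GIC interrupt number */

static void
mydevintr(Ureg*, void *arg)
{
	USED(arg);
	/* read and clear the device's interrupt status register here */
}

static void
mydevreset(void)
{
	/* must run after trapinit(); the irqtooearly test catches the early case */
	if(irqenable(Mydevirq, mydevintr, nil, "mydev") < 0)
		print("mydev: irqenable failed\n");
}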
+
+/*
+ *  disable an irq interrupt
+ */
+int
+irqdisable(uint irq, void (*f)(Ureg*, void*), void* a, char *name)
+{
+	Vctl **vp, *v;
+
+	if(irq >= nelem(vctl))
+		panic("irqdisable irq %d", irq);
+
+	lock(&vctllock);
+	for(vp = &vctl[irq]; v = *vp; vp = &v->next)
+		if (v->f == f && v->a == a && strcmp(v->name, name) == 0){
+			print("irqdisable: remove %s\n", name);
+			*vp = v->next;
+			free(v->name);
+			free(v);
+			break;
+		}
+
+	if(v == nil)
+		print("irqdisable: irq %d, name %s not enabled\n", irq, name);
+	if(vctl[irq] == nil){
+		print("irqdisable: clear icmr bit %d\n", irq);
+		intcmask(irq);
+	}
+	unlock(&vctllock);
+
+	return 0;
+}
+
+/*
+ *  called by trap to handle access faults
+ */
+static void
+faultarm(Ureg *ureg, uintptr va, int user, int read)
+{
+	int n, insyscall;
+
+	if(up == nil) {
+		dumpstackwithureg(ureg);
+		panic("faultarm: cpu%d: nil up, %sing %#p at %#p",
+			m->machno, (read? "read": "writ"), va, ureg->pc);
+	}
+	insyscall = up->insyscall;
+	up->insyscall = 1;
+
+	n = fault(va, read);		/* goes spllo */
+	splhi();
+	if(n < 0){
+		char buf[ERRMAX];
+
+		if(!user){
+			dumpstackwithureg(ureg);
+			panic("fault: cpu%d: kernel %sing %#p at %#p",
+				m->machno, read? "read": "writ", va, ureg->pc);
+		}
+		/* don't dump registers; programs suicide all the time */
+		snprint(buf, sizeof buf, "sys: trap: fault %s va=%#p",
+			read? "read": "write", va);
+		postnote(up, 1, buf, NDebug);
+	}
+	up->insyscall = insyscall;
+}
+
+/*
+ *  called by trap to handle interrupts.
+ *  returns true iff a clock interrupt, thus maybe reschedule.
+ */
+static int
+irq(Ureg* ureg)
+{
+	int clockintr, ack;
+	uint irqno, handled, t, ticks;
+	Intrcpuregs *icp = (Intrcpuregs *)soc.intr;
+	Vctl *v;
+
+	ticks = perfticks();
+	handled = 0;
+	ack = intack(icp);
+	irqno = ack & Intrmask;
+
+	if (irqno >= nelem(vctl)) {
+		iprint("trap: irq %d >= # vectors (%d)\n", irqno, nelem(vctl));
+		intdismiss(icp, ack);
+		return 0;
+	}
+
+	if (irqno == Loctmrirq)			/* this is a clock intr? */
+		m->inclockintr++;		/* yes, count nesting */
+	if(m->machno && m->inclockintr > 1) {
+		iprint("cpu%d: nested clock intrs\n", m->machno);
+		m->inclockintr--;
+		intdismiss(icp, ack);
+		return 0;
+	}
+
+	for(v = vctl[irqno]; v != nil; v = v->next)
+		if (v->f) {
+			if (islo())
+				panic("trap: pl0 before trap handler for %s",
+					v->name);
+			v->f(ureg, v->a);
+			if (islo())
+				panic("trap: %s lowered pl", v->name);
+//			splhi();		/* in case v->f lowered pl */
+			handled++;
+		}
+	if(!handled)
+		if (irqno >= 1022)
+			iprint("cpu%d: ignoring spurious interrupt\n", m->machno);
+		else {
+			intcmask(irqno);
+			iprint("cpu%d: unexpected interrupt %d, now masked\n",
+				m->machno, irqno);
+		}
+	t = perfticks();
+	if (0) {			/* left over from another port? */
+		ilock(&nintrlock);
+		ninterrupt++;
+		if(t < ticks)
+			ninterruptticks += ticks-t;
+		else
+			ninterruptticks += t-ticks;
+		iunlock(&nintrlock);
+	}
+	USED(t, ticks);
+	clockintr = m->inclockintr == 1;
+	if (irqno == Loctmrirq)
+		m->inclockintr--;
+
+	intdismiss(icp, ack);
+	intrtime(m, irqno);
+	return clockintr;
+}
+
+/*
+ *  returns 1 if the instruction writes memory, 0 otherwise
+ */
+int
+writetomem(ulong inst)
+{
+	/* swap always writes memory */
+	if((inst & 0x0FC00000) == 0x01000000)
+		return 1;
+
+	/* loads and stores are distinguished by bit 20 */
+	if(inst & (1<<20))
+		return 0;
+
+	return 1;
+}
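
A quick sanity check of the classification, assuming standard ARM encodings (the
two literal words below are ordinary STR/LDR of R1 via R0, chosen as examples
rather than taken from this port):

static void
checkwritetomem(void)
{
	assert(writetomem(0xE5801000));		/* STR R1, [R0]: bit 20 clear, a write */
	assert(!writetomem(0xE5901000));	/* LDR R1, [R0]: bit 20 set, a read */
}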
+
+static void
+datafault(Ureg *ureg, int user)
+{
+	int x;
+	ulong inst, fsr;
+	uintptr va;
+
+	va = farget();
+
+	if (m->probing && !user) {
+		if (m->trapped++ > 0) {
+			dumpstackwithureg(ureg);
+			panic("trap: recursive probe %#lux", va);
+		}
+		ureg->pc += 4;	/* continue after faulting instr'n */
+		return;
+	}
+
+	inst = *(ulong*)(ureg->pc);
+	/* bits 12 and 10 have to be concatenated with status */
+	x = fsrget();
+	fsr = (x>>7) & 0x20 | (x>>6) & 0x10 | x & 0xf;
+	switch(fsr){
+	default:
+	case 0xa:		/* ? was under external abort */
+		panic("unknown data fault, 6b fsr %#lux", fsr);
+		break;
+	case 0x0:
+		panic("vector exception at %#lux", ureg->pc);
+		break;
+	case 0x1:		/* alignment fault */
+	case 0x3:		/* access flag fault (section) */
+		if(user){
+			char buf[ERRMAX];
+
+			snprint(buf, sizeof buf,
+				"sys: alignment: pc %#lux va %#p\n",
+				ureg->pc, va);
+			postnote(up, 1, buf, NDebug);
+		} else {
+			dumpstackwithureg(ureg);
+			panic("kernel alignment: pc %#lux va %#p", ureg->pc, va);
+		}
+		break;
+	case 0x2:
+		panic("terminal exception at %#lux", ureg->pc);
+		break;
+	case 0x4:		/* icache maint fault */
+	case 0x6:		/* access flag fault (page) */
+	case 0x8:		/* precise external abort, non-xlat'n */
+	case 0x28:
+	case 0x16:		/* imprecise ext. abort, non-xlt'n */
+	case 0x36:
+		panic("external non-translation abort %#lux pc %#lux addr %#p",
+			fsr, ureg->pc, va);
+		break;
+	case 0xc:		/* l1 translation, precise ext. abort */
+	case 0x2c:
+	case 0xe:		/* l2 translation, precise ext. abort */
+	case 0x2e:
+		panic("external translation abort %#lux pc %#lux addr %#p",
+			fsr, ureg->pc, va);
+		break;
+	case 0x1c:		/* l1 translation, precise parity err */
+	case 0x1e:		/* l2 translation, precise parity err */
+	case 0x18:		/* imprecise parity or ecc err */
+		panic("translation parity error %#lux pc %#lux addr %#p",
+			fsr, ureg->pc, va);
+		break;
+	case 0x5:		/* translation fault, no section entry */
+	case 0x7:		/* translation fault, no page entry */
+		faultarm(ureg, va, user, !writetomem(inst));
+		break;
+	case 0x9:
+	case 0xb:
+		/* domain fault, accessing something we shouldn't */
+		if(user){
+			char buf[ERRMAX];
+
+			snprint(buf, sizeof buf,
+				"sys: access violation: pc %#lux va %#p\n",
+				ureg->pc, va);
+			postnote(up, 1, buf, NDebug);
+		} else
+			panic("kernel access violation: pc %#lux va %#p",
+				ureg->pc, va);
+		break;
+	case 0xd:
+	case 0xf:
+		/* permission error, copy on write or real permission error */
+		faultarm(ureg, va, user, !writetomem(inst));
+		break;
+	}
+}
+
+/*
+ *  here on all exceptions other than syscall (SWI) and reset
+ */
+void
+trap(Ureg *ureg)
+{
+	int clockintr, user, rem;
+	uintptr va, ifar;
+
+	splhi();			/* paranoia */
+	if(up != nil)
+		rem = ((char*)ureg)-up->kstack;
+	else
+		rem = ((char*)ureg)-((char*)m+sizeof(Mach));
+	if(rem < 1024) {
+		iprint("trap: %d stack bytes left, up %#p ureg %#p m %#p cpu%d at pc %#lux\n",
+			rem, up, ureg, m, m->machno, ureg->pc);
+		dumpstackwithureg(ureg);
+		panic("trap: %d stack bytes left, up %#p ureg %#p at pc %#lux",
+			rem, up, ureg, ureg->pc);
+	}
+
+	m->perf.intrts = perfticks();
+	user = (ureg->psr & PsrMask) == PsrMusr;
+	if(user){
+		up->dbgreg = ureg;
+		cycles(&up->kentry);
+	}
+
+	/*
+	 * All interrupts/exceptions should be resumed at ureg->pc-4,
+	 * except for Data Abort which resumes at ureg->pc-8.
+	 */
+	if(ureg->type == (PsrMabt+1))
+		ureg->pc -= 8;
+	else
+		ureg->pc -= 4;
+
+	clockintr = 0;		/* if set, may call sched() before return */
+	switch(ureg->type){
+	default:
+		panic("unknown trap; type %#lux, psr mode %#lux", ureg->type,
+			ureg->psr & PsrMask);
+		break;
+	case PsrMirq:
+		m->intr++;
+		clockintr = irq(ureg);
+		if(0 && up && !clockintr)
+			preempted();	/* this causes spurious suicides */
+		break;
+	case PsrMabt:			/* prefetch (instruction) fault */
+		va = ureg->pc;
+		ifar = cprdsc(0, CpFAR, 0, CpIFAR);
+		if (va != ifar)
+			iprint("trap: cpu%d: i-fault va %#p != ifar %#p\n",
+				m->machno, va, ifar);
+		faultarm(ureg, va, user, 1);
+		break;
+	case PsrMabt+1:			/* data fault */
+		datafault(ureg, user);
+		break;
+	case PsrMund:			/* undefined instruction */
+		if(!user) {
+			if (ureg->pc & 3) {
+				iprint("rounding fault pc %#lux down to word\n",
+					ureg->pc);
+				ureg->pc &= ~3;
+			}
+			if (Debug)
+				iprint("mathemu: cpu%d fpon %d instr %#8.8lux at %#p\n",
+					m->machno, m->fpon, *(ulong *)ureg->pc,
+				ureg->pc);
+			dumpstackwithureg(ureg);
+			panic("cpu%d: undefined instruction: pc %#lux inst %#ux",
+				m->machno, ureg->pc, ((u32int*)ureg->pc)[0]);
+		} else if(seg(up, ureg->pc, 0) != nil &&
+		   *(u32int*)ureg->pc == 0xD1200070)
+			postnote(up, 1, "sys: breakpoint", NDebug);
+		else if(fpuemu(ureg) == 0){	/* didn't find any FP instrs? */
+			char buf[ERRMAX];
+
+			snprint(buf, sizeof buf,
+				"undefined instruction: pc %#lux instr %#8.8lux\n",
+				ureg->pc, *(ulong *)ureg->pc);
+			postnote(up, 1, buf, NDebug);
+		}
+		break;
+	}
+	splhi();
+
+	/* delaysched set because we held a lock or because our quantum ended */
+	if(up && up->delaysched && clockintr){
+		sched();		/* can cause more traps */
+		splhi();
+	}
+
+	if(user){
+		if(up->procctl || up->nnote)
+			notify(ureg);
+		kexit(ureg);
+	}
+}
+
+/*
+ * Fill in enough of Ureg to get a stack trace, and call a function.
+ * Used by debugging interface rdb.
+ */
+void
+callwithureg(void (*fn)(Ureg*))
+{
+	Ureg ureg;
+
+	memset(&ureg, 0, sizeof ureg);
+	ureg.pc = getcallerpc(&fn);
+	ureg.sp = PTR2UINT(&fn);
+	fn(&ureg);
+}
+
+static void
+dumpstackwithureg(Ureg *ureg)
+{
+	int x;
+	uintptr l, v, i, estack;
+	char *s;
+
+	dumpregs(ureg);
+	if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
+		iprint("dumpstack disabled\n");
+		return;
+	}
+	delay(1000);
+	iprint("dumpstack\n");
+
+	x = 0;
+	x += iprint("ktrace /kernel/path %#.8lux %#.8lux %#.8lux # pc, sp, link\n",
+		ureg->pc, ureg->sp, ureg->r14);
+	delay(20);
+	i = 0;
+	if(up
+	&& (uintptr)&l >= (uintptr)up->kstack
+	&& (uintptr)&l <= (uintptr)up->kstack+KSTACK)
+		estack = (uintptr)up->kstack+KSTACK;
+	else if((uintptr)&l >= (uintptr)m->stack
+	&& (uintptr)&l <= (uintptr)m+MACHSIZE)
+		estack = (uintptr)m+MACHSIZE;
+	else
+		return;
+	x += iprint("estackx %p\n", estack);
+
+	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
+		v = *(uintptr*)l;
+		if((KTZERO < v && v < (uintptr)etext) || estack-l < 32){
+			x += iprint("%.8p ", v);
+			delay(20);
+			i++;
+		}
+		if(i == 8){
+			i = 0;
+			x += iprint("\n");
+			delay(20);
+		}
+	}
+	if(i)
+		iprint("\n");
+	delay(3000);
+}
+
+void
+dumpstack(void)
+{
+	callwithureg(dumpstackwithureg);
+}
+
+/*
+ * dump system control coprocessor registers
+ */
+static void
+dumpscr(void)
+{
+	iprint("0:\t%#8.8ux id\n", cpidget());
+	iprint("\t%8.8#ux ct\n", cpctget());
+	iprint("1:\t%#8.8ux control\n", controlget());
+	iprint("2:\t%#8.8ux ttb\n", ttbget());
+	iprint("3:\t%#8.8ux dac\n", dacget());
+	iprint("4:\t(reserved)\n");
+	iprint("5:\t%#8.8ux fsr\n", fsrget());
+	iprint("6:\t%#8.8ux far\n", farget());
+	iprint("7:\twrite-only cache\n");
+	iprint("8:\twrite-only tlb\n");
+	iprint("13:\t%#8.8ux pid\n", pidget());
+	delay(10);
+}
+
+/*
+ * dump general registers
+ */
+static void
+dumpgpr(Ureg* ureg)
+{
+	if(up != nil)
+		iprint("cpu%d: registers for %s %lud\n",
+			m->machno, up->text, up->pid);
+	else
+		iprint("cpu%d: registers for kernel\n", m->machno);
+
+	delay(20);
+	iprint("%#8.8lux\tr0\n", ureg->r0);
+	iprint("%#8.8lux\tr1\n", ureg->r1);
+	iprint("%#8.8lux\tr2\n", ureg->r2);
+	delay(20);
+	iprint("%#8.8lux\tr3\n", ureg->r3);
+	iprint("%#8.8lux\tr4\n", ureg->r4);
+	iprint("%#8.8lux\tr5\n", ureg->r5);
+	delay(20);
+	iprint("%#8.8lux\tr6\n", ureg->r6);
+	iprint("%#8.8lux\tr7\n", ureg->r7);
+	iprint("%#8.8lux\tr8\n", ureg->r8);
+	delay(20);
+	iprint("%#8.8lux\tr9 (up)\n", ureg->r9);
+	iprint("%#8.8lux\tr10 (m)\n", ureg->r10);
+	iprint("%#8.8lux\tr11 (loader temporary)\n", ureg->r11);
+	iprint("%#8.8lux\tr12 (SB)\n", ureg->r12);
+	delay(20);
+	iprint("%#8.8lux\tr13 (sp)\n", ureg->r13);
+	iprint("%#8.8lux\tr14 (link)\n", ureg->r14);
+	iprint("%#8.8lux\tr15 (pc)\n", ureg->pc);
+	delay(20);
+	iprint("%10.10lud\ttype\n", ureg->type);
+	iprint("%#8.8lux\tpsr\n", ureg->psr);
+	delay(500);
+}
+
+void
+dumpregs(Ureg* ureg)
+{
+	dumpgpr(ureg);
+	dumpscr();
+}
+
+vlong
+probeaddr(uintptr addr)
+{
+	vlong v;
+
+	ilock(&m->probelock);
+	m->trapped = 0;
+	m->probing = 1;
+	coherence();
+
+	v = *(ulong *)addr;	/* this may cause a fault */
+	coherence();
+
+	m->probing = 0;
+	if (m->trapped)
+		v = -1;
+	iunlock(&m->probelock);
+	return v;
+}
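
probeaddr() gives drivers a safe way to poke at optional hardware: the load runs
under probelock with m->probing set, so the data-abort path above steps past a
faulting access instead of panicking.  A minimal caller sketch, with the already
mapped register address as an assumption:

/* hypothetical: does anything decode this (already mapped) register? */
static int
devpresent(uintptr regva)
{
	return probeaddr(regva) != -1;	/* -1 means the load faulted */
}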

+ 91 - 0
sys/src/9/teg2/ts

@@ -0,0 +1,91 @@
+# trimslice dual-core cortex-a9
+dev
+	root
+	cons
+	env
+	pipe
+	proc
+	mnt
+	srv
+	dup
+	arch
+	ssl
+	tls
+	bridge		log
+	sdp		thwack unthwack
+	cap
+	kprof
+#	aoe
+#	sd
+	fs
+#	flash
+
+	ether		netif
+	ip		arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno
+
+#	draw		screen
+#	dss
+#	kbmap
+#	kbin
+#	mouse
+
+	uart
+#	usb
+
+link
+	archtegra
+	ethermedium
+#	flashtegra	ecc
+	loopbackmedium
+	netdevmedium
+
+	ether8169	ethermii
+#	usbohci
+#	usbehci		usbehcitegra
+
+ip
+	tcp
+	udp
+	ipifc
+	icmp
+	icmp6
+	ipmux
+	gre
+	esp
+
+misc
+	pci
+	rdb
+	coproc
+	v7-arch
+	caches
+	caches-v7
+	cache-l2-pl310
+#	mouse
+#	sdaoe		sdscsi
+	syscall
+	syscallfmt
+	uarti8250
+	ucalloc
+	ucallocb
+# hardware fp; can't get 5l to generate the right opcodes
+#	vfp3
+# emulated fp
+	fpi
+	fpiarm
+	fpimem
+	softfpu
+
+port
+	int cpuserver = 1;
+	int i8250freq = 3686000;
+
+boot cpu
+	tcp
+
+bootdir
+	boot$CONF.out boot
+	/arm/bin/ip/ipconfig ipconfig
+	/arm/bin/auth/factotum factotum
+	/arm/bin/usb/usbd
+	nvram

+ 821 - 0
sys/src/9/teg2/uarti8250.c

@@ -0,0 +1,821 @@
+/*
+ * 8250-like UART
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+enum {					/* registers */
+	Rbr		= 0,		/* Receiver Buffer (RO) */
+	Thr		= 0,		/* Transmitter Holding (WO) */
+	Ier		= 1,		/* Interrupt Enable */
+	Iir		= 2,		/* Interrupt Identification (RO) */
+	Fcr		= 2,		/* FIFO Control (WO) */
+	Lcr		= 3,		/* Line Control */
+	Mcr		= 4,		/* Modem Control */
+	Lsr		= 5,		/* Line Status */
+	Msr		= 6,		/* Modem Status */
+	Scr		= 7,		/* Scratch Pad */
+	Mdr		= 8,		/* Mode Def'n (omap rw) */
+//	Usr		= 31,		/* Uart Status Register; missing in omap? */
+	Dll		= 0,		/* Divisor Latch LSB */
+	Dlm		= 1,		/* Divisor Latch MSB */
+};
+
+enum {					/* Usr */
+	Busy		= 0x01,
+};
+
+enum {					/* Ier */
+	Erda		= 0x01,		/* Enable Received Data Available */
+	Ethre		= 0x02,		/* Enable Thr Empty */
+	Erls		= 0x04,		/* Enable Receiver Line Status */
+	Ems		= 0x08,		/* Enable Modem Status */
+};
+
+enum {					/* Iir */
+	Ims		= 0x00,		/* Ms interrupt */
+	Ip		= 0x01,		/* Interrupt Pending (not) */
+	Ithre		= 0x02,		/* Thr Empty */
+	Irda		= 0x04,		/* Received Data Available */
+	Irls		= 0x06,		/* Receiver Line Status */
+	Ictoi		= 0x0C,		/* Character Time-out Indication */
+	IirMASK		= 0x3F,
+	Ifena		= 0xC0,		/* FIFOs enabled */
+};
+
+enum {					/* Fcr */
+	FIFOena		= 0x01,		/* FIFO enable */
+	FIFOrclr	= 0x02,		/* clear Rx FIFO */
+	FIFOtclr	= 0x04,		/* clear Tx FIFO */
+//	FIFOdma		= 0x08,
+	FIFO1		= 0x00,		/* Rx FIFO trigger level 1 byte */
+	FIFO4		= 0x40,		/*	4 bytes */
+	FIFO8		= 0x80,		/*	8 bytes */
+	FIFO14		= 0xC0,		/*	14 bytes */
+};
+
+enum {					/* Lcr */
+	Wls5		= 0x00,		/* Word Length Select 5 bits/byte */
+	Wls6		= 0x01,		/*	6 bits/byte */
+	Wls7		= 0x02,		/*	7 bits/byte */
+	Wls8		= 0x03,		/*	8 bits/byte */
+	WlsMASK		= 0x03,
+	Stb		= 0x04,		/* 2 stop bits */
+	Pen		= 0x08,		/* Parity Enable */
+	Eps		= 0x10,		/* Even Parity Select */
+	Stp		= 0x20,		/* Stick Parity */
+	Brk		= 0x40,		/* Break */
+	Dlab		= 0x80,		/* Divisor Latch Access Bit */
+};
+
+enum {					/* Mcr */
+	Dtr		= 0x01,		/* Data Terminal Ready */
+	Rts		= 0x02,		/* Ready To Send */
+	Out1		= 0x04,		/* no longer in use */
+//	Ie		= 0x08,		/* IRQ Enable (cd_sts_ch on omap) */
+	Dm		= 0x10,		/* Diagnostic Mode loopback */
+};
+
+enum {					/* Lsr */
+	Dr		= 0x01,		/* Data Ready */
+	Oe		= 0x02,		/* Overrun Error */
+	Pe		= 0x04,		/* Parity Error */
+	Fe		= 0x08,		/* Framing Error */
+	Bi		= 0x10,		/* Break Interrupt */
+	Thre		= 0x20,		/* Thr Empty */
+	Temt		= 0x40,		/* Transmitter Empty */
+	FIFOerr		= 0x80,		/* error in receiver FIFO */
+};
+
+enum {					/* Msr */
+	Dcts		= 0x01,		/* Delta Cts */
+	Ddsr		= 0x02,		/* Delta Dsr */
+	Teri		= 0x04,		/* Trailing Edge of Ri */
+	Ddcd		= 0x08,		/* Delta Dcd */
+	Cts		= 0x10,		/* Clear To Send */
+	Dsr		= 0x20,		/* Data Set Ready */
+	Ri		= 0x40,		/* Ring Indicator */
+	Dcd		= 0x80,		/* Carrier Detect */
+};
+
+enum {					/* Mdr */
+	Modemask	= 7,
+	Modeuart	= 0,
+};
+
+
+typedef struct Ctlr {
+	u32int*	io;
+	int	irq;
+	int	tbdf;
+	int	iena;
+	int	poll;
+
+	uchar	sticky[Scr+1];
+
+	Lock;
+	int	hasfifo;
+	int	checkfifo;
+	int	fena;
+} Ctlr;
+
+extern PhysUart i8250physuart;
+
+static Ctlr i8250ctlr[] = {
+{	.io	= (u32int*)PHYSCONS,
+	.irq	= Uartirq,
+	.tbdf	= -1,
+	.poll	= 0, },
+};
+
+static Uart i8250uart[] = {
+{	.regs	= &i8250ctlr[0], /* not [2] */
+	.name	= "COM3",
+	.freq	= 3686000,	/* Not used, we use the global i8250freq */
+	.phys	= &i8250physuart,
+	.console= 1,
+	.next	= nil, },
+};
+
+#define csr8r(c, r)	((c)->io[r])
+#define csr8w(c, r, v)	((c)->io[r] = (c)->sticky[r] | (v), coherence())
+#define csr8o(c, r, v)	((c)->io[r] = (v), coherence())
+
+static long
+i8250status(Uart* uart, void* buf, long n, long offset)
+{
+	char *p;
+	Ctlr *ctlr;
+	uchar ier, lcr, mcr, msr;
+
+	ctlr = uart->regs;
+	p = malloc(READSTR);
+	mcr = ctlr->sticky[Mcr];
+	msr = csr8r(ctlr, Msr);
+	ier = ctlr->sticky[Ier];
+	lcr = ctlr->sticky[Lcr];
+	snprint(p, READSTR,
+		"b%d c%d d%d e%d l%d m%d p%c r%d s%d i%d\n"
+		"dev(%d) type(%d) framing(%d) overruns(%d) "
+		"berr(%d) serr(%d)%s%s%s%s\n",
+
+		uart->baud,
+		uart->hup_dcd,
+		(msr & Dsr) != 0,
+		uart->hup_dsr,
+		(lcr & WlsMASK) + 5,
+		(ier & Ems) != 0,
+		(lcr & Pen) ? ((lcr & Eps) ? 'e': 'o'): 'n',
+		(mcr & Rts) != 0,
+		(lcr & Stb) ? 2: 1,
+		ctlr->fena,
+
+		uart->dev,
+		uart->type,
+		uart->ferr,
+		uart->oerr,
+		uart->berr,
+		uart->serr,
+		(msr & Cts) ? " cts": "",
+		(msr & Dsr) ? " dsr": "",
+		(msr & Dcd) ? " dcd": "",
+		(msr & Ri) ? " ring": ""
+	);
+	n = readstr(offset, buf, n, p);
+	free(p);
+
+	return n;
+}
+
+static void
+i8250fifo(Uart* uart, int level)
+{
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	if(ctlr->hasfifo == 0)
+		return;
+
+	/*
+	 * Changing the FIFOena bit in Fcr flushes data
+	 * from both receive and transmit FIFOs; there's
+	 * no easy way to guarantee not losing data on
+	 * the receive side, but it's possible to wait until
+	 * the transmitter is really empty.
+	 */
+	ilock(ctlr);
+	while(!(csr8r(ctlr, Lsr) & Temt))
+		;
+
+	/*
+	 * Set the trigger level, default is the max.
+	 * value.
+	 * Some UARTs require FIFOena to be set before
+	 * other bits can take effect, so set it twice.
+	 */
+	ctlr->fena = level;
+	switch(level){
+	case 0:
+		break;
+	case 1:
+		level = FIFO1|FIFOena;
+		break;
+	case 4:
+		level = FIFO4|FIFOena;
+		break;
+	case 8:
+		level = FIFO8|FIFOena;
+		break;
+	default:
+		level = FIFO14|FIFOena;
+		break;
+	}
+	csr8w(ctlr, Fcr, level);
+	csr8w(ctlr, Fcr, level);
+	iunlock(ctlr);
+}
+
+static void
+i8250dtr(Uart* uart, int on)
+{
+	Ctlr *ctlr;
+
+	/*
+	 * Toggle DTR.
+	 */
+	ctlr = uart->regs;
+	if(on)
+		ctlr->sticky[Mcr] |= Dtr;
+	else
+		ctlr->sticky[Mcr] &= ~Dtr;
+	csr8w(ctlr, Mcr, 0);
+}
+
+static void
+i8250rts(Uart* uart, int on)
+{
+	Ctlr *ctlr;
+
+	/*
+	 * Toggle RTS.
+	 */
+	ctlr = uart->regs;
+	if(on)
+		ctlr->sticky[Mcr] |= Rts;
+	else
+		ctlr->sticky[Mcr] &= ~Rts;
+	csr8w(ctlr, Mcr, 0);
+}
+
+static void
+i8250modemctl(Uart* uart, int on)
+{
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	ilock(&uart->tlock);
+	if(on){
+		ctlr->sticky[Ier] |= Ems;
+		csr8w(ctlr, Ier, 0);
+		uart->modem = 1;
+		uart->cts = csr8r(ctlr, Msr) & Cts;
+	}
+	else{
+		ctlr->sticky[Ier] &= ~Ems;
+		csr8w(ctlr, Ier, 0);
+		uart->modem = 0;
+		uart->cts = 1;
+	}
+	iunlock(&uart->tlock);
+
+	/* modem needs fifo */
+	(*uart->phys->fifo)(uart, on);
+}
+
+static int
+i8250parity(Uart* uart, int parity)
+{
+	int lcr;
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	lcr = ctlr->sticky[Lcr] & ~(Eps|Pen);
+
+	switch(parity){
+	case 'e':
+		lcr |= Eps|Pen;
+		break;
+	case 'o':
+		lcr |= Pen;
+		break;
+	case 'n':
+		break;
+	default:
+		return -1;
+	}
+	ctlr->sticky[Lcr] = lcr;
+	csr8w(ctlr, Lcr, 0);
+
+	uart->parity = parity;
+
+	return 0;
+}
+
+static int
+i8250stop(Uart* uart, int stop)
+{
+	int lcr;
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	lcr = ctlr->sticky[Lcr] & ~Stb;
+
+	switch(stop){
+	case 1:
+		break;
+	case 2:
+		lcr |= Stb;
+		break;
+	default:
+		return -1;
+	}
+	ctlr->sticky[Lcr] = lcr;
+	csr8w(ctlr, Lcr, 0);
+
+	uart->stop = stop;
+
+	return 0;
+}
+
+static int
+i8250bits(Uart* uart, int bits)
+{
+	int lcr;
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	lcr = ctlr->sticky[Lcr] & ~WlsMASK;
+
+	switch(bits){
+	case 5:
+		lcr |= Wls5;
+		break;
+	case 6:
+		lcr |= Wls6;
+		break;
+	case 7:
+		lcr |= Wls7;
+		break;
+	case 8:
+		lcr |= Wls8;
+		break;
+	default:
+		return -1;
+	}
+	ctlr->sticky[Lcr] = lcr;
+	csr8w(ctlr, Lcr, 0);
+
+	uart->bits = bits;
+
+	return 0;
+}
+
+static int
+i8250baud(Uart* uart, int baud)
+{
+#ifdef notdef				/* don't change the speed */
+	ulong bgc;
+	Ctlr *ctlr;
+	extern int i8250freq;	/* In the config file */
+
+	/*
+	 * Set the Baud rate by calculating and setting the Baud rate
+	 * Generator Constant. This will work with fairly non-standard
+	 * Baud rates.
+	 */
+	if(i8250freq == 0 || baud <= 0)
+		return -1;
+	bgc = (i8250freq+8*baud-1)/(16*baud);
+
+	ctlr = uart->regs;
+	while(csr8r(ctlr, Usr) & Busy)
+		delay(1);
+	csr8w(ctlr, Lcr, Dlab);		/* begin kludge */
+	csr8o(ctlr, Dlm, bgc>>8);
+	csr8o(ctlr, Dll, bgc);
+	csr8w(ctlr, Lcr, 0);
+#endif
+	uart->baud = baud;
+	return 0;
+}
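
Although the divisor code above is compiled out (#ifdef notdef), the formula is
easy to check by hand: with i8250freq = 3686000, the value set in the ts
configuration, and baud = 115200, bgc = (3686000 + 8*115200 - 1)/(16*115200) =
4607599/1843200 = 2, so the UART would run at 3686000/(16*2), about 115187 baud,
within roughly 0.01% of the requested rate.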
+
+static void
+i8250break(Uart* uart, int ms)
+{
+	Ctlr *ctlr;
+
+	if (up == nil)
+		panic("i8250break: nil up");
+	/*
+	 * Send a break.
+	 */
+	if(ms <= 0)
+		ms = 200;
+
+	ctlr = uart->regs;
+	csr8w(ctlr, Lcr, Brk);
+	tsleep(&up->sleep, return0, 0, ms);
+	csr8w(ctlr, Lcr, 0);
+}
+
+static void
+emptyoutstage(Uart *uart, int n)
+{
+	_uartputs((char *)uart->op, n);
+	uart->op = uart->oe = uart->ostage;
+}
+
+static void
+i8250kick(Uart* uart)
+{
+	int i;
+	Ctlr *ctlr;
+
+	if(/* uart->cts == 0 || */ uart->blocked)
+		return;
+
+	if(!normalprint) {			/* early */
+		if (uart->op < uart->oe)
+			emptyoutstage(uart, uart->oe - uart->op);
+		while ((i = uartstageoutput(uart)) > 0)
+			emptyoutstage(uart, i);
+		return;
+	}
+
+	/* nothing more to send? then disable xmit intr */
+	ctlr = uart->regs;
+	if (uart->op >= uart->oe && qlen(uart->oq) == 0 &&
+	    csr8r(ctlr, Lsr) & Temt) {
+		ctlr->sticky[Ier] &= ~Ethre;
+		csr8w(ctlr, Ier, 0);
+		return;
+	}
+
+	/*
+	 *  128 here is an arbitrary limit to make sure
+	 *  we don't stay in this loop too long.  If the
+	 *  chip's output queue is longer than 128, too
+	 *  bad -- presotto
+	 */
+	for(i = 0; i < 128; i++){
+		if(!(csr8r(ctlr, Lsr) & Thre))
+			break;
+		if(uart->op >= uart->oe && uartstageoutput(uart) == 0)
+			break;
+		csr8o(ctlr, Thr, *uart->op++);		/* start tx */
+		ctlr->sticky[Ier] |= Ethre;
+		csr8w(ctlr, Ier, 0);			/* intr when done */
+	}
+}
+
+void
+serialkick(void)
+{
+	uartkick(&i8250uart[CONSOLE]);
+}
+
+static void
+i8250interrupt(Ureg*, void* arg)
+{
+	Ctlr *ctlr;
+	Uart *uart;
+	int iir, lsr, old, r;
+
+	uart = arg;
+	ctlr = uart->regs;
+	for(iir = csr8r(ctlr, Iir); !(iir & Ip); iir = csr8r(ctlr, Iir)){
+		switch(iir & IirMASK){
+		case Ims:		/* Ms interrupt */
+			r = csr8r(ctlr, Msr);
+			if(r & Dcts){
+				ilock(&uart->tlock);
+				old = uart->cts;
+				uart->cts = r & Cts;
+				if(old == 0 && uart->cts)
+					uart->ctsbackoff = 2;
+				iunlock(&uart->tlock);
+			}
+		 	if(r & Ddsr){
+				old = r & Dsr;
+				if(uart->hup_dsr && uart->dsr && !old)
+					uart->dohup = 1;
+				uart->dsr = old;
+			}
+		 	if(r & Ddcd){
+				old = r & Dcd;
+				if(uart->hup_dcd && uart->dcd && !old)
+					uart->dohup = 1;
+				uart->dcd = old;
+			}
+			break;
+		case Ithre:		/* Thr Empty */
+			uartkick(uart);
+			break;
+		case Irda:		/* Received Data Available */
+		case Irls:		/* Receiver Line Status */
+		case Ictoi:		/* Character Time-out Indication */
+			/*
+			 * Consume any received data.
+			 * If the received byte came in with a break,
+			 * parity or framing error, throw it away;
+			 * overrun is an indication that something has
+			 * already been tossed.
+			 */
+			while((lsr = csr8r(ctlr, Lsr)) & Dr){
+				if(lsr & (FIFOerr|Oe))
+					uart->oerr++;
+				if(lsr & Pe)
+					uart->perr++;
+				if(lsr & Fe)
+					uart->ferr++;
+				r = csr8r(ctlr, Rbr);
+				if(!(lsr & (Bi|Fe|Pe)))
+					uartrecv(uart, r);
+			}
+			break;
+
+		default:
+			iprint("weird uart interrupt type %#2.2uX\n", iir);
+			break;
+		}
+	}
+}
+
+static void
+i8250disable(Uart* uart)
+{
+	Ctlr *ctlr;
+
+	/*
+	 * Turn off DTR and RTS, disable interrupts and fifos.
+	 */
+	(*uart->phys->dtr)(uart, 0);
+	(*uart->phys->rts)(uart, 0);
+	(*uart->phys->fifo)(uart, 0);
+
+	ctlr = uart->regs;
+	ctlr->sticky[Ier] = 0;
+	csr8w(ctlr, Ier, 0);
+
+	if(ctlr->iena != 0){
+		if(irqdisable(ctlr->irq, i8250interrupt, uart, uart->name) == 0)
+			ctlr->iena = 0;
+	}
+}
+
+static void
+i8250enable(Uart* uart, int ie)
+{
+	int mode;
+	Ctlr *ctlr;
+
+	if (up == nil)
+		return;				/* too soon */
+
+	ctlr = uart->regs;
+
+	/* omap only: set uart/irda/cir mode to uart */
+	mode = csr8r(ctlr, Mdr);
+	csr8o(ctlr, Mdr, (mode & ~Modemask) | Modeuart);
+
+	ctlr->sticky[Lcr] = Wls8;		/* no parity */
+	csr8w(ctlr, Lcr, 0);
+
+	/*
+	 * Check if there is a FIFO.
+	 * Changing the FIFOena bit in Fcr flushes data
+	 * from both receive and transmit FIFOs; there's
+	 * no easy way to guarantee not losing data on
+	 * the receive side, but it's possible to wait until
+	 * the transmitter is really empty.
+	 * Also, reading the Iir outwith i8250interrupt()
+	 * can be dangerous, but this should only happen
+	 * once, before interrupts are enabled.
+	 */
+	ilock(ctlr);
+	if(!ctlr->checkfifo){
+		/*
+		 * Wait until the transmitter is really empty.
+		 */
+		while(!(csr8r(ctlr, Lsr) & Temt))
+			;
+		csr8w(ctlr, Fcr, FIFOena);
+		if(csr8r(ctlr, Iir) & Ifena)
+			ctlr->hasfifo = 1;
+		csr8w(ctlr, Fcr, 0);
+		ctlr->checkfifo = 1;
+	}
+	iunlock(ctlr);
+
+	/*
+	 * Enable interrupts and turn on DTR and RTS.
+	 * Be careful if this is called to set up a polled serial line
+	 * early on not to try to enable interrupts as interrupt-
+	 * -enabling mechanisms might not be set up yet.
+	 */
+	if(ie){
+		if(ctlr->iena == 0 && !ctlr->poll){
+			irqenable(ctlr->irq, i8250interrupt, uart, uart->name);
+			ctlr->iena = 1;
+		}
+		ctlr->sticky[Ier] = Erda;
+//		ctlr->sticky[Mcr] |= Ie;		/* not on omap */
+		ctlr->sticky[Mcr] = 0;
+	}
+	else{
+		ctlr->sticky[Ier] = 0;
+		ctlr->sticky[Mcr] = 0;
+	}
+	csr8w(ctlr, Ier, 0);
+	csr8w(ctlr, Mcr, 0);
+
+	(*uart->phys->dtr)(uart, 1);
+	(*uart->phys->rts)(uart, 1);
+
+	/*
+	 * During startup, the i8259 interrupt controller is reset.
+	 * This may result in a lost interrupt from the i8250 uart.
+	 * The i8250 thinks the interrupt is still outstanding and does not
+	 * generate any further interrupts. The workaround is to call the
+	 * interrupt handler to clear any pending interrupt events.
+	 * Note: this must be done after setting Ier.
+	 */
+	if(ie)
+		i8250interrupt(nil, uart);
+}
+
+static Uart*
+i8250pnp(void)
+{
+	return i8250uart;
+}
+
+static int
+i8250getc(Uart* uart)
+{
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	while(!(csr8r(ctlr, Lsr) & Dr))
+		delay(1);
+	return csr8r(ctlr, Rbr);
+}
+
+static void
+i8250putc(Uart* uart, int c)
+{
+	int i;
+	Ctlr *ctlr;
+
+	if (!normalprint) {		/* too early; use brute force */
+		int s = splhi();
+
+		while (!(((ulong *)PHYSCONS)[Lsr] & Thre))
+			;
+		((ulong *)PHYSCONS)[Thr] = c;
+		coherence();
+		splx(s);
+		return;
+	}
+
+	ctlr = uart->regs;
+	for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++)
+		delay(1);
+	csr8o(ctlr, Thr, (uchar)c);
+	for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++)
+		delay(1);
+}
+
+void
+serialputc(int c)
+{
+	i8250putc(&i8250uart[CONSOLE], c);
+}
+
+void
+serialputs(char* s, int n)
+{
+	_uartputs(s, n);
+}
+
+#ifdef notdef
+static void
+i8250poll(Uart* uart)
+{
+	Ctlr *ctlr;
+
+	/*
+	 * If PhysUart has a non-nil .poll member, this
+	 * routine will be called from the uartclock timer.
+	 * If the Ctlr .poll member is non-zero, when the
+	 * Uart is enabled interrupts will not be enabled
+	 * and the result is polled input and output.
+	 * Not very useful here, but ports to new hardware
+	 * or simulators can use this to get serial I/O
+	 * without setting up the interrupt mechanism.
+	 */
+	ctlr = uart->regs;
+	if(ctlr->iena || !ctlr->poll)
+		return;
+	i8250interrupt(nil, uart);
+}
+#endif
+
+PhysUart i8250physuart = {
+	.name		= "i8250",
+	.pnp		= i8250pnp,
+	.enable		= i8250enable,
+	.disable	= i8250disable,
+	.kick		= i8250kick,
+	.dobreak	= i8250break,
+	.baud		= i8250baud,
+	.bits		= i8250bits,
+	.stop		= i8250stop,
+	.parity		= i8250parity,
+	.modemctl	= i8250modemctl,
+	.rts		= i8250rts,
+	.dtr		= i8250dtr,
+	.status		= i8250status,
+	.fifo		= i8250fifo,
+	.getc		= i8250getc,
+	.putc		= i8250putc,
+//	.poll		= i8250poll,		/* only in 9k, not 9 */
+};
+
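+/* the divisor latch (Dlm:Dll) holds input clock/(16*baud); dumped for debugging */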
+static void
+i8250dumpregs(Ctlr* ctlr)
+{
+	int dlm, dll;
+	int _uartprint(char*, ...);
+
+	csr8w(ctlr, Lcr, Dlab);
+	dlm = csr8r(ctlr, Dlm);
+	dll = csr8r(ctlr, Dll);
+	csr8w(ctlr, Lcr, 0);
+
+	_uartprint("dlm %#ux dll %#ux\n", dlm, dll);
+}
+
+Uart*	uartenable(Uart *p);
+
+/* must call this from a process's context */
+int
+i8250console(void)
+{
+	Uart *uart = &i8250uart[CONSOLE];
+
+	if (up == nil)
+		return -1;			/* too early */
+
+	if(uartenable(uart) != nil /* && uart->console */){
+		// iprint("i8250console: enabling console uart\n");
+		kbdq = uart->iq;
+		serialoq = uart->oq;
+		uart->putc = kbdcr2nl;
+		uart->opens++;
+		consuart = uart;
+	}
+	uartctl(uart, "b115200 l8 pn r1 s1 i1");
+	return 0;
+}
+
+void
+_uartputs(char* s, int n)
+{
+	char *e;
+
+	for(e = s+n; s < e; s++){
+		if(*s == '\n')
+			i8250putc(&i8250uart[CONSOLE], '\r');
+		i8250putc(&i8250uart[CONSOLE], *s);
+	}
+}
+
+int
+_uartprint(char* fmt, ...)
+{
+	int n;
+	va_list arg;
+	char buf[PRINTSIZE];
+
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+	_uartputs(buf, n);
+
+	return n;
+}

+ 104 - 0
sys/src/9/teg2/usbehci.h

@@ -0,0 +1,104 @@
+/* override default macros from ../port/usb.h */
+#undef	dprint
+#undef	ddprint
+#undef	deprint
+#undef	ddeprint
+#define dprint		if(ehcidebug)print
+#define ddprint		if(ehcidebug>1)print
+#define deprint		if(ehcidebug || ep->debug)print
+#define ddeprint	if(ehcidebug>1 || ep->debug>1)print
+
+typedef struct Ctlr Ctlr;
+typedef struct Eopio Eopio;
+typedef struct Isoio Isoio;
+typedef struct Poll Poll;
+typedef struct Qh Qh;
+typedef struct Qtree Qtree;
+
+#pragma incomplete Ctlr;
+#pragma incomplete Eopio;
+#pragma incomplete Isoio;
+#pragma incomplete Poll;
+#pragma incomplete Qh;
+#pragma incomplete Qtree;
+
+struct Poll
+{
+	Lock;
+	Rendez;
+	int	must;
+	int	does;
+};
+
+struct Ctlr
+{
+	Rendez;			/* for waiting to async advance doorbell */
+	Lock;			/* for ilock. qh lists and basic ctlr I/O */
+	QLock	portlck;	/* for port resets/enable... (and doorbell) */
+	int	active;		/* in use or not */
+	Ecapio*	capio;		/* Capability i/o regs */
+	Eopio*	opio;		/* Operational i/o regs */
+
+	int	nframes;	/* 1024, 512, or 256 frames in the list */
+	ulong*	frames;		/* periodic frame list (hw) */
+	Qh*	qhs;		/* async Qh circular list for bulk/ctl */
+	Qtree*	tree;		/* tree of Qhs for the periodic list */
+	int	ntree;		/* number of dummy qhs in tree */
+	Qh*	intrqhs;	/* list of (not dummy) qhs in tree  */
+	Isoio*	iso;		/* list of active Iso I/O */
+	ulong	load;
+	ulong	isoload;
+	int	nintr;		/* number of interrupts attended */
+	int	ntdintr;	/* number of intrs. with something to do */
+	int	nqhintr;	/* number of async td intrs. */
+	int	nisointr;	/* number of periodic td intrs. */
+	int	nreqs;
+	Poll	poll;
+};
+
+/*
+ * Operational registers (hw)
+ */
+struct Eopio
+{
+	ulong	cmd;		/* 00 command */
+	ulong	sts;		/* 04 status */
+	ulong	intr;		/* 08 interrupt enable */
+	ulong	frno;		/* 0c frame index */
+	ulong	seg;		/* 10 bits 63:32 of EHCI datastructs (unused) */
+	ulong	frbase;		/* 14 frame list base addr, 4096-byte boundary */
+	ulong	link;		/* 18 link for async list */
+	uchar	d2c[0x40-0x1c];	/* 1c dummy */
+	ulong	config;		/* 40 1: all ports default-routed to this HC */
+	ulong	portsc[3];	/* 44 Port status and control, one per port */
+
+	/* defined for omap35 ehci at least */
+	uchar	_pad0[0x80 - 0x50];
+	ulong	insn[6];	/* implementation-specific */
+};
+
+typedef struct Uhh Uhh;
+struct Uhh {
+	ulong	revision;	/* ro */
+	uchar	_pad0[0x10-0x4];
+	ulong	sysconfig;
+	ulong	sysstatus;	/* ro */
+
+	uchar	_pad1[0x40-0x18];
+	ulong	hostconfig;
+	ulong	debug_csr;
+};
+
+enum {
+	/* hostconfig bits */
+	P1ulpi_bypass = 1<<0,	/* utmi if set; else ulpi */
+};
+
+extern Ecapio *ehcidebugcapio;
+extern int ehcidebugport;
+
+extern int ehcidebug;
+
+void	ehcilinkage(Hci *hp);
+void	ehcimeminit(Ctlr *ctlr);
+void	ehcirun(Ctlr *ctlr, int on);

+ 51 - 0
sys/src/9/teg2/v7-arch.c

@@ -0,0 +1,51 @@
+/*
+ * arm arch v7 routines other than cache-related ones.
+ *
+ * calling this arch-v7.c would confuse the mk scripts,
+ * to which a filename arch*.c is magic.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "arm.h"
+
+/*
+ * these routines should be cheap enough that there will
+ * be no hesitation to use them.
+ *
+ * once 5c in-lines vlong ops, just use the vlong versions.
+ */
+
+/* see Hacker's Delight if this isn't obvious */
+#define ISPOW2(i) (((i) & ((i) - 1)) == 0)
+
+int
+ispow2(uvlong uvl)
+{
+	/* see Hacker's Delight if this isn't obvious */
+	return ISPOW2(uvl);
+}
+
+static int
+isulpow2(ulong ul)				/* temporary speed hack */
+{
+	return ISPOW2(ul);
+}
+
+/*
+ * return exponent of smallest power of 2 ≥ n
+ */
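+/* e.g. log2(1) == 0, log2(4096) == 12, log2(4097) == 13 (non-powers of 2 round up) */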
+int
+log2(ulong n)
+{
+	int i;
+
+	i = BI2BY*BY2WD - 1 - clz(n);
+	if (n == 0 || !ISPOW2(n))
+		i++;
+	return i;
+}

+ 489 - 0
sys/src/9/teg2/vfp3.c

@@ -0,0 +1,489 @@
+/*
+ * VFPv3 floating point unit
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "ureg.h"
+#include "arm.h"
+
+/* fp control regs.  most are read-only */
+enum {
+	Fpsid =	0,
+	Fpscr =	1,			/* rw */
+	Mvfr1 =	6,
+	Mvfr0 =	7,
+	Fpexc =	8,			/* rw */
+	Fpinst =9,			/* optional, for exceptions */
+	Fpinst2 =10,
+};
+enum {
+	/* Fpexc bits */
+	Fpex =		1u << 31,
+	Fpenabled =	1 << 30,
+	Fpdex =		1 << 29,	/* defined synch exception */
+//	Fp2v =		1 << 28,	/* Fpinst2 reg is valid */
+//	Fpvv =		1 << 27,	/* if Fpdex, vecitr is valid */
+//	Fptfv = 	1 << 26,	/* trapped fault is valid */
+//	Fpvecitr =	MASK(3) << 8,
+	/* FSR bits appear here */
+	Fpmbc =		Fpdex,		/* bits exception handler must clear */
+
+	/* Fpscr bits; see u.h for more */
+	Stride =	MASK(2) << 20,
+	Len =		MASK(3) << 16,
+	/* trap exception enables (not allowed in vfp3) */
+	FPIDNRM =	1 << 15,	/* input denormal */
+	Alltraps = FPIDNRM | FPINEX | FPUNFL | FPOVFL | FPZDIV | FPINVAL,
+	/* pending exceptions */
+	FPAIDNRM =	1 << 7,		/* input denormal */
+#define Allexc (FPAIDNRM | FPAINEX | FPAUNFL | FPAOVFL | FPAZDIV | FPAINVAL)
+};
+enum {
+	/* CpCPaccess bits */
+	Cpaccnosimd =	1u << 31,
+	Cpaccd16 =	1 << 30,
+};
+
+static char *
+subarch(int impl, uint sa)
+{
+	static char *armarchs[] = {
+		"VFPv1 (pre-armv7)",
+		"VFPv2 (pre-armv7)",
+		"VFPv3+ with common VFP subarch v2",
+		"VFPv3+ with null subarch",
+		"VFPv3+ with common VFP subarch v3",
+	};
+
+	if (impl != 'A' || sa >= nelem(armarchs))
+		return "GOK";
+	else
+		return armarchs[sa];
+}
+
+static char *
+implement(uchar impl)
+{
+	if (impl == 'A')
+		return "arm";
+	else
+		return "unknown";
+}
+
+static int
+havefp(void)
+{
+	int gotfp;
+	ulong acc;
+
+	if (m->havefpvalid)
+		return m->havefp;
+
+	m->havefp = 0;
+	gotfp = 1 << CpFP | 1 << CpDFP;
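+	/* try to grant full access to cp0-cp13, then read back to see what stuck */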
+	cpwrsc(0, CpCONTROL, 0, CpCPaccess, MASK(28));
+	acc = cprdsc(0, CpCONTROL, 0, CpCPaccess);
+	if ((acc & (MASK(2) << (2*CpFP))) == 0) {
+		gotfp &= ~(1 << CpFP);
+		print("fpon: no single FP coprocessor\n");
+	}
+	if ((acc & (MASK(2) << (2*CpDFP))) == 0) {
+		gotfp &= ~(1 << CpDFP);
+		print("fpon: no double FP coprocessor\n");
+	}
+	if (!gotfp) {
+		print("fpon: no FP coprocessors\n");
+		m->havefpvalid = 1;
+		return 0;
+	}
+	if (acc & Cpaccd16)
+		m->fpnregs = 16;
+	else
+		m->fpnregs = 32;
+	if (m->machno == 0)
+		print("fp: %d registers,%s simd\n", m->fpnregs,
+			(acc & Cpaccnosimd? " no": ""));
+	m->havefp = 1;
+	m->havefpvalid = 1;
+	return 1;
+}
+
+/*
+ * these can be called to turn the fpu on or off for user procs,
+ * not just at system start up or shutdown.
+ */
+
+void
+fpoff(void)
+{
+	if (m->fpon) {
+		fpwr(Fpexc, 0);
+		m->fpon = 0;
+	}
+}
+
+void
+fpononly(void)
+{
+	if (!m->fpon && havefp()) {
+		/* enable fp.  must be first operation on the FPUs. */
+		fpwr(Fpexc, Fpenabled);
+		m->fpon = 1;
+	}
+}
+
+static void
+fpcfg(void)
+{
+	int impl;
+	ulong sid;
+	static int printed;
+
+	/* clear pending exceptions; no traps in vfp3; all v7 ops are scalar */
+	m->fpscr = FPRNR | (FPINVAL | FPZDIV | FPOVFL) & ~Alltraps;
+	fpwr(Fpscr, m->fpscr);
+	m->fpconfiged = 1;
+
+	if (printed)
+		return;
+	sid = fprd(Fpsid);
+	impl = sid >> 24;
+	print("fp: %s arch %s; r%ld\n", implement(impl),
+		subarch(impl, (sid >> 16) & MASK(7)), sid & MASK(4));
+	printed = 1;
+}
+
+void
+fpinit(void)
+{
+	if (havefp()) {
+		fpononly();
+		fpcfg();
+	}
+}
+
+void
+fpon(void)
+{
+	if (havefp()) {
+	 	fpononly();
+		if (m->fpconfiged)
+			fpwr(Fpscr, m->fpscr);
+		else
+			fpcfg();	/* 1st time on this fpu; configure it */
+	}
+}
+
+void
+fpclear(void)
+{
+//	ulong scr;
+
+	fpon();
+//	scr = fprd(Fpscr);
+//	m->fpscr = scr & ~Allexc;
+//	fpwr(Fpscr, m->fpscr);
+
+	fpwr(Fpexc, fprd(Fpexc) & ~Fpmbc);
+}
+
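+/*
+ * summary of the lazy FP state machine, for orientation:
+ *	FPinit		process hasn't touched the FPU since exec; first use
+ *			faults into mathemu(), which calls fpinit().
+ *	FPactive	the FPU is on and holds this process's registers.
+ *	FPinactive	registers saved in up->fpsave; the next FP use faults
+ *			and restores them via fpuprocrestore().
+ *	FPillegal	or'd in while a note handler runs; FP use is an error.
+ */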
+
+/*
+ * Called when a note is about to be delivered to a
+ * user process, usually at the end of a system call.
+ * Note handlers are not allowed to use the FPU so
+ * the state is marked (after saving if necessary) and
+ * checked in the Device Not Available handler.
+ */
+void
+fpunotify(Ureg*)
+{
+	if(up->fpstate == FPactive){
+		fpsave(&up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	up->fpstate |= FPillegal;
+}
+
+/*
+ * Called from sysnoted() via the machine-dependent
+ * noted() routine.
+ * Clear the flag set above in fpunotify().
+ */
+void
+fpunoted(void)
+{
+	up->fpstate &= ~FPillegal;
+}
+
+/*
+ * Called early in the non-interruptible path of
+ * sysrfork() via the machine-dependent syscall() routine.
+ * Save the state so that it can be easily copied
+ * to the child process later.
+ */
+void
+fpusysrfork(Ureg*)
+{
+	if(up->fpstate == FPactive){
+		fpsave(&up->fpsave);
+		up->fpstate = FPinactive;
+	}
+}
+
+/*
+ * Called later in sysrfork() via the machine-dependent
+ * sysrforkchild() routine.
+ * Copy the parent FPU state to the child.
+ */
+void
+fpusysrforkchild(Proc *p, Ureg *, Proc *up)
+{
+	/* don't penalize the child, it hasn't done FP in a note handler. */
+	p->fpstate = up->fpstate & ~FPillegal;
+}
+
+/* should only be called if p->fpstate == FPactive */
+void
+fpsave(FPsave *fps)
+{
+	int n;
+
+	fpon();
+	fps->control = fps->status = fprd(Fpscr);
+	assert(m->fpnregs);
+	for (n = 0; n < m->fpnregs; n++)
+		fpsavereg(n, (uvlong *)fps->regs[n]);
+	fpoff();
+}
+
+/*
+ * Called from sched() and sleep() via the machine-dependent
+ * procsave() routine.
+ * About to go in to the scheduler.
+ * If the process wasn't using the FPU
+ * there's nothing to do.
+ */
+void
+fpuprocsave(Proc *p)
+{
+	if(p->fpstate == FPactive){
+		if(p->state == Moribund)
+			fpclear();
+		else{
+			/*
+			 * Fpsave() stores without handling pending
+			 * unmasked exeptions. Postnote() can't be called
+			 * here as sleep() already has up->rlock, so
+			 * unmasked exceptions. Postnote() can't be called
+			 * until the process runs again and generates an
+			 * emulation fault to activate the FPU.
+			 */
+			fpsave(&p->fpsave);
+		}
+		p->fpstate = FPinactive;
+	}
+}
+
+/*
+ * The process has been rescheduled and is about to run.
+ * Nothing to do here right now. If the process tries to use
+ * the FPU again it will cause a Device Not Available
+ * exception and the state will then be restored.
+ */
+void
+fpuprocrestore(Proc *p)
+{
+	int n;
+
+	if (p->fpstate == FPactive) {
+		fpon();
+		fpwr(Fpscr, p->fpsave.control);
+		m->fpscr = fprd(Fpscr);
+		assert(m->fpnregs);
+		for (n = 0; n < m->fpnregs; n++)
+			fprestreg(n, *(uvlong *)p->fpsave.regs[n]);
+	}
+}
+
+/*
+ * Disable the FPU.
+ * Called from sysexec() via sysprocsetup() to
+ * set the FPU for the new process.
+ */
+void
+fpusysprocsetup(Proc *p)
+{
+	p->fpstate = FPinit;
+	fpoff();
+}
+
+static void
+mathnote(void)
+{
+	ulong status;
+	char *msg, note[ERRMAX];
+
+	status = up->fpsave.status;
+
+	/*
+	 * Some attention should probably be paid here to the
+	 * exception masks and error summary.
+	 */
+	if (status & FPAINEX)
+		msg = "inexact";
+	else if (status & FPAOVFL)
+		msg = "overflow";
+	else if (status & FPAUNFL)
+		msg = "underflow";
+	else if (status & FPAZDIV)
+		msg = "divide by zero";
+	else if (status & FPAINVAL)
+		msg = "bad operation";
+	else
+		msg = "spurious";
+	snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=%#lux",
+		msg, up->fpsave.pc, status);
+	postnote(up, 1, note, NDebug);
+}
+
+static void
+mathemu(Ureg *)
+{
+	if (!(fprd(Fpexc) & (Fpex|Fpdex)))
+		iprint("mathemu: not an FP exception but an unknown FP opcode\n");
+	switch(up->fpstate){
+	case FPinit:
+		fpinit();
+		up->fpstate = FPactive;
+		break;
+	case FPinactive:
+		/*
+		 * Before restoring the state, check for any pending
+		 * exceptions.  There's no way to restore the state without
+		 * generating an unmasked exception.
+		 * More attention should probably be paid here to the
+		 * exception masks and error summary.
+		 */
+		if(up->fpsave.status & (FPAINEX|FPAUNFL|FPAOVFL|FPAZDIV|FPAINVAL)){
+			mathnote();
+			break;
+		}
+		fpuprocrestore(up);
+		up->fpstate = FPactive;
+		break;
+	case FPactive:
+		error("illegal instruction: bad vfp fpu opcode");
+		break;
+	}
+	fpclear();
+}
+
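+/*
+ * watchdog for the emulation path: if the same process keeps faulting
+ * at the same pc without making progress, panic rather than spin forever.
+ */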
+void
+fpstuck(uintptr pc)
+{
+	if (m->fppc == pc && m->fppid == up->pid) {
+		m->fpcnt++;
+		if (m->fpcnt > 4)
+			panic("fpuemu: cpu%d stuck at pid %ld %s pc %#p "
+				"instr %#8.8lux", m->machno, up->pid, up->text,
+				pc, *(ulong *)pc);
+	} else {
+		m->fppid = up->pid;
+		m->fppc = pc;
+		m->fpcnt = 0;
+	}
+}
+
+enum {
+	N = 1<<31,
+	Z = 1<<30,
+	C = 1<<29,
+	V = 1<<28,
+	REGPC = 15,
+};
+
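+/*
+ * ARM condition-field values and mnemonics, for reference:
+ *	0 EQ, 1 NE, 2 CS, 3 CC, 4 MI, 5 PL, 6 VS, 7 VC,
+ *	8 HI, 9 LS, 10 GE, 11 LT, 12 GT, 13 LE, 14 AL, 15 NV.
+ */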
+static int
+condok(int cc, int c)
+{
+	switch(c){
+	case 0:	/* Z set */
+		return cc&Z;
+	case 1:	/* Z clear */
+		return (cc&Z) == 0;
+	case 2:	/* C set */
+		return cc&C;
+	case 3:	/* C clear */
+		return (cc&C) == 0;
+	case 4:	/* N set */
+		return cc&N;
+	case 5:	/* N clear */
+		return (cc&N) == 0;
+	case 6:	/* V set */
+		return cc&V;
+	case 7:	/* V clear */
+		return (cc&V) == 0;
+	case 8:	/* C set and Z clear */
+		return cc&C && (cc&Z) == 0;
+	case 9:	/* C clear or Z set */
+		return (cc&C) == 0 || cc&Z;
+	case 10:	/* N set and V set, or N clear and V clear */
+		return (~cc&(N|V))==0 || (cc&(N|V)) == 0;
+	case 11:	/* N set and V clear, or N clear and V set */
+		return (cc&(N|V))==N || (cc&(N|V))==V;
+	case 12:	/* Z clear, and either N set and V set or N clear and V clear */
+		return (cc&Z) == 0 && ((~cc&(N|V))==0 || (cc&(N|V))==0);
+	case 13:	/* Z set, or N set and V clear or N clear and V set */
+		return (cc&Z) || (cc&(N|V))==N || (cc&(N|V))==V;
+	case 14:	/* always */
+		return 1;
+	case 15:	/* never (reserved) */
+		return 0;
+	}
+	return 0;	/* not reached */
+}
+
+/* only called to deal with user-mode instruction faults */
+int
+fpuemu(Ureg* ureg)
+{
+	int s, nfp;
+	int cop, op;
+	uintptr pc;
+
+	s = spllo();
+	if(waserror()){
+		splx(s);
+		postnote(up, 1, up->errstr, NDebug);
+		return 1;
+	}
+
+	if(up->fpstate & FPillegal)
+		error("floating point in note handler");
+
+	nfp = 0;
+	pc = ureg->pc;
+	validaddr(pc, 4, 0);
+	if(!condok(ureg->psr, *(ulong*)pc >> 28))
+		iprint("fpuemu: conditional instr shouldn't have got here\n");
+	op  = (*(ulong *)pc >> 24) & MASK(4);
+	cop = (*(ulong *)pc >>  8) & MASK(4);
+	fpstuck(pc);			/* debugging; could move down 1 line */
+	if (ISFPAOP(cop, op)) {		/* old arm 7500 fpa opcode? */
+		iprint("fpuemu: fpa instr %#8.8lux at %#p\n", *(ulong *)pc, pc);
+		error("illegal instruction: old arm 7500 fpa opcode");
+//		nfp = fpiarm(ureg);	/* advances pc past emulated instr(s) */
+//		if (nfp > 1)		/* could adjust this threshold */
+//			m->fppc = m->fpcnt = 0;
+	} else 	if (ISVFPOP(cop, op)) {	/* if vfp, fpu must be off */
+		mathemu(ureg);		/* enable fpu & retry */
+		nfp = 1;
+	}
+	splx(s);
+
+	poperror();
+	return nfp;
+}

+ 60 - 0
sys/src/9/teg2/words

@@ -0,0 +1,60 @@
+this is a plan 9 port to the Trimslice with tegra2 soc: dual-core,
+dual-issue 1GHz Cortex-A9 system.
+
+dram is 1GB at 0.
+linux believes that u-boot runs in the bottom 4MB.
+the l2 cache is a non-architectural bag nailed on the side.
+mp arm systems have a generic interrupt controller; this one is gic v1(!).
+vfp 3 floating-point is present.  5l doesn't yet generate those instructions.
+
+section numbers (§) are in the tegra 2 tech. ref. man.
+for a minimal cpu server, need these devices to work:
+	clock signals §5 (leave to u-boot),
+	pad mux + gpio crap §8, §11 and §18 (leave to u-boot),
+☑	1 cpu §13,
+☑	uart (16[45]50) §22,
+☑	gic (gic.v1.pdf),
+☑	clock §6—7,
+☑	ether8169 via pcie §31.
+then add these:
+☑	2nd cpu (cortex.a9.mpcore.pdf),
+☑	l2 cache (l2cache.pl310.pdf, errata),
+☹	fpu (cortex.a9.fp.pdf): kernel done, 5l isn't,
+☑	user profiling,
+	kprof,
+	in-line 64-bit arithmetic,
+eventually might want:
+	usb (e.g., for sata) §26,
+	nor flash §17,
+	video §29,
+and the really horrid ones:
+	nand flash §16,
+	mmc §25.
+
+physical memory map
+
+0		1GB	ram
+
+40000000 	256K	iram (audio/video memory)
+50000000		cortex-a9 cpu regs, periphbase, intr distrib, memsel,
+			l2 cache
+54000000		graphics regs
+58000000		gart (graphics window)
+60000000	256MB	ppsb bus dev regs, including semas, intr ctlr, dma,
+			arm7 cache, gpio, except. vects
+70000000	256MB	apc bus regs, including uarts, nand, nor, spi, rtc
+
+80000000	1GB	ahb extern mem, pcie for cpu only
+90000000-97ffffff	pcie 0 mem(?)
+a0000000-a7ffffff	pcie 0 prefetch mem, includes rtl8111dl ether(?)
+a0020000		ether region 4
+a0024000		ether region 2
+
+c0000000	256MB	ahb bus			virtual b0000000
+c3000000-c80007ff 81MB	ide, usb, sata, mmc
+d0000000	256MB	nor flash		virtual 40000000
+f000f000	4K	mmu tlb
+fff00000	48K	irom boot code
+ffff0000	64K	high vectors
+
+use 0xc0000000 as KZERO.
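+
+rough sketch of the linear ram mapping this implies (illustrative only;
+the real definitions are in mem.h):
+	KADDR(pa) = (void *)((ulong)(pa) + KZERO)	/* ram physical -> kernel virtual */
+	PADDR(va) = (ulong)(va) - KZERO			/* kernel virtual -> ram physical */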

+ 0 - 1
sys/src/cmd/ip/ipconfig/ipconfig.h

@@ -92,7 +92,6 @@ extern int	nip;
 extern int	plan9;
 extern int	dupl_disc;
 
-extern Conf	conf;
 extern int	myifc;
 extern char	*vs;