Browse Source

Plan 9 from Bell Labs 2010-04-23

David du Colombier 9 years ago
parent
commit
562b912d6b

+ 2 - 2
sys/man/2/pool

@@ -1,6 +1,6 @@
 .TH POOL 2
 .SH NAME
-poolalloc, poolallocalignspan, poolfree, poolmsize, poolrealloc, poolcompact, poolcheck, poolblockcheck,
+poolalloc, poolallocalign, poolfree, poolmsize, poolrealloc, poolcompact, poolcheck, poolblockcheck,
 pooldump \- general memory management routines
 .SH SYNOPSIS
 .B #include <u.h>
@@ -13,7 +13,7 @@ pooldump \- general memory management routines
 void*	poolalloc(Pool* pool, ulong size)
 .PP
 .B
-void*	poolallocalignspan(Pool *pool, ulong size, 
+void*	poolallocalign(Pool *pool, ulong size, 
 .br
 .B
                 ulong align, long offset, ulong span)

+ 1 - 1
sys/src/9/kw/archkw.c

@@ -371,7 +371,7 @@ void
 archconfinit(void)
 {
 	m->cpuhz = 1200*1000*1000;
-	m->delayloop = m->cpuhz/6000;  /* only an initial estimate */
+	m->delayloop = m->cpuhz/2000; 	 /* initial estimate */
 	fixaddrmap();
 //	praddrmap();
 	prcachecfg();

+ 0 - 7
sys/src/9/kw/devether.c

@@ -461,13 +461,6 @@ ethershutdown(void)
 	}
 }
 
-
-/* called from clock.c once per second */
-void
-etherclock(void)
-{
-}
-
 #define POLY 0xedb88320
 
 /* really slow 32 bit crc for ethers */

+ 1 - 0
sys/src/9/kw/fns.h

@@ -120,6 +120,7 @@ uintptr mmukmap(uintptr, uintptr, usize);
 uintptr mmukunmap(uintptr, uintptr, usize);
 extern void* mmuuncache(void*, usize);
 extern void* ucalloc(usize);
+extern void* ucallocalign(usize size, int align, int span);
 extern void ucfree(void*);
 
 /*

+ 2 - 2
sys/src/9/kw/fpiarm.c

@@ -364,7 +364,7 @@ fpemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
 				break;
 			}
 			if(fpemudebug)
-				print("CMPF	%c%d,F%ld =%#ux\n",
+				print("CMPF	%c%d,F%ld =%#lux\n",
 					tag, rn, op&7, ur->psr>>28);
 			return;
 		}
@@ -555,7 +555,7 @@ fpiarm(Ureg *ur)
 		validaddr(ur->pc, 4, 0);
 		op = *(ulong*)(ur->pc);
 		if(fpemudebug)
-			print("%#ux: %#8.8lux ", ur->pc, op);
+			print("%#lux: %#8.8lux ", ur->pc, op);
 		o = (op>>24) & 0xF;
 		if(condok(ur->psr, op>>28)){
 			for(i = 0; specialopc[i].f; i++)

+ 2 - 16
sys/src/9/kw/l.s

@@ -322,30 +322,22 @@ TEXT l1cachesoff(SB), 1, $-4
  * cache* functions affect only the L1 caches, which are VIVT.
  */
 
-#define MAXFLUSH 320000
-
 TEXT cachedwb(SB), 1, $-4			/* D writeback */
 	MOVW	CPSR, R3			/* splhi */
 	ORR	$(PsrDirq), R3, R1
 	MOVW	R1, CPSR
 	BARRIERS
+
 	/* keep writing back dirty cache lines until no more exist */
-	MOVW	$MAXFLUSH, R1
 _dwb:
-	SUB.S	$1, R1
-	BEQ	stuck
 	MRC	CpSC, 0, PC, C(CpCACHE), C(CpCACHEwb), CpCACHEtest
 	BNE	_dwb
 	/* drain L1 write buffer, also drains L2 eviction buffer on sheeva */
 	BARRIERS
-dwbret:
+
 	MOVW	R3, CPSR			/* splx */
 	BARRIERS
 	RET
-stuck:
-WAVE('?')
-WAVE('!')
-	B	dwbret
 
 TEXT cachedwbse(SB), 1, $-4			/* D writeback SE */
 	MOVW	R0, R2				/* first arg: address */
@@ -380,10 +372,7 @@ TEXT cachedwbinv(SB), 1, $-4			/* D writeback+invalidate */
 	BARRIERS
 
 	/* keep writing back dirty cache lines until no more exist */
-	MOVW	$MAXFLUSH, R1
 _dwbinv:
-	SUB.S	$1, R1
-	BEQ	stuck
 	MRC	CpSC, 0, PC, C(CpCACHE), C(CpCACHEwbi), CpCACHEtest
 	BNE	_dwbinv
 	/* drain L1 write buffer, also drains L2 eviction buffer on sheeva */
@@ -452,10 +441,7 @@ TEXT cacheuwbinv(SB), 1, $-4			/* D+I writeback+invalidate */
 	BARRIERS
 
 	/* keep writing back dirty cache lines until no more exist */
-	MOVW	$MAXFLUSH, R1
 _uwbinv:					/* D writeback+invalidate */
-	SUB.S	$1, R1
-	BEQ	stuck
 	MRC	CpSC, 0, PC, C(CpCACHE), C(CpCACHEwbi), CpCACHEtest
 	BNE	_uwbinv
 	/* drain L1 write buffer, also drains L2 eviction buffer on sheeva */

+ 14 - 12
sys/src/9/kw/mkfile

@@ -60,13 +60,15 @@ OBJ=\
 	$DEVS\
 	$PORT\
 
+# HFILES=
+
 LIB=\
 	/$objtype/lib/libmemlayer.a\
 	/$objtype/lib/libmemdraw.a\
 	/$objtype/lib/libdraw.a\
 	/$objtype/lib/libip.a\
-	/$objtype/lib/libc.a\
 	/$objtype/lib/libsec.a\
+	/$objtype/lib/libc.a\
 
 9:V: $p$CONF s$p$CONF
 
@@ -100,17 +102,17 @@ install:V: /$objtype/$p$CONF
 
 # CFLAGS= -I. -I../port $CFLAGS	# hack to compile private sysproc.c (e.g.)
 
-clock.$O:	/$objtype/include/ureg.h
-devether.$O:	/$objtype/include/ureg.h
-fpiarm.$O:	/$objtype/include/ureg.h
-main.$O:	/$objtype/include/ureg.h errstr.h init.h reboot.h
-mmu.$O:		/$objtype/include/ureg.h arm.h
-trap$O:		/$objtype/include/ureg.h
-
-devether.$0:	etherif.h ../port/netif.h
-etherkw.$0:	etherif.h ../port/netif.h
-
-l.$O lexception.$O lproc.$O: arm.s arm.h mem.h
+# header dependencies of the kernel objects
+arch.$O clock.$O fpiarm.$O main.$O mmu.$O screen.$O sdscsi.$O syscall.$O \
+	trap.$O: /$objtype/include/ureg.h
+
+archkw.$O devether.$O etherkw.$O ethermii.$O: \
+	etherif.h ethermii.h ../port/netif.h
+fpi.$O fpiarm.$O fpimem.$O: fpi.h
+l.$O lexception.$O lproc.$O mmu.$O: arm.s arm.h mem.h
+main.$O:	errstr.h init.h reboot.h
+mouse.$O:	screen.h
+devusb.$O:	usb.h
+usbehci.$O usbohci.$O usbuhci.$O: usb.h usbehci.h uncached.h
 
 init.h:D:	../port/initcode.c init9.s
 	$CC ../port/initcode.c

+ 0 - 2
sys/src/9/kw/openrd.words

@@ -18,6 +18,4 @@ run video as normal svga; get framebuffer addr from pci (implement pci).
 uart0 is the shared jtag/uart thing with the usb mini b connector.
 uart1 is the rs232/rs485 ports.
 
-is the u-boot environment in nand flash at 0xf80a0000?
-
 has non-ahci sata without going through pci

+ 5 - 5
sys/src/9/kw/plug

@@ -28,8 +28,7 @@ dev
 ##	kbin
 
 	uart
-# usb is temporarily excluded until we add cache ops to it, 16 apr 2010
-#	usb
+	usb
 
 link
 	etherkw		ethermii
@@ -37,7 +36,7 @@ link
 	ethermedium
 	loopbackmedium
 	netdevmedium
-#	usbehci
+	usbehci
 
 ip
 	tcp
@@ -50,10 +49,11 @@ ip
 misc
 	rdb
 	coproc
+	sdaoe		sdscsi
 	softfpu
 	syscall
 	uartkw
-	sdaoe		sdscsi
+	ucalloc
 ##	vgavesa
 
 port
@@ -67,5 +67,5 @@ bootdir
 	boot$CONF.out boot
 	/arm/bin/ip/ipconfig ipconfig
 	/arm/bin/auth/factotum factotum
-#	/arm/bin/usb/usbd
+	/arm/bin/usb/usbd
 	nvram

+ 11 - 4
sys/src/9/kw/plug.words

@@ -7,6 +7,7 @@ l1 I & D VIVT caches 16K each: 4-way, 128 sets, 32-byte lines
 	l1 D is write-through, l1 I is write-back
 unified l2 PIPT cache 256K: 4-way, 2048 sets, 32-byte lines
 	potentially 512K: 8-way
+
 apparently the mmu walks the page tables in dram and won't look in the
 l2 cache.  there is no hardware cache coherence, thus the l1 caches
 need to be flushed or invalidated when mmu mappings change, but l2
@@ -14,6 +15,16 @@ only needs to be flushed or invalidated around dma operations and page
 table changes, and only the affected dma buffers and descriptors or
 page table entries need to be flushed or invalidated in l2.
 
+we arrange that device registers are uncached.
+
+be aware that cache operations act on cache lines (of CACHELINESZ
+bytes) as atomic units, so if you invalidate 4 bytes of a cache line,
+you invalidate the entire cache line, whether it's been written back
+(is clean) or not (is dirty).  mixed data structures with parts
+maintained by hardware and other parts by software are especially
+tricky.  we try to pad the initial hardware parts so that the software
+parts start in a new cache line.
+
 512MB of dram at physical address 0
 512MB of flash
 16550 uart for console
@@ -29,10 +40,6 @@ ___
 
 unfinished business:
 
-usb almost works.  we can see devices but not perform i/o to them.
-this may be due to lack of cache flushing and invalidation around dma
-operations.
-
 access to nand or spi flash would be handy for nvram and small
 fossils.  flash access isn't well documented.  inferno implements
 these software layers: ecc, translation (for wear-levelling and bad

+ 3 - 3
sys/src/9/kw/syscall.c

@@ -123,7 +123,7 @@ notify(Ureg* ureg)
 		l = strlen(n->msg);
 		if(l > ERRMAX-23)	/* " pc=0x0123456789abcdef\0" */
 			l = ERRMAX-23;
-		snprint(n->msg + l, sizeof n->msg - l, " pc=%#ux", ureg->pc);
+		snprint(n->msg + l, sizeof n->msg - l, " pc=%#lux", ureg->pc);
 	}
 
 	if(n->flag != NUser && (up->notified || up->notify == 0)){
@@ -188,7 +188,7 @@ syscall(Ureg* ureg)
 	int i, scallnr;
 
 	if(!userureg(ureg))
-		panic("syscall: from kernel: pc %#ux r14 %#ux psr %#ux",
+		panic("syscall: from kernel: pc %#lux r14 %#lux psr %#lux",
 			ureg->pc, ureg->r14, ureg->psr);
 
 	cycles(&up->kentry);
@@ -214,7 +214,7 @@ syscall(Ureg* ureg)
 	ret = -1;
 	if(!waserror()){
 		if(scallnr >= nsyscall){
-			pprint("bad sys call number %d pc %#ux\n",
+			pprint("bad sys call number %d pc %#lux\n",
 				scallnr, ureg->pc);
 			postnote(up, 1, "sys: bad sys call", NDebug);
 			error(Ebadarg);

+ 17 - 17
sys/src/9/kw/trap.c

@@ -374,7 +374,7 @@ trap(Ureg *ureg)
 		rem = (char*)ureg - ((char*)m + sizeof(Mach));
 	if(rem < 256) {
 		dumpstack();
-		panic("trap %d bytes remaining, up %#p ureg %#p at pc %#ux",
+		panic("trap %d bytes remaining, up %#p ureg %#p at pc %#lux",
 			rem, up, ureg, ureg->pc);
 	}
 
@@ -392,7 +392,7 @@ trap(Ureg *ureg)
 	m->inclockintr = 0;
 	switch(ureg->type) {
 	default:
-		panic("unknown trap %d", ureg->type);
+		panic("unknown trap %ld", ureg->type);
 		break;
 	case PsrMirq:
 		ldrexvalid = 0;
@@ -417,20 +417,20 @@ trap(Ureg *ureg)
 		}
 		switch(fsr){
 		case 0x0:
-			panic("vector exception at %#ux", ureg->pc);
+			panic("vector exception at %#lux", ureg->pc);
 			break;
 		case 0x1:
 		case 0x3:
 			if(user){
 				snprint(buf, sizeof buf,
-					"sys: alignment: pc %#ux va %#p\n",
+					"sys: alignment: pc %#lux va %#p\n",
 					ureg->pc, va);
 				postnote(up, 1, buf, NDebug);
 			} else
-				panic("kernel alignment: pc %#ux va %#p", ureg->pc, va);
+				panic("kernel alignment: pc %#lux va %#p", ureg->pc, va);
 			break;
 		case 0x2:
-			panic("terminal exception at %#ux", ureg->pc);
+			panic("terminal exception at %#lux", ureg->pc);
 			break;
 		case 0x4:
 		case 0x6:
@@ -438,7 +438,7 @@ trap(Ureg *ureg)
 		case 0xa:
 		case 0xc:
 		case 0xe:
-			panic("external abort %#ux pc %#ux addr %#px",
+			panic("external abort %#ux pc %#lux addr %#px",
 				fsr, ureg->pc, va);
 			break;
 		case 0x5:		/* translation fault, no section entry */
@@ -450,11 +450,11 @@ trap(Ureg *ureg)
 			/* domain fault, accessing something we shouldn't */
 			if(user){
 				snprint(buf, sizeof buf,
-					"sys: access violation: pc %#ux va %#p\n",
+					"sys: access violation: pc %#lux va %#p\n",
 					ureg->pc, va);
 				postnote(up, 1, buf, NDebug);
 			} else
-				panic("kernel access violation: pc %#ux va %#p",
+				panic("kernel access violation: pc %#lux va %#p",
 					ureg->pc, va);
 			break;
 		case 0xd:
@@ -473,12 +473,12 @@ trap(Ureg *ureg)
 			if(rv == 0){
 				ldrexvalid = 0;
 				snprint(buf, sizeof buf,
-					"undefined instruction: pc %#ux",
+					"undefined instruction: pc %#lux",
 					ureg->pc);
 				postnote(up, 1, buf, NDebug);
 			}
 		}else{
-			iprint("undefined instruction: pc %#ux inst %#ux\n",
+			iprint("undefined instruction: pc %#lux inst %#ux\n",
 				ureg->pc, ((u32int*)ureg->pc)[-2]);
 			panic("undefined instruction");
 		}
@@ -532,7 +532,7 @@ dumplongs(char *msg, ulong *v, int n)
 static void
 dumpstackwithureg(Ureg *ureg)
 {
-	iprint("ktrace /kernel/path %#.8ux %#.8ux %#.8ux # pc, sp, link\n",
+	iprint("ktrace /kernel/path %#.8lux %#.8lux %#.8lux # pc, sp, link\n",
 		ureg->pc, ureg->sp, ureg->r14);
 	delay(2000);
 #ifdef AMBITIOUS
@@ -606,16 +606,16 @@ dumpregs(Ureg* ureg)
 	if(ureg != nil && (ureg->psr & PsrMask) != PsrMsvc)
 		iprint(" in %s", trapname(ureg->psr));
 	iprint("\n");
-	iprint("psr %8.8ux type %2.2ux pc %8.8ux link %8.8ux\n",
+	iprint("psr %8.8lux type %2.2lux pc %8.8lux link %8.8lux\n",
 		ureg->psr, ureg->type, ureg->pc, ureg->link);
-	iprint("R14 %8.8ux R13 %8.8ux R12 %8.8ux R11 %8.8ux R10 %8.8ux\n",
+	iprint("R14 %8.8lux R13 %8.8lux R12 %8.8lux R11 %8.8lux R10 %8.8lux\n",
 		ureg->r14, ureg->r13, ureg->r12, ureg->r11, ureg->r10);
-	iprint("R9  %8.8ux R8  %8.8ux R7  %8.8ux R6  %8.8ux R5  %8.8ux\n",
+	iprint("R9  %8.8lux R8  %8.8lux R7  %8.8lux R6  %8.8lux R5  %8.8lux\n",
 		ureg->r9, ureg->r8, ureg->r7, ureg->r6, ureg->r5);
-	iprint("R4  %8.8ux R3  %8.8ux R2  %8.8ux R1  %8.8ux R0  %8.8ux\n",
+	iprint("R4  %8.8lux R3  %8.8lux R2  %8.8lux R1  %8.8lux R0  %8.8lux\n",
 		ureg->r4, ureg->r3, ureg->r2, ureg->r1, ureg->r0);
 	iprint("stack is at %#p\n", ureg);
-	iprint("pc %#ux link %#ux\n", ureg->pc, ureg->link);
+	iprint("pc %#lux link %#lux\n", ureg->pc, ureg->link);
 
 	if(up)
 		iprint("user stack: %#p-%#p\n", up->kstack, up->kstack+KSTACK-4);

+ 139 - 0
sys/src/9/kw/ucalloc.c

@@ -0,0 +1,139 @@
+/*
+ * allocate uncached memory
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include <pool.h>
+
+typedef struct Private Private;
+struct Private {
+	Lock;			/* anonymous member; ucpoollock/ucpoolunlock pass the Private itself to ilock/iunlock */
+	char	msg[256];	/* text accumulated by ucpoolprint while the pool is locked */
+	char*	cur;		/* next write position within msg; == msg when nothing is pending */
+};
+
+static Private ucprivate;	/* installed as ucpool.private */
+
+static void
+ucpoolpanic(Pool* p, char* fmt, ...)	/* installed as ucpool.panic; does not return normally if panic() does not */
+{
+	va_list v;
+	Private *pv;
+	char msg[sizeof pv->msg];
+
+	pv = p->private;
+	va_start(v, fmt);
+	vseprint(pv->cur, &pv->msg[sizeof pv->msg], fmt, v);	/* append after any text ucpoolprint already buffered */
+	va_end(v);
+	memmove(msg, pv->msg, sizeof msg);	/* copy to the stack so the text survives releasing the lock */
+	iunlock(pv);
+	panic("%s", msg);
+}
+
+static void
+ucpoolprint(Pool* p, char* fmt, ...)	/* installed as ucpool.print */
+{
+	va_list v;
+	Private *pv;
+
+	pv = p->private;
+	va_start(v, fmt);
+	pv->cur = vseprint(pv->cur, &pv->msg[sizeof pv->msg], fmt, v);	/* buffer the text in msg and advance cur; ucpoolunlock prints it */
+	va_end(v);
+}
+
+static void
+ucpoolunlock(Pool* p)	/* installed as ucpool.unlock */
+{
+	Private *pv;
+	char msg[sizeof pv->msg];
+
+	pv = p->private;
+	if(pv->cur == pv->msg){	/* nothing was buffered since ucpoollock reset cur */
+		iunlock(pv);
+		return;
+	}
+
+	memmove(msg, pv->msg, sizeof msg);	/* private copy, taken while still holding the lock */
+	pv->cur = pv->msg;	/* mark the shared buffer empty again */
+	iunlock(pv);
+
+	iprint("%.*s", sizeof pv->msg, msg);	/* printing happens only after the lock is released */
+}
+
+static void
+ucpoollock(Pool* p)	/* installed as ucpool.lock */
+{
+	Private *pv;
+
+	pv = p->private;
+	ilock(pv);
+	pv->pc = getcallerpc(&p);	/* pc is not declared in Private; presumably a Lock field reached via the anonymous member -- confirm */
+	pv->cur = pv->msg;	/* start with an empty message buffer */
+}
+
+static void*
+ucarena(usize size)	/* installed as ucpool.alloc; returns nil on failure */
+{
+	void *uv, *v;
+
+	assert(size == 1*MiB);	/* only 1 MiB arenas are supported */
+
+	mainmem->maxsize += 1*MiB;	/* raise mainmem's limit by the arena size before allocating from it */
+	if((v = mallocalign(1*MiB, 1*MiB, 0, 0)) == nil){	/* 1 MiB block aligned to 1 MiB */
+		mainmem->maxsize -= 1*MiB;	/* undo the limit change */
+		return nil;
+	}
+	if((uv = mmuuncache(v, 1*MiB)) == nil){	/* result of mmuuncache is what the pool receives */
+		free(v);
+		mainmem->maxsize -= 1*MiB;
+		return nil;
+	}
+
+	return uv;
+}
+
+static Pool ucpool = {	/* pool used by ucalloc, ucallocalign and ucfree */
+	.name		= "Uncached",
+	.maxsize	= 4*MiB,
+	.minarena	= 1*MiB-32,
+	.quantum	= 32,
+	.alloc		= ucarena,	/* ucarena asserts that it is asked for exactly 1*MiB */
+	.merge		= nil,
+	.flags		= /*POOL_TOLERANCE|POOL_ANTAGONISM|POOL_PARANOIA|*/0,	/* debugging flags left disabled */
+
+	.lock		= ucpoollock,
+	.unlock		= ucpoolunlock,
+	.print		= ucpoolprint,
+	.panic		= ucpoolpanic,
+
+	.private	= &ucprivate,	/* lock and message buffer used by the four callbacks above */
+};
+
+void
+ucfree(void* v)	/* return v to ucpool; a nil v is ignored */
+{
+	if(v == nil)
+		return;
+	poolfree(&ucpool, v);
+}
+
+void*
+ucalloc(usize size)	/* allocate size bytes from ucpool; returns whatever poolallocalign returns */
+{
+	assert(size < ucpool.minarena-128);	/* request must be smaller than minarena-128 bytes */
+
+	return poolallocalign(&ucpool, size, 32, 0, 0);	/* align 32, offset 0, span 0 */
+}
+
+void*
+ucallocalign(usize size, int align, int span)	/* like ucalloc, but the caller chooses align and span */
+{
+	assert(size < ucpool.minarena-128);	/* same size limit as ucalloc */
+
+	return poolallocalign(&ucpool, size, align, 0, span);	/* offset is always 0 */
+}

+ 31 - 0
sys/src/9/kw/uncached.h

@@ -0,0 +1,31 @@
+/*
+ * running the l2 cache as write-back and using cached memory for
+ * usb data structures yields spurious errors such as
+ *
+ *	qhintr: td 0x60ee3d80 csw 0x8824a error 0x48 transaction error
+ *
+ * from usbehci.  so, at least for now, we will use uncached memory until
+ * we sort out the write-back problems.
+ *
+ * the macros below redirect smalloc, free, xspanalloc and mallocz in
+ * any file that includes this header to myucalloc, ucfree,
+ * ucallocalign and ucallocz respectively.
+ */
+#define smalloc(n)		myucalloc(n)
+#define free			ucfree
+#define xspanalloc		ucallocalign
+#define mallocz(n, clr)		ucallocz(n, clr)
+
+static void *
+ucallocz(uint n, int)	/* second (clear) argument is unnamed and ignored: memory is always zeroed */
+{
+	char *p = ucalloc(n);
+
+	if (p)
+		memset(p, 0, n);
+	else
+		panic("ucalloc: out of memory");	/* allocation failure is fatal rather than returned to the caller */
+	return p;
+}
+
+static void *
+myucalloc(uint n)	/* target of the smalloc macro: zeroed uncached allocation via ucallocz */
+{
+	return ucallocz(n, 1);
+}

+ 0 - 26
sys/src/9/kw/ureg.h

@@ -1,26 +0,0 @@
-typedef struct Ureg {
-	u32int	r0;
-	u32int	r1;
-	u32int	r2;
-	u32int	r3;
-	u32int	r4;
-	u32int	r5;
-	u32int	r6;
-	u32int	r7;
-	u32int	r8;
-	u32int	r9;				/* up */
-	u32int	r10;				/* m */
-	u32int	r11;				/* loader temprorary */
-	u32int	r12;				/* SB */
-	union {
-		u32int	r13;
-		u32int	sp;
-	};
-	union {
-		u32int	r14;
-		u32int	link;
-	};
-	u32int	type;				/* of exception */
-	u32int	psr;
-	u32int	pc;				/* interrupted addr */
-} Ureg;

File diff suppressed because it is too large
+ 314 - 214
sys/src/9/kw/usbehci.c


+ 223 - 0
sys/src/9/omap/arch.c

@@ -0,0 +1,223 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include <tos.h>
+#include "ureg.h"
+
+#include "arm.h"
+
+/*
+ * A lot of this stuff doesn't belong here
+ * but this is a convenient dumping ground for
+ * later sorting into the appropriate buckets.
+ */
+
+/* Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process:
+ * pc and sp are taken from p->sched (sp advanced by 4) and r14 is
+ * set to the address of sched.
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+	ureg->pc = p->sched.pc;
+	ureg->sp = p->sched.sp+4;
+	ureg->r14 = PTR2UINT(sched);
+}
+
+/*
+ * called in sysfile.c
+ * if addr is not a multiple of 4, post a "sys: odd address" note
+ * to the current process and raise Ebadarg.
+ */
+void
+evenaddr(uintptr addr)
+{
+	if(addr & 3){
+		postnote(up, 1, "sys: odd address", NDebug);
+		error(Ebadarg);
+	}
+}
+
+/* go to user space:
+ * refresh the Tos structure located at USTKTOP-sizeof(Tos) with the
+ * current process's cycle counts, the cpu frequency and the pid.
+ * the Ureg argument is unnamed and unused.
+ */
+void
+kexit(Ureg*)
+{
+	uvlong t;
+	Tos *tos;
+
+	/* precise time accounting, kernel exit */
+	tos = (Tos*)(USTKTOP-sizeof(Tos));
+	cycles(&t);
+	tos->kcycles += t - up->kentry;	/* cycles elapsed since up->kentry was recorded */
+	tos->pcycles = up->pcycles;
+	tos->cyclefreq = m->cpuhz;
+	tos->pid = up->pid;
+
+	/* make visible immediately to user proc */
+	cachedwbinvse(tos, sizeof *tos);
+}
+
+/*
+ *  return the userpc the last exception happened at
+ *  (the pc field of up->dbgreg; unlike dbgpc, dbgreg is
+ *  dereferenced without a nil check)
+ */
+uintptr
+userpc(void)
+{
+	Ureg *ureg = up->dbgreg;
+	return ureg->pc;
+}
+
+/* This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ * NOTE(review): the body below only marks its arguments as used; no
+ * registers are saved, copied or restored here.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	USED(ureg, pureg, uva, n);
+}
+
+/*
+ *  this is the body for all kproc's
+ *  (kprocchild installs it as the initial pc): call up->kpfun with
+ *  up->kparg, then pexit when that function returns.
+ */
+static void
+linkproc(void)
+{
+	spllo();
+	up->kpfun(up->kparg);
+	pexit("kproc exiting", 0);
+}
+
+/*
+ *  setup stack and initial PC for a new kernel proc.  This is architecture
+ *  dependent because of the starting stack location
+ *  (here p->kstack+KSTACK).  func and arg are recorded in p->kpfun and
+ *  p->kparg, which linkproc later uses.
+ */
+void
+kprocchild(Proc *p, void (*func)(void*), void *arg)
+{
+	p->sched.pc = PTR2UINT(linkproc);
+	p->sched.sp = PTR2UINT(p->kstack+KSTACK);
+
+	p->kpfun = func;
+	p->kparg = arg;
+}
+
+/*
+ *  pc output by dumpaproc
+ *  returns 0 when p has no dbgreg, otherwise the pc saved there.
+ */
+uintptr
+dbgpc(Proc* p)
+{
+	Ureg *ureg;
+
+	ureg = p->dbgreg;
+	if(ureg == 0)
+		return 0;
+
+	return ureg->pc;
+}
+
+/*
+ *  set mach dependent process state for a new process
+ *  (delegates entirely to fpusysprocsetup)
+ */
+void
+procsetup(Proc* p)
+{
+	fpusysprocsetup(p);
+}
+
+/*
+ *  Save the mach dependent part of the process state.
+ *  adds the current cycle count to p->pcycles (procrestore subtracts
+ *  it again), then calls fpuprocsave.
+ */
+void
+procsave(Proc* p)
+{
+	uvlong t;
+
+	cycles(&t);
+	p->pcycles += t;
+
+// TODO: save and restore VFPv3 FP state once 5[cal] know the new registers.
+	fpuprocsave(p);
+}
+
+void
+procrestore(Proc* p)	/* counterpart of procsave */
+{
+	uvlong t;
+
+	if(p->kp)	/* nothing is done when p->kp is set */
+		return;
+	cycles(&t);
+	p->pcycles -= t;	/* procsave adds the cycle count; subtract it here */
+
+	fpuprocrestore(p);
+}
+
+int
+userureg(Ureg* ureg)	/* non-zero when the mode bits of the saved psr equal PsrMusr */
+{
+	return (ureg->psr & PsrMask) == PsrMusr;
+}
+
+/*
+ * atomic ops
+ * make sure that we don't drag in the C library versions
+ *
+ * each of these brackets a plain read-modify-write with splhi/splx.
+ * _xdec decrements *p and returns the new value.
+ */
+
+long
+_xdec(long *p)
+{
+	int s, v;
+
+	s = splhi();
+	v = --*p;
+	splx(s);
+	return v;
+}
+
+void
+_xinc(long *p)	/* increment *p between splhi and splx; no value is returned */
+{
+	int s;
+
+	s = splhi();
+	++*p;
+	splx(s);
+}
+
+int
+ainc(int *p)	/* increment *p between splhi and splx; returns the new value */
+{
+	int s, v;
+
+	s = splhi();
+	v = ++*p;
+	splx(s);
+	return v;
+}
+
+int
+adec(int *p)	/* decrement *p between splhi and splx; returns the new value */
+{
+	int s, v;
+
+	s = splhi();
+	v = --*p;
+	splx(s);
+	return v;
+}
+
+int
+cas32(void* addr, u32int old, u32int new)	/* compare-and-swap on a 32-bit word; returns non-zero if the swap happened */
+{
+	int r, s;
+
+	s = splhi();
+	if(r = (*(u32int*)addr == old))	/* assignment intended: r records whether the word matched old */
+		*(u32int*)addr = new;
+	splx(s);
+	if (r)
+		coherence();	/* called only after a successful store */
+	return r;
+}

+ 151 - 0
sys/src/9/omap/coproc.c

@@ -0,0 +1,151 @@
+/*
+ * arm co-processors
+ * CP15 (system control) is the one that gets used the most in practice.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "arm.h"
+
+#define MAP2PCSPACE(va, pc) ((uintptr)(va) & ~KSEGM | (pc) & KSEGM)	/* va with its KSEGM bits replaced by those of pc */
+
+enum {
+	/* alternates:	0xe12fff1e	BX (R14); last e is R14 */
+	/*		0xe28ef000	B 0(R14); second e is R14 (ken) */
+	Retinst	= 0xe1a0f00e,		/* MOV R14, R15 */
+};
+
+void
+cpwr(int cp, int op1, int crn, int crm, int op2, ulong val)	/* write val to a co-processor register via a generated MCR */
+{
+	int s;
+	volatile ulong instr[2];	/* generated code: the MCR followed by Retinst */
+	void *pcaddr;
+	void (*fp)(ulong);
+
+	s = splhi();
+	op1 &= 7;	/* mask each operand before merging it into the opcode */
+	op2 &= 7;
+	crn &= 017;
+	crm &= 017;
+	cp &= 017;
+	/* MCR.  Rt will be R0. */
+	instr[0] = 0xee000010 |
+		op1 << 21 | crn << 16 | cp << 8 | op2 << 5 | crm;
+	instr[1] = Retinst;
+	coherence();
+
+	pcaddr = (void *)MAP2PCSPACE(instr, getcallerpc(&cp));	/* address of instr with the caller pc's KSEGM bits */
+	cachedwbse(pcaddr, sizeof instr);	/* write the two instructions back from the data cache */
+	cacheiinv();	/* presumably invalidates the instruction cache -- confirm in l.s */
+
+	fp = (void (*)(ulong))pcaddr;
+	(*fp)(val);	/* run the generated code; val is its argument */
+	coherence();
+	splx(s);
+}
+
+void
+cpwrsc(int op1, int crn, int crm, int op2, ulong val)	/* cpwr on co-processor CpSC */
+{
+	cpwr(CpSC, op1, crn, crm, op2, val);
+}
+
+/*
+ * read a co-processor register: build `MRC; return' in a stack
+ * buffer, make it visible to instruction fetch and call it.
+ * cp, op1, crn, crm and op2 are masked to their instruction field
+ * widths before being merged into the opcode, so an out-of-range
+ * value cannot corrupt neighbouring fields.
+ * returns the value the generated code leaves in R0.
+ */
+ulong
+cprd(int cp, int op1, int crn, int crm, int op2)
+{
+	int s;
+	ulong res;
+	volatile ulong instr[2];	/* generated code: the MRC followed by Retinst */
+	void *pcaddr;
+	ulong (*fp)(void);
+
+	s = splhi();
+	op1 &= 7;
+	op2 &= 7;
+	crn &= 017;
+	crm &= 017;
+	cp &= 017;		/* as in cpwr: keep cp within its 4-bit field */
+	/*
+	 * MRC.  return value will be in R0, which is convenient.
+	 * Rt will be R0.
+	 */
+	instr[0] = 0xee100010 |
+		op1 << 21 | crn << 16 | cp << 8 | op2 << 5 | crm;
+	instr[1] = Retinst;
+	coherence();
+
+	pcaddr = (void *)MAP2PCSPACE(instr, getcallerpc(&cp));
+	cachedwbse(pcaddr, sizeof instr);	/* write the two instructions back from the data cache */
+	cacheiinv();
+
+	fp = (ulong (*)(void))pcaddr;
+	res = (*fp)();
+	splx(s);
+	return res;
+}
+
+ulong
+cprdsc(int op1, int crn, int crm, int op2)	/* cprd on co-processor CpSC */
+{
+	return cprd(CpSC, op1, crn, crm, op2);
+}
+
+/* floating point */
+
+ulong
+fprd(int fpreg)	/* read floating-point system register fpreg via a generated VMRS */
+{
+	int s;
+	ulong res;
+	volatile ulong instr[2];	/* generated code: the VMRS followed by Retinst */
+	void *pcaddr;
+	ulong (*fp)(void);
+
+	s = splhi();
+	fpreg &= 017;	/* register number is limited to 4 bits */
+	/*
+	 * VMRS.  return value will be in R0, which is convenient.
+	 * Rt will be R0.
+	 */
+	instr[0] = 0xeef00a10 | fpreg << 16 | 0 << 12;
+	instr[1] = Retinst;
+	coherence();
+
+	pcaddr = (void *)MAP2PCSPACE(instr, getcallerpc(&fpreg));
+	cachedwbse(pcaddr, sizeof instr);	/* same write-back/invalidate/call sequence as cprd */
+	cacheiinv();
+
+	fp = (ulong (*)(void))pcaddr;
+	res = (*fp)();
+	splx(s);
+	return res;
+}
+
+void
+fpwr(int fpreg, ulong val)	/* write val to floating-point system register fpreg via a generated VMSR */
+{
+	int s;
+	volatile ulong instr[2];	/* generated code: the VMSR followed by Retinst */
+	void *pcaddr;
+	void (*fp)(ulong);
+
+	s = splhi();
+	fpreg &= 017;	/* register number is limited to 4 bits */
+	/* VMSR.  Rt will be R0. */
+	instr[0] = 0xeee00a10 | fpreg << 16 | 0 << 12;
+	instr[1] = Retinst;
+	coherence();
+
+	pcaddr = (void *)MAP2PCSPACE(instr, getcallerpc(&fpreg));
+	cachedwbse(pcaddr, sizeof instr);	/* same write-back/invalidate/call sequence as cpwr */
+	cacheiinv();
+
+	fp = (void (*)(ulong))pcaddr;
+	(*fp)(val);
+	coherence();
+	splx(s);
+}

+ 300 - 0
sys/src/9/omap/fpi.c

@@ -0,0 +1,300 @@
+/*
+ * Floating Point Interpreter.
+ * shamelessly stolen from an original by ark.
+ */
+#include "fpi.h"
+
+void
+fpiround(Internal *i)	/* round away the guard bits of i->l, in place */
+{
+	unsigned long guard;
+
+	guard = i->l & GuardMask;
+	i->l &= ~GuardMask;
+	if(guard > (LsBit>>1) || (guard == (LsBit>>1) && (i->l & LsBit))){	/* above half, or exactly half with an odd least-significant bit */
+		i->l += LsBit;
+		if(i->l & CarryBit){	/* carry out of the low word into the high word */
+			i->l &= ~CarryBit;
+			i->h++;
+			if(i->h & CarryBit){	/* high word overflowed: shift right one bit and bump the exponent */
+				if (i->h & 0x01)
+					i->l |= CarryBit;
+				i->l >>= 1;
+				i->h >>= 1;
+				i->e++;
+			}
+		}
+	}
+}
+
+static void
+matchexponents(Internal *x, Internal *y)	/* give x the exponent of y by shifting x's fraction right; y is not modified */
+{
+	int count;
+
+	count = y->e - x->e;	/* number of bit positions to shift */
+	x->e = y->e;
+	if(count >= 2*FractBits){	/* everything shifts out: keep only a 0/1 flag saying x was non-zero */
+		x->l = x->l || x->h;
+		x->h = 0;
+		return;
+	}
+	if(count >= FractBits){	/* whole-word shift: h moves into l, with a flag bit for a non-zero old l */
+		count -= FractBits;
+		x->l = x->h|(x->l != 0);
+		x->h = 0;
+	}
+	while(count > 0){	/* remaining shift, one bit at a time */
+		count--;
+		if(x->h & 0x01)
+			x->l |= CarryBit;	/* low bit of h enters l from the top */
+		if(x->l & 0x01)
+			x->l |= 2;	/* keep the bit about to be shifted out visible in the new low bit */
+		x->l >>= 1;
+		x->h >>= 1;
+	}
+}
+
+static void
+shift(Internal *i)	/* shift the two-word fraction left one bit and decrement the exponent */
+{
+	i->e--;
+	i->h <<= 1;
+	i->l <<= 1;
+	if(i->l & CarryBit){	/* bit shifted out of l becomes the low bit of h */
+		i->l &= ~CarryBit;
+		i->h |= 0x01;
+	}
+}
+
+static void
+normalise(Internal *i)	/* shift left until HiddenBit is set in i->h; never terminates if h and l are both zero */
+{
+	while((i->h & HiddenBit) == 0)
+		shift(i);
+}
+
+static void
+renormalise(Internal *i)	/* bring the exponent of i into range after multiply/divide */
+{
+	if(i->e < -2 * FractBits)	/* bound the number of iterations of the loop below */
+		i->e = -2 * FractBits;
+	while(i->e < 1){	/* exponent too small: shift the fraction right until e reaches 1 */
+		i->e++;
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->h >>= 1;
+		i->l = (i->l>>1)|(i->l & 0x01);	/* the old low bit is or'ed back in so it is not lost */
+	}
+	if(i->e >= ExpInfinity)	/* exponent too large: result becomes infinity */
+		SetInfinity(i);
+}
+
+void
+fpinormalise(Internal *x)	/* normalise x unless it is zero or has an exponent >= ExpInfinity */
+{
+	if(!IsWeird(x) && !IsZero(x))
+		normalise(x);
+}
+
+void
+fpiadd(Internal *x, Internal *y, Internal *i)	/* i = sum of the magnitudes of x and y, with the sign of x; may modify the operand with the smaller exponent */
+{
+	Internal *t;
+
+	i->s = x->s;
+	if(IsWeird(x) || IsWeird(y)){	/* NaN in gives NaN out; otherwise an infinity is involved */
+		if(IsNaN(x) || IsNaN(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	if(x->e > y->e){	/* make x the operand with the smaller exponent */
+		t = x;
+		x = y;
+		y = t;
+	}
+	matchexponents(x, y);
+	i->e = x->e;
+	i->h = x->h + y->h;
+	i->l = x->l + y->l;
+	if(i->l & CarryBit){	/* carry from the low word into the high word */
+		i->h++;
+		i->l &= ~CarryBit;
+	}
+	if(i->h & (HiddenBit<<1)){	/* sum overflowed the hidden bit: shift right and bump the exponent */
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->l = (i->l>>1)|(i->l & 0x01);
+		i->h >>= 1;
+		i->e++;
+	}
+	if(IsWeird(i))	/* exponent reached ExpInfinity */
+		SetInfinity(i);
+}
+
+void
+fpisub(Internal *x, Internal *y, Internal *i)	/* i = difference of magnitudes, larger minus smaller, with the sign of the larger operand */
+{
+	Internal *t;
+
+	if(y->e < x->e
+	   || (y->e == x->e && (y->h < x->h || (y->h == x->h && y->l < x->l)))){	/* make y the operand of larger magnitude */
+		t = x;
+		x = y;
+		y = t;
+	}
+	i->s = y->s;
+	if(IsNaN(y)){
+		SetQNaN(i);
+		return;
+	}
+	if(IsInfinity(y)){	/* infinity minus infinity is NaN; infinity minus finite stays infinity */
+		if(IsInfinity(x))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	matchexponents(x, y);
+	i->e = y->e;
+	i->h = y->h - x->h;
+	i->l = y->l - x->l;
+	if(i->l < 0){	/* borrow from the high word */
+		i->l += CarryBit;
+		i->h--;
+	}
+	if(i->h == 0 && i->l == 0)
+		SetZero(i);
+	else while(i->e > 1 && (i->h & HiddenBit) == 0)	/* renormalise, but do not take the exponent below 1 */
+		shift(i);
+}
+
+#define	CHUNK		(FractBits/2)	/* fraction words are multiplied in half-word chunks */
+#define	CMASK		((1<<CHUNK)-1)	/* mask selecting one chunk */
+#define	HI(x)		((short)((x)>>CHUNK) & CMASK)	/* upper chunk of a fraction word */
+#define	LO(x)		((short)(x) & CMASK)	/* lower chunk of a fraction word */
+#define	SPILL(x)	((x)>>CHUNK)	/* part of a partial sum that exceeds one chunk */
+#define	M(x, y)		((long)a[x]*(long)b[y])	/* product of chunk x of a and chunk y of b (locals of fpimul) */
+#define	C(h, l)		(((long)((h) & CMASK)<<CHUNK)|((l) & CMASK))	/* join two chunks into one word */
+
+void
+fpimul(Internal *x, Internal *y, Internal *i)	/* i = x * y; x and y are normalised in place */
+{
+	long a[4], b[4], c[7], f[4];
+
+	i->s = x->s^y->s;	/* sign of a product */
+	if(IsWeird(x) || IsWeird(y)){
+		if(IsNaN(x) || IsNaN(y) || IsZero(x) || IsZero(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	else if(IsZero(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->e = x->e + y->e - (ExpBias - 1);
+
+	a[0] = HI(x->h); b[0] = HI(y->h);	/* split each operand into four chunks, most significant first */
+	a[1] = LO(x->h); b[1] = LO(y->h);
+	a[2] = HI(x->l); b[2] = HI(y->l);
+	a[3] = LO(x->l); b[3] = LO(y->l);
+
+	c[6] =                               M(3, 3);	/* column sums of partial products, least significant first, each absorbing the spill of the previous */
+	c[5] =                     M(2, 3) + M(3, 2) + SPILL(c[6]);
+	c[4] =           M(1, 3) + M(2, 2) + M(3, 1) + SPILL(c[5]);
+	c[3] = M(0, 3) + M(1, 2) + M(2, 1) + M(3, 0) + SPILL(c[4]);
+	c[2] = M(0, 2) + M(1, 1) + M(2, 0)           + SPILL(c[3]);
+	c[1] = M(0, 1) + M(1, 0)                     + SPILL(c[2]);
+	c[0] = M(0, 0)                               + SPILL(c[1]);
+
+	f[0] = c[0];	/* reassemble the columns into four words, most significant first */
+	f[1] = C(c[1], c[2]);
+	f[2] = C(c[3], c[4]);
+	f[3] = C(c[5], c[6]);
+
+	if((f[0] & HiddenBit) == 0){	/* product lacks the hidden bit: shift the four-word result left by one */
+		f[0] <<= 1;
+		f[1] <<= 1;
+		f[2] <<= 1;
+		f[3] <<= 1;
+		if(f[1] & CarryBit){
+			f[0] |= 1;
+			f[1] &= ~CarryBit;
+		}
+		if(f[2] & CarryBit){
+			f[1] |= 1;
+			f[2] &= ~CarryBit;
+		}
+		if(f[3] & CarryBit){
+			f[2] |= 1;
+			f[3] &= ~CarryBit;
+		}
+		i->e--;
+	}
+	i->h = f[0];
+	i->l = f[1];
+	if(f[2] || f[3])	/* any discarded low-order bits are recorded in the low bit of l */
+		i->l |= 1;
+	renormalise(i);
+}
+
+void
+fpidiv(Internal *x, Internal *y, Internal *i)	/* exponent is y->e - x->e, so this appears to compute i = y / x -- confirm against callers; x and y are modified */
+{
+	i->s = x->s^y->s;
+	if(IsNaN(x) || IsNaN(y)
+	   || (IsInfinity(x) && IsInfinity(y)) || (IsZero(x) && IsZero(y))){
+		SetQNaN(i);
+		return;
+	}
+	else if(IsZero(x) || IsInfinity(y)){
+		SetInfinity(i);
+		return;
+	}
+	else if(IsInfinity(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->h = 0;
+	i->l = 0;
+	i->e = y->e - x->e + (ExpBias + 2*FractBits - 1);
+	do{	/* one quotient bit per iteration, until the hidden bit of the quotient is set */
+		if(y->h > x->h || (y->h == x->h && y->l >= x->l)){	/* remainder in y >= x: emit a 1 bit and subtract */
+			i->l |= 0x01;
+			y->h -= x->h;
+			y->l -= x->l;
+			if(y->l < 0){	/* borrow from the high word */
+				y->l += CarryBit;
+				y->h--;
+			}
+		}
+		shift(y);
+		shift(i);
+	}while ((i->h & HiddenBit) == 0);
+	if(y->h || y->l)	/* non-zero remainder is recorded in the low bit of l */
+		i->l |= 0x01;
+	renormalise(i);
+}
+
+int
+fpicmp(Internal *x, Internal *y)	/* compare x with y: negative, zero or positive result */
+{
+	if(IsNaN(x) && IsNaN(y))	/* two NaNs compare equal here */
+		return 0;
+	if(IsInfinity(x) && IsInfinity(y))
+		return y->s - x->s;	/* only the signs can differ */
+	if(x->e == y->e && x->h == y->h && x->l == y->l)
+		return y->s - x->s;	/* equal magnitudes: only the signs can differ */
+	if(x->e < y->e
+	   || (x->e == y->e && (x->h < y->h || (x->h == y->h && x->l < y->l))))	/* |x| < |y|: the sign of y decides */
+		return y->s ? 1: -1;
+	return x->s ? -1: 1;	/* |x| > |y|: the sign of x decides */
+}

+ 61 - 0
sys/src/9/omap/fpi.h

@@ -0,0 +1,61 @@
+typedef long Word;
+typedef unsigned long Single;
+typedef struct {
+	unsigned long l;
+	unsigned long h;
+} Double;
+
+enum {
+	FractBits	= 28,	/* fraction bits held in each word of an Internal */
+	CarryBit	= 0x10000000,	/* 1<<FractBits: first bit above the fraction */
+	HiddenBit	= 0x08000000,	/* top fraction bit of the high word */
+	MsBit		= HiddenBit,
+	NGuardBits	= 3,
+	GuardMask	= 0x07,	/* the NGuardBits low bits of the low word */
+	LsBit		= (1<<NGuardBits),	/* lowest bit above the guard bits */
+
+	SingleExpBias	= 127,
+	SingleExpMax	= 255,
+	DoubleExpBias	= 1023,
+	DoubleExpMax	= 2047,
+
+	ExpBias		= DoubleExpBias,	/* Internal uses the double-precision bias and limit */
+	ExpInfinity	= DoubleExpMax,
+};
+
+typedef struct {
+	unsigned char s;	/* sign */
+	short e;	/* exponent */
+	long l;				/* 0000FFFFFFFFFFFFFFFFFFFFFFFFFGGG */
+	long h;				/* 0000HFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+} Internal;
+
+#define IsWeird(n)	((n)->e >= ExpInfinity)	/* infinity or NaN */
+#define	IsInfinity(n)	(IsWeird(n) && (n)->h == HiddenBit && (n)->l == 0)
+#define	SetInfinity(n)	((n)->e = ExpInfinity, (n)->h = HiddenBit, (n)->l = 0)	/* leaves the sign unchanged */
+#define IsNaN(n)	(IsWeird(n) && (((n)->h & ~HiddenBit) || (n)->l))
+#define	SetQNaN(n)	((n)->s = 0, (n)->e = ExpInfinity, 		\
+			 (n)->h = HiddenBit|(LsBit<<1), (n)->l = 0)
+#define IsZero(n)	((n)->e == 1 && (n)->h == 0 && (n)->l == 0)	/* zero is represented with exponent 1 */
+#define SetZero(n)	((n)->e = 1, (n)->h = 0, (n)->l = 0)	/* leaves the sign unchanged */
+
+/*
+ * fpi.c
+ */
+extern void fpiround(Internal *);
+extern void fpiadd(Internal *, Internal *, Internal *);
+extern void fpisub(Internal *, Internal *, Internal *);
+extern void fpimul(Internal *, Internal *, Internal *);
+extern void fpidiv(Internal *, Internal *, Internal *);
+extern int fpicmp(Internal *, Internal *);
+extern void fpinormalise(Internal*);
+
+/*
+ * fpimem.c
+ */
+extern void fpis2i(Internal *, void *);
+extern void fpid2i(Internal *, void *);
+extern void fpiw2i(Internal *, void *);
+extern void fpii2s(void *, Internal *);
+extern void fpii2d(void *, Internal *);
+extern void fpii2w(Word *, Internal *);

+ 576 - 0
sys/src/9/omap/fpiarm.c

@@ -0,0 +1,576 @@
+/*
+ * this doesn't attempt to implement ARM floating-point properties
+ * that aren't visible in the Inferno environment.
+ * all arithmetic is done in double precision.
+ * the FP trap status isn't updated.
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+
+#include	"ureg.h"
+
+#include	"arm.h"
+#include	"fpi.h"
+
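+/* undef this if correct kernel r13 isn't in Ureg;
+ * check calculation in fpiarm below
+ * NOTE(review): no macro definition follows this comment, and fpiarm
+ * below contains no r13 calculation; this comment looks stale.
+ */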
+
+
+/*
+ * REG(ur, x): lvalue for integer register x inside the Ureg at ur,
+ *	located through the byte offsets in roff[] below.
+ * FR(ufp, x): lvalue for emulated FP register x&7, obtained by
+ *	viewing ufp->regs[x&7] as an Internal.
+ */
+#define	REG(ur, x) (*(long*)(((char*)(ur))+roff[(x)]))
+#define	FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&7])
+
+/*
+ * operation table entries: a mnemonic for debug output plus the
+ * routine that implements it.  FP2 routines take both operands by
+ * value and a destination; FP1 routines take a source and a
+ * destination pointer.  a nil f marks an unimplemented operation.
+ */
+typedef struct FP2 FP2;
+typedef struct FP1 FP1;
+
+struct FP2 {
+	char*	name;
+	void	(*f)(Internal, Internal, Internal*);
+};
+
+struct FP1 {
+	char*	name;
+	void	(*f)(Internal*, Internal*);
+};
+
+/*
+ * condition flag bits as they appear in the top nibble of ur->psr,
+ * and the register number that names the pc.
+ */
+enum {
+	N = 1<<31,
+	Z = 1<<30,
+	C = 1<<29,
+	V = 1<<28,
+	REGPC = 15,
+};
+
+/* set non-zero to print every emulated instruction */
+enum {
+	fpemudebug = 0,
+};
+
+/*
+ * roff[i] is the byte offset of integer register i within a Ureg;
+ * entry 15 is the pc.  used by REG() above.
+ */
+#undef OFR
+#define	OFR(X)	((ulong)&((Ureg*)0)->X)
+
+static	int	roff[] = {
+	OFR(r0), OFR(r1), OFR(r2), OFR(r3),
+	OFR(r4), OFR(r5), OFR(r6), OFR(r7),
+	OFR(r8), OFR(r9), OFR(r10), OFR(r11),
+	OFR(r12), OFR(r13), OFR(r14), OFR(pc),
+};
+
+/*
+ * the eight immediate constants an instruction can name in place of
+ * a source register.  fpemu points fm at one of these when bit 3 of
+ * the opcode is set; frnd uses entry 6 (0.5).
+ */
+static Internal fpconst[8] = {	/* indexed by op&7 */
+	/* s, e, l, h */
+	{0, 0x1, 0x00000000, 0x00000000}, /* 0.0 */
+	{0, 0x3FF, 0x00000000, 0x08000000},	/* 1.0 */
+	{0, 0x400, 0x00000000, 0x08000000},	/* 2.0 */
+	{0, 0x400, 0x00000000, 0x0C000000},	/* 3.0 */
+	{0, 0x401, 0x00000000, 0x08000000},	/* 4.0 */
+	{0, 0x401, 0x00000000, 0x0A000000},	/* 5.0 */
+	{0, 0x3FE, 0x00000000, 0x08000000},	/* 0.5 */
+	{0, 0x402, 0x00000000, 0x0A000000},	/* 10.0 */
+};
+
+/*
+ * arm binary operations
+ */
+
+/*
+ * ADDF: operands arrive by value; fpiadd is used when the signs
+ * agree and fpisub when they differ.
+ */
+static void
+fadd(Internal m, Internal n, Internal *d)
+{
+	if(m.s != n.s)
+		fpisub(&m, &n, d);
+	else
+		fpiadd(&m, &n, d);
+}
+
+/*
+ * SUBF: flip the sign of the private copy of m, then proceed
+ * exactly as for an addition of (m, n).
+ */
+static void
+fsub(Internal m, Internal n, Internal *d)
+{
+	m.s ^= 1;
+	if(m.s != n.s)
+		fpisub(&m, &n, d);
+	else
+		fpiadd(&m, &n, d);
+}
+
+/*
+ * RSUBF: as fsub with the operands exchanged; the sign of the
+ * private copy of n is flipped and (n, m) are combined.
+ */
+static void
+fsubr(Internal m, Internal n, Internal *d)
+{
+	n.s ^= 1;
+	if(n.s != m.s)
+		fpisub(&n, &m, d);
+	else
+		fpiadd(&n, &m, d);
+}
+
+/* MULF/FMF: d = fpimul(m, n), working on private copies of the operands */
+static void
+fmul(Internal m, Internal n, Internal *d)
+{
+	fpimul(&m, &n, d);
+}
+
+/* DIVF/FDV: hands (m, n) to fpidiv in that order */
+static void
+fdiv(Internal m, Internal n, Internal *d)
+{
+	fpidiv(&m, &n, d);
+}
+
+/* RDIVF/FRD: as fdiv with the operands exchanged */
+static void
+fdivr(Internal m, Internal n, Internal *d)
+{
+	fpidiv(&n, &m, d);
+}
+
+/*
+ * arm unary operations
+ */
+
+/* MOVF: plain copy */
+static void
+fmov(Internal *m, Internal *d)
+{
+	*d = *m;
+}
+
+/* NEGF: copy with the sign bit toggled */
+static void
+fmovn(Internal *m, Internal *d)
+{
+	*d = *m;
+	d->s ^= 1;
+}
+
+/* ABSF: copy with the sign cleared */
+static void
+fabsf(Internal *m, Internal *d)
+{
+	*d = *m;
+	d->s = 0;
+}
+
+/*
+ * RNDF: round to an integral value.
+ * 0.5 (fpconst[6]) is combined with the operand through fadd, or
+ * through fsub when the operand is negative; weird results are
+ * returned as they are.  otherwise the result is rounded and every
+ * fraction bit below the binary point is cleared: e is the number
+ * of integer bits, so a value with none becomes zero, and the mask
+ * is applied to h, or to l once the integer part extends into l.
+ */
+static void
+frnd(Internal *m, Internal *d)
+{
+	short e;
+
+	(m->s? fsub: fadd)(fpconst[6], *m, d);
+	if(IsWeird(d))
+		return;
+	fpiround(d);
+	e = (d->e - ExpBias) + 1;
+	if(e <= 0)
+		SetZero(d);
+	else if(e > FractBits){
+		if(e < 2*FractBits)
+			d->l &= ~((1<<(2*FractBits - e))-1);
+	}else{
+		d->l = 0;
+		if(e < FractBits)
+			d->h &= ~((1<<(FractBits-e))-1);
+	}
+}
+
+/*
+ * monadic operations, indexed by bits 20-23 of the opcode.
+ * entries with a nil routine (listed or omitted) make fpemu
+ * call unimp.
+ */
+static	FP1	optab1[16] = {	/* Fd := OP Fm */
+[0]	{"MOVF",	fmov},
+[1]	{"NEGF",	fmovn},
+[2]	{"ABSF",	fabsf},
+[3]	{"RNDF",	frnd},
+[4]	{"SQTF",	/*fsqt*/0},
+/* LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN all `deprecated' */
+/* URD and NRM aren't implemented */
+};
+
+/*
+ * dyadic operations, indexed by bits 20-23 of the opcode.
+ * the `fast' variants share the routines of the ordinary ones;
+ * entries with a nil routine make fpemu call unimp.
+ */
+static	FP2	optab2[16] = {	/* Fd := Fn OP Fm */
+[0]	{"ADDF",	fadd},
+[1]	{"MULF",	fmul},
+[2]	{"SUBF",	fsub},
+[3]	{"RSUBF",	fsubr},
+[4]	{"DIVF",	fdiv},
+[5]	{"RDIVF",	fdivr},
+/* POW, RPW deprecated */
+[8]	{"REMF",	/*frem*/0},
+[9]	{"FMF",	fmul},	/* fast multiply */
+[10]	{"FDV",	fdiv},	/* fast divide */
+[11]	{"FRD",	fdivr},	/* fast reverse divide */
+/* POL deprecated */
+};
+
+/*
+ * compare n with m and return the flag bits to be placed in the psr:
+ * V|C when either operand is weird, otherwise C (n > m),
+ * C|Z (equal) or N (n < m).  the operands themselves are not
+ * modified; rounded copies are compared.
+ */
+static ulong
+fcmp(Internal *n, Internal *m)
+{
+	Internal a, b;
+	int r;
+
+	/* BUG: should trap if not masked */
+	if(IsWeird(m) || IsWeird(n))
+		return V|C;
+
+	a = *n;
+	b = *m;
+	fpiround(&a);
+	fpiround(&b);
+	r = fpicmp(&a, &b);
+	if(r < 0)
+		return N;
+	if(r == 0)
+		return C|Z;
+	return C;
+}
+
+/*
+ * load: convert the memory operand at ea into FP register d using
+ * the conversion routine f.  n is the operand size in bytes and is
+ * used only to pick the mnemonic in the debug output.
+ */
+static void
+fld(void (*f)(Internal*, void*), int d, ulong ea, int n, FPsave *ufp)
+{
+	Internal *dst;
+
+	dst = &FR(ufp, d);
+	(*f)(dst, (void*)ea);
+	if(!fpemudebug)
+		return;
+	print("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d);
+}
+
+/*
+ * store: convert FP register s to memory format at ea using f.
+ * the conversion is given a copy, so the register keeps its
+ * value whatever f does to its argument.
+ */
+static void
+fst(void (*f)(void*, Internal*), ulong ea, int s, int n, FPsave *ufp)
+{
+	Internal copy;
+
+	copy = FR(ufp, s);
+	if(fpemudebug)
+		print("MOV%c	F%d,#%lux\n", n==8? 'D': 'F', s, ea);
+	(*f)((void*)ea, &copy);
+}
+
+/*
+ * decide whether condition code c (the top four bits of an
+ * instruction) holds for the flags in cc.  non-zero means yes.
+ */
+static int
+condok(int cc, int c)
+{
+	static int flag[] = { Z, C, N, V };
+	int f, nv;
+
+	/* 0-7: a single flag, set (even codes) or clear (odd codes) */
+	if(c >= 0 && c < 8){
+		f = cc & flag[c>>1];
+		return (c&1)? f == 0: f;
+	}
+
+	nv = cc & (N|V);
+	switch(c){
+	case 8:	/* C set and Z clear */
+		return (cc&C) != 0 && (cc&Z) == 0;
+	case 9:	/* C clear or Z set */
+		return (cc&C) == 0 || (cc&Z) != 0;
+	case 10:	/* N and V agree */
+		return nv == (N|V) || nv == 0;
+	case 11:	/* N and V differ */
+		return nv == N || nv == V;
+	case 12:	/* Z clear, and N and V agree */
+		return (cc&Z) == 0 && (nv == (N|V) || nv == 0);
+	case 13:	/* Z set, or N and V differ */
+		return (cc&Z) != 0 || nv == N || nv == V;
+	case 14:	/* always */
+		return 1;
+	case 15:	/* never (reserved) */
+		return 0;
+	}
+	return 0;	/* not reached */
+}
+
+/*
+ * report an instruction the emulator does not implement:
+ * format a message naming the pc and opcode and raise it with
+ * error().  callers rely on this not returning.
+ */
+static void
+unimp(ulong pc, ulong op)
+{
+	char buf[60];
+
+	snprint(buf, sizeof(buf), "sys: fp: pc=%lux unimp fp 0x%.8lux", pc, op);
+	if(fpemudebug)
+		print("FPE: %s\n", buf);
+	error(buf);
+	/* no return */
+}
+
+/*
+ * emulate one floating-point coprocessor instruction.
+ * op is the instruction word found at pc, ur the trapped integer
+ * registers and ufp the process's emulated FP state.
+ * three classes are decoded in turn: loads and stores (including
+ * base-register write-back), register transfers and compares
+ * (which rewrite the flag bits of ur->psr), and arithmetic through
+ * optab1/optab2.  anything else ends in unimp(), which does not
+ * return.  tag is assigned only when fpemudebug is set and is read
+ * only under the same condition.
+ */
+static void
+fpemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
+{
+	int rn, rd, tag, o;
+	long off;
+	ulong ea;
+	Internal tmp, *fm, *fn;
+
+	/* note: would update fault status here if we noted numeric exceptions */
+
+	/*
+	 * LDF, STF; 10.1.1
+	 */
+	if(((op>>25)&7) == 6){
+		if(op & (1<<22))
+			unimp(pc, op);	/* packed or extended */
+		rn = (op>>16)&0xF;
+		off = (op&0xFF)<<2;
+		if((op & (1<<23)) == 0)
+			off = -off;
+		ea = REG(ur, rn);
+		if(rn == REGPC)
+			ea += 8;
+		if(op & (1<<24))
+			ea += off;
+		rd = (op>>12)&7;
+		if(op & (1<<20)){
+			if(op & (1<<15))
+				fld(fpid2i, rd, ea, 8, ufp);
+			else
+				fld(fpis2i, rd, ea, 4, ufp);
+		}else{
+			if(op & (1<<15))
+				fst(fpii2d, ea, rd, 8, ufp);
+			else
+				fst(fpii2s, ea, rd, 4, ufp);
+		}
+		if((op & (1<<24)) == 0)
+			ea += off;
+		if(op & (1<<21))
+			REG(ur, rn) = ea;
+		return;
+	}
+
+	/*
+	 * CPRT/transfer, 10.3
+	 */
+	if(op & (1<<4)){
+		rd = (op>>12) & 0xF;
+
+		/*
+		 * compare, 10.3.1
+		 */
+		if(rd == 15 && op & (1<<20)){
+			rn = (op>>16)&7;
+			fn = &FR(ufp, rn);
+			if(op & (1<<3)){
+				fm = &fpconst[op&7];
+				if(fpemudebug)
+					tag = 'C';
+			}else{
+				fm = &FR(ufp, op&7);
+				if(fpemudebug)
+					tag = 'F';
+			}
+			switch((op>>21)&7){
+			default:
+				unimp(pc, op);
+			case 4:	/* CMF: Fn :: Fm */
+			case 6:	/* CMFE: Fn :: Fm (with exception) */
+				ur->psr &= ~(N|C|Z|V);
+				ur->psr |= fcmp(fn, fm);
+				break;
+			case 5:	/* CNF: Fn :: -Fm */
+			case 7:	/* CNFE: Fn :: -Fm (with exception) */
+				tmp = *fm;
+				tmp.s ^= 1;
+				ur->psr &= ~(N|C|Z|V);
+				ur->psr |= fcmp(fn, &tmp);
+				break;
+			}
+			if(fpemudebug)
+				print("CMPF	%c%d,F%ld =%#lux\n",
+					tag, rn, op&7, ur->psr>>28);
+			return;
+		}
+
+		/*
+		 * other transfer, 10.3
+		 */
+		switch((op>>20)&0xF){
+		default:
+			unimp(pc, op);
+		case 0:	/* FLT */
+			rn = (op>>16) & 7;
+			fpiw2i(&FR(ufp, rn), &REG(ur, rd));
+			if(fpemudebug)
+				print("MOVW[FD]	R%d, F%d\n", rd, rn);
+			break;
+		case 1:	/* FIX */
+			if(op & (1<<3))
+				unimp(pc, op);
+			rn = op & 7;
+			tmp = FR(ufp, rn);
+			fpii2w(&REG(ur, rd), &tmp);
+			if(fpemudebug)
+				print("MOV[FD]W	F%d, R%d =%ld\n", rn, rd, REG(ur, rd));
+			break;
+		case 2:	/* FPSR := Rd */
+			ufp->status = REG(ur, rd);
+			if(fpemudebug)
+				print("MOVW	R%d, FPSR\n", rd);
+			break;
+		case 3:	/* Rd := FPSR */
+			REG(ur, rd) = ufp->status;
+			if(fpemudebug)
+				print("MOVW	FPSR, R%d\n", rd);
+			break;
+		case 4:	/* FPCR := Rd */
+			ufp->control = REG(ur, rd);
+			if(fpemudebug)
+				print("MOVW	R%d, FPCR\n", rd);
+			break;
+		case 5:	/* Rd := FPCR */
+			REG(ur, rd) = ufp->control;
+			if(fpemudebug)
+				print("MOVW	FPCR, R%d\n", rd);
+			break;
+		}
+		return;
+	}
+
+	/*
+	 * arithmetic
+	 */
+
+	if(op & (1<<3)){	/* constant */
+		fm = &fpconst[op&7];
+		if(fpemudebug)
+			tag = 'C';
+	}else{
+		fm = &FR(ufp, op&7);
+		if(fpemudebug)
+			tag = 'F';
+	}
+	rd = (op>>12)&7;
+	o = (op>>20)&0xF;
+	if(op & (1<<15)){	/* monadic */
+		FP1 *fp;
+		fp = &optab1[o];
+		if(fp->f == nil)
+			unimp(pc, op);
+		if(fpemudebug)
+			print("%s	%c%ld,F%d\n", fp->name, tag, op&7, rd);
+		(*fp->f)(fm, &FR(ufp, rd));
+	} else {
+		FP2 *fp;
+		fp = &optab2[o];
+		if(fp->f == nil)
+			unimp(pc, op);
+		rn = (op>>16)&7;
+		if(fpemudebug)
+			print("%s	%c%ld,F%d,F%d\n", fp->name, tag, op&7, rn, rd);
+		(*fp->f)(*fm, FR(ufp, rn), &FR(ufp, rd));
+	}
+}
+
+/*
+ * emulate a compare-and-swap encoded as a coprocessor instruction.
+ * the Ureg is indexed as an array of words.  the register named by
+ * bits 12-14 holds the target address on entry and receives the
+ * result: 1 if the word there equalled the register named by bits
+ * 16-18, in which case the register named by bits 0-2 is stored;
+ * 0 otherwise.  the compare and store run at splhi.
+ * NOTE(review): register fields are masked with 0x7, so only the
+ * first eight words of the Ureg can be named -- confirm intended.
+ */
+void
+casemu(ulong pc, ulong op, Ureg *ur)
+{
+	ulong *rp, ro, rn, *rd;
+
+	USED(pc);
+
+	rp = (ulong*)ur;
+	ro = rp[op>>16 & 0x7];
+	rn = rp[op>>0 & 0x7];
+	rd = rp + (op>>12 & 0x7);
+	rp = (ulong*)*rd;
+	validaddr((ulong)rp, 4, 1);
+	splhi();
+	if(*rd = (*rp == ro))
+		*rp = rn;
+	spllo();
+}
+
+/* set by ldrex, consumed and cleared by a successful strex */
+int ldrexvalid;
+
+/*
+ * emulate LDREX: load the word addressed by the register in bits
+ * 16-18 into the register in bits 12-14 and mark the reservation
+ * valid.  the reservation is a single global flag; it records
+ * neither the address nor the process that took it.
+ */
+void
+ldrex(ulong pc, ulong op, Ureg *ur)
+{
+	ulong *rp, *rd, *addr;
+
+	USED(pc);
+
+	rp = (ulong*)ur;
+	rd = rp + (op>>16 & 0x7);
+	addr = (ulong*)*rd;
+	validaddr((ulong)addr, 4, 0);
+	ldrexvalid = 1;
+	rp[op>>12 & 0x7] = *addr;
+	if(fpemudebug)
+		print("ldrex, r%ld = [r%ld]@0x%8.8p = 0x%8.8lux",
+			op>>12 & 0x7, op>>16 & 0x7, addr, rp[op>>12 & 0x7]);
+}
+
+/*
+ * emulate STREX: if the reservation flag set by ldrex is still
+ * valid, store the register in bits 0-2 at the address held in the
+ * register in bits 16-18, clear the flag and put 0 in the status
+ * register (bits 12-14); otherwise store nothing and put 1 there.
+ * the test and store run at splhi.
+ */
+void
+strex(ulong pc, ulong op, Ureg *ur)
+{
+	ulong *rp, rn, *rd, *addr;
+
+	USED(pc);
+
+	rp = (ulong*)ur;
+	rd = rp + (op>>16 & 0x7);
+	rn = rp[op>>0 & 0x7];
+	addr = (ulong*)*rd;
+	validaddr((ulong)addr, 4, 1);
+	splhi();
+	if(ldrexvalid){
+		if(fpemudebug)
+			print("strex valid, [r%ld]@0x%8.8p = r%ld = 0x%8.8lux",
+				op>>16 & 0x7, addr, op>>0 & 0x7, rn);
+		*addr = rn;
+		ldrexvalid = 0;
+		rp[op>>12 & 0x7] = 0;
+	}else{
+		/* report the status register (bits 12-14), the one set to 1 below */
+		if(fpemudebug)
+			print("strex invalid, r%ld = 1", op>>12 & 0x7);
+		rp[op>>12 & 0x7] = 1;
+	}
+	spllo();
+}
+
+/*
+ * non-FP instructions that fpiarm also emulates: an instruction
+ * matches an entry when (op & mask) == opc.  the list ends at the
+ * entry whose f is nil.
+ */
+struct {
+	ulong	opc;
+	ulong	mask;
+	void	(*f)(ulong, ulong, Ureg*);
+} specialopc[] = {
+	{ 0x01900f9f, 0x0ff00fff, ldrex },
+	{ 0x01800f90, 0x0ff00ff0, strex },
+	{ 0x0ed00100, 0x0ef08100, casemu },
+	{ 0x00000000, 0x00000000, nil }
+};
+
+/*
+ * returns the number of FP instructions emulated
+ *
+ * starting at ur->pc, instructions are emulated one after another
+ * until one is met that is neither in specialopc[] nor an FP
+ * coprocessor instruction; ur->pc is left pointing at it.
+ * an instruction whose condition fails is stepped over without
+ * being executed, but still counts, as do specialopc[] entries.
+ * the first call for a process resets its FP state: control 0,
+ * the status word below, and all eight registers 0.0.
+ */
+int
+fpiarm(Ureg *ur)
+{
+	ulong op, o;
+	FPsave *ufp;
+	int i, n;
+
+	if(up == nil)
+		panic("fpiarm not in a process");
+	ufp = &up->fpsave;
+	/* because all the state is in the proc structure,
+	 * it need not be saved/restored
+	 */
+	if(up->fpstate != FPactive){
+//		assert(sizeof(Internal) == sizeof(ufp->regs[0]));
+		up->fpstate = FPactive;
+		ufp->control = 0;
+		ufp->status = (0x01<<28)|(1<<12);	/* software emulation, alternative C flag */
+		for(n = 0; n < 8; n++)
+			FR(ufp, n) = fpconst[0];
+	}
+	for(n=0; ;n++){
+		validaddr(ur->pc, 4, 0);
+		op = *(ulong*)(ur->pc);
+		if(fpemudebug)
+			print("%#lux: %#8.8lux ", ur->pc, op);
+		o = (op>>24) & 0xF;
+		if(condok(ur->psr, op>>28)){
+			for(i = 0; specialopc[i].f; i++)
+				if((op & specialopc[i].mask) == specialopc[i].opc)
+					break;
+			if(specialopc[i].f)
+				specialopc[i].f(ur->pc, op, ur);
+			else if((op & 0xF00) != 0x100 || o != 0xE && (o&~1) != 0xC)
+				break;
+			else
+				fpemu(ur->pc, op, ur, ufp);
+		}else if((op & 0xF00) != 0x100 || o != 0xE && (o&~1) != 0xC)
+			break;
+		ur->pc += 4;
+	}
+	if(fpemudebug) print("\n");
+	return n;
+}

+ 136 - 0
sys/src/9/omap/fpimem.c

@@ -0,0 +1,136 @@
+#include "fpi.h"
+
+/*
+ * the following routines depend on memory format, not the machine
+ */
+
+/*
+ * unpack the single-precision value at v into *i.
+ * +0 and -0 become the Internal zero (sign kept).  otherwise the
+ * exponent is re-biased from single to Internal and the 23 fraction
+ * bits are placed directly below the hidden bit.
+ * NOTE(review): the re-biased exponent is never 0 for a single
+ * (the smallest is 0-127+1023), so the hidden bit is always set,
+ * denormals included; and an exponent field of 255 (Inf/NaN) maps
+ * to 1151, below ExpInfinity, so IsWeird() does not hold for the
+ * result.  confirm whether this is intended.
+ */
+void
+fpis2i(Internal *i, void *v)
+{
+	Single *s = v;
+
+	i->s = (*s & 0x80000000) ? 1: 0;
+	if((*s & ~0x80000000) == 0){
+		SetZero(i);
+		return;
+	}
+	i->e = ((*s>>23) & 0x00FF) - SingleExpBias + ExpBias;
+	i->h = (*s & 0x007FFFFF)<<(1+NGuardBits);
+	i->l = 0;
+	if(i->e)
+		i->h |= HiddenBit;
+	else
+		i->e++;
+}
+
+/*
+ * unpack the double-precision value at v into *i.
+ * the exponent field is used as it stands; a zero field (zero or
+ * denormal) becomes exponent 1 without the hidden bit, every other
+ * value gets the hidden bit.
+ */
+void
+fpid2i(Internal *i, void *v)
+{
+	Double *d;
+
+	d = v;
+	i->s = (d->h & 0x80000000) != 0;
+	i->e = (d->h>>20) & 0x07FF;
+	/* high 20 fraction bits, then the top 7 bits of the low word */
+	i->h = ((d->h & 0x000FFFFF)<<(4+NGuardBits))|((d->l>>25) & 0x7F);
+	/* remaining 25 bits, leaving room for the guard bits */
+	i->l = (d->l & 0x01FFFFFF)<<NGuardBits;
+	if(i->e == 0)
+		i->e = 1;
+	else
+		i->h |= HiddenBit;
+}
+
+/*
+ * convert the signed integer at v to Internal format.
+ * the magnitude is held in an unsigned variable: the most negative
+ * Word has no positive counterpart, and shifting it as a signed
+ * value would smear the sign bit into the fraction.
+ */
+void
+fpiw2i(Internal *i, void *v)
+{
+	Word word = *(Word*)v;
+	unsigned long mag, w;
+	short e;
+
+	if(word < 0){
+		i->s = 1;
+		mag = -(unsigned long)word;
+	}
+	else{
+		i->s = 0;
+		mag = word;
+	}
+	if(mag == 0){
+		SetZero(i);
+		return;
+	}
+	/* e = number of significant bits in the magnitude */
+	for (e = 0, w = mag; w; w >>= 1, e++)
+		;
+	if(e > FractBits){
+		/* the low e-FractBits bits spill into the top of l */
+		i->h = mag>>(e - FractBits);
+		i->l = (mag & ((1<<(e - FractBits)) - 1))<<(2*FractBits - e);
+	}
+	else {
+		i->h = mag<<(FractBits - e);
+		i->l = 0;
+	}
+	i->e = (e - 1) + ExpBias;
+}
+
+/*
+ * pack *i into single precision at v.  *i is rounded and altered
+ * in place, so callers that need the value afterwards pass a copy.
+ * results whose exponent is too small for a normalised single are
+ * stored as a signed zero; results too large are stored with the
+ * maximum exponent and an empty fraction.
+ * NOTE(review): the too-large path discards the fraction, so a NaN
+ * would also be stored as an infinity -- confirm intended.
+ */
+void
+fpii2s(void *v, Internal *i)
+{
+	short e;
+	Single *s = (Single*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;
+	*s = i->s ? 0x80000000: 0;
+	e = i->e;
+	if(e < ExpBias){
+		if(e <= (ExpBias - SingleExpBias))
+			return;
+		e = SingleExpBias - (ExpBias - e);
+	}
+	else  if(e >= (ExpBias + (SingleExpMax-SingleExpBias))){
+		*s |= SingleExpMax<<23;
+		return;
+	}
+	else
+		e = SingleExpBias + (e - ExpBias);
+	*s |= (e<<23)|(i->h>>(1+NGuardBits));
+}
+
+/*
+ * pack *i into double precision at v.  *i is rounded and altered
+ * in place.  the hidden bit is dropped (or, when it is absent, the
+ * exponent is decremented), the guard bits are shifted out of the
+ * h:l pair, and sign, exponent and fraction are assembled into the
+ * two words of the Double.
+ */
+void
+fpii2d(void *v, Internal *i)
+{
+	Double *d = (Double*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;
+	i->l = ((i->h & GuardMask)<<25)|(i->l>>NGuardBits);
+	i->h >>= NGuardBits;
+	d->h = i->s ? 0x80000000: 0;
+	d->h |= (i->e<<20)|((i->h & 0x00FFFFFF)>>4);
+	d->l = (i->h<<28)|i->l;
+}
+
+/*
+ * convert *i to a signed integer at word.  *i is rounded in place
+ * first and any remaining fraction is truncated.
+ * a magnitude of 2^31 or more saturates according to the sign:
+ * the largest positive Word, or the most negative one (which is
+ * also the exact answer for -2^31).
+ */
+void
+fpii2w(Word *word, Internal *i)
+{
+	Word w;
+	short e;
+
+	fpiround(i);
+	e = (i->e - ExpBias) + 1;	/* number of integer bits */
+	if(e > 31){
+		*word = i->s ? -0x7FFFFFFF - 1: 0x7FFFFFFF;
+		return;
+	}
+	if(e <= 0)
+		w = 0;
+	else if(e > FractBits)
+		w = (i->h<<(e - FractBits))|(i->l>>(2*FractBits - e));
+	else
+		w = i->h>>(FractBits-e);
+	if(i->s)
+		w = -w;
+	*word = w;
+}

+ 25 - 0
sys/src/9/omap/init9.s

@@ -0,0 +1,25 @@
+/*
+ * This is the same as the C programme:
+ *
+ *	void
+ *	main(char* argv0)
+ *	{
+ *		startboot(argv0, &argv0);
+ *	}
+ *
+ * It is in assembler because SB needs to be
+ * set and doing this in C drags in too many
+ * other routines.
+ */
+/*
+ * R0 gets the address of boot and R1 a pointer just above this
+ * routine's frame; both are also stored in the outgoing argument
+ * slots before calling startboot.  if startboot returns, spin.
+ */
+TEXT main(SB), 1, $8
+	MOVW	$setR12(SB), R12		/* load the SB */
+	MOVW	$boot(SB), R0
+
+	ADD	$12, R13, R1			/* pointer to 0(FP) */
+
+	MOVW	R0, 4(R13)			/* pass argc, argv */
+	MOVW	R1, 8(R13)
+
+	BL	startboot(SB)
+_loop:
+	B	_loop

+ 186 - 0
sys/src/9/omap/lexception.s

@@ -0,0 +1,186 @@
+/*
+ * arm exception handlers
+ */
+#include "arm.s"
+
+#undef B					/* B is for 'botch' */
+
+/*
+ *  exception vectors, copied by trapinit() to somewhere useful
+ */
+/*
+ * eight identical instructions, one per exception: each loads the
+ * pc from the word 0x18 bytes beyond R15.
+ * NOTE(review): presumably, since R15 reads 8 bytes ahead, this is
+ * the matching word of vtable below when the two are copied
+ * back-to-back -- confirm against trapinit.
+ */
+TEXT vectors(SB), 1, $-4
+	MOVW	0x18(R15), R15		/* reset */
+	MOVW	0x18(R15), R15		/* undefined instr. */
+	MOVW	0x18(R15), R15		/* SWI & SMC */
+	MOVW	0x18(R15), R15		/* prefetch abort */
+	MOVW	0x18(R15), R15		/* data abort */
+	MOVW	0x18(R15), R15		/* reserved */
+	MOVW	0x18(R15), R15		/* IRQ */
+	MOVW	0x18(R15), R15		/* FIQ */
+
+/*
+ * handler addresses, in the same order as the vectors above.
+ * reset, SWI and the reserved slot all go to _vsvc.
+ */
+TEXT vtable(SB), 1, $-4
+	WORD	$_vsvc(SB)		/* reset, in svc mode already */
+	WORD	$_vund(SB)		/* undefined, switch to svc mode */
+	WORD	$_vsvc(SB)		/* swi, in svc mode already */
+	WORD	$_vpabt(SB)		/* prefetch abort, switch to svc mode */
+	WORD	$_vdabt(SB)		/* data abort, switch to svc mode */
+	WORD	$_vsvc(SB)		/* reserved */
+	WORD	$_virq(SB)		/* IRQ, switch to svc mode */
+	WORD	$_vfiq(SB)		/* FIQ, switch to svc mode */
+
+/* calls _reset; not referenced by vtable above, whose reset slot is _vsvc */
+TEXT _vrst(SB), 1, $-4
+	BL	_reset(SB)
+
+/*
+ * system call entry.  a Ureg is built on the stack: pc, psr and
+ * type are pushed first, then R0-R14 are stored below them.
+ * syscall is called with a pointer to the Ureg; on return the
+ * SPSR and registers are restored from it and RFE resumes the
+ * interrupted code.
+ */
+TEXT _vsvc(SB), 1, $-4			/* SWI */
+	MOVW.W	R14, -4(R13)		/* ureg->pc = interrupted PC */
+	MOVW	SPSR, R14		/* ureg->psr = SPSR */
+	MOVW.W	R14, -4(R13)		/* ... */
+	MOVW	$PsrMsvc, R14		/* ureg->type = PsrMsvc */
+	MOVW.W	R14, -4(R13)		/* ... */
+
+//	MOVM.DB.W.S [R0-R14], (R13)	/* save user level registers, at end r13 points to ureg */
+	MOVM.DB.S [R0-R14], (R13)	/* save user level registers */
+	SUB	$(15*4), R13		/* r13 now points to ureg */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	MOVW	$(L1-MACHSIZE), R10	/* m */
+	MOVW	8(R10), R9		/* up */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$8, R13			/* space for argument+link */
+
+	BL	syscall(SB)
+
+	ADD	$(8+4*15), R13		/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+	MOVM.DB.S (R13), [R0-R14]	/* restore registers */
+	ADD	$8, R13			/* pop past ureg->{type+psr} */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+/*
+ * the four entries below are identical apart from the type value:
+ * each parks R0-R4 at (R13), loads the exception type into R0 and
+ * joins _vswitch.  data abort uses PsrMabt+1 as its type, which
+ * distinguishes it from prefetch abort.
+ */
+TEXT _vund(SB), 1, $-4			/* undefined */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMund, R0
+	B	_vswitch
+
+TEXT _vpabt(SB), 1, $-4			/* prefetch abort */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMabt, R0		/* r0 = type */
+	B	_vswitch
+
+TEXT _vdabt(SB), 1, $-4			/* data abort */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$(PsrMabt+1), R0	/* r0 = type */
+	B	_vswitch
+
+TEXT _virq(SB), 1, $-4			/* IRQ */
+	MOVM.IA	[R0-R4], (R13)		/* free some working space */
+	MOVW	$PsrMirq, R0		/* r0 = type */
+	B	_vswitch
+
+	/*
+	 *  come here with type in R0 and R13 pointing above saved [r0-r4].
+	 *  we'll switch to SVC mode and then call trap.
+	 *  R1-R3 carry the SPSR, the interrupted pc and the address of the
+	 *  parked registers across the mode switch.  the low four bits of
+	 *  the saved PSR decide the path: zero goes to _userexcep, anything
+	 *  else falls through to the kernel-mode path below.
+	 */
+_vswitch:
+	MOVW	SPSR, R1		/* save SPSR for ureg */
+	MOVW	R14, R2			/* save interrupted pc for ureg */
+	MOVW	R13, R3			/* save pointer to where the original [R0-R4] are */
+
+	/*
+	 * switch processor to svc mode.  this switches the banked registers
+	 * (r13 [sp] and r14 [link]) to those of svc mode.
+	 */
+	MOVW	CPSR, R14
+	BIC	$PsrMask, R14
+	ORR	$(PsrDirq|PsrDfiq|PsrMsvc), R14
+	MOVW	R14, CPSR		/* switch! */
+	DSB; ISB			/* force new cpsr to take effect */
+
+	AND.S	$0xf, R1, R4		/* interrupted code kernel or user? */
+	BEQ	_userexcep
+
+	/* here for trap from SVC mode */
+	MOVM.DB.W [R0-R2], (R13)	/* set ureg->{type, psr, pc}; r13 points to ureg->type  */
+	MOVM.IA	  (R3), [R0-R4]		/* restore [R0-R4] from previous mode's stack */
+
+	/*
+	 * In order to get a predictable value in R13 after the stores,
+	 * separate the store-multiple from the stack-pointer adjustment.
+	 * We'll assume that the old value of R13 should be stored on the stack.
+	 */
+	/* save kernel level registers, at end r13 points to ureg */
+//	MOVM.DB.W [R0-R14], (R13)
+	MOVM.DB	[R0-R14], (R13)
+	SUB	$(15*4), R13		/* SP now points to saved R0 */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
+	MOVW	$0xdeaddead, R11	/* marker */
+
+	BL	trap(SB)
+
+	ADD	$(4*2+4*15), R13	/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+
+	MOVM.DB (R13), [R0-R14]		/* restore registers */
+
+	ADD	$(4*2), R13		/* pop past ureg->{type+psr} to pc */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+	/*
+	 * here for trap from USER mode.
+	 * same shape as the kernel path above, except that the register
+	 * save and restore use the .S forms, as _vsvc does for user
+	 * registers, and m and up are reloaded before calling trap.
+	 */
+_userexcep:
+	MOVM.DB.W [R0-R2], (R13)	/* set ureg->{type, psr, pc}; r13 points to ureg->type  */
+	MOVM.IA	  (R3), [R0-R4]		/* restore [R0-R4] from previous mode's stack */
+
+//	MOVM.DB.W.S [R0-R14], (R13)	/* save user level registers, at end r13 points to ureg */
+	MOVM.DB.S [R0-R14], (R13)	/* save user level registers */
+	SUB	$(15*4), R13		/* r13 now points to ureg */
+
+	MOVW	$setR12(SB), R12	/* Make sure we've got the kernel's SB loaded */
+
+	MOVW	$(L1-MACHSIZE), R10	/* m */
+	MOVW	8(R10), R9		/* up */
+
+	MOVW	R13, R0			/* first arg is pointer to ureg */
+	SUB	$(4*2), R13		/* space for argument+link (for debugger) */
+
+	BL	trap(SB)
+
+	ADD	$(4*2+4*15), R13	/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+	MOVM.DB.S (R13), [R0-R14]	/* restore registers */
+	ADD	$(4*2), R13		/* pop past ureg->{type+psr} */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+/* FIQs are not handled: expand WAVE('%') (a macro defined elsewhere) and return */
+TEXT _vfiq(SB), 1, $-4			/* FIQ */
+WAVE('%')
+	RFE				/* FIQ is special, ignore it for now */
+
+/*
+ *  set the stack value for the mode passed in R0
+ *
+ *  the new stack value is the second argument, 4(FP).
+ *  the routine switches to the requested mode, exchanges R13,
+ *  switches back to the original CPSR and returns the mode's
+ *  previous R13 in R0.
+ */
+TEXT setr13(SB), 1, $-4
+	MOVW	4(FP), R1
+
+	MOVW	CPSR, R2
+	BIC	$PsrMask, R2, R3
+	ORR	R0, R3
+	MOVW	R3, CPSR
+	BARRIERS
+
+	MOVW	R13, R3
+	MOVW	R1, R13
+
+	MOVW	R2, CPSR
+	BARRIERS
+	MOVW	R3, R0
+	RET

+ 38 - 0
sys/src/9/omap/lproc.s

@@ -0,0 +1,38 @@
+#include "mem.h"
+#include "arm.h"
+
+/*
+ *  This is the first jump from kernel to user mode.
+ *  Fake a return from interrupt.
+ *
+ *  Enter with R0 containing the user stack pointer.
+ *  UTZERO + 0x20 is always the entry point.
+ *
+ *  The user stack pointer is pushed and reloaded with the .S form
+ *  of the load-multiple, the SPSR is set to user mode, the entry
+ *  address is pushed, and RFE pops it into the pc.
+ */
+TEXT touser(SB), 1, $-4
+	/* store the user stack pointer into the USR_r13 */
+	MOVM.DB.W [R0], (R13)
+	MOVM.S.IA.W (R13), [R13]
+
+	/* set up a PSR for user level */
+	MOVW	$(PsrMusr), R0
+	MOVW	R0, SPSR
+
+	/* save the PC on the stack */
+	MOVW	$(UTZERO+0x20), R0
+	MOVM.DB.W [R0], (R13)
+
+	/* return from interrupt */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */
+
+/*
+ *  here to jump to a newly forked process
+ *
+ *  on entry R13 points at a saved Ureg; the sequence is the same
+ *  as the exit path of _vsvc, minus the 8-byte argument frame.
+ */
+TEXT forkret(SB), 1, $-4
+	ADD	$(4*15), R13		/* make r13 point to ureg->type */
+	MOVW	8(R13), R14		/* restore link */
+	MOVW	4(R13), R0		/* restore SPSR */
+	MOVW	R0, SPSR		/* ... */
+	MOVM.DB.S (R13), [R0-R14]	/* restore registers */
+	ADD	$8, R13			/* pop past ureg->{type+psr} */
+	RFE				/* MOVM.IA.S.W (R13), [R15] */

BIN
sys/src/9/omap/nvram


+ 138 - 0
sys/src/9/omap/random.c

@@ -0,0 +1,138 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+
+/*
+ * state of the random byte generator: a circular buffer filled
+ * from randomclock and drained by randomread.
+ */
+struct Rb
+{
+	QLock;			/* held by randomread while it drains the buffer */
+	Rendez	producer;	/* genrandom sleeps here while the buffer is full */
+	Rendez	consumer;	/* randomread sleeps here while the buffer is empty */
+	ulong	randomcount;	/* spun up by genrandom, sampled and zeroed by randomclock */
+	uchar	buf[128];
+	uchar	*ep;		/* one past the end of buf */
+	uchar	*rp;		/* next byte to read */
+	uchar	*wp;		/* next byte to write */
+	uchar	next;		/* samples folded into bits so far */
+	uchar	wakeme;		/* set while a reader is waiting for data */
+	ushort	bits;		/* accumulates sampled bits */
+	ulong	randn;		/* state of the mixing generator in randomread */
+} rb;
+
+/*
+ * sleep condition for the producer: true while there is room.
+ * the buffer counts as full when the write pointer sits one slot
+ * behind the read pointer, allowing for wrap-around.
+ */
+static int
+rbnotfull(void*)
+{
+	int gap;
+
+	gap = rb.rp - rb.wp;
+	if(gap == 1)
+		return 0;
+	if(gap == (1 - sizeof(rb.buf)))
+		return 0;
+	return 1;
+}
+
+/* sleep condition for the consumer: true while unread bytes remain */
+static int
+rbnotempty(void*)
+{
+	return rb.wp != rb.rp;
+}
+
+/*
+ * kernel process that does nothing but increment rb.randomcount.
+ * randomclock samples and zeroes the counter from the clock, so
+ * its value at each sample depends on how far this loop got.
+ * after each burst the process yields to anything of higher
+ * priority and sleeps while the buffer is full.
+ */
+static void
+genrandom(void*)
+{
+	up->basepri = PriNormal;
+	up->priority = up->basepri;
+
+	for(;;){
+		for(;;)
+			if(++rb.randomcount > 100000)
+				break;
+		if(anyhigher())
+			sched();
+		if(!rbnotfull(0))
+			sleep(&rb.producer, rbnotfull, 0);
+	}
+}
+
+/*
+ *  produce random bits in a circular buffer
+ *
+ *  registered as a clock link by randominit.  each call that finds
+ *  a non-zero counter and room in the buffer shifts rb.bits left by
+ *  two and xors in the counter; every fourth such call xors the
+ *  accumulated bits into the next buffer byte, advances the write
+ *  pointer and wakes a waiting reader.
+ */
+static void
+randomclock(void)
+{
+	if(rb.randomcount == 0 || !rbnotfull(0))
+		return;
+
+	rb.bits = (rb.bits<<2) ^ rb.randomcount;
+	rb.randomcount = 0;
+
+	rb.next++;
+	if(rb.next != 8/2)
+		return;
+	rb.next = 0;
+
+	*rb.wp ^= rb.bits;
+	if(rb.wp+1 == rb.ep)
+		rb.wp = rb.buf;
+	else
+		rb.wp = rb.wp+1;
+
+	if(rb.wakeme)
+		wakeup(&rb.consumer);
+}
+
+/*
+ * set up the circular buffer, hook randomclock to the clock and
+ * start the genrandom kernel process.
+ */
+void
+randominit(void)
+{
+	/* empty buffer: read and write pointers coincide */
+	rb.wp = rb.buf;
+	rb.rp = rb.buf;
+	rb.ep = rb.buf + sizeof(rb.buf);
+
+	addclock0link(randomclock, 1000/HZ);
+	kproc("genrandom", genrandom, 0);
+}
+
+/*
+ *  consume random bytes from a circular buffer
+ *
+ *  copies n bytes to xp and returns n, sleeping whenever the
+ *  buffer runs dry.  the buffer lock is held for the whole read;
+ *  the error label releases it if the sleep is interrupted.
+ *  NOTE(review): the error label is established before qlock and
+ *  rb.wakeme is left set on the error path -- confirm harmless.
+ */
+ulong
+randomread(void *xp, ulong n)
+{
+	uchar *e, *p;
+	ulong x;
+
+	p = xp;
+
+	if(waserror()){
+		qunlock(&rb);
+		nexterror();
+	}
+
+	qlock(&rb);
+	for(e = p + n; p < e; ){
+		if(rb.wp == rb.rp){
+			rb.wakeme = 1;
+			wakeup(&rb.producer);
+			sleep(&rb.consumer, rbnotempty, 0);
+			rb.wakeme = 0;
+			continue;
+		}
+
+		/*
+		 *  beating clocks will be predictable if
+		 *  they are synchronized.  Use a cheap pseudo
+		 *  random number generator to obscure any cycles.
+		 */
+		x = rb.randn*1103515245 ^ *rb.rp;
+		*p++ = rb.randn = x;
+
+		if(rb.rp+1 == rb.ep)
+			rb.rp = rb.buf;
+		else
+			rb.rp = rb.rp+1;
+	}
+	qunlock(&rb);
+	poperror();
+
+	wakeup(&rb.producer);
+
+	return n;
+}

+ 424 - 0
sys/src/9/omap/sdscsi.c

@@ -0,0 +1,424 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "../port/error.h"
+
+#include "../port/sd.h"
+
+/*
+ * issue a six-byte command whose bytes are all zero apart from the
+ * lun field, with no data phase, reusing the request r.
+ * returns whatever the controller's rio routine returns.
+ */
+static int
+scsitest(SDreq* r)
+{
+	/* command block: all zeroes except the lun */
+	memset(r->cmd, 0, sizeof(r->cmd));
+	r->cmd[1] = r->lun<<5;
+	r->clen = 6;
+
+	/* no data transfer */
+	r->write = 0;
+	r->data = nil;
+	r->dlen = 0;
+
+	r->flags = 0;
+	r->status = ~0;
+
+	return (*r->unit->dev->ifc->rio)(r);
+}
+
+/*
+ * probe a unit: read its inquiry data into unit->inquiry, poll it
+ * with scsitest up to three times and, if it is a direct-access
+ * device that answered, send a command to start it spinning.
+ * returns 1 if the unit responded with SDok or SDcheck, 0 otherwise
+ * (including allocation failure).
+ * the inquiry bounce buffer comes from sdmalloc, so it is released
+ * with sdfree on every path, including a failed inquiry.
+ */
+int
+scsiverify(SDunit* unit)
+{
+	SDreq *r;
+	int i, status;
+	uchar *inquiry;
+
+	if((r = malloc(sizeof(SDreq))) == nil)
+		return 0;
+	if((inquiry = sdmalloc(sizeof(unit->inquiry))) == nil){
+		free(r);
+		return 0;
+	}
+	r->unit = unit;
+	r->lun = 0;		/* ??? */
+
+	memset(unit->inquiry, 0, sizeof(unit->inquiry));
+	r->write = 0;
+	r->cmd[0] = 0x12;
+	r->cmd[1] = r->lun<<5;
+	r->cmd[4] = sizeof(unit->inquiry)-1;
+	r->clen = 6;
+	r->data = inquiry;
+	r->dlen = sizeof(unit->inquiry)-1;
+	r->flags = 0;
+
+	r->status = ~0;
+	if(unit->dev->ifc->rio(r) != SDok){
+		sdfree(inquiry);
+		free(r);
+		return 0;
+	}
+	memmove(unit->inquiry, inquiry, r->dlen);
+	sdfree(inquiry);
+
+	SET(status);
+	for(i = 0; i < 3; i++){
+		while((status = scsitest(r)) == SDbusy)
+			;
+		if(status == SDok || status != SDcheck)
+			break;
+		if(!(r->flags & SDvalidsense))
+			break;
+		if((r->sense[2] & 0x0F) != 0x02)
+			continue;
+
+		/*
+		 * Unit is 'not ready'.
+		 * If it is in the process of becoming ready or needs
+		 * an initialising command, set status so it will be spun-up
+		 * below.
+		 * If there's no medium, that's OK too, but don't
+		 * try to spin it up.
+		 */
+		if(r->sense[12] == 0x04){
+			if(r->sense[13] == 0x02 || r->sense[13] == 0x01){
+				status = SDok;
+				break;
+			}
+		}
+		if(r->sense[12] == 0x3A)
+			break;
+	}
+
+	if(status == SDok){
+		/*
+		 * Try to ensure a direct-access device is spinning.
+		 * Don't wait for completion, ignore the result.
+		 */
+		if((unit->inquiry[0] & 0x1F) == 0){
+			memset(r->cmd, 0, sizeof(r->cmd));
+			r->write = 0;
+			r->cmd[0] = 0x1B;
+			r->cmd[1] = (r->lun<<5)|0x01;
+			r->cmd[4] = 1;
+			r->clen = 6;
+			r->data = nil;
+			r->dlen = 0;
+			r->flags = 0;
+
+			r->status = ~0;
+			unit->dev->ifc->rio(r);
+		}
+	}
+	free(r);
+
+	if(status == SDok || status == SDcheck)
+		return 1;
+	return 0;
+}
+
+static int
+scsirio(SDreq* r)
+{
+	/*
+	 * Perform an I/O request, returning
+	 *	-1	failure
+	 *	 0	ok
+	 *	 1	no medium present
+	 *	 2	retry
+	 * The contents of r may be altered so the
+	 * caller should re-initialise if necessary.
+	 *
+	 * NOTE(review): no path below returns 1; the
+	 * no-medium case (sense[12] == 0x3A) leaves the
+	 * switch and is reported as -1.
+	 */
+	r->status = ~0;
+	switch(r->unit->dev->ifc->rio(r)){
+	default:
+		break;
+	case SDcheck:
+		if(!(r->flags & SDvalidsense))
+			break;
+		switch(r->sense[2] & 0x0F){
+		case 0x00:		/* no sense */
+		case 0x01:		/* recovered error */
+			return 2;
+		case 0x06:		/* check condition */
+			/*
+			 * 0x28 - not ready to ready transition,
+			 *	  medium may have changed.
+			 * 0x29 - power on or some type of reset.
+			 */
+			if(r->sense[12] == 0x28 && r->sense[13] == 0)
+				return 2;
+			if(r->sense[12] == 0x29)
+				return 2;
+			break;
+		case 0x02:		/* not ready */
+			/*
+			 * If no medium present, bail out.
+			 * If unit is becoming ready, rather than
+			 * not ready, wait a little then poke it again.
+			 */
+			if(r->sense[12] == 0x3A)
+				break;
+			if(r->sense[12] != 0x04 || r->sense[13] != 0x01)
+				break;
+
+			/* keep retrying until the 500ms sleep completes without an error */
+			while(waserror())
+				;
+			tsleep(&up->sleep, return0, 0, 500);
+			poperror();
+			scsitest(r);
+			return 2;
+		default:
+			break;
+		}
+		break;
+	case SDok:
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * bring a unit online by reading its capacity, retrying up to ten
+ * times while scsirio asks for a retry.  on success unit->sectors
+ * and unit->secsize are filled in.
+ * returns 0 on failure, otherwise 1 plus the number of retries
+ * that were needed.
+ * the 8-byte reply buffer comes from sdmalloc and is released
+ * with sdfree.
+ */
+int
+scsionline(SDunit* unit)
+{
+	SDreq *r;
+	uchar *p;
+	int ok, retries;
+
+	if((r = malloc(sizeof(SDreq))) == nil)
+		return 0;
+	if((p = sdmalloc(8)) == nil){
+		free(r);
+		return 0;
+	}
+
+	ok = 0;
+
+	r->unit = unit;
+	r->lun = 0;				/* ??? */
+	for(retries = 0; retries < 10; retries++){
+		/*
+		 * Read-capacity is mandatory for DA, WORM, CD-ROM and
+		 * MO. It may return 'not ready' if type DA is not
+		 * spun up, type MO or type CD-ROM are not loaded or just
+		 * plain slow getting their act together after a reset.
+		 */
+		r->write = 0;
+		memset(r->cmd, 0, sizeof(r->cmd));
+		r->cmd[0] = 0x25;
+		r->cmd[1] = r->lun<<5;
+		r->clen = 10;
+		r->data = p;
+		r->dlen = 8;
+		r->flags = 0;
+
+		r->status = ~0;
+		switch(scsirio(r)){
+		default:
+			break;
+		case 0:
+			unit->sectors = (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
+			unit->secsize = (p[4]<<24)|(p[5]<<16)|(p[6]<<8)|p[7];
+
+			/*
+			 * Some ATAPI CD readers lie about the block size.
+			 * Since we don't read audio via this interface
+			 * it's okay to always fudge this.
+			 */
+			if(unit->secsize == 2352)
+				unit->secsize = 2048;
+			/*
+			 * Devices with removable media may return 0 sectors
+			 * when they have empty media (e.g. sata dvd writers);
+			 * if so, keep the count zero.
+			 *
+			 * Read-capacity returns the LBA of the last sector,
+			 * therefore the number of sectors must be incremented.
+			 */
+			if(unit->sectors != 0)
+				unit->sectors++;
+			ok = 1;
+			break;
+		case 1:
+			ok = 1;
+			break;
+		case 2:
+			continue;
+		}
+		break;
+	}
+	sdfree(p);
+	free(r);
+
+	if(ok)
+		return ok+retries;
+	else
+		return 0;
+}
+
+/*
+ * run a caller-supplied command block against a unit.
+ * cmd/clen give the command, data the buffer and *dlen its length
+ * (dlen may be nil).  on SDok the number of bytes transferred is
+ * written back through dlen.  returns the status from the
+ * controller's rio routine, or SDmalloc if no request could be
+ * allocated.
+ * the request comes from malloc and is released with free.
+ */
+int
+scsiexec(SDunit* unit, int write, uchar* cmd, int clen, void* data, int* dlen)
+{
+	SDreq *r;
+	int status;
+
+	if((r = malloc(sizeof(SDreq))) == nil)
+		return SDmalloc;
+	r->unit = unit;
+	r->lun = cmd[1]>>5;		/* ??? */
+	r->write = write;
+	memmove(r->cmd, cmd, clen);
+	r->clen = clen;
+	r->data = data;
+	if(dlen)
+		r->dlen = *dlen;
+	r->flags = 0;
+
+	r->status = ~0;
+
+	/*
+	 * Call the device-specific I/O routine.
+	 * There should be no calls to 'error()' below this
+	 * which percolate back up.
+	 */
+	switch(status = unit->dev->ifc->rio(r)){
+	case SDok:
+		if(dlen)
+			*dlen = r->rlen;
+		/*FALLTHROUGH*/
+	case SDcheck:
+		/*FALLTHROUGH*/
+	default:
+		/*
+		 * It's more complicated than this. There are conditions
+		 * which are 'ok' but for which the returned status code
+		 * is not 'SDok'.
+		 * Also, not all conditions require a reqsense, might
+		 * need to do a reqsense here and make it available to the
+		 * caller somehow.
+		 *
+		 * Mañana.
+		 */
+		break;
+	}
+	free(r);
+
+	return status;
+}
+
+/*
+ * fill r->cmd with a ten-byte read (0x28) or write (0x2A) command:
+ * a four-byte block address and a two-byte block count, most
+ * significant byte first.
+ */
+static void
+scsifmt10(SDreq *r, int write, int lun, ulong nb, uvlong bno)
+{
+	uchar *c;
+	int i;
+
+	c = r->cmd;
+	c[0] = write == 0? 0x28: 0x2A;
+	c[1] = lun<<5;
+	for(i = 0; i < 4; i++)
+		c[2+i] = bno>>(24 - 8*i);
+	c[6] = 0;
+	c[7] = nb>>8;
+	c[8] = nb;
+	c[9] = 0;
+
+	r->clen = 10;
+}
+
+/*
+ * fill r->cmd with a sixteen-byte read (0x88) or write (0x8A)
+ * command: an eight-byte block address and a four-byte block
+ * count, most significant byte first.
+ */
+static void
+scsifmt16(SDreq *r, int write, int lun, ulong nb, uvlong bno)
+{
+	uchar *c;
+	int i;
+
+	c = r->cmd;
+	c[0] = write == 0? 0x88: 0x8A;
+	c[1] = lun<<5;		/* so wrong */
+	for(i = 0; i < 8; i++)
+		c[2+i] = bno>>(56 - 8*i);
+	for(i = 0; i < 4; i++)
+		c[10+i] = nb>>(24 - 8*i);
+	c[14] = 0;
+	c[15] = 0;
+
+	r->clen = 16;
+}
+
+/*
+ * read or write nb blocks starting at block bno.
+ * the sixteen-byte command form is used only when bno does not fit
+ * in 32 bits.  returns the number of bytes transferred, or -1 on
+ * failure; raises Enomem if no request can be allocated.
+ * a unit reporting that it is becoming ready is retried without
+ * limit; the request is rebuilt from `again' each time because
+ * scsirio may alter it.
+ */
+long
+scsibio(SDunit* unit, int lun, int write, void* data, long nb, uvlong bno)
+{
+	SDreq *r;
+	long rlen;
+
+	if((r = malloc(sizeof(SDreq))) == nil)
+		error(Enomem);
+	r->unit = unit;
+	r->lun = lun;
+again:
+	r->write = write;
+	if(bno >= (1ULL<<32))
+		scsifmt16(r, write, lun, nb, bno);
+	else
+		scsifmt10(r, write, lun, nb, bno);
+	r->data = data;
+	r->dlen = nb*unit->secsize;
+	r->flags = 0;
+
+	r->status = ~0;
+	switch(scsirio(r)){
+	default:
+		rlen = -1;
+		break;
+	case 0:
+		rlen = r->rlen;
+		break;
+	case 2:
+		rlen = -1;
+		if(!(r->flags & SDvalidsense))
+			break;
+		switch(r->sense[2] & 0x0F){
+		default:
+			break;
+		case 0x01:		/* recovered error */
+			print("%s: recovered error at sector %llud\n",
+				unit->name, bno);
+			rlen = r->rlen;
+			break;
+		case 0x06:		/* check condition */
+			/*
+			 * Check for a removable media change.
+			 * If so, mark it by zapping the geometry info
+			 * to force an online request.
+			 */
+			if(r->sense[12] != 0x28 || r->sense[13] != 0)
+				break;
+			if(unit->inquiry[1] & 0x80)
+				unit->sectors = 0;
+			break;
+		case 0x02:		/* not ready */
+			/*
+			 * If unit is becoming ready,
+			 * rather than not ready, try again.
+			 */
+			if(r->sense[12] == 0x04 && r->sense[13] == 0x01)
+				goto again;
+			break;
+		}
+		break;
+	}
+	free(r);
+
+	return rlen;
+}
+

+ 119 - 0
sys/src/9/omap/softfpu.c

@@ -0,0 +1,119 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Hook called from procdevtab.read and procdevtab.write to
+ * allow a user process access to the FPU registers.
+ * This is the only FPU routine which is called directly
+ * from the port code; it would be nice to have dynamic
+ * creation of entries in the device file trees...
+ *
+ * This version ignores all of its arguments and always returns 0.
+ */
+int
+fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write)
+{
+	USED(proc);
+	USED(a);
+	USED(n);
+	USED(offset);
+	USED(write);
+
+	return 0;
+}
+
+void
+fpunotify(Ureg*)
+{
+	/*
+	 * Note-delivery hook.  The body is empty in this file, so
+	 * none of the state handling described below happens here.
+	 *
+	 * Called when a note is about to be delivered to a
+	 * user process, usually at the end of a system call.
+	 * Note handlers are not allowed to use the FPU so
+	 * the state is marked (after saving if necessary) and
+	 * checked in the Device Not Available handler.
+	 */
+}
+
+void
+fpunoted(void)
+{
+	/*
+	 * End-of-note hook.  The body is empty in this file:
+	 * fpunotify() sets no flag here, so there is nothing to clear.
+	 *
+	 * Called from sysnoted() via the machine-dependent
+	 * noted() routine.
+	 * Clear the flag set above in fpunotify().
+	 */
+}
+
+void
+fpusysrfork(Ureg*)
+{
+	/*
+	 * rfork hook for the parent.  The body is empty in this
+	 * file, so no FPU state is saved here.
+	 *
+	 * Called early in the non-interruptible path of
+	 * sysrfork() via the machine-dependent syscall() routine.
+	 * Save the state so that it can be easily copied
+	 * to the child process later.
+	 */
+}
+
+void
+fpusysrforkchild(Proc*, Proc*)
+{
+	/*
+	 * rfork hook for the child.  The body is empty in this
+	 * file, so nothing is copied between the two Procs here.
+	 *
+	 * Called later in sysrfork() via the machine-dependent
+	 * sysrforkchild() routine.
+	 * Copy the parent FPU state to the child.
+	 */
+}
+
+void
+fpuprocsave(Proc*)
+{
+	/*
+	 * Scheduler-entry hook.  The body is empty in this file,
+	 * so nothing is saved whether or not the FPU was in use.
+	 *
+	 * Called from sched() and sleep() via the machine-dependent
+	 * procsave() routine.
+	 * About to go in to the scheduler.
+	 * If the process wasn't using the FPU
+	 * there's nothing to do.
+	 */
+}
+
+void
+fpuprocrestore(Proc*)
+{
+	/*
+	 * Scheduler-exit hook; intentionally does nothing (empty body).
+	 *
+	 * The process has been rescheduled and is about to run.
+	 * Nothing to do here right now. If the process tries to use
+	 * the FPU again it will cause a Device Not Available
+	 * exception and the state will then be restored.
+	 */
+}
+
+void
+fpusysprocsetup(Proc*)
+{
+	/*
+	 * exec hook.  The body is empty in this file, so no FPU
+	 * state is actually changed here.
+	 *
+	 * Disable the FPU.
+	 * Called from sysexec() via sysprocsetup() to
+	 * set the FPU for the new process.
+	 */
+}
+
+void