
riscv: it builds.

I had to turn off the amd64-specific tls regression test.

It boots as a coreboot payload on the Spike emulator,
as far as it currently can.

Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>
Ronald G. Minnich, 7 years ago (commit 1584469569)

+ 3 - 3
sys/src/9/port/devcap.c

@@ -237,9 +237,9 @@ capwrite(Chan *c, void *va, int32_t n, int64_t m)
 		if(key == nil)
 			error(Eshort);
 		*key++ = 0;
-
-		hmac_sha1((uint8_t*)from, strlen(from), (uint8_t*)key,
-			  strlen(key), hash, nil);
+		panic("need a sha256");
+		//hmac_sha1((uint8_t*)from, strlen(from), (uint8_t*)key,
+		//strlen(key), hash, nil);
 
 		p = remcap(hash);
 		if(p == nil){

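The hunk above stubs the capability HMAC out with a panic until the port has a SHA2-based HMAC to call. A minimal sketch of the intended replacement, assuming a hypothetical hmac_sha256 with the same shape as libsec's hmac_sha1 (the function name and the SHA2_256dlen-sized buffer are assumptions, not current API):

	uint8_t hash[SHA2_256dlen];	/* assumed digest length, cf. SHA1dlen */

	/* hypothetical call, same argument order as hmac_sha1 */
	hmac_sha256((uint8_t*)from, strlen(from), (uint8_t*)key,
		strlen(key), hash, nil);

remcap() would then look the capability up by its SHA2-256-sized digest.
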
+ 11 - 11
sys/src/9/port/riscvport.json

@@ -10,19 +10,24 @@
 			"mksys -o ../port/error.h '-mode=error.h' $HARVEY/sys/src/sysconf.json"
 		],
 		"#SourceFiles": [
+			"../port/devcoreboot.c",
+			"../port/devkprof.c",
+			"../port/cpu_buffer.c",
+			"../port/devkbin.c",
+			"../port/devssl.c",
+			"../port/devtls.c"
+		],
+		"SourceFiles": [
 			"../port/alarm.c",
 			"../port/allocb.c",
 			"../port/cache.c",
 			"../port/chan.c",
-			"../port/cpu_buffer.c",
+			"../port/dev.c",
 			"../port/devcap.c",
 			"../port/devcons.c",
-			"../port/devcoreboot.c",
 			"../port/devdup.c",
 			"../port/devenv.c",
 			"../port/devfdmux.c",
-			"../port/devkprof.c",
-			"../port/devkbin.c",
 			"../port/devmnt.c",
 			"../port/devmntn.c",
 			"../port/devpipe.c",
@@ -33,8 +38,6 @@
 			"../port/devsd.c",
 			"../port/devsegment.c",
 			"../port/devsrv.c",
-			"../port/devssl.c",
-			"../port/devtls.c",
 			"../port/devtab.c",
 			"../port/devtrace.c",
 			"../port/devuart.c",
@@ -75,10 +78,7 @@
 			"../port/syszio.c",
 			"../port/taslock.c",
 			"../port/tod.c",
-			"../port/virtio_lib.c",
 			"../port/watermarks.c"
-		],
-		"SourceFiles": [
-			"../port/qio.c"
-		] }
+		]
+	}
 }

+ 6 - 0
sys/src/9/riscv/.gitignore

@@ -0,0 +1,6 @@
+/systab.c
+/cpu.c
+/init.h
+/errstr.h
+/init
+/riscvcpu.c

+ 343 - 0
sys/src/9/riscv/acore.c

@@ -0,0 +1,343 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include <tos.h>
+#include <pool.h>
+#include "ureg.h"
+#include "io.h"
+#include "../port/pmc.h"
+
+/*
+ * NIX code run at the AC.
+ * This is the "AC kernel".
+ */
+
+/*
+ * FPU:
+ *
+ * The TC handles the FPU by keeping track of the state for the
+ * current process. If it has been used and must be saved, it is saved, etc.
+ * When a process gets to the AC, we handle the FPU directly, and save its
+ * state before going back to the TC (or the TC state would be stale).
+ *
+ * Because of this, each time the process comes back to the AC and
+ * uses the FPU it will get a device not available trap and
+ * the state will be restored. This could be optimized because the AC
+ * is single-process, and we do not have to disable the FPU while
+ * saving, so it does not have to be restored.
+ */
+
+extern char* acfpunm(Ureg* ureg, void *unused_voidp);
+extern char* acfpumf(Ureg* ureg, void *unused_voidp);
+extern char* acfpuxf(Ureg* ureg, void *unused_voidp);
+extern void acfpusysprocsetup(Proc*);
+
+extern void _acsysret(void);
+extern void _actrapret(void);
+
+ACVctl *acvctl[256];
+
+/*
+ * Test inter-core calls by asking a core to print something, and then
+ * waiting for it to complete.
+ */
+static void
+testiccfn(void)
+{
+	print("called: %s\n", (char *)machp()->NIX.icc->data);
+}
+
+void
+testicc(int i)
+{
+	Mach *mp;
+
+	if((mp = sys->machptr[i]) != nil && mp->online != 0){
+		if(mp->NIX.nixtype != NIXAC){
+			print("testicc: core %d is not an AC\n", i);
+			return;
+		}
+		print("calling core %d... ", i);
+		mp->NIX.icc->flushtlb = 0;
+		snprint((char *)mp->NIX.icc->data, ICCLNSZ, "<%d>", i);
+		mfence();
+		mp->NIX.icc->fn = testiccfn;
+		mwait(&mp->NIX.icc->fn);
+	}
+}
+
+/*
+ * Check if the AC kernel (mach) stack has more than 4*KiB free.
+ * Do not call panic, the stack is gigantic.
+ */
+static void
+acstackok(void)
+{
+	char dummy;
+	char *sstart;
+
+	sstart = (char *)machp() - PGSZ - 4*PTSZ - MACHSTKSZ;
+	if(&dummy < sstart + 4*KiB){
+		print("ac kernel stack overflow, cpu%d stopped\n", machp()->machno);
+		DONE();
+	}
+}
+
+/*
+ * Main scheduling loop done by the application core.
+ * Some of the functions it runs will not return.
+ * The system call handler will reset the stack and
+ * call acsched again.
+ * We loop because some functions may return and we should
+ * wait for another call.
+ */
+void
+acsched(void)
+{
+	acmmuswitch();
+	for(;;){
+		acstackok();
+		mwait(&machp()->NIX.icc->fn);
+		if(machp()->NIX.icc->flushtlb)
+			acmmuswitch();
+		DBG("acsched: cpu%d: fn %#p\n", machp()->machno, machp()->NIX.icc->fn);
+		machp()->NIX.icc->fn();
+		DBG("acsched: cpu%d: idle\n", machp()->machno);
+		mfence();
+		machp()->NIX.icc->fn = nil;
+	}
+}
+
+void
+acmmuswitch(void)
+{
+	extern Page mach0pml4;
+
+	DBG("acmmuswitch pml4 %#p mach0pml4 %#p m0pml4 %#p\n", machp()->MMU.pml4->pa, mach0pml4.pa, sys->machptr[0]->MMU.pml4->pa);
+
+	rootput(machp()->MMU.pml4->pa);
+}
+
+/*
+ * Beware: up is not set when this function is called.
+ */
+void
+actouser(void)
+{
+#if 0
+	void xactouser(uint64_t);
+	Ureg *u;
+
+	acfpusysprocsetup(m->proc);
+
+	u = m->proc->dbgreg;
+	DBG("cpu%d: touser usp = %#p entry %#p\n", machp()->machno, u->sp, u->ip);
+	xactouser(u->sp);
+#endif
+	panic("actouser");
+}
+
+void
+actrapret(void)
+{
+	/* done by actrap() */
+}
+
+/*
+ * Entered in AP core context upon traps (system calls go through acsyscall);
+ * using up->dbgreg means cores MUST be homogeneous.
+ *
+ * BUG: We should set up some trapenable() mechanism for the AC,
+ * so that code like fpu.c could arrange for handlers specific to
+ * the AC, instead of doing that by hand here.
+ *
+ * All interrupts are masked while in the "kernel".
+ */
+void
+actrap(Ureg *u)
+{
+	panic("actrap");
+#if 0
+	char *n;
+	ACVctl *v;
+
+	n = nil;
+
+	_pmcupdate(m);
+	if(m->proc != nil){
+		m->proc->nactrap++;
+		m->proc->actime1 = fastticks(nil);
+	}
+	if(u->type < nelem(acvctl)){
+		v = acvctl[u->type];
+		if(v != nil){
+			DBG("actrap: cpu%d: %llu\n", machp()->machno, u->type);
+			n = v->f(u, v->a);
+			if(n != nil)
+				goto Post;
+			return;
+		}
+	}
+	switch(u->type){
+	case IdtDF:
+		print("AC: double fault\n");
+		dumpregs(u);
+		ndnr();
+	case IdtIPI:
+		m->intr++;
+		DBG("actrap: cpu%d: IPI\n", machp()->machno);
+		apiceoi(IdtIPI);
+		break;
+	case IdtTIMER:
+		apiceoi(IdtTIMER);
+		panic("timer interrupt in an AC");
+		break;
+	case IdtPF:
+		/* this case is here for debug only */
+		m->pfault++;
+		DBG("actrap: cpu%d: PF cr2 %#llx\n", machp()->machno, cr2get());
+		break;
+	default:
+		print("actrap: cpu%d: %llu\n", machp()->machno, u->type);
+	}
+Post:
+	m->NIX.icc->rc = ICCTRAP;
+	m->cr2 = cr2get();
+	memmove(m->proc->dbgreg, u, sizeof *u);
+	m->NIX.icc->note = n;
+	fpuprocsave(m->proc);
+	_pmcupdate(m);
+	mfence();
+	m->NIX.icc->fn = nil;
+	ready(m->proc);
+
+	mwait(&m->NIX.icc->fn);
+
+	if(m->NIX.icc->flushtlb)
+		acmmuswitch();
+	if(m->NIX.icc->fn != actrapret)
+		acsched();
+	DBG("actrap: ret\n");
+	memmove(u, m->proc->dbgreg, sizeof *u);
+	if(m->proc)
+		m->proc->actime += fastticks2us(fastticks(nil) - m->proc->actime1);
+#endif
+}
+
+void
+acsyscall(void)
+{
+	panic("acsyscall");
+#if 0
+	Proc *p;
+
+	/*
+	 * If we saved the Ureg into m->proc->dbgregs,
+	 * There's nothing else we have to do.
+	 * Otherwise, we should m->proc->dbgregs = u;
+	 */
+	DBG("acsyscall: cpu%d\n", machp()->machno);
+
+	_pmcupdate(m);
+	p = m->proc;
+	p->actime1 = fastticks(nil);
+	m->syscall++;	/* would also count it in the TS core */
+	m->NIX.icc->rc = ICCSYSCALL;
+	m->cr2 = cr2get();
+	fpuprocsave(p);
+	_pmcupdate(m);
+	mfence();
+	m->NIX.icc->fn = nil;
+	ready(p);
+	/*
+	 * The next call is probably going to make us jmp
+	 * into user code, forgetting all our state in this
+	 * stack, upon the next syscall.
+	 * We don't nest calls in the current stack for too long.
+	 */
+	acsched();
+#endif
+}
+
+/*
+ * Called in AP core context, to return from system call.
+ */
+void
+acsysret(void)
+{
+	panic("acsysret");
+#if 0
+	DBG("acsysret\n");
+	if(m->proc != nil)
+		m->proc->actime += fastticks2us(fastticks(nil) - m->proc->actime1);
+	_acsysret();
+#endif
+}
+
+void
+dumpreg(void *u)
+{
+	print("reg is %p\n", u);
+	ndnr();
+}
+
+char *rolename[] =
+{
+	[NIXAC] = "AC",
+	[NIXTC] = "TC",
+	[NIXKC] = "KC",
+	[NIXXC] = "XC",
+};
+
+void
+acmodeset(int mode)
+{
+	switch(mode){
+	case NIXAC:
+	case NIXKC:
+	case NIXTC:
+	case NIXXC:
+		break;
+	default:
+		panic("acmodeset: bad mode %d", mode);
+	}
+	machp()->NIX.nixtype = mode;
+}
+
+void
+acinit(void)
+{
+	Mach *mp;
+	Proc *pp;
+
+	/*
+	 * Lower the priority of the apic to 0,
+	 * to accept interrupts.
+	 * Raise it later if needed to disable them.
+	 */
+	panic("apicpri");
+	//apicpri(0);
+
+	/*
+	 * Be sure a few assembler assumptions still hold.
+	 * Someone moved m->stack and I had fun debugging...
+	 */
+	mp = 0;
+	pp = 0;
+	assert((uintptr)&mp->proc == 16);
+	assert((uintptr)&pp->dbgreg == 24);
+	assert((uintptr)&mp->stack == 24);
+}
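
The zero-pointer arithmetic above is a hand-rolled offsetof(): it pins offsetof(Mach, proc) to 16, and offsetof(Proc, dbgreg) and offsetof(Mach, stack) to 24, because the assembly entry paths reach those fields with hardcoded displacements. In C terms, the assembler's access amounts to roughly:

	/* sketch: what a hardcoded 16 in the trap path amounts to */
	Proc *p = *(Proc**)((char*)machp() + 16);	/* must equal machp()->proc */

Move a field in Mach or Proc and that breaks silently, hence the asserts.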

+ 201 - 0
sys/src/9/riscv/arch.c

@@ -0,0 +1,201 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * EPISODE 12B
+ * How to recognise different types of trees from quite a long way away.
+ * NO. 1
+ * THE LARCH
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ureg.h"
+
+/* The rules are different for different compilers, so we need to define up here. */
+// Initialize it to force it into the data segment.
+// That way, if we set it in assembly, it won't get zeroed by the bss init in main.
+// N.B. There was an interesting hack in plan 9 c: you could grab up to two registers for your
+// program. In the case of Plan 9, m was r15 and up was r14. Very slick, and if there is a way to do
+// this in gcc or clang I don't know it. This also nicely handled per-cpu info; r15/r14 were always right for
+// your core and context.
+//Mach *m = (void *)0;
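
For what it's worth, gcc does have a mechanism for this: global register variables. A minimal sketch, not part of this commit; the register choice is illustrative and the whole kernel would have to be compiled with the matching -ffixed-s11 flag for it to be safe (clang's support for this extension is less complete):

	/* pin the per-cpu Mach pointer to a callee-saved register,
	 * in the spirit of Plan 9 C's r15/r14 trick */
	register Mach *m asm("s11");	/* riscv64: s11 is x27 */
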
+
+int
+incref(Ref *r)
+{
+	int x;
+
+	lock(&r->l);
+	x = ++r->ref;
+	unlock(&r->l);
+	return x;
+}
+
+int
+decref(Ref *r)
+{
+	int x;
+
+	lock(&r->l);
+	x = --r->ref;
+	unlock(&r->l);
+	if(x < 0)
+		panic("decref pc=%#p", getcallerpc());
+
+	return x;
+}
+
+void fpuprocrestore(Proc *p)
+{
+	panic("fpuprocrestore");
+}
+
+void
+procrestore(Proc *p)
+{
+	uint64_t t;
+
+	if(p->kp)
+		return;
+	cycles(&t);
+	p->pcycles -= t;
+
+	fpuprocrestore(p);
+}
+
+void
+fpuprocsave(Proc *p)
+{
+	panic("fpuprocsave");
+}
+
+/*
+ *  Save the mach dependent part of the process state.
+ *  NB: the caller should mmuflushtlb after procsave().
+ *  procsave/procrestore don't touch the mmu, they
+ *  care about fpu, mostly.
+ */
+void
+procsave(Proc *p)
+{
+	uint64_t t;
+
+	cycles(&t);
+	p->pcycles += t;
+
+	fpuprocsave(p);
+}
+
+static void
+linkproc(void)
+{
+	Proc *up = externup();
+	spllo();
+	up->kpfun(up->kparg);
+	pexit("kproc dying", 0);
+}
+
+void
+kprocchild(Proc* p, void (*func)(void*), void* arg)
+{
+	/*
+	 * gotolabel() needs a word on the stack in
+	 * which to place the return PC used to jump
+	 * to linkproc().
+	 */
+	p->sched.pc = PTR2UINT(linkproc);
+	p->sched.sp = PTR2UINT(p->kstack+KSTACK-BY2SE);
+	p->sched.sp = STACKALIGN(p->sched.sp);
+
+	p->kpfun = func;
+	p->kparg = arg;
+}
+
+/*
+ *  put the processor in the halt state if we've no processes to run.
+ *  an interrupt will get us going again.
+ *  The boot TC in nix can't halt, because it must stay alert in
+ *  case an AC makes a handler process ready.
+ *  We should probably use mwait in that case.
+ */
+void
+idlehands(void)
+{
+/*	if(machp()->NIX.nixtype != NIXAC)
+	halt();*/
+}
+
+#if 0
+void
+ureg2gdb(Ureg *u, uintptr_t *g)
+{
+	g[GDB_AX] = u->ax;
+	g[GDB_BX] = u->bx;
+	g[GDB_CX] = u->cx;
+	g[GDB_DX] = u->dx;
+	g[GDB_SI] = u->si;
+	g[GDB_DI] = u->di;
+	g[GDB_BP] = u->bp;
+	g[GDB_SP] = u->sp;
+	g[GDB_R8] = u->r8;
+	g[GDB_R9] = u->r9;
+	g[GDB_R10] = u->r10;
+	g[GDB_R11] = u->r11;
+	g[GDB_R12] = u->r12;
+	g[GDB_R13] = u->r13;
+	g[GDB_R14] = u->r14;
+	g[GDB_R15] = u->r15;
+	g[GDB_PC] = u->ip;
+
+	/* it's weird, docs say 5 32-bit fields
+	 * but I count 4 if we pack these. Fix me
+	 */
+	g[GDB_PS] = 0; // u->PS;
+	g[GDB_CS] = 0; // u->CS;
+	g[GDB_SS] = 0; // u->SS;
+	g[GDB_DS] = 0; // u->DS;
+	g[GDB_ES] = 0; // u->ES;
+	g[GDB_FS] = 0; // u->FS;
+	g[GDB_GS] = 0; // u->GS;
+}
+
+void
+gdb2ureg(uintptr_t *g, Ureg *u)
+{
+	u->ax = g[GDB_AX];
+	u->bx = g[GDB_BX];
+	u->cx = g[GDB_CX];
+	u->dx = g[GDB_DX];
+	u->si = g[GDB_SI];
+	u->di = g[GDB_DI];
+	u->bp = g[GDB_BP];
+	u->sp = g[GDB_SP];
+	u->r8 = g[GDB_R8];
+	u->r9 = g[GDB_R9];
+	u->r10 = g[GDB_R10];
+	u->r11 = g[GDB_R11];
+	u->r12 = g[GDB_R12];
+	u->r13 = g[GDB_R13];
+	u->r14 = g[GDB_R14];
+	u->r15 = g[GDB_R15];
+	u->ip = g[GDB_PC];
+
+	/* it's weird but gdb seems to have no way to
+	 * express the sp. Hmm.
+	 */
+	u->flags = g[GDB_PS];
+	/* is there any point to this? */
+	u->cs = g[GDB_CS];
+	u->ss = g[GDB_SS];
+}
+#endif

+ 165 - 0
sys/src/9/riscv/archriscv.c

@@ -0,0 +1,165 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#undef DBG
+#define DBG iprint
+
+void
+cpuiddump(void)
+{
+	print("riscv\n");
+}
+
+int64_t
+archhz(void)
+{
+	return 1024*1024*1024ULL;
+}
+
+int
+archmmu(void)
+{
+
+	/*
+	 * Should the check for machp()->machno != 0 be here
+	 * or in the caller (mmuinit)?
+	 *
+	 * To do here:
+	 * check and enable Pse;
+	 * Pge; Nxe.
+	 */
+
+	/*
+	 * How many page sizes are there?
+	 * Always have 4*KiB, but need to check
+	 * configured correctly.
+	 */
+	assert(PGSZ == 4*KiB);
+
+	sys->pgszlg2[0] = 12;
+	sys->pgszmask[0] = (1<<12)-1;
+	sys->pgsz[0] = 1<<12;
+	sys->npgsz = 1;
+
+	sys->pgszlg2[1] = 21;
+	sys->pgszmask[1] = (1<<21)-1;
+	sys->pgsz[1] = 1<<21;
+	sys->npgsz = 2;
+
+	sys->pgszlg2[2] = 30;
+	sys->pgszmask[2] = (1<<30)-1;
+	sys->pgsz[2] = 1<<30;
+	sys->npgsz = 3;
+
+	return sys->npgsz;
+}
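
The three sizes are the Sv39 leaf page sizes: each level of the tree adds a 9-bit index on top of the 12-bit page offset, so lg2 runs 12, 21, 30, and 12+9+9+9 bits is the whole 39-bit virtual address. A quick check of the arithmetic:

	assert(sys->pgszlg2[1] == 12 + 9);	/* 2 MiB megapage */
	assert(sys->pgszlg2[2] == 12 + 9 + 9);	/* 1 GiB gigapage */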
+
+static int
+fmtP(Fmt* f)
+{
+	uintmem pa;
+
+	pa = va_arg(f->args, uintmem);
+
+	if(f->flags & FmtSharp)
+		return fmtprint(f, "%#16.16llx", pa);
+
+	return fmtprint(f, "%llu", pa);
+}
+
+static int
+fmtL(Fmt* f)
+{
+	Mpl pl;
+
+	pl = va_arg(f->args, Mpl);
+
+	return fmtprint(f, "%#16.16llx", pl);
+}
+
+static int
+fmtR(Fmt* f)
+{
+	uint64_t r;
+
+	r = va_arg(f->args, uint64_t);
+
+	return fmtprint(f, "%#16.16llx", r);
+}
+
+/* virtual address fmt */
+static int
+fmtW(Fmt *f)
+{
+	uint64_t va;
+
+	va = va_arg(f->args, uint64_t);
+	return fmtprint(f, "%#llx=0x[%llx][%llx][%llx][%llx][%llx]", va,
+		PTLX(va, 3), PTLX(va, 2), PTLX(va, 1), PTLX(va, 0),
+		va & ((1<<PGSHFT)-1));
+
+}
+
+void
+archfmtinstall(void)
+{
+	/*
+	 * Architecture-specific formatting. Not as neat as they
+	 * could be (e.g. there's no defined type for a 'register':
+	 *	L - Mpl, mach priority level
+	 *	P - uintmem, physical address
+	 *	R - register
+	 * With a little effort these routines could be written
+	 * in a fairly architecturally-independent manner, relying
+	 * on the compiler to optimise-away impossible conditions,
+	 * and/or by exploiting the innards of the fmt library.
+	 */
+	fmtinstall('P', fmtP);
+	fmtinstall('L', fmtL);
+	fmtinstall('R', fmtR);
+	fmtinstall('W', fmtW);
+}
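
With the verbs installed, callers elsewhere in the port can write, for example:

	print("root %#P\n", machp()->MMU.pml4->pa);	/* 16-digit hex physical */
	print("va %W\n", (uint64_t)KZERO);	/* PTLX index at each level */

The %W breakdown prints the four page-table indexes plus the page offset, in the same order mmu.c walks them.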
+
+void
+archidle(void)
+{
+	panic("archidle"); //	halt();
+}
+
+void
+microdelay(int microsecs)
+{
+	print("microdelay\n");
+/*
+	uint64_t r, t;
+
+	r = rdtsc();
+	for(t = r + (sys->cyclefreq*microsecs)/1000000ull; r < t; r = rdtsc())
+		;
+ */
+}
+
+void
+millidelay(int millisecs)
+{
+	print("millidelay\n");
+/*
+	uint64_t r, t;
+
+	r = rdtsc();
+	for(t = r + (sys->cyclefreq*millisecs)/1000ull; r < t; r = rdtsc())
+		;
+ */
+}

+ 5 - 7
sys/src/9/riscv/build.json

@@ -18,14 +18,15 @@
 					"uint32_t kerndate = 1;"
 				],
 				"NoDev": [
+					"kbin",
+					"kprof"
+				],
+				"Dev": [
 					"arch",
 					"cap",
 					"cons",
 					"dup",
 					"env",
-					"ip",
-					"kbin",
-					"kprof",
 					"mnt",
 					"mntn",
 					"pipe",
@@ -34,11 +35,8 @@
 					"root",
 					"segment",
 					"srv",
-					"ssl",
-					"tls",
 					"uart",
-					"ws",
-					"zp"
+					"ws"
 				],
 				"NoIp": [
 					"tcp",

+ 11 - 2
sys/src/9/riscv/core.json

@@ -41,12 +41,11 @@
 			"inith.json"
 		],
 	    "MissingSourceFiles": [
-			"arch.c",
+		        "acore.c",
 			"archriscv.c",
 			"asm.c",
 		        "coreboot.c",
 			"devarch.c",
-			"map.c",
 			"memory.c",
 			"mmu.c",
 			"mp.c",
@@ -57,8 +56,18 @@
 		],
 		"SourceFiles": [
 			"asm.S",
+			"arch.c",
+		        "acore.c",
+			"archriscv.c",
 			"ctype.c",
+			"devarch.c",
 			"main.c",
+			"map.c",
+			"mmu.c",
+			"qmalloc.c",
+			"syscall.c",
+			"systab.c",
+		        "tcore.c",
 			"uart.c"
 		]
 	}

+ 598 - 0
sys/src/9/riscv/devarch.c

@@ -0,0 +1,598 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ureg.h"
+
+/* leave this for now; we might want to keep track of MMIO apart from memory. */
+typedef struct IOMap IOMap;
+struct IOMap
+{
+	IOMap	*next;
+	int	reserved;
+	char	tag[13];
+	uintptr_t	start;
+	uintptr_t	end;
+};
+
+static struct
+{
+	Lock l;
+	IOMap	*map;
+	IOMap	*free;
+	IOMap	maps[32];		// some initial free maps
+
+	QLock	ql;			// lock for reading map
+} iomap;
+
+enum {
+	Qdir = 0,
+	Qioalloc = 1,
+	Qiob,
+	Qiow,
+	Qiol,
+	Qbase,
+	Qmapram,
+
+	Qmax = 16,
+};
+
+typedef int32_t Rdwrfn(Chan*, void*, int32_t, int64_t);
+
+static Rdwrfn *readfn[Qmax];
+static Rdwrfn *writefn[Qmax];
+
+static Dirtab archdir[Qmax] = {
+	".",		{ Qdir, 0, QTDIR },	0,	0555,
+	"ioalloc",	{ Qioalloc, 0 },	0,	0444,
+	/* NOTE: kludge until we have real permissions. */
+	"iob",		{ Qiob, 0 },		0,	0660 | 6,
+	"iow",		{ Qiow, 0 },		0,	0660 | 6,
+	"iol",		{ Qiol, 0 },		0,	0660 | 6,
+	"mapram",	{ Qmapram, 0 },	0,	0444,
+};
+Lock archwlock;	/* the lock is only for changing archdir */
+int narchdir = Qbase;
+
+/*
+ * Add a file to the #P listing.  Once added, you can't delete it.
+ * You can't add a file with the same name as one already there,
+ * and you get a pointer to the Dirtab entry so you can do things
+ * like change the Qid version.  Changing the Qid path is disallowed.
+ */
+Dirtab*
+addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn)
+{
+	int i;
+	Dirtab d;
+	Dirtab *dp;
+
+	memset(&d, 0, sizeof d);
+	strcpy(d.name, name);
+	d.perm = perm;
+
+	lock(&archwlock);
+	if(narchdir >= Qmax){
+		unlock(&archwlock);
+		return nil;
+	}
+
+	for(i=0; i<narchdir; i++)
+		if(strcmp(archdir[i].name, name) == 0){
+			unlock(&archwlock);
+			return nil;
+		}
+
+	d.qid.path = narchdir;
+	archdir[narchdir] = d;
+	readfn[narchdir] = rdfn;
+	writefn[narchdir] = wrfn;
+	dp = &archdir[narchdir++];
+	unlock(&archwlock);
+
+	return dp;
+}
+
+void
+ioinit(void)
+{
+	int i;
+
+	for(i = 0; i < nelem(iomap.maps)-1; i++)
+		iomap.maps[i].next = &iomap.maps[i+1];
+	iomap.maps[i].next = nil;
+	iomap.free = iomap.maps;
+}
+
+// Reserve a range to be ioalloced later.
+// This is particularly useful for exchangeable cards, such
+// as pcmcia and cardbus cards.
+int
+ioreserve(int n, int size, int align, char *tag)
+{
+	panic("ioreserve");
+#if 0
+	IOMap *map, **l;
+	int i, port;
+
+	lock(&iomap.l);
+	// find a free port above 0x400 and below 0x1000
+	port = 0x400;
+	for(l = &iomap.map; *l; l = &(*l)->next){
+		map = *l;
+		if (map->start < 0x400)
+			continue;
+		i = map->start - port;
+		if(i > size)
+			break;
+		if(align > 0)
+			port = ((port+align-1)/align)*align;
+		else
+			port = map->end;
+	}
+	if(*l == nil){
+		unlock(&iomap.l);
+		return -1;
+	}
+	map = iomap.free;
+	if(map == nil){
+		print("ioalloc: out of maps");
+		unlock(&iomap.l);
+		return port;
+	}
+	iomap.free = map->next;
+	map->next = *l;
+	map->start = port;
+	map->end = port + size;
+	map->reserved = 1;
+	strncpy(map->tag, tag, sizeof(map->tag));
+	map->tag[sizeof(map->tag)-1] = 0;
+	*l = map;
+
+	archdir[0].qid.vers++;
+
+	unlock(&iomap.l);
+	return map->start;
+#endif
+	return 0;
+}
+
+//
+//	alloc some io port space and remember who it was
+//	alloced to.  if port < 0, find a free region.
+//
+int
+ioalloc(int port, int size, int align, char *tag)
+{
+	panic("ioalloc");
+#if 0
+	IOMap *map, **l;
+	int i;
+
+	lock(&iomap.l);
+	if(port < 0){
+		// find a free port above 0x400 and below 0x1000
+		port = 0x400;
+		for(l = &iomap.map; *l; l = &(*l)->next){
+			map = *l;
+			if (map->start < 0x400)
+				continue;
+			i = map->start - port;
+			if(i > size)
+				break;
+			if(align > 0)
+				port = ((port+align-1)/align)*align;
+			else
+				port = map->end;
+		}
+		if(*l == nil){
+			unlock(&iomap.l);
+			return -1;
+		}
+	} else {
+		// Only 64KB I/O space on the x86.
+		if((port+size) > 0x10000){
+			unlock(&iomap.l);
+			return -1;
+		}
+		// see if the space clashes with previously allocated ports
+		for(l = &iomap.map; *l; l = &(*l)->next){
+			map = *l;
+			if(map->end <= port)
+				continue;
+			if(map->reserved && map->start == port && map->end == port + size) {
+				map->reserved = 0;
+				unlock(&iomap.l);
+				return map->start;
+			}
+			if(map->start >= port+size)
+				break;
+			unlock(&iomap.l);
+			return -1;
+		}
+	}
+	map = iomap.free;
+	if(map == nil){
+		print("ioalloc: out of maps");
+		unlock(&iomap.l);
+		return port;
+	}
+	iomap.free = map->next;
+	map->next = *l;
+	map->start = port;
+	map->end = port + size;
+	strncpy(map->tag, tag, sizeof(map->tag));
+	map->tag[sizeof(map->tag)-1] = 0;
+	*l = map;
+
+	archdir[0].qid.vers++;
+
+	unlock(&iomap.l);
+	return map->start;
+#endif
+	return 0;
+}
+
+void
+iofree(int port)
+{
+	panic("iofree");
+#if 0
+	IOMap *map, **l;
+
+	lock(&iomap.l);
+	for(l = &iomap.map; *l; l = &(*l)->next){
+		if((*l)->start == port){
+			map = *l;
+			*l = map->next;
+			map->next = iomap.free;
+			iomap.free = map;
+			break;
+		}
+		if((*l)->start > port)
+			break;
+	}
+	archdir[0].qid.vers++;
+	unlock(&iomap.l);
+#endif
+}
+
+int
+iounused(int start, int end)
+{
+	IOMap *map;
+
+	for(map = iomap.map; map; map = map->next){
+		if((start >= map->start && start < map->end)
+		|| (start <= map->start && end > map->start))
+			return 0;
+	}
+	return 1;
+}
+
+#if 0
+static void
+checkport(int start, int end)
+{
+	if(iounused(start, end))
+		return;
+	error(Eperm);
+}
+#endif
+
+static Chan*
+archattach(char* spec)
+{
+	return devattach('P', spec);
+}
+
+Walkqid*
+archwalk(Chan* c, Chan *nc, char** name, int nname)
+{
+	return devwalk(c, nc, name, nname, archdir, narchdir, devgen);
+}
+
+static int32_t
+archstat(Chan* c, uint8_t* dp, int32_t n)
+{
+	return devstat(c, dp, n, archdir, narchdir, devgen);
+}
+
+static Chan*
+archopen(Chan* c, int omode)
+{
+	return devopen(c, omode, archdir, narchdir, devgen);
+}
+
+static void
+archclose(Chan* c)
+{
+}
+
+enum
+{
+	Linelen= 31,
+};
+
+static int32_t
+archread(Chan *c, void *a, int32_t n, int64_t offset)
+{
+	char *buf, *p;
+	//int port;
+	//uint16_t *sp;
+	//uint32_t *lp;
+	IOMap *map;
+	Rdwrfn *fn;
+
+	switch((uint32_t)c->qid.path){
+
+	case Qdir:
+		return devdirread(c, a, n, archdir, narchdir, devgen);
+
+#if 0
+// not now, not ever?
+	case Qiob:
+		port = offset;
+		checkport(offset, offset+n);
+		for(p = a; port < offset+n; port++)
+			*p++ = inb(port);
+		return n;
+
+	case Qiow:
+		if(n & 1)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		sp = a;
+		for(port = offset; port < offset+n; port += 2)
+			*sp++ = ins(port);
+		return n;
+
+	case Qiol:
+		if(n & 3)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		lp = a;
+		for(port = offset; port < offset+n; port += 4)
+			*lp++ = inl(port);
+		return n;
+
+#endif
+	case Qioalloc:
+		break;
+
+	default:
+		if(c->qid.path < narchdir && (fn = readfn[c->qid.path]))
+			return fn(c, a, n, offset);
+		error(Eperm);
+		break;
+	}
+
+	if((buf = malloc(n)) == nil)
+		error(Enomem);
+	p = buf;
+	n = n/Linelen;
+	offset = offset/Linelen;
+
+	switch((uint32_t)c->qid.path){
+	case Qioalloc:
+		lock(&iomap.l);
+		for(map = iomap.map; n > 0 && map != nil; map = map->next){
+			if(offset-- > 0)
+				continue;
+			sprint(p, "%#8lx %#8lx %-12.12s\n", map->start, map->end-1, map->tag);
+			p += Linelen;
+			n--;
+		}
+		unlock(&iomap.l);
+		break;
+	case Qmapram:
+/* shit */
+#ifdef NOTYET
+		for(mp = rmapram.map; mp->size; mp++){
+			/*
+			 * Up to MemMinMiB is already set up.
+			 */
+			if(mp->addr < MemMinMiB*MiB){
+				if(mp->addr+mp->size <= MemMinMiB*MiB)
+					continue;
+				pa = MemMinMiB*MiB;
+				size = mp->size - MemMinMiB*MiB-mp->addr;
+			}
+			else{
+				pa = mp->addr;
+				size = mp->size;
+			}
+		}
+#endif
+		error("Not yet");
+
+		break;
+	}
+
+	n = p - buf;
+	memmove(a, buf, n);
+	free(buf);
+
+	return n;
+}
+
+static int32_t
+archwrite(Chan *c, void *a, int32_t n, int64_t offset)
+{
+	//char *p;
+	//int port;
+	//uint16_t *sp;
+	//uint32_t *lp;
+	Rdwrfn *fn;
+
+	switch((uint32_t)c->qid.path){
+#if 0
+
+	case Qiob:
+		p = a;
+		checkport(offset, offset+n);
+		for(port = offset; port < offset+n; port++)
+			outb(port, *p++);
+		return n;
+
+	case Qiow:
+		if(n & 1)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		sp = a;
+		for(port = offset; port < offset+n; port += 2)
+			outs(port, *sp++);
+		return n;
+
+	case Qiol:
+		if(n & 3)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		lp = a;
+		for(port = offset; port < offset+n; port += 4)
+			outl(port, *lp++);
+		return n;
+
+#endif
+	default:
+		if(c->qid.path < narchdir && (fn = writefn[c->qid.path]))
+			return fn(c, a, n, offset);
+		error(Eperm);
+		break;
+	}
+	return 0;
+}
+
+Dev archdevtab = {
+	.dc = 'P',
+	.name = "arch",
+
+	.reset = devreset,
+	.init = devinit,
+	.shutdown = devshutdown,
+	.attach = archattach,
+	.walk = archwalk,
+	.stat = archstat,
+	.open = archopen,
+	.create = devcreate,
+	.close = archclose,
+	.read = archread,
+	.bread = devbread,
+	.write = archwrite,
+	.bwrite = devbwrite,
+	.remove = devremove,
+	.wstat = devwstat,
+};
+
+/*
+ */
+void
+nop(void)
+{
+}
+
+void (*coherence)(void) = mfence;
+
+static int32_t
+cputyperead(Chan* c, void *a, int32_t n, int64_t off)
+{
+	return readstr(off, a, n, "riscv");
+}
+
+static int32_t
+numcoresread(Chan* c, void *a, int32_t n, int64_t off)
+{
+	char buf[8];
+	snprint(buf, 8, "%d\n", sys->nmach);
+	return readstr(off, a, n, buf);
+}
+
+void
+archinit(void)
+{
+	addarchfile("cputype", 0444, cputyperead, nil);
+	addarchfile("numcores", 0444, numcoresread, nil);
+}
+
+void
+archreset(void)
+{
+	panic("archreset");
+
+}
+
+/*
+ *  return value and speed of timer
+ */
+uint64_t
+fastticks(uint64_t* hz)
+{
+	if(hz != nil)
+		*hz = machp()->cpuhz;
+	return rdtsc();
+}
+
+uint32_t
+ms(void)
+{
+	return fastticks2us(rdtsc());
+}
+
+/*
+ *  set next timer interrupt
+ */
+void
+timerset(uint64_t x)
+{
+	panic("apictimerset");
+//	extern void apictimerset(uint64_t);
+
+//	apictimerset(x);
+}
+
+void
+cycles(uint64_t* t)
+{
+	panic("cycles");
+	*t = 0;
+}
+
+void
+delay(int millisecs)
+{
+	uint64_t r, t;
+
+	if(millisecs <= 0)
+		millisecs = 1;
+	cycles(&r);
+	for(t = r + (sys->cyclefreq*millisecs)/1000ull; r < t; cycles(&r))
+		;
+}
+
+/*
+ *  performance measurement ticks.  must be low overhead.
+ *  doesn't have to count over a second.
+ */
+uint64_t
+perfticks(void)
+{
+	uint64_t x;
+
+//	if(m->havetsc)
+		cycles(&x);
+//	else
+//		x = 0;
+	return x;
+}

File diff suppressed because it is too large
+ 726 - 465
sys/src/9/riscv/encoding.h


+ 2 - 16
sys/src/9/riscv/fns.h

@@ -160,24 +160,10 @@ void*	vmap(uintptr_t, usize);
 void	vsvminit(int, int, Mach *);
 void	vunmap(void*, usize);
 
-extern uint64_t cr0get(void);
-extern void cr0put(uint64_t);
-extern uint64_t cr2get(void);
-extern uint64_t cr3get(void);
-extern void cr3put(uint64_t);
-extern uint64_t cr4get(void);
-extern void cr4put(uint64_t);
-extern void gdtget(void*);
-extern void gdtput(int, uint64_t, uint16_t);
+extern uint64_t rootget(void);
+extern void rootput(uintptr_t);
 extern void idtput(int, uint64_t);
-extern uint64_t rdmsr(uint32_t);
 extern uint64_t rdtsc(void);
-extern void trput(uint64_t);
-extern void wrmsr(uint32_t, uint64_t);
-
-// TODO(aki): once we figure this out, these will go.
-extern int infected_with_std(void);
-extern void disinfect_std(void);
 
 extern int islo(void);
 extern void spldone(void);

+ 245 - 0
sys/src/9/riscv/main.c

@@ -31,6 +31,12 @@ void die(char *s)
 	while (1);
 }
 
+void
+ndnr(void)
+{
+	die("ndnr");
+}
+
 static void puts(char * s, int n)
 {
 	while (n--)
@@ -44,6 +50,14 @@ static int x = 0x123456;
  * we don't have to do that. */
 static uint64_t m0stack[4096];
 static uint64_t m0stack[4096];
 static Mach m0;
 static Mach m0;
+Sys asys, *sys=&asys;
+Conf conf;
+uintptr_t kseg0 = KZERO;
+char *cputype = "riscv";
+
+/* I forget where this comes from and I don't care just now. */
+uint32_t kerndate;
+
 
 
 /* general purpose hart startup. We call this via startmach.
 /* general purpose hart startup. We call this via startmach.
  * When we enter here, the machp() function is usable.
  * When we enter here, the machp() function is usable.
@@ -103,3 +117,234 @@ main(uint32_t mbmagic, uint32_t mbaddress)
 	msg("got somewhere");
 	msg("got somewhere");
 	startmach(bsp, &m0);
 	startmach(bsp, &m0);
 }
 }
+
+/* stubs until we implement in assembly */
+int corecolor(int _)
+{
+	return -1;
+}
+
+Proc *externup(void)
+{
+	return machp()->externup;
+}
+
+void errstr(char *s, int i) {
+	panic("errstr");
+}
+
+void
+oprof_alarm_handler(Ureg *u)
+{
+	panic((char *)__func__);
+}
+
+void
+hardhalt(void)
+{
+	panic((char *)__func__);
+}
+
+uintmem
+physalloc(uint64_t _, int*__, void*___)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+void
+ureg2gdb(Ureg *u, uintptr_t *g)
+{
+	panic((char *)__func__);
+}
+
+int
+userureg(Ureg*u)
+{
+	panic((char *)__func__);
+	return -1;
+}
+
+uintptr_t
+userpc(Ureg*u)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+
+int tas32(void *_)
+{
+	panic("tas32");
+	return -1;
+}
+int      cas32(void*_, uint32_t __, uint32_t ___)
+{
+	panic((char *)__func__);
+	return -1;
+}
+
+void    exit(int _)
+{
+	panic((char *)__func__);
+}
+
+void fpunoted(void)
+{
+	panic((char *)__func__);
+}
+
+void fpunotify(Ureg*_)
+{
+	panic((char *)__func__);
+}
+
+void fpusysrfork(Ureg*_)
+{
+	panic((char *)__func__);
+}
+
+void kexit(Ureg*_)
+{
+	panic((char *)__func__);
+}
+
+char*
+seprintphysstats(char*_, char*__)
+{
+	return "NOT YET";
+}
+
+void
+reboot(void*_, void*__, int32_t ___)
+{
+	panic("reboot");
+}
+
+void fpusysprocsetup(Proc *_)
+{
+	panic((char *)__func__);
+}
+
+void sysrforkret(void)
+{
+	panic((char *)__func__);
+}
+
+void     fpusysrforkchild(Proc*_, Proc*__)
+{
+	panic((char *)__func__);
+}
+
+int
+fpudevprocio(Proc*p, void*v, int32_t _, uintptr_t __, int ___)
+{
+	panic((char *)__func__);
+	return -1;
+}
+
+void
+setregisters(Ureg*u, char*f, char*t, int amt)
+{
+	panic((char *)__func__);
+}
+
+uint64_t rdtsc(void)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+int islo(void)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+void mfence(void)
+{
+	panic((char *)__func__);
+}
+
+uintptr_t
+dbgpc(Proc*p)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+
+void dumpstack(void)
+{
+	panic((char *)__func__);
+}
+
+void
+dumpgpr(Ureg* ureg)
+{
+	panic((char *)__func__);
+}
+
+void
+setkernur(Ureg*u, Proc*p)
+{
+	panic((char *)__func__);
+}
+
+
+void
+physfree(uintmem data, uint64_t size)
+{
+	panic("physfree %p 0x%lx", data, size);
+}
+
+void
+stacksnippet(void)
+{
+	//Stackframe *stkfr;
+	kmprint(" stack:");
+//	for(stkfr = stackframe(); stkfr != nil; stkfr = stkfr->next)
+//		kmprint(" %c:%p", ktextaddr(stkfr->pc) ? 'k' : '?', ktextaddr(stkfr->pc) ? (stkfr->pc & 0xfffffff) : stkfr->pc);
+	kmprint("\n");
+}
+
+
+/* crap. */
+/* this should come from build but it's intimately tied in to VGA. Crap. */
+Physseg physseg[8];
+int nphysseg = 8;
+
+/* bringup -- remove asap. */
+void
+DONE(void)
+{
+	print("DONE\n");
+	//prflush();
+	delay(10000);
+	ndnr();
+}
+
+void
+HERE(void)
+{
+	print("here\n");
+	//prflush();
+	delay(5000);
+}
+
+/* The old plan 9 standby ... wave ... */
+
+/* Keep to debug trap.c */
+void wave(int c)
+{
+	testPrint(c);
+}
+
+void hi(char *s)
+{
+	if (! s)
+		s = "<NULL>";
+	while (*s)
+		wave(*s++);
+}
+

+ 61 - 0
sys/src/9/riscv/map.c

@@ -0,0 +1,61 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#define _KADDR(pa)	UINT2PTR(kseg0+((uintptr)(pa)))
+#define _PADDR(va)	PTR2UINT(((uintptr)(va)) - kseg0)
+
+#define TMFM		(64*MiB)
+
+int km, ku, k2;
+void*
+KADDR(uintptr_t pa)
+{
+	uint8_t* va;
+
+	va = UINT2PTR(pa);
+	if(pa < TMFM) {
+		km++;
+		return KSEG0+va;
+	}
+
+	assert(pa < KSEG2);
+	k2++;
+	return KSEG2+va;
+}
+
+uintmem
+PADDR(void* va)
+{
+	uintmem pa;
+
+	pa = PTR2UINT(va);
+	if(pa >= KSEG0 && pa < KSEG0+TMFM)
+		return pa-KSEG0;
+	if(pa > KSEG2)
+		return pa-KSEG2;
+
+	panic("PADDR: va %#p pa %#p @ %#p\n", va, _PADDR(va), getcallerpc());
+	return 0;
+}
+
+KMap*
+kmap(Page* page)
+{
+	DBG("kmap(%#llx) @ %#p: %#p %#p\n",
+		page->pa, getcallerpc(),
+		page->pa, KADDR(page->pa));
+
+	return KADDR(page->pa);
+}
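
A small sanity sketch of the intended round trip, assuming the KSEG0, KSEG2 and TMFM values from mem.h: low addresses map through KSEG0, everything else through KSEG2, and PADDR inverts KADDR in both ranges:

	assert(PADDR(KADDR(1*MiB)) == 1*MiB);		/* below TMFM: KSEG0 */
	assert(PADDR(KADDR(128*MiB)) == 128*MiB);	/* above TMFM: KSEG2 */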

+ 922 - 0
sys/src/9/riscv/mmu.c

@@ -0,0 +1,922 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "encoding.h"
+#include "mmu.h"
+
+/*
+ * To do:
+ *	PteNX;
+ *	mmukmapsync grot for >1 processor;
+ *	replace vmap with newer version (no PDMAP);
+ *	mmuptcopy (PteSHARED trick?);
+ *	calculate and map up to TMFM (conf crap);
+ */
+
+#define TMFM		(64*MiB)		/* kernel memory */
+
+#define PPN(x)		((x)&~(PGSZ-1))
+
+#if 0
+/* Print the page table structures to the console */
+void print_page_table(void) {
+	print_page_table_at((void *)(read_csr(sptbr) << RISCV_PGSHIFT), 0, 0);
+}
+#endif
+
+void flush_tlb(void)
+{
+	asm volatile("sfence.vm");
+}
+
+size_t pte_ppn(uint64_t pte)
+{
+	return pte >> PTE_PPN_SHIFT;
+}
+
+uint64_t ptd_create(uintptr_t ppn)
+{
+	return (ppn << PTE_PPN_SHIFT) | PTE_V;
+}
+
+uint64_t pte_create(uintptr_t ppn, int prot, int user)
+{
+	uint64_t pte = (ppn << PTE_PPN_SHIFT) | PTE_R | PTE_V;
+	if (prot & PTE_W)
+		pte |= PTE_W;
+	if (prot & PTE_X)
+		pte |= PTE_X;
+	if (user)
+		pte |= PTE_U;
+	return pte;
+}
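
These helpers follow the Sv39 PTE layout that encoding.h mirrors: flag bits (V, R, W, X, U, ...) in the low bits, and the physical page number starting at PTE_PPN_SHIFT (10). A worked example, assuming a user text page at physical 0x80200000:

	uintptr_t ppn = 0x80200000UL >> RISCV_PGSHIFT;	/* 0x80200 */
	uint64_t pte = pte_create(ppn, PTE_X, 1);
	/* pte == (0x80200 << 10) | PTE_X | PTE_U | PTE_R | PTE_V */

rootput() below applies the same shift, so that sptbr (renamed satp in later privileged specs) receives the root table's page number rather than its address.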
+
+void
+rootput(uintptr_t root)
+{
+	uintptr_t ptbr = root >> RISCV_PGSHIFT;
+	write_csr(sptbr, ptbr);
+
+}
+void
+mmuflushtlb(uint64_t u)
+{
+
+	machp()->tlbpurge++;
+	if(machp()->MMU.pml4->daddr){
+		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
+		machp()->MMU.pml4->daddr = 0;
+	}
+	rootput((uintptr_t) machp()->MMU.pml4->pa);
+}
+
+void
+mmuflush(void)
+{
+	Proc *up = externup();
+	Mpl pl;
+
+	pl = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(pl);
+}
+
+static void
+mmuptpfree(Proc* proc, int clear)
+{
+	int l;
+	PTE *pte;
+	Page **last, *page;
+
+	for(l = 1; l < 4; l++){
+		last = &proc->MMU.mmuptp[l];
+		if(*last == nil)
+			continue;
+		for(page = *last; page != nil; page = page->next){
+//what is right here? 2 or 1?
+			if(l <= 2 && clear)
+				memset(UINT2PTR(page->va), 0, PTSZ);
+			pte = UINT2PTR(page->prev->va);
+			pte[page->daddr] = 0;
+			last = &page->next;
+		}
+		*last = proc->MMU.mmuptp[0];
+		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
+		proc->MMU.mmuptp[l] = nil;
+	}
+
+	machp()->MMU.pml4->daddr = 0;
+}
+
+static void
+tabs(int n)
+{
+	int i;
+
+	for(i = 0; i < n; i++)
+		print("  ");
+}
+
+void
+dumpptepg(int lvl, uintptr_t pa)
+{
+	PTE *pte;
+	int tab, i;
+
+	tab = 4 - lvl;
+	pte = UINT2PTR(KADDR(pa));
+	for(i = 0; i < PTSZ/sizeof(PTE); i++)
+		if(pte[i] & PteP){
+			tabs(tab);
+			print("l%d %#p[%#05x]: %#llx\n", lvl, pa, i, pte[i]);
+
+			/* skip kernel mappings */
+			if((pte[i]&PteU) == 0){
+				tabs(tab+1);
+				print("...kern...\n");
+				continue;
+			}
+			if(lvl > 2)
+				dumpptepg(lvl-1, PPN(pte[i]));
+		}
+}
+
+void
+dumpmmu(Proc *p)
+{
+	int i;
+	Page *pg;
+
+	print("proc %#p\n", p);
+	for(i = 3; i > 0; i--){
+		print("mmuptp[%d]:\n", i);
+		for(pg = p->MMU.mmuptp[i]; pg != nil; pg = pg->next)
+			print("\tpg %#p = va %#llx pa %#llx"
+				" daddr %#lx next %#p prev %#p\n",
+				pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
+	}
+	print("pml4 %#llx\n", machp()->MMU.pml4->pa);
+	if(0)dumpptepg(4, machp()->MMU.pml4->pa);
+}
+
+void
+dumpmmuwalk(uint64_t addr)
+{
+	int l;
+	PTE *pte, *pml4;
+
+	pml4 = UINT2PTR(machp()->MMU.pml4->va);
+	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+}
+
+static Page mmuptpfreelist;
+
+static Page*
+mmuptpalloc(void)
+{
+	void* va;
+	Page *page;
+
+	/*
+	 * Do not really need a whole Page structure,
+	 * but it makes testing this out a lot easier.
+	 * Could keep a cache and free excess.
+	 * Have to maintain any fiction for pexit?
+	 */
+	lock(&mmuptpfreelist.l);
+	if((page = mmuptpfreelist.next) != nil){
+		mmuptpfreelist.next = page->next;
+		mmuptpfreelist.ref--;
+		unlock(&mmuptpfreelist.l);
+
+		if(page->ref++ != 0)
+			panic("mmuptpalloc ref\n");
+		page->prev = page->next = nil;
+		memset(UINT2PTR(page->va), 0, PTSZ);
+
+		if(page->pa == 0)
+			panic("mmuptpalloc: free page with pa == 0");
+		return page;
+	}
+	unlock(&mmuptpfreelist.l);
+
+	if((page = malloc(sizeof(Page))) == nil){
+		print("mmuptpalloc Page\n");
+
+		return nil;
+	}
+	if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
+		print("mmuptpalloc va\n");
+		free(page);
+
+		return nil;
+	}
+
+	page->va = PTR2UINT(va);
+	page->pa = PADDR(va);
+	page->ref = 1;
+
+	if(page->pa == 0)
+		panic("mmuptpalloc: no pa");
+	return page;
+}
+
+void
+mmuswitch(Proc* proc)
+{
+	PTE *pte;
+	Page *page;
+	Mpl pl;
+
+	pl = splhi();
+	if(proc->newtlb){
+		/*
+ 		 * NIX: We cannot clear our page tables if they are going to
+		 * be used in the AC
+		 */
+		if(proc->ac == nil)
+			mmuptpfree(proc, 1);
+		proc->newtlb = 0;
+	}
+
+	if(machp()->MMU.pml4->daddr){
+		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
+		machp()->MMU.pml4->daddr = 0;
+	}
+
+	pte = UINT2PTR(machp()->MMU.pml4->va);
+	for(page = proc->MMU.mmuptp[3]; page != nil; page = page->next){
+		pte[page->daddr] = PPN(page->pa)|PteU|PteRW|PteP;
+		if(page->daddr >= machp()->MMU.pml4->daddr)
+			machp()->MMU.pml4->daddr = page->daddr+1;
+		page->prev = machp()->MMU.pml4;
+	}
+
+	//tssrsp0(machp(), STACKALIGN(PTR2UINT(proc->kstack+KSTACK)));
+	rootput((uintptr_t) machp()->MMU.pml4->pa);
+	splx(pl);
+}
+
+void
+mmurelease(Proc* proc)
+{
+	Page *page, *next;
+
+	mmuptpfree(proc, 0);
+
+	for(page = proc->MMU.mmuptp[0]; page != nil; page = next){
+		next = page->next;
+		if(--page->ref)
+			panic("mmurelease: page->ref %d\n", page->ref);
+		lock(&mmuptpfreelist.l);
+		page->next = mmuptpfreelist.next;
+		mmuptpfreelist.next = page;
+		mmuptpfreelist.ref++;
+		page->prev = nil;
+		unlock(&mmuptpfreelist.l);
+	}
+	if(proc->MMU.mmuptp[0] && pga.rend.l.p)
+		wakeup(&pga.rend);
+	proc->MMU.mmuptp[0] = nil;
+
+	panic("tssrsp0");
+	//tssrsp0(machp(), STACKALIGN(machp()->stack+MACHSTKSZ));
+	rootput(machp()->MMU.pml4->pa);
+}
+
+static void
+checkpte(uintmem ppn, void *a)
+{
+	int l;
+	PTE *pte, *pml4;
+	uint64_t addr;
+	char buf[240], *s;
+
+	addr = PTR2UINT(a);
+	pml4 = UINT2PTR(machp()->MMU.pml4->va);
+	pte = 0;
+	s = buf;
+	*s = 0;
+	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0)
+		goto Panic;
+	s = seprint(buf, buf+sizeof buf,
+		"check3: l%d pte %#p = %llx\n",
+		l, pte, pte?*pte:~0);
+	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
+		goto Panic;
+	s = seprint(s, buf+sizeof buf,
+		"check2: l%d  pte %#p = %llx\n",
+		l, pte, pte?*pte:~0);
+	if(*pte&PtePS)
+		return;
+	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
+		goto Panic;
+	seprint(s, buf+sizeof buf,
+		"check1: l%d  pte %#p = %llx\n",
+		l, pte, pte?*pte:~0);
+	return;
+Panic:
+
+	seprint(s, buf+sizeof buf,
+		"checkpte: l%d addr %#p ppn %#llx kaddr %#p pte %#p = %llx",
+		l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
+	print("%s\n", buf);
+	seprint(buf, buf+sizeof buf, "start %#llx unused %#llx"
+		" unmap %#llx end %#llx\n",
+		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
+	panic("%s", buf);
+}
+
+
+static void
+mmuptpcheck(Proc *proc)
+{
+	int lvl, npgs, i;
+	Page *lp, *p, *pgs[16], *fp;
+	uint idx[16];
+
+	if(proc == nil)
+		return;
+	lp = machp()->MMU.pml4;
+	for(lvl = 3; lvl >= 2; lvl--){
+		npgs = 0;
+		for(p = proc->MMU.mmuptp[lvl]; p != nil; p = p->next){
+			for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next)
+				if(fp == p){
+					dumpmmu(proc);
+					panic("ptpcheck: using free page");
+				}
+			for(i = 0; i < npgs; i++){
+				if(pgs[i] == p){
+					dumpmmu(proc);
+					panic("ptpcheck: dup page");
+				}
+				if(idx[i] == p->daddr){
+					dumpmmu(proc);
+					panic("ptcheck: dup daddr");
+				}
+			}
+			if(npgs >= nelem(pgs))
+				panic("ptpcheck: pgs is too small");
+			idx[npgs] = p->daddr;
+			pgs[npgs++] = p;
+			if(lvl == 3 && p->prev != lp){
+				dumpmmu(proc);
+				panic("ptpcheck: wrong prev");
+			}
+		}
+
+	}
+	npgs = 0;
+	for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next){
+		for(i = 0; i < npgs; i++)
+			if(pgs[i] == fp)
+				panic("ptpcheck: dup free page");
+		pgs[npgs++] = fp;
+	}
+}
+
+static uintmem
+pteflags(uint attr)
+{
+	uintmem flags;
+
+	flags = 0;
+	if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED|PTENOEXEC))
+		panic("mmuput: wrong attr bits: %#x\n", attr);
+	if(attr&PTEVALID)
+		flags |= PteP;
+	if(attr&PTEWRITE)
+		flags |= PteRW;
+	if(attr&PTEUSER)
+		flags |= PteU;
+	if(attr&PTEUNCACHED)
+		flags |= PtePCD;
+	if(attr&PTENOEXEC)
+		flags |= PteNX;
+	return flags;
+}
+
+void
+invlpg(uintptr_t _)
+{
+	panic("invlpage");
+}
+
+/*
+ * pg->pgszi indicates the page size in sys->pgsz[] used for the mapping.
+ * For the user, it can be either 2*MiB or 1*GiB pages.
+ * For 2*MiB pages, we use three levels, not four.
+ * For 1*GiB pages, we use two levels.
+ */
+void
+mmuput(uintptr_t va, Page *pg, uint attr)
+{
+	Proc *up = externup();
+	int lvl, user, x, pgsz;
+	PTE *pte;
+	Page *page, *prev;
+	Mpl pl;
+	uintmem pa, ppn;
+	char buf[80];
+
+	ppn = 0;
+	pa = pg->pa;
+	if(pa == 0)
+		panic("mmuput: zero pa");
+
+	if(DBGFLG){
+		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#x\n",
+			machp()->machno, up, va, pa, attr);
+		print("%s", buf);
+	}
+	assert(pg->pgszi >= 0);
+	pgsz = sys->pgsz[pg->pgszi];
+	if(pa & (pgsz-1))
+		panic("mmuput: pa offset non zero: %#llx\n", pa);
+	pa |= pteflags(attr);
+
+	pl = splhi();
+	if(DBGFLG)
+		mmuptpcheck(up);
+	user = (va < KZERO);
+	x = PTLX(va, 3);
+
+	pte = UINT2PTR(machp()->MMU.pml4->va);
+	pte += x;
+	prev = machp()->MMU.pml4;
+
+	for(lvl = 3; lvl >= 0; lvl--){
+		if(user){
+			if(pgsz == 2*MiB && lvl == 1)	 /* use 2M */
+				break;
+			if(pgsz == 1ull*GiB && lvl == 2)	/* use 1G */
+				break;
+		}
+		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
+			if(page->prev == prev && page->daddr == x){
+				if(*pte == 0){
+					print("mmu: jmk and nemo had fun\n");
+					*pte = PPN(page->pa)|PteU|PteRW|PteP;
+				}
+				break;
+			}
+
+		if(page == nil){
+			if(up->MMU.mmuptp[0] == nil)
+				page = mmuptpalloc();
+			else {
+				page = up->MMU.mmuptp[0];
+				up->MMU.mmuptp[0] = page->next;
+			}
+			page->daddr = x;
+			page->next = up->MMU.mmuptp[lvl];
+			up->MMU.mmuptp[lvl] = page;
+			page->prev = prev;
+			*pte = PPN(page->pa)|PteU|PteRW|PteP;
+			if(lvl == 3 && x >= machp()->MMU.pml4->daddr)
+				machp()->MMU.pml4->daddr = x+1;
+		}
+		x = PTLX(va, lvl-1);
+
+		ppn = PPN(*pte);
+		if(ppn == 0)
+			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);
+
+		pte = UINT2PTR(KADDR(ppn));
+		pte += x;
+		prev = page;
+	}
+
+	if(DBGFLG)
+		checkpte(ppn, pte);
+	*pte = pa|PteU;
+
+	if(user)
+		switch(pgsz){
+		case 2*MiB:
+		case 1*GiB:
+			*pte |= PtePS;
+			break;
+		default:
+			panic("mmuput: user pages must be 2M or 1G");
+		}
+	splx(pl);
+
+	if(DBGFLG){
+		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llx\n",
+			machp()->machno, up, pte, pte?*pte:~0);
+		print("%s", buf);
+	}
+
+	invlpg(va);			/* only if old entry valid? */
+}
+
+#if 0
+static Lock mmukmaplock;
+#endif
+static Lock vmaplock;
+
+#define PML4X(v)	PTLX((v), 3)
+#define PDPX(v)		PTLX((v), 2)
+#define PDX(v)		PTLX((v), 1)
+#define PTX(v)		PTLX((v), 0)
+
+int
+mmukmapsync(uint64_t va)
+{
+	USED(va);
+
+	return 0;
+}
+
+#if 0
+static PTE
+pdeget(uintptr_t va)
+{
+	PTE *pdp;
+
+	if(va < 0xffffffffc0000000ull)
+		panic("pdeget(%#p)", va);
+
+	pdp = (PTE*)(PDMAP+PDX(PDMAP)*4096);
+
+	return pdp[PDX(va)];
+}
+
+#endif
+/*
+ * Add kernel mappings for pa -> va for a section of size bytes.
+ * Called only after the va range is known to be unoccupied.
+ */
+static int
+pdmap(uintptr_t pa, int attr, uintptr_t va, usize size)
+{
+	uintptr_t pae;
+	PTE *pd, *pde, *pt, *pte;
+	int pdx, pgsz;
+	Page *pg;
+
+	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
+
+	for(pae = pa + size; pa < pae; pa += pgsz){
+		pdx = PDX(va);
+		pde = &pd[pdx];
+
+		/*
+		 * Check if it can be mapped using a big page,
+		 * i.e. is big enough and starts on a suitable boundary.
+		 * Assume processor can do it.
+		 */
+		if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){
+			assert(*pde == 0);
+			*pde = pa|attr|PtePS|PteP;
+			pgsz = PGLSZ(1);
+		}
+		else{
+			if(*pde == 0){
+				pg = mmuptpalloc();
+				assert(pg != nil && pg->pa != 0);
+				*pde = pg->pa|PteRW|PteP;
+				memset((PTE*)(PDMAP+pdx*4096), 0, 4096);
+			}
+			assert(*pde != 0);
+
+			pt = (PTE*)(PDMAP+pdx*4096);
+			pte = &pt[PTX(va)];
+			assert(!(*pte & PteP));
+			*pte = pa|attr|PteP;
+			pgsz = PGLSZ(0);
+		}
+		va += pgsz;
+	}
+
+	return 0;
+}
+
+static int
+findhole(PTE* a, int n, int count)
+{
+	int have, i;
+
+	have = 0;
+	for(i = 0; i < n; i++){
+		if(a[i] == 0)
+			have++;
+		else
+			have = 0;
+		if(have >= count)
+			return i+1 - have;
+	}
+
+	return -1;
+}
+
+/*
+ * Look for free space in the vmap.
+ */
+static uintptr_t
+vmapalloc(usize size)
+{
+	int i, n, o;
+	PTE *pd, *pt;
+	int pdsz, ptsz;
+
+	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
+	pd += PDX(VMAP);
+	pdsz = VMAPSZ/PGLSZ(1);
+
+	/*
+	 * Look directly in the PD entries if the size is
+	 * larger than the range mapped by a single entry.
+	 */
+	if(size >= PGLSZ(1)){
+		n = HOWMANY(size, PGLSZ(1));
+		if((o = findhole(pd, pdsz, n)) != -1)
+			return VMAP + o*PGLSZ(1);
+		return 0;
+	}
+
+	/*
+	 * Size is smaller than that mapped by a single PD entry.
+	 * Look for an already mapped PT page that has room.
+	 */
+	n = HOWMANY(size, PGLSZ(0));
+	ptsz = PGLSZ(0)/sizeof(PTE);
+	for(i = 0; i < pdsz; i++){
+		if(!(pd[i] & PteP) || (pd[i] & PtePS))
+			continue;
+
+		pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096);
+		if((o = findhole(pt, ptsz, n)) != -1)
+			return VMAP + i*PGLSZ(1) + o*PGLSZ(0);
+	}
+
+	/*
+	 * Nothing suitable, start using a new PD entry.
+	 */
+	if((o = findhole(pd, pdsz, 1)) != -1)
+		return VMAP + o*PGLSZ(1);
+
+	return 0;
+}
+
+/*
+ * KSEG0 maps low memory.
+ * KSEG2 maps almost all memory, but starting at an address determined
+ * by the address space map (see asm.c).
+ * Thus, almost everything in physical memory is already mapped, but
+ * there are things that fall in the gap
+ * (acpi tables, device memory-mapped registers, etc.)
+ * for those things, we also want to disable caching.
+ * vmap() is required to access them.
+ */
+void*
+vmap(uintptr_t pa, usize size)
+{
+	uintptr_t va;
+	usize o, sz;
+
+	DBG("vmap(%#p, %lu) pc=%#p\n", pa, size, getcallerpc());
+
+	if(machp()->machno != 0)
+		print("vmap: machp()->machno != 0");
+
+	/*
+	 * This is incomplete; the checks are not comprehensive
+	 * enough.
+	 * Sometimes the request is for an already-mapped piece
+	 * of low memory, in which case just return a good value
+	 * and hope that a corresponding vunmap of the address
+	 * will have the same address.
+	 * To do this properly will require keeping track of the
+	 * mappings; perhaps something like kmap, but kmap probably
+	 * can't be used early enough for some of the uses.
+	 */
+	if(pa+size < 1ull*MiB)
+		return KADDR(pa);
+	if(pa < 1ull*MiB)
+		return nil;
+
+	/*
+	 * Might be asking for less than a page.
+	 * This should have a smaller granularity if
+	 * the page size is large.
+	 */
+	o = pa & ((1<<PGSHFT)-1);
+	pa -= o;
+	sz = ROUNDUP(size+o, PGSZ);
+
+	if(pa == 0){
+		print("vmap(0, %lu) pc=%#p\n", size, getcallerpc());
+		return nil;
+	}
+	ilock(&vmaplock);
+	if((va = vmapalloc(sz)) == 0 || pdmap(pa, PtePCD|PteRW, va, sz) < 0){
+		iunlock(&vmaplock);
+		return nil;
+	}
+	iunlock(&vmaplock);
+
+	DBG("vmap(%#p, %lu) => %#p\n", pa+o, size, va+o);
+
+	return UINT2PTR(va + o);
+}
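
Typical intended use, with a made-up device address for illustration:

	uint32_t *regs;

	regs = vmap(0x10013000, 0x1000);	/* hypothetical uart MMIO */
	if(regs == nil)
		panic("vmap uart");

The PtePCD that pdmap() sets is what disables caching on such regions.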
+
+void
+vunmap(void* v, usize size)
+{
+	uintptr_t va;
+
+	DBG("vunmap(%#p, %lu)\n", v, size);
+
+	if(machp()->machno != 0)
+		print("vunmap: machp()->machno != 0");
+
+	/*
+	 * See the comments above in vmap.
+	 */
+	va = PTR2UINT(v);
+	if(va >= KZERO && va+size < KZERO+1ull*MiB)
+		return;
+
+	/*
+	 * Here will have to deal with releasing any
+	 * resources used for the allocation (e.g. page table
+	 * pages).
+	 */
+	DBG("vunmap(%#p, %lu)\n", v, size);
+}
+
+int
+mmuwalk(PTE* pml4, uintptr_t va, int level, PTE** ret,
+	uint64_t (*alloc)(usize))
+{
+	int l;
+	uintmem pa;
+	PTE *pte;
+
+	Mpl pl;
+
+	pl = splhi();
+	if(DBGFLG > 1)
+		DBG("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
+	pte = &pml4[PTLX(va, 3)];
+	for(l = 3; l >= 0; l--){
+		if(l == level)
+			break;
+		if(!(*pte & PteP)){
+			if(alloc == nil)
+				break;
+			pa = alloc(PTSZ);
+			if(pa == ~0)
+				return -1;
+			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
+			*pte = pa|PteRW|PteP;
+		}
+		else if(*pte & PtePS)
+			break;
+		pte = UINT2PTR(KADDR(PPN(*pte)));
+		pte += PTLX(va, l-1);
+	}
+	*ret = pte;
+	splx(pl);
+	return l;
+}
+
+uintmem
+mmuphysaddr(uintptr_t va)
+{
+	int l;
+	PTE *pte;
+	uintmem mask, pa;
+
+	/*
+	 * Given a VA, find the PA.
+	 * This is probably not the right interface,
+	 * but will do as an experiment. Usual
+	 * question, should va be void* or uintptr?
+	 */
+	l = mmuwalk(UINT2PTR(machp()->MMU.pml4->va), va, 0, &pte, nil);
+	DBG("physaddr: va %#p l %d\n", va, l);
+	if(l < 0)
+		return ~0;
+
+	mask = PGLSZ(l)-1;
+	pa = (*pte & ~mask) + (va & mask);
+
+	DBG("physaddr: l %d va %#p pa %#llx\n", l, va, pa);
+
+	return pa;
+}
+
+Page mach0pml4;
+
+void
+mmuinit(void)
+{
+	panic("mmuinit");
+#if 0
+	uint8_t *p;
+	Page *page;
+	uint64_t o, pa, r, sz;
+
+	archmmu();
+	DBG("mach%d: %#p pml4 %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.pml4, sys->npgsz);
+
+	if(machp()->machno != 0){
+		/* NIX: KLUDGE: Has to go when each mach is using
+		 * its own page table
+		 */
+		p = UINT2PTR(machp()->stack);
+		p += MACHSTKSZ;
+
+		memmove(p, UINT2PTR(mach0pml4.va), PTSZ);
+		machp()->MMU.pml4 = &machp()->MMU.pml4kludge;
+		machp()->MMU.pml4->va = PTR2UINT(p);
+		machp()->MMU.pml4->pa = PADDR(p);
+		machp()->MMU.pml4->daddr = mach0pml4.daddr;	/* # of user mappings in pml4 */
+
+		r = rdmsr(Efer);
+		r |= Nxe;
+		wrmsr(Efer, r);
+		rootput(machp()->MMU.pml4->pa);
+		DBG("m %#p pml4 %#p\n", machp(), machp()->MMU.pml4);
+		return;
+	}
+
+	page = &mach0pml4;
+	page->pa = read_csr(sptbr);
+	page->va = PTR2UINT(KADDR(page->pa));
+
+	machp()->MMU.pml4 = page;
+
+	r = rdmsr(Efer);
+	r |= Nxe;
+	wrmsr(Efer, r);
+
+	/*
+	 * Set up the various kernel memory allocator limits:
+	 * pmstart/pmend bound the unused physical memory;
+	 * vmstart/vmend bound the total possible virtual memory
+	 * used by the kernel;
+	 * vmunused is the highest virtual address currently mapped
+	 * and used by the kernel;
+	 * vmunmapped is the highest virtual address currently
+	 * mapped by the kernel.
+	 * Vmunused can be bumped up to vmunmapped before more
+	 * physical memory needs to be allocated and mapped.
+	 *
+	 * This is set up here so meminit can map appropriately.
+	 */
+	o = sys->pmstart;
+	sz = ROUNDUP(o, 4*MiB) - o;
+	pa = asmalloc(0, sz, 1, 0);
+	if(pa != o)
+		panic("mmuinit: pa %#llx memstart %#llx\n", pa, o);
+	sys->pmstart += sz;
+
+	sys->vmstart = KSEG0;
+	sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB);
+	sys->vmunmapped = sys->vmstart + o + sz;
+	sys->vmend = sys->vmstart + TMFM;
+
+	print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
+		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
+
+	/*
+	 * Set up the map for PD entry access by inserting
+	 * the relevant PDP entry into the PD. It's equivalent
+	 * to PADDR(sys->pd)|PteRW|PteP.
+	 *
+	 */
+	sys->pd[PDX(PDMAP)] = sys->pdp[PDPX(PDMAP)] & ~(PteD|PteA);
+	print("sys->pd %#p %#p\n", sys->pd[PDX(PDMAP)], sys->pdp[PDPX(PDMAP)]);
+	assert((pdeget(PDMAP) & ~(PteD|PteA)) == (PADDR(sys->pd)|PteRW|PteP));
+
+
+	dumpmmuwalk(KZERO);
+
+	mmuphysaddr(PTR2UINT(end));
+#endif
+}

+ 179 - 0
sys/src/9/riscv/mmu.h

@@ -0,0 +1,179 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/* Cr0 */
+#define Pe		0x00000001		/* Protected Mode Enable */
+#define Mp		0x00000002		/* Monitor Coprocessor */
+#define Em		0x00000004		/* Emulate Coprocessor */
+#define Ts		0x00000008		/* Task Switched */
+#define Et		0x00000010		/* Extension Type */
+#define Ne		0x00000020		/* Numeric Error  */
+#define Wp		0x00010000		/* Write Protect */
+#define Am		0x00040000		/* Alignment Mask */
+#define Nw		0x20000000		/* Not Writethrough */
+#define Cd		0x40000000		/* Cache Disable */
+#define Pg		0x80000000		/* Paging Enable */
+
+/* Cr3 */
+#define Pwt		0x00000008		/* Page-Level Writethrough */
+#define Pcd		0x00000010		/* Page-Level Cache Disable */
+
+/* Cr4 */
+#define Vme		0x00000001		/* Virtual-8086 Mode Extensions */
+#define Pvi		0x00000002		/* Protected Mode Virtual Interrupts */
+#define Tsd		0x00000004		/* Time-Stamp Disable */
+#define De		0x00000008		/* Debugging Extensions */
+#define Pse		0x00000010		/* Page-Size Extensions */
+#define Pae		0x00000020		/* Physical Address Extension */
+#define Mce		0x00000040		/* Machine Check Enable */
+#define Pge		0x00000080		/* Page-Global Enable */
+#define Pce		0x00000100		/* Performance Monitoring Counter Enable */
+#define Osfxsr		0x00000200		/* FXSAVE/FXRSTOR Support */
+#define Osxmmexcpt	0x00000400		/* Unmasked Exception Support */
+
+/* Rflags */
+#define Cf		0x00000001		/* Carry Flag */
+#define Pf		0x00000004		/* Parity Flag */
+#define Af		0x00000010		/* Auxiliary Flag */
+#define Zf		0x00000040		/* Zero Flag */
+#define Sf		0x00000080		/* Sign Flag */
+#define Tf		0x00000100		/* Trap Flag */
+#define If		0x00000200		/* Interrupt Flag */
+#define Df		0x00000400		/* Direction Flag */
+#define Of		0x00000800		/* Overflow Flag */
+#define Iopl0		0x00000000		/* I/O Privilege Level */
+#define Iopl1		0x00001000
+#define Iopl2		0x00002000
+#define Iopl3		0x00003000
+#define Nt		0x00004000		/* Nested Task */
+#define Rf		0x00010000		/* Resume Flag */
+#define Vm		0x00020000		/* Virtual-8086 Mode */
+#define Ac		0x00040000		/* Alignment Check */
+#define Vif		0x00080000		/* Virtual Interrupt Flag */
+#define Vip		0x00100000		/* Virtual Interrupt Pending */
+#define Id		0x00200000		/* ID Flag */
+
+/* MSRs */
+#define PerfEvtbase	0xc0010000		/* Performance Event Select */
+#define PerfCtrbase	0xc0010004		/* Performance Counters */
+
+#define Efer		0xc0000080		/* Extended Feature Enable */
+#define Star		0xc0000081		/* Legacy Target IP and [CS]S */
+#define Lstar		0xc0000082		/* Long Mode Target IP */
+#define Cstar		0xc0000083		/* Compatibility Target IP */
+#define Sfmask		0xc0000084		/* SYSCALL Flags Mask */
+#define FSbase		0xc0000100		/* 64-bit FS Base Address */
+#define GSbase		0xc0000101		/* 64-bit GS Base Address */
+#define KernelGSbase	0xc0000102		/* SWAPGS instruction */
+
+/* Efer */
+#define Sce		0x00000001		/* System Call Extension */
+#define Lme		0x00000100		/* Long Mode Enable */
+#define Lma		0x00000400		/* Long Mode Active */
+#define Nxe		0x00000800		/* No-Execute Enable */
+#define Svme		0x00001000		/* SVM Extension Enable */
+#define Ffxsr		0x00004000		/* Fast FXSAVE/FXRSTOR */
+
+/* PML4E/PDPE/PDE/PTE */
+#define PteP		0x0000000000000001	/* Present */
+#define PteRW		0x0000000000000002	/* Read/Write */
+#define PteU		0x0000000000000004	/* User/Supervisor */
+#define PtePWT		0x0000000000000008	/* Page-Level Write Through */
+#define PtePCD		0x0000000000000010	/* Page Level Cache Disable */
+#define PteA		0x0000000000000020	/* Accessed */
+#define PteD		0x0000000000000040	/* Dirty */
+#define PtePS		0x0000000000000080	/* Page Size */
+#define Pte4KPAT	PtePS			/* PTE PAT */
+#define PteG		0x0000000000000100	/* Global */
+#define Pte2MPAT	0x0000000000001000	/* PDE PAT */
+#define Pte1GPAT	Pte2MPAT		/* PDPE PAT */
+#define PteNX		0x8000000000000000	/* No Execute */
+
+/* Exceptions */
+#define IdtDE		0			/* Divide-by-Zero Error */
+#define IdtDB		1			/* Debug */
+#define IdtNMI		2			/* Non-Maskable-Interrupt */
+#define IdtBP		3			/* Breakpoint */
+#define IdtOF		4			/* Overflow */
+#define IdtBR		5			/* Bound-Range */
+#define IdtUD		6			/* Invalid-Opcode */
+#define IdtNM		7			/* Device-Not-Available */
+#define IdtDF		8			/* Double-Fault */
+#define Idt09		9			/* unsupported */
+#define IdtTS		10			/* Invalid-TSS */
+#define IdtNP		11			/* Segment-Not-Present */
+#define IdtSS		12			/* Stack */
+#define IdtGP		13			/* General-Protection */
+#define IdtPF		14			/* Page-Fault */
+#define Idt0F		15			/* reserved */
+#define IdtMF		16			/* x87 FPE-Pending */
+#define IdtAC		17			/* Alignment-Check */
+#define IdtMC		18			/* Machine-Check */
+#define IdtXF		19			/* SIMD Floating-Point */
+
+/* Vestigial Segmented Virtual Memory */
+#define SdISTM		0x0000000700000000	/* Interrupt Stack Table Mask */
+#define SdA		0x0000010000000000	/* Accessed */
+#define SdR		0x0000020000000000	/* Readable (Code) */
+#define SdW		0x0000020000000000	/* Writeable (Data) */
+#define SdE		0x0000040000000000	/* Expand Down */
+#define SdaTSS		0x0000090000000000	/* Available TSS */
+#define SdbTSS		0x00000b0000000000	/* Busy TSS */
+#define SdCG		0x00000c0000000000	/* Call Gate */
+#define SdIG		0x00000e0000000000	/* Interrupt Gate */
+#define SdTG		0x00000f0000000000	/* Trap Gate */
+#define SdCODE		0x0000080000000000	/* Code/Data */
+#define SdS		0x0000100000000000	/* System/User */
+#define SdDPL0		0x0000000000000000	/* Descriptor Privilege Level */
+#define SdDPL1		0x0000200000000000
+#define SdDPL2		0x0000400000000000
+#define SdDPL3		0x0000600000000000
+#define SdP		0x0000800000000000	/* Present */
+#define Sd4G		0x000f00000000ffff	/* 4G Limit */
+#define SdL		0x0020000000000000	/* Long Attribute */
+#define SdD		0x0040000000000000	/* Default Operand Size */
+#define SdG		0x0080000000000000	/* Granularity */
+
+/* Performance Counter Configuration */
+#define PeHo		0x0000020000000000	/* Host only */
+#define PeGo		0x0000010000000000	/* Guest only */
+#define PeEvMskH	0x0000000f00000000	/* Event mask H */
+#define PeCtMsk		0x00000000ff000000	/* Counter mask */
+#define PeInMsk		0x0000000000800000	/* Invert mask */
+#define PeCtEna		0x0000000000400000	/* Counter enable */
+#define PeInEna		0x0000000000100000	/* Interrupt enable */
+#define PePnCtl		0x0000000000080000	/* Pin control */
+#define PeEdg		0x0000000000040000	/* Edge detect */
+#define PeOS		0x0000000000020000	/* OS mode */
+#define PeUsr		0x0000000000010000	/* User mode */
+#define PeUnMsk		0x000000000000ff00	/* Unit Mask */
+#define PeEvMskL	0x00000000000000ff	/* Event Mask L */
+
+#define PeEvMsksh	32			/* Event mask shift */
+
+/* Segment Selector */
+#define SsRPL0		0x0000			/* Requestor Privilege Level */
+#define SsRPL1		0x0001
+#define SsRPL2		0x0002
+#define SsRPL3		0x0003
+#define SsTIGDT		0x0000			/* GDT Table Indicator  */
+#define SsTILDT		0x0004			/* LDT Table Indicator */
+#define SsSIM		0xfff8			/* Selector Index Mask */
+
+#define SSEL(si, tirpl)	(((si)<<3)|(tirpl))	/* Segment Selector */
+
+#define SiNULL		0			/* NULL selector index */
+#define SiCS		1			/* CS selector index */
+#define SiDS		2			/* DS selector index */
+#define SiU32CS		3			/* User CS selector index */
+#define SiUDS		4			/* User DS selector index */
+#define SiUCS		5			/* User CS selector index */
+#define SiFS		6			/* FS selector index */
+#define SiGS		7			/* GS selector index */
+#define SiTSS		8			/* TSS selector index */

+ 683 - 0
sys/src/9/riscv/qmalloc.c

@@ -0,0 +1,683 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * malloc
+ *
+ *	Uses Quickfit (see SIGPLAN Notices October 1988)
+ *	with allocator from Kernighan & Ritchie
+ *
+ * This is a placeholder.
+ */
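+
+/*
+ * In brief: requests of at most NQUICK units are served LIFO from
+ * exact-size free lists (quicklist[nunits]), so the common small
+ * sizes cost one list pop; larger requests are carved off the arena
+ * tail (tailptr/tailsize), falling back to the K&R first-fit
+ * circular list (rover) and morecore() when the tail runs short.
+ */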
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include	<pool.h>
+
+typedef double Align;
+typedef union Header Header;
+typedef struct Qlist Qlist;
+
+union Header {
+	struct {
+		Header*	next;
+		uint	size;
+	} s;
+	Align	al;
+};
+
+struct Qlist {
+	Lock	lk;
+	Header*	first;
+
+	uint	nalloc;
+};
+
+enum {
+	Unitsz		= sizeof(Header),	/* 16 bytes on amd64 */
+};
+
+#define	NUNITS(n)	(HOWMANY(n, Unitsz) + 1)
+#define	NQUICK		((512/Unitsz)+1)	/* 33 on amd64 */
+
+static	Qlist	quicklist[NQUICK+1];
+static	Header	misclist;
+static	Header	*rover;
+static	unsigned tailsize;
+static	unsigned tailnunits;
+static	Header	*tailbase;
+static	Header	*tailptr;
+static	Header	checkval;
+static	int	morecore(unsigned);
+
+enum
+{
+	QSmalign = 0,
+	QSmalignquick,
+	QSmalignrover,
+	QSmalignfront,
+	QSmalignback,
+	QSmaligntail,
+	QSmalignnottail,
+	QSmalloc,
+	QSmallocrover,
+	QSmalloctail,
+	QSfree,
+	QSfreetail,
+	QSfreequick,
+	QSfreenext,
+	QSfreeprev,
+	QSmax
+};
+
+static	void	qfreeinternal(void*);
+static	int	qstats[QSmax];
+static	char*	qstatstr[QSmax] = {
+[QSmalign] = "malign",
+[QSmalignquick] = "malignquick",
+[QSmalignrover] = "malignrover",
+[QSmalignfront] = "malignfront",
+[QSmalignback] = "malignback",
+[QSmaligntail] = "maligntail",
+[QSmalignnottail] = "malignnottail",
+[QSmalloc] = "malloc",
+[QSmallocrover] = "mallocrover",
+[QSmalloctail] = "malloctail",
+[QSfree] = "free",
+[QSfreetail] = "freetail",
+[QSfreequick] = "freequick",
+[QSfreenext] = "freenext",
+[QSfreeprev] = "freeprev",
+};
+
+static	Lock		mainlock;
+
+#define	MLOCK		ilock(&mainlock)
+#define	MUNLOCK		iunlock(&mainlock)
+#define QLOCK(l)	ilock(l)
+#define QUNLOCK(l)	iunlock(l)
+
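+/*
+ * tailalloc(p, n) carves n units off the arena tail: p gets the old
+ * tailptr, tailsize shrinks by n as tailptr advances, and the block
+ * is stamped with its size and the checkval sentinel used later to
+ * detect corruption.
+ */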
+#define	tailalloc(p, n)	((p)=tailptr, tailsize -= (n), tailptr+=(n),\
+			 (p)->s.size=(n), (p)->s.next = &checkval)
+
+#define ISPOWEROF2(x)	(/*((x) != 0) && */!((x) & ((x)-1)))
+#define ALIGNHDR(h, a)	(Header*)((((uintptr)(h))+((a)-1)) & ~((a)-1))
+
+/*
+ * From libc malloc.c to *draw devices
+ */
+
+typedef struct Private	Private;
+struct Private {
+	Lock		lk;
+	char*		end;
+	char		msg[256];	/* a rock for messages to be printed at unlock */
+};
+
+/*
+ * Experiment: per-core quick lists.
+ * change quicklist to be
+ * static	Qlist	quicklist[MACHMAX][NQUICK+1];
+ * and define QLIST to be quicklist[machp()->machno]
+ *
+ * using quicklist[machp()->machno] runs out of memory soon.
+ * using quicklist[machp()->machno%4] yields times worse than using quicklist!
+ */
+#define QLIST	quicklist
+
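+/*
+ * Aligned allocation: first scan the exact-size quicklist for a
+ * block that happens to be aligned; otherwise take a rover-list or
+ * tail block big enough for the padding, freeing any runt before the
+ * aligned boundary and any residue after the requested units.
+ */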
+static void*
+qmallocalign(usize nbytes, uintptr_t align, int32_t offset, usize span)
+{
+	Qlist *qlist;
+	uintptr_t aligned;
+	Header **pp, *p, *q, *r;
+	uint naligned, nunits, n;
+
+	if(nbytes == 0 || offset != 0 || span != 0)
+		return nil;
+
+	if(!ISPOWEROF2(align) || align < sizeof(Header))
+		return nil;
+
+	qstats[QSmalign]++;
+	nunits = NUNITS(nbytes);
+	if(nunits <= NQUICK){
+		/*
+		 * Look for a conveniently aligned block
+		 * on one of the quicklists.
+		 */
+		qlist = &QLIST[nunits];
+		QLOCK(&qlist->lk);
+		pp = &qlist->first;
+		for(p = *pp; p != nil; p = p->s.next){
+			if(ALIGNED(p+1, align)){
+				*pp = p->s.next;
+				p->s.next = &checkval;
+				QUNLOCK(&qlist->lk);
+				qstats[QSmalignquick]++;
+				return p+1;
+			}
+			pp = &p->s.next;
+		}
+		QUNLOCK(&qlist->lk);
+	}
+
+	MLOCK;
+	if(nunits > tailsize) {
+		/* hard way */
+		if((q = rover) != nil){
+			do {
+				p = q->s.next;
+				if(p->s.size < nunits)
+					continue;
+				aligned = ALIGNED(p+1, align);
+				naligned = NUNITS(align)-1;
+				if(!aligned && p->s.size < nunits+naligned)
+					continue;
+
+				/*
+				 * This block is big enough, remove it
+				 * from the list.
+				 */
+				q->s.next = p->s.next;
+				rover = q;
+				qstats[QSmalignrover]++;
+
+				/*
+				 * Free any runt in front of the alignment.
+				 */
+				if(!aligned){
+					r = p;
+					p = ALIGNHDR(p+1, align) - 1;
+					n = p - r;
+					p->s.size = r->s.size - n;
+
+					r->s.size = n;
+					r->s.next = &checkval;
+					qfreeinternal(r+1);
+					qstats[QSmalignfront]++;
+				}
+
+				/*
+				 * Free any residue after the aligned block.
+				 */
+				if(p->s.size > nunits){
+					r = p+nunits;
+					r->s.size = p->s.size - nunits;
+					r->s.next = &checkval;
+					qstats[QSmalignback]++;
+					qfreeinternal(r+1);
+
+					p->s.size = nunits;
+				}
+
+				p->s.next = &checkval;
+				MUNLOCK;
+				return p+1;
+			} while((q = p) != rover);
+		}
+		if((n = morecore(nunits)) == 0){
+			MUNLOCK;
+			return nil;
+		}
+		tailsize += n;
+	}
+
+	q = ALIGNHDR(tailptr+1, align);
+	if(q == tailptr+1){
+		tailalloc(p, nunits);
+		qstats[QSmaligntail]++;
+	}
+	else{
+		naligned = NUNITS(align)-1;
+		if(tailsize < nunits+naligned){
+			/*
+			 * There are at least nunits,
+			 * get enough for alignment.
+			 */
+			if((n = morecore(naligned)) == 0){
+				MUNLOCK;
+				return nil;
+			}
+			tailsize += n;
+		}
+		/*
+		 * Save the residue before the aligned allocation
+		 * and free it after the tail pointer has been bumped
+		 * for the main allocation.
+		 */
+		n = q-tailptr - 1;
+		tailalloc(r, n);
+		tailalloc(p, nunits);
+		qstats[QSmalignnottail]++;
+		qfreeinternal(r+1);
+	}
+	MUNLOCK;
+
+	return p+1;
+}
+
+static void*
+qmalloc(usize nbytes)
+{
+	Qlist *qlist;
+	Header *p, *q;
+	uint nunits, n;
+
+	/* FIXME: (ignore for now) */
+	if(nbytes == 0)
+		return nil;
+
+	qstats[QSmalloc]++;
+	nunits = NUNITS(nbytes);
+	if(nunits <= NQUICK){
+		qlist = &QLIST[nunits];
+		QLOCK(&qlist->lk);
+		if((p = qlist->first) != nil){
+			qlist->first = p->s.next;
+			qlist->nalloc++;
+			QUNLOCK(&qlist->lk);
+			p->s.next = &checkval;
+			return p+1;
+		}
+		QUNLOCK(&qlist->lk);
+	}
+
+	MLOCK;
+	if(nunits > tailsize) {
+		/* hard way */
+		if((q = rover) != nil){
+			do {
+				p = q->s.next;
+				if(p->s.size >= nunits) {
+					if(p->s.size > nunits) {
+						p->s.size -= nunits;
+						p += p->s.size;
+						p->s.size = nunits;
+					} else
+						q->s.next = p->s.next;
+					p->s.next = &checkval;
+					rover = q;
+					qstats[QSmallocrover]++;
+					MUNLOCK;
+					return p+1;
+				}
+			} while((q = p) != rover);
+		}
+		if((n = morecore(nunits)) == 0){
+			MUNLOCK;
+			return nil;
+		}
+		tailsize += n;
+	}
+	qstats[QSmalloctail]++;
+	tailalloc(p, nunits);
+	MUNLOCK;
+
+	return p+1;
+}
+
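+/*
+ * Free: a block abutting the arena tail is merged back into it,
+ * small blocks go back onto their quicklist, and the rest are
+ * inserted into the address-ordered rover list, coalescing with
+ * either neighbour when adjacent.
+ */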
+static void
+qfreeinternal(void* ap)
+{
+	Qlist *qlist;
+	Header *p, *q;
+	uint nunits;
+
+	if(ap == nil)
+		return;
+	qstats[QSfree]++;
+
+	p = (Header*)ap - 1;
+	if((nunits = p->s.size) == 0 || p->s.next != &checkval)
+		panic("malloc: corrupt allocation arena\n");
+	if(tailptr != nil && p+nunits == tailptr) {
+		/* block before tail */
+		tailptr = p;
+		tailsize += nunits;
+		qstats[QSfreetail]++;
+		return;
+	}
+	if(nunits <= NQUICK) {
+		qlist = &QLIST[nunits];
+		QLOCK(&qlist->lk);
+		p->s.next = qlist->first;
+		qlist->first = p;
+		QUNLOCK(&qlist->lk);
+		qstats[QSfreequick]++;
+		return;
+	}
+	if((q = rover) == nil) {
+		q = &misclist;
+		q->s.size = 0;
+		q->s.next = q;
+	}
+	for(; !(p > q && p < q->s.next); q = q->s.next)
+		if(q >= q->s.next && (p > q || p < q->s.next))
+			break;
+	if(p+p->s.size == q->s.next) {
+		p->s.size += q->s.next->s.size;
+		p->s.next = q->s.next->s.next;
+		qstats[QSfreenext]++;
+	} else
+		p->s.next = q->s.next;
+	if(q+q->s.size == p) {
+		q->s.size += p->s.size;
+		q->s.next = p->s.next;
+		qstats[QSfreeprev]++;
+	} else
+		q->s.next = p;
+	rover = q;
+}
+
+uint32_t
+msize(void* ap)
+{
+	Header *p;
+	uint nunits;
+
+	if(ap == nil)
+		return 0;
+
+	p = (Header*)ap - 1;
+	if((nunits = p->s.size) == 0 || p->s.next != &checkval)
+		panic("malloc: corrupt allocation arena\n");
+
+	return (nunits-1) * sizeof(Header);
+}
+
+static void
+mallocreadfmt(char* s, char* e)
+{
+	char *p;
+	Header *q;
+	int i, n, t;
+	Qlist *qlist;
+
+	p = seprint(s, e,
+		"%llu memory\n"
+		"%d pagesize\n"
+		"%llu kernel\n",
+		(uint64_t)conf.npage*PGSZ,
+		PGSZ,
+		(uint64_t)conf.npage-conf.upages);
+
+	t = 0;
+	for(i = 0; i <= NQUICK; i++) {
+		n = 0;
+		qlist = &QLIST[i];
+		QLOCK(&qlist->lk);
+		for(q = qlist->first; q != nil; q = q->s.next){
+//			if(q->s.size != i)
+//				p = seprint(p, e, "q%d\t%#p\t%u\n",
+//					i, q, q->s.size);
+			n++;
+		}
+		QUNLOCK(&qlist->lk);
+
+//		if(n != 0)
+//			p = seprint(p, e, "q%d %d\n", i, n);
+		t += n * i*sizeof(Header);
+	}
+	p = seprint(p, e, "quick: %u bytes total\n", t);
+
+	MLOCK;
+	if((q = rover) != nil){
+		i = t = 0;
+		do {
+			t += q->s.size;
+			i++;
+//			p = seprint(p, e, "m%d\t%#p\n", q->s.size, q);
+		} while((q = q->s.next) != rover);
+
+		p = seprint(p, e, "rover: %d blocks %lu bytes total\n",
+			i, t*sizeof(Header));
+	}
+	p = seprint(p, e, "total allocated %lu, %lu remaining\n",
+		(tailptr-tailbase)*sizeof(Header), tailnunits*sizeof(Header));
+
+	for(i = 0; i < nelem(qstats); i++){
+		if(qstats[i] == 0)
+			continue;
+		p = seprint(p, e, "%s %u\n", qstatstr[i], qstats[i]);
+	}
+	MUNLOCK;
+}
+
+int32_t
+mallocreadsummary(Chan* c, void *a, int32_t n, int32_t offset)
+{
+	char *alloc;
+
+	alloc = malloc(16*READSTR);
+	mallocreadfmt(alloc, alloc+16*READSTR);
+	panic("mallocreadsummary"); //n = readstr(offset, a, n, alloc);
+	free(alloc);
+
+	return n;
+}
+
+void
+mallocsummary(void)
+{
+	Header *q;
+	int i, n, t;
+	Qlist *qlist;
+
+	t = 0;
+	for(i = 0; i <= NQUICK; i++) {
+		n = 0;
+		qlist = &QLIST[i];
+		QLOCK(&qlist->lk);
+		for(q = qlist->first; q != nil; q = q->s.next){
+			if(q->s.size != i)
+				DBG("q%d\t%#p\t%u\n", i, q, q->s.size);
+			n++;
+		}
+		QUNLOCK(&qlist->lk);
+
+		t += n * i*sizeof(Header);
+	}
+	print("quick: %u bytes total\n", t);
+
+	MLOCK;
+	i = t = 0;	/* i, t still hold quicklist totals; reset before the rover scan */
+	if((q = rover) != nil){
+		do {
+			t += q->s.size;
+			i++;
+		} while((q = q->s.next) != rover);
+	}
+	MUNLOCK;
+
+	if(i != 0){
+		print("rover: %d blocks %u bytes total\n",
+			i, t*sizeof(Header));
+	}
+	print("total allocated %lu, %u remaining\n",
+		(tailptr-tailbase)*sizeof(Header), tailnunits*sizeof(Header));
+
+	for(i = 0; i < nelem(qstats); i++){
+		if(qstats[i] == 0)
+			continue;
+		print("%s %u\n", qstatstr[i], qstats[i]);
+	}
+}
+
+void
+free(void* ap)
+{
+	MLOCK;
+	qfreeinternal(ap);
+	MUNLOCK;
+}
+
+void*
+malloc(uint32_t size)
+{
+	void* v;
+
+	if((v = qmalloc(size)) != nil)
+		memset(v, 0, size);
+
+	return v;
+}
+
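+/*
+ * Note that malloc() above already zeroes, so the clr memset here
+ * is redundant; it is kept so mallocz stays correct should malloc
+ * ever stop zeroing.
+ */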
+void*
+mallocz(uint32_t size, int clr)
+{
+	void *v;
+
+	if((v = malloc(size)) != nil && clr)
+		memset(v, 0, size);
+
+	return v;
+}
+
+void*
+mallocalign(uint32_t nbytes, uint32_t align, int32_t offset, uint32_t span)
+{
+	void *v;
+
+	/*
+	 * Should this memset or should it be left to the caller?
+	 */
+	if((v = qmallocalign(nbytes, align, offset, span)) != nil)
+		memset(v, 0, nbytes);
+
+	return v;
+}
+
+void*
+smalloc(uint32_t size)
+{
+	Proc *up = externup();
+	void *v;
+
+	while((v = mallocz(size, 1)) == nil)
+		tsleep(&up->sleep, return0, 0, 100);
+	return v;
+}
+
+void*
+realloc(void* ap, uint32_t size)
+{
+	void *v;
+	Header *p;
+	uint32_t osize;
+	uint nunits, ounits;
+
+	/*
+	 * Easy stuff:
+	 * free and return nil if size is 0
+	 * (implementation-defined behaviour);
+	 * behave like malloc if ap is nil;
+	 * check for arena corruption;
+	 * do nothing if units are the same.
+	 */
+	if(size == 0){
+		MLOCK;
+		qfreeinternal(ap);
+		MUNLOCK;
+
+		return nil;
+	}
+	if(ap == nil)
+		return qmalloc(size);
+
+	p = (Header*)ap - 1;
+	if((ounits = p->s.size) == 0 || p->s.next != &checkval)
+		panic("realloc: corrupt allocation arena\n");
+
+	if((nunits = NUNITS(size)) <= ounits)
+		return ap;
+
+	/*
+	 * Slightly harder:
+	 * if this allocation abuts the tail, try to just
+	 * adjust the tailptr.
+	 */
+	MLOCK;
+	if(tailptr != nil && p+ounits == tailptr){
+		if(ounits > nunits){
+			p->s.size = nunits;
+			tailsize += ounits-nunits;
+			MUNLOCK;
+			return ap;
+		}
+		if(tailsize >= nunits-ounits){
+			p->s.size = nunits;
+			tailsize -= nunits-ounits;
+			MUNLOCK;
+			return ap;
+		}
+	}
+	MUNLOCK;
+
+	/*
+	 * Worth doing if it's a small reduction?
+	 * Do it anyway if <= NQUICK?
+	if((ounits-nunits) < 2)
+		return ap;
+	 */
+
+	/*
+	 * Too hard (or can't be bothered):
+	 * allocate, copy and free.
+	 * What does the standard say for failure here?
+	 */
+	if((v = qmalloc(size)) != nil){
+		osize = (ounits-1)*sizeof(Header);
+		if(size < osize)
+			osize = size;
+		memmove(v, ap, osize);
+		MLOCK;
+		qfreeinternal(ap);
+		MUNLOCK;
+	}
+
+	return v;
+}
+
+void
+setmalloctag(void* v, uint32_t i)
+{
+}
+
+void
+mallocinit(void)
+{
+	if(tailptr != nil)
+		return;
+
+	tailbase = UINT2PTR(sys->vmunused);
+	tailptr = tailbase;
+	tailnunits = NUNITS(sys->vmend - sys->vmunused);
+	print("base %#p ptr %#p nunits %u\n", tailbase, tailptr, tailnunits);
+}
+
+static int
+morecore(uint nunits)
+{
+	/*
+	 * First (simple) cut.
+	 * Pump it up when you don't really need it.
+	 * Pump it up until you can feel it.
+	 */
+	if(nunits < NUNITS(128*KiB))
+		nunits = NUNITS(128*KiB);
+	if(nunits > tailnunits)
+		nunits = tailnunits;
+	tailnunits -= nunits;
+
+	return nunits;
+}

+ 485 - 0
sys/src/9/riscv/syscall.c

@@ -0,0 +1,485 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "../port/error.h"
+
+#include "sys.h"
+
+#include <tos.h>
+
+#include "ureg.h"
+
+extern int nosmp;
+
+typedef struct {
+	uintptr_t	ip;
+	Ureg*	arg0;
+	char*	arg1;
+	char	msg[ERRMAX];
+	Ureg*	old;
+	Ureg	ureg;
+} NFrame;
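+
+/*
+ * notify() builds an NFrame on the user stack: ureg holds the saved
+ * user registers, msg the note text and old the previous up->ureg;
+ * noted() consumes it to restore or adjust user state.
+ */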
+
+/*
+ *   Return user to state before notify()
+ */
+void
+noted(Ureg* cur, uintptr_t arg0)
+{
+	Proc *up = externup();
+	NFrame *nf;
+	Note note;
+	Ureg *nur;
+
+	qlock(&up->debug);
+	if(arg0 != NRSTR && !up->notified){
+		qunlock(&up->debug);
+		pprint("suicide: call to noted when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+	fpunoted();
+
+	nf = up->ureg;
+
+	/* sanity clause */
+	if(!okaddr(PTR2UINT(nf), sizeof(NFrame), 0)){
+		qunlock(&up->debug);
+		pprint("suicide: bad ureg %#p in noted\n", nf);
+		pexit("Suicide", 0);
+	}
+
+	nur = &nf->ureg;
+	/* don't let user change system flags */
+#if 0
+	nur->flags &= (Of|Df|Sf|Zf|Af|Pf|Cf);
+	nur->flags |= cur->flags & ~(Of|Df|Sf|Zf|Af|Pf|Cf);
+#endif
+	memmove(cur, nur, sizeof(Ureg));
+
+	switch((int)arg0){
+	case NCONT:
+	case NRSTR:
+		if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted pc=%#p sp=%#p\n",
+				nur->ip, nur->sp);
+			pexit("Suicide", 0);
+		}
+		up->ureg = nf->old;
+		qunlock(&up->debug);
+		break;
+	case NSAVE:
+		if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted pc=%#p sp=%#p\n",
+				nur->ip, nur->sp);
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+
+		splhi();
+		nf->arg1 = nf->msg;
+		nf->arg0 = &nf->ureg;
+		cur->bp = PTR2UINT(nf->arg0);
+		nf->ip = 0;
+		cur->sp = PTR2UINT(nf);
+		break;
+	default:
+		memmove(&note, &up->lastnote, sizeof(Note));
+		qunlock(&up->debug);
+		pprint("suicide: bad arg %#p in noted: %s\n", arg0, note.msg);
+		pexit(note.msg, 0);
+		break;
+	case NDFLT:
+		memmove(&note, &up->lastnote, sizeof(Note));
+		qunlock(&up->debug);
+		if(note.flag == NDebug)
+			pprint("suicide: %s\n", note.msg);
+		pexit(note.msg, note.flag != NDebug);
+		break;
+	}
+}
+
+/*
+ *  Call user, if necessary, with note.
+ *  Pass user the Ureg struct and the note on his stack.
+ */
+int
+notify(Ureg* ureg)
+{
+	Proc *up = externup();
+	int l;
+	Mpl pl;
+	Note note;
+	uintptr_t sp;
+	NFrame *nf;
+
+	/*
+	 * Calls procctl splhi, see comment in procctl for the reasoning.
+	 */
+	if(up->procctl)
+		procctl(up);
+	if(up->nnote == 0)
+		return 0;
+
+	fpunotify(ureg);
+
+	pl = spllo();
+	qlock(&up->debug);
+
+	up->notepending = 0;
+	memmove(&note, &up->note[0], sizeof(Note));
+	if(strncmp(note.msg, "sys:", 4) == 0){
+		l = strlen(note.msg);
+		if(l > ERRMAX-sizeof(" pc=0x0123456789abcdef"))
+			l = ERRMAX-sizeof(" pc=0x0123456789abcdef");
+		sprint(note.msg+l, " pc=%#p", ureg->ip);
+	}
+
+	if(note.flag != NUser && (up->notified || up->notify == nil)){
+		qunlock(&up->debug);
+		if(note.flag == NDebug)
+			pprint("suicide: %s\n", note.msg);
+		pexit(note.msg, note.flag != NDebug);
+	}
+
+	if(up->notified){
+		qunlock(&up->debug);
+		splhi();
+		return 0;
+	}
+
+	if(up->notify == nil){
+		qunlock(&up->debug);
+		pexit(note.msg, note.flag != NDebug);
+	}
+	if(!okaddr(PTR2UINT(up->notify), sizeof(ureg->ip), 0)){
+		qunlock(&up->debug);
+		pprint("suicide: bad function address %#p in notify\n",
+			up->notify);
+		pexit("Suicide", 0);
+	}
+
+	sp = ureg->sp - ROUNDUP(sizeof(NFrame), 16) - 128; // amd64 red zone, also wanted by go stack traces
+	if(!okaddr(sp, sizeof(NFrame), 1)){
+		qunlock(&up->debug);
+		pprint("suicide: bad stack address %#p in notify\n", sp);
+		pexit("Suicide", 0);
+	}
+
+	nf = UINT2PTR(sp);
+	memmove(&nf->ureg, ureg, sizeof(Ureg));
+	nf->old = up->ureg;
+	up->ureg = nf;	/* actually the NFrame, for noted */
+	memmove(nf->msg, note.msg, ERRMAX);
+	nf->arg1 = nf->msg;
+	nf->arg0 = &nf->ureg;
+	ureg->a0 = (uintptr)nf->arg0;
+	ureg->a1 = (uintptr)nf->arg1;
+	//print("Setting di to %p and si to %p\n", ureg->di, ureg->si);
+	ureg->bp = PTR2UINT(nf->arg0);
+	nf->ip = 0;
+
+	ureg->sp = sp;
+	ureg->ip = PTR2UINT(up->notify);
+	up->notified = 1;
+	up->nnote--;
+	memmove(&up->lastnote, &note, sizeof(Note));
+	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+	qunlock(&up->debug);
+	splx(pl);
+
+	return 1;
+}
+
+void
+noerrorsleft(void)
+{
+	Proc *up = externup();
+	int i;
+
+	if(up->nerrlab){
+		/* NIX processes will have a waserror in their handler */
+		if(up->ac != nil && up->nerrlab == 1)
+			return;
+
+		print("bad errstack: %d extra\n", up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			print("sp=%#p pc=%#p\n",
+				up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+}
+
+int printallsyscalls;
+
+void
+syscall(unsigned int scallnr, Ureg *ureg)
+{
+	// can only handle 6 args right now.
+	uintptr_t a0, a1, a2, a3;
+	uintptr_t a4, a5;
+
+	a0 = ureg->a0;
+	a1 = ureg->a1;
+	a2 = ureg->a2;
+	a3 = ureg->a3;
+	a4 = ureg->a4;
+	a5 = ureg->a5;
+	Proc *up = externup();
+	if (0) iprint("Syscall %d, %lx, %lx, %lx %lx %lx %lx\n", scallnr, a0, a1, a2, a3, a4, a5);
+	char *e;
+	uintptr_t	sp;
+	int s;
+	int64_t startns, stopns;
+	Ar0 ar0;
+	static Ar0 zar0;
+
+	panic("test userureg");
+	//if(!userureg(ureg))
+		//panic("syscall: cs %#llx\n", ureg->cs);
+
+	cycles(&up->kentry);
+
+	machp()->syscall++;
+	up->nsyscall++;
+	up->nqsyscall++;
+	up->insyscall = 1;
+	up->pc = ureg->ip;
+	up->dbgreg = ureg;
+	sp = ureg->sp;
+	startns = stopns = 0;
+	if (0) hi("so far syscall!\n");
+	if (up->pid == 0 || printallsyscalls) {
+		syscallfmt('E', scallnr, nil, startns, stopns, a0, a1, a2, a3, a4, a5);
+		if(up->syscalltrace) {
+			print("E %s\n", up->syscalltrace);
+			free(up->syscalltrace);
+			up->syscalltrace = nil;
+		}
+	}
+
+	if(up->procctl == Proc_tracesyscall){
+		/*
+		 * Redundant validaddr.  Do we care?
+		 * Tracing syscalls is not exactly a fast path...
+		 * Beware, validaddr currently does a pexit rather
+		 * than an error if there's a problem; that might
+		 * change in the future.
+		 */
+		if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE))
+			validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0);
+
+		syscallfmt('E', scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
+		up->procctl = Proc_stopme;
+		procctl(up);
+		if(up->syscalltrace)
+			free(up->syscalltrace);
+		up->syscalltrace = nil;
+		startns = todget(nil);
+	}
+	if (0) hi("more syscall!\n");
+	up->scallnr = scallnr;
+	if(scallnr == RFORK)
+		fpusysrfork(ureg);
+	spllo();
+
+	sp = ureg->sp;
+	up->nerrlab = 0;
+	ar0 = zar0;
+	if(!waserror()){
+		if(scallnr >= nsyscall || systab[scallnr].f == nil){
+			pprint("bad sys call number %d pc %#llx\n",
+				scallnr, ureg->ip);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+
+		if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE))
+			validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0);
+
+		memmove(up->arg, UINT2PTR(sp+BY2SE), sizeof(up->arg));
+		up->psstate = systab[scallnr].n;
+	if (0) hi("call syscall!\n");
+		systab[scallnr].f(&ar0, a0, a1, a2, a3, a4, a5);
+	if (0) hi("it returned!\n");
+		poperror();
+	}
+	else{
+		/* failure: save the error buffer for errstr */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+		if(DBGFLG && up->pid == 1)
+			iprint("%s: syscall %s error %s\n",
+				up->text, systab[scallnr].n, up->syserrstr);
+		ar0 = systab[scallnr].r;
+	}
+
+	/*
+	 * NIX: for the execac() syscall, what follows is done within
+	 * the system call, because it never returns.
+	 * See acore.c:/^retfromsyscall
+	 */
+
+	noerrorsleft();
+
+	/*
+	 * Put return value in frame.
+	 */
+	ureg->a0 = ar0.p;
+
+	if (up->pid == 0 || printallsyscalls) {
+		stopns = todget(nil);
+		syscallfmt('X', scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
+		if(up->syscalltrace) {
+			print("X %s\n", up->syscalltrace);
+			free(up->syscalltrace);
+			up->syscalltrace = nil;
+		}
+	}
+
+	if(up->procctl == Proc_tracesyscall){
+		uint8_t what = 'X';
+		stopns = todget(nil);
+		up->procctl = Proc_stopme;
+		if (scallnr == RFORK && a0 & RFPROC && ar0.i > 0)
+			what = 'F';
+		syscallfmt(what, scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
+		s = splhi();
+		procctl(up);
+		splx(s);
+		if(up->syscalltrace)
+			free(up->syscalltrace);
+		up->syscalltrace = nil;
+	}else if(up->procctl == Proc_totc || up->procctl == Proc_toac)
+		procctl(up);
+
+	if (0) hi("past sysretfmt\n");
+	up->insyscall = 0;
+	up->psstate = 0;
+
+	if(scallnr == NOTED)
+		noted(ureg, a0);
+
+	if (0) hi("now to splhi\n");
+	splhi();
+	if(scallnr != RFORK && (up->procctl || up->nnote))
+		notify(ureg);
+
+	/* if we delayed sched because we held a lock, sched now */
+	if(up->delaysched){
+		sched();
+		splhi();
+	}
+	kexit(ureg);
+	if (0) hi("done kexit\n");
+}
+
+uintptr_t
+sysexecstack(uintptr_t stack, int argc)
+{
+	uintptr_t sp;
+	/*
+	 * Given a current bottom-of-stack and a count
+	 * of pointer arguments to be pushed onto it followed
+	 * by an integer argument count, return a suitably
+	 * aligned new bottom-of-stack which will satisfy any
+	 * hardware stack-alignment constraints.
+	 * Rounding the stack down to be aligned with the
+	 * natural size of a pointer variable usually suffices,
+	 * but some architectures impose further restrictions,
+	 * e.g. 32-bit SPARC, where the stack must be 8-byte
+	 * aligned although pointers and integers are 32-bits.
+	 */
+	USED(argc);
+
+	sp = STACKALIGN(stack);
+	/* but we need to align the stack to 16 bytes, not 8, once
+	 * nil
+	 * argv
+	 * argc
+	 * are pushed. So if we have odd arguments, we need an odd-8-byte
+	 * aligned stack; else, an even aligned stack.
+	 */
+	if (argc & 1)
+		sp -= sp & 8 ? 0 : 8;
+	else
+		sp -= sp & 8 ? 8 : 0;
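+	/*
+	 * E.g. argc = 3: 3 argv pointers + nil + argc = 5 words
+	 * (40 bytes), so starting from sp = 8 (mod 16) the final
+	 * stack pointer lands back on a 16-byte boundary.
+	 */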
+	//print("For %d args, sp is now %p\n", argc, sp);
+	return sp;
+}
+
+void*
+sysexecregs(uintptr_t entry, uint32_t ssize, void *tos)
+{
+	Proc *up = externup();
+	uintptr_t *sp;
+	Ureg *ureg;
+
+	// We made sure it was correctly aligned in sysexecstack, above.
+	if (ssize & 0xf) {
+		print("your stack is wrong: stacksize is not 16-byte aligned: %d\n", ssize);
+		panic("misaligned stack in sysexecregs");
+	}
+	sp = (uintptr_t*)(USTKTOP - ssize);
+
+	ureg = up->dbgreg;
+	ureg->sp = PTR2UINT(sp);
+	ureg->ip = entry;
+	//ureg->type = 64;			/* fiction for acid */
+	panic("sysexecregs");
+	//ureg->dx = (uintptr_t)tos;
+
+	/*
+	 * return the address of kernel/user shared data
+	 * (e.g. clock stuff)
+	 */
+	return UINT2PTR(USTKTOP-sizeof(Tos));
+}
+
+void
+sysprocsetup(Proc* p)
+{
+	fpusysprocsetup(p);
+}
+
+void
+sysrforkchild(Proc* child, Proc* parent)
+{
+	Ureg *cureg;
+// If STACKPAD is 1 things go very bad very quickly.
+// But it is the right value ...
+#define STACKPAD 1 /* for return PC? */
+	/*
+	 * Add STACKPAD*BY2SE to the stack to account for
+	 *  - the return PC
+	 *  (NOT NOW) - trap's arguments (syscallnr, ureg)
+	 */
+	child->sched.sp = PTR2UINT(child->kstack+KSTACK-((sizeof(Ureg)+STACKPAD*BY2SE)));
+	child->sched.pc = PTR2UINT(sysrforkret);
+
+	cureg = (Ureg*)(child->sched.sp+STACKPAD*BY2SE);
+	memmove(cureg, parent->dbgreg, sizeof(Ureg));
+
+	/* Things from bottom of syscall which were never executed */
+	child->psstate = 0;
+	child->insyscall = 0;
+	//iprint("Child SP set tp %p\n", (void *)child->sched.sp);
+
+	fpusysrforkchild(child, parent);
+}

+ 355 - 0
sys/src/9/riscv/tcore.c

@@ -0,0 +1,355 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include <tos.h>
+#include <pool.h>
+#include "ureg.h"
+#include "io.h"
+
+Lock nixaclock;	/* NIX AC lock; held while assigning procs to cores */
+
+/*
+ * NIX support for the time sharing core.
+ */
+
+extern void actrapret(void);
+extern void acsysret(void);
+
+Mach*
+getac(Proc *p, int core)
+{
+	Proc *up = externup();
+	int i;
+	Mach *mp;
+
+	mp = nil;
+	if(core == 0)
+		panic("can't getac for a %s", rolename[NIXTC]);
+	lock(&nixaclock);
+	if(waserror()){
+		unlock(&nixaclock);
+		nexterror();
+	}
+	if(core > 0){
+		if(core >= MACHMAX)
+			error("no such core");
+		mp = sys->machptr[core];
+		if(mp == nil || mp->online == 0 || mp->proc != nil)
+			error("core not online or busy");
+		if(mp->NIX.nixtype != NIXAC)
+			error("core is not an AC");
+	Found:
+		mp->proc = p;
+	}else{
+		for(i = 0; i < MACHMAX; i++)
+			if((mp = sys->machptr[i]) != nil && mp->online && mp->NIX.nixtype == NIXAC)
+				if(mp->proc == nil)
+					goto Found;
+		error("not enough cores");
+	}
+	unlock(&nixaclock);
+	poperror();
+	return mp;
+}
+
+/*
+ * BUG:
+ * The AC must not accept interrupts while in the kernel,
+ * or we must be prepared for nesting them, which we are not.
+ * This is important for note handling, because postnote()
+ * assumes that it's ok to send an IPI to an AC, no matter its
+ * state. The /proc interface also assumes that.
+ *
+ */
+void
+intrac(Proc *p)
+{
+	Mach *ac;
+
+	ac = p->ac;
+	if(ac == nil){
+		DBG("intrac: Proc.ac is nil. no ipi sent.\n");
+		return;
+	}
+	/*
+	 * It's ok if the AC gets idle in the mean time.
+	 */
+	DBG("intrac: ipi to cpu%d\n", ac->machno);
+	// what to do?
+	panic((char *)__func__);
+	//apicipi(ac->apicno);
+}
+
+void
+putac(Mach *m)
+{
+	mfence();
+	m->proc = nil;
+}
+
+void
+stopac(void)
+{
+	Proc *up = externup();
+	Mach *mp;
+
+	mp = up->ac;
+	if(mp == nil)
+		return;
+	if(mp->proc != up)
+		panic("stopac");
+
+	lock(&nixaclock);
+	up->ac = nil;
+	mp->proc = nil;
+	unlock(&nixaclock);
+
+	/* TODO:
+	 * send sipi to up->ac, it would rerun squidboy(), and
+	 * wait for us to give it a function to run.
+	 */
+}
+
+/*
+ * Functions starting with ac... are run in the application core.
+ * All other functions are run by the time-sharing cores.
+ */
+
+typedef void (*APfunc)(void);
+extern int notify(Ureg*);
+
+/*
+ * run an arbitrary function with arbitrary args on an ap core
+ * first argument is always pml4 for process
+ * make a field and a struct for the args cache line.
+ *
+ * Returns the return-code for the ICC or -1 if the process was
+ * interrupted while issuing the ICC.
+ */
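+/*
+ * Typical call (see runacore below): runac(up->ac, actouser, 1, nil, 0)
+ * dispatches the process to its AC; flushtlb first copies the TC's
+ * pml4 into the AC's.
+ */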
+int
+runac(Mach *mp, APfunc func, int flushtlb, void *a, int32_t n)
+{
+	Proc *up = externup();
+	uint8_t *dpg, *spg;
+
+	if (n > sizeof(mp->NIX.icc->data))
+		panic("runac: args too long");
+
+	if(mp->online == 0)
+		panic("Bad core");
+	if(mp->proc != nil && mp->proc != up)
+		panic("runapfunc: mach is busy with another proc?");
+
+	memmove(mp->NIX.icc->data, a, n);
+	if(flushtlb){
+		DBG("runac flushtlb: cppml4 %#p %#p\n", mp->MMU.pml4->pa, machp()->MMU.pml4->pa);
+		dpg = UINT2PTR(mp->MMU.pml4->va);
+		spg = UINT2PTR(machp()->MMU.pml4->va);
+		/* We should copy less:
+		 *	memmove(dgp, spg, machp()->MMU.pml4->daddr * sizeof(PTE));
+		 */
+		memmove(dpg, spg, PTSZ);
+		if(0){
+			print("runac: upac pml4 %#p\n", up->ac->MMU.pml4->pa);
+			dumpptepg(4, up->ac->MMU.pml4->pa);
+		}
+	}
+	mp->NIX.icc->flushtlb = flushtlb;
+	mp->NIX.icc->rc = ICCOK;
+
+	DBG("runac: exotic proc on cpu%d\n", mp->machno);
+	if(waserror()){
+		qunlock(&up->debug);
+		nexterror();
+	}
+	qlock(&up->debug);
+	up->nicc++;
+	up->state = Exotic;
+	up->psstate = 0;
+	qunlock(&up->debug);
+	poperror();
+	mfence();
+	mp->NIX.icc->fn = func;
+	sched();
+	return mp->NIX.icc->rc;
+}
+
+/*
+ * Cleanup done by runacore to pretend we are going back to user space.
+ * We won't return and won't do what syscall() would normally do.
+ * Do it here instead.
+ */
+static void
+fakeretfromsyscall(Ureg *ureg)
+{
+	Proc *up = externup();
+	int s;
+
+	poperror();	/* as syscall() would do if we would return */
+	if(up->procctl == Proc_tracesyscall){	/* Would this work? */
+		up->procctl = Proc_stopme;
+		s = splhi();
+		procctl(up);
+		splx(s);
+	}
+
+	up->insyscall = 0;
+	/* if we delayed sched because we held a lock, sched now */
+	if(up->delaysched){
+		sched();
+		splhi();
+	}
+	kexit(ureg);
+}
+
+/*
+ * Move the current process to an application core.
+ * This is performed at the end of execac(), and
+ * we pretend to be returning to user-space, but instead we
+ * dispatch the process to another core.
+ * 1. We do the final bookkeeping that syscall() would do after
+ *    a return from sysexec(), because we are not returning.
+ * 2. We dispatch the process to an AC using an ICC.
+ *
+ * This function won't return unless the process is reclaimed back
+ * to the time-sharing core, and is the handler for the process
+ * to deal with traps and system calls until the process dies.
+ *
+ * Remember that this function is the "line" between user and kernel
+ * space, it's not expected to raise|handle any error.
+ *
+ * We install a safety error label, just in case we raise errors,
+ * which we shouldn't. (noerrorsleft knows that for exotic processes
+ * there is an error label pushed by us).
+ */
+void
+runacore(void)
+{
+	Proc *up = externup();
+	Ureg *ureg;
+	void (*fn)(void);
+	int rc, flush, s;
+	//char *n;
+	uint64_t t1;
+
+	if(waserror())
+		panic("runacore: error: %s\n", up->errstr);
+	ureg = up->dbgreg;
+	fakeretfromsyscall(ureg);
+	fpusysrfork(ureg);
+
+	procpriority(up, PriKproc, 1);
+	rc = runac(up->ac, actouser, 1, nil, 0);
+	procpriority(up, PriNormal, 0);
+	for(;;){
+		t1 = fastticks(nil);
+		flush = 0;
+		fn = nil;
+		switch(rc){
+		case ICCTRAP:
+			s = splhi();
+			panic("cr2");
+			//machp()->MMU.cr2 = up->ac->MMU.cr2;
+			//DBG("runacore: trap %llu cr2 %#llx ureg %#p\n",
+			//ureg->type, machp()->MMU.cr2, ureg);
+#if 0
+			switch(ureg->type){
+			case IdtIPI:
+				if(up->procctl || up->nnote)
+					notify(up->dbgreg);
+				if(up->ac == nil)
+					goto ToTC;
+				kexit(up->dbgreg);
+				break;
+			case IdtNM:
+			case IdtMF:
+			case IdtXF:
+				/* these are handled in the AC;
+				 * If we get here, they left in m->NIX.icc->data
+				 * a note to be posted to the process.
+				 * Post it, and make the vector a NOP.
+				 */
+				n = up->ac->NIX.icc->note;
+				if(n != nil)
+					postnote(up, 1, n, NDebug);
+				ureg->type = IdtIPI;		/* NOP */
+				break;
+			default:
+				rootput(machp()->MMU.pml4->pa);
+				if(0 && ureg->type == IdtPF){
+					print("before PF:\n");
+					print("AC:\n");
+					dumpptepg(4, up->ac->MMU.pml4->pa);
+					print("\n%s:\n", rolename[NIXTC]);
+					dumpptepg(4, machp()->MMU.pml4->pa);
+				}
+				trap(ureg);
+			}
+#endif
+			splx(s);
+			flush = 1;
+			fn = actrapret;
+			break;
+		case ICCSYSCALL:
+			DBG("runacore: syscall a0 %#llx ureg %#p\n",
+				ureg->a0, ureg);
+			rootput(machp()->MMU.pml4->pa);
+			//syscall(ureg->ax, ureg);
+			flush = 1;
+			fn = acsysret;
+			if(0)
+			if(up->nqtrap > 2 || up->nsyscall > 1)
+				goto ToTC;
+			if(up->ac == nil)
+				goto ToTC;
+			break;
+		default:
+			panic("runacore: unexpected rc = %d", rc);
+		}
+		up->tctime += fastticks2us(fastticks(nil) - t1);
+		procpriority(up, PriExtra, 1);
+		rc = runac(up->ac, fn, flush, nil, 0);
+		procpriority(up, PriNormal, 0);
+	}
+ToTC:
+	/*
+	 *  to procctl, then syscall,  to
+	 *  be back in the TC
+	 */
+	DBG("runacore: up %#p: return\n", up);
+}
+
+extern ACVctl *acvctl[];
+
+void
+actrapenable(int vno, char* (*f)(Ureg*, void*), void* a, char *name)
+{
+	ACVctl *v;
+
+	if(vno < 0 || vno >= 256)
+		panic("actrapenable: vno %d\n", vno);
+	v = malloc(sizeof(ACVctl));	/* v is an ACVctl, not a Vctl */
+	v->f = f;
+	v->a = a;
+	v->vno = vno;
+	strncpy(v->name, name, KNAMELEN);
+	v->name[KNAMELEN-1] = 0;
+
+	if(acvctl[vno])
+		panic("AC traps can't be shared");
+	acvctl[vno] = v;
+}
+

+ 15 - 0
sys/src/libc/riscv/getcallerpc.S

@@ -0,0 +1,15 @@
+// This file is part of the Harvey operating system.  It is subject to the
+// license terms of the GNU GPL v2 in LICENSE.gpl found in the top-level
+// directory of this distribution and at http://www.gnu.org/licenses/gpl-2.0.txt
+//
+// No part of Harvey operating system, including this file, may be copied,
+// modified, propagated, or distributed except according to the terms
+// contained in the LICENSE.gpl file.
+
+.text
+
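+// Stub: recovering the caller's saved PC needs a settled frame
+// layout (cf. the commented-out load below), so return 0 for now.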
+.globl getcallerpc
+getcallerpc:
+//	LD	a0,x1
+	li	a0, 0
+	RET

+ 15 - 0
sys/src/libc/riscv/getcallstack.S

@@ -0,0 +1,15 @@
+// This file is part of the Harvey operating system.  It is subject to the
+// license terms of the GNU GPL v2 in LICENSE.gpl found in the top-level
+// directory of this distribution and at http://www.gnu.org/licenses/gpl-2.0.txt
+//
+// No part of Harvey operating system, including this file, may be copied,
+// modified, propagated, or distributed except according to the terms
+// contained in the LICENSE.gpl file.
+
+.text
+
+.globl getcallstack
+getcallstack:
+	li a0, 0
+	li a1, 0
+	RET

+ 3 - 1
sys/src/regress/build.json

@@ -4,6 +4,9 @@
 			"/sys/src/cmd/cmd.json"
 			"/sys/src/cmd/cmd.json"
 		],
 		],
 		"Install": "/$ARCH/bin/regress",
 		"Install": "/$ARCH/bin/regress",
+		"NoSourceFilesCmd": [
+			"tls.c"
+		],
 		"SourceFilesCmd": [
 		"SourceFilesCmd": [
 			"alarm.c",
 			"alarm.c",
 			"args.c",
 			"args.c",
@@ -35,7 +38,6 @@
 			"sysfatal.c",
 			"sysfatal.c",
 			"sysstatread.c",
 			"sysstatread.c",
 			"thread.c",
 			"thread.c",
-			"tls.c",
 			"tsemacquire.c",
 			"tsemacquire.c",
 			"va_copy.c",
 			"va_copy.c",
 			"vseprint.c",
 			"vseprint.c",

Some files were not shown because too many files changed in this diff