
riscv: it builds.

I had to turn off the amd64-specific tls regression test.

It boots as a coreboot payload on the spike emulator
as far as it currently can.

Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>
Ronald G. Minnich
commit 1584469569

+ 3 - 3
sys/src/9/port/devcap.c

@@ -237,9 +237,9 @@ capwrite(Chan *c, void *va, int32_t n, int64_t m)
 		if(key == nil)
 			error(Eshort);
 		*key++ = 0;
-
-		hmac_sha1((uint8_t*)from, strlen(from), (uint8_t*)key,
-			  strlen(key), hash, nil);
+		panic("need a sha256");
+		//hmac_sha1((uint8_t*)from, strlen(from), (uint8_t*)key,
+		//strlen(key), hash, nil);
 
 		p = remcap(hash);
 		if(p == nil){
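
The hunk above stubs the capability keying out with a panic until an HMAC-SHA256 replacement is wired in. A minimal sketch of that replacement, assuming a libsec-style sha2_256 with the chained-DigestState convention (nil digest continues a hash, non-nil finalizes it); whether this tree already carries sha2_256 is an assumption:

	enum {
		Blocksz		= 64,	/* SHA-256 block size */
		Digestsz	= 32,	/* SHA-256 digest size */
	};

	/*
	 * HMAC per RFC 2104 over an assumed sha2_256 primitive.
	 * The hash buffer handed to remcap() would also have to
	 * grow from SHA1dlen to 32 bytes.
	 */
	static void
	hmacsha256(uint8_t *msg, uint32_t mlen, uint8_t *key, uint32_t klen,
		   uint8_t *digest)
	{
		int i;
		uint8_t pad[Blocksz], inner[Digestsz], kbuf[Digestsz];
		DigestState *s;

		if(klen > Blocksz){		/* hash long keys first */
			sha2_256(key, klen, kbuf, nil);
			key = kbuf;
			klen = Digestsz;
		}
		memset(pad, 0x36, sizeof pad);	/* ipad */
		for(i = 0; i < klen; i++)
			pad[i] ^= key[i];
		s = sha2_256(pad, Blocksz, nil, nil);
		sha2_256(msg, mlen, inner, s);	/* inner = H(ipad || msg) */

		memset(pad, 0x5c, sizeof pad);	/* opad */
		for(i = 0; i < klen; i++)
			pad[i] ^= key[i];
		s = sha2_256(pad, Blocksz, nil, nil);
		sha2_256(inner, Digestsz, digest, s);	/* H(opad || inner) */
	}

capwrite() would then call hmacsha256((uint8_t*)from, strlen(from), (uint8_t*)key, strlen(key), hash) where the panic sits today.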

+ 11 - 11
sys/src/9/port/riscvport.json

@@ -10,19 +10,24 @@
 			"mksys -o ../port/error.h '-mode=error.h' $HARVEY/sys/src/sysconf.json"
 		],
 		"#SourceFiles": [
+			"../port/devcoreboot.c",
+			"../port/devkprof.c",
+			"../port/cpu_buffer.c",
+			"../port/devkbin.c",
+			"../port/devssl.c",
+			"../port/devtls.c"
+		],
+		"SourceFiles": [
 			"../port/alarm.c",
 			"../port/allocb.c",
 			"../port/cache.c",
 			"../port/chan.c",
-			"../port/cpu_buffer.c",
+			"../port/dev.c",
 			"../port/devcap.c",
 			"../port/devcons.c",
-			"../port/devcoreboot.c",
 			"../port/devdup.c",
 			"../port/devenv.c",
 			"../port/devfdmux.c",
-			"../port/devkprof.c",
-			"../port/devkbin.c",
 			"../port/devmnt.c",
 			"../port/devmntn.c",
 			"../port/devpipe.c",
@@ -33,8 +38,6 @@
 			"../port/devsd.c",
 			"../port/devsegment.c",
 			"../port/devsrv.c",
-			"../port/devssl.c",
-			"../port/devtls.c",
 			"../port/devtab.c",
 			"../port/devtrace.c",
 			"../port/devuart.c",
@@ -75,10 +78,7 @@
 			"../port/syszio.c",
 			"../port/taslock.c",
 			"../port/tod.c",
-			"../port/virtio_lib.c",
 			"../port/watermarks.c"
-		],
-		"SourceFiles": [
-			"../port/qio.c"
-		] }
+		]
+	}
 }

+ 6 - 0
sys/src/9/riscv/.gitignore

@@ -0,0 +1,6 @@
+/systab.c
+/cpu.c
+/init.h
+/errstr.h
+/init
+/riscvcpu.c

+ 343 - 0
sys/src/9/riscv/acore.c

@@ -0,0 +1,343 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include <tos.h>
+#include <pool.h>
+#include "ureg.h"
+#include "io.h"
+#include "../port/pmc.h"
+
+/*
+ * NIX code run at the AC.
+ * This is the "AC kernel".
+ */
+
+/*
+ * FPU:
+ *
+ * The TC handles the FPU by keeping track of the state for the
+ * current process. If it has been used and must be saved, it is saved, etc.
+ * When a process gets to the AC, we handle the FPU directly, and save its
+ * state before going back to the TC (or the TC state would be stale).
+ *
+ * Because of this, each time the process comes back to the AC and
+ * uses the FPU it will get a device not available trap and
+ * the state will be restored. This could be optimized because the AC
+ * is single-process, and we do not have to disable the FPU while
+ * saving, so it does not have to be restored.
+ */
+
+extern char* acfpunm(Ureg* ureg, void *unused_voidp);
+extern char* acfpumf(Ureg* ureg, void *unused_voidp);
+extern char* acfpuxf(Ureg* ureg, void *unused_voidp);
+extern void acfpusysprocsetup(Proc*);
+
+extern void _acsysret(void);
+extern void _actrapret(void);
+
+ACVctl *acvctl[256];
+
+/*
+ * Test inter-core calls by calling a core to print something, and then
+ * waiting for it to complete.
+ */
+static void
+testiccfn(void)
+{
+	print("called: %s\n", ( char *)machp()->NIX.icc->data);
+}
+
+void
+testicc(int i)
+{
+	Mach *mp;
+
+	if((mp = sys->machptr[i]) != nil && mp->online != 0){
+		if(mp->NIX.nixtype != NIXAC){
+			print("testicc: core %d is not an AC\n", i);
+			return;
+		}
+		print("calling core %d... ", i);
+		mp->NIX.icc->flushtlb = 0;
+		snprint(( char *)mp->NIX.icc->data, ICCLNSZ, "<%d>", i);
+		mfence();
+		mp->NIX.icc->fn = testiccfn;
+		mwait(&mp->NIX.icc->fn);
+	}
+}
+
+/*
+ * Check if the AC kernel (mach) stack has more than 4*KiB free.
+ * Do not call panic, the stack is gigantic.
+ */
+static void
+acstackok(void)
+{
+	char dummy;
+	char *sstart;
+
+	sstart = (char *)machp() - PGSZ - 4*PTSZ - MACHSTKSZ;
+	if(&dummy < sstart + 4*KiB){
+		print("ac kernel stack overflow, cpu%d stopped\n", machp()->machno);
+		DONE();
+	}
+}
+
+/*
+ * Main scheduling loop done by the application core.
+ * Some of the functions run will not return.
+ * The system call handler will reset the stack and
+ * call acsched again.
+ * We loop because some functions may return and we should
+ * wait for another call.
+ */
+void
+acsched(void)
+{
+	acmmuswitch();
+	for(;;){
+		acstackok();
+		mwait(&machp()->NIX.icc->fn);
+		if(machp()->NIX.icc->flushtlb)
+			acmmuswitch();
+		DBG("acsched: cpu%d: fn %#p\n", machp()->machno, machp()->NIX.icc->fn);
+		machp()->NIX.icc->fn();
+		DBG("acsched: cpu%d: idle\n", machp()->machno);
+		mfence();
+		machp()->NIX.icc->fn = nil;
+	}
+}
+
+void
+acmmuswitch(void)
+{
+	extern Page mach0pml4;
+
+	DBG("acmmuswitch pml4 %#p mach0pml4 %#p m0pml4 %#p\n", machp()->MMU.pml4->pa, mach0pml4.pa, sys->machptr[0]->MMU.pml4->pa);
+
+	rootput(machp()->MMU.pml4->pa);
+}
+
+/*
+ * Beware: up is not set when this function is called.
+ */
+void
+actouser(void)
+{
+#if 0
+	void xactouser(uint64_t);
+	Ureg *u;
+
+	acfpusysprocsetup(m->proc);
+
+	u = m->proc->dbgreg;
+	DBG("cpu%d: touser usp = %#p entry %#p\n", machp()->machno, u->sp, u->ip);
+	xactouser(u->sp);
+#endif
+	panic("actouser");
+}
+
+void
+actrapret(void)
+{
+	/* done by actrap() */
+}
+
+/*
+ * Entered in AP core context, upon traps (system calls go through acsyscall)
+ * using up->dbgreg means cores MUST be homogeneous.
+ *
+ * BUG: We should set up some trapenable() mechanism for the AC,
+ * so that code like fpu.c could arrange for handlers specific to
+ * the AC, instead of doing that by hand here.
+ *
+ * All interrupts are masked while in the "kernel"
+ */
+void
+actrap(Ureg *u)
+{
+	panic("actrap");
+#if 0
+	char *n;
+	ACVctl *v;
+
+	n = nil;
+
+	_pmcupdate(m);
+	if(m->proc != nil){
+		m->proc->nactrap++;
+		m->proc->actime1 = fastticks(nil);
+	}
+	if(u->type < nelem(acvctl)){
+		v = acvctl[u->type];
+		if(v != nil){
+			DBG("actrap: cpu%d: %llu\n", machp()->machno, u->type);
+			n = v->f(u, v->a);
+			if(n != nil)
+				goto Post;
+			return;
+		}
+	}
+	switch(u->type){
+	case IdtDF:
+		print("AC: double fault\n");
+		dumpregs(u);
+		ndnr();
+	case IdtIPI:
+		m->intr++;
+		DBG("actrap: cpu%d: IPI\n", machp()->machno);
+		apiceoi(IdtIPI);
+		break;
+	case IdtTIMER:
+		apiceoi(IdtTIMER);
+		panic("timer interrupt in an AC");
+		break;
+	case IdtPF:
+		/* this case is here for debug only */
+		m->pfault++;
+		DBG("actrap: cpu%d: PF cr2 %#llx\n", machp()->machno, cr2get());
+		break;
+	default:
+		print("actrap: cpu%d: %llu\n", machp()->machno, u->type);
+	}
+Post:
+	m->NIX.icc->rc = ICCTRAP;
+	m->cr2 = cr2get();
+	memmove(m->proc->dbgreg, u, sizeof *u);
+	m->NIX.icc->note = n;
+	fpuprocsave(m->proc);
+	_pmcupdate(m);
+	mfence();
+	m->NIX.icc->fn = nil;
+	ready(m->proc);
+
+	mwait(&m->NIX.icc->fn);
+
+	if(m->NIX.icc->flushtlb)
+		acmmuswitch();
+	if(m->NIX.icc->fn != actrapret)
+		acsched();
+	DBG("actrap: ret\n");
+	memmove(u, m->proc->dbgreg, sizeof *u);
+	if(m->proc)
+		m->proc->actime += fastticks2us(fastticks(nil) - m->proc->actime1);
+#endif
+}
+
+void
+acsyscall(void)
+{
+	panic("acsyscall");
+#if 0
+	Proc *p;
+
+	/*
+	 * If we saved the Ureg into m->proc->dbgregs,
+	 * there's nothing else we have to do.
+	 * Otherwise, we should set m->proc->dbgregs = u;
+	 */
+	DBG("acsyscall: cpu%d\n", machp()->machno);
+
+	_pmcupdate(m);
+	p = m->proc;
+	p->actime1 = fastticks(nil);
+	m->syscall++;	/* would also count it in the TS core */
+	m->NIX.icc->rc = ICCSYSCALL;
+	m->cr2 = cr2get();
+	fpuprocsave(p);
+	_pmcupdate(m);
+	mfence();
+	m->NIX.icc->fn = nil;
+	ready(p);
+	/*
+	 * The next call is probably going to make us jmp
+	 * into user code, forgetting all our state in this
+	 * stack, upon the next syscall.
+	 * We don't nest calls in the current stack for too long.
+	 */
+	acsched();
+#endif
+}
+
+/*
+ * Called in AP core context, to return from system call.
+ */
+void
+acsysret(void)
+{
+	panic("acsysret");
+#if 0
+	DBG("acsysret\n");
+	if(m->proc != nil)
+		m->proc->actime += fastticks2us(fastticks(nil) - m->proc->actime1);
+	_acsysret();
+#endif
+}
+
+void
+dumpreg(void *u)
+{
+	print("reg is %p\n", u);
+	ndnr();
+}
+
+char *rolename[] =
+{
+	[NIXAC] = "AC",
+	[NIXTC] = "TC",
+	[NIXKC] = "KC",
+	[NIXXC] = "XC",
+};
+
+void
+acmodeset(int mode)
+{
+	switch(mode){
+	case NIXAC:
+	case NIXKC:
+	case NIXTC:
+	case NIXXC:
+		break;
+	default:
+		panic("acmodeset: bad mode %d", mode);
+	}
+	machp()->NIX.nixtype = mode;
+}
+
+void
+acinit(void)
+{
+	Mach *mp;
+	Proc *pp;
+
+	/*
+	 * Lower the priority of the apic to 0,
+	 * to accept interrupts.
+	 * Raise it later if needed to disable them.
+	 */
+	panic("apicpri");
+	//apicpri(0);
+
+	/*
+	 * Be sure a few assembler assumptions still hold.
+	 * Someone moved m->stack and I had fun debugging...
+	 */
+	mp = 0;
+	pp = 0;
+	assert((uintptr)&mp->proc == 16);
+	assert((uintptr)&pp->dbgreg == 24);
+	assert((uintptr)&mp->stack == 24);
+}
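
acinit() pins the structure offsets that the assembly entry code depends on, using bare magic numbers. The same checks can be made self-describing at compile time; a sketch, assuming __builtin_offsetof and C11 _Static_assert are acceptable in this kernel:

	/* the constants 16 and 24 are the ones asserted above */
	#define KOFF(t, f)	__builtin_offsetof(t, f)

	_Static_assert(KOFF(Mach, proc) == 16, "asm expects Mach.proc at 16");
	_Static_assert(KOFF(Proc, dbgreg) == 24, "asm expects Proc.dbgreg at 24");
	_Static_assert(KOFF(Mach, stack) == 24, "asm expects Mach.stack at 24");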

+ 201 - 0
sys/src/9/riscv/arch.c

@@ -0,0 +1,201 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * EPISODE 12B
+ * How to recognise different types of trees from quite a long way away.
+ * NO. 1
+ * THE LARCH
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ureg.h"
+
+/* the rules are different for different compilers. We need to define up. */
+// Initialize it to force it into data.
+// That way, if we set them in assembly, they won't get zero'd by the bss init in main
+// N.B. There was an interesting hack in plan 9 c. You could grab up to two registers for your
+// program. In the case of Plan 9, m was r15, and up was r14. Very slick, and if there is a way to do
+// this in gcc or clang I don't know it. This also nicely handled per cpu info; R15/14 were always right for
+// your core and context.
+//Mach *m = (void *)0;
+
+int
+incref(Ref *r)
+{
+	int x;
+
+	lock(&r->l);
+	x = ++r->ref;
+	unlock(&r->l);
+	return x;
+}
+
+int
+decref(Ref *r)
+{
+	int x;
+
+	lock(&r->l);
+	x = --r->ref;
+	unlock(&r->l);
+	if(x < 0)
+		panic("decref pc=%#p", getcallerpc());
+
+	return x;
+}
+
+void fpuprocrestore(Proc *p)
+{
+	panic("fpuprocrestore");
+}
+
+void
+procrestore(Proc *p)
+{
+	uint64_t t;
+
+	if(p->kp)
+		return;
+	cycles(&t);
+	p->pcycles -= t;
+
+	fpuprocrestore(p);
+}
+
+void
+fpuprocsave(Proc *p)
+{
+	panic("fpuprocsave");
+}
+
+/*
+ *  Save the mach dependent part of the process state.
+ *  NB: the caller should mmuflushtlb after procsave().
+ *  procsave/procrestore don't touch the mmu, they
+ *  care about fpu, mostly.
+ */
+void
+procsave(Proc *p)
+{
+	uint64_t t;
+
+	cycles(&t);
+	p->pcycles += t;
+
+	fpuprocsave(p);
+}
+
+static void
+linkproc(void)
+{
+	Proc *up = externup();
+	spllo();
+	up->kpfun(up->kparg);
+	pexit("kproc dying", 0);
+}
+
+void
+kprocchild(Proc* p, void (*func)(void*), void* arg)
+{
+	/*
+	 * gotolabel() needs a word on the stack in
+	 * which to place the return PC used to jump
+	 * to linkproc().
+	 */
+	p->sched.pc = PTR2UINT(linkproc);
+	p->sched.sp = PTR2UINT(p->kstack+KSTACK-BY2SE);
+	p->sched.sp = STACKALIGN(p->sched.sp);
+
+	p->kpfun = func;
+	p->kparg = arg;
+}
+
+/*
+ *  put the processor in the halt state if we've no processes to run.
+ *  an interrupt will get us going again.
+ *  The boot TC in nix can't halt, because it must stay alert in
+ *  case an AC makes a handler process ready.
+ *  We should probably use mwait in that case.
+ */
+void
+idlehands(void)
+{
+/*	if(machp()->NIX.nixtype != NIXAC)
+	halt();*/
+}
+
+#if 0
+void
+ureg2gdb(Ureg *u, uintptr_t *g)
+{
+	g[GDB_AX] = u->ax;
+	g[GDB_BX] = u->bx;
+	g[GDB_CX] = u->cx;
+	g[GDB_DX] = u->dx;
+	g[GDB_SI] = u->si;
+	g[GDB_DI] = u->di;
+	g[GDB_BP] = u->bp;
+	g[GDB_SP] = u->sp;
+	g[GDB_R8] = u->r8;
+	g[GDB_R9] = u->r9;
+	g[GDB_R10] = u->r10;
+	g[GDB_R11] = u->r11;
+	g[GDB_R12] = u->r12;
+	g[GDB_R13] = u->r13;
+	g[GDB_R14] = u->r14;
+	g[GDB_R15] = u->r15;
+	g[GDB_PC] = u->ip;
+
+	/* it's weird, docs say 5 32-bit fields
+	 * but I count 4 if we pack these. Fix me
+	 */
+	g[GDB_PS] = 0; // u->PS;
+	g[GDB_CS] = 0; // u->CS;
+	g[GDB_SS] = 0; // u->SS;
+	g[GDB_DS] = 0; // u->DS;
+	g[GDB_ES] = 0; // u->ES;
+	g[GDB_FS] = 0; // u->FS;
+	g[GDB_GS] = 0; // u->GS;
+}
+
+void
+gdb2ureg(uintptr_t *g, Ureg *u)
+{
+	u->ax = g[GDB_AX];
+	u->bx = g[GDB_BX];
+	u->cx = g[GDB_CX];
+	u->dx = g[GDB_DX];
+	u->si = g[GDB_SI];
+	u->di = g[GDB_DI];
+	u->bp = g[GDB_BP];
+	u->sp = g[GDB_SP];
+	u->r8 = g[GDB_R8];
+	u->r9 = g[GDB_R9];
+	u->r10 = g[GDB_R10];
+	u->r11 = g[GDB_R11];
+	u->r12 = g[GDB_R12];
+	u->r13 = g[GDB_R13];
+	u->r14 = g[GDB_R14];
+	u->r15 = g[GDB_R15];
+	u->ip = g[GDB_PC];
+
+	/* it's weird but gdb seems to have no way to
+	 * express the sp. Hmm.
+	 */
+	u->flags = g[GDB_PS];
+	/* is there any point to this? */
+	u->cs = g[GDB_CS];
+	u->ss = g[GDB_SS];
+}
+#endif
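
incref()/decref() above take the Ref's Lock around a plain integer. Since the counter is a single word, a lock-free variant is possible; a sketch, assuming GCC/Clang __atomic builtins are acceptable in this tree (it is not what the commit uses):

	int
	incref_atomic(Ref *r)
	{
		return __atomic_add_fetch(&r->ref, 1, __ATOMIC_ACQ_REL);
	}

	int
	decref_atomic(Ref *r)
	{
		int x;

		x = __atomic_sub_fetch(&r->ref, 1, __ATOMIC_ACQ_REL);
		if(x < 0)
			panic("decref pc=%#p", getcallerpc());
		return x;
	}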

+ 165 - 0
sys/src/9/riscv/archriscv.c

@@ -0,0 +1,165 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#undef DBG
+#define DBG iprint
+
+void
+cpuiddump(void)
+{
+	print("riscv\n");
+}
+
+int64_t
+archhz(void)
+{
+	return 1024*1024*1024ULL;
+}
+
+int
+archmmu(void)
+{
+
+	/*
+	 * Should the check for machp()->machno != 0 be here
+	 * or in the caller (mmuinit)?
+	 *
+	 * To do here:
+	 * check and enable Pse;
+	 * Pge; Nxe.
+	 */
+
+	/*
+	 * How many page sizes are there?
+	 * Always have 4*KiB, but need to check
+	 * configured correctly.
+	 */
+	assert(PGSZ == 4*KiB);
+
+	sys->pgszlg2[0] = 12;
+	sys->pgszmask[0] = (1<<12)-1;
+	sys->pgsz[0] = 1<<12;
+	sys->npgsz = 1;
+
+	sys->pgszlg2[1] = 21;
+	sys->pgszmask[1] = (1<<21)-1;
+	sys->pgsz[1] = 1<<21;
+	sys->npgsz = 2;
+
+	sys->pgszlg2[2] = 30;
+	sys->pgszmask[2] = (1<<30)-1;
+	sys->pgsz[2] = 1<<30;
+	sys->npgsz = 3;
+
+	return sys->npgsz;
+}
+
+static int
+fmtP(Fmt* f)
+{
+	uintmem pa;
+
+	pa = va_arg(f->args, uintmem);
+
+	if(f->flags & FmtSharp)
+		return fmtprint(f, "%#16.16llx", pa);
+
+	return fmtprint(f, "%llu", pa);
+}
+
+static int
+fmtL(Fmt* f)
+{
+	Mpl pl;
+
+	pl = va_arg(f->args, Mpl);
+
+	return fmtprint(f, "%#16.16llx", pl);
+}
+
+static int
+fmtR(Fmt* f)
+{
+	uint64_t r;
+
+	r = va_arg(f->args, uint64_t);
+
+	return fmtprint(f, "%#16.16llx", r);
+}
+
+/* virtual address fmt */
+static int
+fmtW(Fmt *f)
+{
+	uint64_t va;
+
+	va = va_arg(f->args, uint64_t);
+	return fmtprint(f, "%#llx=0x[%llx][%llx][%llx][%llx][%llx]", va,
+		PTLX(va, 3), PTLX(va, 2), PTLX(va, 1), PTLX(va, 0),
+		va & ((1<<PGSHFT)-1));
+
+}
+
+void
+archfmtinstall(void)
+{
+	/*
+	 * Architecture-specific formatting. Not as neat as they
+	 * could be (e.g. there's no defined type for a 'register':
+	 *	L - Mpl, mach priority level
+	 *	P - uintmem, physical address
+	 *	R - register
+	 * With a little effort these routines could be written
+	 * in a fairly architecturally-independent manner, relying
+	 * on the compiler to optimise-away impossible conditions,
+	 * and/or by exploiting the innards of the fmt library.
+	 */
+	fmtinstall('P', fmtP);
+	fmtinstall('L', fmtL);
+	fmtinstall('R', fmtR);
+	fmtinstall('W', fmtW);
+}
+
+void
+archidle(void)
+{
+	panic("archidle");	// halt();
+}
+
+void
+microdelay(int microsecs)
+{
+	print("microdelay\n");
+/*
+	uint64_t r, t;
+
+	r = rdtsc();
+	for(t = r + (sys->cyclefreq*microsecs)/1000000ull; r < t; r = rdtsc())
+		;
+ */
+}
+
+void
+millidelay(int millisecs)
+{
+	print("millidelay\n");
+/*
+	uint64_t r, t;
+
+	r = rdtsc();
+	for(t = r + (sys->cyclefreq*millisecs)/1000ull; r < t; r = rdtsc())
+		;
+ */
+}
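
archmmu() fills in the three Sv39 leaf page sizes (4 KiB, 2 MiB, 1 GiB) by hand. Each Sv39 level resolves 9 more bits of virtual address, so the table can equally be generated; a sketch equivalent to the body above:

	int
	archmmu_sketch(void)
	{
		int i;

		assert(PGSZ == 4*KiB);
		for(i = 0; i < 3; i++){
			sys->pgszlg2[i] = 12 + 9*i;	/* 12, 21, 30 */
			sys->pgsz[i] = 1ull << sys->pgszlg2[i];
			sys->pgszmask[i] = sys->pgsz[i] - 1;
		}
		sys->npgsz = 3;
		return sys->npgsz;
	}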

+ 5 - 7
sys/src/9/riscv/build.json

@@ -18,14 +18,15 @@
 					"uint32_t kerndate = 1;"
 				],
 				"NoDev": [
+					"kbin",
+					"kprof"
+				],
+				"Dev": [
 					"arch",
 					"cap",
 					"cons",
 					"dup",
 					"env",
-					"ip",
-					"kbin",
-					"kprof",
 					"mnt",
 					"mntn",
 					"pipe",
@@ -34,11 +35,8 @@
 					"root",
 					"segment",
 					"srv",
-					"ssl",
-					"tls",
 					"uart",
-					"ws",
-					"zp"
+					"ws"
 				],
 				"NoIp": [
 					"tcp",

+ 11 - 2
sys/src/9/riscv/core.json

@@ -41,12 +41,11 @@
 			"inith.json"
 		],
 	    "MissingSourceFiles": [
-			"arch.c",
+		        "acore.c",
 			"archriscv.c",
 			"asm.c",
 		        "coreboot.c",
 			"devarch.c",
-			"map.c",
 			"memory.c",
 			"mmu.c",
 			"mp.c",
@@ -57,8 +56,18 @@
 		],
 		"SourceFiles": [
 			"asm.S",
+			"arch.c",
+		        "acore.c",
+			"archriscv.c",
 			"ctype.c",
+			"devarch.c",
 			"main.c",
+			"map.c",
+			"mmu.c",
+			"qmalloc.c",
+			"syscall.c",
+			"systab.c",
+		        "tcore.c",
 			"uart.c"
 		]
 	}

+ 598 - 0
sys/src/9/riscv/devarch.c

@@ -0,0 +1,598 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ureg.h"
+
+/* leave this for now; we might want to keep track of MMIO apart from memory. */
+typedef struct IOMap IOMap;
+struct IOMap
+{
+	IOMap	*next;
+	int	reserved;
+	char	tag[13];
+	uintptr_t	start;
+	uintptr_t	end;
+};
+
+static struct
+{
+	Lock l;
+	IOMap	*map;
+	IOMap	*free;
+	IOMap	maps[32];		// some initial free maps
+
+	QLock	ql;			// lock for reading map
+} iomap;
+
+enum {
+	Qdir = 0,
+	Qioalloc = 1,
+	Qiob,
+	Qiow,
+	Qiol,
+	Qbase,
+	Qmapram,
+
+	Qmax = 16,
+};
+
+typedef int32_t Rdwrfn(Chan*, void*, int32_t, int64_t);
+
+static Rdwrfn *readfn[Qmax];
+static Rdwrfn *writefn[Qmax];
+
+static Dirtab archdir[Qmax] = {
+	".",		{ Qdir, 0, QTDIR },	0,	0555,
+	"ioalloc",	{ Qioalloc, 0 },	0,	0444,
+	/* NOTE: kludge until we have real permissions. */
+	"iob",		{ Qiob, 0 },		0,	0660 | 6,
+	"iow",		{ Qiow, 0 },		0,	0660 | 6,
+	"iol",		{ Qiol, 0 },		0,	0660 | 6,
+	"mapram",	{ Qmapram, 0 },	0,	0444,
+};
+Lock archwlock;	/* the lock is only for changing archdir */
+int narchdir = Qbase;
+
+/*
+ * Add a file to the #P listing.  Once added, you can't delete it.
+ * You can't add a file with the same name as one already there,
+ * and you get a pointer to the Dirtab entry so you can do things
+ * like change the Qid version.  Changing the Qid path is disallowed.
+ */
+Dirtab*
+addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn)
+{
+	int i;
+	Dirtab d;
+	Dirtab *dp;
+
+	memset(&d, 0, sizeof d);
+	strcpy(d.name, name);
+	d.perm = perm;
+
+	lock(&archwlock);
+	if(narchdir >= Qmax){
+		unlock(&archwlock);
+		return nil;
+	}
+
+	for(i=0; i<narchdir; i++)
+		if(strcmp(archdir[i].name, name) == 0){
+			unlock(&archwlock);
+			return nil;
+		}
+
+	d.qid.path = narchdir;
+	archdir[narchdir] = d;
+	readfn[narchdir] = rdfn;
+	writefn[narchdir] = wrfn;
+	dp = &archdir[narchdir++];
+	unlock(&archwlock);
+
+	return dp;
+}
+
+void
+ioinit(void)
+{
+	int i;
+
+	for(i = 0; i < nelem(iomap.maps)-1; i++)
+		iomap.maps[i].next = &iomap.maps[i+1];
+	iomap.maps[i].next = nil;
+	iomap.free = iomap.maps;
+}
+
+// Reserve a range to be ioalloced later.
+// This is particularly useful for exchangeable cards, such
+// as pcmcia and cardbus cards.
+int
+ioreserve(int n, int size, int align, char *tag)
+{
+	panic("ioreserve");
+#if 0
+	IOMap *map, **l;
+	int i, port;
+
+	lock(&iomap.l);
+	// find a free port above 0x400 and below 0x1000
+	port = 0x400;
+	for(l = &iomap.map; *l; l = &(*l)->next){
+		map = *l;
+		if (map->start < 0x400)
+			continue;
+		i = map->start - port;
+		if(i > size)
+			break;
+		if(align > 0)
+			port = ((port+align-1)/align)*align;
+		else
+			port = map->end;
+	}
+	if(*l == nil){
+		unlock(&iomap.l);
+		return -1;
+	}
+	map = iomap.free;
+	if(map == nil){
+		print("ioalloc: out of maps");
+		unlock(&iomap.l);
+		return port;
+	}
+	iomap.free = map->next;
+	map->next = *l;
+	map->start = port;
+	map->end = port + size;
+	map->reserved = 1;
+	strncpy(map->tag, tag, sizeof(map->tag));
+	map->tag[sizeof(map->tag)-1] = 0;
+	*l = map;
+
+	archdir[0].qid.vers++;
+
+	unlock(&iomap.l);
+	return map->start;
+#endif
+	return 0;
+}
+
+//
+//	alloc some io port space and remember who it was
+//	alloced to.  if port < 0, find a free region.
+//
+int
+ioalloc(int port, int size, int align, char *tag)
+{
+	panic("ioalloc");
+#if 0
+	IOMap *map, **l;
+	int i;
+
+	lock(&iomap.l);
+	if(port < 0){
+		// find a free port above 0x400 and below 0x1000
+		port = 0x400;
+		for(l = &iomap.map; *l; l = &(*l)->next){
+			map = *l;
+			if (map->start < 0x400)
+				continue;
+			i = map->start - port;
+			if(i > size)
+				break;
+			if(align > 0)
+				port = ((port+align-1)/align)*align;
+			else
+				port = map->end;
+		}
+		if(*l == nil){
+			unlock(&iomap.l);
+			return -1;
+		}
+	} else {
+		// Only 64KB I/O space on the x86.
+		if((port+size) > 0x10000){
+			unlock(&iomap.l);
+			return -1;
+		}
+		// see if the space clashes with previously allocated ports
+		for(l = &iomap.map; *l; l = &(*l)->next){
+			map = *l;
+			if(map->end <= port)
+				continue;
+			if(map->reserved && map->start == port && map->end == port + size) {
+				map->reserved = 0;
+				unlock(&iomap.l);
+				return map->start;
+			}
+			if(map->start >= port+size)
+				break;
+			unlock(&iomap.l);
+			return -1;
+		}
+	}
+	map = iomap.free;
+	if(map == nil){
+		print("ioalloc: out of maps");
+		unlock(&iomap.l);
+		return port;
+	}
+	iomap.free = map->next;
+	map->next = *l;
+	map->start = port;
+	map->end = port + size;
+	strncpy(map->tag, tag, sizeof(map->tag));
+	map->tag[sizeof(map->tag)-1] = 0;
+	*l = map;
+
+	archdir[0].qid.vers++;
+
+	unlock(&iomap.l);
+	return map->start;
+#endif
+	return 0;
+}
+
+void
+iofree(int port)
+{
+	panic("iofree");
+#if 0
+	IOMap *map, **l;
+
+	lock(&iomap.l);
+	for(l = &iomap.map; *l; l = &(*l)->next){
+		if((*l)->start == port){
+			map = *l;
+			*l = map->next;
+			map->next = iomap.free;
+			iomap.free = map;
+			break;
+		}
+		if((*l)->start > port)
+			break;
+	}
+	archdir[0].qid.vers++;
+	unlock(&iomap.l);
+#endif
+}
+
+int
+iounused(int start, int end)
+{
+	IOMap *map;
+
+	for(map = iomap.map; map; map = map->next){
+		if(start >= map->start && start < map->end
+		|| start <= map->start && end > map->start)
+			return 0;
+	}
+	return 1;
+}
+
+#if 0
+static void
+checkport(int start, int end)
+{
+	if(iounused(start, end))
+		return;
+	error(Eperm);
+}
+#endif
+
+static Chan*
+archattach(char* spec)
+{
+	return devattach('P', spec);
+}
+
+Walkqid*
+archwalk(Chan* c, Chan *nc, char** name, int nname)
+{
+	return devwalk(c, nc, name, nname, archdir, narchdir, devgen);
+}
+
+static int32_t
+archstat(Chan* c, uint8_t* dp, int32_t n)
+{
+	return devstat(c, dp, n, archdir, narchdir, devgen);
+}
+
+static Chan*
+archopen(Chan* c, int omode)
+{
+	return devopen(c, omode, archdir, narchdir, devgen);
+}
+
+static void
+archclose(Chan* c)
+{
+}
+
+enum
+{
+	Linelen= 31,
+};
+
+static int32_t
+archread(Chan *c, void *a, int32_t n, int64_t offset)
+{
+	char *buf, *p;
+	//int port;
+	//uint16_t *sp;
+	//uint32_t *lp;
+	IOMap *map;
+	Rdwrfn *fn;
+
+	switch((uint32_t)c->qid.path){
+
+	case Qdir:
+		return devdirread(c, a, n, archdir, narchdir, devgen);
+
+#if 0
+// not now, not ever?
+	case Qiob:
+		port = offset;
+		checkport(offset, offset+n);
+		for(p = a; port < offset+n; port++)
+			*p++ = inb(port);
+		return n;
+
+	case Qiow:
+		if(n & 1)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		sp = a;
+		for(port = offset; port < offset+n; port += 2)
+			*sp++ = ins(port);
+		return n;
+
+	case Qiol:
+		if(n & 3)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		lp = a;
+		for(port = offset; port < offset+n; port += 4)
+			*lp++ = inl(port);
+		return n;
+
+#endif
+	case Qioalloc:
+		break;
+
+	default:
+		if(c->qid.path < narchdir && (fn = readfn[c->qid.path]))
+			return fn(c, a, n, offset);
+		error(Eperm);
+		break;
+	}
+
+	if((buf = malloc(n)) == nil)
+		error(Enomem);
+	p = buf;
+	n = n/Linelen;
+	offset = offset/Linelen;
+
+	switch((uint32_t)c->qid.path){
+	case Qioalloc:
+		lock(&iomap.l);
+		for(map = iomap.map; n > 0 && map != nil; map = map->next){
+			if(offset-- > 0)
+				continue;
+			sprint(p, "%#8lx %#8lx %-12.12s\n", map->start, map->end-1, map->tag);
+			p += Linelen;
+			n--;
+		}
+		unlock(&iomap.l);
+		break;
+	case Qmapram:
+/* shit */
+#ifdef NOTYET
+		for(mp = rmapram.map; mp->size; mp++){
+			/*
+			 * Up to MemMinMiB is already set up.
+			 */
+			if(mp->addr < MemMinMiB*MiB){
+				if(mp->addr+mp->size <= MemMinMiB*MiB)
+					continue;
+				pa = MemMinMiB*MiB;
+				size = mp->size - MemMinMiB*MiB-mp->addr;
+			}
+			else{
+				pa = mp->addr;
+				size = mp->size;
+			}
+		}
+#endif
+		error("Not yet");
+
+		break;
+	}
+
+	n = p - buf;
+	memmove(a, buf, n);
+	free(buf);
+
+	return n;
+}
+
+static int32_t
+archwrite(Chan *c, void *a, int32_t n, int64_t offset)
+{
+	//char *p;
+	//int port;
+	//uint16_t *sp;
+	//uint32_t *lp;
+	Rdwrfn *fn;
+
+	switch((uint32_t)c->qid.path){
+#if 0
+
+	case Qiob:
+		p = a;
+		checkport(offset, offset+n);
+		for(port = offset; port < offset+n; port++)
+			outb(port, *p++);
+		return n;
+
+	case Qiow:
+		if(n & 1)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		sp = a;
+		for(port = offset; port < offset+n; port += 2)
+			outs(port, *sp++);
+		return n;
+
+	case Qiol:
+		if(n & 3)
+			error(Ebadarg);
+		checkport(offset, offset+n);
+		lp = a;
+		for(port = offset; port < offset+n; port += 4)
+			outl(port, *lp++);
+		return n;
+
+#endif
+	default:
+		if(c->qid.path < narchdir && (fn = writefn[c->qid.path]))
+			return fn(c, a, n, offset);
+		error(Eperm);
+		break;
+	}
+	return 0;
+}
+
+Dev archdevtab = {
+	.dc = 'P',
+	.name = "arch",
+
+	.reset = devreset,
+	.init = devinit,
+	.shutdown = devshutdown,
+	.attach = archattach,
+	.walk = archwalk,
+	.stat = archstat,
+	.open = archopen,
+	.create = devcreate,
+	.close = archclose,
+	.read = archread,
+	.bread = devbread,
+	.write = archwrite,
+	.bwrite = devbwrite,
+	.remove = devremove,
+	.wstat = devwstat,
+};
+
+/*
+ */
+void
+nop(void)
+{
+}
+
+void (*coherence)(void) = mfence;
+
+static int32_t
+cputyperead(Chan* c, void *a, int32_t n, int64_t off)
+{
+	return readstr(off, a, n, "riscv");
+}
+
+static int32_t
+numcoresread(Chan* c, void *a, int32_t n, int64_t off)
+{
+	char buf[8];
+	snprint(buf, 8, "%d\n", sys->nmach);
+	return readstr(off, a, n, buf);
+}
+
+void
+archinit(void)
+{
+	addarchfile("cputype", 0444, cputyperead, nil);
+	addarchfile("numcores", 0444, numcoresread, nil);
+}
+
+void
+archreset(void)
+{
+	panic("archreset");
+
+}
+
+/*
+ *  return value and speed of timer
+ */
+uint64_t
+fastticks(uint64_t* hz)
+{
+	if(hz != nil)
+		*hz = machp()->cpuhz;
+	return rdtsc();
+}
+
+uint32_t
+ms(void)
+{
+	return fastticks2us(rdtsc());
+}
+
+/*
+ *  set next timer interrupt
+ */
+void
+timerset(uint64_t x)
+{
+	panic("apictimerset");
+//	extern void apictimerset(uint64_t);
+
+//	apictimerset(x);
+}
+
+void
+cycles(uint64_t* t)
+{
+	panic("cycles");
+	*t = 0;
+}
+
+void
+delay(int millisecs)
+{
+	uint64_t r, t;
+
+	if(millisecs <= 0)
+		millisecs = 1;
+	cycles(&r);
+	for(t = r + (sys->cyclefreq*millisecs)/1000ull; r < t; cycles(&r))
+		;
+}
+
+/*
+ *  performance measurement ticks.  must be low overhead.
+ *  doesn't have to count over a second.
+ */
+uint64_t
+perfticks(void)
+{
+	uint64_t x;
+
+//	if(m->havetsc)
+		cycles(&x);
+//	else
+//		x = 0;
+	return x;
+}

File diff suppressed because it is too large
+ 1113 - 741
sys/src/9/riscv/encoding.h


+ 2 - 16
sys/src/9/riscv/fns.h

@@ -160,24 +160,10 @@ void*	vmap(uintptr_t, usize);
 void	vsvminit(int, int, Mach *);
 void	vunmap(void*, usize);
 
-extern uint64_t cr0get(void);
-extern void cr0put(uint64_t);
-extern uint64_t cr2get(void);
-extern uint64_t cr3get(void);
-extern void cr3put(uint64_t);
-extern uint64_t cr4get(void);
-extern void cr4put(uint64_t);
-extern void gdtget(void*);
-extern void gdtput(int, uint64_t, uint16_t);
+extern uint64_t rootget(void);
+extern void rootput(uintptr_t);
 extern void idtput(int, uint64_t);
-extern uint64_t rdmsr(uint32_t);
 extern uint64_t rdtsc(void);
-extern void trput(uint64_t);
-extern void wrmsr(uint32_t, uint64_t);
-
-// TODO(aki): once we figure this out, these will go.
-extern int infected_with_std(void);
-extern void disinfect_std(void);
 
 extern int islo(void);
 extern void spldone(void);
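
The x86 control-register accessors are replaced by a single pair for the page-table root. rootput() is implemented in mmu.c below by writing the sptbr CSR; a sketch of the matching rootget(), assuming the read_csr macro from encoding.h and the same RISCV_PGSHIFT scaling rootput() uses:

	uint64_t
	rootget(void)
	{
		/* sptbr holds the root's PPN, so scale it back to a PA */
		return (uint64_t)read_csr(sptbr) << RISCV_PGSHIFT;
	}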

+ 245 - 0
sys/src/9/riscv/main.c

@@ -31,6 +31,12 @@ void die(char *s)
 	while (1);
 }
 
+void
+ndnr(void)
+{
+	die("ndnr");
+}
+
 static void puts(char * s, int n)
 {
 	while (n--)
@@ -44,6 +50,14 @@ static int x = 0x123456;
  * we don't have to do that. */
 static uint64_t m0stack[4096];
 static Mach m0;
+Sys asys, *sys=&asys;
+Conf conf;
+uintptr_t kseg0 = KZERO;
+char *cputype = "riscv";
+
+/* I forget where this comes from and I don't care just now. */
+uint32_t kerndate;
+
 
 /* general purpose hart startup. We call this via startmach.
  * When we enter here, the machp() function is usable.
@@ -103,3 +117,234 @@ main(uint32_t mbmagic, uint32_t mbaddress)
 	msg("got somewhere");
 	startmach(bsp, &m0);
 }
+
+/* stubs until we implement them, some in assembly */
+int corecolor(int _)
+{
+	return -1;
+}
+
+Proc *externup(void)
+{
+	return machp()->externup;
+}
+
+void errstr(char *s, int i) {
+	panic("errstr");
+}
+
+void
+oprof_alarm_handler(Ureg *u)
+{
+	panic((char *)__func__);
+}
+
+void
+hardhalt(void)
+{
+	panic((char *)__func__);
+}
+
+uintmem
+physalloc(uint64_t _, int*__, void*___)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+void
+ureg2gdb(Ureg *u, uintptr_t *g)
+{
+	panic((char *)__func__);
+}
+
+int
+userureg(Ureg*u)
+{
+	panic((char *)__func__);
+	return -1;
+}
+
+uintptr_t
+userpc(Ureg*u)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+
+int tas32(void *_)
+{
+	panic("tas32");
+	return -1;
+}
+int      cas32(void*_, uint32_t __, uint32_t ___)
+{
+	panic((char *)__func__);
+	return -1;
+}
+
+void    exit(int _)
+{
+	panic((char *)__func__);
+}
+
+void fpunoted(void)
+{
+	panic((char *)__func__);
+}
+
+void fpunotify(Ureg*_)
+{
+	panic((char *)__func__);
+}
+
+void fpusysrfork(Ureg*_)
+{
+	panic((char *)__func__);
+}
+
+void kexit(Ureg*_)
+{
+	panic((char *)__func__);
+}
+
+char*
+seprintphysstats(char*_, char*__)
+{
+	return "NOT YET";
+}
+
+void
+reboot(void*_, void*__, int32_t ___)
+{
+	panic("reboot");
+}
+
+void fpusysprocsetup(Proc *_)
+{
+	panic((char *)__func__);
+}
+
+void sysrforkret(void)
+{
+	panic((char *)__func__);
+}
+
+void     fpusysrforkchild(Proc*_, Proc*__)
+{
+	panic((char *)__func__);
+}
+
+int
+fpudevprocio(Proc*p, void*v, int32_t _, uintptr_t __, int ___)
+{
+	panic((char *)__func__);
+	return -1;
+}
+
+void
+setregisters(Ureg*u, char*f, char*t, int amt)
+{
+	panic((char *)__func__);
+}
+
+uint64_t rdtsc(void)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+int islo(void)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+void mfence(void)
+{
+	panic((char *)__func__);
+}
+
+uintptr_t
+dbgpc(Proc*p)
+{
+	panic((char *)__func__);
+	return 0;
+}
+
+
+void dumpstack(void)
+{
+	panic((char *)__func__);
+}
+
+void
+dumpgpr(Ureg* ureg)
+{
+	panic((char *)__func__);
+}
+
+void
+setkernur(Ureg*u, Proc*p)
+{
+	panic((char *)__func__);
+}
+
+
+void
+physfree(uintmem data, uint64_t size)
+{
+	panic("physfree %p 0x%lx", data, size);
+}
+
+void
+stacksnippet(void)
+{
+	//Stackframe *stkfr;
+	kmprint(" stack:");
+//	for(stkfr = stackframe(); stkfr != nil; stkfr = stkfr->next)
+//		kmprint(" %c:%p", ktextaddr(stkfr->pc) ? 'k' : '?', ktextaddr(stkfr->pc) ? (stkfr->pc & 0xfffffff) : stkfr->pc);
+	kmprint("\n");
+}
+
+
+/* crap. */
+/* this should come from build but it's intimately tied in to VGA. Crap. */
+Physseg physseg[8];
+int nphysseg = 8;
+
+/* bringup -- remove asap. */
+void
+DONE(void)
+{
+	print("DONE\n");
+	//prflush();
+	delay(10000);
+	ndnr();
+}
+
+void
+HERE(void)
+{
+	print("here\n");
+	//prflush();
+	delay(5000);
+}
+
+/* The old plan 9 standby ... wave ... */
+
+/* Keep to debug trap.c */
+void wave(int c)
+{
+	testPrint(c);
+}
+
+void hi(char *s)
+{
+	if (! s)
+		s = "<NULL>";
+	while (*s)
+		wave(*s++);
+}
+

+ 61 - 0
sys/src/9/riscv/map.c

@@ -0,0 +1,61 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#define _KADDR(pa)	UINT2PTR(kseg0+((uintptr)(pa)))
+#define _PADDR(va)	PTR2UINT(((uintptr)(va)) - kseg0)
+
+#define TMFM		(64*MiB)
+
+int km, ku, k2;
+void*
+KADDR(uintptr_t pa)
+{
+	uint8_t* va;
+
+	va = UINT2PTR(pa);
+	if(pa < TMFM) {
+		km++;
+		return KSEG0+va;
+	}
+
+	assert(pa < KSEG2);
+	k2++;
+	return KSEG2+va;
+}
+
+uintmem
+PADDR(void* va)
+{
+	uintmem pa;
+
+	pa = PTR2UINT(va);
+	if(pa >= KSEG0 && pa < KSEG0+TMFM)
+		return pa-KSEG0;
+	if(pa > KSEG2)
+		return pa-KSEG2;
+
+	panic("PADDR: va %#p pa #%p @ %#p\n", va, _PADDR(va), getcallerpc());
+	return 0;
+}
+
+KMap*
+kmap(Page* page)
+{
+	DBG("kmap(%#llx) @ %#p: %#p %#p\n",
+		page->pa, getcallerpc(),
+		page->pa, KADDR(page->pa));
+
+	return KADDR(page->pa);
+}
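
KADDR()/PADDR() implement a split direct map: physical addresses below TMFM appear through the KSEG0 window, the rest through KSEG2, and PADDR() undoes whichever mapping applied. A usage sketch of the invariant the pair is meant to keep:

	void
	maptest(void)
	{
		void *va;

		va = KADDR(1*MiB);	/* 1 MiB < TMFM, so the KSEG0 window */
		assert(PADDR(va) == 1*MiB);
	}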

+ 922 - 0
sys/src/9/riscv/mmu.c

@@ -0,0 +1,922 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "encoding.h"
+#include "mmu.h"
+
+/*
+ * To do:
+ *	PteNX;
+ *	mmukmapsync grot for >1 processor;
+ *	replace vmap with newer version (no PDMAP);
+ *	mmuptcopy (PteSHARED trick?);
+ *	calculate and map up to TMFM (conf crap);
+ */
+
+#define TMFM		(64*MiB)		/* kernel memory */
+
+#define PPN(x)		((x)&~(PGSZ-1))
+
+#if 0
+/* Print the page table structures to the console */
+void print_page_table(void) {
+	print_page_table_at((void *)(read_csr(sptbr) << RISCV_PGSHIFT), 0, 0);
+}
+#endif
+
+void flush_tlb(void)
+{
+	asm volatile("sfence.vm");
+}
+
+size_t pte_ppn(uint64_t pte)
+{
+	return pte >> PTE_PPN_SHIFT;
+}
+
+uint64_t ptd_create(uintptr_t ppn)
+{
+	return (ppn << PTE_PPN_SHIFT) | PTE_V;
+}
+
+uint64_t pte_create(uintptr_t ppn, int prot, int user)
+{
+	uint64_t pte = (ppn << PTE_PPN_SHIFT) | PTE_R | PTE_V;
+	if (prot & PTE_W)
+		pte |= PTE_W;
+	if (prot & PTE_X)
+		pte |= PTE_X;
+	if (user)
+		pte |= PTE_U;
+	return pte;
+}
+
+void
+rootput(uintptr_t root)
+{
+	uintptr_t ptbr = root >> RISCV_PGSHIFT;
+	write_csr(sptbr, ptbr);
+}
+
+void
+mmuflushtlb(uint64_t u)
+{
+
+	machp()->tlbpurge++;
+	if(machp()->MMU.pml4->daddr){
+		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
+		machp()->MMU.pml4->daddr = 0;
+	}
+	rootput((uintptr_t) machp()->MMU.pml4->pa);
+}
+
+void
+mmuflush(void)
+{
+	Proc *up = externup();
+	Mpl pl;
+
+	pl = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(pl);
+}
+
+static void
+mmuptpfree(Proc* proc, int clear)
+{
+	int l;
+	PTE *pte;
+	Page **last, *page;
+
+	for(l = 1; l < 4; l++){
+		last = &proc->MMU.mmuptp[l];
+		if(*last == nil)
+			continue;
+		for(page = *last; page != nil; page = page->next){
+//what is right here? 2 or 1?
+			if(l <= 2 && clear)
+				memset(UINT2PTR(page->va), 0, PTSZ);
+			pte = UINT2PTR(page->prev->va);
+			pte[page->daddr] = 0;
+			last = &page->next;
+		}
+		*last = proc->MMU.mmuptp[0];
+		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
+		proc->MMU.mmuptp[l] = nil;
+	}
+
+	machp()->MMU.pml4->daddr = 0;
+}
+
+static void
+tabs(int n)
+{
+	int i;
+
+	for(i = 0; i < n; i++)
+		print("  ");
+}
+
+void
+dumpptepg(int lvl, uintptr_t pa)
+{
+	PTE *pte;
+	int tab, i;
+
+	tab = 4 - lvl;
+	pte = UINT2PTR(KADDR(pa));
+	for(i = 0; i < PTSZ/sizeof(PTE); i++)
+		if(pte[i] & PteP){
+			tabs(tab);
+			print("l%d %#p[%#05x]: %#llx\n", lvl, pa, i, pte[i]);
+
+			/* skip kernel mappings */
+			if((pte[i]&PteU) == 0){
+				tabs(tab+1);
+				print("...kern...\n");
+				continue;
+			}
+			if(lvl > 2)
+				dumpptepg(lvl-1, PPN(pte[i]));
+		}
+}
+
+void
+dumpmmu(Proc *p)
+{
+	int i;
+	Page *pg;
+
+	print("proc %#p\n", p);
+	for(i = 3; i > 0; i--){
+		print("mmuptp[%d]:\n", i);
+		for(pg = p->MMU.mmuptp[i]; pg != nil; pg = pg->next)
+			print("\tpg %#p = va %#llx pa %#llx"
+				" daddr %#lx next %#p prev %#p\n",
+				pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev);
+	}
+	print("pml4 %#llx\n", machp()->MMU.pml4->pa);
+	if(0)dumpptepg(4, machp()->MMU.pml4->pa);
+}
+
+void
+dumpmmuwalk(uint64_t addr)
+{
+	int l;
+	PTE *pte, *pml4;
+
+	pml4 = UINT2PTR(machp()->MMU.pml4->va);
+	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llx\n", machp()->machno, l, pte, *pte);
+}
+
+static Page mmuptpfreelist;
+
+static Page*
+mmuptpalloc(void)
+{
+	void* va;
+	Page *page;
+
+	/*
+	 * Do not really need a whole Page structure,
+	 * but it makes testing this out a lot easier.
+	 * Could keep a cache and free excess.
+	 * Have to maintain any fiction for pexit?
+	 */
+	lock(&mmuptpfreelist.l);
+	if((page = mmuptpfreelist.next) != nil){
+		mmuptpfreelist.next = page->next;
+		mmuptpfreelist.ref--;
+		unlock(&mmuptpfreelist.l);
+
+		if(page->ref++ != 0)
+			panic("mmuptpalloc ref\n");
+		page->prev = page->next = nil;
+		memset(UINT2PTR(page->va), 0, PTSZ);
+
+		if(page->pa == 0)
+			panic("mmuptpalloc: free page with pa == 0");
+		return page;
+	}
+	unlock(&mmuptpfreelist.l);
+
+	if((page = malloc(sizeof(Page))) == nil){
+		print("mmuptpalloc Page\n");
+
+		return nil;
+	}
+	if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){
+		print("mmuptpalloc va\n");
+		free(page);
+
+		return nil;
+	}
+
+	page->va = PTR2UINT(va);
+	page->pa = PADDR(va);
+	page->ref = 1;
+
+	if(page->pa == 0)
+		panic("mmuptpalloc: no pa");
+	return page;
+}
+
+void
+mmuswitch(Proc* proc)
+{
+	PTE *pte;
+	Page *page;
+	Mpl pl;
+
+	pl = splhi();
+	if(proc->newtlb){
+		/*
+ 		 * NIX: We cannot clear our page tables if they are going to
+		 * be used in the AC
+		 */
+		if(proc->ac == nil)
+			mmuptpfree(proc, 1);
+		proc->newtlb = 0;
+	}
+
+	if(machp()->MMU.pml4->daddr){
+		memset(UINT2PTR(machp()->MMU.pml4->va), 0, machp()->MMU.pml4->daddr*sizeof(PTE));
+		machp()->MMU.pml4->daddr = 0;
+	}
+
+	pte = UINT2PTR(machp()->MMU.pml4->va);
+	for(page = proc->MMU.mmuptp[3]; page != nil; page = page->next){
+		pte[page->daddr] = PPN(page->pa)|PteU|PteRW|PteP;
+		if(page->daddr >= machp()->MMU.pml4->daddr)
+			machp()->MMU.pml4->daddr = page->daddr+1;
+		page->prev = machp()->MMU.pml4;
+	}
+
+	//tssrsp0(machp(), STACKALIGN(PTR2UINT(proc->kstack+KSTACK)));
+	rootput((uintptr_t) machp()->MMU.pml4->pa);
+	splx(pl);
+}
+
+void
+mmurelease(Proc* proc)
+{
+	Page *page, *next;
+
+	mmuptpfree(proc, 0);
+
+	for(page = proc->MMU.mmuptp[0]; page != nil; page = next){
+		next = page->next;
+		if(--page->ref)
+			panic("mmurelease: page->ref %d\n", page->ref);
+		lock(&mmuptpfreelist.l);
+		page->next = mmuptpfreelist.next;
+		mmuptpfreelist.next = page;
+		mmuptpfreelist.ref++;
+		page->prev = nil;
+		unlock(&mmuptpfreelist.l);
+	}
+	if(proc->MMU.mmuptp[0] && pga.rend.l.p)
+		wakeup(&pga.rend);
+	proc->MMU.mmuptp[0] = nil;
+
+	panic("tssrsp0");
+	//tssrsp0(machp(), STACKALIGN(machp()->stack+MACHSTKSZ));
+	rootput(machp()->MMU.pml4->pa);
+}
+
+static void
+checkpte(uintmem ppn, void *a)
+{
+	int l;
+	PTE *pte, *pml4;
+	uint64_t addr;
+	char buf[240], *s;
+
+	addr = PTR2UINT(a);
+	pml4 = UINT2PTR(machp()->MMU.pml4->va);
+	pte = 0;
+	s = buf;
+	*s = 0;
+	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0)
+		goto Panic;
+	s = seprint(buf, buf+sizeof buf,
+		"check3: l%d pte %#p = %llx\n",
+		l, pte, pte?*pte:~0);
+	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
+		goto Panic;
+	s = seprint(s, buf+sizeof buf,
+		"check2: l%d  pte %#p = %llx\n",
+		l, pte, pte?*pte:~0);
+	if(*pte&PtePS)
+		return;
+	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
+		goto Panic;
+	seprint(s, buf+sizeof buf,
+		"check1: l%d  pte %#p = %llx\n",
+		l, pte, pte?*pte:~0);
+	return;
+Panic:
+
+	seprint(s, buf+sizeof buf,
+		"checkpte: l%d addr %#p ppn %#llx kaddr %#p pte %#p = %llx",
+		l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
+	print("%s\n", buf);
+	seprint(buf, buf+sizeof buf, "start %#llx unused %#llx"
+		" unmap %#llx end %#llx\n",
+		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
+	panic("%s", buf);
+}
+
+
+static void
+mmuptpcheck(Proc *proc)
+{
+	int lvl, npgs, i;
+	Page *lp, *p, *pgs[16], *fp;
+	uint idx[16];
+
+	if(proc == nil)
+		return;
+	lp = machp()->MMU.pml4;
+	for(lvl = 3; lvl >= 2; lvl--){
+		npgs = 0;
+		for(p = proc->MMU.mmuptp[lvl]; p != nil; p = p->next){
+			for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next)
+				if(fp == p){
+					dumpmmu(proc);
+					panic("ptpcheck: using free page");
+				}
+			for(i = 0; i < npgs; i++){
+				if(pgs[i] == p){
+					dumpmmu(proc);
+					panic("ptpcheck: dup page");
+				}
+				if(idx[i] == p->daddr){
+					dumpmmu(proc);
+					panic("ptcheck: dup daddr");
+				}
+			}
+			if(npgs >= nelem(pgs))
+				panic("ptpcheck: pgs is too small");
+			idx[npgs] = p->daddr;
+			pgs[npgs++] = p;
+			if(lvl == 3 && p->prev != lp){
+				dumpmmu(proc);
+				panic("ptpcheck: wrong prev");
+			}
+		}
+
+	}
+	npgs = 0;
+	for(fp = proc->MMU.mmuptp[0]; fp != nil; fp = fp->next){
+		for(i = 0; i < npgs; i++)
+			if(pgs[i] == fp)
+				panic("ptpcheck: dup free page");
+		pgs[npgs++] = fp;
+	}
+}
+
+static uintmem
+pteflags(uint attr)
+{
+	uintmem flags;
+
+	flags = 0;
+	if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUSER|PTEUNCACHED|PTENOEXEC))
+		panic("mmuput: wrong attr bits: %#x\n", attr);
+	if(attr&PTEVALID)
+		flags |= PteP;
+	if(attr&PTEWRITE)
+		flags |= PteRW;
+	if(attr&PTEUSER)
+		flags |= PteU;
+	if(attr&PTEUNCACHED)
+		flags |= PtePCD;
+	if(attr&PTENOEXEC)
+		flags |= PteNX;
+	return flags;
+}
+
+void
+invlpg(uintptr_t _)
+{
+	panic("invlpage");
+}
+
+/*
+ * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
+ * For the user, it can be either 2*MiB or 1*GiB pages.
+ * For 2*MiB pages, we use three levels, not four.
+ * For 1*GiB pages, we use two levels.
+ */
+void
+mmuput(uintptr_t va, Page *pg, uint attr)
+{
+	Proc *up = externup();
+	int lvl, user, x, pgsz;
+	PTE *pte;
+	Page *page, *prev;
+	Mpl pl;
+	uintmem pa, ppn;
+	char buf[80];
+
+	ppn = 0;
+	pa = pg->pa;
+	if(pa == 0)
+		panic("mmuput: zero pa");
+
+	if(DBGFLG){
+		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#x\n",
+			machp()->machno, up, va, pa, attr);
+		print("%s", buf);
+	}
+	assert(pg->pgszi >= 0);
+	pgsz = sys->pgsz[pg->pgszi];
+	if(pa & (pgsz-1))
+		panic("mmuput: pa offset non zero: %#llx\n", pa);
+	pa |= pteflags(attr);
+
+	pl = splhi();
+	if(DBGFLG)
+		mmuptpcheck(up);
+	user = (va < KZERO);
+	x = PTLX(va, 3);
+
+	pte = UINT2PTR(machp()->MMU.pml4->va);
+	pte += x;
+	prev = machp()->MMU.pml4;
+
+	for(lvl = 3; lvl >= 0; lvl--){
+		if(user){
+			if(pgsz == 2*MiB && lvl == 1)	 /* use 2M */
+				break;
+			if(pgsz == 1ull*GiB && lvl == 2)	/* use 1G */
+				break;
+		}
+		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
+			if(page->prev == prev && page->daddr == x){
+				if(*pte == 0){
+					print("mmu: jmk and nemo had fun\n");
+					*pte = PPN(page->pa)|PteU|PteRW|PteP;
+				}
+				break;
+			}
+
+		if(page == nil){
+			if(up->MMU.mmuptp[0] == nil)
+				page = mmuptpalloc();
+			else {
+				page = up->MMU.mmuptp[0];
+				up->MMU.mmuptp[0] = page->next;
+			}
+			page->daddr = x;
+			page->next = up->MMU.mmuptp[lvl];
+			up->MMU.mmuptp[lvl] = page;
+			page->prev = prev;
+			*pte = PPN(page->pa)|PteU|PteRW|PteP;
+			if(lvl == 3 && x >= machp()->MMU.pml4->daddr)
+				machp()->MMU.pml4->daddr = x+1;
+		}
+		x = PTLX(va, lvl-1);
+
+		ppn = PPN(*pte);
+		if(ppn == 0)
+			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);
+
+		pte = UINT2PTR(KADDR(ppn));
+		pte += x;
+		prev = page;
+	}
+
+	if(DBGFLG)
+		checkpte(ppn, pte);
+	*pte = pa|PteU;
+
+	if(user)
+		switch(pgsz){
+		case 2*MiB:
+		case 1*GiB:
+			*pte |= PtePS;
+			break;
+		default:
+			panic("mmuput: user pages must be 2M or 1G");
+		}
+	splx(pl);
+
+	if(DBGFLG){
+		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llx\n",
+			machp()->machno, up, pte, pte?*pte:~0);
+		print("%s", buf);
+	}
+
+	invlpg(va);			/* only if old entry valid? */
+}
+
+#if 0
+static Lock mmukmaplock;
+#endif
+static Lock vmaplock;
+
+#define PML4X(v)	PTLX((v), 3)
+#define PDPX(v)		PTLX((v), 2)
+#define PDX(v)		PTLX((v), 1)
+#define PTX(v)		PTLX((v), 0)
+
+int
+mmukmapsync(uint64_t va)
+{
+	USED(va);
+
+	return 0;
+}
+
+#if 0
+static PTE
+pdeget(uintptr_t va)
+{
+	PTE *pdp;
+
+	if(va < 0xffffffffc0000000ull)
+		panic("pdeget(%#p)", va);
+
+	pdp = (PTE*)(PDMAP+PDX(PDMAP)*4096);
+
+	return pdp[PDX(va)];
+}
+
+#endif
+/*
+ * Add kernel mappings for pa -> va for a section of size bytes.
+ * Called only after the va range is known to be unoccupied.
+ */
+static int
+pdmap(uintptr_t pa, int attr, uintptr_t va, usize size)
+{
+	uintptr_t pae;
+	PTE *pd, *pde, *pt, *pte;
+	int pdx, pgsz;
+	Page *pg;
+
+	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
+
+	for(pae = pa + size; pa < pae; pa += pgsz){
+		pdx = PDX(va);
+		pde = &pd[pdx];
+
+		/*
+		 * Check if it can be mapped using a big page,
+		 * i.e. is big enough and starts on a suitable boundary.
+		 * Assume processor can do it.
+		 */
+		if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){
+			assert(*pde == 0);
+			*pde = pa|attr|PtePS|PteP;
+			pgsz = PGLSZ(1);
+		}
+		else{
+			if(*pde == 0){
+				pg = mmuptpalloc();
+				assert(pg != nil && pg->pa != 0);
+				*pde = pg->pa|PteRW|PteP;
+				memset((PTE*)(PDMAP+pdx*4096), 0, 4096);
+			}
+			assert(*pde != 0);
+
+			pt = (PTE*)(PDMAP+pdx*4096);
+			pte = &pt[PTX(va)];
+			assert(!(*pte & PteP));
+			*pte = pa|attr|PteP;
+			pgsz = PGLSZ(0);
+		}
+		va += pgsz;
+	}
+
+	return 0;
+}
+
+static int
+findhole(PTE* a, int n, int count)
+{
+	int have, i;
+
+	have = 0;
+	for(i = 0; i < n; i++){
+		if(a[i] == 0)
+			have++;
+		else
+			have = 0;
+		if(have >= count)
+			return i+1 - have;
+	}
+
+	return -1;
+}
+
+/*
+ * Look for free space in the vmap.
+ */
+static uintptr_t
+vmapalloc(usize size)
+{
+	int i, n, o;
+	PTE *pd, *pt;
+	int pdsz, ptsz;
+
+	pd = (PTE*)(PDMAP+PDX(PDMAP)*4096);
+	pd += PDX(VMAP);
+	pdsz = VMAPSZ/PGLSZ(1);
+
+	/*
+	 * Look directly in the PD entries if the size is
+	 * larger than the range mapped by a single entry.
+	 */
+	if(size >= PGLSZ(1)){
+		n = HOWMANY(size, PGLSZ(1));
+		if((o = findhole(pd, pdsz, n)) != -1)
+			return VMAP + o*PGLSZ(1);
+		return 0;
+	}
+
+	/*
+	 * Size is smaller than that mapped by a single PD entry.
+	 * Look for an already mapped PT page that has room.
+	 */
+	n = HOWMANY(size, PGLSZ(0));
+	ptsz = PGLSZ(0)/sizeof(PTE);
+	for(i = 0; i < pdsz; i++){
+		if(!(pd[i] & PteP) || (pd[i] & PtePS))
+			continue;
+
+		pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096);
+		if((o = findhole(pt, ptsz, n)) != -1)
+			return VMAP + i*PGLSZ(1) + o*PGLSZ(0);
+	}
+
+	/*
+	 * Nothing suitable, start using a new PD entry.
+	 */
+	if((o = findhole(pd, pdsz, 1)) != -1)
+		return VMAP + o*PGLSZ(1);
+
+	return 0;
+}
+
+/*
+ * KSEG0 maps low memory.
+ * KSEG2 maps almost all memory, but starting at an address determined
+ * by the address space map (see asm.c).
+ * Thus, almost everything in physical memory is already mapped, but
+ * there are things that fall in the gap
+ * (acpi tables, device memory-mapped registers, etc.)
+ * for those things, we also want to disable caching.
+ * vmap() is required to access them.
+ */
+void*
+vmap(uintptr_t pa, usize size)
+{
+	uintptr_t va;
+	usize o, sz;
+
+	DBG("vmap(%#p, %lu) pc=%#p\n", pa, size, getcallerpc());
+
+	if(machp()->machno != 0)
+		print("vmap: machp()->machno != 0");
+
+	/*
+	 * This is incomplete; the checks are not comprehensive
+	 * enough.
+	 * Sometimes the request is for an already-mapped piece
+	 * of low memory, in which case just return a good value
+	 * and hope that a corresponding vunmap of the address
+	 * will have the same address.
+	 * To do this properly will require keeping track of the
+	 * mappings; perhaps something like kmap, but kmap probably
+	 * can't be used early enough for some of the uses.
+	 */
+	if(pa+size < 1ull*MiB)
+		return KADDR(pa);
+	if(pa < 1ull*MiB)
+		return nil;
+
+	/*
+	 * Might be asking for less than a page.
+	 * This should have a smaller granularity if
+	 * the page size is large.
+	 */
+	o = pa & ((1<<PGSHFT)-1);
+	pa -= o;
+	sz = ROUNDUP(size+o, PGSZ);
+
+	if(pa == 0){
+		print("vmap(0, %lu) pc=%#p\n", size, getcallerpc());
+		return nil;
+	}
+	ilock(&vmaplock);
+	if((va = vmapalloc(sz)) == 0 || pdmap(pa, PtePCD|PteRW, va, sz) < 0){
+		iunlock(&vmaplock);
+		return nil;
+	}
+	iunlock(&vmaplock);
+
+	DBG("vmap(%#p, %lu) => %#p\n", pa+o, size, va+o);
+
+	return UINT2PTR(va + o);
+}
+
+void
+vunmap(void* v, usize size)
+{
+	uintptr_t va;
+
+	DBG("vunmap(%#p, %lu)\n", v, size);
+
+	if(machp()->machno != 0)
+		print("vmap: machp()->machno != 0");
+
+	/*
+	 * See the comments above in vmap.
+	 */
+	va = PTR2UINT(v);
+	if(va >= KZERO && va+size < KZERO+1ull*MiB)
+		return;
+
+	/*
+	 * Here will have to deal with releasing any
+	 * resources used for the allocation (e.g. page table
+	 * pages).
+	 */
+	DBG("vunmap(%#p, %lu)\n", v, size);
+}
+
+int
+mmuwalk(PTE* pml4, uintptr_t va, int level, PTE** ret,
+	uint64_t (*alloc)(usize))
+{
+	int l;
+	uintmem pa;
+	PTE *pte;
+
+	Mpl pl;
+
+	pl = splhi();
+	if(DBGFLG > 1)
+		DBG("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
+	pte = &pml4[PTLX(va, 3)];
+	for(l = 3; l >= 0; l--){
+		if(l == level)
+			break;
+		if(!(*pte & PteP)){
+			if(alloc == nil)
+				break;
+			pa = alloc(PTSZ);
+			if(pa == ~0)
+				return -1;
+			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
+			*pte = pa|PteRW|PteP;
+		}
+		else if(*pte & PtePS)
+			break;
+		pte = UINT2PTR(KADDR(PPN(*pte)));
+		pte += PTLX(va, l-1);
+	}
+	*ret = pte;
+	splx(pl);
+	return l;
+}
+
+uintmem
+mmuphysaddr(uintptr_t va)
+{
+	int l;
+	PTE *pte;
+	uintmem mask, pa;
+
+	/*
+	 * Given a VA, find the PA.
+	 * This is probably not the right interface,
+	 * but will do as an experiment. Usual
+	 * question, should va be void* or uintptr?
+	 */
+	l = mmuwalk(UINT2PTR(machp()->MMU.pml4->va), va, 0, &pte, nil);
+	DBG("physaddr: va %#p l %d\n", va, l);
+	if(l < 0)
+		return ~0;
+
+	mask = PGLSZ(l)-1;
+	pa = (*pte & ~mask) + (va & mask);
+
+	DBG("physaddr: l %d va %#p pa %#llx\n", l, va, pa);
+
+	return pa;
+}
+
+Page mach0pml4;
+
+void
+mmuinit(void)
+{
+	panic("mmuinit");
+#if 0
+	uint8_t *p;
+	Page *page;
+	uint64_t o, pa, r, sz;
+
+	archmmu();
+	DBG("mach%d: %#p pml4 %#p npgsz %d\n", machp()->machno, machp(), machp()->MMU.pml4, sys->npgsz);
+
+	if(machp()->machno != 0){
+		/* NIX: KLUDGE: Has to go when each mach is using
+		 * its own page table
+		 */
+		p = UINT2PTR(machp()->stack);
+		p += MACHSTKSZ;
+
+		memmove(p, UINT2PTR(mach0pml4.va), PTSZ);
+		machp()->MMU.pml4 = &machp()->MMU.pml4kludge;
+		machp()->MMU.pml4->va = PTR2UINT(p);
+		machp()->MMU.pml4->pa = PADDR(p);
+		machp()->MMU.pml4->daddr = mach0pml4.daddr;	/* # of user mappings in pml4 */
+
+		r = rdmsr(Efer);
+		r |= Nxe;
+		wrmsr(Efer, r);
+		rootput(machp()->MMU.pml4->pa);
+		DBG("m %#p pml4 %#p\n", machp(), machp()->MMU.pml4);
+		return;
+	}
+
+	page = &mach0pml4;
+	page->pa = read_csr(sptbr);
+	page->va = PTR2UINT(KADDR(page->pa));
+
+	machp()->MMU.pml4 = page;
+
+	r = rdmsr(Efer);
+	r |= Nxe;
+	wrmsr(Efer, r);
+
+	/*
+	 * Set up the various kernel memory allocator limits:
+	 * pmstart/pmend bound the unused physical memory;
+	 * vmstart/vmend bound the total possible virtual memory
+	 * used by the kernel;
+	 * vmunused is the highest virtual address currently mapped
+	 * and used by the kernel;
+	 * vmunmapped is the highest virtual address currently
+	 * mapped by the kernel.
+	 * Vmunused can be bumped up to vmunmapped before more
+	 * physical memory needs to be allocated and mapped.
+	 *
+	 * This is set up here so meminit can map appropriately.
+	 */
+	o = sys->pmstart;
+	sz = ROUNDUP(o, 4*MiB) - o;
+	pa = asmalloc(0, sz, 1, 0);
+	if(pa != o)
+		panic("mmuinit: pa %#llx memstart %#llx\n", pa, o);
+	sys->pmstart += sz;
+
+	sys->vmstart = KSEG0;
+	sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB);
+	sys->vmunmapped = sys->vmstart + o + sz;
+	sys->vmend = sys->vmstart + TMFM;
+
+	print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n",
+		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
+
+	/*
+	 * Set up the map for PD entry access by inserting
+	 * the relevant PDP entry into the PD. It's equivalent
+	 * to PADDR(sys->pd)|PteRW|PteP.
+	 *
+	 */
+	sys->pd[PDX(PDMAP)] = sys->pdp[PDPX(PDMAP)] & ~(PteD|PteA);
+	print("sys->pd %#p %#p\n", sys->pd[PDX(PDMAP)], sys->pdp[PDPX(PDMAP)]);
+	assert((pdeget(PDMAP) & ~(PteD|PteA)) == (PADDR(sys->pd)|PteRW|PteP));
+
+
+	dumpmmuwalk(KZERO);
+
+	mmuphysaddr(PTR2UINT(end));
+#endif
+}
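
pte_create() and pte_ppn() pack and unpack the Sv39 PTE layout: the physical page number sits above the ten flag bits, so setting V/R/W/X/U never disturbs the PPN field. A small usage sketch (the example address is arbitrary):

	void
	ptetest(void)
	{
		uintptr_t pa = 0x80200000;	/* arbitrary page-aligned PA */
		uint64_t pte;

		pte = pte_create(pa >> RISCV_PGSHIFT, PTE_R|PTE_X, 1);
		assert(pte & PTE_V);		/* always set by pte_create */
		assert(pte & PTE_U);		/* user mapping requested */
		assert(pte_ppn(pte) == pa >> RISCV_PGSHIFT);
	}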

+ 179 - 0
sys/src/9/riscv/mmu.h

@@ -0,0 +1,179 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/* Cr0 */
+#define Pe		0x00000001		/* Protected Mode Enable */
+#define Mp		0x00000002		/* Monitor Coprocessor */
+#define Em		0x00000004		/* Emulate Coprocessor */
+#define Ts		0x00000008		/* Task Switched */
+#define Et		0x00000010		/* Extension Type */
+#define Ne		0x00000020		/* Numeric Error  */
+#define Wp		0x00010000		/* Write Protect */
+#define Am		0x00040000		/* Alignment Mask */
+#define Nw		0x20000000		/* Not Writethrough */
+#define Cd		0x40000000		/* Cache Disable */
+#define Pg		0x80000000		/* Paging Enable */
+
+/* Cr3 */
+#define Pwt		0x00000008		/* Page-Level Writethrough */
+#define Pcd		0x00000010		/* Page-Level Cache Disable */
+
+/* Cr4 */
+#define Vme		0x00000001		/* Virtual-8086 Mode Extensions */
+#define Pvi		0x00000002		/* Protected Mode Virtual Interrupts */
+#define Tsd		0x00000004		/* Time-Stamp Disable */
+#define De		0x00000008		/* Debugging Extensions */
+#define Pse		0x00000010		/* Page-Size Extensions */
+#define Pae		0x00000020		/* Physical Address Extension */
+#define Mce		0x00000040		/* Machine Check Enable */
+#define Pge		0x00000080		/* Page-Global Enable */
+#define Pce		0x00000100		/* Performance Monitoring Counter Enable */
+#define Osfxsr		0x00000200		/* FXSAVE/FXRSTOR Support */
+#define Osxmmexcpt	0x00000400		/* Unmasked Exception Support */
+
+/* Rflags */
+#define Cf		0x00000001		/* Carry Flag */
+#define Pf		0x00000004		/* Parity Flag */
+#define Af		0x00000010		/* Auxiliary Flag */
+#define Zf		0x00000040		/* Zero Flag */
+#define Sf		0x00000080		/* Sign Flag */
+#define Tf		0x00000100		/* Trap Flag */
+#define If		0x00000200		/* Interrupt Flag */
+#define Df		0x00000400		/* Direction Flag */
+#define Of		0x00000800		/* Overflow Flag */
+#define Iopl0		0x00000000		/* I/O Privilege Level */
+#define Iopl1		0x00001000
+#define Iopl2		0x00002000
+#define Iopl3		0x00003000
+#define Nt		0x00004000		/* Nested Task */
+#define Rf		0x00010000		/* Resume Flag */
+#define Vm		0x00020000		/* Virtual-8086 Mode */
+#define Ac		0x00040000		/* Alignment Check */
+#define Vif		0x00080000		/* Virtual Interrupt Flag */
+#define Vip		0x00100000		/* Virtual Interrupt Pending */
+#define Id		0x00200000		/* ID Flag */
+
+/* MSRs */
+#define PerfEvtbase	0xc0010000		/* Performance Event Select */
+#define PerfCtrbase	0xc0010004		/* Performance Counters */
+
+#define Efer		0xc0000080		/* Extended Feature Enable */
+#define Star		0xc0000081		/* Legacy Target IP and [CS]S */
+#define Lstar		0xc0000082		/* Long Mode Target IP */
+#define Cstar		0xc0000083		/* Compatibility Target IP */
+#define Sfmask		0xc0000084		/* SYSCALL Flags Mask */
+#define FSbase		0xc0000100		/* 64-bit FS Base Address */
+#define GSbase		0xc0000101		/* 64-bit GS Base Address */
+#define KernelGSbase	0xc0000102		/* SWAPGS instruction */
+
+/* Efer */
+#define Sce		0x00000001		/* System Call Extension */
+#define Lme		0x00000100		/* Long Mode Enable */
+#define Lma		0x00000400		/* Long Mode Active */
+#define Nxe		0x00000800		/* No-Execute Enable */
+#define Svme		0x00001000		/* SVM Extension Enable */
+#define Ffxsr		0x00004000		/* Fast FXSAVE/FXRSTOR */
+
+/* PML4E/PDPE/PDE/PTE */
+#define PteP		0x0000000000000001	/* Present */
+#define PteRW		0x0000000000000002	/* Read/Write */
+#define PteU		0x0000000000000004	/* User/Supervisor */
+#define PtePWT		0x0000000000000008	/* Page-Level Write Through */
+#define PtePCD		0x0000000000000010	/* Page Level Cache Disable */
+#define PteA		0x0000000000000020	/* Accessed */
+#define PteD		0x0000000000000040	/* Dirty */
+#define PtePS		0x0000000000000080	/* Page Size */
+#define Pte4KPAT	PtePS			/* PTE PAT */
+#define PteG		0x0000000000000100	/* Global */
+#define Pte2MPAT	0x0000000000001000	/* PDE PAT */
+#define Pte1GPAT	Pte2MPAT		/* PDPE PAT */
+#define PteNX		0x8000000000000000	/* No Execute */
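For reference, the assertion in mmuinit above composes and checks exactly these bits; an illustrative restatement, not part of this commit:

/* a present, writable, supervisor-only entry, as mmuinit asserts;
 * PteA and PteD are set by hardware, so they are masked off before
 * the comparison */
uint64_t pte = PADDR(sys->pd) | PteRW | PteP;
assert((pte & (PteRW|PteP)) == (PteRW|PteP));	/* low bits 0x003 */
assert((pte & PteU) == 0);			/* no user access */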
+
+/* Exceptions */
+#define IdtDE		0			/* Divide-by-Zero Error */
+#define IdtDB		1			/* Debug */
+#define IdtNMI		2			/* Non-Maskable-Interrupt */
+#define IdtBP		3			/* Breakpoint */
+#define IdtOF		4			/* Overflow */
+#define IdtBR		5			/* Bound-Range */
+#define IdtUD		6			/* Invalid-Opcode */
+#define IdtNM		7			/* Device-Not-Available */
+#define IdtDF		8			/* Double-Fault */
+#define Idt09		9			/* unsupported */
+#define IdtTS		10			/* Invalid-TSS */
+#define IdtNP		11			/* Segment-Not-Present */
+#define IdtSS		12			/* Stack */
+#define IdtGP		13			/* General-Protection */
+#define IdtPF		14			/* Page-Fault */
+#define Idt0F		15			/* reserved */
+#define IdtMF		16			/* x87 FPE-Pending */
+#define IdtAC		17			/* Alignment-Check */
+#define IdtMC		18			/* Machine-Check */
+#define IdtXF		19			/* SIMD Floating-Point */
+
+/* Vestigial Segmented Virtual Memory */
+#define SdISTM		0x0000000700000000	/* Interrupt Stack Table Mask */
+#define SdA		0x0000010000000000	/* Accessed */
+#define SdR		0x0000020000000000	/* Readable (Code) */
+#define SdW		0x0000020000000000	/* Writeable (Data) */
+#define SdE		0x0000040000000000	/* Expand Down */
+#define SdaTSS		0x0000090000000000	/* Available TSS */
+#define SdbTSS		0x00000b0000000000	/* Busy TSS */
+#define SdCG		0x00000c0000000000	/* Call Gate */
+#define SdIG		0x00000e0000000000	/* Interrupt Gate */
+#define SdTG		0x00000f0000000000	/* Trap Gate */
+#define SdCODE		0x0000080000000000	/* Code/Data */
+#define SdS		0x0000100000000000	/* System/User */
+#define SdDPL0		0x0000000000000000	/* Descriptor Privilege Level */
+#define SdDPL1		0x0000200000000000
+#define SdDPL2		0x0000400000000000
+#define SdDPL3		0x0000600000000000
+#define SdP		0x0000800000000000	/* Present */
+#define Sd4G		0x000f00000000ffff	/* 4G Limit */
+#define SdL		0x0020000000000000	/* Long Attribute */
+#define SdD		0x0040000000000000	/* Default Operand Size */
+#define SdG		0x0080000000000000	/* Granularity */
+
+/* Performance Counter Configuration */
+#define PeHo		0x0000020000000000	/* Host only */
+#define PeGo		0x0000010000000000	/* Guest only */
+#define PeEvMskH	0x0000000f00000000	/* Event mask H */
+#define PeCtMsk		0x00000000ff000000	/* Counter mask */
+#define PeInMsk		0x0000000000800000	/* Invert mask */
+#define PeCtEna		0x0000000000400000	/* Counter enable */
+#define PeInEna		0x0000000000100000	/* Interrupt enable */
+#define PePnCtl		0x0000000000080000	/* Pin control */
+#define PeEdg		0x0000000000040000	/* Edge detect */
+#define PeOS		0x0000000000020000	/* OS mode */
+#define PeUsr		0x0000000000010000	/* User mode */
+#define PeUnMsk		0x000000000000ff00	/* Unit Mask */
+#define PeEvMskL	0x00000000000000ff	/* Event Mask L */
+
+#define PeEvMsksh	32			/* Event mask shift */
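A worked example of composing an event selector from these bits (illustrative, not part of this commit; 0xc0 is assumed here to be the AMD retired-instructions event):

/* count event 0xc0 in both user and OS mode, counter enabled;
 * events wider than 8 bits place bits 8-11 via PeEvMskH, shifted
 * by PeEvMsksh */
uint64_t evsel = 0xc0 | PeUsr | PeOS | PeCtEna;
assert((evsel & PeEvMskL) == 0xc0);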
+
+/* Segment Selector */
+#define SsRPL0		0x0000			/* Requestor Privilege Level */
+#define SsRPL1		0x0001
+#define SsRPL2		0x0002
+#define SsRPL3		0x0003
+#define SsTIGDT		0x0000			/* GDT Table Indicator  */
+#define SsTILDT		0x0004			/* LDT Table Indicator */
+#define SsSIM		0xfff8			/* Selector Index Mask */
+
+#define SSEL(si, tirpl)	(((si)<<3)|(tirpl))	/* Segment Selector */
+
+#define SiNULL		0			/* NULL selector index */
+#define SiCS		1			/* CS selector index */
+#define SiDS		2			/* DS selector index */
+#define SiU32CS		3			/* User 32-bit CS selector index */
+#define SiUDS		4			/* User DS selector index */
+#define SiUCS		5			/* User 64-bit CS selector index */
+#define SiFS		6			/* FS selector index */
+#define SiGS		7			/* GS selector index */
+#define SiTSS		8			/* TSS selector index */
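Worked examples of SSEL with these indexes (illustrative, not part of this commit): the index is shifted left three bits and ORed with the table indicator and requestor privilege level.

assert(SSEL(SiCS, SsTIGDT|SsRPL0) == 0x08);	/* kernel CS: (1<<3)|0|0 */
assert(SSEL(SiUCS, SsTIGDT|SsRPL3) == 0x2b);	/* user CS: (5<<3)|0|3 */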

+ 683 - 0
sys/src/9/riscv/qmalloc.c

@@ -0,0 +1,683 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * malloc
+ *
+ *	Uses Quickfit (see SIGPLAN Notices, October 1988)
+ *	with the allocator from Kernighan & Ritchie
+ *
+ * This is a placeholder.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include	<pool.h>
+
+typedef double Align;	/* forces worst-case alignment for Header */
+typedef union Header Header;
+typedef struct Qlist Qlist;
+
+union Header {
+	struct {
+		Header*	next;
+		uint	size;
+	} s;
+	Align	al;
+};
+
+struct Qlist {
+	Lock	lk;
+	Header*	first;
+
+	uint	nalloc;
+};
+
+enum {
+	Unitsz		= sizeof(Header),	/* 16 bytes on amd64 */
+};
+
+#define	NUNITS(n)	(HOWMANY(n, Unitsz) + 1)
+#define	NQUICK		((512/Unitsz)+1)	/* 33 on amd64 */
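A worked example of the size classes, assuming Unitsz == 16 as the comment notes for amd64 (the quicklist indexing shown is how the Plan 9 qmalloc this derives from behaves, not something visible in this excerpt):

/* NUNITS(100) == HOWMANY(100, 16) + 1 == 7 + 1 == 8 units,
 * i.e. 128 bytes, one unit of which is the Header itself;
 * NQUICK == 512/16 + 1 == 33, so 8 <= NQUICK and the request
 * is served from a quick list rather than the K&R-style list */
uint nunits = NUNITS(100);
Qlist *qlist = nunits <= NQUICK ? &quicklist[nunits] : nil;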
+
+static	Qlist	quicklist[NQUICK+1];
+static	Header	misclist;
+static	Header	*rover;
+static	unsigned tailsize;
+static	unsigned tailnunits;
+static	Header	*tailbase;
+static	Header	*tailptr;
+static	Header	checkval;
+static	int	morecore(unsigned);
+
+enum
+{
+	QSmalign = 0,
+	QSmalignquick,
+	QSmalignrover,
+	QSmalignfront,
+	QSmalignback,
+	QSmaligntail,
+	QSmalignnottail,
+	QSmalloc,
+	QSmallocrover,
+	QSmalloctail,
+	QSfree,
+	QSfreetail,
+	QSfreequick,
+	QSfreenext,
+	QSfreeprev,
+	QSmax
+};
+
+static	void	qfreeinternal(void*);
+static	int	qstats[QSmax];
+static	char*	qstatstr[QSmax] = {
+[QSmalign] = "malign",
+[QSmalignquick] = "malignquick",
+[QSmalignrover] = "malignrover",
+[QSmalignfront] = "malignfront",
+[QSmalignback] = "malignback",
+[QSmaligntail] = "maligntail",
+[QSmalignnottail] = "malignnottail",
+[QSmalloc] = "malloc",
+[QSmallocrover] = "mallocrover",
+[QSmalloctail] = "malloctail",
+[QSfree] = "free",
+[QSfreetail] = "freetail",
+[QSfreequick] = "freequick",
+[QSfreenext] = "freenext",
+[QSfreeprev] = "freeprev",
+};
+
+static	Lock		mainlock;
+
+#define	MLOCK		ilock(&mainlock)
+#define	MUNLOCK		iunlock(&mainlock)
+#define QLOCK(l)	ilock(l)
+#define QUNLOCK(l)	iunlock(l)
+
+/* carve n units off the unused tail region; s.next is pointed at
+ * checkval as a sentinel marking the block as allocated */
+#define	tailalloc(p, n)	((p)=tailptr, tailsize -= (n), tailptr+=(n),\
+			 (p)->s.size=(n), (p)->s.next = &checkval)
+
+#define ISPOWEROF2(x)	(/*((x) != 0) && */!((x) & ((x)-1)))
+#define ALIGNHDR(h, a)	(Header*)((((uintptr)(h))+((a)-1)) & ~((a)-1))
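Worked examples of the alignment macros (illustrative, not part of this commit):

Header *h = (Header*)0x1008;
assert(ISPOWEROF2(64));				/* 64 is a power of two */
assert(ALIGNHDR(h, 64) == (Header*)0x1040);	/* (0x1008+63) & ~63 */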
+
+/*
+ * Carried over from libc malloc.c, for the *draw devices.
+ */
+
+typedef struct Private	Private;
+struct Private {
+	Lock		lk;
+	char*		end;
+	char		msg[256];	/* a rock for messages to be printed at unlock */
+};
+
+/*
+ * Experiment: per-core quick lists.
+ * Change quicklist to
+ *	static	Qlist	quicklist[MACHMAX][NQUICK+1];
+ * and define QLIST as quicklist[machp()->machno].
+ *
+ * Using quicklist[machp()->machno] soon runs out of memory;
+ * using quicklist[machp()->machno%4] yields worse times than the
+ * single shared quicklist!
+ */
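A sketch of the per-core variant the comment describes, with a hedged guess (not stated in the source) at why it exhausts memory:

/* hypothetical per-core lists, renamed here to avoid the file-scope
 * quicklist above: a block freed on core A lands on A's lists and is
 * invisible to core B's allocations, so cross-core producer/consumer
 * traffic strands free memory, which would explain the out-of-memory
 * behaviour noted above */
static	Qlist	percorelist[MACHMAX][NQUICK+1];
#define QLIST	(percorelist[machp()->machno])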