Browse Source

riscv: add in asm and mmu support

Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>
Ronald G. Minnich 7 years ago
parent
commit
a9361397d1

+ 446 - 0
sys/src/9/riscv/asm.c

@@ -0,0 +1,446 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * To do:
+ *	find a purpose for this...
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mmu.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Address Space Map.
+ * Low duty cycle.
+ */
+/*
+ * One contiguous physical address range and its type.
+ * Entries live on a singly-linked list sorted by addr.
+ * (The forward typedef suffices; repeating "typedef ... Asm" on the
+ * definition is a typedef redefinition, invalid before C11.)
+ */
+typedef struct Asm Asm;
+struct Asm {
+	uintmem	addr;		/* base physical address */
+	uintmem	size;		/* length in bytes */
+	int	type;		/* AsmMEMORY, AsmRESERVED, ... */
+	int	location;
+	Asm*	next;
+};
+
+/* Asm.type values: rough equivalents of the multiboot/E820 memory types */
+enum {
+	AsmNONE		= 0,
+	AsmMEMORY	= 1,
+	AsmRESERVED	= 2,
+	AsmACPIRECLAIM	= 3,
+	AsmACPINVS	= 4,
+
+	AsmDEV		= 5,
+};
+
+/* asmarray[0] seeds the map with one entry covering all of memory as AsmNONE */
+static Lock asmlock;
+static Asm asmarray[64] = {
+	{ 0, ~0, AsmNONE, 0, },
+};
+static int asmindex = 1;		/* next unused slot in asmarray */
+static Asm* asmlist = &asmarray[0];	/* the map: sorted by addr */
+static Asm* asmfreelist;		/* recycled entries */
+
+/*
+ * Print every entry of the address-space map via DBG.
+ */
+/*static*/ void
+asmdump(void)
+{
+	Asm* assem;
+
+	DBG("asm: index %d:\n", asmindex);
+	for(assem = asmlist; assem != nil; assem = assem->next){
+		DBG(" %#P %#P %d (%P)\n",
+			assem->addr, assem->addr+assem->size,
+			assem->type, assem->size);
+	}
+}
+
+/*
+ * Allocate and fill a map entry: reuse the freelist when possible,
+ * otherwise take a fresh slot from asmarray.
+ * Returns nil when asmarray is exhausted.  Caller holds asmlock.
+ */
+static Asm*
+asmnew(uintmem addr, uintmem size, int type)
+{
+	Asm * assem;
+
+	if(asmfreelist != nil){
+		assem = asmfreelist;
+		asmfreelist = assem->next;
+		assem->next = nil;
+	}
+	else{
+		if(asmindex >= nelem(asmarray))
+			return nil;
+		assem = &asmarray[asmindex++];
+	}
+	assem->addr = addr;
+	assem->size = size;
+	assem->type = type;
+
+	return assem;
+}
+
+/*
+ * Return the range [addr, addr+size) to the map as the given type,
+ * coalescing with adjacent entries of the same type where possible.
+ * Returns 0 on success, -1 if the range overlaps an existing entry
+ * or no map entry can be allocated to record it.
+ */
+int
+asmfree(uintmem addr, uintmem size, int type)
+{
+	Asm *np, *pp, **ppp;
+
+	DBG("asmfree: %#P@%#P, type %d\n", size, addr, type);
+	if(size == 0)
+		return 0;
+
+	lock(&asmlock);
+
+	/*
+	 * Find either a map entry with an address greater
+	 * than that being returned, or the end of the map.
+	 */
+	pp = nil;
+	ppp = &asmlist;
+	for(np = *ppp; np != nil && np->addr <= addr; np = np->next){
+		pp = np;
+		ppp = &np->next;
+	}
+
+	/* reject any overlap with the predecessor or successor */
+	if((pp != nil && pp->addr+pp->size > addr)
+	|| (np != nil && addr+size > np->addr)){
+		unlock(&asmlock);
+		/* format fixed: was "%#Px@%#P", printing a stray 'x' */
+		DBG("asmfree: overlap %#P@%#P, type %d\n", size, addr, type);
+		return -1;
+	}
+
+	/* coalesce with the predecessor (and possibly the successor too) */
+	if(pp != nil && pp->type == type && pp->addr+pp->size == addr){
+		pp->size += size;
+		if(np != nil && np->type == type && addr+size == np->addr){
+			pp->size += np->size;
+			pp->next = np->next;
+
+			np->next = asmfreelist;
+			asmfreelist = np;
+		}
+
+		unlock(&asmlock);
+		return 0;
+	}
+
+	/* coalesce with the successor only */
+	if(np != nil && np->type == type && addr+size == np->addr){
+		np->addr -= size;
+		np->size += size;
+
+		unlock(&asmlock);
+		return 0;
+	}
+
+	/* no neighbour to merge with: insert a fresh entry between pp and np */
+	if((pp = asmnew(addr, size, type)) == nil){
+		unlock(&asmlock);
+		DBG("asmfree: losing %#P@%#P, type %d\n", size, addr, type);
+		return -1;
+	}
+	*ppp = pp;
+	pp->next = np;
+
+	unlock(&asmlock);
+
+	return 0;
+}
+
+/*
+ * Carve size bytes of the given type out of the map, optionally at a
+ * fixed address (addr != 0) and/or aligned to align bytes.
+ * Returns the allocated address, or 0 on failure.  Any leading
+ * fragment skipped for alignment is returned to the map via asmfree.
+ */
+uintmem
+asmalloc(uintmem addr, uintmem size, int type, int align)
+{
+	uintmem a, o;
+	Asm *assem, *pp;
+
+	DBG("asmalloc: %#P@%#P, type %d\n", size, addr, type);
+	lock(&asmlock);
+	for(pp = nil, assem = asmlist; assem != nil; pp = assem, assem = assem->next){
+		if(assem->type != type)
+			continue;
+		a = assem->addr;
+
+		if(addr != 0){
+			/*
+			 * A specific address range has been given:
+			 *   if the current map entry is greater, then
+			 *   the address is not in the map;
+			 *   if the current map entry does not overlap
+			 *   the beginning of the requested range then
+			 *   continue on to the next map entry;
+			 *   if the current map entry does not entirely
+			 *   contain the requested range then the range
+			 *   is not in the map.
+			 * The comparisons are strange to prevent
+			 * overflow.
+			 */
+			if(a > addr)
+				break;
+			if(assem->size < addr - a)
+				continue;
+			if(addr - a > assem->size - size)
+				break;
+			a = addr;
+		}
+
+		/* round the candidate address up to the requested alignment */
+		if(align > 0)
+			a = ((a+align-1)/align)*align;
+		if(assem->addr+assem->size-a < size)
+			continue;
+
+		/* o..a is the fragment skipped for alignment; give it back below */
+		o = assem->addr;
+		assem->addr = a+size;
+		assem->size -= a-o+size;
+		if(assem->size == 0){
+			/*
+			 * NOTE(review): when the exhausted entry is the list head
+			 * (pp == nil) it is pushed on the freelist while asmlist
+			 * still points at it — confirm this case cannot occur.
+			 */
+			if(pp != nil)
+				pp->next = assem->next;
+			assem->next = asmfreelist;
+			asmfreelist = assem;
+		}
+
+		unlock(&asmlock);
+		if(o != a)
+			asmfree(o, a-o, type);
+		return a;
+	}
+	unlock(&asmlock);
+
+	return 0;
+}
+
+/*
+ * Add a discovered range to the map: first claim it from the initial
+ * AsmNONE entry covering all of memory, then free it back as its real
+ * type.  If the typed free fails, put it back as AsmNONE (type 0) so
+ * the range is not lost entirely.
+ */
+static void
+asminsert(uintmem addr, uintmem size, int type)
+{
+	if(type == AsmNONE || asmalloc(addr, size, AsmNONE, 0) == 0)
+		return;
+	if(asmfree(addr, size, type) == 0)
+		return;
+	asmfree(addr, size, 0);
+}
+
+/*
+ * Record the loaded kernel: pmstart is the first page past the kernel
+ * image, and [0, pmstart) is claimed so later allocations avoid it.
+ */
+void
+asminit(void)
+{
+	sys->pmstart = ROUNDUP(PADDR(end), PGSZ);
+	sys->pmend = sys->pmstart;
+	asmalloc(0, sys->pmstart, AsmNONE, 0);
+}
+
+/*
+ * Notes:
+ * asmmapinit and asmmodinit called from multiboot;
+ * subject to change; the numerology here is probably suspect.
+ * Multiboot defines the alignment of modules as 4096.
+ */
+/*
+ * Record one memory-map range reported by the bootloader.
+ * Non-RAM types are inserted as-is; AsmMEMORY is clipped to start at
+ * sys->pmstart and tracked in pmoccupied/pmend.
+ */
+void
+asmmapinit(uintmem addr, uintmem size, int type)
+{
+	switch(type){
+	default:
+		asminsert(addr, size, type);
+		break;
+	case AsmMEMORY:
+		/*
+		 * Adjust things for the peculiarities of this
+		 * architecture.
+		 * Sys->pmend is the largest physical memory address found,
+		 * there may be gaps between it and sys->pmstart, the range
+		 * and how much of it is occupied, might need to be known
+		 * for setting up allocators later.
+		 */
+		/* skip low memory and ranges wholly below the kernel */
+		if(addr < 1*MiB || addr+size < sys->pmstart)
+			break;
+		if(addr < sys->pmstart){
+			size -= sys->pmstart - addr;
+			addr = sys->pmstart;
+		}
+		asminsert(addr, size, type);
+		sys->pmoccupied += size;
+		if(addr+size > sys->pmend)
+			sys->pmend = addr+size;
+		break;
+	}
+}
+
+/*
+ * Reserve the memory occupied by a boot module [start, end): bump
+ * sys->pmstart past it (rounded to the multiboot module alignment
+ * of 4096) and claim the gap as AsmNONE.
+ */
+void
+asmmodinit(uint32_t start, uint32_t end, char* s)
+{
+	DBG("asmmodinit: %#x -> %#x: <%s> %#x\n",
+		start, end, s, ROUNDUP(end, 4096));
+
+	if(start < sys->pmstart)
+		return;
+	end = ROUNDUP(end, 4096);
+	if(end > sys->pmstart){
+		asmalloc(sys->pmstart, end-sys->pmstart, AsmNONE, 0);
+		sys->pmstart = end;
+	}
+}
+
+static int npg[4];	/* pages mapped per size level; reported by asmmeminit */
+
+/*
+ * Bump-allocate zeroed memory from the already-mapped kernel VA range
+ * [sys->vmunused, sys->vmunmapped).  There is no way to free.
+ */
+void*
+asmbootalloc(usize size)
+{
+	uintptr_t va;
+
+	assert(sys->vmunused+size <= sys->vmunmapped);
+	va = sys->vmunused;
+	sys->vmunused += size;
+	memset(UINT2PTR(va), 0, size);
+	return UINT2PTR(va);
+}
+
+/*
+ * Page-table-page allocator handed to mmuwalk: take PTSZ bytes from
+ * the boot bump region, first aligning vmunused to PTSZ (the skipped
+ * bytes are wasted).  Returns the physical address, or ~0 when
+ * mmuphysaddr cannot translate vmunused.
+ */
+static PTE
+asmwalkalloc(usize size)
+{
+	uintmem pa;
+
+	assert(size == PTSZ && sys->vmunused+size <= sys->vmunmapped);
+
+	if(!ALIGNED(sys->vmunused, PTSZ)){
+		DBG("asmwalkalloc: %llu wasted\n",
+			ROUNDUP(sys->vmunused, PTSZ) - sys->vmunused);
+		sys->vmunused = ROUNDUP(sys->vmunused, PTSZ);
+	}
+	if((pa = mmuphysaddr(sys->vmunused)) != ~0)
+		sys->vmunused += size;
+
+	return pa;
+}
+
+// still needed so iallocb gets initialised correctly. needs to go.
+#define ConfCrap
+
+/*
+ * Finish kernel memory setup: map the remaining kernel VA range
+ * [vmunmapped, vmend) with 2MiB pages, then map every AsmMEMORY and
+ * AsmRESERVED range at KSEG2 using the largest page size that fits;
+ * under ConfCrap also fill in the legacy conf.mem[] table.
+ */
+void
+asmmeminit(void)
+{
+	int i, l;
+	Asm* assem;
+	PTE *pte, *pml4;
+	uintptr va;
+	uintmem hi, lo, mem, nextmem, pa;
+#ifdef ConfCrap
+	int cx;
+#endif /* ConfCrap */
+
+	assert(!((sys->vmunmapped|sys->vmend) & sys->pgszmask[1]));
+
+	if((pa = mmuphysaddr(sys->vmunused)) == ~0)
+		panic("asmmeminit 1");
+	pa += sys->vmunmapped - sys->vmunused;
+	/* 1 == AsmMEMORY: claim the physical memory backing [vmunmapped, vmend) */
+	mem = asmalloc(pa, sys->vmend - sys->vmunmapped, 1, 0);
+	if(mem != pa)
+		panic("asmmeminit 2");
+	DBG("pa %#llx mem %#llx\n", pa, mem);
+
+	/* assume already 2MiB aligned*/
+	assert(ALIGNED(sys->vmunmapped, 2*MiB));
+	pml4 = UINT2PTR(machp()->MMU.pml4->va);
+	while(sys->vmunmapped < sys->vmend){
+		l = mmuwalk(pml4, sys->vmunmapped, 1, &pte, asmwalkalloc);
+		DBG("%#p l %d\n", sys->vmunmapped, l);
+		*pte = pa|PteRW|PteP;
+		sys->vmunmapped += 2*MiB;
+		pa += 2*MiB;
+	}
+
+#ifdef ConfCrap
+	cx = 0;
+#endif /* ConfCrap */
+	for(assem = asmlist; assem != nil; assem = assem->next){
+		DBG("asm: addr %#P end %#P type %d size %P\n",
+			assem->addr, assem->addr+assem->size,
+			assem->type, assem->size);
+		if((assem->type != AsmMEMORY)&&(assem->type != AsmRESERVED)) {
+			DBG("Skipping, it's not AsmMEMORY or AsmRESERVED\n");
+			continue;
+		}
+		/* each range is mapped at its KSEG2 alias */
+		va = KSEG2+assem->addr;
+		DBG("asm: addr %#P end %#P type %d size %P\n",
+			assem->addr, assem->addr+assem->size,
+			assem->type, assem->size);
+
+		lo = assem->addr;
+		hi = assem->addr+assem->size;
+		/* Convert a range into pages */
+		for(mem = lo; mem < hi; mem = nextmem){
+			nextmem = (mem + PGLSZ(0)) & ~sys->pgszmask[0];
+
+			/* Try large pages first */
+			for(i = sys->npgsz - 1; i >= 0; i--){
+				if((mem & sys->pgszmask[i]) != 0)
+					continue;
+				if(mem + PGLSZ(i) > hi)
+					continue;
+				/* This page fits entirely within the range. */
+				/* Mark it as usable */
+				if((l = mmuwalk(pml4, va, i, &pte, asmwalkalloc)) < 0)
+					panic("asmmeminit 3");
+
+				/* reserved ranges are mapped read-only (no PteRW) */
+				if (assem->type == AsmMEMORY)
+					*pte = mem|PteRW|PteP;
+				else
+					*pte = mem|PteP;
+
+				if(l > 0)
+					*pte |= PteFinal;
+
+				nextmem = mem + PGLSZ(i);
+				va += PGLSZ(i);
+				npg[i]++;
+
+				break;
+			}
+		}
+
+#ifdef ConfCrap
+		/*
+		 * Fill in conf crap.
+		 */
+		if(cx >= nelem(conf.mem))
+			continue;
+		lo = ROUNDUP(assem->addr, PGSZ);
+//if(lo >= 600ull*MiB)
+//    continue;
+		conf.mem[cx].base = lo;
+		hi = ROUNDDN(hi, PGSZ);
+//if(hi > 600ull*MiB)
+//  hi = 600*MiB;
+		conf.mem[cx].npage = (hi - lo)/PGSZ;
+		conf.npage += conf.mem[cx].npage;
+		DBG("cm %d: addr %#llx npage %lu\n",
+			cx, conf.mem[cx].base, conf.mem[cx].npage);
+		cx++;
+#endif /* ConfCrap */
+	}
+	DBG("%d %d %d\n", npg[0], npg[1], npg[2]);
+
+#ifdef ConfCrap
+	/*
+	 * Fill in more conf crap.
+	 * This is why I hate Plan 9.
+	 */
+	conf.upages = conf.npage;
+	i = (sys->vmend - sys->vmstart)/PGSZ;		/* close enough */
+	conf.ialloc = (i/2)*PGSZ;
+	DBG("npage %llu upage %lu kpage %d\n",
+		conf.npage, conf.upages, i);
+
+#endif /* ConfCrap */
+}
+
+/*
+ * Hand every AsmMEMORY range to the physical-page buddy allocator
+ * (physinit, in physalloc.c) and dump the result.
+ */
+void
+asmumeminit(void)
+{
+	Asm *assem;
+	extern void physallocdump(void);
+
+	for(assem = asmlist; assem != nil; assem = assem->next){
+		if(assem->type != AsmMEMORY)
+			continue;
+		physinit(assem->addr, assem->size);
+	}
+	physallocdump();
+}

+ 0 - 0
sys/src/9/riscv/asm.S → sys/src/9/riscv/assembly.S


+ 3 - 2
sys/src/9/riscv/core.json

@@ -43,7 +43,6 @@
 	    "MissingSourceFiles": [
 		        "acore.c",
 			"archriscv.c",
-			"asm.c",
 		        "coreboot.c",
 			"devarch.c",
 			"memory.c",
@@ -55,8 +54,9 @@
 			"trap.c"
 		],
 		"SourceFiles": [
-			"asm.S",
+			"assembly.S",
 			"arch.c",
+			"asm.c",
 		        "acore.c",
 			"archriscv.c",
 			"ctype.c",
@@ -64,6 +64,7 @@
 			"main.c",
 			"map.c",
 			"mmu.c",
+			"physalloc.c",
 			"qmalloc.c",
 			"syscall.c",
 			"systab.c",

+ 3 - 19
sys/src/9/riscv/main.c

@@ -102,6 +102,9 @@ void bsp(void)
 	// probably pull in the one from coreboot for riscv.
 
 	consuartputs = puts;
+	asminit();
+	fmtinit();
+	print("\nHarvey\n");
 
 	die("Completed hart for bsp OK!\n");
 }
@@ -145,13 +148,6 @@ hardhalt(void)
 	panic((char *)__func__);
 }
 
-uintmem
-physalloc(uint64_t _, int*__, void*___)
-{
-	panic((char *)__func__);
-	return 0;
-}
-
 void
 ureg2gdb(Ureg *u, uintptr_t *g)
 {
@@ -209,12 +205,6 @@ void kexit(Ureg*_)
 	panic((char *)__func__);
 }
 
-char*
-seprintphysstats(char*_, char*__)
-{
-	return "NOT YET";
-}
-
 void
 reboot(void*_, void*__, int32_t ___)
 {
@@ -292,12 +282,6 @@ setkernur(Ureg*u, Proc*p)
 }
 
 
-void
-physfree(uintmem data, uint64_t size)
-{
-	panic("physfree %p 0x%lx", data, size);
-}
-
 void
 stacksnippet(void)
 {

+ 10 - 7
sys/src/9/riscv/mmu.c

@@ -327,7 +327,7 @@ checkpte(uintmem ppn, void *a)
 	s = seprint(s, buf+sizeof buf,
 		"check2: l%d  pte %#p = %llx\n",
 		l, pte, pte?*pte:~0);
-	if(*pte&PtePS)
+	if(*pte&PteFinal)
 		return;
 	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
 		goto Panic;
@@ -410,10 +410,12 @@ pteflags(uint attr)
 		flags |= PteRW;
 	if(attr&PTEUSER)
 		flags |= PteU;
+	/* Can't do this -- what do we do?
 	if(attr&PTEUNCACHED)
 		flags |= PtePCD;
+	*/
 	if(attr&PTENOEXEC)
-		flags |= PteNX;
+		flags &= ~PteX;
 	return flags;
 }
 
@@ -516,7 +518,7 @@ mmuput(uintptr_t va, Page *pg, uint attr)
 		switch(pgsz){
 		case 2*MiB:
 		case 1*GiB:
-			*pte |= PtePS;
+			*pte |= attr & PteFinal | PteP;
 			break;
 		default:
 			panic("mmuput: user pages must be 2M or 1G");
@@ -590,7 +592,8 @@ pdmap(uintptr_t pa, int attr, uintptr_t va, usize size)
 		 */
 		if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){
 			assert(*pde == 0);
-			*pde = pa|attr|PtePS|PteP;
+			/* attr had better include one of Pte{W,R,X}*/
+			*pde = pa|attr|PteP;
 			pgsz = PGLSZ(1);
 		}
 		else{
@@ -664,7 +667,7 @@ vmapalloc(usize size)
 	n = HOWMANY(size, PGLSZ(0));
 	ptsz = PGLSZ(0)/sizeof(PTE);
 	for(i = 0; i < pdsz; i++){
-		if(!(pd[i] & PteP) || (pd[i] & PtePS))
+		if(!(pd[i] & PteP) || (pd[i] & PteFinal))
 			continue;
 
 		pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096);
@@ -732,7 +735,7 @@ vmap(uintptr_t pa, usize size)
 		return nil;
 	}
 	ilock(&vmaplock);
-	if((va = vmapalloc(sz)) == 0 || pdmap(pa, PtePCD|PteRW, va, sz) < 0){
+	if((va = vmapalloc(sz)) == 0 || pdmap(pa, /*PtePCD|*/PteRW, va, sz) < 0){
 		iunlock(&vmaplock);
 		return nil;
 	}
@@ -794,7 +797,7 @@ mmuwalk(PTE* pml4, uintptr_t va, int level, PTE** ret,
 			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
 			*pte = pa|PteRW|PteP;
 		}
-		else if(*pte & PtePS)
+		else if(*pte & PteFinal)
 			break;
 		pte = UINT2PTR(KADDR(PPN(*pte)));
 		pte += PTLX(va, l-1);

+ 10 - 169
sys/src/9/riscv/mmu.h

@@ -7,173 +7,14 @@
  * in the LICENSE file.
  */
 
-/* Cr0 */
-#define Pe		0x00000001		/* Protected Mode Enable */
-#define Mp		0x00000002		/* Monitor Coprocessor */
-#define Em		0x00000004		/* Emulate Coprocessor */
-#define Ts		0x00000008		/* Task Switched */
-#define Et		0x00000010		/* Extension Type */
-#define Ne		0x00000020		/* Numeric Error  */
-#define Wp		0x00010000		/* Write Protect */
-#define Am		0x00040000		/* Alignment Mask */
-#define Nw		0x20000000		/* Not Writethrough */
-#define Cd		0x40000000		/* Cache Disable */
-#define Pg		0x80000000		/* Paging Enable */
-
-/* Cr3 */
-#define Pwt		0x00000008		/* Page-Level Writethrough */
-#define Pcd		0x00000010		/* Page-Level Cache Disable */
-
-/* Cr4 */
-#define Vme		0x00000001		/* Virtual-8086 Mode Extensions */
-#define Pvi		0x00000002		/* Protected Mode Virtual Interrupts */
-#define Tsd		0x00000004		/* Time-Stamp Disable */
-#define De		0x00000008		/* Debugging Extensions */
-#define Pse		0x00000010		/* Page-Size Extensions */
-#define Pae		0x00000020		/* Physical Address Extension */
-#define Mce		0x00000040		/* Machine Check Enable */
-#define Pge		0x00000080		/* Page-Global Enable */
-#define Pce		0x00000100		/* Performance Monitoring Counter Enable */
-#define Osfxsr		0x00000200		/* FXSAVE/FXRSTOR Support */
-#define Osxmmexcpt	0x00000400		/* Unmasked Exception Support */
-
-/* Rflags */
-#define Cf		0x00000001		/* Carry Flag */
-#define Pf		0x00000004		/* Parity Flag */
-#define Af		0x00000010		/* Auxiliary Flag */
-#define Zf		0x00000040		/* Zero Flag */
-#define Sf		0x00000080		/* Sign Flag */
-#define Tf		0x00000100		/* Trap Flag */
-#define If		0x00000200		/* Interrupt Flag */
-#define Df		0x00000400		/* Direction Flag */
-#define Of		0x00000800		/* Overflow Flag */
-#define Iopl0		0x00000000		/* I/O Privilege Level */
-#define Iopl1		0x00001000
-#define Iopl2		0x00002000
-#define Iopl3		0x00003000
-#define Nt		0x00004000		/* Nested Task */
-#define Rf		0x00010000		/* Resume Flag */
-#define Vm		0x00020000		/* Virtual-8086 Mode */
-#define Ac		0x00040000		/* Alignment Check */
-#define Vif		0x00080000		/* Virtual Interrupt Flag */
-#define Vip		0x00100000		/* Virtual Interrupt Pending */
-#define Id		0x00200000		/* ID Flag */
-
-/* MSRs */
-#define PerfEvtbase	0xc0010000		/* Performance Event Select */
-#define PerfCtrbase	0xc0010004		/* Performance Counters */
-
-#define Efer		0xc0000080		/* Extended Feature Enable */
-#define Star		0xc0000081		/* Legacy Target IP and [CS]S */
-#define Lstar		0xc0000082		/* Long Mode Target IP */
-#define Cstar		0xc0000083		/* Compatibility Target IP */
-#define Sfmask		0xc0000084		/* SYSCALL Flags Mask */
-#define FSbase		0xc0000100		/* 64-bit FS Base Address */
-#define GSbase		0xc0000101		/* 64-bit GS Base Address */
-#define KernelGSbase	0xc0000102		/* SWAPGS instruction */
-
-/* Efer */
-#define Sce		0x00000001		/* System Call Extension */
-#define Lme		0x00000100		/* Long Mode Enable */
-#define Lma		0x00000400		/* Long Mode Active */
-#define Nxe		0x00000800		/* No-Execute Enable */
-#define Svme		0x00001000		/* SVM Extension Enable */
-#define Ffxsr		0x00004000		/* Fast FXSAVE/FXRSTOR */
-
 /* PML4E/PDPE/PDE/PTE */
-#define PteP		0x0000000000000001	/* Present */
-#define PteRW		0x0000000000000002	/* Read/Write */
-#define PteU		0x0000000000000004	/* User/Supervisor */
-#define PtePWT		0x0000000000000008	/* Page-Level Write Through */
-#define PtePCD		0x0000000000000010	/* Page Level Cache Disable */
-#define PteA		0x0000000000000020	/* Accessed */
-#define PteD		0x0000000000000040	/* Dirty */
-#define PtePS		0x0000000000000080	/* Page Size */
-#define Pte4KPAT	PtePS			/* PTE PAT */
-#define PteG		0x0000000000000100	/* Global */
-#define Pte2MPAT	0x0000000000001000	/* PDE PAT */
-#define Pte1GPAT	Pte2MPAT		/* PDPE PAT */
-#define PteNX		0x8000000000000000	/* No Execute */
-
-/* Exceptions */
-#define IdtDE		0			/* Divide-by-Zero Error */
-#define IdtDB		1			/* Debug */
-#define IdtNMI		2			/* Non-Maskable-Interrupt */
-#define IdtBP		3			/* Breakpoint */
-#define IdtOF		4			/* Overflow */
-#define IdtBR		5			/* Bound-Range */
-#define IdtUD		6			/* Invalid-Opcode */
-#define IdtNM		7			/* Device-Not-Available */
-#define IdtDF		8			/* Double-Fault */
-#define Idt09		9			/* unsupported */
-#define IdtTS		10			/* Invalid-TSS */
-#define IdtNP		11			/* Segment-Not-Present */
-#define IdtSS		12			/* Stack */
-#define IdtGP		13			/* General-Protection */
-#define IdtPF		14			/* Page-Fault */
-#define Idt0F		15			/* reserved */
-#define IdtMF		16			/* x87 FPE-Pending */
-#define IdtAC		17			/* Alignment-Check */
-#define IdtMC		18			/* Machine-Check */
-#define IdtXF		19			/* SIMD Floating-Point */
-
-/* Vestigial Segmented Virtual Memory */
-#define SdISTM		0x0000000700000000	/* Interrupt Stack Table Mask */
-#define SdA		0x0000010000000000	/* Accessed */
-#define SdR		0x0000020000000000	/* Readable (Code) */
-#define SdW		0x0000020000000000	/* Writeable (Data) */
-#define SdE		0x0000040000000000	/* Expand Down */
-#define SdaTSS		0x0000090000000000	/* Available TSS */
-#define SdbTSS		0x00000b0000000000	/* Busy TSS */
-#define SdCG		0x00000c0000000000	/* Call Gate */
-#define SdIG		0x00000e0000000000	/* Interrupt Gate */
-#define SdTG		0x00000f0000000000	/* Trap Gate */
-#define SdCODE		0x0000080000000000	/* Code/Data */
-#define SdS		0x0000100000000000	/* System/User */
-#define SdDPL0		0x0000000000000000	/* Descriptor Privilege Level */
-#define SdDPL1		0x0000200000000000
-#define SdDPL2		0x0000400000000000
-#define SdDPL3		0x0000600000000000
-#define SdP		0x0000800000000000	/* Present */
-#define Sd4G		0x000f00000000ffff	/* 4G Limit */
-#define SdL		0x0020000000000000	/* Long Attribute */
-#define SdD		0x0040000000000000	/* Default Operand Size */
-#define SdG		0x0080000000000000	/* Granularity */
-
-/* Performance Counter Configuration */
-#define PeHo		0x0000020000000000	/* Host only */
-#define PeGo		0x0000010000000000	/* Guest only */
-#define PeEvMskH	0x0000000f00000000	/* Event mask H */
-#define PeCtMsk		0x00000000ff000000	/* Counter mask */
-#define PeInMsk		0x0000000000800000	/* Invert mask */
-#define PeCtEna		0x0000000000400000	/* Counter enable */
-#define PeInEna		0x0000000000100000	/* Interrupt enable */
-#define PePnCtl		0x0000000000080000	/* Pin control */
-#define PeEdg		0x0000000000040000	/* Edge detect */
-#define PeOS		0x0000000000020000	/* OS mode */
-#define PeUsr		0x0000000000010000	/* User mode */
-#define PeUnMsk		0x000000000000ff00	/* Unit Mask */
-#define PeEvMskL	0x00000000000000ff	/* Event Mask L */
-
-#define PeEvMsksh	32			/* Event mask shift */
-
-/* Segment Selector */
-#define SsRPL0		0x0000			/* Requestor Privilege Level */
-#define SsRPL1		0x0001
-#define SsRPL2		0x0002
-#define SsRPL3		0x0003
-#define SsTIGDT		0x0000			/* GDT Table Indicator  */
-#define SsTILDT		0x0004			/* LDT Table Indicator */
-#define SsSIM		0xfff8			/* Selector Index Mask */
-
-#define SSEL(si, tirpl)	(((si)<<3)|(tirpl))	/* Segment Selector */
-
-#define SiNULL		0			/* NULL selector index */
-#define SiCS		1			/* CS selector index */
-#define SiDS		2			/* DS selector index */
-#define SiU32CS		3			/* User CS selector index */
-#define SiUDS		4			/* User DS selector index */
-#define SiUCS		5			/* User CS selector index */
-#define SiFS		6			/* FS selector index */
-#define SiGS		7			/* GS selector index */
-#define SiTSS		8			/* TSS selector index */
+#define PteP            0x0000000000000001	/* Valid */
+#define PteR		0x0000000000000002	/* Read */
+#define PteW		0x0000000000000004	/* Write */
+#define PteRW		0x0000000000000006	/* Read/Write */
+#define PteX		0x0000000000000008	/* Execute */
+#define PteFinal        0x000000000000000e      /* leaf PTE: R|W|X (any of these set ends the walk) */
+#define PteU		0x0000000000000010	/* User/Supervisor */
+#define PteA		0x0000000000000040	/* Accessed */
+#define PteD		0x0000000000000080	/* Dirty */
+#define PteG		0x0000000000000020	/* Global */

+ 542 - 0
sys/src/9/riscv/physalloc.c

@@ -0,0 +1,542 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+/*
+ * Buddy allocator for physical memory allocation.
+ * One per ACPI affinity domain, to color pages depending on their
+ * NUMA location.
+ *
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#define ISPOWEROF2(x)	(((x) != 0) && !((x) & ((x)-1)))
+#define UNO		((uintmem)1)	/* 1 at the allocator's pointer width */
+
+enum {
+	BKmin		= 21,			/* Minimum lg2 */
+	BKmax		= 30,			/* Maximum lg2 */
+
+	Ndoms = 16,				/* Max # of domains */
+
+	Used = 0,
+	Avail = 1,
+};
+
+
+/* INDEX: address -> global block index; BLOCK: global index -> blocks[] slot */
+#define INDEX(b, v)	((uint)(((v))/(b)->bminsz))
+#define BLOCK(b, i)	((i)-INDEX((b),(b)->memory))
+
+typedef struct Buddy Buddy;
+struct Buddy {
+	int16_t	tag;		/* Used or Avail */
+	int16_t	kval;		/* lg2 of this block's size */
+	uint	next;		/* forward link: global block index, 0 == none */
+	uint	prev;		/* backward link: global block index, 0 == none */
+	void	*p;		/* caller-supplied tag for the allocation */
+};
+
+/*
+ * Bals should allocate using its base address as 0.
+ * For now, all of them refer to the entire memory and we record
+ * the base and size for each one.
+ */
+typedef struct Bal Bal;
+struct Bal {
+	uintmem	base;		/* start of this domain's memory */
+	uint64_t	size;	/* length of this domain's memory */
+	usize	nfree;		/* free blocks, in bminsz units */
+	usize	nblocks;	/* total blocks, in bminsz units */
+	int	kmin;		/* Minimum lg2 */
+	int	kmax;		/* Maximum lg2 */
+	uintmem	bminsz;		/* minimum block sz */
+	uintmem memory;		/* address the block indices are relative to */
+	uint	kspan;		/* lg2 of the span covered by blocks[] */
+
+	Buddy* blocks;		/* one slot per bminsz unit */
+	Buddy* avail;		/* per-kval free-list heads */
+};
+
+static Bal bal[Ndoms];		/* one allocator per NUMA domain */
+static int ndoms;		/* number of domains seen so far */
+static Lock budlock;		/* protects every Bal */
+
+/*
+ * Append one stats line per active domain allocator to the buffer
+ * [s, e); returns the updated position, seprint-style.
+ */
+char*
+seprintphysstats(char *s,  char *e)
+{
+	Bal *b;
+	int i;
+
+	lock(&budlock);
+	for(i = 0; i < Ndoms; i++){
+		b = &bal[i];
+		if(b->size > 0)
+			s = seprint(s, e, "%lu/%lu %lluK color %d blocks avail\n",
+				b->nfree, b->nblocks, b->bminsz/KiB, i);
+	}
+	unlock(&budlock);
+	return s;
+}
+
+/*
+ * Free the block at physical address data back into allocator b,
+ * repeatedly merging it with its buddy until the buddy is unavailable
+ * or the maximum block size is reached.
+ * NOTE(review): size is only used to update the free count; it is
+ * assumed to match the block's recorded kval — confirm in callers.
+ */
+static void
+xphysfree(Bal *b, uintmem data, uint64_t size)
+{
+	uint i;
+	Buddy *l, *p;
+	Buddy *blocks, *avail;
+
+	DBG("physfree\n");
+
+	/*
+	 * Knuth's Algorithm S (Buddy System Liberation).
+	 */
+	blocks = b->blocks;
+	avail = b->avail;
+
+	if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
+		return;
+	i = INDEX(b,data);
+
+	lock(&budlock);
+S1:
+	/*
+	 * Find buddy: an odd block (in units of its own size) pairs with
+	 * the block below it, an even one with the block above it.
+	 */
+	l = &blocks[BLOCK(b,i)];
+	l->p = nil;
+	DBG("\tbsl: BLOCK(b,i) %d index %llu kval %d\n",
+		BLOCK(b,i), BLOCK(b,i)/((1<<l->kval)/b->bminsz), l->kval);
+	if((BLOCK(b,i)/((1<<l->kval)/b->bminsz)) & 1)	/* simpler test? */
+		p = l - (1<<l->kval)/b->bminsz;
+	else
+		p = l + (1<<l->kval)/(b->bminsz);
+	DBG("\tbsl: l @ %ld buddy @ %ld\n", l - blocks, p - blocks);
+
+	/*
+	 * Is buddy available?
+	 * Can't merge if:
+	 *	this is the largest block;
+	 *	buddy isn't free;
+	 *	buddy has been subsequently split again.
+	 */
+	if(l->kval == b->kmax || p->tag == Used || (p->tag == Avail && p->kval != l->kval)){
+		/*
+		 * Put on list.
+		 */
+		l->tag = Avail;
+		l->next = avail[l->kval].next;
+		l->prev = 0;
+		if(l->next != 0)
+			blocks[BLOCK(b,l->next)].prev = i;
+		avail[l->kval].next = i;
+
+		b->nfree += size/b->bminsz;
+
+		unlock(&budlock);
+		DBG("bsl: free @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
+			i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
+		return;
+	}
+
+	/*
+	 * Combine with buddy.
+	 * This removes block P from the avail list.
+	 */
+	if(p->prev != 0){
+		blocks[BLOCK(b,p->prev)].next = p->next;
+		p->prev = 0;
+	}
+	else
+		avail[p->kval].next = 0;
+	if(p->next != 0){
+		blocks[BLOCK(b,p->next)].prev = p->prev;
+		p->next = 0;
+	}
+	p->tag = Used;
+
+	/*
+	 * Now try to merge the resulting larger block in turn.
+	 */
+	DBG("\tbsl: l @ %ld p @ %ld\n", l - blocks, p - blocks);
+	if(p < l)
+		l = p;
+	i = l - blocks + INDEX(b,b->memory);
+	l->kval++;
+	DBG("bsl: merge @ i %d BLOCK(b,i) %d kval %d next %d tag %s\n",
+		i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
+	goto S1;
+}
+
+/*
+ * Free a physical range: find the domain allocator whose [base,
+ * base+size) contains data and hand it to xphysfree; panic if no
+ * allocator owns the address.
+ */
+void
+physfree(uintmem data, uint64_t size)
+{
+	Bal *b;
+	int i;
+
+	for(i = 0; i < Ndoms; i++){
+		b = &bal[i];
+		if(b->base <= data && data < b->base + b->size){
+			xphysfree(b, data, size);
+			return;
+		}
+	}
+	panic("physfree: no bal");
+}
+
+/*
+ * Return the tag pointer recorded when the block containing data was
+ * allocated from b, or nil for address 0.
+ */
+static void*
+xphystag(Bal *b, uintmem data)
+{
+	uint i;
+	Buddy *blocks;
+
+	DBG("phystag\n");
+
+	blocks = b->blocks;
+
+	if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
+		return nil;
+	i = INDEX(b,data);
+	return blocks[BLOCK(b,i)].p;
+}
+
+/*
+ * Look up the allocation tag for a physical address across all
+ * domains; nil if no allocator owns it.
+ */
+void*
+phystag(uintmem data)
+{
+	Bal *b;
+	int i;
+
+	for(i = 0; i < Ndoms; i++){
+		b = &bal[i];
+		if(b->base <= data && data < b->base + b->size)
+			return xphystag(b, data);
+	}
+	return nil;
+}
+
+/* lg2table[i] = floor(log2(i)) for 1 <= i <= 255 (entry 0 is unused, left 0) */
+static uint8_t lg2table[256] = {
+	0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+};
+
+/*
+ * floor(log2(w)) for a 64-bit w, located byte-by-byte via lg2table.
+ * Returns 0 for w == 0 (log2 undefined there).
+ * NOTE(review): the hi/lo names are swapped relative to their roles
+ * (lo holds the higher shifted part) — cosmetic only.
+ */
+static int
+lg2floor(uint64_t w)
+{
+	uint64_t hi, lo;
+
+	if((lo = (w>>48)) != 0){
+		if((hi = (lo>>8)) != 0)
+			return 56+lg2table[hi];
+		return 48+lg2table[lo];
+	}
+	if((lo = (w>>32)) != 0){
+		if((hi = (lo>>8)) != 0)
+			return 40+lg2table[hi];
+		return 32+lg2table[lo];
+	}
+	if((lo = (w>>16)) != 0){
+		if((hi = (lo>>8)) != 0)
+			return 24+lg2table[hi];
+		return 16+lg2table[lo];
+	}
+	if((hi = (w>>8)) != 0)
+		return 8+lg2table[hi];
+	return lg2table[w];
+}
+
+/*
+ * Allocate size bytes (rounded up to bminsz; must be a power of two,
+ * else 0 is returned) from allocator b, splitting larger blocks as
+ * needed.  Returns the physical address, or 0 on failure; tag is
+ * recorded for phystag.
+ */
+static uintmem
+xphysalloc(Bal *b, uint64_t size, void *tag)
+{
+	uint i, j, k;
+	Buddy *l, *p;
+	Buddy *avail, *blocks;
+	uintmem m;
+
+	DBG("physalloc\n");
+	assert(b->size > 0);
+
+	avail = b->avail;
+	blocks = b->blocks;
+
+	/*
+	 * Knuth's Algorithm R (Buddy System Reservation).
+	 */
+	if(size < b->bminsz)
+		size = b->bminsz;
+
+	/*
+	 * Find block.
+	 */
+	if(!ISPOWEROF2(size))
+		return 0;
+	k = lg2floor(size);
+
+	/* find the smallest kval >= k with a free block */
+	lock(&budlock);
+	for(j = k; j <= b->kmax; j++){
+		if(avail[j].next != 0)
+			break;
+	}
+	DBG("bsr: size %#llud k %d j %d\n", size, k, j);
+	if(j > b->kmax){
+		unlock(&budlock);
+		return 0;
+	}
+
+	/*
+	 * Remove from list.
+	 * NOTE(review): every other list operation translates indices
+	 * through BLOCK(b,...) before indexing blocks[]; the prev-reset
+	 * below uses the raw global index (and runs even when the list
+	 * became empty) — confirm whether BLOCK(b, avail[j].next) was
+	 * intended.
+	 */
+	i = avail[j].next;
+	l = &blocks[BLOCK(b,i)];
+	DBG("bsr: block @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
+		i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
+	avail[j].next = l->next;
+	blocks[avail[j].next].prev = 0;
+	l->prev = l->next = 0;
+	l->tag = Used;
+	l->kval = k;
+
+	/*
+	 * Split required?
+	 */
+	while(j > k){
+		/*
+		 * Split: free the upper half of the block at each level.
+		 */
+		j--;
+		p = &blocks[BLOCK(b,i) + (UNO<<j)/(b->bminsz)];
+		p->tag = Avail;
+		p->kval = j;
+		p->next = avail[j].next;
+		p->prev = 0;
+		if(p->next != 0)
+			blocks[BLOCK(b,p->next)].prev = i + (UNO<<j)/(b->bminsz);
+		avail[j].next = i + (UNO<<j)/(b->bminsz);
+		DBG("bsr: split @ i %d BLOCK(b,i) %ld j %d next %d (%d) %s\n",
+			i, p - blocks, j, p->next, BLOCK(b,p->next),
+			p->tag?"avail":"used");
+	}
+	b->nfree -= size/b->bminsz;
+	unlock(&budlock);
+
+	m = b->memory + b->bminsz*BLOCK(b,i);
+	assert(m >= b->base && m < b->base + b->size);
+	blocks[BLOCK(b,i)].p = tag;
+
+	return m;
+}
+
+/*
+ * Allocate size bytes of physical memory, preferring the NUMA color
+ * *colorp (when >= 0); on fallback any domain is tried and *colorp is
+ * updated to the domain actually used.  Returns 0 on failure.
+ */
+uintmem
+physalloc(uint64_t size, int *colorp, void *tag)
+{
+	int i, color;
+	uintmem m;
+
+	m = 0;
+
+	color = *colorp;
+	if(color >= 0){
+		color %= ndoms;
+		if(bal[color].kmin > 0){	/* kmin > 0 means initialised */
+			*colorp = color;
+			m = xphysalloc(&bal[color], size, tag);
+		}
+	}
+	if(m == 0)
+		for(i = 0; i < ndoms; i++)
+			if(bal[i].kmin > 0)
+				if((m = xphysalloc(&bal[i], size, tag)) != 0){
+					*colorp = i;
+					return m;
+				}
+	return m;
+}
+
+#if 0
+/* debug-only dump of the block array and per-kval free lists; compiled out */
+static void
+dump(Bal *b)
+{
+	uint bi, i, k;
+	Buddy *blocks;
+
+	blocks = b->blocks;
+	for(i = 0; i < (UNO<<(b->kmax-b->kmin+1)); i++){
+		if(blocks[i].tag == Used)
+			continue;
+		print("blocks[%d]: size %d prev %d next %d\n",
+			i, 1<<b->blocks[i].kval, blocks[i].prev, blocks[i].next);
+		//i += (1<<blocks[i].kval)/b->bminsz-1;
+	}
+
+	for(k = 0; k <= b->kmax; k++){
+		print("a[%d]:", k);
+		for(bi = b->avail[k].next; bi != 0; bi = blocks[BLOCK(b,bi)].next){
+			print(" %d", bi);
+		}
+		print("\n");
+	}
+}
+#endif
+
+/*
+ * Print the base and size of every initialised domain allocator.
+ */
+void
+physallocdump(void)
+{
+	int n;
+
+	for(n = 0; n < Ndoms; n++)
+		if(bal[n].size > 0)
+			print("physalloc color=%d base=%#llx size=%#llx\n",
+				n, bal[n].base, bal[n].size);
+}
+
+/*
+ * Seed allocator b with one 2^k-byte block at address a: set its kval
+ * and free it, letting xphysfree thread it onto the avail lists.
+ * type is currently unused; always returns 1.
+ */
+static int
+plop(Bal *b, uintmem a, int k, int type)
+{
+	uint i;
+	Buddy *l;
+
+
+	DBG("plop(a %#p k %d type %d)\n", a, k, type);
+
+	i = INDEX(b,a);
+	l = &b->blocks[BLOCK(b,i)];
+
+	l->kval = k;
+	xphysfree(b, a, 1<<k);
+
+	return 1;
+}
+
+/*
+ * Seed allocator b with the range [a, e): emit growing blocks until a
+ * is aligned to the largest size, then max-size blocks across the
+ * middle, then shrinking blocks to cover the tail.  Always returns 0.
+ */
+static int
+iimbchunk(Bal *b, uintmem a, uintmem e, int type)
+{
+	int k;
+	uint s;
+
+	a = ROUNDUP(a, b->bminsz);
+	e = ROUNDDN(e, b->bminsz);
+	DBG("iimbchunk: start a %#P e %#P\n", a, e);
+
+	b->nblocks += (e-a)/b->bminsz;
+
+	/* head: grow the block size while a is the odd half of a pair */
+	for(k = b->kmin, s = b->bminsz; a+s < e && k < b->kmax; s <<= 1, k += 1){
+		if(a & s){
+			plop(b, a, k, type);
+			a += s;
+		}
+	}
+	DBG("done1 a %#P e %#P s %#x %d\n", a, e, s, k);
+
+	/* middle: maximum-size blocks */
+	while(a+s <= e){
+		plop(b, a, k, type);
+		a += s;
+	}
+	DBG("done2 a %#P e %#P s %#x %d\n", a, e, s, k);
+
+	/* tail: shrink the block size back down to cover what remains */
+	for(k -= 1, s >>= 1; a < e; s >>= 1, k -= 1){
+		if(a+s <= e){
+			plop(b, a, k, type);
+			a += s;
+		}
+	}
+	DBG("done3 a %#P e %#P s %#x %d\n", a, e, s, k);
+
+	return 0;
+}
+
+/*
+ * Initialize the user-memory buddy allocators with the range
+ * [a, a+size); called from asmumeminit for each AsmMEMORY range.
+ * Each NUMA domain gets its own Bal; a domain's allocator is created
+ * lazily on its first range and extended on later ones.
+ */
+void
+physinit(uintmem a, uint64_t size)
+{
+	uintmem dtsz;
+	Bal *b;
+	int i, dom;
+	uintmem addr, len;
+
+	DBG("physinit %#llx %#llx\n", a, size);
+
+	for(addr = a; addr < a+size; addr += len){
+		dom = 0;
+		/* NOTE(review): acpimblocksize is disabled, so len stays 0 and
+		 * the whole range lands in domain 0 — confirm intended for now. */
+		len = 0; // acpimblocksize(addr, &dom);
+		/* len can be zero if there's no acpi information about addr */
+		if(len == 0 || addr + len > a + size)
+			len = a + size - addr;
+		/*
+		 * Each block belongs to a different domain (ie. cpu/mem socket)
+		 * We must create a buddy allocator for each block, so we could
+		 * allocate memory from different domains.
+		 *
+		 * This code assumes that a domain may be extended later and
+		 * that there is no interleaving of domains. Ok by now.
+		 */
+		DBG("physmem block dom %d addr %#llx size %#llx\n",
+			dom, addr, len);
+		if(dom < 0 || dom >= Ndoms){
+			print("physinit: invalid dom %d\n", dom);
+			dom = 0;
+		}
+		b = &bal[dom];
+		if(dom >= ndoms)
+			ndoms = dom+1;
+		if(b->kmin == 0){
+			/* first range for this domain: create its allocator */
+			b->base = addr;
+			b->size = len;
+			b->kmin = BKmin;
+			b->kmax = BKmax;
+			b->bminsz = (UNO<<b->kmin);
+			b->memory = sys->pmstart;
+			b->kspan = lg2floor(sys->pmend);
+			if(!ISPOWEROF2(sys->pmend))
+				b->kspan++;
+			dtsz = sizeof(Buddy)*(UNO<<(b->kspan-b->kmin+1));
+			DBG("kspan %u (arrysz = %llu)\n", b->kspan, dtsz);
+			b->blocks = malloc(dtsz);
+			if(b->blocks == nil)
+				panic("physinit: no blocks");
+			memset(b->blocks, 0, dtsz);
+			b->avail = malloc(sizeof(Buddy)*(b->kmax+1));
+			if(b->avail == nil)
+				panic("physinit: no avail");
+			memset(b->avail, 0, sizeof(Buddy)*(b->kmax+1));
+		}else{
+			/* later range: extend the domain; ranges may not shrink */
+			if(addr < b->base)
+				panic("physinit: decreasing base");
+			if(b->base+b->size < addr + len)
+				b->size = (addr-b->base) + len;
+			for(i = 0; i < Ndoms; i++)
+				if(bal[i].kmin && &bal[i] != b)
+				if(bal[i].base < b->base + b->size &&
+				   bal[i].base + bal[i].size > b->base + b->size)
+					panic("physinit: doms overlap");
+		}
+		assert(addr >= b->base && addr+len <= b->base + b->size);
+
+		iimbchunk(b, addr, addr+len, 0);
+	}
+
+
+}