Browse Source

Link the kernel at 0xFFFF_8000_0000_0000. (#1043)

Link the kernel at the beginning of the upper
part of the canonical address space.  Lots of
other miscellaneous fixes.

Signed-off-by: Dan Cross <cross@gajendra.net>

Co-authored-by: Dan Cross <cross@gajendra.net>
Dan Cross 3 years ago
parent
commit
3ce07d9c86
44 changed files with 1483 additions and 2232 deletions
  1. 9 6
      amd64/include/u.h
  2. 1 1
      sys/src/9/386/pci.c
  3. 1 1
      sys/src/9/amd64/amd64.h
  4. 4 6
      sys/src/9/amd64/apic.c
  5. 6 4
      sys/src/9/amd64/archamd64.c
  6. 0 485
      sys/src/9/amd64/asm.c
  7. 1 1
      sys/src/9/amd64/cflags.json
  8. 3 2
      sys/src/9/amd64/core.json
  9. 67 55
      sys/src/9/amd64/dat.h
  10. 11 29
      sys/src/9/amd64/devacpi.c
  11. 354 645
      sys/src/9/amd64/entry.S
  12. 27 12
      sys/src/9/amd64/fns.h
  13. 0 1
      sys/src/9/amd64/gcc.json
  14. 36 55
      sys/src/9/amd64/kernel.ld
  15. 1 1
      sys/src/9/amd64/klib.json
  16. 1 6
      sys/src/9/amd64/l64idt.S
  17. 0 79
      sys/src/9/amd64/l64syscall.s
  18. 9 34
      sys/src/9/amd64/l64v.S
  19. 12 14
      sys/src/9/amd64/l64vsyscall.S
  20. 127 204
      sys/src/9/amd64/main.c
  21. 7 19
      sys/src/9/amd64/map.c
  22. 10 22
      sys/src/9/amd64/mem.h
  23. 87 4
      sys/src/9/amd64/memory.c
  24. 342 399
      sys/src/9/amd64/mmu.c
  25. 50 48
      sys/src/9/amd64/multiboot.c
  26. 224 0
      sys/src/9/amd64/pamap.c
  27. 0 2
      sys/src/9/amd64/physalloc.c
  28. 4 2
      sys/src/9/amd64/qmalloc.c
  29. 32 42
      sys/src/9/amd64/sipi.c
  30. 4 19
      sys/src/9/amd64/trap.c
  31. 4 8
      sys/src/9/amd64/vsvm.c
  32. 11 8
      sys/src/9/boot/aux.c
  33. 13 6
      sys/src/9/boot/boot.c
  34. 3 1
      sys/src/9/boot/bootip.c
  35. 0 1
      sys/src/9/boot/local.c
  36. 2 1
      sys/src/9/port/devcons.c
  37. 4 2
      sys/src/9/port/fault.c
  38. 2 1
      sys/src/9/port/lib.h
  39. 2 1
      sys/src/9/port/portfns.h
  40. 3 1
      sys/src/9/port/proc.c
  41. 1 2
      sys/src/9/riscv/cflags.json
  42. 0 1
      sys/src/9/riscv/core.json
  43. 7 0
      sys/src/9/riscv/mmu.c
  44. 1 1
      util/src/harvey/cmd/qtap/main.go

+ 9 - 6
amd64/include/u.h

@@ -18,15 +18,18 @@ typedef signed int	int32_t;
 typedef long		ssize_t;
 typedef	unsigned long long uint64_t;
 typedef	long long	int64_t;
-typedef uint64_t uintptr;
-typedef uint64_t uintptr_t;
-typedef int64_t intptr_t;
-typedef uint32_t	usize;
-typedef unsigned long size_t;
-typedef	uint32_t		Rune;
+typedef uint64_t	uintptr;
+typedef uint64_t	uintptr_t;
+typedef int64_t		intptr_t;
+typedef uint64_t	usize;
+typedef unsigned long	size_t;
+typedef	uint32_t	Rune;
 typedef union FPdbleword FPdbleword;
 typedef uint64_t	jmp_buf[10]; // for registers.
 
+#define	alignas		_Alignas
+#define static_assert	_Static_assert
+
 #define	JMPBUFSP	6
 #define	JMPBUFPC	7
 #define	JMPBUFARG1	8

+ 1 - 1
sys/src/9/386/pci.c

@@ -353,7 +353,7 @@ pcireservemem(void)
 	for(p = nil; (p = pcimatch(p, 0, 0)) != nil;)
 		for(i=0; i<nelem(p->mem); i++)
 			if(p->mem[i].bar && (p->mem[i].bar&1) == 0)
-				asmmapinit(p->mem[i].bar&~0x0F, p->mem[i].size, 5);
+				pamapinsert(p->mem[i].bar&~0x0F, p->mem[i].size, PamDEV);
 }
 
 static void

+ 1 - 1
sys/src/9/amd64/amd64.h

@@ -93,7 +93,7 @@
 #define PteG		0x0000000000000100	/* Global */
 #define Pte2MPAT	0x0000000000001000	/* PDE PAT */
 #define Pte1GPAT	Pte2MPAT		/* PDPE PAT */
-#define PteNX		0x8000000000000000	/* No Execute */
+#define PteNX		0x8000000000000000ULL	/* No Execute */
 
 /* Exceptions */
 #define IdtDE		0			/* Divide-by-Zero Error */

+ 4 - 6
sys/src/9/amd64/apic.c

@@ -138,7 +138,7 @@ apicinit(int apicno, uintmem pa, int isbp)
 		return;
 	}
 	if(apicbase == nil){
-		if((apicbase = vmap(pa, 1024)) == nil){
+		if((apicbase = vmap(pa, 4096)) == nil){
 			print("apicinit%d: can't map apicbase\n", apicno);
 			return;
 		}
@@ -405,8 +405,7 @@ apictimerset(uint64_t next)
 void
 apicsipi(int apicno, uintmem pa)
 {
-	int i;
-	uint32_t crhi, crlo;
+	uint32_t crhi;
 
 	/*
 	 * SIPI - Start-up IPI.
@@ -419,10 +418,9 @@ apicsipi(int apicno, uintmem pa)
 	apicrput(Iclo, DSnone|TMlevel|MTir);
 	millidelay(10);
 
-	crlo = DSnone|TMedge|MTsipi|((uint32_t)pa/(4*KiB));
-	for(i = 0; i < 2; i++){
+	for(int i = 0; i < 2; i++){
 		apicrput(Ichi, crhi);
-		apicrput(Iclo, crlo);
+		apicrput(Iclo, DSnone|TMedge|MTsipi|((uint32_t)pa/(4*KiB)));
 		microdelay(200);
 	}
 }

+ 6 - 4
sys/src/9/amd64/archamd64.c

@@ -192,13 +192,16 @@ cpuidhz(uint32_t *info0, uint32_t *info1, CpuHypervisor hypervisor)
 //print("msr 2a is 0x%x >> 22 0x%x\n", rdmsr(0x2a), rdmsr(0x2a)>>22);
 			break;
 		case 0x000306a0:		/* i7,5,3 3xxx */
+		case 0x000306f0:		/* i7,5,3 5xxx and Xeon dev machine */
 		case 0x000206c0:		/* i7,5,3 4xxx */
 		case 0x00050650:		/* i9 7900X */ // Moved here by RGM 10/27/2020
 		case 0x000506e0:		/* i7,5,3 6xxx */
 			// reading msr 0xcd gets a GPF on this CPU.
-			// per the coreboot irc:
-			// <icon[m]> rminnich: if you need the base for the core's clock multiplier, it's 100MHz since sandybridge
-			// Which, going by the Good Book (35-46 volume 3C) is index 5.
+			// From Ron on coreboot irc:
+			// rminnich: "if you need the base for the core's clock
+			//            multiplier, it's 100MHz since sandybridge
+			//            Which, going by the Good Book (35-46 volume 3C)
+			//            is index 5."
 			f = 5;
 			// This will likely be true of many of the CPUs below. FSB died a *long* time ago.
 			// fallthrough
@@ -213,7 +216,6 @@ cpuidhz(uint32_t *info0, uint32_t *info1, CpuHypervisor hypervisor)
 		case 0x000106c0:		/* Atom (45nm, 32nm) */
 		case 0x000106e0:		/* i7,5,3 8xx */
 		case 0x000206a0:		/* i7,5,3 2xxx */
-		case 0x000306f0:		/* i7,5,3 5xxx */
 		case 0x000806e0:		/* i7,5,3 85xx */
 		case 0x000906e0:		/* i7,5,3 77xx 8xxx */
 			/*

+ 0 - 485
sys/src/9/amd64/asm.c

@@ -1,485 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-/*
- * To do:
- *	find a purpose for this...
- */
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-
-#include "amd64.h"
-
-// Sorry.
-#include "acpi.h"
-
-/*
- * Address Space Map.
- * Low duty cycle.
- */
-typedef struct Asm Asm;
-typedef struct Asm {
-	uintmem	addr;
-	uintmem	size;
-	int	type;
-	int	location;
-	Asm*	next;
-} Asm;
-
-enum {
-	AsmNONE		= 0,
-	AsmMEMORY	= 1,
-	AsmRESERVED	= 2,
-	AsmACPIRECLAIM	= 3,
-	AsmACPINVS	= 4,
-
-	AsmDEV		= 5,
-};
-
-static Lock asmlock;
-static Asm asmarray[64] = {
-	{ 0, ~0, AsmNONE, 0, },
-};
-static int asmindex = 1;
-static Asm* asmlist = &asmarray[0];
-static Asm* asmfreelist;
-
-/*static*/ void
-asmdump(void)
-{
-	Asm* assem;
-
-	DBG("asm: index %d:\n", asmindex);
-	for(assem = asmlist; assem != nil; assem = assem->next){
-		DBG(" %#P %#P %d (%P)\n",
-			assem->addr, assem->addr+assem->size,
-			assem->type, assem->size);
-	}
-}
-
-// The RSDP is extraordinarily important, and we hence pollute asm
-// with a search function.
-void *
-asmrsdp(void)
-{
-	Asm* assem;
-
-	DBG("asm: index %d:\n", asmindex);
-	for(assem = asmlist; assem != nil; assem = assem->next){
-		if ((assem->type != AsmACPIRECLAIM) && (assem->type != AsmACPINVS))
-			continue;
-		void *v = vmap(assem->addr, assem->size), *rsd;
-		if (v == nil) {
-			print("Failed to vmap %#llx for %#llx bytes\n", assem->addr, assem->size);
-			continue;
-		}
-		rsd = sigscan(v, assem->size,  RSDPTR);
-		if (rsd != nil) {
-			print("Found RSD in asm at %p\n", rsd);
-			return rsd;
-		}
-		vunmap(v, assem->size);
-	}
-	return nil;
-}
-
-static Asm*
-asmnew(uintmem addr, uintmem size, int type)
-{
-	Asm * assem;
-
-	if(asmfreelist != nil){
-		assem = asmfreelist;
-		asmfreelist = assem->next;
-		assem->next = nil;
-	}
-	else{
-		if(asmindex >= nelem(asmarray))
-			return nil;
-		assem = &asmarray[asmindex++];
-	}
-	assem->addr = addr;
-	assem->size = size;
-	assem->type = type;
-
-	return assem;
-}
-
-int
-asmfree(uintmem addr, uintmem size, int type)
-{
-	Asm *np, *pp, **ppp;
-
-	DBG("asmfree: %#P@%#P, type %d\n", size, addr, type);
-	if(size == 0)
-		return 0;
-
-	lock(&asmlock);
-
-	/*
-	 * Find either a map entry with an address greater
-	 * than that being returned, or the end of the map.
-	 */
-	pp = nil;
-	ppp = &asmlist;
-	for(np = *ppp; np != nil && np->addr <= addr; np = np->next){
-		pp = np;
-		ppp = &np->next;
-	}
-
-	if((pp != nil && pp->addr+pp->size > addr)
-	|| (np != nil && addr+size > np->addr)){
-		unlock(&asmlock);
-		DBG("asmfree: overlap %#Px@%#P, type %d\n", size, addr, type);
-		return -1;
-	}
-
-	if(pp != nil && pp->type == type && pp->addr+pp->size == addr){
-		pp->size += size;
-		if(np != nil && np->type == type && addr+size == np->addr){
-			pp->size += np->size;
-			pp->next = np->next;
-
-			np->next = asmfreelist;
-			asmfreelist = np;
-		}
-
-		unlock(&asmlock);
-		return 0;
-	}
-
-	if(np != nil && np->type == type && addr+size == np->addr){
-		np->addr -= size;
-		np->size += size;
-
-		unlock(&asmlock);
-		return 0;
-	}
-
-	if((pp = asmnew(addr, size, type)) == nil){
-		unlock(&asmlock);
-		DBG("asmfree: losing %#P@%#P, type %d\n", size, addr, type);
-		return -1;
-	}
-	*ppp = pp;
-	pp->next = np;
-
-	unlock(&asmlock);
-
-	return 0;
-}
-
-uintmem
-asmalloc(uintmem addr, uintmem size, int type, int align)
-{
-	uintmem a, o;
-	Asm *assem, *pp;
-
-	DBG("asmalloc: %#P@%#P, type %d\n", size, addr, type);
-	lock(&asmlock);
-	for(pp = nil, assem = asmlist; assem != nil; pp = assem, assem = assem->next){
-		if(assem->type != type)
-			continue;
-		a = assem->addr;
-
-		if(addr != 0){
-			/*
-			 * A specific address range has been given:
-			 *   if the current map entry is greater then
-			 *   the address is not in the map;
-			 *   if the current map entry does not overlap
-			 *   the beginning of the requested range then
-			 *   continue on to the next map entry;
-			 *   if the current map entry does not entirely
-			 *   contain the requested range then the range
-			 *   is not in the map.
-			 * The comparisons are strange to prevent
-			 * overflow.
-			 */
-			if(a > addr)
-				break;
-			if(assem->size < addr - a)
-				continue;
-			if(addr - a > assem->size - size)
-				break;
-			a = addr;
-		}
-
-		if(align > 0)
-			a = ((a+align-1)/align)*align;
-		if(assem->addr+assem->size-a < size)
-			continue;
-
-		o = assem->addr;
-		assem->addr = a+size;
-		assem->size -= a-o+size;
-		if(assem->size == 0){
-			if(pp != nil)
-				pp->next = assem->next;
-			// What if assem == asmlist
-			// and the assem->size is now zero, i.e. it is completely used?
-			// Then if the code were to set assem->next = asmfreelist,
-			// it would destroy the asmlist. The original code here failed if
-			// the first region size was less than size requested.
-			// That typically happened when allocating address space
-			// for KSEG0. This if below covers that case.
-			if (assem == asmlist)
-				asmlist = assem->next;
-			assem->next = asmfreelist;
-			asmfreelist = assem;
-                }
-
-		unlock(&asmlock);
-		if(o != a)
-			asmfree(o, a-o, type);
-		return a;
-	}
-	unlock(&asmlock);
-
-	return 0;
-}
-
-static void
-asminsert(uintmem addr, uintmem size, int type)
-{
-	if(type == AsmNONE || asmalloc(addr, size, AsmNONE, 0) == 0)
-		return;
-	if(asmfree(addr, size, type) == 0)
-		return;
-	asmfree(addr, size, 0);
-}
-
-void
-asminit(void)
-{
-	sys->pmstart = ROUNDUP(PADDR(end), PGSZ);
-	sys->pmend = sys->pmstart;
-	asmalloc(0, sys->pmstart, AsmNONE, 0);
-}
-
-/*
- * Notes:
- * asmmapinit and asmmodinit called from multiboot;
- * subject to change; the numerology here is probably suspect.
- * Multiboot defines the alignment of modules as 4096.
- */
-void
-asmmapinit(uintmem addr, uintmem size, int type)
-{
-	switch(type){
-	default:
-		asminsert(addr, size, type);
-		break;
-	case AsmMEMORY:
-		/*
-		 * Adjust things for the peculiarities of this
-		 * architecture.
-		 * Sys->pmend is the largest physical memory address found,
-		 * there may be gaps between it and sys->pmstart, the range
-		 * and how much of it is occupied, might need to be known
-		 * for setting up allocators later.
-		 */
-		if(addr+size < sys->pmstart)
-			break;
-		if(addr < sys->pmstart){
-			size -= sys->pmstart - addr;
-			addr = sys->pmstart;
-		}
-		asminsert(addr, size, type);
-		sys->pmoccupied += size;
-		if(addr+size > sys->pmend)
-			sys->pmend = addr+size;
-		break;
-	}
-}
-
-void
-asmmodinit(uint32_t start, uint32_t end, char* s)
-{
-	DBG("asmmodinit: %#x -> %#x: <%s> %#x\n",
-		start, end, s, ROUNDUP(end, 4096));
-
-	if(start < sys->pmstart)
-		return;
-	end = ROUNDUP(end, 4096);
-	if(end > sys->pmstart){
-		asmalloc(sys->pmstart, end-sys->pmstart, AsmNONE, 0);
-		sys->pmstart = end;
-	}
-}
-
-static int npg[4];
-
-void*
-asmbootalloc(usize size)
-{
-	uintptr_t va;
-
-	assert(sys->vmunused+size <= sys->vmunmapped);
-	va = sys->vmunused;
-	sys->vmunused += size;
-	memset(UINT2PTR(va), 0, size);
-	return UINT2PTR(va);
-}
-
-static PTE
-asmwalkalloc(usize size)
-{
-	uintmem pa;
-
-	assert(size == PTSZ && sys->vmunused+size <= sys->vmunmapped);
-
-	if(!ALIGNED(sys->vmunused, PTSZ)){
-		DBG("asmwalkalloc: %llu wasted\n",
-			ROUNDUP(sys->vmunused, PTSZ) - sys->vmunused);
-		sys->vmunused = ROUNDUP(sys->vmunused, PTSZ);
-	}
-	if((pa = mmuphysaddr(sys->vmunused)) != ~0)
-		sys->vmunused += size;
-
-	return pa;
-}
-
-// still needed so iallocb gets initialised correctly. needs to go.
-#define ConfCrap
-
-void
-asmmeminit(void)
-{
-	int i, l;
-	Asm* assem;
-	PTE *pte, *pml4;
-	uintptr va;
-	uintmem hi, lo, mem, nextmem, pa;
-#ifdef ConfCrap
-	int cx;
-#endif /* ConfCrap */
-
-	assert(!((sys->vmunmapped|sys->vmend) & sys->pgszmask[1]));
-
-	if((pa = mmuphysaddr(sys->vmunused)) == ~0)
-		panic("asmmeminit 1");
-	pa += sys->vmunmapped - sys->vmunused;
-	mem = asmalloc(pa, sys->vmend - sys->vmunmapped, 1, 0);
-	if(mem != pa)
-		panic("asmmeminit 2");
-	DBG("pa %#llx mem %#llx\n", pa, mem);
-
-	/* assume already 2MiB aligned*/
-	assert(ALIGNED(sys->vmunmapped, 2*MiB));
-	pml4 = UINT2PTR(machp()->MMU.pml4->va);
-	while(sys->vmunmapped < sys->vmend){
-		l = mmuwalk(pml4, sys->vmunmapped, 1, &pte, asmwalkalloc);
-		DBG("%#p l %d\n", sys->vmunmapped, l);
-		*pte = pa|PtePS|PteRW|PteP;
-		sys->vmunmapped += 2*MiB;
-		pa += 2*MiB;
-	}
-
-#ifdef ConfCrap
-	cx = 0;
-#endif /* ConfCrap */
-	for(assem = asmlist; assem != nil; assem = assem->next){
-		DBG("asm: addr %#P end %#P type %d size %P\n",
-			assem->addr, assem->addr+assem->size,
-			assem->type, assem->size);
-		if((assem->type != AsmMEMORY)&&(assem->type != AsmRESERVED)) {
-			DBG("Skipping, it's not AsmMEMORY or AsmRESERVED\n");
-			continue;
-		}
-		va = KSEG2+assem->addr;
-		DBG("asm: addr %#P end %#P type %d size %P\n",
-			assem->addr, assem->addr+assem->size,
-			assem->type, assem->size);
-
-		lo = assem->addr;
-		hi = assem->addr+assem->size;
-		/* Convert a range into pages */
-		for(mem = lo; mem < hi; mem = nextmem){
-			nextmem = (mem + PGLSZ(0)) & ~sys->pgszmask[0];
-
-			/* Try large pages first */
-			for(i = sys->npgsz - 1; i >= 0; i--){
-				if((mem & sys->pgszmask[i]) != 0)
-					continue;
-				if(mem + PGLSZ(i) > hi)
-					continue;
-				/* This page fits entirely within the range. */
-				/* Mark it a usable */
-				if((l = mmuwalk(pml4, va, i, &pte, asmwalkalloc)) < 0)
-					panic("asmmeminit 3");
-
-				if (assem->type == AsmMEMORY)
-					*pte = mem|PteRW|PteP;
-				else
-					*pte = mem|PteP;
-
-				if(l > 0)
-					*pte |= PtePS;
-
-				nextmem = mem + PGLSZ(i);
-				va += PGLSZ(i);
-				npg[i]++;
-
-				break;
-			}
-		}
-
-#ifdef ConfCrap
-		/*
-		 * Fill in conf crap.
-		 */
-		if(cx >= nelem(conf.mem))
-			continue;
-		lo = ROUNDUP(assem->addr, PGSZ);
-//if(lo >= 600ull*MiB)
-//    continue;
-		conf.mem[cx].base = lo;
-		hi = ROUNDDN(hi, PGSZ);
-//if(hi > 600ull*MiB)
-//  hi = 600*MiB;
-		conf.mem[cx].npage = (hi - lo)/PGSZ;
-		conf.npage += conf.mem[cx].npage;
-		DBG("cm %d: addr %#llx npage %lu\n",
-			cx, conf.mem[cx].base, conf.mem[cx].npage);
-		cx++;
-#endif /* ConfCrap */
-	}
-	DBG("%d %d %d\n", npg[0], npg[1], npg[2]);
-
-#ifdef ConfCrap
-	/*
-	 * Fill in more conf crap.
-	 * This is why I hate Plan 9.
-	 */
-	conf.upages = conf.npage;
-	i = (sys->vmend - sys->vmstart)/PGSZ;		/* close enough */
-	conf.ialloc = (i/2)*PGSZ;
-	DBG("npage %llu upage %lu kpage %d\n",
-		conf.npage, conf.upages, i);
-
-#endif /* ConfCrap */
-}
-
-void
-asmumeminit(void)
-{
-	Asm *assem;
-	extern void physallocdump(void);
-
-	for(assem = asmlist; assem != nil; assem = assem->next){
-		if(assem->type != AsmMEMORY)
-			continue;
-		physinit(assem->addr, assem->size);
-	}
-	physallocdump();
-}

+ 1 - 1
sys/src/9/amd64/cflags.json

@@ -2,6 +2,7 @@
 	{
 		"Name": "buildflags",
 		"Cflags": [
+			"-fpic",
 			"-ffreestanding",
 			"-fno-builtin",
 			"-fno-omit-frame-pointer",
@@ -9,7 +10,6 @@
 			"-g",
 			"-gdwarf-2",
 			"-ggdb",
-			"-mcmodel=small",
 			"-mno-red-zone",
 			"-O0",
 			"-static",

+ 3 - 2
sys/src/9/amd64/core.json

@@ -2,8 +2,8 @@
 	{
 		"Name": "core",
 		"Cflags": [
+			"-fpic",
 			"-fno-stack-protector",
-			"-mcmodel=kernel",
 			"-O0",
 			"-static",
 			"-mno-red-zone",
@@ -38,6 +38,7 @@
 			"-z",
 			"max-page-size=0x1000",
 			"-nostdlib",
+			"--no-relax",
 			"-g",
 			"-T",
 			"kernel.ld"
@@ -69,7 +70,6 @@
 			"apic.c",
 			"arch.c",
 			"archamd64.c",
-			"asm.c",
 			"backtrace.c",
 			"../port/dev9p.c",
 			"../port/devbridge.c",
@@ -95,6 +95,7 @@
 			"mpacpi.c",
 			"msi.c",
 			"multiboot.c",
+			"pamap.c",
 			"physalloc.c",
 			"pmcio.c",
 			"qmalloc.c",

+ 67 - 55
sys/src/9/amd64/dat.h

@@ -24,6 +24,7 @@ typedef struct Mach Mach;
 typedef uint64_t Mpl;
 typedef struct Page Page;
 typedef struct Pcidev Pcidev;
+typedef struct PAMap PAMap;
 typedef struct PFPU PFPU;
 typedef struct PmcCtr PmcCtr;
 typedef struct PmcCtl PmcCtl;
@@ -132,7 +133,7 @@ struct PFPU {
 #define NCOLOR 1
 struct PMMU
 {
-	Page*	mmuptp[4];		/* page table pages for each level */
+	Page	*root;
 };
 
 /*
@@ -204,8 +205,6 @@ struct MMMU
 {
 	uintptr_t cr2;
 	Page*	pml4;			/* pml4 for this processor */
-	PTE*	pmap;			/* unused as of yet */
-
 	Page	pml4kludge;		/* NIX KLUDGE: we need a page */
 };
 
@@ -225,10 +224,7 @@ enum
 struct ICC
 {
 	/* fn is kept in its own cache line */
-	union{
-		void	(*fn)(void);
-		unsigned char	_ln1_[ICCLNSZ];
-	};
+	alignas(ICCLNSZ) void	(*fn)(void);
 	int	flushtlb;	/* on the AC, before running fn */
 	int	rc;		/* return code from AC to TC */
 	char*	note;		/* to be posted in the TC after returning */
@@ -346,6 +342,7 @@ struct Mach
 	Sched *sch;
 	int load;
 };
+static_assert(sizeof(Mach) <= PGSZ, "Mach is too big");
 
 struct Stackframe
 {
@@ -354,11 +351,13 @@ struct Stackframe
 };
 
 /*
- * This is the low memory map, between 0x100000 and 0x110000.
- * It is located there to allow fundamental datastructures to be
+ * This is the low memory map, between 1MiB and 2MiB.
+ *
+ * It is located there to allow fundamental data structures to be
  * created and used before knowing where free memory begins
  * (e.g. there may be modules located after the kernel BSS end).
- * The layout is known in the bootstrap code in l32p.s.
+ * The layout is known in the bootstrap code in entry.S
+ *
  * It is logically two parts: the per processor data structures
  * for the bootstrap processor (stack, Mach, vsvm, and page tables),
  * and the global information about the system (syspage, ptrpage).
@@ -366,57 +365,74 @@ struct Stackframe
  * the unions.
  */
 struct Sys {
-	unsigned char	machstk[MACHSTKSZ];
+	alignas(4096) unsigned char machstk[MACHSTKSZ];
 
-	PTE	pml4[PTSZ/sizeof(PTE)];	/*  */
-	PTE	pdp[PTSZ/sizeof(PTE)];
-	PTE	pd[PTSZ/sizeof(PTE)];
-	PTE	pt[PTSZ/sizeof(PTE)];
+	PTE	ipml4[PTSZ/sizeof(PTE)];	// Only used very early in boot
+	PTE	epml4[PTSZ/sizeof(PTE)];	// Only used for ...
+	PTE	epml3[PTSZ/sizeof(PTE)];	// ...BSP initialization...
+	PTE	epml2[PTSZ/sizeof(PTE)][4];	// ...and AP early boot.
+	PTE	pml4[PTSZ/sizeof(PTE)];		// Real PML4
+	PTE	pml3[((128+64)*PTSZ)/sizeof(PTE)];
 
 	unsigned char	vsvmpage[4*KiB];
 
-	union {
-		Mach	mach;
-		unsigned char	machpage[MACHSZ];
-	};
-
-	union {
-		struct {
-			uint64_t	pmstart;	/* physical memory */
-			uint64_t	pmoccupied;	/* how much is occupied */
-			uint64_t	pmend;		/* total span */
-
-			uintptr_t	vmstart;	/* base address for malloc */
-			uintptr_t	vmunused;	/* 1st unused va */
-			uintptr_t	vmunmapped;	/* 1st unmapped va */
-			uintptr_t	vmend;		/* 1st unusable va */
-			uint64_t	epoch;		/* crude time synchronisation */
-
-			int		nc[NIXROLES];		/* number of online processors */
-			int		nmach;
-			int		load;
-			uint64_t	ticks;			/* of the clock since boot time */
-		};
-		unsigned char	syspage[4*KiB];
-	};
-
-	union {
-		Mach*	machptr[MACHMAX];
-		unsigned char	ptrpage[4*KiB];
-	};
-
-	uint64_t	cyclefreq;		/* Frequency of user readable cycle counter (mach 0) */
+	alignas(4096)	Mach	mach;
+
+	alignas(4096)	Mach	*machptr[MACHMAX];
+
+	uint64_t	pmstart;	/* physical memory */
+	uint64_t	pmend;		/* total span */
+
+	uint64_t	epoch;		/* crude time synchronisation */
+
+	int		nc[NIXROLES];	/* number of online processors */
+	int		nmach;
+	int		load;
+	uint64_t	ticks;		/* of the clock since boot time */
+
+	uint64_t	cyclefreq;	/* Frequency of user readable cycle counter (mach 0) */
 
 	uint	pgszlg2[NPGSZ];		/* per Mach or per Sys? */
 	uint	pgszmask[NPGSZ];	/* Per sys -aki */
 	uint	pgsz[NPGSZ];
 	int	npgsz;
-
-	unsigned char	_57344_[2][4*KiB];		/* unused */
 };
+static_assert(sizeof(Sys) <= (1*MiB-1*KiB), "Sys is too big");
 
-extern Sys *sys;
+extern Sys *const sys;
 #define MACHP(x) (sys->machptr[(x)])
+
+/*
+ * The Physical Address Map.  This describes the physical address
+ * space layout of the machine, also taking into account where the
+ * kernel is loaded, multiboot modules, etc.  Unused regions do not
+ * appear in the map.
+ */
+#define	PHYSADDRSIZE	(1ULL<<46)
+
+struct PAMap {
+	uintmem	addr;
+	usize	size;
+	int	type;
+	PAMap	*next;
+};
+
+enum {
+	PamNONE = 0,
+	PamMEMORY,
+	PamRESERVED,
+	PamACPI,
+	PamPRESERVE,
+	PamUNUSABLE,
+	PamDEV,
+	PamMODULE,
+	PamKTEXT,
+	PamKRDONLY,
+	PamKRDWR,
+};
+
+extern PAMap *pamap;
+
 /*
  * KMap
  */
@@ -426,8 +442,7 @@ extern KMap* kmap(Page*);
 #define kunmap(k)
 #define VA(k)		PTR2UINT(k)
 
-struct
-{
+struct {
 	Lock l;
 	int	nonline;			/* # of active CPUs */
 	int nbooting;			/* # of CPUs waiting for the bTC to go */
@@ -460,10 +475,7 @@ struct ISAConf {
  * the clock which is only maintained by the bootstrap processor (0).
  */
 
-extern uintptr_t kseg0;
-
-extern char*rolename[];
-
+extern char *rolename[];
 
 
 /*

+ 11 - 29
sys/src/9/amd64/devacpi.c

@@ -52,7 +52,7 @@ enum {
 static uint64_t lastpath;
 static PSlice emptyslice;
 static Atable **atableindex;
-static Rsdp *rsd;
+static Rsdp *rsd = nil;
 static Queue *acpiev;
 Dev acpidevtab;
 
@@ -467,9 +467,9 @@ static long regio(Reg *r, void *p, uint32_t len, uintptr_t off, int iswr)
 	switch (r->spc) {
 		case Rsysmem:
 			if (r->p == nil)
-				r->p = vmap(r->base, len);
+				r->p = KADDR(r->base);
 			if (r->p == nil)
-				error("regio: vmap/KADDR failed");
+				error("regio: KADDR failed");
 			rp = (uintptr_t) r->p + off;
 			rio = memio;
 			break;
@@ -517,17 +517,14 @@ static void *sdtmap(uintptr_t pa, size_t want, size_t *n, int cksum)
 {
 	Sdthdr *sdt;
 	Acpilist *p;
+
 	if(v)print("sdtmap %p\n", (void *)pa);
 	if (!pa) {
 		print("sdtmap: nil pa\n");
 		return nil;
 	}
+	sdt = KADDR(pa);
 	if (want) {
-		sdt = vmap(pa, want);
-		if (sdt == nil) {
-			print("acpi: vmap full table @%p/0x%x: nil\n", (void *)pa, want);
-			return nil;
-		}
 		/* realistically, we get a full page, and acpica seems to know that somehow. */
 		uintptr_t endaddress = (uintptr_t) sdt;
 		endaddress += want + 0xfff;
@@ -535,27 +532,16 @@ static void *sdtmap(uintptr_t pa, size_t want, size_t *n, int cksum)
 		want = endaddress - (uintptr_t)sdt;
 		*n = want;
 	} else {
-		sdt = vmap(pa, sizeof(Sdthdr));
-		if (sdt == nil) {
-			print("acpi: vmap header@%p/%d: nil\n", (void *)pa, sizeof(Sdthdr));
-			return nil;
+		if(v){
+			hexdump(sdt, sizeof(Sdthdr));
+			print("sdt %p\n", sdt);
 		}
-		//hexdump(sdt, sizeof(Sdthdr));
-		if(v)print("sdt %p\n", sdt);
-		if(v)print("get it\n");
 		*n = l32get(sdt->length);
 		if(v)print("*n is %d\n", *n);
 		if (*n == 0) {
 			print("sdt has zero length: pa = %p, sig = %.4s\n", pa, sdt->sig);
 			return nil;
 		}
-
-		sdt = vmap(pa, *n);
-		if (sdt == nil) {
-			print("acpi: vmap full table @%p/0x%x: nil\n", (void *)pa, *n);
-			return nil;
-		}
-		if(v)print("check it\n");
 		if (cksum != 0 && sdtchecksum(sdt, *n) != 0) {
 			print("acpi: %c%c%c%c: bad checksum. pa = %p, len = %lu\n", sdt->sig[0], sdt->sig[1], sdt->sig[2], sdt->sig[3], pa, *n);
 			return nil;
@@ -1464,14 +1450,12 @@ static char *seprinttable(char *s, char *e, Atable *t)
 	return seprint(s, e, "\n\n");
 }
 
-void *rsdsearch(void *start, uintptr_t size)
+void *
+rsdsearch(void *start, uintptr_t size)
 {
 	if (rsd != nil)
 		return rsd;
-	rsd = sigscan(start, size,  RSDPTR);
-	if (rsd != nil)
-		return rsd;
-	rsd = asmrsdp();
+	rsd = sigscan(start, size, RSDPTR);
 	return rsd;
 }
 
@@ -1569,8 +1553,6 @@ static void parsersdptr(void)
 	int asize, cksum;
 	uintptr_t sdtpa;
 
-//	static_assert(sizeof(Sdthdr) == 36);
-
 	/* Find the root pointer. */
 	/*
 	 * Search for the data structure signature:

+ 354 - 645
sys/src/9/amd64/entry.S

@@ -1,686 +1,395 @@
 #include "mem.h"
 #include "amd64.h"
-#ifndef __ASSEMBLER__
-#define __ASSEMBLER__
-#endif
-
-// N.B. on comments: /**/ are from the original NIX for the most part.
-// // comments are mostly Harvey ca. 2020.
-
-// This file is a pastiche of coreboot and NIX source, done in a way to get SOMETHING
-// that would work in ATT syntax, as opposed to Plan 9 syntax. We took the opportunity
-// to clean some things up. We broke Multiboot support for 5 years as a result.
-// To fix multiboot, the code moves eax, ebp to edi, esi to match the calling convention.
-// DO NOT USE edi and esi, or rdi and rsi, in any part of this code.
-// Yes, there is a stack, but it's best not to count on it being more than 8 bytes deep.
-
-// It gets REALLY ugly to try  to link this at some low address and then have the rest of the
-// kernel linked high. Really, really ugly. And that defines any attempt to load at a random
-// address. So, you have to learn to write position independent code here.
-// It will make you stronger. Assuming you survive the training.
-.code32
 
-#define pFARJMP32(s, o)	.byte 0xea;	.long o; .word s	/* far jump to ptr32:16 */
-
-
-/* do we enter in 16-bit mode? If so, take the code from coreboot that goes from
- * 16->32
- */
-/*
- * Enter here in 32-bit protected mode. Welcome to 1982.
- * Make sure the GDT is set as it should be:
- *	disable interrupts;
- *	load the GDT with the table in _gdt32p;
- *	load all the data segments
- *	load the code segment via a far jump.
- */
-#define MULTIBOOT_PAGE_ALIGN  (1<<0)
-#define MULTIBOOT_MEMORY_INFO (1<<1)
-#define MULTIBOOT_HEADER_MAGIC (0x1BADB002)
-#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_MEMORY_INFO | MULTIBOOT_PAGE_ALIGN)
-#define CHECKSUM (-(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS))
-
-# The kernel bootstrap (this code) is linked and loaded at physical address
-# 0x00100000 (1MB), which is the start of extended memory.  (See kernel.ld)
-
-# Flagging boottext to be text.  Check out:
-# http://sourceware.org/binutils/docs/as/Section.html
-.section .boottext, "awx"
+// It gets ugly to try to link this at some low address
+// and then have the rest of the kernel linked high; that
+// goes doubly for any attempt to load at a random address.
+//
+// So you have to learn to write position independent
+// code here.
+//
+// It will make you stronger.
+//
+// Assuming you survive the training.
+
+// Useful definitions.
+#define GdtNULL		(0<<3)
+#define GdtCODE64	(1<<3)
+#define GdtCODE32	(2<<3)
+#define GdtDATA32	(3<<3)
+
+#define SegREAD		(1<<41)
+#define SegWRITE	(1<<42)
+#define SegCODE		(1<<43)
+#define SegDATA		(0<<43)
+#define SegMB1		(1<<44)
+#define SegPRESENT	(1<<47)
+#define SegLONG		(1<<53)
+
+#define Seg32DEFAULT	(1<<54)
+#define Seg32GRAN	(1<<55)
+#define Seg32LIMIT	((0xF<<48)+0xFFFF)
+#define Seg32DEF	(Seg32DEFAULT|Seg32GRAN|Seg32LIMIT)
+
+#define MULTIBOOT_FLAG_PGALIGN	(1<<0)
+#define MULTIBOOT_FLAG_MEMINFO	(1<<1)
+#define MULTIBOOT_MAGIC		0x1BADB002
+#define MULTIBOOT_FLAGS		(MULTIBOOT_FLAG_PGALIGN | MULTIBOOT_FLAG_MEMINFO)
+#define MULTIBOOT_CHECKSUM	-(MULTIBOOT_MAGIC + MULTIBOOT_FLAGS)
 
-.code32
 .align 4
-_protected:
+.section .boottext, "awx"
 multiboot_header:
-.long MULTIBOOT_HEADER_MAGIC
-.long MULTIBOOT_HEADER_FLAGS
-.long CHECKSUM
+.long MULTIBOOT_MAGIC
+.long MULTIBOOT_FLAGS
+.long MULTIBOOT_CHECKSUM
 
-	.globl _start
+// When we get here we are in protected mode with a GDT.  We set
+// up IA32e mode and get into long mode with paging enabled.
+.code32
+.align 4
+.globl _start
 _start:
-	cli
-	jmp 1f
+	movl	$(start - KZERO), %ebp
+	jmpl	*%ebp
 
+.text
+.code32
+.align 4
+start:
+	cli
+	cld
 
-	/* This is the GDT for the ROM stage part of coreboot. It
-	 * is different from the RAM stage GDT which is defined in
-	 * c_start.S
-	 */
+	// Save the multiboot magic number.
+	movl	%eax, %ebp
+
+	// Make the basic page tables for CPU0 to map 0-4GiB
+	// physical to KZERO, in addition to an identity map
+	// for the switch from protected to paged mode.  There
+	// is an assumption here that the creation and later
+	// removal of the identity map will not interfere with
+	// the KZERO mappings.
+	//
+	// We assume a recent processor with Page Size Extensions
+	// and use two 2MiB entries.
+
+	// Zero the stack, page tables, vsvm, unused pages, m, sys, etc.
+	movl	$(KSYS-KZERO), %esi
+	movl	$((KTZERO-KSYS)/4), %ecx
+	xorl	%eax, %eax
+	movl	%esi, %edi
+	rep stosl
 
-	.align	4
-.globl gdtptr
-gdt:
-gdtptr:
-	.word	gdt_end - gdt -1 /* compute the table limit */
-	.long	gdt		 /* we know the offset */
-	.word	0
-
-	/* selgdt 0x08, flat code segment */
-	.word	0xffff, 0x0000
-	.byte	0x00, 0x9b, 0xcf, 0x00 /* G=1 and 0x0f, So we get 4Gbytes for limit */
-
-	/* selgdt 0x10,flat data segment */
-	.word	0xffff, 0x0000
-	.byte	0x00, 0x93, 0xcf, 0x00
-
-	/* long mode code segment. */
-	.quad	0x0020980000000000		/* Long mode CS */
-
-gdt_end:
-
-
-/*
- *	When we come here we are in protected mode. We expand
- *	the stack and copies the data segment from ROM to the
- *	memory.
- *
- *	After that, we call the chipset bootstrap routine that
- *	does what is left of the chipset initialization.
- *
- *	NOTE aligned to 4 so that we are sure that the prefetch
- *	cache will be reloaded.
- */
-	.align	4
-1:
-//	jmp 1b
-.globl protected_start
-protected_start:
-
-	lgdt	%cs:gdtptr
-	ljmp	$8, $__protected_start
-
-__protected_start:
-	// Save the multiboot args rdi,rsi; this matches
-	// the calling convention.
-	movl	%eax, %edi
-	movl	%ebx, %esi
-	movw	$0x10, %ax
-	movw	%ax, %ds
-	movw	%ax, %es
-	movw	%ax, %ss
-	movw	%ax, %fs
-	movw	%ax, %gs
+	// Zero the real mode IVT.
+	movl	$0, %edi
+	movl	$1024, %ecx
+	rep stosb
 
-	/* Restore the BIST value to %eax */
-	movl	%ebp, %eax
+	// We could zero the BSS here, but the loader does it for us.
 
-entry32:
-1:
-	movb	$0x30, %al
-	movw $0x30, %dx
-	outb %dx
-// This gets us into a reasonable mode. We can skip the plan 9 gdt code.
-	call 1f
-1:
-	popl	%ebp
-	/* when you execute this instruction, bp has the value
-	 * of 1f.
-	 * So add the length of this instruction and the
-	 * 5 bytes of the jmp that follows it.
-	 * It will then point to start of header.
-	 */
-	addl $12, %ebp
-	/* Now make it point to gdt32p (gdt, 32 bits, physical)
-	 */
-	addl $14, %ebp
-	JMP _endofheader
-
-_startofheader:
-	.byte	0x90				/* NOP */
-	.byte	0x90				/* NOP */
-
-_multibootheader:	/* must be 4-byte aligned */
-	.long	0x1badb002			/* magic */
-	.long	0x00000003			/* flags */
-	.long	-(0x1badb002 + 0x00000003)	/* checksum */
-
-_gdt32p:
-	.quad	0x0000000000000000		/* NULL descriptor */
-	.quad	0x00cf9a000000ffff		/* CS */
-	.quad	0x00cf92000000ffff		/* DS */
-	.quad	0x0020980000000000		/* Long mode CS */
-
-_gdtptr32p:
-	.word	4*8-1
-	.long	_gdt32p
-
-_gdt64p:
-	.quad	0x0000000000000000		/* NULL descriptor */
-	.quad	0x0020980000000000		/* CS */
-
-_gdtptr64p:
-	.word	2*8-1
-	.quad	_gdt64p
-
-
-_endofheader:
-	pushl %eax				/* possible passed-in magic */
-
-/*
- * Make the basic page tables for CPU0 to map 0-16MiB physical
- * to KZERO, and include an identity map for the switch from protected
- * to paging mode. There's an assumption here that the creation and later
- * removal of the identity map will not interfere with the KZERO mappings;
- * the conditions for clearing the identity map are
- *	clear PML4 entry when (KZER0 & 0x0000ff8000000000) != 0;
- *	clear PDP entry when (KZER0 & 0x0000007fc0000000) != 0;
- *	don`t clear PD entry when (KZER0 & 0x000000003fe00000) == 0;
- * the code below assumes these conditions are met.
- *
- * Assume a recent processor with Page Size Extensions
- * and use two 2MiB entries.
- */
-/*
- * The layout is decribed in data.h:
- *	_protected:	start of kernel text
- *	- 4*KiB		unused
- *	- 4*KiB		unused
- *	- 4*KiB		ptrpage
- *	- 4*KiB		syspage
- *	- MACHSZ	m
- *	- 4*KiB		vsvmpage for gdt, tss
- *	- PTSZ		PT for PMAPADDR		unused - assumes in KZERO PD
- *	- PTSZ		PD
- *	- PTSZ		PDP
- *	- PTSZ		PML4
- *	- MACHSTKSZ	stack
- */
-
-/*
- * Macros for accessing page table entries; change the
- * C-style array-index macros into a page table byte offset
- */
-#define PML4O(v)	((PTLX((v), 3))<<3)
-#define PDPO(v)		((PTLX((v), 2))<<3)
-#define PDO(v)		((PTLX((v), 1))<<3)
-#define PTO(v)		((PTLX((v), 0))<<3)
-
-_warp64:
-	// WARNING edi and esi usage
-	// We use the stosl below, which requires esi and edi.
-	// We need to save them, and we can not use eax or ecx.
-	// We can, however, use edx and ebp; do so.
-	movl	%edi, %edx
-	// NO CALL OR PUSH/POP AFTER THIS POINT.
-	movl 	%esi, %esp
-	movl	$_protected-(MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ/*+KZERO*/), %esi
-
-	// Don't zero the lowest two pages, they typically contain
-	// multiboot info. TODO: don't zero the stack. Code that depends
-	// on stack variables being zero'd is buggy by definition.
-	movl	%esi, %edi
-	addl    $8192, %edi
-	xorl	%eax, %eax
-	movl	$((MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ)>>2), %ecx
-	subl    $2048, %ecx
+	// Set the stack and find the start of the page tables.
+	movl	%esi, %eax
+	addl	$MACHSTKSZ, %eax
+	movl	%eax, %esp			// Give ourselves a stack
 
-	cld
-	rep;	stosl				/* stack, P*, vsvm, m, sys */
-	movl	%esi, %eax			/* sys-KZERO */
-
-	movl	%edx, %edi
-	// END WARNING edi usage.
-	// ESI is still used!
-	// NO CALL OR PUSH/POP UNTIL rsp IS LOADED BELOW
-	addl	$(MACHSTKSZ), %eax		/* PML4 */
-	movl	%eax, %CR3			/* load the mmu */
+	// %eax points to the PML4 that we'll use for double-mapping
+	// low RAM and KZERO.
+	movl	%eax, %cr3			// load the MMU; paging still disabled
 	movl	%eax, %edx
-	addl	$(PTSZ|PteRW|PteP), %edx	/* PDP at PML4 + PTSZ */
-	movl	%edx, PML4O(0)(%eax)		/* PML4E for identity map */
-	movl	%edx, PML4O(KZERO)(%eax)	/* PML4E for KZERO, PMAPADDR */
-
-	addl	$PTSZ, %eax			/* PDP at PML4 + PTSZ */
-	addl	$PTSZ, %edx			/* PD at PML4 + 2*PTSZ */
-	movl	%edx, PDPO(0)(%eax)		/* PDPE for identity map */
-	movl	%edx, PDPO(KZERO)(%eax)		/* PDPE for KZERO, PMAPADDR */
-
-	addl	$PTSZ, %eax				/* PD at PML4 + 2*PTSZ */
-	movl	$(PtePS|PteRW|PteP), %edx
-	movl	%edx, PDO(0)(%eax)			/* PDE for identity 0-2MiB */
-	movl	%edx, PDO(KZERO)(%eax)			/* PDE for KZERO 0-2MiB */
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+1*PGLSZ(1))(%eax)	/* PDE for KZERO 4-6MiB */
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+2*PGLSZ(1))(%eax)	/* PDE for KZERO 4-6MiB */
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+3*PGLSZ(1))(%eax)	/* PDE for KZERO 6-8MiB */
-
-	// and up through 16 (and on to 64). This sucks, we'll make it better later. //
-	// Note that at some point, we create the whole map.
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+4*PGLSZ(1))(%eax)	/* PDE for KZERO 8-10MiB */
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+5*PGLSZ(1))(%eax)	/* PDE for KZERO 10-12MiB */
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+6*PGLSZ(1))(%eax)	/* PDE for KZERO 12-14MiB */
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+7*PGLSZ(1))(%eax)	/* PDE for KZERO 14-16MiB */
-
-	// and up to 64! I tried a loop but it's harder than I thought.
-	// better code welcome.
-	// Warning: you've only got 256M to work with. This is a foundational mistake,
-	// and it's way harder to fix than I thought. So, ugly.
-	// 16 -> 32
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+8*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+9*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+10*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+11*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+12*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+13*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+14*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+15*PGLSZ(1))(%eax)
-
-	// 32 -> 48
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+16*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+17*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+18*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+19*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+20*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+21*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+22*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+23*PGLSZ(1))(%eax)
-
-	// 48 -> 64
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+24*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+25*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+26*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+27*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+28*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+29*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+30*PGLSZ(1))(%eax)
-	addl	$PGLSZ(1), %edx
-	movl	%edx, PDO(KZERO+31*PGLSZ(1))(%eax)
-
-	movl	%eax, %edx			/* PD at PML4 + 2*PTSZ */
-	addl	$(PTSZ|PteRW|PteP), %edx	/* PT at PML4 + 3*PTSZ */
-	movl	%edx, PDO(PMAPADDR)(%eax)	/* PDE for PMAPADDR */
-
-/*
- * Enable and activate Long Mode. From the manual:
- * 	make sure Page Size Extentions are off, and Page Global
- *	Extensions and Physical Address Extensions are on in CR4;
- *	set Long Mode Enable in the Extended Feature Enable MSR;
- *	set Paging Enable in CR0;
- *	make an inter-segment jump to the Long Mode code.
- * It`s all in 32-bit mode until the jump is made.
- */
-lme:
+	addl	$(2*PTSZ|PteRW|PteP), %edx	// EPML3 at IPML4 + 2*PTSZ
+	movl	%edx, (%eax)			// IPML4E for identity map
+	movl	%edx, 2048(%eax)		// IPML4E for KZERO
+
+	// The next page frame contains a PML4 that removes the double
+	// mapping, leaving only KZERO mapped.
+	addl	$PTSZ, %eax			// EPML4 at IPML4 + PTSZ
+	movl	%edx, 2048(%eax)		// EPML4E for EPML3 at KZERO
+
+	// Fill in the early PML3 (PDPT) to point to the early PML2's (PDs)
+	// that provide the initial 4GiB mapping in the kernel.
+	addl	$PTSZ, %eax			// EPML3 at EPML4 + PTSZ
+	addl	$PTSZ, %edx			// EPML2[0] at EPML3 + PTSZ
+	movl	%edx, (%eax)			// EPML3E for EPML2[0]
+	addl	$PTSZ, %edx			// EPML2[1] at EPML2[0] + PTSZ
+	movl	%edx, 8(%eax)			// EPML3E for EPML2[1]
+	addl	$PTSZ, %edx			// EPML2[2] at EPML2[1] + PTSZ
+	movl	%edx, 16(%eax)			// EPML3E for EPML2[2]
+	addl	$PTSZ, %edx			// EPML2[3] at EPML2[2] + PTSZ
+	movl	%edx, 24(%eax)			// EPML3E for EPML2[3]
+
+	// Map the first 4GiB (the entire 32-bit) address space.
+	// Note that this requires 16KiB.
+	//
+	// The first 2MiB are mapped using 4KiB pages.  The first 1MiB of
+	// memory contains holes for MMIO and ROM and other things that
+	// we want special attributes for.  We'll set those in the
+	// kernel proper, but we provide 4KiB pages here.  There is 4KiB
+	// of RAM for the PT immediately after the PDs.
+	addl	$PTSZ, %eax			// PML2[0] at PML3[0] + PTSZ
+	movl	$2048, %ecx			// 2048 * 2MiB pages covers 4GiB
+	movl	$(PtePS|PteRW|PteP), %edx	// Large page PDEs
+1:	movl	%edx, (%eax)			// PDE for 2MiB pages
+	addl	$8, %eax
+	addl	$(2<<20), %edx
+	subl	$1, %ecx
+	test	%ecx, %ecx
+	jnz	1b
+
+	// %eax now points to the page after the EPML2s, which is the real
+	// self-referential PML4.
+	// Map the first 192 entries for the upper portion of the address
+	// space to PML3s; this is the primordial root of sharing for the kernel.
+	movl	%eax, %edx
+	addl	$(PTSZ|PteRW|PteP), %edx	// PML3[0] at PML4 + PTSZ
+	movl	$256, %ecx
+1:	movl	%edx, (%eax, %ecx, 8)
+	addl	$PTSZ, %edx
+	incl	%ecx
+	cmp	$(256+192), %ecx
+	jne	1b
+
+	// Enable and activate Long Mode.  From the manual:
+	// make sure Page Size Extensions are off, and Page Global
+	// Extensions and Physical Address Extensions are on in CR4;
+	// set Long Mode Enable in the Extended Feature Enable MSR;
+	// set Paging Enable in CR0;
+	// make an inter-segment jump to the Long Mode code.
+	// It's all in 32-bit mode until the jump is made.
 	movl	%cr4, %eax
-	ANDL	$~Pse, %eax			/* Page Size */
-	ORL	$(Pge|Pae), %eax		/* Page Global, Phys. Address */
+	andl	$~Pse, %eax			// Page Size
+	orl	$(Pge|Pae), %eax		// Page Global, Phys. Address
 	movl	%eax, %cr4
 
-	movl	$Efer, %ecx			/* Extended Feature Enable */
-	RDMSR
-	ORL	$Lme, %eax			/* Long Mode Enable */
-	WRMSR
+	movl	$Efer, %ecx			// Extended Feature Enable
+	rdmsr
+	orl	$Lme, %eax			// Long Mode Enable
+	orl	$Nxe, %eax			// No-Execute Enable
+	wrmsr
 
 	movl	%cr0, %edx
-	ANDL	$~(Cd|Nw|Ts|Mp), %edx
-	ORL	$(Pg|Wp), %edx			/* Paging Enable */
+	andl	$~(Cd|Nw|Ts|Mp), %edx
+	orl	$(Pg|Wp), %edx			// Paging Enable
 	movl	%edx, %cr0
-	ljmp $0x18, $_identity
-	//pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity-KZERO)
-
-/*
- * Long mode. Welcome to 2003.
- * Jump out of the identity map space;
- * load a proper long mode GDT.
- */
+
+	// Load the 64-bit GDT
+	movl	$(gdtdesc-KZERO), %eax
+	lgdt	(%eax)
+
+	ljmpl	$GdtCODE64, $(1f-KZERO)
+
 .code64
+1:
+	// Long mode. Welcome to 2003.  Jump out of the identity map
+	// and into the kernel address space.
 
-_identity:
-	movq	$_start64v, %rax
-	JMP	*%rax
-.section .text
-_gdt64v:
-	.quad	0x0000000000000000		/* NULL descriptor */
-	.quad	0x0020980000000000		/* CS */
-
-_gdtptr64v:
-	.word	3*8-1
-	.quad	_gdt64v
-
-// At this point, we are safe to use kernel addresses, as we are in
-// kernel virtual address space.
-_start64v:
-	movq	$_gdtptr64v, %rax
+	// Load a 64-bit GDT in the kernel address space.
+	movabsq	$gdtdescv, %rax
 	lgdt	(%rax)
 
-	XORQ	%rdx, %rdx
-	movw	%dx, %ds			/* not used in long mode */
-	movw	%dx, %es			/* not used in long mode */
+	// Zero out the segment registers: they are not used in long mode.
+	xorl	%edx, %edx
+	movw	%dx, %ds
+	movw	%dx, %es
 	movw	%dx, %fs
 	movw	%dx, %gs
-	movw	%dx, %ss			/* not used in long mode */
-
-	movq	%rsi, %rsi			/* sys-KZERO */
-	movq	%rsi, %rax
-	addq	$KZERO, %rax
-	movq	%rax, sys			/* sys */
-
-	addq	$(MACHSTKSZ), %rax		/* PML4 and top of stack */
-	// put multiboot args back.
-	// NO USE OF rbp PAST THIS POINT.
-	movq	%rsp, %rbp
-	movq	%rax, %rsp			/* set stack */
-	// YOU CAN NOW USE THE STACK AGAIN.
-
-// Don't undo this until all APs are started. Then we don't need to bother
-// having the APs remap it. Save work.
-	// OK, this part is called "we climbed up the tree on a ladder, now pull
-	// the ladder up after us.". We remove the identity mapping.
-_zap0pml4:
-	cmpq	$PML4O(KZERO), %rdx		/* KZER0 & 0x0000ff8000000000 */
-	JE	_zap0pdp
-	//movq	%rdx, PML4O(0)(%rax) 		/* zap identity map PML4E */
-_zap0pdp:
-	addq	$PTSZ, %rax			/* PDP at PML4 + PTSZ */
-	cmpq	$PDPO(KZERO), %rdx 		/* KZER0 & 0x0000007fc0000000 */
-	JE	_zap0pd
-	//movq	%rdx, PDPO(0)(%rax)		/* zap identity map PDPE */
-_zap0pd:
-	addq	$PTSZ, %rax			/* PD at PML4 + 2*PTSZ */
-	cmpq	$PDO(KZERO), %rdx		/* KZER0 & 0x000000003fe00000 */
-	JE	_zap0done
-	//movq	%rdx, PDO(0)(%rax)		/* zap identity map PDE */
-_zap0done:
-	// now for the scary part. In some sense, all page table zapping to date
-	// has been theoretical. This is going to flush it. If we survive this ...
-
-	addq	$(MACHSTKSZ), %rsi		/* PML4-KZERO */
-	movq	%rsi, %CR3			/* flush TLB */
-
-	addq	$(2*PTSZ+4*KiB), %rax		/* PD+PT+vsvm */
-	movq	%rax, entrym
-	movq	$0, (%rax) 			/* machp()->machno = 0 */
-
-	PUSHQ	%rdx				/* clear flags */
-	POPFQ
-
-	movq	%rbp, %rsi			/* expand multiboot args to 64 bits */
-	movq	%rdi, %rdi			/* multiboot magic */
-	CALL	main
+	movw	%dx, %ss
 
+	// We can now use linked addresses for the stack and code.
+	// We'll jump into the kernel from here.
+	movabsq	$KZERO, %rax
+	addq	%rax, %rsp
+	movabsq	$warp64, %rax
+	jmp	*%rax
+
+.text
+.code64
+warp64:
+	// At this point, we are fully in the kernel virtual
+	// address space and we can discard the identity mapping.
+	// There is a PML4 sans identity map 4KiB beyond the
+	// current PML4; load that, which also flushes the TLB.
+	movq	%cr3, %rax
+	addq	$PTSZ, %rax
+	movq	%rax, %cr3			// Also flushes TLB.
+
+	// &sys->mach is the first argument to main()
+	movabsq	$KSYS, %rdi
+	addq	$(MACHSTKSZ+(1+1+1+4+1+192)*PTSZ+PGSZ), %rdi
+	movq	%rbp, %rsi			// multiboot magic
+	movq	%rbx, %rdx			// multiboot info pointer
+
+	// Push a dummy stack frame and jump to `main`.
+	pushq	$0
+	movq	$0, %rbp
+	leaq	main(%rip), %rax
+	push	%rax
+	pushq	$2				// clear flags
+	popfq
+	ret
+	ud2
+
+// no deposit, no return
+// do not resuscitate
 .globl ndnr
-ndnr:	/* no deposit, no return */
-	/* do not resuscitate */
-_dnr:
+ndnr:
 	sti
 	hlt
-	JMP	_dnr				/* do not resuscitate */
-
-	// SIPI startup handler. The first bits of this code, which are 16-bit, are copied
-	// to 0x3000. That code jumps to the 32-bit entry point right after the lgdt, which is in
-	// the normal place, no need to copy it. If this works, it's a lot more compact
-	// than what Plan 9 used to do.
-	/*
- * Start-up request IPI handler.
- *
- * This code is executed on an application processor in response to receiving
- * a Start-up IPI (SIPI) from another processor.
- * This must be placed on a 4KiB boundary
- * somewhere in the 1st MiB of conventional memory. However,
- * due to some shortcuts below it's restricted further to within the 1st 64KiB.
- * The AP starts in real-mode, with
- *   CS selector set to the startup memory address/16;
- *   CS base set to startup memory address;
- *   CS limit set to 64KiB;
- *   CPL and IP set to 0.
- */
-
-/*
- * Real mode. Welcome to 1978.
- * Load a basic GDT, turn on protected mode and make
- * inter-segment jump to the protected mode code.
- */
-	.align 4096
-.code32
-.globl b1978
+	jmp	ndnr
+
+// Start-up request IPI handler.
+//
+// This code is executed on an application processor in response
+// to receiving a Start-up IPI (SIPI) from another processor.  The
+// vector given in the SIPI determines the memory address
+// where the AP starts execution.
+//
+// The AP starts in real-mode, with
+//   CS selector set to the startup memory address/16;
+//   CS base set to startup memory address;
+//   CS limit set to 64KiB;
+//   CPL and IP set to 0.
+//
+// This must be placed on a 4KiB boundary, and while it may seem
+// like this should be in a text section, it is deliberately not.
+// The AP entry code is copied to a page in low memory at APENTRY
+// for execution, so as far as the rest of the kernel is concerned
+// it is simply read-only data.  We put it into .rodata so that it
+// is mapped onto a non-executable page and the kernel cannot
+// accidentally jump into it once it is running in C code on a
+// real page table.
+//
+// The 16-bit code loads a basic GDT, turns on 32-bit protected
+// mode and makes an inter-segment jump to the protected mode code
+// right after.
+//
+// 32-bit code enables long mode and paging, sets a stack and
+// jumps to 64-bit mode, which fixes up virtual addresses for
+// the stack and PC and jumps into C.
+
+#define APENTRY		0x3000
+#define APPERCPU	(0x4000-8)
+
+.section .rodata
+
+.globl b1978, e1978
+.code16
+.align 4096
 b1978:
-_sipistartofheader:
-	NOP; NOP; NOP
-	.quad	0xa5a5a5a5a5a5a5a5
-// real mode gdt located in low 64k
-// GOT TO THIS LOOP
-//1: jmp 1b
-	// clang stupidity. Or smartness. It can't do .code16!
-	.byte 0xfa //cli
-	.byte 0x66, 0x31, 0xc0 //xorl	%eax, %eax
-	.byte 0x0f, 0x22, 0xd8 // movl	%eax, %cr3 // invalidate tlb
-	.byte 0x8e, 0xd8 //movw	%ax, %ds
-	.byte 0x8c, 0xc8 // movw	%cs, %ax
-	.byte 0xbb, 0x80, 0x30 //movw	$0x3080, %bx
-	.byte 0x67, 0x66, 0x0f, 0x01, 0x13 // data32 lgdt (%ebx)
-
-	.byte 0x0f, 0x20, 0xc0 //movl	%cr0, %eax
-	.byte 0x66, 0x25, 0xd1, 0xff, 0xfa, 0x7f // andl	$0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
-	.byte 0x66, 0x0d, 0x01, 0x00, 0x00, 0x60 // orl	$0x60000001, %eax /* CD, NW, PE = 1 */
-	.byte 0x0f, 0x22, 0xc0 // movl	%eax, %cr0
-
-	//ljmpl $8, $0x3040
-	.byte 0x66, 0xea // ljmpl, 066 prefix since we're 16 bits
-	.byte 0x40, 0x30, 0x00, 0x00 // 32 bit offset
-	.byte 0x08, 0x00 // 16 bit segment
-.align 32
+	// We start here in real mode.  Welcome to 1978.
+	cli
+	cld
+
+	lgdtl	(APENTRY+(apgdtdesc-b1978))
+
+	movl	%cr0, %eax
+	orl	$Pe, %eax
+	movl	%eax, %cr0
+
+	ljmpl   $GdtCODE32, $(b1982-KZERO)
+
+.align 16
+gdt:
+// 0: Null segment
+.quad	0
+// 8: Kernel 64-bit code segment
+.quad	(SegREAD|SegCODE|SegMB1|SegPRESENT|SegLONG)
+// 16: Kernel 32-bit code segment (for bootstrapping APs)
+.quad	(SegREAD|SegCODE|SegMB1|SegPRESENT|Seg32DEF)
+// 24: Kernel 32-bit data segment (for bootstrapping APs)
+.quad	(SegREAD|SegWRITE|SegMB1|SegPRESENT|Seg32DEF)
+egdt:
+
+.skip 6
+apgdtdesc:
+.word	egdt - gdt - 1
+.long	(APENTRY+gdt-b1978)
 
-.code32
-	movw	$0x10, %ax
-	MOVW	%AX, %DS
-	MOVW	%AX, %ES
-	MOVW	%AX, %FS
-	MOVW	%AX, %GS
-	MOVW	%AX, %SS
-	/* Now that we are in protected mode jump to a 32 bit code segment. */
-	ljmpl	$8, $_approtected
-.align	64
-gdt78:
-gdtptr78:
-	.word	4*8-1
-	.long 0x3080 // $gdt78-$b1978
-	.word 0 // unused
-	/* selgdt 0x08, flat code segment */
-	.word	0xffff, 0x0000
-	.byte	0x00, 0x9b, 0xcf, 0x00 /* G=1 and 0x0f, So we get 4Gbytes for limit */
-
-	/* selgdt 0x10,flat data segment */
-	.word	0xffff, 0x0000
-	.byte	0x00, 0x93, 0xcf, 0x00
-	.quad	0x0020980000000000		/* Long mode CS */
-gdt78_end:
-.global e1978
 e1978:
 
-/*
- * Protected mode. Welcome to 1982.
- * Get the local APIC ID from the memory mapped APIC;
-#ifdef UseOwnPageTables
- * load the PDB with the page table address, which is located
- * in the word immediately preceeding _real<>-KZERO(SB);
- * this is also the (physical) address of the top of stack;
-#else
- * load the PML4 with the shared page table address;
-#endif
- * make an identity map for the inter-segment jump below,
- * using the stack space to hold a temporary PDP and PD;
- * enable and activate long mode;
- * make an inter-segment jump to the long mode code.
- */
-.section .boottext, "awx"
+.text
 .code32
+b1982:
+	// Protected mode. Welcome to 1982.
+	movw	$GdtDATA32, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %fs
+	movw	%ax, %gs
+	movw	%ax, %ss
 
-/*
- * Macros for accessing page table entries; must turn
- * the C-style array-index macros into a page table byte
- * offset.
- */
-#define PML4O(v)	((PTLX((v), 3))<<3)
-#define PDPO(v)		((PTLX((v), 2))<<3)
-#define PDO(v)		((PTLX((v), 1))<<3)
-#define PTO(v)		((PTLX((v), 0))<<3)
-
-_approtected:
-	MOVL	$0xfee00000, %ebp	/* apicbase */
-	MOVL	0x20(%eBP), %eBP	/* Id */
-	SHRL	$24, %eBP		/* becomes RARG later */
-
-#ifdef UseOwnPageTables
-	MOVL	$_real<>-KZERO(SB), AX
-	MOVL	-4(AX), %eSI			/* page table PML4 */
-#else
-	MOVL	$(0x00100000+MACHSTKSZ), %eSI	/* page table PML4 */
-#endif
-	// endif before
-	MOVL	%eSI, %eAX
-	MOVL	%eAX, %CR3			/* load the mmu */
-#if 0
-	MOVL	%eAX, %eDX
-	SUBL	$MACHSTKSZ, %eDX		/* PDP for identity map */
-	ADDL	$(PteRW|PteP), %eDX
-	MOVL	%eDX, PML4O(0)(%eAX)		/* PML4E for identity map */
-
-	SUBL	$MACHSTKSZ, %eAX		/* PDP for identity map */
-	ADDL	$PTSZ, %eDX
-	MOVL	%eDX, PDPO(0)(%eAX)		/* PDPE for identity map */
-	MOVL	$(PtePS|PteRW|PteP), %edX
-	ADDL	$PTSZ, %eAX			/* PD for identity map */
-	MOVL	%eDX, PDO(0)(%eAX)		/* PDE for identity 0-[24]MiB */
-#endif
-
-/*
- * Enable and activate Long Mode. From the manual:
- * 	make sure Page Size Extentions are off, and Page Global
- *	Extensions and Physical Address Extensions are on in CR4;
- *	set Long Mode Enable in the Extended Feature Enable MSR;
- *	set Paging Enable in CR0;
- *	make an inter-segment jump to the Long Mode code.
- * It's all in 32-bit mode until the jump is made.
- */
-aplme:
-	MOVL	%CR4, %eAX
-	ANDL	$~Pse, %eAX			/* Page Size */
-	ORL	$(Pge|Pae), %eAX		/* Page Global, Phys. Address */
-	MOVL	%eAX, %CR4
-
-	MOVL	$Efer, %eCX			/* Extended Feature Enable */
-	RDMSR
-	ORL	$Lme, %eAX			/* Long Mode Enable */
-	WRMSR
-
-	MOVL	%CR0, %eDX
-	ANDL	$~(Cd|Nw|Ts|Mp), %eDX
-	ORL	$(Pg|Wp), %eDX			/* Paging Enable */
-	MOVL	%eDX, %CR0
-
-	ljmp $0x18, $_apidentity
-
-/*
- * Long mode. Welcome to 2003.
- * Jump out of the identity map space;
- * load a proper long mode GDT;
- * zap the identity map;
- * initialise the stack and call the
- * C startup code in m->splpc.
- */
-.code64
+	// load the PML4 with the shared page table address;
+	// make an identity map for the inter-segment jump below,
+	// using the stack space to hold a temporary PDP and PD;
+	// enable and activate long mode;
+	// make an inter-segment jump to the long mode code.
+	movl	$(KSYS-KZERO+MACHSTKSZ), %eax	// Page table
+	movl	%eax, %cr3			// load the mmu
+
+	// Enable and activate Long Mode.
+	movl	%cr4, %eax
+	andl	$~Pse, %eax			// Page Size
+	orl	$(Pge|Pae), %eax		// Page Global, Phys. Address
+	movl	%eax, %cr4
+
+	movl	$Efer, %ecx			// Extended Feature Enable
+	rdmsr
+	orl	$Lme, %eax			// Long Mode Enable
+	orl	$Nxe, %eax			// No-Execute Enable
+	wrmsr
 
-_apidentity:
-	MOVQ	$_apstart64v, %rAX
-	JMP	*%rAX
+	movl	%cr0, %edx
+	andl	$~(Cd|Nw|Ts|Mp), %edx
+	orl	$(Pg|Wp), %edx			// Paging Enable
+	movl	%edx, %cr0
 
-.section .text
-_apstart64v:
-	MOVQ	$_gdtptr64v, %rAX
+	ljmp	$GdtCODE64, $(1f-KZERO)
 
+.code64
+1:
+	movq	APPERCPU, %rdi
+	addq	$MACHSTKSZ, %rdi
+	movq	%rdi, %rsp			// set stack
+	addq	$(PTSZ+PGSZ), %rdi		// Mach *
+
+	movabsq	$apwarp64, %rax
+	pushq	%rax
+	ret
+	ud2
+
+apwarp64:
+	movabsq	$gdtdescv, %rax
 	lgdt	(%rax)
-	XORQ	%rDX, %rDX
-	MOVW	%DX, %DS			/* not used in long mode */
-	MOVW	%DX, %ES			/* not used in long mode */
-	MOVW	%DX, %FS
-	MOVW	%DX, %GS
-	MOVW	%DX, %SS			/* not used in long mode */
-
-	movq	%rsi, %rsi			/* PML4-KZERO */
-	MOVQ	%rsI, %rAX
-	ADDQ	$KZERO, %rAX			/* PML4 and top of stack */
-
-	MOVQ	%rAX, %rSP			/* set stack */
-
-	// DON'T ZAP.
-	// DO IT LATER.
-	//MOVQ	%rDX, PML4O(0)(%rAX)		/* zap identity map */
-
-	MOVQ	%rSI, %CR3			/* flush TLB */
-#ifndef UseOwnPageTables
-	/*
-	 * SI still points to the base of the bootstrap
-	 * processor page tables.
-	 * Want to use that for clearing the identity map,
-	 * but want to use the passed-in address for
-	 * setting up the stack and Mach.
-	 */
-	// oh, barf.
-//	MOVQ	$_real, %rAX
-	MOVQ	$0x3000, %rAX
-	MOVL	-4(%rAX), %eSI			/* PML4 */
-	MOVq	%rSI, %rSI			/* PML4-KZERO */
-#endif
-	MOVQ	%rSI, %rAX
-	ADDQ	$KZERO, %rAX			/* PML4 and top of stack */
-
-	MOVQ	%rAX, %rSP			/* set stack */
-
-	PUSHQ	%rDX				/* clear flags */
-	POPFQ
-
-	// put this in %rdx so it can be the third argument. We need to write it into
-	// %gs
-	ADDQ	/*$4*PTSZ+$4*KiB*/$0x5000, %raX	/* PML4+PDP+PD+PT+vsvm */
-	MOVq	%rbp, %rdi			/* APIC ID */
-	movq	%rax, %rsi			/* Mach * */
-
-	MOVQ	8(%rsi), %rAX			/* m->splpc */
-	xorq	%rbp, %rbp			/* stack trace ends here */
-	CALL	*%raX				/* CALL squidboy(SB) */
+
+	xorl	%edx, %edx
+	movw	%dx, %ds
+	movw	%dx, %es
+	movw	%dx, %fs
+	movw	%dx, %gs
+	movw	%dx, %ss
+
+	movq	%cr3, %rax
+	addq	$(7*PTSZ), %rax
+	movq	%rax, %cr3			// flush TLB
+
+	pushq	$0
+	movq	$0, %rbp
+	movq	8(%rdi), %rax			// m->splpc
+	pushq	%rax
+	pushq	$2				// Clear flags
+	popfq
+	ret					// Call squidboy
+	ud2
+
+.section .rodata
+
+.align 16
+.skip 6
+gdtdesc:
+.word	egdt - gdt - 1
+.long	(gdt-KZERO)
+
+.align 16
+.skip 6
+gdtdescv:
+.word	egdt - gdt - 1
+.quad	gdt

+ 27 - 12
sys/src/9/amd64/fns.h

@@ -29,12 +29,6 @@ void	acmodeset(int);
 void	archfmtinstall(void);
 void	archidle(void);
 int	archmmu(void);
-int	asmfree(uintmem, uintmem, int);
-uint64_t	asmalloc(uintmem, uintmem, int, int);
-void	asminit(void);
-void	asmmapinit(uintmem, uintmem, int);
-extern void asmmodinit(uint32_t, uint32_t, char*);
-void *asmrsdp(void);
 void	noerrorsleft(void);
 void	archinit(void);
 void	archreset(void);
@@ -55,8 +49,8 @@ int	dbgprint(char*, ...);
 int	decref(Ref*);
 void	delay(int);
 void	dumpmmu(Proc*);
-void	dumpmmuwalk(uint64_t pa);
-void	dumpptepg(int lvl,uintptr_t pa);
+void	dumpmmuwalk(const PTE *pml4, uint64_t pa);
+void	dumpptepg(int lvl, uintptr_t pa);
 #define	evenaddr(x)				/* x86 doesn't care */
 int	fpudevprocio(Proc*, void*, int32_t, uintptr_t, int);
 void	fpuinit(void);
@@ -113,11 +107,12 @@ void	mapupainit(uint64_t, uint32_t);
 void	meminit(void);
 void	mfence(void);
 void	mmuflushtlb(void);
+void	mmukflushtlb(void);
 void	mmuinit(void);
-uintptr_t	mmukmap(uintptr_t, uintptr_t, usize);
+void	mmukphysmap(PTE *pml4, uintmem, PTE, usize);
 int	mmukmapsync(uint64_t);
-uintmem	mmuphysaddr(uintptr_t);
-int	mmuwalk(PTE*, uintptr_t, int, PTE**, PTE (*)(usize));
+uintmem	mmuphysaddr(const PTE *pml4, uintptr_t);
+int	mmuwalk(const PTE *pml4, uintptr_t, int, const PTE**);
 int	multiboot(uint32_t, uint32_t, int);
 void	ndnr(void);
 unsigned char	nvramread(int);
@@ -148,6 +143,10 @@ void	pcisetmwi(Pcidev*);
 int	pcisetpms(Pcidev*, int);
 void pcishowdev(Pcidev*);
 int	pickcore(int, int);
+void	pamapdump(void);
+void	pamapinit(void);
+void	pamapinsert(uintmem, usize, int);
+void	pamapmerge(void);
 void	printcpufreq(void);
 void	putac(Mach*);
 void *rsdsearch(void *start, uintptr_t size);
@@ -183,6 +182,7 @@ extern uint64_t cr0get(void);
 extern void cr0put(uint64_t);
 extern uint64_t cr2get(void);
 extern uint64_t cr3get(void);
+extern uintmem pml4get(void);
 extern void cr3put(uint64_t);
 extern uint64_t cr4get(void);
 extern void cr4put(uint64_t);
@@ -283,9 +283,24 @@ void DONE(void);
 
 /* all these go to 0x3f8 */
 void hi(char *s);
+void hihex(uint64_t x);
 
 Mach *machp(void);
-Proc *externup(void);
+static inline Proc *externup(void)
+{
+	Mpl pl = splhi();
+	Proc *proc = machp()->externup;
+	splx(pl);
+	return proc;
+}
+
+static inline int machno()
+{
+	Mpl pl = splhi();
+	int no = machp()->machno;
+	splx(pl);
+	return no;
+}
 
 /* temporary. */
 void die(char *);

+ 0 - 1
sys/src/9/amd64/gcc.json

@@ -3,7 +3,6 @@
 		"Name": "buildflags",
 		"Cflags": [
 			"-fcommon",
-			"-fno-pie",
 			"-fvar-tracking",
 			"-fvar-tracking-assignments"
 		]

+ 36 - 55
sys/src/9/amd64/kernel.ld

@@ -1,78 +1,59 @@
-/* Simple linker script for the ROS kernel.
-   See the GNU ld 'info' manual ("info ld") to learn the syntax. */
-
-/* This script needs to be invoked with -z max-page-size=0x1000.  Otherwise,
- * ld will offset our first section to 1MB within the actual file.  Multiboot
- * requires the header to be in the first two pages. */
+/*
+ * Linker script for Harvey.
+ */
 
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
 ENTRY(_start)
-/* start the kernel at 0x110000. 
- * That way we can use lower ram for critical structures
- */
-KERN_LOAD_ADDR = 0xfffffffff0000000;
 
-SECTIONS
-{
-	/* Entry Linked and loaded at 0x00100000 (includes multiboot) */
-	. = 0x00110000;
+SECTIONS {
+	/*
+	 * start the kernel at 0xffff_8000_0020_0000
+	 * This preserves some RAM between 1MiB and the
+	 * start of the kernel for critical structures.
+	 */
+	. = 0xffff800000100000;
 
-	.bootstrap : {
+	PROVIDE(boottext = .);
+	.text.boot : ALIGN(4096) {
 		*(.boottext .bootdata)
+		. = ALIGN(4096);
+		PROVIDE(eboottext = .);
+		. = ALIGN(2097152);
+		PROVIDE(esys = .);
 	}
 
-	/* Link the main kernel for the space after entry + KERN_LOAD_ADDR.  We'll
-	 * still load it adjacent in physical memory */
-	. += KERN_LOAD_ADDR;
-
-	.text : AT(ADDR(.text) - KERN_LOAD_ADDR) {
-		*(.text .stub .text.* .gnu.linkonce.t.*)
+	PROVIDE(text = .);
+	.text : ALIGN(4096) {
+		*(.text* .stub .gnu.linkonce.t.*)
+		. = ALIGN(2097152);
+		PROVIDE(etext = .);
 	}
 
-	PROVIDE(etext = .);	/* Define the 'etext' symbol to this value */
-
-	/* Linker-made tables.  Our tables (e.g. devtab) are 2^6 aligned,
-	 * independently of us aligning '.'.  We align '.' to get the right start,
-	 * e.g.  __devtabstart. */
-	. = ALIGN(64);
-	/* We shouldn't have to use PROVIDE, but if we don't, we get the wrong
-	 * value for '.'.  And items with empty tables get the KLA (basically 0) */
-	PROVIDE(__devtabstart = .);
-	PROVIDE(devtab = .);
-	.devtab : {
-		*(.devtab)
+	.rodata : ALIGN(4096) {
+		*(.rodata* .gnu.linkonce.r.*)
+		. = ALIGN(2097152);
+		PROVIDE(erodata = .);
 	}
-	PROVIDE(__devtabend = .);
 
-	.rodata : {
-		*(.rodata .rodata.* .gnu.linkonce.r.*)
+	.data : ALIGN(4096) {
+		*(.data*)
 	}
-
-	/* TODO: add some debug info.  i hear stabs are 32 bit only, so we'll need
-	 * to bring in some dwarves.  for now, hack in the symbols to compile. */
-	PROVIDE(__STAB_BEGIN__ = .);
-	PROVIDE(__STAB_END__ = .);
-	PROVIDE(__STABSTR_BEGIN__ = .);
-	PROVIDE(__STABSTR_END__ = .);
-
-	/* Adjust the address for the data segment to the next page */
-	. = ALIGN(0x1000);
-
-	/* The data segment */
-	.data : {
-		*(.data)
+	.got : ALIGN(4096) {
+		*(.got)
+	}
+	.got.plt : ALIGN(4096) {
+		*(.got.plt)
 	}
-
 	PROVIDE(edata = .);
 
-	.bss : {
-		*(.bss)
+	.bss : ALIGN(4096) {
+		*(.bss*)
 		*(COMMON)
+		. = ALIGN(2097152);
 	}
-
 	PROVIDE(end = .);
 
 	/DISCARD/ : {
 		*(.eh_frame .note.GNU-stack)
+		*(.debug*)
 	}
 }

+ 1 - 1
sys/src/9/amd64/klib.json

@@ -2,7 +2,7 @@
 	{
 		"Name": "KernelLibs",
 		"Cflags": [
-			"-mcmodel=kernel",
+			"-fpic",
 			"-O0",
 			"-mno-red-zone",
 			"-ffreestanding",

+ 1 - 6
sys/src/9/amd64/l64idt.S

@@ -3,6 +3,7 @@
  */
 #include "amd64.h"
 
+.text
 .code64
 
 	/* Interrupts.  * Let's just talk about hardware
@@ -38,9 +39,6 @@
 	* look identical for the common code and exit we save %rax and line things up.
 	*/
 
-.globl ire
-.globl irx
-.globl irxe
 // When we enter:
 // registers are NOT saved. We need to save them all.
 // return PC is on the stat8(%rsp). It should be left there.
@@ -87,7 +85,6 @@ _intrcommon:
 	SWAPGS
 
 _intrnested:
-	incq ire
 	PUSHQ	%r15
 	PUSHQ	%r14
 	PUSHQ	%r13
@@ -114,7 +111,6 @@ _intrnested:
 	CALL	_trap
 .globl _intrr
 _intrr:
-	incq	irxe
 	POPQ	%rax
 	POPQ	%rbx
 	POPQ	%rCX
@@ -141,7 +137,6 @@ _iretnested:
 	// The %rax you pushed (error code)
 	// EIP from the vector table.
 	ADDQ	$16, %rsp
-	incq irx
 	iretq
 
 .globl idthandlers

+ 0 - 79
sys/src/9/amd64/l64syscall.s

@@ -1,79 +0,0 @@
-#include "mem.h"
-#include "amd64.h"
-
-.code64
-
-/*
- */
-.globl touser
-touser:
-	CLI
-	SWAPGS
-	MOVQ	$SSEL(SiUDS, SsRPL3), %rax
-	MOVW	%ax, %DS
-	MOVW	%ax, %ES
-	MOVW	%ax, %FS
-	MOVW	%ax, %GS
-
-	MOVQ	$(UTZERO+0x28), %rcx		/* ip */
-	MOVQ	$If, %r11			/* flags */
-
-	MOVQ	%rdi, %rsp			/* sp */
-
-	.byte 0x48; SYSRET			/* SYSRETQ */
-
-/*
- */
-.globl syscallentry
-syscallentry:
-	SWAPGS
-	.byte 0x65; MOVQ 0, %r15		/* m-> (MOVQ GS:0x0, R15) */
-	MOVQ	16(%r15),%r14     		/* m->proc */
-	MOVQ	%rsp, %r13
-	MOVQ	16(RUSER), %rsp			/* m->proc->kstack */
-	ADDQ	$KSTACK, %rsp
-	PUSHQ	$SSEL(SiUDS, SsRPL3)		/* old stack segment */
-	PUSHQ	%r13				/* old sp */
-	PUSHQ	%r11				/* old flags */
-	PUSHQ	$SSEL(SiUCS, SsRPL3)		/* old code segment */
-	PUSHQ	%rcx				/* old ip */
-
-	SUBQ	$(18*8), %rsp			/* unsaved registers */
-
-	MOVW	$SSEL(SiUDS, SsRPL3), (15*8+0)(%rsp)
-	MOVW	%ES, (15*8+2)(%rsp)
-	MOVW	%FS, (15*8+4)(%rsp)
-	MOVW	%GS, (15*8+6)(%rsp)
-
-	PUSHQ	%rsp				/* Ureg* */
-	PUSHQ	%rdi				/* system call number */
-	CALL	syscall
-
-.globl syscallreturn
-syscallreturn:
-	MOVQ	16(%rsp), AX			/* Ureg.ax */
-	MOVQ	(16+6*8)(%rsp), BP		/* Ureg.bp */
-_syscallreturn:
-	ADDQ	$(17*8), %rsp			/* registers + arguments */
-
-	CLI
-	SWAPGS
-	MOVW	0(%rsp), %DS
-	MOVW	2(%rsp), %ES
-	MOVW	4(%rsp), %FS
-	MOVW	6(%rsp), %GS
-
-	MOVQ	24(%rsp), %rcx			/* ip */
-	MOVQ	40(%rsp), %r11			/* flags */
-
-	MOVQ	48(%rsp), %rsp			/* sp */
-
-	.byte 0x48; SYSRET			/* SYSRETQ */
-
-.globl sysrforkret
-sysrforkret:
-	// DEBUG
-	cli
-1: jmp 1b
-	MOVQ	$0, %rax
-	JMP	_syscallreturn

+ 9 - 34
sys/src/9/amd64/l64v.S

@@ -142,26 +142,16 @@ gdtput:
 
 .global idtput
 idtput:
-	// save %rdi, since we are going to modify it.
-	pushq	%rdi
-	// Push the two quads onto the stack,
-	// which arranges them in memory.
-	pushq	%rsi
-	shlq	$48, %rdi
-	pushq	%rdi
-	movq	%rsp, %rax
-	addq	$6, %rax
-	lidt	(%rax)
-	popq	%rdi
-	popq	%rsi
-	popq	%rdi
-	RET
+	subq	$16, %rsp
+	movq	%rsi, 8(%rsp)
+	movw	%di, 6(%rsp)
+	lidt	6(%rsp)
+	addq	$16, %rsp
+	ret
 
 .global trput
 trput:
-	// panic
-	mov 0, %rax
-	//ltr	%rdi
+	ltr	%di
 	RET
 
 /*
@@ -514,26 +504,11 @@ _machp_bad:
 	popq %rbp
 	jmp _machp_out
 
-.global externup
-externup:
-	movq	%gs:40, %rax
-	ret
-
-/* not needed.
-.global mul64fract
-mul64fract:
-	MOVQ	%rdi, %rax
-	MULQ	%rsi			/ * a*b *
-	SHRQ	$32, %rax:DX
-	MOVQ	%rax, (%rdi)
-	RET
-*/
-
 ///*
 // * Testing.
 // */
 //.global ud2
 ud2:
-//	BYTE $0x0f; BYTE $0x0b
-//	RET
+	ud2
+	RET
 //

+ 12 - 14
sys/src/9/amd64/l64vsyscall.S

@@ -4,12 +4,13 @@
 #define __ASSEMBLER__
 #endif
 
-.globl sce
-.globl scx
+.text
+.code64
+
 /*
  * starting the user program up. First time.
  */
-	.globl touser
+.globl touser
 touser:
 	CLI
 	SWAPGS
@@ -29,9 +30,8 @@ touser:
 	MOVQ	%RDI, %RSP			/* sp */
 	sysretq
 
-	.globl syscallentry
+.globl syscallentry
 syscallentry:
-	incq	sce
 	SWAPGS
 
 	movq	%r15, %gs:32	/* stash r15 to m->rathole */
@@ -73,15 +73,14 @@ syscallentry:
 	MOVQ	%rsp, %rsi 				/* Ureg* */
 	// system call number is in %rax, as per linux.
 	movq	%rax, %rdi
-	xorq	%rax, %rax
-	pushq	%rax
+	xorl	%eax, %eax
+	pushq	$2
 	popfq			/* clear all flags. is there something else we should clear too? */
-	movq	$0, %rbp	/* stack traces end here */
+	movq	%rsp, %rbp
 	CALL	syscall
 
-	.globl	syscallreturn
+.globl syscallreturn
 syscallreturn:
-
 	// restore from ureg
 	MOVQ	(0*8)(%rsp),%rAX
 	MOVQ	(1*8)(%rsp),%rBX
@@ -108,10 +107,9 @@ syscallreturn:
 	MOVQ	16(%rsp), %r11			/* flags */
 	MOVQ	24(%rsp), %rSP			/* sp */
 
-	incq	scx
 	sysretq
 
-	.globl sysrforkret
+.globl sysrforkret
 sysrforkret:
-	MOVQ	$0, 0(%rsp)
-	JMP	syscallreturn
+	MOVQ	$0, (0*8)(%rsp)
+	jmp	syscallreturn

+ 127 - 204
sys/src/9/amd64/main.c

@@ -31,16 +31,7 @@ static uintptr_t sp;		/* XXX - must go - user stack of init proc */
  */
 int64_t hz;
 
-uintptr_t kseg0 = KZERO;
-
-// N.B.: sys is initialized in entry.S
-// This nil is a no-op anyway. Compilers see a zero and
-// put it in BSS anyway.
-Sys* sys = nil;
-usize sizeofSys = sizeof(Sys);
-
-// N.B.: entrym is initialized in entry.S
-Mach *entrym;
+Sys *const sys = UINT2PTR(KSYS);
 
 /*
  * Option arguments from the command line.
@@ -50,8 +41,8 @@ Mach *entrym;
  */
 char *cputype = "amd64";
 static int64_t oargc;
-static char* oargv[20];
-static char oargb[1024];
+static char* oargv[128];
+static char oargb[4096];
 static int oargblen;
 
 static int maxcores = 1024;	/* max # of cores given as an argument */
@@ -60,8 +51,7 @@ static int numtcs = 32;		/* initial # of TCs */
 char dbgflg[256];
 static int vflag;
 
-int nosmp;
-int acpionly = 1;
+int nosmp = 0;
 static int showpost = 0;
 
 // Harvey is out of date on many platforms, and frequently the only
@@ -79,7 +69,7 @@ static int showpost = 0;
 // and time of this bit of extra code is so small as to not matter.
 void post(char *msg, uint8_t terminal)
 {
-	if (! showpost)
+	if (!showpost)
 		return;
 	for(int i = 0; i < strlen(msg); i++) {
 		outb(0x3f8, msg[i]);
@@ -111,17 +101,17 @@ sigscan(uint8_t* address, int length, char* signature)
 static int
 ktextaddr(uintptr_t pc)
 {
-	return (pc & 0xfffffffff0000000) == 0xfffffffff0000000;
+	return (pc & KTZERO) == KTZERO;
 }
 
 void
 stacksnippet(void)
 {
 	Stackframe *stkfr;
-	kmprint(" stack:");
+	print("cpu%d stack:", machno());
 	for(stkfr = stackframe(); stkfr != nil; stkfr = stkfr->next)
-		kmprint(" %c:%p", ktextaddr(stkfr->pc) ? 'k' : '?', ktextaddr(stkfr->pc) ? (stkfr->pc & 0xfffffff) : stkfr->pc);
-	kmprint("\n");
+		print(" %c:%p", ktextaddr(stkfr->pc) ? 'k' : '?', stkfr->pc);
+	print("\n");
 }
 
 void
@@ -150,7 +140,9 @@ machp_bad(void)
 void
 optionsinit(char* s)
 {
-	oargblen = strecpy(oargb, oargb+sizeof(oargb), s) - oargb;
+	oargblen = strlcpy(oargb, s, sizeof(oargb));
+	if(oargblen>=sizeof(oargb))
+		panic("optionsinit: kernel command line too long");
 	oargc = tokenize(oargb, oargv, nelem(oargv)-1);
 	oargv[oargc] = nil;
 }
@@ -211,12 +203,14 @@ loadenv(int argc, char* argv[])
 	 */
 	while(--argc > 0){
 		char* next = *++argv;
-		if(next[0] !='-'){
-			if (gettokens(next, env, 2, "=")  == 2){;
-				ksetenv(env[0], env[1], 0);
-			}else{
-				print("Ignoring parameter (%s) with no value: %s\n", next, env[0]);
+		if(next[0]!='-'){
+			print("loadenv: processing '%s'\n", next);
+			if(strchr(next, '=') == nil){
+				print("Ignoring parameter (%s) with no value.\n", next);
+				continue;
 			}
+			if (gettokens(next, env, 2, "=") == 2)
+				ksetenv(env[0], env[1], 0);
 		}
 	}
 }
@@ -224,11 +218,11 @@ loadenv(int argc, char* argv[])
 extern int num_cpus;
 
 void
-squidboy(int apicno, Mach *mach)
+squidboy(Mach *mach)
 {
 	// FIX QEMU. extern int64_t hz;
 	int64_t hz;
-	mach->self = (uintptr_t)mach;
+
 	sys->machptr[mach->machno] = mach;
 	/*
 	 * Need something for initial delays
@@ -245,10 +239,9 @@ squidboy(int apicno, Mach *mach)
 
 	// NOTE: you can't do ANYTHING here before vsvminit.
 	// PRINT WILL PANIC. So wait.
-
 	vsvminit(MACHSTKSZ, mach->NIX.nixtype, mach);
 
-	//DBG("Hello squidboy %d %d\n", apicno, machp()->machno);
+	DBG("Hello squidboy APIC %d (cpu%d)\n", mach->apicno, machno());
 
 	/*
 	 * Beware the Curse of The Non-Interruptable Were-Temporary.
@@ -261,7 +254,6 @@ squidboy(int apicno, Mach *mach)
 	mach->cyclefreq = hz;
 	mach->cpumhz = hz/1000000ll;
 
-
 	mmuinit();
 	if(!apiconline())
 		ndnr();
@@ -404,12 +396,14 @@ HERE(void)
 /* The old plan 9 standby ... wave ... */
 
 /* Keep to debug trap.c */
-void wave(int c)
+void
+wave(int c)
 {
 	outb(0x3f8, c);
 }
 
-void hi(char *s)
+void
+hi(char *s)
 {
 	if (! s)
 		s = "<NULL>";
@@ -417,6 +411,14 @@ void hi(char *s)
 		wave(*s++);
 }
 
+void
+hihex(uint64_t x)
+{
+	const char *hex = "0123456789abcdef";
+	for (int i = 60; i >= 0; i -= 4)
+		wave(hex[(x>>i)&0xF]);
+}
+
 /*
  * for gdb:
  * call this anywhere in your code.
@@ -441,109 +443,70 @@ void die(char *s)
 	staydead = 1;
 }
 
-/*
-void bmemset(void *p)
-{
-	__asm__ __volatile__("1: jmp 1b");
-}
-*/
-
-void debugtouser(void *va)
-{
-	uintptr_t uva = (uintptr_t) va;
-	PTE *pte, *pml4;
-
-	pml4 = UINT2PTR(machp()->MMU.pml4->va);
-	mmuwalk(pml4, uva, 0, &pte, nil);
-	iprint("va %p m %p m>pml4 %p machp()->pml4->va %p pml4 %p PTE 0x%lx\n", va,
-			machp(), machp()->MMU.pml4, machp()->MMU.pml4->va, (void *)pml4, *pte);
-}
-
-/*
-void badcall(uint64_t where, uint64_t what)
+void
+badcall(uint64_t where, uint64_t what)
 {
-	hi("Bad call from function "); put64(where); hi(" to "); put64(what); hi("\n");
-	while (1)
+	hi("Bad call from function ");
+	hihex(where);
+	hi(" to ");
+	hihex(what); hi("\n");
+	for(;;)
 		;
 }
-*/
 
-void errstr(char *s, int i) {
+void
+errstr(char *s, int i)
+{
 	panic("errstr");
 }
 
 static int x = 0x123456;
 
-/* tear down the identity map we created in assembly. ONLY do this after all the
- * APs have started up (and you know they've done so. But you must do it BEFORE
- * you create address spaces for procs, i.e. userinit()
- */
-static void
-teardownidmap(Mach *mach)
+void
+main(Mach *mach, uint32_t mbmagic, uint32_t mbaddress)
 {
-	int i;
-	uintptr_t va = 0;
-	PTE *p;
-	/* loop on the level 2 because we should not assume we know
-	 * how many there are But stop after 1G no matter what, and
-	 * report if there were that many, as that is odd.
-	 */
-	for(i = 0; i < 512; i++, va += BIGPGSZ) {
-		if (mmuwalk(UINT2PTR(mach->MMU.pml4->va), va, 1, &p, nil) != 1)
-			break;
-		if (! *p)
-			break;
-		iprint("teardown: va %p, pte %p\n", (void *)va, p);
-		*p = 0;
-	}
-	iprint("Teardown: zapped %d PML1 entries\n", i);
+	int postterminal = 1;
+	USED(postterminal);
 
-	for(i = 2; i < 4; i++) {
-		if (mmuwalk(UINT2PTR(mach->MMU.pml4->va), 0, i, &p, nil) != i) {
-			iprint("weird; 0 not mapped at %d\n", i);
-			continue;
-		}
-		iprint("teardown: zap %p at level %d\n", p, i);
-		if (p)
-			*p = 0;
-	}
-}
+	sys->machptr[mach->machno] = mach;
 
+	mach->self = PTR2UINT(mach);
+	mach->machno = 0;
+	mach->online = 1;
+	mach->NIX.nixtype = NIXTC;
+	mach->stack = PTR2UINT(sys->machstk);
+	mach->vsvm = sys->vsvmpage;
+	mach->externup = nil;
 
-void
-main(uint32_t mbmagic, uint32_t mbaddress)
-{
-	int postterminal = 1;
-	Mach *mach = entrym;
-	/* when we get here, entrym is set to core0 mach. */
-	sys->machptr[mach->machno] = entrym;
-	// Very special case for BSP only. Too many things
-	// assume this is set.
-	wrmsr(GSbase, PTR2UINT(&sys->machptr[mach->machno]));
+	/*
+	 * Need something for initial delays
+	 * until a timebase is worked out.
+	 */
+	mach->cpuhz = 2000000000ll;
+	mach->cpumhz = 2000;
+
+	// Initialize VSM so that we can use interrupts and so forth.
+	vsvminit(MACHSTKSZ, NIXTC, mach);
 	if (machp() != mach)
-		panic("mach and machp() are different!!\n");
-	assert(sizeof(Mach) <= PGSZ);
+		panic("After vsvminit, m and machp() are different");
+
+	// The kernel maps the first 4GiB before entry to main().  If the
+	// image is too big, we will fail to boot properly.
+	if((uintptr_t)end-KZERO > 4ULL*GiB)
+		panic("main: kernel too big: image ends after 4GiB");
 
 	/*
 	 * Check that our data is on the right boundaries.
 	 * This works because the immediate value is in code.
 	 */
-	//cgaprint(800, "hello harvey\n");
-	//for(;;);
 	if (x != 0x123456)
 		panic("Data is not set up correctly\n");
 
-	// Clear BSS
-	// N.B. This wipes out the sys variable, which is set in entry.S.
-	// It will also wipe out any other data that entry.S might
-	// set that is not initialized, and hence in bss, such as entrym,
-	// which is why code also had to set mach again. This is why Aki
-	// had the 'figure this out' comment below.
-	// Further, it is not needed. Kexec and other bootloaders will
-	// do this for us. Not sure why this was here. -- RGM.
-	// memset(edata, 0, end - edata);
-	// Score one for ELF -- the bss zeroing comes in by default. That
-	// was not the case in 9load I guess.
+	sys->cyclefreq = mach->cpuhz;
+	sys->nmach = 1;
+	active.nonline = 1;
+	active.exiting = 0;
+	active.nbooting = 0;
 
 	/*
 	 * ilock via i8250enable via i8250console
@@ -551,52 +514,23 @@ main(uint32_t mbmagic, uint32_t mbaddress)
 	 * also 'up' set to nil.
 	 */
 	cgapost(sizeof(uintptr_t)*8);
-	memset(mach, 0, sizeof(Mach));
-
-	mach->self = (uintptr_t)mach;
-	mach->machno = 0;
-	mach->online = 1;
-	mach->NIX.nixtype = NIXTC;
-	mach->stack = PTR2UINT(sys->machstk);
-	// NOPE. Wipes out multiboot.
-	//*(uintptr_t*)mach->stack = STACKGUARD;
-	mach->vsvm = sys->vsvmpage;
-	mach->externup = nil;
-	active.nonline = 1;
-	active.exiting = 0;
-	active.nbooting = 0;
 
-	asminit(); post("	asminit();", postterminal++);
-	multiboot(mbmagic, mbaddress, 0); post("	multiboot(mbmagic, mbaddress, 0);", postterminal++);
-	options(oargc, oargv); post("	options(oargc, oargv);", postterminal++);
+	mallocinit();
+	pamapinit();
+	multiboot(mbmagic, mbaddress, 0);
+	options(oargc, oargv);
+	pamapmerge();
 
-	/*
-	 * Need something for initial delays
-	 * until a timebase is worked out.
-	 */
-	mach->cpuhz = 2000000000ll;
-	mach->cpumhz = 2000;
-	sys->cyclefreq = mach->cpuhz;
+	fmtinit();
 
-	cgainit(); post("	cgainit();", postterminal++);
-	i8250console("0"); post("	i8250console(\"0\");", postterminal++);
+	cgainit();
+	i8250console("0");
 
 	consputs = cgaconsputs;
 
-	/* It all ends here. */
-	vsvminit(MACHSTKSZ, NIXTC, mach); post("	vsvminit(MACHSTKSZ, NIXTC, mach);", postterminal++);
-	if (machp() != mach)
-		panic("After vsvminit, m and machp() are different");
-
-	sys->nmach = 1;
-
-	fmtinit(); post("	fmtinit();", postterminal++);
 	print("\nHarvey\n");
-	multiboot(mbmagic, mbaddress, postterminal++); post("	multiboot(mbmagic, mbaddress, 1);", 1);
-
-	if(vflag){
-		multiboot(mbmagic, mbaddress, vflag);
-	}
+	multiboot(mbmagic, mbaddress, 1);
+	pamapdump();
 
 	mach->perf.period = 1;
 	if((hz = archhz()) != 0ll){
@@ -610,29 +544,15 @@ main(uint32_t mbmagic, uint32_t mbaddress)
 	//iprint("So, until that's fixed, we bring up AP cores slowly. Sorry!\n");
 
 	/*
-	 * Mmuinit before meminit because it
-	 * flushes the TLB via machp()->pml4->pa.
-	 */
-	mmuinit(); post("	mmuinit();", postterminal++);
-
-	ioinit(); post("	ioinit();", postterminal++);
-	keybinit(); post("	keybinit();", postterminal++);
-	meminit(); post("	meminit();", postterminal++);
-	confinit(); post("	confinit();", postterminal++);
-	archinit(); post("	archinit();", postterminal++);
-	mallocinit(); post("	mallocinit();", postterminal++);
-
-	/* test malloc. It's easier to find out it's broken here,
-	 * not deep in some call chain.
-	 * See next note.
-	 *
+	 * Mmuinit before meminit because it flushes the TLB.
 	 */
-	if (1) {
-		void *v = malloc(1234);
-		hi("allocated\n ");
-		free(v);
-		hi("free ok\n");
-	}
+	mmuinit();
+
+	ioinit();
+	keybinit();
+	meminit();
+	confinit();
+	archinit();
 
 	/*
 	 * Acpiinit will cause the first malloc
@@ -643,9 +563,9 @@ main(uint32_t mbmagic, uint32_t mbaddress)
 	 * (it's amazing how far you can get with
 	 * things like that completely broken).
 	 */
-if (1){	acpiinit(); hi("	acpiinit();\n");}
+	acpiinit();
 
-	umeminit(); post("	umeminit();", postterminal++);
+	umeminit();
 
 	/*
 	 * This is necessary with GRUB and QEMU.
@@ -653,46 +573,43 @@ if (1){	acpiinit(); hi("	acpiinit();\n");}
 	 * because the vector base is likely different, causing
 	 * havoc. Do it before any APIC initialisation.
 	 */
-	i8259init(32); post("	i8259init(32);", postterminal++);
+	i8259init(32);
 
-	procinit0(); post("	procinit0();", postterminal++);
+	procinit0();
 	print("before mpacpi, maxcores %d\n", maxcores);
-	mpacpi(maxcores); post("	mpacpi(maxcores);", postterminal++);
-	trapinit(); post("	trapinit();", postterminal++);
-	printinit(); post("	printinit();", postterminal++);
-	apiconline(); post("	apiconline();", postterminal++);
-	ioapiconline(); post("	ioapiconline();", postterminal++);
+	mpacpi(maxcores);
+	trapinit();
+	printinit();
+	apiconline();
+	ioapiconline();
 	/* Forcing to single core if desired */
 	if(!nosmp) {
 		sipi();
 	} else {
 		print("SMP Disabled by command line\n");
 	}
-//working.
-	teardownidmap(mach); post("	teardownidmap(mach);", postterminal++);
-	timersinit(); post("	timersinit();", postterminal++);
-	fpuinit(); post("	fpuinit();", postterminal++);
-	psinit(conf.nproc); post("	psinit(conf.nproc);", postterminal++);
-	initimage(); post("	initimage();", postterminal++);
-	links(); post("	links();", postterminal++);
-
-
-	keybenable(); post("	keybenable();", postterminal++);
-	mouseenable(); post("	mouseenable();", postterminal++);
-
-	devtabreset(); post("	devtabreset();", postterminal++);
-	pageinit(); post("	pageinit();", postterminal++);
-	swapinit(); post("	swapinit();", postterminal++);
-	userinit(); post("	userinit();", postterminal++);
+	timersinit();
+	fpuinit();
+	psinit(conf.nproc);
+	initimage();
+	links();
+
+	keybenable();
+	mouseenable();
+
+	devtabreset();
+	pageinit();
+	swapinit();
+	userinit();
 	/* Forcing to single core if desired */
 	if(!nosmp) {
-		nixsquids(); post("		nixsquids();", postterminal++);
-		testiccs(); post("		testiccs();", postterminal++);
+		nixsquids();
+		testiccs();
 	}
 
-	alloc_cpu_buffers(); post("	alloc_cpu_buffers();", postterminal++);
+	alloc_cpu_buffers();
 
-	acpistart(); post("	acpistart();", postterminal++);
+	acpistart();
 	print("CPU Freq. %dMHz\n", mach->cpumhz);
 
 	print("schedinit...\n");
@@ -735,7 +652,6 @@ init0(void)
 		poperror();
 	}
 	kproc("alarm", alarmkproc, 0);
-	//debugtouser((void *)UTZERO);
 	touser(sp);
 }
 
@@ -853,6 +769,11 @@ userinit(void)
 	/* This depends on init having a text segment < 2M. */
 	memmove(UINT2PTR(VA(k) + init_data_start - (UTZERO + BIGPGSZ)), init_data_out, sizeof(init_data_out));
 	kunmap(k);
+
+	// XXX: Something is faulting when init moves to
+	// another core.
+	//procwired(p, 0);
+
 	ready(p);
 }
 
@@ -874,6 +795,8 @@ shutdown(int ispanic)
 	int ms, once;
 
 	lock(&active.l);
+	if(ispanic)
+		stacksnippet();
 	if(ispanic)
 		active.ispanic = ispanic;
 	else if(machp()->machno == 0 && machp()->online == 0)

+ 7 - 19
sys/src/9/amd64/map.c

@@ -13,26 +13,16 @@
 #include "dat.h"
 #include "fns.h"
 
-#define _KADDR(pa)	UINT2PTR(kseg0+((uintptr)(pa)))
-#define _PADDR(va)	PTR2UINT(((uintptr)(va)) - kseg0)
+#define _KADDR(pa)	UINT2PTR(KZERO+((uintptr)(pa)))
+#define _PADDR(va)	PTR2UINT(((uintptr)(va))-KZERO)
 
-#define TMFM		(64*MiB)
-
-int km, ku, k2;
 void*
 KADDR(uintptr_t pa)
 {
-	uint8_t* va;
-
-	va = UINT2PTR(pa);
-	if(pa < TMFM) {
-		km++;
-		return KSEG0+va;
-	}
+	if (pa < KZERO)
+		return _KADDR(pa);
 
-	assert(pa < KSEG2);
-	k2++;
-	return KSEG2+va;
+	return UINT2PTR(pa);
 }
 
 uintmem
@@ -41,10 +31,8 @@ PADDR(void* va)
 	uintmem pa;
 
 	pa = PTR2UINT(va);
-	if(pa >= KSEG0 && pa < KSEG0+TMFM)
-		return pa-KSEG0;
-	if(pa > KSEG2)
-		return pa-KSEG2;
+	if(pa >= KZERO)
+		return pa-KZERO;
 
 	panic("PADDR: va %#p pa #%p @ %#p\n", va, _PADDR(va), getcallerpc());
 	return 0;

+ 10 - 22
sys/src/9/amd64/mem.h

@@ -16,9 +16,9 @@
  * https://sourceware.org/bugzilla/show_bug.cgi?id=190
  */
 #ifndef __ASSEMBLER__
-#define KiB		1024u			/* Kibi 0x0000000000000400 */
-#define MiB		1048576u		/* Mebi 0x0000000000100000 */
-#define GiB		1073741824u		/* Gibi 000000000040000000 */
+#define KiB		1024ull			/* Kibi 0x0000000000000400 */
+#define MiB		1048576ull		/* Mebi 0x0000000000100000 */
+#define GiB		1073741824ull		/* Gibi 0x0000000040000000 */
 #define TiB		1099511627776ull	/* Tebi 0x0000010000000000 */
 #define PiB		1125899906842624ull	/* Pebi 0x0004000000000000 */
 #define EiB		1152921504606846976ull	/* Exbi 0x1000000000000000 */
@@ -60,7 +60,7 @@
 
 #define MACHSZ		(4*KiB)			/* Mach+stack size */
 #define MACHMAX		32			/* max. number of cpus */
-#define MACHSTKSZ	(6*(4*KiB))		/* Mach stack size */
+#define MACHSTKSZ	(8*(4*KiB))		/* Mach stack size */
 
 #define KSTACK		(16*1024)		/* Size of Proc kernel stack */
 #define STACKALIGN(sp)	((sp) & ~(BY2SE-1))	/* bug: assure with alloc */
@@ -116,25 +116,13 @@
  */
 
 #ifndef __ASSEMBLER__
-#define KSEG2		(0xfffffe0000000000ull)	/* 1TB - KMAP */
-/*			 0xffffff0000000000ull	end of KSEG2 */
-#define VMAP		(0xffffffffe0000000ull)
-#define VMAPSZ		(256*MiB)
-#define KSEG0		(0xfffffffff0000000ull)	/* 256MB - this is confused */
-#define KZERO		(0xfffffffff0000000ull)
-#define KTZERO		(KZERO+1*MiB+64*KiB)
-#define PDMAP		(0xffffffffff800000ull)
-#define PMAPADDR		(0xffffffffffe00000ull)	/* unused as of yet */
-/*			 0xffffffffffffffffull	end of KSEG0 */
+#define KZERO		0xffff800000000000ull
+#define KSYS		(KZERO+1ull*MiB+1ull*PGSZ)
+#define KTZERO		(KZERO+2ull*MiB)
 #else
-#define KSEG2           (0xfffffe0000000000)
-#define VMAPSZ          (256*MiB)
-#define VMAP            (0xffffffffe0000000)
-#define KSEG0           (0xfffffffff0000000)
-#define KZERO           (0xfffffffff0000000)
-#define KTZERO          (KZERO+1*MiB+64*KiB)
-#define PDMAP           (0xffffffffff800000)
-#define PMAPADDR        (0xffffffffffe00000)
+#define KZERO		0xffff800000000000
+#define KSYS		(KZERO+1*MiB+1*PGSZ)
+#define KTZERO		(KZERO+2*MiB)
 #endif
 
 // YUCK.

+ 87 - 4
sys/src/9/amd64/memory.c

@@ -13,18 +13,101 @@
 #include "dat.h"
 #include "fns.h"
 
+#include "amd64.h"
+
 void
 meminit(void)
 {
-	extern void asmmeminit(void);
+	int cx = 0;
+
+	for(PAMap *m = pamap; m != nil; m = m->next){
+		DBG("meminit: addr %#P end %#P type %d size %P\n",
+			m->addr, m->addr+m->size,
+			m->type, m->size);
+		PTE pgattrs = PteP;
+		switch(m->type){
+		default:
+			DBG("(Skipping)\n");
+			continue;
+		case PamKTEXT:
+			pgattrs |= PteG;
+			break;
+		case PamDEV:
+			pgattrs |= PtePCD;
+		case PamMEMORY:
+		case PamKRDWR:
+			pgattrs |= PteRW;
+		case PamACPI:
+		case PamPRESERVE:
+		case PamRESERVED:
+		case PamKRDONLY:
+			pgattrs |= PteNX;
+		}
+		mmukphysmap(UINT2PTR(machp()->MMU.pml4->va), m->addr, pgattrs, m->size);
+
+		/*
+		 * Fill in conf data.
+		 */
+		if (m->type != PamMEMORY)
+			continue;
+		if(cx >= nelem(conf.mem))
+			continue;
+		uintmem lo = ROUNDUP(m->addr, PGSZ);
+		conf.mem[cx].base = lo;
+		uintmem hi = ROUNDDN(m->addr + m->size, PGSZ);
+		conf.mem[cx].npage = (hi - lo)/PGSZ;
+		conf.npage += conf.mem[cx].npage;
+		DBG("cm %d: addr %#llx npage %lu\n",
+			cx, conf.mem[cx].base, conf.mem[cx].npage);
+		cx++;
+	}
+	mmukflushtlb();
+
+	/*
+	 * Fill in more legacy conf data.
+	 * This is why I hate Plan 9.
+	 */
+	conf.upages = conf.npage;
+	conf.ialloc = 64*MiB;	// Arbitrary.
+	DBG("npage %llu upage %lu\n", conf.npage, conf.upages);
+}
+
+static void
+setphysmembounds(void)
+{
+	uintmem pmstart, pmend;
 
-	asmmeminit();
+	pmstart = ROUNDUP(PADDR(end), 2*MiB);
+	pmend = pmstart;
+	for(PAMap *m = pamap; m != nil; m = m->next){
+		if(m->type == PamMODULE && m->addr+m->size > pmstart)
+			pmstart = ROUNDUP(m->addr+m->size, 2*MiB);
+		if(m->type == PamMEMORY && m->addr+m->size > pmend)
+			pmend = ROUNDDN(m->addr+m->size, 2*MiB);
+	}
+	sys->pmstart = pmstart;
+	sys->pmend = pmend;
 }
 
 void
 umeminit(void)
 {
-	extern void asmumeminit(void);
+	extern void physallocdump(void);
+	setphysmembounds();
 
-	asmumeminit();
+	for(PAMap *m = pamap; m != nil; m = m->next){
+		if(m->type != PamMEMORY)
+			continue;
+		if(m->addr < 2*MiB)
+			continue;
+		// if(m->size < 2*MiB)
+		// 	continue;
+		// usize offset=ROUNDUP(m->addr, 2*MiB)-m->addr;
+		// m->size -= offset;
+		// m->addr += offset;
+		// if (m->size == 0)
+		// 	continue;
+		physinit(m->addr, m->size);
+	}
+	physallocdump();
 }

File diff suppressed because it is too large
+ 342 - 399
sys/src/9/amd64/mmu.c


+ 50 - 48
sys/src/9/amd64/multiboot.c

@@ -64,6 +64,35 @@ struct MMap {
 	uint32_t	type;
 };
 
+static int
+mbpamtype(int acpitype)
+{
+	switch(acpitype){
+	case 1: return PamMEMORY;
+	case 2: return PamRESERVED;
+	case 3: return PamACPI;
+	case 4: return PamPRESERVE;
+	case 5: return PamUNUSABLE;
+	default:
+		print("multiboot: unknown memory type %d", acpitype);
+		break;
+	}
+	return PamNONE;
+}
+
+static const char *
+mbtypename(int type)
+{
+	switch(type){
+	case 1: return "Memory";
+	case 2: return "Reserved";
+	case 3: return "ACPI Reclaim Memory";
+	case 4: return "ACPI NVS Memory";
+	default: break;
+	}
+	return "(unknown)";
+}
+
 int
 multiboot(uint32_t magic, uint32_t pmbi, int vflag)
 {
@@ -77,7 +106,7 @@ multiboot(uint32_t magic, uint32_t pmbi, int vflag)
 	if(vflag)
 		print("magic %#x pmbi %#x\n", magic, pmbi);
 	if(magic != 0x2badb002)
-		print("no magic in multiboot\n");//return -1;
+		return -1;
 
 	mbi = KADDR(pmbi);
 	if(vflag)
@@ -89,58 +118,16 @@ multiboot(uint32_t magic, uint32_t pmbi, int vflag)
 		else
 			optionsinit(p);
 	}
-	if(mbi->flags & Fmods){
-		for(i = 0; i < mbi->modscount; i++){
-			mod = KADDR(mbi->modsaddr + i*16);
-			if(mod->string != 0)
-				p = KADDR(mod->string);
-			else
-				p = "";
-			if(vflag)
-				print("mod %#x %#x <%s>\n",
-					mod->modstart, mod->modend, p);
-			else
-				asmmodinit(mod->modstart, mod->modend, p);
-		}
-	}
 	if(mbi->flags & Fmmap){
 		mmap = KADDR(mbi->mmapaddr);
 		n = 0;
 		while(n < mbi->mmaplength){
 			addr = (((uint64_t)mmap->base[1])<<32)|mmap->base[0];
 			len = (((uint64_t)mmap->length[1])<<32)|mmap->length[0];
-			switch(mmap->type){
-			default:
-				if(vflag)
-					print("type %u", mmap->type);
-				break;
-			case 1:
-				if(vflag)
-					print("Memory");
-				else
-					asmmapinit(addr, len, mmap->type);
-				break;
-			case 2:
-				if(vflag)
-					print("reserved");
-				else if (addr == 0) {
-					print("addr 0 is memory, not reserved\n");
-					asmmapinit(addr, len, 1);
-				} else
-					asmmapinit(addr, len, mmap->type);
-				break;
-			case 3:
-				if(vflag)
-					print("ACPI Reclaim Memory");
-				else
-					asmmapinit(addr, len, mmap->type);
-				break;
-			case 4:
-				if(vflag)
-					print("ACPI NVS Memory");
-				else
-					asmmapinit(addr, len, mmap->type);
-				break;
+			if(vflag){
+				print("%s (%u)", mbtypename(mmap->type), mmap->type);
+			}else{
+				pamapinsert(addr, len, mbpamtype(mmap->type));
 			}
 			switch(mmap->type) {
 				// There is no consistency in which type of e820 segment RSDP is stored in.
@@ -151,13 +138,28 @@ multiboot(uint32_t magic, uint32_t pmbi, int vflag)
 					break;
 			}
 			if(vflag)
-				print("\n\t%#16.16llx %#16.16llx (%llu)\n",
+				print("\t%#16.16llx %#16.16llx (%llu)\n",
 					addr, addr+len, len);
 
 			n += mmap->size+sizeof(mmap->size);
 			mmap = KADDR(mbi->mmapaddr+n);
 		}
 	}
+	if(mbi->flags & Fmods){
+		for(i = 0; i < mbi->modscount; i++){
+			mod = KADDR(mbi->modsaddr + i*16);
+			p = "";
+			if(mod->string != 0)
+				p = KADDR(mod->string);
+			if(vflag)
+				print("mod %#x %#x <%s>\n",
+					mod->modstart, mod->modend, p);
+			else{
+				usize len = mod->modend-mod->modstart;
+				pamapinsert(mod->modstart, len, PamMODULE);
+			}
+		}
+	}
 	if(vflag && (mbi->flags & Fbootloadername)){
 		p = KADDR(mbi->bootloadername);
 		print("bootloadername <%s>\n", p);

+ 224 - 0
sys/src/9/amd64/pamap.c

@@ -0,0 +1,224 @@
+/*
+ * This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file.
+ */
+
+// Physical Address Map.
+//
+// Describes the layout of physical memory, including the
+// kernel segments, MMIO regions, multiboot modules, etc.
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "amd64.h"
+
+PAMap *pamap = nil;
+
+void
+pamapinit(void)
+{
+	// Nop.
+}
+
+const char *
+pamtypename(int type)
+{
+	const char *names[] = {
+	[PamNONE] 	= "NONE",
+	[PamMEMORY]	= "MEMORY",
+	[PamRESERVED]	= "RESERVED",
+	[PamACPI]	= "ACPI",
+	[PamPRESERVE]	= "PRESERVE",
+	[PamUNUSABLE]	= "UNUSABLE",
+	[PamDEV]	= "DEV",
+	[PamMODULE]	= "MODULE",
+	[PamKTEXT]	= "KTEXT",
+	[PamKRDONLY]	= "KRDONLY",
+	[PamKRDWR]	= "KRDWR",
+	};
+	assert(type < nelem(names));
+	return names[type];
+}
+
+void
+pamapdump(void)
+{
+	print("pamap: {\n");
+	for(PAMap *p = pamap; p != nil; p = p->next){
+		assert(p->type <= PamKRDWR);
+		print("    [%#P, %#P) %-8s (%llu)\n",
+			p->addr, p->addr + p->size,
+			pamtypename(p->type), p->size);
+	}
+	print("}\n");
+}
+
+PAMap *
+pamapnew(uintmem addr, usize size, int type)
+{
+	PAMap *m = malloc(sizeof(*m));
+	assert(m != nil);
+	memset(m, 0, sizeof(*m));
+	m->addr = addr;
+	m->size = size;
+	m->type = type;
+	m->next = nil;
+
+	return m;
+}
+
+static void
+pamapclearrange(uintmem addr, usize size, int type)
+{
+	PAMap **ppp = &pamap, *np = pamap;
+	while(np != nil && size > 0){
+		if(addr+size <= np->addr)
+			break;		// The range isn't in the list.
+
+		// Skip nodes that end at or before the start of the range.
+		if(np->addr < addr && np->addr+np->size <= addr){
+			ppp = &np->next;
+			np = np->next;
+			continue;
+		}
+
+		// We found overlap.
+		//
+		// If the left-side overlaps, adjust the current
+		// node to end at the overlap, and insert a new
+		// node at the overlap point.  It may be immediately
+		// deleted, but that's ok.
+		//
+		// If the right side overlaps, adjust size and
+		// delta accordingly.
+		//
+		// In both cases, we're trying to get the overlap
+		// to start at the same place.
+		//
+		// If the ranges overlap and start at the same
+		// place, adjust the current node and remove it if
+		// it becomes empty.
+		if(np->addr < addr){
+			assert(addr < np->addr + np->size);
+			uintmem osize = np->size;
+			np->size = addr - np->addr;
+			PAMap *tp = pamapnew(addr, osize - np->size, np->type);
+			tp->next = np->next;
+			np->next = tp;
+			ppp = &np->next;
+			np = tp;
+		}else if(addr < np->addr){
+			assert(np->addr < addr + size);
+			usize delta = np->addr - addr;
+			addr += delta;
+			size -= delta;
+		}
+		if(addr == np->addr){
+			usize delta = size;
+			if (delta > np->size)
+				delta = np->size;
+			np->size -= delta;
+			np->addr += delta;
+			addr += delta;
+			size -= delta;
+		}
+
+		// If the resulting range is empty, remove it.
+		if(np->size == 0){
+			PAMap *tmp = np->next;
+			*ppp = tmp;
+			free(np);
+			np = tmp;
+			continue;
+		}
+		ppp = &np->next;
+		np = np->next;
+	}
+}
+
+void
+pamapinsert(uintmem addr, usize size, int type)
+{
+	PAMap *np, *pp, **ppp;
+
+	assert(type <= PamKRDWR);
+
+	// Ignore empty regions.
+	if(size == 0)
+		return;
+
+	// If the list is empty, just add the entry and return.
+	if(pamap == nil){
+		pamap = pamapnew(addr, size, type);
+		return;
+	}
+
+	// Remove this region from any existing regions.
+	pamapclearrange(addr, size, type);
+
+	// Find either a map entry with an address greater
+	// than that being inserted, or the end of the map.
+	ppp = &pamap;
+	np = pamap;
+	pp = nil;
+	while(np != nil && np->addr <= addr){
+		ppp = &np->next;
+		pp = np;
+		np = np->next;
+	}
+
+	// See if we can combine with previous region.
+	if(pp != nil && pp->type == type && pp->addr+pp->size == addr){
+		pp->size += size;
+
+		// And successor region?  If we do it here,
+		// we free the successor node.
+		if(np != nil && np->type == type && addr+size == np->addr){
+			pp->size += np->size;
+			pp->next = np->next;
+			free(np);
+		}
+
+		return;
+	}
+
+	// Can we combine with the successor region?
+	if(np != nil && np->type == type && addr+size == np->addr){
+		np->addr = addr;
+		np->size += size;
+		return;
+	}
+
+	// Insert a new node.
+	pp = pamapnew(addr, size, type);
+	*ppp = pp;
+	pp->next = np;
+}
+
+void
+pamapmerge(void)
+{
+	// Extended BIOS Data Area
+	pamapinsert(0x80000, 0xA0000-0x80000, PamKRDWR);
+
+	// VGA/CGA MMIO region
+	pamapinsert(0xA0000, 0xC0000-0xA0000, PamDEV);
+
+	// BIOS ROM stuff
+	pamapinsert(0xC0000, 0xF0000-0xC0000, PamKRDONLY);
+	pamapinsert(0xF0000, 0x100000-0xF0000, PamKRDONLY);
+
+	// Add the kernel segments.
+	pamapinsert(PADDR((void*)KSYS), KTZERO-KSYS, PamKRDWR);
+	pamapinsert(PADDR((void*)KTZERO), etext-(char*)KTZERO, PamKTEXT);
+	pamapinsert(PADDR(etext), erodata-etext, PamKRDONLY);
+	pamapinsert(PADDR(erodata), edata-erodata, PamKRDWR);
+	pamapinsert(PADDR(edata), end-edata, PamKRDWR);
+}

+ 0 - 2
sys/src/9/amd64/physalloc.c

@@ -538,6 +538,4 @@ physinit(uintmem a, uint64_t size)
 
 		iimbchunk(b, addr, addr+len, 0);
 	}
-
-
 }

+ 4 - 2
sys/src/9/amd64/qmalloc.c

@@ -656,12 +656,14 @@ setmalloctag(void* v, uint32_t i)
 void
 mallocinit(void)
 {
+	static alignas(2*MiB) unsigned char kheap[256 *MiB];
+
 	if(tailptr != nil)
 		return;
 
-	tailbase = UINT2PTR(sys->vmunused);
+	tailbase = (Header *)kheap;
 	tailptr = tailbase;
-	tailnunits = NUNITS(sys->vmend - sys->vmunused);
+	tailnunits = NUNITS(sizeof(kheap));
 	print("base %#p ptr %#p nunits %u\n", tailbase, tailptr, tailnunits);
 }
 

+ 32 - 42
sys/src/9/amd64/sipi.c

@@ -16,39 +16,40 @@
 #include "apic.h"
 
 #undef DBG
-#define DBG print
+#define DBG if(1)print
 #define SIPIHANDLER	(KZERO+0x3000)
 
 void
 sipi(void)
 {
-	extern int b1978;
+	extern char b1978, e1978;
 	Apic *apic;
-	Mach *mach;
+	volatile Mach *mach;
 	int apicno, i;
-	uint32_t *sipiptr;
+	volatile uint64_t *sipiptr;
 	uintmem sipipa;
-	uint8_t *alloc, *p;
+	uint8_t *p;
 	extern void squidboy(int);
+	usize apsize;
 
 	/*
 	 * Move the startup code into place,
 	 * must be aligned properly.
 	 */
-	sipipa = mmuphysaddr(SIPIHANDLER);
-	if((sipipa & (4*KiB - 1)) || sipipa > (1*MiB - 2*4*KiB))
-		return;
-	sipiptr = UINT2PTR(SIPIHANDLER);
-	memmove(sipiptr, &b1978, 4096);
+	sipipa = mmuphysaddr(UINT2PTR(machp()->MMU.pml4->va), SIPIHANDLER);
+	if((sipipa == 0 || sipipa & (PGSZ - 1)) || sipipa > (1*MiB - 2*PGSZ))
+		panic("sipi: SIPI page improperly aligned or too far away, pa %#p", sipipa);
+	sipiptr = KADDR(sipipa);
 	DBG("sipiptr %#p sipipa %#llx\n", sipiptr, sipipa);
+	memmove((void *)sipiptr, &b1978, &e1978-&b1978);
 
 	/*
-	 * Notes:
-	 * The Universal Startup Algorithm described in the MP Spec. 1.4.
-	 * The data needed per-processor is the sum of the stack, page
-	 * table pages, vsvm page and the Mach page. The layout is similar
-	 * to that described in data.h for the bootstrap processor, but
-	 * with any unused space elided.
+	 * Notes: SMP startup algorithm.
+	 * The data needed per-processor is the sum of the stack,
+	 * vsvm pages and the Mach page. The layout is similar
+	 * to that described in dat.h for the bootstrap processor,
+	 * but with no unused space, and we use the page tables from
+	 * early boot.
 	 */
 	for(apicno = 0; apicno < Napic; apicno++){
 		apic = &xlapic[apicno];
@@ -60,16 +61,13 @@ sipi(void)
 		 * bootstrap processor, until the lsipi code is worked out,
 		 * so only the Mach and stack portions are used below.
 		 */
-		alloc = mallocalign(MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ, 4096, 0, 0);
-		if(alloc == nil)
-			continue;
-		memset(alloc, 0, MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ);
-		p = alloc+MACHSTKSZ;
-
-		sipiptr[-1] = mmuphysaddr(PTR2UINT(p));
-		DBG("p %#p sipiptr[-1] %#x\n", p, sipiptr[-1]);
+		apsize = MACHSTKSZ+PTSZ+PGSZ;
+		p = mallocalign(apsize+MACHSZ, PGSZ, 0, 0);
+		if(p == nil)
+			panic("sipi: cannot allocate for apicno %d", apicno);
 
-		p += 4*PTSZ+4*KiB;
+		sipiptr[511] = PTR2UINT(p);
+		DBG("sipi mem for apicid %d is %#p sipiptr[511] %#llx\n", apicno, p, sipiptr[511]);
 
 		/*
 		 * Committed. If the AP startup fails, can't safely
@@ -77,33 +75,25 @@ sipi(void)
 		 * the AP is up to. Perhaps should try to put it
 		 * back into the INIT state?
 		 */
-		mach = (Mach*)p;
+		mach = (volatile Mach*)(p+apsize);
+		mach->self = PTR2UINT(mach);
 		mach->machno = apic->Lapic.machno;		/* NOT one-to-one... */
 		mach->splpc = PTR2UINT(squidboy);
 		mach->apicno = apicno;
-		mach->stack = PTR2UINT(alloc);
-		mach->vsvm = alloc+MACHSTKSZ+4*PTSZ;
-//OH OH		mach->pml4 = (PTE*)(alloc+MACHSTKSZ);
-
-		p = KADDR(0x467);
-		*p++ = sipipa;
-		*p++ = sipipa>>8;
-		*p++ = 0;
-		*p = 0;
+		mach->stack = PTR2UINT(p);
+		mach->vsvm = p+MACHSTKSZ+PTSZ;
 
-		nvramwrite(0x0f, 0x0a);
-		//print("APICSIPI: %d, %p\n", apicno, (void *)sipipa);
+		DBG("APICSIPI: %d, %p\n", apicno, (void *)sipipa);
 		apicsipi(apicno, sipipa);
 
-		for(i = 0; i < 1000; i++){
+		for(i = 0; i < 5000; i++){
 			if(mach->splpc == 0)
 				break;
-			millidelay(5);
+			millidelay(1);
 		}
-		nvramwrite(0x0f, 0x00);
 
-		/*DBG("mach %#p (%#p) apicid %d machno %2d %dMHz\n",
+		DBG("mach %#p (%#p) apicid %d machno %2d %dMHz\n",
 			mach, sys->machptr[mach->machno],
-			apicno, mach->machno, mach->cpumhz);*/
+			apicno, mach->machno, mach->cpumhz);
 	}
 }

+ 4 - 19
sys/src/9/amd64/trap.c

@@ -21,14 +21,6 @@
 #include	"io.h"
 #include	"amd64.h"
 
-
-// counters. Set by assembly code.
-// interrupt enter and exit, systecm call enter and exit.
-unsigned long ire, irx, sce, scx;
-// Did we start doing an exit for the interrupts?
-// ir exit entry :-)
-unsigned long irxe;
-
 extern int notify(Ureg*);
 
 static void debugbpt(Ureg*, void*);
@@ -440,7 +432,7 @@ trap(Ureg* ureg)
 	uint64_t gsbase = rdmsr(GSbase);
 	//if (sce > scx) iprint("====================");
 	lastvno = vno;
-	if (gsbase < 1ULL<<63)
+	if (gsbase < KZERO)
 		die("bogus gsbase");
 	Proc *up = externup();
 	char buf[ERRMAX];
@@ -606,8 +598,6 @@ dumpgpr(Ureg* ureg)
 void
 dumpregs(Ureg* ureg)
 {
-die("dumpregs");
-
 	dumpgpr(ureg);
 
 	/*
@@ -620,8 +610,6 @@ die("dumpregs");
 	print("cr0\t%#16.16llx\n", cr0get());
 	print("cr2\t%#16.16llx\n", machp()->MMU.cr2);
 	print("cr3\t%#16.16llx\n", cr3get());
-die("dumpregs");
-//	archdumpregs();
 }
 
 /*
@@ -747,17 +735,12 @@ faultamd64(Ureg* ureg, void* v)
 	}
 
 	ftype = (ureg->error&2) ? FT_WRITE : (ureg->error&16) ? FT_EXEC : FT_READ;
-/*
-if (read) hi("read fault\n"); else hi("write fault\n");
-hi("addr "); put64(addr); hi("\n");
- */
 
 	insyscall = up->insyscall;
 	up->insyscall = 1;
 	if (0)hi("call fault\n");
 
 	if(fault(addr, ureg->ip, ftype) < 0){
-iprint("could not %s fault %p\n", faulttypes[ftype], addr);
 		/*
 		 * It is possible to get here with !user if, for example,
 		 * a process was in a system call accessing a shared
@@ -769,8 +752,10 @@ iprint("could not %s fault %p\n", faulttypes[ftype], addr);
 		 * (up->nerrlab != 0) if this is a system call, if not then
 		 * the game's a bogey.
 		 */
-		if(!user && (!insyscall || up->nerrlab == 0))
+		if(!user && (!insyscall || up->nerrlab == 0)){
+			dumpregs(ureg);
 			panic("fault: %#llx\n", addr);
+		}
 		sprint(buf, "sys: trap: fault %s addr=%#llx",
 			faulttypes[ftype], addr);
 		postnote(up, 1, buf, NDebug);

+ 4 - 8
sys/src/9/amd64/vsvm.c

@@ -178,18 +178,14 @@ vsvminit(int size, int nixtype, Mach *mach)
 	//*(uintptr_t*)mach->stack = STACKGUARD;
 	tssinit(mach, mach->stack+size);
 	gdtput(sizeof(gdt64)-1, PTR2UINT(mach->gdt), SSEL(SiCS, SsTIGDT|SsRPL0));
+	trput(SSEL(SiTSS, SsTIGDT|SsRPL0));
+
+	idtput(sizeof(idt64)-1, PTR2UINT(idt64));
 
 #if 0 // NO ACs YET
-	if(nixtype != NIXAC)
-#endif
-		idtput(sizeof(idt64)-1, PTR2UINT(idt64));
-#if 0
-	else
+	if(nixtype == NIXAC)
 		idtput(sizeof(acidt64)-1, PTR2UINT(acidt64));
 #endif
-	// I have no idea how to do this another way.
-	//trput(SSEL(SiTSS, SsTIGDT|SsRPL0));
-	asm volatile("ltr %w0"::"q" (SSEL(SiTSS, SsTIGDT|SsRPL0)));
 
 	wrmsr(FSbase, 0ull);
 	wrmsr(GSbase, PTR2UINT(mach));

+ 11 - 8
sys/src/9/boot/aux.c

@@ -126,7 +126,7 @@ int
 outin(char *prompt, char *def, int len)
 {
 	int n;
-	char buf[256];
+	char *p, buf[256];
 
 	if(len >= sizeof buf)
 		len = sizeof(buf)-1;
@@ -138,17 +138,20 @@ outin(char *prompt, char *def, int len)
 	print("%s[%s]: ", prompt, *def ? def : "no default");
 	memset(buf, 0, sizeof buf);
 	n = read(0, buf, len);
-
 	if(cpuflag){
 		alarm(0);
 		notify(0);
 	}
-
-	if(n < 0){
+	if(n < 0)
 		return 1;
-	}
-	if (n > 1) {
-		strncpy(def, buf, len);
-	}
+	buf[sizeof(buf)-1] = '\0';
+	p = strchr(buf, '\n');
+	if(p != nil)
+		*p = '\0';
+	p = strchr(buf, '\r');
+	if(p != nil)
+		*p = '\0';
+	if(n > 0)
+		strlcpy(def, buf, len);
 	return n;
 }

+ 13 - 6
sys/src/9/boot/boot.c

@@ -135,9 +135,14 @@ boot(int argc, char *argv[])
 	if(method[0].name == nil)
 		fatal("no boot methods");
 	mp = rootserver(argc ? *argv : 0);
-	(*mp->config)(mp);
-	islocal = strcmp(mp->name, "local") == 0;
-	ishybrid = strcmp(mp->name, "hybrid") == 0;
+	if(mp==nil){
+		configrc(mp);
+		for(;;){}
+	}else{
+		(*mp->config)(mp);
+		islocal = strcmp(mp->name, "local") == 0;
+		ishybrid = strcmp(mp->name, "hybrid") == 0;
+	}
 
 	/*
 	 *  load keymap if it is there.
@@ -269,10 +274,10 @@ findmethod(char *a)
 
 static void catstuff(void)
 {
-	static char *files[] = {"#c/drivers", "#P/ioalloc", "#P/irqalloc", "#t", nil};
-	static char dat[8192];
+	char *files[] = {"#c/drivers", "#P/ioalloc", "#P/irqalloc"};
+	char dat[8192];
 	int rc, ifd, pid;
-	for (int i = 0; files[i]; i++) {
+	for (int i = 0; i < nelem(files); i++) {
 		memset(dat, 0, sizeof(dat));
 		rc = readfile(files[i], dat, sizeof(dat)-1);
 		if (rc) {
@@ -324,6 +329,7 @@ rootserver(char *arg)
 	if (1)
 		catstuff();
 	/* look for required reply */
+	memset(reply, 0, sizeof(reply));
 	rc = readfile("#e/nobootprompt", reply, sizeof(reply));
 	if(rc == 0 && reply[0]){
 		mp = findmethod(reply);
@@ -341,6 +347,7 @@ rootserver(char *arg)
 	sprint(prompt+n, ")");
 
 	/* create default reply */
+	memset(reply, 0, sizeof(reply));
 	readfile("#e/bootargs", reply, sizeof(reply));
 	if(reply[0] == 0 && arg != 0)
 		strcpy(reply, arg);

+ 3 - 1
sys/src/9/boot/bootip.c

@@ -14,7 +14,7 @@
 #include "boot.h"
 
 static	uint8_t	fsip[IPaddrlen];
-	uint8_t	auip[IPaddrlen];
+static	uint8_t	auip[IPaddrlen];
 static	char	mpoint[32];
 
 static int isvalidip(uint8_t*);
@@ -103,6 +103,7 @@ configip(int bargc, char **bargv, int needfs)
 	if(!isvalidip(fsip))
 		netenv("fs", fsip);
 	while(!isvalidip(fsip)){
+		memset(buf, 0, sizeof(buf));
 		outin("filesystem IP address", buf, sizeof(buf));
 		if (parseip(fsip, buf) == -1)
 			fprint(2, "configip: can't parse fs ip %s\n", buf);
@@ -112,6 +113,7 @@ configip(int bargc, char **bargv, int needfs)
 	if(!isvalidip(auip))
 		netenv("auth", auip);
 	while(!isvalidip(auip)){
+		memset(buf, 0, sizeof(buf));
 		outin("authentication server IP address", buf, sizeof(buf));
 		if (parseip(auip, buf) == -1)
 			fprint(2, "configip: can't parse auth ip %s\n", buf);

+ 0 - 1
sys/src/9/boot/local.c

@@ -69,7 +69,6 @@ configlocal(Method *mp)
 	} else {
 		disk = "#s/sdE0/";
 	}
-print("configlocal: disk is %s\n", disk);
 	/* if we've decided on one, pass it on to all programs */
 	if(disk) {
 		setenv("bootdisk", disk);

+ 2 - 1
sys/src/9/port/devcons.c

@@ -391,7 +391,8 @@ panic(char *fmt, ...)
 	//prflush();
 	buf[n] = '\n';
 	putstrn(buf, n+1);
-	//dumpstack();
+	dumpstack();
+	stacksnippet();
 	delay(1000);	/* give time to consoles */
 	die("wait forever");
 	exit(1);

+ 4 - 2
sys/src/9/port/fault.c

@@ -52,9 +52,11 @@ fault(uintptr_t addr, uintptr_t pc, int ftype)
 	spllo();
 	for(i = 0;; i++) {
 		s = seg(up, addr, 1);	 /* leaves s->lk qlocked if seg != nil */
-		//print("%s fault seg for %p is %p base %p top %p\n", faulttypes[ftype], addr, s, s->base, s->top);
-		if(s == nil)
+		if(s == nil){
+			//iprint("fault seg is nil\n");
 			goto fail;
+		}
+		//iprint("%s fault seg for %p is %p base %p top %p\n", faulttypes[ftype], addr, s, s->base, s->top);
 		if(ftype == FT_READ && (s->type&SG_READ) == 0)
 			goto fail;
 		if(ftype == FT_WRITE && (s->type&SG_WRITE) == 0)

+ 2 - 1
sys/src/9/port/lib.h

@@ -33,7 +33,7 @@ extern	int	strcmp(const char*, const char*);
 extern	char*	strcpy(char*, const char*);
 extern	char*	strecpy(char*, char*, const char*);
 extern	char*	strncat(char*, const char*, int32_t);
-extern	char*	strlcpy(char*, const char*, int32_t);
+extern	size_t	strlcpy(char*, const char*, int32_t);
 extern	char*	strncpy(char*, const char*, int32_t);
 extern	int	strncmp(const char*, const char*, int32_t);
 extern	char*	strrchr(const char*, int);
@@ -261,6 +261,7 @@ struct Zio
 };
 
 extern	char	etext[];
+extern	char	erodata[];
 extern	char	edata[];
 extern	char	end[];
 

+ 2 - 1
sys/src/9/port/portfns.h

@@ -139,7 +139,7 @@ int		freebroken(void);
 void		freepte(Segment*, Pte*);
 void		getcolor(uint32_t, uint32_t*, uint32_t*, uint32_t*);
 char*		getconfenv(void);
-int		getpgszi(uint32_t);
+int		getpgszi(usize);
 Segment*	getzkseg(void);
 void		gotolabel(Label*);
 int		haswaitq(void*);
@@ -203,6 +203,7 @@ void		microdelay(int);
 uint64_t	mk64fract(uint64_t, uint64_t);
 void		mkqid(Qid*, int64_t, uint32_t, int);
 void		mmuflush(void);
+void		mmuprocinit(Proc*);
 void		mmuput(uintptr_t, Page*, uint);
 void		mmurelease(Proc*);
 void		mmuswitch(Proc*);

+ 3 - 1
sys/src/9/port/proc.c

@@ -257,6 +257,7 @@ sched(void)
 		machp()->schedticks = machp()->ticks + HZ/10;
 	machp()->readied = 0;
 	machp()->externup = p;
+	assert(p != nil);
 	up = p;
 	up->nqtrap = 0;
 	up->nqsyscall = 0;
@@ -265,7 +266,7 @@ sched(void)
 	machp()->proc = up;
 	mmuswitch(up);
 
-		      assert(!up->wired || up->wired == machp());
+	assert(!up->wired || up->wired == machp());
 	gotolabel(&up->sched);
 }
 
@@ -850,6 +851,7 @@ newproc(void)
 	p->ac = nil;
 	p->nfullq = 0;
 	memset(&p->MMU, 0, sizeof p->MMU);
+	mmuprocinit(p);
 	return p;
 }
 

+ 1 - 2
sys/src/9/riscv/cflags.json

@@ -2,7 +2,6 @@
 	{
 		"Name": "buildflags",
 		"Cflags": [
-			"-mcmodel=medany",
 			"-ffreestanding",
 			"-fno-builtin",
 			"-fno-omit-frame-pointer",
@@ -20,4 +19,4 @@
 			"--no-relax"
 		]
 	}
-]
+]

+ 0 - 1
sys/src/9/riscv/core.json

@@ -2,7 +2,6 @@
 	{
 		"Name": "core",
 		"Cflags": [
-			"-mcmodel=medany",
 			"-O0",
 			"-static",
 			"-ffreestanding",

+ 7 - 0
sys/src/9/riscv/mmu.c

@@ -824,3 +824,10 @@ mmuinit(void)
 
 	mmuphysaddr(PTR2UINT(end));
 }
+
+// This is a no-op on RISC-V.
+void
+mmuprocinit(Proc *p)
+{
+	USED(p);
+}

+ 1 - 1
util/src/harvey/cmd/qtap/main.go

@@ -10,8 +10,8 @@ import (
 )
 
 func main() {
+	os.Setenv("UROOT_QEMU", "qemu-system-x86_64 -m 512");
 	opts := &qemu.Options{
-		QEMUPath:     "qemu-system-x86_64",
 		Kernel:       os.ExpandEnv("$HARVEY/sys/src/9/amd64/harvey.32bit"),
 		SerialOutput: os.Stdout,
 		KernelArgs:   "service=terminal nobootprompt=tcp maxcores=1024 nvram=/boot/nvram nvrlen=512 nvroff=0 acpiirq=1",

Some files were not shown because too many files changed in this diff