Browse Source

New demand loading scheme and segment setup,
New elf64 loader, remove sysexec dependency to a.out.h

Change-Id: Ifb1c6d8aeb2619646303a15267164b9f9b0ecfed

Aki Nyrhinen 8 years ago
parent
commit
0f2a855029

+ 2 - 1
sys/src/9/k10/main.c

@@ -41,6 +41,7 @@ Mach *entrym;
  * Optionsinit() is called from multiboot() to
  * set it all up.
  */
+char *cputype = "amd64";
 static int64_t oargc;
 static char* oargv[20];
 static char oargb[128];
@@ -599,7 +600,7 @@ init0(void)
 	if(!waserror()){
 		snprint(buf, sizeof(buf), "%s %s", "AMD64", conffile);
 		ksetenv("terminal", buf, 0);
-		ksetenv("cputype", "amd64", 0);
+		ksetenv("cputype", cputype, 0);
 		if(cpuserver)
 			ksetenv("service", "cpu", 0);
 		else

+ 2 - 2
sys/src/9/port/devproc.c

@@ -1869,7 +1869,7 @@ txt2data(Proc *p, Segment *s)
 	ps = newseg(SG_DATA, s->base, s->size);
 	ps->image = s->image;
 	incref(ps->image);
-	ps->ph = s->ph;
+	ps->ldseg = s->ldseg;
 	ps->flushme = 1;
 
 	qlock(&p->seglock);
@@ -1896,7 +1896,7 @@ data2txt(Segment *s)
 	ps = newseg(SG_TEXT, s->base, s->size);
 	ps->image = s->image;
 	incref(ps->image);
-	ps->ph = s->ph;
+	ps->ldseg = s->ldseg;
 	ps->flushme = 1;
 
 	return ps;

+ 482 - 0
sys/src/9/port/elf64.c

@@ -0,0 +1,482 @@
+#include	"u.h"
+#include	"tos.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"getput.h"
+
+typedef struct Elf64_Ehdr Elf64_Ehdr;
+typedef struct Elf64_Phdr Elf64_Phdr;
+
+enum {	// ELF header and program header constants used by the elf64 loader in this file
+	EI_MAG0 = 0,	// EI_*: indices into e_ident[] -- File identification
+	EI_MAG1 = 1,
+	EI_MAG2 = 2,
+	EI_MAG3 = 3,
+	EI_CLASS = 4,	// File class
+		ELFCLASS32 = 1,	// 32-bit objects
+		ELFCLASS64 = 2,	// 64-bit objects
+	EI_DATA = 5,	// Data encoding
+		ELFDATA2LSB = 1,	// Object file data structures are littleendian
+		ELFDATA2MSB = 2,	// Object file data structures are bigendian
+	EI_VERSION = 6,	// File version
+	EI_OSABI = 7,	// OS/ABI identification
+		ELFOSABI_SYSV = 0,	// System V ABI
+		ELFOSABI_HPUX = 1,
+	EI_ABIVERSION = 8,	// ABI version
+	EI_PAD = 9,	// Start of padding bytes
+	EI_NIDENT = 16,	// Size of e_ident[]
+
+	ET_NONE = 0,	// ET_*: object file types -- No file type
+	ET_REL = 1,	// Relocatable object file
+	ET_EXEC = 2,	// Executable file
+	ET_DYN = 3,	// Shared object file
+	ET_CORE = 4,	// Core file
+
+	PT_NULL = 0,	// PT_*: program header p_type values -- Unused entry
+	PT_LOAD = 1,	// Loadable segment
+	PT_DYNAMIC = 2,	// Dynamic linking tables
+	PT_INTERP = 3,	// Program interpreter path name
+	PT_NOTE = 4,	// Note sections
+	PT_SHLIB = 5,	// Reserved
+	PT_PHDR = 6,	// Program header table
+		EM_NONE = 0, //  EM_*: e_machine values (the extra indent is cosmetic; these are not sub-values of PT_PHDR) -- No
+		EM_M32 = 1, //  AT&T WE
+		EM_SPARC = 2, //
+		EM_386 = 3, //  Intel
+		EM_68K = 4, //  Motorola
+		EM_88K = 5, //  Motorola
+		EM_IAMCU = 6, //  Intel
+		EM_860 = 7, //  Intel
+		EM_MIPS = 8, //  MIPS I
+		EM_S370 = 9, //  IBM System/370
+		EM_MIPS_RS3_LE = 10, //  MIPS RS3000
+		// reserved
+		EM_PARISC = 15, //  Hewlett-Packard
+		// reserved
+		EM_VPP500 = 17, //  Fujitsu
+		EM_SPARC32PLUS = 18, //  Enhanced instruction set
+		EM_960 = 19, //  Intel
+		EM_PPC = 20, //
+		EM_PPC64 = 21, //  64-bit
+		EM_S390 = 22, //  IBM System/390
+		EM_SPU = 23, //  IBM
+		// reserved
+		EM_V800 = 36, //  NEC
+		EM_FR20 = 37, //  Fujitsu
+		EM_RH32 = 38, //  TRW
+		EM_RCE = 39, //  Motorola
+		EM_ARM = 40, //  ARM 32-bit architecture
+		EM_ALPHA = 41, //  Digital
+		EM_SH = 42, //  Hitachi
+		EM_SPARCV9 = 43, //  SPARC Version
+		EM_TRICORE = 44, //  Siemens TriCore embedded
+		EM_ARC = 45, //  Argonaut RISC Core, Argonaut Technologies
+		EM_H8_300 = 46, //  Hitachi
+		EM_H8_300H = 47, //  Hitachi
+		EM_H8S = 48, //  Hitachi
+		EM_H8_500 = 49, //  Hitachi
+		EM_IA_64 = 50, //  Intel IA-64 processor
+		EM_MIPS_X = 51, //  Stanford
+		EM_COLDFIRE = 52, //  Motorola
+		EM_68HC12 = 53, //  Motorola
+		EM_MMA = 54, //  Fujitsu MMA Multimedia
+		EM_PCP = 55, //  Siemens
+		EM_NCPU = 56, //  Sony nCPU embedded RISC
+		EM_NDR1 = 57, //  Denso NDR1
+		EM_STARCORE = 58, //  Motorola Star*Core
+		EM_ME16 = 59, //  Toyota ME16
+		EM_ST100 = 60, //  STMicroelectronics ST100
+		EM_TINYJ = 61, //  Advanced Logic Corp. TinyJ embedded processor
+		EM_X86_64 = 62, //  AMD x86-64
+		EM_PDSP = 63, //  Sony DSP
+		EM_PDP10 = 64, //  Digital Equipment Corp.
+		EM_PDP11 = 65, //  Digital Equipment Corp.
+		EM_FX66 = 66, //  Siemens FX66
+		EM_ST9PLUS = 67, //  STMicroelectronics ST9+ 8/16 bit
+		EM_ST7 = 68, //  STMicroelectronics ST7 8-bit
+		EM_68HC16 = 69, //  Motorola MC68HC16
+		EM_68HC11 = 70, //  Motorola MC68HC11
+		EM_68HC08 = 71, //  Motorola MC68HC08
+		EM_68HC05 = 72, //  Motorola MC68HC05
+		EM_SVX = 73, //  Silicon Graphics
+		EM_ST19 = 74, //  STMicroelectronics ST19 8-bit
+		EM_VAX = 75, //  Digital
+		EM_CRIS = 76, //  Axis Communications 32-bit embedded
+		EM_JAVELIN = 77, //  Infineon Technologies 32-bit embedded
+		EM_FIREPATH = 78, //  Element 14 64-bit DSP
+		EM_ZSP = 79, //  LSI Logic 16-bit DSP
+		EM_MMIX = 80, //  Donald Knuth's educational 64-bit
+		EM_HUANY = 81, //  Harvard University machine-independent object
+		EM_PRISM = 82, //  SiTera
+		EM_AVR = 83, //  Atmel AVR 8-bit
+		EM_FR30 = 84, //  Fujitsu
+		EM_D10V = 85, //  Mitsubishi
+		EM_D30V = 86, //  Mitsubishi
+		EM_V850 = 87, //  NEC
+		EM_M32R = 88, //  Mitsubishi
+		EM_MN10300 = 89, //  Matsushita
+		EM_MN10200 = 90, //  Matsushita
+		EM_PJ = 91, //
+		EM_OPENRISC = 92, //  OpenRISC 32-bit embedded
+		EM_ARC_COMPACT = 93, //  ARC International ARCompact processor (old spelling/synonym:
+		EM_XTENSA = 94, //  Tensilica Xtensa
+		EM_VIDEOCORE = 95, //  Alphamosaic VideoCore
+		EM_TMM_GPP = 96, //  Thompson Multimedia General Purpose
+		EM_NS32K = 97, //  National Semiconductor 32000
+		EM_TPC = 98, //  Tenor Network TPC
+		EM_SNP1K = 99, //  Trebia SNP 1000
+		EM_ST200 = 100, //  STMicroelectronics (www.st.com) ST200
+		EM_IP2K = 101, //  Ubicom IP2xxx microcontroller
+		EM_MAX = 102, //  MAX
+		EM_CR = 103, //  National Semiconductor CompactRISC
+		EM_F2MC16 = 104, //  Fujitsu
+		EM_MSP430 = 105, //  Texas Instruments embedded microcontroller
+		EM_BLACKFIN = 106, //  Analog Devices Blackfin (DSP)
+		EM_SE_C33 = 107, //  S1C33 Family of Seiko Epson
+		EM_SEP = 108, //  Sharp embedded
+		EM_ARCA = 109, //  Arca RISC
+		EM_UNICORE = 110, //  Microprocessor series from PKU-Unity Ltd. and MPRC of Peking
+		EM_EXCESS = 111, //  eXcess: 16/32/64-bit configurable embedded
+		EM_DXP = 112, //  Icera Semiconductor Inc. Deep Execution
+		EM_ALTERA_NIOS2 = 113, //  Altera Nios II soft-core
+		EM_CRX = 114, //  National Semiconductor CompactRISC CRX
+		EM_XGATE = 115, //  Motorola XGATE embedded
+		EM_C166 = 116, //  Infineon C16x/XC16x
+		EM_M16C = 117, //  Renesas M16C series
+		EM_DSPIC30F = 118, //  Microchip Technology dsPIC30F Digital Signal
+		EM_CE = 119, //  Freescale Communication Engine RISC
+		EM_M32C = 120, //  Renesas M32C series
+		// reserved
+		EM_TSK3000 = 131, //  Altium TSK3000
+		EM_RS08 = 132, //  Freescale RS08 embedded
+		EM_SHARC = 133, //  Analog Devices SHARC family of 32-bit DSP
+		EM_ECOG2 = 134, //  Cyan Technology eCOG2
+		EM_SCORE7 = 135, //  Sunplus S+core7 RISC
+		EM_DSP24 = 136, //  New Japan Radio (NJR) 24-bit DSP
+		EM_VIDEOCORE3 = 137, //  Broadcom VideoCore III
+		EM_LATTICEMICO32 = 138, //  RISC processor for Lattice FPGA
+		EM_SE_C17 = 139, //  Seiko Epson C17
+		EM_TI_C6000 = 140, //  The Texas Instruments TMS320C6000 DSP
+		EM_TI_C2000 = 141, //  The Texas Instruments TMS320C2000 DSP
+		EM_TI_C5500 = 142, //  The Texas Instruments TMS320C55x DSP
+		EM_TI_ARP32 = 143, //  Texas Instruments Application Specific RISC Processor, 32bit
+		EM_TI_PRU = 144, //  Texas Instruments Programmable Realtime
+		// reserved
+		EM_MMDSP_PLUS = 160, //  STMicroelectronics 64bit VLIW Data Signal
+		EM_CYPRESS_M8C = 161, //  Cypress M8C
+		EM_R32C = 162, //  Renesas R32C series
+		EM_TRIMEDIA = 163, //  NXP Semiconductors TriMedia architecture
+		EM_QDSP6 = 164, //  QUALCOMM DSP6
+		EM_8051 = 165, //  Intel 8051 and
+		EM_STXP7X = 166, //  STMicroelectronics STxP7x family of configurable and extensible RISC
+		EM_NDS32 = 167, //  Andes Technology compact code size embedded RISC processor
+		EM_ECOG1 = 168, //  Cyan Technology eCOG1X
+		EM_ECOG1X = 168, //  Cyan Technology eCOG1X (same value as EM_ECOG1)
+		EM_MAXQ30 = 169, //  Dallas Semiconductor MAXQ30 Core
+		EM_XIMO16 = 170, //  New Japan Radio (NJR) 16-bit DSP
+		EM_MANIK = 171, //  M2000 Reconfigurable RISC
+		EM_CRAYNV2 = 172, //  Cray Inc. NV2 vector
+		EM_RX = 173, //  Renesas RX
+		EM_METAG = 174, //  Imagination Technologies META processor
+		EM_MCST_ELBRUS = 175, //  MCST Elbrus general purpose hardware
+		EM_ECOG16 = 176, //  Cyan Technology eCOG16
+		EM_CR16 = 177, //  National Semiconductor CompactRISC CR16 16-bit
+		EM_ETPU = 178, //  Freescale Extended Time Processing
+		EM_SLE9X = 179, //  Infineon Technologies SLE9X
+		EM_L10M = 180, //  Intel
+		EM_K10M = 181, //  Intel
+		// reserved(Intel)
+		EM_AARCH64 = 183, //  ARM 64-bit architecture
+		// reserved(ARM)
+		EM_AVR32 = 185, //  Atmel Corporation 32-bit microprocessor
+		EM_STM8 = 186, //  STMicroeletronics STM8 8-bit
+		EM_TILE64 = 187, //  Tilera TILE64 multicore architecture
+		EM_TILEPRO = 188, //  Tilera TILEPro multicore architecture
+		EM_MICROBLAZE = 189, //  Xilinx MicroBlaze 32-bit RISC soft processor
+		EM_CUDA = 190, //  NVIDIA CUDA
+		EM_TILEGX = 191, //  Tilera TILE-Gx multicore architecture
+		EM_CLOUDSHIELD = 192, //  CloudShield architecture
+		EM_COREA_1ST = 193, //  KIPO-KAIST Core-A 1st generation processor
+		EM_COREA_2ND = 194, //  KIPO-KAIST Core-A 2nd generation processor
+		EM_ARC_COMPACT2 = 195, //  Synopsys ARCompact
+		EM_OPEN8 = 196, //  Open8 8-bit RISC soft processor
+		EM_RL78 = 197, //  Renesas RL78
+		EM_VIDEOCORE5 = 198, //  Broadcom VideoCore V
+		EM_78KOR = 199, //  Renesas 78KOR
+		EM_56800EX = 200, //  Freescale 56800EX Digital Signal Controller
+		EM_BA1 = 201, //  Beyond BA1 CPU
+		EM_BA2 = 202, //  Beyond BA2 CPU
+		EM_XCORE = 203, //  XMOS xCORE processor
+		EM_MCHP_PIC = 204, //  Microchip 8-bit PIC(r)
+		// reserved(Intel)
+		EM_KM32 = 210, //  KM211 KM32 32-bit
+		EM_KMX32 = 211, //  KM211 KMX32 32-bit
+		EM_KMX16 = 212, //  KM211 KMX16 16-bit
+		EM_KMX8 = 213, //  KM211 KMX8 8-bit
+		EM_KVARC = 214, //  KM211 KVARC
+		EM_CDP = 215, //  Paneve CDP architecture
+		EM_COGE = 216, //  Cognitive Smart Memory
+		EM_COOL = 217, //  Bluechip Systems
+		EM_NORC = 218, //  Nanoradio Optimized
+		EM_CSR_KALIMBA = 219, //  CSR Kalimba architecture
+		EM_Z80 = 220, //  Zilog
+		EM_VISIUM = 221, //  Controls and Data Services VISIUMcore
+		EM_FT32 = 222, //  FTDI Chip FT32 high performance 32-bit RISC
+		EM_MOXIE = 223, //  Moxie processor
+		EM_AMDGPU = 224, //  AMD GPU
+		EM_RISCV = 243, // Berkeley RISC-V
+
+	PF_X = 0x1,	// PF_*: p_flags permission bits -- Execute permission
+	PF_W = 0x2,	// Write permission
+	PF_R = 0x4,	// Read permission
+};
+
+struct Elf64_Ehdr {	/* ELF64 file header, read straight from the file; each field is a raw byte array decoded with get16/get32/get64 in elf64ldseg */
+	uint8_t e_ident[16];	/* ELF identification, indexed by the EI_* constants */
+	uint8_t e_type[2];	/* Object file type */
+	uint8_t e_machine[2];	/* Machine type, matched against elfmachs[] */
+	uint8_t e_version[4];	/* Object file version */
+	uint8_t e_entry[8];	/* Entry point address */
+	uint8_t e_phoff[8];	/* Program header offset (file offset of the table) */
+	uint8_t e_shoff[8];	/* Section header offset */
+	uint8_t e_flags[4];	/* Processor-specific flags */
+	uint8_t e_ehsize[2];	/* ELF header size */
+	uint8_t e_phentsize[2];	/* Size of program header entry */
+	uint8_t e_phnum[2];	/* Number of program header entries */
+	uint8_t e_shentsize[2];	/* Size of section header entry */
+	uint8_t e_shnum[2];	/* Number of section header entries */
+	uint8_t e_shstrndx[2];	/* Section name string table index */
+};
+
+struct Elf64_Phdr {	/* ELF64 program header entry as stored in the file; fields are raw bytes decoded with get32/get64 in elf64ldseg */
+	uint8_t p_type[4];	/* Type of segment (PT_*); only PT_LOAD entries are turned into Ldsegs */
+	uint8_t p_flags[4];	/* Segment attributes (PF_* bits) */
+	uint8_t p_offset[8];	/* Offset in file */
+	uint8_t p_vaddr[8];	/* Virtual address in memory */
+	uint8_t p_paddr[8];	/* Reserved */
+	uint8_t p_filesz[8];	/* Size of segment in file */
+	uint8_t p_memsz[8];	/* Size of segment in memory; elf64ldseg rejects values below p_filesz */
+	uint8_t p_align[8];	/* Alignment of segment; elf64ldseg requires a power of 2 no smaller than minpgsz */
+};
+
+static struct {	/* pairs a machine name with its ELF e_machine code; elf64ldseg searches it when its mach argument is non-nil */
+	char *mach;	/* name compared with strcmp against the caller's mach string */
+	int e_machine;	/* EM_* value expected in the ELF header */
+} elfmachs[] = {
+	{"amd64", EM_X86_64},
+	{"arm64", EM_AARCH64},
+	{"power64", EM_PPC64},
+};
+
+/*
+ *	Return 1 when a has exactly one bit set, 0 otherwise (0 is not a power of 2).
+ */
+static int
+ispow2(uintptr_t a)
+{
+	if(a == 0)
+		return 0;
+	/* clearing the lowest set bit leaves nothing only for powers of 2 */
+	return (a & (a-1)) == 0;
+}
+
+/*
+ *	Return non-zero when the half-open ranges [a0, aend) and [b0, bend)
+ *	share at least one address.
+ */
+static int
+overlap(uintptr_t a0, uintptr_t aend, uintptr_t b0, uintptr_t bend)
+{
+	uint64_t start, end;
+
+	/* the intersection starts at the later start ... */
+	start = b0;
+	if(a0 > b0)
+		start = a0;
+	/* ... and ends at the earlier end */
+	end = bend;
+	if(aend < bend)
+		end = aend;
+	return start < end;
+}
+
+/*
+ *	Parse the ELF64 image readable through c and describe each PT_LOAD
+ *	program header as an Ldseg.
+ *
+ *	entryp	if non-nil, receives e_entry (0 when c holds no ELF image)
+ *	rp	if non-nil, receives the malloc'ed Ldseg array (nil when c
+ *		holds no ELF image); when rp is nil the array is freed here
+ *	mach	if non-nil, the header's e_machine must be listed under
+ *		this name in elfmachs[]
+ *	minpgsz	smallest accepted p_align; also the upper bound on the
+ *		size of the program header table
+ *
+ *	return the number of ldsegs in rp.  A file too short for the
+ *	header, or without the ELF magic, returns 0 without raising an
+ *	error (it could be something else).  A malformed ELF image raises
+ *	Ebadexec after printing a diagnostic.
+ */
+int
+elf64ldseg(Chan *c, uintptr_t *entryp, Ldseg **rp, char *mach, uint32_t minpgsz)
+{
+	Mach *m = machp();
+	Elf64_Ehdr ehdr;
+	Elf64_Phdr *phdr;
+	uint16_t (*get16)(uint8_t *);
+	uint32_t (*get32)(uint8_t *);
+	uint64_t (*get64)(uint8_t *);
+	uint8_t *phbuf, *phend;
+	uint8_t *fp;
+	Ldseg *ldseg, *lda, *ldb;
+	uint64_t entry, offset, vaddr, align, filesz, memsz;
+	int64_t phoff;
+	uint32_t phnum, phentsize, flags;
+	uint16_t e_machine;
+	int i, j, si;
+
+	entry = 0;
+	phbuf = nil;
+	ldseg = nil;
+	si = 0;
+	get16 = nil;
+	get32 = nil;
+	get64 = nil;
+
+	if(waserror()){
+		if(ldseg != nil)
+			free(ldseg);
+		if(phbuf != nil)
+			free(phbuf);
+		nexterror();
+	}
+
+	if(c->dev->read(c, &ehdr, sizeof ehdr, 0) != sizeof ehdr){
+		print("elf64ldseg: too short for header\n");
+		goto done; // too short to be elf but could be something else
+	}
+
+	fp = ehdr.e_ident;
+	if(fp[EI_MAG0] != '\x7f' || fp[EI_MAG1] != 'E' || fp[EI_MAG2] != 'L' || fp[EI_MAG3] != 'F')
+		goto done; // no elf magic: report zero segments, no error
+
+	if(fp[EI_CLASS] != ELFCLASS64){
+		print("elf64ldseg: not elfclass64\n");
+		error(Ebadexec);
+	}
+
+	if(fp[EI_DATA] == ELFDATA2LSB){
+		get16 = get16le;
+		get32 = get32le;
+		get64 = get64le;
+	} else if(fp[EI_DATA] == ELFDATA2MSB){
+		get16 = get16be;
+		get32 = get32be;
+		get64 = get64be;
+	} else {
+		/* without a known encoding there is no decoder to call */
+		print("elf64ldseg: bad data encoding %d\n", fp[EI_DATA]);
+		error(Ebadexec);
+	}
+
+	e_machine = get16(ehdr.e_machine);
+	if(mach != nil){
+		for(i = 0; i < nelem(elfmachs); i++)
+			if(elfmachs[i].e_machine == e_machine && !strcmp(mach, elfmachs[i].mach))
+				break;
+		if(i == nelem(elfmachs)){
+			print("elf64ldseg: e_machine %d incorrect for host %s\n", e_machine, mach);
+			error(Ebadexec);
+		}
+	}
+	entry = get64(ehdr.e_entry);
+	phoff = get64(ehdr.e_phoff);	/* e_phoff is an 8-byte field */
+	phnum = get16(ehdr.e_phnum);
+	phentsize = get16(ehdr.e_phentsize);
+
+	if(phentsize*phnum > minpgsz){
+		print("elf64ldseg: phentsize %d phnum %d exceeds page size %d\n", phentsize, phnum, minpgsz);
+		error(Ebadexec);
+	}
+
+	/* every entry is accessed as a full Elf64_Phdr; a smaller stride would read past phbuf */
+	if(phnum != 0 && phentsize < sizeof(Elf64_Phdr)){
+		print("elf64ldseg: phentsize %d too small\n", phentsize);
+		error(Ebadexec);
+	}
+
+	phbuf = malloc(phentsize*phnum);
+	if(phbuf == nil){
+		print("elf64ldseg: malloc fail\n");
+		error(Ebadexec);
+	}
+
+	if(c->dev->read(c, phbuf, phentsize*phnum, phoff) != phentsize*phnum){
+		print("elf64ldseg: read program header fail\n");
+		error(Ebadexec);
+	}
+
+	/* first pass: count the loadable segments to size the result */
+	si = 0;
+	phend = phbuf + phentsize*phnum;
+	for(fp = phbuf; fp < phend; fp += phentsize){
+		phdr = (Elf64_Phdr*)fp;
+		if(get32(phdr->p_type) == PT_LOAD)
+			si++;
+	}
+	ldseg = malloc(si * sizeof ldseg[0]);
+	if(ldseg == nil){
+		print("elf64ldseg: malloc fail\n");
+		error(Ebadexec);
+	}
+
+	/* second pass: validate and record each loadable segment */
+	si = 0;
+	for(fp = phbuf; fp < phend; fp += phentsize){
+		phdr = (Elf64_Phdr*)fp;
+		if(get32(phdr->p_type) != PT_LOAD)
+			continue;
+
+		flags = get32(phdr->p_flags);	/* Segment attributes */
+		offset = get64(phdr->p_offset);	/* Offset in file */
+		vaddr = get64(phdr->p_vaddr);	/* Virtual address in memory */
+		filesz = get64(phdr->p_filesz);	/* Size of segment in file */
+		memsz = get64(phdr->p_memsz);	/* Size of segment in memory */
+		align = get64(phdr->p_align);	/* Alignment of segment */
+
+		ldseg[si].type = SG_LOAD;
+		if((flags & PF_R) != 0)
+			ldseg[si].type |= SG_READ;
+		if((flags & PF_W) != 0)
+			ldseg[si].type |= SG_WRITE;
+		if((flags & PF_X) != 0)
+			ldseg[si].type |= SG_EXEC;
+
+		if(memsz < filesz){
+			print("elf64ldseg: memsz 0x%llx < filesz 0x%llx\n", memsz, filesz);
+			error(Ebadexec);
+		}
+
+		if(!ispow2(align)){
+			print("elf64ldseg: align 0x%llx not a power of 2\n", align);
+			error(Ebadexec);
+		}
+
+		if(align < minpgsz){
+			print("elf64ldseg: align 0x%llx < minpgsz 0x%x\n", align, minpgsz);
+			error(Ebadexec);
+		}
+
+		/*
+		 * the file offset and the virtual address must agree
+		 * modulo the alignment; the parentheses matter because
+		 * != binds tighter than &
+		 */
+		if((offset & (align-1)) != (vaddr & (align-1))){
+			print("elf64ldseg: va offset 0x%llx != file offset 0x%llx (align 0x%llx)\n",
+				vaddr & (align-1),
+				offset & (align-1),
+				align
+			);
+			error(Ebadexec);
+		}
+
+		ldseg[si].pgsz = align;
+		ldseg[si].memsz = memsz;
+		ldseg[si].filesz = filesz;
+		ldseg[si].pg0fileoff = offset & ~(align-1);
+		ldseg[si].pg0vaddr = vaddr & ~(align-1);
+		ldseg[si].pg0off = offset & (align-1);
+
+		si++;
+	}
+
+	/* overlap() is symmetric, so each unordered pair needs checking once */
+	for(i = 0; i < si; i++){
+		for(j = i+1; j < si; j++){
+			lda = ldseg+i;
+			ldb = ldseg+j;
+			if(overlap(
+				lda->pg0vaddr, lda->pg0vaddr + lda->pg0off + lda->memsz,
+				ldb->pg0vaddr, ldb->pg0vaddr + ldb->pg0off + ldb->memsz
+			)){
+				print("elf64ldseg: load segs %p:%p and %p:%p overlap\n",
+					lda->pg0vaddr, lda->pg0vaddr + lda->pg0off + lda->memsz,
+					ldb->pg0vaddr, ldb->pg0vaddr + ldb->pg0off + ldb->memsz
+				);
+				error(Ebadexec);
+			}
+		}
+	}
+
+done:
+	if(phbuf != nil)
+		free(phbuf);
+	if(rp != nil){
+		*rp = ldseg;
+	} else if(ldseg != nil){
+		free(ldseg);
+	}
+	if(entryp != nil)
+		*entryp = entry;
+	poperror();
+	return si;
+}

+ 59 - 44
sys/src/9/port/fault.c

@@ -14,8 +14,9 @@
 #include	"fns.h"
 #include	"../port/error.h"
 
+#undef DBG
+#define DBG if(0)print
 
-#define debug if(0)print
 
 /*
  * Fault calls fixfault which ends up calling newpage, which
@@ -32,7 +33,11 @@ fault(uintptr_t addr, int read)
 	char *sps;
 	int i, color;
 
-if(m->externup->nlocks) print("fault nlocks %d\n", m->externup->nlocks);
+	if(m->externup->nlocks)
+		print("%s fault nlocks %d addr %p\n",
+			read ? "read" : "write",
+			m->externup->nlocks,
+			addr);
 
 	sps = m->externup->psstate;
 	m->externup->psstate = "Fault";
@@ -155,6 +160,7 @@ fixfault(Segment *s, uintptr_t addr, int read, int dommuput, int color)
 			error("No mmap support yet");
 		goto common;
 
+	case SG_LOAD:
 	case SG_DATA:
 	case SG_TEXT: 			/* Demand load */
 		if(pagedout(*pg))
@@ -193,7 +199,7 @@ fixfault(Segment *s, uintptr_t addr, int read, int dommuput, int color)
 				int pgref = lkp->ref;
 				unlock(lkp);
 
-				debug("fixfault %d: copy on %s, %s(%c%c%c) 0x%p segref %d pgref %d\n",
+				DBG("fixfault %d: copy on %s, %s(%c%c%c) 0x%p segref %d pgref %d\n",
 					m->externup->pid,
 					read ? "read " : "write",
 					segtypes[s->type & SG_TYPE],
@@ -273,23 +279,27 @@ pio(Segment *s, uintptr_t addr, uint32_t soff, Page **p, int color)
 	c = nil;
 	pgsz = m->pgsz[s->pgszi];
 	if(loadrec == nil) {	/* from a text/data image */
-		// where page begins in file
-		daddr = (s->ph.offset + soff) & ~(pgsz-1);
-		// where segment begins on the page
-		doff = s->ph.offset & (pgsz-1);
-		// how much more to read?
-		ask = doff+s->ph.filesz - soff;
-		if(ask > pgsz)
-			ask = pgsz;
-		// read offset only if it is the first page
-		if(soff > 0)
+		daddr = s->ldseg.pg0fileoff + soff;
+		doff = s->ldseg.pg0off;
+
+		if(soff < doff+s->ldseg.filesz){
+			ask = doff+s->ldseg.filesz - soff;
+			if(ask > pgsz)
+				ask = pgsz;
+			if(soff > 0)
+				doff = 0;
+
+			newpg = lookpage(s->image, daddr+doff);
+			if(newpg != nil) {
+				*p = newpg;
+				return;
+			}
+		} else {
+			// zero fill
+			ask = 0;
 			doff = 0;
-
-		newpg = lookpage(s->image, daddr);
-		if(newpg != nil) {
-			*p = newpg;
-			return;
 		}
+
 		c = s->image->c;
 	} else {
 		panic("no swap");
@@ -302,34 +312,36 @@ pio(Segment *s, uintptr_t addr, uint32_t soff, Page **p, int color)
 	// of newpage here was 0 -- "don't zero".
 	// It is now 1 -- "do zero" because ELF only covers
 	// part of the page.
-	newpg = newpage(1, 0, addr, pgsz, color);
-	k = kmap(newpg);
-	kaddr = (char*)VA(k);
+	newpg = newpage(1, nil, addr, pgsz, color);
 
-	while(waserror()) {
-		if(strcmp(m->externup->errstr, Eintr) == 0)
-			continue;
+	if(ask > doff){
+		k = kmap(newpg);
+		kaddr = (char*)VA(k);
+
+		while(waserror()) {
+			if(strcmp(m->externup->errstr, Eintr) == 0)
+				continue;
+			kunmap(k);
+			putpage(newpg);
+			faulterror(Eioload, c, 0);
+		}
+
+		DBG(
+			"pio %d %s(%c%c%c) addr+doff 0x%p daddr+doff 0x%x ask-doff %d\n",
+			m->externup->pid, segtypes[s->type & SG_TYPE],
+			(s->type & SG_READ) != 0 ? 'r' : '-',
+			(s->type & SG_WRITE) != 0 ? 'w' : '-',
+			(s->type & SG_EXEC) != 0 ? 'x' : '-',
+			addr+doff, daddr+doff, ask-doff
+		);
+
+		n = c->dev->read(c, kaddr+doff, ask-doff, daddr+doff);
+		if(n != ask-doff)
+			faulterror(Eioload, c, 0);
+		poperror();
 		kunmap(k);
-		putpage(newpg);
-		faulterror(Eioload, c, 0);
 	}
 
-	static char *segtypes[]={ "Bad0", "Text", "Data", "Bss", "Stack", "Shared", "Phys" };
-	debug(
-		"pio %d %s(%c%c%c) addr+doff 0x%p daddr+doff 0x%x ask-doff %d\n",
-		m->externup->pid, segtypes[s->type & SG_TYPE],
-		(s->type & SG_READ) != 0 ? 'r' : '-',
-		(s->type & SG_WRITE) != 0 ? 'w' : '-',
-		(s->type & SG_EXEC) != 0 ? 'x' : '-',
-		addr+doff, daddr+doff, ask-doff
-	);
-	n = c->dev->read(c, kaddr+doff, ask-doff, daddr+doff);
-	if(n != ask-doff)
-		faulterror(Eioload, c, 0);
-
-	poperror();
-	kunmap(k);
-
 	qlock(&s->lk);
 	if(loadrec == nil) {	/* This is demand load */
 		/*
@@ -337,8 +349,11 @@ pio(Segment *s, uintptr_t addr, uint32_t soff, Page **p, int color)
 		 *  s->lk was unlocked
 		 */
 		if(*p == nil) {
-			newpg->daddr = daddr;
-			cachepage(newpg, s->image);
+			// put it to page cache if there was i/o for it
+			if(ask > doff){
+				newpg->daddr = daddr+doff;
+				cachepage(newpg, s->image);
+			}
 			*p = newpg;
 		} else {
 			print("racing on demand load\n");

+ 128 - 0
sys/src/9/port/getput.c

@@ -0,0 +1,128 @@
+#include	"u.h"
+
+/* store x at p as 8 bytes, most significant byte first */
+void
+put64be(uint8_t *p, uint64_t x)
+{
+	int i;
+
+	for(i = 0; i < 8; i++)
+		p[i] = x >> (56 - 8*i);
+}
+
+/* store x at p as 4 bytes, most significant byte first */
+void
+put32be(uint8_t *p, uint32_t x)
+{
+	int i;
+
+	for(i = 0; i < 4; i++)
+		p[i] = x >> (24 - 8*i);
+}
+
+/* store the low 24 bits of x at p as 3 bytes, most significant byte first */
+void
+put24be(uint8_t *p, int x)
+{
+	int i;
+
+	for(i = 0; i < 3; i++)
+		p[i] = x >> (16 - 8*i);
+}
+
+/* store the low 16 bits of x at p as 2 bytes, high byte first */
+void
+put16be(uint8_t *p, int x)
+{
+	uint8_t hi, lo;
+
+	hi = x>>8;
+	lo = x;
+	p[0] = hi;
+	p[1] = lo;
+}
+
+/* assemble 8 bytes at p into a 64-bit value, p[0] being the most significant */
+uint64_t
+get64be(uint8_t *p)
+{
+	uint64_t v;
+	int i;
+
+	v = 0;
+	for(i = 0; i < 8; i++)
+		v = (v<<8) | p[i];
+	return v;
+}
+
+/* assemble 4 bytes at p into a 32-bit value, p[0] being the most significant */
+uint32_t
+get32be(uint8_t *p)
+{
+	uint32_t v;
+	int i;
+
+	v = 0;
+	for(i = 0; i < 4; i++)
+		v = (v<<8) | p[i];
+	return v;
+}
+
+/* assemble 3 bytes at p into a 24-bit value, p[0] being the most significant */
+uint32_t
+get24be(uint8_t *p)
+{
+	uint32_t v;
+	int i;
+
+	v = 0;
+	for(i = 0; i < 3; i++)
+		v = (v<<8) | p[i];
+	return v;
+}
+
+/* assemble 2 bytes at p into a 16-bit value, p[0] being the high byte */
+uint16_t
+get16be(uint8_t *p)
+{
+	uint16_t hi, lo;
+
+	hi = p[0];
+	lo = p[1];
+	return (hi<<8) | lo;
+}
+
+
+/* store x at p as 8 bytes, least significant byte first */
+void
+put64le(uint8_t *p, uint64_t x)
+{
+	int i;
+
+	for(i = 0; i < 8; i++)
+		p[i] = x >> (8*i);
+}
+
+/* store x at p as 4 bytes, least significant byte first */
+void
+put32le(uint8_t *p, uint32_t x)
+{
+	int i;
+
+	for(i = 0; i < 4; i++)
+		p[i] = x >> (8*i);
+}
+
+/* store the low 24 bits of x at p as 3 bytes, least significant byte first */
+void
+put24le(uint8_t *p, int x)
+{
+	int i;
+
+	for(i = 0; i < 3; i++)
+		p[i] = x >> (8*i);
+}
+
+/* store the low 16 bits of x at p as 2 bytes, low byte first */
+void
+put16le(uint8_t *p, int x)
+{
+	uint8_t lo, hi;
+
+	lo = x;
+	hi = x>>8;
+	p[0] = lo;
+	p[1] = hi;
+}
+
+/* assemble 8 bytes at p into a 64-bit value, p[0] being the least significant */
+uint64_t
+get64le(uint8_t *p)
+{
+	uint64_t v;
+	int i;
+
+	v = 0;
+	for(i = 0; i < 8; i++)
+		v |= (uint64_t)p[i] << (8*i);
+	return v;
+}
+
+/* assemble 4 bytes at p into a 32-bit value, p[0] being the least significant */
+uint32_t
+get32le(uint8_t *p)
+{
+	uint32_t v;
+	int i;
+
+	v = 0;
+	for(i = 0; i < 4; i++)
+		v |= (uint32_t)p[i] << (8*i);
+	return v;
+}
+
+/* assemble 3 bytes at p into a 24-bit value, p[0] being the least significant */
+uint32_t
+get24le(uint8_t *p)
+{
+	uint32_t v;
+	int i;
+
+	v = 0;
+	for(i = 0; i < 3; i++)
+		v |= (uint32_t)p[i] << (8*i);
+	return v;
+}
+
+/* assemble 2 bytes at p into a 16-bit value, p[0] being the low byte */
+uint16_t
+get16le(uint8_t *p)
+{
+	uint16_t lo, hi;
+
+	lo = p[0];
+	hi = p[1];
+	return lo | (hi<<8);
+}

+ 20 - 0
sys/src/9/port/getput.h

@@ -0,0 +1,20 @@
+
+uint16_t get16be(uint8_t *p);	/* big-endian readers: p[0] is the most significant byte */
+uint32_t get24be(uint8_t *p);	/* reads 3 bytes */
+uint32_t get32be(uint8_t *p);
+uint64_t get64be(uint8_t *p);
+
+void put16be(uint8_t *p, int x);	/* big-endian writers: p[0] receives the most significant byte */
+void put24be(uint8_t *p, int x);	/* writes 3 bytes */
+void put32be(uint8_t *p, uint32_t x);
+void put64be(uint8_t *p, uint64_t x);
+
+uint16_t get16le(uint8_t *p);	/* little-endian readers: p[0] is the least significant byte */
+uint32_t get24le(uint8_t *p);	/* reads 3 bytes */
+uint32_t get32le(uint8_t *p);
+uint64_t get64le(uint8_t *p);
+
+void put16le(uint8_t *p, int x);	/* little-endian writers: p[0] receives the least significant byte */
+void put24le(uint8_t *p, int x);	/* writes 3 bytes */
+void put32le(uint8_t *p, uint32_t x);
+void put64le(uint8_t *p, uint64_t x);

+ 1 - 1
sys/src/9/port/lib.h

@@ -6,7 +6,7 @@
  * modified, propagated, or distributed except according to the terms contained
  * in the LICENSE file.
  */
-#include <elf.h>
+
 /*
  * functions (possibly) linked in, complete, from libc.
  */

+ 1 - 1
sys/src/9/port/pager.c

@@ -96,7 +96,7 @@ pageout(Proc *p, Segment *s)
 	Pte *l;
 	Page **pg, *entry;
 
-	if((s->type&SG_TYPE) != SG_TEXT)
+	if((s->type&SG_TYPE) != SG_LOAD && (s->type&SG_TYPE) != SG_TEXT)
 		panic("pageout");
 
 	if(!canqlock(&s->lk))	/* We cannot afford to wait, we will surely deadlock */

+ 2 - 0
sys/src/9/port/port.json

@@ -36,8 +36,10 @@
 		"../port/devws.c",
 		"../port/devzp.c",
 		"../port/edf.c",
+		"../port/elf64.c",
 		"../port/ethermii.c",
 		"../port/fault.c",
+		"../port/getput.c",
 		"../port/hexdump.c",
 		"../port/image.c",
 		"../port/kdebug.c",

+ 17 - 3
sys/src/9/port/portdat.h

@@ -23,6 +23,7 @@ typedef struct Fastcall Fastcall;
 typedef struct Fgrp	Fgrp;
 typedef struct Image	Image;
 typedef struct Kzio 	Kzio;
+typedef struct Ldseg	Ldseg;
 typedef struct Log	Log;
 typedef struct Logflag	Logflag;
 typedef struct Lockstats Lockstats;
@@ -441,6 +442,7 @@ enum
 	SG_SHARED	= 0x5,
 	SG_PHYSICAL	= 0x6,
 	SG_MMAP		= 0x7,
+	SG_LOAD		= 0x8,	/* replaces SG_TEXT, SG_DATA */
 
 	SG_PERM		= 0xf0,
 	SG_READ		= 0x10,
@@ -512,6 +514,17 @@ struct Zseg
 
 #define NOCOLOR -1
 
+/* demand loading params of a segment, as filled in by elf64ldseg from one PT_LOAD program header */
+struct Ldseg {
+	int64_t	memsz;	/* size of the segment in memory (p_memsz) */
+	int64_t	filesz;	/* size of the segment in the file (p_filesz) */
+	int64_t	pg0fileoff;	/* file offset of the segment rounded down to pgsz */
+	uintptr_t	pg0vaddr;	/* virtual address of the segment rounded down to pgsz */
+	uint32_t	pg0off;	/* offset of the segment's first byte within its first page */
+	uint32_t	pgsz;	/* segment alignment (p_align) */
+	uint16_t	type;	/* SG_LOAD combined with SG_READ/SG_WRITE/SG_EXEC permission bits */
+};
+
 struct Segment
 {
 	Ref;
@@ -524,12 +537,11 @@ struct Segment
 	uintptr_t	base;		/* virtual base */
 	uintptr_t	top;		/* virtual top */
 	usize	size;		/* size in pages */
-	/* We will be using the ELF ProgHdr for getting file contents into the segment. */
-	ProgHdr ph;
+	Ldseg	ldseg;
 	int	flushme;	/* maintain icache for this segment */
 	Image	*image;		/* text in file attached to this segment */
 	Physseg *pseg;
-	uint32_t*	profile;	/* Tick profile area */
+	uint32_t	*profile;	/* Tick profile area */
 	Pte	**map;
 	int	mapsize;
 	Pte	*ssegmap[SSEGMAPSIZE];
@@ -539,6 +551,7 @@ struct Segment
 	Zseg	zseg;
 };
 
+
 /*
  * NIX zero-copy IO structure.
  */
@@ -968,6 +981,7 @@ enum
 
 extern	Conf	conf;
 extern	char*	conffile;
+extern	char*	cputype;
 extern	int	cpuserver;
 extern  char*	eve;
 extern	char	hostdomain[];

+ 1 - 0
sys/src/9/port/portfns.h

@@ -110,6 +110,7 @@ void		edfrecord(Proc*);
 void		edfrun(Proc*, int);
 void		edfstop(Proc*);
 void		edfyield(void);
+int		elf64ldseg(Chan *c, uintptr_t *entryp, Ldseg **rp, char *mach, uint32_t minpgsz);
 int		emptystr(char*);
 int		encrypt(void*, void*, int);
 void		envcpy(Egrp*, Egrp*);

+ 6 - 2
sys/src/9/port/segment.c

@@ -22,7 +22,8 @@ char *segtypes[SG_TYPE]={
 	[SG_BSS] "Bss",
 	[SG_STACK] "Stack",
 	[SG_SHARED] "Shared",
-	[SG_PHYSICAL] "Phys"
+	[SG_PHYSICAL] "Phys",
+	[SG_LOAD] "Load"
 };
 
 
@@ -213,6 +214,9 @@ dupseg(Segment **seg, int segno, int share)
 		n = newseg(s->type, s->base, s->size);
 		break;
 
+	case SG_LOAD:
+		if((s->type & SG_EXEC) != 0 && (s->type & SG_WRITE) == 0)
+			goto sameseg;
 	case SG_DATA:		/* Copy on write plus demand load info */
 		if((s->type & SG_EXEC) != 0){
 			poperror();
@@ -226,7 +230,7 @@ dupseg(Segment **seg, int segno, int share)
 
 		incref(s->image);
 		n->image = s->image;
-		n->ph = s->ph;
+		n->ldseg = s->ldseg;
 		n->pgszi = s->pgszi;
 		n->color = s->color;
 		n->ptepertab = s->ptepertab;

+ 61 - 730
sys/src/9/port/sysproc.c

@@ -16,569 +16,11 @@
 #include	"../port/error.h"
 
 #include	"../port/edf.h"
-#include	<a.out.h>
-#include 	<trace.h>
+#include	<trace.h>
 
 #undef DBG
-#define DBG if(0) print
+#define DBG if(0)print
 
-/* this is ugly but we need libmach in the kernel. So this is a first pass.
- * FIX ME.
- */
-
-#include "ureg.h"  /* for Elfmach struct */
-
-typedef struct Fhdr Fhdr;
-typedef struct Fhdr
-{
-	char *name;			/* identifier of executable */
-	uint8_t	type;		/* file type - see codes above */
-	uint8_t	hdrsz;		/* header size */
-	uint8_t	_magic;		/* _MAGIC() magic */
-	uint8_t	spare;
-	int32_t	magic;		/* magic number */
-	uint64_t txtaddr;	/* text address */
-	int64_t	txtoff;		/* start of text in file */
-	uint64_t dataddr;	/* start of data segment */
-	int64_t	datoff;		/* offset to data seg in file */
-	int64_t	symoff;		/* offset of symbol table in file */
-	uint64_t entry;		/* entry point */
-	int64_t	sppcoff;	/* offset of sp-pc table in file */
-	int64_t	lnpcoff;	/* offset of line number-pc table in file */
-	int32_t	txtsz;		/* text size */
-	int32_t	datsz;		/* size of data seg */
-	int32_t	bsssz;		/* size of bss */
-	int32_t	symsz;		/* size of symbol table */
-	int32_t	sppcsz;		/* size of sp-pc table */
-	int32_t	lnpcsz;		/* size of line number-pc table */
-	/* add the indexes of the ELF text and data segments. This is one awful hack
-	 * but we want to get this plane off the ground. We can fix it better later.
-	 */
-	int it, id;
-} Fhdr;
-
-/*
- *	Common a.out header describing all architectures
- */
-typedef struct {
-	union{
-		struct {
-			Exec;			/* a.out.h */
-			uint64_t hdr[1];
-		};
-		E64hdr;				/* elf.h */
-	} e;
-	int32_t dummy;			/* padding to ensure extra long */
-} ExecHdr;
-
-typedef struct Elfmach
-{
-	char *name;
-	int mtype;				/* machine type code */
-	int32_t regsize;		/* sizeof registers in bytes */
-	int32_t fpregsize;		/* sizeof fp registers in bytes */
-	char *pc;				/* pc name */
-	char *sp;				/* sp name */
-	char *link;				/* link register name */
-	char *sbreg;			/* static base register name */
-	uint64_t sb;			/* static base register value */
-	int pgsize;				/* page size */
-	uint64_t kbase;			/* kernel base address */
-	uint64_t ktmask;		/* ktzero = kbase & ~ktmask */
-	uint64_t utop;			/* user stack top */
-	int pcquant;			/* quantization of pc */
-	int szaddr;				/* sizeof(void*) */
-	int szreg;				/* sizeof(register) */
-	int szfloat;			/* sizeof(float) */
-	int szdouble;			/* sizeof(double) */
-} Elfmach;
-
-enum {
-	MAMD64,
-	FAMD64,
-	FAMD64B,
-};
-
-#define REGSIZE	sizeof(struct Ureg)
-#define FPREGSIZE	512		/* TO DO? currently only 0x1A0 used */
-
-Elfmach mamd64=
-{
-	"amd64",
-	MAMD64,					/* machine type */
-	REGSIZE,				/* size of registers in bytes */
-	FPREGSIZE,				/* size of fp registers in bytes */
-	"PC",					/* name of PC */
-	"SP",					/* name of SP */
-	0,						/* link register */
-	"setSB",				/* static base register name (bogus anyways) */
-	0,						/* static base register value */
-	0x200000,				/* page size */
-	0xfffffffff0110000ull,	/* kernel base */
-	0xffff800000000000ull,	/* kernel text mask */
-	0x00007ffffffff000ull,	/* user stack top */
-	1,						/* quantization of pc */
-	8,						/* szaddr */
-	4,						/* szreg */
-	4,						/* szfloat */
-	8,						/* szdouble */
-};
-
-/* definition of per-executable file type structures */
-Elfmach *elfmach;
-Elfmach *machkind = &mamd64;
-
-typedef struct Exectable{
-	int32_t	magic;			/* big-endian magic number of file */
-	char *name;				/* executable identifier */
-	char *dlmname;			/* dynamically loadable module identifier */
-	uint8_t	type;			/* Internal code */
-	uint8_t	_magic;			/* _MAGIC() magic */
-	Elfmach	*elfmach;		/* Per-machine data */
-	int32_t	hsize;			/* header size */
-	uint32_t (*swal)(uint32_t);		/* beswal or leswal */
-	int	(*hparse)(Ar0*, Chan*, Fhdr*, ExecHdr*);
-} ExecTable;
-
-/* Map from mach.h */
-
-/* Structure to map a segment to a position in a file */
-
-typedef struct Map {
-		int     nsegs;		/* number of segments */
-		struct segment {	/* per-segment map */
-			char *name;		/* the segment name */
-			int fd;			/* file descriptor */
-			int inuse;		/* in use - not in use */
-			int cache;		/* should cache reads? */
-			uint64_t b;		/* base */
-			uint64_t e;		/* end */
-			int64_t f;		/* offset within file */
-		} seg[1];			/* actually n of these */
-} Map;
-
-static int crackhdr(Ar0 *ar0, Chan *c, Fhdr *fp, ExecHdr *d);
-/* Trying seek */
-
-static int64_t
-chanseek(Ar0 *ar0, Chan *c, int64_t offset, int whence)
-{
-	uint8_t buf[sizeof(Dir)+100];
-	Dir dir;
-	int n;
-
-	if(c->dev->dc == '|')
-		error(Eisstream);
-
-	switch(whence){
-	case 0:
-		if((c->qid.type & QTDIR) && offset != 0LL)
-			error(Eisdir);
-		c->offset = offset;
-		break;
-
-	case 1:
-		if(c->qid.type & QTDIR)
-			error(Eisdir);
-		lock(c);	/* lock for read/write update */
-		offset += c->offset;
-		c->offset = offset;
-		unlock(c);
-		break;
-
-	case 2:
-		if(c->qid.type & QTDIR)
-			error(Eisdir);
-		n = c->dev->stat(c, buf, sizeof buf);
-		if(convM2D(buf, n, &dir, nil) == 0)
-			error("internal error: stat error in seek");
-		offset += dir.length;
-		c->offset = offset;
-		break;
-
-	default:
-		error(Ebadarg);
-	}
-	c->uri = 0;
-	c->dri = 0;
-	if (0) // FIX ME: this cclose is needed later.
-	cclose(c);
-
-	return offset;
-}
-
-/* libmach swap.c */
-
-/*
- * big-endian int8_t
- */
-uint16_t
-beswab(uint16_t s)
-{
-	uint8_t *p;
-
-	p = (uint8_t*)&s;
-	return (p[0]<<8) | p[1];
-}
-
-/* big-endian int32_t */
-
-uint32_t
-beswal(uint32_t l)
-{
-	uint8_t *p;
-
-	p = (uint8_t*)&l;
-	return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
-}
-
-/* big-endian int64_t */
-
-uint64_t
-beswav(uint64_t v)
-{
-	uint8_t *p;
-
-	p = (uint8_t*)&v;
-	return ((uint64_t)p[0]<<56) | ((uint64_t)p[1]<<48) | ((uint64_t)p[2]<<40)
-				  | ((uint64_t)p[3]<<32) | ((uint64_t)p[4]<<24)
-				  | ((uint64_t)p[5]<<16) | ((uint64_t)p[6]<<8)
-				  | (uint64_t)p[7];
-}
-
-/*
- * little-endian int8_t (short)
- */
-uint16_t
-leswab(uint16_t s)
-{
-	uint8_t *p;
-
-	p = (uint8_t*)&s;
-	return (p[1]<<8) | p[0];
-}
-
-/*
- * little-endian int32_t
- */
-uint32_t
-leswal(uint32_t l)
-{
-	uint8_t *p;
-
-	p = (uint8_t*)&l;
-	return (p[3]<<24) | (p[2]<<16) | (p[1]<<8) | p[0];
-}
-
-/*
- * little-endian int64_t
- */
-uint64_t
-leswav(uint64_t v)
-{
-	uint8_t *p;
-
-	p = (uint8_t*)&v;
-	return ((uint64_t)p[7]<<56) | ((uint64_t)p[6]<<48) | ((uint64_t)p[5]<<40)
-				  | ((uint64_t)p[4]<<32) | ((uint64_t)p[3]<<24)
-				  | ((uint64_t)p[2]<<16) | ((uint64_t)p[1]<<8)
-				  | (uint64_t)p[0];
-}
-
-/* Atomics */
-
-static void
-settext(Fhdr *fp, uint64_t e, uint64_t a, int32_t s, int64_t off)
-{
-	fp->txtaddr = a;
-	fp->entry = e;
-	fp->txtsz = s;
-	fp->txtoff = off;
-}
-
-static void
-setdata(Fhdr *fp, uint64_t a, int32_t s, int64_t off, int32_t bss)
-{
-	fp->dataddr = a;
-	fp->datsz = s;
-	fp->datoff = off;
-	fp->bsssz = bss;
-}
-
-static void
-setsym(Fhdr *fp, int32_t symsz, int32_t sppcsz, int32_t lnpcsz,
-       int64_t symoff)
-{
-	fp->symsz = symsz;
-	fp->symoff = symoff;
-	fp->sppcsz = sppcsz;
-	fp->sppcoff = fp->symoff+fp->symsz;
-	fp->lnpcsz = lnpcsz;
-	fp->lnpcoff = fp->sppcoff+fp->sppcsz;
-}
-
-#if 0
-static uint64_t
-_round(uint64_t a, uint32_t b)
-{
-	uint64_t w;
-
-	w = (a/b)*b;
-	if (a!=w)
-		w += b;
-	return(w);
-}
-#endif
-/*  Convert header to canonical form */
-static void
-hswal(void *v, int n, uint32_t (*swap)(uint32_t))
-{
-	uint32_t *ulp;
-
-	for(ulp = v; n--; ulp++)
-		*ulp = (*swap)(*ulp);
-}
-
-/* map.c */
-
-int
-findseg(Map *map, char *name)
-{
-	int i;
-
-	if (!map)
-		return -1;
-	for (i = 0; i < map->nsegs; i++)
-		if (map->seg[i].inuse && !strcmp(map->seg[i].name, name))
-			return i;
-	return -1;
-}
-
-Map *
-newmap(Map *map, int n)
-{
-	int size;
-
-	size = sizeof(Map)+(n-1)*sizeof(struct segment);
-	if (map == 0)
-		map = malloc(size);
-	else
-		map = realloc(map, size);
-	if (map == 0) {
-		error("out of memory: %r");
-		return 0;
-	}
-	memset(map, 0, size);
-	map->nsegs = n;
-	return map;
-}
-
-Map*
-loadmap(Map *map, int fd, Fhdr *fp)
-{
-	map = newmap(map, 2);
-	if (map == 0)
-		return 0;
-
-	map->seg[0].b = fp->txtaddr;
-	map->seg[0].e = fp->txtaddr+fp->txtsz;
-	map->seg[0].f = fp->txtoff;
-	map->seg[0].fd = fd;
-	map->seg[0].inuse = 1;
-	map->seg[0].name = "text";
-	map->seg[1].b = fp->dataddr;
-	map->seg[1].e = fp->dataddr+fp->datsz;
-	map->seg[1].f = fp->datoff;
-	map->seg[1].fd = fd;
-	map->seg[1].inuse = 1;
-	map->seg[1].name = "data";
-	return map;
-}
-
-/* commons */
-#if 0
-static void
-commonboot(Fhdr *fp)
-{
-	switch(fp->type) {				/* boot image */
-	case FAMD64:
-		fp->type = FAMD64B;
-		fp->txtaddr = fp->entry;
-		fp->name = "amd64 plan 9 boot image";
-		fp->dataddr = _round(fp->txtaddr+fp->txtsz, 4096);
-		break;
-	default:
-		return;
-	}
-	fp->hdrsz = 0;			/* header stripped */
-}
-
-static int
-commonllp64(Ar0 *ar0, Chan *c, Fhdr *fp, ExecHdr *hp)
-{
-	int32_t pgsize;
-	uint64_t entry;
-
-	hswal(&hp->e, sizeof(Exec)/sizeof(int32_t), beswal);
-	if(!(hp->e.magic & HDR_MAGIC))
-		return 0;
-
-	/*
-	 * There can be more magic here if the
-	 * header ever needs more expansion.
-	 * For now just catch use of any of the
-	 * unused bits.
-	 */
-	if((hp->e.magic & ~DYN_MAGIC)>>16)
-		return 0;
-	entry = beswav(hp->e.hdr[0]);
-
-	pgsize = elfmach->pgsize;
-	settext(fp, entry, pgsize+fp->hdrsz, hp->e.text, fp->hdrsz);
-	setdata(fp, _round(pgsize+fp->txtsz+fp->hdrsz, pgsize),
-		hp->e.data, fp->txtsz+fp->hdrsz, hp->e.bss);
-	setsym(fp, hp->e.syms, hp->e.spsz, hp->e.pcsz, fp->datoff+fp->datsz);
-
-	if(hp->e.magic & DYN_MAGIC) {
-		fp->txtaddr = 0;
-		fp->dataddr = fp->txtsz;
-		return 1;
-	}
-	commonboot(fp);
-	return 1;
-}
-#endif
-/* ELF */
-
-static int
-elf64dotout(Ar0 *ar0, Chan *c, Fhdr *fp, ExecHdr *hp)
-{
-	E64hdr *ep;
-
-	uint16_t (*swab)(uint16_t);
-	uint32_t (*swal)(uint32_t);
-	uint64_t (*swav)(uint64_t);
-	int i, is, phsz;
-	uint64_t uvl;
-
-	ep = &hp->e;
-	if(ep->ident[DATA] == ELFDATA2LSB) {
-		swab = leswab;
-		swal = leswal;
-		swav = leswav;
-	} else if(ep->ident[DATA] == ELFDATA2MSB) {
-		swab = beswab;
-		swal = beswal;
-		swav = beswav;
-	} else {
-		error("bad ELF64 encoding - not big or little endian");
-		return 0;
-	}
-
-	ep->type = swab(ep->type);
-	ep->machine = swab(ep->machine);
-	ep->version = swal(ep->version);
-	if(ep->type != EXEC || ep->version != CURRENT)
-		return 0;
-	ep->elfentry = swav(ep->elfentry);
-	ep->phoff = swav(ep->phoff);
-	ep->shoff = swav(ep->shoff);
-	ep->flags = swal(ep->flags);
-	ep->ehsize = swab(ep->ehsize);
-	ep->phentsize = swab(ep->phentsize);
-	ep->phnum = swab(ep->phnum);
-	ep->shentsize = swab(ep->shentsize);
-	ep->shnum = swab(ep->shnum);
-	ep->shstrndx = swab(ep->shstrndx);
-
-	fp->magic = ELF_MAG;
-	fp->hdrsz = (ep->ehsize+ep->phnum*ep->phentsize+16)&~15;
-	elfmach = &mamd64;
-	fp->type = FAMD64;
-	fp->name = "amd64 ELF64 executable";
-
-	if(ep->phentsize != sizeof(P64hdr)) {
-		error("bad ELF64 header size");
-		return 0;
-	}
-	phsz = sizeof(P64hdr)*ep->phnum;
-	hp->e.ph = malloc(phsz);
-	if(hp->e.ph == nil)
-		return 0;
-	chanseek(ar0, c, ep->phoff, 0);
-	if(c->dev->read(c, hp->e.ph, phsz, c->offset) < 0){
-		free(hp->e.ph);
-		return 0;
-	}
-	for(i = 0; i < ep->phnum; i++) {
-		hp->e.ph[i].type = swal(hp->e.ph[i].type);
-		hp->e.ph[i].flags = swal(hp->e.ph[i].flags);
-		hp->e.ph[i].offset = swav(hp->e.ph[i].offset);
-		hp->e.ph[i].vaddr = swav(hp->e.ph[i].vaddr);
-		hp->e.ph[i].paddr = swav(hp->e.ph[i].paddr);
-		hp->e.ph[i].filesz = swav(hp->e.ph[i].filesz);
-		hp->e.ph[i].memsz = swav(hp->e.ph[i].memsz);
-		hp->e.ph[i].align = swav(hp->e.ph[i].align);
-	}
-
-	/* find text, data and symbols and install them */
-	fp->it = fp->id = is = -1;
-	for(i = 0; i < ep->phnum; i++) {
-		if(hp->e.ph[i].type == LOAD
-		&& (hp->e.ph[i].flags & (R|X)) == (R|X) && fp->it == -1)
-			fp->it = i;
-		else if(hp->e.ph[i].type == LOAD
-		&& (hp->e.ph[i].flags & (R|W)) == (R|W) && fp->id == -1)
-			fp->id = i;
-		else if(hp->e.ph[i].type == NOPTYPE && is == -1)
-			is = i;
-	}
-	if(fp->it == -1 || fp->id == -1) {
-		error("No ELF64 TEXT or DATA sections");
-		free(hp->e.ph);
-		return 0;
-	}
-
-	settext(fp, ep->elfentry, hp->e.ph[fp->it].vaddr, hp->e.ph[fp->it].memsz, hp->e.ph[fp->it].offset);
-	/* note: this comment refers to a bug in 8c. Who cares? We need to move on. */
-	/* 8c: out of fixed registers */
-	uvl = hp->e.ph[fp->id].memsz - hp->e.ph[fp->id].filesz;
-	setdata(fp, hp->e.ph[fp->id].vaddr, hp->e.ph[fp->id].filesz, hp->e.ph[fp->id].offset, uvl);
-	if(is != -1)
-		setsym(fp, hp->e.ph[is].filesz, 0, hp->e.ph[is].memsz, hp->e.ph[is].offset);
-	return 1;
-}
-
-static int
-elfdotout(Ar0 *ar0, Chan *c, Fhdr *fp, ExecHdr *hp)
-{
-	E64hdr *ep;
-
-	/* bitswap the header according to the DATA format */
-	ep = &hp->e;
-
-	if(ep->ident[CLASS] == ELFCLASS64)
-		return elf64dotout(ar0, c, fp, hp);
-
-	error("bad ELF class - not 64-bit");
-	return 0;
-}
-
-ExecTable exectab[] =
-{
-	{ ELF_MAG,			/* any ELF */
-		"elf executable",
-		nil,
-		0,				/* FNONE */
-		0,
-		&mamd64,		/* Mach* type */
-		sizeof(E64hdr),
-		nil,
-		elfdotout },
-	{ 0 },
-};
-
-/* End of libmach */
 
 void
 sysrfork(Ar0* ar0, ...)
@@ -612,7 +54,7 @@ sysrfork(Ar0* ar0, ...)
 	if((flag & (RFCORE|RFCCORE)) == (RFCORE|RFCCORE))
 		error(Ebadarg);
 	if(flag & RFCORE && m->externup->wired != nil)
-		error("wired proc cannot move to ac");		
+		error("wired proc cannot move to ac");
 
 	if((flag&RFPROC) == 0) {
 		if(flag & (RFMEM|RFNOWAIT))
@@ -835,35 +277,26 @@ l2be(int32_t l)
 	return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
 }
 #endif
-typedef struct {
-	Exec;
-	uint64_t hdr[1];
-} Hdr;
 
 /*
  * flags can ONLY specify that you want an AC for you, or
  * that you want an XC for you.
- * 
  */
 static void
 execac(Ar0* ar0, int flags, char *ufile, char **argv)
 {
-	ExecHdr d;
-	Fhdr f;
 	Mach *m = machp();
-	Hdr hdr;
 	Fgrp *fg;
 	Tos *tos;
 	Chan *chan, *ichan;
 	Image *img;
 	Segment *s;
-	int argc, i, n;
+	Ldseg *ldseg;
+	int argc, i, n, nldseg;
 	char *a, *elem, *file, *p;
-	char line[sizeof(Exec)], *progarg[sizeof(Exec)/2+1];
-	int32_t hdrsz, textsz, datasz, bsssz;
-	uintptr_t textlim, datalim, bsslim, entry, stack;
-	uintptr_t textaddr, dataddr;
-	//	static int colorgen;
+	char line[64], *progarg[sizeof(line)/2+1];
+	int32_t hdrsz;
+	uintptr_t entry, stack;
 
 
 	file = nil;
@@ -911,16 +344,14 @@ execac(Ar0* ar0, int flags, char *ufile, char **argv)
-	 * The #! line must be less than sizeof(Exec) in size,
+	 * The #! line must fit within sizeof(line) bytes,
 	 * including the terminating \n.
 	 */
-	hdrsz = ichan->dev->read(ichan, &hdr, sizeof(Hdr), 0);
+	hdrsz = ichan->dev->read(ichan, line, sizeof line, 0);
 	if(hdrsz < 2)
 		error(Ebadexec);
-	p = (char*)&hdr;
-	if(p[0] == '#' && p[1] == '!'){
-		p = memccpy(line, (char*)&hdr, '\n',
-			    MIN(sizeof(Exec), hdrsz));
+	if(line[0] == '#' && line[1] == '!'){
+		p = memchr(line, '\n', MIN(sizeof line, hdrsz));
 		if(p == nil)
 			error(Ebadexec);
-		*(p-1) = '\0';
+		*p = '\0';
 		argc = tokenize(line+2, progarg, nelem(progarg));
 		if(argc == 0)
 			error(Ebadexec);
@@ -937,9 +368,6 @@ execac(Ar0* ar0, int flags, char *ufile, char **argv)
 		chan = nil;	/* in case namec errors out */
 		USED(chan);
 		chan = namec(p, Aopen, OEXEC, 0);
-		hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0);
-		if(hdrsz < 2)
-			error(Ebadexec);
 	}else{
 		chan = ichan;
 		incref(ichan);
@@ -948,61 +376,30 @@ execac(Ar0* ar0, int flags, char *ufile, char **argv)
 	cclose(ichan);
 	poperror();
 
-	/* start over. */
-	chanseek(ar0, chan, 0, 0);
 	/*
 	 * #! has had its chance, now we need a real binary.
 	 */
 
-	crackhdr(ar0, chan, &f, &d);
-
-	if((f.txtaddr&(BIGPGSZ-1)) != (f.txtoff&(BIGPGSZ-1))){
-		print("exec: text offset %p not page-aligned with file offset %p\n", f.txtaddr, f.txtoff);
-		error(Ebadexec);
-	}
-	if((f.dataddr&(BIGPGSZ-1)) != (f.datoff&(BIGPGSZ-1))){
-		print("exec: data offset %p not page-aligned with file offset %p\n", f.txtaddr, f.txtoff);
+	nldseg = elf64ldseg(chan, &entry, &ldseg, cputype, BIGPGSZ);
+	if(nldseg == 0){
+		print("execac: elf64ldseg returned 0 segs!\n");
 		error(Ebadexec);
 	}
 
-	textsz = f.txtsz + (f.txtoff&(BIGPGSZ-1));
-	datasz = f.datsz + (f.datoff&(BIGPGSZ-1));
-	bsssz = f.bsssz;
-	entry = f.entry;
-	textaddr = f.txtaddr - (f.txtoff&(BIGPGSZ-1));
-	dataddr = f.dataddr - (f.datoff&(BIGPGSZ-1));
-
-	textlim = BIGPGROUND(textaddr+textsz);
-	datalim = BIGPGROUND(dataddr+datasz);
-	bsslim = BIGPGROUND(dataddr+datasz+bsssz);
-
-	/*
-	 * Check the binary header for consistency,
-	 * e.g. the entry point is within the text segment and
-	 * the segments don't overlap each other.
-	 */
-	if(entry < textaddr+hdrsz || entry >= textaddr+hdrsz+textsz)
-		error(Ebadexec);
-
-	if(textsz >= textlim || datasz > datalim || bsssz > bsslim
-	|| textlim >= USTKTOP || datalim >= USTKTOP || bsslim >= USTKTOP
-	|| datalim < textlim || bsslim < datalim)
-		error(Ebadexec);
-
 	if(m->externup->ac != nil && m->externup->ac != m)
 		m->externup->color = corecolor(m->externup->ac->machno);
 	else
 		m->externup->color = corecolor(m->machno);
 
 	/*
-	 * The new stack is created in ESEG, temporarily mapped elsewhere.
+	 * The new stack is temporarily mapped elsewhere.
 	 * The stack contains, in descending address order:
 	 *	a structure containing housekeeping and profiling data (Tos);
 	 *	argument strings;
 	 *	array of vectors to the argument strings with a terminating
 	 *	nil (argv).
-	 * When the exec is committed, this temporary stack in ESEG will
-	 * become SSEG.
+	 * When the exec is committed, this temporary stack is relocated
+	 * to become the actual stack segment.
 	 * The architecture-dependent code which jumps to the new image
 	 * will also push a count of the argument array onto the stack (argc).
 	 */
@@ -1057,6 +454,7 @@ execac(Ar0* ar0, int flags, char *ufile, char **argv)
 	 * the strings argv points to are valid.
 	 */
 	for(i = 0;; i++, argv++){
+
 		a = *(char**)validaddr(argv, sizeof(char**), 0);
 		if(a == nil)
 			break;
@@ -1173,54 +571,54 @@ execac(Ar0* ar0, int flags, char *ufile, char **argv)
 	s->top = USTKTOP;
 	relocateseg(s, USTKTOP-TSTKTOP);
 
-DBG(
-	"exec: text: 0x%p textlim: 0x%p data: 0x%p datalim 0x%p brk: 0x%p\n"
-	"	txtaddr 0x%p txtoff 0x%p txtsz 0x%x\n"
-	"	dataddr 0x%p datoff 0x%p datsz 0x%x\n"
-	"	bssaddr 0x%p bsssz 0x%x\n",
-	textaddr, textlim, dataddr, datalim, bsslim,
-	f.txtaddr, f.txtoff, f.txtsz,
-	f.dataddr, f.datoff, f.datsz,
-	f.dataddr+f.datsz, f.bsssz
-);
-	/* Text.  Shared. Attaches to cache image if possible
-	 * but prepaged if EXAC
-	 */
-	// TODO: Just use the program header instead of these other things.
-	img = attachimage(SG_TEXT|SG_EXEC|SG_READ, chan, m->externup->color, textaddr, (dataddr-textaddr)/BIGPGSZ);
-	s = img->s;
-	s->ph = d.e.ph[f.it];
-
-	// TODO(aki): this stupid hack really needs to go.
-	s->ph.filesz = f.datoff+f.datsz-f.txtoff;
-
-	m->externup->seg[sno++] = s;
-	s->flushme = 1;
-	if(img->color != m->externup->color)
-		m->externup->color = img->color;
-	unlock(img);
-
-	/* Data. Shared. */
-	s = newseg(SG_DATA|SG_READ|SG_WRITE, dataddr, (datalim-dataddr)/BIGPGSZ);
-	m->externup->seg[sno++] = s;
-	s->color = m->externup->color;
+	img = nil;
+	uintptr_t datalim;
+	datalim = 0;
+	for(i = 0; i < nldseg; i++){
+
+		if(img == nil){
+			img = attachimage(ldseg[i].type, chan, m->externup->color,
+				ldseg[i].pg0vaddr,
+				(ldseg[i].pg0off+ldseg[i].memsz+BIGPGSZ-1)/BIGPGSZ
+			);
+			s = img->s;
+			s->flushme = 1;
+			if(img->color != m->externup->color)
+				m->externup->color = img->color;
+			unlock(img);
+		} else {
+			s = newseg(ldseg[i].type, ldseg[i].pg0vaddr, (ldseg[i].pg0off+ldseg[i].memsz+BIGPGSZ-1)/BIGPGSZ);
+			s->color = m->externup->color;
+			incref(img);
+			s->image = img;
+		}
 
-	/* Attached by hand */
-	incref(img);
-	s->image = img;
-	s->ph = d.e.ph[f.id];
+		s->ldseg = ldseg[i];
+		m->externup->seg[sno++] = s;
+		if(datalim < ldseg[i].pg0vaddr+ldseg[i].memsz)
+			datalim = ldseg[i].pg0vaddr+ldseg[i].memsz;
+	}
 
 	/* BSS. Zero fill on demand for TS */
-	s = newseg(SG_BSS|SG_READ|SG_WRITE, datalim, (bsslim-datalim)/BIGPGSZ);
+	s = newseg(SG_BSS|SG_READ|SG_WRITE, (datalim + BIGPGSZ-1) & ~(BIGPGSZ-1), 0);
 	m->externup->seg[sno++] = s;
 	s->color= m->externup->color;
 
-	/* MMAP region. Put it at 512GiB for now. 
-	s = newseg(SG_MMAP, 512 * GiB, 1);
-	m->externup->seg[sno++] = s;
-	if (0) print("mmap seg[%d] is %p\n", sno, m->externup->seg);
-	s->color= m->externup->color;
-	*/
+	for(i = 0; i < sno; i++){
+		s = m->externup->seg[i];
+		DBG(
+			"execac %d %s(%c%c%c) %p:%p va %p off %p fsz %d msz %d\n",
+			m->externup->pid, segtypes[s->type & SG_TYPE],
+			(s->type & SG_READ) != 0 ? 'r' : '-',
+			(s->type & SG_WRITE) != 0 ? 'w' : '-',
+			(s->type & SG_EXEC) != 0 ? 'x' : '-',
+			s->base, s->top,
+			s->ldseg.pg0vaddr+s->ldseg.pg0off,
+			s->ldseg.pg0fileoff+s->ldseg.pg0off,
+			s->ldseg.filesz,
+			s->ldseg.memsz
+		);
+	}
 
 	/* the color of the stack was decided when we created it before,
 	 * it may have nothing to do with the color of other segments.
@@ -1266,12 +664,6 @@ DBG(
 		m->externup->procctl = Proc_toac;
 		m->externup->prepagemem = 1;
 	}
-DBG(
-	"execac up %#p done\n"
-	"	textsz %lx datasz %lx bsssz %lx hdrsz %lx\n"
-	"	textlim %ullx datalim %ullx bsslim %ullx\n",
-	m->externup, textsz, datasz, bsssz, hdrsz, textlim, datalim, bsslim
-);
 }
 
 void
@@ -1295,67 +687,6 @@ sysexecac(Ar0* ar0, ...)
 	execac(ar0, flags, file, argv);
 }
 
-static int
-crackhdr(Ar0 *ar0, Chan *c, Fhdr *fp, ExecHdr *d)
-{
-	ExecTable *mp;
-	int nb, ret;
-	uint32_t magic;
-
-	fp->type = 0; /* FNONE */
-	nb = c->dev->read(c, (char *)&d->e, sizeof(d->e), c->offset);
-	if (nb <= 0)
-		error("crackhdr: header read failed");
-
-	ret = 0;
-	magic = beswal(d->e.magic);		/* big-endian */
-	for (mp = exectab; mp->magic; mp++) {
-		if (nb < mp->hsize) {
-			continue;
-		}
-
-		/*
-		 * The magic number has morphed into something
-		 * with fields (the straw was DYN_MAGIC) so now
-		 * a flag is needed in Fhdr to distinguish _MAGIC()
-		 * magic numbers from foreign magic numbers.
-		 *
-		 * This code is creaking a bit and if it has to
-		 * be modified/extended much more it's probably
-		 * time to step back and redo it all.
-		 */
-		if(mp->_magic){
-			if(mp->magic != (magic & ~DYN_MAGIC))
-				continue;
-
-			if ((magic & DYN_MAGIC) && mp->dlmname != nil)
-				fp->name = mp->dlmname;
-			else
-				fp->name = mp->name;
-		}
-		else{
-			if(mp->magic != magic)
-				continue;
-			fp->name = mp->name;
-		}
-		fp->type = mp->type;
-		fp->hdrsz = mp->hsize;		/* will be zero on bootables */
-		fp->_magic = mp->_magic;
-		fp->magic = magic;
-
-		machkind = mp->elfmach;
-		if(mp->swal != nil)
-			hswal(d, sizeof(d->e)/sizeof(uint32_t), mp->swal);
-		ret = mp->hparse(ar0, c, fp, d);
-		chanseek(ar0, c, mp->hsize, 0);		/* seek to end of header */
-		break;
-	}
-	if(mp->magic == 0) {
-		error("Sysproc: unknown header type");
-	}
-	return ret;
-}
-
 void
 sysexec(Ar0* ar0, ...)
 {

+ 1 - 0
sys/src/9/port/sysseg.c

@@ -181,6 +181,7 @@ syssegbrk(Ar0* ar0, ...)
 		if(addr == s->top && (s->base < s->top))
 			continue;
 		switch(s->type&SG_TYPE) {
+		case SG_LOAD:
 		case SG_TEXT:
 		case SG_DATA:
 		case SG_STACK: