Browse Source

acid elf wip

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>
Graham MacDonald 4 years ago
parent
commit
06edad0f1c
6 changed files with 225 additions and 48 deletions
  1. 5 1
      sys/include/a.out.h
  2. 30 2
      sys/include/elf.h
  3. 7 1
      sys/include/mach.h
  4. 87 29
      sys/src/libmach/executable.c
  5. 2 2
      sys/src/libmach/obj.c
  6. 94 13
      sys/src/libmach/sym.c

+ 5 - 1
sys/include/a.out.h

@@ -7,6 +7,8 @@
  * in the LICENSE file.
  */
 
+// TODO rename
+
 typedef	struct	Exec	Exec;
 struct	Exec
 {
@@ -50,7 +52,9 @@ typedef	struct	Sym	Sym;
 struct	Sym
 {
 	int64_t	value;
-	uint	sig;
+	//uint	sig;
 	char	type;
 	char	*name;
+	uint8_t	binding;
+	uint8_t	symtype;
 };

+ 30 - 2
sys/include/elf.h

@@ -75,6 +75,15 @@ typedef struct {
 	uint64_t	entsize;	/* entry size if table */
 } S64hdr;
 
+typedef struct {
+	uint32_t	st_name;	/* Symbol name */
+	uint8_t		st_info;	/* Type and Binding attributes */
+	uint8_t		st_other;	/* Reserved */
+	uint16_t	st_shndx;	/* Section table index */
+	uint64_t	st_value;	/* Symbol value */
+	uint64_t	st_size;	/* Size of object (e.g., common) */
+} E64Sym;
+
 enum {
 	/* Ehdr codes */
 	MAG0 = 0,		/* ident[] indexes */
@@ -136,13 +145,32 @@ enum {
 	X = 0x1,
 
 	/* Shdr Codes */
-	Progbits = 1,	/* section types */
+	Progbits = 1,		/* section types */
+	Symtab = 2,
 	Strtab = 3,
 	Nobits = 8,
 
-	Swrite = 1,	/* section attributes */
+	Swrite = 1,		/* section attributes */
 	Salloc = 2,
 	Sexec = 4,
+
+	STB_LOCAL = 0,		/* Symbol bindings */
+	STB_GLOBAL = 1,
+	STB_WEAK = 2,
+	STB_LOOS = 10,
+	STB_HIOS = 12,
+	STB_LOPROC = 13,
+	STB_HIPROC = 15,
+
+	STT_NOTYPE = 0, 	/* Symbol types */
+	STT_OBJECT = 1,
+	STT_FUNC = 2,
+	STT_SECTION = 3,
+	STT_FILE = 4,
+	STT_LOOS = 10,
+	STT_HIOS = 12,
+	STT_LOPROC = 13,
+	STT_HIPROC = 15,
 };
 
 #define	ELF_MAG		((0x7f<<24) | ('E'<<16) | ('L'<<8) | 'F')

+ 7 - 1
sys/include/mach.h

@@ -231,7 +231,7 @@ struct	Machdata {		/* Machine-dependent debugger support */
 };
 
 /*
- *	Common a.out header describing all architectures
+ *	Common executable header describing all architectures
  */
 typedef struct Fhdr
 {
@@ -255,6 +255,12 @@ typedef struct Fhdr
 	int32_t		symsz;		/* size of symbol table */
 	int32_t		sppcsz;		/* size of sp-pc table */
 	int32_t		lnpcsz;		/* size of line number-pc table */
+
+	// TODO work out which of the above are no longer useful
+	// and which should be uint64
+	int8_t		bigendian;	/* big endian or not */
+	uint64_t	stroff;		/* strtab offset in file */
+	uint64_t	strsz;		/* size of strtab seg */
 } Fhdr;
 
 extern	int	asstype;	/* dissembler type - machdata.c */

+ 87 - 29
sys/src/libmach/executable.c

@@ -36,11 +36,12 @@ typedef struct {
 	int32_t dummy;			/* padding to ensure extra long */
 } ExecHdr;
 
-static	int	elfdotout(int, Fhdr*, ExecHdr*);
-static	void	setsym(Fhdr*, int32_t, int32_t, int32_t, int64_t);
-static	void	setdata(Fhdr*, uint64_t, int32_t, int64_t, int32_t);
-static	void	settext(Fhdr*, uint64_t, uint64_t, int32_t, int64_t);
-static	void	hswal(void*, int, uint32_t(*)(uint32_t));
+static int	elfdotout(int, Fhdr*, ExecHdr*);
+static void	setsym(Fhdr*, int32_t, int32_t, int32_t, int64_t);
+static void	setdata(Fhdr*, uint64_t, int32_t, int64_t, int32_t);
+static void	settext(Fhdr*, uint64_t, uint64_t, int32_t, int64_t);
+static void	setstr(Fhdr *fp, int64_t stroff, uint64_t strsz);
+static void	hswal(void*, int, uint32_t(*)(uint32_t));
 
 /*
  *	definition of per-executable file type structures
@@ -181,15 +182,11 @@ hswal(void *v, int n, uint32_t (*swap)(uint32_t))
 static int
 elf64dotout(int fd, Fhdr *fp, ExecHdr *hp)
 {
-	E64hdr *ep;
-	P64hdr *ph;
 	uint16_t (*swab)(uint16_t);
 	uint32_t (*swal)(uint32_t);
 	uint64_t (*swav)(uint64_t);
-	int i, it, id, is, phsz;
-	uint64_t uvl;
 
-	ep = &hp->e.E64hdr;
+	E64hdr *ep = &hp->e.E64hdr;
 	if(ep->ident[DATA] == ELFDATA2LSB) {
 		swab = leswab;
 		swal = leswal;
@@ -202,6 +199,7 @@ elf64dotout(int fd, Fhdr *fp, ExecHdr *hp)
 		werrstr("bad ELF64 encoding - not big or little endian");
 		return 0;
 	}
+	fp->bigendian = ep->ident[DATA] == ELFDATA2MSB;
 
 	ep->type = swab(ep->type);
 	ep->machine = swab(ep->machine);
@@ -231,12 +229,13 @@ elf64dotout(int fd, Fhdr *fp, ExecHdr *hp)
 		break;
 	}
 
+	// Program headers
 	if(ep->phentsize != sizeof(P64hdr)) {
-		werrstr("bad ELF64 header size");
+		werrstr("bad ELF64 program header size");
 		return 0;
 	}
-	phsz = sizeof(P64hdr)*ep->phnum;
-	ph = malloc(phsz);
+	int phsz = sizeof(P64hdr)*ep->phnum;
+	P64hdr *ph = malloc(phsz);
 	if(!ph)
 		return 0;
 	seek(fd, ep->phoff, 0);
@@ -244,7 +243,8 @@ elf64dotout(int fd, Fhdr *fp, ExecHdr *hp)
 		free(ph);
 		return 0;
 	}
-	for(i = 0; i < ep->phnum; i++) {
+
+	for(int i = 0; i < ep->phnum; i++) {
 		ph[i].type = swal(ph[i].type);
 		ph[i].flags = swal(ph[i].flags);
 		ph[i].offset = swav(ph[i].offset);
@@ -256,16 +256,14 @@ elf64dotout(int fd, Fhdr *fp, ExecHdr *hp)
 	}
 
 	/* find text, data and symbols and install them */
-	it = id = is = -1;
-	for(i = 0; i < ep->phnum; i++) {
-		if(ph[i].type == LOAD
-		&& (ph[i].flags & (R|X)) == (R|X) && it == -1)
+	int it = -1, id = -1;
+	for(int i = 0; i < ep->phnum; i++) {
+		if(ph[i].type == LOAD && (ph[i].flags & (R|X)) == (R|X) && it == -1)
 			it = i;
-		else if(ph[i].type == LOAD
-		&& (ph[i].flags & (R|W)) == (R|W) && id == -1)
+		else if(ph[i].type == LOAD && (ph[i].flags & (R|W)) == (R|W) && id == -1)
 			id = i;
-		else if(ph[i].type == NOPTYPE && is == -1)
-			is = i;
+		//else if(ph[i].type == NOPTYPE && is == -1)
+		//	is = i;
 	}
 	if(it == -1 || id == -1) {
 		werrstr("No ELF64 TEXT or DATA sections");
@@ -274,12 +272,67 @@ elf64dotout(int fd, Fhdr *fp, ExecHdr *hp)
 	}
 
 	settext(fp, ep->elfentry, ph[it].vaddr, ph[it].memsz, ph[it].offset);
-	/* 8c: out of fixed registers */
-	uvl = ph[id].memsz - ph[id].filesz;
+	uint64_t uvl = ph[id].memsz - ph[id].filesz;
 	setdata(fp, ph[id].vaddr, ph[id].filesz, ph[id].offset, uvl);
-	if(is != -1)
-		setsym(fp, ph[is].filesz, 0, ph[is].memsz, ph[is].offset);
 	free(ph);
+
+	// Section headers - get the symbol table offset from here
+	if (ep->shentsize != sizeof(S64hdr)) {
+		werrstr("bad ELF64 section header size");
+		return 0;
+	}
+	int shsz = sizeof(S64hdr)*ep->shnum;
+	S64hdr *sh = malloc(shsz);
+	if (!sh) {
+		return 0;
+	}
+	seek(fd, ep->shoff, 0);
+	if (read(fd, sh, shsz) < 0) {
+		free(sh);
+		return 0;
+	}
+
+	for (int i = 0; i < ep->shnum; i++) {
+		sh[i].name = swal(sh[i].name);
+		sh[i].type = swal(sh[i].type);
+		sh[i].flags = swav(sh[i].flags);
+		sh[i].addr = swav(sh[i].addr);
+		sh[i].offset = swav(sh[i].offset);
+		sh[i].size = swav(sh[i].size);
+		sh[i].link = swal(sh[i].link);
+		sh[i].info = swal(sh[i].info);
+		sh[i].addralign = swav(sh[i].addralign);
+		sh[i].entsize = swav(sh[i].entsize);
+	}
+
+	int isym = -1, istr = -1;
+	for (int i = 0; i < ep->shnum; i++) {
+		if (sh[i].type == Symtab && isym == -1) {
+			// Assume the first is the one we want for now
+			// There may be more than one if it's dynamic, but we
+			// don't support that, so hopefully this is ok for now
+			isym = i;
+		} else if (sh[i].type == Strtab && istr == -1) {
+			// Assume first is the one we want for now, but we
+			// should probably check that the name is '.strtab' to
+			// distinguish from .shstrtab.
+			istr = i;
+			break;
+		}
+	}
+
+	if (isym != -1) {
+		print("isym: %d\n", isym);
+		setsym(fp, sh[isym].size, 0, sh[isym].size, sh[isym].offset);
+	}
+
+	if (istr != -1) {
+		print("istr: %d\n", istr);
+		setstr(fp, sh[istr].offset, sh[istr].size);
+	}
+
+	free(sh);
+
 	return 1;
 }
 
@@ -317,8 +370,7 @@ setdata(Fhdr *fp, uint64_t a, int32_t s, int64_t off, int32_t bss)
 }
 
 static void
-setsym(Fhdr *fp, int32_t symsz, int32_t sppcsz, int32_t lnpcsz,
-       int64_t symoff)
+setsym(Fhdr *fp, int32_t symsz, int32_t sppcsz, int32_t lnpcsz, int64_t symoff)
 {
 	fp->symsz = symsz;
 	fp->symoff = symoff;
@@ -327,4 +379,10 @@ setsym(Fhdr *fp, int32_t symsz, int32_t sppcsz, int32_t lnpcsz,
 	fp->lnpcsz = lnpcsz;
 	fp->lnpcoff = fp->sppcoff+fp->sppcsz;
 }
- 
+ 
+static void
+setstr(Fhdr *fp, int64_t stroff, uint64_t strsz)
+{
+	fp->stroff = stroff;
+	fp->strsz = strsz;
+}

+ 2 - 2
sys/src/libmach/obj.c

@@ -187,7 +187,7 @@ objlookup(int id, char *name, int type, uint sig)
 	s = names[id];
 	if(s && strcmp(s->name, name) == 0) {
 		s->type = type;
-		s->sig = sig;
+		//s->sig = sig;
 		return;
 	}
 
@@ -232,7 +232,7 @@ objlookup(int id, char *name, int type, uint sig)
 	sp = malloc(sizeof(Symtab));
 	sp->s.name = name;
 	sp->s.type = type;
-	sp->s.sig = sig;
+	//sp->s.sig = sig;
 	sp->s.value = islocal(type) ? MAXOFF : 0;
 	names[id] = &sp->s;
 	sp->next = hash[h];

+ 94 - 13
sys/src/libmach/sym.c

@@ -11,6 +11,7 @@
 #include <libc.h>
 #include <bio.h>
 #include <mach.h>
+#include <elf.h>
 
 #define	HUGEINT	0x7fffffff
 #define	NNAME	20		/* a relic of the past */
@@ -61,12 +62,13 @@ static	uint8_t 	*pclineend;	/* end of pc-line table */
 static	uint8_t		*spoff;		/* start of pc-sp state table */
 static	uint8_t		*spoffend;	/* end of pc-sp offset table */
 static	Sym		*symbols;	/* symbol table */
+static	char		*strings;	/* string table */
 static	Txtsym		*txt;		/* Base of text symbol table */
 static	uint64_t	txtstart;	/* start of text segment */
 static	uint64_t	txtend;		/* end of text segment */
 
 static void	cleansyms(void);
-static int32_t	decodename(Biobuf*, Sym*);
+//static int32_t	decodename(Biobuf*, Sym*);
 static int16_t	*encfname(char*);
 static int 	fline(char*, int, int32_t, Hist*, Hist**);
 static void	fillsym(Sym*, Symbol*);
@@ -78,7 +80,7 @@ static int	hline(File*, int16_t*, int32_t*);
 static void	printhist(char*, Hist*, int);
 static int	buildtbls(void);
 static int	symcomp(const void*, const void*);
-static int	symerrmsg(int, char*);
+//static int	symerrmsg(int, char*);
 static int	txtcomp(const void*, const void*);
 static int	filecomp(const void*, const void*);
 
@@ -88,11 +90,13 @@ static int	filecomp(const void*, const void*);
 int
 syminit(int fd, Fhdr *fp)
 {
-	Sym *p;
-	int32_t i, l, size;
-	int64_t vl;
-	Biobuf b;
-	int svalsz;
+	//Sym *p;
+	//int32_t i, l, size;
+	//int64_t vl;
+
+	uint16_t (*swab)(uint16_t) = fp->bigendian ? beswab : leswab;
+	uint32_t (*swal)(uint32_t) = fp->bigendian ? beswal : leswal;
+	uint64_t (*swav)(uint64_t) = fp->bigendian ? beswav : leswav;
 
 	if(fp->symsz == 0)
 		return 0;
@@ -101,20 +105,92 @@ syminit(int fd, Fhdr *fp)
 
 	cleansyms();
 	textseg(fp->txtaddr, fp);
-		/* minimum symbol record size = 4+1+2 bytes */
-	symbols = malloc((fp->symsz/(4+1+2)+1)*sizeof(Sym));
+	
+	nsym = fp->symsz / sizeof(E64Sym);
+
+	symbols = mallocz(nsym*sizeof(Sym), 1);
 	if(symbols == 0) {
-		werrstr("can't malloc %ld bytes", fp->symsz);
+		werrstr("can't allocate memory for symbol table");
 		return -1;
 	}
+
+	strings = malloc(fp->strsz);
+	if(strings == 0) {
+		werrstr("can't allocate memory to load string table");
+		return -1;
+	}
+
+	E64Sym *esyms = malloc(fp->symsz);
+	if(esyms == 0) {
+		werrstr("can't allocate memory to load symbol table");
+		free(esyms);
+		return -1;
+	}
+
+	Biobuf b;
 	Binit(&b, fd, OREAD);
 	Bseek(&b, fp->symoff, 0);
-	nsym = 0;
-	size = 0;
+	print("sizeof esyms: %d\n", fp->symsz);
+	if (Bread(&b, esyms, fp->symsz) != fp->symsz) {
+		werrstr("can't read symbol table");
+		free(esyms);
+		return -1;
+	}
+
+	print("stroff %p sizeof strings: %d\n", fp->stroff, fp->strsz);
+	Bseek(&b, fp->stroff, 0);
+	if (Bread(&b, strings, fp->strsz) != fp->strsz) {
+		werrstr("can't read string table");
+		free(esyms);
+		return -1;
+	}
+
+	print("symsz: %d, numsyms: %d strings: %p\n", fp->symsz, nsym, strings);
+
+	for (int i = 0; i < nsym; i++) {
+		esyms[i].st_name = swal(esyms[i].st_name);
+		esyms[i].st_shndx = swab(esyms[i].st_shndx);
+		esyms[i].st_value = swav(esyms[i].st_value);
+		esyms[i].st_size = swav(esyms[i].st_size);
+	}
+
+	for (int i = 0; i < nsym; i++) {
+		//print("i: %d val: %llx size: %d type: %d ndx: %d name %p\n", i, esyms[i].st_value, esyms[i].st_size, esyms[i].st_info, esyms[i].st_shndx, esyms[i].st_name);
+		if (esyms[i].st_name) {
+			symbols[i].name = &strings[esyms[i].st_name];
+		}
+		symbols[i].value = esyms[i].st_value;
+
+		symbols[i].binding = esyms[i].st_info >> 4;
+		symbols[i].symtype = esyms[i].st_info & 0xf;
+
+		print("%05d %010lld type %d binding %d %s\n", i, symbols[i].value, symbols[i].symtype, symbols[i].binding, symbols[i].name);
+
+		if (symbols[i].symtype == STT_FILE) {
+			nfiles++;
+		} else if (symbols[i].symtype == STT_FUNC) {
+			if (symbols[i].binding == STB_GLOBAL) {
+				nglob++;
+			} else if (symbols[i].binding == STB_LOCAL) {
+				nauto++;
+			}
+		} else if (symbols[i].symtype == STT_OBJECT) {
+			// Don't see objects handled by acid yet...
+		}
+
+		// TODO fmax?
+		// TODO nhist?
+	}
+
+	free(esyms);
+
+#if 0
+	int svalsz;
 	if((fp->_magic && (fp->magic & HDR_MAGIC)) || mach->szaddr == 8)
 		svalsz = 8;
 	else
 		svalsz = 4;
+
 	for(p = symbols; size < fp->symsz; p++, nsym++) {
 		if(svalsz == 8){
 			if(Bread(&b, &vl, 8) != 8)
@@ -200,9 +276,11 @@ syminit(int fd, Fhdr *fp)
 		}
 		pclineend = pcline+fp->lnpcsz;
 	}
+#endif // 0
 	return nsym;
 }
 
+#if 0
 static int
 symerrmsg(int n, char *table)
 {
@@ -274,7 +352,7 @@ decodename(Biobuf *bp, Sym *p)
 	}
 	return n;
 }
-
+#endif // 0
 /*
  *	free any previously loaded symbol tables
  */
@@ -310,6 +388,9 @@ cleansyms(void)
 	if(symbols)
 		free(symbols);
 	symbols = 0;
+	if(strings)
+		free(strings);
+	strings = 0;
 	nsym = 0;
 	if(spoff)
 		free(spoff);