Browse Source

Kernel profiling. Merge it anyway. We need it. And it had bit rotted.

This is too useful not to have.

*** NB *** There is currently a bug in this code which causes a kernel
panic when Harvey is copying large files over TCP/IP.  It's not ready
for merging!!

Ifyou want to try it:

1) Start profiling with: echo opstart >'#K/kpctl'
2) Do some profiling stuff...
3) Stop profiling with: echo opstop >'#K/kpctl'
4) copy the profint information to a regular file:
	cat '#K/kpoprofile' >/tmp/oprofile
5) use the util/profile to format it:
	/profile -kernel ~/src/harvey/sys/src/9/k10/9k8cpu.32bit \
		-profile ~/src/harvey/tmp/oprofile >blah
6) Convert it into frequency data with:
	cat blah |cut -f 4 -d ' '|sort|uniq -c|sort -n -r|less

Change-Id: Ic275d5e61e3615ee992b067ead5144ec5b739b51
Signed-off-by: Keith Poole <keith.poole@gmail.com>
Keith Poole 8 years ago
parent
commit
efcc84bdb9

+ 11 - 2
sys/include/oprofile.h

@@ -35,7 +35,8 @@
 #define IBS_FETCH_CODE			13
 #define IBS_OP_CODE			14
 
- 
+#pragma incomplete Ureg
+
 /**
  * One-time exit/cleanup for the arch.
  */
@@ -85,7 +86,15 @@ void oprofile_cpu_buffer_inc_smpl_lost(void);
  
 /* cpu buffer functions */
 
-struct op_sample;
+/* CPU buffer is composed of samples.
+ * As these are extracted from the buffer, they are encapsulated
+ * in entries, which include additional info.
+ */
+struct op_sample {
+	unsigned long eip;
+	unsigned long event;
+	unsigned long data[0];
+};
 
 struct op_entry {
 	void *event;

+ 8 - 1
sys/src/9/k10/main.c

@@ -170,6 +170,8 @@ loadenv(int argc, char* argv[])
 	}
 }
 
+extern int num_cpus;
+
 void
 squidboy(int apicno, Mach *mach)
 {
@@ -231,6 +233,9 @@ squidboy(int apicno, Mach *mach)
 
 	print("cpu%d color %d role %s tsc %lld\n",
 		mach->machno, corecolor(mach->machno), rolename[mach->NIX.nixtype], mach->rdtsc);
+	if (mach->machno + 1 > num_cpus){
+		num_cpus = mach->machno + 1;
+	}
 	switch(mach->NIX.nixtype){
 	case NIXAC:
 		acmmuswitch();
@@ -278,7 +283,7 @@ testiccs(void)
 	for(i = 0; i < MACHMAX; i++)
 		if((mach = sys->machptr[i]) != nil && mach->online && mach->NIX.nixtype == NIXAC)
 			testicc(i);
-	print("bootcore: all cores done\n");
+	print("bootcore: all cores done\n", i);
 }
 
 /*
@@ -619,6 +624,8 @@ if (0){	acpiinit(); hi("	acpiinit();\n");}
 		testiccs();
 	}
 
+	alloc_cpu_buffers();
+
 	print("CPU Freq. %dMHz\n", mach->cpumhz);
 
 	print("schedinit...\n");

+ 36 - 33
sys/src/9/k10/trap.c

@@ -433,6 +433,9 @@ trap(Ureg* ureg)
 			if(ctl->Vkey.irq == IrqCLOCK || ctl->Vkey.irq == IrqTIMER)
 				clockintr = 1;
 
+			if (ctl->Vkey.irq == IrqTIMER)
+				oprof_alarm_handler(ureg);
+
 			if(up && !clockintr)
 				preempted();
 		}
@@ -513,38 +516,38 @@ dumpgpr(Ureg* ureg)
 {
 	Proc *up = externup();
 	if(up != nil)
-		iprint("cpu%d: registers for %s %d\n",
+		print("cpu%d: registers for %s %d\n",
 			machp()->machno, up->text, up->pid);
 	else
-		iprint("cpu%d: registers for kernel\n", machp()->machno);
-
-	iprint("ax\t%#16.16llux\n", ureg->ax);
-	iprint("bx\t%#16.16llux\n", ureg->bx);
-	iprint("cx\t%#16.16llux\n", ureg->cx);
-	iprint("dx\t%#16.16llux\n", ureg->dx);
-	iprint("di\t%#16.16llux\n", ureg->di);
-	iprint("si\t%#16.16llux\n", ureg->si);
-	iprint("bp\t%#16.16llux\n", ureg->bp);
-	iprint("r8\t%#16.16llux\n", ureg->r8);
-	iprint("r9\t%#16.16llux\n", ureg->r9);
-	iprint("r10\t%#16.16llux\n", ureg->r10);
-	iprint("r11\t%#16.16llux\n", ureg->r11);
-	iprint("r12\t%#16.16llux\n", ureg->r12);
-	iprint("r13\t%#16.16llux\n", ureg->r13);
-	iprint("r14\t%#16.16llux\n", ureg->r14);
-	iprint("r15\t%#16.16llux\n", ureg->r15);
-	iprint("type\t%#llux\n", ureg->type);
-	iprint("error\t%#llux\n", ureg->error);
-	iprint("pc\t%#llux\n", ureg->ip);
-	iprint("cs\t%#llux\n", ureg->cs);
-	iprint("flags\t%#llux\n", ureg->flags);
-	iprint("sp\t%#llux\n", ureg->sp);
-	iprint("ss\t%#llux\n", ureg->ss);
-	iprint("type\t%#llux\n", ureg->type);
-	iprint("FS\t%#llux\n", rdmsr(FSbase));
-	iprint("GS\t%#llux\n", rdmsr(GSbase));
-
-	iprint("m\t%#16.16p\nup\t%#16.16p\n", machp(), up);
+		print("cpu%d: registers for kernel\n", machp()->machno);
+
+	print("ax\t%#16.16llux\n", ureg->ax);
+	print("bx\t%#16.16llux\n", ureg->bx);
+	print("cx\t%#16.16llux\n", ureg->cx);
+	print("dx\t%#16.16llux\n", ureg->dx);
+	print("di\t%#16.16llux\n", ureg->di);
+	print("si\t%#16.16llux\n", ureg->si);
+	print("bp\t%#16.16llux\n", ureg->bp);
+	print("r8\t%#16.16llux\n", ureg->r8);
+	print("r9\t%#16.16llux\n", ureg->r9);
+	print("r10\t%#16.16llux\n", ureg->r10);
+	print("r11\t%#16.16llux\n", ureg->r11);
+	print("r12\t%#16.16llux\n", ureg->r12);
+	print("r13\t%#16.16llux\n", ureg->r13);
+	print("r14\t%#16.16llux\n", ureg->r14);
+	print("r15\t%#16.16llux\n", ureg->r15);
+	print("type\t%#llux\n", ureg->type);
+	print("error\t%#llux\n", ureg->error);
+	print("pc\t%#llux\n", ureg->ip);
+	print("cs\t%#llux\n", ureg->cs);
+	print("flags\t%#llux\n", ureg->flags);
+	print("sp\t%#llux\n", ureg->sp);
+	print("ss\t%#llux\n", ureg->ss);
+	print("type\t%#llux\n", ureg->type);
+	print("FS\t%#llux\n", rdmsr(FSbase));
+	print("GS\t%#llux\n", rdmsr(GSbase));
+
+	print("m\t%#16.16p\nup\t%#16.16p\n", machp(), up);
 }
 
 void
@@ -560,9 +563,9 @@ dumpregs(Ureg* ureg)
 	 * CR4. If there is a CR4 and machine check extensions, read the machine
 	 * check address and machine check type registers if RDMSR supported.
 	 */
-	iprint("cr0\t%#16.16llux\n", cr0get());
-	iprint("cr2\t%#16.16llux\n", machp()->MMU.cr2);
-	iprint("cr3\t%#16.16llux\n", cr3get());
+	print("cr0\t%#16.16llux\n", cr0get());
+	print("cr2\t%#16.16llux\n", machp()->MMU.cr2);
+	print("cr3\t%#16.16llux\n", cr3get());
 die("dumpregs");
 //	archdumpregs();
 }

+ 8 - 5
sys/src/9/port/cpu_buffer.c

@@ -28,7 +28,7 @@
 #include <oprofile.h>
 
 #define OP_BUFFER_FLAGS	0
-int num_cpus = 8; // FIXME -- where do we get this.
+int num_cpus = 1;		// Probably this many cpus (will get increased by squidboy if > 1)
 
 /* we allocate an array of these and set the pointer in mach */
 struct oprofile_cpu_buffer *op_cpu_buffer;
@@ -437,7 +437,7 @@ void oprofile_control_trace(int onoff)
 		cpu_buf->tracing = onoff;
 
 		if (onoff) {
-			print("Enable tracing on %d\n", core);
+			print("Enable tracing on cpu %d\n", core);
 			continue;
 		}
 
@@ -738,12 +738,15 @@ int
 oprofread(void *va, int n)
 {
 	int len = qlen(opq);
+	if (!len)
+		return 0;
 	struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[machp()->machno];
-	if (len == 0) {
-		if (cpu_buf->tracing == 0)
-			return 0;
+	if (cpu_buf->tracing == 1){
+		error("can't read profiling while trace is running");
+		return 0;
 	}
 
 	len = qread(opq, va, n);
+	print("oprofread len=%d\n", len);
 	return len;
 }

+ 0 - 10
sys/src/9/port/cpu_buffer.h

@@ -15,16 +15,6 @@ void start_cpu_work(void);
 void end_cpu_work(void);
 void flush_cpu_work(void);
 
-/* CPU buffer is composed of samples.
- * As these are extracted from the buffer, they are encapsulated
- * in entries, which include additional info.
- */
-struct op_sample {
-	unsigned long eip;
-	unsigned long event;
-	unsigned long data[0];
-};
-
 struct op_entry;
 
 struct oprofile_cpu_buffer {

+ 1 - 1
sys/src/9/port/devcons.c

@@ -253,7 +253,7 @@ panic(char *fmt, ...)
 	//prflush();
 	buf[n] = '\n';
 	putstrn(buf, n+1);
-//	dumpstack();
+	//dumpstack();
 	delay(1000);	/* give time to consoles */
 	die("wait forever");
 	exit(1);

+ 2 - 2
sys/src/9/port/devkprof.c

@@ -201,9 +201,9 @@ kprofwrite(Chan *c, void *a, int32_t n, int64_t m)
 		else if(strncmp(a, "stop", 4) == 0) {
 			print("kprof stopped. %d ms\n", kprof.buf[0]);
 			kprof.time = 0;
-		} else if (!strcmp(a, "opstart")) {
+		} else if (!strncmp(a, "opstart", 7)) {
 			oprofile_control_trace(1);
-		} else if (!strcmp(a, "opstop")) {
+		} else if (!strncmp(a, "opstop", 6)) {
 			oprofile_control_trace(0);
 		} else {
 			error("startclr|start|stop|opstart|opstop");

+ 1 - 1
util/GO9PRUN

@@ -7,7 +7,7 @@ ufspid=$!
 sudo qemu-system-x86_64 -s -cpu Opteron_G1 -smp 1 -m 2048  \
 -enable-kvm \
 -serial stdio \
---machine pc \
+--machine pc,accel=kvm \
 -net nic,model=rtl8139 \
 -net user,hostfwd=tcp::5555-:1522 \
 -net dump,file=/tmp/vm0.pcap \

+ 95 - 0
util/src/harvey/cmd/profile/profile.go

@@ -0,0 +1,95 @@
+package main
+
+import (
+	"debug/elf"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"sort"
+)
+
+type Symbol struct {
+	Addr uint64
+	Name string
+}
+
+type Symbols []*Symbol
+
+func (s Symbols) Len() int      { return len(s) }
+func (s Symbols) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+type ByAddress struct{ Symbols }
+
+func (s ByAddress) Less(i, j int) bool { return s.Symbols[i].Addr < s.Symbols[j].Addr }
+
+var profile = flag.String("profile", "", "name of file containing profile data")
+var kernel = flag.String("kernel", "", "kernel to profile against")
+var symbolTable = []*Symbol{}
+
+func bytesToInt(array []byte, offset int) uint32 {
+	return uint32(array[offset]) | (uint32(array[offset+1]) << 8) | (uint32(array[offset+2]) << 16) | (uint32(array[offset+3]) << 24)
+}
+
+func bytesToLong(array []byte, offset int) uint64 {
+	return uint64(bytesToInt(array, offset)) | (uint64(bytesToInt(array, offset+4)) << 32)
+}
+
+func loadSymbols(kernel *elf.File) {
+	syms, err := kernel.Symbols()
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	for _, sym := range syms {
+		value := sym.Value | 0xffffffff00000000
+		symbolTable = append(symbolTable, &Symbol{value, sym.Name})
+	}
+	sort.Sort(ByAddress{symbolTable})
+}
+
+func findFunction(addr uint64) string {
+	var prevSym *Symbol
+	for _, sym := range symbolTable {
+		if sym.Addr > addr && prevSym != nil {
+			return prevSym.Name
+		}
+		prevSym = sym
+	}
+	return "*** Not Found ***"
+}
+
+func main() {
+	flag.Parse()
+	kernelElf, err := elf.Open(*kernel)
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	loadSymbols(kernelElf)
+
+	backtraces, err := ioutil.ReadFile(*profile)
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	offset := 0
+	for offset < len(backtraces) {
+		topOfStack := true
+		header1 := bytesToInt(backtraces, offset)
+		//timestamp := bytesToLong(backtraces, offset+8)
+		//coreId := (header1 >> 16) & 0xffff
+		numTraces := header1 & 0xff
+		offset += 16
+		//fmt.Printf("timestamp: %d, core id: %d\n", timestamp, coreId)
+		traceEnd := offset + int(numTraces*8)
+		for ; offset < traceEnd; offset += 8 {
+			if topOfStack {
+				addr := bytesToLong(backtraces, offset)
+				fmt.Printf("stack: %x, method %s\n", addr, findFunction(addr))
+			}
+			topOfStack = true
+		}
+	}
+}