Browse Source

Reduce overhead of setlabel

On benchmarks/error this is worth a full 2%.

Next steps: shrink the size of the struct used to store errors, and
figure out what to do about %rbp.

But this CL can be committed as is.

Change-Id: Ia9f0e2b457eb44e8906a72758f7b31b7253cda3c
Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>
Ronald G. Minnich 8 years ago
parent
commit
038a1b67fa

+ 18 - 0
sys/src/9/k10/fns.h

@@ -285,3 +285,21 @@ void dumpgpr(Ureg* ureg);
 
 /* debug support. */
 int backtrace_list(uintptr_t pc, uintptr_t fp, uintptr_t *pcs, size_t nr_slots);
+
+/*
+ * horror: compiler-only barrier for callee-saved registers.
+ * The asm body is empty, so it emits no instructions, but its clobber
+ * list tells GCC that %rbx and %r12-%r15 do not keep their values across
+ * this point.  The setlabel() macro below invokes it right before
+ * slim_setlabel(), which does not store those registers in the Label.
+ * %rbp is not in the clobber list; slim_setlabel() still stores it.
+ */
+static inline void __clobber_callee_regs(void)
+{
+	asm volatile ("" : : : "rbx", "r12", "r13", "r14", "r15");
+}
+/*
+ * Assembly routine (l64v.S): stores the return PC, %rbp and %rsp into the
+ * Label and returns 0.  returns_twice tells GCC that a call to it may
+ * return more than once, as with setjmp.
+ */
+int slim_setlabel(Label*) __attribute__((returns_twice));
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+/*
+ * setlabel(label): GNU statement expression that first marks %rbx and
+ * %r12-%r15 as clobbered via __clobber_callee_regs(), then calls
+ * slim_setlabel(label).  The expression evaluates to that call's int
+ * result.
+ * NOTE(review): the diagnostic pragmas bracket the macro definition, not
+ * its expansion sites -- confirm the -Wmaybe-uninitialized suppression
+ * actually takes effect where setlabel() is used.
+ * NOTE(review): the temporary is named `err`, so an argument expression
+ * that mentions a caller's own `err` would bind to this one instead.
+ */
+#define setlabel(label) ({int err;                                                 \
+                    __clobber_callee_regs();                               \
+                    err = slim_setlabel(label);                                     \
+                    err;})
+
+#pragma GCC diagnostic pop

+ 4 - 28
sys/src/9/k10/l64v.S

@@ -435,20 +435,8 @@ gotolabel:
 	MOVQ	%rdi, %rax
 	MOVQ	0(%rdi), %rsp
 
-	MOVQ	(16+0*8)(%rdi), %rBX
-	MOVQ	(16+1*8)(%rdi), %rCX
-	MOVQ	(16+2*8)(%rdi), %rDX
-	MOVQ	(16+3*8)(%rdi), %rSI
+	// Can't kill this quite yet.
 	MOVQ	(16+5*8)(%rdi), %rBP
-	MOVQ	(16+6*8)(%rdi), %r8
-	MOVQ	(16+7*8)(%rdi), %r9
-	MOVQ	(16+8*8)(%rdi), %r10
-	MOVQ	(16+9*8)(%rdi), %r11
-	MOVQ	(16+10*8)(%rdi), %r12
-	MOVQ	(16+11*8)(%rdi), %r13
-	MOVQ	(16+12*8)(%rdi), %r14
-	MOVQ	(16+13*8)(%rdi), %r15
-	MOVQ	(16+4*8)(%rdi), %rDI
 
 	MOVQ	8(%rax), %rax			/* put return PC on the stack */
 						/* NOTE: replaces previous caller? */
@@ -459,26 +447,14 @@ gotolabel:
 	/* save only the return PC, %rbp and %rsp in the label struct;
 	* the remaining registers are no longer stored here.
 	*/
-.global setlabel
-setlabel:
+.global slim_setlabel
+slim_setlabel:
 	// %rax is trashable.
 	MOVQ	0(%rSP), %rax			/* store return PC */
 	MOVQ	%rax, 8(%rdi)
 
-	MOVQ	%rBX, (16+0*8)(%rdi)
-	MOVQ	%rCX, (16+1*8)(%rdi)
-	MOVQ	%rDX, (16+2*8)(%rdi)
-	MOVQ	%rSI, (16+3*8)(%rdi)
-	MOVQ	%rDI, (16+4*8)(%rdi)
+	// Can't kill this quite yet.
 	MOVQ	%rBP, (16+5*8)(%rdi)
-	MOVQ	%r8, (16+6*8)(%rdi)
-	MOVQ	%r9, (16+7*8)(%rdi)
-	MOVQ	%r10, (16+8*8)(%rdi)
-	MOVQ	%r11, (16+9*8)(%rdi)
-	MOVQ	%r12, (16+10*8)(%rdi)
-	MOVQ	%r13, (16+11*8)(%rdi)
-	MOVQ	%r14, (16+12*8)(%rdi)
-	MOVQ	%r15, (16+13*8)(%rdi)
 
 	MOVQ	%rSP, 0(%rdi)	/* store SP */
 	MOVL	$0, %eax	/* return 0 */

+ 2 - 1
sys/src/9/port/portfns.h

@@ -359,7 +359,8 @@ char*		seprintpagestats(char*, char*);
 char*		seprintphysstats(char*, char*);
 int		setcolor(uint32_t, uint32_t, uint32_t, uint32_t);
 void		setkernur(Ureg*, Proc*);
-int		setlabel(Label*);
+// The horror. But, well, it does make a BIG performance difference. We're told.
+int		slim_setlabel(Label*);
 void		setregisters(Ureg*, char*, char*, int);
 char*		skipslash(char*);
 void		sleep(Rendez*, int (*)(void*), void*);

+ 0 - 0
sys/src/benchmarks/build.json → sys/src/bench/build.json


+ 0 - 0
sys/src/benchmarks/error.c → sys/src/bench/error.c


+ 2 - 0
sys/src/bench/results/019a7ad/error

@@ -0,0 +1,2 @@
+# error benchmark
+1000000 518199829556

+ 2 - 0
sys/src/bench/results/592dd55/error

@@ -0,0 +1,2 @@
+# error benchmark
+1000000 506763266044