Browse Source

all: import power64 support from Charles Forsyth (thanks Charles Forsyth)

David du Colombier 2 years ago
parent
commit
1785a648da
64 changed files with 18733 additions and 0 deletions
  1. 73 0
      power64/include/ape/float.h
  2. 78 0
      power64/include/ape/math.h
  3. 11 0
      power64/include/ape/stdarg.h
  4. 50 0
      power64/include/ape/ureg.h
  5. 142 0
      sys/lib/acid/power64
  6. 4 0
      sys/src/ape/lib/9/power64/getcallerpc.s
  7. 28 0
      sys/src/ape/lib/9/power64/getfcr.s
  8. 11 0
      sys/src/ape/lib/ap/power64/_seek.c
  9. 34 0
      sys/src/ape/lib/ap/power64/atom.s
  10. 10 0
      sys/src/ape/lib/ap/power64/mkfile
  11. 115 0
      sys/src/ape/lib/ap/syscall/gencall
  12. 200 0
      sys/src/cmd/9a/a.h
  13. 946 0
      sys/src/cmd/9a/a.y
  14. 875 0
      sys/src/cmd/9a/lex.c
  15. 19 0
      sys/src/cmd/9a/mkfile
  16. 419 0
      sys/src/cmd/9c/9.out.h
  17. 14 0
      sys/src/cmd/9c/Notes
  18. 126 0
      sys/src/cmd/9c/bits.c
  19. 1102 0
      sys/src/cmd/9c/cgen.c
  20. 309 0
      sys/src/cmd/9c/enam.c
  21. 352 0
      sys/src/cmd/9c/gc.h
  22. 229 0
      sys/src/cmd/9c/list.c
  23. 76 0
      sys/src/cmd/9c/machcap.c
  24. 18 0
      sys/src/cmd/9c/mkenam
  25. 42 0
      sys/src/cmd/9c/mkfile
  26. 609 0
      sys/src/cmd/9c/mul.c
  27. 1042 0
      sys/src/cmd/9c/peep.c
  28. 1134 0
      sys/src/cmd/9c/reg.c
  29. 248 0
      sys/src/cmd/9c/sgen.c
  30. 628 0
      sys/src/cmd/9c/swt.c
  31. 1417 0
      sys/src/cmd/9c/txt.c
  32. 22 0
      sys/src/cmd/9l/TODO
  33. 626 0
      sys/src/cmd/9l/asm.c
  34. 1485 0
      sys/src/cmd/9l/asmout.c
  35. 37 0
      sys/src/cmd/9l/cnam.c
  36. 65 0
      sys/src/cmd/9l/compat.c
  37. 355 0
      sys/src/cmd/9l/l.h
  38. 311 0
      sys/src/cmd/9l/list.c
  39. 17 0
      sys/src/cmd/9l/mkcname
  40. 33 0
      sys/src/cmd/9l/mkfile
  41. 523 0
      sys/src/cmd/9l/noop.c
  42. 1602 0
      sys/src/cmd/9l/obj.c
  43. 372 0
      sys/src/cmd/9l/optab.c
  44. 666 0
      sys/src/cmd/9l/pass.c
  45. 804 0
      sys/src/cmd/9l/sched.c
  46. 1011 0
      sys/src/cmd/9l/span.c
  47. 9 0
      sys/src/libc/9syscall/mkfile
  48. 14 0
      sys/src/libc/power64/_seek.c
  49. 4 0
      sys/src/libc/power64/argv0.s
  50. 34 0
      sys/src/libc/power64/atom.s
  51. 17 0
      sys/src/libc/power64/cycles.s
  52. 4 0
      sys/src/libc/power64/getcallerpc.s
  53. 28 0
      sys/src/libc/power64/getfcr.s
  54. 23 0
      sys/src/libc/power64/main9.s
  55. 35 0
      sys/src/libc/power64/main9p.s
  56. 35 0
      sys/src/libc/power64/mkfile
  57. 22 0
      sys/src/libc/power64/notejmp.c
  58. 26 0
      sys/src/libc/power64/setjmp.s
  59. 103 0
      sys/src/libc/power64/sqrt.c
  60. 19 0
      sys/src/libc/power64/tas.s
  61. 13 0
      sys/src/libmp/power64/mkfile
  62. 11 0
      sys/src/libsec/power64/mkfile
  63. 27 0
      sys/src/libthread/power64.c
  64. 19 0
      sys/src/libthread/xincpower64.s

+ 73 - 0
power64/include/ape/float.h

@@ -0,0 +1,73 @@
+#ifndef __FLOAT
+#define __FLOAT
+/* IEEE, default rounding */
+
+#define FLT_ROUNDS	1
+#define FLT_RADIX	2
+
+#define FLT_DIG		6
+#define FLT_EPSILON	1.19209290e-07
+#define FLT_MANT_DIG	24
+#define FLT_MAX		3.40282347e+38
+#define FLT_MAX_10_EXP	38
+#define FLT_MAX_EXP	128
+#define FLT_MIN		1.17549435e-38
+#define FLT_MIN_10_EXP	-37
+#define FLT_MIN_EXP	-125
+
+#define DBL_DIG		15
+#define DBL_EPSILON	2.2204460492503131e-16
+#define DBL_MANT_DIG	53
+#define DBL_MAX		1.797693134862315708145e+308
+#define DBL_MAX_10_EXP	308
+#define DBL_MAX_EXP	1024
+#define DBL_MIN		2.225073858507201383090233e-308
+#define DBL_MIN_10_EXP	-307
+#define DBL_MIN_EXP	-1021
+#define LDBL_MANT_DIG	DBL_MANT_DIG
+#define LDBL_EPSILON	DBL_EPSILON
+#define LDBL_DIG	DBL_DIG
+#define LDBL_MIN_EXP	DBL_MIN_EXP
+#define LDBL_MIN	DBL_MIN
+#define LDBL_MIN_10_EXP	DBL_MIN_10_EXP
+#define LDBL_MAX_EXP	DBL_MAX_EXP
+#define LDBL_MAX	DBL_MAX
+#define LDBL_MAX_10_EXP	DBL_MAX_10_EXP
+
+typedef 	union FPdbleword FPdbleword;
+union FPdbleword
+{
+	double	x;
+	struct {	/* big endian */
+		long hi;
+		long lo;
+	};
+};
+
+#ifdef _RESEARCH_SOURCE
+/* define stuff needed for floating conversion */
+#define IEEE_MC68k	1
+#define Sudden_Underflow 1
+#endif
+#ifdef _PLAN9_SOURCE
+/* FCR */
+#define	FPINEX	(1<<7)
+#define	FPOVFL	(1<<9)
+#define	FPUNFL	(1<<8)
+#define	FPZDIV	(1<<10)
+#define	FPRNR	(0<<0)
+#define	FPRZ	(1<<0)
+#define	FPRPINF	(2<<0)
+#define	FPRNINF	(3<<0)
+#define	FPRMASK	(3<<0)
+#define	FPPEXT	0
+#define	FPPSGL	0
+#define	FPPDBL	0
+#define	FPPMASK	0
+/* FSR */
+#define	FPAINEX	(1<<2)
+#define	FPAOVFL	(1<<4)
+#define	FPAUNFL	(1<<3)
+#define	FPAZDIV	(1<<5)
+#endif
+#endif /* __FLOAT */

+ 78 - 0
power64/include/ape/math.h

@@ -0,0 +1,78 @@
+#ifndef __MATH
+#define __MATH
+#pragma lib "/$M/lib/ape/libap.a"
+
+/* a HUGE_VAL appropriate for IEEE double-precision */
+/* the correct value, 1.797693134862316e+308, causes a ken overflow */
+#define HUGE_VAL 1.79769313486231e+308
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern double acos(double);
+extern double asin(double);
+extern double atan(double);
+extern double atan2(double, double);
+extern double cos(double);
+extern double hypot(double, double);
+extern double sin(double);
+extern double tan(double);
+extern double cosh(double);
+extern double sinh(double);
+extern double tanh(double);
+extern double exp(double);
+extern double frexp(double, int *);
+extern double ldexp(double, int);
+extern double log(double);
+extern double log10(double);
+extern double modf(double, double *);
+extern double pow(double, double);
+extern double sqrt(double);
+extern double ceil(double);
+extern double fabs(double);
+extern double floor(double);
+extern double fmod(double, double);
+extern double NaN(void);
+extern int isNaN(double);
+extern double Inf(int);
+extern int isInf(double, int);
+
+#ifdef _RESEARCH_SOURCE
+/* does >> treat left operand as unsigned ? */
+#define Unsigned_Shifts 1
+#define	M_E		2.7182818284590452354	/* e */
+#define	M_LOG2E		1.4426950408889634074	/* log 2e */
+#define	M_LOG10E	0.43429448190325182765	/* log 10e */
+#define	M_LN2		0.69314718055994530942	/* log e2 */
+#define	M_LN10		2.30258509299404568402	/* log e10 */
+#define	M_PI		3.14159265358979323846	/* pi */
+#define	M_PI_2		1.57079632679489661923	/* pi/2 */
+#define	M_PI_4		0.78539816339744830962	/* pi/4 */
+#define	M_1_PI		0.31830988618379067154	/* 1/pi */
+#define	M_2_PI		0.63661977236758134308	/* 2/pi */
+#define	M_2_SQRTPI	1.12837916709551257390	/* 2/sqrt(pi) */
+#define	M_SQRT2		1.41421356237309504880	/* sqrt(2) */
+#define	M_SQRT1_2	0.70710678118654752440	/* 1/sqrt(2) */
+
+extern double hypot(double, double);
+extern double erf(double);
+extern double erfc(double);
+extern double j0(double);
+extern double y0(double);
+extern double j1(double);
+extern double y1(double);
+extern double jn(int, double);
+extern double yn(int, double);
+
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#define isnan(x) isNaN(x)
+#define isinf(x) isInf(x, 0)
+
+#endif /* __MATH */

+ 11 - 0
power64/include/ape/stdarg.h

@@ -0,0 +1,11 @@
+#ifndef __STDARG
+#define __STDARG
+
+typedef char *va_list;
+
+#define va_start(list, start) list = (char *)(&(start)+1)
+#define va_end(list)
+#define va_arg(list, mode) (sizeof(mode)==1 ? ((mode *) (list += 4))[-4] : \
+sizeof(mode)==2 ? ((mode *) (list += 4))[-2] : ((mode *) (list += sizeof(mode)))[-1])
+
+#endif /* __STDARG */

+ 50 - 0
power64/include/ape/ureg.h

@@ -0,0 +1,50 @@
+#ifndef __UREG_H
+#define __UREG_H
+#if !defined(_PLAN9_SOURCE)
+    This header file is an extension to ANSI/POSIX
+#endif
+
+struct Ureg
+{	unsigned long	cause;
+	union { unsigned long	srr1; unsigned long status;};
+	unsigned long	pc;	/* SRR0 */
+	unsigned long	pad;
+	unsigned long	lr;
+	unsigned long	cr;
+	unsigned long	xer;
+	unsigned long	ctr;
+	unsigned long	r0;
+	union{ unsigned long r1;	unsigned long	sp;	unsigned long	usp; };
+	unsigned long	r2;
+	unsigned long	r3;
+	unsigned long	r4;
+	unsigned long	r5;
+	unsigned long	r6;
+	unsigned long	r7;
+	unsigned long	r8;
+	unsigned long	r9;
+	unsigned long	r10;
+	unsigned long	r11;
+	unsigned long	r12;
+	unsigned long	r13;
+	unsigned long	r14;
+	unsigned long	r15;
+	unsigned long	r16;
+	unsigned long	r17;
+	unsigned long	r18;
+	unsigned long	r19;
+	unsigned long	r20;
+	unsigned long	r21;
+	unsigned long	r22;
+	unsigned long	r23;
+	unsigned long	r24;
+	unsigned long	r25;
+	unsigned long	r26;
+	unsigned long	r27;
+	unsigned long	r28;
+	unsigned long	r29;
+	unsigned long	r30;
+	unsigned long	r31;
+};
+
+#endif

+ 142 - 0
sys/lib/acid/power64

@@ -0,0 +1,142 @@
+// power64
+// incomplete until there are processes to debug
+
+defn acidinit()			// Called after all the init modules are loaded
+{
+	bplist = {};
+	bpfmt = 'X';
+
+	srcpath = {
+		"./",
+		"/sys/src/libc/port/",
+		"/sys/src/libc/9sys/",
+		"/sys/src/libc/power64/"
+	};
+
+	srcfiles = {};		// list of loaded files
+	srctext = {};		// the text of the files
+}
+
+defn stk()			// trace
+{
+	_stk(*PC, *SP, linkreg(0), 0);
+}
+
+defn lstk()			// trace with locals
+{
+	_stk(*PC, *SP, linkreg(0), 1);
+}
+
+defn gpr()			// print general purpose registers
+{
+	print("R0    ", *R0, "\n");
+	print("SP    ", *SP, "\n");
+	print("R2    ", *R2, "\n");
+	print("R3    ", *R3, "\n");
+	print("R4    ", *R4, "\n");
+	print("R5    ", *R5, "\n");
+	print("R6    ", *R6, "\n");
+	print("R7    ", *R7, "\n");
+	print("R8    ", *R8, "\n");
+	print("R9    ", *R9, "\n");
+	print("R10    ", *R10, "\n");
+	print("R11    ", *R11, "\n");
+	print("R12    ", *R12, "\n");
+	print("R13    ", *R13, "\n");
+	print("R14    ", *R14, "\n");
+	print("R15    ", *R15, "\n");
+	print("R16    ", *R16, "\n");
+	print("R17    ", *R17, "\n");
+	print("R18    ", *R18, "\n");
+	print("R19    ", *R19, "\n");
+	print("R20    ", *R20, "\n");
+	print("R21    ", *R21, "\n");
+	print("R22    ", *R22, "\n");
+	print("R23    ", *R23, "\n");
+	print("R24    ", *R24, "\n");
+	print("R25    ", *R25, "\n");
+	print("R26    ", *R26, "\n");
+	print("R27    ", *R27, "\n");
+	print("R28    ", *R28, "\n");
+	print("R29    ", *R29, "\n");
+	print("R30    ", *R30, "\n");
+	print("R31    ", *R31, "\n");
+}
+
+defn Fpr()
+{
+	fpr();
+}
+
+defn fpr()
+{
+	print("F0\t",  *fmt(F0, 'G'),  "\tF1\t",  *fmt(F1, 'G'), "\n");
+	print("F2\t",  *fmt(F2, 'G'),  "\tF3\t",  *fmt(F3, 'G'), "\n");
+	print("F4\t",  *fmt(F4, 'G'),  "\tF5\t",  *fmt(F5, 'G'), "\n");
+	print("F6\t",  *fmt(F6, 'G'),  "\tF7\t",  *fmt(F7, 'G'), "\n");
+	print("F8\t",  *fmt(F8, 'G'),  "\tF9\t",  *fmt(F9, 'G'), "\n");
+	print("F10\t", *fmt(F10, 'G'), "\tF11\t", *fmt(F11, 'G'), "\n");
+	print("F12\t", *fmt(F12, 'G'), "\tF13\t", *fmt(F13, 'G'), "\n");
+	print("F14\t", *fmt(F14, 'G'), "\tF15\t", *fmt(F15, 'G'), "\n");
+	print("F16\t", *fmt(F16, 'G'), "\tF17\t", *fmt(F17, 'G'), "\n");
+	print("F18\t", *fmt(F18, 'G'), "\tF19\t", *fmt(F19, 'G'), "\n");
+	print("F20\t", *fmt(F20, 'G'), "\tF21\t", *fmt(F21, 'G'), "\n");
+	print("F22\t", *fmt(F22, 'G'), "\tF23\t", *fmt(F23, 'G'), "\n");
+	print("F24\t", *fmt(F24, 'G'), "\tF25\t", *fmt(F25, 'G'), "\n");
+	print("F26\t", *fmt(F26, 'G'), "\tF27\t", *fmt(F27, 'G'), "\n");
+	print("F28\t", *fmt(F28, 'G'), "\tF29\t", *fmt(F29, 'G'), "\n");
+	print("F30\t", *fmt(F30, 'G'), "\tF31\t", *fmt(F31, 'G'), "\n");
+}
+
+defn spr()				// print special processor registers
+{
+	local pc, link, cause;
+
+	pc = *PC;
+	print("PC\t", pc, " ", fmt(pc, 'a'), "  ");
+	pfl(pc);
+
+	link = *R31;
+	print("SP\t", *SP, "\tLINK\t", link, " ", fmt(link, 'a'), " ");
+	pfl(link);
+
+	cause = *CAUSE;
+	print("SRR1\t", *SRR1, "\tCAUSE\t", cause, " ", reason(cause), "\n");
+	print("LR\t", *LR, "\tCR\t", *CR, "\n");
+
+	print("XER\t", *XER, "\tCTR\t", *CTR, "\n");
+}
+
+defn regs()				// print all registers
+{
+	spr();
+	gpr();
+}
+
+defn pstop(pid)
+{
+	local l, pc;
+
+	pc = *PC;
+
+	print(pid,": ", reason(*CAUSE), "\t");
+	print(fmt(pc, 'a'), "\t", fmt(pc, 'i'), "\n");
+
+	if notes then {
+		if notes[0] != "sys: breakpoint" then {
+			print("Notes pending:\n");
+			l = notes;
+			while l do {
+				print("\t", head l, "\n");
+				l = tail l;
+			}
+		}
+	}
+}
+
+defn linkreg(addr)
+{
+	return *LR;
+}
+
+print("/sys/lib/acid/power64");

+ 4 - 0
sys/src/ape/lib/9/power64/getcallerpc.s

@@ -0,0 +1,4 @@
+TEXT	getcallerpc(SB), $-4
+	MOVD	0(R1), R3
+	RETURN
+

+ 28 - 0
sys/src/ape/lib/9/power64/getfcr.s

@@ -0,0 +1,28 @@
+TEXT	getfcr(SB), $8
+	MOVFL	FPSCR, F3
+	FMOVD	F3, f-8(SP)
+	MOVW	-4(SP), R3
+	RETURN
+
+TEXT	getfsr(SB), $8
+	MOVFL	FPSCR, F3
+	FMOVD	F3, f-8(SP)
+	MOVW	-4(SP), R3
+	RETURN
+
+TEXT	setfcr(SB), $8
+	SYNC
+	MOVW	R3, -4(SP)
+	FMOVD	-8(SP), F3
+	MOVFL	F3, FPSCR
+	ISYNC
+	RETURN
+
+TEXT	setfsr(SB), $8
+	SYNC
+	MOVW	R3, -4(SP)
+	FMOVD	-8(SP), F3
+	MOVFL	F3, FPSCR
+	ISYNC
+	RETURN
+

+ 11 - 0
sys/src/ape/lib/ap/power64/_seek.c

@@ -0,0 +1,11 @@
+extern long __SEEK(long long*, int, long long, int);
+
+long long
+_SEEK(int fd, long long o, int p)
+{
+	long long l;
+
+	if(__SEEK(&l, fd, o, p) < 0)
+		l = -1;
+	return l;
+}

+ 34 - 0
sys/src/ape/lib/ap/power64/atom.s

@@ -0,0 +1,34 @@
+TEXT ainc(SB), 1, $-4				/* long ainc(long *); */
+	BR	_trap
+	RET
+
+TEXT adec(SB), 1, $-4				/* long adec(long*); */
+	BR	_trap
+	RET
+
+TEXT _xinc(SB), 1, $-4				/* void _xinc(long *); */
+	BR	_trap
+	RET
+
+TEXT _xdec(SB), 1, $-4				/* long _xdec(long *); */
+	BR	_trap
+	RET
+
+/*
+ * int cas(uint* p, int ov, int nv);
+ */
+TEXT cas(SB), 1, $-4
+	BR	_trap
+	RET
+
+/*
+ * int casv(u64int* p, u64int ov, u64int nv);
+ */
+TEXT casv(SB), 1, $-4
+	BR	_trap
+	RET
+
+_trap:
+	MOVD	$0, R0
+	MOVD	0(R0), R0
+	RET

+ 10 - 0
sys/src/ape/lib/ap/power64/mkfile

@@ -0,0 +1,10 @@
+APE=/sys/src/ape
+<$APE/config
+LIB=/$objtype/lib/ape/libap.a
+OFILES=\
+	_seek.$O\
+
+</sys/src/cmd/mksyslib
+
+CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE
+

+ 115 - 0
sys/src/ape/lib/ap/syscall/gencall

@@ -0,0 +1,115 @@
+#!/bin/rc
+		{switch($objtype){
+		case mips
+			echo TEXT $i'(SB)', 1, '$0'
+			echo MOVW R1, '0(FP)'
+			echo MOVW '$'$n, R1
+			echo SYSCALL
+			if(~ $i _SEEK || ~ $i nsec) {
+				echo 'MOVW $-1,R5
+				BNE R1,R5,4(PC)
+				MOVW a+0(FP),R5
+				MOVW R1,0(R5)
+				MOVW R1,4(R5)'
+			}
+			echo RET
+		case mips2
+			echo TEXT $i'(SB)', 1, '$0'
+			echo MOVW R1, '0(FP)'
+			echo MOVW '$'$n, R1
+			echo ADD '$4',R29
+			echo SYSCALL
+			echo ADD '$-4',R29
+			echo RET
+		case spim
+			echo TEXT $i'(SB)', 1, '$0'
+			echo MOVW R1, '0(FP)'
+			echo MOVW '$'$n, R1
+			echo ADD '$4',R29
+			echo SYSCALL
+			echo ADD '$-4',R29
+			if(~ $i _SEEK || ~ $i nsec) {	# untested so far - geoff
+				echo 'MOVW $-1,R5
+				BNE R1,R5,4(PC)
+				MOVW a+0(FP),R5
+				MOVW R1,0(R5)
+				MOVW R1,4(R5)'
+			}
+			echo RET
+		case 386
+			echo TEXT $i'(SB)', 1, '$0'
+			echo MOVL '$'$n, AX
+			echo INT '$'64
+			if(~ $i _SEEK || ~ $i nsec) {
+				echo 'CMPL AX,$-1
+				JNE 4(PC)
+				MOVL a+0(FP),CX
+				MOVL AX,0(CX)
+				MOVL AX,4(CX)'
+			}
+			echo RET
+		case amd64
+			if(~ $i _SEEK)
+				echo TEXT __SEEK'(SB)', 1, '$0'
+			if not
+				echo TEXT $i'(SB)', 1, '$0'
+			#
+			# For architectures which pass the first argument
+			# in a register, if the system call takes no arguments
+			# there will be no 'a0+0(FP)' reserved on the stack.
+			#
+			if(! ~ $i nsec)
+				echo MOVQ RARG, 'a0+0(FP)'
+			echo MOVQ '$'$n, RARG
+			echo SYSCALL
+			echo RET
+		case sparc
+			echo TEXT $i'(SB)', 1, '$0'
+			echo MOVW R7, '0(FP)'
+			echo MOVW '$'$n, R7
+			echo TA R0
+			if(~ $i _SEEK || ~ $i nsec) {
+				echo 'CMP R7,$-1
+				BNE 4(PC)
+				MOVW a+0(FP),R8
+				MOVW R7,0(R8)
+				MOVW R7,4(R8)'
+			}
+			echo RETURN
+		case arm
+			echo TEXT $i'(SB)', 1, '$0'
+			echo MOVW R0, '0(FP)'
+			echo MOVW '$'$n, R0
+			echo SWI 0
+			if(~ $i _SEEK || ~ $i nsec) {
+				echo 'CMP $-1,R0
+				BNE 4(PC)
+				MOVW a+0(FP),R1
+				MOVW R0,0(R1)
+				MOVW R0,4(R1)'
+			}
+			echo RET
+		case power
+			echo TEXT $i'(SB)', 1, '$0'
+			echo MOVW R3, '0(FP)'
+			echo MOVW '$'$n, R3
+			echo SYSCALL
+			if(~ $i _SEEK || ~ $i nsec) {
+				echo 'CMP R3,$-1
+				BNE 4(PC)
+				MOVW a+0(FP),R8
+				MOVW R3,0(R8)
+				MOVW R3,4(R8)'
+			}
+			echo RETURN
+		case power64
+			if(~ $i _SEEK)
+				echo TEXT __SEEK'(SB)', 1, '$0'
+			if not
+				echo TEXT $i'(SB)', 1, '$0'
+			echo MOVD R3, '0(FP)'
+			echo MOVW '$'$n, R3
+			echo SYSCALL
+			echo RETURN
+		}} > $i.s
+		$AS $i.s

+ 200 - 0
sys/src/cmd/9a/a.h

@@ -0,0 +1,200 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "../9c/9.out.h"
+
+#ifndef	EXTERN
+#define	EXTERN	extern
+#endif
+
+typedef	struct	Sym	Sym;
+typedef	struct	Gen	Gen;
+typedef	struct	Io	Io;
+typedef	struct	Hist	Hist;
+
+#define	MAXALIGN	7
+#define	FPCHIP		1
+#define	NSYMB		8192
+#define	BUFSIZ		8192
+#define	HISTSZ		20
+#define	NINCLUDE	10
+#define	NHUNK		10000
+#define	EOF		(-1)
+#define	IGN		(-2)
+#define	GETC()		((--fi.c < 0)? filbuf(): *fi.p++ & 0xff)
+#define	NHASH		503
+#define	STRINGSZ	200
+#define	NMACRO		10
+
+#define	ALLOC(lhs, type)\
+	while(nhunk < sizeof(type))\
+		gethunk();\
+	lhs = (type*)hunk;\
+	nhunk -= sizeof(type);\
+	hunk += sizeof(type);
+
+#define	ALLOCN(lhs, len, n)\
+	if(lhs+len != hunk || nhunk < n) {\
+		while(nhunk <= len)\
+			gethunk();\
+		memmove(hunk, lhs, len);\
+		lhs = hunk;\
+		hunk += len;\
+		nhunk -= len;\
+	}\
+	hunk += n;\
+	nhunk -= n;
+
+struct	Sym
+{
+	Sym*	link;
+	char*	macro;
+	vlong	value;
+	ushort	type;
+	char	*name;
+	char	sym;
+};
+#define	S	((Sym*)0)
+
+struct
+{
+	char*	p;
+	int	c;
+} fi;
+
+struct	Io
+{
+	Io*	link;
+	char	b[BUFSIZ];
+	char*	p;
+	short	c;
+	short	f;
+};
+#define	I	((Io*)0)
+
+struct
+{
+	Sym*	sym;
+	short	type;
+} h[NSYM];
+
+struct	Gen
+{
+	Sym*	sym;
+	vlong	offset;
+	short	type;
+	short	reg;
+	short	xreg;
+	short	name;
+	ushort	mask;
+	double	dval;
+	char	sval[8];
+};
+
+struct	Hist
+{
+	Hist*	link;
+	char*	name;
+	long	line;
+	vlong	offset;
+};
+#define	H	((Hist*)0)
+
+enum
+{
+	CLAST,
+	CMACARG,
+	CMACRO,
+	CPREPROC
+};
+
+EXTERN	char	debug[256];
+EXTERN	Sym*	hash[NHASH];
+EXTERN	char*	Dlist[30];
+EXTERN	int	nDlist;
+EXTERN	Hist*	ehist;
+EXTERN	int	newflag;
+EXTERN	Hist*	hist;
+EXTERN	char*	hunk;
+EXTERN	char*	include[NINCLUDE];
+EXTERN	Io*	iofree;
+EXTERN	Io*	ionext;
+EXTERN	Io*	iostack;
+EXTERN	long	lineno;
+EXTERN	int	nerrors;
+EXTERN	long	nhunk;
+EXTERN	int	nosched;
+EXTERN	int	ninclude;
+EXTERN	Gen	nullgen;
+EXTERN	char*	outfile;
+EXTERN	int	pass;
+EXTERN	char*	pathname;
+EXTERN	long	pc;
+EXTERN	int	peekc;
+EXTERN	int	sym;
+EXTERN	char	symb[NSYMB];
+EXTERN	int	thechar;
+EXTERN	char*	thestring;
+EXTERN	long	thunk;
+EXTERN	Biobuf	obuf;
+
+void	errorexit(void);
+void	pushio(void);
+void	newio(void);
+void	newfile(char*, int);
+Sym*	slookup(char*);
+Sym*	lookup(void);
+void	syminit(Sym*);
+long	yylex(void);
+int	getc(void);
+int	getnsc(void);
+void	unget(int);
+int	escchar(int);
+void	cinit(void);
+void	pinit(char*);
+void	cclean(void);
+void	outcode(int, Gen*, int, Gen*);
+void	outgcode(int, Gen*, int, Gen*, Gen*);
+void	zname(char*, int, int);
+void	zaddr(Gen*, int);
+void	ieeedtod(Ieee*, double);
+int	filbuf(void);
+Sym*	getsym(void);
+void	domacro(void);
+void	macund(void);
+void	macdef(void);
+void	macexpand(Sym*, char*);
+void	macinc(void);
+void	macprag(void);
+void	maclin(void);
+void	macif(int);
+void	macend(void);
+void	dodefine(char*);
+void	prfile(long);
+void	outhist(void);
+void	linehist(char*, int);
+void	gethunk(void);
+void	yyerror(char*, ...);
+int	yyparse(void);
+void	setinclude(char*);
+int	assemble(char*);
+
+/*
+ *	system-dependent stuff from ../cc/compat.c
+ */
+enum				/* keep in synch with ../cc/cc.h */
+{
+	Plan9	= 1<<0,
+	Unix	= 1<<1,
+	Windows	= 1<<2
+};
+int	mywait(int*);
+int	mycreat(char*, int);
+int	systemtype(int);
+int	pathchar(void);
+char*	mygetwd(char*, int);
+int	myexec(char*, char*[]);
+int	mydup(int, int);
+int	myfork(void);
+int	mypipe(int*);
+void*	mysbrk(ulong);

+ 946 - 0
sys/src/cmd/9a/a.y

@@ -0,0 +1,946 @@
+%{
+#include "a.h"
+%}
+%union
+{
+	Sym	*sym;
+	vlong	lval;
+	double	dval;
+	char	sval[8];
+	Gen	gen;
+}
+%left	'|'
+%left	'^'
+%left	'&'
+%left	'<' '>'
+%left	'+' '-'
+%left	'*' '/' '%'
+%token	<lval>	LMOVW LMOVB LABS LLOGW LSHW LADDW LCMP LCROP
+%token	<lval>	LBRA LFMOV LFCONV LFCMP LFADD LFMA LTRAP LXORW
+%token	<lval>	LNOP LEND LRETT LWORD LTEXT LDATA LRETRN
+%token	<lval>	LCONST LSP LSB LFP LPC LCREG LFLUSH
+%token	<lval>	LREG LFREG LR LCR LF LFPSCR
+%token	<lval>	LLR LCTR LSPR LSPREG LSEG LMSR
+%token	<lval>	LSCHED LXLD LXST LXOP LXMV
+%token	<lval>	LRLWM LMOVMW LMOVEM LMOVFL LMTFSB LMA
+%token	<dval>	LFCONST
+%token	<sval>	LSCONST
+%token	<sym>	LNAME LLAB LVAR
+%type	<lval>	con expr pointer offset sreg
+%type	<gen>	addr rreg regaddr name creg freg xlreg lr ctr
+%type	<gen>	imm ximm fimm rel psr lcr cbit fpscr fpscrf msr mask
+%%
+prog:
+|	prog line
+
+line:
+	LLAB ':'
+	{
+		if($1->value != pc)
+			yyerror("redeclaration of %s", $1->name);
+		$1->value = pc;
+	}
+	line
+|	LNAME ':'
+	{
+		$1->type = LLAB;
+		$1->value = pc;
+	}
+	line
+|	LNAME '=' expr ';'
+	{
+		$1->type = LVAR;
+		$1->value = $3;
+	}
+|	LVAR '=' expr ';'
+	{
+		if($1->value != $3)
+			yyerror("redeclaration of %s", $1->name);
+		$1->value = $3;
+	}
+|	LSCHED ';'
+	{
+		nosched = $1;
+	}
+|	';'
+|	inst ';'
+|	error ';'
+
+inst:
+/*
+ * load ints and bytes
+ */
+	LMOVW rreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW addr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW regaddr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVB rreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVB addr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVB regaddr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * load floats
+ */
+|	LFMOV addr ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFMOV regaddr ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFMOV fimm ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFMOV freg ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFMOV freg ',' addr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFMOV freg ',' regaddr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * store ints and bytes
+ */
+|	LMOVW rreg ',' addr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW rreg ',' regaddr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVB rreg ',' addr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVB rreg ',' regaddr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * store floats
+ */
+|	LMOVW freg ',' addr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW freg ',' regaddr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * floating point status
+ */
+|	LMOVW fpscr ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW freg ','  fpscr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW freg ',' imm ',' fpscr
+	{
+		outgcode($1, &$2, NREG, &$4, &$6);
+	}
+|	LMOVW fpscr ',' creg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW imm ',' fpscrf
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMTFSB imm ',' con
+	{
+		outcode($1, &$2, $4, &nullgen);
+	}
+/*
+ * field moves (mtcrf)
+ */
+|	LMOVW rreg ',' imm ',' lcr
+	{
+		outgcode($1, &$2, NREG, &$4, &$6);
+	}
+|	LMOVW rreg ',' creg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW rreg ',' lcr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * integer operations
+ * logical instructions
+ * shift instructions
+ * unary instructions
+ */
+|	LADDW rreg ',' sreg ',' rreg
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LADDW imm ',' sreg ',' rreg
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LADDW rreg ',' imm ',' rreg
+	{
+		outgcode($1, &$2, NREG, &$4, &$6);
+	}
+|	LADDW rreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LADDW imm ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LLOGW rreg ',' sreg ',' rreg
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LLOGW rreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LSHW rreg ',' sreg ',' rreg
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LSHW rreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LSHW imm ',' sreg ',' rreg
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LSHW imm ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LABS rreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LABS rreg
+	{
+		outcode($1, &$2, NREG, &$2);
+	}
+/*
+ * multiply-accumulate
+ */
+|	LMA rreg ',' sreg ',' rreg
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+/*
+ * move immediate: macro for cau+or, addi, addis, and other combinations
+ */
+|	LMOVW imm ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW ximm ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * condition register operations
+ */
+|	LCROP cbit ',' cbit
+	{
+		outcode($1, &$2, $4.reg, &$4);
+	}
+|	LCROP cbit ',' con ',' cbit
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+/*
+ * condition register moves
+ * move from machine state register
+ */
+|	LMOVW creg ',' creg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW psr ',' creg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW lcr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW psr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW xlreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW rreg ',' xlreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW creg ',' psr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVW rreg ',' psr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * branch, branch conditional
+ * branch conditional register
+ * branch conditional to count register
+ */
+|	LBRA rel
+	{
+		outcode($1, &nullgen, NREG, &$2);
+	}
+|	LBRA addr
+	{
+		outcode($1, &nullgen, NREG, &$2);
+	}
+|	LBRA '(' xlreg ')'
+	{
+		outcode($1, &nullgen, NREG, &$3);
+	}
+|	LBRA ',' rel
+	{
+		outcode($1, &nullgen, NREG, &$3);
+	}
+|	LBRA ',' addr
+	{
+		outcode($1, &nullgen, NREG, &$3);
+	}
+|	LBRA ',' '(' xlreg ')'
+	{
+		outcode($1, &nullgen, NREG, &$4);
+	}
+|	LBRA creg ',' rel
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LBRA creg ',' addr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LBRA creg ',' '(' xlreg ')'
+	{
+		outcode($1, &$2, NREG, &$5);
+	}
+|	LBRA con ',' rel
+	{
+		outcode($1, &nullgen, $2, &$4);
+	}
+|	LBRA con ',' addr
+	{
+		outcode($1, &nullgen, $2, &$4);
+	}
+|	LBRA con ',' '(' xlreg ')'
+	{
+		outcode($1, &nullgen, $2, &$5);
+	}
+|	LBRA con ',' con ',' rel
+	{
+		Gen g;
+		g = nullgen;
+		g.type = D_CONST;
+		g.offset = $2;
+		outcode($1, &g, $4, &$6);
+	}
+|	LBRA con ',' con ',' addr
+	{
+		Gen g;
+		g = nullgen;
+		g.type = D_CONST;
+		g.offset = $2;
+		outcode($1, &g, $4, &$6);
+	}
+|	LBRA con ',' con ',' '(' xlreg ')'
+	{
+		Gen g;
+		g = nullgen;
+		g.type = D_CONST;
+		g.offset = $2;
+		outcode($1, &g, $4, &$7);
+	}
+/*
+ * conditional trap
+ */
+|	LTRAP rreg ',' sreg
+	{
+		outcode($1, &$2, $4, &nullgen);
+	}
+|	LTRAP imm ',' sreg
+	{
+		outcode($1, &$2, $4, &nullgen);
+	}
+|	LTRAP rreg comma
+	{
+		outcode($1, &$2, NREG, &nullgen);
+	}
+|	LTRAP comma
+	{
+		outcode($1, &nullgen, NREG, &nullgen);
+	}
+/*
+ * floating point operate
+ */
+|	LFCONV freg ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFADD freg ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFADD freg ',' freg ',' freg
+	{
+		outcode($1, &$2, $4.reg, &$6);
+	}
+|	LFMA freg ',' freg ',' freg ',' freg
+	{
+		outgcode($1, &$2, $4.reg, &$6, &$8);
+	}
+|	LFCMP freg ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFCMP freg ',' freg ',' creg
+	{
+		outcode($1, &$2, $6.reg, &$4);
+	}
+/*
+ * CMP
+ */
+|	LCMP rreg ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LCMP rreg ',' imm
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LCMP rreg ',' rreg ',' creg
+	{
+		outcode($1, &$2, $6.reg, &$4);
+	}
+|	LCMP rreg ',' imm ',' creg
+	{
+		outcode($1, &$2, $6.reg, &$4);
+	}
+/*
+ * rotate and mask
+ */
+|	LRLWM  imm ',' rreg ',' imm ',' rreg
+	{
+		outgcode($1, &$2, $4.reg, &$6, &$8);
+	}
+|	LRLWM  imm ',' rreg ',' mask ',' rreg
+	{
+		outgcode($1, &$2, $4.reg, &$6, &$8);
+	}
+|	LRLWM  rreg ',' rreg ',' imm ',' rreg
+	{
+		outgcode($1, &$2, $4.reg, &$6, &$8);
+	}
+|	LRLWM  rreg ',' rreg ',' mask ',' rreg
+	{
+		outgcode($1, &$2, $4.reg, &$6, &$8);
+	}
+/*
+ * load/store multiple
+ */
+|	LMOVMW addr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LMOVMW rreg ',' addr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+/*
+ * various indexed load/store
+ * indexed unary (eg, cache clear)
+ */
+|	LXLD regaddr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LXLD regaddr ',' imm ',' rreg
+	{
+		outgcode($1, &$2, NREG, &$4, &$6);
+	}
+|	LXST rreg ',' regaddr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LXST rreg ',' imm ',' regaddr
+	{
+		outgcode($1, &$2, NREG, &$4, &$6);
+	}
+|	LXMV regaddr ',' rreg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LXMV rreg ',' regaddr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LXOP regaddr
+	{
+		outcode($1, &$2, NREG, &nullgen);
+	}
+/*
+ * NOP
+ */
+|	LNOP comma
+	{
+		outcode($1, &nullgen, NREG, &nullgen);
+	}
+|	LNOP rreg comma
+	{
+		outcode($1, &$2, NREG, &nullgen);
+	}
+|	LNOP freg comma
+	{
+		outcode($1, &$2, NREG, &nullgen);
+	}
+|	LNOP ',' rreg
+	{
+		outcode($1, &nullgen, NREG, &$3);
+	}
+|	LNOP ',' freg
+	{
+		outcode($1, &nullgen, NREG, &$3);
+	}
+/*
+ * word
+ */
+|	LWORD imm comma
+	{
+		if($1 == ADWORD && $2.type == D_CONST)
+			$2.type = D_DCONST;
+		outcode($1, &$2, NREG, &nullgen);
+	}
+|	LWORD ximm comma
+	{
+		if($1 == ADWORD && $2.type == D_CONST)
+			$2.type = D_DCONST;
+		outcode($1, &$2, NREG, &nullgen);
+	}
+/*
+ * END
+ */
+|	LEND comma
+	{
+		outcode($1, &nullgen, NREG, &nullgen);
+	}
+/*
+ * TEXT/GLOBL
+ */
+|	LTEXT name ',' imm
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LTEXT name ',' con ',' imm
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LTEXT name ',' imm ':' imm
+	{
+		outgcode($1, &$2, NREG, &$6, &$4);
+	}
+|	LTEXT name ',' con ',' imm ':' imm
+	{
+		outgcode($1, &$2, $4, &$8, &$6);
+	}
+/*
+ * DATA
+ */
+|	LDATA name '/' con ',' imm
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LDATA name '/' con ',' ximm
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+|	LDATA name '/' con ',' fimm
+	{
+		outcode($1, &$2, $4, &$6);
+	}
+/*
+ * RETURN
+ */
+|	LRETRN	comma
+	{
+		outcode($1, &nullgen, NREG, &nullgen);
+	}
+
+rel:
+	con '(' LPC ')'
+	{
+		$$ = nullgen;
+		$$.type = D_BRANCH;
+		$$.offset = $1 + pc;
+	}
+|	LNAME offset
+	{
+		$$ = nullgen;
+		if(pass == 2)
+			yyerror("undefined label: %s", $1->name);
+		$$.type = D_BRANCH;
+		$$.sym = $1;
+		$$.offset = $2;
+	}
+|	LLAB offset
+	{
+		$$ = nullgen;
+		$$.type = D_BRANCH;
+		$$.sym = $1;
+		$$.offset = $1->value + $2;
+	}
+
+rreg:
+	sreg
+	{
+		$$ = nullgen;
+		$$.type = D_REG;
+		$$.reg = $1;
+	}
+
+xlreg:
+	lr
+|	ctr
+
+lr:
+	LLR
+	{
+		$$ = nullgen;
+		$$.type = D_SPR;
+		$$.offset = $1;
+	}
+
+lcr:
+	LCR
+	{
+		$$ = nullgen;
+		$$.type = D_CREG;
+		$$.reg = NREG;	/* whole register */
+	}
+
+ctr:
+	LCTR
+	{
+		$$ = nullgen;
+		$$.type = D_SPR;
+		$$.offset = $1;
+	}
+
+msr:
+	LMSR
+	{
+		$$ = nullgen;
+		$$.type = D_MSR;
+	}
+
+psr:
+	LSPREG
+	{
+		$$ = nullgen;
+		$$.type = D_SPR;
+		$$.offset = $1;
+	}
+|	LSPR '(' con ')'
+	{
+		$$ = nullgen;
+		$$.type = $1;
+		$$.offset = $3;
+	}
+|	msr
+
+fpscr:
+	LFPSCR
+	{
+		$$ = nullgen;
+		$$.type = D_FPSCR;
+		$$.reg = NREG;
+	}
+
+fpscrf:
+	LFPSCR '(' con ')'
+	{
+		$$ = nullgen;
+		$$.type = D_FPSCR;
+		$$.reg = $3;
+	}
+
+freg:
+	LFREG
+	{
+		$$ = nullgen;
+		$$.type = D_FREG;
+		$$.reg = $1;
+	}
+|	LF '(' con ')'
+	{
+		$$ = nullgen;
+		$$.type = D_FREG;
+		$$.reg = $3;
+	}
+
+creg:
+	LCREG
+	{
+		$$ = nullgen;
+		$$.type = D_CREG;
+		$$.reg = $1;
+	}
+|	LCR '(' con ')'
+	{
+		$$ = nullgen;
+		$$.type = D_CREG;
+		$$.reg = $3;
+	}
+
+
+cbit:	con
+	{
+		$$ = nullgen;
+		$$.type = D_REG;
+		$$.reg = $1;
+	}
+
+mask:
+	con ',' con
+	{
+		int mb, me;
+		ulong v;
+
+		$$ = nullgen;
+		$$.type = D_CONST;
+		mb = $1;
+		me = $3;
+		if(mb < 0 || mb > 31 || me < 0 || me > 31){
+			yyerror("illegal mask start/end value(s)");
+			mb = me = 0;
+		}
+		if(mb <= me)
+			v = ((ulong)~0L>>mb) & (~0L<<(31-me));
+		else
+			v = ~(((ulong)~0L>>(me+1)) & (~0L<<(31-(mb-1))));
+		$$.offset = v;
+	}
+
+ximm:
+	'$' addr
+	{
+		$$ = $2;
+		$$.type = D_CONST;
+	}
+|	'$' LSCONST
+	{
+		$$ = nullgen;
+		$$.type = D_SCONST;
+		memcpy($$.sval, $2, sizeof($$.sval));
+	}
+
+fimm:
+	'$' LFCONST
+	{
+		$$ = nullgen;
+		$$.type = D_FCONST;
+		$$.dval = $2;
+	}
+|	'$' '-' LFCONST
+	{
+		$$ = nullgen;
+		$$.type = D_FCONST;
+		$$.dval = -$3;
+	}
+
+imm:	'$' con
+	{
+		$$ = nullgen;
+		$$.type = D_CONST;
+		$$.offset = $2;
+	}
+
+sreg:
+	LREG
+|	LR '(' con ')'
+	{
+		if($$ < 0 || $$ >= NREG)
+			print("register value out of range\n");
+		$$ = $3;
+	}
+
+regaddr:
+	'(' sreg ')'
+	{
+		$$ = nullgen;
+		$$.type = D_OREG;
+		$$.reg = $2;
+		$$.offset = 0;
+	}
+|	'(' sreg '+' sreg ')'
+	{
+		$$ = nullgen;
+		$$.type = D_OREG;
+		$$.reg = $2;
+		$$.xreg = $4;
+		$$.offset = 0;
+	}
+
+addr:
+	name
+|	con '(' sreg ')'
+	{
+		$$ = nullgen;
+		$$.type = D_OREG;
+		$$.reg = $3;
+		$$.offset = $1;
+	}
+
+name:
+	con '(' pointer ')'
+	{
+		$$ = nullgen;
+		$$.type = D_OREG;
+		$$.name = $3;
+		$$.sym = S;
+		$$.offset = $1;
+	}
+|	LNAME offset '(' pointer ')'
+	{
+		$$ = nullgen;
+		$$.type = D_OREG;
+		$$.name = $4;
+		$$.sym = $1;
+		$$.offset = $2;
+	}
+|	LNAME '<' '>' offset '(' LSB ')'
+	{
+		$$ = nullgen;
+		$$.type = D_OREG;
+		$$.name = D_STATIC;
+		$$.sym = $1;
+		$$.offset = $4;
+	}
+
+comma:
+|	','
+
+offset:
+	{
+		$$ = 0;
+	}
+|	'+' con
+	{
+		$$ = $2;
+	}
+|	'-' con
+	{
+		$$ = -$2;
+	}
+
+pointer:
+	LSB
+|	LSP
+|	LFP
+
+con:
+	LCONST
+|	LVAR
+	{
+		$$ = $1->value;
+	}
+|	'-' con
+	{
+		$$ = -$2;
+	}
+|	'+' con
+	{
+		$$ = $2;
+	}
+|	'~' con
+	{
+		$$ = ~$2;
+	}
+|	'(' expr ')'
+	{
+		$$ = $2;
+	}
+
+expr:
+	con
+|	expr '+' expr
+	{
+		$$ = $1 + $3;
+	}
+|	expr '-' expr
+	{
+		$$ = $1 - $3;
+	}
+|	expr '*' expr
+	{
+		$$ = $1 * $3;
+	}
+|	expr '/' expr
+	{
+		$$ = $1 / $3;
+	}
+|	expr '%' expr
+	{
+		$$ = $1 % $3;
+	}
+|	expr '<' '<' expr
+	{
+		$$ = $1 << $4;
+	}
+|	expr '>' '>' expr
+	{
+		$$ = $1 >> $4;
+	}
+|	expr '&' expr
+	{
+		$$ = $1 & $3;
+	}
+|	expr '^' expr
+	{
+		$$ = $1 ^ $3;
+	}
+|	expr '|' expr
+	{
+		$$ = $1 | $3;
+	}

+ 875 - 0
sys/src/cmd/9a/lex.c

@@ -0,0 +1,875 @@
+#define	EXTERN
+#include "a.h"
+#include "y.tab.h"
+#include <ctype.h>
+
+void
+main(int argc, char *argv[])
+{
+	char *p;
+	int nout, nproc, status, i, c;
+
+	thechar = '9';
+	thestring = "power64";
+	memset(debug, 0, sizeof(debug));
+	cinit();
+	outfile = 0;
+	include[ninclude++] = ".";
+	ARGBEGIN {
+	default:
+		c = ARGC();
+		if(c >= 0 || c < sizeof(debug))
+			debug[c] = 1;
+		break;
+
+	case 'o':
+		outfile = ARGF();
+		break;
+
+	case 'D':
+		p = ARGF();
+		if(p)
+			Dlist[nDlist++] = p;
+		break;
+
+	case 'I':
+		p = ARGF();
+		setinclude(p);
+		break;
+	} ARGEND
+	if(*argv == 0) {
+		print("usage: %ca [-options] file.s\n", thechar);
+		errorexit();
+	}
+	if(argc > 1 && systemtype(Windows)){
+		print("can't assemble multiple files on windows\n");
+		errorexit();
+	}
+	if(argc > 1) {
+		nproc = 1;
+		if(p = getenv("NPROC"))
+			nproc = atol(p);
+		c = 0;
+		nout = 0;
+		for(;;) {
+			while(nout < nproc && argc > 0) {
+				i = myfork();
+				if(i < 0) {
+					i = mywait(&status);
+					if(i < 0)
+						errorexit();
+					if(status)
+						c++;
+					nout--;
+					continue;
+				}
+				if(i == 0) {
+					print("%s:\n", *argv);
+					if(assemble(*argv))
+						errorexit();
+					exits(0);
+				}
+				nout++;
+				argc--;
+				argv++;
+			}
+			i = mywait(&status);
+			if(i < 0) {
+				if(c)
+					errorexit();
+				exits(0);
+			}
+			if(status)
+				c++;
+			nout--;
+		}
+	}
+	if(assemble(argv[0]))
+		errorexit();
+	exits(0);
+}
+
+int
+assemble(char *file)
+{
+	char ofile[100], incfile[20], *p;
+	int i, of;
+
+	strcpy(ofile, file);
+	if(p = strrchr(ofile, pathchar())) {
+		include[0] = ofile;
+		*p++ = 0;
+	} else
+		p = ofile;
+	if(outfile == 0) {
+		outfile = p;
+		if(p = strrchr(outfile, '.'))
+			if(p[1] == 's' && p[2] == 0)
+				p[0] = 0;
+		p = strrchr(outfile, 0);
+		p[0] = '.';
+		p[1] = thechar;
+		p[2] = 0;
+	}
+	p = getenv("INCLUDE");
+	if(p) {
+		setinclude(p);
+	} else {
+		if(systemtype(Plan9)) {
+			sprint(incfile,"/%s/include", thestring);
+			setinclude(strdup(incfile));
+		}
+	}
+
+	of = mycreat(outfile, 0664);
+	if(of < 0) {
+		yyerror("%ca: cannot create %s", thechar, outfile);
+		errorexit();
+	}
+	Binit(&obuf, of, OWRITE);
+
+	pass = 1;
+	nosched = 0;
+	pinit(file);
+	for(i=0; i<nDlist; i++)
+		dodefine(Dlist[i]);
+	yyparse();
+	if(nerrors) {
+		cclean();
+		return nerrors;
+	}
+
+	pass = 2;
+	nosched = 0;
+	outhist();
+	pinit(file);
+	for(i=0; i<nDlist; i++)
+		dodefine(Dlist[i]);
+	yyparse();
+	cclean();
+	return nerrors;
+}
+
+struct
+{
+	char	*name;
+	ushort	type;
+	ushort	value;
+} itab[] =
+{
+	"SP",		LSP,	D_AUTO,
+	"SB",		LSB,	D_EXTERN,
+	"FP",		LFP,	D_PARAM,
+	"PC",		LPC,	D_BRANCH,
+
+	"LR",		LLR,	D_LR,
+	"CTR",		LCTR,	D_CTR,
+
+	"XER",		LSPREG,	D_XER,
+	"MSR",		LMSR,	D_MSR,
+	"FPSCR",	LFPSCR,	D_FPSCR,
+	"SPR",		LSPR,	D_SPR,
+	"DCR",		LSPR,	D_DCR,
+
+	"CR",		LCR,	0,
+	"CR0",		LCREG,	0,
+	"CR1",		LCREG,	1,
+	"CR2",		LCREG,	2,
+	"CR3",		LCREG,	3,
+	"CR4",		LCREG,	4,
+	"CR5",		LCREG,	5,
+	"CR6",		LCREG,	6,
+	"CR7",		LCREG,	7,
+
+	"R",		LR,	0,
+	"R0",		LREG,	0,
+	"R1",		LREG,	1,
+	"R2",		LREG,	2,
+	"R3",		LREG,	3,
+	"R4",		LREG,	4,
+	"R5",		LREG,	5,
+	"R6",		LREG,	6,
+	"R7",		LREG,	7,
+	"R8",		LREG,	8,
+	"R9",		LREG,	9,
+	"R10",		LREG,	10,
+	"R11",		LREG,	11,
+	"R12",		LREG,	12,
+	"R13",		LREG,	13,
+	"R14",		LREG,	14,
+	"R15",		LREG,	15,
+	"R16",		LREG,	16,
+	"R17",		LREG,	17,
+	"R18",		LREG,	18,
+	"R19",		LREG,	19,
+	"R20",		LREG,	20,
+	"R21",		LREG,	21,
+	"R22",		LREG,	22,
+	"R23",		LREG,	23,
+	"R24",		LREG,	24,
+	"R25",		LREG,	25,
+	"R26",		LREG,	26,
+	"R27",		LREG,	27,
+	"R28",		LREG,	28,
+	"R29",		LREG,	29,
+	"R30",		LREG,	30,
+	"R31",		LREG,	31,
+
+	"F",		LF,	0,
+	"F0",		LFREG,	0,
+	"F1",		LFREG,	1,
+	"F2",		LFREG,	2,
+	"F3",		LFREG,	3,
+	"F4",		LFREG,	4,
+	"F5",		LFREG,	5,
+	"F6",		LFREG,	6,
+	"F7",		LFREG,	7,
+	"F8",		LFREG,	8,
+	"F9",		LFREG,	9,
+	"F10",		LFREG,	10,
+	"F11",		LFREG,	11,
+	"F12",		LFREG,	12,
+	"F13",		LFREG,	13,
+	"F14",		LFREG,	14,
+	"F15",		LFREG,	15,
+	"F16",		LFREG,	16,
+	"F17",		LFREG,	17,
+	"F18",		LFREG,	18,
+	"F19",		LFREG,	19,
+	"F20",		LFREG,	20,
+	"F21",		LFREG,	21,
+	"F22",		LFREG,	22,
+	"F23",		LFREG,	23,
+	"F24",		LFREG,	24,
+	"F25",		LFREG,	25,
+	"F26",		LFREG,	26,
+	"F27",		LFREG,	27,
+	"F28",		LFREG,	28,
+	"F29",		LFREG,	29,
+	"F30",		LFREG,	30,
+	"F31",		LFREG,	31,
+
+	"CREQV",	LCROP, ACREQV,
+	"CRXOR",	LCROP, ACRXOR,
+	"CRAND",	LCROP, ACRAND,
+	"CROR",		LCROP, ACROR,
+	"CRANDN",	LCROP, ACRANDN,
+	"CRORN",	LCROP, ACRORN,
+	"CRNAND",	LCROP, ACRNAND,
+	"CRNOR",	LCROP, ACRNOR,
+
+	"ADD",		LADDW, AADD,
+	"ADDV",		LADDW, AADDV,
+	"ADDCC",	LADDW, AADDCC,
+	"ADDVCC",	LADDW, AADDVCC,
+	"ADDC",		LADDW, AADDC,
+	"ADDCV",	LADDW, AADDCV,
+	"ADDCCC",	LADDW, AADDCCC,
+	"ADDCVCC",	LADDW, AADDCVCC,
+	"ADDE",		LLOGW, AADDE,
+	"ADDEV",	LLOGW, AADDEV,
+	"ADDECC",	LLOGW, AADDECC,
+	"ADDEVCC",	LLOGW, AADDEVCC,
+
+	"ADDME",	LABS, AADDME,
+	"ADDMEV",	LABS, AADDMEV,
+	"ADDMECC",	LABS, AADDMECC,
+	"ADDMEVCC",	LABS, AADDMEVCC,
+	"ADDZE",	LABS, AADDZE,
+	"ADDZEV",	LABS, AADDZEV,
+	"ADDZECC",	LABS, AADDZECC,
+	"ADDZEVCC",	LABS, AADDZEVCC,
+
+	"SUB",		LADDW, ASUB,
+	"SUBV",		LADDW, ASUBV,
+	"SUBCC",	LADDW, ASUBCC,
+	"SUBVCC",	LADDW, ASUBVCC,
+	"SUBE",		LLOGW, ASUBE,
+	"SUBECC",	LLOGW, ASUBECC,
+	"SUBEV",	LLOGW, ASUBEV,
+	"SUBEVCC",	LLOGW, ASUBEVCC,
+	"SUBC",		LADDW, ASUBC,
+	"SUBCCC",	LADDW, ASUBCCC,
+	"SUBCV",	LADDW, ASUBCV,
+	"SUBCVCC",	LADDW, ASUBCVCC,
+
+	"SUBME",	LABS, ASUBME,
+	"SUBMEV",	LABS, ASUBMEV,
+	"SUBMECC",	LABS, ASUBMECC,
+	"SUBMEVCC",	LABS, ASUBMEVCC,
+	"SUBZE",	LABS, ASUBZE,
+	"SUBZEV",	LABS, ASUBZEV,
+	"SUBZECC",	LABS, ASUBZECC,
+	"SUBZEVCC",	LABS, ASUBZEVCC,
+
+	"AND",		LADDW, AAND,
+	"ANDCC",	LADDW, AANDCC,	/* includes andil & andiu */
+	"ANDN",		LLOGW, AANDN,
+	"ANDNCC",	LLOGW, AANDNCC,
+	"EQV",		LLOGW, AEQV,
+	"EQVCC",	LLOGW, AEQVCC,
+	"NAND",		LLOGW, ANAND,
+	"NANDCC",	LLOGW, ANANDCC,
+	"NOR",		LLOGW, ANOR,
+	"NORCC",	LLOGW, ANORCC,
+	"OR",		LADDW, AOR,	/* includes oril & oriu */
+	"ORCC",		LADDW, AORCC,
+	"ORN",		LLOGW, AORN,
+	"ORNCC",	LLOGW, AORNCC,
+	"XOR",		LADDW, AXOR,	/* includes xoril & xoriu */
+	"XORCC",	LLOGW, AXORCC,
+
+	"EXTSB",	LABS,	AEXTSB,
+	"EXTSBCC",	LABS,	AEXTSBCC,
+	"EXTSH",	LABS, AEXTSH,
+	"EXTSHCC",	LABS, AEXTSHCC,
+
+	"CNTLZW",	LABS, ACNTLZW,
+	"CNTLZWCC",	LABS, ACNTLZWCC,
+
+	"RLWMI",	LRLWM, ARLWMI,
+	"RLWMICC",	LRLWM, ARLWMICC,
+	"RLWNM",	LRLWM, ARLWNM,
+	"RLWNMCC", LRLWM, ARLWNMCC,
+
+	"SLW",		LSHW, ASLW,
+	"SLWCC",	LSHW, ASLWCC,
+	"SRW",		LSHW, ASRW,
+	"SRWCC",	LSHW, ASRWCC,
+	"SRAW",		LSHW, ASRAW,
+	"SRAWCC",	LSHW, ASRAWCC,
+
+	"BR",		LBRA, ABR,
+	"BC",		LBRA, ABC,
+	"BCL",		LBRA, ABC,
+	"BL",		LBRA, ABL,
+	"BEQ",		LBRA, ABEQ,
+	"BNE",		LBRA, ABNE,
+	"BGT",		LBRA, ABGT,
+	"BGE",		LBRA, ABGE,
+	"BLT",		LBRA, ABLT,
+	"BLE",		LBRA, ABLE,
+	"BVC",		LBRA, ABVC,
+	"BVS",		LBRA, ABVS,
+
+	"CMP",		LCMP, ACMP,
+	"CMPU",		LCMP, ACMPU,
+
+	"DIVW",		LLOGW, ADIVW,
+	"DIVWV",	LLOGW, ADIVWV,
+	"DIVWCC",	LLOGW, ADIVWCC,
+	"DIVWVCC",	LLOGW, ADIVWVCC,
+	"DIVWU",	LLOGW, ADIVWU,
+	"DIVWUV",	LLOGW, ADIVWUV,
+	"DIVWUCC",	LLOGW, ADIVWUCC,
+	"DIVWUVCC",	LLOGW, ADIVWUVCC,
+
+	"FABS",		LFCONV,	AFABS,
+	"FABSCC",	LFCONV,	AFABSCC,
+	"FNEG",		LFCONV,	AFNEG,
+	"FNEGCC",	LFCONV,	AFNEGCC,
+	"FNABS",	LFCONV,	AFNABS,
+	"FNABSCC",	LFCONV,	AFNABSCC,
+
+	"FADD",		LFADD,	AFADD,
+	"FADDCC",	LFADD,	AFADDCC,
+	"FSUB",		LFADD,  AFSUB,
+	"FSUBCC",	LFADD,	AFSUBCC,
+	"FMUL",		LFADD,	AFMUL,
+	"FMULCC",	LFADD,	AFMULCC,
+	"FDIV",		LFADD,	AFDIV,
+	"FDIVCC",	LFADD,	AFDIVCC,
+	"FRSP",		LFCONV,	AFRSP,
+	"FRSPCC",	LFCONV,	AFRSPCC,
+	"FCTIW",	LFCONV,	AFCTIW,
+	"FCTIWCC",	LFCONV,	AFCTIWCC,
+	"FCTIWZ",	LFCONV,	AFCTIWZ,
+	"FCTIWZCC",	LFCONV,	AFCTIWZCC,
+
+	"FMADD",	LFMA, AFMADD,
+	"FMADDCC",	LFMA, AFMADDCC,
+	"FMSUB",	LFMA, AFMSUB,
+	"FMSUBCC",	LFMA, AFMSUBCC,
+	"FNMADD",	LFMA, AFNMADD,
+	"FNMADDCC",	LFMA, AFNMADDCC,
+	"FNMSUB",	LFMA, AFNMSUB,
+	"FNMSUBCC",	LFMA, AFNMSUBCC,
+	"FMADDS",	LFMA, AFMADDS,
+	"FMADDSCC",	LFMA, AFMADDSCC,
+	"FMSUBS",	LFMA, AFMSUBS,
+	"FMSUBSCC",	LFMA, AFMSUBSCC,
+	"FNMADDS",	LFMA, AFNMADDS,
+	"FNMADDSCC",	LFMA, AFNMADDSCC,
+	"FNMSUBS",	LFMA, AFNMSUBS,
+	"FNMSUBSCC",	LFMA, AFNMSUBSCC,
+
+	"FCMPU",	LFCMP, AFCMPU,
+	"FCMPO",	LFCMP, AFCMPO,
+	"MTFSB0",	LMTFSB, AMTFSB0,
+	"MTFSB1",	LMTFSB,	AMTFSB1,
+
+	"FMOVD",	LFMOV, AFMOVD,
+	"FMOVS",	LFMOV, AFMOVS,
+	"FMOVDCC",	LFCONV,	AFMOVDCC,	/* fmr. */
+
+	"GLOBL",	LTEXT, AGLOBL,
+
+	"MOVB",		LMOVB, AMOVB,
+	"MOVBZ",	LMOVB, AMOVBZ,
+	"MOVBU",	LMOVB, AMOVBU,
+	"MOVBZU", LMOVB, AMOVBZU,
+	"MOVH",		LMOVB, AMOVH,
+	"MOVHZ",	LMOVB, AMOVHZ,
+	"MOVHU",	LMOVB, AMOVHU,
+	"MOVHZU", LMOVB, AMOVHZU,
+	"MOVHBR", 	LXMV, AMOVHBR,
+	"MOVWBR",	LXMV, AMOVWBR,
+	"MOVW",		LMOVW, AMOVW,
+	"MOVWU",	LMOVW, AMOVWU,
+	"MOVMW",	LMOVMW, AMOVMW,
+	"MOVFL",	LMOVW,	AMOVFL,
+
+	"MULLW",	LADDW, AMULLW,		/* includes multiply immediate 10-139 */
+	"MULLWV",	LLOGW, AMULLWV,
+	"MULLWCC",	LLOGW, AMULLWCC,
+	"MULLWVCC",	LLOGW, AMULLWVCC,
+
+	"MULHW",	LLOGW, AMULHW,
+	"MULHWCC",	LLOGW, AMULHWCC,
+	"MULHWU",	LLOGW, AMULHWU,
+	"MULHWUCC",	LLOGW, AMULHWUCC,
+
+	"NEG",		LABS, ANEG,
+	"NEGV",		LABS, ANEGV,
+	"NEGCC",	LABS, ANEGCC,
+	"NEGVCC",	LABS, ANEGVCC,
+
+	"NOP",		LNOP, ANOP,	/* ori 0,0,0 */
+	"SYSCALL",	LNOP, ASYSCALL,
+
+	"RETURN",	LRETRN, ARETURN,
+	"RFI",		LRETRN,	ARFI,
+	"RFCI",		LRETRN,	ARFCI,
+
+	"DATA",		LDATA, ADATA,
+	"END",		LEND, AEND,
+	"TEXT",		LTEXT, ATEXT,
+
+	/* 64-bit instructions */
+	"CNTLZD",	LABS,	ACNTLZD,
+	"CNTLZDCC",	LABS,	ACNTLZDCC,
+	"DIVD",	LLOGW,	ADIVD,
+	"DIVDCC",	LLOGW,	ADIVDCC,
+	"DIVDVCC",	LLOGW,	ADIVDVCC,
+	"DIVDV",	LLOGW,	ADIVDV,
+	"DIVDU",	LLOGW,	ADIVDU,
+	"DIVDUCC",	LLOGW,	ADIVDUCC,
+	"DIVDUVCC",	LLOGW,	ADIVDUVCC,
+	"DIVDUV",	LLOGW,	ADIVDUV,
+	"EXTSW",	LABS, AEXTSW,
+	"EXTSWCC",	LABS, AEXTSWCC,
+	"FCTID",	LFCONV,	AFCTID,
+	"FCTIDCC",	LFCONV,	AFCTIDCC,
+	"FCTIDZ",	LFCONV,	AFCTIDZ,
+	"FCTIDZCC",	LFCONV,	AFCTIDZCC,
+	"FCFID",	LFCONV,	AFCFID,
+	"FCFIDCC",	LFCONV,	AFCFIDCC,
+	"LDAR", LXLD, ALDAR,
+	"MOVD",	LMOVW,	AMOVD,
+	"MOVDU",	LMOVW,	AMOVDU,
+	"MOVWZ",	LMOVW,	AMOVWZ,
+	"MOVWZU",	LMOVW,	AMOVWZU,
+	"MULHD",	LLOGW,	AMULHD,
+	"MULHDCC",	LLOGW,	AMULHDCC,
+	"MULHDU",	LLOGW,	AMULHDU,
+	"MULHDUCC",	LLOGW,	AMULHDUCC,
+	"MULLD",	LADDW,	AMULLD,	/* includes multiply immediate? */
+	"MULLDCC",	LLOGW,	AMULLDCC,
+	"MULLDVCC",	LLOGW,	AMULLDVCC,
+	"MULLDV",	LLOGW,	AMULLDV,
+	"RFID",	LRETRN,	ARFID,
+	"HRFID", LRETRN, AHRFID,
+	"RLDMI",	LRLWM,	ARLDMI,
+	"RLDMICC",	LRLWM,	ARLDMICC,
+	"RLDC",	LRLWM,	ARLDC,
+	"RLDCCC",	LRLWM,	ARLDCCC,
+	"RLDCR",	LRLWM,	ARLDCR,
+	"RLDCRCC",	LRLWM,	ARLDCRCC,
+	"RLDCL",	LRLWM,	ARLDCL,
+	"RLDCLCC",	LRLWM,	ARLDCLCC,
+	"SLBIA",	LNOP,	ASLBIA,
+	"SLBIE",	LNOP,	ASLBIE,
+	"SLBMFEE",	LABS,	ASLBMFEE,
+	"SLBMFEV",	LABS,	ASLBMFEV,
+	"SLBMTE",	LABS,	ASLBMTE,
+	"SLD",	LSHW,	ASLD,
+	"SLDCC",	LSHW,	ASLDCC,
+	"SRD",	LSHW,	ASRD,
+	"SRAD",	LSHW,	ASRAD,
+	"SRADCC",	LSHW,	ASRADCC,
+	"SRDCC",	LSHW,	ASRDCC,
+	"STDCCC",	LXST,	ASTDCCC,
+	"TD",	LADDW,	ATD,
+
+	/* pseudo instructions */
+	"REM",	LLOGW,	AREM,
+	"REMCC",	LLOGW,	AREMCC,
+	"REMV",	LLOGW,	AREMV,
+	"REMVCC",	LLOGW,	AREMVCC,
+	"REMU",	LLOGW,	AREMU,
+	"REMUCC",	LLOGW,	AREMUCC,
+	"REMUV",	LLOGW,	AREMUV,
+	"REMUVCC",	LLOGW,	AREMUVCC,
+	"REMD",	LLOGW,	AREMD,
+	"REMDCC",	LLOGW,	AREMDCC,
+	"REMDV",	LLOGW,	AREMDV,
+	"REMDVCC",	LLOGW,	AREMDVCC,
+	"REMDU",	LLOGW,	AREMDU,
+	"REMDUCC",	LLOGW,	AREMDUCC,
+	"REMDUV",	LLOGW,	AREMDUV,
+	"REMDUVCC",	LLOGW,	AREMDUVCC,
+
+/* special instructions */
+	"DCBF",		LXOP,	ADCBF,
+	"DCBI",		LXOP,	ADCBI,
+	"DCBST",	LXOP,	ADCBST,
+	"DCBT",		LXOP,	ADCBT,
+	"DCBTST",	LXOP,	ADCBTST,
+	"DCBZ",		LXOP,	ADCBZ,
+	"ICBI",		LXOP,	AICBI,
+
+	"ECIWX",	LXLD,	AECIWX,
+	"ECOWX",	LXST,	AECOWX,
+	"LWAR", LXLD, ALWAR,
+	"LWAR", LXLD, ALWAR,
+	"STWCCC", LXST, ASTWCCC,
+	"EIEIO",	LRETRN,	AEIEIO,
+	"TLBIE",	LNOP,	ATLBIE,
+	"TLBIEL",	LNOP,	ATLBIEL,
+	"LSW",	LXLD, ALSW,
+	"STSW",	LXST, ASTSW,
+	
+	"ISYNC",	LRETRN, AISYNC,
+	"SYNC",		LRETRN, ASYNC,
+	"TLBSYNC",	LRETRN,	ATLBSYNC,
+	"PTESYNC",	LRETRN,	APTESYNC,
+/*	"TW",		LADDW,	ATW,*/
+
+	"WORD",		LWORD, AWORD,
+	"DWORD",	LWORD, ADWORD,
+	"SCHED",	LSCHED, 0,
+	"NOSCHED",	LSCHED,	0x80,
+
+	0
+};
+
+void
+cinit(void)
+{
+	Sym *s;
+	int i;
+
+	nullgen.sym = S;
+	nullgen.offset = 0;
+	nullgen.type = D_NONE;
+	nullgen.name = D_NONE;
+	nullgen.reg = NREG;
+	nullgen.xreg = NREG;
+	if(FPCHIP)
+		nullgen.dval = 0;
+	for(i=0; i<sizeof(nullgen.sval); i++)
+		nullgen.sval[i] = 0;
+
+	nerrors = 0;
+	iostack = I;
+	iofree = I;
+	peekc = IGN;
+	nhunk = 0;
+	for(i=0; i<NHASH; i++)
+		hash[i] = S;
+	for(i=0; itab[i].name; i++) {
+		s = slookup(itab[i].name);
+		s->type = itab[i].type;
+		s->value = itab[i].value;
+	}
+	ALLOCN(pathname, 0, 100);
+	if(mygetwd(pathname, 99) == 0) {
+		ALLOCN(pathname, 100, 900);
+		if(mygetwd(pathname, 999) == 0)
+			strcpy(pathname, "/???");
+	}
+}
+
+void
+syminit(Sym *s)
+{
+
+	s->type = LNAME;
+	s->value = 0;
+}
+
+void
+cclean(void)
+{
+
+	outcode(AEND, &nullgen, NREG, &nullgen);
+	Bflush(&obuf);
+}
+
+void
+zname(char *n, int t, int s)
+{
+
+	Bputc(&obuf, ANAME);
+	Bputc(&obuf, ANAME>>8);
+	Bputc(&obuf, t);	/* type */
+	Bputc(&obuf, s);	/* sym */
+	while(*n) {
+		Bputc(&obuf, *n);
+		n++;
+	}
+	Bputc(&obuf, 0);
+}
+
+void
+zaddr(Gen *a, int s)
+{
+	long l;
+	int i;
+	char *n;
+	Ieee e;
+
+	if(a->type == D_CONST){
+		l = a->offset;
+		if((vlong)l != a->offset)
+			a->type = D_DCONST;
+	}
+	Bputc(&obuf, a->type);
+	Bputc(&obuf, a->reg);
+	Bputc(&obuf, s);
+	Bputc(&obuf, a->name);
+	switch(a->type) {
+	default:
+		print("unknown type %d\n", a->type);
+		exits("arg");
+
+	case D_NONE:
+	case D_REG:
+	case D_FREG:
+	case D_CREG:
+	case D_FPSCR:
+	case D_MSR:
+	case D_OPT:
+		break;
+
+	case D_DCR:
+	case D_SPR:
+	case D_OREG:
+	case D_CONST:
+	case D_BRANCH:
+		l = a->offset;
+		Bputc(&obuf, l);
+		Bputc(&obuf, l>>8);
+		Bputc(&obuf, l>>16);
+		Bputc(&obuf, l>>24);
+		break;
+
+	case D_DCONST:
+		l = a->offset;
+		Bputc(&obuf, l);
+		Bputc(&obuf, l>>8);
+		Bputc(&obuf, l>>16);
+		Bputc(&obuf, l>>24);
+		l = a->offset>>32;
+		Bputc(&obuf, l);
+		Bputc(&obuf, l>>8);
+		Bputc(&obuf, l>>16);
+		Bputc(&obuf, l>>24);
+		break;
+
+	case D_SCONST:
+		n = a->sval;
+		for(i=0; i<NSNAME; i++) {
+			Bputc(&obuf, *n);
+			n++;
+		}
+		break;
+
+	case D_FCONST:
+		ieeedtod(&e, a->dval);
+		Bputc(&obuf, e.l);
+		Bputc(&obuf, e.l>>8);
+		Bputc(&obuf, e.l>>16);
+		Bputc(&obuf, e.l>>24);
+		Bputc(&obuf, e.h);
+		Bputc(&obuf, e.h>>8);
+		Bputc(&obuf, e.h>>16);
+		Bputc(&obuf, e.h>>24);
+		break;
+	}
+}
+
+int
+outsim(Gen *g)
+{
+	Sym *s;
+	int sno, t;
+
+	s = g->sym;
+	if(s == S)
+		return 0;
+	sno = s->sym;
+	if(sno < 0 || sno >= NSYM)
+		sno = 0;
+	t = g->name;
+	if(h[sno].type == t && h[sno].sym == s)
+		return sno;
+	zname(s->name, t, sym);
+	s->sym = sym;
+	h[sym].sym = s;
+	h[sym].type = t;
+	sno = sym;
+	sym++;
+	if(sym >= NSYM)
+		sym = 1;
+	return sno;
+}
+
+void
+outcode(int a, Gen *g1, int reg, Gen *g2)
+{
+	int sf, st;
+
+	if(a != AGLOBL && a != ADATA)
+		pc++;
+	if(pass == 1)
+		return;
+	if(g1->xreg != NREG) {
+		if(reg != NREG || g2->xreg != NREG)
+			yyerror("bad addressing modes");
+		reg = g1->xreg;
+	} else
+	if(g2->xreg != NREG) {
+		if(reg != NREG)
+			yyerror("bad addressing modes");
+		reg = g2->xreg;
+	}
+	do {
+		sf = outsim(g1);
+		st = outsim(g2);
+	} while(sf != 0 && st == sf);
+	Bputc(&obuf, a);
+	Bputc(&obuf, a>>8);
+	Bputc(&obuf, reg|nosched);
+	Bputc(&obuf, lineno);
+	Bputc(&obuf, lineno>>8);
+	Bputc(&obuf, lineno>>16);
+	Bputc(&obuf, lineno>>24);
+	zaddr(g1, sf);
+	zaddr(g2, st);
+}
+
+void
+outgcode(int a, Gen *g1, int reg, Gen *g2, Gen *g3)
+{
+	int s1, s2, s3, flag; 
+
+	if(a != AGLOBL && a != ADATA)
+		pc++;
+	if(pass == 1)
+		return;
+	do {
+		s1 = outsim(g1);
+		s2 = outsim(g2);
+		s3 = outsim(g3);
+	} while(s1 && (s2 && s1 == s2 || s3 && s1 == s3) || s2 && (s3 && s2 == s3));
+	flag = 0;
+	if(g2->type != D_NONE)
+		flag = 0x40;	/* flags extra operand */
+	Bputc(&obuf, a);
+	Bputc(&obuf, a>>8);
+	Bputc(&obuf, reg | nosched | flag);
+	Bputc(&obuf, lineno);
+	Bputc(&obuf, lineno>>8);
+	Bputc(&obuf, lineno>>16);
+	Bputc(&obuf, lineno>>24);
+	zaddr(g1, s1);
+	if(flag)
+		zaddr(g2, s2);
+	zaddr(g3, s3);
+}
+
+void
+outhist(void)
+{
+	Gen g;
+	Hist *h;
+	char *p, *q, *op, c;
+	int n;
+
+	g = nullgen;
+	c = pathchar();
+	for(h = hist; h != H; h = h->link) {
+		p = h->name;
+		op = 0;
+		/* on windows skip drive specifier in pathname */
+		if(systemtype(Windows) && p && p[1] == ':'){
+			p += 2;
+			c = *p;
+		}
+		if(p && p[0] != c && h->offset == 0 && pathname){
+			/* on windows skip drive specifier in pathname */
+			if(systemtype(Windows) && pathname[1] == ':') {
+				op = p;
+				p = pathname+2;
+				c = *p;
+			} else if(pathname[0] == c){
+				op = p;
+				p = pathname;
+			}
+		}
+		while(p) {
+			q = strchr(p, c);
+			if(q) {
+				n = q-p;
+				if(n == 0){
+					n = 1;	/* leading "/" */
+					*p = '/';	/* don't emit "\" on windows */
+				}
+				q++;
+			} else {
+				n = strlen(p);
+				q = 0;
+			}
+			if(n) {
+				Bputc(&obuf, ANAME);
+				Bputc(&obuf, ANAME>>8);
+				Bputc(&obuf, D_FILE);	/* type */
+				Bputc(&obuf, 1);	/* sym */
+				Bputc(&obuf, '<');
+				Bwrite(&obuf, p, n);
+				Bputc(&obuf, 0);
+			}
+			p = q;
+			if(p == 0 && op) {
+				p = op;
+				op = 0;
+			}
+		}
+		g.offset = h->offset;
+
+		Bputc(&obuf, AHISTORY);
+		Bputc(&obuf, AHISTORY>>8);
+		Bputc(&obuf, 0);
+		Bputc(&obuf, h->line);
+		Bputc(&obuf, h->line>>8);
+		Bputc(&obuf, h->line>>16);
+		Bputc(&obuf, h->line>>24);
+		zaddr(&nullgen, 0);
+		zaddr(&g, 0);
+	}
+}
+
+#include "../cc/lexbody"
+#include "../cc/macbody"
+#include "../cc/compat"

+ 19 - 0
sys/src/cmd/9a/mkfile

@@ -0,0 +1,19 @@
+</$objtype/mkfile
+
+TARG=9a
+OFILES=\
+	y.tab.$O\
+	lex.$O\
+
+HFILES=\
+	../9c/9.out.h\
+	y.tab.h\
+	a.h\
+
+YFILES=a.y\
+
+BIN=/$objtype/bin
+< /sys/src/cmd/mkone
+YFLAGS=-D1 -d
+
+lex.$O:	../cc/macbody ../cc/lexbody ../cc/compat

+ 419 - 0
sys/src/cmd/9c/9.out.h

@@ -0,0 +1,419 @@
+/*
+ * powerpc 64
+ */
+#define	NSNAME	8
+#define	NSYM	50
+#define	NREG	32
+
+#define NOPROF	(1<<0)
+#define DUPOK	(1<<1)
+
+enum
+{
+	REGZERO		= 0,	/* set to zero */
+	REGSP		= 1,
+	REGSB		= 2,
+	REGRET		= 3,
+	REGARG		= 3,
+	REGMIN		= 7,	/* register variables allocated from here to REGMAX */
+	REGMAX		= 27,
+	REGEXT		= 30,	/* external registers allocated from here down */
+	REGTMP		= 31,	/* used by the linker */
+
+	FREGRET		= 0,
+	FREGMIN		= 17,	/* first register variable */
+	FREGEXT		= 26,	/* first external register */
+	FREGCVI		= 27, /* floating conversion constant */
+	FREGZERO	= 28,	/* both float and double */
+	FREGHALF	= 29,	/* double */
+	FREGONE		= 30,	/* double */
+	FREGTWO		= 31	/* double */
+/*
+ * GENERAL:
+ *
+ * compiler allocates R3 up as temps
+ * compiler allocates register variables R7-R27
+ * compiler allocates external registers R30 down
+ *
+ * compiler allocates register variables F17-F26
+ * compiler allocates external registers F26 down
+ */
+};
+
+enum	as
+{
+	AXXX	= 0,
+	AADD,
+	AADDCC,
+	AADDV,
+	AADDVCC,
+	AADDC,
+	AADDCCC,
+	AADDCV,
+	AADDCVCC,
+	AADDME,
+	AADDMECC,
+	AADDMEVCC,
+	AADDMEV,
+	AADDE,
+	AADDECC,
+	AADDEVCC,
+	AADDEV,
+	AADDZE,
+	AADDZECC,
+	AADDZEVCC,
+	AADDZEV,
+	AAND,
+	AANDCC,
+	AANDN,
+	AANDNCC,
+	ABC,
+	ABCL,
+	ABEQ,
+	ABGE,
+	ABGT,
+	ABL,
+	ABLE,
+	ABLT,
+	ABNE,
+	ABR,
+	ABVC,
+	ABVS,
+	ACMP,
+	ACMPU,
+	ACNTLZW,
+	ACNTLZWCC,
+	ACRAND,
+	ACRANDN,
+	ACREQV,
+	ACRNAND,
+	ACRNOR,
+	ACROR,
+	ACRORN,
+	ACRXOR,
+	ADIVW,
+	ADIVWCC,
+	ADIVWVCC,
+	ADIVWV,
+	ADIVWU,
+	ADIVWUCC,
+	ADIVWUVCC,
+	ADIVWUV,
+	AEQV,
+	AEQVCC,
+	AEXTSB,
+	AEXTSBCC,
+	AEXTSH,
+	AEXTSHCC,
+	AFABS,
+	AFABSCC,
+	AFADD,
+	AFADDCC,
+	AFADDS,
+	AFADDSCC,
+	AFCMPO,
+	AFCMPU,
+	AFCTIW,
+	AFCTIWCC,
+	AFCTIWZ,
+	AFCTIWZCC,
+	AFDIV,
+	AFDIVCC,
+	AFDIVS,
+	AFDIVSCC,
+	AFMADD,
+	AFMADDCC,
+	AFMADDS,
+	AFMADDSCC,
+	AFMOVD,
+	AFMOVDCC,
+	AFMOVDU,
+	AFMOVS,
+	AFMOVSU,
+	AFMSUB,
+	AFMSUBCC,
+	AFMSUBS,
+	AFMSUBSCC,
+	AFMUL,
+	AFMULCC,
+	AFMULS,
+	AFMULSCC,
+	AFNABS,
+	AFNABSCC,
+	AFNEG,
+	AFNEGCC,
+	AFNMADD,
+	AFNMADDCC,
+	AFNMADDS,
+	AFNMADDSCC,
+	AFNMSUB,
+	AFNMSUBCC,
+	AFNMSUBS,
+	AFNMSUBSCC,
+	AFRSP,
+	AFRSPCC,
+	AFSUB,
+	AFSUBCC,
+	AFSUBS,
+	AFSUBSCC,
+	AMOVMW,
+	ALSW,
+	ALWAR,
+	AMOVWBR,
+	AMOVB,
+	AMOVBU,
+	AMOVBZ,
+	AMOVBZU,
+	AMOVH,
+	AMOVHBR,
+	AMOVHU,
+	AMOVHZ,
+	AMOVHZU,
+	AMOVW,
+	AMOVWU,
+	AMOVFL,
+	AMOVCRFS,
+	AMTFSB0,
+	AMTFSB0CC,
+	AMTFSB1,
+	AMTFSB1CC,
+	AMULHW,
+	AMULHWCC,
+	AMULHWU,
+	AMULHWUCC,
+	AMULLW,
+	AMULLWCC,
+	AMULLWVCC,
+	AMULLWV,
+	ANAND,
+	ANANDCC,
+	ANEG,
+	ANEGCC,
+	ANEGVCC,
+	ANEGV,
+	ANOR,
+	ANORCC,
+	AOR,
+	AORCC,
+	AORN,
+	AORNCC,
+	AREM,
+	AREMCC,
+	AREMV,
+	AREMVCC,
+	AREMU,
+	AREMUCC,
+	AREMUV,
+	AREMUVCC,
+	ARFI,
+	ARLWMI,
+	ARLWMICC,
+	ARLWNM,
+	ARLWNMCC,
+	ASLW,
+	ASLWCC,
+	ASRW,
+	ASRAW,
+	ASRAWCC,
+	ASRWCC,
+	ASTSW,
+	ASTWCCC,
+	ASUB,
+	ASUBCC,
+	ASUBVCC,
+	ASUBC,
+	ASUBCCC,
+	ASUBCV,
+	ASUBCVCC,
+	ASUBME,
+	ASUBMECC,
+	ASUBMEVCC,
+	ASUBMEV,
+	ASUBV,
+	ASUBE,
+	ASUBECC,
+	ASUBEV,
+	ASUBEVCC,
+	ASUBZE,
+	ASUBZECC,
+	ASUBZEVCC,
+	ASUBZEV,
+	ASYNC,
+	AXOR,
+	AXORCC,
+
+	ADCBF,
+	ADCBI,
+	ADCBST,
+	ADCBT,
+	ADCBTST,
+	ADCBZ,
+	AECIWX,
+	AECOWX,
+	AEIEIO,
+	AICBI,
+	AISYNC,
+	APTESYNC,
+	ATLBIE,
+	ATLBIEL,
+	ATLBSYNC,
+	ATW,
+
+	ASYSCALL,
+	ADATA,
+	AGLOBL,
+	AGOK,
+	AHISTORY,
+	ANAME,
+	ANOP,
+	ARETURN,
+	ATEXT,
+	AWORD,
+	AEND,
+	ADYNT,
+	AINIT,
+	ASIGNAME,
+
+	ARFCI,
+
+	/* optional on 32-bit */
+	AFRES,
+	AFRESCC,
+	AFRSQRTE,
+	AFRSQRTECC,
+	AFSEL,
+	AFSELCC,
+	AFSQRT,
+	AFSQRTCC,
+	AFSQRTS,
+	AFSQRTSCC,
+
+	/* 64-bit */
+	
+	ACNTLZD,
+	ACNTLZDCC,
+	ACMPW,	/* CMP with L=0 */
+	ACMPWU,
+	ADIVD,
+	ADIVDCC,
+	ADIVDVCC,
+	ADIVDV,
+	ADIVDU,
+	ADIVDUCC,
+	ADIVDUVCC,
+	ADIVDUV,
+	AEXTSW,
+	AEXTSWCC,
+	/* AFCFIW; AFCFIWCC */
+	AFCFID,
+	AFCFIDCC,
+	AFCTID,
+	AFCTIDCC,
+	AFCTIDZ,
+	AFCTIDZCC,
+	ALDAR,
+	AMOVD,
+	AMOVDU,
+	AMOVWZ,
+	AMOVWZU,
+	AMULHD,
+	AMULHDCC,
+	AMULHDU,
+	AMULHDUCC,
+	AMULLD,
+	AMULLDCC,
+	AMULLDVCC,
+	AMULLDV,
+	ARFID,
+	ARLDMI,
+	ARLDMICC,
+	ARLDC,
+	ARLDCCC,
+	ARLDCR,
+	ARLDCRCC,
+	ARLDCL,
+	ARLDCLCC,
+	ASLBIA,
+	ASLBIE,
+	ASLBMFEE,
+	ASLBMFEV,
+	ASLBMTE,
+	ASLD,
+	ASLDCC,
+	ASRD,
+	ASRAD,
+	ASRADCC,
+	ASRDCC,
+	ASTDCCC,
+	ATD,
+
+	/* 64-bit pseudo operation */
+	ADWORD,
+	AREMD,
+	AREMDCC,
+	AREMDV,
+	AREMDVCC,
+	AREMDU,
+	AREMDUCC,
+	AREMDUV,
+	AREMDUVCC,
+
+	/* more 64-bit operations */
+	AHRFID,
+
+	ALAST
+};
+
+/* type/name */
+enum
+{
+	D_GOK	= 0,
+	D_NONE,
+
+/* name */
+	D_EXTERN,
+	D_STATIC,
+	D_AUTO,
+	D_PARAM,
+
+/* type */
+	D_BRANCH,
+	D_OREG,
+	D_CONST,
+	D_FCONST,
+	D_SCONST,
+	D_REG,
+	D_FPSCR,
+	D_MSR,
+	D_FREG,
+	D_CREG,
+	D_SPR,
+	D_OPT,	/* branch/trap option */
+	D_FILE,
+	D_FILE1,
+	D_DCR,	/* device control register */
+	D_DCONST,
+
+/* reg names iff type is D_SPR */
+	D_XER	= 1,
+	D_LR	= 8,
+	D_CTR	= 9
+	/* and many supervisor level registers */
+};
+
+/*
+ * this is the ranlib header
+ */
+#define	SYMDEF	"__.SYMDEF"
+
+/*
+ * this is the simulated IEEE floating point
+ */
+typedef	struct	ieee	Ieee;
+struct	ieee
+{
+	long	l;	/* contains ls-man	0xffffffff */
+	long	h;	/* contains sign	0x80000000
+				    exp		0x7ff00000
+				    ms-man	0x000fffff */
+};

+ 14 - 0
sys/src/cmd/9c/Notes

@@ -0,0 +1,14 @@
+-	effect of register expansion on 32-bit shifts and masks etc
+9c
+-	multab
+-	floating-point conversions
+-	conversions of constants
+-	nodtype for loads
+-	sign-extension instruction (32-64) when in register?
+-	double indexing
+-	SLW (eg, in cat)
+-	scheduling
+
+9l
+-	D_QCONST, DWORD
+-	maskgen

+ 126 - 0
sys/src/cmd/9c/bits.c

@@ -0,0 +1,126 @@
+#include "gc.h"
+
+/*
+Bits
+bor(Bits a, Bits b)
+{
+	Bits c;
+	int i;
+
+	for(i=0; i<BITS; i++)
+		c.b[i] = a.b[i] | b.b[i];
+	return c;
+}
+*/
+
+/*
+Bits
+band(Bits a, Bits b)
+{
+	Bits c;
+	int i;
+
+	for(i=0; i<BITS; i++)
+		c.b[i] = a.b[i] & b.b[i];
+	return c;
+}
+*/
+
+/*
+Bits
+bnot(Bits a)
+{
+	Bits c;
+	int i;
+
+	for(i=0; i<BITS; i++)
+		c.b[i] = ~a.b[i];
+	return c;
+}
+*/
+
+int
+bany(Bits *a)
+{
+	int i;
+
+	for(i=0; i<BITS; i++)
+		if(a->b[i])
+			return 1;
+	return 0;
+}
+
+/*
+int
+beq(Bits a, Bits b)
+{
+	int i;
+
+	for(i=0; i<BITS; i++)
+		if(a.b[i] != b.b[i])
+			return 0;
+	return 1;
+}
+*/
+
+int
+bnum(Bits a)
+{
+	int i;
+	long b;
+
+	for(i=0; i<BITS; i++)
+		if(b = a.b[i])
+			return 32*i + bitno(b);
+	diag(Z, "bad in bnum");
+	return 0;
+}
+
+Bits
+blsh(unsigned n)
+{
+	Bits c;
+
+	c = zbits;
+	c.b[n/32] = 1L << (n%32);
+	return c;
+}
+
+/*
+int
+bset(Bits a, unsigned n)
+{
+	int i;
+
+	if(a.b[n/32] & (1L << (n%32)))
+		return 1;
+	return 0;
+}
+*/
+
+int
+Bconv(va_list *arg, Fconv *fp)
+{
+	char str[STRINGSZ], ss[STRINGSZ], *s;
+	Bits bits;
+	int i;
+
+	str[0] = 0;
+	bits = va_arg(*arg, Bits);
+	while(bany(&bits)) {
+		i = bnum(bits);
+		if(str[0])
+			strcat(str, " ");
+		if(var[i].sym == S) {
+			sprint(ss, "$%ld", var[i].offset);
+			s = ss;
+		} else
+			s = var[i].sym->name;
+		if(strlen(str) + strlen(s) + 1 >= STRINGSZ)
+			break;
+		strcat(str, s);
+		bits.b[i/32] &= ~(1L << (i%32));
+	}
+	strconv(str, fp);
+	return 0;
+}

+ 1102 - 0
sys/src/cmd/9c/cgen.c

@@ -0,0 +1,1102 @@
+#include "gc.h"
+
+void
+cgen(Node *n, Node *nn)
+{
+	Node *l, *r;
+	Prog *p1;
+	Node nod, nod1, nod2, nod3, nod4;
+	int o;
+	long v, curs;
+
+	if(debug['g']) {
+		prtree(nn, "cgen lhs");
+		prtree(n, "cgen");
+	}
+	if(n == Z || n->type == T)
+		return;
+	if(typesu[n->type->etype]) {
+		sugen(n, nn, n->type->width);
+		return;
+	}
+	l = n->left;
+	r = n->right;
+	o = n->op;
+	if(n->addable >= INDEXED) {
+		if(nn == Z) {
+			switch(o) {
+			default:
+				nullwarn(Z, Z);
+				break;
+			case OINDEX:
+				nullwarn(l, r);
+				break;
+			}
+			return;
+		}
+		gmove(n, nn);
+		return;
+	}
+	curs = cursafe;
+
+	if(n->complex >= FNX)
+	if(l->complex >= FNX)
+	if(r != Z && r->complex >= FNX)
+	switch(o) {
+	default:
+		regret(&nod, r);
+		cgen(r, &nod);
+
+		regsalloc(&nod1, r);
+		gopcode(OAS, &nod, Z, &nod1);
+
+		regfree(&nod);
+		nod = *n;
+		nod.right = &nod1;
+		cgen(&nod, nn);
+		return;
+
+	case OFUNC:
+	case OCOMMA:
+	case OANDAND:
+	case OOROR:
+	case OCOND:
+	case ODOT:
+		break;
+	}
+
+	switch(o) {
+	default:
+		diag(n, "unknown op in cgen: %O", o);
+		break;
+
+	case OAS:
+		if(l->op == OBIT)
+			goto bitas;
+		if(l->addable >= INDEXED) {
+			if(nn != Z || r->addable < INDEXED) {
+				regalloc(&nod, r, nn);
+				cgen(r, &nod);
+				gmove(&nod, l);
+				regfree(&nod);
+			} else
+				gmove(r, l);
+			break;
+		}
+		if(l->complex >= r->complex) {
+			reglcgen(&nod1, l, Z);
+			if(r->addable >= INDEXED) {
+				gmove(r, &nod1);
+				if(nn != Z)
+					gmove(r, nn);
+				regfree(&nod1);
+				break;
+			}
+			regalloc(&nod, r, nn);
+			cgen(r, &nod);
+		} else {
+			regalloc(&nod, r, nn);
+			cgen(r, &nod);
+			reglcgen(&nod1, l, Z);
+		}
+		gmove(&nod, &nod1);
+		regfree(&nod);
+		regfree(&nod1);
+		break;
+
+	bitas:
+		n = l->left;
+		regalloc(&nod, r, nn);
+		if(l->complex >= r->complex) {
+			reglcgen(&nod1, n, Z);
+			cgen(r, &nod);
+		} else {
+			cgen(r, &nod);
+			reglcgen(&nod1, n, Z);
+		}
+		regalloc(&nod2, n, Z);
+		gopcode(OAS, &nod1, Z, &nod2);
+		bitstore(l, &nod, &nod1, &nod2, nn);
+		break;
+
+	case OBIT:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		bitload(n, &nod, Z, Z, nn);
+		gopcode(OAS, &nod, Z, nn);
+		regfree(&nod);
+		break;
+
+	case OXOR:
+		if(nn != Z)
+		if(r->op == OCONST && r->vconst == -1){
+			cgen(l, nn);
+			gopcode(OCOM, nn, Z, nn);
+			break;
+		}
+
+	case OADD:
+	case OSUB:
+	case OAND:
+	case OOR:
+	case OLSHR:
+	case OASHL:
+	case OASHR:
+		/*
+		 * immediate operands
+		 */
+		if(nn != Z &&
+		   r->op == OCONST &&
+		   !typefd[n->type->etype] &&
+		   immconst(r)) {
+			cgen(l, nn);
+			if(r->vconst == 0)
+			if(o != OAND)
+				break;
+			if(nn != Z)
+				gopcode(o, r, Z, nn);
+			break;
+		}
+
+	case OMUL:
+	case OLMUL:
+	case OLDIV:
+	case OLMOD:
+	case ODIV:
+	case OMOD:
+		if(nn == Z) {
+			nullwarn(l, r);
+			break;
+		}
+		if(o == OMUL || o == OLMUL) {
+			if(mulcon(n, nn))
+				break;
+			if(debug['M'])
+				print("%L multiply\n", n->lineno);
+		}
+		if(l->complex >= r->complex) {
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			regalloc(&nod1, l, Z);		/* note: l used for type, so shifts work! */
+			cgen(r, &nod1);
+			gopcode(o, &nod1, Z, &nod);
+		} else {
+			regalloc(&nod, l, nn);		/* note: l used for type, so shifts work! */
+			cgen(r, &nod);
+			regalloc(&nod1, l, Z);
+			cgen(l, &nod1);
+			gopcode(o, &nod, &nod1, &nod);
+		}
+		gopcode(OAS, &nod, Z, nn);
+		regfree(&nod);
+		regfree(&nod1);
+		break;
+
+	case OASLSHR:
+	case OASASHL:
+	case OASASHR:
+	case OASAND:
+	case OASADD:
+	case OASSUB:
+	case OASXOR:
+	case OASOR:
+		if(l->op == OBIT)
+			goto asbitop;
+		if(r->op == OCONST &&
+		   !typefd[n->type->etype] &&
+		   immconst(r)) {
+			if(l->addable < INDEXED)
+				reglcgen(&nod2, l, Z);
+			else
+				nod2 = *l;
+			regalloc(&nod, l, nn);		/* note: l used for type, so shifts work! */
+			gopcode(OAS, &nod2, Z, &nod);
+			gopcode(o, r, Z, &nod);
+			gopcode(OAS, &nod, Z, &nod2);
+	
+			regfree(&nod);
+			if(l->addable < INDEXED)
+				regfree(&nod2);
+			break;
+		}
+
+	case OASLMUL:
+	case OASLDIV:
+	case OASLMOD:
+	case OASMUL:
+	case OASDIV:
+	case OASMOD:
+		if(l->op == OBIT)
+			goto asbitop;
+		if(l->complex >= r->complex) {
+			if(l->addable < INDEXED)
+				reglcgen(&nod2, l, Z);
+			else
+				nod2 = *l;
+			regalloc(&nod, n, nn);
+			cgen(r, &nod);
+		} else {
+			regalloc(&nod, n, nn);
+			cgen(r, &nod);
+			if(l->addable < INDEXED)
+				reglcgen(&nod2, l, Z);
+			else
+				nod2 = *l;
+		}
+		regalloc(&nod1, n, Z);
+		gopcode(OAS, &nod2, Z, &nod1);
+		if(nod1.type->etype != nod.type->etype){
+			regalloc(&nod3, &nod, Z);
+			gmove(&nod1, &nod3);
+			regfree(&nod1);
+			nod1 = nod3;
+		}
+		gopcode(o, &nod, &nod1, &nod);
+		gmove(&nod, &nod2);
+		if(nn != Z)
+			gmove(&nod, nn);
+		regfree(&nod);
+		regfree(&nod1);
+		if(l->addable < INDEXED)
+			regfree(&nod2);
+		break;
+
+	asbitop:
+		regalloc(&nod4, n, nn);
+		regalloc(&nod3, r, Z);
+		if(l->complex >= r->complex) {
+			bitload(l, &nod, &nod1, &nod2, &nod4);
+			cgen(r, &nod3);
+		} else {
+			cgen(r, &nod3);
+			bitload(l, &nod, &nod1, &nod2, &nod4);
+		}
+		gmove(&nod, &nod4);
+		gopcode(n->op, &nod3, Z, &nod4);
+		regfree(&nod3);
+		gmove(&nod4, &nod);
+		regfree(&nod4);
+		bitstore(l, &nod, &nod1, &nod2, nn);
+		break;
+
+	case OADDR:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		lcgen(l, nn);
+		break;
+
+	case OFUNC:
+		if(l->complex >= FNX) {
+			if(l->op != OIND)
+				diag(n, "bad function call");
+
+			regret(&nod, l->left);
+			cgen(l->left, &nod);
+			regsalloc(&nod1, l->left);
+			gopcode(OAS, &nod, Z, &nod1);
+			regfree(&nod);
+
+			nod = *n;
+			nod.left = &nod2;
+			nod2 = *l;
+			nod2.left = &nod1;
+			nod2.complex = 1;
+			cgen(&nod, nn);
+
+			return;
+		}
+		o = reg[REGARG];
+		gargs(r, &nod, &nod1);
+		if(l->addable < INDEXED) {
+			reglcgen(&nod, l, Z);
+			gopcode(OFUNC, Z, Z, &nod);
+			regfree(&nod);
+		} else
+			gopcode(OFUNC, Z, Z, l);
+		if(REGARG>=0)
+			if(o != reg[REGARG])
+				reg[REGARG]--;
+		if(nn != Z) {
+			regret(&nod, n);
+			gopcode(OAS, &nod, Z, nn);
+			regfree(&nod);
+		}
+		break;
+
+	case OIND:
+		if(nn == Z) {
+			cgen(l, nn);
+			break;
+		}
+		regialloc(&nod, n, nn);
+		r = l;
+		while(r->op == OADD)
+			r = r->right;
+		if(sconst(r)) {
+			v = r->vconst;
+			r->vconst = 0;
+			cgen(l, &nod);
+			nod.xoffset += v;
+			r->vconst = v;
+		} else
+			cgen(l, &nod);
+		regind(&nod, n);
+		gopcode(OAS, &nod, Z, nn);
+		regfree(&nod);
+		break;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OLT:
+	case OGE:
+	case OGT:
+	case OLO:
+	case OLS:
+	case OHI:
+	case OHS:
+		if(nn == Z) {
+			nullwarn(l, r);
+			break;
+		}
+		boolgen(n, 1, nn);
+		break;
+
+	case OANDAND:
+	case OOROR:
+		boolgen(n, 1, nn);
+		if(nn == Z)
+			patch(p, pc);
+		break;
+
+	case ONOT:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		boolgen(n, 1, nn);
+		break;
+
+	case OCOMMA:
+		cgen(l, Z);
+		cgen(r, nn);
+		break;
+
+	case OCAST:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		/*
+		 * convert from types l->n->nn
+		 */
+		if(nocast(l->type, n->type) && nocast(n->type, nn->type)) {
+			/* both null, gen l->nn */
+			cgen(l, nn);
+			break;
+		}
+		regalloc(&nod, l, nn);
+		cgen(l, &nod);
+		regalloc(&nod1, n, &nod);
+		gopcode(OAS, &nod, Z, &nod1);
+		gopcode(OAS, &nod1, Z, nn);
+		regfree(&nod1);
+		regfree(&nod);
+		break;
+
+	case ODOT:
+		sugen(l, nodrat, l->type->width);
+		if(nn != Z) {
+			warn(n, "non-interruptable temporary");
+			nod = *nodrat;
+			if(!r || r->op != OCONST) {
+				diag(n, "DOT and no offset");
+				break;
+			}
+			nod.xoffset += (long)r->vconst;
+			nod.type = n->type;
+			cgen(&nod, nn);
+		}
+		break;
+
+	case OCOND:
+		bcgen(l, 1);
+		p1 = p;
+		cgen(r->left, nn);
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		cgen(r->right, nn);
+		patch(p1, pc);
+		break;
+
+	case OPOSTINC:
+	case OPOSTDEC:
+		v = 1;
+		if(l->type->etype == TIND)
+			v = l->type->link->width;
+		if(o == OPOSTDEC)
+			v = -v;
+		if(l->op == OBIT)
+			goto bitinc;
+		if(nn == Z)
+			goto pre;
+
+		if(l->addable < INDEXED)
+			reglcgen(&nod2, l, Z);
+		else
+			nod2 = *l;
+
+		regalloc(&nod, l, nn);
+		gopcode(OAS, &nod2, Z, &nod);
+		regalloc(&nod1, l, Z);
+		if(typefd[l->type->etype]) {
+			regalloc(&nod3, l, Z);
+			if(v < 0) {
+				gopcode(OAS, nodfconst(-v), Z, &nod3);
+				gopcode(OSUB, &nod3, &nod, &nod1);
+			} else {
+				gopcode(OAS, nodfconst(v), Z, &nod3);
+				gopcode(OADD, &nod3, &nod, &nod1);
+			}
+			regfree(&nod3);
+		} else
+			gopcode(OADD, nodconst(v), &nod, &nod1);
+		gopcode(OAS, &nod1, Z, &nod2);
+
+		regfree(&nod);
+		regfree(&nod1);
+		if(l->addable < INDEXED)
+			regfree(&nod2);
+		break;
+
+	case OPREINC:
+	case OPREDEC:
+		v = 1;
+		if(l->type->etype == TIND)
+			v = l->type->link->width;
+		if(o == OPREDEC)
+			v = -v;
+		if(l->op == OBIT)
+			goto bitinc;
+
+	pre:
+		if(l->addable < INDEXED)
+			reglcgen(&nod2, l, Z);
+		else
+			nod2 = *l;
+
+		regalloc(&nod, l, nn);
+		gopcode(OAS, &nod2, Z, &nod);
+		if(typefd[l->type->etype]) {
+			regalloc(&nod3, l, Z);
+			if(v < 0) {
+				gopcode(OAS, nodfconst(-v), Z, &nod3);
+				gopcode(OSUB, &nod3, Z, &nod);
+			} else {
+				gopcode(OAS, nodfconst(v), Z, &nod3);
+				gopcode(OADD, &nod3, Z, &nod);
+			}
+			regfree(&nod3);
+		} else
+			gopcode(OADD, nodconst(v), Z, &nod);
+		gopcode(OAS, &nod, Z, &nod2);
+		if(nn && l->op == ONAME)	/* in x=++i, emit USED(i) */
+			gins(ANOP, l, Z);
+
+		regfree(&nod);
+		if(l->addable < INDEXED)
+			regfree(&nod2);
+		break;
+
+	bitinc:
+		if(nn != Z && (o == OPOSTINC || o == OPOSTDEC)) {
+			bitload(l, &nod, &nod1, &nod2, Z);
+			gopcode(OAS, &nod, Z, nn);
+			gopcode(OADD, nodconst(v), Z, &nod);
+			bitstore(l, &nod, &nod1, &nod2, Z);
+			break;
+		}
+		bitload(l, &nod, &nod1, &nod2, nn);
+		gopcode(OADD, nodconst(v), Z, &nod);
+		bitstore(l, &nod, &nod1, &nod2, nn);
+		break;
+	}
+	cursafe = curs;
+}
+
+void
+reglcgen(Node *t, Node *n, Node *nn)
+{
+	Node *r;
+	long v;
+
+	regialloc(t, n, nn);
+	if(n->op == OIND) {
+		r = n->left;
+		while(r->op == OADD)
+			r = r->right;
+		if(sconst(r)) {
+			v = r->vconst;
+			r->vconst = 0;
+			lcgen(n, t);
+			t->xoffset += v;
+			r->vconst = v;
+			regind(t, n);
+			return;
+		}
+	}
+	lcgen(n, t);
+	regind(t, n);
+}
+
+void
+lcgen(Node *n, Node *nn)
+{
+	Prog *p1;
+	Node nod;
+
+	if(debug['g']) {
+		prtree(nn, "lcgen lhs");
+		prtree(n, "lcgen");
+	}
+	if(n == Z || n->type == T)
+		return;
+	if(nn == Z) {
+		nn = &nod;
+		regalloc(&nod, n, Z);
+	}
+	switch(n->op) {
+	default:
+		if(n->addable < INDEXED) {
+			diag(n, "unknown op in lcgen: %O", n->op);
+			break;
+		}
+		nod = *n;
+		nod.op = OADDR;
+		nod.left = n;
+		nod.right = Z;
+		nod.type = types[TIND];
+		gopcode(OAS, &nod, Z, nn);
+		break;
+
+	case OCOMMA:
+		cgen(n->left, n->left);
+		lcgen(n->right, nn);
+		break;
+
+	case OIND:
+		cgen(n->left, nn);
+		break;
+
+	case OCOND:
+		bcgen(n->left, 1);
+		p1 = p;
+		lcgen(n->right->left, nn);
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		lcgen(n->right->right, nn);
+		patch(p1, pc);
+		break;
+	}
+}
+
+void
+bcgen(Node *n, int true)
+{
+
+	if(n->type == T)
+		gbranch(OGOTO);
+	else
+		boolgen(n, true, Z);
+}
+
+void
+boolgen(Node *n, int true, Node *nn)
+{
+	int o;
+	Prog *p1, *p2;
+	Node *l, *r, nod, nod1;
+	long curs;
+
+	if(debug['g']) {
+		prtree(nn, "boolgen lhs");
+		prtree(n, "boolgen");
+	}
+	curs = cursafe;
+	l = n->left;
+	r = n->right;
+	switch(n->op) {
+
+	default:
+		if(n->op == OCONST) {
+			o = vconst(n);
+			if(!true)
+				o = !o;
+			gbranch(OGOTO);
+			if(o) {
+				p1 = p;
+				gbranch(OGOTO);
+				patch(p1, pc);
+			}
+			goto com;
+		}
+		regalloc(&nod, n, nn);
+		cgen(n, &nod);
+		o = ONE;
+		if(true)
+			o = comrel[relindex(o)];
+		if(typefd[n->type->etype]) {
+			nodreg(&nod1, n, NREG+FREGZERO);
+			gopcode(o, &nod, Z, &nod1);
+		} else
+			gopcode(o, &nod, Z, nodconst(0));
+		regfree(&nod);
+		goto com;
+
+	case OCOMMA:
+		cgen(l, Z);
+		boolgen(r, true, nn);
+		break;
+
+	case ONOT:
+		boolgen(l, !true, nn);
+		break;
+
+	case OCOND:
+		bcgen(l, 1);
+		p1 = p;
+		bcgen(r->left, true);
+		p2 = p;
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		bcgen(r->right, !true);
+		patch(p2, pc);
+		p2 = p;
+		gbranch(OGOTO);
+		patch(p1, pc);
+		patch(p2, pc);
+		goto com;
+
+	case OANDAND:
+		if(!true)
+			goto caseor;
+
+	caseand:
+		bcgen(l, true);
+		p1 = p;
+		bcgen(r, !true);
+		p2 = p;
+		patch(p1, pc);
+		gbranch(OGOTO);
+		patch(p2, pc);
+		goto com;
+
+	case OOROR:
+		if(!true)
+			goto caseand;
+
+	caseor:
+		bcgen(l, !true);
+		p1 = p;
+		bcgen(r, !true);
+		p2 = p;
+		gbranch(OGOTO);
+		patch(p1, pc);
+		patch(p2, pc);
+		goto com;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OLT:
+	case OGE:
+	case OGT:
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		o = n->op;
+		if(true)
+			o = comrel[relindex(o)];
+		if(l->complex >= FNX && r->complex >= FNX) {
+			regret(&nod, r);
+			cgen(r, &nod);
+			regsalloc(&nod1, r);
+			gopcode(OAS, &nod, Z, &nod1);
+			regfree(&nod);
+			nod = *n;
+			nod.right = &nod1;
+			boolgen(&nod, true, nn);
+			break;
+		}
+		if(sconst(r)) {
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			gopcode(o, &nod, Z, r);
+			regfree(&nod);
+			goto com;
+		}
+		if(l->complex >= r->complex) {
+			regalloc(&nod1, l, nn);
+			cgen(l, &nod1);
+			regalloc(&nod, r, Z);
+			cgen(r, &nod);
+		} else {
+			regalloc(&nod, r, nn);
+			cgen(r, &nod);
+			regalloc(&nod1, l, Z);
+			cgen(l, &nod1);
+		}
+		gopcode(o, &nod1, Z, &nod);
+		regfree(&nod);
+		regfree(&nod1);
+
+	com:
+		if(nn != Z) {
+			p1 = p;
+			gopcode(OAS, nodconst(1L), Z, nn);
+			gbranch(OGOTO);
+			p2 = p;
+			patch(p1, pc);
+			gopcode(OAS, nodconst(0L), Z, nn);
+			patch(p2, pc);
+		}
+		break;
+	}
+	cursafe = curs;
+}
+
+void
+sugen(Node *n, Node *nn, long w)
+{
+	Prog *p1;
+	Node nod0, nod1, nod2, nod3, nod4, *l, *r;
+	Type *t;
+	long pc1;
+	int i, m, c;
+
+	if(n == Z || n->type == T)
+		return;
+	if(debug['g']) {
+		prtree(nn, "sugen lhs");
+		prtree(n, "sugen");
+	}
+	if(nn == nodrat)
+		if(w > nrathole)
+			nrathole = w;
+	switch(n->op) {
+	case OIND:
+		if(nn == Z) {
+			nullwarn(n->left, Z);
+			break;
+		}
+
+	default:
+		goto copy;
+
+	case OCONST:
+		if(n->type && typev[n->type->etype]) {
+			if(nn == Z) {
+				nullwarn(n->left, Z);
+				break;
+			}
+
+			t = nn->type;
+			nn->type = types[TLONG];
+			reglcgen(&nod1, nn, Z);
+			nn->type = t;
+
+			if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+				gopcode(OAS, nod32const(n->vconst>>32), Z, &nod1);
+			else
+				gopcode(OAS, nod32const(n->vconst), Z, &nod1);
+			nod1.xoffset += SZ_LONG;
+			if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+				gopcode(OAS, nod32const(n->vconst), Z, &nod1);
+			else
+				gopcode(OAS, nod32const(n->vconst>>32), Z, &nod1);
+
+			regfree(&nod1);
+			break;
+		}
+		goto copy;
+
+	case ODOT:
+		l = n->left;
+		sugen(l, nodrat, l->type->width);
+		if(nn != Z) {
+			warn(n, "non-interruptable temporary");
+			nod1 = *nodrat;
+			r = n->right;
+			if(!r || r->op != OCONST) {
+				diag(n, "DOT and no offset");
+				break;
+			}
+			nod1.xoffset += (long)r->vconst;
+			nod1.type = n->type;
+			sugen(&nod1, nn, w);
+		}
+		break;
+
+	case OSTRUCT:
+		/*
+		 * rewrite so lhs has no side effects
+		 */
+		if(nn != Z && side(nn)) {
+			nod1 = *n;
+			nod1.type = typ(TIND, n->type);
+			regalloc(&nod2, &nod1, Z);
+			lcgen(nn, &nod2);
+			regsalloc(&nod0, &nod1);
+			gopcode(OAS, &nod2, Z, &nod0);
+			regfree(&nod2);
+
+			nod1 = *n;
+			nod1.op = OIND;
+			nod1.left = &nod0;
+			nod1.right = Z;
+			nod1.complex = 1;
+
+			sugen(n, &nod1, w);
+			return;
+		}
+
+		r = n->left;
+		for(t = n->type->link; t != T; t = t->down) {
+			l = r;
+			if(r->op == OLIST) {
+				l = r->left;
+				r = r->right;
+			}
+			if(nn == Z) {
+				cgen(l, nn);
+				continue;
+			}
+			/*
+			 * hand craft *(&nn + o) = l
+			 */
+			nod0 = znode;
+			nod0.op = OAS;
+			nod0.type = t;
+			nod0.left = &nod1;
+			nod0.right = l;
+
+			nod1 = znode;
+			nod1.op = OIND;
+			nod1.type = t;
+			nod1.left = &nod2;
+
+			nod2 = znode;
+			nod2.op = OADD;
+			nod2.type = typ(TIND, t);
+			nod2.left = &nod3;
+			nod2.right = &nod4;
+
+			nod3 = znode;
+			nod3.op = OADDR;
+			nod3.type = nod2.type;
+			nod3.left = nn;
+
+			nod4 = znode;
+			nod4.op = OCONST;
+			nod4.type = nod2.type;
+			nod4.vconst = t->offset;
+
+			ccom(&nod0);
+			acom(&nod0);
+			xcom(&nod0);
+			nod0.addable = 0;
+
+			/* prtree(&nod0, "hand craft"); /* */
+			cgen(&nod0, Z);
+		}
+		break;
+
+	case OAS:
+		if(nn == Z) {
+			if(n->addable < INDEXED)
+				sugen(n->right, n->left, w);
+			break;
+		}
+		/* BOTCH -- functions can clobber rathole */
+		sugen(n->right, nodrat, w);
+		warn(n, "non-interruptable temporary");
+		sugen(nodrat, n->left, w);
+		sugen(nodrat, nn, w);
+		break;
+
+	case OFUNC:
+		if(nn == Z) {
+			sugen(n, nodrat, w);
+			break;
+		}
+		if(nn->op != OIND) {
+			nn = new1(OADDR, nn, Z);
+			nn->type = types[TIND];
+			nn->addable = 0;
+		} else
+			nn = nn->left;
+		n = new(OFUNC, n->left, new(OLIST, nn, n->right));
+		n->type = types[TVOID];
+		n->left->type = types[TVOID];
+		cgen(n, Z);
+		break;
+
+	case OCOND:
+		bcgen(n->left, 1);
+		p1 = p;
+		sugen(n->right->left, nn, w);
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		sugen(n->right->right, nn, w);
+		patch(p1, pc);
+		break;
+
+	case OCOMMA:
+		cgen(n->left, Z);
+		sugen(n->right, nn, w);
+		break;
+	}
+	return;
+
+copy:
+	if(nn == Z)
+		return;
+	if(n->complex >= FNX && nn->complex >= FNX) {
+		t = nn->type;
+		nn->type = types[TLONG];
+		regialloc(&nod1, nn, Z);
+		lcgen(nn, &nod1);
+		regsalloc(&nod2, nn);
+		nn->type = t;
+
+		gopcode(OAS, &nod1, Z, &nod2);
+		regfree(&nod1);
+
+		nod2.type = typ(TIND, t);
+
+		nod1 = nod2;
+		nod1.op = OIND;
+		nod1.left = &nod2;
+		nod1.right = Z;
+		nod1.complex = 1;
+		nod1.type = t;
+
+		sugen(n, &nod1, w);
+		return;
+	}
+
+	if(n->complex > nn->complex) {
+		t = n->type;
+		n->type = types[TLONG];
+		reglcgen(&nod1, n, Z);
+		n->type = t;
+
+		t = nn->type;
+		nn->type = types[TLONG];
+		reglcgen(&nod2, nn, Z);
+		nn->type = t;
+	} else {
+		t = nn->type;
+		nn->type = types[TLONG];
+		reglcgen(&nod2, nn, Z);
+		nn->type = t;
+
+		t = n->type;
+		n->type = types[TLONG];
+		reglcgen(&nod1, n, Z);
+		n->type = t;
+	}
+
+	w /= SZ_LONG;
+	if(w <= 5) {
+		layout(&nod1, &nod2, w, 0, Z);
+		goto out;
+	}
+
+	/*
+	 * minimize space for unrolling loop
+	 * 3,4,5 times. (6 or more is never minimum)
+	 * if small structure, try 2 also.
+	 */
+	c = 0; /* set */
+	m = 100;
+	i = 3;
+	if(w <= 15)
+		i = 2;
+	for(; i<=5; i++)
+		if(i + w%i <= m) {
+			c = i;
+			m = c + w%c;
+		}
+
+	regalloc(&nod3, &regnode, Z);
+	layout(&nod1, &nod2, w%c, w/c, &nod3);
+	
+	pc1 = pc;
+	layout(&nod1, &nod2, c, 0, Z);
+
+	gopcode(OSUB, nodconst(1L), Z, &nod3);
+	nod1.op = OREGISTER;
+	gopcode(OADD, nodconst(c*SZ_LONG), Z, &nod1);
+	nod2.op = OREGISTER;
+	gopcode(OADD, nodconst(c*SZ_LONG), Z, &nod2);
+	
+	gopcode(OGT, &nod3, Z, nodconst(0));
+	patch(p, pc1);
+
+	regfree(&nod3);
+out:
+	regfree(&nod1);
+	regfree(&nod2);
+}
+
+void
+layout(Node *f, Node *t, int c, int cv, Node *cn)
+{
+	Node t1, t2;
+
+	while(c > 3) {
+		layout(f, t, 2, 0, Z);
+		c -= 2;
+	}
+
+	regalloc(&t1, &regnode, Z);
+	regalloc(&t2, &regnode, Z);
+	if(c > 0) {
+		gopcode(OAS, f, Z, &t1);
+		f->xoffset += SZ_LONG;
+	}
+	if(cn != Z)
+		gopcode(OAS, nodconst(cv), Z, cn);
+	if(c > 1) {
+		gopcode(OAS, f, Z, &t2);
+		f->xoffset += SZ_LONG;
+	}
+	if(c > 0) {
+		gopcode(OAS, &t1, Z, t);
+		t->xoffset += SZ_LONG;
+	}
+	if(c > 2) {
+		gopcode(OAS, f, Z, &t1);
+		f->xoffset += SZ_LONG;
+	}
+	if(c > 1) {
+		gopcode(OAS, &t2, Z, t);
+		t->xoffset += SZ_LONG;
+	}
+	if(c > 2) {
+		gopcode(OAS, &t1, Z, t);
+		t->xoffset += SZ_LONG;
+	}
+	regfree(&t1);
+	regfree(&t2);
+}

+ 309 - 0
sys/src/cmd/9c/enam.c

@@ -0,0 +1,309 @@
+char	*anames[] =
+{
+	"XXX",
+	"ADD",
+	"ADDCC",
+	"ADDV",
+	"ADDVCC",
+	"ADDC",
+	"ADDCCC",
+	"ADDCV",
+	"ADDCVCC",
+	"ADDME",
+	"ADDMECC",
+	"ADDMEVCC",
+	"ADDMEV",
+	"ADDE",
+	"ADDECC",
+	"ADDEVCC",
+	"ADDEV",
+	"ADDZE",
+	"ADDZECC",
+	"ADDZEVCC",
+	"ADDZEV",
+	"AND",
+	"ANDCC",
+	"ANDN",
+	"ANDNCC",
+	"BC",
+	"BCL",
+	"BEQ",
+	"BGE",
+	"BGT",
+	"BL",
+	"BLE",
+	"BLT",
+	"BNE",
+	"BR",
+	"BVC",
+	"BVS",
+	"CMP",
+	"CMPU",
+	"CNTLZW",
+	"CNTLZWCC",
+	"CRAND",
+	"CRANDN",
+	"CREQV",
+	"CRNAND",
+	"CRNOR",
+	"CROR",
+	"CRORN",
+	"CRXOR",
+	"DIVW",
+	"DIVWCC",
+	"DIVWVCC",
+	"DIVWV",
+	"DIVWU",
+	"DIVWUCC",
+	"DIVWUVCC",
+	"DIVWUV",
+	"EQV",
+	"EQVCC",
+	"EXTSB",
+	"EXTSBCC",
+	"EXTSH",
+	"EXTSHCC",
+	"FABS",
+	"FABSCC",
+	"FADD",
+	"FADDCC",
+	"FADDS",
+	"FADDSCC",
+	"FCMPO",
+	"FCMPU",
+	"FCTIW",
+	"FCTIWCC",
+	"FCTIWZ",
+	"FCTIWZCC",
+	"FDIV",
+	"FDIVCC",
+	"FDIVS",
+	"FDIVSCC",
+	"FMADD",
+	"FMADDCC",
+	"FMADDS",
+	"FMADDSCC",
+	"FMOVD",
+	"FMOVDCC",
+	"FMOVDU",
+	"FMOVS",
+	"FMOVSU",
+	"FMSUB",
+	"FMSUBCC",
+	"FMSUBS",
+	"FMSUBSCC",
+	"FMUL",
+	"FMULCC",
+	"FMULS",
+	"FMULSCC",
+	"FNABS",
+	"FNABSCC",
+	"FNEG",
+	"FNEGCC",
+	"FNMADD",
+	"FNMADDCC",
+	"FNMADDS",
+	"FNMADDSCC",
+	"FNMSUB",
+	"FNMSUBCC",
+	"FNMSUBS",
+	"FNMSUBSCC",
+	"FRSP",
+	"FRSPCC",
+	"FSUB",
+	"FSUBCC",
+	"FSUBS",
+	"FSUBSCC",
+	"MOVMW",
+	"LSW",
+	"LWAR",
+	"MOVWBR",
+	"MOVB",
+	"MOVBU",
+	"MOVBZ",
+	"MOVBZU",
+	"MOVH",
+	"MOVHBR",
+	"MOVHU",
+	"MOVHZ",
+	"MOVHZU",
+	"MOVW",
+	"MOVWU",
+	"MOVFL",
+	"MOVCRFS",
+	"MTFSB0",
+	"MTFSB0CC",
+	"MTFSB1",
+	"MTFSB1CC",
+	"MULHW",
+	"MULHWCC",
+	"MULHWU",
+	"MULHWUCC",
+	"MULLW",
+	"MULLWCC",
+	"MULLWVCC",
+	"MULLWV",
+	"NAND",
+	"NANDCC",
+	"NEG",
+	"NEGCC",
+	"NEGVCC",
+	"NEGV",
+	"NOR",
+	"NORCC",
+	"OR",
+	"ORCC",
+	"ORN",
+	"ORNCC",
+	"REM",
+	"REMCC",
+	"REMV",
+	"REMVCC",
+	"REMU",
+	"REMUCC",
+	"REMUV",
+	"REMUVCC",
+	"RFI",
+	"RLWMI",
+	"RLWMICC",
+	"RLWNM",
+	"RLWNMCC",
+	"SLW",
+	"SLWCC",
+	"SRW",
+	"SRAW",
+	"SRAWCC",
+	"SRWCC",
+	"STSW",
+	"STWCCC",
+	"SUB",
+	"SUBCC",
+	"SUBVCC",
+	"SUBC",
+	"SUBCCC",
+	"SUBCV",
+	"SUBCVCC",
+	"SUBME",
+	"SUBMECC",
+	"SUBMEVCC",
+	"SUBMEV",
+	"SUBV",
+	"SUBE",
+	"SUBECC",
+	"SUBEV",
+	"SUBEVCC",
+	"SUBZE",
+	"SUBZECC",
+	"SUBZEVCC",
+	"SUBZEV",
+	"SYNC",
+	"XOR",
+	"XORCC",
+	"DCBF",
+	"DCBI",
+	"DCBST",
+	"DCBT",
+	"DCBTST",
+	"DCBZ",
+	"ECIWX",
+	"ECOWX",
+	"EIEIO",
+	"ICBI",
+	"ISYNC",
+	"PTESYNC",
+	"TLBIE",
+	"TLBIEL",
+	"TLBSYNC",
+	"TW",
+	"SYSCALL",
+	"DATA",
+	"GLOBL",
+	"GOK",
+	"HISTORY",
+	"NAME",
+	"NOP",
+	"RETURN",
+	"TEXT",
+	"WORD",
+	"END",
+	"DYNT",
+	"INIT",
+	"SIGNAME",
+	"RFCI",
+	"FRES",
+	"FRESCC",
+	"FRSQRTE",
+	"FRSQRTECC",
+	"FSEL",
+	"FSELCC",
+	"FSQRT",
+	"FSQRTCC",
+	"FSQRTS",
+	"FSQRTSCC",
+	"CNTLZD",
+	"CNTLZDCC",
+	"CMPW",
+	"CMPWU",
+	"DIVD",
+	"DIVDCC",
+	"DIVDVCC",
+	"DIVDV",
+	"DIVDU",
+	"DIVDUCC",
+	"DIVDUVCC",
+	"DIVDUV",
+	"EXTSW",
+	"EXTSWCC",
+	"FCFID",
+	"FCFIDCC",
+	"FCTID",
+	"FCTIDCC",
+	"FCTIDZ",
+	"FCTIDZCC",
+	"LDAR",
+	"MOVD",
+	"MOVDU",
+	"MOVWZ",
+	"MOVWZU",
+	"MULHD",
+	"MULHDCC",
+	"MULHDU",
+	"MULHDUCC",
+	"MULLD",
+	"MULLDCC",
+	"MULLDVCC",
+	"MULLDV",
+	"RFID",
+	"RLDMI",
+	"RLDMICC",
+	"RLDC",
+	"RLDCCC",
+	"RLDCR",
+	"RLDCRCC",
+	"RLDCL",
+	"RLDCLCC",
+	"SLBIA",
+	"SLBIE",
+	"SLBMFEE",
+	"SLBMFEV",
+	"SLBMTE",
+	"SLD",
+	"SLDCC",
+	"SRD",
+	"SRAD",
+	"SRADCC",
+	"SRDCC",
+	"STDCCC",
+	"TD",
+	"DWORD",
+	"REMD",
+	"REMDCC",
+	"REMDV",
+	"REMDVCC",
+	"REMDU",
+	"REMDUCC",
+	"REMDUV",
+	"REMDUVCC",
+	"HRFID",
+	"LAST",
+};

+ 352 - 0
sys/src/cmd/9c/gc.h

@@ -0,0 +1,352 @@
+#include	"../cc/cc.h"
+#include	"../9c/9.out.h"
+
+/*
+ * 9c/powerpc64
+ */
+#define	SZ_CHAR		1
+#define	SZ_SHORT	2
+#define	SZ_INT		4
+#define	SZ_LONG		4
+#define	SZ_IND		8
+#define	SZ_FLOAT	4
+#define	SZ_VLONG	8
+#define	SZ_DOUBLE	8
+#define	FNX		100
+
+typedef	struct	Adr	Adr;
+typedef	struct	Prog	Prog;
+typedef	struct	Case	Case;
+typedef	struct	C1	C1;
+typedef	struct	Multab	Multab;
+typedef	struct	Hintab	Hintab;
+typedef	struct	Var	Var;
+typedef	struct	Reg	Reg;
+typedef	struct	Rgn	Rgn;
+
+struct	Adr
+{
+	union
+	{
+		vlong	offset;
+		double	dval;
+		char	sval[NSNAME];
+	};
+	Sym*	sym;
+	char	type;
+	char	reg;
+	char	name;
+	char	etype;
+};
+#define	A	((Adr*)0)
+
+#define	INDEXED	9
+struct	Prog
+{
+	Adr	from;
+	Adr	from3;		/* third argument for fmadd, fmsub, ... */
+	Adr	to;
+	Prog*	link;
+	long	lineno;
+	short	as;
+	char	reg;
+};
+#define	P	((Prog*)0)
+
+struct	Case
+{
+	Case*	link;
+	vlong	val;
+	long	label;
+	char	def;
+	char isv;
+};
+#define	C	((Case*)0)
+
+struct	C1
+{
+	vlong	val;
+	long	label;
+};
+
+struct	Multab
+{
+	long	val;
+	char	code[20];
+};
+
+struct	Hintab
+{
+	ushort	val;
+	char	hint[10];
+};
+
+struct	Var
+{
+	vlong	offset;
+	Sym*	sym;
+	char	name;
+	char	etype;
+};
+
+struct	Reg
+{
+	long	pc;
+	long	rpo;		/* reverse post ordering */
+
+	Bits	set;
+	Bits	use1;
+	Bits	use2;
+
+	Bits	refbehind;
+	Bits	refahead;
+	Bits	calbehind;
+	Bits	calahead;
+	Bits	regdiff;
+	Bits	act;
+
+	long	regu;
+	long	loop;		/* could be shorter */
+
+	union
+	{
+		Reg*	log5;
+		long	active;
+	};
+	Reg*	p1;
+	Reg*	p2;
+	Reg*	p2link;
+	Reg*	s1;
+	Reg*	s2;
+	Reg*	link;
+	Prog*	prog;
+};
+#define	R	((Reg*)0)
+
+#define	NRGN	600
+struct	Rgn
+{
+	Reg*	enter;
+	short	cost;
+	short	varno;
+	short	regno;
+};
+
+EXTERN	long	breakpc;
+EXTERN	long	nbreak;
+EXTERN	Case*	cases;
+EXTERN	Node	constnode;
+EXTERN	Node	fconstnode;
+EXTERN	Node	vconstnode;
+EXTERN	long	continpc;
+EXTERN	long	curarg;
+EXTERN	long	cursafe;
+EXTERN	Prog*	firstp;
+EXTERN	Prog*	lastp;
+EXTERN	int	hintabsize;
+EXTERN	long	maxargsafe;
+EXTERN	Multab	multab[20];
+EXTERN	int	mnstring;
+EXTERN	Node*	nodrat;
+EXTERN	Node*	nodret;
+EXTERN	Node*	nodsafe;
+EXTERN	long	nrathole;
+EXTERN	long	nstring;
+EXTERN	Prog*	p;
+EXTERN	long	pc;
+EXTERN	Node	regnode;
+EXTERN	Node	qregnode;
+EXTERN	char	string[NSNAME];
+EXTERN	Sym*	symrathole;
+EXTERN	Node	znode;
+EXTERN	Prog	zprog;
+EXTERN	int	reg[NREG+NREG];
+EXTERN	long	exregoffset;
+EXTERN	long	exfregoffset;
+EXTERN	uchar	typechlpv[NTYPE];
+
+#define	BLOAD(r)	band(bnot(r->refbehind), r->refahead)
+#define	BSTORE(r)	band(bnot(r->calbehind), r->calahead)
+#define	LOAD(r)		(~r->refbehind.b[z] & r->refahead.b[z])
+#define	STORE(r)	(~r->calbehind.b[z] & r->calahead.b[z])
+
+#define	bset(a,n)	((a).b[(n)/32]&(1L<<(n)%32))
+
+#define	CLOAD	5
+#define	CREF	5
+#define	CINF	1000
+#define	LOOP	3
+
+EXTERN	Rgn	region[NRGN];
+EXTERN	Rgn*	rgp;
+EXTERN	int	nregion;
+EXTERN	int	nvar;
+
+EXTERN	Bits	externs;
+EXTERN	Bits	params;
+EXTERN	Bits	consts;
+EXTERN	Bits	addrs;
+
+EXTERN	long	regbits;
+EXTERN	long	exregbits;
+
+EXTERN	int	change;
+EXTERN	int	suppress;
+
+EXTERN	Reg*	firstr;
+EXTERN	Reg*	lastr;
+EXTERN	Reg	zreg;
+EXTERN	Reg*	freer;
+EXTERN	Var	var[NVAR];
+EXTERN	long*	idom;
+EXTERN	Reg**	rpo2r;
+EXTERN	long	maxnr;
+
+#define	R0ISZERO	(debug['0']==0)
+
+extern	char*	anames[];
+extern	Hintab	hintab[];
+
+/*
+ * sgen.c
+ */
+void	codgen(Node*, Node*);
+void	gen(Node*);
+void	usedset(Node*, int);
+void	noretval(int);
+void	xcom(Node*);
+int	bcomplex(Node*, Node*);
+
+/*
+ * cgen.c
+ */
+void	cgen(Node*, Node*);
+void	reglcgen(Node*, Node*, Node*);
+void	lcgen(Node*, Node*);
+void	bcgen(Node*, int);
+void	boolgen(Node*, int, Node*);
+void	sugen(Node*, Node*, long);
+void	layout(Node*, Node*, int, int, Node*);
+
+/*
+ * txt.c
+ */
+void	ginit(void);
+void	gclean(void);
+void	nextpc(void);
+void	gargs(Node*, Node*, Node*);
+void	garg1(Node*, Node*, Node*, int, Node**);
+Node*	nodconst(long);
+Node*	nod32const(vlong);
+Node*	nodfconst(double);
+void	nodreg(Node*, Node*, int);
+void	regret(Node*, Node*);
+void	regalloc(Node*, Node*, Node*);
+void	regfree(Node*);
+void	regialloc(Node*, Node*, Node*);
+void	regsalloc(Node*, Node*);
+void	regaalloc1(Node*, Node*);
+void	regaalloc(Node*, Node*);
+void	regind(Node*, Node*);
+void	gprep(Node*, Node*);
+void	raddr(Node*, Prog*);
+void	naddr(Node*, Adr*);
+void	gmove(Node*, Node*);
+void	gins(int a, Node*, Node*);
+void	gopcode(int, Node*, Node*, Node*);
+int	samaddr(Node*, Node*);
+void	gbranch(int);
+int	immconst(Node*);
+void	patch(Prog*, long);
+int	sconst(Node*);
+int	sval(long);
+int	uconst(Node*);
+void	gpseudo(int, Sym*, Node*);
+
+/*
+ * swt.c
+ */
+int	swcmp(void*, void*);
+void	doswit(Node*);
+void	swit1(C1*, int, long, Node*);
+void	swit2(C1*, int, long, Node*, Node*);
+void	casf(void);
+void	bitload(Node*, Node*, Node*, Node*, Node*);
+void	bitstore(Node*, Node*, Node*, Node*, Node*);
+long	outstring(char*, long);
+int	mulcon(Node*, Node*);
+Multab*	mulcon0(Node*, long);
+int	mulcon1(Node*, long, Node*);
+void	nullwarn(Node*, Node*);
+void	sextern(Sym*, Node*, long, long);
+void	gextern(Sym*, Node*, long, long);
+void	outcode(void);
+void	ieeedtod(Ieee*, double);
+
+/*
+ * list
+ */
+void	listinit(void);
+int	Pconv(Fmt*);
+int	Aconv(Fmt*);
+int	Dconv(Fmt*);
+int	Sconv(Fmt*);
+int	Nconv(Fmt*);
+int	Bconv(Fmt*);
+
+/*
+ * reg.c
+ */
+Reg*	rega(void);
+int	rcmp(void*, void*);
+void	regopt(Prog*);
+void	addmove(Reg*, int, int, int);
+Bits	mkvar(Adr*, int);
+void	prop(Reg*, Bits, Bits);
+void	loopit(Reg*, long);
+void	synch(Reg*, Bits);
+ulong	allreg(ulong, Rgn*);
+void	paint1(Reg*, int);
+ulong	paint2(Reg*, int);
+void	paint3(Reg*, int, long, int);
+void	addreg(Adr*, int);
+
+/*
+ * peep.c
+ */
+void	peep(void);
+void	excise(Reg*);
+Reg*	uniqp(Reg*);
+Reg*	uniqs(Reg*);
+int	regtyp(Adr*);
+int	regzer(Adr*);
+int	anyvar(Adr*);
+int	subprop(Reg*);
+int	copyprop(Reg*);
+int	copy1(Adr*, Adr*, Reg*, int);
+int	copyu(Prog*, Adr*, Adr*);
+
+int	copyas(Adr*, Adr*);
+int	copyau(Adr*, Adr*);
+int	copyau1(Prog*, Adr*);
+int	copysub(Adr*, Adr*, Adr*, int);
+int	copysub1(Prog*, Adr*, Adr*, int);
+
+long	RtoB(int);
+long	FtoB(int);
+int	BtoR(long);
+int	BtoF(long);
+
+/*
+ * com64.c
+ */
+int	com64(Node*);
+void	com64init(void);
+void	bool64(Node*);
+
+#pragma	varargck	type	"A"	int
+#pragma	varargck	type	"B"	Bits
+#pragma	varargck	type	"D"	Adr*
+#pragma	varargck	type	"N"	Adr*
+#pragma	varargck	type	"P"	Prog*
+#pragma	varargck	type	"S"	char*

+ 229 - 0
sys/src/cmd/9c/list.c

@@ -0,0 +1,229 @@
+#define EXTERN
+#include "gc.h"
+
+void
+listinit(void)
+{
+
+	fmtinstall('A', Aconv);
+	fmtinstall('P', Pconv);
+	fmtinstall('S', Sconv);
+	fmtinstall('N', Nconv);
+	fmtinstall('D', Dconv);
+	fmtinstall('B', Bconv);
+}
+
+int
+Bconv(Fmt *fp)
+{
+	char str[STRINGSZ], ss[STRINGSZ], *s;
+	Bits bits;
+	int i;
+
+	str[0] = 0;
+	bits = va_arg(fp->args, Bits);
+	while(bany(&bits)) {
+		i = bnum(bits);
+		if(str[0])
+			strcat(str, " ");
+		if(var[i].sym == S) {
+			sprint(ss, "$%lld", var[i].offset);
+			s = ss;
+		} else
+			s = var[i].sym->name;
+		if(strlen(str) + strlen(s) + 1 >= STRINGSZ)
+			break;
+		strcat(str, s);
+		bits.b[i/32] &= ~(1L << (i%32));
+	}
+	return fmtstrcpy(fp, str);
+}
+
+int
+Pconv(Fmt *fp)
+{
+	char str[STRINGSZ];
+	Prog *p;
+	int a;
+
+	p = va_arg(fp->args, Prog*);
+	a = p->as;
+	if(a == ADATA)
+		sprint(str, "	%A	%D/%d,%D", a, &p->from, p->reg, &p->to);
+	else
+	if(p->as == ATEXT)
+		sprint(str, "	%A	%D,%d,%D", a, &p->from, p->reg, &p->to);
+	else
+	if(p->reg == NREG)
+		sprint(str, "	%A	%D,%D", a, &p->from, &p->to);
+	else
+	if(p->from.type != D_FREG)
+		sprint(str, "	%A	%D,R%d,%D", a, &p->from, p->reg, &p->to);
+	else
+		sprint(str, "	%A	%D,F%d,%D", a, &p->from, p->reg, &p->to);
+	return fmtstrcpy(fp, str);
+}
+
+int
+Aconv(Fmt *fp)
+{
+	char *s;
+	int a;
+
+	a = va_arg(fp->args, int);
+	s = "???";
+	if(a >= AXXX && a <= ALAST)
+		s = anames[a];
+	return fmtstrcpy(fp, s);
+}
+
+int
+Dconv(Fmt *fp)
+{
+	char str[STRINGSZ];
+	Adr *a;
+
+	a = va_arg(fp->args, Adr*);
+	switch(a->type) {
+
+	default:
+		sprint(str, "GOK-type(%d)", a->type);
+		break;
+
+	case D_NONE:
+		str[0] = 0;
+		if(a->name != D_NONE || a->reg != NREG || a->sym != S)
+			sprint(str, "%N(R%d)(NONE)", a, a->reg);
+		break;
+
+	case D_CONST:
+		if(a->reg != NREG)
+			sprint(str, "$%N(R%d)", a, a->reg);
+		else
+			sprint(str, "$%N", a);
+		break;
+
+	case D_OREG:
+		if(a->reg != NREG)
+			sprint(str, "%N(R%d)", a, a->reg);
+		else
+			sprint(str, "%N", a);
+		break;
+
+	case D_REG:
+		sprint(str, "R%d", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(R%d)(REG)", a, a->reg);
+		break;
+
+	case D_FREG:
+		sprint(str, "F%d", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(F%d)(REG)", a, a->reg);
+		break;
+
+	case D_CREG:
+		sprint(str, "C%d", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(C%d)(REG)", a, a->reg);
+		break;
+
+	case D_BRANCH:
+		sprint(str, "%lld(PC)", a->offset-pc);
+		break;
+
+	case D_FCONST:
+		sprint(str, "$%.17e", a->dval);
+		break;
+
+	case D_SCONST:
+		sprint(str, "$\"%S\"", a->sval);
+		break;
+	}
+	return fmtstrcpy(fp, str);
+}
+
+int
+Sconv(Fmt *fp)
+{
+	int i, c;
+	char str[STRINGSZ], *p, *a;
+
+	a = va_arg(fp->args, char*);
+	p = str;
+	for(i=0; i<NSNAME; i++) {
+		c = a[i] & 0xff;
+		if(c >= 'a' && c <= 'z' ||
+		   c >= 'A' && c <= 'Z' ||
+		   c >= '0' && c <= '9' ||
+		   c == ' ' || c == '%') {
+			*p++ = c;
+			continue;
+		}
+		*p++ = '\\';
+		switch(c) {
+		case 0:
+			*p++ = 'z';
+			continue;
+		case '\\':
+		case '"':
+			*p++ = c;
+			continue;
+		case '\n':
+			*p++ = 'n';
+			continue;
+		case '\t':
+			*p++ = 't';
+			continue;
+		case '\r':
+			*p++ = 'r';
+			continue;
+		case '\f':
+			*p++ = 'f';
+			continue;
+		}
+		*p++ = (c>>6) + '0';
+		*p++ = ((c>>3) & 7) + '0';
+		*p++ = (c & 7) + '0';
+	}
+	*p = 0;
+	return fmtstrcpy(fp, str);
+}
+
+int
+Nconv(Fmt *fp)
+{
+	char str[STRINGSZ];
+	Adr *a;
+	Sym *s;
+
+	a = va_arg(fp->args, Adr*);
+	s = a->sym;
+	if(s == S) {
+		sprint(str, "%lld", a->offset);
+		goto out;
+	}
+	switch(a->name) {
+	default:
+		sprint(str, "GOK-name(%d)", a->name);
+		break;
+
+	case D_EXTERN:
+		sprint(str, "%s+%lld(SB)", s->name, a->offset);
+		break;
+
+	case D_STATIC:
+		sprint(str, "%s<>+%lld(SB)", s->name, a->offset);
+		break;
+
+	case D_AUTO:
+		sprint(str, "%s-%lld(SP)", s->name, -a->offset);
+		break;
+
+	case D_PARAM:
+		sprint(str, "%s+%lld(FP)", s->name, a->offset);
+		break;
+	}
+out:
+	return fmtstrcpy(fp, str);
+}

+ 76 - 0
sys/src/cmd/9c/machcap.c

@@ -0,0 +1,76 @@
+#include "gc.h"
+
+int
+machcap(Node *n)
+{
+
+	if(n == Z)
+		return 1;	/* test */
+
+	switch(n->op) {
+	case OMUL:
+	case OLMUL:
+	case OASMUL:
+	case OASLMUL:
+		if(typechlv[n->type->etype])
+			return 1;
+		break;
+
+	case OADD:
+	case OAND:
+	case OOR:
+	case OSUB:
+	case OXOR:
+	case OASHL:
+	case OLSHR:
+	case OASHR:
+		if(typechlv[n->left->type->etype])
+			return 1;
+		break;
+
+	case OCAST:
+		return 1;
+
+	case OCOND:
+	case OCOMMA:
+	case OLIST:
+	case OANDAND:
+	case OOROR:
+	case ONOT:
+		return 1;
+
+	case OASADD:
+	case OASSUB:
+	case OASAND:
+	case OASOR:
+	case OASXOR:
+		return 1;
+
+	case OASASHL:
+	case OASASHR:
+	case OASLSHR:
+		return 1;
+
+	case OPOSTINC:
+	case OPOSTDEC:
+	case OPREINC:
+	case OPREDEC:
+		return 1;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OGT:
+	case OLT:
+	case OGE:
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		return 1;
+	case ONEG:
+	case OCOM:
+		break;
+	}
+	return 0;
+}

+ 18 - 0
sys/src/cmd/9c/mkenam

@@ -0,0 +1,18 @@
+ed - ../9c/9.out.h <<'!'
+v/^	A/d
+g/^	AEND/s//&,/
+g/^	ALAST/s//&,/
+g/[ 	]*=.*,/s//,/
+v/,/p
+,s/^	A/	"/
+,s/,.*$/",/
+1i
+char	*anames[] =
+{
+.
+,a
+};
+.
+w enam.c
+Q
+!

+ 42 - 0
sys/src/cmd/9c/mkfile

@@ -0,0 +1,42 @@
+</$objtype/mkfile
+
+TARG=9c
+OFILES=\
+	cgen.$O\
+	enam.$O\
+	list.$O\
+	machcap.$O\
+	mul.$O\
+	peep.$O\
+	pgen.$O\
+	pswt.$O\
+	reg.$O\
+	sgen.$O\
+	swt.$O\
+	txt.$O\
+
+HFILES=\
+	gc.h\
+	9.out.h\
+	../cc/cc.h\
+
+LIB=../cc/cc.a$O
+
+BIN=/$objtype/bin
+</sys/src/cmd/mkone
+
+$LIB:
+	cd ../cc
+	mk install
+	mk clean
+
+%.$O: ../cc/%.c
+	$CC $CFLAGS ../cc/$stem.c
+
+t:V:	$O.out
+	$O.out -S t
+	$LD -o t.out t.$O
+	t.out
+
+enam.c:	9.out.h
+	rc mkenam

+ 609 - 0
sys/src/cmd/9c/mul.c

@@ -0,0 +1,609 @@
+#include "gc.h"
+
+/*
+ * code sequences for multiply by constant.
+ * [a-l][0-3]
+ *	lsl	$(A-'a'),r0,r1
+ * [+][0-7]
+ *	add	r0,r1,r2
+ * [-][0-7]
+ *	sub	r0,r1,r2
+ */
+
+static	int	multabp;
+static	long	mulval;
+static	char*	mulcp;
+static	long	valmax;
+static	int	shmax;
+
+static int	docode(char *hp, char *cp, int r0, int r1);
+static int	gen1(int len);
+static int	gen2(int len, long r1);
+static int	gen3(int len, long r0, long r1, int flag);
+enum
+{
+	SR1	= 1<<0,		/* r1 has been shifted */
+	SR0	= 1<<1,		/* r0 has been shifted */
+	UR1	= 1<<2,		/* r1 has not been used */
+	UR0	= 1<<3,		/* r0 has not been used */
+};
+
+Multab*
+mulcon0(Node *n, long v)
+{
+	int a1, a2, g;
+	Multab *m, *m1;
+	char hint[10];
+
+	if(v < 0)
+		v = -v;
+
+	/*
+	 * look in cache
+	 */
+	m = multab;
+	for(g=0; g<nelem(multab); g++) {
+		if(m->val == v) {
+			if(m->code[0] == 0)
+				return 0;
+			return m;
+		}
+		m++;
+	}
+
+	/*
+	 * select a spot in cache to overwrite
+	 */
+	multabp++;
+	if(multabp < 0 || multabp >= nelem(multab))
+		multabp = 0;
+	m = multab+multabp;
+	m->val = v;
+	mulval = v;
+
+	/*
+	 * look in execption hint table
+	 */
+	a1 = 0;
+	a2 = hintabsize;
+	for(;;) {
+		if(a1 >= a2)
+			goto no;
+		g = (a2 + a1)/2;
+		if(v < hintab[g].val) {
+			a2 = g;
+			continue;
+		}
+		if(v > hintab[g].val) {
+			a1 = g+1;
+			continue;
+		}
+		break;
+	}
+
+	if(docode(hintab[g].hint, m->code, 1, 0))
+		return m;
+	print("%L: multiply table failure %ld\n", n->lineno, v);
+	m->code[0] = 0;
+	return 0;
+
+no:
+	/*
+	 * try to search
+	 */
+	hint[0] = 0;
+	for(g=1; g<=6; g++) {
+		if(g >= 6 && v >= 65535)
+			break;
+		mulcp = hint+g;
+		*mulcp = 0;
+		if(gen1(g)) {
+			if(docode(hint, m->code, 1, 0))
+				return m;
+			print("%L: multiply table failure (g=%d h=%s) %ld\n",
+				n->lineno, g, hint, v);
+			break;
+		}
+	}
+
+	/*
+	 * try a recur followed by a shift
+	 */
+	g = 0;
+	while(!(v & 1)) {
+		g++;
+		v >>= 1;
+	}
+	if(g) {
+		m1 = mulcon0(n, v);
+		if(m1) {
+			strcpy(m->code, m1->code);
+			sprint(strchr(m->code, 0), "%c0", g+'a');
+			return m;
+		}
+	}
+	m->code[0] = 0;
+	return 0;
+}
+
+static int
+docode(char *hp, char *cp, int r0, int r1)
+{
+	int c, i;
+
+	c = *hp++;
+	*cp = c;
+	cp += 2;
+	switch(c) {
+	default:
+		c -= 'a';
+		if(c < 1 || c >= 30)
+			break;
+		for(i=0; i<4; i++) {
+			switch(i) {
+			case 0:
+				if(docode(hp, cp, r0<<c, r1))
+					goto out;
+				break;
+			case 1:
+				if(docode(hp, cp, r1<<c, r1))
+					goto out;
+				break;
+			case 2:
+				if(docode(hp, cp, r0, r0<<c))
+					goto out;
+				break;
+			case 3:
+				if(docode(hp, cp, r0, r1<<c))
+					goto out;
+				break;
+			}
+		}
+		break;
+
+	case '+':
+		for(i=0; i<8; i++) {
+			cp[-1] = i+'0';
+			switch(i) {
+			case 1:
+				if(docode(hp, cp, r0+r1, r1))
+					goto out;
+				break;
+			case 5:
+				if(docode(hp, cp, r0, r0+r1))
+					goto out;
+				break;
+			}
+		}
+		break;
+
+	case '-':
+		for(i=0; i<8; i++) {
+			cp[-1] = i+'0';
+			switch(i) {
+			case 1:
+				if(docode(hp, cp, r0-r1, r1))
+					goto out;
+				break;
+			case 2:
+				if(docode(hp, cp, r1-r0, r1))
+					goto out;
+				break;
+			case 5:
+				if(docode(hp, cp, r0, r0-r1))
+					goto out;
+				break;
+			case 6:
+				if(docode(hp, cp, r0, r1-r0))
+					goto out;
+				break;
+			}
+		}
+		break;
+
+	case 0:
+		if(r0 == mulval)
+			return 1;
+	}
+	return 0;
+
+out:
+	cp[-1] = i+'0';
+	return 1;
+}
+
+static int
+gen1(int len)
+{
+	int i;
+
+	for(shmax=1; shmax<30; shmax++) {
+		valmax = 1<<shmax;
+		if(valmax >= mulval)
+			break;
+	}
+	if(mulval == 1)
+		return 1;
+
+	len--;
+	for(i=1; i<=shmax; i++)
+		if(gen2(len, 1<<i)) {
+			*--mulcp = 'a'+i;
+			return 1;
+		}
+	return 0;
+}
+
+static int
+gen2(int len, long r1)
+{
+	int i;
+
+	if(len <= 0) {
+		if(r1 == mulval)
+			return 1;
+		return 0;
+	}
+
+	len--;
+	if(len == 0)
+		goto calcr0;
+
+	if(gen3(len, r1, r1+1, UR1)) {
+		i = '+';
+		goto out;
+	}
+	if(gen3(len, r1-1, r1, UR0)) {
+		i = '-';
+		goto out;
+	}
+	if(gen3(len, 1, r1+1, UR1)) {
+		i = '+';
+		goto out;
+	}
+	if(gen3(len, 1, r1-1, UR1)) {
+		i = '-';
+		goto out;
+	}
+
+	return 0;
+
+calcr0:
+	if(mulval == r1+1) {
+		i = '+';
+		goto out;
+	}
+	if(mulval == r1-1) {
+		i = '-';
+		goto out;
+	}
+	return 0;
+
+out:
+	*--mulcp = i;
+	return 1;
+}
+
+static int
+gen3(int len, long r0, long r1, int flag)
+{
+	int i, f1, f2;
+	long x;
+
+	if(r0 <= 0 ||
+	   r0 >= r1 ||
+	   r1 > valmax)
+		return 0;
+
+	len--;
+	if(len == 0)
+		goto calcr0;
+
+	if(!(flag & UR1)) {
+		f1 = UR1|SR1;
+		for(i=1; i<=shmax; i++) {
+			x = r0<<i;
+			if(x > valmax)
+				break;
+			if(gen3(len, r0, x, f1)) {
+				i += 'a';
+				goto out;
+			}
+		}
+	}
+
+	if(!(flag & UR0)) {
+		f1 = UR1|SR1;
+		for(i=1; i<=shmax; i++) {
+			x = r1<<i;
+			if(x > valmax)
+				break;
+			if(gen3(len, r1, x, f1)) {
+				i += 'a';
+				goto out;
+			}
+		}
+	}
+
+	if(!(flag & SR1)) {
+		f1 = UR1|SR1|(flag&UR0);
+		for(i=1; i<=shmax; i++) {
+			x = r1<<i;
+			if(x > valmax)
+				break;
+			if(gen3(len, r0, x, f1)) {
+				i += 'a';
+				goto out;
+			}
+		}
+	}
+
+	if(!(flag & SR0)) {
+		f1 = UR0|SR0|(flag&(SR1|UR1));
+
+		f2 = UR1|SR1;
+		if(flag & UR1)
+			f2 |= UR0;
+		if(flag & SR1)
+			f2 |= SR0;
+
+		for(i=1; i<=shmax; i++) {
+			x = r0<<i;
+			if(x > valmax)
+				break;
+			if(x > r1) {
+				if(gen3(len, r1, x, f2)) {
+					i += 'a';
+					goto out;
+				}
+			} else
+				if(gen3(len, x, r1, f1)) {
+					i += 'a';
+					goto out;
+				}
+		}
+	}
+
+	x = r1+r0;
+	if(gen3(len, r0, x, UR1)) {
+		i = '+';
+		goto out;
+	}
+
+	if(gen3(len, r1, x, UR1)) {
+		i = '+';
+		goto out;
+	}
+
+	x = r1-r0;
+	if(gen3(len, x, r1, UR0)) {
+		i = '-';
+		goto out;
+	}
+
+	if(x > r0) {
+		if(gen3(len, r0, x, UR1)) {
+			i = '-';
+			goto out;
+		}
+	} else
+		if(gen3(len, x, r0, UR0)) {
+			i = '-';
+			goto out;
+		}
+
+	return 0;
+
+calcr0:
+	f1 = flag & (UR0|UR1);
+	if(f1 == UR1) {
+		for(i=1; i<=shmax; i++) {
+			x = r1<<i;
+			if(x >= mulval) {
+				if(x == mulval) {
+					i += 'a';
+					goto out;
+				}
+				break;
+			}
+		}
+	}
+
+	if(mulval == r1+r0) {
+		i = '+';
+		goto out;
+	}
+	if(mulval == r1-r0) {
+		i = '-';
+		goto out;
+	}
+
+	return 0;
+
+out:
+	*--mulcp = i;
+	return 1;
+}
+
+/*
+ * hint table has numbers that
+ * the search algorithm fails on.
+ * <1000:
+ *	all numbers
+ * <5000:
+ * 	÷ by 5
+ * <10000:
+ * 	÷ by 50
+ * <65536:
+ * 	÷ by 250
+ */
+Hintab	hintab[] =
+{
+	683,	"b++d+e+",
+	687,	"b+e++e-",
+	691,	"b++d+e+",
+	731,	"b++d+e+",
+	811,	"b++d+i+",
+	821,	"b++e+e+",
+	843,	"b+d++e+",
+	851,	"b+f-+e-",
+	853,	"b++e+e+",
+	877,	"c++++g-",
+	933,	"b+c++g-",
+	981,	"c-+e-d+",
+	1375,	"b+c+b+h-",
+	1675,	"d+b++h+",
+	2425,	"c++f-e+",
+	2675,	"c+d++f-",
+	2750,	"b+d-b+h-",
+	2775,	"c-+g-e-",
+	3125,	"b++e+g+",
+	3275,	"b+c+g+e+",
+	3350,	"c++++i+",
+	3475,	"c-+e-f-",
+	3525,	"c-+d+g-",
+	3625,	"c-+e-j+",
+	3675,	"b+d+d+e+",
+	3725,	"b+d-+h+",
+	3925,	"b+d+f-d-",
+	4275,	"b+g++e+",
+	4325,	"b+h-+d+",
+	4425,	"b+b+g-j-",
+	4525,	"b+d-d+f+",
+	4675,	"c++d-g+",
+	4775,	"b+d+b+g-",
+	4825,	"c+c-+i-",
+	4850,	"c++++i-",
+	4925,	"b++e-g-",
+	4975,	"c+f++e-",
+	5500,	"b+g-c+d+",
+	6700,	"d+b++i+",
+	9700,	"d++++j-",
+	11000,	"b+f-c-h-",
+	11750,	"b+d+g+j-",
+	12500,	"b+c+e-k+",
+	13250,	"b+d+e-f+",
+	13750,	"b+h-c-d+",
+	14250,	"b+g-c+e-",
+	14500,	"c+f+j-d-",
+	14750,	"d-g--f+",
+	16750,	"b+e-d-n+",
+	17750,	"c+h-b+e+",
+	18250,	"d+b+h-d+",
+	18750,	"b+g-++f+",
+	19250,	"b+e+b+h+",
+	19750,	"b++h--f-",
+	20250,	"b+e-l-c+",
+	20750,	"c++bi+e-",
+	21250,	"b+i+l+c+",
+	22000,	"b+e+d-g-",
+	22250,	"b+d-h+k-",
+	22750,	"b+d-e-g+",
+	23250,	"b+c+h+e-",
+	23500,	"b+g-c-g-",
+	23750,	"b+g-b+h-",
+	24250,	"c++g+m-",
+	24750,	"b+e+e+j-",
+	25000,	"b++dh+g+",
+	25250,	"b+e+d-g-",
+	25750,	"b+e+b+j+",
+	26250,	"b+h+c+e+",
+	26500,	"b+h+c+g+",
+	26750,	"b+d+e+g-",
+	27250,	"b+e+e+f+",
+	27500,	"c-i-c-d+",
+	27750,	"b+bd++j+",
+	28250,	"d-d-++i-",
+	28500,	"c+c-h-e-",
+	29000,	"b+g-d-f+",
+	29500,	"c+h+++e-",
+	29750,	"b+g+f-c+",
+	30250,	"b+f-g-c+",
+	33500,	"c-f-d-n+",
+	33750,	"b+d-b+j-",
+	34250,	"c+e+++i+",
+	35250,	"e+b+d+k+",
+	35500,	"c+e+d-g-",
+	35750,	"c+i-++e+",
+	36250,	"b+bh-d+e+",
+	36500,	"c+c-h-e-",
+	36750,	"d+e--i+",
+	37250,	"b+g+g+b+",
+	37500,	"b+h-b+f+",
+	37750,	"c+be++j-",
+	38500,	"b+e+b+i+",
+	38750,	"d+i-b+d+",
+	39250,	"b+g-l-+d+",
+	39500,	"b+g-c+g-",
+	39750,	"b+bh-c+f-",
+	40250,	"b+bf+d+g-",
+	40500,	"b+g-c+g+",
+	40750,	"c+b+i-e+",
+	41250,	"d++bf+h+",
+	41500,	"b+j+c+d-",
+	41750,	"c+f+b+h-",
+	42500,	"c+h++g+",
+	42750,	"b+g+d-f-",
+	43250,	"b+l-e+d-",
+	43750,	"c+bd+h+f-",
+	44000,	"b+f+g-d-",
+	44250,	"b+d-g--f+",
+	44500,	"c+e+c+h+",
+	44750,	"b+e+d-h-",
+	45250,	"b++g+j-g+",
+	45500,	"c+d+e-g+",
+	45750,	"b+d-h-e-",
+	46250,	"c+bd++j+",
+	46500,	"b+d-c-j-",
+	46750,	"e-e-b+g-",
+	47000,	"b+c+d-j-",
+	47250,	"b+e+e-g-",
+	47500,	"b+g-c-h-",
+	47750,	"b+f-c+h-",
+	48250,	"d--h+n-",
+	48500,	"b+c-g+m-",
+	48750,	"b+e+e-g+",
+	49500,	"c-f+e+j-",
+	49750,	"c+c+g++f-",
+	50000,	"b+e+e+k+",
+	50250,	"b++i++g+",
+	50500,	"c+g+f-i+",
+	50750,	"b+e+d+k-",
+	51500,	"b+i+c-f+",
+	51750,	"b+bd+g-e-",
+	52250,	"b+d+g-j+",
+	52500,	"c+c+f+g+",
+	52750,	"b+c+e+i+",
+	53000,	"b+i+c+g+",
+	53500,	"c+g+g-n+",
+	53750,	"b+j+d-c+",
+	54250,	"b+d-g-j-",
+	54500,	"c-f+e+f+",
+	54750,	"b+f-+c+g+",
+	55000,	"b+g-d-g-",
+	55250,	"b+e+e+g+",
+	55500,	"b+cd++j+",
+	55750,	"b+bh-d-f-",
+	56250,	"c+d-b+j-",
+	56500,	"c+d+c+i+",
+	56750,	"b+e+d++h-",
+	57000,	"b+d+g-f+",
+	57250,	"b+f-m+d-",
+	57750,	"b+i+c+e-",
+	58000,	"b+e+d+h+",
+	58250,	"c+b+g+g+",
+	58750,	"d-e-j--e+",
+	59000,	"d-i-+e+",
+	59250,	"e--h-m+",
+	59500,	"c+c-h+f-",
+	59750,	"b+bh-e+i-",
+	60250,	"b+bh-e-e-",
+	60500,	"c+c-g-g-",
+	60750,	"b+e-l-e-",
+	61250,	"b+g-g-c+",
+	61750,	"b+g-c+g+",
+	62250,	"f--+c-i-",
+	62750,	"e+f--+g+",
+	64750,	"b+f+d+p-",
+};
+int	hintabsize	= nelem(hintab);

+ 1042 - 0
sys/src/cmd/9c/peep.c

@@ -0,0 +1,1042 @@
+#include "gc.h"
+
+static Reg*
+rnops(Reg *r)
+{
+	Prog *p;
+	Reg *r1;
+
+	if(r != R)
+	for(;;){
+		p = r->prog;
+		if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
+			break;
+		r1 = uniqs(r);
+		if(r1 == R)
+			break;
+		r = r1;
+	}
+	return r;
+}
+
+void
+peep(void)
+{
+	Reg *r, *r1, *r2;
+	Prog *p, *p1;
+	int t;
+/*
+ * complete R structure
+ */
+	t = 0;
+	for(r=firstr; r!=R; r=r1) {
+		r1 = r->link;
+		if(r1 == R)
+			break;
+		p = r->prog->link;
+		while(p != r1->prog)
+		switch(p->as) {
+		default:
+			r2 = rega();
+			r->link = r2;
+			r2->link = r1;
+
+			r2->prog = p;
+			r2->p1 = r;
+			r->s1 = r2;
+			r2->s1 = r1;
+			r1->p1 = r2;
+
+			r = r2;
+			t++;
+
+		case ADATA:
+		case AGLOBL:
+		case ANAME:
+		case ASIGNAME:
+			p = p->link;
+		}
+	}
+
+loop1:
+	t = 0;
+	for(r=firstr; r!=R; r=r->link) {
+		p = r->prog;
+		if(p->as == AMOVW || p->as == AMOVD || p->as == AFMOVS || p->as == AFMOVD)
+		if(regtyp(&p->to)) {
+			if(regtyp(&p->from))
+			if(p->from.type == p->to.type) {
+				if(copyprop(r)) {
+					excise(r);
+					t++;
+				} else
+				if(subprop(r) && copyprop(r)) {
+					excise(r);
+					t++;
+				}
+			}
+			if(regzer(&p->from))
+			if(p->to.type == D_REG) {
+				p->from.type = D_REG;
+				p->from.reg = REGZERO;
+				if(copyprop(r)) {
+					excise(r);
+					t++;
+				} else
+				if(subprop(r) && copyprop(r)) {
+					excise(r);
+					t++;
+				}
+			}
+		}
+	}
+	if(t)
+		goto loop1;
+	/*
+	 * look for MOVB x,R; MOVB R,R
+	 */
+	for(r=firstr; r!=R; r=r->link) {
+		p = r->prog;
+		switch(p->as) {
+		default:
+			continue;
+		case AMOVH:
+		case AMOVHZ:
+		case AMOVB:
+		case AMOVBZ:
+		case AMOVW:
+		case AMOVWZ:
+			if(p->to.type != D_REG)
+				continue;
+			break;
+		}
+		r1 = r->link;
+		if(r1 == R)
+			continue;
+		p1 = r1->prog;
+		if(p1->as != p->as)
+			continue;
+		if(p1->from.type != D_REG || p1->from.reg != p->to.reg)
+			continue;
+		if(p1->to.type != D_REG || p1->to.reg != p->to.reg)
+			continue;
+		excise(r1);
+	}
+
+	if(debug['D'] > 1)
+		return;	/* allow following code improvement to be suppressed */
+
+	/*
+	 * look for OP x,y,R; CMP R, $0 -> OPCC x,y,R
+	 * when OP can set condition codes correctly
+	 */
+	for(r=firstr; r!=R; r=r->link) {
+		p = r->prog;
+		switch(p->as) {
+		case ACMP:
+		case ACMPW:		/* always safe? */
+			if(!regzer(&p->to))
+				continue;
+			r1 = r->s1;
+			if(r1 == R)
+				continue;
+			switch(r1->prog->as) {
+			default:
+				continue;
+			case ABCL:
+			case ABC:
+				/* the conditions can be complex and these are currently little used */
+				continue;
+			case ABEQ:
+			case ABGE:
+			case ABGT:
+			case ABLE:
+			case ABLT:
+			case ABNE:
+			case ABVC:
+			case ABVS:
+				break;
+			}
+			r1 = r;
+			do
+				r1 = uniqp(r1);
+			while (r1 != R && r1->prog->as == ANOP);
+			if(r1 == R)
+				continue;
+			p1 = r1->prog;
+			if(p1->to.type != D_REG || p1->to.reg != p->from.reg)
+				continue;
+			switch(p1->as) {
+			case ASUB:
+			case AADD:
+			case AXOR:
+			case AOR:
+				/* irregular instructions */
+				if(p1->from.type == D_CONST)
+					continue;
+				break;
+			}
+			switch(p1->as) {
+			default:
+				continue;
+			case AMOVW:
+			case AMOVD:
+				if(p1->from.type != D_REG)
+					continue;
+				continue;
+			case AANDCC:
+			case AANDNCC:
+			case AORCC:
+			case AORNCC:
+			case AXORCC:
+			case ASUBCC:
+			case ASUBECC:
+			case ASUBMECC:
+			case ASUBZECC:
+			case AADDCC:
+			case AADDCCC:
+			case AADDECC:
+			case AADDMECC:
+			case AADDZECC:
+			case ARLWMICC:
+			case ARLWNMCC:
+				t = p1->as;
+				break;
+			/* don't deal with floating point instructions for now */
+/*
+			case AFABS:	t = AFABSCC; break;
+			case AFADD:	t = AFADDCC; break;
+			case AFADDS:	t = AFADDSCC; break;
+			case AFCTIW:	t = AFCTIWCC; break;
+			case AFCTIWZ:	t = AFCTIWZCC; break;
+			case AFDIV:	t = AFDIVCC; break;
+			case AFDIVS:	t = AFDIVSCC; break;
+			case AFMADD:	t = AFMADDCC; break;
+			case AFMADDS:	t = AFMADDSCC; break;
+			case AFMOVD:	t = AFMOVDCC; break;
+			case AFMSUB:	t = AFMSUBCC; break;
+			case AFMSUBS:	t = AFMSUBSCC; break;
+			case AFMUL:	t = AFMULCC; break;
+			case AFMULS:	t = AFMULSCC; break;
+			case AFNABS:	t = AFNABSCC; break;
+			case AFNEG:	t = AFNEGCC; break;
+			case AFNMADD:	t = AFNMADDCC; break;
+			case AFNMADDS:	t = AFNMADDSCC; break;
+			case AFNMSUB:	t = AFNMSUBCC; break;
+			case AFNMSUBS:	t = AFNMSUBSCC; break;
+			case AFRSP:	t = AFRSPCC; break;
+			case AFSUB:	t = AFSUBCC; break;
+			case AFSUBS:	t = AFSUBSCC; break;
+			case ACNTLZW:	t = ACNTLZWCC; break;
+			case AMTFSB0:	t = AMTFSB0CC; break;
+			case AMTFSB1:	t = AMTFSB1CC; break;
+*/
+			case AADD:	t = AADDCC; break;
+			case AADDV:	t = AADDVCC; break;
+			case AADDC:	t = AADDCCC; break;
+			case AADDCV:	t = AADDCVCC; break;
+			case AADDME:	t = AADDMECC; break;
+			case AADDMEV:	t = AADDMEVCC; break;
+			case AADDE:	t = AADDECC; break;
+			case AADDEV:	t = AADDEVCC; break;
+			case AADDZE:	t = AADDZECC; break;
+			case AADDZEV:	t = AADDZEVCC; break;
+			case AAND:	t = AANDCC; break;
+			case AANDN:	t = AANDNCC; break;
+			case ADIVW:	t = ADIVWCC; break;
+			case ADIVWV:	t = ADIVWVCC; break;
+			case ADIVWU:	t = ADIVWUCC; break;
+			case ADIVWUV:	t = ADIVWUVCC; break;
+			case ADIVD:	t = ADIVDCC; break;
+			case ADIVDV:	t = ADIVDVCC; break;
+			case ADIVDU:	t = ADIVDUCC; break;
+			case ADIVDUV:	t = ADIVDUVCC; break;
+			case AEQV:	t = AEQVCC; break;
+			case AEXTSB:	t = AEXTSBCC; break;
+			case AEXTSH:	t = AEXTSHCC; break;
+			case AEXTSW:	t = AEXTSWCC; break;
+			case AMULHW:	t = AMULHWCC; break;
+			case AMULHWU:	t = AMULHWUCC; break;
+			case AMULLW:	t = AMULLWCC; break;
+			case AMULLWV:	t = AMULLWVCC; break;
+			case AMULHD:	t = AMULHDCC; break;
+			case AMULHDU:	t = AMULHDUCC; break;
+			case AMULLD:	t = AMULLDCC; break;
+			case AMULLDV:	t = AMULLDVCC; break;
+			case ANAND:	t = ANANDCC; break;
+			case ANEG:	t = ANEGCC; break;
+			case ANEGV:	t = ANEGVCC; break;
+			case ANOR:	t = ANORCC; break;
+			case AOR:	t = AORCC; break;
+			case AORN:	t = AORNCC; break;
+			case AREM:	t = AREMCC; break;
+			case AREMV:	t = AREMVCC; break;
+			case AREMU:	t = AREMUCC; break;
+			case AREMUV:	t = AREMUVCC; break;
+			case AREMD:	t = AREMDCC; break;
+			case AREMDV:	t = AREMDVCC; break;
+			case AREMDU:	t = AREMDUCC; break;
+			case AREMDUV:	t = AREMDUVCC; break;
+			case ARLWMI:	t = ARLWMICC; break;
+			case ARLWNM:	t = ARLWNMCC; break;
+			case ASLW:	t = ASLWCC; break;
+			case ASRAW:	t = ASRAWCC; break;
+			case ASRW:	t = ASRWCC; break;
+			case ASLD:	t = ASLDCC; break;
+			case ASRAD:	t = ASRADCC; break;
+			case ASRD:	t = ASRDCC; break;
+			case ASUB:	t = ASUBCC; break;
+			case ASUBV:	t = ASUBVCC; break;
+			case ASUBC:	t = ASUBCCC; break;
+			case ASUBCV:	t = ASUBCVCC; break;
+			case ASUBME:	t = ASUBMECC; break;
+			case ASUBMEV:	t = ASUBMEVCC; break;
+			case ASUBE:	t = ASUBECC; break;
+			case ASUBEV:	t = ASUBEVCC; break;
+			case ASUBZE:	t = ASUBZECC; break;
+			case ASUBZEV:	t = ASUBZEVCC; break;
+			case AXOR:	t = AXORCC; break;
+				break;
+			}
+			if(debug['D'])
+				print("cmp %P; %P -> ", p1, p);
+			p1->as = t;
+			if(debug['D'])
+				print("%P\n", p1);
+			excise(r);
+			continue;
+		}
+	}
+}
+
+void
+excise(Reg *r)
+{
+	Prog *p;
+
+	p = r->prog;
+	p->as = ANOP;
+	p->from = zprog.from;
+	p->from3 = zprog.from3;
+	p->to = zprog.to;
+	p->reg = zprog.reg; /**/
+}
+
+Reg*
+uniqp(Reg *r)
+{
+	Reg *r1;
+
+	r1 = r->p1;
+	if(r1 == R) {
+		r1 = r->p2;
+		if(r1 == R || r1->p2link != R)
+			return R;
+	} else
+		if(r->p2 != R)
+			return R;
+	return r1;
+}
+
+Reg*
+uniqs(Reg *r)
+{
+	Reg *r1;
+
+	r1 = r->s1;
+	if(r1 == R) {
+		r1 = r->s2;
+		if(r1 == R)
+			return R;
+	} else
+		if(r->s2 != R)
+			return R;
+	return r1;
+}
+
+/*
+ * if the system forces R0 to be zero,
+ * convert references to $0 to references to R0.
+ */
+regzer(Adr *a)
+{
+	if(R0ISZERO) {
+		if(a->type == D_CONST)
+			if(a->sym == S)
+				if(a->offset == 0)
+					return 1;
+		if(a->type == D_REG)
+			if(a->reg == REGZERO)
+				return 1;
+	}
+	return 0;
+}
+
+regtyp(Adr *a)
+{
+
+	if(a->type == D_REG) {
+		if(!R0ISZERO || a->reg != REGZERO)
+			return 1;
+		return 0;
+	}
+	if(a->type == D_FREG)
+		return 1;
+	return 0;
+}
+
+/*
+ * the idea is to substitute
+ * one register for another
+ * from one MOV to another
+ *	MOV	a, R0
+ *	ADD	b, R0	/ no use of R1
+ *	MOV	R0, R1
+ * would be converted to
+ *	MOV	a, R1
+ *	ADD	b, R1
+ *	MOV	R1, R0
+ * hopefully, then the former or latter MOV
+ * will be eliminated by copy propagation.
+ */
+int
+subprop(Reg *r0)
+{
+	Prog *p;
+	Adr *v1, *v2;
+	Reg *r;
+	int t;
+
+	p = r0->prog;
+	v1 = &p->from;
+	if(!regtyp(v1))
+		return 0;
+	v2 = &p->to;
+	if(!regtyp(v2))
+		return 0;
+	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
+		if(uniqs(r) == R)
+			break;
+		p = r->prog;
+		switch(p->as) {
+		case ABL:
+			return 0;
+
+		case AADD:
+		case AADDC:
+		case AADDCC:
+		case AADDE:
+		case AADDECC:
+		case ASUB:
+		case ASUBCC:
+		case ASUBC:
+		case ASUBCCC:
+		case ASUBE:
+		case ASUBECC:
+		case ASLW:
+		case ASRW:
+		case ASRWCC:
+		case ASRAW:
+		case ASRAWCC:
+		case ASLD:
+		case ASRD:
+		case ASRAD:
+		case AOR:
+		case AORCC:
+		case AORN:
+		case AORNCC:
+		case AAND:
+		case AANDCC:
+		case AANDN:
+		case AANDNCC:
+		case ANAND:
+		case ANANDCC:
+		case ANOR:
+		case ANORCC:
+		case AXOR:
+		case AXORCC:
+		case AMULHW:
+		case AMULHWU:
+		case AMULLW:
+		case AMULLD:
+		case ADIVW:
+		case ADIVWU:
+		case ADIVD:
+		case ADIVDU:
+		case AREM:
+		case AREMU:
+		case AREMD:
+		case AREMDU:
+		case ARLWNM:
+		case ARLWNMCC:
+
+		case AFADD:
+		case AFADDS:
+		case AFSUB:
+		case AFSUBS:
+		case AFMUL:
+		case AFMULS:
+		case AFDIV:
+		case AFDIVS:
+			if(p->to.type == v1->type)
+			if(p->to.reg == v1->reg) {
+				if(p->reg == NREG)
+					p->reg = p->to.reg;
+				goto gotit;
+			}
+			break;
+
+		case AADDME:
+		case AADDMECC:
+		case AADDZE:
+		case AADDZECC:
+		case ASUBME:
+		case ASUBMECC:
+		case ASUBZE:
+		case ASUBZECC:
+		case ANEG:
+		case ANEGCC:
+		case AFNEG:
+		case AFNEGCC:
+		case AFMOVS:
+		case AFMOVD:
+		case AMOVW:
+		case AMOVD:
+			if(p->to.type == v1->type)
+			if(p->to.reg == v1->reg)
+				goto gotit;
+			break;
+		}
+		if(copyau(&p->from, v2) ||
+		   copyau1(p, v2) ||
+		   copyau(&p->to, v2))
+			break;
+		if(copysub(&p->from, v1, v2, 0) ||
+		   copysub1(p, v1, v2, 0) ||
+		   copysub(&p->to, v1, v2, 0))
+			break;
+	}
+	return 0;
+
+gotit:
+	copysub(&p->to, v1, v2, 1);
+	if(debug['P']) {
+		print("gotit: %D->%D\n%P", v1, v2, r->prog);
+		if(p->from.type == v2->type)
+			print(" excise");
+		print("\n");
+	}
+	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
+		p = r->prog;
+		copysub(&p->from, v1, v2, 1);
+		copysub1(p, v1, v2, 1);
+		copysub(&p->to, v1, v2, 1);
+		if(debug['P'])
+			print("%P\n", r->prog);
+	}
+	t = v1->reg;
+	v1->reg = v2->reg;
+	v2->reg = t;
+	if(debug['P'])
+		print("%P last\n", r->prog);
+	return 1;
+}
+
+/*
+ * The idea is to remove redundant copies.
+ *	v1->v2	F=0
+ *	(use v2	s/v2/v1/)*
+ *	set v1	F=1
+ *	use v2	return fail
+ *	-----------------
+ *	v1->v2	F=0
+ *	(use v2	s/v2/v1/)*
+ *	set v1	F=1
+ *	set v2	return success
+ */
+int
+copyprop(Reg *r0)
+{
+	Prog *p;
+	Adr *v1, *v2;
+	Reg *r;
+
+	p = r0->prog;
+	v1 = &p->from;
+	v2 = &p->to;
+	if(copyas(v1, v2))
+		return 1;
+	for(r=firstr; r!=R; r=r->link)
+		r->active = 0;
+	return copy1(v1, v2, r0->s1, 0);
+}
+
+copy1(Adr *v1, Adr *v2, Reg *r, int f)
+{
+	int t;
+	Prog *p;
+
+	if(r->active) {
+		if(debug['P'])
+			print("act set; return 1\n");
+		return 1;
+	}
+	r->active = 1;
+	if(debug['P'])
+		print("copy %D->%D f=%d\n", v1, v2, f);
+	for(; r != R; r = r->s1) {
+		p = r->prog;
+		if(debug['P'])
+			print("%P", p);
+		if(!f && uniqp(r) == R) {
+			f = 1;
+			if(debug['P'])
+				print("; merge; f=%d", f);
+		}
+		t = copyu(p, v2, A);
+		switch(t) {
+		case 2:	/* rar, cant split */
+			if(debug['P'])
+				print("; %Drar; return 0\n", v2);
+			return 0;
+
+		case 3:	/* set */
+			if(debug['P'])
+				print("; %Dset; return 1\n", v2);
+			return 1;
+
+		case 1:	/* used, substitute */
+		case 4:	/* use and set */
+			if(f) {
+				if(!debug['P'])
+					return 0;
+				if(t == 4)
+					print("; %Dused+set and f=%d; return 0\n", v2, f);
+				else
+					print("; %Dused and f=%d; return 0\n", v2, f);
+				return 0;
+			}
+			if(copyu(p, v2, v1)) {
+				if(debug['P'])
+					print("; sub fail; return 0\n");
+				return 0;
+			}
+			if(debug['P'])
+				print("; sub%D/%D", v2, v1);
+			if(t == 4) {
+				if(debug['P'])
+					print("; %Dused+set; return 1\n", v2);
+				return 1;
+			}
+			break;
+		}
+		if(!f) {
+			t = copyu(p, v1, A);
+			if(!f && (t == 2 || t == 3 || t == 4)) {
+				f = 1;
+				if(debug['P'])
+					print("; %Dset and !f; f=%d", v1, f);
+			}
+		}
+		if(debug['P'])
+			print("\n");
+		if(r->s2)
+			if(!copy1(v1, v2, r->s2, f))
+				return 0;
+	}
+	return 1;
+}
+
+/*
+ * return
+ * 1 if v only used (and substitute),
+ * 2 if read-alter-rewrite
+ * 3 if set
+ * 4 if set and used
+ * 0 otherwise (not touched)
+ */
+int
+copyu(Prog *p, Adr *v, Adr *s)
+{
+
+	switch(p->as) {
+
+	default:
+		if(debug['P'])
+			print(" (???)");
+		return 2;
+
+
+	case ANOP:	/* read, write */
+	case AMOVH:
+	case AMOVHZ:
+	case AMOVB:
+	case AMOVBZ:
+	case AMOVW:
+	case AMOVWZ:
+	case AMOVD:
+
+	case ANEG:
+	case ANEGCC:
+	case AADDME:
+	case AADDMECC:
+	case AADDZE:
+	case AADDZECC:
+	case ASUBME:
+	case ASUBMECC:
+	case ASUBZE:
+	case ASUBZECC:
+
+	case AFCTIW:
+	case AFCTIWZ:
+	case AFMOVS:
+	case AFMOVD:
+	case AFRSP:
+	case AFNEG:
+	case AFNEGCC:
+		if(s != A) {
+			if(copysub(&p->from, v, s, 1))
+				return 1;
+			if(!copyas(&p->to, v))
+				if(copysub(&p->to, v, s, 1))
+					return 1;
+			return 0;
+		}
+		if(copyas(&p->to, v)) {
+			if(copyau(&p->from, v))
+				return 4;
+			return 3;
+		}
+		if(copyau(&p->from, v))
+			return 1;
+		if(copyau(&p->to, v))
+			return 1;
+		return 0;
+
+	case ARLWMI:	/* read read rar */
+	case ARLWMICC:
+		if(copyas(&p->to, v))
+			return 2;
+		/* fall through */
+
+	case AADD:	/* read read write */
+	case AADDC:
+	case AADDE:
+	case ASUB:
+	case ASLW:
+	case ASRW:
+	case ASRAW:
+	case ASLD:
+	case ASRD:
+	case ASRAD:
+	case AOR:
+	case AORCC:
+	case AORN:
+	case AORNCC:
+	case AAND:
+	case AANDCC:
+	case AANDN:
+	case AANDNCC:
+	case ANAND:
+	case ANANDCC:
+	case ANOR:
+	case ANORCC:
+	case AXOR:
+	case AMULHW:
+	case AMULHWU:
+	case AMULLW:
+	case AMULLD:
+	case ADIVW:
+	case ADIVD:
+	case ADIVWU:
+	case ADIVDU:
+	case AREM:
+	case AREMU:
+	case AREMD:
+	case AREMDU:
+	case ARLWNM:
+	case ARLWNMCC:
+
+	case AFADDS:
+	case AFADD:
+	case AFSUBS:
+	case AFSUB:
+	case AFMULS:
+	case AFMUL:
+	case AFDIVS:
+	case AFDIV:
+		if(s != A) {
+			if(copysub(&p->from, v, s, 1))
+				return 1;
+			if(copysub1(p, v, s, 1))
+				return 1;
+			if(!copyas(&p->to, v))
+				if(copysub(&p->to, v, s, 1))
+					return 1;
+			return 0;
+		}
+		if(copyas(&p->to, v)) {
+			if(p->reg == NREG)
+				p->reg = p->to.reg;
+			if(copyau(&p->from, v))
+				return 4;
+			if(copyau1(p, v))
+				return 4;
+			return 3;
+		}
+		if(copyau(&p->from, v))
+			return 1;
+		if(copyau1(p, v))
+			return 1;
+		if(copyau(&p->to, v))
+			return 1;
+		return 0;
+
+	case ABEQ:
+	case ABGT:
+	case ABGE:
+	case ABLT:
+	case ABLE:
+	case ABNE:
+	case ABVC:
+	case ABVS:
+		break;
+
+	case ACMP:	/* read read */
+	case ACMPU:
+	case ACMPW:
+	case ACMPWU:
+	case AFCMPO:
+	case AFCMPU:
+		if(s != A) {
+			if(copysub(&p->from, v, s, 1))
+				return 1;
+			return copysub(&p->to, v, s, 1);
+		}
+		if(copyau(&p->from, v))
+			return 1;
+		if(copyau(&p->to, v))
+			return 1;
+		break;
+
+	case ABR:	/* funny */
+		if(s != A) {
+			if(copysub(&p->to, v, s, 1))
+				return 1;
+			return 0;
+		}
+		if(copyau(&p->to, v))
+			return 1;
+		return 0;
+
+	case ARETURN:	/* funny */
+		if(v->type == D_REG)
+			if(v->reg == REGRET)
+				return 2;
+		if(v->type == D_FREG)
+			if(v->reg == FREGRET)
+				return 2;
+
+	case ABL:	/* funny */
+		if(v->type == D_REG) {
+			if(v->reg <= REGEXT && v->reg > exregoffset)
+				return 2;
+			if(v->reg == REGARG)
+				return 2;
+		}
+		if(v->type == D_FREG) {
+			if(v->reg <= FREGEXT && v->reg > exfregoffset)
+				return 2;
+		}
+
+		if(s != A) {
+			if(copysub(&p->to, v, s, 1))
+				return 1;
+			return 0;
+		}
+		if(copyau(&p->to, v))
+			return 4;
+		return 3;
+
+	case ATEXT:	/* funny */
+		if(v->type == D_REG)
+			if(v->reg == REGARG)
+				return 3;
+		return 0;
+	}
+	return 0;
+}
+
+int
+a2type(Prog *p)
+{
+
+	switch(p->as) {
+	case AADD:
+	case AADDC:
+	case AADDCC:
+	case AADDCCC:
+	case AADDE:
+	case AADDECC:
+	case AADDME:
+	case AADDMECC:
+	case AADDZE:
+	case AADDZECC:
+	case ASUB:
+	case ASUBC:
+	case ASUBCC:
+	case ASUBCCC:
+	case ASUBE:
+	case ASUBECC:
+	case ASUBME:
+	case ASUBMECC:
+	case ASUBZE:
+	case ASUBZECC:
+	case ASLW:
+	case ASLWCC:
+	case ASRW:
+	case ASRWCC:
+	case ASRAW:
+	case ASRAWCC:
+	case ASLD:
+	case ASLDCC:
+	case ASRD:
+	case ASRDCC:
+	case ASRAD:
+	case ASRADCC:
+	case AOR:
+	case AORCC:
+	case AORN:
+	case AORNCC:
+	case AAND:
+	case AANDCC:
+	case AANDN:
+	case AANDNCC:
+	case AXOR:
+	case AXORCC:
+	case ANEG:
+	case ANEGCC:
+	case AMULHW:
+	case AMULHWU:
+	case AMULLW:
+	case AMULLWCC:
+	case ADIVW:
+	case ADIVWCC:
+	case ADIVWU:
+	case ADIVWUCC:
+	case AREM:
+	case AREMCC:
+	case AREMU:
+	case AREMUCC:
+	case AMULLD:
+	case AMULLDCC:
+	case ADIVD:
+	case ADIVDCC:
+	case ADIVDU:
+	case ADIVDUCC:
+	case AREMD:
+	case AREMDCC:
+	case AREMDU:
+	case AREMDUCC:
+	case ANAND:
+	case ANANDCC:
+	case ANOR:
+	case ANORCC:
+	case ARLWMI:
+	case ARLWMICC:
+	case ARLWNM:
+	case ARLWNMCC:
+		return D_REG;
+
+	case AFADDS:
+	case AFADDSCC:
+	case AFADD:
+	case AFADDCC:
+	case AFSUBS:
+	case AFSUBSCC:
+	case AFSUB:
+	case AFSUBCC:
+	case AFMULS:
+	case AFMULSCC:
+	case AFMUL:
+	case AFMULCC:
+	case AFDIVS:
+	case AFDIVSCC:
+	case AFDIV:
+	case AFDIVCC:
+	case AFNEG:
+	case AFNEGCC:
+		return D_FREG;
+	}
+	return D_NONE;
+}
+
+/*
+ * direct reference,
+ * could be set/use depending on
+ * semantics
+ */
+int
+copyas(Adr *a, Adr *v)
+{
+
+	if(regtyp(v))
+		if(a->type == v->type)
+		if(a->reg == v->reg)
+			return 1;
+	return 0;
+}
+
+/*
+ * either direct or indirect
+ */
+int
+copyau(Adr *a, Adr *v)
+{
+
+	if(copyas(a, v))
+		return 1;
+	if(v->type == D_REG)
+		if(a->type == D_OREG)
+			if(v->reg == a->reg)
+				return 1;
+	return 0;
+}
+
+int
+copyau1(Prog *p, Adr *v)
+{
+
+	if(regtyp(v))
+		if(p->from.type == v->type || p->to.type == v->type)
+		if(p->reg == v->reg) {
+			if(a2type(p) != v->type)
+				print("botch a2type %P\n", p);
+			return 1;
+		}
+	return 0;
+}
+
+/*
+ * substitute s for v in a
+ * return failure to substitute
+ */
+int
+copysub(Adr *a, Adr *v, Adr *s, int f)
+{
+
+	if(f)
+	if(copyau(a, v))
+		a->reg = s->reg;
+	return 0;
+}
+
+int
+copysub1(Prog *p1, Adr *v, Adr *s, int f)
+{
+
+	if(f)
+	if(copyau1(p1, v))
+		p1->reg = s->reg;
+	return 0;
+}

+ 1134 - 0
sys/src/cmd/9c/reg.c

@@ -0,0 +1,1134 @@
+#include "gc.h"
+
+Reg*
+rega(void)
+{
+	Reg *r;
+
+	r = freer;
+	if(r == R) {
+		r = alloc(sizeof(*r));
+	} else
+		freer = r->link;
+
+	*r = zreg;
+	return r;
+}
+
+int
+rcmp(void *a1, void *a2)
+{
+	Rgn *p1, *p2;
+	int c1, c2;
+
+	p1 = a1;
+	p2 = a2;
+	c1 = p2->cost;
+	c2 = p1->cost;
+	if(c1 -= c2)
+		return c1;
+	return p2->varno - p1->varno;
+}
+
+void
+regopt(Prog *p)
+{
+	Reg *r, *r1, *r2;
+	Prog *p1;
+	int i, z;
+	long initpc, val, npc;
+	ulong vreg;
+	Bits bit;
+	struct
+	{
+		long	m;
+		long	c;
+		Reg*	p;
+	} log5[6], *lp;
+
+	firstr = R;
+	lastr = R;
+	nvar = 0;
+	regbits = 0;
+	for(z=0; z<BITS; z++) {
+		externs.b[z] = 0;
+		params.b[z] = 0;
+		consts.b[z] = 0;
+		addrs.b[z] = 0;
+	}
+
+	/*
+	 * pass 1
+	 * build aux data structure
+	 * allocate pcs
+	 * find use and set of variables
+	 */
+	val = 5L * 5L * 5L * 5L * 5L;
+	lp = log5;
+	for(i=0; i<5; i++) {
+		lp->m = val;
+		lp->c = 0;
+		lp->p = R;
+		val /= 5L;
+		lp++;
+	}
+	val = 0;
+	for(; p != P; p = p->link) {
+		switch(p->as) {
+		case ADATA:
+		case AGLOBL:
+		case ANAME:
+		case ASIGNAME:
+			continue;
+		}
+		r = rega();
+		if(firstr == R) {
+			firstr = r;
+			lastr = r;
+		} else {
+			lastr->link = r;
+			r->p1 = lastr;
+			lastr->s1 = r;
+			lastr = r;
+		}
+		r->prog = p;
+		r->pc = val;
+		val++;
+
+		lp = log5;
+		for(i=0; i<5; i++) {
+			lp->c--;
+			if(lp->c <= 0) {
+				lp->c = lp->m;
+				if(lp->p != R)
+					lp->p->log5 = r;
+				lp->p = r;
+				(lp+1)->c = 0;
+				break;
+			}
+			lp++;
+		}
+
+		r1 = r->p1;
+		if(r1 != R)
+		switch(r1->prog->as) {
+		case ARETURN:
+		case ABR:
+		case ARFI:
+		case ARFCI:
+		case ARFID:
+			r->p1 = R;
+			r1->s1 = R;
+		}
+
+		/*
+		 * left side always read
+		 */
+		bit = mkvar(&p->from, p->as==AMOVW || p->as == AMOVWZ || p->as == AMOVD);
+		for(z=0; z<BITS; z++)
+			r->use1.b[z] |= bit.b[z];
+
+		/*
+		 * right side depends on opcode
+		 */
+		bit = mkvar(&p->to, 0);
+		if(bany(&bit))
+		switch(p->as) {
+		default:
+			diag(Z, "reg: unknown asop: %A", p->as);
+			break;
+
+		/*
+		 * right side write
+		 */
+		case ANOP:
+		case AMOVB:
+		case AMOVBU:
+		case AMOVBZ:
+		case AMOVBZU:
+		case AMOVH:
+		case AMOVHBR:
+		case AMOVWBR:
+		case AMOVHU:
+		case AMOVHZ:
+		case AMOVHZU:
+		case AMOVW:
+		case AMOVWU:
+		case AMOVWZ:
+		case AMOVWZU:
+		case AMOVD:
+		case AMOVDU:
+		case AFMOVD:
+		case AFMOVDCC:
+		case AFMOVDU:
+		case AFMOVS:
+		case AFMOVSU:
+		case AFRSP:
+			for(z=0; z<BITS; z++)
+				r->set.b[z] |= bit.b[z];
+			break;
+
+		/*
+		 * funny
+		 */
+		case ABL:
+			for(z=0; z<BITS; z++)
+				addrs.b[z] |= bit.b[z];
+			break;
+		}
+	}
+	if(firstr == R)
+		return;
+	initpc = pc - val;
+	npc = val;
+
+	/*
+	 * pass 2
+	 * turn branch references to pointers
+	 * build back pointers
+	 */
+	for(r = firstr; r != R; r = r->link) {
+		p = r->prog;
+		if(p->to.type == D_BRANCH) {
+			val = p->to.offset - initpc;
+			r1 = firstr;
+			while(r1 != R) {
+				r2 = r1->log5;
+				if(r2 != R && val >= r2->pc) {
+					r1 = r2;
+					continue;
+				}
+				if(r1->pc == val)
+					break;
+				r1 = r1->link;
+			}
+			if(r1 == R) {
+				nearln = p->lineno;
+				diag(Z, "ref not found\n%P", p);
+				continue;
+			}
+			if(r1 == r) {
+				nearln = p->lineno;
+				diag(Z, "ref to self\n%P", p);
+				continue;
+			}
+			r->s2 = r1;
+			r->p2link = r1->p2;
+			r1->p2 = r;
+		}
+	}
+	if(debug['R']) {
+		p = firstr->prog;
+		print("\n%L %D\n", p->lineno, &p->from);
+	}
+
+	/*
+	 * pass 2.5
+	 * find looping structure
+	 */
+	for(r = firstr; r != R; r = r->link)
+		r->active = 0;
+	change = 0;
+	loopit(firstr, npc);
+	if(debug['R'] && debug['v']) {
+		print("\nlooping structure:\n");
+		for(r = firstr; r != R; r = r->link) {
+			print("%ld:%P", r->loop, r->prog);
+			for(z=0; z<BITS; z++)
+				bit.b[z] = r->use1.b[z] |
+					r->use2.b[z] | r->set.b[z];
+			if(bany(&bit)) {
+				print("\t");
+				if(bany(&r->use1))
+					print(" u1=%B", r->use1);
+				if(bany(&r->use2))
+					print(" u2=%B", r->use2);
+				if(bany(&r->set))
+					print(" st=%B", r->set);
+			}
+			print("\n");
+		}
+	}
+
+	/*
+	 * pass 3
+	 * iterate propagating usage
+	 * 	back until flow graph is complete
+	 */
+loop1:
+	change = 0;
+	for(r = firstr; r != R; r = r->link)
+		r->active = 0;
+	for(r = firstr; r != R; r = r->link)
+		if(r->prog->as == ARETURN)
+			prop(r, zbits, zbits);
+loop11:
+	/* pick up unreachable code */
+	i = 0;
+	for(r = firstr; r != R; r = r1) {
+		r1 = r->link;
+		if(r1 && r1->active && !r->active) {
+			prop(r, zbits, zbits);
+			i = 1;
+		}
+	}
+	if(i)
+		goto loop11;
+	if(change)
+		goto loop1;
+
+
+	/*
+	 * pass 4
+	 * iterate propagating register/variable synchrony
+	 * 	forward until graph is complete
+	 */
+loop2:
+	change = 0;
+	for(r = firstr; r != R; r = r->link)
+		r->active = 0;
+	synch(firstr, zbits);
+	if(change)
+		goto loop2;
+
+
+	/*
+	 * pass 5
+	 * isolate regions
+	 * calculate costs (paint1)
+	 */
+	r = firstr;
+	if(r) {
+		for(z=0; z<BITS; z++)
+			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+		if(bany(&bit)) {
+			nearln = r->prog->lineno;
+			warn(Z, "used and not set: %B", bit);
+			if(debug['R'] && !debug['w'])
+				print("used and not set: %B\n", bit);
+		}
+	}
+	if(debug['R'] && debug['v'])
+		print("\nprop structure:\n");
+	for(r = firstr; r != R; r = r->link)
+		r->act = zbits;
+	rgp = region;
+	nregion = 0;
+	for(r = firstr; r != R; r = r->link) {
+		if(debug['R'] && debug['v'])
+			print("%P\n	set = %B; rah = %B; cal = %B\n",
+				r->prog, r->set, r->refahead, r->calahead);
+		for(z=0; z<BITS; z++)
+			bit.b[z] = r->set.b[z] &
+			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+		if(bany(&bit)) {
+			nearln = r->prog->lineno;
+			warn(Z, "set and not used: %B", bit);
+			if(debug['R'])
+				print("set an not used: %B\n", bit);
+			excise(r);
+		}
+		for(z=0; z<BITS; z++)
+			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+		while(bany(&bit)) {
+			i = bnum(bit);
+			rgp->enter = r;
+			rgp->varno = i;
+			change = 0;
+			if(debug['R'] && debug['v'])
+				print("\n");
+			paint1(r, i);
+			bit.b[i/32] &= ~(1L<<(i%32));
+			if(change <= 0) {
+				if(debug['R'])
+					print("%L$%d: %B\n",
+						r->prog->lineno, change, blsh(i));
+				continue;
+			}
+			rgp->cost = change;
+			nregion++;
+			if(nregion >= NRGN) {
+				warn(Z, "too many regions");
+				goto brk;
+			}
+			rgp++;
+		}
+	}
+brk:
+	qsort(region, nregion, sizeof(region[0]), rcmp);
+
+	/*
+	 * pass 6
+	 * determine used registers (paint2)
+	 * replace code (paint3)
+	 */
+	rgp = region;
+	for(i=0; i<nregion; i++) {
+		bit = blsh(rgp->varno);
+		vreg = paint2(rgp->enter, rgp->varno);
+		vreg = allreg(vreg, rgp);
+		if(debug['R']) {
+			if(rgp->regno >= NREG)
+				print("%L$%d F%d: %B\n",
+					rgp->enter->prog->lineno,
+					rgp->cost,
+					rgp->regno-NREG,
+					bit);
+			else
+				print("%L$%d R%d: %B\n",
+					rgp->enter->prog->lineno,
+					rgp->cost,
+					rgp->regno,
+					bit);
+		}
+		if(rgp->regno != 0)
+			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+		rgp++;
+	}
+	/*
+	 * pass 7
+	 * peep-hole on basic block
+	 */
+	if(!debug['R'] || debug['P'])
+		peep();
+
+	/*
+	 * pass 8
+	 * recalculate pc
+	 */
+	val = initpc;
+	for(r = firstr; r != R; r = r1) {
+		r->pc = val;
+		p = r->prog;
+		p1 = P;
+		r1 = r->link;
+		if(r1 != R)
+			p1 = r1->prog;
+		for(; p != p1; p = p->link) {
+			switch(p->as) {
+			default:
+				val++;
+				break;
+
+			case ANOP:
+			case ADATA:
+			case AGLOBL:
+			case ANAME:
+			case ASIGNAME:
+				break;
+			}
+		}
+	}
+	pc = val;
+
+	/*
+	 * fix up branches
+	 */
+	if(debug['R'])
+		if(bany(&addrs))
+			print("addrs: %B\n", addrs);
+
+	r1 = 0; /* set */
+	for(r = firstr; r != R; r = r->link) {
+		p = r->prog;
+		if(p->to.type == D_BRANCH)
+			p->to.offset = r->s2->pc;
+		r1 = r;
+	}
+
+	/*
+	 * last pass
+	 * eliminate nops
+	 * free aux structures
+	 */
+	for(p = firstr->prog; p != P; p = p->link){
+		while(p->link && p->link->as == ANOP)
+			p->link = p->link->link;
+	}
+	if(r1 != R) {
+		r1->link = freer;
+		freer = firstr;
+	}
+}
+
+/*
+ * add mov b,rn
+ * just after r
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+	Prog *p, *p1;
+	Adr *a;
+	Var *v;
+
+	p1 = alloc(sizeof(*p1));
+	*p1 = zprog;
+	p = r->prog;
+
+	p1->link = p->link;
+	p->link = p1;
+	p1->lineno = p->lineno;
+
+	v = var + bn;
+
+	a = &p1->to;
+	a->sym = v->sym;
+	a->name = v->name;
+	a->offset = v->offset;
+	a->etype = v->etype;
+	a->type = D_OREG;
+	if(a->etype == TARRAY || a->sym == S)
+		a->type = D_CONST;
+
+	p1->as = AMOVW;
+	if(v->etype == TCHAR || v->etype == TUCHAR)
+		p1->as = AMOVB;
+	if(v->etype == TSHORT || v->etype == TUSHORT)
+		p1->as = AMOVH;
+	if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
+		p1->as = AMOVD;
+	if(v->etype == TFLOAT)
+		p1->as = AFMOVS;
+	if(v->etype == TDOUBLE)
+		p1->as = AFMOVD;
+
+	p1->from.type = D_REG;
+	p1->from.reg = rn;
+	if(rn >= NREG) {
+		p1->from.type = D_FREG;
+		p1->from.reg = rn-NREG;
+	}
+	if(!f) {
+		p1->from = *a;
+		*a = zprog.from;
+		a->type = D_REG;
+		a->reg = rn;
+		if(rn >= NREG) {
+			a->type = D_FREG;
+			a->reg = rn-NREG;
+		}
+		if(v->etype == TUCHAR)
+			p1->as = AMOVBZ;
+		if(v->etype == TUSHORT)
+			p1->as = AMOVHZ;
+		if(v->etype == TUINT || v->etype == TULONG)
+			p1->as = AMOVWZ;
+	}
+	if(debug['R'])
+		print("%P\t.a%P\n", p, p1);
+}
+
+Bits
+mkvar(Adr *a, int docon)
+{
+	Var *v;
+	int i, t, n, et, z;
+	long o;
+	Bits bit;
+	Sym *s;
+
+	t = a->type;
+	if(t == D_REG && a->reg != NREG)
+		regbits |= RtoB(a->reg);
+	if(t == D_FREG && a->reg != NREG)
+		regbits |= FtoB(a->reg);
+	s = a->sym;
+	o = a->offset;
+	et = a->etype;
+	if(s == S) {
+		if(t != D_CONST || !docon || a->reg != NREG)
+			goto none;
+		et = TLONG;
+	}
+	if(t == D_CONST) {
+		if(s == S && sval(o))
+			goto none;
+	}
+	n = a->name;
+	v = var;
+	for(i=0; i<nvar; i++) {
+		if(s == v->sym)
+		if(n == v->name)
+		if(o == v->offset)
+			goto out;
+		v++;
+	}
+	if(s)
+		if(s->name[0] == '.')
+			goto none;
+	if(nvar >= NVAR) {
+		if(debug['w'] > 1 && s)
+			warn(Z, "variable not optimized: %s", s->name);
+		goto none;
+	}
+	i = nvar;
+	nvar++;
+	v = &var[i];
+	v->sym = s;
+	v->offset = o;
+	v->etype = et;
+	v->name = n;
+	if(debug['R'])
+		print("bit=%2d et=%2d %D\n", i, et, a);
+out:
+	bit = blsh(i);
+	if(n == D_EXTERN || n == D_STATIC)
+		for(z=0; z<BITS; z++)
+			externs.b[z] |= bit.b[z];
+	if(n == D_PARAM)
+		for(z=0; z<BITS; z++)
+			params.b[z] |= bit.b[z];
+	if(v->etype != et || !(typechlpfd[et] || typev[et]))	/* funny punning */
+		for(z=0; z<BITS; z++)
+			addrs.b[z] |= bit.b[z];
+	if(t == D_CONST) {
+		if(s == S) {
+			for(z=0; z<BITS; z++)
+				consts.b[z] |= bit.b[z];
+			return bit;
+		}
+		if(et != TARRAY)
+			for(z=0; z<BITS; z++)
+				addrs.b[z] |= bit.b[z];
+		for(z=0; z<BITS; z++)
+			params.b[z] |= bit.b[z];
+		return bit;
+	}
+	if(t == D_OREG)
+		return bit;
+
+none:
+	return zbits;
+}
+
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+	Reg *r1, *r2;
+	int z;
+
+	for(r1 = r; r1 != R; r1 = r1->p1) {
+		for(z=0; z<BITS; z++) {
+			ref.b[z] |= r1->refahead.b[z];
+			if(ref.b[z] != r1->refahead.b[z]) {
+				r1->refahead.b[z] = ref.b[z];
+				change++;
+			}
+			cal.b[z] |= r1->calahead.b[z];
+			if(cal.b[z] != r1->calahead.b[z]) {
+				r1->calahead.b[z] = cal.b[z];
+				change++;
+			}
+		}
+		switch(r1->prog->as) {
+		case ABL:
+			for(z=0; z<BITS; z++) {
+				cal.b[z] |= ref.b[z] | externs.b[z];
+				ref.b[z] = 0;
+			}
+			break;
+
+		case ATEXT:
+			for(z=0; z<BITS; z++) {
+				cal.b[z] = 0;
+				ref.b[z] = 0;
+			}
+			break;
+
+		case ARETURN:
+			for(z=0; z<BITS; z++) {
+				cal.b[z] = externs.b[z];
+				ref.b[z] = 0;
+			}
+		}
+		for(z=0; z<BITS; z++) {
+			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+				r1->use1.b[z] | r1->use2.b[z];
+			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+			r1->refbehind.b[z] = ref.b[z];
+			r1->calbehind.b[z] = cal.b[z];
+		}
+		if(r1->active)
+			break;
+		r1->active = 1;
+	}
+	for(; r != r1; r = r->p1)
+		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
+			prop(r2, r->refbehind, r->calbehind);
+}
+
+/*
+ * find looping structure
+ *
+ * 1) find reverse postordering
+ * 2) find approximate dominators,
+ *	the actual dominators if the flow graph is reducible
+ *	otherwise, dominators plus some other non-dominators.
+ *	See Matthew S. Hecht and Jeffrey D. Ullman,
+ *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
+ *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
+ *	Oct. 1-3, 1973, pp.  207-217.
+ * 3) find all nodes with a predecessor dominated by the current node.
+ *	such a node is a loop head.
+ *	recursively, all preds with a greater rpo number are in the loop
+ */
+long
+postorder(Reg *r, Reg **rpo2r, long n)
+{
+	Reg *r1;
+
+	r->rpo = 1;
+	r1 = r->s1;
+	if(r1 && !r1->rpo)
+		n = postorder(r1, rpo2r, n);
+	r1 = r->s2;
+	if(r1 && !r1->rpo)
+		n = postorder(r1, rpo2r, n);
+	rpo2r[n] = r;
+	n++;
+	return n;
+}
+
+long
+rpolca(long *idom, long rpo1, long rpo2)
+{
+	long t;
+
+	if(rpo1 == -1)
+		return rpo2;
+	while(rpo1 != rpo2){
+		if(rpo1 > rpo2){
+			t = rpo2;
+			rpo2 = rpo1;
+			rpo1 = t;
+		}
+		while(rpo1 < rpo2){
+			t = idom[rpo2];
+			if(t >= rpo2)
+				fatal(Z, "bad idom");
+			rpo2 = t;
+		}
+	}
+	return rpo1;
+}
+
+int
+doms(long *idom, long r, long s)
+{
+	while(s > r)
+		s = idom[s];
+	return s == r;
+}
+
+int
+loophead(long *idom, Reg *r)
+{
+	long src;
+
+	src = r->rpo;
+	if(r->p1 != R && doms(idom, src, r->p1->rpo))
+		return 1;
+	for(r = r->p2; r != R; r = r->p2link)
+		if(doms(idom, src, r->rpo))
+			return 1;
+	return 0;
+}
+
+void
+loopmark(Reg **rpo2r, long head, Reg *r)
+{
+	if(r->rpo < head || r->active == head)
+		return;
+	r->active = head;
+	r->loop += LOOP;
+	if(r->p1 != R)
+		loopmark(rpo2r, head, r->p1);
+	for(r = r->p2; r != R; r = r->p2link)
+		loopmark(rpo2r, head, r);
+}
+
+void
+loopit(Reg *r, long nr)
+{
+	Reg *r1;
+	long i, d, me;
+
+	if(nr > maxnr) {
+		rpo2r = alloc(nr * sizeof(Reg*));
+		idom = alloc(nr * sizeof(long));
+		maxnr = nr;
+	}
+
+	d = postorder(r, rpo2r, 0);
+	if(d > nr)
+		fatal(Z, "too many reg nodes");
+	nr = d;
+	for(i = 0; i < nr / 2; i++){
+		r1 = rpo2r[i];
+		rpo2r[i] = rpo2r[nr - 1 - i];
+		rpo2r[nr - 1 - i] = r1;
+	}
+	for(i = 0; i < nr; i++)
+		rpo2r[i]->rpo = i;
+
+	idom[0] = 0;
+	for(i = 0; i < nr; i++){
+		r1 = rpo2r[i];
+		me = r1->rpo;
+		d = -1;
+		if(r1->p1 != R && r1->p1->rpo < me)
+			d = r1->p1->rpo;
+		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
+			if(r1->rpo < me)
+				d = rpolca(idom, d, r1->rpo);
+		idom[i] = d;
+	}
+
+	for(i = 0; i < nr; i++){
+		r1 = rpo2r[i];
+		r1->loop++;
+		if(r1->p2 != R && loophead(idom, r1))
+			loopmark(rpo2r, i, r1);
+	}
+}
+
+void
+synch(Reg *r, Bits dif)
+{
+	Reg *r1;
+	int z;
+
+	for(r1 = r; r1 != R; r1 = r1->s1) {
+		for(z=0; z<BITS; z++) {
+			dif.b[z] = (dif.b[z] &
+				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
+					r1->set.b[z] | r1->regdiff.b[z];
+			if(dif.b[z] != r1->regdiff.b[z]) {
+				r1->regdiff.b[z] = dif.b[z];
+				change++;
+			}
+		}
+		if(r1->active)
+			break;
+		r1->active = 1;
+		for(z=0; z<BITS; z++)
+			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
+		if(r1->s2 != R)
+			synch(r1->s2, dif);
+	}
+}
+
+ulong
+allreg(ulong b, Rgn *r)
+{
+	Var *v;
+	int i;
+
+	v = var + r->varno;
+	r->regno = 0;
+	switch(v->etype) {
+
+	default:
+		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
+		break;
+
+	case TCHAR:
+	case TUCHAR:
+	case TSHORT:
+	case TUSHORT:
+	case TINT:
+	case TUINT:
+	case TLONG:
+	case TULONG:
+	case TIND:
+	case TVLONG:
+	case TUVLONG:
+	case TARRAY:
+		i = BtoR(~b);
+		if(i && r->cost > 0) {
+			r->regno = i;
+			return RtoB(i);
+		}
+		break;
+
+	case TDOUBLE:
+	case TFLOAT:
+		i = BtoF(~b);
+		if(i && r->cost > 0) {
+			r->regno = i+NREG;
+			return FtoB(i);
+		}
+		break;
+	}
+	return 0;
+}
+
+void
+paint1(Reg *r, int bn)
+{
+	Reg *r1;
+	Prog *p;
+	int z;
+	ulong bb;
+
+	z = bn/32;
+	bb = 1L<<(bn%32);
+	if(r->act.b[z] & bb)
+		return;
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = r->p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(r1->act.b[z] & bb)
+			break;
+		r = r1;
+	}
+
+	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+		change -= CLOAD * r->loop;
+		if(debug['R'] && debug['v'])
+			print("%ld%P\tld %B $%d\n", r->loop,
+				r->prog, blsh(bn), change);
+	}
+	for(;;) {
+		r->act.b[z] |= bb;
+		p = r->prog;
+
+		if(r->use1.b[z] & bb) {
+			change += CREF * r->loop;
+			if(p->to.type == D_FREG && (p->as == AMOVW || p->as == AMOVD))
+				change = -CINF;		/* cant go Rreg to Freg */
+			if(debug['R'] && debug['v'])
+				print("%ld%P\tu1 %B $%d\n", r->loop,
+					p, blsh(bn), change);
+		}
+
+		if((r->use2.b[z]|r->set.b[z]) & bb) {
+			change += CREF * r->loop;
+			if(p->from.type == D_FREG && (p->as == AMOVW || p->as == AMOVD))
+				change = -CINF;		/* cant go Rreg to Freg */
+			if(debug['R'] && debug['v'])
+				print("%ld%P\tu2 %B $%d\n", r->loop,
+					p, blsh(bn), change);
+		}
+
+		if(STORE(r) & r->regdiff.b[z] & bb) {
+			change -= CLOAD * r->loop;
+			if(debug['R'] && debug['v'])
+				print("%ld%P\tst %B $%d\n", r->loop,
+					p, blsh(bn), change);
+		}
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+				if(r1->refahead.b[z] & bb)
+					paint1(r1, bn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = r->s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				paint1(r1, bn);
+		r = r->s1;
+		if(r == R)
+			break;
+		if(r->act.b[z] & bb)
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+}
+
+ulong
+paint2(Reg *r, int bn)
+{
+	Reg *r1;
+	int z;
+	ulong bb, vreg;
+
+	z = bn/32;
+	bb = 1L << (bn%32);
+	vreg = regbits;
+	if(!(r->act.b[z] & bb))
+		return vreg;
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = r->p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(!(r1->act.b[z] & bb))
+			break;
+		r = r1;
+	}
+	for(;;) {
+		r->act.b[z] &= ~bb;
+
+		vreg |= r->regu;
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+				if(r1->refahead.b[z] & bb)
+					vreg |= paint2(r1, bn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = r->s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				vreg |= paint2(r1, bn);
+		r = r->s1;
+		if(r == R)
+			break;
+		if(!(r->act.b[z] & bb))
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+	return vreg;
+}
+
+void
+paint3(Reg *r, int bn, long rb, int rn)
+{
+	Reg *r1;
+	Prog *p;
+	int z;
+	ulong bb;
+
+	z = bn/32;
+	bb = 1L << (bn%32);
+	if(r->act.b[z] & bb)
+		return;
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = r->p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(r1->act.b[z] & bb)
+			break;
+		r = r1;
+	}
+
+	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+		addmove(r, bn, rn, 0);
+	for(;;) {
+		r->act.b[z] |= bb;
+		p = r->prog;
+
+		if(r->use1.b[z] & bb) {
+			if(debug['R'])
+				print("%P", p);
+			addreg(&p->from, rn);
+			if(debug['R'])
+				print("\t.c%P\n", p);
+		}
+		if((r->use2.b[z]|r->set.b[z]) & bb) {
+			if(debug['R'])
+				print("%P", p);
+			addreg(&p->to, rn);
+			if(debug['R'])
+				print("\t.c%P\n", p);
+		}
+
+		if(STORE(r) & r->regdiff.b[z] & bb)
+			addmove(r, bn, rn, 1);
+		r->regu |= rb;
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+				if(r1->refahead.b[z] & bb)
+					paint3(r1, bn, rb, rn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = r->s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				paint3(r1, bn, rb, rn);
+		r = r->s1;
+		if(r == R)
+			break;
+		if(r->act.b[z] & bb)
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+}
+
+void
+addreg(Adr *a, int rn)
+{
+
+	a->sym = 0;
+	a->name = D_NONE;
+	a->type = D_REG;
+	a->reg = rn;
+	if(rn >= NREG) {
+		a->type = D_FREG;
+		a->reg = rn-NREG;
+	}
+}
+
+/*
+ * track register variables including external registers:
+ *	bit	reg
+ *	0	R7
+ *	1	R8
+ *	...	...
+ *	21	R28
+ */
+long
+RtoB(int r)
+{
+
+	if(r >= REGMIN && r <= REGMAX)
+		return 1L << (r-REGMIN);
+	return 0;
+}
+
+int
+BtoR(long b)
+{
+	b &= 0x001fffffL;
+	if(b == 0)
+		return 0;
+	return bitno(b) + REGMIN;
+}
+
+/*
+ *	bit	reg
+ *	22	F17
+ *	23	F18
+ *	...	...
+ *	31	F26
+ */
+long
+FtoB(int f)
+{
+	if(f < FREGMIN || f > FREGEXT)
+		return 0;
+	return 1L << (f - FREGMIN + 22);
+}
+
+int
+BtoF(long b)
+{
+
+	b &= 0xffc00000L;
+	if(b == 0)
+		return 0;
+	return bitno(b) - 22 + FREGMIN;
+}

+ 248 - 0
sys/src/cmd/9c/sgen.c

@@ -0,0 +1,248 @@
+#include "gc.h"
+
+void
+noretval(int n)
+{
+
+	if(n & 1) {
+		gins(ANOP, Z, Z);
+		p->to.type = D_REG;
+		p->to.reg = REGRET;
+	}
+	if(n & 2) {
+		gins(ANOP, Z, Z);
+		p->to.type = D_FREG;
+		p->to.reg = FREGRET;
+	}
+}
+
+/*
+ *	calculate addressability as follows
+ *		CONST ==> 20		$value
+ *		NAME ==> 10		name
+ *		REGISTER ==> 11		register
+ *		INDREG ==> 12		*[(reg)+offset]
+ *		&10 ==> 2		$name
+ *		ADD(2, 20) ==> 2	$name+offset
+ *		ADD(3, 20) ==> 3	$(reg)+offset
+ *		&12 ==> 3		$(reg)+offset
+ *		*11 ==> 11		??
+ *		*2 ==> 10		name
+ *		*3 ==> 12		*(reg)+offset
+ *	calculate complexity (number of registers)
+ */
+void
+xcom(Node *n)
+{
+	Node *l, *r;
+	int v;
+
+	if(n == Z)
+		return;
+	l = n->left;
+	r = n->right;
+	n->addable = 0;
+	n->complex = 0;
+	switch(n->op) {
+	case OCONST:
+		n->addable = 20;
+		return;
+
+	case OREGISTER:
+		n->addable = 11;
+		return;
+
+	case OINDREG:
+		n->addable = 12;
+		return;
+
+	case ONAME:
+		n->addable = 10;
+		return;
+
+	case OADDR:
+		xcom(l);
+		if(l->addable == 10)
+			n->addable = 2;
+		if(l->addable == 12)
+			n->addable = 3;
+		break;
+
+	case OIND:
+		xcom(l);
+		if(l->addable == 11)
+			n->addable = 12;
+		if(l->addable == 3)
+			n->addable = 12;
+		if(l->addable == 2)
+			n->addable = 10;
+		break;
+
+	case OADD:
+		xcom(l);
+		xcom(r);
+		if(l->addable == 20) {
+			if(r->addable == 2)
+				n->addable = 2;
+			if(r->addable == 3)
+				n->addable = 3;
+		}
+		if(r->addable == 20) {
+			if(l->addable == 2)
+				n->addable = 2;
+			if(l->addable == 3)
+				n->addable = 3;
+		}
+		break;
+
+	case OASMUL:
+	case OASLMUL:
+		xcom(l);
+		xcom(r);
+		v = vlog(r);
+		if(v >= 0) {
+			n->op = OASASHL;
+			r->vconst = v;
+			r->type = types[TINT];
+		}
+		break;
+
+	case OMUL:
+	case OLMUL:
+		xcom(l);
+		xcom(r);
+		v = vlog(r);
+		if(v >= 0) {
+			n->op = OASHL;
+			r->vconst = v;
+			r->type = types[TINT];
+		}
+		v = vlog(l);
+		if(v >= 0) {
+			n->op = OASHL;
+			n->left = r;
+			n->right = l;
+			r = l;
+			l = n->left;
+			r->vconst = v;
+			r->type = types[TINT];
+			simplifyshift(n);
+		}
+		break;
+
+	case OASLDIV:
+		xcom(l);
+		xcom(r);
+		v = vlog(r);
+		if(v >= 0) {
+			n->op = OASLSHR;
+			r->vconst = v;
+			r->type = types[TINT];
+		}
+		break;
+
+	case OLDIV:
+		xcom(l);
+		xcom(r);
+		v = vlog(r);
+		if(v >= 0) {
+			n->op = OLSHR;
+			r->vconst = v;
+			r->type = types[TINT];
+			simplifyshift(n);
+		}
+		break;
+
+	case OASLMOD:
+		xcom(l);
+		xcom(r);
+		v = vlog(r);
+		if(v >= 0) {
+			n->op = OASAND;
+			r->vconst--;
+		}
+		break;
+
+	case OLMOD:
+		xcom(l);
+		xcom(r);
+		v = vlog(r);
+		if(v >= 0) {
+			n->op = OAND;
+			r->vconst--;
+		}
+		break;
+
+	case OLSHR:
+	case OASHL:
+	case OASHR:
+		xcom(l);
+		xcom(r);
+		simplifyshift(n);
+		break;
+
+	default:
+		if(l != Z)
+			xcom(l);
+		if(r != Z)
+			xcom(r);
+		break;
+	}
+	if(n->addable >= 10)
+		return;
+	if(l != Z)
+		n->complex = l->complex;
+	if(r != Z) {
+		if(r->complex == n->complex)
+			n->complex = r->complex+1;
+		else
+		if(r->complex > n->complex)
+			n->complex = r->complex;
+	}
+	if(n->complex == 0)
+		n->complex++;
+
+//	if(com64(n))
+//		return;
+
+	switch(n->op) {
+
+	case OFUNC:
+		n->complex = FNX;
+		break;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OLT:
+	case OGE:
+	case OGT:
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		/*
+		 * immediate operators, make const on right
+		 */
+		if(l->op == OCONST) {
+			n->left = r;
+			n->right = l;
+			n->op = invrel[relindex(n->op)];
+		}
+		break;
+
+	case OADD:
+	case OXOR:
+	case OAND:
+	case OOR:
+		/*
+		 * immediate operators, make const on right
+		 */
+		if(l->op == OCONST) {
+			n->left = r;
+			n->right = l;
+		}
+		break;
+	}
+}
+

+ 628 - 0
sys/src/cmd/9c/swt.c

@@ -0,0 +1,628 @@
+#include "gc.h"
+
+void
+swit1(C1 *q, int nc, long def, Node *n)
+{
+	Node tn;
+	
+	regalloc(&tn, &regnode, Z);
+	swit2(q, nc, def, n, &tn);
+	regfree(&tn);
+}
+
+void
+swit2(C1 *q, int nc, long def, Node *n, Node *tn)
+{
+	C1 *r;
+	int i;
+	Prog *sp;
+
+	if(nc < 5) {
+		for(i=0; i<nc; i++) {
+			if(sval(q->val)) {
+				gopcode(OEQ, n, Z, nodconst(q->val));
+			} else {
+				gopcode(OSUB, nodconst(q->val), n, tn);
+				gopcode(OEQ, tn, Z, nodconst(0));
+			}
+			patch(p, q->label);
+			q++;
+		}
+		gbranch(OGOTO);
+		patch(p, def);
+		return;
+	}
+	i = nc / 2;
+	r = q+i;
+	if(sval(r->val)) {
+		gopcode(OGT, n, Z, nodconst(r->val));
+		sp = p;
+	} else {
+		gopcode(OSUB, nodconst(r->val), n, tn);
+		gopcode(OGT, tn, Z, nodconst(0));
+		sp = p;
+	}
+	gbranch(OGOTO);
+	p->as = ABEQ;
+	patch(p, r->label);
+	swit2(q, i, def, n, tn);
+
+	patch(sp, pc);
+	swit2(r+1, nc-i-1, def, n, tn);
+}
+
+void
+bitload(Node *b, Node *n1, Node *n2, Node *n3, Node *nn)
+{
+	int sh;
+	long v;
+	Node *l;
+
+	/*
+	 * n1 gets adjusted/masked value
+	 * n2 gets address of cell
+	 * n3 gets contents of cell
+	 */
+	l = b->left;
+	if(n2 != Z) {
+		regalloc(n1, l, nn);
+		reglcgen(n2, l, Z);
+		regalloc(n3, l, Z);
+		gopcode(OAS, n2, Z, n3);
+		gopcode(OAS, n3, Z, n1);
+	} else {
+		regalloc(n1, l, nn);
+		cgen(l, n1);
+	}
+	if(b->type->shift == 0 && typeu[b->type->etype]) {
+		v = ~0 + (1L << b->type->nbits);
+		gopcode(OAND, nodconst(v), Z, n1);
+	} else {
+		sh = 32 - b->type->shift - b->type->nbits;
+		if(sh > 0)
+			gopcode(OASHL, nodconst(sh), Z, n1);
+		sh += b->type->shift;
+		if(sh > 0)
+			if(typeu[b->type->etype])
+				gopcode(OLSHR, nodconst(sh), Z, n1);
+			else
+				gopcode(OASHR, nodconst(sh), Z, n1);
+	}
+}
+
+void
+bitstore(Node *b, Node *n1, Node *n2, Node *n3, Node *nn)
+{
+	long v;
+	Node nod, *l;
+	int sh;
+
+	/*
+	 * n1 has adjusted/masked value
+	 * n2 has address of cell
+	 * n3 has contents of cell
+	 */
+	l = b->left;
+	regalloc(&nod, l, Z);
+	v = ~0 + (1L << b->type->nbits);
+	gopcode(OAND, nodconst(v), Z, n1);
+	gopcode(OAS, n1, Z, &nod);
+	if(nn != Z)
+		gopcode(OAS, n1, Z, nn);
+	sh = b->type->shift;
+	if(sh > 0)
+		gopcode(OASHL, nodconst(sh), Z, &nod);
+	v <<= sh;
+	gopcode(OAND, nodconst(~v), Z, n3);
+	gopcode(OOR, n3, Z, &nod);
+	gopcode(OAS, &nod, Z, n2);
+
+	regfree(&nod);
+	regfree(n1);
+	regfree(n2);
+	regfree(n3);
+}
+
+long
+outstring(char *s, long n)
+{
+	long r;
+
+	if(suppress)
+		return nstring;
+	r = nstring;
+	while(n) {
+		string[mnstring] = *s++;
+		mnstring++;
+		nstring++;
+		if(mnstring >= NSNAME) {
+			gpseudo(ADATA, symstring, nodconst(0L));
+			p->from.offset += nstring - NSNAME;
+			p->reg = NSNAME;
+			p->to.type = D_SCONST;
+			memmove(p->to.sval, string, NSNAME);
+			mnstring = 0;
+		}
+		n--;
+	}
+	return r;
+}
+
+int
+mulcon(Node *n, Node *nn)
+{
+	Node *l, *r, nod1, nod2;
+	Multab *m;
+	long v;
+	int o;
+	char code[sizeof(m->code)+2], *p;
+
+	if(typefd[n->type->etype])
+		return 0;
+	l = n->left;
+	r = n->right;
+	if(l->op == OCONST) {
+		l = r;
+		r = n->left;
+	}
+	if(r->op != OCONST)
+		return 0;
+	v = convvtox(r->vconst, n->type->etype);
+	if(v != r->vconst) {
+		if(debug['M'])
+			print("%L multiply conv: %lld\n", n->lineno, r->vconst);
+		return 0;
+	}
+	m = mulcon0(n, v);
+	if(!m) {
+		if(debug['M'])
+			print("%L multiply table: %lld\n", n->lineno, r->vconst);
+		return 0;
+	}
+
+	memmove(code, m->code, sizeof(m->code));
+	code[sizeof(m->code)] = 0;
+
+	p = code;
+	if(p[1] == 'i')
+		p += 2;
+	regalloc(&nod1, n, nn);
+	cgen(l, &nod1);
+	if(v < 0)
+		gopcode(ONEG, &nod1, Z, &nod1);
+	regalloc(&nod2, n, Z);
+
+loop:
+	switch(*p) {
+	case 0:
+		regfree(&nod2);
+		gopcode(OAS, &nod1, Z, nn);
+		regfree(&nod1);
+		return 1;
+	case '+':
+		o = OADD;
+		goto addsub;
+	case '-':
+		o = OSUB;
+	addsub:	/* number is r,n,l */
+		v = p[1] - '0';
+		r = &nod1;
+		if(v&4)
+			r = &nod2;
+		n = &nod1;
+		if(v&2)
+			n = &nod2;
+		l = &nod1;
+		if(v&1)
+			l = &nod2;
+		gopcode(o, l, n, r);
+		break;
+	default: /* op is shiftcount, number is r,l */
+		v = p[1] - '0';
+		r = &nod1;
+		if(v&2)
+			r = &nod2;
+		l = &nod1;
+		if(v&1)
+			l = &nod2;
+		v = *p - 'a';
+		if(v < 0 || v >= 32) {
+			diag(n, "mulcon unknown op: %c%c", p[0], p[1]);
+			break;
+		}
+		gopcode(OASHL, nodconst(v), l, r);
+		break;
+	}
+	p += 2;
+	goto loop;
+}
+
+void
+sextern(Sym *s, Node *a, long o, long w)
+{
+	long e, lw;
+
+	for(e=0; e<w; e+=NSNAME) {
+		lw = NSNAME;
+		if(w-e < lw)
+			lw = w-e;
+		gpseudo(ADATA, s, nodconst(0));
+		p->from.offset += o+e;
+		p->reg = lw;
+		p->to.type = D_SCONST;
+		memmove(p->to.sval, a->cstring+e, lw);
+	}
+}
+
+void
+gextern(Sym *s, Node *a, long o, long w)
+{
+	if(a->op == OCONST && typev[a->type->etype]) {
+		if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+			gpseudo(ADATA, s, nod32const(a->vconst>>32));
+		else
+			gpseudo(ADATA, s, nod32const(a->vconst));
+		p->from.offset += o;
+		p->reg = 4;
+		if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+			gpseudo(ADATA, s, nod32const(a->vconst));
+		else
+			gpseudo(ADATA, s, nod32const(a->vconst>>32));
+		p->from.offset += o + 4;
+		p->reg = 4;
+		return;
+	}
+	gpseudo(ADATA, s, a);
+	p->from.offset += o;
+	p->reg = w;
+	if(p->to.type == D_OREG)
+		p->to.type = D_CONST;
+}
+
+void	zname(Biobuf*, Sym*, int);
+char*	zaddr(char*, Adr*, int);
+void	zwrite(Biobuf*, Prog*, int, int);
+void	outhist(Biobuf*);
+
+void
+outcode(void)
+{
+	struct { Sym *sym; short type; } h[NSYM];
+	Prog *p;
+	Sym *s;
+	int sf, st, t, sym;
+
+	if(debug['S']) {
+		for(p = firstp; p != P; p = p->link)
+			if(p->as != ADATA && p->as != AGLOBL)
+				pc--;
+		for(p = firstp; p != P; p = p->link) {
+			print("%P\n", p);
+			if(p->as != ADATA && p->as != AGLOBL)
+				pc++;
+		}
+	}
+	outhist(&outbuf);
+	for(sym=0; sym<NSYM; sym++) {
+		h[sym].sym = S;
+		h[sym].type = 0;
+	}
+	sym = 1;
+	for(p = firstp; p != P; p = p->link) {
+	jackpot:
+		sf = 0;
+		s = p->from.sym;
+		while(s != S) {
+			sf = s->sym;
+			if(sf < 0 || sf >= NSYM)
+				sf = 0;
+			t = p->from.name;
+			if(h[sf].type == t)
+			if(h[sf].sym == s)
+				break;
+			s->sym = sym;
+			zname(&outbuf, s, t);
+			h[sym].sym = s;
+			h[sym].type = t;
+			sf = sym;
+			sym++;
+			if(sym >= NSYM)
+				sym = 1;
+			break;
+		}
+		st = 0;
+		s = p->to.sym;
+		while(s != S) {
+			st = s->sym;
+			if(st < 0 || st >= NSYM)
+				st = 0;
+			t = p->to.name;
+			if(h[st].type == t)
+			if(h[st].sym == s)
+				break;
+			s->sym = sym;
+			zname(&outbuf, s, t);
+			h[sym].sym = s;
+			h[sym].type = t;
+			st = sym;
+			sym++;
+			if(sym >= NSYM)
+				sym = 1;
+			if(st == sf)
+				goto jackpot;
+			break;
+		}
+		zwrite(&outbuf, p, sf, st);
+	}
+	firstp = P;
+	lastp = P;
+}
+
+void
+zwrite(Biobuf *b, Prog *p, int sf, int st)
+{
+	char bf[100], *bp;
+	long l;
+
+	bf[0] = p->as;
+	bf[1] = p->as>>8;
+	bf[2] = p->reg;
+	l = p->lineno;
+	bf[3] = l;
+	bf[4] = l>>8;
+	bf[5] = l>>16;
+	bf[6] = l>>24;
+	bp = zaddr(bf+7, &p->from, sf);
+	bp = zaddr(bp, &p->to, st);
+	Bwrite(b, bf, bp-bf);
+}
+
+void
+outhist(Biobuf *b)
+{
+	Hist *h;
+	char *p, *q, *op, c;
+	Prog pg;
+	int n;
+
+	pg = zprog;
+	pg.as = AHISTORY;
+	c = pathchar();
+	for(h = hist; h != H; h = h->link) {
+		p = h->name;
+		op = 0;
+		/* on windows skip drive specifier in pathname */
+		if(systemtype(Windows) && p && p[1] == ':'){
+			p += 2;
+			c = *p;
+		}
+		if(p && p[0] != c && h->offset == 0 && pathname){
+			/* on windows skip drive specifier in pathname */
+			if(systemtype(Windows) && pathname[1] == ':') {
+				op = p;
+				p = pathname+2;
+				c = *p;
+			} else if(pathname[0] == c){
+				op = p;
+				p = pathname;
+			}
+		}
+		while(p) {
+			q = utfrune(p, c);
+			if(q) {
+				n = q-p;
+				if(n == 0){
+					n = 1;	/* leading "/" */
+					*p = '/';	/* don't emit "\" on windows */
+				}
+				q++;
+			} else {
+				n = strlen(p);
+				q = 0;
+			}
+			if(n) {
+				Bputc(b, ANAME);
+				Bputc(b, ANAME>>8);
+				Bputc(b, D_FILE);
+				Bputc(b, 1);
+				Bputc(b, '<');
+				Bwrite(b, p, n);
+				Bputc(b, 0);
+			}
+			p = q;
+			if(p == 0 && op) {
+				p = op;
+				op = 0;
+			}
+		}
+		pg.lineno = h->line;
+		pg.to.type = zprog.to.type;
+		pg.to.offset = h->offset;
+		if(h->offset)
+			pg.to.type = D_CONST;
+
+		zwrite(b, &pg, 0, 0);
+	}
+}
+
+void
+zname(Biobuf *b, Sym *s, int t)
+{
+	char *n, bf[8];
+	ulong sig;
+
+	n = s->name;
+	if(debug['T'] && t == D_EXTERN && s->sig != SIGDONE && s->type != types[TENUM] && s != symrathole){
+		sig = sign(s);
+		bf[0] = ASIGNAME;
+		bf[1] = ASIGNAME>>8;
+		bf[2] = sig;
+		bf[3] = sig>>8;
+		bf[4] = sig>>16;
+		bf[5] = sig>>24;
+		bf[6] = t;
+		bf[7] = s->sym;
+		Bwrite(b, bf, 8);
+		s->sig = SIGDONE;
+	}
+	else{
+		bf[0] = ANAME;
+		bf[1] = ANAME>>8;
+		bf[2] = t;	/* type */
+		bf[3] = s->sym;	/* sym */
+		Bwrite(b, bf, 4);
+	}
+	Bwrite(b, n, strlen(n)+1);
+}
+
+char*
+zaddr(char *bp, Adr *a, int s)
+{
+	long l;
+	Ieee e;
+
+	if(a->type == D_CONST){
+		l = a->offset;
+		if((vlong)l != a->offset)
+			a->type = D_DCONST;
+	}
+	bp[0] = a->type;
+	bp[1] = a->reg;
+	bp[2] = s;
+	bp[3] = a->name;
+	bp += 4;
+	switch(a->type) {
+	default:
+		diag(Z, "unknown type %d in zaddr", a->type);
+
+	case D_NONE:
+	case D_REG:
+	case D_FREG:
+	case D_CREG:
+		break;
+
+	case D_OREG:
+	case D_CONST:
+	case D_BRANCH:
+		l = a->offset;
+		bp[0] = l;
+		bp[1] = l>>8;
+		bp[2] = l>>16;
+		bp[3] = l>>24;
+		bp += 4;
+		break;
+
+	case D_DCONST:
+		l = a->offset;
+		bp[0] = l;
+		bp[1] = l>>8;
+		bp[2] = l>>16;
+		bp[3] = l>>24;
+		bp += 4;
+		l = a->offset>>32;
+		bp[0] = l;
+		bp[1] = l>>8;
+		bp[2] = l>>16;
+		bp[3] = l>>24;
+		bp += 4;
+		break;
+
+	case D_SCONST:
+		memmove(bp, a->sval, NSNAME);
+		bp += NSNAME;
+		break;
+
+	case D_FCONST:
+		ieeedtod(&e, a->dval);
+		l = e.l;
+		bp[0] = l;
+		bp[1] = l>>8;
+		bp[2] = l>>16;
+		bp[3] = l>>24;
+		bp += 4;
+		l = e.h;
+		bp[0] = l;
+		bp[1] = l>>8;
+		bp[2] = l>>16;
+		bp[3] = l>>24;
+		bp += 4;
+		break;
+	}
+	return bp;
+}
+
+long
+align(long i, Type *t, int op)
+{
+	long o;
+	Type *v;
+	int w;
+
+	o = i;
+	w = 1;
+	switch(op) {
+	default:
+		diag(Z, "unknown align opcode %d", op);
+		break;
+
+	case Asu2:	/* padding at end of a struct */
+		w = SZ_VLONG;
+		if(packflg)
+			w = packflg;
+		break;
+
+	case Ael1:	/* initial allign of struct element */
+		for(v=t; v->etype==TARRAY; v=v->link)
+			;
+		w = ewidth[v->etype];
+		if(w <= 0 || w >= SZ_VLONG)
+			w = SZ_VLONG;
+		if(packflg)
+			w = packflg;
+		break;
+
+	case Ael2:	/* width of a struct element */
+		o += t->width;
+		break;
+
+	case Aarg0:	/* initial passbyptr argument in arg list */
+		if(typesu[t->etype]) {
+			o = align(o, types[TIND], Aarg1);
+			o = align(o, types[TIND], Aarg2);
+		}
+		break;
+
+	case Aarg1:	/* initial align of parameter */
+		w = ewidth[t->etype];
+		if(w <= 0 || w >= SZ_VLONG) {
+			w = SZ_VLONG;
+			break;
+		}
+		o += SZ_VLONG - w;	/* big endian adjustment */
+		w = 1;
+		break;
+
+	case Aarg2:	/* width of a parameter */
+		o += t->width;
+		w = SZ_VLONG;
+		break;
+
+	case Aaut3:	/* total align of automatic */
+		o = align(o, t, Ael1);
+		o = align(o, t, Ael2);
+		break;
+	}
+	o = round(o, w);
+	if(debug['A'])
+		print("align %s %ld %T = %ld\n", bnames[op], i, t, o);
+	return o;
+}
+
+long
+maxround(long max, long v)
+{
+	v = round(v, SZ_VLONG);
+	if(v > max)
+		return v;
+	return max;
+}

+ 1417 - 0
sys/src/cmd/9c/txt.c

@@ -0,0 +1,1417 @@
+#include "gc.h"
+
+static	int	resvreg[nelem(reg)];
+
+#define	isv(et)	((et) == TVLONG || (et) == TUVLONG || (et) == TIND)
+
+void
+ginit(void)
+{
+	Type *t;
+
+	thechar = '9';
+	thestring = "power64";
+	exregoffset = REGEXT;
+	exfregoffset = FREGEXT;
+	listinit();
+	nstring = 0;
+	mnstring = 0;
+	nrathole = 0;
+	pc = 0;
+	breakpc = -1;
+	continpc = -1;
+	cases = C;
+	firstp = P;
+	lastp = P;
+	tfield = types[TLONG];
+
+	typeword = typechlvp;
+	typeswitch = typechlv;
+	typecmplx = typesu;
+	/* TO DO */
+	memmove(typechlpv, typechlp, sizeof(typechlpv));
+	typechlpv[TVLONG] = 1;
+	typechlpv[TUVLONG] = 1;
+
+	zprog.link = P;
+	zprog.as = AGOK;
+	zprog.reg = NREG;
+	zprog.from.type = D_NONE;
+	zprog.from.name = D_NONE;
+	zprog.from.reg = NREG;
+	zprog.to = zprog.from;
+
+	regnode.op = OREGISTER;
+	regnode.class = CEXREG;
+	regnode.reg = 0;
+	regnode.complex = 0;
+	regnode.addable = 11;
+	regnode.type = types[TLONG];
+
+	qregnode = regnode;
+	qregnode.type = types[TVLONG];
+
+	constnode.op = OCONST;
+	constnode.class = CXXX;
+	constnode.complex = 0;
+	constnode.addable = 20;
+	constnode.type = types[TLONG];
+
+	vconstnode = constnode;
+	vconstnode.type = types[TVLONG];
+
+	fconstnode.op = OCONST;
+	fconstnode.class = CXXX;
+	fconstnode.complex = 0;
+	fconstnode.addable = 20;
+	fconstnode.type = types[TDOUBLE];
+
+	nodsafe = new(ONAME, Z, Z);
+	nodsafe->sym = slookup(".safe");
+	nodsafe->type = types[TINT];
+	nodsafe->etype = types[TINT]->etype;
+	nodsafe->class = CAUTO;
+	complex(nodsafe);
+
+	t = typ(TARRAY, types[TCHAR]);
+	symrathole = slookup(".rathole");
+	symrathole->class = CGLOBL;
+	symrathole->type = t;
+
+	nodrat = new(ONAME, Z, Z);
+	nodrat->sym = symrathole;
+	nodrat->type = types[TIND];
+	nodrat->etype = TVOID;
+	nodrat->class = CGLOBL;
+	complex(nodrat);
+	nodrat->type = t;
+
+	nodret = new(ONAME, Z, Z);
+	nodret->sym = slookup(".ret");
+	nodret->type = types[TIND];
+	nodret->etype = TIND;
+	nodret->class = CPARAM;
+	nodret = new(OIND, nodret, Z);
+	complex(nodret);
+
+	com64init();
+
+	memset(reg, 0, sizeof(reg));
+	reg[REGZERO] = 1;	/* don't use */
+	reg[REGTMP] = 1;
+	reg[FREGCVI+NREG] = 1;
+	reg[FREGZERO+NREG] = 1;
+	reg[FREGHALF+NREG] = 1;
+	reg[FREGONE+NREG] = 1;
+	reg[FREGTWO+NREG] = 1;
+	memmove(resvreg, reg, sizeof(reg));
+}
+
+void
+gclean(void)
+{
+	int i;
+	Sym *s;
+
+	for(i=0; i<NREG; i++)
+		if(reg[i] && !resvreg[i])
+			diag(Z, "reg %d left allocated", i);
+	for(i=NREG; i<NREG+NREG; i++)
+		if(reg[i] && !resvreg[i])
+			diag(Z, "freg %d left allocated", i-NREG);
+	while(mnstring)
+		outstring("", 1L);
+	symstring->type->width = nstring;
+	symrathole->type->width = nrathole;
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		if(s->type == T)
+			continue;
+		if(s->type->width == 0)
+			continue;
+		if(s->class != CGLOBL && s->class != CSTATIC)
+			continue;
+		if(s->type == types[TENUM])
+			continue;
+		gpseudo(AGLOBL, s, nodconst(s->type->width));
+	}
+	nextpc();
+	p->as = AEND;
+	outcode();
+}
+
+void
+nextpc(void)
+{
+
+	p = alloc(sizeof(*p));
+	*p = zprog;
+	p->lineno = nearln;
+	pc++;
+	if(firstp == P) {
+		firstp = p;
+		lastp = p;
+		return;
+	}
+	lastp->link = p;
+	lastp = p;
+}
+
+void
+gargs(Node *n, Node *tn1, Node *tn2)
+{
+	long regs;
+	Node fnxargs[20], *fnxp;
+
+	regs = cursafe;
+
+	fnxp = fnxargs;
+	garg1(n, tn1, tn2, 0, &fnxp);	/* compile fns to temps */
+
+	curarg = 0;
+	fnxp = fnxargs;
+	garg1(n, tn1, tn2, 1, &fnxp);	/* compile normal args and temps */
+
+	cursafe = regs;
+}
+
+void
+garg1(Node *n, Node *tn1, Node *tn2, int f, Node **fnxp)
+{
+	Node nod;
+
+	if(n == Z)
+		return;
+	if(n->op == OLIST) {
+		garg1(n->left, tn1, tn2, f, fnxp);
+		garg1(n->right, tn1, tn2, f, fnxp);
+		return;
+	}
+	if(f == 0) {
+		if(n->complex >= FNX) {
+			regsalloc(*fnxp, n);
+			nod = znode;
+			nod.op = OAS;
+			nod.left = *fnxp;
+			nod.right = n;
+			nod.type = n->type;
+			cgen(&nod, Z);
+			(*fnxp)++;
+		}
+		return;
+	}
+	if(typesu[n->type->etype]) {
+		regaalloc(tn2, n);
+		if(n->complex >= FNX) {
+			sugen(*fnxp, tn2, n->type->width);
+			(*fnxp)++;
+		} else
+			sugen(n, tn2, n->type->width);
+		return;
+	}
+	if(REGARG>=0 && curarg == 0 && typechlpv[n->type->etype]) {
+		regaalloc1(tn1, n);
+		if(n->complex >= FNX) {
+			cgen(*fnxp, tn1);
+			(*fnxp)++;
+		} else
+			cgen(n, tn1);
+		return;
+	}
+	if(vconst(n) == 0) {
+		regaalloc(tn2, n);
+		gopcode(OAS, n, Z, tn2);
+		return;
+	}
+	regalloc(tn1, n, Z);
+	if(n->complex >= FNX) {
+		cgen(*fnxp, tn1);
+		(*fnxp)++;
+	} else
+		cgen(n, tn1);
+	regaalloc(tn2, n);
+	gopcode(OAS, tn1, Z, tn2);
+	regfree(tn1);
+}
+
+Node*
+nod32const(vlong v)
+{
+	constnode.vconst = v & MASK(32);
+	return &constnode;
+}
+
+Node*
+nodgconst(vlong v, Type *t)
+{
+	if(!typev[t->etype])
+		return nodconst((long)v);
+	vconstnode.vconst = v;
+	return &vconstnode;
+}
+
+Node*
+nodconst(long v)
+{
+	constnode.vconst = v;
+	return &constnode;
+}
+
+Node*
+nodfconst(double d)
+{
+	fconstnode.fconst = d;
+	return &fconstnode;
+}
+
+void
+nodreg(Node *n, Node *nn, int reg)
+{
+	*n = qregnode;
+	n->reg = reg;
+	n->type = nn->type;
+	n->lineno = nn->lineno;
+}
+
+void
+regret(Node *n, Node *nn)
+{
+	int r;
+
+	r = REGRET;
+	if(typefd[nn->type->etype])
+		r = FREGRET+NREG;
+	nodreg(n, nn, r);
+	reg[r]++;
+}
+
+void
+regalloc(Node *n, Node *tn, Node *o)
+{
+	int i, j;
+	static int lasti;
+
+	switch(tn->type->etype) {
+	case TCHAR:
+	case TUCHAR:
+	case TSHORT:
+	case TUSHORT:
+	case TINT:
+	case TUINT:
+	case TLONG:
+	case TULONG:
+	case TVLONG:
+	case TUVLONG:
+	case TIND:
+		if(o != Z && o->op == OREGISTER) {
+			i = o->reg;
+			if(i > 0 && i < NREG)
+				goto out;
+		}
+		j = lasti + REGRET+1;
+		for(i=REGRET+1; i<NREG; i++) {
+			if(j >= NREG)
+				j = REGRET+1;
+			if(reg[j] == 0) {
+				i = j;
+				goto out;
+			}
+			j++;
+		}
+		diag(tn, "out of fixed registers");
+		goto err;
+
+	case TFLOAT:
+	case TDOUBLE:
+		if(o != Z && o->op == OREGISTER) {
+			i = o->reg;
+			if(i >= NREG && i < NREG+NREG)
+				goto out;
+		}
+		j = lasti + NREG;
+		for(i=NREG; i<NREG+NREG; i++) {
+			if(j >= NREG+NREG)
+				j = NREG;
+			if(reg[j] == 0) {
+				i = j;
+				goto out;
+			}
+			j++;
+		}
+		diag(tn, "out of float registers");
+		goto err;
+	}
+	diag(tn, "unknown type in regalloc: %T", tn->type);
+err:
+	i = 0;
+out:
+	if(i)
+		reg[i]++;
+	lasti++;
+	if(lasti >= 5)
+		lasti = 0;
+	nodreg(n, tn, i);
+}
+
+void
+regialloc(Node *n, Node *tn, Node *o)
+{
+	Node nod;
+
+	nod = *tn;
+	nod.type = types[TIND];
+	regalloc(n, &nod, o);
+}
+
+void
+regfree(Node *n)
+{
+	int i;
+
+	i = 0;
+	if(n->op != OREGISTER && n->op != OINDREG)
+		goto err;
+	i = n->reg;
+	if(i < 0 || i >= sizeof(reg))
+		goto err;
+	if(reg[i] <= 0)
+		goto err;
+	reg[i]--;
+	return;
+err:
+	diag(n, "error in regfree: %d", i);
+}
+
+void
+regsalloc(Node *n, Node *nn)
+{
+	cursafe = align(cursafe, nn->type, Aaut3);
+	maxargsafe = maxround(maxargsafe, cursafe+curarg);
+	*n = *nodsafe;
+	n->xoffset = -(stkoff + cursafe);
+	n->type = nn->type;
+	n->etype = nn->type->etype;
+	n->lineno = nn->lineno;
+}
+
+void
+regaalloc1(Node *n, Node *nn)
+{
+	nodreg(n, nn, REGARG);
+	reg[REGARG]++;
+	curarg = align(curarg, nn->type, Aarg1);
+	curarg = align(curarg, nn->type, Aarg2);
+	maxargsafe = maxround(maxargsafe, cursafe+curarg);
+}
+
+void
+regaalloc(Node *n, Node *nn)
+{
+	curarg = align(curarg, nn->type, Aarg1);
+	*n = *nn;
+	n->op = OINDREG;
+	n->reg = REGSP;
+	n->xoffset = curarg + SZ_VLONG;
+	n->complex = 0;
+	n->addable = 20;
+	curarg = align(curarg, nn->type, Aarg2);
+	maxargsafe = maxround(maxargsafe, cursafe+curarg);
+}
+
+void
+regind(Node *n, Node *nn)
+{
+
+	if(n->op != OREGISTER) {
+		diag(n, "regind not OREGISTER");
+		return;
+	}
+	n->op = OINDREG;
+	n->type = nn->type;
+}
+
+void
+raddr(Node *n, Prog *p)
+{
+	Adr a;
+
+	naddr(n, &a);
+	if(R0ISZERO && a.type == D_CONST && a.offset == 0) {
+		a.type = D_REG;
+		a.reg = REGZERO;
+	}
+	if(a.type != D_REG && a.type != D_FREG) {
+		if(n)
+			diag(n, "bad in raddr: %O", n->op);
+		else
+			diag(n, "bad in raddr: <null>");
+		p->reg = NREG;
+	} else
+		p->reg = a.reg;
+}
+
+void
+naddr(Node *n, Adr *a)
+{
+	long v;
+
+	a->type = D_NONE;
+	if(n == Z)
+		return;
+	switch(n->op) {
+	default:
+	bad:
+		diag(n, "bad in naddr: %O", n->op);
+		break;
+
+	case OREGISTER:
+		a->type = D_REG;
+		a->sym = S;
+		a->reg = n->reg;
+		if(a->reg >= NREG) {
+			a->type = D_FREG;
+			a->reg -= NREG;
+		}
+		break;
+
+	case OIND:
+		naddr(n->left, a);
+		if(a->type == D_REG) {
+			a->type = D_OREG;
+			break;
+		}
+		if(a->type == D_CONST) {
+			a->type = D_OREG;
+			break;
+		}
+		goto bad;
+
+	case OINDREG:
+		a->type = D_OREG;
+		a->sym = S;
+		a->offset = n->xoffset;
+		a->reg = n->reg;
+		break;
+
+	case ONAME:
+		a->etype = n->etype;
+		a->type = D_OREG;
+		a->name = D_STATIC;
+		a->sym = n->sym;
+		a->offset = n->xoffset;
+		if(n->class == CSTATIC)
+			break;
+		if(n->class == CEXTERN || n->class == CGLOBL) {
+			a->name = D_EXTERN;
+			break;
+		}
+		if(n->class == CAUTO) {
+			a->name = D_AUTO;
+			break;
+		}
+		if(n->class == CPARAM) {
+			a->name = D_PARAM;
+			break;
+		}
+		goto bad;
+
+	case OCONST:
+		a->sym = S;
+		a->reg = NREG;
+		if(typefd[n->type->etype]) {
+			a->type = D_FCONST;
+			a->dval = n->fconst;
+		} else {
+			a->type = D_CONST;
+			a->offset = n->vconst;
+		}
+		break;
+
+	case OADDR:
+		naddr(n->left, a);
+		if(a->type == D_OREG) {
+			a->type = D_CONST;
+			break;
+		}
+		goto bad;
+
+	case OADD:
+		if(n->left->op == OCONST) {
+			naddr(n->left, a);
+			v = a->offset;
+			naddr(n->right, a);
+		} else {
+			naddr(n->right, a);
+			v = a->offset;
+			naddr(n->left, a);
+		}
+		a->offset += v;
+		break;
+
+	}
+}
+
+void
+fop(int as, int f1, int f2, Node *t)
+{
+	Node nod1, nod2, nod3;
+
+	nodreg(&nod1, t, NREG+f1);
+	nodreg(&nod2, t, NREG+f2);
+	regalloc(&nod3, t, t);
+	gopcode(as, &nod1, &nod2, &nod3);
+	gmove(&nod3, t);
+	regfree(&nod3);
+}
+
+void
+gmove(Node *f, Node *t)
+{
+	int ft, tt, a;
+	Node nod, fxc0, fxc1, fxc2, fxrat;
+	Prog *p1;
+	double d;
+
+	ft = f->type->etype;
+	tt = t->type->etype;
+
+	if(ft == TDOUBLE && f->op == OCONST) {
+		d = f->fconst;
+		if(d == 0.0) {
+			a = FREGZERO;
+			goto ffreg;
+		}
+		if(d == 0.5) {
+			a = FREGHALF;
+			goto ffreg;
+		}
+		if(d == 1.0) {
+			a = FREGONE;
+			goto ffreg;
+		}
+		if(d == 2.0) {
+			a = FREGTWO;
+			goto ffreg;
+		}
+		if(d == -.5) {
+			fop(OSUB, FREGHALF, FREGZERO, t);
+			return;
+		}
+		if(d == -1.0) {
+			fop(OSUB, FREGONE, FREGZERO, t);
+			return;
+		}
+		if(d == -2.0) {
+			fop(OSUB, FREGTWO, FREGZERO, t);
+			return;
+		}
+		if(d == 1.5) {
+			fop(OADD, FREGONE, FREGHALF, t);
+			return;
+		}
+		if(d == 2.5) {
+			fop(OADD, FREGTWO, FREGHALF, t);
+			return;
+		}
+		if(d == 3.0) {
+			fop(OADD, FREGTWO, FREGONE, t);
+			return;
+		}
+	}
+	if(ft == TFLOAT && f->op == OCONST) {
+		d = f->fconst;
+		if(d == 0) {
+			a = FREGZERO;
+		ffreg:
+			nodreg(&nod, f, NREG+a);
+			gmove(&nod, t);
+			return;
+		}
+	}
+	/*
+	 * a load --
+	 * put it into a register then
+	 * worry what to do with it.
+	 */
+	if(f->op == ONAME || f->op == OINDREG || f->op == OIND) {
+		switch(ft) {
+		default:
+			if(ewidth[ft] == 4){
+				if(typeu[ft])
+					a = AMOVWZ;
+				else
+					a = AMOVW;
+			}else
+				a = AMOVD;
+			break;
+		case TINT:
+			a = AMOVW;
+			break;
+		case TUINT:
+			a = AMOVWZ;
+			break;
+		case TFLOAT:
+			a = AFMOVS;
+			break;
+		case TDOUBLE:
+			a = AFMOVD;
+			break;
+		case TCHAR:
+			a = AMOVB;
+			break;
+		case TUCHAR:
+			a = AMOVBZ;
+			break;
+		case TSHORT:
+			a = AMOVH;
+			break;
+		case TUSHORT:
+			a = AMOVHZ;
+			break;
+		}
+		regalloc(&nod, f, t);
+		gins(a, f, &nod);
+		gmove(&nod, t);
+		regfree(&nod);
+		return;
+	}
+
+	/*
+	 * a store --
+	 * put it into a register then
+	 * store it.
+	 */
+	if(t->op == ONAME || t->op == OINDREG || t->op == OIND) {
+		switch(tt) {
+		default:
+			if(ewidth[tt] == 4)
+				a = AMOVW;
+			else
+				a = AMOVD;
+			break;
+		case TINT:
+			a = AMOVW;
+			break;
+		case TUINT:
+			a = AMOVWZ;
+			break;
+		case TUCHAR:
+			a = AMOVBZ;
+			break;
+		case TCHAR:
+			a = AMOVB;
+			break;
+		case TUSHORT:
+			a = AMOVHZ;
+			break;
+		case TSHORT:
+			a = AMOVH;
+			break;
+		case TFLOAT:
+			a = AFMOVS;
+			break;
+		case TDOUBLE:
+			a = AFMOVD;
+			break;
+		}
+		if(!typefd[ft] && vconst(f) == 0) {
+			gins(a, f, t);
+			return;
+		}
+		if(ft == tt)
+			regalloc(&nod, t, f);
+		else
+			regalloc(&nod, t, Z);
+		gmove(f, &nod);
+		gins(a, &nod, t);
+		regfree(&nod);
+		return;
+	}
+
+	/*
+	 * type x type cross table
+	 */
+	a = AGOK;
+	switch(ft) {
+	case TDOUBLE:
+	case TFLOAT:
+		switch(tt) {
+		case TDOUBLE:
+			a = AFMOVD;
+			if(ft == TFLOAT)
+				a = AFMOVS;	/* AFMOVSD */
+			break;
+		case TFLOAT:
+			a = AFRSP;
+			if(ft == TFLOAT)
+				a = AFMOVS;
+			break;
+		case TINT:
+		case TUINT:
+		case TLONG:
+		case TULONG:
+		case TIND:
+		case TSHORT:
+		case TUSHORT:
+		case TCHAR:
+		case TUCHAR:
+			/* BUG: not right for unsigned long */
+			regalloc(&nod, f, Z);	/* should be type float */
+			regsalloc(&fxrat, f);
+			gins(AFCTIWZ, f, &nod);
+			gins(AFMOVD, &nod, &fxrat);
+			regfree(&nod);
+			fxrat.type = nodrat->type;
+			fxrat.etype = nodrat->etype;
+			fxrat.xoffset += 4;
+			gins(AMOVW, &fxrat, t);	/* TO DO */
+			gmove(t, t);
+			return;
+		case TVLONG:
+		case TUVLONG:
+			/* BUG: not right for unsigned long */
+			regalloc(&nod, f, Z);	/* should be type float */
+			regsalloc(&fxrat, f);
+			gins(AFCTIDZ, f, &nod);
+			gins(AFMOVD, &nod, &fxrat);
+			regfree(&nod);
+			fxrat.type = nodrat->type;
+			fxrat.etype = nodrat->etype;
+			gins(AMOVD, &fxrat, t);
+			gmove(t, t);
+			return;
+		}
+		break;
+	case TINT:
+	case TUINT:
+	case TLONG:
+	case TULONG:
+		switch(tt) {
+		case TDOUBLE:
+		case TFLOAT:
+			goto fxtofl;
+		case TINT:
+		case TUINT:
+		case TLONG:
+		case TULONG:
+		case TSHORT:
+		case TUSHORT:
+		case TCHAR:
+		case TUCHAR:
+			if(typeu[tt])
+				a = AMOVWZ;
+			else
+				a = AMOVW;
+			break;
+		case TVLONG:
+		case TUVLONG:
+		case TIND:
+			a = AMOVD;
+			break;
+		}
+		break;
+	case TVLONG:
+	case TUVLONG:
+	case TIND:
+		switch(tt) {
+		case TDOUBLE:
+		case TFLOAT:
+			goto fxtofl;
+		case TINT:
+		case TUINT:
+		case TLONG:
+		case TULONG:
+		case TVLONG:
+		case TUVLONG:
+		case TIND:
+		case TSHORT:
+		case TUSHORT:
+		case TCHAR:
+		case TUCHAR:
+			a = AMOVD;	/* TO DO: conversion done? */
+			break;
+		}
+		break;
+	case TSHORT:
+		switch(tt) {
+		case TDOUBLE:
+		case TFLOAT:
+			goto fxtofl;
+		case TINT:
+		case TUINT:
+		case TLONG:
+		case TULONG:
+		case TVLONG:
+		case TUVLONG:
+		case TIND:
+			a = AMOVH;
+			break;
+		case TSHORT:
+		case TUSHORT:
+		case TCHAR:
+		case TUCHAR:
+			a = AMOVD;
+			break;
+		}
+		break;
+	case TUSHORT:
+		switch(tt) {
+		case TDOUBLE:
+		case TFLOAT:
+			goto fxtofl;
+		case TINT:
+		case TUINT:
+		case TLONG:
+		case TULONG:
+		case TVLONG:
+		case TUVLONG:
+		case TIND:
+			a = AMOVHZ;
+			break;
+		case TSHORT:
+		case TUSHORT:
+		case TCHAR:
+		case TUCHAR:
+			a = AMOVD;
+			break;
+		}
+		break;
+	case TCHAR:
+		switch(tt) {
+		case TDOUBLE:
+		case TFLOAT:
+			goto fxtofl;
+		case TINT:
+		case TUINT:
+		case TLONG:
+		case TULONG:
+		case TVLONG:
+		case TUVLONG:
+		case TIND:
+		case TSHORT:
+		case TUSHORT:
+			a = AMOVB;
+			break;
+		case TCHAR:
+		case TUCHAR:
+			a = AMOVD;
+			break;
+		}
+		break;
+	case TUCHAR:
+		switch(tt) {
+		case TDOUBLE:
+		case TFLOAT:
+		fxtofl:
+			/*
+			 * rat[0] = 0x43300000; rat[1] = f^0x80000000;
+			 * t = *(double*)rat - FREGCVI;
+			 * is-unsigned(t) => if(t<0) t += 2^32;
+			 * could be streamlined for int-to-float
+			 */
+			regalloc(&fxc0, f, Z);
+			regalloc(&fxc2, f, Z);
+			regsalloc(&fxrat, t);	/* should be type float */
+			gins(AMOVW, nodconst(0x43300000L), &fxc0);
+			gins(AMOVW, f, &fxc2);
+			gins(AMOVW, &fxc0, &fxrat);
+			gins(AXOR, nodconst(0x80000000L), &fxc2);
+			fxc1 = fxrat;
+			fxc1.type = nodrat->type;
+			fxc1.etype = nodrat->etype;
+			fxc1.xoffset += SZ_LONG;
+			gins(AMOVW, &fxc2, &fxc1);
+			regfree(&fxc2);
+			regfree(&fxc0);
+			regalloc(&nod, t, t);	/* should be type float */
+			gins(AFMOVD, &fxrat, &nod);
+			nodreg(&fxc1, t, NREG+FREGCVI);
+			gins(AFSUB, &fxc1, &nod);
+			a = AFMOVD;
+			if(tt == TFLOAT)
+				a = AFRSP;
+			gins(a, &nod, t);
+			regfree(&nod);
+			if(ft == TULONG) {
+				regalloc(&nod, t, Z);
+				if(tt == TFLOAT) {
+					gins(AFCMPU, t, Z);
+					p->to.type = D_FREG;
+					p->to.reg = FREGZERO;
+					gins(ABGE, Z, Z);
+					p1 = p;
+					gins(AFMOVS, nodfconst(4294967296.), &nod);
+					gins(AFADDS, &nod, t);
+				} else {
+					gins(AFCMPU, t, Z);
+					p->to.type = D_FREG;
+					p->to.reg = FREGZERO;
+					gins(ABGE, Z, Z);
+					p1 = p;
+					gins(AFMOVD, nodfconst(4294967296.), &nod);
+					gins(AFADD, &nod, t);
+				}
+				patch(p1, pc);
+				regfree(&nod);
+			}
+			return;
+		case TINT:
+		case TUINT:
+		case TLONG:
+		case TULONG:
+		case TVLONG:
+		case TUVLONG:
+		case TIND:
+		case TSHORT:
+		case TUSHORT:
+			a = AMOVBZ;
+			break;
+		case TCHAR:
+		case TUCHAR:
+			a = AMOVD;
+			break;
+		}
+		break;
+	}
+	if(a == AGOK)
+		diag(Z, "bad opcode in gmove %T -> %T", f->type, t->type);
+	if(a == AMOVD || (a == AMOVW || a == AMOVWZ) && ewidth[ft] == ewidth[tt] || a == AFMOVS || a == AFMOVD)
+	if(samaddr(f, t))
+		return;
+	gins(a, f, t);
+}
+
+void
+gins(int a, Node *f, Node *t)
+{
+
+	nextpc();
+	p->as = a;
+	if(f != Z)
+		naddr(f, &p->from);
+	if(t != Z)
+		naddr(t, &p->to);
+	if(debug['g'])
+		print("%P\n", p);
+}
+
+void
+gopcode(int o, Node *f1, Node *f2, Node *t)
+{
+	int a, et;
+	Adr ta;
+	int uns;
+
+	uns = 0;
+	et = TLONG;
+	if(f1 != Z && f1->type != T) {
+		if(f1->op == OCONST && t != Z && t->type != T)
+			et = t->type->etype;
+		else
+			et = f1->type->etype;
+	}
+	a = AGOK;
+	switch(o) {
+	case OAS:
+		gmove(f1, t);
+		return;
+
+	case OASADD:
+	case OADD:
+		a = AADD;
+		if(et == TFLOAT)
+			a = AFADDS;
+		else
+		if(et == TDOUBLE)
+			a = AFADD;
+		break;
+
+	case OASSUB:
+	case OSUB:
+		a = ASUB;
+		if(et == TFLOAT)
+			a = AFSUBS;
+		else
+		if(et == TDOUBLE)
+			a = AFSUB;
+		break;
+
+	case OASOR:
+	case OOR:
+		a = AOR;
+		break;
+
+	case OASAND:
+	case OAND:
+		a = AAND;
+		if(f1->op == OCONST)
+			a = AANDCC;
+		break;
+
+	case OASXOR:
+	case OXOR:
+		a = AXOR;
+		break;
+
+	case OASLSHR:
+	case OLSHR:
+		a = ASRW;
+		if(isv(et))
+			a = ASRD;
+		break;
+
+	case OASASHR:
+	case OASHR:
+		a = ASRAW;
+		if(isv(et))
+			a = ASRAD;
+		break;
+
+	case OASASHL:
+	case OASHL:
+		a = ASLW;
+		if(isv(et))
+			a = ASLD;
+		break;
+
+	case OFUNC:
+		a = ABL;
+		break;
+
+	case OASLMUL:
+	case OLMUL:
+	case OASMUL:
+	case OMUL:
+		if(et == TFLOAT) {
+			a = AFMULS;
+			break;
+		} else
+		if(et == TDOUBLE) {
+			a = AFMUL;
+			break;
+		}
+		a = AMULLW;
+		if(isv(et))
+			a = AMULLD;
+		break;
+
+	case OASDIV:
+	case ODIV:
+		if(et == TFLOAT) {
+			a = AFDIVS;
+			break;
+		} else
+		if(et == TDOUBLE) {
+			a = AFDIV;
+			break;
+		} else
+		a = ADIVW;
+		if(isv(et))
+			a = ADIVD;
+		break;
+
+	case OASMOD:
+	case OMOD:
+		a = AREM;
+		if(isv(et))
+			a = AREMD;
+		break;
+
+	case OASLMOD:
+	case OLMOD:
+		a = AREMU;
+		if(isv(et))
+			a = AREMDU;
+		break;
+
+	case OASLDIV:
+	case OLDIV:
+		a = ADIVWU;
+		if(isv(et))
+			a = ADIVDU;
+		break;
+
+	case OCOM:
+		a = ANOR;
+		break;
+
+	case ONEG:
+		a = ANEG;
+		if(et == TFLOAT || et == TDOUBLE)
+			a = AFNEG;
+		break;
+
+	case OEQ:
+		a = ABEQ;
+		goto cmp;
+
+	case ONE:
+		a = ABNE;
+		goto cmp;
+
+	case OLT:
+		a = ABLT;
+		goto cmp;
+
+	case OLE:
+		a = ABLE;
+		goto cmp;
+
+	case OGE:
+		a = ABGE;
+		goto cmp;
+
+	case OGT:
+		a = ABGT;
+		goto cmp;
+
+	case OLO:
+		a = ABLT;
+		goto cmpu;
+
+	case OLS:
+		a = ABLE;
+		goto cmpu;
+
+	case OHS:
+		a = ABGE;
+		goto cmpu;
+
+	case OHI:
+		a = ABGT;
+		goto cmpu;
+
+	cmpu:
+		uns = 1;
+	cmp:
+		nextpc();
+		switch(et){
+		case TINT:
+		case TLONG:
+			p->as = ACMPW;
+			break;
+		case TUINT:
+		case TULONG:
+			p->as = ACMPWU;
+			break;
+		case TFLOAT:
+		case TDOUBLE:
+			p->as = AFCMPU;
+			break;
+		default:
+			p->as = uns? ACMPU: ACMP;
+			break;
+		}
+		if(f1 != Z)
+			naddr(f1, &p->from);
+		if(t != Z)
+			naddr(t, &p->to);
+		if(f1 == Z || t == Z || f2 != Z)
+			diag(Z, "bad cmp in gopcode %O", o);
+		if(debug['g'])
+			print("%P\n", p);
+		f1 = Z;
+		f2 = Z;
+		t = Z;
+		break;
+	}
+	if(a == AGOK)
+		diag(Z, "bad in gopcode %O", o);
+	nextpc();
+	p->as = a;
+	if(f1 != Z)
+		naddr(f1, &p->from);
+	if(f2 != Z) {
+		naddr(f2, &ta);
+		p->reg = ta.reg;
+		if(ta.type == D_CONST && ta.offset == 0) {
+			if(R0ISZERO)
+				p->reg = REGZERO;
+			else
+				diag(Z, "REGZERO in gopcode %O", o);
+		}
+	}
+	if(t != Z)
+		naddr(t, &p->to);
+	if(debug['g'])
+		print("%P\n", p);
+}
+
+int
+samaddr(Node *f, Node *t)
+{
+	return f->op == OREGISTER && t->op == OREGISTER && f->reg == t->reg;
+}
+
+void
+gbranch(int o)
+{
+	int a;
+
+	a = AGOK;
+	switch(o) {
+	case ORETURN:
+		a = ARETURN;
+		break;
+	case OGOTO:
+		a = ABR;
+		break;
+	}
+	nextpc();
+	if(a == AGOK) {
+		diag(Z, "bad in gbranch %O",  o);
+		nextpc();
+	}
+	p->as = a;
+}
+
+void
+patch(Prog *op, long pc)
+{
+
+	op->to.offset = pc;
+	op->to.type = D_BRANCH;
+}
+
+void
+gpseudo(int a, Sym *s, Node *n)
+{
+
+	nextpc();
+	p->as = a;
+	p->from.type = D_OREG;
+	p->from.sym = s;
+	if(a == ATEXT)
+		p->reg = (profileflg ? 0 : NOPROF);
+	p->from.name = D_EXTERN;
+	if(s->class == CSTATIC)
+		p->from.name = D_STATIC;
+	naddr(n, &p->to);
+	if(a == ADATA || a == AGLOBL)
+		pc--;
+}
+
+int
+sval(long v)
+{
+
+	if(v >= -(1<<15) && v < (1<<15))
+		return 1;
+	return 0;
+}
+
+int
+sconst(Node *n)
+{
+	vlong vv;
+
+	if(n->op == OCONST) {
+		if(!typefd[n->type->etype]) {
+			vv = n->vconst;
+			if(vv >= -(((vlong)1)<<15) && vv < (((vlong)1)<<15))
+				return 1;
+		}
+	}
+	return 0;
+}
+
+int
+uconst(Node *n)
+{
+	vlong vv;
+
+	if(n->op == OCONST) {
+		if(!typefd[n->type->etype]) {
+			vv = n->vconst;
+			if(vv >= 0 && vv < (((vlong)1)<<16))
+				return 1;
+		}
+	}
+	return 0;
+}
+
+int
+immconst(Node *n)
+{
+	vlong v;
+
+	if(n->op != OCONST || typefd[n->type->etype])
+		return 0;
+	v = n->vconst;
+	if((v & 0xFFFF) == 0)
+		v >>= 16;
+	if(v >= 0 && v < ((vlong)1<<16))
+		return 1;
+	if(v >= -((vlong)1<<15) && v <= ((vlong)1<<15))
+		return 1;
+	return 0;
+}
+
+long
+exreg(Type *t)
+{
+	long o;
+
+	if(typechlpv[t->etype]) {
+		if(exregoffset <= 3)
+			return 0;
+		o = exregoffset;
+		exregoffset--;
+		return o;
+	}
+	if(typefd[t->etype]) {
+		if(exfregoffset <= 16)
+			return 0;
+		o = exfregoffset + NREG;
+		exfregoffset--;
+		return o;
+	}
+	return 0;
+}
+
+schar	ewidth[NTYPE] =
+{
+	-1,		/* [TXXX] */
+	SZ_CHAR,	/* [TCHAR] */
+	SZ_CHAR,	/* [TUCHAR] */
+	SZ_SHORT,	/* [TSHORT] */
+	SZ_SHORT,	/* [TUSHORT] */
+	SZ_INT,		/* [TINT] */
+	SZ_INT,		/* [TUINT] */
+	SZ_LONG,	/* [TLONG] */
+	SZ_LONG,	/* [TULONG] */
+	SZ_VLONG,	/* [TVLONG] */
+	SZ_VLONG,	/* [TUVLONG] */
+	SZ_FLOAT,	/* [TFLOAT] */
+	SZ_DOUBLE,	/* [TDOUBLE] */
+	SZ_IND,		/* [TIND] */
+	0,		/* [TFUNC] */
+	-1,		/* [TARRAY] */
+	0,		/* [TVOID] */
+	-1,		/* [TSTRUCT] */
+	-1,		/* [TUNION] */
+	SZ_INT,		/* [TENUM] */
+};
+long	ncast[NTYPE] =
+{
+	0,				/* [TXXX] */
+	BCHAR|BUCHAR,			/* [TCHAR] */
+	BCHAR|BUCHAR,			/* [TUCHAR] */
+	BSHORT|BUSHORT,			/* [TSHORT] */
+	BSHORT|BUSHORT,			/* [TUSHORT] */
+	BINT|BUINT|BLONG|BULONG,	/* [TINT] */
+	BINT|BUINT|BLONG|BULONG,	/* [TUINT] */
+	BINT|BUINT|BLONG|BULONG,	/* [TLONG] */
+	BINT|BUINT|BLONG|BULONG,	/* [TULONG] */
+	BVLONG|BUVLONG|BIND,			/* [TVLONG] */
+	BVLONG|BUVLONG|BIND,			/* [TUVLONG] */
+	BFLOAT,				/* [TFLOAT] */
+	BDOUBLE,			/* [TDOUBLE] */
+	BVLONG|BUVLONG|BIND,		/* [TIND] */
+	0,				/* [TFUNC] */
+	0,				/* [TARRAY] */
+	0,				/* [TVOID] */
+	BSTRUCT,			/* [TSTRUCT] */
+	BUNION,				/* [TUNION] */
+	0,				/* [TENUM] */
+};

+ 22 - 0
sys/src/cmd/9l/TODO

@@ -0,0 +1,22 @@
+x	ADDIS
+	must not be used for constant formation of unsigned values
+x	LCON, UCON
+x	D_DCONST
+x	shift and RLWIMI, RLD etc
+-	dynld
+-	9db from qdb.c (or same)
+x	MOVD
+x	MOVW/MOVWZ in macro forms for register-register move with sign-extension or zero extension
+	-> EXTSW or RLWNM to and
+x	CMP vs CMPW/CMPWU
+x	getmask -> uvlong
+x	32-bit special registers require MOVW/MOVWZ; others require MOVD
+
+notes
+ZCON	0
+SCON	16-bit signed
+UCON	low 16-bits zero
+ADDCON	-8000<= v < 0
+ANDCON	0 <= v <= 0xFFFF
+LCON	other
+LECON	will be 64 bits

+ 626 - 0
sys/src/cmd/9l/asm.c

@@ -0,0 +1,626 @@
+#include	"l.h"
+
+#define PADDR(a)	((a) & ~0xfffffffff0000000ull)
+
+#define	LPUT(c)\
+	{\
+		cbp[0] = (c)>>24;\
+		cbp[1] = (c)>>16;\
+		cbp[2] = (c)>>8;\
+		cbp[3] = (c);\
+		cbp += 4;\
+		cbc -= 4;\
+		if(cbc <= 0)\
+			cflush();\
+	}
+
+#define	CPUT(c)\
+	{\
+		cbp[0] = (c);\
+		cbp++;\
+		cbc--;\
+		if(cbc <= 0)\
+			cflush();\
+	}
+
+vlong
+entryvalue(void)
+{
+	char *a;
+	Sym *s;
+
+	a = INITENTRY;
+	if(*a >= '0' && *a <= '9')
+		return atolwhex(a);
+	s = lookup(a, 0);
+	if(s->type == 0)
+		return INITTEXT;
+	if(dlm && s->type == SDATA)
+		return s->value+INITDAT;
+	if(s->type != STEXT && s->type != SLEAF)
+		diag("entry not text: %s", s->name);
+	return s->value;
+}
+
+void
+asmb(void)
+{
+	Prog *p;
+	long t, magic;
+	Optab *o;
+	vlong vl;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f asm\n", cputime());
+	Bflush(&bso);
+	seek(cout, HEADR, 0);
+	pc = INITTEXT;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			curtext = p;
+			autosize = p->to.offset + 8;
+			if(p->from3.type == D_CONST) {
+				for(; pc < p->pc; pc++)
+					CPUT(0);
+			}
+		}
+		if(p->pc != pc) {
+			diag("phase error %llux sb %llux",
+				p->pc, pc);
+			if(!debug['a'])
+				prasm(curp);
+			pc = p->pc;
+		}
+		curp = p;
+		o = oplook(p);	/* could probably avoid this call */
+		if(asmout(p, o, 0)) {
+			p = p->link;
+			pc += 4;
+		}
+		pc += o->size;
+	}
+	if(debug['a'])
+		Bprint(&bso, "\n");
+	Bflush(&bso);
+	cflush();
+
+	curtext = P;
+	switch(HEADTYPE) {
+	case 0:
+	case 1:
+	case 2:
+	case 5:
+	case 9:
+	case 10:
+		seek(cout, HEADR+textsize, 0);
+		break;
+	case 3:
+		seek(cout, rnd(HEADR+textsize, 4), 0);
+		break;
+	}
+
+	if(dlm){
+		char buf[8];
+
+		write(cout, buf, INITDAT-textsize);
+		textsize = INITDAT;
+	}
+
+	for(t = 0; t < datsize; t += sizeof(buf)-100) {
+		if(datsize-t > sizeof(buf)-100)
+			datblk(t, sizeof(buf)-100);
+		else
+			datblk(t, datsize-t);
+	}
+
+	symsize = 0;
+	lcsize = 0;
+	if(!debug['s']) {
+		if(debug['v'])
+			Bprint(&bso, "%5.2f sym\n", cputime());
+		Bflush(&bso);
+		switch(HEADTYPE) {
+		case 0:
+		case 1:
+		case 2:
+		case 5:
+		case 9:
+		case 10:
+			seek(cout, HEADR+textsize+datsize, 0);
+			break;
+		case 3:
+			seek(cout, rnd(HEADR+textsize, 4)+datsize, 0);
+			break;
+		}
+		if(!debug['s'])
+			asmsym();
+		if(debug['v'])
+			Bprint(&bso, "%5.2f sp\n", cputime());
+		Bflush(&bso);
+		if(!debug['s'])
+			asmlc();
+		if(dlm)
+			asmdyn();
+		if(HEADTYPE == 0 || HEADTYPE == 1)	/* round up file length for boot image */
+			if((symsize+lcsize) & 1)
+				CPUT(0);
+		cflush();
+	}
+	else if(dlm){
+		asmdyn();
+		cflush();
+	}
+
+	seek(cout, 0L, 0);
+	switch(HEADTYPE) {
+	case 0:
+		lput(0x1030107);		/* magic and sections */
+		lput(textsize);			/* sizes */
+		lput(datsize);
+		lput(bsssize);
+		lput(symsize);			/* nsyms */
+		lput(entryvalue());		/* va of entry */
+		lput(0L);
+		lput(lcsize);
+		break;
+	case 1:
+		if(dlm)
+			lput(0x80000000 | (4*21*21+7));		/* q.out magic */
+		else
+			lput(4*21*21+7);	/* q.out magic */
+		lput(textsize);			/* sizes */
+		lput(datsize);
+		lput(bsssize);
+		lput(symsize);			/* nsyms */
+		lput(entryvalue());		/* va of entry */
+		lput(0L);
+		lput(lcsize);
+		break;
+	case 2:	/* plan9 */
+		magic = 4*27*27+7;
+		magic |= 0x00008000;		/* fat header */
+		if(dlm)
+			magic |= 0x80000000;	/* dlm */
+		lput(magic);
+		lput(textsize);			/* sizes */
+		lput(datsize);
+		lput(bsssize);
+		lput(symsize);			/* nsyms */
+		vl = entryvalue();
+		lput(PADDR(vl));		/* va of entry (real mode on boot) */
+		lput(0L);
+		lput(lcsize);
+		llput(vl);			/* va of entry */
+		break;
+	case 3:
+		break;
+	case 5:
+		elf32(POWER, ELFDATA2MSB, 0, nil);
+		break;
+	case 9:					/* ELF64 Header */
+	case 10:				/* A2 weirdness */
+		elf64(POWER64, ELFDATA2MSB, 0, nil);
+		break;
+	}
+	cflush();
+}
+
+void
+strnput(char *s, int n)
+{
+	for(; *s; s++){
+		CPUT(*s);
+		n--;
+	}
+	for(; n > 0; n--)
+		CPUT(0);
+}
+
+void
+cput(long l)
+{
+	CPUT(l);
+}
+
+void
+wput(long l)
+{
+
+	cbp[0] = l>>8;
+	cbp[1] = l;
+	cbp += 2;
+	cbc -= 2;
+	if(cbc <= 0)
+		cflush();
+}
+
+void
+wputl(long l)
+{
+
+	cbp[0] = l;
+	cbp[1] = l>>8;
+	cbp += 2;
+	cbc -= 2;
+	if(cbc <= 0)
+		cflush();
+}
+
+void
+lput(long l)
+{
+
+	cbp[0] = l>>24;
+	cbp[1] = l>>16;
+	cbp[2] = l>>8;
+	cbp[3] = l;
+	cbp += 4;
+	cbc -= 4;
+	if(cbc <= 0)
+		cflush();
+}
+
+void
+lputl(long l)
+{
+
+	cbp[3] = l>>24;
+	cbp[2] = l>>16;
+	cbp[1] = l>>8;
+	cbp[0] = l;
+	cbp += 4;
+	cbc -= 4;
+	if(cbc <= 0)
+		cflush();
+}
+
+void
+llput(vlong v)
+{
+	lput(v>>32);
+	lput(v);
+}
+
+void
+llputl(vlong v)
+{
+	lputl(v);
+	lputl(v>>32);
+}
+
+void
+cflush(void)
+{
+	int n;
+
+	n = sizeof(buf.cbuf) - cbc;
+	if(n)
+		write(cout, buf.cbuf, n);
+	cbp = buf.cbuf;
+	cbc = sizeof(buf.cbuf);
+}
+
+void
+asmsym(void)
+{
+	Prog *p;
+	Auto *a;
+	Sym *s;
+	int h;
+
+	s = lookup("etext", 0);
+	if(s->type == STEXT)
+		putsymb(s->name, 'T', s->value, s->version);
+
+	for(h=0; h<NHASH; h++)
+		for(s=hash[h]; s!=S; s=s->link)
+			switch(s->type) {
+			case SCONST:
+				putsymb(s->name, 'D', s->value, s->version);
+				continue;
+
+			case SDATA:
+				putsymb(s->name, 'D', s->value+INITDAT, s->version);
+				continue;
+
+			case SBSS:
+				putsymb(s->name, 'B', s->value+INITDAT, s->version);
+				continue;
+
+			case SFILE:
+				putsymb(s->name, 'f', s->value, s->version);
+				continue;
+			}
+
+	for(p=textp; p!=P; p=p->cond) {
+		s = p->from.sym;
+		if(s->type != STEXT && s->type != SLEAF)
+			continue;
+
+		/* filenames first */
+		for(a=p->to.autom; a; a=a->link)
+			if(a->type == D_FILE)
+				putsymb(a->sym->name, 'z', a->aoffset, 0);
+			else
+			if(a->type == D_FILE1)
+				putsymb(a->sym->name, 'Z', a->aoffset, 0);
+
+		if(s->type == STEXT)
+			putsymb(s->name, 'T', s->value, s->version);
+		else
+			putsymb(s->name, 'L', s->value, s->version);
+
+		/* frame, auto and param after */
+		putsymb(".frame", 'm', p->to.offset+8, 0);
+		for(a=p->to.autom; a; a=a->link)
+			if(a->type == D_AUTO)
+				putsymb(a->sym->name, 'a', -a->aoffset, 0);
+			else
+			if(a->type == D_PARAM)
+				putsymb(a->sym->name, 'p', a->aoffset, 0);
+	}
+	if(debug['v'] || debug['n'])
+		Bprint(&bso, "symsize = %lud\n", symsize);
+	Bflush(&bso);
+}
+
+void
+putsymb(char *s, int t, vlong v, int ver)
+{
+	int i, f, l;
+
+	if(t == 'f')
+		s++;
+
+	l = 4;
+	switch(HEADTYPE){
+	default:
+		break;
+	case 2:
+	case 9:
+	case 10:
+		lput(v>>32);
+		l = 8;
+		break;
+	}
+	lput(v);
+	if(ver)
+		t += 'a' - 'A';
+	cput(t+0x80);			/* 0x80 is variable length */
+
+	if(t == 'Z' || t == 'z') {
+		cput(s[0]);
+		for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
+			cput(s[i]);
+			cput(s[i+1]);
+		}
+		cput(0);
+		cput(0);
+		i++;
+	}
+	else {
+		for(i=0; s[i]; i++)
+			cput(s[i]);
+		cput(0);
+	}
+	symsize += l + 1 + i + 1;
+
+	if(debug['n']) {
+		if(t == 'z' || t == 'Z') {
+			Bprint(&bso, "%c %.8llux ", t, v);
+			for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
+				f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
+				Bprint(&bso, "/%x", f);
+			}
+			Bprint(&bso, "\n");
+			return;
+		}
+		if(ver)
+			Bprint(&bso, "%c %.8llux %s<%d>\n", t, v, s, ver);
+		else
+			Bprint(&bso, "%c %.8llux %s\n", t, v, s);
+	}
+}
+
+#define	MINLC	4
+void
+asmlc(void)
+{
+	vlong oldpc, oldlc;
+	Prog *p;
+	long v, s;
+
+	oldpc = INITTEXT;
+	oldlc = 0;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
+			if(p->as == ATEXT)
+				curtext = p;
+			if(debug['V'])
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			continue;
+		}
+		if(debug['V'])
+			Bprint(&bso, "\t\t%6ld", lcsize);
+		v = (p->pc - oldpc) / MINLC;
+		while(v) {
+			s = 127;
+			if(v < 127)
+				s = v;
+			CPUT(s+128);	/* 129-255 +pc */
+			if(debug['V'])
+				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
+			v -= s;
+			lcsize++;
+		}
+		s = p->line - oldlc;
+		oldlc = p->line;
+		oldpc = p->pc + MINLC;
+		if(s > 64 || s < -64) {
+			CPUT(0);	/* 0 vv +lc */
+			CPUT(s>>24);
+			CPUT(s>>16);
+			CPUT(s>>8);
+			CPUT(s);
+			if(debug['V']) {
+				if(s > 0)
+					Bprint(&bso, " lc+%ld(%d,%ld)\n",
+						s, 0, s);
+				else
+					Bprint(&bso, " lc%ld(%d,%ld)\n",
+						s, 0, s);
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			}
+			lcsize += 5;
+			continue;
+		}
+		if(s > 0) {
+			CPUT(0+s);	/* 1-64 +lc */
+			if(debug['V']) {
+				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			}
+		} else {
+			CPUT(64-s);	/* 65-128 -lc */
+			if(debug['V']) {
+				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			}
+		}
+		lcsize++;
+	}
+	while(lcsize & 1) {
+		s = 129;
+		CPUT(s);
+		lcsize++;
+	}
+	if(debug['v'] || debug['V'])
+		Bprint(&bso, "lcsize = %ld\n", lcsize);
+	Bflush(&bso);
+}
+
+void
+datblk(long s, long n)
+{
+	Prog *p;
+	uchar *cast;
+	long l, fl, j;
+	vlong d;
+	int i, c;
+
+	memset(buf.dbuf, 0, n+100);
+	for(p = datap; p != P; p = p->link) {
+		curp = p;
+		l = p->from.sym->value + p->from.offset - s;
+		c = p->reg;
+		i = 0;
+		if(l < 0) {
+			if(l+c <= 0)
+				continue;
+			while(l < 0) {
+				l++;
+				i++;
+			}
+		}
+		if(l >= n)
+			continue;
+		if(p->as != AINIT && p->as != ADYNT) {
+			for(j=l+(c-i)-1; j>=l; j--)
+				if(buf.dbuf[j]) {
+					print("%P\n", p);
+					diag("multiple initialization");
+					break;
+				}
+		}
+		switch(p->to.type) {
+		default:
+			diag("unknown mode in initialization\n%P", p);
+			break;
+
+		case D_FCONST:
+			switch(c) {
+			default:
+			case 4:
+				fl = ieeedtof(&p->to.ieee);
+				cast = (uchar*)&fl;
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[fnuxi8[i+4]];
+					l++;
+				}
+				break;
+			case 8:
+				cast = (uchar*)&p->to.ieee;
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[fnuxi8[i]];
+					l++;
+				}
+				break;
+			}
+			break;
+
+		case D_SCONST:
+			for(; i<c; i++) {
+				buf.dbuf[l] = p->to.sval[i];
+				l++;
+			}
+			break;
+
+		case D_DCONST:
+		case D_CONST:
+			d = p->to.offset;
+			if(p->to.sym) {
+				if(p->to.sym->type == SUNDEF){	/* TO DO: simplify */
+					ckoff(p->to.sym, d);
+					d += p->to.sym->value;
+				}
+				if(p->to.sym->type == STEXT ||
+				   p->to.sym->type == SLEAF)
+					d += p->to.sym->value;
+				if(p->to.sym->type == SDATA)
+					d += p->to.sym->value + INITDAT;
+				if(p->to.sym->type == SBSS)
+					d += p->to.sym->value + INITDAT;
+				if(dlm)
+					dynreloc(p->to.sym, l+s+INITDAT, 1, 0, 0);
+			}
+			fl = d;
+			cast = (uchar*)&fl;
+			switch(c) {
+			default:
+				diag("bad nuxi %d %d\n%P", c, i, curp);
+				break;
+			case 1:
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi1[i]];
+					l++;
+				}
+				break;
+			case 2:
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi2[i]];
+					l++;
+				}
+				break;
+			case 4:
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi4[i]];
+					l++;
+				}
+				break;
+			case 8:
+				cast = (uchar*)&d;
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi8[i]];
+					l++;
+				}
+				break;
+			}
+			break;
+		}
+	}
+	write(cout, buf.dbuf, n);
+}

+ 1485 - 0
sys/src/cmd/9l/asmout.c

@@ -0,0 +1,1485 @@
+#include "l.h"
+
+#define	OPVCC(o,xo,oe,rc) (((o)<<26)|((xo)<<1)|((oe)<<10)|((rc)&1))
+#define	OPCC(o,xo,rc) OPVCC((o),(xo),0,(rc))
+#define	OP(o,xo) OPVCC((o),(xo),0,0)
+
+/* the order is dest, a/s, b/imm for both arithmetic and logical operations */
+#define	AOP_RRR(op,d,a,b) ((op)|(((d)&31L)<<21)|(((a)&31L)<<16)|(((b)&31L)<<11))
+#define	AOP_IRR(op,d,a,simm) ((op)|(((d)&31L)<<21)|(((a)&31L)<<16)|((simm)&0xFFFF))
+#define	LOP_RRR(op,a,s,b) ((op)|(((s)&31L)<<21)|(((a)&31L)<<16)|(((b)&31L)<<11))
+#define	LOP_IRR(op,a,s,uimm) ((op)|(((s)&31L)<<21)|(((a)&31L)<<16)|((uimm)&0xFFFF))
+#define	OP_BR(op,li,aa) ((op)|((li)&0x03FFFFFC)|((aa)<<1))
+#define	OP_BC(op,bo,bi,bd,aa) ((op)|(((bo)&0x1F)<<21)|(((bi)&0x1F)<<16)|((bd)&0xFFFC)|((aa)<<1))
+#define	OP_BCR(op,bo,bi) ((op)|(((bo)&0x1F)<<21)|(((bi)&0x1F)<<16))
+#define	OP_RLW(op,a,s,sh,mb,me) ((op)|(((s)&31L)<<21)|(((a)&31L)<<16)|(((sh)&31L)<<11)|\
+					(((mb)&31L)<<6)|(((me)&31L)<<1))
+
+#define	OP_ADD	OPVCC(31,266,0,0)
+#define	OP_ADDI	OPVCC(14,0,0,0)
+#define	OP_ADDIS OPVCC(15,0,0,0)
+#define	OP_ANDI	OPVCC(28,0,0,0)
+#define	OP_EXTSB	OPVCC(31,954,0,0)
+#define	OP_EXTSH OPVCC(31,922,0,0)
+#define	OP_EXTSW OPVCC(31,986,0,0)
+#define	OP_MCRF	OPVCC(19,0,0,0)
+#define	OP_MCRFS OPVCC(63,64,0,0)
+#define	OP_MCRXR OPVCC(31,512,0,0)
+#define	OP_MFCR	OPVCC(31,19,0,0)
+#define	OP_MFFS	OPVCC(63,583,0,0)
+#define	OP_MFMSR OPVCC(31,83,0,0)
+#define	OP_MFSPR OPVCC(31,339,0,0)
+#define	OP_MFSR	OPVCC(31,595,0,0)
+#define	OP_MFSRIN	OPVCC(31,659,0,0)
+#define	OP_MTCRF OPVCC(31,144,0,0)
+#define	OP_MTFSF OPVCC(63,711,0,0)
+#define	OP_MTFSFI OPVCC(63,134,0,0)
+#define	OP_MTMSR OPVCC(31,146,0,0)
+#define	OP_MTMSRD OPVCC(31,178,0,0)
+#define	OP_MTSPR OPVCC(31,467,0,0)
+#define	OP_MTSR	OPVCC(31,210,0,0)
+#define	OP_MTSRIN	OPVCC(31,242,0,0)
+#define	OP_MULLW OPVCC(31,235,0,0)
+#define	OP_MULLD OPVCC(31,233,0,0)
+#define	OP_OR	OPVCC(31,444,0,0)
+#define	OP_ORI	OPVCC(24,0,0,0)
+#define	OP_ORIS	OPVCC(25,0,0,0)
+#define	OP_RLWINM	OPVCC(21,0,0,0)
+#define	OP_SUBF	OPVCC(31,40,0,0)
+#define	OP_RLDIC	OPVCC(30,4,0,0)
+#define	OP_RLDICR	OPVCC(30,2,0,0)
+#define	OP_RLDICL	OPVCC(30,0,0,0)
+
+#define	oclass(v)	((v).class-1)
+
+long	oprrr(int), opirr(int), opload(int), opstore(int), oploadx(int), opstorex(int);
+
+/*
+ * 32-bit masks
+ */
+int
+getmask(uchar *m, ulong v)
+{
+	int i;
+
+	m[0] = m[1] = 0;
+	if(v != ~0L && v & (1<<31) && v & 1){	/* MB > ME */
+		if(getmask(m, ~v)){
+			i = m[0]; m[0] = m[1]+1; m[1] = i-1;
+			return 1;
+		}
+		return 0;
+	}
+	for(i=0; i<32; i++)
+		if(v & (1<<(31-i))){
+			m[0] = i;
+			do {
+				m[1] = i;
+			} while(++i<32 && (v & (1<<(31-i))) != 0);
+			for(; i<32; i++)
+				if(v & (1<<(31-i)))
+					return 0;
+			return 1;
+		}
+	return 0;
+}
+
+void
+maskgen(Prog *p, uchar *m, ulong v)
+{
+	if(!getmask(m, v))
+		diag("cannot generate mask #%lux\n%P", v, p);
+}
+
+/*
+ * 64-bit masks (rldic etc)
+ */
+int
+getmask64(uchar *m, uvlong v)
+{
+	int i;
+
+	m[0] = m[1] = 0;
+	for(i=0; i<64; i++)
+		if(v & ((uvlong)1<<(63-i))){
+			m[0] = i;
+			do {
+				m[1] = i;
+			} while(++i<64 && (v & ((uvlong)1<<(63-i))) != 0);
+			for(; i<64; i++)
+				if(v & ((uvlong)1<<(63-i)))
+					return 0;
+			return 1;
+		}
+	return 0;
+}
+
+void
+maskgen64(Prog *p, uchar *m, uvlong v)
+{
+	if(!getmask64(m, v))
+		diag("cannot generate mask #%llux\n%P", v, p);
+}
+
+static void
+reloc(Adr *a, long pc, int sext)
+{
+	if(a->name == D_EXTERN || a->name == D_STATIC)
+		dynreloc(a->sym, pc, 1, 1, sext);
+}
+
+static ulong
+loadu32(int r, vlong d)
+{
+	long v;
+
+	v = d>>16;
+	if(isuint32(d))
+		return LOP_IRR(OP_ORIS, r, REGZERO, v);
+	return AOP_IRR(OP_ADDIS, r, REGZERO, v);
+}
+	
+int
+asmout(Prog *p, Optab *o, int aflag)
+{
+	long o1, o2, o3, o4, o5, v, t;
+	vlong d;
+	Prog *ct;
+	int r, a;
+	uchar mask[2];
+
+	o1 = 0;
+	o2 = 0;
+	o3 = 0;
+	o4 = 0;
+	o5 = 0;
+	switch(o->type) {
+	default:
+		if(aflag)
+			return 0;
+		diag("unknown type %d", o->type);
+		if(!debug['a'])
+			prasm(p);
+		break;
+
+	case 0:		/* pseudo ops */
+		if(aflag) {
+			if(p->link) {
+				if(p->as == ATEXT) {
+					ct = curtext;
+					o2 = autosize;
+					curtext = p;
+					autosize = p->to.offset + 8;
+					o1 = asmout(p->link, oplook(p->link), aflag);
+					curtext = ct;
+					autosize = o2;
+				} else
+					o1 = asmout(p->link, oplook(p->link), aflag);
+			}
+			return o1;
+		}
+		break;
+
+	case 1:		/* mov r1,r2 ==> OR Rs,Rs,Ra */
+		if(p->to.reg == REGZERO && p->from.type == D_CONST) {
+			v = regoff(&p->from);
+			if(r0iszero && v != 0) {
+				nerrors--;
+				diag("literal operation on R0\n%P", p);
+			}
+			o1 = LOP_IRR(OP_ADDI, REGZERO, REGZERO, v);
+			break;
+		}
+		o1 = LOP_RRR(OP_OR, p->to.reg, p->from.reg, p->from.reg);
+		break;
+
+	case 2:		/* int/cr/fp op Rb,[Ra],Rd */
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = AOP_RRR(oprrr(p->as), p->to.reg, r, p->from.reg);
+		break;
+
+	case 3:		/* mov $soreg/addcon/ucon, r ==> addis/addi $i,reg',r */
+		d = vregoff(&p->from);
+		v = d;
+		r = p->from.reg;
+		if(r == NREG)
+			r = o->param;
+		if(r0iszero && p->to.reg == 0 && (r != 0 || v != 0))
+			diag("literal operation on R0\n%P", p);
+		a = OP_ADDI;
+		if(o->a1 == C_UCON) {
+			v >>= 16;
+			if(r == REGZERO && isuint32(d)){
+				o1 = LOP_IRR(OP_ORIS, p->to.reg, REGZERO, v);
+				break;
+			}
+			a = OP_ADDIS;
+		}
+		o1 = AOP_IRR(a, p->to.reg, r, v);
+		break;
+
+	case 4:		/* add/mul $scon,[r1],r2 */
+		v = regoff(&p->from);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		if(r0iszero && p->to.reg == 0)
+			diag("literal operation on R0\n%P", p);
+		o1 = AOP_IRR(opirr(p->as), p->to.reg, r, v);
+		break;
+
+	case 5:		/* syscall */
+		if(aflag)
+			return 0;
+		o1 = oprrr(p->as);
+		break;
+
+	case 6:		/* logical op Rb,[Rs,]Ra; no literal */
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = LOP_RRR(oprrr(p->as), p->to.reg, r, p->from.reg);
+		break;
+
+	case 7:		/* mov r, soreg ==> stw o(r) */
+		r = p->to.reg;
+		if(r == NREG)
+			r = o->param;
+		v = regoff(&p->to);
+		if(p->to.type == D_OREG && p->reg != NREG) {
+			if(v)
+				diag("illegal indexed instruction\n%P", p);
+			o1 = AOP_RRR(opstorex(p->as), p->from.reg, p->reg, r);
+		} else
+			o1 = AOP_IRR(opstore(p->as), p->from.reg, r, v);
+		break;
+
+	case 8:		/* mov soreg, r ==> lbz/lhz/lwz o(r) */
+		r = p->from.reg;
+		if(r == NREG)
+			r = o->param;
+		v = regoff(&p->from);
+		if(p->from.type == D_OREG && p->reg != NREG) {
+			if(v)
+				diag("illegal indexed instruction\n%P", p);
+			o1 = AOP_RRR(oploadx(p->as), p->to.reg, p->reg, r);
+		} else
+			o1 = AOP_IRR(opload(p->as), p->to.reg, r, v);
+		break;
+
+	case 9:		/* movb soreg, r ==> lbz o(r),r2; extsb r2,r2 */
+		r = p->from.reg;
+		if(r == NREG)
+			r = o->param;
+		v = regoff(&p->from);
+		if(p->from.type == D_OREG && p->reg != NREG) {
+			if(v)
+				diag("illegal indexed instruction\n%P", p);
+			o1 = AOP_RRR(oploadx(p->as), p->to.reg, p->reg, r);
+		} else
+			o1 = AOP_IRR(opload(p->as), p->to.reg, r, v);
+		o2 = LOP_RRR(OP_EXTSB, p->to.reg, p->to.reg, 0);
+		break;
+
+	case 10:		/* sub Ra,[Rb],Rd => subf Rd,Ra,Rb */
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = AOP_RRR(oprrr(p->as), p->to.reg, p->from.reg, r);
+		break;
+
+	case 11:	/* br/bl lbra */
+		if(aflag)
+			return 0;
+		v = 0;
+		if(p->cond == UP){
+			if(p->to.sym->type != SUNDEF)
+				diag("bad branch sym type");
+			v = (ulong)p->to.sym->value >> (Roffset-2);
+			dynreloc(p->to.sym, p->pc, 0, 0, 0);
+		}
+		else if(p->cond)
+			v = p->cond->pc - p->pc;
+		if(v & 03) {
+			diag("odd branch target address\n%P", p);
+			v &= ~03;
+		}
+		if(v < -(1L<<25) || v >= (1L<<24))
+			diag("branch too far\n%P", p);
+		o1 = OP_BR(opirr(p->as), v, 0);
+		break;
+
+	case 12:	/* movb r,r (extsb); movw r,r (extsw) */
+		if(p->to.reg == REGZERO && p->from.type == D_CONST) {
+			v = regoff(&p->from);
+			if(r0iszero && v != 0) {
+				nerrors--;
+				diag("literal operation on R0\n%P", p);
+			}
+			o1 = LOP_IRR(OP_ADDI, REGZERO, REGZERO, v);
+			break;
+		}
+		if(p->as == AMOVW)
+			o1 = LOP_RRR(OP_EXTSW, p->to.reg, p->from.reg, 0);
+		else
+			o1 = LOP_RRR(OP_EXTSB, p->to.reg, p->from.reg, 0);
+		break;
+
+	case 13:	/* mov[bhw]z r,r; uses rlwinm not andi. to avoid changing CC */
+		if(p->as == AMOVBZ)
+			o1 = OP_RLW(OP_RLWINM, p->to.reg, p->from.reg, 0, 24, 31);
+		else if(p->as == AMOVH)
+			o1 = LOP_RRR(OP_EXTSH, p->to.reg, p->from.reg, 0);
+		else if(p->as == AMOVHZ)
+			o1 = OP_RLW(OP_RLWINM, p->to.reg, p->from.reg, 0, 16, 31);
+		else if(p->as == AMOVWZ)
+			o1 = OP_RLW(OP_RLDIC, p->to.reg, p->from.reg, 0, 0, 0) | (1<<5);	/* MB=32 */
+		else
+			diag("internal: bad mov[bhw]z\n%P", p);
+		break;
+
+	case 14:	/* rldc[lr] Rb,Rs,$mask,Ra -- left, right give different masks */
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		d = vregoff(&p->from3);
+		maskgen64(p, mask, d);
+		switch(p->as){
+		case ARLDCL: case ARLDCLCC:
+			a = mask[0];	/* MB */
+			if(mask[1] != 63)
+				diag("invalid mask for rotate: %llux (end != bit 63)\n%P", d, p);
+			break;
+		case ARLDCR: case ARLDCRCC:
+			a = mask[1];	/* ME */
+			if(mask[0] != 0)
+				diag("invalid mask for rotate: %llux (start != 0)\n%P", d, p);
+			break;
+		default:
+			diag("unexpected op in rldc case\n%P", p);
+			a = 0;
+		}
+		o1 = LOP_RRR(oprrr(p->as), p->to.reg, r, p->from.reg);
+		o1 |= (a&31L)<<6;
+		if(a & 0x20)
+			o1 |= 1<<5;	/* mb[5] is top bit */
+		break;
+
+	case 17:	/* bc bo,bi,lbra (same for now) */
+	case 16:	/* bc bo,bi,sbra */
+		if(aflag)
+			return 0;
+		a = 0;
+		if(p->from.type == D_CONST)
+			a = regoff(&p->from);
+		r = p->reg;
+		if(r == NREG)
+			r = 0;
+		v = 0;
+		if(p->cond)
+			v = p->cond->pc - p->pc;
+		if(v & 03) {
+			diag("odd branch target address\n%P", p);
+			v &= ~03;
+		}
+		if(v < -(1L<<16) || v >= (1L<<15))
+			diag("branch too far\n%P", p);
+		o1 = OP_BC(opirr(p->as), a, r, v, 0);
+		break;
+
+	case 15:	/* br/bl (r) => mov r,lr; br/bl (lr) */
+		if(aflag)
+			return 0;
+		if(p->as == ABC || p->as == ABCL)
+			v = regoff(&p->to)&31L;
+		else
+			v = 20;	/* unconditional */
+		r = p->reg;
+		if(r == NREG)
+			r = 0;
+		o1 = AOP_RRR(OP_MTSPR, p->to.reg, 0, 0) | ((D_LR&0x1f)<<16) | (((D_LR>>5)&0x1f)<<11);
+		o2 = OPVCC(19, 16, 0, 0);
+		if(p->as == ABL || p->as == ABCL)
+			o2 |= 1;
+		o2 = OP_BCR(o2, v, r);
+		break;
+
+	case 18:	/* br/bl (lr/ctr); bc/bcl bo,bi,(lr/ctr) */
+		if(aflag)
+			return 0;
+		if(p->as == ABC || p->as == ABCL)
+			v = regoff(&p->from)&31L;
+		else
+			v = 20;	/* unconditional */
+		r = p->reg;
+		if(r == NREG)
+			r = 0;
+		switch(oclass(p->to)) {
+		case C_CTR:
+			o1 = OPVCC(19, 528, 0, 0);
+			break;
+		case C_LR:
+			o1 = OPVCC(19, 16, 0, 0);
+			break;
+		default:
+			diag("bad optab entry (18): %d\n%P", p->to.class, p);
+			v = 0;
+		}
+		if(p->as == ABL || p->as == ABCL)
+			o1 |= 1;
+		o1 = OP_BCR(o1, v, r);
+		break;
+
+	case 19:	/* mov $lcon,r ==> cau+or */
+		d = vregoff(&p->from);
+		o1 = loadu32(p->to.reg, d);
+		o2 = LOP_IRR(OP_ORI, p->to.reg, p->to.reg, (long)d);
+		if(dlm)
+			reloc(&p->from, p->pc, 0);
+		break;
+
+	case 20:	/* add $ucon,,r */
+		v = regoff(&p->from);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		if(p->as == AADD && (!r0iszero && p->reg == 0 || r0iszero && p->to.reg == 0))
+			diag("literal operation on R0\n%P", p);
+		o1 = AOP_IRR(opirr(p->as+AEND), p->to.reg, r, v>>16);
+		break;
+
+	case 22:	/* add $lcon,r1,r2 ==> cau+or+add */	/* could do add/sub more efficiently */
+		if(p->to.reg == REGTMP || p->reg == REGTMP)
+			diag("cant synthesize large constant\n%P", p);
+		d = vregoff(&p->from);
+		o1 = loadu32(REGTMP, d);
+		o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, (long)d);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o3 = AOP_RRR(oprrr(p->as), p->to.reg, REGTMP, r);
+		if(dlm)
+			reloc(&p->from, p->pc, 0);
+		break;
+
+	case 23:	/* and $lcon,r1,r2 ==> cau+or+and */	/* masks could be done using rlnm etc. */
+		if(p->to.reg == REGTMP || p->reg == REGTMP)
+			diag("cant synthesize large constant\n%P", p);
+		d = vregoff(&p->from);
+		o1 = loadu32(REGTMP, d);
+		o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, (long)d);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o3 = LOP_RRR(oprrr(p->as), p->to.reg, REGTMP, r);
+		if(dlm)
+			reloc(&p->from, p->pc, 0);
+		break;
+/*24*/
+
+	case 25:	/* sld[.] $sh,rS,rA -> rldicr[.] $sh,rS,mask(0,63-sh),rA; srd[.] -> rldicl */
+		v = regoff(&p->from);
+		if(v < 0)
+			v = 0;
+		else if(v > 63)
+			v = 63;
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		switch(p->as){
+		case ASLD: case ASLDCC:
+			a = 63-v;
+			o1 = OP_RLDICR;
+			break;
+		case ASRD: case ASRDCC:
+			a = v;
+			v = 64-v;
+			o1 = OP_RLDICL;
+			break;
+		default:
+			diag("unexpected op in sldi case\n%P", p);
+			a = 0;
+			o1 = 0;
+		}
+		o1 = AOP_RRR(o1, r, p->to.reg, (v&0x1F));
+		o1 |= (a&31L)<<6;
+		if(v & 0x20)
+			o1 |= 1<<1;
+		if(a & 0x20)
+			o1 |= 1<<5;	/* mb[5] is top bit */
+		if(p->as == ASLDCC || p->as == ASRDCC)
+			o1 |= 1;	/* Rc */
+		break;
+
+	case 26:	/* mov $lsext/auto/oreg,,r2 ==> addis+addi */
+		if(p->to.reg == REGTMP)
+			diag("can't synthesize large constant\n%P", p);
+		v = regoff(&p->from);
+		if(v & 0x8000L)
+			v += 0x10000L;
+		r = p->from.reg;
+		if(r == NREG)
+			r = o->param;
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, r, v>>16);
+		o2 = AOP_IRR(OP_ADDI, p->to.reg, REGTMP, v);
+		break;
+
+	case 27:		/* subc ra,$simm,rd => subfic rd,ra,$simm */
+		v = regoff(&p->from3);
+		r = p->from.reg;
+		o1 = AOP_IRR(opirr(p->as), p->to.reg, r, v);
+		break;
+
+	case 28:	/* subc r1,$lcon,r2 ==> cau+or+subfc */
+		if(p->to.reg == REGTMP || p->from.reg == REGTMP)
+			diag("can't synthesize large constant\n%P", p);
+		v = regoff(&p->from3);
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, v>>16);
+		o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, v);
+		o3 = AOP_RRR(oprrr(p->as), p->to.reg, p->from.reg, REGTMP);
+		if(dlm)
+			reloc(&p->from3, p->pc, 0);
+		break;
+
+	case 29:	/* rldic[lr]? $sh,s,$mask,a -- left, right, plain give different masks */
+		v = regoff(&p->from);
+		d = vregoff(&p->from3);
+		maskgen64(p, mask, d);
+		switch(p->as){
+		case ARLDC: case ARLDCCC:
+			a = mask[0];	/* MB */
+			if(mask[1] != (63-v))
+				diag("invalid mask for shift: %llux (shift %ld)\n%P", d, v, p);
+			break;
+		case ARLDCL: case ARLDCLCC:
+			a = mask[0];	/* MB */
+			if(mask[1] != 63)
+				diag("invalid mask for shift: %llux (shift %ld)\n%P", d, v, p);
+			break;
+		case ARLDCR: case ARLDCRCC:
+			a = mask[1];	/* ME */
+			if(mask[0] != 0)
+				diag("invalid mask for shift: %llux (shift %ld)\n%P", d, v, p);
+			break;
+		default:
+			diag("unexpected op in rldic case\n%P", p);
+			a = 0;
+		}
+		o1 = AOP_RRR(opirr(p->as), p->reg, p->to.reg, (v&0x1F));
+		o1 |= (a&31L)<<6;
+		if(v & 0x20)
+			o1 |= 1<<1;
+		if(a & 0x20)
+			o1 |= 1<<5;	/* mb[5] is top bit */
+		break;
+
+	case 30:	/* rldimi $sh,s,$mask,a */
+		v = regoff(&p->from);
+		d = vregoff(&p->from3);
+		maskgen64(p, mask, d);
+		if(mask[1] != (63-v))
+			diag("invalid mask for shift: %llux (shift %ld)\n%P", d, v, p);
+		o1 = AOP_RRR(opirr(p->as), p->reg, p->to.reg, (v&0x1F));
+		o1 |= (mask[0]&31L)<<6;
+		if(v & 0x20)
+			o1 |= 1<<1;
+		if(mask[0] & 0x20)
+			o1 |= 1<<5;	/* mb[5] is top bit */
+		break;
+
+	case 31:	/* dword */
+		if(aflag)
+			return 0;
+		d = vregoff(&p->from);
+		o1 = d>>32;
+		o2 = d;
+		break;
+
+	case 32:	/* fmul frc,fra,frd */
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = AOP_RRR(oprrr(p->as), p->to.reg, r, 0)|((p->from.reg&31L)<<6);
+		break;
+
+	case 33:	/* fabs [frb,]frd; fmr. frb,frd */
+		r = p->from.reg;
+		if(oclass(p->from) == C_NONE)
+			r = p->to.reg;
+		o1 = AOP_RRR(oprrr(p->as), p->to.reg, 0, r);
+		break;
+
+	case 34:	/* FMADDx fra,frb,frc,frd (d=a*b+c); FSELx a<0? (d=b): (d=c) */
+		o1 = AOP_RRR(oprrr(p->as), p->to.reg, p->from.reg, p->reg)|((p->from3.reg&31L)<<6);
+		break;
+
+	case 35:	/* mov r,lext/lauto/loreg ==> cau $(v>>16),sb,r'; store o(r') */
+		v = regoff(&p->to);
+		if(v & 0x8000L)
+			v += 0x10000L;
+		r = p->to.reg;
+		if(r == NREG)
+			r = o->param;
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, r, v>>16);
+		o2 = AOP_IRR(opstore(p->as), p->from.reg, REGTMP, v);
+		break;
+
+	case 36:	/* mov bz/h/hz lext/lauto/lreg,r ==> lbz/lha/lhz etc */
+		v = regoff(&p->from);
+		if(v & 0x8000L)
+			v += 0x10000L;
+		r = p->from.reg;
+		if(r == NREG)
+			r = o->param;
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, r, v>>16);
+		o2 = AOP_IRR(opload(p->as), p->to.reg, REGTMP, v);
+		break;
+
+	case 37:	/* movb lext/lauto/lreg,r ==> lbz o(reg),r; extsb r */
+		v = regoff(&p->from);
+		if(v & 0x8000L)
+			v += 0x10000L;
+		r = p->from.reg;
+		if(r == NREG)
+			r = o->param;
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, r, v>>16);
+		o2 = AOP_IRR(opload(p->as), p->to.reg, REGTMP, v);
+		o3 = LOP_RRR(OP_EXTSB, p->to.reg, p->to.reg, 0);
+		break;
+
+	case 40:	/* word */
+		if(aflag)
+			return 0;
+		o1 = regoff(&p->from);
+		break;
+
+	case 41:	/* stswi */
+		o1 = AOP_RRR(opirr(p->as), p->from.reg, p->to.reg, 0) | ((regoff(&p->from3)&0x7F)<<11);
+		break;
+
+	case 42:	/* lswi */
+		o1 = AOP_RRR(opirr(p->as), p->to.reg, p->from.reg, 0) | ((regoff(&p->from3)&0x7F)<<11);
+		break;
+
+	case 43:	/* unary indexed source: dcbf (b); dcbf (a+b) */
+		r = p->reg;
+		if(r == NREG)
+			r = 0;
+		o1 = AOP_RRR(oprrr(p->as), 0, r, p->from.reg);
+		break;
+
+	case 44:	/* indexed store */
+		r = p->reg;
+		if(r == NREG)
+			r = 0;
+		o1 = AOP_RRR(opstorex(p->as), p->from.reg, r, p->to.reg);
+		break;
+	case 45:	/* indexed load */
+		r = p->reg;
+		if(r == NREG)
+			r = 0;
+		o1 = AOP_RRR(oploadx(p->as), p->to.reg, r, p->from.reg);
+		break;
+
+	case 46:	/* plain op */
+		o1 = oprrr(p->as);
+		break;
+
+	case 47:	/* op Ra, Rd; also op [Ra,] Rd */
+		r = p->from.reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = AOP_RRR(oprrr(p->as), p->to.reg, r, 0);
+		break;
+
+	case 48:	/* op Rs, Ra */
+		r = p->from.reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = LOP_RRR(oprrr(p->as), p->to.reg, r, 0);
+		break;
+
+	case 49:	/* op Rb; op $n, Rb */
+		if(p->from.type != D_REG){	/* tlbie $L, rB */
+			v = regoff(&p->from) & 1;
+			o1 = AOP_RRR(oprrr(p->as), 0, 0, p->to.reg) | (v<<21);
+		}else
+			o1 = AOP_RRR(oprrr(p->as), 0, 0, p->from.reg);
+		break;
+
+	case 50:	/* rem[u] r1[,r2],r3 */
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		v = oprrr(p->as);
+		t = v & ((1<<10)|1);	/* OE|Rc */
+		o1 = AOP_RRR(v&~t, REGTMP, r, p->from.reg);
+		o2 = AOP_RRR(OP_MULLW, REGTMP, REGTMP, p->from.reg);
+		o3 = AOP_RRR(OP_SUBF|t, p->to.reg, REGTMP, r);
+		break;
+
+	case 51:	/* remd[u] r1[,r2],r3 */
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		v = oprrr(p->as);
+		t = v & ((1<<10)|1);	/* OE|Rc */
+		o1 = AOP_RRR(v&~t, REGTMP, r, p->from.reg);
+		o2 = AOP_RRR(OP_MULLD, REGTMP, REGTMP, p->from.reg);
+		o3 = AOP_RRR(OP_SUBF|t, p->to.reg, REGTMP, r);
+		break;
+
+	case 52:	/* mtfsbNx cr(n) */
+		v = regoff(&p->from)&31L;
+		o1 = AOP_RRR(oprrr(p->as), v, 0, 0);
+		break;
+
+	case 53:	/* mffsX ,fr1 */
+		o1 = AOP_RRR(OP_MFFS, p->to.reg, 0, 0);
+		break;
+
+	case 54:	/* mov msr,r1; mov r1, msr*/
+		if(oclass(p->from) == C_REG){
+			if(p->as == AMOVD)
+				o1 = AOP_RRR(OP_MTMSRD, p->from.reg, 0, 0);
+			else
+				o1 = AOP_RRR(OP_MTMSR, p->from.reg, 0, 0);
+		}else
+			o1 = AOP_RRR(OP_MFMSR, p->to.reg, 0, 0);
+		break;
+
+	case 55:	/* op Rb, Rd */
+		o1 = AOP_RRR(oprrr(p->as), p->to.reg, 0, p->from.reg);
+		break;
+
+	case 56:	/* sra $sh,[s,]a; srd $sh,[s,]a */
+		v = regoff(&p->from);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = AOP_RRR(opirr(p->as), r, p->to.reg, v&31L);
+		if(p->as == ASRAD && (v&0x20))
+			o1 |= 1<<1;	/* mb[5] */
+		break;
+
+	case 57:	/* slw $sh,[s,]a -> rlwinm ... */
+		v = regoff(&p->from);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		/*
+		 * Let user (gs) shoot himself in the foot. 
+		 * qc has already complained.
+		 *
+		if(v < 0 || v > 31)
+			diag("illegal shift %ld\n%P", v, p);
+		 */
+		if(v < 0)
+			v = 0;
+		else if(v > 32)
+			v = 32;
+		if(p->as == ASRW || p->as == ASRWCC) {	/* shift right */
+			mask[0] = v;
+			mask[1] = 31;
+			v = 32-v;
+		} else {
+			mask[0] = 0;
+			mask[1] = 31-v;
+		}
+		o1 = OP_RLW(OP_RLWINM, p->to.reg, r, v, mask[0], mask[1]);
+		if(p->as == ASLWCC || p->as == ASRWCC)
+			o1 |= 1;	/* Rc */
+		break;
+
+	case 58:		/* logical $andcon,[s],a */
+		v = regoff(&p->from);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = LOP_IRR(opirr(p->as), p->to.reg, r, v);
+		break;
+
+	case 59:	/* or/and $ucon,,r */
+		v = regoff(&p->from);
+		r = p->reg;
+		if(r == NREG)
+			r = p->to.reg;
+		o1 = LOP_IRR(opirr(p->as+AEND), p->to.reg, r, v>>16);	/* oris, xoris, andis */
+		break;
+
+	case 60:	/* tw to,a,b */
+		r = regoff(&p->from)&31L;
+		o1 = AOP_RRR(oprrr(p->as), r, p->reg, p->to.reg);
+		break;
+
+	case 61:	/* tw to,a,$simm */
+		r = regoff(&p->from)&31L;
+		v = regoff(&p->to);
+		o1 = AOP_IRR(opirr(p->as), r, p->reg, v);
+		break;
+
+	case 62:	/* rlwmi $sh,s,$mask,a */
+		v = regoff(&p->from);
+		maskgen(p, mask, regoff(&p->from3));
+		o1 = AOP_RRR(opirr(p->as), p->reg, p->to.reg, v);
+		o1 |= ((mask[0]&31L)<<6)|((mask[1]&31L)<<1);
+		break;
+
+	case 63:	/* rlwmi b,s,$mask,a */
+		maskgen(p, mask, regoff(&p->from3));
+		o1 = AOP_RRR(opirr(p->as), p->reg, p->to.reg, p->from.reg);
+		o1 |= ((mask[0]&31L)<<6)|((mask[1]&31L)<<1);
+		break;
+
+	case 64:	/* mtfsf fr[, $m] {,fpcsr} */
+		if(p->from3.type != D_NONE)
+			v = regoff(&p->from3)&255L;
+		else
+			v = 255;
+		o1 = OP_MTFSF | (v<<17) | (p->from.reg<<11);
+		break;
+
+	case 65:	/* MOVFL $imm,FPSCR(n) => mtfsfi crfd,imm */
+		if(p->to.reg == NREG)
+			diag("must specify FPSCR(n)\n%P", p);
+		o1 = OP_MTFSFI | ((p->to.reg&15L)<<23) | ((regoff(&p->from)&31L)<<12);
+		break;
+
+	case 66:	/* mov spr,r1; mov r1,spr, also dcr */
+		if(p->from.type == D_REG) {
+			r = p->from.reg;
+			v = p->to.offset;
+			if(p->to.type == D_DCR)
+				o1 = OPVCC(31,451,0,0);	/* mtdcr */
+			else
+				o1 = OPVCC(31,467,0,0); /* mtspr */
+		} else {
+			r = p->to.reg;
+			v = p->from.offset;
+			if(p->from.type == D_DCR)
+				o1 = OPVCC(31,323,0,0);	/* mfdcr */
+			else
+				o1 = OPVCC(31,339,0,0);	/* mfspr */
+		}
+		o1 = AOP_RRR(o1, r, 0, 0) | ((v&0x1f)<<16) | (((v>>5)&0x1f)<<11);
+		break;
+
+	case 67:	/* mcrf crfD,crfS */
+		if(p->from.type != D_CREG || p->from.reg == NREG ||
+		   p->to.type != D_CREG || p->to.reg == NREG)
+			diag("illegal CR field number\n%P", p);
+		o1 = AOP_RRR(OP_MCRF, ((p->to.reg&7L)<<2), ((p->from.reg&7)<<2), 0);
+		break;
+
+	case 68:	/* mfcr rD; mfocrf CRM,rD */
+		if(p->from.type == D_CREG && p->from.reg != NREG){
+			v = 1<<(7-(p->to.reg&7));	/* CR(n) */
+			o1 = AOP_RRR(OP_MFCR, p->to.reg, 0, 0) | (1<<20) | (v<<12);	/* new form, mfocrf */
+		}else
+			o1 = AOP_RRR(OP_MFCR, p->to.reg, 0, 0);	/* old form, whole register */
+		break;
+
+	case 69:	/* mtcrf CRM,rS */
+		if(p->from3.type != D_NONE) {
+			if(p->to.reg != NREG)
+				diag("can't use both mask and CR(n)\n%P", p);
+			v = regoff(&p->from3) & 0xff;
+		} else {
+			if(p->to.reg == NREG)
+				v = 0xff;	/* CR */
+			else
+				v = 1<<(7-(p->to.reg&7));	/* CR(n) */
+		}
+		o1 = AOP_RRR(OP_MTCRF, p->from.reg, 0, 0) | (v<<12);
+		break;
+
+	case 70:	/* [f]cmp r,r,cr*/
+		if(p->reg == NREG)
+			r = 0;
+		else
+			r = (p->reg&7)<<2;
+		o1 = AOP_RRR(oprrr(p->as), r, p->from.reg, p->to.reg);
+		break;
+
+	case 71:	/* cmp[l] r,i,cr*/
+		if(p->reg == NREG)
+			r = 0;
+		else
+			r = (p->reg&7)<<2;
+		o1 = AOP_RRR(opirr(p->as), r, p->from.reg, 0) | (regoff(&p->to)&0xffff);
+		break;
+
+	case 72:	/* slbmte (Rb+Rs -> slb[Rb]) -> Rs, Rb */
+		o1 = AOP_RRR(oprrr(p->as), p->from.reg, 0, p->to.reg);
+		break;
+
+	case 73:	/* mcrfs crfD,crfS */
+		if(p->from.type != D_FPSCR || p->from.reg == NREG ||
+		   p->to.type != D_CREG || p->to.reg == NREG)
+			diag("illegal FPSCR/CR field number\n%P", p);
+		o1 = AOP_RRR(OP_MCRFS, ((p->to.reg&7L)<<2), ((p->from.reg&7)<<2), 0);
+		break;
+
+	/* relocation operations */
+
+	case 74:
+		v = regoff(&p->to);
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, v>>16);
+		o2 = AOP_IRR(opstore(p->as), p->from.reg, REGTMP, v);
+		if(dlm)
+			reloc(&p->to, p->pc, 1);
+		break;
+
+	case 75:
+		v = regoff(&p->from);
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, v>>16);
+		o2 = AOP_IRR(opload(p->as), p->to.reg, REGTMP, v);
+		if(dlm)
+			reloc(&p->from, p->pc, 1);
+		break;
+
+	case 76:
+		v = regoff(&p->from);
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, v>>16);
+		o2 = AOP_IRR(opload(p->as), p->to.reg, REGTMP, v);
+		o3 = LOP_RRR(OP_EXTSB, p->to.reg, p->to.reg, 0);
+		if(dlm)
+			reloc(&p->from, p->pc, 1);
+		break;
+
+	}
+	if(aflag)
+		return o1;
+	v = p->pc;
+	switch(o->size) {
+	default:
+		if(debug['a'])
+			Bprint(&bso, " %.8lux:\t\t%P\n", v, p);
+		break;
+	case 4:
+		if(debug['a'])
+			Bprint(&bso, " %.8lux: %.8lux\t%P\n", v, o1, p);
+		lput(o1);
+		break;
+	case 8:
+		if(debug['a'])
+			Bprint(&bso, " %.8lux: %.8lux %.8lux%P\n", v, o1, o2, p);
+		lput(o1);
+		lput(o2);
+		break;
+	case 12:
+		if(debug['a'])
+			Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux%P\n", v, o1, o2, o3, p);
+		lput(o1);
+		lput(o2);
+		lput(o3);
+		break;
+	case 16:
+		if(debug['a'])
+			Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux %.8lux%P\n",
+				v, o1, o2, o3, o4, p);
+		lput(o1);
+		lput(o2);
+		lput(o3);
+		lput(o4);
+		break;
+	case 20:
+		if(debug['a'])
+			Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux %.8lux %.8lux%P\n",
+				v, o1, o2, o3, o4, o5, p);
+		lput(o1);
+		lput(o2);
+		lput(o3);
+		lput(o4);
+		lput(o5);
+		break;
+	}
+	return 0;
+}
+
+long
+oprrr(int a)
+{
+	switch(a) {
+	case AADD:	return OPVCC(31,266,0,0);
+	case AADDCC:	return OPVCC(31,266,0,1);
+	case AADDV:	return OPVCC(31,266,1,0);
+	case AADDVCC:	return OPVCC(31,266,1,1);
+	case AADDC:	return OPVCC(31,10,0,0);
+	case AADDCCC:	return OPVCC(31,10,0,1);
+	case AADDCV:	return OPVCC(31,10,1,0);
+	case AADDCVCC:	return OPVCC(31,10,1,1);
+	case AADDE:	return OPVCC(31,138,0,0);
+	case AADDECC:	return OPVCC(31,138,0,1);
+	case AADDEV:	return OPVCC(31,138,1,0);
+	case AADDEVCC:	return OPVCC(31,138,1,1);
+	case AADDME:	return OPVCC(31,234,0,0);
+	case AADDMECC:	return OPVCC(31,234,0,1);
+	case AADDMEV:	return OPVCC(31,234,1,0);
+	case AADDMEVCC:	return OPVCC(31,234,1,1);
+	case AADDZE:	return OPVCC(31,202,0,0);
+	case AADDZECC:	return OPVCC(31,202,0,1);
+	case AADDZEV:	return OPVCC(31,202,1,0);
+	case AADDZEVCC:	return OPVCC(31,202,1,1);
+
+	case AAND:	return OPVCC(31,28,0,0);
+	case AANDCC:	return OPVCC(31,28,0,1);
+	case AANDN:	return OPVCC(31,60,0,0);
+	case AANDNCC:	return OPVCC(31,60,0,1);
+
+	case ACMP:	return OPVCC(31,0,0,0)|(1<<21);	/* L=1 */
+	case ACMPU:	return OPVCC(31,32,0,0)|(1<<21);
+	case ACMPW:	return OPVCC(31,0,0,0);	/* L=0 */
+	case ACMPWU:	return OPVCC(31,32,0,0);
+
+	case ACNTLZW:	return OPVCC(31,26,0,0);
+	case ACNTLZWCC:	return OPVCC(31,26,0,1);
+	case ACNTLZD:		return OPVCC(31,58,0,0);
+	case ACNTLZDCC:	return OPVCC(31,58,0,1);
+
+	case ACRAND:	return OPVCC(19,257,0,0);
+	case ACRANDN:	return OPVCC(19,129,0,0);
+	case ACREQV:	return OPVCC(19,289,0,0);
+	case ACRNAND:	return OPVCC(19,225,0,0);
+	case ACRNOR:	return OPVCC(19,33,0,0);
+	case ACROR:	return OPVCC(19,449,0,0);
+	case ACRORN:	return OPVCC(19,417,0,0);
+	case ACRXOR:	return OPVCC(19,193,0,0);
+
+	case ADCBF:	return OPVCC(31,86,0,0);
+	case ADCBI:	return OPVCC(31,470,0,0);
+	case ADCBST:	return OPVCC(31,54,0,0);
+	case ADCBT:	return OPVCC(31,278,0,0);
+	case ADCBTST:	return OPVCC(31,246,0,0);
+	case ADCBZ:	return OPVCC(31,1014,0,0);
+
+	case AREM:
+	case ADIVW:	return OPVCC(31,491,0,0);
+	case AREMCC:
+	case ADIVWCC:	return OPVCC(31,491,0,1);
+	case AREMV:
+	case ADIVWV:	return OPVCC(31,491,1,0);
+	case AREMVCC:
+	case ADIVWVCC:	return OPVCC(31,491,1,1);
+	case AREMU:
+	case ADIVWU:	return OPVCC(31,459,0,0);
+	case AREMUCC:
+	case ADIVWUCC:	return OPVCC(31,459,0,1);
+	case AREMUV:
+	case ADIVWUV:	return OPVCC(31,459,1,0);
+	case AREMUVCC:
+	case ADIVWUVCC:	return OPVCC(31,459,1,1);
+
+	case AREMD:
+	case ADIVD:	return OPVCC(31,489,0,0);
+	case AREMDCC:
+	case ADIVDCC:	return OPVCC(31,489,0,1);
+	case AREMDV:
+	case ADIVDV:	return OPVCC(31,489,1,0);
+	case AREMDVCC:
+	case ADIVDVCC:	return OPVCC(31,489,1,1);
+	case AREMDU:
+	case ADIVDU:	return OPVCC(31,457,0,0);
+	case AREMDUCC:
+	case ADIVDUCC:	return OPVCC(31,457,0,1);
+	case AREMDUV:
+	case ADIVDUV:	return OPVCC(31,457,1,0);
+	case AREMDUVCC:
+	case ADIVDUVCC:	return OPVCC(31,457,1,1);
+
+	case AEIEIO:	return OPVCC(31,854,0,0);
+
+	case AEQV:	return OPVCC(31,284,0,0);
+	case AEQVCC:	return OPVCC(31,284,0,1);
+
+	case AEXTSB:	return OPVCC(31,954,0,0);
+	case AEXTSBCC:	return OPVCC(31,954,0,1);
+	case AEXTSH:	return OPVCC(31,922,0,0);
+	case AEXTSHCC:	return OPVCC(31,922,0,1);
+	case AEXTSW:	return OPVCC(31,986,0,0);
+	case AEXTSWCC:	return OPVCC(31,986,0,1);
+
+	case AFABS:	return OPVCC(63,264,0,0);
+	case AFABSCC:	return OPVCC(63,264,0,1);
+	case AFADD:	return OPVCC(63,21,0,0);
+	case AFADDCC:	return OPVCC(63,21,0,1);
+	case AFADDS:	return OPVCC(59,21,0,0);
+	case AFADDSCC:	return OPVCC(59,21,0,1);
+	case AFCMPO:	return OPVCC(63,32,0,0);
+	case AFCMPU:	return OPVCC(63,0,0,0);
+	case AFCFID:	return OPVCC(63,846,0,0);
+	case AFCFIDCC:	return OPVCC(63,846,0,1);
+	case AFCTIW:	return OPVCC(63,14,0,0);
+	case AFCTIWCC:	return OPVCC(63,14,0,1);
+	case AFCTIWZ:	return OPVCC(63,15,0,0);
+	case AFCTIWZCC:	return OPVCC(63,15,0,1);
+	case AFCTID:	return OPVCC(63,814,0,0);
+	case AFCTIDCC:	return OPVCC(63,814,0,1);
+	case AFCTIDZ:	return OPVCC(63,815,0,0);
+	case AFCTIDZCC:	return OPVCC(63,815,0,1);
+	case AFDIV:	return OPVCC(63,18,0,0);
+	case AFDIVCC:	return OPVCC(63,18,0,1);
+	case AFDIVS:	return OPVCC(59,18,0,0);
+	case AFDIVSCC:	return OPVCC(59,18,0,1);
+	case AFMADD:	return OPVCC(63,29,0,0);
+	case AFMADDCC:	return OPVCC(63,29,0,1);
+	case AFMADDS:	return OPVCC(59,29,0,0);
+	case AFMADDSCC:	return OPVCC(59,29,0,1);
+	case AFMOVS:
+	case AFMOVD:	return OPVCC(63,72,0,0);	/* load */
+	case AFMOVDCC:	return OPVCC(63,72,0,1);
+	case AFMSUB:	return OPVCC(63,28,0,0);
+	case AFMSUBCC:	return OPVCC(63,28,0,1);
+	case AFMSUBS:	return OPVCC(59,28,0,0);
+	case AFMSUBSCC:	return OPVCC(59,28,0,1);
+	case AFMUL:	return OPVCC(63,25,0,0);
+	case AFMULCC:	return OPVCC(63,25,0,1);
+	case AFMULS:	return OPVCC(59,25,0,0);
+	case AFMULSCC:	return OPVCC(59,25,0,1);
+	case AFNABS:	return OPVCC(63,136,0,0);
+	case AFNABSCC:	return OPVCC(63,136,0,1);
+	case AFNEG:	return OPVCC(63,40,0,0);
+	case AFNEGCC:	return OPVCC(63,40,0,1);
+	case AFNMADD:	return OPVCC(63,31,0,0);
+	case AFNMADDCC:	return OPVCC(63,31,0,1);
+	case AFNMADDS:	return OPVCC(59,31,0,0);
+	case AFNMADDSCC:	return OPVCC(59,31,0,1);
+	case AFNMSUB:	return OPVCC(63,30,0,0);
+	case AFNMSUBCC:	return OPVCC(63,30,0,1);
+	case AFNMSUBS:	return OPVCC(59,30,0,0);
+	case AFNMSUBSCC:	return OPVCC(59,30,0,1);
+	case AFRES:	return OPVCC(59,24,0,0);
+	case AFRESCC:	return OPVCC(59,24,0,1);
+	case AFRSP:	return OPVCC(63,12,0,0);
+	case AFRSPCC:	return OPVCC(63,12,0,1);
+	case AFRSQRTE:	return OPVCC(63,26,0,0);
+	case AFRSQRTECC:	return OPVCC(63,26,0,1);
+	case AFSEL:	return OPVCC(63,23,0,0);
+	case AFSELCC:	return OPVCC(63,23,0,1);
+	case AFSQRT:	return OPVCC(63,22,0,0);
+	case AFSQRTCC:	return OPVCC(63,22,0,1);
+	case AFSQRTS:	return OPVCC(59,22,0,0);
+	case AFSQRTSCC:	return OPVCC(59,22,0,1);
+	case AFSUB:	return OPVCC(63,20,0,0);
+	case AFSUBCC:	return OPVCC(63,20,0,1);
+	case AFSUBS:	return OPVCC(59,20,0,0);
+	case AFSUBSCC:	return OPVCC(59,20,0,1);
+
+	case AICBI:	return OPVCC(31,982,0,0);
+	case AISYNC:	return OPVCC(19,150,0,0);
+
+	case AMTFSB0:	return OPVCC(63,70,0,0);
+	case AMTFSB0CC:	return OPVCC(63,70,0,1);
+	case AMTFSB1:	return OPVCC(63,38,0,0);
+	case AMTFSB1CC:	return OPVCC(63,38,0,1);
+
+	case AMULHW:	return OPVCC(31,75,0,0);
+	case AMULHWCC:	return OPVCC(31,75,0,1);
+	case AMULHWU:	return OPVCC(31,11,0,0);
+	case AMULHWUCC:	return OPVCC(31,11,0,1);
+	case AMULLW:	return OPVCC(31,235,0,0);
+	case AMULLWCC:	return OPVCC(31,235,0,1);
+	case AMULLWV:	return OPVCC(31,235,1,0);
+	case AMULLWVCC:	return OPVCC(31,235,1,1);
+
+	case AMULHD:	return OPVCC(31,73,0,0);
+	case AMULHDCC:	return OPVCC(31,73,0,1);
+	case AMULHDU:	return OPVCC(31,9,0,0);
+	case AMULHDUCC:	return OPVCC(31,9,0,1);
+	case AMULLD:	return OPVCC(31,233,0,0);
+	case AMULLDCC:	return OPVCC(31,233,0,1);
+	case AMULLDV:	return OPVCC(31,233,1,0);
+	case AMULLDVCC:	return OPVCC(31,233,1,1);
+
+	case ANAND:	return OPVCC(31,476,0,0);
+	case ANANDCC:	return OPVCC(31,476,0,1);
+	case ANEG:	return OPVCC(31,104,0,0);
+	case ANEGCC:	return OPVCC(31,104,0,1);
+	case ANEGV:	return OPVCC(31,104,1,0);
+	case ANEGVCC:	return OPVCC(31,104,1,1);
+	case ANOR:	return OPVCC(31,124,0,0);
+	case ANORCC:	return OPVCC(31,124,0,1);
+	case AOR:	return OPVCC(31,444,0,0);
+	case AORCC:	return OPVCC(31,444,0,1);
+	case AORN:	return OPVCC(31,412,0,0);
+	case AORNCC:	return OPVCC(31,412,0,1);
+
+	case ARFI:	return OPVCC(19,50,0,0);
+	case ARFCI:	return OPVCC(19,51,0,0);
+	case ARFID:	return OPVCC(19,18,0,0);
+	case AHRFID: return OPVCC(19,274,0,0);
+
+	case ARLWMI:	return OPVCC(20,0,0,0);
+	case ARLWMICC: return OPVCC(20,0,0,1);
+	case ARLWNM:	return OPVCC(23,0,0,0);
+	case ARLWNMCC:	return OPVCC(23,0,0,1);
+
+	case ARLDCL:	return OPVCC(30,8,0,0);
+	case ARLDCR:	return OPVCC(30,9,0,0);
+
+	case ASYSCALL:	return OPVCC(17,1,0,0);
+
+	case ASLW:	return OPVCC(31,24,0,0);
+	case ASLWCC:	return OPVCC(31,24,0,1);
+	case ASLD:	return OPVCC(31,27,0,0);
+	case ASLDCC:	return OPVCC(31,27,0,1);
+
+	case ASRAW:	return OPVCC(31,792,0,0);
+	case ASRAWCC:	return OPVCC(31,792,0,1);
+	case ASRAD:	return OPVCC(31,794,0,0);
+	case ASRADCC:	return OPVCC(31,794,0,1);
+
+	case ASRW:	return OPVCC(31,536,0,0);
+	case ASRWCC:	return OPVCC(31,536,0,1);
+	case ASRD:	return OPVCC(31,539,0,0);
+	case ASRDCC:	return OPVCC(31,539,0,1);
+
+	case ASUB:	return OPVCC(31,40,0,0);
+	case ASUBCC:	return OPVCC(31,40,0,1);
+	case ASUBV:	return OPVCC(31,40,1,0);
+	case ASUBVCC:	return OPVCC(31,40,1,1);
+	case ASUBC:	return OPVCC(31,8,0,0);
+	case ASUBCCC:	return OPVCC(31,8,0,1);
+	case ASUBCV:	return OPVCC(31,8,1,0);
+	case ASUBCVCC:	return OPVCC(31,8,1,1);
+	case ASUBE:	return OPVCC(31,136,0,0);
+	case ASUBECC:	return OPVCC(31,136,0,1);
+	case ASUBEV:	return OPVCC(31,136,1,0);
+	case ASUBEVCC:	return OPVCC(31,136,1,1);
+	case ASUBME:	return OPVCC(31,232,0,0);
+	case ASUBMECC:	return OPVCC(31,232,0,1);
+	case ASUBMEV:	return OPVCC(31,232,1,0);
+	case ASUBMEVCC:	return OPVCC(31,232,1,1);
+	case ASUBZE:	return OPVCC(31,200,0,0);
+	case ASUBZECC:	return OPVCC(31,200,0,1);
+	case ASUBZEV:	return OPVCC(31,200,1,0);
+	case ASUBZEVCC:	return OPVCC(31,200,1,1);
+
+	case ASYNC:	return OPVCC(31,598,0,0);
+	case APTESYNC:	return OPVCC(31,598,0,0) | (2<<21);
+
+	case ATLBIE:	return OPVCC(31,306,0,0);
+	case ATLBIEL:	return OPVCC(31,274,0,0);
+	case ATLBSYNC:	return OPVCC(31,566,0,0);
+	case ASLBIA:	return OPVCC(31,498,0,0);
+	case ASLBIE:	return OPVCC(31,434,0,0);
+	case ASLBMFEE:	return OPVCC(31,915,0,0);
+	case ASLBMFEV:	return OPVCC(31,851,0,0);
+	case ASLBMTE:		return OPVCC(31,402,0,0);
+
+	case ATW:	return OPVCC(31,4,0,0);
+	case ATD:	return OPVCC(31,68,0,0);
+
+	case AXOR:	return OPVCC(31,316,0,0);
+	case AXORCC:	return OPVCC(31,316,0,1);
+	}
+	diag("bad r/r opcode %A", a);
+	return 0;
+}
+
+long
+opirr(int a)
+{
+	switch(a) {
+	case AADD:	return OPVCC(14,0,0,0);
+	case AADDC:	return OPVCC(12,0,0,0);
+	case AADDCCC:	return OPVCC(13,0,0,0);
+	case AADD+AEND:	return OPVCC(15,0,0,0);		/* ADDIS/CAU */
+
+	case AANDCC:	return OPVCC(28,0,0,0);
+	case AANDCC+AEND:	return OPVCC(29,0,0,0);		/* ANDIS./ANDIU. */
+
+	case ABR:	return OPVCC(18,0,0,0);
+	case ABL:	return OPVCC(18,0,0,0) | 1;
+	case ABC:	return OPVCC(16,0,0,0);
+	case ABCL:	return OPVCC(16,0,0,0) | 1;
+
+	case ABEQ:	return AOP_RRR(16<<26,12,2,0);
+	case ABGE:	return AOP_RRR(16<<26,4,0,0);
+	case ABGT:	return AOP_RRR(16<<26,12,1,0);
+	case ABLE:	return AOP_RRR(16<<26,4,1,0);
+	case ABLT:	return AOP_RRR(16<<26,12,0,0);
+	case ABNE:	return AOP_RRR(16<<26,4,2,0);
+	case ABVC:	return AOP_RRR(16<<26,4,3,0);
+	case ABVS:	return AOP_RRR(16<<26,12,3,0);
+
+	case ACMP:	return OPVCC(11,0,0,0)|(1<<21);	/* L=1 */
+	case ACMPU:	return OPVCC(10,0,0,0)|(1<<21);
+	case ACMPW:	return OPVCC(11,0,0,0);	/* L=0 */
+	case ACMPWU:	return OPVCC(10,0,0,0);
+	case ALSW:	return OPVCC(31,597,0,0);
+
+	case AMULLW:	return OPVCC(7,0,0,0);
+
+	case AOR:	return OPVCC(24,0,0,0);
+	case AOR+AEND:	return OPVCC(25,0,0,0);		/* ORIS/ORIU */
+
+	case ARLWMI:	return OPVCC(20,0,0,0);		/* rlwimi */
+	case ARLWMICC:	return OPVCC(20,0,0,1);
+	case ARLDMI:	return OPVCC(30,0,0,0) | (3<<2);	/* rldimi */
+	case ARLDMICC:	return OPVCC(30,0,0,1) | (3<<2);
+
+	case ARLWNM:	return OPVCC(21,0,0,0);		/* rlwinm */
+	case ARLWNMCC:	return OPVCC(21,0,0,1);
+
+	case ARLDCL:	return OPVCC(30,0,0,0);		/* rldicl */
+	case ARLDCLCC:	return OPVCC(30,0,0,1);
+	case ARLDCR:	return OPVCC(30,1,0,0);		/* rldicr */
+	case ARLDCRCC:	return OPVCC(30,1,0,1);
+	case ARLDC:	return OPVCC(30,0,0,0) | (2<<2);
+	case ARLDCCC:	return OPVCC(30,0,0,1) | (2<<2);
+
+	case ASRAW:	return OPVCC(31,824,0,0);
+	case ASRAWCC:	return OPVCC(31,824,0,1);
+	case ASRAD:	return OPVCC(31,(413<<1),0,0);
+	case ASRADCC:	return OPVCC(31,(413<<1),0,1);
+
+	case ASTSW:	return OPVCC(31,725,0,0);
+
+	case ASUBC:	return OPVCC(8,0,0,0);
+
+	case ATW:	return OPVCC(3,0,0,0);
+	case ATD:	return OPVCC(2,0,0,0);
+
+	case AXOR:	return OPVCC(26,0,0,0);		/* XORIL */
+	case AXOR+AEND:	return OPVCC(27,0,0,0);		/* XORIU */
+	}
+	diag("bad opcode i/r %A", a);
+	return 0;
+}
+
+/*
+ * load o(a),d
+ */
+long
+opload(int a)
+{
+	switch(a) {
+	case AMOVD:	return OPVCC(58,0,0,0);	/* ld */
+	case AMOVDU:	return OPVCC(58,0,0,1);	/* ldu */
+	case AMOVWZ:	return OPVCC(32,0,0,0);		/* lwz */
+	case AMOVWZU:	return OPVCC(33,0,0,0);		/* lwzu */
+	case AMOVW:		return OPVCC(58,0,0,0)|(1<<1);	/* lwa */
+	/* no AMOVWU */
+	case AMOVB:
+	case AMOVBZ:	return OPVCC(34,0,0,0);		/* load */
+	case AMOVBU:
+	case AMOVBZU:	return OPVCC(35,0,0,0);
+	case AFMOVD:	return OPVCC(50,0,0,0);
+	case AFMOVDU:	return OPVCC(51,0,0,0);
+	case AFMOVS:	return OPVCC(48,0,0,0);
+	case AFMOVSU:	return OPVCC(49,0,0,0);
+	case AMOVH:	return OPVCC(42,0,0,0);
+	case AMOVHU:	return OPVCC(43,0,0,0);
+	case AMOVHZ:	return OPVCC(40,0,0,0);
+	case AMOVHZU:	return OPVCC(41,0,0,0);
+	case AMOVMW:	return OPVCC(46,0,0,0);	/* lmw */
+	}
+	diag("bad load opcode %A", a);
+	return 0;
+}
+
+/*
+ * indexed load a(b),d
+ */
+long
+oploadx(int a)
+{
+	switch(a) {
+	case AMOVWZ: return OPVCC(31,23,0,0);	/* lwzx */
+	case AMOVWZU:	return OPVCC(31,55,0,0); /* lwzux */
+	case AMOVW:	return OPVCC(31,341,0,0);	/* lwax */
+	case AMOVWU:	return OPVCC(31,373,0,0);	/* lwaux */
+	case AMOVB:
+	case AMOVBZ: return OPVCC(31,87,0,0);	/* lbzx */
+	case AMOVBU:
+	case AMOVBZU: return OPVCC(31,119,0,0);	/* lbzux */
+	case AFMOVD:	return OPVCC(31,599,0,0);	/* lfdx */
+	case AFMOVDU:	return OPVCC(31,631,0,0);	/*  lfdux */
+	case AFMOVS:	return OPVCC(31,535,0,0);	/* lfsx */
+	case AFMOVSU:	return OPVCC(31,567,0,0);	/* lfsux */
+	case AMOVH:	return OPVCC(31,343,0,0);	/* lhax */
+	case AMOVHU:	return OPVCC(31,375,0,0);	/* lhaux */
+	case AMOVHBR:	return OPVCC(31,790,0,0);	/* lhbrx */
+	case AMOVWBR:	return OPVCC(31,534,0,0);	/* lwbrx */
+	case AMOVHZ:	return OPVCC(31,279,0,0);	/* lhzx */
+	case AMOVHZU:	return OPVCC(31,311,0,0);	/* lhzux */
+	case AECIWX:	return OPVCC(31,310,0,0);	/* eciwx */
+	case ALWAR:	return OPVCC(31,20,0,0);	/* lwarx */
+	case ALSW:	return OPVCC(31,533,0,0);	/* lswx */
+	case AMOVD:	return OPVCC(31,21,0,0);	/* ldx */
+	case AMOVDU:	return OPVCC(31,53,0,0);	/* ldux */
+	}
+	diag("bad loadx opcode %A", a);
+	return 0;
+}
+
+/*
+ * store s,o(d)
+ */
+long
+opstore(int a)
+{
+	switch(a) {
+	case AMOVB:
+	case AMOVBZ:	return OPVCC(38,0,0,0);	/* stb */
+	case AMOVBU:
+	case AMOVBZU:	return OPVCC(39,0,0,0);	/* stbu */
+	case AFMOVD:	return OPVCC(54,0,0,0);	/* stfd */
+	case AFMOVDU:	return OPVCC(55,0,0,0);	/* stfdu */
+	case AFMOVS:	return OPVCC(52,0,0,0);	/* stfs */
+	case AFMOVSU:	return OPVCC(53,0,0,0);	/* stfsu */
+	case AMOVHZ:
+	case AMOVH:	return OPVCC(44,0,0,0);	/* sth */
+	case AMOVHZU:
+	case AMOVHU:	return OPVCC(45,0,0,0);	/* sthu */
+	case AMOVMW:	return OPVCC(47,0,0,0);	/* stmw */
+	case ASTSW:	return OPVCC(31,725,0,0);	/* stswi */
+	case AMOVWZ:
+	case AMOVW:	return OPVCC(36,0,0,0);	/* stw */
+	case AMOVWZU:
+	case AMOVWU:	return OPVCC(37,0,0,0);	/* stwu */
+	case AMOVD:	return OPVCC(62,0,0,0);	/* std */
+	case AMOVDU:	return OPVCC(62,0,0,1);	/* stdu */
+	}
+	diag("unknown store opcode %A", a);
+	return 0;
+}
+
+/*
+ * indexed store s,a(b)
+ */
+long
+opstorex(int a)
+{
+	switch(a) {
+	case AMOVB:
+	case AMOVBZ:	return OPVCC(31,215,0,0);	/* stbx */
+	case AMOVBU:
+	case AMOVBZU:	return OPVCC(31,247,0,0);	/* stbux */
+	case AFMOVD:	return OPVCC(31,727,0,0);	/* stfdx */
+	case AFMOVDU:	return OPVCC(31,759,0,0);	/* stfdux */
+	case AFMOVS:	return OPVCC(31,663,0,0);	/* stfsx */
+	case AFMOVSU:	return OPVCC(31,695,0,0);	/* stfsux */
+	case AMOVHZ:
+	case AMOVH:	return OPVCC(31,407,0,0);	/* sthx */
+	case AMOVHBR:	return OPVCC(31,918,0,0);	/* sthbrx */
+	case AMOVHZU:
+	case AMOVHU:	return OPVCC(31,439,0,0);	/* sthux */
+	case AMOVWZ:
+	case AMOVW:	return OPVCC(31,151,0,0);	/* stwx */
+	case AMOVWZU:
+	case AMOVWU:	return OPVCC(31,183,0,0);	/* stwux */
+	case ASTSW:	return OPVCC(31,661,0,0);	/* stswx */
+	case AMOVWBR:	return OPVCC(31,662,0,0);	/* stwbrx */
+	case ASTWCCC:	return OPVCC(31,150,0,1);	/* stwcx. */
+	case ASTDCCC:	return OPVCC(31,214,0,1);	/* stwdx. */
+	case AECOWX:	return OPVCC(31,438,0,0);	/* ecowx */
+	case AMOVD:	return OPVCC(31,149,0,0);	/* stdx */
+	case AMOVDU:	return OPVCC(31,181,0,0);	/* stdux */
+	}
+	diag("unknown storex opcode %A", a);
+	return 0;
+}

+ 37 - 0
sys/src/cmd/9l/cnam.c

@@ -0,0 +1,37 @@
+char	*cnames[] =
+{
+	"NONE",
+	"REG",
+	"FREG",
+	"CREG",
+	"SPR",
+	"ZCON",
+	"SCON",
+	"UCON",
+	"ADDCON",
+	"ANDCON",
+	"LCON",
+	"DCON",
+	"SACON",
+	"SECON",
+	"LACON",
+	"LECON",
+	"SBRA",
+	"LBRA",
+	"SAUTO",
+	"LAUTO",
+	"SEXT",
+	"LEXT",
+	"ZOREG",
+	"SOREG",
+	"LOREG",
+	"FPSCR",
+	"MSR",
+	"XER",
+	"LR",
+	"CTR",
+	"ANY",
+	"GOK",
+	"ADDR",
+	"NCLASS",
+};

+ 65 - 0
sys/src/cmd/9l/compat.c

@@ -0,0 +1,65 @@
+#include	"l.h"
+
+/*
+ * fake malloc
+ */
+void*
+malloc(ulong n)
+{
+	void *p;
+
+	while(n & 7)
+		n++;
+	while(nhunk < n)
+		gethunk();
+	p = hunk;
+	nhunk -= n;
+	hunk += n;
+	return p;
+}
+
+void
+free(void *p)
+{
+	USED(p);
+}
+
+void*
+calloc(ulong m, ulong n)
+{
+	void *p;
+
+	n *= m;
+	p = malloc(n);
+	memset(p, 0, n);
+	return p;
+}
+
+void*
+realloc(void*, ulong)
+{
+	fprint(2, "realloc called\n");
+	abort();
+	return 0;
+}
+
+void*
+mysbrk(ulong size)
+{
+	return sbrk(size);
+}
+
+void
+setmalloctag(void *v, ulong pc)
+{
+	USED(v, pc);
+}
+
+int
+fileexists(char *s)
+{
+	uchar dirbuf[400];
+
+	/* it's fine if stat result doesn't fit in dirbuf, since even then the file exists */
+	return stat(s, dirbuf, sizeof(dirbuf)) >= 0;
+}

+ 355 - 0
sys/src/cmd/9l/l.h

@@ -0,0 +1,355 @@
+#include	<u.h>
+#include	<libc.h>
+#include	<bio.h>
+#include	"../9c/9.out.h"
+#include	"../8l/elf.h"
+
+#ifndef	EXTERN
+#define	EXTERN	extern
+#endif
+
+#define	LIBNAMELEN	300
+
+typedef	struct	Adr	Adr;
+typedef	struct	Sym	Sym;
+typedef	struct	Autom	Auto;
+typedef	struct	Prog	Prog;
+typedef	struct	Optab	Optab;
+
+#define	P		((Prog*)0)
+#define	S		((Sym*)0)
+#define	TNAME		(curtext&&curtext->from.sym?curtext->from.sym->name:noname)
+
+struct	Adr
+{
+	union
+	{
+		vlong	u0offset;
+		char	u0sval[NSNAME];
+		Ieee	u0ieee;
+	}u0;
+	Sym	*sym;
+	Auto	*autom;
+	char	type;
+	uchar	reg;
+	char	name;
+	char	class;
+};
+
+#define	offset	u0.u0offset
+#define	sval	u0.u0sval
+#define	ieee	u0.u0ieee
+
+struct	Prog
+{
+	Adr	from;
+	Adr	from3;	/* fma and rlwm */
+	Adr	to;
+	Prog	*forwd;
+	Prog	*cond;
+	Prog	*link;
+	vlong	pc;
+	long	regused;
+	short	line;
+	short	mark;
+	short	optab;		/* could be uchar */
+	short	as;
+	char	reg;
+};
+struct	Sym
+{
+	char	*name;
+	short	type;
+	short	version;
+	short	become;
+	short	frame;
+	uchar	subtype;
+	ushort	file;
+	vlong	value;
+	long	sig;
+	Sym	*link;
+};
+struct	Autom
+{
+	Sym	*sym;
+	Auto	*link;
+	vlong	aoffset;
+	short	type;
+};
+struct	Optab
+{
+	short	as;
+	char	a1;
+	char	a2;
+	char	a3;
+	char	a4;
+	char	type;
+	char	size;
+	char	param;
+};
+struct
+{
+	Optab*	start;
+	Optab*	stop;
+} oprange[ALAST];
+
+enum
+{
+	FPCHIP		= 1,
+	BIG		= 32768-8,
+	STRINGSZ	= 200,
+	MAXIO		= 8192,
+	MAXHIST		= 20,				/* limit of path elements for history symbols */
+	DATBLK		= 1024,
+	NHASH		= 10007,
+	NHUNK		= 100000,
+	MINSIZ		= 64,
+	NENT		= 100,
+	NSCHED		= 20,
+
+/* mark flags */
+	LABEL		= 1<<0,
+	LEAF		= 1<<1,
+	FLOAT		= 1<<2,
+	BRANCH		= 1<<3,
+	LOAD		= 1<<4,
+	FCMP		= 1<<5,
+	SYNC		= 1<<6,
+	LIST		= 1<<7,
+	FOLL		= 1<<8,
+	NOSCHED		= 1<<9,
+
+	STEXT		= 1,
+	SDATA,
+	SBSS,
+	SDATA1,
+	SXREF,
+	SLEAF,
+	SFILE,
+	SCONST,
+	SUNDEF,
+
+	SIMPORT,
+	SEXPORT,
+
+	C_NONE		= 0,
+	C_REG,
+	C_FREG,
+	C_CREG,
+	C_SPR,		/* special processor register */
+	C_ZCON,
+	C_SCON,		/* 16 bit signed */
+	C_UCON,		/* low 16 bits 0 */
+	C_ADDCON,	/* -0x8000 <= v < 0 */
+	C_ANDCON,	/* 0 < v <= 0xFFFF */
+	C_LCON,		/* other 32 */
+	C_DCON,		/* other 64 (could subdivide further) */
+	C_SACON,
+	C_SECON,
+	C_LACON,
+	C_LECON,
+	C_SBRA,
+	C_LBRA,
+	C_SAUTO,
+	C_LAUTO,
+	C_SEXT,
+	C_LEXT,
+	C_ZOREG,
+	C_SOREG,
+	C_LOREG,
+	C_FPSCR,
+	C_MSR,
+	C_XER,
+	C_LR,
+	C_CTR,
+	C_ANY,
+	C_GOK,
+	C_ADDR,
+
+	C_NCLASS,
+
+	Roffset	= 22,		/* no. bits for offset in relocation address */
+	Rindex	= 10		/* no. bits for index in relocation address */
+};
+
+EXTERN union
+{
+	struct
+	{
+		uchar	obuf[MAXIO];			/* output buffer */
+		uchar	ibuf[MAXIO];			/* input buffer */
+	} u;
+	char	dbuf[1];
+} buf;
+
+#define	cbuf	u.obuf
+#define	xbuf	u.ibuf
+
+EXTERN	long	HEADR;			/* length of header */
+EXTERN	int	HEADTYPE;		/* type of header */
+EXTERN	vlong	INITDAT;		/* data location */
+EXTERN	long	INITRND;		/* data round above text location */
+EXTERN	vlong	INITTEXT;		/* text location */
+EXTERN	long	INITTEXTP;		/* text location (physical) */
+EXTERN	char*	INITENTRY;		/* entry point */
+EXTERN	long	autosize;
+EXTERN	Biobuf	bso;
+EXTERN	long	bsssize;
+EXTERN	int	cbc;
+EXTERN	uchar*	cbp;
+EXTERN	int	cout;
+EXTERN	Auto*	curauto;
+EXTERN	Auto*	curhist;
+EXTERN	Prog*	curp;
+EXTERN	Prog*	curtext;
+EXTERN	Prog*	datap;
+EXTERN	Prog*	prog_movsw;
+EXTERN	Prog*	prog_movdw;
+EXTERN	Prog*	prog_movws;
+EXTERN	Prog*	prog_movwd;
+EXTERN	vlong	datsize;
+EXTERN	char	debug[128];
+EXTERN	Prog*	firstp;
+EXTERN	uchar	fnuxi8[8];
+EXTERN	uchar	fnuxi4[4];
+EXTERN	Sym*	hash[NHASH];
+EXTERN	Sym*	histfrog[MAXHIST];
+EXTERN	int	histfrogp;
+EXTERN	int	histgen;
+EXTERN	char*	library[50];
+EXTERN	char*	libraryobj[50];
+EXTERN	int	libraryp;
+EXTERN	int	xrefresolv;
+EXTERN	char*	hunk;
+EXTERN	uchar	inuxi1[1];
+EXTERN	uchar	inuxi2[2];
+EXTERN	uchar	inuxi4[4];
+EXTERN	uchar	inuxi8[8];
+EXTERN	Prog*	lastp;
+EXTERN	long	lcsize;
+EXTERN	char	literal[32];
+EXTERN	int	nerrors;
+EXTERN	long	nhunk;
+EXTERN	char*	noname;
+EXTERN	vlong	instoffset;
+EXTERN	char*	outfile;
+EXTERN	vlong	pc;
+EXTERN	int	r0iszero;
+EXTERN	long	symsize;
+EXTERN	long	staticgen;
+EXTERN	Prog*	textp;
+EXTERN	vlong	textsize;
+EXTERN	long	tothunk;
+EXTERN	char	xcmp[C_NCLASS][C_NCLASS];
+EXTERN	int	version;
+EXTERN	Prog	zprg;
+EXTERN	int	dtype;
+
+EXTERN	int	doexp, dlm;
+EXTERN	int	imports, nimports;
+EXTERN	int	exports, nexports, allexport;
+EXTERN	char*	EXPTAB;
+EXTERN	Prog	undefp;
+
+#define	UP	(&undefp)
+
+extern	Optab	optab[];
+extern	char*	anames[];
+extern	char*	cnames[];
+
+int	Aconv(Fmt*);
+int	Dconv(Fmt*);
+int	Nconv(Fmt*);
+int	Pconv(Fmt*);
+int	Sconv(Fmt*);
+int	Rconv(Fmt*);
+int	aclass(Adr*);
+void	addhist(long, int);
+void	histtoauto(void);
+void	addlibpath(char*);
+void	addnop(Prog*);
+void	append(Prog*, Prog*);
+void	asmb(void);
+void	asmdyn(void);
+void	asmlc(void);
+int	asmout(Prog*, Optab*, int);
+void	asmsym(void);
+vlong	atolwhex(char*);
+Prog*	brloop(Prog*);
+void	buildop(void);
+void	cflush(void);
+void	ckoff(Sym*, vlong);
+int	cmp(int, int);
+void	cput(long);
+int	compound(Prog*);
+double	cputime(void);
+void	datblk(long, long);
+void	diag(char*, ...);
+void	dodata(void);
+void	doprof1(void);
+void	doprof2(void);
+void	dynreloc(Sym*, long, int, int, int);
+vlong	entryvalue(void);
+void	errorexit(void);
+void	exchange(Prog*);
+void	export(void);
+int	fileexists(char*);
+int	find1(long, int);
+char*	findlib(char*);
+void	follow(void);
+void	gethunk(void);
+double	ieeedtod(Ieee*);
+long	ieeedtof(Ieee*);
+void	import(void);
+int	isint32(vlong);
+int	isuint32(uvlong);
+int	isnop(Prog*);
+void	ldobj(int, long, char*);
+void	loadlib(void);
+void	listinit(void);
+void	initmuldiv(void);
+Sym*	lookup(char*, int);
+void	llput(vlong);
+void	llputl(vlong);
+void	lput(long);
+void	lputl(long);
+void	mkfwd(void);
+void*	mysbrk(ulong);
+void	names(void);
+void	nocache(Prog*);
+void	noops(void);
+void	nopout(Prog*);
+void	nuxiinit(void);
+void	objfile(char*);
+int	ocmp(void*, void*);
+long	opcode(int);
+Optab*	oplook(Prog*);
+void	patch(void);
+void	prasm(Prog*);
+void	prepend(Prog*, Prog*);
+Prog*	prg(void);
+int	pseudo(Prog*);
+void	putsymb(char*, int, vlong, int);
+void	readundefs(char*, int);
+long	regoff(Adr*);
+int	relinv(int);
+vlong	rnd(vlong, long);
+void	sched(Prog*, Prog*);
+void	span(void);
+void	strnput(char*, int);
+void	undef(void);
+void	undefsym(Sym*);
+vlong	vregoff(Adr*);
+void	wput(long);
+void	wputl(long);
+void	xdefine(char*, int, vlong);
+void	xfol(Prog*);
+void	zerosig(char*);
+
+#pragma	varargck	type	"D"	Adr*
+#pragma	varargck	type	"N"	Adr*
+#pragma	varargck	type	"P"	Prog*
+#pragma	varargck	type	"R"	int
+#pragma	varargck	type	"A"	int
+#pragma	varargck	type	"S"	char*
+#pragma	varargck	argpos	diag 1

+ 311 - 0
sys/src/cmd/9l/list.c

@@ -0,0 +1,311 @@
+#include "l.h"
+
+void
+listinit(void)
+{
+
+	fmtinstall('A', Aconv);
+	fmtinstall('D', Dconv);
+	fmtinstall('P', Pconv);
+	fmtinstall('S', Sconv);
+	fmtinstall('N', Nconv);
+	fmtinstall('R', Rconv);
+}
+
+void
+prasm(Prog *p)
+{
+	print("%P\n", p);
+}
+
+int
+Pconv(Fmt *fp)
+{
+	char str[STRINGSZ], *s;
+	Prog *p;
+	int a;
+
+	p = va_arg(fp->args, Prog*);
+	curp = p;
+	a = p->as;
+	if(a == ADATA || a == AINIT || a == ADYNT)
+		sprint(str, "(%d)	%A	%D/%d,%D", p->line, a, &p->from, p->reg, &p->to);
+	else {
+		s = str;
+		if(p->mark & NOSCHED)
+			s += sprint(s, "*");
+		if(p->reg == NREG && p->from3.type == D_NONE)
+			sprint(s, "(%d)	%A	%D,%D", p->line, a, &p->from, &p->to);
+		else
+		if(a != ATEXT && p->from.type == D_OREG) {
+			sprint(s, "(%d)	%A	%lld(R%d+R%d),%D", p->line, a,
+				p->from.offset, p->from.reg, p->reg, &p->to);
+		} else
+		if(p->to.type == D_OREG) {
+			sprint(s, "(%d)	%A	%D,%lld(R%d+R%d)", p->line, a,
+					&p->from, p->to.offset, p->to.reg, p->reg);
+		} else {
+			s += sprint(s, "(%d)	%A	%D", p->line, a, &p->from);
+			if(p->reg != NREG)
+				s += sprint(s, ",%c%d", p->from.type==D_FREG?'F':'R', p->reg);
+			if(p->from3.type != D_NONE)
+				s += sprint(s, ",%D", &p->from3);
+			sprint(s, ",%D", &p->to);
+		}
+	}
+	return fmtstrcpy(fp, str);
+}
+
+int
+Aconv(Fmt *fp)
+{
+	char *s;
+	int a;
+
+	a = va_arg(fp->args, int);
+	s = "???";
+	if(a >= AXXX && a < ALAST)
+		s = anames[a];
+	return fmtstrcpy(fp, s);
+}
+
+int
+Dconv(Fmt *fp)
+{
+	char str[STRINGSZ];
+	Adr *a;
+	long v;
+
+	a = va_arg(fp->args, Adr*);
+	switch(a->type) {
+
+	default:
+		sprint(str, "GOK-type(%d)", a->type);
+		break;
+
+	case D_NONE:
+		str[0] = 0;
+		if(a->name != D_NONE || a->reg != NREG || a->sym != S)
+			sprint(str, "%N(R%d)(NONE)", a, a->reg);
+		break;
+
+	case D_CONST:
+	case D_DCONST:
+		if(a->reg != NREG)
+			sprint(str, "$%N(R%d)", a, a->reg);
+		else
+			sprint(str, "$%N", a);
+		break;
+
+	case D_OREG:
+		if(a->reg != NREG)
+			sprint(str, "%N(R%d)", a, a->reg);
+		else
+			sprint(str, "%N", a);
+		break;
+
+	case D_REG:
+		sprint(str, "R%d", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(R%d)(REG)", a, a->reg);
+		break;
+
+	case D_FREG:
+		sprint(str, "F%d", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(F%d)(REG)", a, a->reg);
+		break;
+
+	case D_CREG:
+		if(a->reg == NREG)
+			strcpy(str, "CR");
+		else
+			sprint(str, "CR%d", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(C%d)(REG)", a, a->reg);
+		break;
+
+	case D_SPR:
+		if(a->name == D_NONE && a->sym == S) {
+			switch((ulong)a->offset) {
+			case D_XER: sprint(str, "XER"); break;
+			case D_LR: sprint(str, "LR"); break;
+			case D_CTR: sprint(str, "CTR"); break;
+			default: sprint(str, "SPR(%lld)", a->offset); break;
+			}
+			break;
+		}
+		sprint(str, "SPR-GOK(%d)", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(SPR-GOK%d)(REG)", a, a->reg);
+		break;
+
+	case D_DCR:
+		if(a->name == D_NONE && a->sym == S) {
+			sprint(str, "DCR(%lld)", a->offset);
+			break;
+		}
+		sprint(str, "DCR-GOK(%d)", a->reg);
+		if(a->name != D_NONE || a->sym != S)
+			sprint(str, "%N(DCR-GOK%d)(REG)", a, a->reg);
+		break;
+
+	case D_OPT:
+		sprint(str, "OPT(%d)", a->reg);
+		break;
+
+	case D_FPSCR:
+		if(a->reg == NREG)
+			strcpy(str, "FPSCR");
+		else
+			sprint(str, "FPSCR(%d)", a->reg);
+		break;
+
+	case D_MSR:
+		sprint(str, "MSR");
+		break;
+
+	case D_BRANCH:
+		if(curp->cond != P) {
+			v = curp->cond->pc;
+			if(v >= INITTEXT)
+				v -= INITTEXT-HEADR;
+			if(a->sym != S)
+				sprint(str, "%s+%.5lux(BRANCH)", a->sym->name, v);
+			else
+				sprint(str, "%.5lux(BRANCH)", v);
+		} else
+			if(a->sym != S)
+				sprint(str, "%s+%lld(APC)", a->sym->name, a->offset);
+			else
+				sprint(str, "%lld(APC)", a->offset);
+		break;
+
+	case D_FCONST:
+		sprint(str, "$%lux-%lux", a->ieee.h, a->ieee.l);
+		break;
+
+	case D_SCONST:
+		sprint(str, "$\"%S\"", a->sval);
+		break;
+	}
+	return fmtstrcpy(fp, str);
+}
+
+int
+Nconv(Fmt *fp)
+{
+	char str[STRINGSZ];
+	Adr *a;
+	Sym *s;
+	long l;
+
+	a = va_arg(fp->args, Adr*);
+	s = a->sym;
+	if(s == S) {
+		l = a->offset;
+		if((vlong)l != a->offset)
+			sprint(str, "0x%llux", a->offset);
+		else
+			sprint(str, "%lld", a->offset);
+		goto out;
+	}
+	switch(a->name) {
+	default:
+		sprint(str, "GOK-name(%d)", a->name);
+		break;
+
+	case D_EXTERN:
+		sprint(str, "%s+%lld(SB)", s->name, a->offset);
+		break;
+
+	case D_STATIC:
+		sprint(str, "%s<>+%lld(SB)", s->name, a->offset);
+		break;
+
+	case D_AUTO:
+		sprint(str, "%s-%lld(SP)", s->name, -a->offset);
+		break;
+
+	case D_PARAM:
+		sprint(str, "%s+%lld(FP)", s->name, a->offset);
+		break;
+	}
+out:
+	return fmtstrcpy(fp, str);
+}
+
+int
+Rconv(Fmt *fp)
+{
+	char *s;
+	int a;
+
+	a = va_arg(fp->args, int);
+	s = "C_??";
+	if(a >= C_NONE && a <= C_NCLASS)
+		s = cnames[a];
+	return fmtstrcpy(fp, s);
+}
+
+int
+Sconv(Fmt *fp)
+{
+	int i, c;
+	char str[STRINGSZ], *p, *a;
+
+	a = va_arg(fp->args, char*);
+	p = str;
+	for(i=0; i<sizeof(long); i++) {
+		c = a[i] & 0xff;
+		if(c >= 'a' && c <= 'z' ||
+		   c >= 'A' && c <= 'Z' ||
+		   c >= '0' && c <= '9' ||
+		   c == ' ' || c == '%') {
+			*p++ = c;
+			continue;
+		}
+		*p++ = '\\';
+		switch(c) {
+		case 0:
+			*p++ = 'z';
+			continue;
+		case '\\':
+		case '"':
+			*p++ = c;
+			continue;
+		case '\n':
+			*p++ = 'n';
+			continue;
+		case '\t':
+			*p++ = 't';
+			continue;
+		}
+		*p++ = (c>>6) + '0';
+		*p++ = ((c>>3) & 7) + '0';
+		*p++ = (c & 7) + '0';
+	}
+	*p = 0;
+	return fmtstrcpy(fp, str);
+}
+
+void
+diag(char *fmt, ...)
+{
+	char buf[STRINGSZ], *tn;
+	va_list arg;
+
+	tn = "??none??";
+	if(curtext != P && curtext->from.sym != S)
+		tn = curtext->from.sym->name;
+	va_start(arg, fmt);
+	vseprint(buf, buf+sizeof(buf), fmt, arg);
+	va_end(arg);
+	print("%s: %s\n", tn, buf);
+
+	nerrors++;
+	if(nerrors > 10) {
+		print("too many errors\n");
+		errorexit();
+	}
+}

+ 17 - 0
sys/src/cmd/9l/mkcname

@@ -0,0 +1,17 @@
+ed - ../9l/l.h <<'!'
+v/^	C_/d
+g/^	C_NCLASS/s//&,/
+g/[ 	]*=.*,/s//,/
+v/,/p
+,s/^	C_/	"/
+,s/,.*$/",/
+1i
+char	*cnames[] =
+{
+.
+,a
+};
+.
+w cnam.c
+Q
+!

+ 33 - 0
sys/src/cmd/9l/mkfile

@@ -0,0 +1,33 @@
+</$objtype/mkfile
+
+TARG=9l
+OFILES=\
+	asm.$O\
+	asmout.$O\
+	list.$O\
+	noop.$O\
+	obj.$O\
+	optab.$O\
+	pass.$O\
+	span.$O\
+	enam.$O\
+	cnam.$O\
+	sched.$O\
+	compat.$O\
+	elf.$O\
+
+HFILES=\
+	l.h\
+	../9c/9.out.h\
+	../8l/elf.h\
+
+BIN=/$objtype/bin
+CFLAGS=$CFLAGS -. -I.
+</sys/src/cmd/mkone
+
+enam.$O:	../9c/enam.c
+	$CC $CFLAGS ../9c/enam.c
+elf.$O:	../8l/elf.c
+	$CC $CFLAGS ../8l/elf.c
+cnam.c:	l.h
+	rc mkcname

+ 523 - 0
sys/src/cmd/9l/noop.c

@@ -0,0 +1,523 @@
+#include	"l.h"
+
+void
+noops(void)
+{
+	Prog *p, *p1, *q, *q1;
+	int o, mov, aoffset, curframe, curbecome, maxbecome;
+
+	/*
+	 * find leaf subroutines
+	 * become sizes
+	 * frame sizes
+	 * strip NOPs
+	 * expand RET
+	 * expand BECOME pseudo
+	 */
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f noops\n", cputime());
+	Bflush(&bso);
+
+	curframe = 0;
+	curbecome = 0;
+	maxbecome = 0;
+	curtext = 0;
+	q = P;
+	for(p = firstp; p != P; p = p->link) {
+
+		/* find out how much arg space is used in this TEXT */
+		if(p->to.type == D_OREG && p->to.reg == REGSP)
+			if(p->to.offset > curframe)
+				curframe = p->to.offset;
+
+		switch(p->as) {
+		/* too hard, just leave alone */
+		case ATEXT:
+			if(curtext && curtext->from.sym) {
+				curtext->from.sym->frame = curframe;
+				curtext->from.sym->become = curbecome;
+				if(curbecome > maxbecome)
+					maxbecome = curbecome;
+			}
+			curframe = 0;
+			curbecome = 0;
+
+			q = p;
+			p->mark |= LABEL|LEAF|SYNC;
+			if(p->link)
+				p->link->mark |= LABEL;
+			curtext = p;
+			break;
+
+		case ANOR:
+			q = p;
+			if(p->to.type == D_REG)
+				if(p->to.reg == REGZERO)
+					p->mark |= LABEL|SYNC;
+			break;
+
+		case ALWAR:
+		case ASTWCCC:
+		case AECIWX:
+		case AECOWX:
+		case AEIEIO:
+		case AICBI:
+		case AISYNC:
+		case ATLBIE:
+		case ATLBIEL:
+		case ASLBIA:
+		case ASLBIE:
+		case ASLBMFEE:
+		case ASLBMFEV:
+		case ASLBMTE:
+		case ADCBF:
+		case ADCBI:
+		case ADCBST:
+		case ADCBT:
+		case ADCBTST:
+		case ADCBZ:
+		case ASYNC:
+		case ATLBSYNC:
+		case APTESYNC:
+		case ATW:
+		case AWORD:
+		case ARFI:
+		case ARFCI:
+		case ARFID:
+		case AHRFID:
+			q = p;
+			p->mark |= LABEL|SYNC;
+			continue;
+
+		case AMOVW:
+		case AMOVWZ:
+		case AMOVD:
+			q = p;
+			switch(p->from.type) {
+			case D_MSR:
+			case D_SPR:
+			case D_FPSCR:
+			case D_CREG:
+			case D_DCR:
+				p->mark |= LABEL|SYNC;
+			}
+			switch(p->to.type) {
+			case D_MSR:
+			case D_SPR:
+			case D_FPSCR:
+			case D_CREG:
+			case D_DCR:
+				p->mark |= LABEL|SYNC;
+			}
+			continue;
+
+		case AFABS:
+		case AFABSCC:
+		case AFADD:
+		case AFADDCC:
+		case AFCTIW:
+		case AFCTIWCC:
+		case AFCTIWZ:
+		case AFCTIWZCC:
+		case AFDIV:
+		case AFDIVCC:
+		case AFMADD:
+		case AFMADDCC:
+		case AFMOVD:
+		case AFMOVDU:
+		/* case AFMOVDS: */
+		case AFMOVS:
+		case AFMOVSU:
+		/* case AFMOVSD: */
+		case AFMSUB:
+		case AFMSUBCC:
+		case AFMUL:
+		case AFMULCC:
+		case AFNABS:
+		case AFNABSCC:
+		case AFNEG:
+		case AFNEGCC:
+		case AFNMADD:
+		case AFNMADDCC:
+		case AFNMSUB:
+		case AFNMSUBCC:
+		case AFRSP:
+		case AFRSPCC:
+		case AFSUB:
+		case AFSUBCC:
+			q = p;
+			p->mark |= FLOAT;
+			continue;
+
+		case ABL:
+		case ABCL:
+			if(curtext != P)
+				curtext->mark &= ~LEAF;
+
+		case ABC:
+		case ABEQ:
+		case ABGE:
+		case ABGT:
+		case ABLE:
+		case ABLT:
+		case ABNE:
+		case ABR:
+		case ABVC:
+		case ABVS:
+
+			p->mark |= BRANCH;
+			q = p;
+			q1 = p->cond;
+			if(q1 != P) {
+				while(q1->as == ANOP) {
+					q1 = q1->link;
+					p->cond = q1;
+				}
+				if(!(q1->mark & LEAF))
+					q1->mark |= LABEL;
+			} else
+				p->mark |= LABEL;
+			q1 = p->link;
+			if(q1 != P)
+				q1->mark |= LABEL;
+			continue;
+
+		case AFCMPO:
+		case AFCMPU:
+			q = p;
+			p->mark |= FCMP|FLOAT;
+			continue;
+
+		case ARETURN:
+			/* special form of RETURN is BECOME */
+			if(p->from.type == D_CONST)
+				if(p->from.offset > curbecome)
+					curbecome = p->from.offset;
+
+			q = p;
+			if(p->link != P)
+				p->link->mark |= LABEL;
+			continue;
+
+		case ANOP:
+			q1 = p->link;
+			q->link = q1;		/* q is non-nop */
+			q1->mark |= p->mark;
+			continue;
+
+		default:
+			q = p;
+			continue;
+		}
+	}
+	if(curtext && curtext->from.sym) {
+		curtext->from.sym->frame = curframe;
+		curtext->from.sym->become = curbecome;
+		if(curbecome > maxbecome)
+			maxbecome = curbecome;
+	}
+
+	if(debug['b'])
+		print("max become = %d\n", maxbecome);
+	xdefine("ALEFbecome", STEXT, maxbecome);
+
+	curtext = 0;
+	for(p = firstp; p != P; p = p->link) {
+		switch(p->as) {
+		case ATEXT:
+			curtext = p;
+			break;
+
+		case ABL:	/* ABCL? */
+			if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
+				o = maxbecome - curtext->from.sym->frame;
+				if(o <= 0)
+					break;
+				/* calling a become or calling a variable */
+				if(p->to.sym == S || p->to.sym->become) {
+					curtext->to.offset += o;
+					if(debug['b']) {
+						curp = p;
+						print("%D calling %D increase %d\n",
+							&curtext->from, &p->to, o);
+					}
+				}
+			}
+			break;
+		}
+	}
+
+	curtext = P;
+	for(p = firstp; p != P; p = p->link) {
+		o = p->as;
+		switch(o) {
+		case ATEXT:
+			mov = AMOVD;
+			aoffset = 0;
+			curtext = p;
+			autosize = p->to.offset + 8;
+			if((p->mark & LEAF) && autosize <= 8)
+				autosize = 0;
+			else
+				if(autosize & 4)
+					autosize += 4;
+			p->to.offset = autosize - 8;
+
+			q = p;
+			if(autosize) {
+				/* use MOVDU to adjust R1 when saving R31, if autosize is small */
+				if(!(curtext->mark & LEAF) && autosize >= -BIG && autosize <= BIG) {
+					mov = AMOVDU;
+					aoffset = -autosize;
+				} else {
+					q = prg();
+					q->as = AADD;
+					q->line = p->line;
+					q->from.type = D_CONST;
+					q->from.offset = -autosize;
+					q->to.type = D_REG;
+					q->to.reg = REGSP;
+
+					q->link = p->link;
+					p->link = q;
+				}
+			} else
+			if(!(curtext->mark & LEAF)) {
+				if(debug['v'])
+					Bprint(&bso, "save suppressed in: %s\n",
+						curtext->from.sym->name);
+				curtext->mark |= LEAF;
+			}
+
+			if(curtext->mark & LEAF) {
+				if(curtext->from.sym)
+					curtext->from.sym->type = SLEAF;
+				break;
+			}
+
+			q1 = prg();
+			q1->as = mov;
+			q1->line = p->line;
+			q1->from.type = D_REG;
+			q1->from.reg = REGTMP;
+			q1->to.type = D_OREG;
+			q1->to.offset = aoffset;
+			q1->to.reg = REGSP;
+
+			q1->link = q->link;
+			q->link = q1;
+
+			q1 = prg();
+			q1->as = AMOVD;
+			q1->line = p->line;
+			q1->from.type = D_SPR;
+			q1->from.offset = D_LR;
+			q1->to.type = D_REG;
+			q1->to.reg = REGTMP;
+
+			q1->link = q->link;
+			q->link = q1;
+			break;
+
+		case ARETURN:
+			if(p->from.type == D_CONST)
+				goto become;
+			if(curtext->mark & LEAF) {
+				if(!autosize) {
+					p->as = ABR;
+					p->from = zprg.from;
+					p->to.type = D_SPR;
+					p->to.offset = D_LR;
+					p->mark |= BRANCH;
+					break;
+				}
+
+				p->as = AADD;
+				p->from.type = D_CONST;
+				p->from.offset = autosize;
+				p->to.type = D_REG;
+				p->to.reg = REGSP;
+
+				q = prg();
+				q->as = ABR;
+				q->line = p->line;
+				q->to.type = D_SPR;
+				q->to.offset = D_LR;
+				q->mark |= BRANCH;
+
+				q->link = p->link;
+				p->link = q;
+				break;
+			}
+
+			p->as = AMOVD;
+			p->from.type = D_OREG;
+			p->from.offset = 0;
+			p->from.reg = REGSP;
+			p->to.type = D_REG;
+			p->to.reg = REGTMP;
+
+			q = prg();
+			q->as = AMOVD;
+			q->line = p->line;
+			q->from.type = D_REG;
+			q->from.reg = REGTMP;
+			q->to.type = D_SPR;
+			q->to.offset = D_LR;
+
+			q->link = p->link;
+			p->link = q;
+			p = q;
+
+			if(autosize) {
+				q = prg();
+				q->as = AADD;
+				q->line = p->line;
+				q->from.type = D_CONST;
+				q->from.offset = autosize;
+				q->to.type = D_REG;
+				q->to.reg = REGSP;
+
+				q->link = p->link;
+				p->link = q;
+			}
+
+			q1 = prg();
+			q1->as = ABR;
+			q1->line = p->line;
+			q1->to.type = D_SPR;
+			q1->to.offset = D_LR;
+			q1->mark |= BRANCH;
+
+			q1->link = q->link;
+			q->link = q1;
+			break;
+
+		become:
+			if(curtext->mark & LEAF) {
+
+				q = prg();
+				q->line = p->line;
+				q->as = ABR;
+				q->from = zprg.from;
+				q->to = p->to;
+				q->cond = p->cond;
+				q->link = p->link;
+				q->mark |= BRANCH;
+				p->link = q;
+
+				p->as = AADD;
+				p->from = zprg.from;
+				p->from.type = D_CONST;
+				p->from.offset = autosize;
+				p->to = zprg.to;
+				p->to.type = D_REG;
+				p->to.reg = REGSP;
+
+				break;
+			}
+			q = prg();
+			q->line = p->line;
+			q->as = ABR;
+			q->from = zprg.from;
+			q->to = p->to;
+			q->cond = p->cond;
+			q->mark |= BRANCH;
+			q->link = p->link;
+			p->link = q;
+
+			q = prg();
+			q->line = p->line;
+			q->as = AADD;
+			q->from.type = D_CONST;
+			q->from.offset = autosize;
+			q->to.type = D_REG;
+			q->to.reg = REGSP;
+			q->link = p->link;
+			p->link = q;
+
+			q = prg();
+			q->line = p->line;
+			q->as = AMOVD;
+			q->line = p->line;
+			q->from.type = D_REG;
+			q->from.reg = REGTMP;
+			q->to.type = D_SPR;
+			q->to.offset = D_LR;
+			q->link = p->link;
+			p->link = q;
+
+			p->as = AMOVD;
+			p->from = zprg.from;
+			p->from.type = D_OREG;
+			p->from.offset = 0;
+			p->from.reg = REGSP;
+			p->to = zprg.to;
+			p->to.type = D_REG;
+			p->to.reg = REGTMP;
+
+			break;
+		}
+	}
+
+	if(debug['Q'] == 0)
+		return;
+
+	curtext = P;
+	q = P;		/* p - 1 */
+	q1 = firstp;	/* top of block */
+	o = 0;		/* count of instructions */
+	for(p = firstp; p != P; p = p1) {
+		p1 = p->link;
+		o++;
+		if(p->mark & NOSCHED){
+			if(q1 != p){
+				sched(q1, q);
+			}
+			for(; p != P; p = p->link){
+				if(!(p->mark & NOSCHED))
+					break;
+				q = p;
+			}
+			p1 = p;
+			q1 = p;
+			o = 0;
+			continue;
+		}
+		if(p->mark & (LABEL|SYNC)) {
+			if(q1 != p)
+				sched(q1, q);
+			q1 = p;
+			o = 1;
+		}
+		if(p->mark & (BRANCH|SYNC)) {
+			sched(q1, p);
+			q1 = p1;
+			o = 0;
+		}
+		if(o >= NSCHED) {
+			sched(q1, p);
+			q1 = p1;
+			o = 0;
+		}
+		q = p;
+	}
+}
+
+void
+addnop(Prog *p)
+{
+	Prog *q;
+
+	q = prg();
+	q->as = AOR;
+	q->line = p->line;
+	q->from.type = D_REG;
+	q->from.reg = REGZERO;
+	q->to.type = D_REG;
+	q->to.reg = REGZERO;
+
+	q->link = p->link;
+	p->link = q;
+}

+ 1602 - 0
sys/src/cmd/9l/obj.c

@@ -0,0 +1,1602 @@
+#define	EXTERN
+#include	"l.h"
+#include	<ar.h>
+
+#ifndef	DEFAULT
+#define	DEFAULT	'9'
+#endif
+
+char	*noname		= "<none>";
+char	symname[]	= SYMDEF;
+char	thechar		= '9';
+char	*thestring 	= "power64";
+
+char**	libdir;
+int	nlibdir	= 0;
+static	int	maxlibdir = 0;
+
+/*
+ * -H[0135] are unmaintained.
+ *	-H0 -T0x200000 -R0		was boot	[?]
+ *	-H1 -T4128 -R4096		was plan9 32-bit (q.out) format
+ *	-H2 -T0x100028 -R0x100000	is plan9 format
+ *	-H3 -T0x02010000 -D0x00001000	was raw
+ *	-H5 -T0x80010000 -t0x10000	was ELF32, phys = 10000, vaddr = 0x8001... [suspect]
+ *	-H9 -T0x100028 -R0x100000	is ELF64
+ *	-H10 -T0x100028 -R0x100000	is bizarre IBM/A2 ELF64 variant
+ */
+
+void
+usage(void)
+{
+	diag("usage: %s [-options] objects", argv0);
+	errorexit();
+}
+
+static int
+isobjfile(char *f)
+{
+	int n, v;
+	Biobuf *b;
+	char buf1[5], buf2[SARMAG];
+
+	b = Bopen(f, OREAD);
+	if(b == nil)
+		return 0;
+	n = Bread(b, buf1, 5);
+	if(n == 5 && (buf1[2] == 1 && buf1[3] == '<' || buf1[3] == 1 && buf1[4] == '<'))
+		v = 1;	/* good enough for our purposes */
+	else{
+		Bseek(b, 0, 0);
+		n = Bread(b, buf2, SARMAG);
+		v = n == SARMAG && strncmp(buf2, ARMAG, SARMAG) == 0;
+	}
+	Bterm(b);
+	return v;
+}
+
+void
+main(int argc, char *argv[])
+{
+	int c;
+	char *a;
+	char name[LIBNAMELEN];
+
+	Binit(&bso, 1, OWRITE);
+	cout = -1;
+	listinit();
+	outfile = 0;
+	nerrors = 0;
+	curtext = P;
+	HEADTYPE = -1;
+	INITTEXT = -1;
+	INITTEXTP = -1;
+	INITDAT = -1;
+	INITRND = -1;
+	INITENTRY = 0;
+
+	ARGBEGIN {
+	default:
+		c = ARGC();
+		if(c >= 0 && c < sizeof(debug))
+			debug[c]++;
+		break;
+	case 'o':
+		outfile = ARGF();
+		break;
+	case 'E':
+		a = ARGF();
+		if(a)
+			INITENTRY = a;
+		break;
+	case 'L':
+		addlibpath(EARGF(usage()));
+		break;
+	case 'T':
+		a = ARGF();
+		if(a)
+			INITTEXT = atolwhex(a);
+		break;
+	case 'P':
+		a = ARGF();
+		if(a)
+			INITTEXTP = atolwhex(a);
+		break;
+	case 'D':
+		a = ARGF();
+		if(a)
+			INITDAT = atolwhex(a);
+		break;
+	case 'R':
+		a = ARGF();
+		if(a)
+			INITRND = atolwhex(a);
+		break;
+	case 'H':
+		a = ARGF();
+		if(a)
+			HEADTYPE = atolwhex(a);
+		break;
+	case 'x':	/* produce export table */
+		doexp = 1;
+		if(argv[1] != nil && argv[1][0] != '-' && !isobjfile(argv[1]))
+			readundefs(ARGF(), SEXPORT);
+		break;
+	case 'u':	/* produce dynamically loadable module */
+		dlm = 1;
+		if(argv[1] != nil && argv[1][0] != '-' && !isobjfile(argv[1]))
+			readundefs(ARGF(), SIMPORT);
+		break;
+	} ARGEND
+	USED(argc);
+	if(*argv == 0)
+		usage();
+	if(!debug['9'] && !debug['U'] && !debug['B'])
+		debug[DEFAULT] = 1;
+	a = getenv("ccroot");
+	if(a != nil && *a != '\0') {
+		if(!fileexists(a)) {
+			diag("nonexistent $ccroot: %s", a);
+			errorexit();
+		}
+	}else
+		a = "";
+	snprint(name, sizeof(name), "%s/%s/lib", a, thestring);
+	addlibpath(name);
+	r0iszero = debug['0'] == 0;
+	if(HEADTYPE == -1) {
+		if(debug['U'])
+			HEADTYPE = 0;
+		if(debug['B'])
+			HEADTYPE = 1;
+		if(debug['9'])
+			HEADTYPE = 2;
+	}
+	switch(HEADTYPE) {
+	default:
+		diag("unknown -H option");
+		errorexit();
+
+	case 0:	/* boot */
+		HEADR = 32L;
+		if(INITTEXT == -1)
+			INITTEXT = 0x200000L;
+		if(INITDAT == -1)
+			INITDAT = 0;
+		if(INITRND == -1)
+			INITRND = 4096L;
+		break;
+	case 1:	/* plan 9 q.out */
+		HEADR = 32L;
+		if(INITTEXT == -1)
+			INITTEXT = 4128;
+		if(INITDAT == -1)
+			INITDAT = 0;
+		if(INITRND == -1)
+			INITRND = 4096;
+		break;
+	case 2:	/* plan 9 */
+		HEADR = 32L+8L;
+		if(INITTEXT == -1)
+			INITTEXT = 0x100000+HEADR;
+		if(INITDAT == -1)
+			INITDAT = 0;
+		if(INITRND == -1)
+			INITRND = 0x100000;
+		break;
+	case 3:	/* raw */
+		HEADR = 0;
+		if(INITTEXT == -1)
+			INITTEXT = 4128;
+		if(INITDAT == -1) {
+			INITDAT = 0;
+			INITRND = 4;
+		}
+		if(INITRND == -1)
+			INITRND = 0;
+		break;
+	case 5:	/* elf executable */
+		HEADR = rnd(Ehdr32sz+3*Phdr32sz, 16);
+		if(INITTEXT == -1)
+			INITTEXT = 0x00400000L+HEADR;
+		if(INITDAT == -1)
+			INITDAT = 0x10000000;
+		if(INITRND == -1)
+			INITRND = 0;
+		break;
+	case 9:	/* ELF64 executable */
+	case 10:
+		HEADR = rnd(Ehdr64sz+3*Phdr64sz, 16);
+		if(INITTEXT == -1)
+			INITTEXT = 0x100000+HEADR;
+		if(INITDAT == -1)
+			INITDAT = 0;
+		if(INITRND == -1)
+			INITRND = 0x100000;
+		break;
+	}
+	if (INITTEXTP == -1)
+		INITTEXTP = INITTEXT;
+	if(INITDAT != 0 && INITRND != 0)
+		print("warning: -D0x%llux is ignored because of -R0x%lux\n",
+			INITDAT, INITRND);
+	if(debug['v'])
+		Bprint(&bso, "HEADER = -H0x%x -T0x%llux -D0x%llux -R0x%lux\n",
+			HEADTYPE, INITTEXT, INITDAT, INITRND);
+	Bflush(&bso);
+	zprg.as = AGOK;
+	zprg.reg = NREG;
+	zprg.from.name = D_NONE;
+	zprg.from.type = D_NONE;
+	zprg.from.reg = NREG;
+	zprg.from3 = zprg.from;
+	zprg.to = zprg.from;
+	buildop();
+	histgen = 0;
+	textp = P;
+	datap = P;
+	pc = 0;
+	dtype = 4;
+	if(outfile == 0)
+		outfile = "9.out";
+	cout = create(outfile, 1, 0775);
+	if(cout < 0) {
+		diag("cannot create %s: %r", outfile);
+		errorexit();
+	}
+	nuxiinit();
+	version = 0;
+	cbp = buf.cbuf;
+	cbc = sizeof(buf.cbuf);
+	firstp = prg();
+	lastp = firstp;
+
+	if(INITENTRY == 0) {
+		INITENTRY = "_main";
+		if(debug['p'])
+			INITENTRY = "_mainp";
+		if(!debug['l'])
+			lookup(INITENTRY, 0)->type = SXREF;
+	} else {
+		/*
+		 * It's possibly a number handed in by -En (this is
+		 * the test made by entryvalue() when creating the appropriate
+		 * header). Calling lookup() on that will create the symbol 'n'
+		 * printing an undef() diagnostic later.
+		 */
+		a = INITENTRY;
+		if(!(*a >= '0' && *a <= '9'))
+			lookup(INITENTRY, 0)->type = SXREF;
+	}
+
+	while(*argv)
+		objfile(*argv++);
+	if(!debug['l'])
+		loadlib();
+	firstp = firstp->link;
+	if(firstp == P)
+		goto out;
+	if(doexp || dlm){
+		EXPTAB = "_exporttab";
+		zerosig(EXPTAB);
+		zerosig("etext");
+		zerosig("edata");
+		zerosig("end");
+		if(dlm){
+			import();
+			HEADTYPE = 2;
+			INITTEXT = 0;
+			INITDAT = 0;
+			INITRND = 8;
+			INITENTRY = EXPTAB;
+		}
+		export();
+	}
+	patch();
+	if(debug['p'])
+		if(debug['1'])
+			doprof1();
+		else
+			doprof2();
+	dodata();
+	follow();
+	if(firstp == P)
+		goto out;
+	noops();
+	span();
+	asmb();
+	undef();
+
+out:
+	if(debug['v']) {
+		Bprint(&bso, "%5.2f cpu time\n", cputime());
+		Bprint(&bso, "%ld memory used\n", tothunk);
+		Bprint(&bso, "%d sizeof adr\n", sizeof(Adr));
+		Bprint(&bso, "%d sizeof prog\n", sizeof(Prog));
+	}
+	errorexit();
+}
+
+void
+addlibpath(char *arg)
+{
+	char **p;
+
+	if(nlibdir >= maxlibdir) {
+		if(maxlibdir == 0)
+			maxlibdir = 8;
+		else
+			maxlibdir *= 2;
+		p = malloc(maxlibdir*sizeof(*p));
+		if(p == nil) {
+			diag("out of memory");
+			errorexit();
+		}
+		memmove(p, libdir, nlibdir*sizeof(*p));
+		free(libdir);
+		libdir = p;
+	}
+	libdir[nlibdir++] = strdup(arg);
+}
+
+char*
+findlib(char *file)
+{
+	int i;
+	char name[LIBNAMELEN];
+
+	for(i = 0; i < nlibdir; i++) {
+		snprint(name, sizeof(name), "%s/%s", libdir[i], file);
+		if(fileexists(name))
+			return libdir[i];
+	}
+	return nil;
+}
+
+void
+loadlib(void)
+{
+	int i;
+	long h;
+	Sym *s;
+
+loop:
+	xrefresolv = 0;
+	for(i=0; i<libraryp; i++) {
+		if(debug['v'])
+			Bprint(&bso, "%5.2f autolib: %s (from %s)\n", cputime(), library[i], libraryobj[i]);
+		objfile(library[i]);
+	}
+	if(xrefresolv)
+	for(h=0; h<nelem(hash); h++)
+	for(s = hash[h]; s != S; s = s->link)
+		if(s->type == SXREF)
+			goto loop;
+}
+
+void
+errorexit(void)
+{
+
+	Bflush(&bso);
+	if(nerrors) {
+		if(cout >= 0)
+			remove(outfile);
+		exits("error");
+	}
+	exits(0);
+}
+
+void
+objfile(char *file)
+{
+	long off, esym, cnt, l;
+	int f, work;
+	Sym *s;
+	char magbuf[SARMAG];
+	char name[LIBNAMELEN], pname[LIBNAMELEN];
+	struct ar_hdr arhdr;
+	char *e, *start, *stop;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f ldobj: %s\n", cputime(), file);
+	Bflush(&bso);
+	if(file[0] == '-' && file[1] == 'l') {
+		snprint(pname, sizeof(pname), "lib%s.a", file+2);
+		e = findlib(pname);
+		if(e == nil) {
+			diag("cannot find library: %s", file);
+			errorexit();
+		}
+		snprint(name, sizeof(name), "%s/%s", e, pname);
+		file = name;
+	}
+	f = open(file, 0);
+	if(f < 0) {
+		diag("cannot open %s: %r", file);
+		errorexit();
+	}
+	l = read(f, magbuf, SARMAG);
+	if(l != SARMAG || strncmp(magbuf, ARMAG, SARMAG)){
+		/* load it as a regular file */
+		l = seek(f, 0L, 2);
+		seek(f, 0L, 0);
+		ldobj(f, l, file);
+		close(f);
+		return;
+	}
+
+	l = read(f, &arhdr, SAR_HDR);
+	if(l != SAR_HDR) {
+		diag("%s: short read on archive file symbol header", file);
+		goto out;
+	}
+	if(strncmp(arhdr.name, symname, strlen(symname))) {
+		diag("%s: first entry not symbol header", file);
+		goto out;
+	}
+
+	esym = SARMAG + SAR_HDR + atolwhex(arhdr.size);
+	off = SARMAG + SAR_HDR;
+
+	/*
+	 * just bang the whole symbol file into memory
+	 */
+	seek(f, off, 0);
+	cnt = esym - off;
+	start = malloc(cnt + 10);
+	cnt = read(f, start, cnt);
+	if(cnt <= 0){
+		close(f);
+		return;
+	}
+	stop = &start[cnt];
+	memset(stop, 0, 10);
+
+	work = 1;
+	while(work){
+		if(debug['v'])
+			Bprint(&bso, "%5.2f library pass: %s\n", cputime(), file);
+		Bflush(&bso);
+		work = 0;
+		for(e = start; e < stop; e = strchr(e+5, 0) + 1) {
+			s = lookup(e+5, 0);
+			if(s->type != SXREF)
+				continue;
+			sprint(pname, "%s(%s)", file, s->name);
+			if(debug['v'])
+				Bprint(&bso, "%5.2f library: %s\n", cputime(), pname);
+			Bflush(&bso);
+			l = e[1] & 0xff;
+			l |= (e[2] & 0xff) << 8;
+			l |= (e[3] & 0xff) << 16;
+			l |= (e[4] & 0xff) << 24;
+			seek(f, l, 0);
+			/* need readn to read the dumps (at least) */
+			l = readn(f, &arhdr, SAR_HDR);
+			if(l != SAR_HDR)
+				goto bad;
+			if(strncmp(arhdr.fmag, ARFMAG, sizeof(arhdr.fmag)))
+				goto bad;
+			l = atolwhex(arhdr.size);
+			ldobj(f, l, pname);
+			if(s->type == SXREF) {
+				diag("%s: failed to load: %s", file, s->name);
+				errorexit();
+			}
+			work = 1;
+			xrefresolv = 1;
+		}
+	}
+	return;
+
+bad:
+	diag("%s: bad or out of date archive", file);
+out:
+	close(f);
+}
+
+int
+zaddr(uchar *p, Adr *a, Sym *h[])
+{
+	int i, c;
+	int l;
+	Sym *s;
+	Auto *u;
+
+	c = p[2];
+	if(c < 0 || c > NSYM){
+		print("sym out of range: %d\n", c);
+		p[0] = AEND+1;
+		return 0;
+	}
+	a->type = p[0];
+	a->reg = p[1];
+	a->sym = h[c];
+	a->name = p[3];
+	c = 4;
+
+	if(a->reg > NREG) {
+		print("register out of range %d\n", a->reg);
+		p[0] = AEND+1;
+		return 0;	/*  force real diagnostic */
+	}
+
+	switch(a->type) {
+	default:
+		print("unknown type %d\n", a->type);
+		p[0] = AEND+1;
+		return 0;	/* force real diagnostic */
+
+	case D_NONE:
+	case D_REG:
+	case D_FREG:
+	case D_CREG:
+	case D_FPSCR:
+	case D_MSR:
+	case D_OPT:
+		break;
+
+	case D_SPR:
+	case D_DCR:
+	case D_BRANCH:
+	case D_OREG:
+	case D_CONST:
+		l = p[4] | (p[5]<<8) |
+			(p[6]<<16) | (p[7]<<24);
+		a->offset = l;
+		c += 4;
+		break;
+
+	case D_DCONST:
+		l = p[4] | (p[5]<<8) | (p[6]<<16) | (p[7]<<24);
+		a->offset = (uvlong)l & 0xFFFFFFFFUL;
+		l = p[8] | (p[9]<<8) | (p[10]<<16) | (p[11]<<24);
+		a->offset |= (vlong)l << 32;
+		c += 8;
+		a->type = D_CONST;
+		break;
+
+	case D_SCONST:
+		memmove(a->sval, p+4, NSNAME);
+		c += NSNAME;
+		break;
+
+	case D_FCONST:
+		a->ieee.l = p[4] | (p[5]<<8) |
+			(p[6]<<16) | (p[7]<<24);
+		a->ieee.h = p[8] | (p[9]<<8) |
+			(p[10]<<16) | (p[11]<<24);
+		c += 8;
+		break;
+	}
+	s = a->sym;
+	if(s == S)
+		goto out;
+	i = a->name;
+	if(i != D_AUTO && i != D_PARAM)
+		goto out;
+
+	l = a->offset;
+	for(u=curauto; u; u=u->link)
+		if(u->sym == s)
+		if(u->type == i) {
+			if(u->aoffset > l)
+				u->aoffset = l;
+			goto out;
+		}
+
+	u = malloc(sizeof(Auto));
+
+	u->link = curauto;
+	curauto = u;
+	u->sym = s;
+	u->aoffset = l;
+	u->type = i;
+out:
+	return c;
+}
+
+void
+addlib(char *obj)
+{
+	char fn1[LIBNAMELEN], fn2[LIBNAMELEN], comp[LIBNAMELEN], *p, *name;
+	int i, search;
+
+	if(histfrogp <= 0)
+		return;
+
+	name = fn1;
+	search = 0;
+	if(histfrog[0]->name[1] == '/') {
+		sprint(name, "");
+		i = 1;
+	} else if(histfrog[0]->name[1] == '.') {
+		sprint(name, ".");
+		i = 0;
+	} else {
+		sprint(name, "");
+		i = 0;
+		search = 1;
+	}
+
+	for(; i<histfrogp; i++) {
+		snprint(comp, sizeof comp, histfrog[i]->name+1);
+		for(;;) {
+			p = strstr(comp, "$O");
+			if(p == 0)
+				break;
+			memmove(p+1, p+2, strlen(p+2)+1);
+			p[0] = thechar;
+		}
+		for(;;) {
+			p = strstr(comp, "$M");
+			if(p == 0)
+				break;
+			if(strlen(comp)+strlen(thestring)-2+1 >= sizeof comp) {
+				diag("library component too long");
+				return;
+			}
+			memmove(p+strlen(thestring), p+2, strlen(p+2)+1);
+			memmove(p, thestring, strlen(thestring));
+		}
+		if(strlen(fn1) + strlen(comp) + 3 >= sizeof(fn1)) {
+			diag("library component too long");
+			return;
+		}
+		if(i > 0 || !search)
+			strcat(fn1, "/");
+		strcat(fn1, comp);
+	}
+
+	cleanname(name);
+
+	if(search){
+		p = findlib(name);
+		if(p != nil){
+			snprint(fn2, sizeof(fn2), "%s/%s", p, name);
+			name = fn2;
+		}
+	}
+
+	for(i=0; i<libraryp; i++)
+		if(strcmp(name, library[i]) == 0)
+			return;
+	if(libraryp == nelem(library)){
+		diag("too many autolibs; skipping %s", name);
+		return;
+	}
+
+	p = malloc(strlen(name) + 1);
+	strcpy(p, name);
+	library[libraryp] = p;
+	p = malloc(strlen(obj) + 1);
+	strcpy(p, obj);
+	libraryobj[libraryp] = p;
+	libraryp++;
+}
+
+void
+addhist(long line, int type)
+{
+	Auto *u;
+	Sym *s;
+	int i, j, k;
+
+	u = malloc(sizeof(Auto));
+	s = malloc(sizeof(Sym));
+	s->name = malloc(2*(histfrogp+1) + 1);
+
+	u->sym = s;
+	u->type = type;
+	u->aoffset = line;
+	u->link = curhist;
+	curhist = u;
+
+	j = 1;
+	for(i=0; i<histfrogp; i++) {
+		k = histfrog[i]->value;
+		s->name[j+0] = k>>8;
+		s->name[j+1] = k;
+		j += 2;
+	}
+}
+
+void
+histtoauto(void)
+{
+	Auto *l;
+
+	while(l = curhist) {
+		curhist = l->link;
+		l->link = curauto;
+		curauto = l;
+	}
+}
+
+void
+collapsefrog(Sym *s)
+{
+	int i;
+
+	/*
+	 * bad encoding of path components only allows
+	 * MAXHIST components. if there is an overflow,
+	 * first try to collapse xxx/..
+	 */
+	for(i=1; i<histfrogp; i++)
+		if(strcmp(histfrog[i]->name+1, "..") == 0) {
+			memmove(histfrog+i-1, histfrog+i+1,
+				(histfrogp-i-1)*sizeof(histfrog[0]));
+			histfrogp--;
+			goto out;
+		}
+
+	/*
+	 * next try to collapse .
+	 */
+	for(i=0; i<histfrogp; i++)
+		if(strcmp(histfrog[i]->name+1, ".") == 0) {
+			memmove(histfrog+i, histfrog+i+1,
+				(histfrogp-i-1)*sizeof(histfrog[0]));
+			goto out;
+		}
+
+	/*
+	 * last chance, just truncate from front
+	 */
+	memmove(histfrog+0, histfrog+1,
+		(histfrogp-1)*sizeof(histfrog[0]));
+
+out:
+	histfrog[histfrogp-1] = s;
+}
+
+void
+nopout(Prog *p)
+{
+	p->as = ANOP;
+	p->from.type = D_NONE;
+	p->to.type = D_NONE;
+}
+
+uchar*
+readsome(int f, uchar *buf, uchar *good, uchar *stop, int max)
+{
+	int n;
+
+	n = stop - good;
+	memmove(buf, good, stop - good);
+	stop = buf + n;
+	n = MAXIO - n;
+	if(n > max)
+		n = max;
+	n = read(f, stop, n);
+	if(n <= 0)
+		return 0;
+	return stop + n;
+}
+
+void
+ldobj(int f, long c, char *pn)
+{
+	Prog *p, *t;
+	Sym *h[NSYM], *s, *di;
+	int v, o, r, skip;
+	vlong ipc;
+	uchar *bloc, *bsize, *stop;
+	ulong sig;
+	static int files;
+	static char **filen;
+	char **nfilen;
+
+	if((files&15) == 0){
+		nfilen = malloc((files+16)*sizeof(char*));
+		memmove(nfilen, filen, files*sizeof(char*));
+		free(filen);
+		filen = nfilen;
+	}
+	filen[files++] = strdup(pn);
+
+	bsize = buf.xbuf;
+	bloc = buf.xbuf;
+	di = S;
+
+newloop:
+	memset(h, 0, sizeof(h));
+	histfrogp = 0;
+	version++;
+	ipc = pc;
+	skip = 0;
+
+loop:
+	if(c <= 0)
+		goto eof;
+	r = bsize - bloc;
+	if(r < 100 && r < c) {		/* enough for largest prog */
+		bsize = readsome(f, buf.xbuf, bloc, bsize, c);
+		if(bsize == 0)
+			goto eof;
+		bloc = buf.xbuf;
+		goto loop;
+	}
+	o = bloc[0] | (bloc[1] << 8);		/* as */
+	if(o <= 0 || o >= ALAST) {
+		diag("%s: opcode out of range %d", pn, o);
+		print("	probably not a .%c file\n", thechar);
+		errorexit();
+	}
+	if(o == ANAME || o == ASIGNAME) {
+		sig = 0;
+		if(o == ASIGNAME) {
+			sig = bloc[2] | (bloc[3]<<8) | (bloc[4]<<16) | (bloc[5]<<24);
+			bloc += 4;
+			c -= 4;
+		}
+		stop = memchr(&bloc[4], 0, bsize-&bloc[4]);
+		if(stop == 0){
+			bsize = readsome(f, buf.xbuf, bloc, bsize, c);
+			if(bsize == 0)
+				goto eof;
+			bloc = buf.xbuf;
+			stop = memchr(&bloc[4], 0, bsize-&bloc[4]);
+			if(stop == 0){
+				fprint(2, "%s: name too long\n", pn);
+				errorexit();
+			}
+		}
+		v = bloc[2];	/* type */
+		o = bloc[3];	/* sym */
+		bloc += 4;
+		c -= 4;
+
+		r = 0;
+		if(v == D_STATIC)
+			r = version;
+		s = lookup((char*)bloc, r);
+		c -= &stop[1] - bloc;
+		bloc = stop + 1;
+		if(sig != 0){
+			if(s->sig != 0 && s->sig != sig)
+				diag("incompatible type signatures %lux(%s) and %lux(%s) for %s", s->sig, filen[s->file], sig, pn, s->name);
+			s->sig = sig;
+			s->file = files-1;
+		}
+
+
+		if(debug['W'])
+			print("	ANAME	%s\n", s->name);
+		h[o] = s;
+		if((v == D_EXTERN || v == D_STATIC) && s->type == 0)
+			s->type = SXREF;
+		if(v == D_FILE) {
+			if(s->type != SFILE) {
+				histgen++;
+				s->type = SFILE;
+				s->value = histgen;
+			}
+			if(histfrogp < MAXHIST) {
+				histfrog[histfrogp] = s;
+				histfrogp++;
+			} else
+				collapsefrog(s);
+		}
+		goto loop;
+	}
+
+	if(nhunk < sizeof(Prog))
+		gethunk();
+	p = (Prog*)hunk;
+	nhunk -= sizeof(Prog);
+	hunk += sizeof(Prog);
+
+	p->as = o;
+	p->reg = bloc[2] & 0x3f;
+	if(bloc[2] & 0x80)
+		p->mark = NOSCHED;
+	p->line = bloc[3] | (bloc[4]<<8) | (bloc[5]<<16) | (bloc[6]<<24);
+	r = zaddr(bloc+7, &p->from, h) + 7;
+	if(bloc[2] & 0x40)
+		r += zaddr(bloc+r, &p->from3, h);
+	else
+		p->from3 = zprg.from3;
+	r += zaddr(bloc+r, &p->to, h);
+	bloc += r;
+	c -= r;
+
+	if(p->reg < 0 || p->reg > NREG)
+		diag("register out of range %d", p->reg);
+
+	p->link = P;
+	p->cond = P;
+
+	if(debug['W'])
+		print("%P\n", p);
+
+	switch(o) {
+	case AHISTORY:
+		if(p->to.offset == -1) {
+			addlib(pn);
+			histfrogp = 0;
+			goto loop;
+		}
+		addhist(p->line, D_FILE);		/* 'z' */
+		if(p->to.offset)
+			addhist(p->to.offset, D_FILE1);	/* 'Z' */
+		histfrogp = 0;
+		goto loop;
+
+	case AEND:
+		histtoauto();
+		if(curtext != P)
+			curtext->to.autom = curauto;
+		curauto = 0;
+		curtext = P;
+		if(c)
+			goto newloop;
+		return;
+
+	case AGLOBL:
+		s = p->from.sym;
+		if(s == S) {
+			diag("GLOBL must have a name\n%P", p);
+			errorexit();
+		}
+		if(s->type == 0 || s->type == SXREF) {
+			s->type = SBSS;
+			s->value = 0;
+		}
+		if(s->type != SBSS) {
+			diag("redefinition: %s\n%P", s->name, p);
+			s->type = SBSS;
+			s->value = 0;
+		}
+		if(p->to.offset > s->value)
+			s->value = p->to.offset;
+		break;
+
+	case ADYNT:
+		if(p->to.sym == S) {
+			diag("DYNT without a sym\n%P", p);
+			break;
+		}
+		di = p->to.sym;
+		p->reg = 4;
+		if(di->type == SXREF) {
+			if(debug['z'])
+				Bprint(&bso, "%P set to %d\n", p, dtype);
+			di->type = SCONST;
+			di->value = dtype;
+			dtype += 4;
+		}
+		if(p->from.sym == S)
+			break;
+
+		p->from.offset = di->value;
+		p->from.sym->type = SDATA;
+		if(curtext == P) {
+			diag("DYNT not in text: %P", p);
+			break;
+		}
+		p->to.sym = curtext->from.sym;
+		p->to.type = D_CONST;
+		p->link = datap;
+		datap = p;
+		break;
+
+	case AINIT:
+		if(p->from.sym == S) {
+			diag("INIT without a sym\n%P", p);
+			break;
+		}
+		if(di == S) {
+			diag("INIT without previous DYNT\n%P", p);
+			break;
+		}
+		p->from.offset = di->value;
+		p->from.sym->type = SDATA;
+		p->link = datap;
+		datap = p;
+		break;
+
+	case ADATA:
+		p->link = datap;
+		datap = p;
+		break;
+
+	case AGOK:
+		diag("unknown opcode\n%P", p);
+		p->pc = pc;
+		pc++;
+		break;
+
+	case ATEXT:
+		if(curtext != P) {
+			histtoauto();
+			curtext->to.autom = curauto;
+			curauto = 0;
+		}
+		curtext = p;
+		autosize = (p->to.offset+7L) & ~7L;
+		p->to.offset = autosize;
+		autosize += 8;
+		s = p->from.sym;
+		if(s == S) {
+			diag("TEXT must have a name\n%P", p);
+			errorexit();
+		}
+		if(s->type != 0 && s->type != SXREF) {
+			if(p->reg & DUPOK) {
+				skip = 1;
+				goto casedef;
+			}
+			diag("redefinition: %s\n%P", s->name, p);
+		}
+		s->type = STEXT;
+		s->value = pc;
+		if(textp != P) {
+			for(t = textp; t->cond != P; t = t->cond)
+				;
+			t->cond = p;
+		} else
+			textp = p;
+		lastp->link = p;
+		lastp = p;
+		p->pc = pc;
+		pc++;
+		break;
+
+	case AFMOVS:
+		if(skip)
+			goto casedef;
+
+		if(p->from.type == D_FCONST) {
+			/* size sb 9 max */
+			sprint(literal, "$%lux", ieeedtof(&p->from.ieee));
+			s = lookup(literal, 0);
+			if(s->type == 0) {
+				s->type = SBSS;
+				s->value = 4;
+				t = prg();
+				t->as = ADATA;
+				t->line = p->line;
+				t->from.type = D_OREG;
+				t->from.sym = s;
+				t->from.name = D_EXTERN;
+				t->reg = 4;
+				t->to = p->from;
+				t->link = datap;
+				datap = t;
+			}
+			p->from.type = D_OREG;
+			p->from.sym = s;
+			p->from.name = D_EXTERN;
+			p->from.offset = 0;
+		}
+		goto casedef;
+
+	case AFMOVD:
+		if(skip)
+			goto casedef;
+		if(p->from.type == D_FCONST) {
+			/* size sb 18 max */
+			sprint(literal, "$%lux.%lux",
+				p->from.ieee.l, p->from.ieee.h);
+			s = lookup(literal, 0);
+			if(s->type == 0) {
+				s->type = SBSS;
+				s->value = 8;
+				t = prg();
+				t->as = ADATA;
+				t->line = p->line;
+				t->from.type = D_OREG;
+				t->from.sym = s;
+				t->from.name = D_EXTERN;
+				t->reg = 8;
+				t->to = p->from;
+				t->link = datap;
+				datap = t;
+			}
+			p->from.type = D_OREG;
+			p->from.sym = s;
+			p->from.name = D_EXTERN;
+			p->from.offset = 0;
+		}
+		goto casedef;
+
+	case ASUBC:
+		if(p->from.type == D_CONST) {
+			p->from.offset = -p->from.offset;
+			p->as = AADDC;
+		}
+		goto casedef;
+
+	case ASUBCCC:
+		if(p->from.type == D_CONST) {
+			p->from.offset = -p->from.offset;
+			p->as = AADDCCC;
+		}
+		goto casedef;
+
+	case ASUB:
+		if(p->from.type == D_CONST) {
+			p->from.offset = -p->from.offset;
+			p->as = AADD;
+		}
+		goto casedef;
+
+	case AMOVD:
+		if(skip)
+			goto casedef;
+		if(p->from.type == D_CONST && !isuint32(p->from.offset) && !isint32(p->from.offset)) {
+			/* make a literal */
+			sprint(literal, "$%llux", p->from.offset);
+			s = lookup(literal, 0);
+			if(s->type == 0) {
+				s->type = SBSS;
+				s->value = 8;
+				t = prg();
+				t->as = ADATA;
+				t->line = p->line;
+				t->from.type = D_OREG;
+				t->from.sym = s;
+				t->from.name = D_EXTERN;
+				t->reg = 8;
+				t->to = p->from;
+				t->link = datap;
+				datap = t;
+			}
+			p->from.type = D_OREG;
+			p->from.sym = s;
+			p->from.name = D_EXTERN;
+			p->from.offset = 0;
+		}
+		goto casedef;
+
+	default:
+	casedef:
+		if(skip)
+			nopout(p);
+
+		if(p->to.type == D_BRANCH)
+			p->to.offset += ipc;
+		lastp->link = p;
+		lastp = p;
+		p->pc = pc;
+		pc++;
+		break;
+	}
+	goto loop;
+
+eof:
+	diag("truncated object file: %s", pn);
+}
+
+Sym*
+lookup(char *symb, int v)
+{
+	Sym *s;
+	char *p;
+	long h;
+	int c, l;
+
+	h = v;
+	for(p=symb; c = *p; p++)
+		h = h+h+h + c;
+	l = (p - symb) + 1;
+	h &= 0xffffff;
+	h %= NHASH;
+	for(s = hash[h]; s != S; s = s->link)
+		if(s->version == v)
+		if(memcmp(s->name, symb, l) == 0)
+			return s;
+
+	while(nhunk < sizeof(Sym))
+		gethunk();
+	s = (Sym*)hunk;
+	nhunk -= sizeof(Sym);
+	hunk += sizeof(Sym);
+
+	s->name = malloc(l + 1);
+	memmove(s->name, symb, l);
+
+	s->link = hash[h];
+	s->type = 0;
+	s->version = v;
+	s->value = 0;
+	s->sig = 0;
+	hash[h] = s;
+	return s;
+}
+
+Prog*
+prg(void)
+{
+	Prog *p;
+	int n;
+
+	n = (sizeof(Prog) + 3) & ~3;
+	while(nhunk < n)
+		gethunk();
+
+	p = (Prog*)hunk;
+	nhunk -= n;
+	hunk += n;
+
+	*p = zprg;
+	return p;
+}
+
+void
+gethunk(void)
+{
+	char *h;
+	long nh;
+
+	nh = NHUNK;
+	if(tothunk >= 5L*NHUNK) {
+		nh = 5L*NHUNK;
+		if(tothunk >= 25L*NHUNK)
+			nh = 25L*NHUNK;
+	}
+	h = mysbrk(nh);
+	if(h == (char *)-1) {
+		diag("out of memory");
+		errorexit();
+	}
+
+	hunk = h;
+	nhunk = nh;
+	tothunk += nh;
+}
+
+void
+doprof1(void)
+{
+	Sym *s;
+	long n;
+	Prog *p, *q;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f profile 1\n", cputime());
+	Bflush(&bso);
+	s = lookup("__mcount", 0);
+	n = 1;
+	for(p = firstp->link; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			q = prg();
+			q->line = p->line;
+			q->link = datap;
+			datap = q;
+			q->as = ADATA;
+			q->from.type = D_OREG;
+			q->from.name = D_EXTERN;
+			q->from.offset = n*4;		/* TO DO: check */
+			q->from.sym = s;
+			q->reg = 4;
+			q->to = p->from;
+			q->to.type = D_CONST;
+
+			q = prg();
+			q->line = p->line;
+			q->pc = p->pc;
+			q->link = p->link;
+			p->link = q;
+			p = q;
+			p->as = AMOVW;
+			p->from.type = D_OREG;
+			p->from.name = D_EXTERN;
+			p->from.sym = s;
+			p->from.offset = n*4 + 4;
+			p->to.type = D_REG;
+			p->to.reg = REGTMP;
+
+			q = prg();
+			q->line = p->line;
+			q->pc = p->pc;
+			q->link = p->link;
+			p->link = q;
+			p = q;
+			p->as = AADD;
+			p->from.type = D_CONST;
+			p->from.offset = 1;
+			p->to.type = D_REG;
+			p->to.reg = REGTMP;
+
+			q = prg();
+			q->line = p->line;
+			q->pc = p->pc;
+			q->link = p->link;
+			p->link = q;
+			p = q;
+			p->as = AMOVW;
+			p->from.type = D_REG;
+			p->from.reg = REGTMP;
+			p->to.type = D_OREG;
+			p->to.name = D_EXTERN;
+			p->to.sym = s;
+			p->to.offset = n*4 + 4;
+
+			n += 2;
+			continue;
+		}
+	}
+	q = prg();
+	q->line = 0;
+	q->link = datap;
+	datap = q;
+
+	q->as = ADATA;
+	q->from.type = D_OREG;
+	q->from.name = D_EXTERN;
+	q->from.sym = s;
+	q->reg = 4;
+	q->to.type = D_CONST;
+	q->to.offset = n;
+
+	s->type = SBSS;
+	s->value = n*4;
+}
+
+void
+doprof2(void)
+{
+	Sym *s2, *s4;
+	Prog *p, *q, *ps2, *ps4;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f profile 2\n", cputime());
+	Bflush(&bso);
+
+	s2 = lookup("_profin", 0);
+	s4 = lookup("_profout", 0);
+	if(s2->type != STEXT || s4->type != STEXT) {
+		diag("_profin/_profout not defined");
+		return;
+	}
+
+	ps2 = P;
+	ps4 = P;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			if(p->from.sym == s2) {
+				p->reg = 1;
+				ps2 = p;
+			}
+			if(p->from.sym == s4) {
+				p->reg = 1;
+				ps4 = p;
+			}
+		}
+	}
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			curtext = p;
+
+			if(p->reg & NOPROF) {	/* dont profile */
+				for(;;) {
+					q = p->link;
+					if(q == P)
+						break;
+					if(q->as == ATEXT)
+						break;
+					p = q;
+				}
+				continue;
+			}
+
+			/*
+			 * BL	profin
+			 */
+			q = prg();
+			q->line = p->line;
+			q->pc = p->pc;
+			q->link = p->link;
+			p->link = q;
+			p = q;
+			p->as = ABL;
+			p->to.type = D_BRANCH;
+			p->cond = ps2;
+			p->to.sym = s2;
+
+			continue;
+		}
+		if(p->as == ARETURN) {
+
+			/*
+			 * RETURN
+			 */
+			q = prg();
+			q->as = ARETURN;
+			q->from = p->from;
+			q->to = p->to;
+			q->link = p->link;
+			p->link = q;
+
+			/*
+			 * BL profout
+			 */
+			p->as = ABL;
+			p->from = zprg.from;
+			p->to = zprg.to;
+			p->to.type = D_BRANCH;
+			p->cond = ps4;
+			p->to.sym = s4;
+
+			p = q;
+
+			continue;
+		}
+	}
+}
+
+void
+nuxiinit(void)
+{
+	int i, c;
+
+	for(i=0; i<4; i++) {
+		c = find1(0x01020304L, i+1);
+		if(i >= 2)
+			inuxi2[i-2] = c;
+		if(i >= 3)
+			inuxi1[i-3] = c;
+		inuxi4[i] = c;
+		inuxi8[i] = c+4;
+		inuxi8[i+4] = c;
+		fnuxi4[i] = c;
+		fnuxi8[i] = c+4;
+		fnuxi8[i+4] = c;
+	}
+	if(debug['v']) {
+		Bprint(&bso, "inuxi = ");
+		for(i=0; i<1; i++)
+			Bprint(&bso, "%d", inuxi1[i]);
+		Bprint(&bso, " ");
+		for(i=0; i<2; i++)
+			Bprint(&bso, "%d", inuxi2[i]);
+		Bprint(&bso, " ");
+		for(i=0; i<4; i++)
+			Bprint(&bso, "%d", inuxi4[i]);
+		Bprint(&bso, " ");
+		for(i=0; i<8; i++)
+			Bprint(&bso, "%d", inuxi8[i]);
+		Bprint(&bso, "\nfnuxi = ");
+		for(i=0; i<4; i++)
+			Bprint(&bso, "%d", fnuxi4[i]);
+		Bprint(&bso, "\nfnuxi = ");
+		for(i=0; i<8; i++)
+			Bprint(&bso, "%d", fnuxi8[i]);
+		Bprint(&bso, "\n");
+	}
+	Bflush(&bso);
+}
+
+int
+find1(long l, int c)
+{
+	char *p;
+	int i;
+
+	p = (char*)&l;
+	for(i=0; i<4; i++)
+		if(*p++ == c)
+			return i;
+	return 0;
+}
+
+long
+ieeedtof(Ieee *ieeep)
+{
+	int exp;
+	long v;
+
+	if(ieeep->h == 0)
+		return 0;
+	exp = (ieeep->h>>20) & ((1L<<11)-1L);
+	exp -= (1L<<10) - 2L;
+	v = (ieeep->h & 0xfffffL) << 3;
+	v |= (ieeep->l >> 29) & 0x7L;
+	if((ieeep->l >> 28) & 1) {
+		v++;
+		if(v & 0x800000L) {
+			v = (v & 0x7fffffL) >> 1;
+			exp++;
+		}
+	}
+	if(exp <= -126 || exp >= 130)
+		diag("double fp to single fp overflow");
+	v |= ((exp + 126) & 0xffL) << 23;
+	v |= ieeep->h & 0x80000000L;
+	return v;
+}
+
+double
+ieeedtod(Ieee *ieeep)
+{
+	Ieee e;
+	double fr;
+	int exp;
+
+	if(ieeep->h & (1L<<31)) {
+		e.h = ieeep->h & ~(1L<<31);
+		e.l = ieeep->l;
+		return -ieeedtod(&e);
+	}
+	if(ieeep->l == 0 && ieeep->h == 0)
+		return 0;
+	fr = ieeep->l & ((1L<<16)-1L);
+	fr /= 1L<<16;
+	fr += (ieeep->l>>16) & ((1L<<16)-1L);
+	fr /= 1L<<16;
+	fr += (ieeep->h & (1L<<20)-1L) | (1L<<20);
+	fr /= 1L<<21;
+	exp = (ieeep->h>>20) & ((1L<<11)-1L);
+	exp -= (1L<<10) - 2L;
+	return ldexp(fr, exp);
+}
+
+void
+undefsym(Sym *s)
+{
+	int n;
+
+	n = imports;
+	if(s->value != 0)
+		diag("value != 0 on SXREF");
+	if(n >= 1<<Rindex)
+		diag("import index %d out of range", n);
+	s->value = n<<Roffset;
+	s->type = SUNDEF;
+	imports++;
+}
+
+void
+zerosig(char *sp)
+{
+	Sym *s;
+
+	s = lookup(sp, 0);
+	s->sig = 0;
+}
+
+void
+readundefs(char *f, int t)
+{
+	int i, n;
+	Sym *s;
+	Biobuf *b;
+	char *l, buf[256], *fields[64];
+
+	if(f == nil)
+		return;
+	b = Bopen(f, OREAD);
+	if(b == nil){
+		diag("could not open %s: %r", f);
+		errorexit();
+	}
+	while((l = Brdline(b, '\n')) != nil){
+		n = Blinelen(b);
+		if(n >= sizeof(buf)){
+			diag("%s: line too long", f);
+			errorexit();
+		}
+		memmove(buf, l, n);
+		buf[n-1] = '\0';
+		n = getfields(buf, fields, nelem(fields), 1, " \t\r\n");
+		if(n == nelem(fields)){
+			diag("%s: bad format", f);
+			errorexit();
+		}
+		for(i = 0; i < n; i++){
+			s = lookup(fields[i], 0);
+			s->type = SXREF;
+			s->subtype = t;
+			if(t == SIMPORT)
+				nimports++;
+			else
+				nexports++;
+		}
+	}
+	Bterm(b);
+}

+ 372 - 0
sys/src/cmd/9l/optab.c

@@ -0,0 +1,372 @@
+#include	"l.h"
+
+Optab	optab[] =
+{
+	{ ATEXT,	C_LEXT,	C_NONE, C_NONE, 	C_LCON, 	 0, 0, 0 },
+	{ ATEXT,	C_LEXT,	C_REG, C_NONE, 	C_LCON, 	 0, 0, 0 },
+	{ ATEXT,	C_LEXT,	C_NONE, C_LCON, 	C_LCON, 	 0, 0, 0 },
+	{ ATEXT,	C_LEXT,	C_REG, C_LCON, 	C_LCON, 	 0, 0, 0 },
+	{ ATEXT,	C_ADDR,	C_NONE, C_NONE, 	C_LCON, 	 0, 0, 0 },
+	{ ATEXT,	C_ADDR,	C_REG, C_NONE, 	C_LCON, 	 0, 0, 0 },
+	{ ATEXT,	C_ADDR,	C_NONE, C_LCON, 	C_LCON, 	 0, 0, 0 },
+	{ ATEXT,	C_ADDR,	C_REG, C_LCON, 	C_LCON, 	 0, 0, 0 },
+
+	/* move register */
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_REG,		 1, 4, 0 },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_REG,		12, 4, 0 },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_REG,		13, 4, 0 },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_REG,		 12, 4, 0 },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_REG,		 13, 4, 0 },
+
+	{ AADD,		C_REG,	C_REG, C_NONE, 	C_REG,		 2, 4, 0 },
+	{ AADD,		C_REG,	C_NONE, C_NONE, 	C_REG,		 2, 4, 0 },
+	{ AADD,		C_ADDCON,C_REG, C_NONE, 	C_REG,		 4, 4, 0 },
+	{ AADD,		C_ADDCON,C_NONE, C_NONE, C_REG,		 4, 4, 0 },
+	{ AADD,		C_UCON,	C_REG, C_NONE, 	C_REG,		20, 4, 0 },
+	{ AADD,		C_UCON,	C_NONE, C_NONE, 	C_REG,		20, 4, 0 },
+	{ AADD,		C_LCON,	C_REG, C_NONE, 	C_REG,		22, 12, 0 },
+	{ AADD,		C_LCON,	C_NONE, C_NONE, 	C_REG,		22, 12, 0 },
+
+	{ AADDC,	C_REG,	C_REG, C_NONE, 	C_REG,		 2, 4, 0 },
+	{ AADDC,	C_REG,	C_NONE, C_NONE, 	C_REG,		 2, 4, 0 },
+	{ AADDC,	C_ADDCON,C_REG, C_NONE, 	C_REG,		 4, 4, 0 },
+	{ AADDC,	C_ADDCON,C_NONE, C_NONE, C_REG,		 4, 4, 0 },
+	{ AADDC,	C_LCON,	C_REG, C_NONE, 	C_REG,		22, 12, 0 },
+	{ AADDC,	C_LCON,	C_NONE, C_NONE, 	C_REG,		22, 12, 0 },
+
+	{ AAND,		C_REG,	C_REG, C_NONE, 	C_REG,		6, 4, 0 },	/* logical, no literal */
+	{ AAND,		C_REG,	C_NONE, C_NONE, 	C_REG,		6, 4, 0 },
+	{ AANDCC,	C_REG,	C_REG, C_NONE, 	C_REG,		6, 4, 0 },
+	{ AANDCC,	C_REG,	C_NONE, C_NONE, 	C_REG,		6, 4, 0 },
+
+	{ AANDCC,	C_ANDCON,C_NONE, C_NONE, C_REG,		58, 4, 0 },
+	{ AANDCC,	C_ANDCON,C_REG, C_NONE, 	C_REG,		58, 4, 0 },
+	{ AANDCC,	C_UCON,	C_NONE, C_NONE, 	C_REG,		59, 4, 0 },
+	{ AANDCC,	C_UCON,	C_REG, C_NONE, 	C_REG,		59, 4, 0 },
+	{ AANDCC,	C_LCON,	C_NONE, C_NONE, 	C_REG,		23, 12, 0 },
+	{ AANDCC,	C_LCON,	C_REG, C_NONE, 	C_REG,		23, 12, 0 },
+
+	{ AMULLW,	C_REG,	C_REG, C_NONE, 	C_REG,		 2, 4, 0 },
+	{ AMULLW,	C_REG,	C_NONE, C_NONE, 	C_REG,		 2, 4, 0 },
+	{ AMULLW,	C_ADDCON,C_REG, C_NONE, 	C_REG,		 4, 4, 0 },
+	{ AMULLW,	C_ADDCON,C_NONE, C_NONE, C_REG,		 4, 4, 0 },
+	{ AMULLW,	C_ANDCON,C_REG, C_NONE, 	C_REG,		 4, 4, 0 },
+	{ AMULLW,	C_ANDCON,	C_NONE, C_NONE,	C_REG,	 4, 4, 0 },
+	{ AMULLW,	C_LCON,	C_REG,	C_NONE,	C_REG,		22, 12, 0},
+	{ AMULLW,	C_LCON,	C_NONE,	C_NONE,	C_REG,		22, 12, 0},
+
+	{ ASUBC,	C_REG,	C_REG, C_NONE, 	C_REG,		 10, 4, 0 },
+	{ ASUBC,	C_REG,	C_NONE, C_NONE, 	C_REG,		 10, 4, 0 },
+	{ ASUBC,	C_REG,	C_NONE, C_ADDCON, 	C_REG,	 27, 4, 0 },
+	{ ASUBC,	C_REG,	C_NONE,	C_LCON,	C_REG,		28, 12, 0},
+
+	{ AOR,		C_REG,	C_REG, C_NONE, 	C_REG,		6, 4, 0 },	/* logical, literal not cc (or/xor) */
+	{ AOR,		C_REG,	C_NONE, C_NONE, 	C_REG,		6, 4, 0 },
+	{ AOR,		C_ANDCON, C_NONE, C_NONE,  C_REG,	58, 4, 0 },
+	{ AOR,		C_ANDCON, C_REG, C_NONE,  C_REG,		58, 4, 0 },
+	{ AOR,		C_UCON, C_NONE, C_NONE,  C_REG,		59, 4, 0 },
+	{ AOR,		C_UCON, C_REG, C_NONE,  C_REG,		59, 4, 0 },
+	{ AOR,		C_LCON,	C_NONE, C_NONE, 	C_REG,		23, 12, 0 },
+	{ AOR,		C_LCON,	C_REG, C_NONE, 	C_REG,		23, 12, 0 },
+
+	{ ADIVW,	C_REG,	C_REG, C_NONE, 	C_REG,		 2, 4, 0 },	/* op r1[,r2],r3 */
+	{ ADIVW,	C_REG,	C_NONE, C_NONE, 	C_REG,		 2, 4, 0 },
+	{ ASUB,	C_REG,	C_REG, C_NONE, 	C_REG,		 10, 4, 0 },	/* op r2[,r1],r3 */
+	{ ASUB,	C_REG,	C_NONE, C_NONE, 	C_REG,		 10, 4, 0 },
+
+	{ ASLW,	C_REG,	C_NONE, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASLW,	C_REG,	C_REG, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASLD,	C_REG,	C_NONE, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASLD,	C_REG,	C_REG, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASLD,	C_SCON,	C_REG, C_NONE,	C_REG,		25, 4, 0 },
+	{ ASLD,	C_SCON,	C_NONE, C_NONE,	C_REG,		25, 4, 0 },
+	{ ASLW,	C_SCON,	C_REG, C_NONE, 	C_REG,		57, 4, 0 },
+	{ ASLW,	C_SCON,	C_NONE, C_NONE, 	C_REG,		57, 4, 0 },
+
+	{ ASRAW,	C_REG,	C_NONE, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASRAW,	C_REG,	C_REG, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASRAW,	C_SCON,	C_REG, C_NONE, 	C_REG,		56, 4, 0 },
+	{ ASRAW,	C_SCON,	C_NONE, C_NONE, 	C_REG,		56, 4, 0 },
+	{ ASRAD,	C_REG,	C_NONE, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASRAD,	C_REG,	C_REG, C_NONE, 	C_REG,		 6, 4, 0 },
+	{ ASRAD,	C_SCON,	C_REG, C_NONE, 	C_REG,		56, 4, 0 },
+	{ ASRAD,	C_SCON,	C_NONE, C_NONE, 	C_REG,		56, 4, 0 },
+
+	{ ARLWMI,	C_SCON, C_REG, C_LCON, 	C_REG,		62, 4, 0 },
+	{ ARLWMI,	C_REG,	C_REG, C_LCON, 	C_REG,		63, 4, 0 },
+	{ ARLDMI,	C_SCON,	C_REG, C_LCON,	C_REG,		30, 4, 0 },
+
+	{ ARLDC,	C_SCON,	C_REG, C_LCON,	C_REG,		29, 4, 0 },
+	{ ARLDCL,	C_SCON,	C_REG, C_LCON,	C_REG,		29, 4, 0 },
+	{ ARLDCL,	C_REG,	C_REG,	C_LCON,	C_REG,		14, 4, 0 },
+	{ ARLDCL, C_REG,	C_NONE,	C_LCON,	C_REG,		14, 4, 0 },
+
+	{ AFADD,	C_FREG,	C_NONE, C_NONE, 	C_FREG,		 2, 4, 0 },
+	{ AFADD,	C_FREG,	C_REG, C_NONE, 	C_FREG,		 2, 4, 0 },
+	{ AFABS,	C_FREG,	C_NONE, C_NONE, 	C_FREG,		33, 4, 0 },
+	{ AFABS,	C_NONE,	C_NONE, C_NONE, 	C_FREG,		33, 4, 0 },
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_FREG,		33, 4, 0 },
+
+	{ AFMADD,	C_FREG,	C_REG, C_FREG, 	C_FREG,		 34, 4, 0 },
+	{ AFMUL,	C_FREG,	C_NONE, C_NONE, 	C_FREG,		 32, 4, 0 },
+	{ AFMUL,	C_FREG,	C_REG, C_NONE, 	C_FREG,		 32, 4, 0 },
+
+	/* store, short offset */
+	{ AMOVD,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	 7, 4, REGZERO },
+	{ AMOVW,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	 7, 4, REGZERO },
+	{ AMOVWZ,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	 7, 4, REGZERO },
+	{ AMOVBZ,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	 7, 4, REGZERO },
+	{ AMOVBZU,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	 7, 4, REGZERO },
+	{ AMOVB,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	 7, 4, REGZERO },
+	{ AMOVBU,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	 7, 4, REGZERO },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_SEXT,		 7, 4, REGSB },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_SEXT,		 7, 4, REGSB },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_SEXT,		 7, 4, REGSB },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_SEXT,		 7, 4, REGSB },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_SEXT,		 7, 4, REGSB },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_SAUTO,	 7, 4, REGSP },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_SAUTO,	 7, 4, REGSP },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_SAUTO,	 7, 4, REGSP },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_SAUTO,	 7, 4, REGSP },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_SAUTO,	 7, 4, REGSP },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_SOREG,	 7, 4, REGZERO },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_SOREG,	 7, 4, REGZERO },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_SOREG,	 7, 4, REGZERO },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_SOREG,	 7, 4, REGZERO },
+	{ AMOVBZU,	C_REG,	C_NONE, C_NONE, 	C_SOREG,	 7, 4, REGZERO },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_SOREG,	 7, 4, REGZERO },
+	{ AMOVBU,	C_REG,	C_NONE, C_NONE, 	C_SOREG,	 7, 4, REGZERO },
+
+	/* load, short offset */
+	{ AMOVD,	C_ZOREG,C_REG, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVW,	C_ZOREG,C_REG, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVWZ,	C_ZOREG,C_REG, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVBZ,	C_ZOREG,C_REG, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVBZU,	C_ZOREG,C_REG, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVB,	C_ZOREG,C_REG, C_NONE, 	C_REG,		9, 8, REGZERO },
+	{ AMOVBU,	C_ZOREG,C_REG, C_NONE, 	C_REG,		9, 8, REGZERO },
+	{ AMOVD,	C_SEXT,	C_NONE, C_NONE, 	C_REG,		 8, 4, REGSB },
+	{ AMOVW,	C_SEXT,	C_NONE, C_NONE, 	C_REG,		 8, 4, REGSB },
+	{ AMOVWZ,	C_SEXT,	C_NONE, C_NONE, 	C_REG,		 8, 4, REGSB },
+	{ AMOVBZ,	C_SEXT,	C_NONE, C_NONE, 	C_REG,		 8, 4, REGSB },
+	{ AMOVB,	C_SEXT,	C_NONE, C_NONE, 	C_REG,		9, 8, REGSB },
+	{ AMOVD,	C_SAUTO,C_NONE, C_NONE, 	C_REG,		 8, 4, REGSP },
+	{ AMOVW,	C_SAUTO,C_NONE, C_NONE, 	C_REG,		 8, 4, REGSP },
+	{ AMOVWZ,	C_SAUTO,C_NONE, C_NONE, 	C_REG,		 8, 4, REGSP },
+	{ AMOVBZ,	C_SAUTO,C_NONE, C_NONE, 	C_REG,		 8, 4, REGSP },
+	{ AMOVB,	C_SAUTO,C_NONE, C_NONE, 	C_REG,		9, 8, REGSP },
+	{ AMOVD,	C_SOREG,C_NONE, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVW,	C_SOREG,C_NONE, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVWZ,	C_SOREG,C_NONE, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVBZ,	C_SOREG,C_NONE, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVBZU,	C_SOREG,C_NONE, C_NONE, 	C_REG,		 8, 4, REGZERO },
+	{ AMOVB,	C_SOREG,C_NONE, C_NONE, 	C_REG,		9, 8, REGZERO },
+	{ AMOVBU,	C_SOREG,C_NONE, C_NONE, 	C_REG,		9, 8, REGZERO },
+
+	/* store, long offset */
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_LEXT,		35, 8, REGSB },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_LEXT,		35, 8, REGSB },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_LEXT,		35, 8, REGSB },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_LEXT,		35, 8, REGSB },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_LEXT,		35, 8, REGSB },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_LAUTO,	35, 8, REGSP },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_LAUTO,	35, 8, REGSP },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_LAUTO,	35, 8, REGSP },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_LAUTO,	35, 8, REGSP },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_LAUTO,	35, 8, REGSP },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_LOREG,	35, 8, REGZERO },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_LOREG,	35, 8, REGZERO },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_LOREG,	35, 8, REGZERO },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_LOREG,	35, 8, REGZERO },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_ADDR,		74, 8, 0 },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_ADDR,		74, 8, 0 },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_ADDR,		74, 8, 0 },
+	{ AMOVBZ,	C_REG,	C_NONE, C_NONE, 	C_ADDR,		74, 8, 0 },
+	{ AMOVB,	C_REG,	C_NONE, C_NONE, 	C_ADDR,		74, 8, 0 },
+
+	/* load, long offset */
+	{ AMOVD,	C_LEXT,	C_NONE, C_NONE, 	C_REG,		36, 8, REGSB },
+	{ AMOVW,	C_LEXT,	C_NONE, C_NONE, 	C_REG,		36, 8, REGSB },
+	{ AMOVWZ,	C_LEXT,	C_NONE, C_NONE, 	C_REG,		36, 8, REGSB },
+	{ AMOVBZ,	C_LEXT,	C_NONE, C_NONE, 	C_REG,		36, 8, REGSB },
+	{ AMOVB,	C_LEXT,	C_NONE, C_NONE, 	C_REG,		37, 12, REGSB },
+	{ AMOVD,	C_LAUTO,C_NONE, C_NONE, 	C_REG,		36, 8, REGSP },
+	{ AMOVW,	C_LAUTO,C_NONE, C_NONE, 	C_REG,		36, 8, REGSP },
+	{ AMOVWZ,	C_LAUTO,C_NONE, C_NONE, 	C_REG,		36, 8, REGSP },
+	{ AMOVBZ,	C_LAUTO,C_NONE, C_NONE, 	C_REG,		36, 8, REGSP },
+	{ AMOVB,	C_LAUTO,C_NONE, C_NONE, 	C_REG,		37, 12, REGSP },
+	{ AMOVD,	C_LOREG,C_NONE, C_NONE, 	C_REG,		36, 8, REGZERO },
+	{ AMOVW,	C_LOREG,C_NONE, C_NONE, 	C_REG,		36, 8, REGZERO },
+	{ AMOVWZ,	C_LOREG,C_NONE, C_NONE, 	C_REG,		36, 8, REGZERO },
+	{ AMOVBZ,	C_LOREG,C_NONE, C_NONE, 	C_REG,		36, 8, REGZERO },
+	{ AMOVB,	C_LOREG,C_NONE, C_NONE, 	C_REG,		37, 12, REGZERO },
+	{ AMOVD,	C_ADDR,	C_NONE, C_NONE, 	C_REG,		75, 8, 0 },
+	{ AMOVW,	C_ADDR,	C_NONE, C_NONE, 	C_REG,		75, 8, 0 },
+	{ AMOVWZ,	C_ADDR,	C_NONE, C_NONE, 	C_REG,		75, 8, 0 },
+	{ AMOVBZ,	C_ADDR,	C_NONE, C_NONE, 	C_REG,		75, 8, 0 },
+	{ AMOVB,	C_ADDR,	C_NONE, C_NONE, 	C_REG,		76, 12, 0 },
+
+	/* load constant */
+	{ AMOVD,	C_SECON,C_NONE, C_NONE, 	C_REG,		 3, 4, REGSB },
+	{ AMOVD,	C_SACON,C_NONE, C_NONE, 	C_REG,		 3, 4, REGSP },
+	{ AMOVD,	C_LECON,C_NONE, C_NONE, 	C_REG,		26, 8, REGSB }, 
+	{ AMOVD,	C_LACON,C_NONE, C_NONE, 	C_REG,		26, 8, REGSP },
+	{ AMOVD,	C_ADDCON,C_NONE, C_NONE, C_REG,		 3, 4, REGZERO },
+	{ AMOVW,	C_SECON,C_NONE, C_NONE, 	C_REG,		 3, 4, REGSB },	/* TO DO: check */
+	{ AMOVW,	C_SACON,C_NONE, C_NONE, 	C_REG,		 3, 4, REGSP },
+	{ AMOVW,	C_LECON,C_NONE, C_NONE, 	C_REG,		26, 8, REGSB }, 
+	{ AMOVW,	C_LACON,C_NONE, C_NONE, 	C_REG,		26, 8, REGSP },
+	{ AMOVW,	C_ADDCON,C_NONE, C_NONE, C_REG,		 3, 4, REGZERO },
+	{ AMOVWZ,	C_SECON,C_NONE, C_NONE, 	C_REG,		 3, 4, REGSB },	/* TO DO: check */
+	{ AMOVWZ,	C_SACON,C_NONE, C_NONE, 	C_REG,		 3, 4, REGSP },
+	{ AMOVWZ,	C_LECON,C_NONE, C_NONE, 	C_REG,		26, 8, REGSB }, 
+	{ AMOVWZ,	C_LACON,C_NONE, C_NONE, 	C_REG,		26, 8, REGSP },
+	{ AMOVWZ,	C_ADDCON,C_NONE, C_NONE, C_REG,		 3, 4, REGZERO },
+
+	/* load unsigned/long constants (TO DO: check) */
+	{ AMOVD,	C_UCON, C_NONE, C_NONE,  C_REG,		3, 4, REGZERO },
+	{ AMOVD,	C_LCON,	C_NONE, C_NONE, 	C_REG,		19, 8, 0 },
+	{ AMOVW,	C_UCON, C_NONE, C_NONE,  C_REG,		3, 4, REGZERO },
+	{ AMOVW,	C_LCON,	C_NONE, C_NONE, 	C_REG,		19, 8, 0 },
+	{ AMOVWZ,	C_UCON, C_NONE, C_NONE,  C_REG,		3, 4, REGZERO },
+	{ AMOVWZ,	C_LCON,	C_NONE, C_NONE, 	C_REG,		19, 8, 0 },
+
+	{ AMOVHBR,	C_ZOREG,	C_REG, C_NONE, C_REG,		45, 4, 0 },
+	{ AMOVHBR,	C_ZOREG, C_NONE, C_NONE, C_REG,	45, 4, 0 },
+	{ AMOVHBR,	C_REG,	C_REG, C_NONE,	C_ZOREG,		44, 4, 0 },
+	{ AMOVHBR,	C_REG,	C_NONE, C_NONE,	C_ZOREG,		44, 4, 0 },
+
+	{ ASYSCALL,	C_NONE,	C_NONE, C_NONE, 	C_NONE,		 5, 4, 0 },
+
+	{ ABEQ,		C_NONE,	C_NONE, C_NONE, 	C_SBRA,		16, 4, 0 },
+	{ ABEQ,		C_CREG,	C_NONE, C_NONE, 	C_SBRA,		16, 4, 0 },
+
+	{ ABR,		C_NONE,	C_NONE, C_NONE, 	C_LBRA,		11, 4, 0 },
+
+	{ ABC,		C_SCON,	C_REG, C_NONE, 	C_SBRA,		16, 4, 0 },
+	{ ABC,		C_SCON, C_REG, C_NONE, 	C_LBRA,		17, 4, 0 },
+
+	{ ABR,		C_NONE,	C_NONE, C_NONE, 	C_LR,		18, 4, 0 },
+	{ ABR,		C_NONE,	C_NONE, C_NONE, 	C_CTR,		18, 4, 0 },
+	{ ABR,		C_NONE,	C_NONE, C_NONE, 	C_ZOREG,		15, 8, 0 },
+
+	{ ABC,		C_NONE,	C_REG, C_NONE, 	C_LR,		18, 4, 0 },
+	{ ABC,		C_NONE,	C_REG, C_NONE, 	C_CTR,		18, 4, 0 },
+	{ ABC,		C_SCON,	C_REG, C_NONE, 	C_LR,		18, 4, 0 },
+	{ ABC,		C_SCON,	C_REG, C_NONE, 	C_CTR,		18, 4, 0 },
+	{ ABC,		C_NONE,	C_NONE, C_NONE, 	C_ZOREG,		15, 8, 0 },
+
+	{ AFMOVD,	C_SEXT,	C_NONE, C_NONE, 	C_FREG,		8, 4, REGSB },
+	{ AFMOVD,	C_SAUTO,C_NONE, C_NONE, 	C_FREG,		8, 4, REGSP },
+	{ AFMOVD,	C_SOREG,C_NONE, C_NONE, 	C_FREG,		8, 4, REGZERO },
+
+	{ AFMOVD,	C_LEXT,	C_NONE, C_NONE, 	C_FREG,		8, 4, REGSB },
+	{ AFMOVD,	C_LAUTO,C_NONE, C_NONE, 	C_FREG,		8, 4, REGSP },
+	{ AFMOVD,	C_LOREG,C_NONE, C_NONE, 	C_FREG,		8, 4, REGZERO },
+	{ AFMOVD,	C_ADDR,	C_NONE, C_NONE, 	C_FREG,		75, 8, 0 },
+
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_SEXT,		7, 4, REGSB },
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_SAUTO,	7, 4, REGSP },
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_SOREG,	7, 4, REGZERO },
+
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_LEXT,		7, 4, REGSB },
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_LAUTO,	7, 4, REGSP },
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_LOREG,	7, 4, REGZERO },
+	{ AFMOVD,	C_FREG,	C_NONE, C_NONE, 	C_ADDR,		74, 8, 0 },
+
+	{ ASYNC,		C_NONE,	C_NONE, C_NONE, 	C_NONE,		46, 4, 0 },
+	{ AWORD,	C_LCON,	C_NONE, C_NONE, 	C_NONE,		40, 4, 0 },
+	{ ADWORD,	C_LCON,	C_NONE, C_NONE, C_NONE,	31, 8, 0 },
+
+	{ AADDME,	C_REG,	C_NONE, C_NONE, 	C_REG,		47, 4, 0 },
+
+	{ AEXTSB,	C_REG,	C_NONE, C_NONE, 	C_REG,		48, 4, 0 },
+	{ AEXTSB,	C_NONE,	C_NONE, C_NONE, 	C_REG,		48, 4, 0 },
+
+	{ ANEG,		C_REG,	C_NONE, C_NONE, 	C_REG,		47, 4, 0 },
+	{ ANEG,		C_NONE,	C_NONE, C_NONE, 	C_REG,		47, 4, 0 },
+
+	{ AREM,		C_REG,	C_NONE, C_NONE, 	C_REG,		50, 12, 0 },
+	{ AREM,		C_REG,	C_REG, C_NONE, 	C_REG,		50, 12, 0 },
+	{ AREMD,		C_REG,	C_NONE, C_NONE, 	C_REG,		51, 12, 0 },
+	{ AREMD,		C_REG,	C_REG, C_NONE, 	C_REG,		51, 12, 0 },
+
+	{ AMTFSB0,	C_SCON,	C_NONE, C_NONE, 	C_NONE,		52, 4, 0 },
+	{ AMOVFL, C_FPSCR, C_NONE, C_NONE,	C_FREG,		53, 4, 0 },
+	{ AMOVFL, C_FREG, C_NONE, C_NONE,	C_FPSCR,		64, 4, 0 },
+	{ AMOVFL, C_FREG, C_NONE, C_LCON,	C_FPSCR,		64, 4, 0 },
+	{ AMOVFL,	C_LCON, C_NONE, C_NONE,	C_FPSCR,		65, 4, 0 },
+
+	{ AMOVD,	C_MSR,	C_NONE, C_NONE, 	C_REG,		54, 4, 0 },		/* mfmsr */
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_MSR,		54, 4, 0 },		/* mtmsrd */
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_MSR,		54, 4, 0 },		/* mtmsr */
+
+	/* 64-bit special registers */
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_SPR,		66, 4, 0 },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_LR,		66, 4, 0 },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_CTR,		66, 4, 0 },
+	{ AMOVD,	C_REG,	C_NONE, C_NONE, 	C_XER,		66, 4, 0 },
+	{ AMOVD,	C_SPR,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+	{ AMOVD,	C_LR,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+	{ AMOVD,	C_CTR,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+	{ AMOVD,	C_XER,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+
+	/* 32-bit special registers (gloss over sign-extension or not?) */
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_SPR,		66, 4, 0 },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_CTR,		66, 4, 0 },
+	{ AMOVW,	C_REG,	C_NONE, C_NONE, 	C_XER,		66, 4, 0 },
+	{ AMOVW,	C_SPR,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+	{ AMOVW,	C_XER,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_SPR,		66, 4, 0 },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_CTR,		66, 4, 0 },
+	{ AMOVWZ,	C_REG,	C_NONE, C_NONE, 	C_XER,		66, 4, 0 },
+	{ AMOVWZ,	C_SPR,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+	{ AMOVWZ,	C_XER,	C_NONE, C_NONE, 	C_REG,		66, 4, 0 },
+
+	{ AMOVFL,	C_FPSCR, C_NONE, C_NONE, 	C_CREG,		73, 4, 0 },
+	{ AMOVFL,	C_CREG,	C_NONE, C_NONE, 	C_CREG,		67, 4, 0 },
+	{ AMOVW,	C_CREG,	C_NONE,	C_NONE,		C_REG,		68, 4, 0 },
+	{ AMOVWZ,	C_CREG,	C_NONE,	C_NONE,		C_REG,		68, 4, 0 },
+	{ AMOVFL,	C_REG, C_NONE, C_LCON, C_CREG,		69, 4, 0 },
+	{ AMOVFL,	C_REG, C_NONE, C_NONE, C_CREG,		69, 4, 0 },
+	{ AMOVW,	C_REG, C_NONE, C_NONE, C_CREG,		69, 4, 0 },
+	{ AMOVWZ,	C_REG, C_NONE, C_NONE, C_CREG,		69, 4, 0 },
+
+	{ ACMP,	C_REG,	C_NONE, C_NONE, 	C_REG,	70, 4, 0 },
+	{ ACMP,	C_REG,	C_REG, C_NONE, 	C_REG,	70, 4, 0 },
+	{ ACMP,	C_REG,	C_NONE, C_NONE,	C_ADDCON,	71, 4, 0 },
+	{ ACMP,	C_REG,	C_REG, C_NONE,	C_ADDCON,	71, 4, 0 },
+
+	{ ACMPU,	C_REG,	C_NONE, C_NONE, 	C_REG,	70, 4, 0 },
+	{ ACMPU,	C_REG,	C_REG, C_NONE, 	C_REG,	70, 4, 0 },
+	{ ACMPU,	C_REG,	C_NONE, C_NONE,	C_ANDCON,	71, 4, 0 },
+	{ ACMPU,	C_REG,	C_REG, C_NONE,	C_ANDCON,	71, 4, 0 },
+
+	{ AFCMPO,	C_FREG,	C_NONE, C_NONE, 	C_FREG,	70, 4, 0 },
+	{ AFCMPO,	C_FREG,	C_REG, C_NONE, 	C_FREG,	70, 4, 0 },
+
+	{ ATW,		C_LCON,	C_REG, C_NONE, 	C_REG,		60, 4, 0 },
+	{ ATW,		C_LCON,	C_REG, C_NONE, 	C_ADDCON,	61, 4, 0 },
+
+	{ ADCBF,	C_ZOREG, C_NONE, C_NONE,  C_NONE,	43, 4, 0 },
+	{ ADCBF,	C_ZOREG, C_REG, C_NONE,  C_NONE,	43, 4, 0 },
+
+	{ AECOWX,	C_REG,	C_REG, C_NONE, 	C_ZOREG,	44, 4, 0 },
+	{ AECIWX,	C_ZOREG, C_REG, C_NONE,  C_REG,		45, 4, 0 },
+	{ AECOWX,	C_REG,	C_NONE, C_NONE, 	C_ZOREG,	44, 4, 0 },
+	{ AECIWX,	C_ZOREG, C_NONE, C_NONE,  C_REG,		45, 4, 0 },
+
+	{ AEIEIO,	C_NONE,	C_NONE, C_NONE, 	C_NONE,		46, 4, 0 },
+	{ ATLBIE,	C_REG, C_NONE, C_NONE,		C_NONE,		49, 4, 0 },
+	{ ATLBIE,	C_SCON, C_NONE, C_NONE,	C_REG,	49, 4, 0 },
+	{ ASLBMFEE, C_REG, C_NONE, C_NONE,	C_REG,	55, 4, 0 },
+	{ ASLBMTE, C_REG, C_NONE, C_NONE,	C_REG,	55, 4, 0 },
+
+	{ ASTSW,	C_REG,	C_NONE, C_NONE, 	C_ZOREG,	44, 4, 0 },
+	{ ASTSW,	C_REG,	C_NONE, C_LCON, 	C_ZOREG,	41, 4, 0 },
+	{ ALSW,	C_ZOREG, C_NONE, C_NONE,  C_REG,		45, 4, 0 },
+	{ ALSW,	C_ZOREG, C_NONE, C_LCON,  C_REG,		42, 4, 0 },
+
+	{ AXXX,		C_NONE,	C_NONE, C_NONE, 	C_NONE,		 0, 4, 0 },
+};

+ 666 - 0
sys/src/cmd/9l/pass.c

@@ -0,0 +1,666 @@
+#include	"l.h"
+
+void
+dodata(void)
+{
+	int i, t;
+	Sym *s;
+	Prog *p, *p1;
+	vlong orig, orig1, v;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f dodata\n", cputime());
+	Bflush(&bso);
+	for(p = datap; p != P; p = p->link) {
+		s = p->from.sym;
+		if(p->as == ADYNT || p->as == AINIT)
+			s->value = dtype;
+		if(s->type == SBSS)
+			s->type = SDATA;
+		if(s->type != SDATA)
+			diag("initialize non-data (%d): %s\n%P",
+				s->type, s->name, p);
+		v = p->from.offset + p->reg;
+		if(v > s->value)
+			diag("initialize bounds (%lld): %s\n%P",
+				s->value, s->name, p);
+	}
+
+	/*
+	 * pass 1
+	 *	assign 'small' variables to data segment
+	 *	(rational is that data segment is more easily
+	 *	 addressed through offset on REGSB)
+	 */
+	orig = 0;
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		t = s->type;
+		if(t != SDATA && t != SBSS)
+			continue;
+		v = s->value;
+		if(v == 0) {
+			diag("%s: no size", s->name);
+			v = 1;
+		}
+		v = rnd(v, 4);
+		s->value = v;
+		if(v > MINSIZ)
+			continue;
+		if(v >= 8)
+			orig = rnd(orig, 8);
+		s->value = orig;
+		orig += v;
+		s->type = SDATA1;
+	}
+	orig1 = orig;
+
+	/*
+	 * pass 2
+	 *	assign 'data' variables to data segment
+	 */
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		t = s->type;
+		if(t != SDATA) {
+			if(t == SDATA1)
+				s->type = SDATA;
+			continue;
+		}
+		v = s->value;
+		if(v >= 8)
+			orig = rnd(orig, 8);
+		s->value = orig;
+		orig += v;
+		s->type = SDATA1;
+	}
+
+	if(orig)
+		orig = rnd(orig, 8);
+	datsize = orig;
+
+	/*
+	 * pass 3
+	 *	everything else to bss segment
+	 */
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		if(s->type != SBSS)
+			continue;
+		v = s->value;
+		if(v >= 8)
+			orig = rnd(orig, 8);
+		s->value = orig;
+		orig += v;
+	}
+	if(orig)
+		orig = rnd(orig, 8);
+	bsssize = orig-datsize;
+
+	/*
+	 * pass 4
+	 *	add literals to all large values.
+	 *	at this time:
+	 *		small data is allocated DATA
+	 *		large data is allocated DATA1
+	 *		large bss is allocated BSS
+	 *	the new literals are loaded between
+	 *	small data and large data.
+	 */
+	orig = 0;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as != AMOVW)
+			continue;
+		if(p->from.type != D_CONST)
+			continue;
+		if(s = p->from.sym) {
+			t = s->type;
+			if(t != SDATA && t != SDATA1 && t != SBSS)
+				continue;
+			t = p->from.name;
+			if(t != D_EXTERN && t != D_STATIC)
+				continue;
+			v = s->value + p->from.offset;
+			if(v >= 0 && v <= 0xffff)
+				continue;
+			if(!strcmp(s->name, "setSB"))
+				continue;
+			/* size should be 19 max */
+			if(strlen(s->name) >= 10)	/* has loader address */ 
+				sprint(literal, "$%p.%llux", s, p->from.offset);
+			else
+				sprint(literal, "$%s.%d.%llux", s->name, s->version, p->from.offset);
+		} else {
+			if(p->from.name != D_NONE)
+				continue;
+			if(p->from.reg != NREG)
+				continue;
+			v = p->from.offset;
+			if(v >= -0x7fff-1 && v <= 0x7fff)
+				continue;
+			if(!(v & 0xffff))
+				continue;
+			if(v)
+				continue;	/* quicker to build it than load it */
+			/* size should be 9 max */
+			sprint(literal, "$%llux", v);
+		}
+		s = lookup(literal, 0);
+		if(s->type == 0) {
+			s->type = SDATA;
+			s->value = orig1+orig;
+			orig += 4;
+			p1 = prg();
+			p1->as = ADATA;
+			p1->line = p->line;
+			p1->from.type = D_OREG;
+			p1->from.sym = s;
+			p1->from.name = D_EXTERN;
+			p1->reg = 4;
+			p1->to = p->from;
+			p1->link = datap;
+			datap = p1;
+		}
+		if(s->type != SDATA)
+			diag("literal not data: %s", s->name);
+		p->from.type = D_OREG;
+		p->from.sym = s;
+		p->from.name = D_EXTERN;
+		p->from.offset = 0;
+		continue;
+	}
+	while(orig & 7)
+		orig++;
+	/*
+	 * pass 5
+	 *	re-adjust offsets
+	 */
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		t = s->type;
+		if(t == SBSS) {
+			s->value += orig;
+			continue;
+		}
+		if(t == SDATA1) {
+			s->type = SDATA;
+			s->value += orig;
+			continue;
+		}
+	}
+	datsize += orig;
+	xdefine("setSB", SDATA, 0+BIG);
+	xdefine("bdata", SDATA, 0);
+	xdefine("edata", SDATA, datsize);
+	xdefine("end", SBSS, datsize+bsssize);
+	xdefine("etext", STEXT, 0);
+}
+
+void
+undef(void)
+{
+	int i;
+	Sym *s;
+
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link)
+		if(s->type == SXREF)
+			diag("%s: not defined", s->name);
+}
+
+int
+relinv(int a)
+{
+
+	switch(a) {
+	case ABEQ:	return ABNE;
+	case ABNE:	return ABEQ;
+
+	case ABGE:	return ABLT;
+	case ABLT:	return ABGE;
+
+	case ABGT:	return ABLE;
+	case ABLE:	return ABGT;
+
+	case ABVC:	return ABVS;
+	case ABVS:	return ABVC;
+	}
+	return 0;
+}
+
+void
+follow(void)
+{
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f follow\n", cputime());
+	Bflush(&bso);
+
+	firstp = prg();
+	lastp = firstp;
+
+	xfol(textp);
+
+	firstp = firstp->link;
+	lastp->link = P;
+}
+
+void
+xfol(Prog *p)
+{
+	Prog *q, *r;
+	int a, b, i;
+
+loop:
+	if(p == P)
+		return;
+	a = p->as;
+	if(a == ATEXT)
+		curtext = p;
+	if(a == ABR) {
+		q = p->cond;
+		if((p->mark&NOSCHED) || q && (q->mark&NOSCHED)){
+			p->mark |= FOLL;
+			lastp->link = p;
+			lastp = p;
+			p = p->link;
+			xfol(p);
+			p = q;
+			if(p && !(p->mark & FOLL))
+				goto loop;
+			return;
+		}
+		if(q != P) {
+			p->mark |= FOLL;
+			p = q;
+			if(!(p->mark & FOLL))
+				goto loop;
+		}
+	}
+	if(p->mark & FOLL) {
+		for(i=0,q=p; i<4; i++,q=q->link) {
+			if(q == lastp || (q->mark&NOSCHED))
+				break;
+			b = 0;		/* set */
+			a = q->as;
+			if(a == ANOP) {
+				i--;
+				continue;
+			}
+			if(a == ABR || a == ARETURN || a == ARFI || a == ARFCI || a == ARFID || a == AHRFID)
+				goto copy;
+			if(!q->cond || (q->cond->mark&FOLL))
+				continue;
+			b = relinv(a);
+			if(!b)
+				continue;
+		copy:
+			for(;;) {
+				r = prg();
+				*r = *p;
+				if(!(r->mark&FOLL))
+					print("cant happen 1\n");
+				r->mark |= FOLL;
+				if(p != q) {
+					p = p->link;
+					lastp->link = r;
+					lastp = r;
+					continue;
+				}
+				lastp->link = r;
+				lastp = r;
+				if(a == ABR || a == ARETURN || a == ARFI || a == ARFCI || a == ARFID || a == AHRFID)
+					return;
+				r->as = b;
+				r->cond = p->link;
+				r->link = p->cond;
+				if(!(r->link->mark&FOLL))
+					xfol(r->link);
+				if(!(r->cond->mark&FOLL))
+					print("cant happen 2\n");
+				return;
+			}
+		}
+
+		a = ABR;
+		q = prg();
+		q->as = a;
+		q->line = p->line;
+		q->to.type = D_BRANCH;
+		q->to.offset = p->pc;
+		q->cond = p;
+		p = q;
+	}
+	p->mark |= FOLL;
+	lastp->link = p;
+	lastp = p;
+	if(a == ABR || a == ARETURN || a == ARFI || a == ARFCI || a == ARFID || a == AHRFID){
+		if(p->mark & NOSCHED){
+			p = p->link;
+			goto loop;
+		}
+		return;
+	}
+	if(p->cond != P)
+	if(a != ABL && p->link != P) {
+		xfol(p->link);
+		p = p->cond;
+		if(p == P || (p->mark&FOLL))
+			return;
+		goto loop;
+	}
+	p = p->link;
+	goto loop;
+}
+
+void
+patch(void)
+{
+	long c;
+	Prog *p, *q;
+	Sym *s;
+	int a;
+	vlong vexit;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f patch\n", cputime());
+	Bflush(&bso);
+	mkfwd();
+	s = lookup("exit", 0);
+	vexit = s->value;
+	for(p = firstp; p != P; p = p->link) {
+		a = p->as;
+		if(a == ATEXT)
+			curtext = p;
+		if((a == ABL || a == ARETURN) && p->to.sym != S) {
+			s = p->to.sym;
+			if(s->type != STEXT && s->type != SUNDEF) {
+				diag("undefined: %s\n%P", s->name, p);
+				s->type = STEXT;
+				s->value = vexit;
+			}
+			if(s->type == SUNDEF){
+				p->to.offset = 0;
+				p->cond = UP;
+			}
+			else
+				p->to.offset = s->value;
+			p->to.type = D_BRANCH;
+		}
+		if(p->to.type != D_BRANCH || p->cond == UP)
+			continue;
+		c = p->to.offset;
+		for(q = firstp; q != P;) {
+			if(q->forwd != P)
+			if(c >= q->forwd->pc) {
+				q = q->forwd;
+				continue;
+			}
+			if(c == q->pc)
+				break;
+			q = q->link;
+		}
+		if(q == P) {
+			diag("branch out of range %ld\n%P", c, p);
+			p->to.type = D_NONE;
+		}
+		p->cond = q;
+	}
+
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		p->mark = 0;	/* initialization for follow */
+		if(p->cond != P && p->cond != UP) {
+			p->cond = brloop(p->cond);
+			if(p->cond != P)
+			if(p->to.type == D_BRANCH)
+				p->to.offset = p->cond->pc;
+		}
+	}
+}
+
+#define	LOG	5
+void
+mkfwd(void)
+{
+	Prog *p;
+	long dwn[LOG], cnt[LOG], i;
+	Prog *lst[LOG];
+
+	for(i=0; i<LOG; i++) {
+		if(i == 0)
+			cnt[i] = 1; else
+			cnt[i] = LOG * cnt[i-1];
+		dwn[i] = 1;
+		lst[i] = P;
+	}
+	i = 0;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		i--;
+		if(i < 0)
+			i = LOG-1;
+		p->forwd = P;
+		dwn[i]--;
+		if(dwn[i] <= 0) {
+			dwn[i] = cnt[i];
+			if(lst[i] != P)
+				lst[i]->forwd = p;
+			lst[i] = p;
+		}
+	}
+}
+
+Prog*
+brloop(Prog *p)
+{
+	Prog *q;
+	int c;
+
+	for(c=0; p!=P;) {
+		if(p->as != ABR || (p->mark&NOSCHED))
+			return p;
+		q = p->cond;
+		if(q <= p) {
+			c++;
+			if(q == p || c > 5000)
+				break;
+		}
+		p = q;
+	}
+	return P;
+}
+
+vlong
+atolwhex(char *s)
+{
+	vlong n;
+	int f;
+
+	n = 0;
+	f = 0;
+	while(*s == ' ' || *s == '\t')
+		s++;
+	if(*s == '-' || *s == '+') {
+		if(*s++ == '-')
+			f = 1;
+		while(*s == ' ' || *s == '\t')
+			s++;
+	}
+	if(s[0]=='0' && s[1]){
+		if(s[1]=='x' || s[1]=='X'){
+			s += 2;
+			for(;;){
+				if(*s >= '0' && *s <= '9')
+					n = n*16 + *s++ - '0';
+				else if(*s >= 'a' && *s <= 'f')
+					n = n*16 + *s++ - 'a' + 10;
+				else if(*s >= 'A' && *s <= 'F')
+					n = n*16 + *s++ - 'A' + 10;
+				else
+					break;
+			}
+		} else
+			while(*s >= '0' && *s <= '7')
+				n = n*8 + *s++ - '0';
+	} else
+		while(*s >= '0' && *s <= '9')
+			n = n*10 + *s++ - '0';
+	if(f)
+		n = -n;
+	return n;
+}
+
+vlong
+rnd(vlong v, long r)
+{
+	vlong c;
+
+	if(r <= 0)
+		return v;
+	v += r - 1;
+	c = v % r;
+	if(c < 0)
+		c += r;
+	v -= c;
+	return v;
+}
+
+void
+import(void)
+{
+	int i;
+	Sym *s;
+
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->sig != 0 && s->type == SXREF && (nimports == 0 || s->subtype == SIMPORT)){
+				undefsym(s);
+				Bprint(&bso, "IMPORT: %s sig=%lux v=%lld\n", s->name, s->sig, s->value);
+				if(debug['S'])
+					s->sig = 0;
+			}
+}
+
+void
+ckoff(Sym *s, vlong v)
+{
+	if(v < 0 || v >= 1<<Roffset)
+		diag("relocation offset %lld for %s out of range", v, s->name);
+}
+
+static Prog*
+newdata(Sym *s, int o, int w, int t)
+{
+	Prog *p;
+
+	p = prg();
+	p->link = datap;
+	datap = p;
+	p->as = ADATA;
+	p->reg = w;
+	p->from.type = D_OREG;
+	p->from.name = t;
+	p->from.sym = s;
+	p->from.offset = o;
+	p->to.type = D_CONST;
+	p->to.name = D_NONE;
+	return p;
+}
+
+void
+export(void)
+{
+	int i, j, n, off, nb, sv, ne;
+	Sym *s, *et, *str, **esyms;
+	Prog *p;
+	char buf[NSNAME], *t;
+
+	n = 0;
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->type != SXREF && s->type != SUNDEF && (nexports == 0 && s->sig != 0 || s->subtype == SEXPORT || allexport))
+				n++;
+	esyms = malloc(n*sizeof(Sym*));
+	ne = n;
+	n = 0;
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->type != SXREF && s->type != SUNDEF && (nexports == 0 && s->sig != 0 || s->subtype == SEXPORT || allexport))
+				esyms[n++] = s;
+	for(i = 0; i < ne-1; i++)
+		for(j = i+1; j < ne; j++)
+			if(strcmp(esyms[i]->name, esyms[j]->name) > 0){
+				s = esyms[i];
+				esyms[i] = esyms[j];
+				esyms[j] = s;
+			}
+
+	nb = 0;
+	off = 0;
+	et = lookup(EXPTAB, 0);
+	if(et->type != 0 && et->type != SXREF)
+		diag("%s already defined", EXPTAB);
+	et->type = SDATA;
+	str = lookup(".string", 0);
+	if(str->type == 0)
+		str->type = SDATA;
+	sv = str->value;
+	for(i = 0; i < ne; i++){
+		s = esyms[i];
+		Bprint(&bso, "EXPORT: %s sig=%lux t=%d\n", s->name, s->sig, s->type);
+
+		/* signature */
+		p = newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);
+		p->to.offset = s->sig;
+
+		/* address */
+		p = newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);		/* TO DO: bug */
+		p->to.name = D_EXTERN;
+		p->to.sym = s;
+
+		/* string */
+		t = s->name;
+		n = strlen(t)+1;
+		for(;;){
+			buf[nb++] = *t;
+			sv++;
+			if(nb >= NSNAME){
+				p = newdata(str, sv-NSNAME, NSNAME, D_STATIC);
+				p->to.type = D_SCONST;
+				memmove(p->to.sval, buf, NSNAME);
+				nb = 0;
+			}
+			if(*t++ == 0)
+				break;
+		}
+
+		/* name */
+		p = newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);
+		p->to.name = D_STATIC;
+		p->to.sym = str;
+		p->to.offset = sv-n;
+	}
+
+	if(nb > 0){
+		p = newdata(str, sv-nb, nb, D_STATIC);
+		p->to.type = D_SCONST;
+		memmove(p->to.sval, buf, nb);
+	}
+
+	for(i = 0; i < 3; i++){
+		newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);
+	}
+	et->value = off;
+	if(sv == 0)
+		sv = 1;
+	str->value = sv;
+	exports = ne;
+	free(esyms);
+}

+ 804 - 0
sys/src/cmd/9l/sched.c

@@ -0,0 +1,804 @@
+#include	"l.h"
+
+enum
+{
+	E_ICC	= 1<<0,
+	E_FCC	= 1<<1,
+	E_MEM	= 1<<2,
+	E_MEMSP	= 1<<3,	/* uses offset and size */
+	E_MEMSB	= 1<<4,	/* uses offset and size */
+	E_LR	= 1<<5,
+	E_CR = 1<<6,
+	E_CTR = 1<<7,
+	E_XER = 1<<8,
+
+	E_CR0 = 0xF<<0,
+	E_CR1 = 0xF<<4,
+
+	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
+	ALL	= ~0,
+};
+
+typedef	struct	Sch	Sch;
+typedef	struct	Dep	Dep;
+
+struct	Dep
+{
+	ulong	ireg;
+	ulong	freg;
+	ulong	cc;
+	ulong	cr;
+};
+struct	Sch
+{
+	Prog	p;
+	Dep	set;
+	Dep	used;
+	long	soffset;
+	char	size;
+	char	comp;
+};
+
+void	regused(Sch*, Prog*);
+int	depend(Sch*, Sch*);
+int	conflict(Sch*, Sch*);
+int	offoverlap(Sch*, Sch*);
+void	dumpbits(Sch*, Dep*);
+
+void
+sched(Prog *p0, Prog *pe)
+{
+	Prog *p, *q;
+	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
+
+	if(!debug['Q'])
+		return;
+	/*
+	 * build side structure
+	 */
+	s = sch;
+	for(p=p0;; p=p->link) {
+		memset(s, 0, sizeof(*s));
+		s->p = *p;
+		regused(s, p);
+		if(debug['X']) {
+			Bprint(&bso, "%P\tset", &s->p);
+			dumpbits(s, &s->set);
+			Bprint(&bso, "; used");
+			dumpbits(s, &s->used);
+			if(s->comp)
+				Bprint(&bso, "; compound");
+			if(s->p.mark & LOAD)
+				Bprint(&bso, "; load");
+			if(s->p.mark & BRANCH)
+				Bprint(&bso, "; branch");
+			if(s->p.mark & FCMP)
+				Bprint(&bso, "; fcmp");
+			Bprint(&bso, "\n");
+		}
+		s++;
+		if(p == pe)
+			break;
+	}
+	se = s;
+
+	for(s=se-1; s>=sch; s--) {
+
+		/*
+		 * load delay. interlocked.
+		 */
+		if(s->p.mark & LOAD) {
+			if(s >= se-1)
+				continue;
+			if(!conflict(s, (s+1)))
+				continue;
+			/*
+			 * s is load, s+1 is immediate use of result
+			 * t is the trial instruction to insert between s and s+1
+			 */
+			for(t=s-1; t>=sch; t--) {
+				if(t->p.mark & BRANCH)
+					goto no2;
+				if(t->p.mark & FCMP)
+					if((s+1)->p.mark & BRANCH)
+						goto no2;
+				if(t->p.mark & LOAD)
+					if(conflict(t, (s+1)))
+						goto no2;
+				for(u=t+1; u<=s; u++)
+					if(depend(u, t))
+						goto no2;
+				goto out2;
+			no2:;
+			}
+			if(debug['X'])
+				Bprint(&bso, "?l%P\n", &s->p);
+			continue;
+		out2:
+			if(debug['X']) {
+				Bprint(&bso, "!l%P\n", &t->p);
+				Bprint(&bso, "%P\n", &s->p);
+			}
+			stmp = *t;
+			memmove(t, t+1, (uchar*)s - (uchar*)t);
+			*s = stmp;
+			s--;
+			continue;
+		}
+
+		/*
+		 * fop2 delay.
+		 */
+		if(s->p.mark & FCMP) {
+			if(s >= se-1)
+				continue;
+			if(!((s+1)->p.mark & BRANCH))
+				continue;
+			/* t is the trial instruction to use */
+			for(t=s-1; t>=sch; t--) {
+				for(u=t+1; u<=s; u++)
+					if(depend(u, t))
+						goto no3;
+				goto out3;
+			no3:;
+			}
+			if(debug['X'])
+				Bprint(&bso, "?f%P\n", &s->p);
+			continue;
+		out3:
+			if(debug['X']) {
+				Bprint(&bso, "!f%P\n", &t->p);
+				Bprint(&bso, "%P\n", &s->p);
+			}
+			stmp = *t;
+			memmove(t, t+1, (uchar*)s - (uchar*)t);
+			*s = stmp;
+			s--;
+			continue;
+		}
+	}
+
+	/*
+	 * put it all back
+	 */
+	for(s=sch, p=p0; s<se; s++, p=q) {
+		q = p->link;
+		if(q != s->p.link) {
+			*p = s->p;
+			p->link = q;
+		}
+	}
+	if(debug['X'])
+		Bprint(&bso, "\n");
+}
+
+void
+regused(Sch *s, Prog *realp)
+{
+	int c, ar, ad, ld, sz, nr, upd;
+	ulong m;
+	Prog *p;
+
+	p = &s->p;
+	s->comp = compound(p);
+	if(s->comp) {
+		s->set.ireg |= 1<<REGTMP;
+		s->used.ireg |= 1<<REGTMP;
+	}
+	ar = 0;		/* dest is really reference */
+	ad = 0;		/* source/dest is really address */
+	ld = 0;		/* opcode is load instruction */
+	sz = 32*4;		/* size of load/store for overlap computation */
+	nr = 0;	/* source/dest is not really reg */
+	upd = 0;	/* move with update; changes reg */
+
+/*
+ * flags based on opcode
+ */
+	switch(p->as) {
+	case ATEXT:
+		curtext = realp;
+		autosize = p->to.offset + 8;
+		ad = 1;
+		break;
+	case ABL:
+		s->set.cc |= E_LR;
+		ar = 1;
+		ad = 1;
+		break;
+	case ABR:
+		ar = 1;
+		ad = 1;
+		break;
+	case ACMP:
+	case ACMPU:
+	case ACMPW:
+	case ACMPWU:
+		s->set.cc |= E_ICC;
+		if(p->reg == 0)
+			s->set.cr |= E_CR0;
+		else
+			s->set.cr |= (0xF<<((p->reg&7)*4));
+		ar = 1;
+		break;
+	case AFCMPO:
+	case AFCMPU:
+		s->set.cc |= E_FCC;
+		if(p->reg == 0)
+			s->set.cr |= E_CR0;
+		else
+			s->set.cr |= (0xF<<((p->reg&7)*4));
+		ar = 1;
+		break;
+	case ACRAND:
+	case ACRANDN:
+	case ACREQV:
+	case ACRNAND:
+	case ACRNOR:
+	case ACROR:
+	case ACRORN:
+	case ACRXOR:
+		s->used.cr |= 1<<p->from.reg;
+		s->set.cr |= 1<<p->to.reg;
+		nr = 1;
+		break;
+	case ABCL:	/* tricky */
+		s->used.cc |= E_FCC|E_ICC;
+		s->used.cr = ALL;
+		s->set.cc |= E_LR;
+		ar = 1;
+		break;
+	case ABC:		/* tricky */
+		s->used.cc |= E_FCC|E_ICC;
+		s->used.cr = ALL;
+		ar = 1;
+		break;
+	case ABEQ:
+	case ABGE:
+	case ABGT:
+	case ABLE:
+	case ABLT:
+	case ABNE:
+	case ABVC:
+	case ABVS:
+		s->used.cc |= E_ICC;
+		s->used.cr |= E_CR0;
+		ar = 1;
+		break;
+	case ALSW:
+	case AMOVMW:
+		/* could do better */
+		sz = 32*4;
+		ld = 1;
+		break;
+	case AMOVBU:
+	case AMOVBZU:
+		upd = 1;
+		sz = 1;
+		ld = 1;
+		break;
+	case AMOVB:
+	case AMOVBZ:
+		sz = 1;
+		ld = 1;
+		break;
+	case AMOVHU:
+	case AMOVHZU:
+		upd = 1;
+		sz = 2;
+		ld = 1;
+		break;
+	case AMOVH:
+	case AMOVHBR:
+	case AMOVHZ:
+		sz = 2;
+		ld = 1;
+		break;
+	case AFMOVSU:
+	case AMOVWU:
+	case AMOVWZU:
+		upd = 1;
+		sz = 4;
+		ld = 1;
+		break;
+	case AFMOVS:
+	case AMOVW:
+	case AMOVWZ:
+	case AMOVWBR:
+	case ALWAR:
+		sz = 4;
+		ld = 1;
+		break;
+	case AFMOVDU:
+		upd = 1;
+		sz = 8;
+		ld = 1;
+		break;
+	case AFMOVD:
+		sz = 8;
+		ld = 1;
+		break;
+	case AFMOVDCC:
+		sz = 8;
+		ld = 1;
+		s->set.cc |= E_FCC;
+		s->set.cr |= E_CR1;
+		break;
+	case AMOVFL:
+	case AMOVCRFS:
+	case AMTFSB0:
+	case AMTFSB0CC:
+	case AMTFSB1:
+	case AMTFSB1CC:
+		s->set.ireg = ALL;
+		s->set.freg = ALL;
+		s->set.cc = ALL;
+		s->set.cr = ALL;
+		break;
+	case AADDCC:
+	case AADDVCC:
+	case AADDCCC:
+	case AADDCVCC:
+	case AADDMECC:
+	case AADDMEVCC:
+	case AADDECC:
+	case AADDEVCC:
+	case AADDZECC:
+	case AADDZEVCC:
+	case AANDCC:
+	case AANDNCC:
+	case ACNTLZWCC:
+	case ADIVWCC:
+	case ADIVWVCC:
+	case ADIVWUCC:
+	case ADIVWUVCC:
+	case AEQVCC:
+	case AEXTSBCC:
+	case AEXTSHCC:
+	case AMULHWCC:
+	case AMULHWUCC:
+	case AMULLWCC:
+	case AMULLWVCC:
+	case ANANDCC:
+	case ANEGCC:
+	case ANEGVCC:
+	case ANORCC:
+	case AORCC:
+	case AORNCC:
+	case AREMCC:
+	case AREMVCC:
+	case AREMUCC:
+	case AREMUVCC:
+	case ARLWMICC:
+	case ARLWNMCC:
+	case ASLWCC:
+	case ASRAWCC:
+	case ASRWCC:
+	case ASTWCCC:
+	case ASUBCC:
+	case ASUBVCC:
+	case ASUBCCC:
+	case ASUBCVCC:
+	case ASUBMECC:
+	case ASUBMEVCC:
+	case ASUBECC:
+	case ASUBEVCC:
+	case ASUBZECC:
+	case ASUBZEVCC:
+	case AXORCC:
+		s->set.cc |= E_ICC;
+		s->set.cr |= E_CR0;
+		break;
+	case AFABSCC:
+	case AFADDCC:
+	case AFADDSCC:
+	case AFCTIWCC:
+	case AFCTIWZCC:
+	case AFDIVCC:
+	case AFDIVSCC:
+	case AFMADDCC:
+	case AFMADDSCC:
+	case AFMSUBCC:
+	case AFMSUBSCC:
+	case AFMULCC:
+	case AFMULSCC:
+	case AFNABSCC:
+	case AFNEGCC:
+	case AFNMADDCC:
+	case AFNMADDSCC:
+	case AFNMSUBCC:
+	case AFNMSUBSCC:
+	case AFRSPCC:
+	case AFSUBCC:
+	case AFSUBSCC:
+		s->set.cc |= E_FCC;
+		s->set.cr |= E_CR1;
+		break;
+	}
+
+/*
+ * flags based on 'to' field
+ */
+	c = p->to.class;
+	if(c == 0) {
+		c = aclass(&p->to) + 1;
+		p->to.class = c;
+	}
+	c--;
+	switch(c) {
+	default:
+		print("unknown class %d %D\n", c, &p->to);
+
+	case C_NONE:
+	case C_ZCON:
+	case C_SCON:
+	case C_UCON:
+	case C_LCON:
+	case C_ADDCON:
+	case C_ANDCON:
+	case C_SBRA:
+	case C_LBRA:
+		break;
+	case C_CREG:
+		c = p->to.reg;
+		if(c == NREG)
+			s->set.cr = ALL;
+		else
+			s->set.cr |= (0xF << ((p->from.reg&7)*4));
+		s->set.cc = ALL;
+		break;
+	case C_SPR:
+	case C_FPSCR:
+	case C_MSR:
+	case C_XER:
+		s->set.ireg = ALL;
+		s->set.freg = ALL;
+		s->set.cc = ALL;
+		s->set.cr = ALL;
+		break;
+	case C_LR:
+		s->set.cc |= E_LR;
+		break;
+	case C_CTR:
+		s->set.cc |= E_CTR;
+		break;
+	case C_ZOREG:
+	case C_SOREG:
+	case C_LOREG:
+		c = p->to.reg;
+		s->used.ireg |= 1<<c;
+		if(upd)
+			s->set.ireg |= 1<<c;
+		if(ad)
+			break;
+		s->size = sz;
+		s->soffset = regoff(&p->to);
+
+		m = ANYMEM;
+		if(c == REGSB)
+			m = E_MEMSB;
+		if(c == REGSP)
+			m = E_MEMSP;
+
+		if(ar)
+			s->used.cc |= m;
+		else
+			s->set.cc |= m;
+		break;
+	case C_SACON:
+	case C_LACON:
+		s->used.ireg |= 1<<REGSP;
+		if(upd)
+			s->set.ireg |= 1<<c;
+		break;
+	case C_SECON:
+	case C_LECON:
+		s->used.ireg |= 1<<REGSB;
+		if(upd)
+			s->set.ireg |= 1<<c;
+		break;
+	case C_REG:
+		if(nr)
+			break;
+		if(ar)
+			s->used.ireg |= 1<<p->to.reg;
+		else
+			s->set.ireg |= 1<<p->to.reg;
+		break;
+	case C_FREG:
+		if(ar)
+			s->used.freg |= 1<<p->to.reg;
+		else
+			s->set.freg |= 1<<p->to.reg;
+		break;
+	case C_SAUTO:
+	case C_LAUTO:
+		s->used.ireg |= 1<<REGSP;
+		if(upd)
+			s->set.ireg |= 1<<c;
+		if(ad)
+			break;
+		s->size = sz;
+		s->soffset = regoff(&p->to);
+
+		if(ar)
+			s->used.cc |= E_MEMSP;
+		else
+			s->set.cc |= E_MEMSP;
+		break;
+	case C_SEXT:
+	case C_LEXT:
+		s->used.ireg |= 1<<REGSB;
+		if(upd)
+			s->set.ireg |= 1<<c;
+		if(ad)
+			break;
+		s->size = sz;
+		s->soffset = regoff(&p->to);
+
+		if(ar)
+			s->used.cc |= E_MEMSB;
+		else
+			s->set.cc |= E_MEMSB;
+		break;
+	}
+
+/*
+ * flags based on 'from' field
+ */
+	c = p->from.class;
+	if(c == 0) {
+		c = aclass(&p->from) + 1;
+		p->from.class = c;
+	}
+	c--;
+	switch(c) {
+	default:
+		print("unknown class %d %D\n", c, &p->from);
+
+	case C_NONE:
+	case C_ZCON:
+	case C_SCON:
+	case C_UCON:
+	case C_LCON:
+	case C_ADDCON:
+	case C_ANDCON:
+	case C_SBRA:
+	case C_LBRA:
+		c = p->from.reg;
+		if(c != NREG)
+			s->used.ireg |= 1<<c;
+		break;
+	case C_CREG:
+		c = p->from.reg;
+		if(c == NREG)
+			s->used.cr = ALL;
+		else
+			s->used.cr |= (0xF << ((p->from.reg&7)*4));
+		s->used.cc = ALL;
+		break;
+	case C_SPR:
+	case C_FPSCR:
+	case C_MSR:
+	case C_XER:
+		s->set.ireg = ALL;
+		s->set.freg = ALL;
+		s->set.cc = ALL;
+		s->set.cr = ALL;
+		break;
+	case C_LR:
+		s->used.cc |= E_LR;
+		break;
+	case C_CTR:
+		s->used.cc |= E_CTR;
+		break;
+	case C_ZOREG:
+	case C_SOREG:
+	case C_LOREG:
+		c = p->from.reg;
+		s->used.ireg |= 1<<c;
+		if(ld)
+			p->mark |= LOAD;
+		if(ad)
+			break;
+		s->size = sz;
+		s->soffset = regoff(&p->from);
+
+		m = ANYMEM;
+		if(c == REGSB)
+			m = E_MEMSB;
+		if(c == REGSP)
+			m = E_MEMSP;
+
+		s->used.cc |= m;
+		break;
+	case C_SACON:
+	case C_LACON:
+		s->used.ireg |= 1<<REGSP;
+		break;
+	case C_SECON:
+	case C_LECON:
+		s->used.ireg |= 1<<REGSB;
+		break;
+	case C_REG:
+		if(nr)
+			break;
+		s->used.ireg |= 1<<p->from.reg;
+		break;
+	case C_FREG:
+		s->used.freg |= 1<<p->from.reg;
+		break;
+	case C_SAUTO:
+	case C_LAUTO:
+		s->used.ireg |= 1<<REGSP;
+		if(ld)
+			p->mark |= LOAD;
+		if(ad)
+			break;
+		s->size = sz;
+		s->soffset = regoff(&p->from);
+
+		s->used.cc |= E_MEMSP;
+		break;
+	case C_SEXT:
+	case C_LEXT:
+		s->used.ireg |= 1<<REGSB;
+		if(ld)
+			p->mark |= LOAD;
+		if(ad)
+			break;
+		s->size = sz;
+		s->soffset = regoff(&p->from);
+
+		s->used.cc |= E_MEMSB;
+		break;
+	}
+	
+	c = p->reg;
+	if(c != NREG) {
+		if(p->from.type == D_FREG || p->to.type == D_FREG)
+			s->used.freg |= 1<<c;
+		else
+			s->used.ireg |= 1<<c;
+	}
+}
+
+/*
+ * test to see if 2 instrictions can be
+ * interchanged without changing semantics
+ */
+int
+depend(Sch *sa, Sch *sb)
+{
+	ulong x;
+
+	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
+		return 1;
+	if(sb->set.ireg & sa->used.ireg)
+		return 1;
+
+	if(sa->set.freg & (sb->set.freg|sb->used.freg))
+		return 1;
+	if(sb->set.freg & sa->used.freg)
+		return 1;
+
+	if(sa->set.cr & (sb->set.cr|sb->used.cr))
+		return 1;
+	if(sb->set.cr & sa->used.cr)
+		return 1;
+
+
+	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
+		(sb->set.cc & sa->used.cc);
+	if(x) {
+		/*
+		 * allow SB and SP to pass each other.
+		 * allow SB to pass SB iff doffsets are ok
+		 * anything else conflicts
+		 */
+		if(x != E_MEMSP && x != E_MEMSB)
+			return 1;
+		x = sa->set.cc | sb->set.cc |
+			sa->used.cc | sb->used.cc;
+		if(x & E_MEM)
+			return 1;
+		if(offoverlap(sa, sb))
+			return 1;
+	}
+
+	return 0; 
+}
+
+int
+offoverlap(Sch *sa, Sch *sb)
+{
+
+	if(sa->soffset < sb->soffset) {
+		if(sa->soffset+sa->size > sb->soffset)
+			return 1;
+		return 0;
+	}
+	if(sb->soffset+sb->size > sa->soffset)
+		return 1;
+	return 0;
+}
+
+/*
+ * test 2 adjacent instructions
+ * and find out if inserted instructions
+ * are desired to prevent stalls.
+ * first instruction is a load instruction.
+ */
+int
+conflict(Sch *sa, Sch *sb)
+{
+
+	if(sa->set.ireg & sb->used.ireg)
+		return 1;
+	if(sa->set.freg & sb->used.freg)
+		return 1;
+	if(sa->set.cr & sb->used.cr)
+		return 1;
+	return 0;
+}
+
+int
+compound(Prog *p)
+{
+	Optab *o;
+
+	o = oplook(p);
+	if(o->size != 4)
+		return 1;
+	if(p->to.type == D_REG && p->to.reg == REGSB)
+		return 1;
+	return 0;
+}
+
+void
+dumpbits(Sch *s, Dep *d)
+{
+	int i;
+
+	for(i=0; i<32; i++)
+		if(d->ireg & (1<<i))
+			Bprint(&bso, " R%d", i);
+	for(i=0; i<32; i++)
+		if(d->freg & (1<<i))
+			Bprint(&bso, " F%d", i);
+	for(i=0; i<32; i++)
+		if(d->cr & (1<<i))
+			Bprint(&bso, " C%d", i);
+	for(i=0; i<32; i++)
+		switch(d->cc & (1<<i)) {
+		default:
+			break;
+		case E_ICC:
+			Bprint(&bso, " ICC");
+			break;
+		case E_FCC:
+			Bprint(&bso, " FCC");
+			break;
+		case E_LR:
+			Bprint(&bso, " LR");
+			break;
+		case E_CR:
+			Bprint(&bso, " CR");
+			break;
+		case E_CTR:
+			Bprint(&bso, " CTR");
+			break;
+		case E_XER:
+			Bprint(&bso, " XER");
+			break;
+		case E_MEM:
+			Bprint(&bso, " MEM%d", s->size);
+			break;
+		case E_MEMSB:
+			Bprint(&bso, " SB%d", s->size);
+			break;
+		case E_MEMSP:
+			Bprint(&bso, " SP%d", s->size);
+			break;
+		}
+}

+ 1011 - 0
sys/src/cmd/9l/span.c

@@ -0,0 +1,1011 @@
+#include	"l.h"
+
+void
+span(void)
+{
+	Prog *p, *q;
+	Sym *setext;
+	Optab *o;
+	int m, bflag;
+	vlong c, otxt;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f span\n", cputime());
+	Bflush(&bso);
+
+	bflag = 0;
+	c = INITTEXT;
+	otxt = c;
+	for(p = firstp; p != P; p = p->link) {
+		p->pc = c;
+		o = oplook(p);
+		m = o->size;
+		if(m == 0) {
+			if(p->as == ATEXT) {
+				curtext = p;
+				autosize = p->to.offset + 8;
+				if(p->from3.type == D_CONST) {
+					if(p->from3.offset & 3)
+						diag("illegal origin\n%P", p);
+					if(c > p->from3.offset)
+						diag("passed origin (#%llux)\n%P", c, p);
+					else
+						c = p->from3.offset;
+					p->pc = c;
+				}
+				if(p->from.sym != S)
+					p->from.sym->value = c;
+				/* need passes to resolve branches? */
+				if(c-otxt >= (1L<<15))
+					bflag = c;
+				otxt = c;
+				continue;
+			}
+			if(p->as != ANOP)
+				diag("zero-width instruction\n%P", p);
+			continue;
+		}
+		c += m;
+	}
+
+	/*
+	 * if any procedure is large enough to
+	 * generate a large SBRA branch, then
+	 * generate extra passes putting branches
+	 * around jmps to fix. this is rare.
+	 */
+	while(bflag) {
+		if(debug['v'])
+			Bprint(&bso, "%5.2f span1\n", cputime());
+		bflag = 0;
+		c = INITTEXT;
+		for(p = firstp; p != P; p = p->link) {
+			p->pc = c;
+			o = oplook(p);
+			if((o->type == 16 || o->type == 17) && p->cond) {
+				otxt = p->cond->pc - c;
+				if(otxt < -(1L<<16)+10 || otxt >= (1L<<15)-10) {
+					q = prg();
+					q->link = p->link;
+					p->link = q;
+					q->as = ABR;
+					q->to.type = D_BRANCH;
+					q->cond = p->cond;
+					p->cond = q;
+					q = prg();
+					q->link = p->link;
+					p->link = q;
+					q->as = ABR;
+					q->to.type = D_BRANCH;
+					q->cond = q->link->link;
+					addnop(p->link);
+					addnop(p);
+					bflag = 1;
+				}
+			}
+			m = o->size;
+			if(m == 0) {
+				if(p->as == ATEXT) {
+					curtext = p;
+					autosize = p->to.offset + 8;
+					if(p->from.sym != S)
+						p->from.sym->value = c;
+					continue;
+				}
+				if(p->as != ANOP)
+					diag("zero-width instruction\n%P", p);
+				continue;
+			}
+			c += m;
+		}
+	}
+
+	c = rnd(c, 8);
+
+	setext = lookup("etext", 0);
+	if(setext != S) {
+		setext->value = c;
+		textsize = c - INITTEXT;
+	}
+	if(INITRND)
+		INITDAT = rnd(c, INITRND);
+	if(debug['v'])
+		Bprint(&bso, "tsize = %llux\n", textsize);
+	Bflush(&bso);
+}
+		
+void
+xdefine(char *p, int t, vlong v)
+{
+	Sym *s;
+
+	s = lookup(p, 0);
+	if(s->type == 0 || s->type == SXREF) {
+		s->type = t;
+		s->value = v;
+	}
+}
+
+vlong
+vregoff(Adr *a)
+{
+
+	instoffset = 0;
+	aclass(a);
+	return instoffset;
+}
+
+long
+regoff(Adr *a)
+{
+	return vregoff(a);
+}
+
+int
+isint32(vlong v)
+{
+	long l;
+
+	l = v;
+	return (vlong)l == v;
+}
+
+int
+isuint32(uvlong v)
+{
+	ulong l;
+
+	l = v;
+	return (uvlong)l == v;
+}
+
+int
+aclass(Adr *a)
+{
+	Sym *s;
+	int t;
+
+	switch(a->type) {
+	case D_NONE:
+		return C_NONE;
+
+	case D_REG:
+		return C_REG;
+
+	case D_FREG:
+		return C_FREG;
+
+	case D_CREG:
+		return C_CREG;
+
+	case D_SPR:
+		if(a->offset == D_LR)
+			return C_LR;
+		if(a->offset == D_XER)
+			return C_XER;
+		if(a->offset == D_CTR)
+			return C_CTR;
+		return C_SPR;
+
+	case D_DCR:
+		return C_SPR;
+
+	case D_FPSCR:
+		return C_FPSCR;
+
+	case D_MSR:
+		return C_MSR;
+
+	case D_OREG:
+		switch(a->name) {
+		case D_EXTERN:
+		case D_STATIC:
+			if(a->sym == S)
+				break;
+			t = a->sym->type;
+			if(t == 0 || t == SXREF) {
+				diag("undefined external: %s in %s",
+					a->sym->name, TNAME);
+				a->sym->type = SDATA;
+			}
+			if(dlm){
+				instoffset = a->sym->value + a->offset;
+				switch(a->sym->type){
+				case STEXT:
+				case SLEAF:
+				case SCONST:
+				case SUNDEF:
+					break;
+				default:
+					instoffset += INITDAT;
+				}
+				return C_ADDR;
+			}
+			instoffset = a->sym->value + a->offset - BIG;
+			if(instoffset >= -BIG && instoffset < BIG)
+				return C_SEXT;
+			return C_LEXT;
+		case D_AUTO:
+			instoffset = autosize + a->offset;
+			if(instoffset >= -BIG && instoffset < BIG)
+				return C_SAUTO;
+			return C_LAUTO;
+		case D_PARAM:
+			instoffset = autosize + a->offset + 8L;
+			if(instoffset >= -BIG && instoffset < BIG)
+				return C_SAUTO;
+			return C_LAUTO;
+		case D_NONE:
+			instoffset = a->offset;
+			if(instoffset == 0)
+				return C_ZOREG;
+			if(instoffset >= -BIG && instoffset < BIG)
+				return C_SOREG;
+			return C_LOREG;
+		}
+		return C_GOK;
+
+	case D_OPT:
+		instoffset = a->offset & 31L;
+		if(a->name == D_NONE)
+			return C_SCON;
+		return C_GOK;
+
+	case D_CONST:
+		switch(a->name) {
+
+		case D_NONE:
+			instoffset = a->offset;
+		consize:
+			if(instoffset >= 0) {
+				if(instoffset == 0)
+					return C_ZCON;
+				if(instoffset <= 0x7fff)
+					return C_SCON;
+				if(instoffset <= 0xffff)
+					return C_ANDCON;
+				if((instoffset & 0xffff) == 0 && isuint32(instoffset))	/* && (instoffset & (1<<31)) == 0) */
+					return C_UCON;
+				if(isint32(instoffset) || isuint32(instoffset))
+					return C_LCON;
+				return C_DCON;
+			}
+			if(instoffset >= -0x8000)
+				return C_ADDCON;
+			if((instoffset & 0xffff) == 0 && isint32(instoffset))
+				return C_UCON;
+			if(isint32(instoffset))
+				return C_LCON;
+			return C_DCON;
+
+		case D_EXTERN:
+		case D_STATIC:
+			s = a->sym;
+			if(s == S)
+				break;
+			t = s->type;
+			if(t == 0 || t == SXREF) {
+				diag("undefined external: %s in %s",
+					s->name, TNAME);
+				s->type = SDATA;
+			}
+			if(s->type == STEXT || s->type == SLEAF || s->type == SUNDEF) {
+				instoffset = s->value + a->offset;
+				return C_LCON;
+			}
+			if(s->type == SCONST) {
+				instoffset = s->value + a->offset;
+				if(dlm)
+					return C_LCON;
+				goto consize;
+			}
+			if(!dlm){
+				instoffset = s->value + a->offset - BIG;
+				if(instoffset >= -BIG && instoffset < BIG && instoffset != 0)
+					return C_SECON;
+			}
+			instoffset = s->value + a->offset + INITDAT;
+			if(dlm)
+				return C_LCON;
+			/* not sure why this barfs */
+			return C_LCON;
+		/*
+			if(instoffset == 0)
+				return C_ZCON;
+			if(instoffset >= -0x8000 && instoffset <= 0xffff)
+				return C_SCON;
+			if((instoffset & 0xffff) == 0)
+				return C_UCON;
+			return C_LCON;
+		*/
+
+		case D_AUTO:
+			instoffset = autosize + a->offset;
+			if(instoffset >= -BIG && instoffset < BIG)
+				return C_SACON;
+			return C_LACON;
+
+		case D_PARAM:
+			instoffset = autosize + a->offset + 8L;
+			if(instoffset >= -BIG && instoffset < BIG)
+				return C_SACON;
+			return C_LACON;
+		}
+		return C_GOK;
+
+	case D_BRANCH:
+		return C_SBRA;
+	}
+	return C_GOK;
+}
+
+Optab*
+oplook(Prog *p)
+{
+	int a1, a2, a3, a4, r;
+	char *c1, *c3, *c4;
+	Optab *o, *e;
+
+	a1 = p->optab;
+	if(a1)
+		return optab+(a1-1);
+	a1 = p->from.class;
+	if(a1 == 0) {
+		a1 = aclass(&p->from) + 1;
+		p->from.class = a1;
+	}
+	a1--;
+	a3 = p->from3.class;
+	if(a3 == 0) {
+		a3 = aclass(&p->from3) + 1;
+		p->from3.class = a3;
+	}
+	a3--;
+	a4 = p->to.class;
+	if(a4 == 0) {
+		a4 = aclass(&p->to) + 1;
+		p->to.class = a4;
+	}
+	a4--;
+	a2 = C_NONE;
+	if(p->reg != NREG)
+		a2 = C_REG;
+	r = p->as;
+	o = oprange[r].start;
+	if(o == 0)
+		o = oprange[r].stop; /* just generate an error */
+	e = oprange[r].stop;
+	c1 = xcmp[a1];
+	c3 = xcmp[a3];
+	c4 = xcmp[a4];
+	for(; o<e; o++)
+		if(o->a2 == a2)
+		if(c1[o->a1])
+		if(c3[o->a3])
+		if(c4[o->a4]) {
+			p->optab = (o-optab)+1;
+			return o;
+		}
+	diag("illegal combination %A %R %R %R %R",
+		p->as, a1, a2, a3, a4);
+	if(1||!debug['a'])
+		prasm(p);
+	if(o == 0)
+		errorexit();
+	return o;
+}
+
+int
+cmp(int a, int b)
+{
+
+	if(a == b)
+		return 1;
+	switch(a) {
+	case C_LCON:
+		if(b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON)
+			return 1;
+		break;
+	case C_ADDCON:
+		if(b == C_ZCON || b == C_SCON)
+			return 1;
+		break;
+	case C_ANDCON:
+		if(b == C_ZCON || b == C_SCON)
+			return 1;
+		break;
+	case C_SPR:
+		if(b == C_LR || b == C_XER || b == C_CTR)
+			return 1;
+		break;
+	case C_UCON:
+		if(b == C_ZCON)
+			return 1;
+		break;
+	case C_SCON:
+		if(b == C_ZCON)
+			return 1;
+		break;
+	case C_LACON:
+		if(b == C_SACON)
+			return 1;
+		break;
+	case C_LBRA:
+		if(b == C_SBRA)
+			return 1;
+		break;
+	case C_LEXT:
+		if(b == C_SEXT)
+			return 1;
+		break;
+	case C_LAUTO:
+		if(b == C_SAUTO)
+			return 1;
+		break;
+	case C_REG:
+		if(b == C_ZCON)
+			return r0iszero;
+		break;
+	case C_LOREG:
+		if(b == C_ZOREG || b == C_SOREG)
+			return 1;
+		break;
+	case C_SOREG:
+		if(b == C_ZOREG)
+			return 1;
+		break;
+
+	case C_ANY:
+		return 1;
+	}
+	return 0;
+}
+
+int
+ocmp(void *a1, void *a2)
+{
+	Optab *p1, *p2;
+	int n;
+
+	p1 = a1;
+	p2 = a2;
+	n = p1->as - p2->as;
+	if(n)
+		return n;
+	n = p1->a1 - p2->a1;
+	if(n)
+		return n;
+	n = p1->a2 - p2->a2;
+	if(n)
+		return n;
+	n = p1->a3 - p2->a3;
+	if(n)
+		return n;
+	n = p1->a4 - p2->a4;
+	if(n)
+		return n;
+	return 0;
+}
+
+void
+buildop(void)
+{
+	int i, n, r;
+
+	for(i=0; i<C_NCLASS; i++)
+		for(n=0; n<C_NCLASS; n++)
+			xcmp[i][n] = cmp(n, i);
+	for(n=0; optab[n].as != AXXX; n++)
+		;
+	qsort(optab, n, sizeof(optab[0]), ocmp);
+	for(i=0; i<n; i++) {
+		r = optab[i].as;
+		oprange[r].start = optab+i;
+		while(optab[i].as == r)
+			i++;
+		oprange[r].stop = optab+i;
+		i--;
+		
+		switch(r)
+		{
+		default:
+			diag("unknown op in build: %A", r);
+			errorexit();
+		case ADCBF:	/* unary indexed: op (b+a); op (b) */
+			oprange[ADCBI] = oprange[r];
+			oprange[ADCBST] = oprange[r];
+			oprange[ADCBT] = oprange[r];
+			oprange[ADCBTST] = oprange[r];
+			oprange[ADCBZ] = oprange[r];
+			oprange[AICBI] = oprange[r];
+			break;
+		case AECOWX:	/* indexed store: op s,(b+a); op s,(b) */
+			oprange[ASTWCCC] = oprange[r];
+			break;
+		case AREM:	/* macro */
+			oprange[AREMCC] = oprange[r];
+			oprange[AREMV] = oprange[r];
+			oprange[AREMVCC] = oprange[r];
+			oprange[AREMU] = oprange[r];
+			oprange[AREMUCC] = oprange[r];
+			oprange[AREMUV] = oprange[r];
+			oprange[AREMUVCC] = oprange[r];
+			break;
+		case AREMD:
+			oprange[AREMDCC] = oprange[r];
+			oprange[AREMDV] = oprange[r];
+			oprange[AREMDVCC] = oprange[r];
+			oprange[AREMDU] = oprange[r];
+			oprange[AREMDUCC] = oprange[r];
+			oprange[AREMDUV] = oprange[r];
+			oprange[AREMDUVCC] = oprange[r];
+			break;
+		case ADIVW:	/* op Rb[,Ra],Rd */
+			oprange[AMULHW] = oprange[r];
+			oprange[AMULHWCC] = oprange[r];
+			oprange[AMULHWU] = oprange[r];
+			oprange[AMULHWUCC] = oprange[r];
+			oprange[AMULLWCC] = oprange[r];
+			oprange[AMULLWVCC] = oprange[r];
+			oprange[AMULLWV] = oprange[r];
+			oprange[ADIVWCC] = oprange[r];
+			oprange[ADIVWV] = oprange[r];
+			oprange[ADIVWVCC] = oprange[r];
+			oprange[ADIVWU] = oprange[r];
+			oprange[ADIVWUCC] = oprange[r];
+			oprange[ADIVWUV] = oprange[r];
+			oprange[ADIVWUVCC] = oprange[r];
+			oprange[AADDCC] = oprange[r];
+			oprange[AADDCV] = oprange[r];
+			oprange[AADDCVCC] = oprange[r];
+			oprange[AADDV] = oprange[r];
+			oprange[AADDVCC] = oprange[r];
+			oprange[AADDE] = oprange[r];
+			oprange[AADDECC] = oprange[r];
+			oprange[AADDEV] = oprange[r];
+			oprange[AADDEVCC] = oprange[r];
+			oprange[ACRAND] = oprange[r];
+			oprange[ACRANDN] = oprange[r];
+			oprange[ACREQV] = oprange[r];
+			oprange[ACRNAND] = oprange[r];
+			oprange[ACRNOR] = oprange[r];
+			oprange[ACROR] = oprange[r];
+			oprange[ACRORN] = oprange[r];
+			oprange[ACRXOR] = oprange[r];
+			oprange[AMULHD] = oprange[r];
+			oprange[AMULHDCC] = oprange[r];
+			oprange[AMULHDU] = oprange[r];
+			oprange[AMULHDUCC] = oprange[r];
+			oprange[AMULLD] = oprange[r];
+			oprange[AMULLDCC] = oprange[r];
+			oprange[AMULLDVCC] = oprange[r];
+			oprange[AMULLDV] = oprange[r];
+			oprange[ADIVD] = oprange[r];
+			oprange[ADIVDCC] = oprange[r];
+			oprange[ADIVDVCC] = oprange[r];
+			oprange[ADIVDV] = oprange[r];
+			oprange[ADIVDU] = oprange[r];
+			oprange[ADIVDUCC] = oprange[r];
+			oprange[ADIVDUVCC] = oprange[r];
+			oprange[ADIVDUCC] = oprange[r];
+			break;
+		case AMOVBZ:	/* lbz, stz, rlwm(r/r), lhz, lha, stz, and x variants */
+			oprange[AMOVH] = oprange[r];
+			oprange[AMOVHZ] = oprange[r];
+			break;
+		case AMOVBZU:	/* lbz[x]u, stb[x]u, lhz[x]u, lha[x]u, sth[u]x, ld[x]u, std[u]x */
+			oprange[AMOVHU] = oprange[r];
+			oprange[AMOVHZU] = oprange[r];
+			oprange[AMOVWU] = oprange[r];
+			oprange[AMOVWZU] = oprange[r];
+			oprange[AMOVDU] = oprange[r];
+			oprange[AMOVMW] = oprange[r];
+			break;
+		case AAND:	/* logical op Rb,Rs,Ra; no literal */
+			oprange[AANDN] = oprange[r];
+			oprange[AANDNCC] = oprange[r];
+			oprange[AEQV] = oprange[r];
+			oprange[AEQVCC] = oprange[r];
+			oprange[ANAND] = oprange[r];
+			oprange[ANANDCC] = oprange[r];
+			oprange[ANOR] = oprange[r];
+			oprange[ANORCC] = oprange[r];
+			oprange[AORCC] = oprange[r];
+			oprange[AORN] = oprange[r];
+			oprange[AORNCC] = oprange[r];
+			oprange[AXORCC] = oprange[r];
+			break;
+		case AADDME:	/* op Ra, Rd */
+			oprange[AADDMECC] = oprange[r];
+			oprange[AADDMEV] = oprange[r];
+			oprange[AADDMEVCC] = oprange[r];
+			oprange[AADDZE] = oprange[r];
+			oprange[AADDZECC] = oprange[r];
+			oprange[AADDZEV] = oprange[r];
+			oprange[AADDZEVCC] = oprange[r];
+			oprange[ASUBME] = oprange[r];
+			oprange[ASUBMECC] = oprange[r];
+			oprange[ASUBMEV] = oprange[r];
+			oprange[ASUBMEVCC] = oprange[r];
+			oprange[ASUBZE] = oprange[r];
+			oprange[ASUBZECC] = oprange[r];
+			oprange[ASUBZEV] = oprange[r];
+			oprange[ASUBZEVCC] = oprange[r];
+			break;
+		case AADDC:
+			oprange[AADDCCC] = oprange[r];
+			break;
+		case ABEQ:
+			oprange[ABGE] = oprange[r];
+			oprange[ABGT] = oprange[r];
+			oprange[ABLE] = oprange[r];
+			oprange[ABLT] = oprange[r];
+			oprange[ABNE] = oprange[r];
+			oprange[ABVC] = oprange[r];
+			oprange[ABVS] = oprange[r];
+			break;
+		case ABR:
+			oprange[ABL] = oprange[r];
+			break;
+		case ABC:
+			oprange[ABCL] = oprange[r];
+			break;
+		case AEXTSB:	/* op Rs, Ra */
+			oprange[AEXTSBCC] = oprange[r];
+			oprange[AEXTSH] = oprange[r];
+			oprange[AEXTSHCC] = oprange[r];
+			oprange[ACNTLZW] = oprange[r];
+			oprange[ACNTLZWCC] = oprange[r];
+			oprange[ACNTLZD] = oprange[r];
+			oprange[AEXTSW] = oprange[r];
+			oprange[AEXTSWCC] = oprange[r];
+			oprange[ACNTLZDCC] = oprange[r];
+			break;
+		case AFABS:	/* fop [s,]d */
+			oprange[AFABSCC] = oprange[r];
+			oprange[AFNABS] = oprange[r];
+			oprange[AFNABSCC] = oprange[r];
+			oprange[AFNEG] = oprange[r];
+			oprange[AFNEGCC] = oprange[r];
+			oprange[AFRSP] = oprange[r];
+			oprange[AFRSPCC] = oprange[r];
+			oprange[AFCTIW] = oprange[r];
+			oprange[AFCTIWCC] = oprange[r];
+			oprange[AFCTIWZ] = oprange[r];
+			oprange[AFCTIWZCC] = oprange[r];
+			oprange[AFCTID] = oprange[r];
+			oprange[AFCTIDCC] = oprange[r];
+			oprange[AFCTIDZ] = oprange[r];
+			oprange[AFCTIDZCC] = oprange[r];
+			oprange[AFCFID] = oprange[r];
+			oprange[AFCFIDCC] = oprange[r];
+			oprange[AFRES] = oprange[r];
+			oprange[AFRESCC] = oprange[r];
+			oprange[AFRSQRTE] = oprange[r];
+			oprange[AFRSQRTECC] = oprange[r];
+			oprange[AFSQRT] = oprange[r];
+			oprange[AFSQRTCC] = oprange[r];
+			oprange[AFSQRTS] = oprange[r];
+			oprange[AFSQRTSCC] = oprange[r];
+			break;
+		case AFADD:
+			oprange[AFADDS] = oprange[r];
+			oprange[AFADDCC] = oprange[r];
+			oprange[AFADDSCC] = oprange[r];
+			oprange[AFDIV] = oprange[r];
+			oprange[AFDIVS] = oprange[r];
+			oprange[AFDIVCC] = oprange[r];
+			oprange[AFDIVSCC] = oprange[r];
+			oprange[AFSUB] = oprange[r];
+			oprange[AFSUBS] = oprange[r];
+			oprange[AFSUBCC] = oprange[r];
+			oprange[AFSUBSCC] = oprange[r];
+			break;
+		case AFMADD:
+			oprange[AFMADDCC] = oprange[r];
+			oprange[AFMADDS] = oprange[r];
+			oprange[AFMADDSCC] = oprange[r];
+			oprange[AFMSUB] = oprange[r];
+			oprange[AFMSUBCC] = oprange[r];
+			oprange[AFMSUBS] = oprange[r];
+			oprange[AFMSUBSCC] = oprange[r];
+			oprange[AFNMADD] = oprange[r];
+			oprange[AFNMADDCC] = oprange[r];
+			oprange[AFNMADDS] = oprange[r];
+			oprange[AFNMADDSCC] = oprange[r];
+			oprange[AFNMSUB] = oprange[r];
+			oprange[AFNMSUBCC] = oprange[r];
+			oprange[AFNMSUBS] = oprange[r];
+			oprange[AFNMSUBSCC] = oprange[r];
+			oprange[AFSEL] = oprange[r];
+			oprange[AFSELCC] = oprange[r];
+			break;
+		case AFMUL:
+			oprange[AFMULS] = oprange[r];
+			oprange[AFMULCC] = oprange[r];
+			oprange[AFMULSCC] = oprange[r];
+			break;
+		case AFCMPO:
+			oprange[AFCMPU] = oprange[r];
+			break;
+		case AMTFSB0:
+			oprange[AMTFSB0CC] = oprange[r];
+			oprange[AMTFSB1] = oprange[r];
+			oprange[AMTFSB1CC] = oprange[r];
+			break;
+		case ANEG:	/* op [Ra,] Rd */
+			oprange[ANEGCC] = oprange[r];
+			oprange[ANEGV] = oprange[r];
+			oprange[ANEGVCC] = oprange[r];
+			break;
+		case AOR:	/* or/xor Rb,Rs,Ra; ori/xori $uimm,Rs,Ra; oris/xoris $uimm,Rs,Ra */
+			oprange[AXOR] = oprange[r];
+			break;
+		case ASLW:
+			oprange[ASLWCC] = oprange[r];
+			oprange[ASRW] = oprange[r];
+			oprange[ASRWCC] = oprange[r];
+			break;
+		case ASLD:
+			oprange[ASLDCC] = oprange[r];
+			oprange[ASRD] = oprange[r];
+			oprange[ASRDCC] = oprange[r];
+			break;
+		case ASRAW:	/* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */
+			oprange[ASRAWCC] = oprange[r];
+			break;
+		case ASRAD:	/* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */
+			oprange[ASRADCC] = oprange[r];
+			break;
+		case ASUB:	/* SUB Ra,Rb,Rd => subf Rd,ra,rb */
+			oprange[ASUB] = oprange[r];
+			oprange[ASUBCC] = oprange[r];
+			oprange[ASUBV] = oprange[r];
+			oprange[ASUBVCC] = oprange[r];
+			oprange[ASUBCCC] = oprange[r];
+			oprange[ASUBCV] = oprange[r];
+			oprange[ASUBCVCC] = oprange[r];
+			oprange[ASUBE] = oprange[r];
+			oprange[ASUBECC] = oprange[r];
+			oprange[ASUBEV] = oprange[r];
+			oprange[ASUBEVCC] = oprange[r];
+			break;
+		case ASYNC:
+			oprange[AISYNC] = oprange[r];
+			oprange[APTESYNC] = oprange[r];
+			oprange[ATLBSYNC] = oprange[r];
+			break;
+		case ARLWMI:
+			oprange[ARLWMICC] = oprange[r];
+			oprange[ARLWNM] = oprange[r];
+			oprange[ARLWNMCC] = oprange[r];
+			break;
+		case ARLDMI:
+			oprange[ARLDMICC] = oprange[r];
+			break;
+		case ARLDC:
+			oprange[ARLDCCC] = oprange[r];
+			break;
+		case ARLDCL:
+			oprange[ARLDCR] = oprange[r];
+			oprange[ARLDCLCC] = oprange[r];
+			oprange[ARLDCRCC] = oprange[r];
+			break;
+		case AFMOVD:
+			oprange[AFMOVDCC] = oprange[r];
+			oprange[AFMOVDU] = oprange[r];
+			oprange[AFMOVS] = oprange[r];
+			oprange[AFMOVSU] = oprange[r];
+			break;
+		case AECIWX:
+			oprange[ALWAR] = oprange[r];
+			break;
+		case ASYSCALL:	/* just the op; flow of control */
+			oprange[ARFI] = oprange[r];
+			oprange[ARFCI] = oprange[r];
+			oprange[ARFID] = oprange[r];
+			oprange[AHRFID] = oprange[r];
+			break;
+		case AMOVHBR:
+			oprange[AMOVWBR] = oprange[r];
+			break;
+		case ASLBMFEE:
+			oprange[ASLBMFEV] = oprange[r];
+			break;
+		case ATW:
+			oprange[ATD] = oprange[r];
+			break;
+		case ATLBIE:
+			oprange[ASLBIE] = oprange[r];
+			oprange[ATLBIEL] = oprange[r];
+			break;
+		case AEIEIO:
+			oprange[ASLBIA] = oprange[r];
+			break;
+		case ACMP:
+			oprange[ACMPW] = oprange[r];
+			break;
+		case ACMPU:
+			oprange[ACMPWU] = oprange[r];
+			break;
+		case AADD:
+		case AANDCC:	/* and. Rb,Rs,Ra; andi. $uimm,Rs,Ra; andis. $uimm,Rs,Ra */
+		case ALSW:
+		case AMOVW:	/* load/store/move word with sign extension; special 32-bit move; move 32-bit literals */
+		case AMOVWZ:	/* load/store/move word with zero extension; move 32-bit literals  */
+		case AMOVD:	/* load/store/move 64-bit values, including 32-bit literals with/without sign-extension */
+		case AMOVB:	/* macro: move byte with sign extension */
+		case AMOVBU:	/* macro: move byte with sign extension & update */
+		case AMOVFL:
+		case AMULLW:	/* op $s[,r2],r3; op r1[,r2],r3; no cc/v */
+		case ASUBC:	/* op r1,$s,r3; op r1[,r2],r3 */
+		case ASTSW:
+		case ASLBMTE:
+		case AWORD:
+		case ADWORD:
+		case ANOP:
+		case ATEXT:
+			break;
+		}
+	}
+}
+
+enum{
+	ABSD = 0,
+	ABSU = 1,
+	RELD = 2,
+	RELU = 3,
+};
+
+int modemap[8] = { 0, 1, -1, 2, 3, 4, 5, 6};
+
+typedef struct Reloc Reloc;
+
+struct Reloc
+{
+	int n;
+	int t;
+	uchar *m;
+	ulong *a;
+};
+
+Reloc rels;
+
+static void
+grow(Reloc *r)
+{
+	int t;
+	uchar *m, *nm;
+	ulong *a, *na;
+
+	t = r->t;
+	r->t += 64;
+	m = r->m;
+	a = r->a;
+	r->m = nm = malloc(r->t*sizeof(uchar));
+	r->a = na = malloc(r->t*sizeof(ulong));
+	memmove(nm, m, t*sizeof(uchar));
+	memmove(na, a, t*sizeof(ulong));
+	free(m);
+	free(a);
+}
+
+void
+dynreloc(Sym *s, long v, int abs, int split, int sext)
+{
+	int i, k, n;
+	uchar *m;
+	ulong *a;
+	Reloc *r;
+
+	if(v&3)
+		diag("bad relocation address");
+	v >>= 2;
+	if(s->type == SUNDEF)
+		k = abs ? ABSU : RELU;
+	else
+		k = abs ? ABSD : RELD;
+	if(split)
+		k += 4;
+	if(sext)
+		k += 2;
+	/* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, a, a, k); */
+	k = modemap[k];
+	r = &rels;
+	n = r->n;
+	if(n >= r->t)
+		grow(r);
+	m = r->m;
+	a = r->a;
+	for(i = n; i > 0; i--){
+		if(v < a[i-1]){	/* happens occasionally for data */
+			m[i] = m[i-1];
+			a[i] = a[i-1];
+		}
+		else
+			break;
+	}
+	m[i] = k;
+	a[i] = v;
+	r->n++;
+}
+
+static int
+sput(char *s)
+{
+	char *p;
+
+	p = s;
+	while(*s)
+		cput(*s++);
+	cput(0);
+	return s-p+1;
+}
+
+void
+asmdyn()
+{
+	int i, n, t, c;
+	Sym *s;
+	ulong la, ra, *a;
+	vlong off;
+	uchar *m;
+	Reloc *r;
+
+	cflush();
+	off = seek(cout, 0, 1);
+	lput(0);
+	t = 0;
+	lput(imports);
+	t += 4;
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->type == SUNDEF){
+				lput(s->sig);
+				t += 4;
+				t += sput(s->name);
+			}
+	
+	la = 0;
+	r = &rels;
+	n = r->n;
+	m = r->m;
+	a = r->a;
+	lput(n);
+	t += 4;
+	for(i = 0; i < n; i++){
+		ra = *a-la;
+		if(*a < la)
+			diag("bad relocation order");
+		if(ra < 256)
+			c = 0;
+		else if(ra < 65536)
+			c = 1;
+		else
+			c = 2;
+		cput((c<<6)|*m++);
+		t++;
+		if(c == 0){
+			cput(ra);
+			t++;
+		}
+		else if(c == 1){
+			wput(ra);
+			t += 2;
+		}
+		else{
+			lput(ra);
+			t += 4;
+		}
+		la = *a++;
+	}
+
+	cflush();
+	seek(cout, off, 0);
+	lput(t);
+
+	if(debug['v']){
+		Bprint(&bso, "import table entries = %d\n", imports);
+		Bprint(&bso, "export table entries = %d\n", exports);
+	}
+}

+ 9 - 0
sys/src/libc/9syscall/mkfile

@@ -110,6 +110,15 @@ install:V:
 				MOVW R3,4(R8)'
 			}
 			echo RETURN
+		case power64
+			if(~ $i seek)
+				echo TEXT _seek'(SB)', 1, '$0'
+			if not
+				echo TEXT $i'(SB)', 1, '$0'
+			echo MOVD R3, '0(FP)'
+			echo MOVW '$'$n, R3
+			echo SYSCALL
+			echo RETURN
 		}} > $i.s
 		$AS $i.s
 	}

+ 14 - 0
sys/src/libc/power64/_seek.c

@@ -0,0 +1,14 @@
+#include <u.h>
+#include <libc.h>
+
+extern int _seek(vlong*, int, vlong, int);
+
+vlong
+seek(int fd, vlong o, int p)
+{
+	vlong l;
+
+	if(_seek(&l, fd, o, p) < 0)
+		l = -1LL;
+	return l;
+}

+ 4 - 0
sys/src/libc/power64/argv0.s

@@ -0,0 +1,4 @@
+GLOBL	argv0(SB), $8
+GLOBL	_tos(SB), $8
+GLOBL	_privates(SB), $8
+GLOBL	_nprivates(SB), $4

+ 34 - 0
sys/src/libc/power64/atom.s

@@ -0,0 +1,34 @@
+TEXT ainc(SB), 1, $-4				/* long ainc(long *); */
+	BR	_trap
+	RET
+
+TEXT adec(SB), 1, $-4				/* long adec(long*); */
+	BR	_trap
+	RET
+
+TEXT _xinc(SB), 1, $-4				/* void _xinc(long *); */
+	BR	_trap
+	RET
+
+TEXT _xdec(SB), 1, $-4				/* long _xdec(long *); */
+	BR	_trap
+	RET
+
+/*
+ * int cas(uint* p, int ov, int nv);
+ */
+TEXT cas(SB), 1, $-4
+	BR	_trap
+	RET
+
+/*
+ * int casv(u64int* p, u64int ov, u64int nv);
+ */
+TEXT casv(SB), 1, $-4
+	BR	_trap
+	RET
+
+_trap:
+	MOVD	$0, R0
+	MOVD	0(R0), R0
+	RET

+ 17 - 0
sys/src/libc/power64/cycles.s

@@ -0,0 +1,17 @@
+#define TBRL	268
+#define TBRU	269		/* Time base Upper/Lower (Reading) */
+
+/*
+ * time stamp counter; _cycles since power up
+ * Runs at fasthz/4 cycles per second (m->clkin>>3)
+ */
+TEXT cycles(SB),1,$0
+loop:
+	MOVW	SPR(TBRU),R7
+	MOVW	SPR(TBRL),R8
+	MOVW	SPR(TBRU),R5
+	CMP	R5,R7
+	BNE	loop
+	MOVW	R7,0(R3)
+	MOVW	R8,4(R3)
+	RETURN

+ 4 - 0
sys/src/libc/power64/getcallerpc.s

@@ -0,0 +1,4 @@
+TEXT	getcallerpc(SB),1,$-8
+	MOVD	0(R1), R3
+	RETURN
+

+ 28 - 0
sys/src/libc/power64/getfcr.s

@@ -0,0 +1,28 @@
+TEXT	getfcr(SB), $8
+	MOVFL	FPSCR, F3
+	FMOVD	F3, f-8(SP)
+	MOVW	-4(SP), R3
+	RETURN
+
+TEXT	getfsr(SB), $8
+	MOVFL	FPSCR, F3
+	FMOVD	F3, f-8(SP)
+	MOVW	-4(SP), R3
+	RETURN
+
+TEXT	setfcr(SB), $8
+	SYNC
+	MOVW	R3, -4(SP)
+	FMOVD	-8(SP), F3
+	MOVFL	F3, FPSCR
+	ISYNC
+	RETURN
+
+TEXT	setfsr(SB), $8
+	SYNC
+	MOVW	R3, -4(SP)
+	FMOVD	-8(SP), F3
+	MOVFL	F3, FPSCR
+	ISYNC
+	RETURN
+

+ 23 - 0
sys/src/libc/power64/main9.s

@@ -0,0 +1,23 @@
+#define NPRIVATES	16
+
+TEXT	_main(SB), 1, $(2*8 + NPRIVATES*8)
+
+	MOVD	$setSB(SB), R2
+	MOVD	R3, _tos(SB)
+
+	MOVD	$p-64(SP), R4
+	MOVD	R4, _privates+0(SB)
+	MOVW	$16, R4
+	MOVW	R4, _nprivates+0(SB)
+
+	MOVW	inargc-8(FP), R3
+	MOVD	$inargv+0(FP), R4
+	MOVW	R4, 16(R1)
+	BL	main(SB)
+loop:
+	MOVD	$_exitstr<>(SB), R3
+	BL	exits(SB)
+	BR	loop
+
+DATA	_exitstr<>+0(SB)/4, $"main"
+GLOBL	_exitstr<>+0(SB), $5

+ 35 - 0
sys/src/libc/power64/main9p.s

@@ -0,0 +1,35 @@
+#define NPRIVATES	16
+
+TEXT	_mainp(SB), 1, $(2*8 + NPRIVATES*8)
+
+	MOVD	$setSB(SB), R2
+	MOVD	R3, _tos(SB)
+
+	MOVD	$p-64(SP), R4
+	MOVD	R4, _privates+0(SB)
+	MOVW	$16, R4
+	MOVW	R4, _nprivates+0(SB)
+
+	BL		_profmain(SB)
+	MOVD	_tos(SB), R3
+	MOVD	8(R3), R4
+	MOVD	R4, 0(R3)
+	MOVW	inargc-4(FP), R3
+	MOVD	$inargv+0(FP), R4
+	MOVD	R4, 16(R1)
+	BL		main(SB)
+loop:
+	MOVD	$exits<>(SB), R3
+	BL	exits(SB)
+	MOVD	$_profin(SB), R3	/* force loading of profile */
+	BR	loop
+
+TEXT	_savearg(SB), 1, $0
+	RETURN
+
+TEXT	_callpc(SB), 1, $0
+	MOVD	argp-8(FP), R3
+	RETURN
+
+DATA	exits<>+0(SB)/4, $"main"
+GLOBL	exits<>+0(SB), $5

+ 35 - 0
sys/src/libc/power64/mkfile

@@ -0,0 +1,35 @@
+objtype=power64
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libc.a
+SFILES=\
+	argv0.s\
+	cycles.s\
+	getcallerpc.s\
+	getfcr.s\
+	main9.s\
+	main9p.s\
+#	memccpy.s\
+#	memcmp.s\
+#	memmove.s\
+#	memset.s\
+	setjmp.s\
+#	strcmp.s\
+#	strncmp.s\
+	tas.s\
+
+CFILES=\
+	_seek.c\
+	notejmp.c\
+	sqrt.c\
+
+HFILES=/sys/include/libc.h
+
+OFILES=${CFILES:%.c=%.$O} ${SFILES:%.s=%.$O}
+
+UPDATE=mkfile\
+	$HFILES\
+	$CFILES\
+	$SFILES\
+
+</sys/src/cmd/mksyslib

+ 22 - 0
sys/src/libc/power64/notejmp.c

@@ -0,0 +1,22 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+
+int	__noterestore(void);
+
+void
+notejmp(void *vr, jmp_buf j, int ret)
+{
+	struct Ureg *r = vr;
+
+	/*
+	 * song and dance to get around the kernel smashing r3 in noted
+	 */
+	r->r4 = ret;
+	if(ret == 0)
+		r->r4 = 1;
+	r->r5 = j[JMPBUFPC] - JMPBUFDPC;
+	r->pc = (uintptr)__noterestore;
+	r->sp = j[JMPBUFSP];
+	noted(NCONT);
+}

+ 26 - 0
sys/src/libc/power64/setjmp.s

@@ -0,0 +1,26 @@
+TEXT	setjmp(SB), 1, $-8
+	MOVD	LR, R4
+	MOVD	R1, (R3)
+	MOVD	R4, 4(R3)
+	MOVW	$0, R3
+	RETURN
+
+TEXT	longjmp(SB), 1, $-8
+	MOVD	R3, R4
+	MOVW	r+12(FP), R3
+	CMP	R3, $0
+	BNE	ok		/* ansi: "longjmp(0) => longjmp(1)" */
+	MOVW	$1, R3		/* bless their pointed heads */
+ok:	MOVD	(R4), R1
+	MOVD	4(R4), R4
+	MOVD	R4, LR
+	BR	(LR)
+
+/*
+ * trampoline functions because the kernel smashes r3
+ * in the uregs given to notejmp
+ */
+TEXT	__noterestore(SB), 1, $-8
+	MOVD	R4, R3
+	MOVD	R5, LR
+	BR	(LR)

+ 103 - 0
sys/src/libc/power64/sqrt.c

@@ -0,0 +1,103 @@
+#include <u.h>
+#include <libc.h>
+
+static	long	sqtab[64] =
+{
+	0x6cdb2, 0x726d4, 0x77ea3, 0x7d52f, 0x82a85, 0x87eb1, 0x8d1c0, 0x923bd,
+	0x974b2, 0x9c4a8, 0xa13a9, 0xa61be, 0xaaeee, 0xafb41, 0xb46bf, 0xb916e,
+	0xbdb55, 0xc247a, 0xc6ce3, 0xcb495, 0xcfb95, 0xd41ea, 0xd8796, 0xdcca0,
+	0xe110c, 0xe54dd, 0xe9818, 0xedac0, 0xf1cd9, 0xf5e67, 0xf9f6e, 0xfdfef,
+	0x01fe0, 0x05ee6, 0x09cfd, 0x0da30, 0x11687, 0x1520c, 0x18cc8, 0x1c6c1,
+	0x20000, 0x2388a, 0x27068, 0x2a79e, 0x2de32, 0x3142b, 0x3498c, 0x37e5b,
+	0x3b29d, 0x3e655, 0x41989, 0x44c3b, 0x47e70, 0x4b02b, 0x4e16f, 0x51241,
+	0x542a2, 0x57296, 0x5a220, 0x5d142, 0x60000, 0x62e5a, 0x65c55, 0x689f2,
+};
+
+double
+sqrt(double arg)
+{
+	int e, ms;
+	double a, t;
+	union
+	{
+		double	d;
+		struct
+		{
+			long	ms;
+			long	ls;
+		};
+	} u;
+
+	u.d = arg;
+	ms = u.ms;
+
+	/*
+	 * sign extend the mantissa with
+	 * exponent. result should be > 0 for
+	 * normal case.
+	 */
+	e = ms >> 20;
+	if(e <= 0) {
+		if(e == 0)
+			return 0;
+		return NaN();
+	}
+
+	/*
+	 * pick up arg/4 by adjusting exponent
+	 */
+	u.ms = ms - (2 << 20);
+	a = u.d;
+
+	/*
+	 * use 5 bits of mantissa and 1 bit
+	 * of exponent to form table index.
+	 * insert exponent/2 - 1.
+	 */
+	e = (((e - 1023) >> 1) + 1022) << 20;
+	u.ms = *(long*)((char*)sqtab + ((ms >> 13) & 0xfc)) | e;
+	u.ls = 0;
+
+	/*
+	 * three laps of newton
+	 */
+	e = 1 << 20;
+	t = u.d;
+	u.d = t + a/t;
+	u.ms -= e;		/* u.d /= 2; */
+	t = u.d;
+	u.d = t + a/t;
+	u.ms -= e;		/* u.d /= 2; */
+	t = u.d;
+
+	return t + a/t;
+}
+
+/*
+ * this is the program that generated the table.
+ * it calls sqrt by some other means.
+ * 
+ * void
+ * main(void)
+ * {
+ * 	int i;
+ * 	union	U
+ * 	{
+ * 		double	d;
+ * 		struct
+ * 		{
+ * 			long	ms;
+ * 			long	ls;
+ * 		};
+ * 	} u;
+ * 
+ * 	for(i=0; i<64; i++) {
+ * 		u.ms = (i<<15) | 0x3fe04000;
+ * 		u.ls = 0;
+ * 		u.d = sqrt(u.d);
+ * 		print(" 0x%.5lux,", u.ms & 0xfffff);
+ * 	}
+ * 	print("\n");
+ * 	exits(0);
+ * }
+ */

+ 19 - 0
sys/src/libc/power64/tas.s

@@ -0,0 +1,19 @@
+TEXT	_tas(SB), $0
+	SYNC
+	MOVD	R3, R4
+	MOVWZ	$0xdeaddead,R5
+tas1:
+/* taken out for the 755.  dcbf and L2 caching do not seem to get on
+    with eachother.  It seems that dcbf is desctructive in the L2 cache 
+    (also see l.s) */
+//	DCBF	(R4)	
+	SYNC
+	LWAR	(R4), R3
+	CMP	R3, $0
+	BNE	tas0
+	STWCCC	R5, (R4)
+	BNE	tas1
+tas0:
+	SYNC
+	ISYNC
+	RETURN

+ 13 - 0
sys/src/libmp/power64/mkfile

@@ -0,0 +1,13 @@
+objtype=power64
+</power/mkfile
+
+LIB=/$objtype/lib/libmp.a
+OFILES=\
+
+HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h
+
+UPDATE=\
+	mkfile\
+	$HFILES\
+
+</sys/src/cmd/mksyslib

+ 11 - 0
sys/src/libsec/power64/mkfile

@@ -0,0 +1,11 @@
+objtype=power64
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libsec.a
+OFILES=	\
+
+HFILES=/sys/include/libsec.h
+
+UPDATE=mkfile
+
+</sys/src/cmd/mksyslib

+ 27 - 0
sys/src/libthread/power64.c

@@ -0,0 +1,27 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "threadimpl.h"
+
+/* first argument goes in a register; simplest just to ignore it */
+static void
+launcherpower64(int, void (*f)(void *arg), void *arg)
+{
+	(*f)(arg);
+	threadexits(nil);
+}
+
+void
+_threadinitstack(Thread *t, void (*f)(void*), void *arg)
+{
+	uvlong *tos;
+
+	tos = (uvlong*)&t->stk[t->stksize&~7];
+	*--tos = (uvlong)arg;
+	*--tos = (uvlong)f;
+	*--tos = 0;	/* first arg to launcherpower64 */
+	*--tos = 0;	/* place to store return PC */
+	t->sched[JMPBUFPC] = (uvlong)launcherpower64+JMPBUFDPC;
+	t->sched[JMPBUFSP] = (uvlong)tos;
+}
+

+ 19 - 0
sys/src/libthread/xincpower64.s

@@ -0,0 +1,19 @@
+TEXT	_xinc(SB),$0	/* void _xinc(long *); */
+
+	MOVD	R3, R4
+xincloop:
+	LWAR	(R4), R3
+	ADD		$1, R3
+	STWCCC	R3, (R4)
+	BNE		xincloop
+	RETURN
+
+TEXT	_xdec(SB),$0	/* long _xdec(long *); */
+
+	MOVD	R3, R4
+xdecloop:
+	LWAR	(R4), R3
+	ADD		$-1, R3
+	STWCCC	R3, (R4)
+	BNE		xdecloop
+	RETURN