Browse Source

Plan 9 from Bell Labs 2011-04-29

David du Colombier 13 years ago
parent
commit
73cfe7217c

+ 4 - 2
rc/bin/doctype

@@ -23,10 +23,10 @@ ifs='
 '{
 	files=`{echo $*}
 }
-grep -h '\$LIST|\|reference|Jp|^\.(EQ|TS|\[|PS|IS|GS|G1|GD|PP|BM|LP|BP|PI|cstart|begin|TH...)|^\.P$' $* |
+grep -h '\$LIST|\|reference|Jp|^\.(EQ|TS|\[|PS|IS|GS|G1|GD|PP|BM|LP|BP|PI|cstart|begin|TH...|TI)|^\.P$' $* |
 sort -u |
 awk '
-BEGIN	{ files = "'$files'" }
+BEGIN	{ files = "'$"files'" }
 /\$LIST/ { e++ }
 /^\.PP/	{ ms++ }
 /^\.LP/	{ ms++ }
@@ -46,6 +46,7 @@ BEGIN	{ files = "'$files'" }
 /^\.P$/	{ mm++ }
 /^\.BP/	{ pictures++ }
 /^\.PI/	{ pictures++ }
+/^\.TI/	{ mcs++ }
 /^\.ft *Jp|\\f\(Jp/ { nihongo++ }
 END {
 	x = ""
@@ -68,6 +69,7 @@ END {
 	if (man) x = x "-man"
 	else if (ms) x = x "-ms"
 	else if (mm) x = x "-mm"
+	if (mcs) x = x " -mcs"
 	if (lbits) x = x " -mbits"
 	if (pictures) x = x " -mpictures"
 	if (nihongo) x = x " -mnihongo"

+ 195 - 0
sys/src/cmd/6a/a.h

@@ -0,0 +1,195 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "../6c/6.out.h"
+
+
+#ifndef	EXTERN
+#define	EXTERN	extern
+#endif
+
+typedef	struct	Sym	Sym;
+typedef	struct	Ref	Ref;
+typedef	struct	Gen	Gen;
+typedef	struct	Io	Io;
+typedef	struct	Hist	Hist;
+typedef	struct	Gen2 	Gen2;
+
+#define	MAXALIGN	7
+#define	FPCHIP		1
+#define	NSYMB		500
+#define	BUFSIZ		8192
+#define	HISTSZ		20
+#define	NINCLUDE	10
+#define	NHUNK		10000
+#define	EOF		(-1)
+#define	IGN		(-2)
+#define	GETC()		((--fi.c < 0)? filbuf(): *fi.p++ & 0xff)
+#define	NHASH		503
+#define	STRINGSZ	200
+#define	NMACRO		10
+
+struct	Sym
+{
+	Sym*	link;
+	Ref*	ref;
+	char*	macro;
+	vlong	value;
+	ushort	type;
+	char	*name;
+	char	sym;
+};
+#define	S	((Sym*)0)
+
+struct	Ref
+{
+	int	class;
+};
+
+EXTERN struct
+{
+	char*	p;
+	int	c;
+} fi;
+
+struct	Io
+{
+	Io*	link;
+	char	b[BUFSIZ];
+	char*	p;
+	short	c;
+	short	f;
+};
+#define	I	((Io*)0)
+
+EXTERN struct
+{
+	Sym*	sym;
+	short	type;
+} h[NSYM];
+
+struct	Gen
+{
+	double	dval;
+	char	sval[8];
+	vlong	offset;
+	Sym*	sym;
+	short	type;
+	short	index;
+	short	scale;
+};
+struct	Gen2
+{
+	Gen	from;
+	Gen	to;
+};
+
+struct	Hist
+{
+	Hist*	link;
+	char*	name;
+	long	line;
+	vlong	offset;
+};
+#define	H	((Hist*)0)
+
+enum
+{
+	CLAST,
+	CMACARG,
+	CMACRO,
+	CPREPROC,
+};
+
+
+EXTERN	char	debug[256];
+EXTERN	Sym*	hash[NHASH];
+EXTERN	char*	Dlist[30];
+EXTERN	int	nDlist;
+EXTERN	Hist*	ehist;
+EXTERN	int	newflag;
+EXTERN	Hist*	hist;
+EXTERN	char*	hunk;
+EXTERN	char*	include[NINCLUDE];
+EXTERN	Io*	iofree;
+EXTERN	Io*	ionext;
+EXTERN	Io*	iostack;
+EXTERN	long	lineno;
+EXTERN	int	nerrors;
+EXTERN	long	nhunk;
+EXTERN	int	ninclude;
+EXTERN	Gen	nullgen;
+EXTERN	char*	outfile;
+EXTERN	int	pass;
+EXTERN	char*	pathname;
+EXTERN	long	pc;
+EXTERN	int	peekc;
+EXTERN	int	sym;
+EXTERN	char	symb[NSYMB];
+EXTERN	int	thechar;
+EXTERN	char*	thestring;
+EXTERN	long	thunk;
+EXTERN	Biobuf	obuf;
+
+void*	allocn(void*, long, long);
+void	errorexit(void);
+void	pushio(void);
+void	newio(void);
+void	newfile(char*, int);
+Sym*	slookup(char*);
+Sym*	lookup(void);
+void	syminit(Sym*);
+long	yylex(void);
+int	getc(void);
+int	getnsc(void);
+void	unget(int);
+int	escchar(int);
+void	cinit(void);
+void	checkscale(int);
+void	pinit(char*);
+void	cclean(void);
+int	isreg(Gen*);
+void	outcode(int, Gen2*);
+void	outhist(void);
+void	zaddr(Gen*, int);
+void	zname(char*, int, int);
+void	ieeedtod(Ieee*, double);
+int	filbuf(void);
+Sym*	getsym(void);
+void	domacro(void);
+void	macund(void);
+void	macdef(void);
+void	macexpand(Sym*, char*);
+void	macinc(void);
+void	macprag(void);
+void	maclin(void);
+void	macif(int);
+void	macend(void);
+void	dodefine(char*);
+void	prfile(long);
+void	linehist(char*, int);
+void	gethunk(void);
+void	yyerror(char*, ...);
+int	yyparse(void);
+void	setinclude(char*);
+int	assemble(char*);
+
+/*
+ *	Posix.c/Inferno.c/Nt.c
+ */
+enum	/* keep in synch with ../cc/cc.h */
+{
+	Plan9	= 1<<0,
+	Unix	= 1<<1,
+	Windows	= 1<<2
+};
+int	mywait(int*);
+int	mycreat(char*, int);
+int	systemtype(int);
+int	pathchar(void);
+char*	mygetwd(char*, int);
+int	myexec(char*, char*[]);
+int	mydup(int, int);
+int	myfork(void);
+int	mypipe(int*);
+void*	mysbrk(ulong);

+ 562 - 0
sys/src/cmd/6a/a.y

@@ -0,0 +1,562 @@
+%{
+#include "a.h"
+%}
+%union	{
+	Sym	*sym;
+	vlong	lval;
+	double	dval;
+	char	sval[8];
+	Gen	gen;
+	Gen2	gen2;
+}
+%left	'|'
+%left	'^'
+%left	'&'
+%left	'<' '>'
+%left	'+' '-'
+%left	'*' '/' '%'
+%token	<lval>	LTYPE0 LTYPE1 LTYPE2 LTYPE3 LTYPE4
+%token	<lval>	LTYPEC LTYPED LTYPEN LTYPER LTYPET LTYPES LTYPEM LTYPEI LTYPEXC LTYPEX LTYPERT
+%token	<lval>	LCONST LFP LPC LSB
+%token	<lval>	LBREG LLREG LSREG LFREG LMREG LXREG
+%token	<dval>	LFCONST
+%token	<sval>	LSCONST LSP
+%token	<sym>	LNAME LLAB LVAR
+%type	<lval>	con expr pointer offset
+%type	<gen>	mem imm reg nam rel rem rim rom omem nmem
+%type	<gen2>	nonnon nonrel nonrem rimnon rimrem remrim spec10
+%type	<gen2>	spec1 spec2 spec3 spec4 spec5 spec6 spec7 spec8 spec9
+%%
+prog:
+|	prog line
+
+line:
+	LLAB ':'
+	{
+		if($1->value != pc)
+			yyerror("redeclaration of %s", $1->name);
+		$1->value = pc;
+	}
+	line
+|	LNAME ':'
+	{
+		$1->type = LLAB;
+		$1->value = pc;
+	}
+	line
+|	';'
+|	inst ';'
+|	error ';'
+
+inst:
+	LNAME '=' expr
+	{
+		$1->type = LVAR;
+		$1->value = $3;
+	}
+|	LVAR '=' expr
+	{
+		if($1->value != $3)
+			yyerror("redeclaration of %s", $1->name);
+		$1->value = $3;
+	}
+|	LTYPE0 nonnon	{ outcode($1, &$2); }
+|	LTYPE1 nonrem	{ outcode($1, &$2); }
+|	LTYPE2 rimnon	{ outcode($1, &$2); }
+|	LTYPE3 rimrem	{ outcode($1, &$2); }
+|	LTYPE4 remrim	{ outcode($1, &$2); }
+|	LTYPER nonrel	{ outcode($1, &$2); }
+|	LTYPED spec1	{ outcode($1, &$2); }
+|	LTYPET spec2	{ outcode($1, &$2); }
+|	LTYPEC spec3	{ outcode($1, &$2); }
+|	LTYPEN spec4	{ outcode($1, &$2); }
+|	LTYPES spec5	{ outcode($1, &$2); }
+|	LTYPEM spec6	{ outcode($1, &$2); }
+|	LTYPEI spec7	{ outcode($1, &$2); }
+|	LTYPEXC spec8	{ outcode($1, &$2); }
+|	LTYPEX spec9	{ outcode($1, &$2); }
+|	LTYPERT spec10	{ outcode($1, &$2); }
+
+nonnon:
+	{
+		$$.from = nullgen;
+		$$.to = nullgen;
+	}
+|	','
+	{
+		$$.from = nullgen;
+		$$.to = nullgen;
+	}
+
+rimrem:
+	rim ',' rem
+	{
+		$$.from = $1;
+		$$.to = $3;
+	}
+
+remrim:
+	rem ',' rim
+	{
+		$$.from = $1;
+		$$.to = $3;
+	}
+
+rimnon:
+	rim ','
+	{
+		$$.from = $1;
+		$$.to = nullgen;
+	}
+|	rim
+	{
+		$$.from = $1;
+		$$.to = nullgen;
+	}
+
+nonrem:
+	',' rem
+	{
+		$$.from = nullgen;
+		$$.to = $2;
+	}
+|	rem
+	{
+		$$.from = nullgen;
+		$$.to = $1;
+	}
+
+nonrel:
+	',' rel
+	{
+		$$.from = nullgen;
+		$$.to = $2;
+	}
+|	rel
+	{
+		$$.from = nullgen;
+		$$.to = $1;
+	}
+
+spec1:	/* DATA */
+	nam '/' con ',' imm
+	{
+		$$.from = $1;
+		$$.from.scale = $3;
+		$$.to = $5;
+	}
+
+spec2:	/* TEXT */
+	mem ',' imm
+	{
+		$$.from = $1;
+		$$.to = $3;
+	}
+|	mem ',' con ',' imm
+	{
+		$$.from = $1;
+		$$.from.scale = $3;
+		$$.to = $5;
+	}
+
+spec3:	/* JMP/CALL */
+	',' rom
+	{
+		$$.from = nullgen;
+		$$.to = $2;
+	}
+|	rom
+	{
+		$$.from = nullgen;
+		$$.to = $1;
+	}
+
+spec4:	/* NOP */
+	nonnon
+|	nonrem
+
+spec5:	/* SHL/SHR */
+	rim ',' rem
+	{
+		$$.from = $1;
+		$$.to = $3;
+	}
+|	rim ',' rem ':' LLREG
+	{
+		$$.from = $1;
+		$$.to = $3;
+		if($$.from.index != D_NONE)
+			yyerror("dp shift with lhs index");
+		$$.from.index = $5;
+	}
+
+spec6:	/* MOVW/MOVL */
+	rim ',' rem
+	{
+		$$.from = $1;
+		$$.to = $3;
+	}
+|	rim ',' rem ':' LSREG
+	{
+		$$.from = $1;
+		$$.to = $3;
+		if($$.to.index != D_NONE)
+			yyerror("dp move with lhs index");
+		$$.to.index = $5;
+	}
+
+spec7:
+	rim ','
+	{
+		$$.from = $1;
+		$$.to = nullgen;
+	}
+|	rim
+	{
+		$$.from = $1;
+		$$.to = nullgen;
+	}
+|	rim ',' rem
+	{
+		$$.from = $1;
+		$$.to = $3;
+	}
+
+spec8:	/* CMPPS/CMPPD */
+	reg ',' rem ',' con
+	{
+		$$.from = $1;
+		$$.to = $3;
+		$$.from.offset = $5;
+	}
+
+spec9:	/* shufl */
+	imm ',' rem ',' reg
+	{
+		$$.from = $3;
+		$$.to = $5;
+		if($1.type != D_CONST)
+			yyerror("illegal constant");
+		$$.to.offset = $1.offset;
+	}
+
+spec10:	/* RET/RETF */
+	{
+		$$.from = nullgen;
+		$$.to = nullgen;
+	}
+|	imm
+	{
+		$$.from = $1;
+		$$.to = nullgen;
+	}
+
+rem:
+	reg
+|	mem
+
+rom:
+	rel
+|	nmem
+|	'*' reg
+	{
+		$$ = $2;
+	}
+|	'*' omem
+	{
+		$$ = $2;
+	}
+|	reg
+|	omem
+
+rim:
+	rem
+|	imm
+
+rel:
+	con '(' LPC ')'
+	{
+		$$ = nullgen;
+		$$.type = D_BRANCH;
+		$$.offset = $1 + pc;
+	}
+|	LNAME offset
+	{
+		$$ = nullgen;
+		if(pass == 2)
+			yyerror("undefined label: %s", $1->name);
+		$$.type = D_BRANCH;
+		$$.sym = $1;
+		$$.offset = $2;
+	}
+|	LLAB offset
+	{
+		$$ = nullgen;
+		$$.type = D_BRANCH;
+		$$.sym = $1;
+		$$.offset = $1->value + $2;
+	}
+
+reg:
+	LBREG
+	{
+		$$ = nullgen;
+		$$.type = $1;
+	}
+|	LFREG
+	{
+		$$ = nullgen;
+		$$.type = $1;
+	}
+|	LLREG
+	{
+		$$ = nullgen;
+		$$.type = $1;
+	}
+|	LMREG
+	{
+		$$ = nullgen;
+		$$.type = $1;
+	}
+|	LSP
+	{
+		$$ = nullgen;
+		$$.type = D_SP;
+	}
+|	LSREG
+	{
+		$$ = nullgen;
+		$$.type = $1;
+	}
+|	LXREG
+	{
+		$$ = nullgen;
+		$$.type = $1;
+	}
+
+imm:
+	'$' con
+	{
+		$$ = nullgen;
+		$$.type = D_CONST;
+		$$.offset = $2;
+	}
+|	'$' nam
+	{
+		$$ = $2;
+		$$.index = $2.type;
+		$$.type = D_ADDR;
+		/*
+		if($2.type == D_AUTO || $2.type == D_PARAM)
+			yyerror("constant cannot be automatic: %s",
+				$2.sym->name);
+		 */
+	}
+|	'$' LSCONST
+	{
+		$$ = nullgen;
+		$$.type = D_SCONST;
+		memcpy($$.sval, $2, sizeof($$.sval));
+	}
+|	'$' LFCONST
+	{
+		$$ = nullgen;
+		$$.type = D_FCONST;
+		$$.dval = $2;
+	}
+|	'$' '(' LFCONST ')'
+	{
+		$$ = nullgen;
+		$$.type = D_FCONST;
+		$$.dval = $3;
+	}
+|	'$' '-' LFCONST
+	{
+		$$ = nullgen;
+		$$.type = D_FCONST;
+		$$.dval = -$3;
+	}
+
+mem:
+	omem
+|	nmem
+
+omem:
+	con
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+D_NONE;
+		$$.offset = $1;
+	}
+|	con '(' LLREG ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+$3;
+		$$.offset = $1;
+	}
+|	con '(' LSP ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+D_SP;
+		$$.offset = $1;
+	}
+|	con '(' LLREG '*' con ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+D_NONE;
+		$$.offset = $1;
+		$$.index = $3;
+		$$.scale = $5;
+		checkscale($$.scale);
+	}
+|	con '(' LLREG ')' '(' LLREG '*' con ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+$3;
+		$$.offset = $1;
+		$$.index = $6;
+		$$.scale = $8;
+		checkscale($$.scale);
+	}
+|	'(' LLREG ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+$2;
+	}
+|	'(' LSP ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+D_SP;
+	}
+|	'(' LLREG '*' con ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+D_NONE;
+		$$.index = $2;
+		$$.scale = $4;
+		checkscale($$.scale);
+	}
+|	'(' LLREG ')' '(' LLREG '*' con ')'
+	{
+		$$ = nullgen;
+		$$.type = D_INDIR+$2;
+		$$.index = $5;
+		$$.scale = $7;
+		checkscale($$.scale);
+	}
+
+nmem:
+	nam
+	{
+		$$ = $1;
+	}
+|	nam '(' LLREG '*' con ')'
+	{
+		$$ = $1;
+		$$.index = $3;
+		$$.scale = $5;
+		checkscale($$.scale);
+	}
+
+nam:
+	LNAME offset '(' pointer ')'
+	{
+		$$ = nullgen;
+		$$.type = $4;
+		$$.sym = $1;
+		$$.offset = $2;
+	}
+|	LNAME '<' '>' offset '(' LSB ')'
+	{
+		$$ = nullgen;
+		$$.type = D_STATIC;
+		$$.sym = $1;
+		$$.offset = $4;
+	}
+
+offset:
+	{
+		$$ = 0;
+	}
+|	'+' con
+	{
+		$$ = $2;
+	}
+|	'-' con
+	{
+		$$ = -$2;
+	}
+
+pointer:
+	LSB
+|	LSP
+	{
+		$$ = D_AUTO;
+	}
+|	LFP
+
+con:
+	LCONST
+|	LVAR
+	{
+		$$ = $1->value;
+	}
+|	'-' con
+	{
+		$$ = -$2;
+	}
+|	'+' con
+	{
+		$$ = $2;
+	}
+|	'~' con
+	{
+		$$ = ~$2;
+	}
+|	'(' expr ')'
+	{
+		$$ = $2;
+	}
+
+expr:
+	con
+|	expr '+' expr
+	{
+		$$ = $1 + $3;
+	}
+|	expr '-' expr
+	{
+		$$ = $1 - $3;
+	}
+|	expr '*' expr
+	{
+		$$ = $1 * $3;
+	}
+|	expr '/' expr
+	{
+		$$ = $1 / $3;
+	}
+|	expr '%' expr
+	{
+		$$ = $1 % $3;
+	}
+|	expr '<' '<' expr
+	{
+		$$ = $1 << $4;
+	}
+|	expr '>' '>' expr
+	{
+		$$ = $1 >> $4;
+	}
+|	expr '&' expr
+	{
+		$$ = $1 & $3;
+	}
+|	expr '^' expr
+	{
+		$$ = $1 ^ $3;
+	}
+|	expr '|' expr
+	{
+		$$ = $1 | $3;
+	}

+ 1289 - 0
sys/src/cmd/6a/lex.c

@@ -0,0 +1,1289 @@
+#define	EXTERN
+#include "a.h"
+#include "y.tab.h"
+#include <ctype.h>
+
+void
+main(int argc, char *argv[])
+{
+	char *p;
+	int nout, nproc, status, i, c;
+
+	thechar = '6';
+	thestring = "amd64";
+	memset(debug, 0, sizeof(debug));
+	cinit();
+	outfile = 0;
+	include[ninclude++] = ".";
+	ARGBEGIN {
+	default:
+		c = ARGC();
+		if(c >= 0 || c < sizeof(debug))
+			debug[c] = 1;
+		break;
+
+	case 'o':
+		outfile = ARGF();
+		break;
+
+	case 'D':
+		p = ARGF();
+		if(p)
+			Dlist[nDlist++] = p;
+		break;
+
+	case 'I':
+		p = ARGF();
+		setinclude(p);
+		break;
+	} ARGEND
+	if(*argv == 0) {
+		print("usage: %ca [-options] file.s\n", thechar);
+		errorexit();
+	}
+	if(argc > 1 && systemtype(Windows)){
+		print("can't assemble multiple files on windows\n");
+		errorexit();
+	}
+	if(argc > 1 && !systemtype(Windows)) {
+		nproc = 1;
+		if(p = getenv("NPROC"))
+			nproc = atol(p);	/* */
+		c = 0;
+		nout = 0;
+		for(;;) {
+			while(nout < nproc && argc > 0) {
+				i = myfork();
+				if(i < 0) {
+					i = mywait(&status);
+					if(i < 0)
+						errorexit();
+					if(status)
+						c++;
+					nout--;
+					continue;
+				}
+				if(i == 0) {
+					print("%s:\n", *argv);
+					if(assemble(*argv))
+						errorexit();
+					exits(0);
+				}
+				nout++;
+				argc--;
+				argv++;
+			}
+			i = mywait(&status);
+			if(i < 0) {
+				if(c)
+					errorexit();
+				exits(0);
+			}
+			if(status)
+				c++;
+			nout--;
+		}
+	}
+	if(assemble(argv[0]))
+		errorexit();
+	exits(0);
+}
+
+int
+assemble(char *file)
+{
+	char ofile[100], incfile[20], *p;
+	int i, of;
+
+	strcpy(ofile, file);
+	p = utfrrune(ofile, pathchar());
+	if(p) {
+		include[0] = ofile;
+		*p++ = 0;
+	} else
+		p = ofile;
+	if(outfile == 0) {
+		outfile = p;
+		if(outfile){
+			p = utfrrune(outfile, '.');
+			if(p)
+				if(p[1] == 's' && p[2] == 0)
+					p[0] = 0;
+			p = utfrune(outfile, 0);
+			p[0] = '.';
+			p[1] = thechar;
+			p[2] = 0;
+		} else
+			outfile = "/dev/null";
+	}
+	p = getenv("INCLUDE");
+	if(p) {
+		setinclude(p);
+	} else {
+		if(systemtype(Plan9)) {
+			sprint(incfile,"/%s/include", thestring);
+			setinclude(strdup(incfile));
+		}
+	}
+
+	of = mycreat(outfile, 0664);
+	if(of < 0) {
+		yyerror("%ca: cannot create %s", thechar, outfile);
+		errorexit();
+	}
+	Binit(&obuf, of, OWRITE);
+
+	pass = 1;
+	pinit(file);
+	for(i=0; i<nDlist; i++)
+		dodefine(Dlist[i]);
+	yyparse();
+	if(nerrors) {
+		cclean();
+		return nerrors;
+	}
+
+	pass = 2;
+	outhist();
+	pinit(file);
+	for(i=0; i<nDlist; i++)
+		dodefine(Dlist[i]);
+	yyparse();
+	cclean();
+	return nerrors;
+}
+
+struct
+{
+	char	*name;
+	ushort	type;
+	ushort	value;
+} itab[] =
+{
+	"SP",		LSP,	D_AUTO,
+	"SB",		LSB,	D_EXTERN,
+	"FP",		LFP,	D_PARAM,
+	"PC",		LPC,	D_BRANCH,
+
+	"AL",		LBREG,	D_AL,
+	"CL",		LBREG,	D_CL,
+	"DL",		LBREG,	D_DL,
+	"BL",		LBREG,	D_BL,
+/*	"SPB",		LBREG,	D_SPB,	*/
+	"SIB",		LBREG,	D_SIB,
+	"DIB",		LBREG,	D_DIB,
+	"BPB",		LBREG,	D_BPB,
+	"R8B",		LBREG,	D_R8B,
+	"R9B",		LBREG,	D_R9B,
+	"R10B",		LBREG,	D_R10B,
+	"R11B",		LBREG,	D_R11B,
+	"R12B",		LBREG,	D_R12B,
+	"R13B",		LBREG,	D_R13B,
+	"R14B",		LBREG,	D_R14B,
+	"R15B",		LBREG,	D_R15B,
+
+	"AH",		LBREG,	D_AH,
+	"CH",		LBREG,	D_CH,
+	"DH",		LBREG,	D_DH,
+	"BH",		LBREG,	D_BH,
+
+	"AX",		LLREG,	D_AX,
+	"CX",		LLREG,	D_CX,
+	"DX",		LLREG,	D_DX,
+	"BX",		LLREG,	D_BX,
+/*	"SP",		LLREG,	D_SP,	*/
+	"BP",		LLREG,	D_BP,
+	"SI",		LLREG,	D_SI,
+	"DI",		LLREG,	D_DI,
+	"R8",		LLREG,	D_R8,
+	"R9",		LLREG,	D_R9,
+	"R10",		LLREG,	D_R10,
+	"R11",		LLREG,	D_R11,
+	"R12",		LLREG,	D_R12,
+	"R13",		LLREG,	D_R13,
+	"R14",		LLREG,	D_R14,
+	"R15",		LLREG,	D_R15,
+
+	"RARG",		LLREG,	REGARG,
+
+	"F0",		LFREG,	D_F0+0,
+	"F1",		LFREG,	D_F0+1,
+	"F2",		LFREG,	D_F0+2,
+	"F3",		LFREG,	D_F0+3,
+	"F4",		LFREG,	D_F0+4,
+	"F5",		LFREG,	D_F0+5,
+	"F6",		LFREG,	D_F0+6,
+	"F7",		LFREG,	D_F0+7,
+
+	"M0",		LMREG,	D_M0+0,
+	"M1",		LMREG,	D_M0+1,
+	"M2",		LMREG,	D_M0+2,
+	"M3",		LMREG,	D_M0+3,
+	"M4",		LMREG,	D_M0+4,
+	"M5",		LMREG,	D_M0+5,
+	"M6",		LMREG,	D_M0+6,
+	"M7",		LMREG,	D_M0+7,
+
+	"X0",		LXREG,	D_X0+0,
+	"X1",		LXREG,	D_X0+1,
+	"X2",		LXREG,	D_X0+2,
+	"X3",		LXREG,	D_X0+3,
+	"X4",		LXREG,	D_X0+4,
+	"X5",		LXREG,	D_X0+5,
+	"X6",		LXREG,	D_X0+6,
+	"X7",		LXREG,	D_X0+7,
+	"X8",		LXREG,	D_X0+8,
+	"X9",		LXREG,	D_X0+9,
+	"X10",		LXREG,	D_X0+10,
+	"X11",		LXREG,	D_X0+11,
+	"X12",		LXREG,	D_X0+12,
+	"X13",		LXREG,	D_X0+13,
+	"X14",		LXREG,	D_X0+14,
+	"X15",		LXREG,	D_X0+15,
+
+	"CS",		LSREG,	D_CS,
+	"SS",		LSREG,	D_SS,
+	"DS",		LSREG,	D_DS,
+	"ES",		LSREG,	D_ES,
+	"FS",		LSREG,	D_FS,
+	"GS",		LSREG,	D_GS,
+
+	"GDTR",		LBREG,	D_GDTR,
+	"IDTR",		LBREG,	D_IDTR,
+	"LDTR",		LBREG,	D_LDTR,
+	"MSW",		LBREG,	D_MSW,
+	"TASK",		LBREG,	D_TASK,
+
+	"CR0",		LBREG,	D_CR+0,
+	"CR1",		LBREG,	D_CR+1,
+	"CR2",		LBREG,	D_CR+2,
+	"CR3",		LBREG,	D_CR+3,
+	"CR4",		LBREG,	D_CR+4,
+	"CR5",		LBREG,	D_CR+5,
+	"CR6",		LBREG,	D_CR+6,
+	"CR7",		LBREG,	D_CR+7,
+	"CR8",		LBREG,	D_CR+8,
+	"CR9",		LBREG,	D_CR+9,
+	"CR10",		LBREG,	D_CR+10,
+	"CR11",		LBREG,	D_CR+11,
+	"CR12",		LBREG,	D_CR+12,
+	"CR13",		LBREG,	D_CR+13,
+	"CR14",		LBREG,	D_CR+14,
+	"CR15",		LBREG,	D_CR+15,
+
+	"DR0",		LBREG,	D_DR+0,
+	"DR1",		LBREG,	D_DR+1,
+	"DR2",		LBREG,	D_DR+2,
+	"DR3",		LBREG,	D_DR+3,
+	"DR4",		LBREG,	D_DR+4,
+	"DR5",		LBREG,	D_DR+5,
+	"DR6",		LBREG,	D_DR+6,
+	"DR7",		LBREG,	D_DR+7,
+
+	"TR0",		LBREG,	D_TR+0,
+	"TR1",		LBREG,	D_TR+1,
+	"TR2",		LBREG,	D_TR+2,
+	"TR3",		LBREG,	D_TR+3,
+	"TR4",		LBREG,	D_TR+4,
+	"TR5",		LBREG,	D_TR+5,
+	"TR6",		LBREG,	D_TR+6,
+	"TR7",		LBREG,	D_TR+7,
+
+	"AAA",		LTYPE0,	AAAA,
+	"AAD",		LTYPE0,	AAAD,
+	"AAM",		LTYPE0,	AAAM,
+	"AAS",		LTYPE0,	AAAS,
+	"ADCB",		LTYPE3,	AADCB,
+	"ADCL",		LTYPE3,	AADCL,
+	"ADCQ",		LTYPE3,	AADCQ,
+	"ADCW",		LTYPE3,	AADCW,
+	"ADDB",		LTYPE3,	AADDB,
+	"ADDL",		LTYPE3,	AADDL,
+	"ADDQ",		LTYPE3,	AADDQ,
+	"ADDW",		LTYPE3,	AADDW,
+	"ADJSP",	LTYPE2,	AADJSP,
+	"ANDB",		LTYPE3,	AANDB,
+	"ANDL",		LTYPE3,	AANDL,
+	"ANDQ",		LTYPE3,	AANDQ,
+	"ANDW",		LTYPE3,	AANDW,
+	"ARPL",		LTYPE3,	AARPL,
+	"BOUNDL",	LTYPE3,	ABOUNDL,
+	"BOUNDW",	LTYPE3,	ABOUNDW,
+	"BSFL",		LTYPE3,	ABSFL,
+	"BSFQ",		LTYPE3,	ABSFQ,
+	"BSFW",		LTYPE3,	ABSFW,
+	"BSRL",		LTYPE3,	ABSRL,
+	"BSRQ",		LTYPE3,	ABSRQ,
+	"BSRW",		LTYPE3,	ABSRW,
+	"BTCL",		LTYPE3,	ABTCL,
+	"BTCQ",		LTYPE3,	ABTCQ,
+	"BTCW",		LTYPE3,	ABTCW,
+	"BTL",		LTYPE3,	ABTL,
+	"BTQ",		LTYPE3,	ABTQ,
+	"BTRL",		LTYPE3,	ABTRL,
+	"BTRQ",		LTYPE3,	ABTRQ,
+	"BTRW",		LTYPE3,	ABTRW,
+	"BTSL",		LTYPE3,	ABTSL,
+	"BTSQ",		LTYPE3,	ABTSQ,
+	"BTSW",		LTYPE3,	ABTSW,
+	"BTW",		LTYPE3,	ABTW,
+	"BYTE",		LTYPE2,	ABYTE,
+	"CALL",		LTYPEC,	ACALL,
+	"CLC",		LTYPE0,	ACLC,
+	"CLD",		LTYPE0,	ACLD,
+	"CLI",		LTYPE0,	ACLI,
+	"CLTS",		LTYPE0,	ACLTS,
+	"CMC",		LTYPE0,	ACMC,
+	"CMPB",		LTYPE4,	ACMPB,
+	"CMPL",		LTYPE4,	ACMPL,
+	"CMPQ",		LTYPE4,	ACMPQ,
+	"CMPW",		LTYPE4,	ACMPW,
+	"CMPSB",	LTYPE0,	ACMPSB,
+	"CMPSL",	LTYPE0,	ACMPSL,
+	"CMPSQ",	LTYPE0,	ACMPSQ,
+	"CMPSW",	LTYPE0,	ACMPSW,
+	"CMPXCHG8B",	LTYPE1,	ACMPXCHG8B,
+	"CMPXCHGB",	LTYPE3,	ACMPXCHGB,	/* LTYPE3? */
+	"CMPXCHGL",	LTYPE3,	ACMPXCHGL,
+	"CMPXCHGQ",	LTYPE3,	ACMPXCHGQ,
+	"CMPXCHGW",	LTYPE3,	ACMPXCHGW,
+	"CPUID",	LTYPE0,	ACPUID,
+	"DAA",		LTYPE0,	ADAA,
+	"DAS",		LTYPE0,	ADAS,
+	"DATA",		LTYPED,	ADATA,
+	"DECB",		LTYPE1,	ADECB,
+	"DECL",		LTYPE1,	ADECL,
+	"DECQ",		LTYPE1,	ADECQ,
+	"DECW",		LTYPE1,	ADECW,
+	"DIVB",		LTYPE2,	ADIVB,
+	"DIVL",		LTYPE2,	ADIVL,
+	"DIVQ",		LTYPE2,	ADIVQ,
+	"DIVW",		LTYPE2,	ADIVW,
+	"EMMS",		LTYPE0,	AEMMS,
+	"END",		LTYPE0,	AEND,
+	"ENTER",	LTYPE2,	AENTER,
+	"GLOBL",	LTYPET,	AGLOBL,
+	"HLT",		LTYPE0,	AHLT,
+	"IDIVB",	LTYPE2,	AIDIVB,
+	"IDIVL",	LTYPE2,	AIDIVL,
+	"IDIVQ",	LTYPE2,	AIDIVQ,
+	"IDIVW",	LTYPE2,	AIDIVW,
+	"IMULB",	LTYPEI,	AIMULB,
+	"IMULL",	LTYPEI,	AIMULL,
+	"IMULQ",	LTYPEI,	AIMULQ,
+	"IMULW",	LTYPEI,	AIMULW,
+	"INB",		LTYPE0,	AINB,
+	"INL",		LTYPE0,	AINL,
+	"INW",		LTYPE0,	AINW,
+	"INCB",		LTYPE1,	AINCB,
+	"INCL",		LTYPE1,	AINCL,
+	"INCQ",		LTYPE1,	AINCQ,
+	"INCW",		LTYPE1,	AINCW,
+	"INSB",		LTYPE0,	AINSB,
+	"INSL",		LTYPE0,	AINSL,
+	"INSW",		LTYPE0,	AINSW,
+	"INT",		LTYPE2,	AINT,
+	"INTO",		LTYPE0,	AINTO,
+	"INVD",		LTYPE0,	AINVD,
+	"INVLPG",	LTYPE2,	AINVLPG,
+	"IRETL",	LTYPE0,	AIRETL,
+	"IRETQ",	LTYPE0,	AIRETQ,
+	"IRETW",	LTYPE0,	AIRETW,
+
+	"JOS",		LTYPER,	AJOS,
+	"JO",		LTYPER,	AJOS,	/* alternate */
+	"JOC",		LTYPER,	AJOC,
+	"JNO",		LTYPER,	AJOC,	/* alternate */
+	"JCS",		LTYPER,	AJCS,
+	"JB",		LTYPER,	AJCS,	/* alternate */
+	"JC",		LTYPER,	AJCS,	/* alternate */
+	"JNAE",		LTYPER,	AJCS,	/* alternate */
+	"JLO",		LTYPER,	AJCS,	/* alternate */
+	"JCC",		LTYPER,	AJCC,
+	"JAE",		LTYPER,	AJCC,	/* alternate */
+	"JNB",		LTYPER,	AJCC,	/* alternate */
+	"JNC",		LTYPER,	AJCC,	/* alternate */
+	"JHS",		LTYPER,	AJCC,	/* alternate */
+	"JEQ",		LTYPER,	AJEQ,
+	"JE",		LTYPER,	AJEQ,	/* alternate */
+	"JZ",		LTYPER,	AJEQ,	/* alternate */
+	"JNE",		LTYPER,	AJNE,
+	"JNZ",		LTYPER,	AJNE,	/* alternate */
+	"JLS",		LTYPER,	AJLS,
+	"JBE",		LTYPER,	AJLS,	/* alternate */
+	"JNA",		LTYPER,	AJLS,	/* alternate */
+	"JHI",		LTYPER,	AJHI,
+	"JA",		LTYPER,	AJHI,	/* alternate */
+	"JNBE",		LTYPER,	AJHI,	/* alternate */
+	"JMI",		LTYPER,	AJMI,
+	"JS",		LTYPER,	AJMI,	/* alternate */
+	"JPL",		LTYPER,	AJPL,
+	"JNS",		LTYPER,	AJPL,	/* alternate */
+	"JPS",		LTYPER,	AJPS,
+	"JP",		LTYPER,	AJPS,	/* alternate */
+	"JPE",		LTYPER,	AJPS,	/* alternate */
+	"JPC",		LTYPER,	AJPC,
+	"JNP",		LTYPER,	AJPC,	/* alternate */
+	"JPO",		LTYPER,	AJPC,	/* alternate */
+	"JLT",		LTYPER,	AJLT,
+	"JL",		LTYPER,	AJLT,	/* alternate */
+	"JNGE",		LTYPER,	AJLT,	/* alternate */
+	"JGE",		LTYPER,	AJGE,
+	"JNL",		LTYPER,	AJGE,	/* alternate */
+	"JLE",		LTYPER,	AJLE,
+	"JNG",		LTYPER,	AJLE,	/* alternate */
+	"JGT",		LTYPER,	AJGT,
+	"JG",		LTYPER,	AJGT,	/* alternate */
+	"JNLE",		LTYPER,	AJGT,	/* alternate */
+
+	"JCXZ",		LTYPER,	AJCXZ,
+	"JMP",		LTYPEC,	AJMP,
+	"LAHF",		LTYPE0,	ALAHF,
+	"LARL",		LTYPE3,	ALARL,
+	"LARW",		LTYPE3,	ALARW,
+	"LEAL",		LTYPE3,	ALEAL,
+	"LEAQ",		LTYPE3,	ALEAQ,
+	"LEAW",		LTYPE3,	ALEAW,
+	"LEAVEL",	LTYPE0,	ALEAVEL,
+	"LEAVEQ",	LTYPE0,	ALEAVEQ,
+	"LEAVEW",	LTYPE0,	ALEAVEW,
+	"LFENCE",	LTYPE0,	ALFENCE,
+	"LOCK",		LTYPE0,	ALOCK,
+	"LODSB",	LTYPE0,	ALODSB,
+	"LODSL",	LTYPE0,	ALODSL,
+	"LODSQ",	LTYPE0,	ALODSQ,
+	"LODSW",	LTYPE0,	ALODSW,
+	"LONG",		LTYPE2,	ALONG,
+	"LOOP",		LTYPER,	ALOOP,
+	"LOOPEQ",	LTYPER,	ALOOPEQ,
+	"LOOPNE",	LTYPER,	ALOOPNE,
+	"LSLL",		LTYPE3,	ALSLL,
+	"LSLW",		LTYPE3,	ALSLW,
+	"MFENCE",	LTYPE0,	AMFENCE,
+	"MODE",		LTYPE2,	AMODE,
+	"MOVB",		LTYPE3,	AMOVB,
+	"MOVL",		LTYPEM,	AMOVL,
+	"MOVQ",		LTYPEM,	AMOVQ,
+	"MOVW",		LTYPEM,	AMOVW,
+	"MOVBLSX",	LTYPE3, AMOVBLSX,
+	"MOVBLZX",	LTYPE3, AMOVBLZX,
+	"MOVBQSX",	LTYPE3,	AMOVBQSX,
+	"MOVBQZX",	LTYPE3,	AMOVBQZX,
+	"MOVBWSX",	LTYPE3, AMOVBWSX,
+	"MOVBWZX",	LTYPE3, AMOVBWZX,
+	"MOVLQSX",	LTYPE3, AMOVLQSX,
+	"MOVLQZX",	LTYPE3, AMOVLQZX,
+	"MOVNTIL",	LTYPE3,	AMOVNTIL,
+	"MOVNTIQ",	LTYPE3,	AMOVNTIQ,
+	"MOVWLSX",	LTYPE3, AMOVWLSX,
+	"MOVWLZX",	LTYPE3, AMOVWLZX,
+	"MOVWQSX",	LTYPE3,	AMOVWQSX,
+	"MOVWQZX",	LTYPE3,	AMOVWQZX,
+	"MOVSB",	LTYPE0,	AMOVSB,
+	"MOVSL",	LTYPE0,	AMOVSL,
+	"MOVSQ",	LTYPE0,	AMOVSQ,
+	"MOVSW",	LTYPE0,	AMOVSW,
+	"MULB",		LTYPE2,	AMULB,
+	"MULL",		LTYPE2,	AMULL,
+	"MULQ",		LTYPE2,	AMULQ,
+	"MULW",		LTYPE2,	AMULW,
+	"NEGB",		LTYPE1,	ANEGB,
+	"NEGL",		LTYPE1,	ANEGL,
+	"NEGQ",		LTYPE1,	ANEGQ,
+	"NEGW",		LTYPE1,	ANEGW,
+	"NOP",		LTYPEN,	ANOP,
+	"NOTB",		LTYPE1,	ANOTB,
+	"NOTL",		LTYPE1,	ANOTL,
+	"NOTQ",		LTYPE1,	ANOTQ,
+	"NOTW",		LTYPE1,	ANOTW,
+	"ORB",		LTYPE3,	AORB,
+	"ORL",		LTYPE3,	AORL,
+	"ORQ",		LTYPE3,	AORQ,
+	"ORW",		LTYPE3,	AORW,
+	"OUTB",		LTYPE0,	AOUTB,
+	"OUTL",		LTYPE0,	AOUTL,
+	"OUTW",		LTYPE0,	AOUTW,
+	"OUTSB",	LTYPE0,	AOUTSB,
+	"OUTSL",	LTYPE0,	AOUTSL,
+	"OUTSW",	LTYPE0,	AOUTSW,
+	"POPAL",	LTYPE0,	APOPAL,
+	"POPAW",	LTYPE0,	APOPAW,
+	"POPFL",	LTYPE0,	APOPFL,
+	"POPFQ",	LTYPE0,	APOPFQ,
+	"POPFW",	LTYPE0,	APOPFW,
+	"POPL",		LTYPE1,	APOPL,
+	"POPQ",		LTYPE1,	APOPQ,
+	"POPW",		LTYPE1,	APOPW,
+	"PUSHAL",	LTYPE0,	APUSHAL,
+	"PUSHAW",	LTYPE0,	APUSHAW,
+	"PUSHFL",	LTYPE0,	APUSHFL,
+	"PUSHFQ",	LTYPE0,	APUSHFQ,
+	"PUSHFW",	LTYPE0,	APUSHFW,
+	"PUSHL",	LTYPE2,	APUSHL,
+	"PUSHQ",	LTYPE2,	APUSHQ,
+	"PUSHW",	LTYPE2,	APUSHW,
+	"RCLB",		LTYPE3,	ARCLB,
+	"RCLL",		LTYPE3,	ARCLL,
+	"RCLQ",		LTYPE3,	ARCLQ,
+	"RCLW",		LTYPE3,	ARCLW,
+	"RCRB",		LTYPE3,	ARCRB,
+	"RCRL",		LTYPE3,	ARCRL,
+	"RCRQ",		LTYPE3,	ARCRQ,
+	"RCRW",		LTYPE3,	ARCRW,
+	"RDMSR",	LTYPE0,	ARDMSR,
+	"RDPMC",	LTYPE0,	ARDPMC,
+	"RDTSC",	LTYPE0,	ARDTSC,
+	"REP",		LTYPE0,	AREP,
+	"REPN",		LTYPE0,	AREPN,
+	"RET",		LTYPE0,	ARET,
+	"RETFL",	LTYPERT,ARETFL,
+	"RETFW",	LTYPERT,ARETFW,
+	"RETFQ",	LTYPERT,ARETFQ,
+	"ROLB",		LTYPE3,	AROLB,
+	"ROLL",		LTYPE3,	AROLL,
+	"ROLQ",		LTYPE3,	AROLQ,
+	"ROLW",		LTYPE3,	AROLW,
+	"RORB",		LTYPE3,	ARORB,
+	"RORL",		LTYPE3,	ARORL,
+	"RORQ",		LTYPE3,	ARORQ,
+	"RORW",		LTYPE3,	ARORW,
+	"RSM",		LTYPE0,	ARSM,
+	"SAHF",		LTYPE0,	ASAHF,
+	"SALB",		LTYPE3,	ASALB,
+	"SALL",		LTYPE3,	ASALL,
+	"SALQ",		LTYPE3,	ASALQ,
+	"SALW",		LTYPE3,	ASALW,
+	"SARB",		LTYPE3,	ASARB,
+	"SARL",		LTYPE3,	ASARL,
+	"SARQ",		LTYPE3,	ASARQ,
+	"SARW",		LTYPE3,	ASARW,
+	"SBBB",		LTYPE3,	ASBBB,
+	"SBBL",		LTYPE3,	ASBBL,
+	"SBBQ",		LTYPE3,	ASBBQ,
+	"SBBW",		LTYPE3,	ASBBW,
+	"SCASB",	LTYPE0,	ASCASB,
+	"SCASL",	LTYPE0,	ASCASL,
+	"SCASQ",	LTYPE0,	ASCASQ,
+	"SCASW",	LTYPE0,	ASCASW,
+	"SETCC",	LTYPE1,	ASETCC,
+	"SETCS",	LTYPE1,	ASETCS,
+	"SETEQ",	LTYPE1,	ASETEQ,
+	"SETGE",	LTYPE1,	ASETGE,
+	"SETGT",	LTYPE1,	ASETGT,
+	"SETHI",	LTYPE1,	ASETHI,
+	"SETLE",	LTYPE1,	ASETLE,
+	"SETLS",	LTYPE1,	ASETLS,
+	"SETLT",	LTYPE1,	ASETLT,
+	"SETMI",	LTYPE1,	ASETMI,
+	"SETNE",	LTYPE1,	ASETNE,
+	"SETOC",	LTYPE1,	ASETOC,
+	"SETOS",	LTYPE1,	ASETOS,
+	"SETPC",	LTYPE1,	ASETPC,
+	"SETPL",	LTYPE1,	ASETPL,
+	"SETPS",	LTYPE1,	ASETPS,
+	"SFENCE",	LTYPE0,	ASFENCE,
+	"CDQ",		LTYPE0,	ACDQ,
+	"CWD",		LTYPE0,	ACWD,
+	"CQO",		LTYPE0,	ACQO,
+	"SHLB",		LTYPE3,	ASHLB,
+	"SHLL",		LTYPES,	ASHLL,
+	"SHLQ",		LTYPES,	ASHLQ,
+	"SHLW",		LTYPES,	ASHLW,
+	"SHRB",		LTYPE3,	ASHRB,
+	"SHRL",		LTYPES,	ASHRL,
+	"SHRQ",		LTYPES,	ASHRQ,
+	"SHRW",		LTYPES,	ASHRW,
+	"STC",		LTYPE0,	ASTC,
+	"STD",		LTYPE0,	ASTD,
+	"STI",		LTYPE0,	ASTI,
+	"STOSB",	LTYPE0,	ASTOSB,
+	"STOSL",	LTYPE0,	ASTOSL,
+	"STOSQ",	LTYPE0,	ASTOSQ,
+	"STOSW",	LTYPE0,	ASTOSW,
+	"SUBB",		LTYPE3,	ASUBB,
+	"SUBL",		LTYPE3,	ASUBL,
+	"SUBQ",		LTYPE3,	ASUBQ,
+	"SUBW",		LTYPE3,	ASUBW,
+	"SYSCALL",	LTYPE0,	ASYSCALL,
+	"SYSRET",	LTYPE0,	ASYSRET,
+	"SWAPGS",	LTYPE0,	ASWAPGS,
+	"TESTB",	LTYPE3,	ATESTB,
+	"TESTL",	LTYPE3,	ATESTL,
+	"TESTQ",	LTYPE3,	ATESTQ,
+	"TESTW",	LTYPE3,	ATESTW,
+	"TEXT",		LTYPET,	ATEXT,
+	"VERR",		LTYPE2,	AVERR,
+	"VERW",		LTYPE2,	AVERW,
+	"QUAD",		LTYPE2,	AQUAD,
+	"WAIT",		LTYPE0,	AWAIT,
+	"WBINVD",	LTYPE0,	AWBINVD,
+	"WRMSR",	LTYPE0,	AWRMSR,
+	"WORD",		LTYPE2,	AWORD,
+	"XADDB",	LTYPE3,	AXADDB,
+	"XADDL",	LTYPE3,	AXADDL,
+	"XADDQ",	LTYPE3,	AXADDQ,
+	"XADDW",	LTYPE3,	AXADDW,
+	"XCHGB",	LTYPE3,	AXCHGB,
+	"XCHGL",	LTYPE3,	AXCHGL,
+	"XCHGQ",	LTYPE3,	AXCHGQ,
+	"XCHGW",	LTYPE3,	AXCHGW,
+	"XLAT",		LTYPE2,	AXLAT,
+	"XORB",		LTYPE3,	AXORB,
+	"XORL",		LTYPE3,	AXORL,
+	"XORQ",		LTYPE3,	AXORQ,
+	"XORW",		LTYPE3,	AXORW,
+
+	"CMOVLCC",	LTYPE3,	ACMOVLCC,
+	"CMOVLCS",	LTYPE3,	ACMOVLCS,
+	"CMOVLEQ",	LTYPE3,	ACMOVLEQ,
+	"CMOVLGE",	LTYPE3,	ACMOVLGE,
+	"CMOVLGT",	LTYPE3,	ACMOVLGT,
+	"CMOVLHI",	LTYPE3,	ACMOVLHI,
+	"CMOVLLE",	LTYPE3,	ACMOVLLE,
+	"CMOVLLS",	LTYPE3,	ACMOVLLS,
+	"CMOVLLT",	LTYPE3,	ACMOVLLT,
+	"CMOVLMI",	LTYPE3,	ACMOVLMI,
+	"CMOVLNE",	LTYPE3,	ACMOVLNE,
+	"CMOVLOC",	LTYPE3,	ACMOVLOC,
+	"CMOVLOS",	LTYPE3,	ACMOVLOS,
+	"CMOVLPC",	LTYPE3,	ACMOVLPC,
+	"CMOVLPL",	LTYPE3,	ACMOVLPL,
+	"CMOVLPS",	LTYPE3,	ACMOVLPS,
+	"CMOVQCC",	LTYPE3,	ACMOVQCC,
+	"CMOVQCS",	LTYPE3,	ACMOVQCS,
+	"CMOVQEQ",	LTYPE3,	ACMOVQEQ,
+	"CMOVQGE",	LTYPE3,	ACMOVQGE,
+	"CMOVQGT",	LTYPE3,	ACMOVQGT,
+	"CMOVQHI",	LTYPE3,	ACMOVQHI,
+	"CMOVQLE",	LTYPE3,	ACMOVQLE,
+	"CMOVQLS",	LTYPE3,	ACMOVQLS,
+	"CMOVQLT",	LTYPE3,	ACMOVQLT,
+	"CMOVQMI",	LTYPE3,	ACMOVQMI,
+	"CMOVQNE",	LTYPE3,	ACMOVQNE,
+	"CMOVQOC",	LTYPE3,	ACMOVQOC,
+	"CMOVQOS",	LTYPE3,	ACMOVQOS,
+	"CMOVQPC",	LTYPE3,	ACMOVQPC,
+	"CMOVQPL",	LTYPE3,	ACMOVQPL,
+	"CMOVQPS",	LTYPE3,	ACMOVQPS,
+	"CMOVWCC",	LTYPE3,	ACMOVWCC,
+	"CMOVWCS",	LTYPE3,	ACMOVWCS,
+	"CMOVWEQ",	LTYPE3,	ACMOVWEQ,
+	"CMOVWGE",	LTYPE3,	ACMOVWGE,
+	"CMOVWGT",	LTYPE3,	ACMOVWGT,
+	"CMOVWHI",	LTYPE3,	ACMOVWHI,
+	"CMOVWLE",	LTYPE3,	ACMOVWLE,
+	"CMOVWLS",	LTYPE3,	ACMOVWLS,
+	"CMOVWLT",	LTYPE3,	ACMOVWLT,
+	"CMOVWMI",	LTYPE3,	ACMOVWMI,
+	"CMOVWNE",	LTYPE3,	ACMOVWNE,
+	"CMOVWOC",	LTYPE3,	ACMOVWOC,
+	"CMOVWOS",	LTYPE3,	ACMOVWOS,
+	"CMOVWPC",	LTYPE3,	ACMOVWPC,
+	"CMOVWPL",	LTYPE3,	ACMOVWPL,
+	"CMOVWPS",	LTYPE3,	ACMOVWPS,
+
+	"FMOVB",	LTYPE3, AFMOVB,
+	"FMOVBP",	LTYPE3, AFMOVBP,
+	"FMOVD",	LTYPE3, AFMOVD,
+	"FMOVDP",	LTYPE3, AFMOVDP,
+	"FMOVF",	LTYPE3, AFMOVF,
+	"FMOVFP",	LTYPE3, AFMOVFP,
+	"FMOVL",	LTYPE3, AFMOVL,
+	"FMOVLP",	LTYPE3, AFMOVLP,
+	"FMOVV",	LTYPE3, AFMOVV,
+	"FMOVVP",	LTYPE3, AFMOVVP,
+	"FMOVW",	LTYPE3, AFMOVW,
+	"FMOVWP",	LTYPE3, AFMOVWP,
+	"FMOVX",	LTYPE3, AFMOVX,
+	"FMOVXP",	LTYPE3, AFMOVXP,
+	"FCOMB",	LTYPE3, AFCOMB,
+	"FCOMBP",	LTYPE3, AFCOMBP,
+	"FCOMD",	LTYPE3, AFCOMD,
+	"FCOMDP",	LTYPE3, AFCOMDP,
+	"FCOMDPP",	LTYPE3, AFCOMDPP,
+	"FCOMF",	LTYPE3, AFCOMF,
+	"FCOMFP",	LTYPE3, AFCOMFP,
+	"FCOML",	LTYPE3, AFCOML,
+	"FCOMLP",	LTYPE3, AFCOMLP,
+	"FCOMW",	LTYPE3, AFCOMW,
+	"FCOMWP",	LTYPE3, AFCOMWP,
+	"FUCOM",	LTYPE3, AFUCOM,
+	"FUCOMP",	LTYPE3, AFUCOMP,
+	"FUCOMPP",	LTYPE3, AFUCOMPP,
+	"FADDW",	LTYPE3, AFADDW,
+	"FADDL",	LTYPE3, AFADDL,
+	"FADDF",	LTYPE3, AFADDF,
+	"FADDD",	LTYPE3, AFADDD,
+	"FADDDP",	LTYPE3, AFADDDP,
+	"FSUBDP",	LTYPE3, AFSUBDP,
+	"FSUBW",	LTYPE3, AFSUBW,
+	"FSUBL",	LTYPE3, AFSUBL,
+	"FSUBF",	LTYPE3, AFSUBF,
+	"FSUBD",	LTYPE3, AFSUBD,
+	"FSUBRDP",	LTYPE3, AFSUBRDP,
+	"FSUBRW",	LTYPE3, AFSUBRW,
+	"FSUBRL",	LTYPE3, AFSUBRL,
+	"FSUBRF",	LTYPE3, AFSUBRF,
+	"FSUBRD",	LTYPE3, AFSUBRD,
+	"FMULDP",	LTYPE3, AFMULDP,
+	"FMULW",	LTYPE3, AFMULW,
+	"FMULL",	LTYPE3, AFMULL,
+	"FMULF",	LTYPE3, AFMULF,
+	"FMULD",	LTYPE3, AFMULD,
+	"FDIVDP",	LTYPE3, AFDIVDP,
+	"FDIVW",	LTYPE3, AFDIVW,
+	"FDIVL",	LTYPE3, AFDIVL,
+	"FDIVF",	LTYPE3, AFDIVF,
+	"FDIVD",	LTYPE3, AFDIVD,
+	"FDIVRDP",	LTYPE3, AFDIVRDP,
+	"FDIVRW",	LTYPE3, AFDIVRW,
+	"FDIVRL",	LTYPE3, AFDIVRL,
+	"FDIVRF",	LTYPE3, AFDIVRF,
+	"FDIVRD",	LTYPE3, AFDIVRD,
+	"FXCHD",	LTYPE3, AFXCHD,
+	"FFREE",	LTYPE1, AFFREE,
+	"FLDCW",	LTYPE2, AFLDCW,
+	"FLDENV",	LTYPE1, AFLDENV,
+	"FRSTOR",	LTYPE2, AFRSTOR,
+	"FSAVE",	LTYPE1, AFSAVE,
+	"FSTCW",	LTYPE1, AFSTCW,
+	"FSTENV",	LTYPE1, AFSTENV,
+	"FSTSW",	LTYPE1, AFSTSW,
+	"F2XM1",	LTYPE0, AF2XM1,
+	"FABS",		LTYPE0, AFABS,
+	"FCHS",		LTYPE0, AFCHS,
+	"FCLEX",	LTYPE0, AFCLEX,
+	"FCOS",		LTYPE0, AFCOS,
+	"FDECSTP",	LTYPE0, AFDECSTP,
+	"FINCSTP",	LTYPE0, AFINCSTP,
+	"FINIT",	LTYPE0, AFINIT,
+	"FLD1",		LTYPE0, AFLD1,
+	"FLDL2E",	LTYPE0, AFLDL2E,
+	"FLDL2T",	LTYPE0, AFLDL2T,
+	"FLDLG2",	LTYPE0, AFLDLG2,
+	"FLDLN2",	LTYPE0, AFLDLN2,
+	"FLDPI",	LTYPE0, AFLDPI,
+	"FLDZ",		LTYPE0, AFLDZ,
+	"FNOP",		LTYPE0, AFNOP,
+	"FPATAN",	LTYPE0, AFPATAN,
+	"FPREM",	LTYPE0, AFPREM,
+	"FPREM1",	LTYPE0, AFPREM1,
+	"FPTAN",	LTYPE0, AFPTAN,
+	"FRNDINT",	LTYPE0, AFRNDINT,
+	"FSCALE",	LTYPE0, AFSCALE,
+	"FSIN",		LTYPE0, AFSIN,
+	"FSINCOS",	LTYPE0, AFSINCOS,
+	"FSQRT",	LTYPE0, AFSQRT,
+	"FTST",		LTYPE0, AFTST,
+	"FXAM",		LTYPE0, AFXAM,
+	"FXTRACT",	LTYPE0, AFXTRACT,
+	"FYL2X",	LTYPE0, AFYL2X,
+	"FYL2XP1",	LTYPE0, AFYL2XP1,
+
+	"ADDPD",	LTYPE3,	AADDPD,
+	"ADDPS",	LTYPE3,	AADDPS,
+	"ADDSD",	LTYPE3,	AADDSD,
+	"ADDSS",	LTYPE3,	AADDSS,
+	"ANDNPD",	LTYPE3,	AANDNPD,
+	"ANDNPS",	LTYPE3,	AANDNPS,
+	"ANDPD",	LTYPE3,	AANDPD,
+	"ANDPS",	LTYPE3,	AANDPS,
+	"CMPPD",	LTYPEXC,ACMPPD,
+	"CMPPS",	LTYPEXC,ACMPPS,
+	"CMPSD",	LTYPEXC,ACMPSD,
+	"CMPSS",	LTYPEXC,ACMPSS,
+	"COMISD",	LTYPE3,	ACOMISD,
+	"COMISS",	LTYPE3,	ACOMISS,
+	"CVTPL2PD",	LTYPE3,	ACVTPL2PD,
+	"CVTPL2PS",	LTYPE3,	ACVTPL2PS,
+	"CVTPD2PL",	LTYPE3,	ACVTPD2PL,
+	"CVTPD2PS",	LTYPE3,	ACVTPD2PS,
+	"CVTPS2PL",	LTYPE3,	ACVTPS2PL,
+	"PF2IW",	LTYPE3,	APF2IW,
+	"PF2IL",	LTYPE3,	APF2IL,
+	"PF2ID",	LTYPE3,	APF2IL,	/* syn */
+	"PI2FL",	LTYPE3,	API2FL,
+	"PI2FD",	LTYPE3,	API2FL,	/* syn */
+	"PI2FW",	LTYPE3,	API2FW,
+	"CVTPS2PD",	LTYPE3,	ACVTPS2PD,
+	"CVTSD2SL",	LTYPE3,	ACVTSD2SL,
+	"CVTSD2SQ",	LTYPE3,	ACVTSD2SQ,
+	"CVTSD2SS",	LTYPE3,	ACVTSD2SS,
+	"CVTSL2SD",	LTYPE3,	ACVTSL2SD,
+	"CVTSQ2SD",	LTYPE3,	ACVTSQ2SD,
+	"CVTSL2SS",	LTYPE3,	ACVTSL2SS,
+	"CVTSQ2SS",	LTYPE3,	ACVTSQ2SS,
+	"CVTSS2SD",	LTYPE3,	ACVTSS2SD,
+	"CVTSS2SL",	LTYPE3,	ACVTSS2SL,
+	"CVTSS2SQ",	LTYPE3,	ACVTSS2SQ,
+	"CVTTPD2PL",	LTYPE3,	ACVTTPD2PL,
+	"CVTTPS2PL",	LTYPE3,	ACVTTPS2PL,
+	"CVTTSD2SL",	LTYPE3,	ACVTTSD2SL,
+	"CVTTSD2SQ",	LTYPE3,	ACVTTSD2SQ,
+	"CVTTSS2SL",	LTYPE3,	ACVTTSS2SL,
+	"CVTTSS2SQ",	LTYPE3,	ACVTTSS2SQ,
+	"DIVPD",	LTYPE3,	ADIVPD,
+	"DIVPS",	LTYPE3,	ADIVPS,
+	"DIVSD",	LTYPE3,	ADIVSD,
+	"DIVSS",	LTYPE3,	ADIVSS,
+	"FXRSTOR",	LTYPE2,	AFXRSTOR,
+	"FXRSTOR64",	LTYPE2,	AFXRSTOR64,
+	"FXSAVE",	LTYPE1,	AFXSAVE,
+	"FXSAVE64",	LTYPE1,	AFXSAVE64,
+	"LDMXCSR",	LTYPE2,	ALDMXCSR,
+	"MASKMOVOU",	LTYPE3,	AMASKMOVOU,
+	"MASKMOVDQU",	LTYPE3,	AMASKMOVOU,	/* syn */
+	"MASKMOVQ",	LTYPE3,	AMASKMOVQ,
+	"MAXPD",	LTYPE3,	AMAXPD,
+	"MAXPS",	LTYPE3,	AMAXPS,
+	"MAXSD",	LTYPE3,	AMAXSD,
+	"MAXSS",	LTYPE3,	AMAXSS,
+	"MINPD",	LTYPE3,	AMINPD,
+	"MINPS",	LTYPE3,	AMINPS,
+	"MINSD",	LTYPE3,	AMINSD,
+	"MINSS",	LTYPE3,	AMINSS,
+	"MOVAPD",	LTYPE3,	AMOVAPD,
+	"MOVAPS",	LTYPE3,	AMOVAPS,
+	"MOVD",		LTYPE3,	AMOVQ,	/* syn */
+	"MOVDQ2Q",	LTYPE3,	AMOVQ,	/* syn */
+	"MOVO",		LTYPE3,	AMOVO,
+	"MOVOA",	LTYPE3,	AMOVO,	/* syn */
+	"MOVOU",	LTYPE3,	AMOVOU,
+	"MOVHLPS",	LTYPE3,	AMOVHLPS,
+	"MOVHPD",	LTYPE3,	AMOVHPD,
+	"MOVHPS",	LTYPE3,	AMOVHPS,
+	"MOVLHPS",	LTYPE3,	AMOVLHPS,
+	"MOVLPD",	LTYPE3,	AMOVLPD,
+	"MOVLPS",	LTYPE3,	AMOVLPS,
+	"MOVMSKPD",	LTYPE3,	AMOVMSKPD,
+	"MOVMSKPS",	LTYPE3,	AMOVMSKPS,
+	"MOVNTO",	LTYPE3,	AMOVNTO,
+	"MOVNTDQ",	LTYPE3,	AMOVNTO,	/* syn */
+	"MOVNTPD",	LTYPE3,	AMOVNTPD,
+	"MOVNTPS",	LTYPE3,	AMOVNTPS,
+	"MOVNTQ",	LTYPE3,	AMOVNTQ,
+	"MOVQOZX",	LTYPE3,	AMOVQOZX,
+	"MOVSD",	LTYPE3,	AMOVSD,
+	"MOVSS",	LTYPE3,	AMOVSS,
+	"MOVUPD",	LTYPE3,	AMOVUPD,
+	"MOVUPS",	LTYPE3,	AMOVUPS,
+	"MULPD",	LTYPE3,	AMULPD,
+	"MULPS",	LTYPE3,	AMULPS,
+	"MULSD",	LTYPE3,	AMULSD,
+	"MULSS",	LTYPE3,	AMULSS,
+	"ORPD",		LTYPE3,	AORPD,
+	"ORPS",		LTYPE3,	AORPS,
+	"PACKSSLW",	LTYPE3,	APACKSSLW,
+	"PACKSSWB",	LTYPE3,	APACKSSWB,
+	"PACKUSWB",	LTYPE3,	APACKUSWB,
+	"PADDB",	LTYPE3,	APADDB,
+	"PADDL",	LTYPE3,	APADDL,
+	"PADDQ",	LTYPE3,	APADDQ,
+	"PADDSB",	LTYPE3,	APADDSB,
+	"PADDSW",	LTYPE3,	APADDSW,
+	"PADDUSB",	LTYPE3,	APADDUSB,
+	"PADDUSW",	LTYPE3,	APADDUSW,
+	"PADDW",	LTYPE3,	APADDW,
+	"PAND",		LTYPE3, APAND,
+	"PANDB",	LTYPE3,	APANDB,
+	"PANDL",	LTYPE3,	APANDL,
+	"PANDSB",	LTYPE3,	APANDSB,
+	"PANDSW",	LTYPE3,	APANDSW,
+	"PANDUSB",	LTYPE3,	APANDUSB,
+	"PANDUSW",	LTYPE3,	APANDUSW,
+	"PANDW",	LTYPE3,	APANDW,
+	"PANDN",	LTYPE3, APANDN,
+	"PAVGB",	LTYPE3,	APAVGB,
+	"PAVGW",	LTYPE3,	APAVGW,
+	"PCMPEQB",	LTYPE3,	APCMPEQB,
+	"PCMPEQL",	LTYPE3,	APCMPEQL,
+	"PCMPEQW",	LTYPE3,	APCMPEQW,
+	"PCMPGTB",	LTYPE3,	APCMPGTB,
+	"PCMPGTL",	LTYPE3,	APCMPGTL,	
+	"PCMPGTW",	LTYPE3,	APCMPGTW,
+	"PEXTRW",	LTYPEX,	APEXTRW,
+	"PINSRW",	LTYPEX,	APINSRW,
+	"PMADDWL",	LTYPE3,	APMADDWL,
+	"PMAXSW",	LTYPE3,	APMAXSW,
+	"PMAXUB",	LTYPE3,	APMAXUB,
+	"PMINSW",	LTYPE3,	APMINSW,
+	"PMINUB",	LTYPE3,	APMINUB,
+	"PMOVMSKB",	LTYPE3,	APMOVMSKB,
+	"PMULHRW",	LTYPE3,	APMULHRW,
+	"PMULHUW",	LTYPE3,	APMULHUW,
+	"PMULHW",	LTYPE3,	APMULHW,
+	"PMULLW",	LTYPE3,	APMULLW,
+	"PMULULQ",	LTYPE3,	APMULULQ,
+	"POR",		LTYPE3,	APOR,
+	"PSADBW",	LTYPE3,	APSADBW,
+	"PSHUFHW",	LTYPEX,	APSHUFHW,
+	"PSHUFL",	LTYPEX,	APSHUFL,
+	"PSHUFLW",	LTYPEX,	APSHUFLW,
+	"PSHUFW",	LTYPEX, APSHUFW,
+	"PSLLO",	LTYPE3,	APSLLO,
+	"PSLLDQ",	LTYPE3,	APSLLO,	/* syn */
+	"PSLLL",	LTYPE3,	APSLLL,
+	"PSLLQ",	LTYPE3,	APSLLQ,
+	"PSLLW",	LTYPE3,	APSLLW,
+	"PSRAL",	LTYPE3,	APSRAL,
+	"PSRAW",	LTYPE3,	APSRAW,
+	"PSRLO",	LTYPE3,	APSRLO,
+	"PSRLDQ",	LTYPE3,	APSRLO,	/* syn */
+	"PSRLL",	LTYPE3,	APSRLL,
+	"PSRLQ",	LTYPE3,	APSRLQ,
+	"PSRLW",	LTYPE3,	APSRLW,
+	"PSUBB",	LTYPE3,	APSUBB,
+	"PSUBL",	LTYPE3,	APSUBL,
+	"PSUBQ",	LTYPE3,	APSUBQ,
+	"PSUBSB",	LTYPE3,	APSUBSB,
+	"PSUBSW",	LTYPE3,	APSUBSW,
+	"PSUBUSB",	LTYPE3,	APSUBUSB,
+	"PSUBUSW",	LTYPE3,	APSUBUSW,
+	"PSUBW",	LTYPE3,	APSUBW,
+	"PUNPCKHBW",	LTYPE3,	APUNPCKHBW,
+	"PUNPCKHLQ",	LTYPE3,	APUNPCKHLQ,
+	"PUNPCKHQDQ",	LTYPE3,	APUNPCKHQDQ,
+	"PUNPCKHWL",	LTYPE3,	APUNPCKHWL,
+	"PUNPCKLBW",	LTYPE3,	APUNPCKLBW,
+	"PUNPCKLLQ",	LTYPE3,	APUNPCKLLQ,
+	"PUNPCKLQDQ",	LTYPE3,	APUNPCKLQDQ,
+	"PUNPCKLWL",	LTYPE3,	APUNPCKLWL,
+	"PXOR",		LTYPE3,	APXOR,
+	"RCPPS",	LTYPE3,	ARCPPS,
+	"RCPSS",	LTYPE3,	ARCPSS,
+	"RSQRTPS",	LTYPE3,	ARSQRTPS,
+	"RSQRTSS",	LTYPE3,	ARSQRTSS,
+	"SHUFPD",	LTYPEX,	ASHUFPD,
+	"SHUFPS",	LTYPEX,	ASHUFPS,
+	"SQRTPD",	LTYPE3,	ASQRTPD,
+	"SQRTPS",	LTYPE3,	ASQRTPS,
+	"SQRTSD",	LTYPE3,	ASQRTSD,
+	"SQRTSS",	LTYPE3,	ASQRTSS,
+	"STMXCSR",	LTYPE1,	ASTMXCSR,
+	"SUBPD",	LTYPE3,	ASUBPD,
+	"SUBPS",	LTYPE3,	ASUBPS,
+	"SUBSD",	LTYPE3,	ASUBSD,
+	"SUBSS",	LTYPE3,	ASUBSS,
+	"UCOMISD",	LTYPE3,	AUCOMISD,
+	"UCOMISS",	LTYPE3,	AUCOMISS,
+	"UNPCKHPD",	LTYPE3,	AUNPCKHPD,
+	"UNPCKHPS",	LTYPE3,	AUNPCKHPS,
+	"UNPCKLPD",	LTYPE3,	AUNPCKLPD,
+	"UNPCKLPS",	LTYPE3,	AUNPCKLPS,
+	"XORPD",	LTYPE3,	AXORPD,
+	"XORPS",	LTYPE3,	AXORPS,
+
+	0
+};
+
+void
+cinit(void)
+{
+	Sym *s;
+	int i;
+
+	nullgen.sym = S;
+	nullgen.offset = 0;
+	if(FPCHIP)
+		nullgen.dval = 0;
+	for(i=0; i<sizeof(nullgen.sval); i++)
+		nullgen.sval[i] = 0;
+	nullgen.type = D_NONE;
+	nullgen.index = D_NONE;
+	nullgen.scale = 0;
+
+	nerrors = 0;
+	iostack = I;
+	iofree = I;
+	peekc = IGN;
+	nhunk = 0;
+	for(i=0; i<NHASH; i++)
+		hash[i] = S;
+	for(i=0; itab[i].name; i++) {
+		s = slookup(itab[i].name);
+		if(s->type != LNAME)
+			yyerror("double initialization %s", itab[i].name);
+		s->type = itab[i].type;
+		s->value = itab[i].value;
+	}
+
+	pathname = allocn(pathname, 0, 100);
+	if(mygetwd(pathname, 99) == 0) {
+		pathname = allocn(pathname, 100, 900);
+		if(mygetwd(pathname, 999) == 0)
+			strcpy(pathname, "/???");
+	}
+}
+
+void
+checkscale(int scale)
+{
+
+	switch(scale) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		return;
+	}
+	yyerror("scale must be 1248: %d", scale);
+}
+
+void
+syminit(Sym *s)
+{
+
+	s->type = LNAME;
+	s->value = 0;
+}
+
+void
+cclean(void)
+{
+	Gen2 g2;
+
+	g2.from = nullgen;
+	g2.to = nullgen;
+	outcode(AEND, &g2);
+	Bflush(&obuf);
+}
+
+void
+zname(char *n, int t, int s)
+{
+
+	Bputc(&obuf, ANAME);		/* as(2) */
+	Bputc(&obuf, ANAME>>8);
+	Bputc(&obuf, t);		/* type */
+	Bputc(&obuf, s);		/* sym */
+	while(*n) {
+		Bputc(&obuf, *n);
+		n++;
+	}
+	Bputc(&obuf, 0);
+}
+
+void
+zaddr(Gen *a, int s)
+{
+	long l;
+	int i, t;
+	char *n;
+	Ieee e;
+
+	t = 0;
+	if(a->index != D_NONE || a->scale != 0)
+		t |= T_INDEX;
+	if(a->offset != 0) {
+		t |= T_OFFSET;
+		l = a->offset;
+		if((vlong)l != a->offset)
+			t |= T_64;
+	}
+	if(s != 0)
+		t |= T_SYM;
+
+	switch(a->type) {
+	default:
+		t |= T_TYPE;
+		break;
+	case D_FCONST:
+		t |= T_FCONST;
+		break;
+	case D_SCONST:
+		t |= T_SCONST;
+		break;
+	case D_NONE:
+		break;
+	}
+	Bputc(&obuf, t);
+
+	if(t & T_INDEX) {	/* implies index, scale */
+		Bputc(&obuf, a->index);
+		Bputc(&obuf, a->scale);
+	}
+	if(t & T_OFFSET) {	/* implies offset */
+		l = a->offset;
+		Bputc(&obuf, l);
+		Bputc(&obuf, l>>8);
+		Bputc(&obuf, l>>16);
+		Bputc(&obuf, l>>24);
+		if(t & T_64) {
+			l = a->offset>>32;
+			Bputc(&obuf, l);
+			Bputc(&obuf, l>>8);
+			Bputc(&obuf, l>>16);
+			Bputc(&obuf, l>>24);
+		}
+	}
+	if(t & T_SYM)		/* implies sym */
+		Bputc(&obuf, s);
+	if(t & T_FCONST) {
+		ieeedtod(&e, a->dval);
+		l = e.l;
+		Bputc(&obuf, l);
+		Bputc(&obuf, l>>8);
+		Bputc(&obuf, l>>16);
+		Bputc(&obuf, l>>24);
+		l = e.h;
+		Bputc(&obuf, l);
+		Bputc(&obuf, l>>8);
+		Bputc(&obuf, l>>16);
+		Bputc(&obuf, l>>24);
+		return;
+	}
+	if(t & T_SCONST) {
+		n = a->sval;
+		for(i=0; i<NSNAME; i++) {
+			Bputc(&obuf, *n);
+			n++;
+		}
+		return;
+	}
+	if(t & T_TYPE)
+		Bputc(&obuf, a->type);
+}
+
+void
+outcode(int a, Gen2 *g2)
+{
+	int sf, st, t;
+	Sym *s;
+
+	if(pass == 1)
+		goto out;
+
+jackpot:
+	sf = 0;
+	s = g2->from.sym;
+	while(s != S) {
+		sf = s->sym;
+		if(sf < 0 || sf >= NSYM)
+			sf = 0;
+		t = g2->from.type;
+		if(t == D_ADDR)
+			t = g2->from.index;
+		if(h[sf].type == t)
+		if(h[sf].sym == s)
+			break;
+		zname(s->name, t, sym);
+		s->sym = sym;
+		h[sym].sym = s;
+		h[sym].type = t;
+		sf = sym;
+		sym++;
+		if(sym >= NSYM)
+			sym = 1;
+		break;
+	}
+	st = 0;
+	s = g2->to.sym;
+	while(s != S) {
+		st = s->sym;
+		if(st < 0 || st >= NSYM)
+			st = 0;
+		t = g2->to.type;
+		if(t == D_ADDR)
+			t = g2->to.index;
+		if(h[st].type == t)
+		if(h[st].sym == s)
+			break;
+		zname(s->name, t, sym);
+		s->sym = sym;
+		h[sym].sym = s;
+		h[sym].type = t;
+		st = sym;
+		sym++;
+		if(sym >= NSYM)
+			sym = 1;
+		if(st == sf)
+			goto jackpot;
+		break;
+	}
+	Bputc(&obuf, a);
+	Bputc(&obuf, a>>8);
+	Bputc(&obuf, lineno);
+	Bputc(&obuf, lineno>>8);
+	Bputc(&obuf, lineno>>16);
+	Bputc(&obuf, lineno>>24);
+	zaddr(&g2->from, sf);
+	zaddr(&g2->to, st);
+
+out:
+	if(a != AGLOBL && a != ADATA && a != AMODE)
+		pc++;
+}
+
+void
+outhist(void)
+{
+	Gen g;
+	Hist *h;
+	char *p, *q, *op, c;
+	int n;
+
+	g = nullgen;
+	c = pathchar();
+	for(h = hist; h != H; h = h->link) {
+		p = h->name;
+		op = 0;
+		/* on windows skip drive specifier in pathname */
+		if(systemtype(Windows) && p && p[1] == ':'){
+			p += 2;
+			c = *p;
+		}
+		if(p && p[0] != c && h->offset == 0 && pathname){
+			/* on windows skip drive specifier in pathname */
+			if(systemtype(Windows) && pathname[1] == ':') {
+				op = p;
+				p = pathname+2;
+				c = *p;
+			} else if(pathname[0] == c){
+				op = p;
+				p = pathname;
+			}
+		}
+		while(p) {
+			q = strchr(p, c);
+			if(q) {
+				n = q-p;
+				if(n == 0){
+					n = 1;	/* leading "/" */
+					*p = '/';	/* don't emit "\" on windows */
+				}
+				q++;
+			} else {
+				n = strlen(p);
+				q = 0;
+			}
+			if(n) {
+				Bputc(&obuf, ANAME);
+				Bputc(&obuf, ANAME>>8);
+				Bputc(&obuf, D_FILE);	/* type */
+				Bputc(&obuf, 1);	/* sym */
+				Bputc(&obuf, '<');
+				Bwrite(&obuf, p, n);
+				Bputc(&obuf, 0);
+			}
+			p = q;
+			if(p == 0 && op) {
+				p = op;
+				op = 0;
+			}
+		}
+		g.offset = h->offset;
+
+		Bputc(&obuf, AHISTORY);
+		Bputc(&obuf, AHISTORY>>8);
+		Bputc(&obuf, h->line);
+		Bputc(&obuf, h->line>>8);
+		Bputc(&obuf, h->line>>16);
+		Bputc(&obuf, h->line>>24);
+		zaddr(&nullgen, 0);
+		zaddr(&g, 0);
+	}
+}
+
+#include "../cc/lexbody"
+#include "../cc/macbody"
+#include "../cc/compat"

+ 26 - 0
sys/src/cmd/6a/mkfile

@@ -0,0 +1,26 @@
+</$objtype/mkfile
+
+TARG=6a
+OFILES=\
+	y.tab.$O\
+	lex.$O\
+
+HFILES=\
+	../6c/6.out.h\
+	y.tab.h\
+	a.h\
+
+YFILES=a.y\
+
+BIN=/$objtype/bin
+UPDATE=\
+	mkfile\
+	a.y\
+	lex.c\
+	a.h\
+	${TARG:%=/386/bin/%}\
+
+< /sys/src/cmd/mkone
+YFLAGS=-D1 -d
+
+lex.$O:	../cc/macbody ../cc/lexbody ../cc/compat

+ 820 - 0
sys/src/cmd/6c/6.out.h

@@ -0,0 +1,820 @@
+#define	NSYM	50
+#define	NSNAME	8
+#define NOPROF	(1<<0)
+#define DUPOK	(1<<1)
+
+/*
+ *	amd64
+ */
+
+enum	as
+{
+	AXXX,
+	AAAA,
+	AAAD,
+	AAAM,
+	AAAS,
+	AADCB,
+	AADCL,
+	AADCW,
+	AADDB,
+	AADDL,
+	AADDW,
+	AADJSP,
+	AANDB,
+	AANDL,
+	AANDW,
+	AARPL,
+	ABOUNDL,
+	ABOUNDW,
+	ABSFL,
+	ABSFW,
+	ABSRL,
+	ABSRW,
+	ABTL,
+	ABTW,
+	ABTCL,
+	ABTCW,
+	ABTRL,
+	ABTRW,
+	ABTSL,
+	ABTSW,
+	ABYTE,
+	ACALL,
+	ACLC,
+	ACLD,
+	ACLI,
+	ACLTS,
+	ACMC,
+	ACMPB,
+	ACMPL,
+	ACMPW,
+	ACMPSB,
+	ACMPSL,
+	ACMPSW,
+	ADAA,
+	ADAS,
+	ADATA,
+	ADECB,
+	ADECL,
+	ADECQ,
+	ADECW,
+	ADIVB,
+	ADIVL,
+	ADIVW,
+	AENTER,
+	AGLOBL,
+	AGOK,
+	AHISTORY,
+	AHLT,
+	AIDIVB,
+	AIDIVL,
+	AIDIVW,
+	AIMULB,
+	AIMULL,
+	AIMULW,
+	AINB,
+	AINL,
+	AINW,
+	AINCB,
+	AINCL,
+	AINCQ,
+	AINCW,
+	AINSB,
+	AINSL,
+	AINSW,
+	AINT,
+	AINTO,
+	AIRETL,
+	AIRETW,
+	AJCC,
+	AJCS,
+	AJCXZ,
+	AJEQ,
+	AJGE,
+	AJGT,
+	AJHI,
+	AJLE,
+	AJLS,
+	AJLT,
+	AJMI,
+	AJMP,
+	AJNE,
+	AJOC,
+	AJOS,
+	AJPC,
+	AJPL,
+	AJPS,
+	ALAHF,
+	ALARL,
+	ALARW,
+	ALEAL,
+	ALEAW,
+	ALEAVEL,
+	ALEAVEW,
+	ALOCK,
+	ALODSB,
+	ALODSL,
+	ALODSW,
+	ALONG,
+	ALOOP,
+	ALOOPEQ,
+	ALOOPNE,
+	ALSLL,
+	ALSLW,
+	AMOVB,
+	AMOVL,
+	AMOVW,
+	AMOVBLSX,
+	AMOVBLZX,
+	AMOVBQSX,
+	AMOVBQZX,
+	AMOVBWSX,
+	AMOVBWZX,
+	AMOVWLSX,
+	AMOVWLZX,
+	AMOVWQSX,
+	AMOVWQZX,
+	AMOVSB,
+	AMOVSL,
+	AMOVSW,
+	AMULB,
+	AMULL,
+	AMULW,
+	ANAME,
+	ANEGB,
+	ANEGL,
+	ANEGW,
+	ANOP,
+	ANOTB,
+	ANOTL,
+	ANOTW,
+	AORB,
+	AORL,
+	AORW,
+	AOUTB,
+	AOUTL,
+	AOUTW,
+	AOUTSB,
+	AOUTSL,
+	AOUTSW,
+	APOPAL,
+	APOPAW,
+	APOPFL,
+	APOPFW,
+	APOPL,
+	APOPW,
+	APUSHAL,
+	APUSHAW,
+	APUSHFL,
+	APUSHFW,
+	APUSHL,
+	APUSHW,
+	ARCLB,
+	ARCLL,
+	ARCLW,
+	ARCRB,
+	ARCRL,
+	ARCRW,
+	AREP,
+	AREPN,
+	ARET,
+	AROLB,
+	AROLL,
+	AROLW,
+	ARORB,
+	ARORL,
+	ARORW,
+	ASAHF,
+	ASALB,
+	ASALL,
+	ASALW,
+	ASARB,
+	ASARL,
+	ASARW,
+	ASBBB,
+	ASBBL,
+	ASBBW,
+	ASCASB,
+	ASCASL,
+	ASCASW,
+	ASETCC,
+	ASETCS,
+	ASETEQ,
+	ASETGE,
+	ASETGT,
+	ASETHI,
+	ASETLE,
+	ASETLS,
+	ASETLT,
+	ASETMI,
+	ASETNE,
+	ASETOC,
+	ASETOS,
+	ASETPC,
+	ASETPL,
+	ASETPS,
+	ACDQ,
+	ACWD,
+	ASHLB,
+	ASHLL,
+	ASHLW,
+	ASHRB,
+	ASHRL,
+	ASHRW,
+	ASTC,
+	ASTD,
+	ASTI,
+	ASTOSB,
+	ASTOSL,
+	ASTOSW,
+	ASUBB,
+	ASUBL,
+	ASUBW,
+	ASYSCALL,
+	ATESTB,
+	ATESTL,
+	ATESTW,
+	ATEXT,
+	AVERR,
+	AVERW,
+	AWAIT,
+	AWORD,
+	AXCHGB,
+	AXCHGL,
+	AXCHGW,
+	AXLAT,
+	AXORB,
+	AXORL,
+	AXORW,
+
+	AFMOVB,
+	AFMOVBP,
+	AFMOVD,
+	AFMOVDP,
+	AFMOVF,
+	AFMOVFP,
+	AFMOVL,
+	AFMOVLP,
+	AFMOVV,
+	AFMOVVP,
+	AFMOVW,
+	AFMOVWP,
+	AFMOVX,
+	AFMOVXP,
+
+	AFCOMB,
+	AFCOMBP,
+	AFCOMD,
+	AFCOMDP,
+	AFCOMDPP,
+	AFCOMF,
+	AFCOMFP,
+	AFCOML,
+	AFCOMLP,
+	AFCOMW,
+	AFCOMWP,
+	AFUCOM,
+	AFUCOMP,
+	AFUCOMPP,
+
+	AFADDDP,
+	AFADDW,
+	AFADDL,
+	AFADDF,
+	AFADDD,
+
+	AFMULDP,
+	AFMULW,
+	AFMULL,
+	AFMULF,
+	AFMULD,
+
+	AFSUBDP,
+	AFSUBW,
+	AFSUBL,
+	AFSUBF,
+	AFSUBD,
+
+	AFSUBRDP,
+	AFSUBRW,
+	AFSUBRL,
+	AFSUBRF,
+	AFSUBRD,
+
+	AFDIVDP,
+	AFDIVW,
+	AFDIVL,
+	AFDIVF,
+	AFDIVD,
+
+	AFDIVRDP,
+	AFDIVRW,
+	AFDIVRL,
+	AFDIVRF,
+	AFDIVRD,
+
+	AFXCHD,
+	AFFREE,
+
+	AFLDCW,
+	AFLDENV,
+	AFRSTOR,
+	AFSAVE,
+	AFSTCW,
+	AFSTENV,
+	AFSTSW,
+
+	AF2XM1,
+	AFABS,
+	AFCHS,
+	AFCLEX,
+	AFCOS,
+	AFDECSTP,
+	AFINCSTP,
+	AFINIT,
+	AFLD1,
+	AFLDL2E,
+	AFLDL2T,
+	AFLDLG2,
+	AFLDLN2,
+	AFLDPI,
+	AFLDZ,
+	AFNOP,
+	AFPATAN,
+	AFPREM,
+	AFPREM1,
+	AFPTAN,
+	AFRNDINT,
+	AFSCALE,
+	AFSIN,
+	AFSINCOS,
+	AFSQRT,
+	AFTST,
+	AFXAM,
+	AFXTRACT,
+	AFYL2X,
+	AFYL2XP1,
+
+	AEND,
+
+	ADYNT,
+	AINIT,
+
+	ASIGNAME,
+
+	/* extra 32-bit operations */
+	ACMPXCHGB,
+	ACMPXCHGL,
+	ACMPXCHGW,
+	ACMPXCHG8B,
+	ACPUID,
+	AINVD,
+	AINVLPG,
+	ALFENCE,
+	AMFENCE,
+	AMOVNTIL,
+	ARDMSR,
+	ARDPMC,
+	ARDTSC,
+	ARSM,
+	ASFENCE,
+	ASYSRET,
+	AWBINVD,
+	AWRMSR,
+	AXADDB,
+	AXADDL,
+	AXADDW,
+
+	/* conditional move */
+	ACMOVLCC,
+	ACMOVLCS,
+	ACMOVLEQ,
+	ACMOVLGE,
+	ACMOVLGT,
+	ACMOVLHI,
+	ACMOVLLE,
+	ACMOVLLS,
+	ACMOVLLT,
+	ACMOVLMI,
+	ACMOVLNE,
+	ACMOVLOC,
+	ACMOVLOS,
+	ACMOVLPC,
+	ACMOVLPL,
+	ACMOVLPS,
+	ACMOVQCC,
+	ACMOVQCS,
+	ACMOVQEQ,
+	ACMOVQGE,
+	ACMOVQGT,
+	ACMOVQHI,
+	ACMOVQLE,
+	ACMOVQLS,
+	ACMOVQLT,
+	ACMOVQMI,
+	ACMOVQNE,
+	ACMOVQOC,
+	ACMOVQOS,
+	ACMOVQPC,
+	ACMOVQPL,
+	ACMOVQPS,
+	ACMOVWCC,
+	ACMOVWCS,
+	ACMOVWEQ,
+	ACMOVWGE,
+	ACMOVWGT,
+	ACMOVWHI,
+	ACMOVWLE,
+	ACMOVWLS,
+	ACMOVWLT,
+	ACMOVWMI,
+	ACMOVWNE,
+	ACMOVWOC,
+	ACMOVWOS,
+	ACMOVWPC,
+	ACMOVWPL,
+	ACMOVWPS,
+
+	/* 64-bit */
+	AADCQ,
+	AADDQ,
+	AANDQ,
+	ABSFQ,
+	ABSRQ,
+	ABTCQ,
+	ABTQ,
+	ABTRQ,
+	ABTSQ,
+	ACMPQ,
+	ACMPSQ,
+	ACMPXCHGQ,
+	ACQO,
+	ADIVQ,
+	AIDIVQ,
+	AIMULQ,
+	AIRETQ,
+	ALEAQ,
+	ALEAVEQ,
+	ALODSQ,
+	AMOVQ,
+	AMOVLQSX,
+	AMOVLQZX,
+	AMOVNTIQ,
+	AMOVSQ,
+	AMULQ,
+	ANEGQ,
+	ANOTQ,
+	AORQ,
+	APOPFQ,
+	APOPQ,
+	APUSHFQ,
+	APUSHQ,
+	ARCLQ,
+	ARCRQ,
+	AROLQ,
+	ARORQ,
+	AQUAD,
+	ASALQ,
+	ASARQ,
+	ASBBQ,
+	ASCASQ,
+	ASHLQ,
+	ASHRQ,
+	ASTOSQ,
+	ASUBQ,
+	ATESTQ,
+	AXADDQ,
+	AXCHGQ,
+	AXORQ,
+
+	/* media */
+	AADDPD,
+	AADDPS,
+	AADDSD,
+	AADDSS,
+	AANDNPD,
+	AANDNPS,
+	AANDPD,
+	AANDPS,
+	ACMPPD,
+	ACMPPS,
+	ACMPSD,
+	ACMPSS,
+	ACOMISD,
+	ACOMISS,
+	ACVTPD2PL,
+	ACVTPD2PS,
+	ACVTPL2PD,
+	ACVTPL2PS,
+	ACVTPS2PD,
+	ACVTPS2PL,
+	ACVTSD2SL,
+	ACVTSD2SQ,
+	ACVTSD2SS,
+	ACVTSL2SD,
+	ACVTSL2SS,
+	ACVTSQ2SD,
+	ACVTSQ2SS,
+	ACVTSS2SD,
+	ACVTSS2SL,
+	ACVTSS2SQ,
+	ACVTTPD2PL,
+	ACVTTPS2PL,
+	ACVTTSD2SL,
+	ACVTTSD2SQ,
+	ACVTTSS2SL,
+	ACVTTSS2SQ,
+	ADIVPD,
+	ADIVPS,
+	ADIVSD,
+	ADIVSS,
+	AEMMS,
+	AFXRSTOR,
+	AFXRSTOR64,
+	AFXSAVE,
+	AFXSAVE64,
+	ALDMXCSR,
+	AMASKMOVOU,
+	AMASKMOVQ,
+	AMAXPD,
+	AMAXPS,
+	AMAXSD,
+	AMAXSS,
+	AMINPD,
+	AMINPS,
+	AMINSD,
+	AMINSS,
+	AMOVAPD,
+	AMOVAPS,
+	AMOVOU,
+	AMOVHLPS,
+	AMOVHPD,
+	AMOVHPS,
+	AMOVLHPS,
+	AMOVLPD,
+	AMOVLPS,
+	AMOVMSKPD,
+	AMOVMSKPS,
+	AMOVNTO,
+	AMOVNTPD,
+	AMOVNTPS,
+	AMOVNTQ,
+	AMOVO,
+	AMOVQOZX,
+	AMOVSD,
+	AMOVSS,
+	AMOVUPD,
+	AMOVUPS,
+	AMULPD,
+	AMULPS,
+	AMULSD,
+	AMULSS,
+	AORPD,
+	AORPS,
+	APACKSSLW,
+	APACKSSWB,
+	APACKUSWB,
+	APADDB,
+	APADDL,
+	APADDQ,
+	APADDSB,
+	APADDSW,
+	APADDUSB,
+	APADDUSW,
+	APADDW,
+	APANDB,
+	APANDL,
+	APANDSB,
+	APANDSW,
+	APANDUSB,
+	APANDUSW,
+	APANDW,
+	APAND,
+	APANDN,
+	APAVGB,
+	APAVGW,
+	APCMPEQB,
+	APCMPEQL,
+	APCMPEQW,
+	APCMPGTB,
+	APCMPGTL,
+	APCMPGTW,
+	APEXTRW,
+	APFACC,
+	APFADD,
+	APFCMPEQ,
+	APFCMPGE,
+	APFCMPGT,
+	APFMAX,
+	APFMIN,
+	APFMUL,
+	APFNACC,
+	APFPNACC,
+	APFRCP,
+	APFRCPIT1,
+	APFRCPI2T,
+	APFRSQIT1,
+	APFRSQRT,
+	APFSUB,
+	APFSUBR,
+	APINSRW,
+	APMADDWL,
+	APMAXSW,
+	APMAXUB,
+	APMINSW,
+	APMINUB,
+	APMOVMSKB,
+	APMULHRW,
+	APMULHUW,
+	APMULHW,
+	APMULLW,
+	APMULULQ,
+	APOR,
+	APSADBW,
+	APSHUFHW,
+	APSHUFL,
+	APSHUFLW,
+	APSHUFW,
+	APSLLO,
+	APSLLL,
+	APSLLQ,
+	APSLLW,
+	APSRAL,
+	APSRAW,
+	APSRLO,
+	APSRLL,
+	APSRLQ,
+	APSRLW,
+	APSUBB,
+	APSUBL,
+	APSUBQ,
+	APSUBSB,
+	APSUBSW,
+	APSUBUSB,
+	APSUBUSW,
+	APSUBW,
+	APSWAPL,
+	APUNPCKHBW,
+	APUNPCKHLQ,
+	APUNPCKHQDQ,
+	APUNPCKHWL,
+	APUNPCKLBW,
+	APUNPCKLLQ,
+	APUNPCKLQDQ,
+	APUNPCKLWL,
+	APXOR,
+	ARCPPS,
+	ARCPSS,
+	ARSQRTPS,
+	ARSQRTSS,
+	ASHUFPD,
+	ASHUFPS,
+	ASQRTPD,
+	ASQRTPS,
+	ASQRTSD,
+	ASQRTSS,
+	ASTMXCSR,
+	ASUBPD,
+	ASUBPS,
+	ASUBSD,
+	ASUBSS,
+	AUCOMISD,
+	AUCOMISS,
+	AUNPCKHPD,
+	AUNPCKHPS,
+	AUNPCKLPD,
+	AUNPCKLPS,
+	AXORPD,
+	AXORPS,
+
+	APF2IW,
+	APF2IL,
+	API2FW,
+	API2FL,
+	ARETFW,
+	ARETFL,
+	ARETFQ,
+	ASWAPGS,
+
+	AMODE,
+
+	ALAST
+};
+
+enum
+{
+
+	D_AL		= 0,
+	D_CL,
+	D_DL,
+	D_BL,
+	D_SPB,
+	D_BPB,
+	D_SIB,
+	D_DIB,
+	D_R8B,
+	D_R9B,
+	D_R10B,
+	D_R11B,
+	D_R12B,
+	D_R13B,
+	D_R14B,
+	D_R15B,
+
+	D_AX		= 16,
+	D_CX,
+	D_DX,
+	D_BX,
+	D_SP,
+	D_BP,
+	D_SI,
+	D_DI,
+	D_R8,
+	D_R9,
+	D_R10,
+	D_R11,
+	D_R12,
+	D_R13,
+	D_R14,
+	D_R15,
+
+	D_AH		= 32,
+	D_CH,
+	D_DH,
+	D_BH,
+
+	D_F0		= 36,
+
+	D_M0		= 44,
+
+	D_X0		= 52,
+
+	D_CS		= 68,
+	D_SS,
+	D_DS,
+	D_ES,
+	D_FS,
+	D_GS,
+
+	D_GDTR,		/* global descriptor table register */
+	D_IDTR,		/* interrupt descriptor table register */
+	D_LDTR,		/* local descriptor table register */
+	D_MSW,		/* machine status word */
+	D_TASK,		/* task register */
+
+	D_CR		= 79,
+	D_DR		= 95,
+	D_TR		= 103,
+
+	D_NONE		= 111,
+
+	D_BRANCH	= 112,
+	D_EXTERN	= 113,
+	D_STATIC		= 114,
+	D_AUTO		= 115,
+	D_PARAM		= 116,
+	D_CONST		= 117,
+	D_FCONST	= 118,
+	D_SCONST	= 119,
+	D_ADDR		= 120,
+
+	D_FILE,
+	D_FILE1,
+
+	D_INDIR,	/* additive */
+
+	T_TYPE		= 1<<0,
+	T_INDEX		= 1<<1,
+	T_OFFSET	= 1<<2,
+	T_FCONST	= 1<<3,
+	T_SYM		= 1<<4,
+	T_SCONST	= 1<<5,
+	T_64	= 1<<6,
+
+	REGARG		= D_BP,	/* MIGHT CHANGE */
+	REGRET		= D_AX,
+	FREGRET		= D_X0,
+	REGSP		= D_SP,
+	REGTMP		= D_DI,
+	REGEXT		= D_R15,	/* compiler allocates external registers R15 down */
+	FREGMIN		= D_X0+5,	/* first register variable */
+	FREGEXT		= D_X0+7	/* first external register */
+};
+
+/*
+ * this is the ranlib header
+ */
+#define	SYMDEF	"__.SYMDEF"
+
+/*
+ * this is the simulated IEEE floating point
+ */
+typedef	struct	ieee	Ieee;
+struct	ieee
+{
+	long	l;	/* contains ls-man	0xffffffff */
+	long	h;	/* contains sign	0x80000000
+				    exp		0x7ff00000
+				    ms-man	0x000fffff */
+};

+ 1942 - 0
sys/src/cmd/6c/cgen.c

@@ -0,0 +1,1942 @@
+#include "gc.h"
+
+/* ,x/^(print|prtree)\(/i/\/\/ */
+int castup(Type*, Type*);
+
+void
+cgen(Node *n, Node *nn)
+{
+	Node *l, *r, *t;
+	Prog *p1;
+	Node nod, nod1, nod2, nod3, nod4;
+	int o, hardleft;
+	long v, curs;
+	vlong c;
+
+	if(debug['g']) {
+		prtree(nn, "cgen lhs");
+		prtree(n, "cgen");
+	}
+	if(n == Z || n->type == T)
+		return;
+	if(typesu[n->type->etype]) {
+		sugen(n, nn, n->type->width);
+		return;
+	}
+	l = n->left;
+	r = n->right;
+	o = n->op;
+	if(n->addable >= INDEXED) {
+		if(nn == Z) {
+			switch(o) {
+			default:
+				nullwarn(Z, Z);
+				break;
+			case OINDEX:
+				nullwarn(l, r);
+				break;
+			}
+			return;
+		}
+		gmove(n, nn);
+		return;
+	}
+	curs = cursafe;
+
+	if(l->complex >= FNX)
+	if(r != Z && r->complex >= FNX)
+	switch(o) {
+	default:
+		if(cond(o) && typesu[l->type->etype])
+			break;
+
+		regret(&nod, r);
+		cgen(r, &nod);
+
+		regsalloc(&nod1, r);
+		gmove(&nod, &nod1);
+
+		regfree(&nod);
+		nod = *n;
+		nod.right = &nod1;
+
+		cgen(&nod, nn);
+		return;
+
+	case OFUNC:
+	case OCOMMA:
+	case OANDAND:
+	case OOROR:
+	case OCOND:
+	case ODOT:
+		break;
+	}
+
+	hardleft = l->addable < INDEXED || l->complex >= FNX;
+	switch(o) {
+	default:
+		diag(n, "unknown op in cgen: %O", o);
+		break;
+
+	case ONEG:
+	case OCOM:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		regalloc(&nod, l, nn);
+		cgen(l, &nod);
+		gopcode(o, n->type, Z, &nod);
+		gmove(&nod, nn);
+		regfree(&nod);
+		break;
+
+	case OAS:
+		if(l->op == OBIT)
+			goto bitas;
+		if(!hardleft) {
+			if(nn != Z || r->addable < INDEXED || hardconst(r)) {
+				if(r->complex >= FNX && nn == Z)
+					regret(&nod, r);
+				else
+					regalloc(&nod, r, nn);
+				cgen(r, &nod);
+				gmove(&nod, l);
+				if(nn != Z)
+					gmove(&nod, nn);
+				regfree(&nod);
+			} else
+				gmove(r, l);
+			break;
+		}
+		if(l->complex >= r->complex) {
+			if(l->op == OINDEX && immconst(r)) {
+				gmove(r, l);
+				break;
+			}
+			reglcgen(&nod1, l, Z);
+			if(r->addable >= INDEXED && !hardconst(r)) {
+				gmove(r, &nod1);
+				if(nn != Z)
+					gmove(r, nn);
+				regfree(&nod1);
+				break;
+			}
+			regalloc(&nod, r, nn);
+			cgen(r, &nod);
+		} else {
+			regalloc(&nod, r, nn);
+			cgen(r, &nod);
+			reglcgen(&nod1, l, Z);
+		}
+		gmove(&nod, &nod1);
+		regfree(&nod);
+		regfree(&nod1);
+		break;
+
+	bitas:
+		n = l->left;
+		regalloc(&nod, r, nn);
+		if(l->complex >= r->complex) {
+			reglcgen(&nod1, n, Z);
+			cgen(r, &nod);
+		} else {
+			cgen(r, &nod);
+			reglcgen(&nod1, n, Z);
+		}
+		regalloc(&nod2, n, Z);
+		gmove(&nod1, &nod2);
+		bitstore(l, &nod, &nod1, &nod2, nn);
+		break;
+
+	case OBIT:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		bitload(n, &nod, Z, Z, nn);
+		gmove(&nod, nn);
+		regfree(&nod);
+		break;
+
+	case OLSHR:
+	case OASHL:
+	case OASHR:
+		if(nn == Z) {
+			nullwarn(l, r);
+			break;
+		}
+		if(r->op == OCONST) {
+			if(r->vconst == 0) {
+				cgen(l, nn);
+				break;
+			}
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			if(o == OASHL && r->vconst == 1)
+				gopcode(OADD, n->type, &nod, &nod);
+			else
+				gopcode(o, n->type, r, &nod);
+			gmove(&nod, nn);
+			regfree(&nod);
+			break;
+		}
+
+		/*
+		 * get nod to be D_CX
+		 */
+		if(nodreg(&nod, nn, D_CX)) {
+			regsalloc(&nod1, n);
+			gmove(&nod, &nod1);
+			cgen(n, &nod);		/* probably a bug */
+			gmove(&nod, nn);
+			gmove(&nod1, &nod);
+			break;
+		}
+		reg[D_CX]++;
+		if(nn->op == OREGISTER && nn->reg == D_CX)
+			regalloc(&nod1, l, Z);
+		else
+			regalloc(&nod1, l, nn);
+		if(r->complex >= l->complex) {
+			cgen(r, &nod);
+			cgen(l, &nod1);
+		} else {
+			cgen(l, &nod1);
+			cgen(r, &nod);
+		}
+		gopcode(o, n->type, &nod, &nod1);
+		gmove(&nod1, nn);
+		regfree(&nod);
+		regfree(&nod1);
+		break;
+
+	case OADD:
+	case OSUB:
+	case OOR:
+	case OXOR:
+	case OAND:
+		if(nn == Z) {
+			nullwarn(l, r);
+			break;
+		}
+		if(typefd[n->type->etype])
+			goto fop;
+		if(r->op == OCONST) {
+			if(r->vconst == 0 && o != OAND) {
+				cgen(l, nn);
+				break;
+			}
+		}
+		if(n->op == OADD && l->op == OASHL && l->right->op == OCONST
+		&& (r->op != OCONST || r->vconst < -128 || r->vconst > 127)) {
+			c = l->right->vconst;
+			if(c > 0 && c <= 3) {
+				if(l->left->complex >= r->complex) {
+					regalloc(&nod, l->left, nn);
+					cgen(l->left, &nod);
+					if(r->addable < INDEXED) {
+						regalloc(&nod1, r, Z);
+						cgen(r, &nod1);
+						genmuladd(&nod, &nod, 1 << c, &nod1);
+						regfree(&nod1);
+					}
+					else
+						genmuladd(&nod, &nod, 1 << c, r);
+				}
+				else {
+					regalloc(&nod, r, nn);
+					cgen(r, &nod);
+					regalloc(&nod1, l->left, Z);
+					cgen(l->left, &nod1);
+					genmuladd(&nod, &nod1, 1 << c, &nod);
+					regfree(&nod1);
+				}
+				gmove(&nod, nn);
+				regfree(&nod);
+				break;
+			}
+		}
+		if(r->addable >= INDEXED && !hardconst(r)) {
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			gopcode(o, n->type, r, &nod);
+			gmove(&nod, nn);
+			regfree(&nod);
+			break;
+		}
+		if(l->complex >= r->complex) {
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			regalloc(&nod1, r, Z);
+			cgen(r, &nod1);
+			gopcode(o, n->type, &nod1, &nod);
+		} else {
+			regalloc(&nod1, r, nn);
+			cgen(r, &nod1);
+			regalloc(&nod, l, Z);
+			cgen(l, &nod);
+			gopcode(o, n->type, &nod1, &nod);
+		}
+		gmove(&nod, nn);
+		regfree(&nod);
+		regfree(&nod1);
+		break;
+
+	case OLMOD:
+	case OMOD:
+	case OLMUL:
+	case OLDIV:
+	case OMUL:
+	case ODIV:
+		if(nn == Z) {
+			nullwarn(l, r);
+			break;
+		}
+		if(typefd[n->type->etype])
+			goto fop;
+		if(r->op == OCONST && typechl[n->type->etype]) {	/* TO DO */
+			SET(v);
+			switch(o) {
+			case ODIV:
+			case OMOD:
+				c = r->vconst;
+				if(c < 0)
+					c = -c;
+				v = log2(c);
+				if(v < 0)
+					break;
+				/* fall thru */
+			case OMUL:
+			case OLMUL:
+				regalloc(&nod, l, nn);
+				cgen(l, &nod);
+				switch(o) {
+				case OMUL:
+				case OLMUL:
+					mulgen(n->type, r, &nod);
+					break;
+				case ODIV:
+					sdiv2(r->vconst, v, l, &nod);
+					break;
+				case OMOD:
+					smod2(r->vconst, v, l, &nod);
+					break;
+				}
+				gmove(&nod, nn);
+				regfree(&nod);
+				goto done;
+			case OLDIV:
+				c = r->vconst;
+				if((c & 0x80000000) == 0)
+					break;
+				regalloc(&nod1, l, Z);
+				cgen(l, &nod1);
+				regalloc(&nod, l, nn);
+				zeroregm(&nod);
+				gins(ACMPL, &nod1, nodconst(c));
+				gins(ASBBL, nodconst(-1), &nod);
+				regfree(&nod1);
+				gmove(&nod, nn);
+				regfree(&nod);
+				goto done;
+			}
+		}
+
+		if(o == OMUL) {
+			if(l->addable >= INDEXED) {
+				t = l;
+				l = r;
+				r = t;
+			}
+			/* should favour AX */
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			if(r->addable < INDEXED || hardconst(r)) {
+				regalloc(&nod1, r, Z);
+				cgen(r, &nod1);
+				gopcode(OMUL, n->type, &nod1, &nod);
+				regfree(&nod1);
+			}else
+				gopcode(OMUL, n->type, r, &nod);	/* addressible */
+			gmove(&nod, nn);
+			regfree(&nod);
+			break;
+		}
+
+		/*
+		 * get nod to be D_AX
+		 * get nod1 to be D_DX
+		 */
+		if(nodreg(&nod, nn, D_AX)) {
+			regsalloc(&nod2, n);
+			gmove(&nod, &nod2);
+			v = reg[D_AX];
+			reg[D_AX] = 0;
+
+			if(isreg(l, D_AX)) {
+				nod3 = *n;
+				nod3.left = &nod2;
+				cgen(&nod3, nn);
+			} else
+			if(isreg(r, D_AX)) {
+				nod3 = *n;
+				nod3.right = &nod2;
+				cgen(&nod3, nn);
+			} else
+				cgen(n, nn);
+
+			gmove(&nod2, &nod);
+			reg[D_AX] = v;
+			break;
+		}
+		if(nodreg(&nod1, nn, D_DX)) {
+			regsalloc(&nod2, n);
+			gmove(&nod1, &nod2);
+			v = reg[D_DX];
+			reg[D_DX] = 0;
+
+			if(isreg(l, D_DX)) {
+				nod3 = *n;
+				nod3.left = &nod2;
+				cgen(&nod3, nn);
+			} else
+			if(isreg(r, D_DX)) {
+				nod3 = *n;
+				nod3.right = &nod2;
+				cgen(&nod3, nn);
+			} else
+				cgen(n, nn);
+
+			gmove(&nod2, &nod1);
+			reg[D_DX] = v;
+			break;
+		}
+		reg[D_AX]++;
+
+		if(r->op == OCONST && (o == ODIV || o == OLDIV) && immconst(r) && typechl[r->type->etype]) {
+			reg[D_DX]++;
+			if(l->addable < INDEXED) {
+				regalloc(&nod2, l, Z);
+				cgen(l, &nod2);
+				l = &nod2;
+			}
+			if(o == ODIV)
+				sdivgen(l, r, &nod, &nod1);
+			else
+				udivgen(l, r, &nod, &nod1);
+			gmove(&nod1, nn);
+			if(l == &nod2)
+				regfree(l);
+			goto freeaxdx;
+		}
+
+		if(l->complex >= r->complex) {
+			cgen(l, &nod);
+			reg[D_DX]++;
+			if(o == ODIV || o == OMOD)
+				gins(typechl[l->type->etype]? ACDQ: ACQO, Z, Z);
+			if(o == OLDIV || o == OLMOD)
+				zeroregm(&nod1);
+			if(r->addable < INDEXED || r->op == OCONST) {
+				regalloc(&nod3, r, Z);
+				cgen(r, &nod3);
+				gopcode(o, n->type, &nod3, Z);
+				regfree(&nod3);
+			} else
+				gopcode(o, n->type, r, Z);
+		} else {
+			regsalloc(&nod3, r);
+			cgen(r, &nod3);
+			cgen(l, &nod);
+			reg[D_DX]++;
+			if(o == ODIV || o == OMOD)
+				gins(typechl[l->type->etype]? ACDQ: ACQO, Z, Z);
+			if(o == OLDIV || o == OLMOD)
+				zeroregm(&nod1);
+			gopcode(o, n->type, &nod3, Z);
+		}
+		if(o == OMOD || o == OLMOD)
+			gmove(&nod1, nn);
+		else
+			gmove(&nod, nn);
+	freeaxdx:
+		regfree(&nod);
+		regfree(&nod1);
+		break;
+
+	case OASLSHR:
+	case OASASHL:
+	case OASASHR:
+		if(r->op == OCONST)
+			goto asand;
+		if(l->op == OBIT)
+			goto asbitop;
+		if(typefd[n->type->etype])
+			goto asand;	/* can this happen? */
+
+		/*
+		 * get nod to be D_CX
+		 */
+		if(nodreg(&nod, nn, D_CX)) {
+			regsalloc(&nod1, n);
+			gmove(&nod, &nod1);
+			cgen(n, &nod);
+			if(nn != Z)
+				gmove(&nod, nn);
+			gmove(&nod1, &nod);
+			break;
+		}
+		reg[D_CX]++;
+
+		if(r->complex >= l->complex) {
+			cgen(r, &nod);
+			if(hardleft)
+				reglcgen(&nod1, l, Z);
+			else
+				nod1 = *l;
+		} else {
+			if(hardleft)
+				reglcgen(&nod1, l, Z);
+			else
+				nod1 = *l;
+			cgen(r, &nod);
+		}
+
+		gopcode(o, l->type, &nod, &nod1);
+		regfree(&nod);
+		if(nn != Z)
+			gmove(&nod1, nn);
+		if(hardleft)
+			regfree(&nod1);
+		break;
+
+	case OASAND:
+	case OASADD:
+	case OASSUB:
+	case OASXOR:
+	case OASOR:
+	asand:
+		if(l->op == OBIT)
+			goto asbitop;
+		if(typefd[l->type->etype] || typefd[r->type->etype])
+			goto asfop;
+		if(l->complex >= r->complex) {
+			if(hardleft)
+				reglcgen(&nod, l, Z);
+			else
+				nod = *l;
+			if(!immconst(r)) {
+				regalloc(&nod1, r, nn);
+				cgen(r, &nod1);
+				gopcode(o, l->type, &nod1, &nod);
+				regfree(&nod1);
+			} else
+				gopcode(o, l->type, r, &nod);
+		} else {
+			regalloc(&nod1, r, nn);
+			cgen(r, &nod1);
+			if(hardleft)
+				reglcgen(&nod, l, Z);
+			else
+				nod = *l;
+			gopcode(o, l->type, &nod1, &nod);
+			regfree(&nod1);
+		}
+		if(nn != Z)
+			gmove(&nod, nn);
+		if(hardleft)
+			regfree(&nod);
+		break;
+
+	asfop:
+		if(l->complex >= r->complex) {
+			if(hardleft)
+				reglcgen(&nod, l, Z);
+			else
+				nod = *l;
+			if(r->addable < INDEXED){
+				regalloc(&nod1, r, nn);
+				cgen(r, &nod1);
+			}else
+				nod1 = *r;
+			regalloc(&nod2, r, Z);
+			gmove(&nod, &nod2);
+			gopcode(o, r->type, &nod1, &nod2);
+			gmove(&nod2, &nod);
+			regfree(&nod2);
+			if(r->addable < INDEXED)
+				regfree(&nod1);
+		} else {
+			regalloc(&nod1, r, nn);
+			cgen(r, &nod1);
+			if(hardleft)
+				reglcgen(&nod, l, Z);
+			else
+				nod = *l;
+			if(o != OASMUL && o != OASADD) {
+				regalloc(&nod2, r, Z);
+				gmove(&nod, &nod2);
+				gopcode(o, r->type, &nod1, &nod2);
+				regfree(&nod1);
+				gmove(&nod2, &nod);
+				regfree(&nod2);
+			} else {
+				gopcode(o, r->type, &nod, &nod1);
+				gmove(&nod1, &nod);
+				regfree(&nod1);
+			}
+		}
+		if(nn != Z)
+			gmove(&nod, nn);
+		if(hardleft)
+			regfree(&nod);
+		break;
+
+	case OASLMUL:
+	case OASLDIV:
+	case OASLMOD:
+	case OASMUL:
+	case OASDIV:
+	case OASMOD:
+		if(l->op == OBIT)
+			goto asbitop;
+		if(typefd[n->type->etype] || typefd[r->type->etype])
+			goto asfop;
+		if(r->op == OCONST && typechl[n->type->etype]) {
+			SET(v);
+			switch(o) {
+			case OASDIV:
+			case OASMOD:
+				c = r->vconst;
+				if(c < 0)
+					c = -c;
+				v = log2(c);
+				if(v < 0)
+					break;
+				/* fall thru */
+			case OASMUL:
+			case OASLMUL:
+				if(hardleft)
+					reglcgen(&nod2, l, Z);
+				else
+					nod2 = *l;
+				regalloc(&nod, l, nn);
+				cgen(&nod2, &nod);
+				switch(o) {
+				case OASMUL:
+				case OASLMUL:
+					mulgen(n->type, r, &nod);
+					break;
+				case OASDIV:
+					sdiv2(r->vconst, v, l, &nod);
+					break;
+				case OASMOD:
+					smod2(r->vconst, v, l, &nod);
+					break;
+				}
+			havev:
+				gmove(&nod, &nod2);
+				if(nn != Z)
+					gmove(&nod, nn);
+				if(hardleft)
+					regfree(&nod2);
+				regfree(&nod);
+				goto done;
+			case OASLDIV:
+				c = r->vconst;
+				if((c & 0x80000000) == 0)
+					break;
+				if(hardleft)
+					reglcgen(&nod2, l, Z);
+				else
+					nod2 = *l;
+				regalloc(&nod1, l, nn);
+				cgen(&nod2, &nod1);
+				regalloc(&nod, l, nn);
+				zeroregm(&nod);
+				gins(ACMPL, &nod1, nodconst(c));
+				gins(ASBBL, nodconst(-1), &nod);
+				regfree(&nod1);
+				goto havev;
+			}
+		}
+
+		if(o == OASMUL) {
+			/* should favour AX */
+			regalloc(&nod, l, nn);
+			if(r->complex >= FNX) {
+				regalloc(&nod1, r, Z);
+				cgen(r, &nod1);
+				r = &nod1;
+			}
+			if(hardleft)
+				reglcgen(&nod2, l, Z);
+			else
+				nod2 = *l;
+			cgen(&nod2, &nod);
+			if(r->addable < INDEXED || hardconst(r)) {
+				if(r->complex < FNX) {
+					regalloc(&nod1, r, Z);
+					cgen(r, &nod1);
+				}
+				gopcode(OASMUL, n->type, &nod1, &nod);
+				regfree(&nod1);
+			}
+			else
+				gopcode(OASMUL, n->type, r, &nod);
+			if(r == &nod1)
+				regfree(r);
+			gmove(&nod, &nod2);
+			if(nn != Z)
+				gmove(&nod, nn);
+			regfree(&nod);
+			if(hardleft)
+				regfree(&nod2);
+			break;
+		}
+
+		/*
+		 * get nod to be D_AX
+		 * get nod1 to be D_DX
+		 */
+		if(nodreg(&nod, nn, D_AX)) {
+			regsalloc(&nod2, n);
+			gmove(&nod, &nod2);
+			v = reg[D_AX];
+			reg[D_AX] = 0;
+
+			if(isreg(l, D_AX)) {
+				nod3 = *n;
+				nod3.left = &nod2;
+				cgen(&nod3, nn);
+			} else
+			if(isreg(r, D_AX)) {
+				nod3 = *n;
+				nod3.right = &nod2;
+				cgen(&nod3, nn);
+			} else
+				cgen(n, nn);
+
+			gmove(&nod2, &nod);
+			reg[D_AX] = v;
+			break;
+		}
+		if(nodreg(&nod1, nn, D_DX)) {
+			regsalloc(&nod2, n);
+			gmove(&nod1, &nod2);
+			v = reg[D_DX];
+			reg[D_DX] = 0;
+
+			if(isreg(l, D_DX)) {
+				nod3 = *n;
+				nod3.left = &nod2;
+				cgen(&nod3, nn);
+			} else
+			if(isreg(r, D_DX)) {
+				nod3 = *n;
+				nod3.right = &nod2;
+				cgen(&nod3, nn);
+			} else
+				cgen(n, nn);
+
+			gmove(&nod2, &nod1);
+			reg[D_DX] = v;
+			break;
+		}
+		reg[D_AX]++;
+		reg[D_DX]++;
+
+		if(l->complex >= r->complex) {
+			if(hardleft)
+				reglcgen(&nod2, l, Z);
+			else
+				nod2 = *l;
+			cgen(&nod2, &nod);
+			if(r->op == OCONST && typechl[r->type->etype]) {
+				switch(o) {
+				case OASDIV:
+					sdivgen(&nod2, r, &nod, &nod1);
+					goto divdone;
+				case OASLDIV:
+					udivgen(&nod2, r, &nod, &nod1);
+				divdone:
+					gmove(&nod1, &nod2);
+					if(nn != Z)
+						gmove(&nod1, nn);
+					goto freelxaxdx;
+				}
+			}
+			if(o == OASDIV || o == OASMOD)
+				gins(typechl[l->type->etype]? ACDQ: ACQO, Z, Z);
+			if(o == OASLDIV || o == OASLMOD)
+				zeroregm(&nod1);
+			if(r->addable < INDEXED || r->op == OCONST ||
+			   !typeil[r->type->etype]) {
+				regalloc(&nod3, r, Z);
+				cgen(r, &nod3);
+				gopcode(o, l->type, &nod3, Z);
+				regfree(&nod3);
+			} else
+				gopcode(o, n->type, r, Z);
+		} else {
+			regalloc(&nod3, r, Z);
+			cgen(r, &nod3);
+			if(hardleft)
+				reglcgen(&nod2, l, Z);
+			else
+				nod2 = *l;
+			cgen(&nod2, &nod);
+			if(o == OASDIV || o == OASMOD)
+				gins(typechl[l->type->etype]? ACDQ: ACQO, Z, Z);
+			if(o == OASLDIV || o == OASLMOD)
+				zeroregm(&nod1);
+			gopcode(o, l->type, &nod3, Z);
+			regfree(&nod3);
+		}
+		if(o == OASMOD || o == OASLMOD) {
+			gmove(&nod1, &nod2);
+			if(nn != Z)
+				gmove(&nod1, nn);
+		} else {
+			gmove(&nod, &nod2);
+			if(nn != Z)
+				gmove(&nod, nn);
+		}
+	freelxaxdx:
+		if(hardleft)
+			regfree(&nod2);
+		regfree(&nod);
+		regfree(&nod1);
+		break;
+
+	fop:
+		if(l->complex >= r->complex) {
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			if(r->addable < INDEXED) {
+				regalloc(&nod1, r, Z);
+				cgen(r, &nod1);
+				gopcode(o, n->type, &nod1, &nod);
+				regfree(&nod1);
+			} else
+				gopcode(o, n->type, r, &nod);
+		} else {
+			/* TO DO: could do better with r->addable >= INDEXED */
+			regalloc(&nod1, r, Z);
+			cgen(r, &nod1);
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			gopcode(o, n->type, &nod1, &nod);
+			regfree(&nod1);
+		}
+		gmove(&nod, nn);
+		regfree(&nod);
+		break;
+
+	asbitop:
+		regalloc(&nod4, n, nn);
+		if(l->complex >= r->complex) {
+			bitload(l, &nod, &nod1, &nod2, &nod4);
+			regalloc(&nod3, r, Z);
+			cgen(r, &nod3);
+		} else {
+			regalloc(&nod3, r, Z);
+			cgen(r, &nod3);
+			bitload(l, &nod, &nod1, &nod2, &nod4);
+		}
+		gmove(&nod, &nod4);
+
+		{	/* TO DO: check floating point source */
+			Node onod;
+
+			/* incredible grot ... */
+			onod = nod3;
+			onod.op = o;
+			onod.complex = 2;
+			onod.addable = 0;
+			onod.type = tfield;
+			onod.left = &nod4;
+			onod.right = &nod3;
+			cgen(&onod, Z);
+		}
+		regfree(&nod3);
+		gmove(&nod4, &nod);
+		regfree(&nod4);
+		bitstore(l, &nod, &nod1, &nod2, nn);
+		break;
+
+	case OADDR:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		lcgen(l, nn);
+		break;
+
+	case OFUNC:
+		if(l->complex >= FNX) {
+			if(l->op != OIND)
+				diag(n, "bad function call");
+
+			regret(&nod, l->left);
+			cgen(l->left, &nod);
+			regsalloc(&nod1, l->left);
+			gmove(&nod, &nod1);
+			regfree(&nod);
+
+			nod = *n;
+			nod.left = &nod2;
+			nod2 = *l;
+			nod2.left = &nod1;
+			nod2.complex = 1;
+			cgen(&nod, nn);
+
+			return;
+		}
+		o = reg[REGARG];
+		gargs(r, &nod, &nod1);
+		if(l->addable < INDEXED) {
+			reglcgen(&nod, l, nn);
+			nod.op = OREGISTER;
+			gopcode(OFUNC, n->type, Z, &nod);
+			regfree(&nod);
+		} else
+			gopcode(OFUNC, n->type, Z, l);
+		if(REGARG)
+			if(o != reg[REGARG])
+				reg[REGARG]--;
+		if(nn != Z) {
+			regret(&nod, n);
+			gmove(&nod, nn);
+			regfree(&nod);
+		}
+		break;
+
+	case OIND:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		regialloc(&nod, n, nn);
+		r = l;
+		while(r->op == OADD)
+			r = r->right;
+		if(sconst(r)) {
+			v = r->vconst;
+			r->vconst = 0;
+			cgen(l, &nod);
+			nod.xoffset += v;
+			r->vconst = v;
+		} else
+			cgen(l, &nod);
+		regind(&nod, n);
+		gmove(&nod, nn);
+		regfree(&nod);
+		break;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OLT:
+	case OGE:
+	case OGT:
+	case OLO:
+	case OLS:
+	case OHI:
+	case OHS:
+		if(nn == Z) {
+			nullwarn(l, r);
+			break;
+		}
+		boolgen(n, 1, nn);
+		break;
+
+	case OANDAND:
+	case OOROR:
+		boolgen(n, 1, nn);
+		if(nn == Z)
+			patch(p, pc);
+		break;
+
+	case ONOT:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		boolgen(n, 1, nn);
+		break;
+
+	case OCOMMA:
+		cgen(l, Z);
+		cgen(r, nn);
+		break;
+
+	case OCAST:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		/*
+		 * convert from types l->n->nn
+		 */
+		if(nocast(l->type, n->type) && nocast(n->type, nn->type)) {
+			/* both null, gen l->nn */
+			cgen(l, nn);
+			break;
+		}
+		if(ewidth[n->type->etype] < ewidth[l->type->etype]){
+			if(l->type->etype == TIND && typechlp[n->type->etype])
+				warn(n, "conversion of pointer to shorter integer");
+		}else if(0){
+			if(nocast(n->type, nn->type) || castup(n->type, nn->type)){
+				if(typefd[l->type->etype] != typefd[nn->type->etype])
+					regalloc(&nod, l, nn);
+				else
+					regalloc(&nod, nn, nn);
+				cgen(l, &nod);
+				gmove(&nod, nn);
+				regfree(&nod);
+				break;
+			}
+		}
+		regalloc(&nod, l, nn);
+		cgen(l, &nod);
+		regalloc(&nod1, n, &nod);
+		gmove(&nod, &nod1);
+		gmove(&nod1, nn);
+		regfree(&nod1);
+		regfree(&nod);
+		break;
+
+	case ODOT:
+		sugen(l, nodrat, l->type->width);
+		if(nn == Z)
+			break;
+		warn(n, "non-interruptable temporary");
+		nod = *nodrat;
+		if(!r || r->op != OCONST) {
+			diag(n, "DOT and no offset");
+			break;
+		}
+		nod.xoffset += (long)r->vconst;
+		nod.type = n->type;
+		cgen(&nod, nn);
+		break;
+
+	case OCOND:
+		bcgen(l, 1);
+		p1 = p;
+		cgen(r->left, nn);
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		cgen(r->right, nn);
+		patch(p1, pc);
+		break;
+
+	case OPOSTINC:
+	case OPOSTDEC:
+		v = 1;
+		if(l->type->etype == TIND)
+			v = l->type->link->width;
+		if(o == OPOSTDEC)
+			v = -v;
+		if(l->op == OBIT)
+			goto bitinc;
+		if(nn == Z)
+			goto pre;
+
+		if(hardleft)
+			reglcgen(&nod, l, Z);
+		else
+			nod = *l;
+
+		gmove(&nod, nn);
+		if(typefd[n->type->etype]) {
+			regalloc(&nod1, l, Z);
+			gmove(&nod, &nod1);
+			if(v < 0)
+				gopcode(OSUB, n->type, nodfconst(-v), &nod1);
+			else
+				gopcode(OADD, n->type, nodfconst(v), &nod1);
+			gmove(&nod1, &nod);
+			regfree(&nod1);
+		} else
+			gopcode(OADD, n->type, nodconst(v), &nod);
+		if(hardleft)
+			regfree(&nod);
+		break;
+
+	case OPREINC:
+	case OPREDEC:
+		v = 1;
+		if(l->type->etype == TIND)
+			v = l->type->link->width;
+		if(o == OPREDEC)
+			v = -v;
+		if(l->op == OBIT)
+			goto bitinc;
+
+	pre:
+		if(hardleft)
+			reglcgen(&nod, l, Z);
+		else
+			nod = *l;
+		if(typefd[n->type->etype]) {
+			regalloc(&nod1, l, Z);
+			gmove(&nod, &nod1);
+			if(v < 0)
+				gopcode(OSUB, n->type, nodfconst(-v), &nod1);
+			else
+				gopcode(OADD, n->type, nodfconst(v), &nod1);
+			gmove(&nod1, &nod);
+			regfree(&nod1);
+		} else
+			gopcode(OADD, n->type, nodconst(v), &nod);
+		if(nn != Z)
+			gmove(&nod, nn);
+		if(hardleft)
+			regfree(&nod);
+		break;
+
+	bitinc:
+		if(nn != Z && (o == OPOSTINC || o == OPOSTDEC)) {
+			bitload(l, &nod, &nod1, &nod2, Z);
+			gmove(&nod, nn);
+			gopcode(OADD, tfield, nodconst(v), &nod);
+			bitstore(l, &nod, &nod1, &nod2, Z);
+			break;
+		}
+		bitload(l, &nod, &nod1, &nod2, nn);
+		gopcode(OADD, tfield, nodconst(v), &nod);
+		bitstore(l, &nod, &nod1, &nod2, nn);
+		break;
+	}
+done:
+	cursafe = curs;
+}
+
+void
+reglcgen(Node *t, Node *n, Node *nn)
+{
+	Node *r;
+	long v;
+
+	regialloc(t, n, nn);
+	if(n->op == OIND) {
+		r = n->left;
+		while(r->op == OADD)
+			r = r->right;
+		if(sconst(r)) {
+			v = r->vconst;
+			r->vconst = 0;
+			lcgen(n, t);
+			t->xoffset += v;
+			r->vconst = v;
+			regind(t, n);
+			return;
+		}
+	}
+	lcgen(n, t);
+	regind(t, n);
+}
+
+void
+lcgen(Node *n, Node *nn)
+{
+	Prog *p1;
+	Node nod;
+
+	if(debug['g']) {
+		prtree(nn, "lcgen lhs");
+		prtree(n, "lcgen");
+	}
+	if(n == Z || n->type == T)
+		return;
+	if(nn == Z) {
+		nn = &nod;
+		regalloc(&nod, n, Z);
+	}
+	switch(n->op) {
+	default:
+		if(n->addable < INDEXED) {
+			diag(n, "unknown op in lcgen: %O", n->op);
+			break;
+		}
+		gopcode(OADDR, n->type, n, nn);
+		break;
+
+	case OCOMMA:
+		cgen(n->left, n->left);
+		lcgen(n->right, nn);
+		break;
+
+	case OIND:
+		cgen(n->left, nn);
+		break;
+
+	case OCOND:
+		bcgen(n->left, 1);
+		p1 = p;
+		lcgen(n->right->left, nn);
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		lcgen(n->right->right, nn);
+		patch(p1, pc);
+		break;
+	}
+}
+
+void
+bcgen(Node *n, int true)
+{
+
+	if(n->type == T)
+		gbranch(OGOTO);
+	else
+		boolgen(n, true, Z);
+}
+
+void
+boolgen(Node *n, int true, Node *nn)
+{
+	int o;
+	Prog *p1, *p2;
+	Node *l, *r, nod, nod1;
+	long curs;
+
+	if(debug['g']) {
+		prtree(nn, "boolgen lhs");
+		prtree(n, "boolgen");
+	}
+	curs = cursafe;
+	l = n->left;
+	r = n->right;
+	switch(n->op) {
+
+	default:
+		o = ONE;
+		if(true)
+			o = OEQ;
+		/* bad, 13 is address of external that becomes constant */
+		if(n->addable >= INDEXED && n->addable != 13) {
+			if(typefd[n->type->etype]) {
+				regalloc(&nod1, n, Z);
+				gmove(nodfconst(0.0), &nod1);	/* TO DO: FREGZERO */
+				gopcode(o, n->type, n, &nod1);
+				regfree(&nod1);
+			} else
+				gopcode(o, n->type, n, nodconst(0));
+			goto com;
+		}
+		regalloc(&nod, n, nn);
+		cgen(n, &nod);
+		if(typefd[n->type->etype]) {
+			regalloc(&nod1, n, Z);
+			gmove(nodfconst(0.0), &nod1);	/* TO DO: FREGZERO */
+			gopcode(o, n->type, &nod, &nod1);
+			regfree(&nod1);
+		} else
+			gopcode(o, n->type, &nod, nodconst(0));
+		regfree(&nod);
+		goto com;
+
+	case OCONST:
+		o = vconst(n);
+		if(!true)
+			o = !o;
+		gbranch(OGOTO);
+		if(o) {
+			p1 = p;
+			gbranch(OGOTO);
+			patch(p1, pc);
+		}
+		goto com;
+
+	case OCOMMA:
+		cgen(l, Z);
+		boolgen(r, true, nn);
+		break;
+
+	case ONOT:
+		boolgen(l, !true, nn);
+		break;
+
+	case OCOND:
+		bcgen(l, 1);
+		p1 = p;
+		bcgen(r->left, true);
+		p2 = p;
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		bcgen(r->right, !true);
+		patch(p2, pc);
+		p2 = p;
+		gbranch(OGOTO);
+		patch(p1, pc);
+		patch(p2, pc);
+		goto com;
+
+	case OANDAND:
+		if(!true)
+			goto caseor;
+
+	caseand:
+		bcgen(l, true);
+		p1 = p;
+		bcgen(r, !true);
+		p2 = p;
+		patch(p1, pc);
+		gbranch(OGOTO);
+		patch(p2, pc);
+		goto com;
+
+	case OOROR:
+		if(!true)
+			goto caseand;
+
+	caseor:
+		bcgen(l, !true);
+		p1 = p;
+		bcgen(r, !true);
+		p2 = p;
+		gbranch(OGOTO);
+		patch(p1, pc);
+		patch(p2, pc);
+		goto com;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OLT:
+	case OGE:
+	case OGT:
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		o = n->op;
+		if(true)
+			o = comrel[relindex(o)];
+		if(l->complex >= FNX && r->complex >= FNX) {
+			regret(&nod, r);
+			cgen(r, &nod);
+			regsalloc(&nod1, r);
+			gmove(&nod, &nod1);
+			regfree(&nod);
+			nod = *n;
+			nod.right = &nod1;
+			boolgen(&nod, true, nn);
+			break;
+		}
+		if(immconst(l)) {
+			o = invrel[relindex(o)];
+			/* bad, 13 is address of external that becomes constant */
+			if(r->addable < INDEXED || r->addable == 13) {
+				regalloc(&nod, r, nn);
+				cgen(r, &nod);
+				gopcode(o, l->type, &nod, l);
+				regfree(&nod);
+			} else
+				gopcode(o, l->type, r, l);
+			goto com;
+		}
+		if(typefd[l->type->etype])
+			o = invrel[relindex(logrel[relindex(o)])];
+		if(l->complex >= r->complex) {
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			if(r->addable < INDEXED || hardconst(r) || typefd[l->type->etype]) {
+				regalloc(&nod1, r, Z);
+				cgen(r, &nod1);
+				gopcode(o, l->type, &nod, &nod1);
+				regfree(&nod1);
+			} else
+				gopcode(o, l->type, &nod, r);
+			regfree(&nod);
+			goto com;
+		}
+		regalloc(&nod, r, nn);
+		cgen(r, &nod);
+		if(l->addable < INDEXED || l->addable == 13 || hardconst(l)) {
+			regalloc(&nod1, l, Z);
+			cgen(l, &nod1);
+			if(typechl[l->type->etype] && ewidth[l->type->etype] <= ewidth[TINT])
+				gopcode(o, types[TINT], &nod1, &nod);
+			else
+				gopcode(o, l->type, &nod1, &nod);
+			regfree(&nod1);
+		} else
+			gopcode(o, l->type, l, &nod);
+		regfree(&nod);
+
+	com:
+		if(nn != Z) {
+			p1 = p;
+			gmove(nodconst(1L), nn);
+			gbranch(OGOTO);
+			p2 = p;
+			patch(p1, pc);
+			gmove(nodconst(0L), nn);
+			patch(p2, pc);
+		}
+		break;
+	}
+	cursafe = curs;
+}
+
+void
+sugen(Node *n, Node *nn, long w)
+{
+	Prog *p1;
+	Node nod0, nod1, nod2, nod3, nod4, *l, *r;
+	Type *t;
+	int c, mt, mo;
+	vlong o0, o1;
+
+	if(n == Z || n->type == T)
+		return;
+	if(debug['g']) {
+		prtree(nn, "sugen lhs");
+		prtree(n, "sugen");
+	}
+	if(nn == nodrat)
+		if(w > nrathole)
+			nrathole = w;
+	switch(n->op) {
+	case OIND:
+		if(nn == Z) {
+			nullwarn(n->left, Z);
+			break;
+		}
+
+	default:
+		goto copy;
+
+	case OCONST:
+		goto copy;
+
+	case ODOT:
+		l = n->left;
+		sugen(l, nodrat, l->type->width);
+		if(nn == Z)
+			break;
+		warn(n, "non-interruptable temporary");
+		nod1 = *nodrat;
+		r = n->right;
+		if(!r || r->op != OCONST) {
+			diag(n, "DOT and no offset");
+			break;
+		}
+		nod1.xoffset += (long)r->vconst;
+		nod1.type = n->type;
+		sugen(&nod1, nn, w);
+		break;
+
+	case OSTRUCT:
+		/*
+		 * rewrite so lhs has no fn call
+		 */
+		if(nn != Z && side(nn)) {
+			nod1 = *n;
+			nod1.type = typ(TIND, n->type);
+			regret(&nod2, &nod1);
+			lcgen(nn, &nod2);
+			regsalloc(&nod0, &nod1);
+			cgen(&nod2, &nod0);
+			regfree(&nod2);
+
+			nod1 = *n;
+			nod1.op = OIND;
+			nod1.left = &nod0;
+			nod1.right = Z;
+			nod1.complex = 1;
+
+			sugen(n, &nod1, w);
+			return;
+		}
+
+		r = n->left;
+		for(t = n->type->link; t != T; t = t->down) {
+			l = r;
+			if(r->op == OLIST) {
+				l = r->left;
+				r = r->right;
+			}
+			if(nn == Z) {
+				cgen(l, nn);
+				continue;
+			}
+			/*
+			 * hand craft *(&nn + o) = l
+			 */
+			nod0 = znode;
+			nod0.op = OAS;
+			nod0.type = t;
+			nod0.left = &nod1;
+			nod0.right = nil;
+
+			nod1 = znode;
+			nod1.op = OIND;
+			nod1.type = t;
+			nod1.left = &nod2;
+
+			nod2 = znode;
+			nod2.op = OADD;
+			nod2.type = typ(TIND, t);
+			nod2.left = &nod3;
+			nod2.right = &nod4;
+
+			nod3 = znode;
+			nod3.op = OADDR;
+			nod3.type = nod2.type;
+			nod3.left = nn;
+
+			nod4 = znode;
+			nod4.op = OCONST;
+			nod4.type = nod2.type;
+			nod4.vconst = t->offset;
+
+			ccom(&nod0);
+			acom(&nod0);
+			xcom(&nod0);
+			nod0.addable = 0;
+			nod0.right = l;
+
+			/* prtree(&nod0, "hand craft"); /* */
+			cgen(&nod0, Z);
+		}
+		break;
+
+	case OAS:
+		if(nn == Z) {
+			if(n->addable < INDEXED)
+				sugen(n->right, n->left, w);
+			break;
+		}
+
+		sugen(n->right, nodrat, w);
+		warn(n, "non-interruptable temporary");
+		sugen(nodrat, n->left, w);
+		sugen(nodrat, nn, w);
+		break;
+
+	case OFUNC:
+		if(nn == Z) {
+			sugen(n, nodrat, w);
+			break;
+		}
+		if(nn->op != OIND) {
+			nn = new1(OADDR, nn, Z);
+			nn->type = types[TIND];
+			nn->addable = 0;
+		} else
+			nn = nn->left;
+		n = new(OFUNC, n->left, new(OLIST, nn, n->right));
+		n->type = types[TVOID];
+		n->left->type = types[TVOID];
+		cgen(n, Z);
+		break;
+
+	case OCOND:
+		bcgen(n->left, 1);
+		p1 = p;
+		sugen(n->right->left, nn, w);
+		gbranch(OGOTO);
+		patch(p1, pc);
+		p1 = p;
+		sugen(n->right->right, nn, w);
+		patch(p1, pc);
+		break;
+
+	case OCOMMA:
+		cgen(n->left, Z);
+		sugen(n->right, nn, w);
+		break;
+	}
+	return;
+
+copy:
+	if(nn == Z) {
+		switch(n->op) {
+		case OASADD:
+		case OASSUB:
+		case OASAND:
+		case OASOR:
+		case OASXOR:
+
+		case OASMUL:
+		case OASLMUL:
+
+
+		case OASASHL:
+		case OASASHR:
+		case OASLSHR:
+			break;
+
+		case OPOSTINC:
+		case OPOSTDEC:
+		case OPREINC:
+		case OPREDEC:
+			break;
+
+		default:
+			return;
+		}
+	}
+
+	if(n->complex >= FNX && nn != nil && nn->complex >= FNX) {
+		t = nn->type;
+		nn->type = types[TLONG];
+		regialloc(&nod1, nn, Z);
+		lcgen(nn, &nod1);
+		regsalloc(&nod2, nn);
+		nn->type = t;
+
+		gins(AMOVL, &nod1, &nod2);
+		regfree(&nod1);
+
+		nod2.type = typ(TIND, t);
+
+		nod1 = nod2;
+		nod1.op = OIND;
+		nod1.left = &nod2;
+		nod1.right = Z;
+		nod1.complex = 1;
+		nod1.type = t;
+
+		sugen(n, &nod1, w);
+		return;
+	}
+
+	if(w <= 32) {
+		c = cursafe;
+		if(n->left != Z && n->left->complex >= FNX
+		&& n->right != Z && n->right->complex >= FNX) {
+			regsalloc(&nod1, n->right);
+			cgen(n->right, &nod1);
+			nod2 = *n;
+			nod2.right = &nod1;
+			cgen(&nod2, nn);
+			cursafe = c;
+			return;
+		}
+		if(w & 7) {
+			mt = TLONG;
+			mo = AMOVL;
+		} else {
+			mt = TVLONG;
+			mo = AMOVQ;
+		}
+		if(n->complex > nn->complex) {
+			t = n->type;
+			n->type = types[mt];
+			regalloc(&nod0, n, Z);
+			if(!vaddr(n, 0)) {
+				reglcgen(&nod1, n, Z);
+				n->type = t;
+				n = &nod1;
+			}
+			else
+				n->type = t;
+
+			t = nn->type;
+			nn->type = types[mt];
+			if(!vaddr(nn, 0)) {
+				reglcgen(&nod2, nn, Z);
+				nn->type = t;
+				nn = &nod2;
+			}
+			else
+				nn->type = t;
+		} else {
+			t = nn->type;
+			nn->type = types[mt];
+			regalloc(&nod0, nn, Z);
+			if(!vaddr(nn, 0)) {
+				reglcgen(&nod2, nn, Z);
+				nn->type = t;
+				nn = &nod2;
+			}
+			else
+				nn->type = t;
+
+			t = n->type;
+			n->type = types[mt];
+			if(!vaddr(n, 0)) {
+				reglcgen(&nod1, n, Z);
+				n->type = t;
+				n = &nod1;
+			}
+			else
+				n->type = t;
+		}
+		o0 = n->xoffset;
+		o1 = nn->xoffset;
+		w /= ewidth[mt];
+		while(--w >= 0) {
+			gins(mo, n, &nod0);
+			gins(mo, &nod0, nn);
+			n->xoffset += ewidth[mt];
+			nn->xoffset += ewidth[mt];
+		}
+		n->xoffset = o0;
+		nn->xoffset = o1;
+		if(nn == &nod2)
+			regfree(&nod2);
+		if(n == &nod1)
+			regfree(&nod1);
+		regfree(&nod0);
+		return;
+	}
+
+	/* botch, need to save in .safe */
+	c = 0;
+	if(n->complex > nn->complex) {
+		t = n->type;
+		n->type = types[TLONG];
+		nodreg(&nod1, n, D_SI);
+		if(reg[D_SI]) {
+			gins(APUSHQ, &nod1, Z);
+			c |= 1;
+			reg[D_SI]++;
+		}
+		lcgen(n, &nod1);
+		n->type = t;
+
+		t = nn->type;
+		nn->type = types[TLONG];
+		nodreg(&nod2, nn, D_DI);
+		if(reg[D_DI]) {
+warn(Z, "DI botch");
+			gins(APUSHQ, &nod2, Z);
+			c |= 2;
+			reg[D_DI]++;
+		}
+		lcgen(nn, &nod2);
+		nn->type = t;
+	} else {
+		t = nn->type;
+		nn->type = types[TLONG];
+		nodreg(&nod2, nn, D_DI);
+		if(reg[D_DI]) {
+warn(Z, "DI botch");
+			gins(APUSHQ, &nod2, Z);
+			c |= 2;
+			reg[D_DI]++;
+		}
+		lcgen(nn, &nod2);
+		nn->type = t;
+
+		t = n->type;
+		n->type = types[TLONG];
+		nodreg(&nod1, n, D_SI);
+		if(reg[D_SI]) {
+			gins(APUSHQ, &nod1, Z);
+			c |= 1;
+			reg[D_SI]++;
+		}
+		lcgen(n, &nod1);
+		n->type = t;
+	}
+	nodreg(&nod3, n, D_CX);
+	if(reg[D_CX]) {
+		gins(APUSHQ, &nod3, Z);
+		c |= 4;
+		reg[D_CX]++;
+	}
+	gins(AMOVL, nodconst(w/SZ_INT), &nod3);
+	gins(ACLD, Z, Z);
+	gins(AREP, Z, Z);
+	gins(AMOVSL, Z, Z);
+	if(c & 4) {
+		gins(APOPQ, Z, &nod3);
+		reg[D_CX]--;
+	}
+	if(c & 2) {
+		gins(APOPQ, Z, &nod2);
+		reg[nod2.reg]--;
+	}
+	if(c & 1) {
+		gins(APOPQ, Z, &nod1);
+		reg[nod1.reg]--;
+	}
+}
+
+/*
+ * TO DO
+ */
+void
+layout(Node *f, Node *t, int c, int cv, Node *cn)
+{
+	Node t1, t2;
+
+	while(c > 3) {
+		layout(f, t, 2, 0, Z);
+		c -= 2;
+	}
+
+	regalloc(&t1, &lregnode, Z);
+	regalloc(&t2, &lregnode, Z);
+	if(c > 0) {
+		gmove(f, &t1);
+		f->xoffset += SZ_INT;
+	}
+	if(cn != Z)
+		gmove(nodconst(cv), cn);
+	if(c > 1) {
+		gmove(f, &t2);
+		f->xoffset += SZ_INT;
+	}
+	if(c > 0) {
+		gmove(&t1, t);
+		t->xoffset += SZ_INT;
+	}
+	if(c > 2) {
+		gmove(f, &t1);
+		f->xoffset += SZ_INT;
+	}
+	if(c > 1) {
+		gmove(&t2, t);
+		t->xoffset += SZ_INT;
+	}
+	if(c > 2) {
+		gmove(&t1, t);
+		t->xoffset += SZ_INT;
+	}
+	regfree(&t1);
+	regfree(&t2);
+}
+
+/*
+ * constant is not vlong or fits as 32-bit signed immediate
+ */
+int
+immconst(Node *n)
+{
+	long v;
+
+	if(n->op != OCONST || !typechlpv[n->type->etype])
+		return 0;
+	if(typechl[n->type->etype])
+		return 1;
+	v = n->vconst;
+	return n->vconst == (vlong)v;
+}
+
+/*
+ * if a constant and vlong, doesn't fit as 32-bit signed immediate
+ */
+int
+hardconst(Node *n)
+{
+	return n->op == OCONST && !immconst(n);
+}
+
+/*
+ * casting up to t2 covers an intermediate cast to t1
+ */
+int
+castup(Type *t1, Type *t2)
+{
+	int ft;
+
+	if(!nilcast(t1, t2))
+		return 0;
+	/* known to be small to large */
+	ft = t1->etype;
+	switch(t2->etype){
+	case TINT:
+	case TLONG:
+		return ft == TLONG || ft == TINT || ft == TSHORT || ft == TCHAR;
+	case TUINT:
+	case TULONG:
+		return ft == TULONG || ft == TUINT || ft == TUSHORT || ft == TUCHAR;
+	case TVLONG:
+		return ft == TLONG || ft == TINT || ft == TSHORT;
+	case TUVLONG:
+		return ft == TULONG || ft == TUINT || ft == TUSHORT;
+	}
+	return 0;
+}
+
+void
+zeroregm(Node *n)
+{
+	gins(AMOVL, nodconst(0), n);
+}
+
+/* do we need to load the address of a vlong? */
+int
+vaddr(Node *n, int a)
+{
+	switch(n->op) {
+	case ONAME:
+		if(a)
+			return 1;
+		return !(n->class == CEXTERN || n->class == CGLOBL || n->class == CSTATIC);
+
+	case OCONST:
+	case OREGISTER:
+	case OINDREG:
+		return 1;
+	}
+	return 0;
+}
+
+long
+hi64v(Node *n)
+{
+	if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+		return (long)(n->vconst) & ~0L;
+	else
+		return (long)((uvlong)n->vconst>>32) & ~0L;
+}
+
+long
+lo64v(Node *n)
+{
+	if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+		return (long)((uvlong)n->vconst>>32) & ~0L;
+	else
+		return (long)(n->vconst) & ~0L;
+}
+
+Node *
+hi64(Node *n)
+{
+	return nodconst(hi64v(n));
+}
+
+Node *
+lo64(Node *n)
+{
+	return nodconst(lo64v(n));
+}
+
+int
+cond(int op)
+{
+	switch(op) {
+	case OANDAND:
+	case OOROR:
+	case ONOT:
+		return 1;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OLT:
+	case OGE:
+	case OGT:
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		return 1;
+	}
+	return 0;
+}

+ 206 - 0
sys/src/cmd/6c/div.c

@@ -0,0 +1,206 @@
+#include "gc.h"
+
+/*
+ * Based on: Granlund, T.; Montgomery, P.L.
+ * "Division by Invariant Integers using Multiplication".
+ * SIGPLAN Notices, Vol. 29, June 1994, page 61.
+ */
+
+#define	TN(n)	((uvlong)1 << (n))
+#define	T31	TN(31)
+#define	T32	TN(32)
+
+int
+multiplier(ulong d, int p, uvlong *mp)
+{
+	int l;
+	uvlong mlo, mhi, tlo, thi;
+
+	l = topbit(d - 1) + 1;
+	mlo = (((TN(l) - d) << 32) / d) + T32;
+	if(l + p == 64)
+		mhi = (((TN(l) + 1 - d) << 32) / d) + T32;
+	else
+		mhi = (TN(32 + l) + TN(32 + l - p)) / d;
+	/*assert(mlo < mhi);*/
+	while(l > 0) {
+		tlo = mlo >> 1;
+		thi = mhi >> 1;
+		if(tlo == thi)
+			break;
+		mlo = tlo;
+		mhi = thi;
+		l--;
+	}
+	*mp = mhi;
+	return l;
+}
+
+int
+sdiv(ulong d, ulong *mp, int *sp)
+{
+	int s;
+	uvlong m;
+
+	s = multiplier(d, 32 - 1, &m);
+	*mp = m;
+	*sp = s;
+	if(m >= T31)
+		return 1;
+	else
+		return 0;
+}
+
+int
+udiv(ulong d, ulong *mp, int *sp, int *pp)
+{
+	int p, s;
+	uvlong m;
+
+	s = multiplier(d, 32, &m);
+	p = 0;
+	if(m >= T32) {
+		while((d & 1) == 0) {
+			d >>= 1;
+			p++;
+		}
+		s = multiplier(d, 32 - p, &m);
+	}
+	*mp = m;
+	*pp = p;
+	if(m >= T32) {
+		/*assert(p == 0);*/
+		*sp = s - 1;
+		return 1;
+	}
+	else {
+		*sp = s;
+		return 0;
+	}
+}
+
+void
+sdivgen(Node *l, Node *r, Node *ax, Node *dx)
+{
+	int a, s;
+	ulong m;
+	vlong c;
+
+	c = r->vconst;
+	if(c < 0)
+		c = -c;
+	a = sdiv(c, &m, &s);
+//print("a=%d i=%ld s=%d m=%lux\n", a, (long)r->vconst, s, m);
+	gins(AMOVL, nodconst(m), ax);
+	gins(AIMULL, l, Z);
+	gins(AMOVL, l, ax);
+	if(a)
+		gins(AADDL, ax, dx);
+	gins(ASHRL, nodconst(31), ax);
+	gins(ASARL, nodconst(s), dx);
+	gins(AADDL, ax, dx);
+	if(r->vconst < 0)
+		gins(ANEGL, Z, dx);
+}
+
+void
+udivgen(Node *l, Node *r, Node *ax, Node *dx)
+{
+	int a, s, t;
+	ulong m;
+	Node nod;
+
+	a = udiv(r->vconst, &m, &s, &t);
+//print("a=%ud i=%ld p=%d s=%d m=%lux\n", a, (long)r->vconst, t, s, m);
+	if(t != 0) {
+		gins(AMOVL, l, ax);
+		gins(ASHRL, nodconst(t), ax);
+		gins(AMOVL, nodconst(m), dx);
+		gins(AMULL, dx, Z);
+	}
+	else if(a) {
+		if(l->op != OREGISTER) {
+			regalloc(&nod, l, Z);
+			gins(AMOVL, l, &nod);
+			l = &nod;
+		}
+		gins(AMOVL, nodconst(m), ax);
+		gins(AMULL, l, Z);
+		gins(AADDL, l, dx);
+		gins(ARCRL, nodconst(1), dx);
+		if(l == &nod)
+			regfree(l);
+	}
+	else {
+		gins(AMOVL, nodconst(m), ax);
+		gins(AMULL, l, Z);
+	}
+	if(s != 0)
+		gins(ASHRL, nodconst(s), dx);
+}
+
+void
+sext(Node *d, Node *s, Node *l)
+{
+	if(s->reg == D_AX && !nodreg(d, Z, D_DX)) {
+		reg[D_DX]++;
+		gins(ACDQ, Z, Z);
+	}
+	else {
+		regalloc(d, l, Z);
+		gins(AMOVL, s, d);
+		gins(ASARL, nodconst(31), d);
+	}
+}
+
+void
+sdiv2(long c, int v, Node *l, Node *n)
+{
+	Node nod;
+
+	if(v > 0) {
+		if(v > 1) {
+			sext(&nod, n, l);
+			gins(AANDL, nodconst((1 << v) - 1), &nod);
+			gins(AADDL, &nod, n);
+			regfree(&nod);
+		}
+		else {
+			gins(ACMPL, n, nodconst(0x80000000));
+			gins(ASBBL, nodconst(-1), n);
+		}
+		gins(ASARL, nodconst(v), n);
+	}
+	if(c < 0)
+		gins(ANEGL, Z, n);
+}
+
+void
+smod2(long c, int v, Node *l, Node *n)
+{
+	Node nod;
+
+	if(c == 1) {
+		zeroregm(n);
+		return;
+	}
+
+	sext(&nod, n, l);
+	if(v == 0) {
+		zeroregm(n);
+		gins(AXORL, &nod, n);
+		gins(ASUBL, &nod, n);
+	}
+	else if(v > 1) {
+		gins(AANDL, nodconst((1 << v) - 1), &nod);
+		gins(AADDL, &nod, n);
+		gins(AANDL, nodconst((1 << v) - 1), n);
+		gins(ASUBL, &nod, n);
+	}
+	else {
+		gins(AANDL, nodconst(1), n);
+		gins(AXORL, &nod, n);
+		gins(ASUBL, &nod, n);
+	}
+	regfree(&nod);
+}

+ 669 - 0
sys/src/cmd/6c/enam.c

@@ -0,0 +1,669 @@
+char*	anames[] =
+{
+	"XXX",
+	"AAA",
+	"AAD",
+	"AAM",
+	"AAS",
+	"ADCB",
+	"ADCL",
+	"ADCW",
+	"ADDB",
+	"ADDL",
+	"ADDW",
+	"ADJSP",
+	"ANDB",
+	"ANDL",
+	"ANDW",
+	"ARPL",
+	"BOUNDL",
+	"BOUNDW",
+	"BSFL",
+	"BSFW",
+	"BSRL",
+	"BSRW",
+	"BTL",
+	"BTW",
+	"BTCL",
+	"BTCW",
+	"BTRL",
+	"BTRW",
+	"BTSL",
+	"BTSW",
+	"BYTE",
+	"CALL",
+	"CLC",
+	"CLD",
+	"CLI",
+	"CLTS",
+	"CMC",
+	"CMPB",
+	"CMPL",
+	"CMPW",
+	"CMPSB",
+	"CMPSL",
+	"CMPSW",
+	"DAA",
+	"DAS",
+	"DATA",
+	"DECB",
+	"DECL",
+	"DECQ",
+	"DECW",
+	"DIVB",
+	"DIVL",
+	"DIVW",
+	"ENTER",
+	"GLOBL",
+	"GOK",
+	"HISTORY",
+	"HLT",
+	"IDIVB",
+	"IDIVL",
+	"IDIVW",
+	"IMULB",
+	"IMULL",
+	"IMULW",
+	"INB",
+	"INL",
+	"INW",
+	"INCB",
+	"INCL",
+	"INCQ",
+	"INCW",
+	"INSB",
+	"INSL",
+	"INSW",
+	"INT",
+	"INTO",
+	"IRETL",
+	"IRETW",
+	"JCC",
+	"JCS",
+	"JCXZ",
+	"JEQ",
+	"JGE",
+	"JGT",
+	"JHI",
+	"JLE",
+	"JLS",
+	"JLT",
+	"JMI",
+	"JMP",
+	"JNE",
+	"JOC",
+	"JOS",
+	"JPC",
+	"JPL",
+	"JPS",
+	"LAHF",
+	"LARL",
+	"LARW",
+	"LEAL",
+	"LEAW",
+	"LEAVEL",
+	"LEAVEW",
+	"LOCK",
+	"LODSB",
+	"LODSL",
+	"LODSW",
+	"LONG",
+	"LOOP",
+	"LOOPEQ",
+	"LOOPNE",
+	"LSLL",
+	"LSLW",
+	"MOVB",
+	"MOVL",
+	"MOVW",
+	"MOVBLSX",
+	"MOVBLZX",
+	"MOVBQSX",
+	"MOVBQZX",
+	"MOVBWSX",
+	"MOVBWZX",
+	"MOVWLSX",
+	"MOVWLZX",
+	"MOVWQSX",
+	"MOVWQZX",
+	"MOVSB",
+	"MOVSL",
+	"MOVSW",
+	"MULB",
+	"MULL",
+	"MULW",
+	"NAME",
+	"NEGB",
+	"NEGL",
+	"NEGW",
+	"NOP",
+	"NOTB",
+	"NOTL",
+	"NOTW",
+	"ORB",
+	"ORL",
+	"ORW",
+	"OUTB",
+	"OUTL",
+	"OUTW",
+	"OUTSB",
+	"OUTSL",
+	"OUTSW",
+	"POPAL",
+	"POPAW",
+	"POPFL",
+	"POPFW",
+	"POPL",
+	"POPW",
+	"PUSHAL",
+	"PUSHAW",
+	"PUSHFL",
+	"PUSHFW",
+	"PUSHL",
+	"PUSHW",
+	"RCLB",
+	"RCLL",
+	"RCLW",
+	"RCRB",
+	"RCRL",
+	"RCRW",
+	"REP",
+	"REPN",
+	"RET",
+	"ROLB",
+	"ROLL",
+	"ROLW",
+	"RORB",
+	"RORL",
+	"RORW",
+	"SAHF",
+	"SALB",
+	"SALL",
+	"SALW",
+	"SARB",
+	"SARL",
+	"SARW",
+	"SBBB",
+	"SBBL",
+	"SBBW",
+	"SCASB",
+	"SCASL",
+	"SCASW",
+	"SETCC",
+	"SETCS",
+	"SETEQ",
+	"SETGE",
+	"SETGT",
+	"SETHI",
+	"SETLE",
+	"SETLS",
+	"SETLT",
+	"SETMI",
+	"SETNE",
+	"SETOC",
+	"SETOS",
+	"SETPC",
+	"SETPL",
+	"SETPS",
+	"CDQ",
+	"CWD",
+	"SHLB",
+	"SHLL",
+	"SHLW",
+	"SHRB",
+	"SHRL",
+	"SHRW",
+	"STC",
+	"STD",
+	"STI",
+	"STOSB",
+	"STOSL",
+	"STOSW",
+	"SUBB",
+	"SUBL",
+	"SUBW",
+	"SYSCALL",
+	"TESTB",
+	"TESTL",
+	"TESTW",
+	"TEXT",
+	"VERR",
+	"VERW",
+	"WAIT",
+	"WORD",
+	"XCHGB",
+	"XCHGL",
+	"XCHGW",
+	"XLAT",
+	"XORB",
+	"XORL",
+	"XORW",
+	"FMOVB",
+	"FMOVBP",
+	"FMOVD",
+	"FMOVDP",
+	"FMOVF",
+	"FMOVFP",
+	"FMOVL",
+	"FMOVLP",
+	"FMOVV",
+	"FMOVVP",
+	"FMOVW",
+	"FMOVWP",
+	"FMOVX",
+	"FMOVXP",
+	"FCOMB",
+	"FCOMBP",
+	"FCOMD",
+	"FCOMDP",
+	"FCOMDPP",
+	"FCOMF",
+	"FCOMFP",
+	"FCOML",
+	"FCOMLP",
+	"FCOMW",
+	"FCOMWP",
+	"FUCOM",
+	"FUCOMP",
+	"FUCOMPP",
+	"FADDDP",
+	"FADDW",
+	"FADDL",
+	"FADDF",
+	"FADDD",
+	"FMULDP",
+	"FMULW",
+	"FMULL",
+	"FMULF",
+	"FMULD",
+	"FSUBDP",
+	"FSUBW",
+	"FSUBL",
+	"FSUBF",
+	"FSUBD",
+	"FSUBRDP",
+	"FSUBRW",
+	"FSUBRL",
+	"FSUBRF",
+	"FSUBRD",
+	"FDIVDP",
+	"FDIVW",
+	"FDIVL",
+	"FDIVF",
+	"FDIVD",
+	"FDIVRDP",
+	"FDIVRW",
+	"FDIVRL",
+	"FDIVRF",
+	"FDIVRD",
+	"FXCHD",
+	"FFREE",
+	"FLDCW",
+	"FLDENV",
+	"FRSTOR",
+	"FSAVE",
+	"FSTCW",
+	"FSTENV",
+	"FSTSW",
+	"F2XM1",
+	"FABS",
+	"FCHS",
+	"FCLEX",
+	"FCOS",
+	"FDECSTP",
+	"FINCSTP",
+	"FINIT",
+	"FLD1",
+	"FLDL2E",
+	"FLDL2T",
+	"FLDLG2",
+	"FLDLN2",
+	"FLDPI",
+	"FLDZ",
+	"FNOP",
+	"FPATAN",
+	"FPREM",
+	"FPREM1",
+	"FPTAN",
+	"FRNDINT",
+	"FSCALE",
+	"FSIN",
+	"FSINCOS",
+	"FSQRT",
+	"FTST",
+	"FXAM",
+	"FXTRACT",
+	"FYL2X",
+	"FYL2XP1",
+	"END",
+	"DYNT",
+	"INIT",
+	"SIGNAME",
+	"CMPXCHGB",
+	"CMPXCHGL",
+	"CMPXCHGW",
+	"CMPXCHG8B",
+	"CPUID",
+	"INVD",
+	"INVLPG",
+	"LFENCE",
+	"MFENCE",
+	"MOVNTIL",
+	"RDMSR",
+	"RDPMC",
+	"RDTSC",
+	"RSM",
+	"SFENCE",
+	"SYSRET",
+	"WBINVD",
+	"WRMSR",
+	"XADDB",
+	"XADDL",
+	"XADDW",
+	"CMOVLCC",
+	"CMOVLCS",
+	"CMOVLEQ",
+	"CMOVLGE",
+	"CMOVLGT",
+	"CMOVLHI",
+	"CMOVLLE",
+	"CMOVLLS",
+	"CMOVLLT",
+	"CMOVLMI",
+	"CMOVLNE",
+	"CMOVLOC",
+	"CMOVLOS",
+	"CMOVLPC",
+	"CMOVLPL",
+	"CMOVLPS",
+	"CMOVQCC",
+	"CMOVQCS",
+	"CMOVQEQ",
+	"CMOVQGE",
+	"CMOVQGT",
+	"CMOVQHI",
+	"CMOVQLE",
+	"CMOVQLS",
+	"CMOVQLT",
+	"CMOVQMI",
+	"CMOVQNE",
+	"CMOVQOC",
+	"CMOVQOS",
+	"CMOVQPC",
+	"CMOVQPL",
+	"CMOVQPS",
+	"CMOVWCC",
+	"CMOVWCS",
+	"CMOVWEQ",
+	"CMOVWGE",
+	"CMOVWGT",
+	"CMOVWHI",
+	"CMOVWLE",
+	"CMOVWLS",
+	"CMOVWLT",
+	"CMOVWMI",
+	"CMOVWNE",
+	"CMOVWOC",
+	"CMOVWOS",
+	"CMOVWPC",
+	"CMOVWPL",
+	"CMOVWPS",
+	"ADCQ",
+	"ADDQ",
+	"ANDQ",
+	"BSFQ",
+	"BSRQ",
+	"BTCQ",
+	"BTQ",
+	"BTRQ",
+	"BTSQ",
+	"CMPQ",
+	"CMPSQ",
+	"CMPXCHGQ",
+	"CQO",
+	"DIVQ",
+	"IDIVQ",
+	"IMULQ",
+	"IRETQ",
+	"LEAQ",
+	"LEAVEQ",
+	"LODSQ",
+	"MOVQ",
+	"MOVLQSX",
+	"MOVLQZX",
+	"MOVNTIQ",
+	"MOVSQ",
+	"MULQ",
+	"NEGQ",
+	"NOTQ",
+	"ORQ",
+	"POPFQ",
+	"POPQ",
+	"PUSHFQ",
+	"PUSHQ",
+	"RCLQ",
+	"RCRQ",
+	"ROLQ",
+	"RORQ",
+	"QUAD",
+	"SALQ",
+	"SARQ",
+	"SBBQ",
+	"SCASQ",
+	"SHLQ",
+	"SHRQ",
+	"STOSQ",
+	"SUBQ",
+	"TESTQ",
+	"XADDQ",
+	"XCHGQ",
+	"XORQ",
+	"ADDPD",
+	"ADDPS",
+	"ADDSD",
+	"ADDSS",
+	"ANDNPD",
+	"ANDNPS",
+	"ANDPD",
+	"ANDPS",
+	"CMPPD",
+	"CMPPS",
+	"CMPSD",
+	"CMPSS",
+	"COMISD",
+	"COMISS",
+	"CVTPD2PL",
+	"CVTPD2PS",
+	"CVTPL2PD",
+	"CVTPL2PS",
+	"CVTPS2PD",
+	"CVTPS2PL",
+	"CVTSD2SL",
+	"CVTSD2SQ",
+	"CVTSD2SS",
+	"CVTSL2SD",
+	"CVTSL2SS",
+	"CVTSQ2SD",
+	"CVTSQ2SS",
+	"CVTSS2SD",
+	"CVTSS2SL",
+	"CVTSS2SQ",
+	"CVTTPD2PL",
+	"CVTTPS2PL",
+	"CVTTSD2SL",
+	"CVTTSD2SQ",
+	"CVTTSS2SL",
+	"CVTTSS2SQ",
+	"DIVPD",
+	"DIVPS",
+	"DIVSD",
+	"DIVSS",
+	"EMMS",
+	"FXRSTOR",
+	"FXRSTOR64",
+	"FXSAVE",
+	"FXSAVE64",
+	"LDMXCSR",
+	"MASKMOVOU",
+	"MASKMOVQ",
+	"MAXPD",
+	"MAXPS",
+	"MAXSD",
+	"MAXSS",
+	"MINPD",
+	"MINPS",
+	"MINSD",
+	"MINSS",
+	"MOVAPD",
+	"MOVAPS",
+	"MOVOU",
+	"MOVHLPS",
+	"MOVHPD",
+	"MOVHPS",
+	"MOVLHPS",
+	"MOVLPD",
+	"MOVLPS",
+	"MOVMSKPD",
+	"MOVMSKPS",
+	"MOVNTO",
+	"MOVNTPD",
+	"MOVNTPS",
+	"MOVNTQ",
+	"MOVO",
+	"MOVQOZX",
+	"MOVSD",
+	"MOVSS",
+	"MOVUPD",
+	"MOVUPS",
+	"MULPD",
+	"MULPS",
+	"MULSD",
+	"MULSS",
+	"ORPD",
+	"ORPS",
+	"PACKSSLW",
+	"PACKSSWB",
+	"PACKUSWB",
+	"PADDB",
+	"PADDL",
+	"PADDQ",
+	"PADDSB",
+	"PADDSW",
+	"PADDUSB",
+	"PADDUSW",
+	"PADDW",
+	"PANDB",
+	"PANDL",
+	"PANDSB",
+	"PANDSW",
+	"PANDUSB",
+	"PANDUSW",
+	"PANDW",
+	"PAND",
+	"PANDN",
+	"PAVGB",
+	"PAVGW",
+	"PCMPEQB",
+	"PCMPEQL",
+	"PCMPEQW",
+	"PCMPGTB",
+	"PCMPGTL",
+	"PCMPGTW",
+	"PEXTRW",
+	"PFACC",
+	"PFADD",
+	"PFCMPEQ",
+	"PFCMPGE",
+	"PFCMPGT",
+	"PFMAX",
+	"PFMIN",
+	"PFMUL",
+	"PFNACC",
+	"PFPNACC",
+	"PFRCP",
+	"PFRCPIT1",
+	"PFRCPI2T",
+	"PFRSQIT1",
+	"PFRSQRT",
+	"PFSUB",
+	"PFSUBR",
+	"PINSRW",
+	"PMADDWL",
+	"PMAXSW",
+	"PMAXUB",
+	"PMINSW",
+	"PMINUB",
+	"PMOVMSKB",
+	"PMULHRW",
+	"PMULHUW",
+	"PMULHW",
+	"PMULLW",
+	"PMULULQ",
+	"POR",
+	"PSADBW",
+	"PSHUFHW",
+	"PSHUFL",
+	"PSHUFLW",
+	"PSHUFW",
+	"PSLLO",
+	"PSLLL",
+	"PSLLQ",
+	"PSLLW",
+	"PSRAL",
+	"PSRAW",
+	"PSRLO",
+	"PSRLL",
+	"PSRLQ",
+	"PSRLW",
+	"PSUBB",
+	"PSUBL",
+	"PSUBQ",
+	"PSUBSB",
+	"PSUBSW",
+	"PSUBUSB",
+	"PSUBUSW",
+	"PSUBW",
+	"PSWAPL",
+	"PUNPCKHBW",
+	"PUNPCKHLQ",
+	"PUNPCKHQDQ",
+	"PUNPCKHWL",
+	"PUNPCKLBW",
+	"PUNPCKLLQ",
+	"PUNPCKLQDQ",
+	"PUNPCKLWL",
+	"PXOR",
+	"RCPPS",
+	"RCPSS",
+	"RSQRTPS",
+	"RSQRTSS",
+	"SHUFPD",
+	"SHUFPS",
+	"SQRTPD",
+	"SQRTPS",
+	"SQRTSD",
+	"SQRTSS",
+	"STMXCSR",
+	"SUBPD",
+	"SUBPS",
+	"SUBSD",
+	"SUBSS",
+	"UCOMISD",
+	"UCOMISS",
+	"UNPCKHPD",
+	"UNPCKHPS",
+	"UNPCKLPD",
+	"UNPCKLPS",
+	"XORPD",
+	"XORPS",
+	"PF2IW",
+	"PF2IL",
+	"PI2FW",
+	"PI2FL",
+	"RETFW",
+	"RETFL",
+	"RETFQ",
+	"SWAPGS",
+	"MODE",
+	"LAST",
+};

+ 381 - 0
sys/src/cmd/6c/gc.h

@@ -0,0 +1,381 @@
+#include	"../cc/cc.h"
+#include	"../6c/6.out.h"
+
+/*
+ * 6c/amd64
+ * Intel 386 with AMD64 extensions
+ */
+#define	SZ_CHAR		1
+#define	SZ_SHORT	2
+#define	SZ_INT		4
+#define	SZ_LONG		4
+#define	SZ_IND		8
+#define	SZ_FLOAT	4
+#define	SZ_VLONG	8
+#define	SZ_DOUBLE	8
+#define	FNX		100
+
+typedef	struct	Adr	Adr;
+typedef	struct	Prog	Prog;
+typedef	struct	Case	Case;
+typedef	struct	C1	C1;
+typedef	struct	Var	Var;
+typedef	struct	Reg	Reg;
+typedef	struct	Rgn	Rgn;
+typedef	struct	Renv	Renv;
+
+EXTERN	struct
+{
+	Node*	regtree;
+	Node*	basetree;
+	short	scale;
+	short	reg;
+	short	ptr;
+} idx;
+
+struct	Adr
+{
+	vlong	offset;
+	double	dval;
+	char	sval[NSNAME];
+
+	Sym*	sym;
+	uchar	type;
+	uchar	index;
+	uchar	etype;
+	uchar	scale;	/* doubles as width in DATA op */
+};
+#define	A	((Adr*)0)
+
+#define	INDEXED	9
+struct	Prog
+{
+	Adr	from;
+	Adr	to;
+	Prog*	link;
+	long	lineno;
+	short	as;
+};
+#define	P	((Prog*)0)
+
+struct	Case
+{
+	Case*	link;
+	vlong	val;
+	long	label;
+	char	def;
+	char	isv;
+};
+#define	C	((Case*)0)
+
+struct	C1
+{
+	vlong	val;
+	long	label;
+};
+
+struct	Var
+{
+	vlong	offset;
+	Sym*	sym;
+	char	name;
+	char	etype;
+};
+
+struct	Reg
+{
+	long	pc;
+	long	rpo;		/* reverse post ordering */
+
+	Bits	set;
+	Bits	use1;
+	Bits	use2;
+
+	Bits	refbehind;
+	Bits	refahead;
+	Bits	calbehind;
+	Bits	calahead;
+	Bits	regdiff;
+	Bits	act;
+
+	long	regu;
+	long	loop;		/* could be shorter */
+
+	Reg*	log5;
+	long	active;
+
+	Reg*	p1;
+	Reg*	p2;
+	Reg*	p2link;
+	Reg*	s1;
+	Reg*	s2;
+	Reg*	link;
+	Prog*	prog;
+};
+#define	R	((Reg*)0)
+
+struct	Renv
+{
+	int	safe;
+	Node	base;
+	Node*	saved;
+	Node*	scope;
+};
+
+#define	NRGN	600
+struct	Rgn
+{
+	Reg*	enter;
+	short	cost;
+	short	varno;
+	short	regno;
+};
+
+EXTERN	long	breakpc;
+EXTERN	long	nbreak;
+EXTERN	Case*	cases;
+EXTERN	Node	constnode;
+EXTERN	Node	fconstnode;
+EXTERN	Node	vconstnode;
+EXTERN	long	continpc;
+EXTERN	long	curarg;
+EXTERN	long	cursafe;
+EXTERN	Prog*	firstp;
+EXTERN	Prog*	lastp;
+EXTERN	long	maxargsafe;
+EXTERN	int	mnstring;
+EXTERN	Node*	nodrat;
+EXTERN	Node*	nodret;
+EXTERN	Node*	nodsafe;
+EXTERN	long	nrathole;
+EXTERN	long	nstring;
+EXTERN	Prog*	p;
+EXTERN	long	pc;
+EXTERN	Node	lregnode;
+EXTERN	Node	qregnode;
+EXTERN	char	string[NSNAME];
+EXTERN	Sym*	symrathole;
+EXTERN	Node	znode;
+EXTERN	Prog	zprog;
+EXTERN	int	reg[D_NONE];
+EXTERN	long	exregoffset;
+EXTERN	long	exfregoffset;
+EXTERN	uchar	typechlpv[NTYPE];
+
+#define	BLOAD(r)	band(bnot(r->refbehind), r->refahead)
+#define	BSTORE(r)	band(bnot(r->calbehind), r->calahead)
+#define	LOAD(r)		(~r->refbehind.b[z] & r->refahead.b[z])
+#define	STORE(r)	(~r->calbehind.b[z] & r->calahead.b[z])
+
+#define	bset(a,n)	((a).b[(n)/32]&(1L<<(n)%32))
+
+#define	CLOAD	5
+#define	CREF	5
+#define	CINF	1000
+#define	LOOP	3
+
+EXTERN	Rgn	region[NRGN];
+EXTERN	Rgn*	rgp;
+EXTERN	int	nregion;
+EXTERN	int	nvar;
+
+EXTERN	Bits	externs;
+EXTERN	Bits	params;
+EXTERN	Bits	consts;
+EXTERN	Bits	addrs;
+
+EXTERN	long	regbits;
+EXTERN	long	exregbits;
+
+EXTERN	int	change;
+EXTERN	int	suppress;
+
+EXTERN	Reg*	firstr;
+EXTERN	Reg*	lastr;
+EXTERN	Reg	zreg;
+EXTERN	Reg*	freer;
+EXTERN	Var	var[NVAR];
+EXTERN	long*	idom;
+EXTERN	Reg**	rpo2r;
+EXTERN	long	maxnr;
+
+extern	char*	anames[];
+
+/*
+ * sgen.c
+ */
+void	codgen(Node*, Node*);
+void	gen(Node*);
+void	noretval(int);
+void	usedset(Node*, int);
+void	xcom(Node*);
+void	indx(Node*);
+int	bcomplex(Node*, Node*);
+
+/*
+ * cgen.c
+ */
+void	zeroregm(Node*);
+void	cgen(Node*, Node*);
+void	reglcgen(Node*, Node*, Node*);
+void	lcgen(Node*, Node*);
+void	bcgen(Node*, int);
+void	boolgen(Node*, int, Node*);
+void	sugen(Node*, Node*, long);
+int	needreg(Node*, int);
+int	hardconst(Node*);
+int	immconst(Node*);
+
+/*
+ * cgen64.c
+ */
+int	vaddr(Node*, int);
+void	loadpair(Node*, Node*);
+int	cgen64(Node*, Node*);
+void	testv(Node*, int);
+
+/*
+ * txt.c
+ */
+void	ginit(void);
+void	gclean(void);
+void	nextpc(void);
+void	gargs(Node*, Node*, Node*);
+void	garg1(Node*, Node*, Node*, int, Node**);
+Node*	nodconst(long);
+Node*	nodfconst(double);
+Node*	nodgconst(vlong, Type*);
+int	nodreg(Node*, Node*, int);
+int	isreg(Node*, int);
+void	regret(Node*, Node*);
+void	regalloc(Node*, Node*, Node*);
+void	regfree(Node*);
+void	regialloc(Node*, Node*, Node*);
+void	regsalloc(Node*, Node*);
+void	regaalloc1(Node*, Node*);
+void	regaalloc(Node*, Node*);
+void	regind(Node*, Node*);
+void	gprep(Node*, Node*);
+void	naddr(Node*, Adr*);
+void	gcmp(int, Node*, vlong);
+void	gmove(Node*, Node*);
+void	gins(int a, Node*, Node*);
+void	gopcode(int, Type*, Node*, Node*);
+int	samaddr(Node*, Node*);
+void	gbranch(int);
+void	patch(Prog*, long);
+int	sconst(Node*);
+void	gpseudo(int, Sym*, Node*);
+
+/*
+ * swt.c
+ */
+int	swcmp(const void*, const void*);
+void	doswit(Node*);
+void	swit1(C1*, int, long, Node*);
+void	casf(void);
+void	bitload(Node*, Node*, Node*, Node*, Node*);
+void	bitstore(Node*, Node*, Node*, Node*, Node*);
+long	outstring(char*, long);
+void	nullwarn(Node*, Node*);
+void	sextern(Sym*, Node*, long, long);
+void	gextern(Sym*, Node*, long, long);
+void	outcode(void);
+void	ieeedtod(Ieee*, double);
+
+/*
+ * list
+ */
+void	listinit(void);
+int	Pconv(Fmt*);
+int	Aconv(Fmt*);
+int	Dconv(Fmt*);
+int	Sconv(Fmt*);
+int	Rconv(Fmt*);
+int	Xconv(Fmt*);
+int	Bconv(Fmt*);
+
+/*
+ * reg.c
+ */
+Reg*	rega(void);
+int	rcmp(const void*, const void*);
+void	regopt(Prog*);
+void	addmove(Reg*, int, int, int);
+Bits	mkvar(Reg*, Adr*);
+void	prop(Reg*, Bits, Bits);
+void	loopit(Reg*, long);
+void	synch(Reg*, Bits);
+ulong	allreg(ulong, Rgn*);
+void	paint1(Reg*, int);
+ulong	paint2(Reg*, int);
+void	paint3(Reg*, int, long, int);
+void	addreg(Adr*, int);
+
+/*
+ * peep.c
+ */
+void	peep(void);
+void	excise(Reg*);
+Reg*	uniqp(Reg*);
+Reg*	uniqs(Reg*);
+int	regtyp(Adr*);
+int	anyvar(Adr*);
+int	subprop(Reg*);
+int	copyprop(Reg*);
+int	copy1(Adr*, Adr*, Reg*, int);
+int	copyu(Prog*, Adr*, Adr*);
+
+int	copyas(Adr*, Adr*);
+int	copyau(Adr*, Adr*);
+int	copysub(Adr*, Adr*, Adr*, int);
+int	copysub1(Prog*, Adr*, Adr*, int);
+
+long	RtoB(int);
+long	FtoB(int);
+int	BtoR(long);
+int	BtoF(long);
+
+#define	D_HI	D_NONE
+#define	D_LO	D_NONE
+
+#define	isregtype(t)	((t)>= D_AX && (t)<=D_R15)
+
+/*
+ * bound
+ */
+void	comtarg(void);
+
+/*
+ * com64
+ */
+int	cond(int);
+int	com64(Node*);
+void	com64init(void);
+void	bool64(Node*);
+long	lo64v(Node*);
+long	hi64v(Node*);
+Node*	lo64(Node*);
+Node*	hi64(Node*);
+
+/*
+ * div/mul
+ */
+void	sdivgen(Node*, Node*, Node*, Node*);
+void	udivgen(Node*, Node*, Node*, Node*);
+void	sdiv2(long, int, Node*, Node*);
+void	smod2(long, int, Node*, Node*);
+void	mulgen(Type*, Node*, Node*);
+void	genmuladd(Node*, Node*, int, Node*);
+void	shiftit(Type*, Node*, Node*);
+
+#pragma	varargck	type	"A"	int
+#pragma	varargck	type	"B"	Bits
+#pragma	varargck	type	"D"	Adr*
+#pragma	varargck	type	"P"	Prog*
+#pragma	varargck	type	"R"	int
+#pragma	varargck	type	"S"	char*
+
+#define	D_X7	(D_X0+7)
+
+void	fgopcode(int, Node*, Node*, int, int);

+ 337 - 0
sys/src/cmd/6c/list.c

@@ -0,0 +1,337 @@
+#define EXTERN
+#include "gc.h"
+
+void
+listinit(void)
+{
+
+	fmtinstall('A', Aconv);
+	fmtinstall('B', Bconv);
+	fmtinstall('P', Pconv);
+	fmtinstall('S', Sconv);
+	fmtinstall('D', Dconv);
+	fmtinstall('R', Rconv);
+}
+
+int
+Bconv(Fmt *fp)
+{
+	char str[STRINGSZ], ss[STRINGSZ], *s;
+	Bits bits;
+	int i;
+
+	str[0] = 0;
+	bits = va_arg(fp->args, Bits);
+	while(bany(&bits)) {
+		i = bnum(bits);
+		if(str[0])
+			strcat(str, " ");
+		if(var[i].sym == S) {
+			sprint(ss, "$%lld", var[i].offset);
+			s = ss;
+		} else
+			s = var[i].sym->name;
+		if(strlen(str) + strlen(s) + 1 >= STRINGSZ)
+			break;
+		strcat(str, s);
+		bits.b[i/32] &= ~(1L << (i%32));
+	}
+	return fmtstrcpy(fp, str);
+}
+
+int
+Pconv(Fmt *fp)
+{
+	char str[STRINGSZ];
+	Prog *p;
+
+	p = va_arg(fp->args, Prog*);
+	if(p->as == ADATA)
+		sprint(str, "	%A	%D/%d,%D",
+			p->as, &p->from, p->from.scale, &p->to);
+	else if(p->as == ATEXT)
+		sprint(str, "	%A	%D,%d,%D",
+			p->as, &p->from, p->from.scale, &p->to);
+	else
+		sprint(str, "	%A	%D,%D",
+			p->as, &p->from, &p->to);
+	return fmtstrcpy(fp, str);
+}
+
+int
+Aconv(Fmt *fp)
+{
+	int i;
+
+	i = va_arg(fp->args, int);
+	return fmtstrcpy(fp, anames[i]);
+}
+
+int
+Dconv(Fmt *fp)
+{
+	char str[40], s[20];
+	Adr *a;
+	int i;
+
+	a = va_arg(fp->args, Adr*);
+	i = a->type;
+	if(i >= D_INDIR) {
+		if(a->offset)
+			sprint(str, "%lld(%R)", a->offset, i-D_INDIR);
+		else
+			sprint(str, "(%R)", i-D_INDIR);
+		goto brk;
+	}
+	switch(i) {
+
+	default:
+		if(a->offset)
+			sprint(str, "$%lld,%R", a->offset, i);
+		else
+			sprint(str, "%R", i);
+		break;
+
+	case D_NONE:
+		str[0] = 0;
+		break;
+
+	case D_BRANCH:
+		sprint(str, "%lld(PC)", a->offset-pc);
+		break;
+
+	case D_EXTERN:
+		sprint(str, "%s+%lld(SB)", a->sym->name, a->offset);
+		break;
+
+	case D_STATIC:
+		sprint(str, "%s<>+%lld(SB)", a->sym->name,
+			a->offset);
+		break;
+
+	case D_AUTO:
+		sprint(str, "%s+%lld(SP)", a->sym->name, a->offset);
+		break;
+
+	case D_PARAM:
+		if(a->sym)
+			sprint(str, "%s+%lld(FP)", a->sym->name, a->offset);
+		else
+			sprint(str, "%lld(FP)", a->offset);
+		break;
+
+	case D_CONST:
+		sprint(str, "$%lld", a->offset);
+		break;
+
+	case D_FCONST:
+		sprint(str, "$(%.17e)", a->dval);
+		break;
+
+	case D_SCONST:
+		sprint(str, "$\"%S\"", a->sval);
+		break;
+
+	case D_ADDR:
+		a->type = a->index;
+		a->index = D_NONE;
+		sprint(str, "$%D", a);
+		a->index = a->type;
+		a->type = D_ADDR;
+		goto conv;
+	}
+brk:
+	if(a->index != D_NONE) {
+		sprint(s, "(%R*%d)", (int)a->index, (int)a->scale);
+		strcat(str, s);
+	}
+conv:
+	return fmtstrcpy(fp, str);
+}
+
+char*	regstr[] =
+{
+	"AL",		/* [D_AL] */
+	"CL",
+	"DL",
+	"BL",
+	"SPB",
+	"BPB",
+	"SIB",
+	"DIB",
+	"R8B",
+	"R9B",
+	"R10B",
+	"R11B",
+	"R12B",
+	"R13B",
+	"R14B",
+	"R15B",
+
+	"AX",		/* [D_AX] */
+	"CX",
+	"DX",
+	"BX",
+	"SP",
+	"BP",
+	"SI",
+	"DI",
+	"R8",
+	"R9",
+	"R10",
+	"R11",
+	"R12",
+	"R13",
+	"R14",
+	"R15",
+
+	"AH",
+	"CH",
+	"DH",
+	"BH",
+
+	"F0",		/* [D_F0] */
+	"F1",
+	"F2",
+	"F3",
+	"F4",
+	"F5",
+	"F6",
+	"F7",
+
+	"M0",
+	"M1",
+	"M2",
+	"M3",
+	"M4",
+	"M5",
+	"M6",
+	"M7",
+
+	"X0",
+	"X1",
+	"X2",
+	"X3",
+	"X4",
+	"X5",
+	"X6",
+	"X7",
+	"X8",
+	"X9",
+	"X10",
+	"X11",
+	"X12",
+	"X13",
+	"X14",
+	"X15",
+
+	"CS",		/* [D_CS] */
+	"SS",
+	"DS",
+	"ES",
+	"FS",
+	"GS",
+
+	"GDTR",		/* [D_GDTR] */
+	"IDTR",		/* [D_IDTR] */
+	"LDTR",		/* [D_LDTR] */
+	"MSW",		/* [D_MSW] */
+	"TASK",		/* [D_TASK] */
+
+	"CR0",		/* [D_CR] */
+	"CR1",
+	"CR2",
+	"CR3",
+	"CR4",
+	"CR5",
+	"CR6",
+	"CR7",
+	"CR8",
+	"CR9",
+	"CR10",
+	"CR11",
+	"CR12",
+	"CR13",
+	"CR14",
+	"CR15",
+
+	"DR0",		/* [D_DR] */
+	"DR1",
+	"DR2",
+	"DR3",
+	"DR4",
+	"DR5",
+	"DR6",
+	"DR7",
+
+	"TR0",		/* [D_TR] */
+	"TR1",
+	"TR2",
+	"TR3",
+	"TR4",
+	"TR5",
+	"TR6",
+	"TR7",
+
+	"NONE",		/* [D_NONE] */
+};
+
+int
+Rconv(Fmt *fp)
+{
+	char str[20];
+	int r;
+
+	r = va_arg(fp->args, int);
+	if(r >= D_AL && r <= D_NONE)
+		sprint(str, "%s", regstr[r-D_AL]);
+	else
+		sprint(str, "gok(%d)", r);
+
+	return fmtstrcpy(fp, str);
+}
+
+int
+Sconv(Fmt *fp)
+{
+	int i, c;
+	char str[30], *p, *a;
+
+	a = va_arg(fp->args, char*);
+	p = str;
+	for(i=0; i<sizeof(double); i++) {
+		c = a[i] & 0xff;
+		if(c >= 'a' && c <= 'z' ||
+		   c >= 'A' && c <= 'Z' ||
+		   c >= '0' && c <= '9') {
+			*p++ = c;
+			continue;
+		}
+		*p++ = '\\';
+		switch(c) {
+		default:
+			if(c < 040 || c >= 0177)
+				break;	/* not portable */
+			p[-1] = c;
+			continue;
+		case 0:
+			*p++ = 'z';
+			continue;
+		case '\\':
+		case '"':
+			*p++ = c;
+			continue;
+		case '\n':
+			*p++ = 'n';
+			continue;
+		case '\t':
+			*p++ = 't';
+			continue;
+		}
+		*p++ = (c>>6) + '0';
+		*p++ = ((c>>3) & 7) + '0';
+		*p++ = (c & 7) + '0';
+	}
+	*p = 0;
+	return fmtstrcpy(fp, str);
+}

+ 78 - 0
sys/src/cmd/6c/machcap.c

@@ -0,0 +1,78 @@
+#include "gc.h"
+
+int
+machcap(Node *n)
+{
+
+	if(n == Z)
+		return 1;	/* test */
+
+	switch(n->op) {
+	case OMUL:
+	case OLMUL:
+	case OASMUL:
+	case OASLMUL:
+		if(typechl[n->type->etype])
+			return 1;
+		if(typev[n->type->etype]) {
+				return 1;
+		}
+		break;
+
+	case OCOM:
+	case ONEG:
+	case OADD:
+	case OAND:
+	case OOR:
+	case OSUB:
+	case OXOR:
+	case OASHL:
+	case OLSHR:
+	case OASHR:
+		if(typechlv[n->left->type->etype])
+			return 1;
+		break;
+
+	case OCAST:
+		return 1;
+
+	case OCOND:
+	case OCOMMA:
+	case OLIST:
+	case OANDAND:
+	case OOROR:
+	case ONOT:
+		return 1;
+
+	case OASADD:
+	case OASSUB:
+	case OASAND:
+	case OASOR:
+	case OASXOR:
+		return 1;
+
+	case OASASHL:
+	case OASASHR:
+	case OASLSHR:
+		return 1;
+
+	case OPOSTINC:
+	case OPOSTDEC:
+	case OPREINC:
+	case OPREDEC:
+		return 1;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OGT:
+	case OLT:
+	case OGE:
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		return 1;
+	}
+	return 0;
+}

+ 15 - 0
sys/src/cmd/6c/mkenam

@@ -0,0 +1,15 @@
+ed - ../6c/6.out.h <<'!'
+v/^	A/d
+,s/^	A/	"/
+g/ .*$/s///
+,s/,*$/",/
+1i
+char*	anames[] =
+{
+.
+$a
+};
+.
+w enam.c
+Q
+!

+ 38 - 0
sys/src/cmd/6c/mkfile

@@ -0,0 +1,38 @@
+</$objtype/mkfile
+
+TARG=6c
+OFILES=\
+	cgen.$O\
+	enam.$O\
+	list.$O\
+	sgen.$O\
+	swt.$O\
+	txt.$O\
+	reg.$O\
+	peep.$O\
+	pgen.$O\
+	pswt.$O\
+	machcap.$O\
+	div.$O\
+	mul.$O\
+
+HFILES=\
+	gc.h\
+	6.out.h\
+	../cc/cc.h\
+
+LIB=../cc/cc.a$O
+
+BIN=/$objtype/bin
+</sys/src/cmd/mkone
+
+$LIB:
+	cd ../cc
+	mk install
+	mk clean
+
+%.$O: ../cc/%.c
+	$CC $CFLAGS ../cc/$stem.c
+
+bound.$O:	bound.h
+

+ 428 - 0
sys/src/cmd/6c/mul.c

@@ -0,0 +1,428 @@
+#include "gc.h"
+
+typedef struct	Malg	Malg;
+typedef struct	Mparam	Mparam;
+
+struct	Malg
+{
+	char	vals[10];
+};
+
+struct	Mparam
+{
+	ulong	value;
+	char	alg;
+	char	neg;
+	char	shift;
+	char	arg;
+	char	off;
+};
+
+static	Mparam	multab[32];
+static	int	mulptr;
+
+static	Malg	malgs[]	=
+{
+	{0, 100},
+	{-1, 1, 100},
+	{-9, -5, -3, 3, 5, 9, 100},
+	{6, 10, 12, 18, 20, 24, 36, 40, 72, 100},
+	{-8, -4, -2, 2, 4, 8, 100},
+};
+
+/*
+ * return position of lowest 1
+ */
+int
+lowbit(ulong v)
+{
+	int s, i;
+	ulong m;
+
+	s = 0;
+	m = 0xFFFFFFFFUL;
+	for(i = 16; i > 0; i >>= 1) {
+		m >>= i;
+		if((v & m) == 0) {
+			v >>= i;
+			s += i;
+		}
+	}
+	return s;
+}
+
+void
+genmuladd(Node *d, Node *s, int m, Node *a)
+{
+	Node nod;
+
+	nod.op = OINDEX;
+	nod.left = a;
+	nod.right = s;
+	nod.scale = m;
+	nod.type = types[TIND];
+	nod.xoffset = 0;
+	xcom(&nod);
+	gopcode(OADDR, d->type, &nod, d);
+}
+
+void
+mulparam(ulong m, Mparam *mp)
+{
+	int c, i, j, n, o, q, s;
+	int bc, bi, bn, bo, bq, bs, bt;
+	char *p;
+	long u;
+	ulong t;
+
+	bc = bq = 10;
+	bi = bn = bo = bs = bt = 0;
+	for(i = 0; i < nelem(malgs); i++) {
+		for(p = malgs[i].vals, j = 0; (o = p[j]) < 100; j++)
+		for(s = 0; s < 2; s++) {
+			c = 10;
+			q = 10;
+			u = m - o;
+			if(u == 0)
+				continue;
+			if(s) {
+				o = -o;
+				if(o > 0)
+					continue;
+				u = -u;
+			}
+			n = lowbit(u);
+			t = (ulong)u >> n;
+			switch(i) {
+			case 0:
+				if(t == 1) {
+					c = s + 1;
+					q = 0;
+					break;
+				}
+				switch(t) {
+				case 3:
+				case 5:
+				case 9:
+					c = s + 1;
+					if(n)
+						c++;
+					q = 0;
+					break;
+				}
+				if(s)
+					break;
+				switch(t) {
+				case 15:
+				case 25:
+				case 27:
+				case 45:
+				case 81:
+					c = 2;
+					if(n)
+						c++;
+					q = 1;
+					break;
+				}
+				break;
+			case 1:
+				if(t == 1) {
+					c = 3;
+					q = 3;
+					break;
+				}
+				switch(t) {
+				case 3:
+				case 5:
+				case 9:
+					c = 3;
+					q = 2;
+					break;
+				}
+				break;
+			case 2:
+				if(t == 1) {
+					c = 3;
+					q = 2;
+					break;
+				}
+				break;
+			case 3:
+				if(s)
+					break;
+				if(t == 1) {
+					c = 3;
+					q = 1;
+					break;
+				}
+				break;
+			case 4:
+				if(t == 1) {
+					c = 3;
+					q = 0;
+					break;
+				}
+				break;
+			}
+			if(c < bc || (c == bc && q > bq)) {
+				bc = c;
+				bi = i;
+				bn = n;
+				bo = o;
+				bq = q;
+				bs = s;
+				bt = t;
+			}
+		}
+	}
+	mp->value = m;
+	if(bc <= 3) {
+		mp->alg = bi;
+		mp->shift = bn;
+		mp->off = bo;
+		mp->neg = bs;
+		mp->arg = bt;
+	}
+	else
+		mp->alg = -1;
+}
+
+int
+m0(int a)
+{
+	switch(a) {
+	case -2:
+	case 2:
+		return 2;
+	case -3:
+	case 3:
+		return 2;
+	case -4:
+	case 4:
+		return 4;
+	case -5:
+	case 5:
+		return 4;
+	case 6:
+		return 2;
+	case -8:
+	case 8:
+		return 8;
+	case -9:
+	case 9:
+		return 8;
+	case 10:
+		return 4;
+	case 12:
+		return 2;
+	case 15:
+		return 2;
+	case 18:
+		return 8;
+	case 20:
+		return 4;
+	case 24:
+		return 2;
+	case 25:
+		return 4;
+	case 27:
+		return 2;
+	case 36:
+		return 8;
+	case 40:
+		return 4;
+	case 45:
+		return 4;
+	case 72:
+		return 8;
+	case 81:
+		return 8;
+	}
+	diag(Z, "bad m0");
+	return 0;
+}
+
+int
+m1(int a)
+{
+	switch(a) {
+	case 15:
+		return 4;
+	case 25:
+		return 4;
+	case 27:
+		return 8;
+	case 45:
+		return 8;
+	case 81:
+		return 8;
+	}
+	diag(Z, "bad m1");
+	return 0;
+}
+
+int
+m2(int a)
+{
+	switch(a) {
+	case 6:
+		return 2;
+	case 10:
+		return 2;
+	case 12:
+		return 4;
+	case 18:
+		return 2;
+	case 20:
+		return 4;
+	case 24:
+		return 8;
+	case 36:
+		return 4;
+	case 40:
+		return 8;
+	case 72:
+		return 8;
+	}
+	diag(Z, "bad m2");
+	return 0;
+}
+
+void
+shiftit(Type *t, Node *s, Node *d)
+{
+	long c;
+
+	c = (long)s->vconst & 31;
+	switch(c) {
+	case 0:
+		break;
+	case 1:
+		gopcode(OADD, t, d, d);
+		break;
+	default:
+		gopcode(OASHL, t, s, d);
+	}
+}
+
+static int
+mulgen1(ulong v, Node *n)
+{
+	int i, o;
+	Mparam *p;
+	Node nod, nods;
+
+	for(i = 0; i < nelem(multab); i++) {
+		p = &multab[i];
+		if(p->value == v)
+			goto found;
+	}
+
+	p = &multab[mulptr];
+	if(++mulptr == nelem(multab))
+		mulptr = 0;
+
+	mulparam(v, p);
+
+found:
+//	print("v=%.lx a=%d n=%d s=%d g=%d o=%d \n", p->value, p->alg, p->neg, p->shift, p->arg, p->off);
+	if(p->alg < 0)
+		return 0;
+
+	nods = *nodconst(p->shift);
+
+	o = OADD;
+	if(p->alg > 0) {
+		regalloc(&nod, n, Z);
+		if(p->off < 0)
+			o = OSUB;
+	}
+
+	switch(p->alg) {
+	case 0:
+		switch(p->arg) {
+		case 1:
+			shiftit(n->type, &nods, n);
+			break;
+		case 15:
+		case 25:
+		case 27:
+		case 45:
+		case 81:
+			genmuladd(n, n, m1(p->arg), n);
+			/* fall thru */
+		case 3:
+		case 5:
+		case 9:
+			genmuladd(n, n, m0(p->arg), n);
+			shiftit(n->type, &nods, n);
+			break;
+		default:
+			goto bad;
+		}
+		if(p->neg == 1)
+			gins(ANEGL, Z, n);
+		break;
+	case 1:
+		switch(p->arg) {
+		case 1:
+			gmove(n, &nod);
+			shiftit(n->type, &nods, &nod);
+			break;
+		case 3:
+		case 5:
+		case 9:
+			genmuladd(&nod, n, m0(p->arg), n);
+			shiftit(n->type, &nods, &nod);
+			break;
+		default:
+			goto bad;
+		}
+		if(p->neg)
+			gopcode(o, n->type, &nod, n);
+		else {
+			gopcode(o, n->type, n, &nod);
+			gmove(&nod, n);
+		}
+		break;
+	case 2:
+		genmuladd(&nod, n, m0(p->off), n);
+		shiftit(n->type, &nods, n);
+		goto comop;
+	case 3:
+		genmuladd(&nod, n, m0(p->off), n);
+		shiftit(n->type, &nods, n);
+		genmuladd(n, &nod, m2(p->off), n);
+		break;
+	case 4:
+		genmuladd(&nod, n, m0(p->off), nodconst(0));
+		shiftit(n->type, &nods, n);
+		goto comop;
+	default:
+		diag(Z, "bad mul alg");
+		break;
+	comop:
+		if(p->neg) {
+			gopcode(o, n->type, n, &nod);
+			gmove(&nod, n);
+		}
+		else
+			gopcode(o, n->type, &nod, n);
+	}
+
+	if(p->alg > 0)
+		regfree(&nod);
+
+	return 1;
+
+bad:
+	diag(Z, "mulgen botch");
+	return 1;
+}
+
+void
+mulgen(Type *t, Node *r, Node *n)
+{
+	if(!mulgen1(r->vconst, n))
+		gopcode(OMUL, t, r, n);
+}

+ 846 - 0
sys/src/cmd/6c/peep.c

@@ -0,0 +1,846 @@
+#include "gc.h"
+
+static int
+needc(Prog *p)
+{
+	while(p != P) {
+		switch(p->as) {
+		case AADCL:
+		case AADCQ:
+		case ASBBL:
+		case ASBBQ:
+		case ARCRL:
+		case ARCRQ:
+			return 1;
+		case AADDL:
+		case AADDQ:
+		case ASUBL:
+		case ASUBQ:
+		case AJMP:
+		case ARET:
+		case ACALL:
+			return 0;
+		default:
+			if(p->to.type == D_BRANCH)
+				return 0;
+		}
+		p = p->link;
+	}
+	return 0;
+}
+
+static Reg*
+rnops(Reg *r)
+{
+	Prog *p;
+	Reg *r1;
+
+	if(r != R)
+	for(;;){
+		p = r->prog;
+		if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
+			break;
+		r1 = uniqs(r);
+		if(r1 == R)
+			break;
+		r = r1;
+	}
+	return r;
+}
+
+void
+peep(void)
+{
+	Reg *r, *r1, *r2;
+	Prog *p, *p1;
+	int t;
+
+	/*
+	 * complete R structure
+	 */
+	t = 0;
+	for(r=firstr; r!=R; r=r1) {
+		r1 = r->link;
+		if(r1 == R)
+			break;
+		p = r->prog->link;
+		while(p != r1->prog)
+		switch(p->as) {
+		default:
+			r2 = rega();
+			r->link = r2;
+			r2->link = r1;
+
+			r2->prog = p;
+			r2->p1 = r;
+			r->s1 = r2;
+			r2->s1 = r1;
+			r1->p1 = r2;
+
+			r = r2;
+			t++;
+
+		case ADATA:
+		case AGLOBL:
+		case ANAME:
+		case ASIGNAME:
+			p = p->link;
+		}
+	}
+
+	pc = 0;	/* speculating it won't kill */
+
+loop1:
+
+	t = 0;
+	for(r=firstr; r!=R; r=r->link) {
+		p = r->prog;
+		switch(p->as) {
+		case AMOVL:
+		case AMOVQ:
+		case AMOVSS:
+		case AMOVSD:
+			if(regtyp(&p->to))
+			if(regtyp(&p->from)) {
+				if(copyprop(r)) {
+					excise(r);
+					t++;
+				} else
+				if(subprop(r) && copyprop(r)) {
+					excise(r);
+					t++;
+				}
+			}
+			break;
+
+		case AMOVBLZX:
+		case AMOVWLZX:
+		case AMOVBLSX:
+		case AMOVWLSX:
+			if(regtyp(&p->to)) {
+				r1 = rnops(uniqs(r));
+				if(r1 != R) {
+					p1 = r1->prog;
+					if(p->as == p1->as && p->to.type == p1->from.type){
+						p1->as = AMOVL;
+						t++;
+					}
+				}
+			}
+			break;
+
+		case AMOVBQSX:
+		case AMOVBQZX:
+		case AMOVWQSX:
+		case AMOVWQZX:
+		case AMOVLQSX:
+		case AMOVLQZX:
+			if(regtyp(&p->to)) {
+				r1 = rnops(uniqs(r));
+				if(r1 != R) {
+					p1 = r1->prog;
+					if(p->as == p1->as && p->to.type == p1->from.type){
+						p1->as = AMOVQ;
+						t++;
+					}
+				}
+			}
+			break;
+
+		case AADDL:
+		case AADDQ:
+		case AADDW:
+			if(p->from.type != D_CONST || needc(p->link))
+				break;
+			if(p->from.offset == -1){
+				if(p->as == AADDQ)
+					p->as = ADECQ;
+				else if(p->as == AADDL)
+					p->as = ADECL;
+				else
+					p->as = ADECW;
+				p->from = zprog.from;
+			}
+			else if(p->from.offset == 1){
+				if(p->as == AADDQ)
+					p->as = AINCQ;
+				else if(p->as == AADDL)
+					p->as = AINCL;
+				else
+					p->as = AINCW;
+				p->from = zprog.from;
+			}
+			break;
+
+		case ASUBL:
+		case ASUBQ:
+		case ASUBW:
+			if(p->from.type != D_CONST || needc(p->link))
+				break;
+			if(p->from.offset == -1) {
+				if(p->as == ASUBQ)
+					p->as = AINCQ;
+				else if(p->as == ASUBL)
+					p->as = AINCL;
+				else
+					p->as = AINCW;
+				p->from = zprog.from;
+			}
+			else if(p->from.offset == 1){
+				if(p->as == ASUBQ)
+					p->as = ADECQ;
+				else if(p->as == ASUBL)
+					p->as = ADECL;
+				else
+					p->as = ADECW;
+				p->from = zprog.from;
+			}
+			break;
+		}
+	}
+	if(t)
+		goto loop1;
+}
+
+void
+excise(Reg *r)
+{
+	Prog *p;
+
+	p = r->prog;
+	p->as = ANOP;
+	p->from = zprog.from;
+	p->to = zprog.to;
+}
+
+Reg*
+uniqp(Reg *r)
+{
+	Reg *r1;
+
+	r1 = r->p1;
+	if(r1 == R) {
+		r1 = r->p2;
+		if(r1 == R || r1->p2link != R)
+			return R;
+	} else
+		if(r->p2 != R)
+			return R;
+	return r1;
+}
+
+Reg*
+uniqs(Reg *r)
+{
+	Reg *r1;
+
+	r1 = r->s1;
+	if(r1 == R) {
+		r1 = r->s2;
+		if(r1 == R)
+			return R;
+	} else
+		if(r->s2 != R)
+			return R;
+	return r1;
+}
+
+int
+regtyp(Adr *a)
+{
+	int t;
+
+	t = a->type;
+	if(t >= D_AX && t <= D_R15)
+		return 1;
+	if(t >= D_X0 && t <= D_X0+15)
+		return 1;
+	return 0;
+}
+
+/*
+ * the idea is to substitute
+ * one register for another
+ * from one MOV to another
+ *	MOV	a, R0
+ *	ADD	b, R0	/ no use of R1
+ *	MOV	R0, R1
+ * would be converted to
+ *	MOV	a, R1
+ *	ADD	b, R1
+ *	MOV	R1, R0
+ * hopefully, then the former or latter MOV
+ * will be eliminated by copy propagation.
+ */
+int
+subprop(Reg *r0)
+{
+	Prog *p;
+	Adr *v1, *v2;
+	Reg *r;
+	int t;
+
+	p = r0->prog;
+	v1 = &p->from;
+	if(!regtyp(v1))
+		return 0;
+	v2 = &p->to;
+	if(!regtyp(v2))
+		return 0;
+	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
+		if(uniqs(r) == R)
+			break;
+		p = r->prog;
+		switch(p->as) {
+		case ACALL:
+			return 0;
+
+		case AIMULL:
+		case AIMULQ:
+		case AIMULW:
+			if(p->to.type != D_NONE)
+				break;
+
+		case ADIVB:
+		case ADIVL:
+		case ADIVQ:
+		case ADIVW:
+		case AIDIVB:
+		case AIDIVL:
+		case AIDIVQ:
+		case AIDIVW:
+		case AIMULB:
+		case AMULB:
+		case AMULL:
+		case AMULQ:
+		case AMULW:
+
+		case AROLB:
+		case AROLL:
+		case AROLQ:
+		case AROLW:
+		case ARORB:
+		case ARORL:
+		case ARORQ:
+		case ARORW:
+		case ASALB:
+		case ASALL:
+		case ASALQ:
+		case ASALW:
+		case ASARB:
+		case ASARL:
+		case ASARQ:
+		case ASARW:
+		case ASHLB:
+		case ASHLL:
+		case ASHLQ:
+		case ASHLW:
+		case ASHRB:
+		case ASHRL:
+		case ASHRQ:
+		case ASHRW:
+
+		case AREP:
+		case AREPN:
+
+		case ACWD:
+		case ACDQ:
+		case ACQO:
+
+		case AMOVSL:
+		case AMOVSQ:
+			return 0;
+
+		case AMOVL:
+		case AMOVQ:
+			if(p->to.type == v1->type)
+				goto gotit;
+			break;
+		}
+		if(copyau(&p->from, v2) ||
+		   copyau(&p->to, v2))
+			break;
+		if(copysub(&p->from, v1, v2, 0) ||
+		   copysub(&p->to, v1, v2, 0))
+			break;
+	}
+	return 0;
+
+gotit:
+	copysub(&p->to, v1, v2, 1);
+	if(debug['P']) {
+		print("gotit: %D->%D\n%P", v1, v2, r->prog);
+		if(p->from.type == v2->type)
+			print(" excise");
+		print("\n");
+	}
+	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
+		p = r->prog;
+		copysub(&p->from, v1, v2, 1);
+		copysub(&p->to, v1, v2, 1);
+		if(debug['P'])
+			print("%P\n", r->prog);
+	}
+	t = v1->type;
+	v1->type = v2->type;
+	v2->type = t;
+	if(debug['P'])
+		print("%P last\n", r->prog);
+	return 1;
+}
+
+/*
+ * The idea is to remove redundant copies.
+ *	v1->v2	F=0
+ *	(use v2	s/v2/v1/)*
+ *	set v1	F=1
+ *	use v2	return fail
+ *	-----------------
+ *	v1->v2	F=0
+ *	(use v2	s/v2/v1/)*
+ *	set v1	F=1
+ *	set v2	return success
+ */
+int
+copyprop(Reg *r0)
+{
+	Prog *p;
+	Adr *v1, *v2;
+	Reg *r;
+
+	p = r0->prog;
+	v1 = &p->from;
+	v2 = &p->to;
+	if(copyas(v1, v2))
+		return 1;
+	for(r=firstr; r!=R; r=r->link)
+		r->active = 0;
+	return copy1(v1, v2, r0->s1, 0);
+}
+
+int
+copy1(Adr *v1, Adr *v2, Reg *r, int f)
+{
+	int t;
+	Prog *p;
+
+	if(r->active) {
+		if(debug['P'])
+			print("act set; return 1\n");
+		return 1;
+	}
+	r->active = 1;
+	if(debug['P'])
+		print("copy %D->%D f=%d\n", v1, v2, f);
+	for(; r != R; r = r->s1) {
+		p = r->prog;
+		if(debug['P'])
+			print("%P", p);
+		if(!f && uniqp(r) == R) {
+			f = 1;
+			if(debug['P'])
+				print("; merge; f=%d", f);
+		}
+		t = copyu(p, v2, A);
+		switch(t) {
+		case 2:	/* rar, cant split */
+			if(debug['P'])
+				print("; %D rar; return 0\n", v2);
+			return 0;
+
+		case 3:	/* set */
+			if(debug['P'])
+				print("; %D set; return 1\n", v2);
+			return 1;
+
+		case 1:	/* used, substitute */
+		case 4:	/* use and set */
+			if(f) {
+				if(!debug['P'])
+					return 0;
+				if(t == 4)
+					print("; %D used+set and f=%d; return 0\n", v2, f);
+				else
+					print("; %D used and f=%d; return 0\n", v2, f);
+				return 0;
+			}
+			if(copyu(p, v2, v1)) {
+				if(debug['P'])
+					print("; sub fail; return 0\n");
+				return 0;
+			}
+			if(debug['P'])
+				print("; sub %D/%D", v2, v1);
+			if(t == 4) {
+				if(debug['P'])
+					print("; %D used+set; return 1\n", v2);
+				return 1;
+			}
+			break;
+		}
+		if(!f) {
+			t = copyu(p, v1, A);
+			if(!f && (t == 2 || t == 3 || t == 4)) {
+				f = 1;
+				if(debug['P'])
+					print("; %D set and !f; f=%d", v1, f);
+			}
+		}
+		if(debug['P'])
+			print("\n");
+		if(r->s2)
+			if(!copy1(v1, v2, r->s2, f))
+				return 0;
+	}
+	return 1;
+}
+
+/*
+ * return
+ * 1 if v only used (and substitute),
+ * 2 if read-alter-rewrite
+ * 3 if set
+ * 4 if set and used
+ * 0 otherwise (not touched)
+ */
+int
+copyu(Prog *p, Adr *v, Adr *s)
+{
+
+	switch(p->as) {
+
+	default:
+		if(debug['P'])
+			print("unknown op %A\n", p->as);
+		/* SBBL; ADCL; FLD1; SAHF */
+		return 2;
+
+
+	case ANEGB:
+	case ANEGW:
+	case ANEGL:
+	case ANEGQ:
+	case ANOTB:
+	case ANOTW:
+	case ANOTL:
+	case ANOTQ:
+		if(copyas(&p->to, v))
+			return 2;
+		break;
+
+	case ALEAL:	/* lhs addr, rhs store */
+	case ALEAQ:
+		if(copyas(&p->from, v))
+			return 2;
+
+
+	case ANOP:	/* rhs store */
+	case AMOVL:
+	case AMOVQ:
+	case AMOVBLSX:
+	case AMOVBLZX:
+	case AMOVBQSX:
+	case AMOVBQZX:
+	case AMOVLQSX:
+	case AMOVLQZX:
+	case AMOVWLSX:
+	case AMOVWLZX:
+	case AMOVWQSX:
+	case AMOVWQZX:
+
+	case AMOVSS:
+	case AMOVSD:
+	case ACVTSD2SL:
+	case ACVTSD2SQ:
+	case ACVTSD2SS:
+	case ACVTSL2SD:
+	case ACVTSL2SS:
+	case ACVTSQ2SD:
+	case ACVTSQ2SS:
+	case ACVTSS2SD:
+	case ACVTSS2SL:
+	case ACVTSS2SQ:
+	case ACVTTSD2SL:
+	case ACVTTSD2SQ:
+	case ACVTTSS2SL:
+	case ACVTTSS2SQ:
+		if(copyas(&p->to, v)) {
+			if(s != A)
+				return copysub(&p->from, v, s, 1);
+			if(copyau(&p->from, v))
+				return 4;
+			return 3;
+		}
+		goto caseread;
+
+	case AROLB:
+	case AROLL:
+	case AROLQ:
+	case AROLW:
+	case ARORB:
+	case ARORL:
+	case ARORQ:
+	case ARORW:
+	case ASALB:
+	case ASALL:
+	case ASALQ:
+	case ASALW:
+	case ASARB:
+	case ASARL:
+	case ASARQ:
+	case ASARW:
+	case ASHLB:
+	case ASHLL:
+	case ASHLQ:
+	case ASHLW:
+	case ASHRB:
+	case ASHRL:
+	case ASHRQ:
+	case ASHRW:
+		if(copyas(&p->to, v))
+			return 2;
+		if(copyas(&p->from, v))
+			if(p->from.type == D_CX)
+				return 2;
+		goto caseread;
+
+	case AADDB:	/* rhs rar */
+	case AADDL:
+	case AADDQ:
+	case AADDW:
+	case AANDB:
+	case AANDL:
+	case AANDQ:
+	case AANDW:
+	case ADECL:
+	case ADECQ:
+	case ADECW:
+	case AINCL:
+	case AINCQ:
+	case AINCW:
+	case ASUBB:
+	case ASUBL:
+	case ASUBQ:
+	case ASUBW:
+	case AORB:
+	case AORL:
+	case AORQ:
+	case AORW:
+	case AXORB:
+	case AXORL:
+	case AXORQ:
+	case AXORW:
+	case AMOVB:
+	case AMOVW:
+
+	case AADDSD:
+	case AADDSS:
+	case ACMPSD:
+	case ACMPSS:
+	case ADIVSD:
+	case ADIVSS:
+	case AMAXSD:
+	case AMAXSS:
+	case AMINSD:
+	case AMINSS:
+	case AMULSD:
+	case AMULSS:
+	case ARCPSS:
+	case ARSQRTSS:
+	case ASQRTSD:
+	case ASQRTSS:
+	case ASUBSD:
+	case ASUBSS:
+	case AXORPD:
+		if(copyas(&p->to, v))
+			return 2;
+		goto caseread;
+
+	case ACMPL:	/* read only */
+	case ACMPW:
+	case ACMPB:
+	case ACMPQ:
+
+	case ACOMISD:
+	case ACOMISS:
+	case AUCOMISD:
+	case AUCOMISS:
+	caseread:
+		if(s != A) {
+			if(copysub(&p->from, v, s, 1))
+				return 1;
+			return copysub(&p->to, v, s, 1);
+		}
+		if(copyau(&p->from, v))
+			return 1;
+		if(copyau(&p->to, v))
+			return 1;
+		break;
+
+	case AJGE:	/* no reference */
+	case AJNE:
+	case AJLE:
+	case AJEQ:
+	case AJHI:
+	case AJLS:
+	case AJMI:
+	case AJPL:
+	case AJGT:
+	case AJLT:
+	case AJCC:
+	case AJCS:
+
+	case AADJSP:
+	case AWAIT:
+	case ACLD:
+		break;
+
+	case AIMULL:
+	case AIMULQ:
+	case AIMULW:
+		if(p->to.type != D_NONE) {
+			if(copyas(&p->to, v))
+				return 2;
+			goto caseread;
+		}
+
+	case ADIVB:
+	case ADIVL:
+	case ADIVQ:
+	case ADIVW:
+	case AIDIVB:
+	case AIDIVL:
+	case AIDIVQ:
+	case AIDIVW:
+	case AIMULB:
+	case AMULB:
+	case AMULL:
+	case AMULQ:
+	case AMULW:
+
+	case ACWD:
+	case ACDQ:
+	case ACQO:
+		if(v->type == D_AX || v->type == D_DX)
+			return 2;
+		goto caseread;
+
+	case AMOVSL:
+	case AMOVSQ:
+	case AREP:
+	case AREPN:
+		if(v->type == D_CX || v->type == D_DI || v->type == D_SI)
+			return 2;
+		goto caseread;
+
+	case AJMP:	/* funny */
+		if(s != A) {
+			if(copysub(&p->to, v, s, 1))
+				return 1;
+			return 0;
+		}
+		if(copyau(&p->to, v))
+			return 1;
+		return 0;
+
+	case ARET:	/* funny */
+		if(v->type == REGRET || v->type == FREGRET)
+			return 2;
+		if(s != A)
+			return 1;
+		return 3;
+
+	case ACALL:	/* funny */
+		if(REGEXT && v->type <= REGEXT && v->type > exregoffset)
+			return 2;
+		if(REGARG && v->type == REGARG)
+			return 2;
+
+		if(s != A) {
+			if(copysub(&p->to, v, s, 1))
+				return 1;
+			return 0;
+		}
+		if(copyau(&p->to, v))
+			return 4;
+		return 3;
+
+	case ATEXT:	/* funny */
+		if(REGARG && v->type == REGARG)
+			return 3;
+		return 0;
+	}
+	return 0;
+}
+
+/*
+ * direct reference,
+ * could be set/use depending on
+ * semantics
+ */
+int
+copyas(Adr *a, Adr *v)
+{
+	if(a->type != v->type)
+		return 0;
+	if(regtyp(v))
+		return 1;
+	if(v->type == D_AUTO || v->type == D_PARAM)
+		if(v->offset == a->offset)
+			return 1;
+	return 0;
+}
+
+/*
+ * either direct or indirect
+ */
+int
+copyau(Adr *a, Adr *v)
+{
+
+	if(copyas(a, v))
+		return 1;
+	if(regtyp(v)) {
+		if(a->type-D_INDIR == v->type)
+			return 1;
+		if(a->index == v->type)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * substitute s for v in a
+ * return failure to substitute
+ */
+int
+copysub(Adr *a, Adr *v, Adr *s, int f)
+{
+	int t;
+
+	if(copyas(a, v)) {
+		t = s->type;
+		if(t >= D_AX && t <= D_R15 || t >= D_X0 && t <= D_X0+15) {
+			if(f)
+				a->type = t;
+		}
+		return 0;
+	}
+	if(regtyp(v)) {
+		t = v->type;
+		if(a->type == t+D_INDIR) {
+			if((s->type == D_BP || s->type == D_R13) && a->index != D_NONE)
+				return 1;	/* can't use BP-base with index */
+			if(f)
+				a->type = s->type+D_INDIR;
+//			return 0;
+		}
+		if(a->index == t) {
+			if(f)
+				a->index = s->type;
+			return 0;
+		}
+		return 0;
+	}
+	return 0;
+}

+ 1356 - 0
sys/src/cmd/6c/reg.c

@@ -0,0 +1,1356 @@
+#include "gc.h"
+
+Reg*
+rega(void)
+{
+	Reg *r;
+
+	r = freer;
+	if(r == R) {
+		r = alloc(sizeof(*r));
+	} else
+		freer = r->link;
+
+	*r = zreg;
+	return r;
+}
+
+int
+rcmp(const void *a1, const void *a2)
+{
+	Rgn *p1, *p2;
+	int c1, c2;
+
+	p1 = (Rgn*)a1;
+	p2 = (Rgn*)a2;
+	c1 = p2->cost;
+	c2 = p1->cost;
+	if(c1 -= c2)
+		return c1;
+	return p2->varno - p1->varno;
+}
+
+void
+regopt(Prog *p)
+{
+	Reg *r, *r1, *r2;
+	Prog *p1;
+	int i, z;
+	long initpc, val, npc;
+	ulong vreg;
+	Bits bit;
+	struct
+	{
+		long	m;
+		long	c;
+		Reg*	p;
+	} log5[6], *lp;
+
+	firstr = R;
+	lastr = R;
+	nvar = 0;
+	regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0);
+	for(z=0; z<BITS; z++) {
+		externs.b[z] = 0;
+		params.b[z] = 0;
+		consts.b[z] = 0;
+		addrs.b[z] = 0;
+	}
+
+	/*
+	 * pass 1
+	 * build aux data structure
+	 * allocate pcs
+	 * find use and set of variables
+	 */
+	val = 5L * 5L * 5L * 5L * 5L;
+	lp = log5;
+	for(i=0; i<5; i++) {
+		lp->m = val;
+		lp->c = 0;
+		lp->p = R;
+		val /= 5L;
+		lp++;
+	}
+	val = 0;
+	for(; p != P; p = p->link) {
+		switch(p->as) {
+		case ADATA:
+		case AGLOBL:
+		case ANAME:
+		case ASIGNAME:
+			continue;
+		}
+		r = rega();
+		if(firstr == R) {
+			firstr = r;
+			lastr = r;
+		} else {
+			lastr->link = r;
+			r->p1 = lastr;
+			lastr->s1 = r;
+			lastr = r;
+		}
+		r->prog = p;
+		r->pc = val;
+		val++;
+
+		lp = log5;
+		for(i=0; i<5; i++) {
+			lp->c--;
+			if(lp->c <= 0) {
+				lp->c = lp->m;
+				if(lp->p != R)
+					lp->p->log5 = r;
+				lp->p = r;
+				(lp+1)->c = 0;
+				break;
+			}
+			lp++;
+		}
+
+		r1 = r->p1;
+		if(r1 != R)
+		switch(r1->prog->as) {
+		case ARET:
+		case AJMP:
+		case AIRETL:
+		case AIRETQ:
+			r->p1 = R;
+			r1->s1 = R;
+		}
+
+		bit = mkvar(r, &p->from);
+		if(bany(&bit))
+		switch(p->as) {
+		/*
+		 * funny
+		 */
+		case ALEAL:
+		case ALEAQ:
+			for(z=0; z<BITS; z++)
+				addrs.b[z] |= bit.b[z];
+			break;
+
+		/*
+		 * left side read
+		 */
+		default:
+			for(z=0; z<BITS; z++)
+				r->use1.b[z] |= bit.b[z];
+			break;
+		}
+
+		bit = mkvar(r, &p->to);
+		if(bany(&bit))
+		switch(p->as) {
+		default:
+			diag(Z, "reg: unknown op: %A", p->as);
+			break;
+
+		/*
+		 * right side read
+		 */
+		case ACMPB:
+		case ACMPL:
+		case ACMPQ:
+		case ACMPW:
+		case ACOMISS:
+		case ACOMISD:
+		case AUCOMISS:
+		case AUCOMISD:
+			for(z=0; z<BITS; z++)
+				r->use2.b[z] |= bit.b[z];
+			break;
+
+		/*
+		 * right side write
+		 */
+		case ANOP:
+		case AMOVL:
+		case AMOVQ:
+		case AMOVB:
+		case AMOVW:
+		case AMOVBLSX:
+		case AMOVBLZX:
+		case AMOVBQSX:
+		case AMOVBQZX:
+		case AMOVLQSX:
+		case AMOVLQZX:
+		case AMOVWLSX:
+		case AMOVWLZX:
+		case AMOVWQSX:
+		case AMOVWQZX:
+
+		case AMOVSS:
+		case AMOVSD:
+		case ACVTSD2SL:
+		case ACVTSD2SQ:
+		case ACVTSD2SS:
+		case ACVTSL2SD:
+		case ACVTSL2SS:
+		case ACVTSQ2SD:
+		case ACVTSQ2SS:
+		case ACVTSS2SD:
+		case ACVTSS2SL:
+		case ACVTSS2SQ:
+		case ACVTTSD2SL:
+		case ACVTTSD2SQ:
+		case ACVTTSS2SL:
+		case ACVTTSS2SQ:
+			for(z=0; z<BITS; z++)
+				r->set.b[z] |= bit.b[z];
+			break;
+
+		/*
+		 * right side read+write
+		 */
+		case AADDB:
+		case AADDL:
+		case AADDQ:
+		case AADDW:
+		case AANDB:
+		case AANDL:
+		case AANDQ:
+		case AANDW:
+		case ASUBB:
+		case ASUBL:
+		case ASUBQ:
+		case ASUBW:
+		case AORB:
+		case AORL:
+		case AORQ:
+		case AORW:
+		case AXORB:
+		case AXORL:
+		case AXORQ:
+		case AXORW:
+		case ASALB:
+		case ASALL:
+		case ASALQ:
+		case ASALW:
+		case ASARB:
+		case ASARL:
+		case ASARQ:
+		case ASARW:
+		case AROLB:
+		case AROLL:
+		case AROLQ:
+		case AROLW:
+		case ARORB:
+		case ARORL:
+		case ARORQ:
+		case ARORW:
+		case ASHLB:
+		case ASHLL:
+		case ASHLQ:
+		case ASHLW:
+		case ASHRB:
+		case ASHRL:
+		case ASHRQ:
+		case ASHRW:
+		case AIMULL:
+		case AIMULQ:
+		case AIMULW:
+		case ANEGL:
+		case ANEGQ:
+		case ANOTL:
+		case ANOTQ:
+		case AADCL:
+		case AADCQ:
+		case ASBBL:
+		case ASBBQ:
+
+		case AADDSD:
+		case AADDSS:
+		case ACMPSD:
+		case ACMPSS:
+		case ADIVSD:
+		case ADIVSS:
+		case AMAXSD:
+		case AMAXSS:
+		case AMINSD:
+		case AMINSS:
+		case AMULSD:
+		case AMULSS:
+		case ARCPSS:
+		case ARSQRTSS:
+		case ASQRTSD:
+		case ASQRTSS:
+		case ASUBSD:
+		case ASUBSS:
+		case AXORPD:
+			for(z=0; z<BITS; z++) {
+				r->set.b[z] |= bit.b[z];
+				r->use2.b[z] |= bit.b[z];
+			}
+			break;
+
+		/*
+		 * funny
+		 */
+		case ACALL:
+			for(z=0; z<BITS; z++)
+				addrs.b[z] |= bit.b[z];
+			break;
+		}
+
+		switch(p->as) {
+		case AIMULL:
+		case AIMULQ:
+		case AIMULW:
+			if(p->to.type != D_NONE)
+				break;
+
+		case AIDIVB:
+		case AIDIVL:
+		case AIDIVQ:
+		case AIDIVW:
+		case AIMULB:
+		case ADIVB:
+		case ADIVL:
+		case ADIVQ:
+		case ADIVW:
+		case AMULB:
+		case AMULL:
+		case AMULQ:
+		case AMULW:
+
+		case ACWD:
+		case ACDQ:
+		case ACQO:
+			r->regu |= RtoB(D_AX) | RtoB(D_DX);
+			break;
+
+		case AREP:
+		case AREPN:
+		case ALOOP:
+		case ALOOPEQ:
+		case ALOOPNE:
+			r->regu |= RtoB(D_CX);
+			break;
+
+		case AMOVSB:
+		case AMOVSL:
+		case AMOVSQ:
+		case AMOVSW:
+		case ACMPSB:
+		case ACMPSL:
+		case ACMPSQ:
+		case ACMPSW:
+			r->regu |= RtoB(D_SI) | RtoB(D_DI);
+			break;
+
+		case ASTOSB:
+		case ASTOSL:
+		case ASTOSQ:
+		case ASTOSW:
+		case ASCASB:
+		case ASCASL:
+		case ASCASQ:
+		case ASCASW:
+			r->regu |= RtoB(D_AX) | RtoB(D_DI);
+			break;
+
+		case AINSB:
+		case AINSL:
+		case AINSW:
+		case AOUTSB:
+		case AOUTSL:
+		case AOUTSW:
+			r->regu |= RtoB(D_DI) | RtoB(D_DX);
+			break;
+		}
+	}
+	if(firstr == R)
+		return;
+	initpc = pc - val;
+	npc = val;
+
+	/*
+	 * pass 2
+	 * turn branch references to pointers
+	 * build back pointers
+	 */
+	for(r = firstr; r != R; r = r->link) {
+		p = r->prog;
+		if(p->to.type == D_BRANCH) {
+			val = p->to.offset - initpc;
+			r1 = firstr;
+			while(r1 != R) {
+				r2 = r1->log5;
+				if(r2 != R && val >= r2->pc) {
+					r1 = r2;
+					continue;
+				}
+				if(r1->pc == val)
+					break;
+				r1 = r1->link;
+			}
+			if(r1 == R) {
+				nearln = p->lineno;
+				diag(Z, "ref not found\n%P", p);
+				continue;
+			}
+			if(r1 == r) {
+				nearln = p->lineno;
+				diag(Z, "ref to self\n%P", p);
+				continue;
+			}
+			r->s2 = r1;
+			r->p2link = r1->p2;
+			r1->p2 = r;
+		}
+	}
+	if(debug['R']) {
+		p = firstr->prog;
+		print("\n%L %D\n", p->lineno, &p->from);
+	}
+
+	/*
+	 * pass 2.5
+	 * find looping structure
+	 */
+	for(r = firstr; r != R; r = r->link)
+		r->active = 0;
+	change = 0;
+	loopit(firstr, npc);
+	if(debug['R'] && debug['v']) {
+		print("\nlooping structure:\n");
+		for(r = firstr; r != R; r = r->link) {
+			print("%ld:%P", r->loop, r->prog);
+			for(z=0; z<BITS; z++)
+				bit.b[z] = r->use1.b[z] |
+					   r->use2.b[z] |
+					   r->set.b[z];
+			if(bany(&bit)) {
+				print("\t");
+				if(bany(&r->use1))
+					print(" u1=%B", r->use1);
+				if(bany(&r->use2))
+					print(" u2=%B", r->use2);
+				if(bany(&r->set))
+					print(" st=%B", r->set);
+			}
+			print("\n");
+		}
+	}
+
+	/*
+	 * pass 3
+	 * iterate propagating usage
+	 * 	back until flow graph is complete
+	 */
+loop1:
+	change = 0;
+	for(r = firstr; r != R; r = r->link)
+		r->active = 0;
+	for(r = firstr; r != R; r = r->link)
+		if(r->prog->as == ARET)
+			prop(r, zbits, zbits);
+loop11:
+	/* pick up unreachable code */
+	i = 0;
+	for(r = firstr; r != R; r = r1) {
+		r1 = r->link;
+		if(r1 && r1->active && !r->active) {
+			prop(r, zbits, zbits);
+			i = 1;
+		}
+	}
+	if(i)
+		goto loop11;
+	if(change)
+		goto loop1;
+
+
+	/*
+	 * pass 4
+	 * iterate propagating register/variable synchrony
+	 * 	forward until graph is complete
+	 */
+loop2:
+	change = 0;
+	for(r = firstr; r != R; r = r->link)
+		r->active = 0;
+	synch(firstr, zbits);
+	if(change)
+		goto loop2;
+
+
+	/*
+	 * pass 5
+	 * isolate regions
+	 * calculate costs (paint1)
+	 */
+	r = firstr;
+	if(r) {
+		for(z=0; z<BITS; z++)
+			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+		if(bany(&bit)) {
+			nearln = r->prog->lineno;
+			warn(Z, "used and not set: %B", bit);
+			if(debug['R'] && !debug['w'])
+				print("used and not set: %B\n", bit);
+		}
+	}
+	if(debug['R'] && debug['v'])
+		print("\nprop structure:\n");
+	for(r = firstr; r != R; r = r->link)
+		r->act = zbits;
+	rgp = region;
+	nregion = 0;
+	for(r = firstr; r != R; r = r->link) {
+		if(debug['R'] && debug['v']) {
+			print("%P\t", r->prog);
+			if(bany(&r->set))
+				print("s:%B ", r->set);
+			if(bany(&r->refahead))
+				print("ra:%B ", r->refahead);
+			if(bany(&r->calahead))
+				print("ca:%B ", r->calahead);
+			print("\n");
+		}
+		for(z=0; z<BITS; z++)
+			bit.b[z] = r->set.b[z] &
+			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+		if(bany(&bit)) {
+			nearln = r->prog->lineno;
+			warn(Z, "set and not used: %B", bit);
+			if(debug['R'])
+				print("set and not used: %B\n", bit);
+			excise(r);
+		}
+		for(z=0; z<BITS; z++)
+			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+		while(bany(&bit)) {
+			i = bnum(bit);
+			rgp->enter = r;
+			rgp->varno = i;
+			change = 0;
+			if(debug['R'] && debug['v'])
+				print("\n");
+			paint1(r, i);
+			bit.b[i/32] &= ~(1L<<(i%32));
+			if(change <= 0) {
+				if(debug['R'])
+					print("%L$%d: %B\n",
+						r->prog->lineno, change, blsh(i));
+				continue;
+			}
+			rgp->cost = change;
+			nregion++;
+			if(nregion >= NRGN) {
+				warn(Z, "too many regions");
+				goto brk;
+			}
+			rgp++;
+		}
+	}
+brk:
+	qsort(region, nregion, sizeof(region[0]), rcmp);
+
+	/*
+	 * pass 6
+	 * determine used registers (paint2)
+	 * replace code (paint3)
+	 */
+	rgp = region;
+	for(i=0; i<nregion; i++) {
+		bit = blsh(rgp->varno);
+		vreg = paint2(rgp->enter, rgp->varno);
+		vreg = allreg(vreg, rgp);
+		if(debug['R']) {
+			print("%L$%d %R: %B\n",
+				rgp->enter->prog->lineno,
+				rgp->cost,
+				rgp->regno,
+				bit);
+		}
+		if(rgp->regno != 0)
+			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+		rgp++;
+	}
+	/*
+	 * pass 7
+	 * peep-hole on basic block
+	 */
+	if(!debug['R'] || debug['P'])
+		peep();
+
+	/*
+	 * pass 8
+	 * recalculate pc
+	 */
+	val = initpc;
+	for(r = firstr; r != R; r = r1) {
+		r->pc = val;
+		p = r->prog;
+		p1 = P;
+		r1 = r->link;
+		if(r1 != R)
+			p1 = r1->prog;
+		for(; p != p1; p = p->link) {
+			switch(p->as) {
+			default:
+				val++;
+				break;
+
+			case ANOP:
+			case ADATA:
+			case AGLOBL:
+			case ANAME:
+			case ASIGNAME:
+				break;
+			}
+		}
+	}
+	pc = val;
+
+	/*
+	 * fix up branches
+	 */
+	if(debug['R'])
+		if(bany(&addrs))
+			print("addrs: %B\n", addrs);
+
+	r1 = 0; /* set */
+	for(r = firstr; r != R; r = r->link) {
+		p = r->prog;
+		if(p->to.type == D_BRANCH)
+			p->to.offset = r->s2->pc;
+		r1 = r;
+	}
+
+	/*
+	 * last pass
+	 * eliminate nops
+	 * free aux structures
+	 */
+	for(p = firstr->prog; p != P; p = p->link){
+		while(p->link && p->link->as == ANOP)
+			p->link = p->link->link;
+	}
+	if(r1 != R) {
+		r1->link = freer;
+		freer = firstr;
+	}
+}
+
+/*
+ * add mov b,rn
+ * just after r
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+	Prog *p, *p1;
+	Adr *a;
+	Var *v;
+
+	p1 = alloc(sizeof(*p1));
+	*p1 = zprog;
+	p = r->prog;
+
+	p1->link = p->link;
+	p->link = p1;
+	p1->lineno = p->lineno;
+
+	v = var + bn;
+
+	a = &p1->to;
+	a->sym = v->sym;
+	a->offset = v->offset;
+	a->etype = v->etype;
+	a->type = v->name;
+
+	p1->as = AMOVL;
+	if(v->etype == TCHAR || v->etype == TUCHAR)
+		p1->as = AMOVB;
+	if(v->etype == TSHORT || v->etype == TUSHORT)
+		p1->as = AMOVW;
+	if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
+		p1->as = AMOVQ;
+	if(v->etype == TFLOAT)
+		p1->as = AMOVSS;
+	if(v->etype == TDOUBLE)
+		p1->as = AMOVSD;
+
+	p1->from.type = rn;
+	if(!f) {
+		p1->from = *a;
+		*a = zprog.from;
+		a->type = rn;
+		if(v->etype == TUCHAR)
+			p1->as = AMOVB;
+		if(v->etype == TUSHORT)
+			p1->as = AMOVW;
+	}
+	if(debug['R'])
+		print("%P\t.a%P\n", p, p1);
+}
+
+ulong
+doregbits(int r)
+{
+	ulong b;
+
+	b = 0;
+	if(r >= D_INDIR)
+		r -= D_INDIR;
+	if(r >= D_AX && r <= D_R15)
+		b |= RtoB(r);
+	else
+	if(r >= D_AL && r <= D_R15B)
+		b |= RtoB(r-D_AL+D_AX);
+	else
+	if(r >= D_AH && r <= D_BH)
+		b |= RtoB(r-D_AH+D_AX);
+	else
+	if(r >= D_X0 && r <= D_X0+15)
+		b |= FtoB(r);
+	return b;
+}
+
+Bits
+mkvar(Reg *r, Adr *a)
+{
+	Var *v;
+	int i, t, n, et, z;
+	long o;
+	Bits bit;
+	Sym *s;
+
+	/*
+	 * mark registers used
+	 */
+	t = a->type;
+	r->regu |= doregbits(t);
+	r->regu |= doregbits(a->index);
+
+	switch(t) {
+	default:
+		goto none;
+	case D_ADDR:
+		a->type = a->index;
+		bit = mkvar(r, a);
+		for(z=0; z<BITS; z++)
+			addrs.b[z] |= bit.b[z];
+		a->type = t;
+		goto none;
+	case D_EXTERN:
+	case D_STATIC:
+	case D_PARAM:
+	case D_AUTO:
+		n = t;
+		break;
+	}
+	s = a->sym;
+	if(s == S)
+		goto none;
+	if(s->name[0] == '.')
+		goto none;
+	et = a->etype;
+	o = a->offset;
+	v = var;
+	for(i=0; i<nvar; i++) {
+		if(s == v->sym)
+		if(n == v->name)
+		if(o == v->offset)
+			goto out;
+		v++;
+	}
+	if(nvar >= NVAR) {
+		if(debug['w'] > 1 && s)
+			warn(Z, "variable not optimized: %s", s->name);
+		goto none;
+	}
+	i = nvar;
+	nvar++;
+	v = &var[i];
+	v->sym = s;
+	v->offset = o;
+	v->name = n;
+	v->etype = et;
+	if(debug['R'])
+		print("bit=%2d et=%2d %D\n", i, et, a);
+
+out:
+	bit = blsh(i);
+	if(n == D_EXTERN || n == D_STATIC)
+		for(z=0; z<BITS; z++)
+			externs.b[z] |= bit.b[z];
+	if(n == D_PARAM)
+		for(z=0; z<BITS; z++)
+			params.b[z] |= bit.b[z];
+	if(v->etype != et || !(typechlpfd[et] || typev[et]))	/* funny punning */
+		for(z=0; z<BITS; z++)
+			addrs.b[z] |= bit.b[z];
+	return bit;
+
+none:
+	return zbits;
+}
+
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+	Reg *r1, *r2;
+	int z;
+
+	for(r1 = r; r1 != R; r1 = r1->p1) {
+		for(z=0; z<BITS; z++) {
+			ref.b[z] |= r1->refahead.b[z];
+			if(ref.b[z] != r1->refahead.b[z]) {
+				r1->refahead.b[z] = ref.b[z];
+				change++;
+			}
+			cal.b[z] |= r1->calahead.b[z];
+			if(cal.b[z] != r1->calahead.b[z]) {
+				r1->calahead.b[z] = cal.b[z];
+				change++;
+			}
+		}
+		switch(r1->prog->as) {
+		case ACALL:
+			for(z=0; z<BITS; z++) {
+				cal.b[z] |= ref.b[z] | externs.b[z];
+				ref.b[z] = 0;
+			}
+			break;
+
+		case ATEXT:
+			for(z=0; z<BITS; z++) {
+				cal.b[z] = 0;
+				ref.b[z] = 0;
+			}
+			break;
+
+		case ARET:
+			for(z=0; z<BITS; z++) {
+				cal.b[z] = externs.b[z];
+				ref.b[z] = 0;
+			}
+		}
+		for(z=0; z<BITS; z++) {
+			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+				r1->use1.b[z] | r1->use2.b[z];
+			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+			r1->refbehind.b[z] = ref.b[z];
+			r1->calbehind.b[z] = cal.b[z];
+		}
+		if(r1->active)
+			break;
+		r1->active = 1;
+	}
+	for(; r != r1; r = r->p1)
+		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
+			prop(r2, r->refbehind, r->calbehind);
+}
+
+/*
+ * find looping structure
+ *
+ * 1) find reverse postordering
+ * 2) find approximate dominators,
+ *	the actual dominators if the flow graph is reducible
+ *	otherwise, dominators plus some other non-dominators.
+ *	See Matthew S. Hecht and Jeffrey D. Ullman,
+ *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
+ *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
+ *	Oct. 1-3, 1973, pp.  207-217.
+ * 3) find all nodes with a predecessor dominated by the current node.
+ *	such a node is a loop head.
+ *	recursively, all preds with a greater rpo number are in the loop
+ */
+long
+postorder(Reg *r, Reg **rpo2r, long n)
+{
+	Reg *r1;
+
+	r->rpo = 1;
+	r1 = r->s1;
+	if(r1 && !r1->rpo)
+		n = postorder(r1, rpo2r, n);
+	r1 = r->s2;
+	if(r1 && !r1->rpo)
+		n = postorder(r1, rpo2r, n);
+	rpo2r[n] = r;
+	n++;
+	return n;
+}
+
+long
+rpolca(long *idom, long rpo1, long rpo2)
+{
+	long t;
+
+	if(rpo1 == -1)
+		return rpo2;
+	while(rpo1 != rpo2){
+		if(rpo1 > rpo2){
+			t = rpo2;
+			rpo2 = rpo1;
+			rpo1 = t;
+		}
+		while(rpo1 < rpo2){
+			t = idom[rpo2];
+			if(t >= rpo2)
+				fatal(Z, "bad idom");
+			rpo2 = t;
+		}
+	}
+	return rpo1;
+}
+
+int
+doms(long *idom, long r, long s)
+{
+	while(s > r)
+		s = idom[s];
+	return s == r;
+}
+
+int
+loophead(long *idom, Reg *r)
+{
+	long src;
+
+	src = r->rpo;
+	if(r->p1 != R && doms(idom, src, r->p1->rpo))
+		return 1;
+	for(r = r->p2; r != R; r = r->p2link)
+		if(doms(idom, src, r->rpo))
+			return 1;
+	return 0;
+}
+
+void
+loopmark(Reg **rpo2r, long head, Reg *r)
+{
+	if(r->rpo < head || r->active == head)
+		return;
+	r->active = head;
+	r->loop += LOOP;
+	if(r->p1 != R)
+		loopmark(rpo2r, head, r->p1);
+	for(r = r->p2; r != R; r = r->p2link)
+		loopmark(rpo2r, head, r);
+}
+
+void
+loopit(Reg *r, long nr)
+{
+	Reg *r1;
+	long i, d, me;
+
+	if(nr > maxnr) {
+		rpo2r = alloc(nr * sizeof(Reg*));
+		idom = alloc(nr * sizeof(long));
+		maxnr = nr;
+	}
+
+	d = postorder(r, rpo2r, 0);
+	if(d > nr)
+		fatal(Z, "too many reg nodes");
+	nr = d;
+	for(i = 0; i < nr / 2; i++){
+		r1 = rpo2r[i];
+		rpo2r[i] = rpo2r[nr - 1 - i];
+		rpo2r[nr - 1 - i] = r1;
+	}
+	for(i = 0; i < nr; i++)
+		rpo2r[i]->rpo = i;
+
+	idom[0] = 0;
+	for(i = 0; i < nr; i++){
+		r1 = rpo2r[i];
+		me = r1->rpo;
+		d = -1;
+		if(r1->p1 != R && r1->p1->rpo < me)
+			d = r1->p1->rpo;
+		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
+			if(r1->rpo < me)
+				d = rpolca(idom, d, r1->rpo);
+		idom[i] = d;
+	}
+
+	for(i = 0; i < nr; i++){
+		r1 = rpo2r[i];
+		r1->loop++;
+		if(r1->p2 != R && loophead(idom, r1))
+			loopmark(rpo2r, i, r1);
+	}
+}
+
+void
+synch(Reg *r, Bits dif)
+{
+	Reg *r1;
+	int z;
+
+	for(r1 = r; r1 != R; r1 = r1->s1) {
+		for(z=0; z<BITS; z++) {
+			dif.b[z] = (dif.b[z] &
+				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
+					r1->set.b[z] | r1->regdiff.b[z];
+			if(dif.b[z] != r1->regdiff.b[z]) {
+				r1->regdiff.b[z] = dif.b[z];
+				change++;
+			}
+		}
+		if(r1->active)
+			break;
+		r1->active = 1;
+		for(z=0; z<BITS; z++)
+			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
+		if(r1->s2 != R)
+			synch(r1->s2, dif);
+	}
+}
+
+ulong
+allreg(ulong b, Rgn *r)
+{
+	Var *v;
+	int i;
+
+	v = var + r->varno;
+	r->regno = 0;
+	switch(v->etype) {
+
+	default:
+		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
+		break;
+
+	case TCHAR:
+	case TUCHAR:
+	case TSHORT:
+	case TUSHORT:
+	case TINT:
+	case TUINT:
+	case TLONG:
+	case TULONG:
+	case TVLONG:
+	case TUVLONG:
+	case TIND:
+	case TARRAY:
+		i = BtoR(~b);
+		if(i && r->cost > 0) {
+			r->regno = i;
+			return RtoB(i);
+		}
+		break;
+
+	case TDOUBLE:
+	case TFLOAT:
+		i = BtoF(~b);
+		if(i && r->cost > 0) {
+			r->regno = i;
+			return FtoB(i);
+		}
+		break;
+	}
+	return 0;
+}
+
+void
+paint1(Reg *r, int bn)
+{
+	Reg *r1;
+	Prog *p;
+	int z;
+	ulong bb;
+
+	z = bn/32;
+	bb = 1L<<(bn%32);
+	if(r->act.b[z] & bb)
+		return;
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = r->p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(r1->act.b[z] & bb)
+			break;
+		r = r1;
+	}
+
+	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+		change -= CLOAD * r->loop;
+		if(debug['R'] && debug['v'])
+			print("%ld%P\tld %B $%d\n", r->loop,
+				r->prog, blsh(bn), change);
+	}
+	for(;;) {
+		r->act.b[z] |= bb;
+		p = r->prog;
+
+		if(r->use1.b[z] & bb) {
+			change += CREF * r->loop;
+			if(debug['R'] && debug['v'])
+				print("%ld%P\tu1 %B $%d\n", r->loop,
+					p, blsh(bn), change);
+		}
+
+		if((r->use2.b[z]|r->set.b[z]) & bb) {
+			change += CREF * r->loop;
+			if(debug['R'] && debug['v'])
+				print("%ld%P\tu2 %B $%d\n", r->loop,
+					p, blsh(bn), change);
+		}
+
+		if(STORE(r) & r->regdiff.b[z] & bb) {
+			change -= CLOAD * r->loop;
+			if(debug['R'] && debug['v'])
+				print("%ld%P\tst %B $%d\n", r->loop,
+					p, blsh(bn), change);
+		}
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+				if(r1->refahead.b[z] & bb)
+					paint1(r1, bn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = r->s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				paint1(r1, bn);
+		r = r->s1;
+		if(r == R)
+			break;
+		if(r->act.b[z] & bb)
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+}
+
+ulong
+regset(Reg *r, ulong bb)
+{
+	ulong b, set;
+	Adr v;
+	int c;
+
+	set = 0;
+	v = zprog.from;
+	while(b = bb & ~(bb-1)) {
+		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
+		if(v.type == 0)
+			diag(Z, "zero v.type for %#lux", b);
+		c = copyu(r->prog, &v, A);
+		if(c == 3)
+			set |= b;
+		bb &= ~b;
+	}
+	return set;
+}
+
+ulong
+reguse(Reg *r, ulong bb)
+{
+	ulong b, set;
+	Adr v;
+	int c;
+
+	set = 0;
+	v = zprog.from;
+	while(b = bb & ~(bb-1)) {
+		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
+		c = copyu(r->prog, &v, A);
+		if(c == 1 || c == 2 || c == 4)
+			set |= b;
+		bb &= ~b;
+	}
+	return set;
+}
+
+ulong
+paint2(Reg *r, int bn)
+{
+	Reg *r1;
+	int z;
+	ulong bb, vreg, x;
+
+	z = bn/32;
+	bb = 1L << (bn%32);
+	vreg = regbits;
+	if(!(r->act.b[z] & bb))
+		return vreg;
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = r->p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(!(r1->act.b[z] & bb))
+			break;
+		r = r1;
+	}
+	for(;;) {
+		r->act.b[z] &= ~bb;
+
+		vreg |= r->regu;
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+				if(r1->refahead.b[z] & bb)
+					vreg |= paint2(r1, bn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = r->s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				vreg |= paint2(r1, bn);
+		r = r->s1;
+		if(r == R)
+			break;
+		if(!(r->act.b[z] & bb))
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+
+	bb = vreg;
+	for(; r; r=r->s1) {
+		x = r->regu & ~bb;
+		if(x) {
+			vreg |= reguse(r, x);
+			bb |= regset(r, x);
+		}
+	}
+	return vreg;
+}
+
+void
+paint3(Reg *r, int bn, long rb, int rn)
+{
+	Reg *r1;
+	Prog *p;
+	int z;
+	ulong bb;
+
+	z = bn/32;
+	bb = 1L << (bn%32);
+	if(r->act.b[z] & bb)
+		return;
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = r->p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(r1->act.b[z] & bb)
+			break;
+		r = r1;
+	}
+
+	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+		addmove(r, bn, rn, 0);
+	for(;;) {
+		r->act.b[z] |= bb;
+		p = r->prog;
+
+		if(r->use1.b[z] & bb) {
+			if(debug['R'])
+				print("%P", p);
+			addreg(&p->from, rn);
+			if(debug['R'])
+				print("\t.c%P\n", p);
+		}
+		if((r->use2.b[z]|r->set.b[z]) & bb) {
+			if(debug['R'])
+				print("%P", p);
+			addreg(&p->to, rn);
+			if(debug['R'])
+				print("\t.c%P\n", p);
+		}
+
+		if(STORE(r) & r->regdiff.b[z] & bb)
+			addmove(r, bn, rn, 1);
+		r->regu |= rb;
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+				if(r1->refahead.b[z] & bb)
+					paint3(r1, bn, rb, rn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = r->s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				paint3(r1, bn, rb, rn);
+		r = r->s1;
+		if(r == R)
+			break;
+		if(r->act.b[z] & bb)
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+}
+
+void
+addreg(Adr *a, int rn)
+{
+
+	a->sym = 0;
+	a->offset = 0;
+	a->type = rn;
+}
+
+long
+RtoB(int r)
+{
+
+	if(r < D_AX || r > D_R15)
+		return 0;
+	return 1L << (r-D_AX);
+}
+
+int
+BtoR(long b)
+{
+
+	b &= 0xffffL;
+	if(b == 0)
+		return 0;
+	return bitno(b) + D_AX;
+}
+
+/*
+ *	bit	reg
+ *	16	X5
+ *	17	X6
+ *	18	X7
+ */
+long
+FtoB(int f)
+{
+	if(f < FREGMIN || f > FREGEXT)
+		return 0;
+	return 1L << (f - FREGMIN + 16);
+}
+
+int
+BtoF(long b)
+{
+
+	b &= 0x70000L;
+	if(b == 0)
+		return 0;
+	return bitno(b) - 16 + FREGMIN;
+}

+ 435 - 0
sys/src/cmd/6c/sgen.c

@@ -0,0 +1,435 @@
+#include "gc.h"
+
+void
+noretval(int n)
+{
+
+	if(n & 1) {
+		gins(ANOP, Z, Z);
+		p->to.type = REGRET;
+	}
+	if(n & 2) {
+		gins(ANOP, Z, Z);
+		p->to.type = FREGRET;
+	}
+}
+
+/* welcome to commute */
+static void
+commute(Node *n)
+{
+	Node *l, *r;
+
+	l = n->left;
+	r = n->right;
+	if(r->complex > l->complex) {
+		n->left = r;
+		n->right = l;
+	}
+}
+
+void
+indexshift(Node *n)
+{
+	int g;
+
+	if(!typechlpv[n->type->etype])
+		return;
+	simplifyshift(n);
+	if(n->op == OASHL && n->right->op == OCONST){
+		g = vconst(n->right);
+		if(g >= 0 && g <= 3)
+			n->addable = 7;
+	}
+}
+
+/*
+ *	calculate addressability as follows
+ *		NAME ==> 10/11		name+value(SB/SP)
+ *		REGISTER ==> 12		register
+ *		CONST ==> 20		$value
+ *		*(20) ==> 21		value
+ *		&(10) ==> 13		$name+value(SB)
+ *		&(11) ==> 1		$name+value(SP)
+ *		(13) + (20) ==> 13	fold constants
+ *		(1) + (20) ==> 1	fold constants
+ *		*(13) ==> 10		back to name
+ *		*(1) ==> 11		back to name
+ *
+ *		(20) * (X) ==> 7	multiplier in indexing
+ *		(X,7) + (13,1) ==> 8	adder in indexing (addresses)
+ *		(8) ==> &9(OINDEX)	index, almost addressable
+ *
+ *	calculate complexity (number of registers)
+ */
+void
+xcom(Node *n)
+{
+	Node *l, *r;
+	int g;
+
+	if(n == Z)
+		return;
+	l = n->left;
+	r = n->right;
+	n->complex = 0;
+	n->addable = 0;
+	switch(n->op) {
+	case OCONST:
+		n->addable = 20;
+		break;
+
+	case ONAME:
+		n->addable = 10;
+		if(n->class == CPARAM || n->class == CAUTO)
+			n->addable = 11;
+		break;
+
+	case OREGISTER:
+		n->addable = 12;
+		break;
+
+	case OINDREG:
+		n->addable = 12;
+		break;
+
+	case OADDR:
+		xcom(l);
+		if(l->addable == 10)
+			n->addable = 13;
+		else
+		if(l->addable == 11)
+			n->addable = 1;
+		break;
+
+	case OADD:
+		xcom(l);
+		xcom(r);
+		if(n->type->etype != TIND)
+			break;
+
+		switch(r->addable) {
+		case 20:
+			switch(l->addable) {
+			case 1:
+			case 13:
+			commadd:
+				l->type = n->type;
+				*n = *l;
+				l = new(0, Z, Z);
+				*l = *(n->left);
+				l->xoffset += r->vconst;
+				n->left = l;
+				r = n->right;
+				goto brk;
+			}
+			break;
+
+		case 1:
+		case 13:
+		case 10:
+		case 11:
+			/* l is the base, r is the index */
+			if(l->addable != 20)
+				n->addable = 8;
+			break;
+		}
+		switch(l->addable) {
+		case 20:
+			switch(r->addable) {
+			case 13:
+			case 1:
+				r = n->left;
+				l = n->right;
+				n->left = l;
+				n->right = r;
+				goto commadd;
+			}
+			break;
+
+		case 13:
+		case 1:
+		case 10:
+		case 11:
+			/* r is the base, l is the index */
+			if(r->addable != 20)
+				n->addable = 8;
+			break;
+		}
+		if(n->addable == 8 && !side(n)) {
+			indx(n);
+			l = new1(OINDEX, idx.basetree, idx.regtree);
+			l->scale = idx.scale;
+			l->addable = 9;
+			l->complex = l->right->complex;
+			l->type = l->left->type;
+			n->op = OADDR;
+			n->left = l;
+			n->right = Z;
+			n->addable = 8;
+			break;
+		}
+		break;
+
+	case OINDEX:
+		xcom(l);
+		xcom(r);
+		n->addable = 9;
+		break;
+
+	case OIND:
+		xcom(l);
+		if(l->op == OADDR) {
+			l = l->left;
+			l->type = n->type;
+			*n = *l;
+			return;
+		}
+		switch(l->addable) {
+		case 20:
+			n->addable = 21;
+			break;
+		case 1:
+			n->addable = 11;
+			break;
+		case 13:
+			n->addable = 10;
+			break;
+		}
+		break;
+
+	case OASHL:
+		xcom(l);
+		xcom(r);
+		indexshift(n);
+		break;
+
+	case OMUL:
+	case OLMUL:
+		xcom(l);
+		xcom(r);
+		g = vlog(l);
+		if(g >= 0) {
+			n->left = r;
+			n->right = l;
+			l = r;
+			r = n->right;
+		}
+		g = vlog(r);
+		if(g >= 0) {
+			n->op = OASHL;
+			r->vconst = g;
+			r->type = types[TINT];
+			indexshift(n);
+			break;
+		}
+		commute(n);
+		break;
+
+	case OASLDIV:
+		xcom(l);
+		xcom(r);
+		g = vlog(r);
+		if(g >= 0) {
+			n->op = OASLSHR;
+			r->vconst = g;
+			r->type = types[TINT];
+		}
+		break;
+
+	case OLDIV:
+		xcom(l);
+		xcom(r);
+		g = vlog(r);
+		if(g >= 0) {
+			n->op = OLSHR;
+			r->vconst = g;
+			r->type = types[TINT];
+			indexshift(n);
+			break;
+		}
+		break;
+
+	case OASLMOD:
+		xcom(l);
+		xcom(r);
+		g = vlog(r);
+		if(g >= 0) {
+			n->op = OASAND;
+			r->vconst--;
+		}
+		break;
+
+	case OLMOD:
+		xcom(l);
+		xcom(r);
+		g = vlog(r);
+		if(g >= 0) {
+			n->op = OAND;
+			r->vconst--;
+		}
+		break;
+
+	case OASMUL:
+	case OASLMUL:
+		xcom(l);
+		xcom(r);
+		g = vlog(r);
+		if(g >= 0) {
+			n->op = OASASHL;
+			r->vconst = g;
+		}
+		break;
+
+	case OLSHR:
+	case OASHR:
+		xcom(l);
+		xcom(r);
+		indexshift(n);
+		break;
+
+	default:
+		if(l != Z)
+			xcom(l);
+		if(r != Z)
+			xcom(r);
+		break;
+	}
+brk:
+	if(n->addable >= 10)
+		return;
+	if(l != Z)
+		n->complex = l->complex;
+	if(r != Z) {
+		if(r->complex == n->complex)
+			n->complex = r->complex+1;
+		else
+		if(r->complex > n->complex)
+			n->complex = r->complex;
+	}
+	if(n->complex == 0)
+		n->complex++;
+
+	switch(n->op) {
+
+	case OFUNC:
+		n->complex = FNX;
+		break;
+
+	case OCAST:
+		if(l->type->etype == TUVLONG && typefd[n->type->etype])
+			n->complex += 2;
+		break;
+
+	case OLMOD:
+	case OMOD:
+	case OLMUL:
+	case OLDIV:
+	case OMUL:
+	case ODIV:
+	case OASLMUL:
+	case OASLDIV:
+	case OASLMOD:
+	case OASMUL:
+	case OASDIV:
+	case OASMOD:
+		if(r->complex >= l->complex) {
+			n->complex = l->complex + 3;
+			if(r->complex > n->complex)
+				n->complex = r->complex;
+		} else {
+			n->complex = r->complex + 3;
+			if(l->complex > n->complex)
+				n->complex = l->complex;
+		}
+		break;
+
+	case OLSHR:
+	case OASHL:
+	case OASHR:
+	case OASLSHR:
+	case OASASHL:
+	case OASASHR:
+		if(r->complex >= l->complex) {
+			n->complex = l->complex + 2;
+			if(r->complex > n->complex)
+				n->complex = r->complex;
+		} else {
+			n->complex = r->complex + 2;
+			if(l->complex > n->complex)
+				n->complex = l->complex;
+		}
+		break;
+
+	case OADD:
+	case OXOR:
+	case OAND:
+	case OOR:
+		/*
+		 * immediate operators, make const on right
+		 */
+		if(l->op == OCONST) {
+			n->left = r;
+			n->right = l;
+		}
+		break;
+
+	case OEQ:
+	case ONE:
+	case OLE:
+	case OLT:
+	case OGE:
+	case OGT:
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		/*
+		 * compare operators, make const on left
+		 */
+		if(r->op == OCONST) {
+			n->left = r;
+			n->right = l;
+			n->op = invrel[relindex(n->op)];
+		}
+		break;
+	}
+}
+
+void
+indx(Node *n)
+{
+	Node *l, *r;
+
+	if(debug['x'])
+		prtree(n, "indx");
+
+	l = n->left;
+	r = n->right;
+	if(l->addable == 1 || l->addable == 13 || r->complex > l->complex) {
+		n->right = l;
+		n->left = r;
+		l = r;
+		r = n->right;
+	}
+	if(l->addable != 7) {
+		idx.regtree = l;
+		idx.scale = 1;
+	} else
+	if(l->right->addable == 20) {
+		idx.regtree = l->left;
+		idx.scale = 1 << l->right->vconst;
+	} else
+	if(l->left->addable == 20) {
+		idx.regtree = l->right;
+		idx.scale = 1 << l->left->vconst;
+	} else
+		diag(n, "bad index");
+
+	idx.basetree = r;
+	if(debug['x']) {
+		print("scale = %d\n", idx.scale);
+		prtree(idx.regtree, "index");
+		prtree(idx.basetree, "base");
+	}
+}

+ 530 - 0
sys/src/cmd/6c/swt.c

@@ -0,0 +1,530 @@
+#include "gc.h"
+
+void
+swit1(C1 *q, int nc, long def, Node *n)
+{
+	C1 *r;
+	int i;
+	Prog *sp;
+
+	if(nc < 5) {
+		for(i=0; i<nc; i++) {
+			if(debug['W'])
+				print("case = %.8llux\n", q->val);
+			gcmp(OEQ, n, q->val);
+			patch(p, q->label);
+			q++;
+		}
+		gbranch(OGOTO);
+		patch(p, def);
+		return;
+	}
+	i = nc / 2;
+	r = q+i;
+	if(debug['W'])
+		print("case > %.8llux\n", r->val);
+	gcmp(OGT, n, r->val);
+	sp = p;
+	gbranch(OGOTO);
+	p->as = AJEQ;
+	patch(p, r->label);
+	swit1(q, i, def, n);
+
+	if(debug['W'])
+		print("case < %.8llux\n", r->val);
+	patch(sp, pc);
+	swit1(r+1, nc-i-1, def, n);
+}
+
+void
+bitload(Node *b, Node *n1, Node *n2, Node *n3, Node *nn)
+{
+	int sh;
+	long v;
+	Node *l;
+
+	/*
+	 * n1 gets adjusted/masked value
+	 * n2 gets address of cell
+	 * n3 gets contents of cell
+	 */
+	l = b->left;
+	if(n2 != Z) {
+		regalloc(n1, l, nn);
+		reglcgen(n2, l, Z);
+		regalloc(n3, l, Z);
+		gmove(n2, n3);
+		gmove(n3, n1);
+	} else {
+		regalloc(n1, l, nn);
+		cgen(l, n1);
+	}
+	if(b->type->shift == 0 && typeu[b->type->etype]) {
+		v = ~0 + (1L << b->type->nbits);
+		gopcode(OAND, tfield, nodconst(v), n1);
+	} else {
+		sh = 32 - b->type->shift - b->type->nbits;
+		if(sh > 0)
+			gopcode(OASHL, tfield, nodconst(sh), n1);
+		sh += b->type->shift;
+		if(sh > 0)
+			if(typeu[b->type->etype])
+				gopcode(OLSHR, tfield, nodconst(sh), n1);
+			else
+				gopcode(OASHR, tfield, nodconst(sh), n1);
+	}
+}
+
+void
+bitstore(Node *b, Node *n1, Node *n2, Node *n3, Node *nn)
+{
+	long v;
+	Node nod;
+	int sh;
+
+	regalloc(&nod, b->left, Z);
+	v = ~0 + (1L << b->type->nbits);
+	gopcode(OAND, types[TLONG], nodconst(v), n1);
+	gmove(n1, &nod);
+	if(nn != Z)
+		gmove(n1, nn);
+	sh = b->type->shift;
+	if(sh > 0)
+		gopcode(OASHL, types[TLONG], nodconst(sh), &nod);
+	v <<= sh;
+	gopcode(OAND, types[TLONG], nodconst(~v), n3);
+	gopcode(OOR, types[TLONG], n3, &nod);
+	gmove(&nod, n2);
+
+	regfree(&nod);
+	regfree(n1);
+	regfree(n2);
+	regfree(n3);
+}
+
+long
+outstring(char *s, long n)
+{
+	long r;
+
+	if(suppress)
+		return nstring;
+	r = nstring;
+	while(n) {
+		string[mnstring] = *s++;
+		mnstring++;
+		nstring++;
+		if(mnstring >= NSNAME) {
+			gpseudo(ADATA, symstring, nodconst(0L));
+			p->from.offset += nstring - NSNAME;
+			p->from.scale = NSNAME;
+			p->to.type = D_SCONST;
+			memmove(p->to.sval, string, NSNAME);
+			mnstring = 0;
+		}
+		n--;
+	}
+	return r;
+}
+
+void
+sextern(Sym *s, Node *a, long o, long w)
+{
+	long e, lw;
+
+	for(e=0; e<w; e+=NSNAME) {
+		lw = NSNAME;
+		if(w-e < lw)
+			lw = w-e;
+		gpseudo(ADATA, s, nodconst(0L));
+		p->from.offset += o+e;
+		p->from.scale = lw;
+		p->to.type = D_SCONST;
+		memmove(p->to.sval, a->cstring+e, lw);
+	}
+}
+
+void
+gextern(Sym *s, Node *a, long o, long w)
+{
+	if(0 && a->op == OCONST && typev[a->type->etype]) {
+		gpseudo(ADATA, s, lo64(a));
+		p->from.offset += o;
+		p->from.scale = 4;
+		gpseudo(ADATA, s, hi64(a));
+		p->from.offset += o + 4;
+		p->from.scale = 4;
+		return;
+	}
+	gpseudo(ADATA, s, a);
+	p->from.offset += o;
+	p->from.scale = w;
+	switch(p->to.type) {
+	default:
+		p->to.index = p->to.type;
+		p->to.type = D_ADDR;
+	case D_CONST:
+	case D_FCONST:
+	case D_ADDR:
+		break;
+	}
+}
+
+void	zname(Biobuf*, Sym*, int);
+void	zaddr(Biobuf*, Adr*, int);
+void	outhist(Biobuf*);
+
+void
+outcode(void)
+{
+	struct { Sym *sym; short type; } h[NSYM];
+	Prog *p;
+	Sym *s;
+	int f, sf, st, t, sym;
+	Biobuf b;
+
+	if(debug['S']) {
+		for(p = firstp; p != P; p = p->link)
+			if(p->as != ADATA && p->as != AGLOBL)
+				pc--;
+		for(p = firstp; p != P; p = p->link) {
+			print("%P\n", p);
+			if(p->as != ADATA && p->as != AGLOBL)
+				pc++;
+		}
+	}
+	f = open(outfile, OWRITE);
+	if(f < 0) {
+		diag(Z, "cannot open %s", outfile);
+		return;
+	}
+	Binit(&b, f, OWRITE);
+	Bseek(&b, 0L, 2);
+	outhist(&b);
+	for(sym=0; sym<NSYM; sym++) {
+		h[sym].sym = S;
+		h[sym].type = 0;
+	}
+	sym = 1;
+	for(p = firstp; p != P; p = p->link) {
+	jackpot:
+		sf = 0;
+		s = p->from.sym;
+		while(s != S) {
+			sf = s->sym;
+			if(sf < 0 || sf >= NSYM)
+				sf = 0;
+			t = p->from.type;
+			if(t == D_ADDR)
+				t = p->from.index;
+			if(h[sf].type == t)
+			if(h[sf].sym == s)
+				break;
+			s->sym = sym;
+			zname(&b, s, t);
+			h[sym].sym = s;
+			h[sym].type = t;
+			sf = sym;
+			sym++;
+			if(sym >= NSYM)
+				sym = 1;
+			break;
+		}
+		st = 0;
+		s = p->to.sym;
+		while(s != S) {
+			st = s->sym;
+			if(st < 0 || st >= NSYM)
+				st = 0;
+			t = p->to.type;
+			if(t == D_ADDR)
+				t = p->to.index;
+			if(h[st].type == t)
+			if(h[st].sym == s)
+				break;
+			s->sym = sym;
+			zname(&b, s, t);
+			h[sym].sym = s;
+			h[sym].type = t;
+			st = sym;
+			sym++;
+			if(sym >= NSYM)
+				sym = 1;
+			if(st == sf)
+				goto jackpot;
+			break;
+		}
+		Bputc(&b, p->as);
+		Bputc(&b, p->as>>8);
+		Bputc(&b, p->lineno);
+		Bputc(&b, p->lineno>>8);
+		Bputc(&b, p->lineno>>16);
+		Bputc(&b, p->lineno>>24);
+		zaddr(&b, &p->from, sf);
+		zaddr(&b, &p->to, st);
+	}
+	Bflush(&b);
+	close(f);
+	firstp = P;
+	lastp = P;
+}
+
+void
+outhist(Biobuf *b)
+{
+	Hist *h;
+	char *p, *q, *op, c;
+	Prog pg;
+	int n;
+
+	pg = zprog;
+	pg.as = AHISTORY;
+	c = pathchar();
+	for(h = hist; h != H; h = h->link) {
+		p = h->name;
+		op = 0;
+		/* on windows skip drive specifier in pathname */
+		if(systemtype(Windows) && p && p[1] == ':'){
+			p += 2;
+			c = *p;
+		}
+		if(p && p[0] != c && h->offset == 0 && pathname){
+			/* on windows skip drive specifier in pathname */
+			if(systemtype(Windows) && pathname[1] == ':') {
+				op = p;
+				p = pathname+2;
+				c = *p;
+			} else if(pathname[0] == c){
+				op = p;
+				p = pathname;
+			}
+		}
+		while(p) {
+			q = utfrune(p, c);
+			if(q) {
+				n = q-p;
+				if(n == 0){
+					n = 1;	/* leading "/" */
+					*p = '/';	/* don't emit "\" on windows */
+				}
+				q++;
+			} else {
+				n = strlen(p);
+				q = 0;
+			}
+			if(n) {
+				Bputc(b, ANAME);
+				Bputc(b, ANAME>>8);
+				Bputc(b, D_FILE);
+				Bputc(b, 1);
+				Bputc(b, '<');
+				Bwrite(b, p, n);
+				Bputc(b, 0);
+			}
+			p = q;
+			if(p == 0 && op) {
+				p = op;
+				op = 0;
+			}
+		}
+		pg.lineno = h->line;
+		pg.to.type = zprog.to.type;
+		pg.to.offset = h->offset;
+		if(h->offset)
+			pg.to.type = D_CONST;
+
+		Bputc(b, pg.as);
+		Bputc(b, pg.as>>8);
+		Bputc(b, pg.lineno);
+		Bputc(b, pg.lineno>>8);
+		Bputc(b, pg.lineno>>16);
+		Bputc(b, pg.lineno>>24);
+		zaddr(b, &pg.from, 0);
+		zaddr(b, &pg.to, 0);
+	}
+}
+
+void
+zname(Biobuf *b, Sym *s, int t)
+{
+	char *n;
+	ulong sig;
+
+	if(debug['T'] && t == D_EXTERN && s->sig != SIGDONE && s->type != types[TENUM] && s != symrathole){
+		sig = sign(s);
+		Bputc(b, ASIGNAME);
+		Bputc(b, ASIGNAME>>8);
+		Bputc(b, sig);
+		Bputc(b, sig>>8);
+		Bputc(b, sig>>16);
+		Bputc(b, sig>>24);
+		s->sig = SIGDONE;
+	}
+	else{
+		Bputc(b, ANAME);	/* as */
+		Bputc(b, ANAME>>8);	/* as */
+	}
+	Bputc(b, t);			/* type */
+	Bputc(b, s->sym);		/* sym */
+	n = s->name;
+	while(*n) {
+		Bputc(b, *n);
+		n++;
+	}
+	Bputc(b, 0);
+}
+
+void
+zaddr(Biobuf *b, Adr *a, int s)
+{
+	long l;
+	int i, t;
+	char *n;
+	Ieee e;
+
+	t = 0;
+	if(a->index != D_NONE || a->scale != 0)
+		t |= T_INDEX;
+	if(s != 0)
+		t |= T_SYM;
+
+	switch(a->type) {
+	default:
+		t |= T_TYPE;
+	case D_NONE:
+		if(a->offset != 0) {
+			t |= T_OFFSET;
+			l = a->offset;
+			if((vlong)l != a->offset)
+				t |= T_64;
+		}
+		break;
+	case D_FCONST:
+		t |= T_FCONST;
+		break;
+	case D_SCONST:
+		t |= T_SCONST;
+		break;
+	}
+	Bputc(b, t);
+
+	if(t & T_INDEX) {	/* implies index, scale */
+		Bputc(b, a->index);
+		Bputc(b, a->scale);
+	}
+	if(t & T_OFFSET) {	/* implies offset */
+		l = a->offset;
+		Bputc(b, l);
+		Bputc(b, l>>8);
+		Bputc(b, l>>16);
+		Bputc(b, l>>24);
+		if(t & T_64) {
+			l = a->offset>>32;
+			Bputc(b, l);
+			Bputc(b, l>>8);
+			Bputc(b, l>>16);
+			Bputc(b, l>>24);
+		}
+	}
+	if(t & T_SYM)		/* implies sym */
+		Bputc(b, s);
+	if(t & T_FCONST) {
+		ieeedtod(&e, a->dval);
+		l = e.l;
+		Bputc(b, l);
+		Bputc(b, l>>8);
+		Bputc(b, l>>16);
+		Bputc(b, l>>24);
+		l = e.h;
+		Bputc(b, l);
+		Bputc(b, l>>8);
+		Bputc(b, l>>16);
+		Bputc(b, l>>24);
+		return;
+	}
+	if(t & T_SCONST) {
+		n = a->sval;
+		for(i=0; i<NSNAME; i++) {
+			Bputc(b, *n);
+			n++;
+		}
+		return;
+	}
+	if(t & T_TYPE)
+		Bputc(b, a->type);
+}
+
+long
+align(long i, Type *t, int op)
+{
+	long o;
+	Type *v;
+	int w;
+
+	o = i;
+	w = 1;
+	switch(op) {
+	default:
+		diag(Z, "unknown align opcode %d", op);
+		break;
+
+	case Asu2:	/* padding at end of a struct */
+		w = SZ_VLONG;
+		if(packflg)
+			w = packflg;
+		break;
+
+	case Ael1:	/* initial align of struct element */
+		for(v=t; v->etype==TARRAY; v=v->link)
+			;
+		w = ewidth[v->etype];
+		if(w <= 0 || w >= SZ_VLONG)
+			w = SZ_VLONG;
+		if(packflg)
+			w = packflg;
+		break;
+
+	case Ael2:	/* width of a struct element */
+		o += t->width;
+		break;
+
+	case Aarg0:	/* initial passbyptr argument in arg list */
+		if(typesu[t->etype]) {
+			o = align(o, types[TIND], Aarg1);
+			o = align(o, types[TIND], Aarg2);
+		}
+		break;
+
+	case Aarg1:	/* initial align of parameter */
+		w = ewidth[t->etype];
+		if(w <= 0 || w >= SZ_VLONG) {
+			w = SZ_VLONG;
+			break;
+		}
+		w = 1;		/* little endian no adjustment */
+		break;
+
+	case Aarg2:	/* width of a parameter */
+		o += t->width;
+		w = SZ_VLONG;
+		break;
+
+	case Aaut3:	/* total allign of automatic */
+		o = align(o, t, Ael1);
+		o = align(o, t, Ael2);
+		break;
+	}
+	o = round(o, w);
+	if(debug['A'])
+		print("align %s %ld %T = %ld\n", bnames[op], i, t, o);
+	return o;
+}
+
+long
+maxround(long max, long v)
+{
+	v += SZ_VLONG-1;
+	if(v > max)
+		max = round(v, SZ_VLONG);
+	return max;
+}

+ 106 - 0
sys/src/cmd/6c/sys.c

@@ -0,0 +1,106 @@
+#include <u.h>
+#include <libc.h>
+#include "/sys/src/libc/9syscall/sys.h"
+
+vlong	_sysargs[6*4];
+vlong _callsys(void);
+
+/*
+ * syscalls
+ */
+
+int
+getpid(void)
+{
+	_sysargs[0] = -1;
+	return _callsys();
+}
+
+long
+pread(int fd, void *a, long n, vlong)
+{
+	_sysargs[0] = PREAD;
+	_sysargs[1] = fd;
+	_sysargs[2] = (vlong)a;
+	_sysargs[3] = n;
+	return _callsys();
+}
+
+long
+pwrite(int fd, void *a, long n, vlong)
+{
+	_sysargs[0] = PWRITE;
+	_sysargs[1] = fd;
+	_sysargs[2] = (vlong)a;
+	_sysargs[3] = n;
+	return _callsys();
+}
+
+int
+close(int fd)
+{
+	_sysargs[0] = CLOSE;
+	_sysargs[1] = fd;
+	return _callsys();
+}
+
+int
+open(char *name, int mode)
+{
+	_sysargs[0] = OPEN;
+	_sysargs[1] = (vlong)name;
+	_sysargs[2] = mode;
+	return _callsys();
+}
+
+int
+create(char *f, int mode, ulong perm)
+{
+	_sysargs[0] = CREATE;
+	_sysargs[1] = (vlong)f;
+	_sysargs[2] = mode;
+	_sysargs[3] = perm;
+	return _callsys();
+}
+
+void
+_exits(char *s)
+{
+	_sysargs[0] = EXITS;
+	_sysargs[1] = s!=nil? strlen(s): 0;
+	_callsys();
+}
+
+int
+dup(int f, int t)
+{
+	_sysargs[0] = DUP;
+	_sysargs[1] = f;
+	_sysargs[2] = t;
+	return _callsys();
+}
+
+int
+errstr(char *buf, uint n)
+{
+	_sysargs[0] = ERRSTR;
+	_sysargs[1] = (vlong)buf;
+	_sysargs[2] = n;
+	return _callsys();
+}
+
+int
+brk_(void *a)
+{
+	_sysargs[0] = BRK_;
+	_sysargs[1] = (vlong)a;
+	return _callsys();
+}
+
+void*
+sbrk(ulong n)
+{
+	_sysargs[0] = -2;
+	_sysargs[1] = n;
+	return (void*)_callsys();
+}

+ 1514 - 0
sys/src/cmd/6c/txt.c

@@ -0,0 +1,1514 @@
+#include "gc.h"
+
+void
+ginit(void)
+{
+	int i;
+	Type *t;
+
+	thechar = '6';
+	thestring = "amd64";
+	exregoffset = REGEXT;
+	exfregoffset = FREGEXT;
+	listinit();
+	nstring = 0;
+	mnstring = 0;
+	nrathole = 0;
+	pc = 0;
+	breakpc = -1;
+	continpc = -1;
+	cases = C;
+	firstp = P;
+	lastp = P;
+	tfield = types[TINT];
+
+	typeword = typechlvp;
+	typecmplx = typesu;
+
+	/* TO DO */
+	memmove(typechlpv, typechlp, sizeof(typechlpv));
+	typechlpv[TVLONG] = 1;
+	typechlpv[TUVLONG] = 1;
+
+	zprog.link = P;
+	zprog.as = AGOK;
+	zprog.from.type = D_NONE;
+	zprog.from.index = D_NONE;
+	zprog.from.scale = 0;
+	zprog.to = zprog.from;
+
+	lregnode.op = OREGISTER;
+	lregnode.class = CEXREG;
+	lregnode.reg = REGTMP;
+	lregnode.complex = 0;
+	lregnode.addable = 11;
+	lregnode.type = types[TLONG];
+
+	qregnode = lregnode;
+	qregnode.type = types[TVLONG];
+
+	constnode.op = OCONST;
+	constnode.class = CXXX;
+	constnode.complex = 0;
+	constnode.addable = 20;
+	constnode.type = types[TLONG];
+
+	vconstnode = constnode;
+	vconstnode.type = types[TVLONG];
+
+	fconstnode.op = OCONST;
+	fconstnode.class = CXXX;
+	fconstnode.complex = 0;
+	fconstnode.addable = 20;
+	fconstnode.type = types[TDOUBLE];
+
+	nodsafe = new(ONAME, Z, Z);
+	nodsafe->sym = slookup(".safe");
+	nodsafe->type = types[TINT];
+	nodsafe->etype = types[TINT]->etype;
+	nodsafe->class = CAUTO;
+	complex(nodsafe);
+
+	t = typ(TARRAY, types[TCHAR]);
+	symrathole = slookup(".rathole");
+	symrathole->class = CGLOBL;
+	symrathole->type = t;
+
+	nodrat = new(ONAME, Z, Z);
+	nodrat->sym = symrathole;
+	nodrat->type = types[TIND];
+	nodrat->etype = TVOID;
+	nodrat->class = CGLOBL;
+	complex(nodrat);
+	nodrat->type = t;
+
+	nodret = new(ONAME, Z, Z);
+	nodret->sym = slookup(".ret");
+	nodret->type = types[TIND];
+	nodret->etype = TIND;
+	nodret->class = CPARAM;
+	nodret = new(OIND, nodret, Z);
+	complex(nodret);
+
+	if(0)
+		com64init();
+
+	for(i=0; i<nelem(reg); i++) {
+		reg[i] = 1;
+		if(i >= D_AX && i <= D_R15 && i != D_SP)
+			reg[i] = 0;
+		if(i >= D_X0 && i <= D_X7)
+			reg[i] = 0;
+	}
+}
+
+void
+gclean(void)
+{
+	int i;
+	Sym *s;
+
+	reg[D_SP]--;
+	for(i=D_AX; i<=D_R15; i++)
+		if(reg[i])
+			diag(Z, "reg %R left allocated", i);
+	for(i=D_X0; i<=D_X7; i++)
+		if(reg[i])
+			diag(Z, "reg %R left allocated", i);
+	while(mnstring)
+		outstring("", 1L);
+	symstring->type->width = nstring;
+	symrathole->type->width = nrathole;
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		if(s->type == T)
+			continue;
+		if(s->type->width == 0)
+			continue;
+		if(s->class != CGLOBL && s->class != CSTATIC)
+			continue;
+		if(s->type == types[TENUM])
+			continue;
+		gpseudo(AGLOBL, s, nodconst(s->type->width));
+	}
+	nextpc();
+	p->as = AEND;
+	outcode();
+}
+
+void
+nextpc(void)
+{
+
+	p = alloc(sizeof(*p));
+	*p = zprog;
+	p->lineno = nearln;
+	pc++;
+	if(firstp == P) {
+		firstp = p;
+		lastp = p;
+		return;
+	}
+	lastp->link = p;
+	lastp = p;
+}
+
+void
+gargs(Node *n, Node *tn1, Node *tn2)
+{
+	long regs;
+	Node fnxargs[20], *fnxp;
+
+	regs = cursafe;
+
+	fnxp = fnxargs;
+	garg1(n, tn1, tn2, 0, &fnxp);	/* compile fns to temps */
+
+	curarg = 0;
+	fnxp = fnxargs;
+	garg1(n, tn1, tn2, 1, &fnxp);	/* compile normal args and temps */
+
+	cursafe = regs;
+}
+
+int
+nareg(void)
+{
+	int i, n;
+
+	n = 0;
+	for(i=D_AX; i<=D_R15; i++)
+		if(reg[i] == 0)
+			n++;
+	return n;
+}
+
+void
+garg1(Node *n, Node *tn1, Node *tn2, int f, Node **fnxp)
+{
+	Node nod;
+
+	if(n == Z)
+		return;
+	if(n->op == OLIST) {
+		garg1(n->left, tn1, tn2, f, fnxp);
+		garg1(n->right, tn1, tn2, f, fnxp);
+		return;
+	}
+	if(f == 0) {
+		if(n->complex >= FNX) {
+			regsalloc(*fnxp, n);
+			nod = znode;
+			nod.op = OAS;
+			nod.left = *fnxp;
+			nod.right = n;
+			nod.type = n->type;
+			cgen(&nod, Z);
+			(*fnxp)++;
+		}
+		return;
+	}
+	if(typesu[n->type->etype]) {
+		regaalloc(tn2, n);
+		if(n->complex >= FNX) {
+			sugen(*fnxp, tn2, n->type->width);
+			(*fnxp)++;
+		} else
+			sugen(n, tn2, n->type->width);
+		return;
+	}
+	if(REGARG && curarg == 0 && typechlpv[n->type->etype]) {
+		regaalloc1(tn1, n);
+		if(n->complex >= FNX) {
+			cgen(*fnxp, tn1);
+			(*fnxp)++;
+		} else
+			cgen(n, tn1);
+		return;
+	}
+	if(vconst(n) == 0) {
+		regaalloc(tn2, n);
+		gmove(n, tn2);
+		return;
+	}
+	regalloc(tn1, n, Z);
+	if(n->complex >= FNX) {
+		cgen(*fnxp, tn1);
+		(*fnxp)++;
+	} else
+		cgen(n, tn1);
+	regaalloc(tn2, n);
+	gmove(tn1, tn2);
+	regfree(tn1);
+}
+
+Node*
+nodgconst(vlong v, Type *t)
+{
+	if(!typev[t->etype])
+		return nodconst((long)v);
+	vconstnode.vconst = v;
+	return &vconstnode;
+}
+
+Node*
+nodconst(long v)
+{
+	constnode.vconst = v;
+	return &constnode;
+}
+
+Node*
+nodfconst(double d)
+{
+	fconstnode.fconst = d;
+	return &fconstnode;
+}
+
+int
+isreg(Node *n, int r)
+{
+
+	if(n->op == OREGISTER)
+		if(n->reg == r)
+			return 1;
+	return 0;
+}
+
+int
+nodreg(Node *n, Node *nn, int r)
+{
+	int et;
+
+	*n = qregnode;
+	n->reg = r;
+	if(nn != Z){
+		et = nn->type->etype;
+		if(!typefd[et] && nn->type->width <= SZ_LONG && 0)
+			n->type = typeu[et]? types[TUINT]: types[TINT];
+		else
+			n->type = nn->type;
+//print("nodreg %s [%s]\n", tnames[et], tnames[n->type->etype]);
+		n->lineno = nn->lineno;
+	}
+	if(reg[r] == 0)
+		return 0;
+	if(nn != Z) {
+		if(nn->op == OREGISTER)
+		if(nn->reg == r)
+			return 0;
+	}
+	return 1;
+}
+
+void
+regret(Node *n, Node *nn)
+{
+	int r;
+
+	r = REGRET;
+	if(typefd[nn->type->etype])
+		r = FREGRET;
+	nodreg(n, nn, r);
+	reg[r]++;
+}
+
+void
+regalloc(Node *n, Node *tn, Node *o)
+{
+	int i;
+
+	switch(tn->type->etype) {
+	case TCHAR:
+	case TUCHAR:
+	case TSHORT:
+	case TUSHORT:
+	case TINT:
+	case TUINT:
+	case TLONG:
+	case TULONG:
+	case TVLONG:
+	case TUVLONG:
+	case TIND:
+		if(o != Z && o->op == OREGISTER) {
+			i = o->reg;
+			if(i >= D_AX && i <= D_R15)
+				goto out;
+		}
+		for(i=D_AX; i<=D_R15; i++)
+			if(reg[i] == 0)
+				goto out;
+		diag(tn, "out of fixed registers");
+		goto err;
+
+	case TFLOAT:
+	case TDOUBLE:
+		if(o != Z && o->op == OREGISTER) {
+			i = o->reg;
+			if(i >= D_X0 && i <= D_X7)
+				goto out;
+		}
+		for(i=D_X0; i<=D_X7; i++)
+			if(reg[i] == 0)
+				goto out;
+		diag(tn, "out of float registers");
+		goto out;
+	}
+	diag(tn, "unknown type in regalloc: %T", tn->type);
+err:
+	i = 0;
+out:
+	if(i)
+		reg[i]++;
+	nodreg(n, tn, i);
+}
+
+void
+regialloc(Node *n, Node *tn, Node *o)
+{
+	Node nod;
+
+	nod = *tn;
+	nod.type = types[TIND];
+	regalloc(n, &nod, o);
+}
+
+void
+regfree(Node *n)
+{
+	int i;
+
+	i = 0;
+	if(n->op != OREGISTER && n->op != OINDREG)
+		goto err;
+	i = n->reg;
+	if(i < 0 || i >= sizeof(reg))
+		goto err;
+	if(reg[i] <= 0)
+		goto err;
+	reg[i]--;
+	return;
+err:
+	diag(n, "error in regfree: %R", i);
+}
+
+void
+regsalloc(Node *n, Node *nn)
+{
+	cursafe = align(cursafe, nn->type, Aaut3);
+	maxargsafe = maxround(maxargsafe, cursafe+curarg);
+	*n = *nodsafe;
+	n->xoffset = -(stkoff + cursafe);
+	n->type = nn->type;
+	n->etype = nn->type->etype;
+	n->lineno = nn->lineno;
+}
+
+void
+regaalloc1(Node *n, Node *nn)
+{
+	nodreg(n, nn, REGARG);
+	reg[REGARG]++;
+	curarg = align(curarg, nn->type, Aarg1);
+	curarg = align(curarg, nn->type, Aarg2);
+	maxargsafe = maxround(maxargsafe, cursafe+curarg);
+}
+
+void
+regaalloc(Node *n, Node *nn)
+{
+	curarg = align(curarg, nn->type, Aarg1);
+	*n = *nn;
+	n->op = OINDREG;
+	n->reg = REGSP;
+	n->xoffset = curarg;
+	n->complex = 0;
+	n->addable = 20;
+	curarg = align(curarg, nn->type, Aarg2);
+	maxargsafe = maxround(maxargsafe, cursafe+curarg);
+}
+
+void
+regind(Node *n, Node *nn)
+{
+
+	if(n->op != OREGISTER) {
+		diag(n, "regind not OREGISTER");
+		return;
+	}
+	n->op = OINDREG;
+	n->type = nn->type;
+}
+
+void
+naddr(Node *n, Adr *a)
+{
+	long v;
+
+	a->type = D_NONE;
+	if(n == Z)
+		return;
+	switch(n->op) {
+	default:
+	bad:
+		diag(n, "bad in naddr: %O %D", n->op, a);
+		break;
+
+	case OREGISTER:
+		a->type = n->reg;
+		a->sym = S;
+		break;
+
+
+	case OIND:
+		naddr(n->left, a);
+		if(a->type >= D_AX && a->type <= D_R15)
+			a->type += D_INDIR;
+		else
+		if(a->type == D_CONST)
+			a->type = D_NONE+D_INDIR;
+		else
+		if(a->type == D_ADDR) {
+			a->type = a->index;
+			a->index = D_NONE;
+		} else
+			goto bad;
+		break;
+
+	case OINDEX:
+		a->type = idx.ptr;
+		if(n->left->op == OADDR || n->left->op == OCONST)
+			naddr(n->left, a);
+		if(a->type >= D_AX && a->type <= D_R15)
+			a->type += D_INDIR;
+		else
+		if(a->type == D_CONST)
+			a->type = D_NONE+D_INDIR;
+		else
+		if(a->type == D_ADDR) {
+			a->type = a->index;
+			a->index = D_NONE;
+		} else
+			goto bad;
+		a->index = idx.reg;
+		a->scale = n->scale;
+		a->offset += n->xoffset;
+		break;
+
+	case OINDREG:
+		a->type = n->reg+D_INDIR;
+		a->sym = S;
+		a->offset = n->xoffset;
+		break;
+
+	case ONAME:
+		a->etype = n->etype;
+		a->type = D_STATIC;
+		a->sym = n->sym;
+		a->offset = n->xoffset;
+		if(n->class == CSTATIC)
+			break;
+		if(n->class == CEXTERN || n->class == CGLOBL) {
+			a->type = D_EXTERN;
+			break;
+		}
+		if(n->class == CAUTO) {
+			a->type = D_AUTO;
+			break;
+		}
+		if(n->class == CPARAM) {
+			a->type = D_PARAM;
+			break;
+		}
+		goto bad;
+
+	case OCONST:
+		if(typefd[n->type->etype]) {
+			a->type = D_FCONST;
+			a->dval = n->fconst;
+			break;
+		}
+		a->sym = S;
+		a->type = D_CONST;
+		if(typev[n->type->etype] || n->type->etype == TIND)
+			a->offset = n->vconst;
+		else
+			a->offset = convvtox(n->vconst, typeu[n->type->etype]? TULONG: TLONG);
+		break;
+
+	case OADDR:
+		naddr(n->left, a);
+		if(a->type >= D_INDIR) {
+			a->type -= D_INDIR;
+			break;
+		}
+		if(a->type == D_EXTERN || a->type == D_STATIC ||
+		   a->type == D_AUTO || a->type == D_PARAM)
+			if(a->index == D_NONE) {
+				a->index = a->type;
+				a->type = D_ADDR;
+				break;
+			}
+		goto bad;
+
+	case OADD:
+		if(n->right->op == OCONST) {
+			v = n->right->vconst;
+			naddr(n->left, a);
+		} else
+		if(n->left->op == OCONST) {
+			v = n->left->vconst;
+			naddr(n->right, a);
+		} else
+			goto bad;
+		a->offset += v;
+		break;
+
+	}
+}
+
+void
+gcmp(int op, Node *n, vlong val)
+{
+	Node *cn, nod;
+
+	cn = nodgconst(val, n->type);
+	if(!immconst(cn)){
+		regalloc(&nod, n, Z);
+		gmove(cn, &nod);
+		gopcode(op, n->type, n, &nod);
+		regfree(&nod);
+	}else
+		gopcode(op, n->type, n, cn);
+}
+
+#define	CASE(a,b)	((a<<8)|(b<<0))
+
+void
+gmove(Node *f, Node *t)
+{
+	int ft, tt, t64, a;
+	Node nod, nod1, nod2, nod3;
+	Prog *p1, *p2;
+
+	ft = f->type->etype;
+	tt = t->type->etype;
+	t64 = tt == TVLONG || tt == TUVLONG || tt == TIND;
+	if(debug['M'])
+		print("gop: %O %O[%s],%O[%s]\n", OAS,
+			f->op, tnames[ft], t->op, tnames[tt]);
+	if(typefd[ft] && f->op == OCONST) {
+		/* TO DO: pick up special constants, possibly preloaded */
+		if(f->fconst == 0.0){
+			regalloc(&nod, t, t);
+			gins(AXORPD, &nod, &nod);
+			gmove(&nod, t);
+			regfree(&nod);
+			return;
+		}
+	}
+/*
+ * load
+ */
+	if(f->op == ONAME || f->op == OINDREG ||
+	   f->op == OIND || f->op == OINDEX)
+	switch(ft) {
+	case TCHAR:
+		a = AMOVBLSX;
+		if(t64)
+			a = AMOVBQSX;
+		goto ld;
+	case TUCHAR:
+		a = AMOVBLZX;
+		if(t64)
+			a = AMOVBQZX;
+		goto ld;
+	case TSHORT:
+		a = AMOVWLSX;
+		if(t64)
+			a = AMOVWQSX;
+		goto ld;
+	case TUSHORT:
+		a = AMOVWLZX;
+		if(t64)
+			a = AMOVWQZX;
+		goto ld;
+	case TINT:
+	case TLONG:
+		if(typefd[tt]) {
+			regalloc(&nod, t, t);
+			if(tt == TDOUBLE)
+				a = ACVTSL2SD;
+			else
+				a = ACVTSL2SS;
+			gins(a, f, &nod);
+			gmove(&nod, t);
+			regfree(&nod);
+			return;
+		}
+		a = AMOVL;
+		if(t64)
+			a = AMOVLQSX;
+		goto ld;
+	case TUINT:
+	case TULONG:
+		a = AMOVL;
+		if(t64)
+			a = AMOVLQZX;	/* could probably use plain MOVL */
+		goto ld;
+	case TVLONG:
+		if(typefd[tt]) {
+			regalloc(&nod, t, t);
+			if(tt == TDOUBLE)
+				a = ACVTSQ2SD;
+			else
+				a = ACVTSQ2SS;
+			gins(a, f, &nod);
+			gmove(&nod, t);
+			regfree(&nod);
+			return;
+		}
+	case TUVLONG:
+		a = AMOVQ;
+		goto ld;
+	case TIND:
+		a = AMOVQ;
+
+	ld:
+		regalloc(&nod, f, t);
+		nod.type = t64? types[TVLONG]: types[TINT];
+		gins(a, f, &nod);
+		gmove(&nod, t);
+		regfree(&nod);
+		return;
+
+	case TFLOAT:
+		a = AMOVSS;
+		goto fld;
+	case TDOUBLE:
+		a = AMOVSD;
+	fld:
+		regalloc(&nod, f, t);
+		if(tt != TDOUBLE && tt != TFLOAT){	/* TO DO: why is this here */
+			prtree(f, "odd tree");
+			nod.type = t64? types[TVLONG]: types[TINT];
+		}
+		gins(a, f, &nod);
+		gmove(&nod, t);
+		regfree(&nod);
+		return;
+	}
+
+/*
+ * store
+ */
+	if(t->op == ONAME || t->op == OINDREG ||
+	   t->op == OIND || t->op == OINDEX)
+	switch(tt) {
+	case TCHAR:
+	case TUCHAR:
+		a = AMOVB;	goto st;
+	case TSHORT:
+	case TUSHORT:
+		a = AMOVW;	goto st;
+	case TINT:
+	case TUINT:
+	case TLONG:
+	case TULONG:
+		a = AMOVL;	goto st;
+	case TVLONG:
+	case TUVLONG:
+	case TIND:
+		a = AMOVQ;	goto st;
+
+	st:
+		if(f->op == OCONST) {
+			gins(a, f, t);
+			return;
+		}
+	fst:
+		regalloc(&nod, t, f);
+		gmove(f, &nod);
+		gins(a, &nod, t);
+		regfree(&nod);
+		return;
+
+	case TFLOAT:
+		a = AMOVSS;
+		goto fst;
+	case TDOUBLE:
+		a = AMOVSD;
+		goto fst;
+	}
+
+/*
+ * convert
+ */
+	switch(CASE(ft,tt)) {
+	default:
+/*
+ * integer to integer
+ ********
+		a = AGOK;	break;
+
+	case CASE(	TCHAR,	TCHAR):
+	case CASE(	TUCHAR,	TCHAR):
+	case CASE(	TSHORT,	TCHAR):
+	case CASE(	TUSHORT,TCHAR):
+	case CASE(	TINT,	TCHAR):
+	case CASE(	TUINT,	TCHAR):
+	case CASE(	TLONG,	TCHAR):
+	case CASE(	TULONG,	TCHAR):
+	case CASE(	TIND,	TCHAR):
+
+	case CASE(	TCHAR,	TUCHAR):
+	case CASE(	TUCHAR,	TUCHAR):
+	case CASE(	TSHORT,	TUCHAR):
+	case CASE(	TUSHORT,TUCHAR):
+	case CASE(	TINT,	TUCHAR):
+	case CASE(	TUINT,	TUCHAR):
+	case CASE(	TLONG,	TUCHAR):
+	case CASE(	TULONG,	TUCHAR):
+	case CASE(	TIND,	TUCHAR):
+
+	case CASE(	TSHORT,	TSHORT):
+	case CASE(	TUSHORT,TSHORT):
+	case CASE(	TINT,	TSHORT):
+	case CASE(	TUINT,	TSHORT):
+	case CASE(	TLONG,	TSHORT):
+	case CASE(	TULONG,	TSHORT):
+	case CASE(	TIND,	TSHORT):
+
+	case CASE(	TSHORT,	TUSHORT):
+	case CASE(	TUSHORT,TUSHORT):
+	case CASE(	TINT,	TUSHORT):
+	case CASE(	TUINT,	TUSHORT):
+	case CASE(	TLONG,	TUSHORT):
+	case CASE(	TULONG,	TUSHORT):
+	case CASE(	TIND,	TUSHORT):
+
+	case CASE(	TINT,	TINT):
+	case CASE(	TUINT,	TINT):
+	case CASE(	TLONG,	TINT):
+	case CASE(	TULONG,	TINT):
+	case CASE(	TIND,	TINT):
+
+	case CASE(	TINT,	TUINT):
+	case CASE(	TUINT,	TUINT):
+	case CASE(	TLONG,	TUINT):
+	case CASE(	TULONG,	TUINT):
+	case CASE(	TIND,	TUINT):
+
+	case CASE(	TUINT,	TIND):
+	case CASE(	TVLONG,	TUINT):
+	case CASE(	TVLONG,	TULONG):
+	case CASE(	TUVLONG, TUINT):
+	case CASE(	TUVLONG, TULONG):
+ *****/
+		a = AMOVL;
+		break;
+
+	case CASE(	TVLONG,	TCHAR):
+	case	CASE(	TVLONG,	TSHORT):
+	case CASE(	TVLONG,	TINT):
+	case CASE(	TVLONG,	TLONG):
+	case CASE(	TUVLONG, TCHAR):
+	case	CASE(	TUVLONG, TSHORT):
+	case CASE(	TUVLONG, TINT):
+	case CASE(	TUVLONG, TLONG):
+	case CASE(	TINT,	TVLONG):
+	case CASE(	TINT,	TUVLONG):
+	case CASE(	TLONG,	TVLONG):
+	case CASE(	TINT,	TIND):
+	case CASE(	TLONG,	TIND):
+		a = AMOVLQSX;
+		if(f->op == OCONST) {
+			f->vconst &= (uvlong)0xffffffffU;
+			if(f->vconst & 0x80000000)
+				f->vconst |= (vlong)0xffffffff << 32;
+			a = AMOVQ;
+		}
+		break;
+
+	case CASE(	TUINT,	TIND):
+	case CASE(	TUINT,	TVLONG):
+	case CASE(	TUINT,	TUVLONG):
+	case CASE(	TULONG,	TVLONG):
+	case CASE(	TULONG,	TUVLONG):
+	case CASE(	TULONG,	TIND):
+		a = AMOVL;	/* same effect as AMOVLQZX */
+		if(f->op == OCONST) {
+			f->vconst &= (uvlong)0xffffffffU;
+			a = AMOVQ;
+		}
+		break;
+
+	case CASE(	TIND,	TVLONG):
+	case CASE(	TVLONG,	TVLONG):
+	case CASE(	TUVLONG,	TVLONG):
+	case CASE(	TVLONG,	TUVLONG):
+	case CASE(	TUVLONG,	TUVLONG):
+	case CASE(	TIND,	TUVLONG):
+	case CASE(	TVLONG,	TIND):
+	case CASE(	TUVLONG,	TIND):
+	case CASE(	TIND,	TIND):
+		a = AMOVQ;
+		break;
+
+	case CASE(	TSHORT,	TINT):
+	case CASE(	TSHORT,	TUINT):
+	case CASE(	TSHORT,	TLONG):
+	case CASE(	TSHORT,	TULONG):
+		a = AMOVWLSX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xffff;
+			if(f->vconst & 0x8000)
+				f->vconst |= 0xffff0000;
+			a = AMOVL;
+		}
+		break;
+
+	case CASE(	TSHORT,	TVLONG):
+	case CASE(	TSHORT,	TUVLONG):
+	case CASE(	TSHORT,	TIND):
+		a = AMOVWQSX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xffff;
+			if(f->vconst & 0x8000){
+				f->vconst |= 0xffff0000;
+				f->vconst |= (vlong)~0 << 32;
+			}
+			a = AMOVL;
+		}
+		break;
+
+	case CASE(	TUSHORT,TINT):
+	case CASE(	TUSHORT,TUINT):
+	case CASE(	TUSHORT,TLONG):
+	case CASE(	TUSHORT,TULONG):
+		a = AMOVWLZX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xffff;
+			a = AMOVL;
+		}
+		break;
+
+	case CASE(	TUSHORT,TVLONG):
+	case CASE(	TUSHORT,TUVLONG):
+	case CASE(	TUSHORT,TIND):
+		a = AMOVWQZX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xffff;
+			a = AMOVL;	/* MOVL also zero-extends to 64 bits */
+		}
+		break;
+
+	case CASE(	TCHAR,	TSHORT):
+	case CASE(	TCHAR,	TUSHORT):
+	case CASE(	TCHAR,	TINT):
+	case CASE(	TCHAR,	TUINT):
+	case CASE(	TCHAR,	TLONG):
+	case CASE(	TCHAR,	TULONG):
+		a = AMOVBLSX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xff;
+			if(f->vconst & 0x80)
+				f->vconst |= 0xffffff00;
+			a = AMOVL;
+		}
+		break;
+
+	case CASE(	TCHAR,	TVLONG):
+	case CASE(	TCHAR,	TUVLONG):
+	case CASE(	TCHAR,	TIND):
+		a = AMOVBQSX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xff;
+			if(f->vconst & 0x80){
+				f->vconst |= 0xffffff00;
+				f->vconst |= (vlong)~0 << 32;
+			}
+			a = AMOVQ;
+		}
+		break;
+
+	case CASE(	TUCHAR,	TSHORT):
+	case CASE(	TUCHAR,	TUSHORT):
+	case CASE(	TUCHAR,	TINT):
+	case CASE(	TUCHAR,	TUINT):
+	case CASE(	TUCHAR,	TLONG):
+	case CASE(	TUCHAR,	TULONG):
+		a = AMOVBLZX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xff;
+			a = AMOVL;
+		}
+		break;
+
+	case CASE(	TUCHAR,	TVLONG):
+	case CASE(	TUCHAR,	TUVLONG):
+	case CASE(	TUCHAR,	TIND):
+		a = AMOVBQZX;
+		if(f->op == OCONST) {
+			f->vconst &= 0xff;
+			a = AMOVL;	/* zero-extends to 64-bits */
+		}
+		break;
+
+/*
+ * float to fix
+ */
+	case CASE(	TFLOAT,	TCHAR):
+	case CASE(	TFLOAT,	TUCHAR):
+	case CASE(	TFLOAT,	TSHORT):
+	case CASE(	TFLOAT,	TUSHORT):
+	case CASE(	TFLOAT,	TINT):
+	case CASE(	TFLOAT,	TUINT):
+	case CASE(	TFLOAT,	TLONG):
+	case CASE(	TFLOAT,	TULONG):
+	case CASE(	TFLOAT,	TVLONG):
+	case CASE(	TFLOAT,	TUVLONG):
+	case CASE(	TFLOAT,	TIND):
+
+	case CASE(	TDOUBLE,TCHAR):
+	case CASE(	TDOUBLE,TUCHAR):
+	case CASE(	TDOUBLE,TSHORT):
+	case CASE(	TDOUBLE,TUSHORT):
+	case CASE(	TDOUBLE,TINT):
+	case CASE(	TDOUBLE,TUINT):
+	case CASE(	TDOUBLE,TLONG):
+	case CASE(	TDOUBLE,TULONG):
+	case CASE(	TDOUBLE,TVLONG):
+	case CASE(	TDOUBLE,TUVLONG):
+	case CASE(	TDOUBLE,TIND):
+		regalloc(&nod, t, Z);
+		if(ewidth[tt] == SZ_VLONG || typeu[tt] && ewidth[tt] == SZ_INT){
+			if(ft == TFLOAT)
+				a = ACVTTSS2SQ;
+			else
+				a = ACVTTSD2SQ;
+		}else{
+			if(ft == TFLOAT)
+				a = ACVTTSS2SL;
+			else
+				a = ACVTTSD2SL;
+		}
+		gins(a, f, &nod);
+		gmove(&nod, t);
+		regfree(&nod);
+		return;
+
+/*
+ * ulong to float
+ */
+	case CASE(	TUVLONG,	TDOUBLE):
+	case CASE(	TUVLONG,	TFLOAT):
+		a = ACVTSQ2SS;
+		if(tt == TDOUBLE)
+			a = ACVTSQ2SD;
+		regalloc(&nod, f, f);
+		gmove(f, &nod);
+		regalloc(&nod1, t, t);
+		gins(ACMPQ, &nod, nodconst(0));
+		gins(AJLT, Z, Z);
+		p1 = p;
+		gins(a, &nod, &nod1);
+		gins(AJMP, Z, Z);
+		p2 = p;
+		patch(p1, pc);
+		regalloc(&nod2, f, Z);
+		regalloc(&nod3, f, Z);
+		gmove(&nod, &nod2);
+		gins(ASHRQ, nodconst(1), &nod2);
+		gmove(&nod, &nod3);
+		gins(AANDL, nodconst(1), &nod3);
+		gins(AORQ, &nod3, &nod2);
+		gins(a, &nod2, &nod1);
+		gins(tt == TDOUBLE? AADDSD: AADDSS, &nod1, &nod1);
+		regfree(&nod2);
+		regfree(&nod3);
+		patch(p2, pc);
+		regfree(&nod);
+		regfree(&nod1);
+		return;
+
+	case CASE(	TULONG,	TDOUBLE):
+	case CASE(	TUINT,	TDOUBLE):
+	case CASE(	TULONG,	TFLOAT):
+	case CASE(	TUINT,	TFLOAT):
+		a = ACVTSQ2SS;
+		if(tt == TDOUBLE)
+			a = ACVTSQ2SD;
+		regalloc(&nod, f, f);
+		gins(AMOVLQZX, f, &nod);
+		regalloc(&nod1, t, t);
+		gins(a, &nod, &nod1);
+		gmove(&nod1, t);
+		regfree(&nod);
+		regfree(&nod1);
+		return;
+
+/*
+ * fix to float
+ */
+	case CASE(	TCHAR,	TFLOAT):
+	case CASE(	TUCHAR,	TFLOAT):
+	case CASE(	TSHORT,	TFLOAT):
+	case CASE(	TUSHORT,TFLOAT):
+	case CASE(	TINT,	TFLOAT):
+	case CASE(	TLONG,	TFLOAT):
+	case	CASE(	TVLONG,	TFLOAT):
+	case CASE(	TIND,	TFLOAT):
+
+	case CASE(	TCHAR,	TDOUBLE):
+	case CASE(	TUCHAR,	TDOUBLE):
+	case CASE(	TSHORT,	TDOUBLE):
+	case CASE(	TUSHORT,TDOUBLE):
+	case CASE(	TINT,	TDOUBLE):
+	case CASE(	TLONG,	TDOUBLE):
+	case CASE(	TVLONG,	TDOUBLE):
+	case CASE(	TIND,	TDOUBLE):
+		regalloc(&nod, t, t);
+		if(ewidth[ft] == SZ_VLONG){
+			if(tt == TFLOAT)
+				a = ACVTSQ2SS;
+			else
+				a = ACVTSQ2SD;
+		}else{
+			if(tt == TFLOAT)
+				a = ACVTSL2SS;
+			else
+				a = ACVTSL2SD;
+		}
+		gins(a, f, &nod);
+		gmove(&nod, t);
+		regfree(&nod);
+		return;
+
+/*
+ * float to float
+ */
+	case CASE(	TFLOAT,	TFLOAT):
+		a = AMOVSS;
+		break;
+	case CASE(	TDOUBLE,TFLOAT):
+		a = ACVTSD2SS;
+		break;
+	case CASE(	TFLOAT,	TDOUBLE):
+		a = ACVTSS2SD;
+		break;
+	case CASE(	TDOUBLE,TDOUBLE):
+		a = AMOVSD;
+		break;
+	}
+	if(a == AMOVQ || a == AMOVSD || a == AMOVSS || a == AMOVL && ewidth[ft] == ewidth[tt])	/* TO DO: check AMOVL */
+	if(samaddr(f, t))
+		return;
+	gins(a, f, t);
+}
+
+void
+doindex(Node *n)
+{
+	Node nod, nod1;
+	long v;
+
+if(debug['Y'])
+prtree(n, "index");
+
+if(n->left->complex >= FNX)
+print("botch in doindex\n");
+
+	regalloc(&nod, &qregnode, Z);
+	v = constnode.vconst;
+	cgen(n->right, &nod);
+	idx.ptr = D_NONE;
+	if(n->left->op == OCONST)
+		idx.ptr = D_CONST;
+	else if(n->left->op == OREGISTER)
+		idx.ptr = n->left->reg;
+	else if(n->left->op != OADDR) {
+		reg[D_BP]++;	// cant be used as a base
+		regalloc(&nod1, &qregnode, Z);
+		cgen(n->left, &nod1);
+		idx.ptr = nod1.reg;
+		regfree(&nod1);
+		reg[D_BP]--;
+	}
+	idx.reg = nod.reg;
+	regfree(&nod);
+	constnode.vconst = v;
+}
+
+void
+gins(int a, Node *f, Node *t)
+{
+
+	if(f != Z && f->op == OINDEX)
+		doindex(f);
+	if(t != Z && t->op == OINDEX)
+		doindex(t);
+	nextpc();
+	p->as = a;
+	if(f != Z)
+		naddr(f, &p->from);
+	if(t != Z)
+		naddr(t, &p->to);
+	if(debug['g'])
+		print("%P\n", p);
+}
+
+void
+gopcode(int o, Type *ty, Node *f, Node *t)
+{
+	int a, et;
+
+	et = TLONG;
+	if(ty != T)
+		et = ty->etype;
+	if(debug['M']) {
+		if(f != Z && f->type != T)
+			print("gop: %O %O[%s],", o, f->op, tnames[et]);
+		else
+			print("gop: %O Z,", o);
+		if(t != Z && t->type != T)
+			print("%O[%s]\n", t->op, tnames[t->type->etype]);
+		else
+			print("Z\n");
+	}
+	a = AGOK;
+	switch(o) {
+	case OCOM:
+		a = ANOTL;
+		if(et == TCHAR || et == TUCHAR)
+			a = ANOTB;
+		if(et == TSHORT || et == TUSHORT)
+			a = ANOTW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ANOTQ;
+		break;
+
+	case ONEG:
+		a = ANEGL;
+		if(et == TCHAR || et == TUCHAR)
+			a = ANEGB;
+		if(et == TSHORT || et == TUSHORT)
+			a = ANEGW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ANEGQ;
+		break;
+
+	case OADDR:
+		a = ALEAQ;
+		break;
+
+	case OASADD:
+	case OADD:
+		a = AADDL;
+		if(et == TCHAR || et == TUCHAR)
+			a = AADDB;
+		if(et == TSHORT || et == TUSHORT)
+			a = AADDW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = AADDQ;
+		if(et == TFLOAT)
+			a = AADDSS;
+		if(et == TDOUBLE)
+			a = AADDSD;
+		break;
+
+	case OASSUB:
+	case OSUB:
+		a = ASUBL;
+		if(et == TCHAR || et == TUCHAR)
+			a = ASUBB;
+		if(et == TSHORT || et == TUSHORT)
+			a = ASUBW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ASUBQ;
+		if(et == TFLOAT)
+			a = ASUBSS;
+		if(et == TDOUBLE)
+			a = ASUBSD;
+		break;
+
+	case OASOR:
+	case OOR:
+		a = AORL;
+		if(et == TCHAR || et == TUCHAR)
+			a = AORB;
+		if(et == TSHORT || et == TUSHORT)
+			a = AORW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = AORQ;
+		break;
+
+	case OASAND:
+	case OAND:
+		a = AANDL;
+		if(et == TCHAR || et == TUCHAR)
+			a = AANDB;
+		if(et == TSHORT || et == TUSHORT)
+			a = AANDW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = AANDQ;
+		break;
+
+	case OASXOR:
+	case OXOR:
+		a = AXORL;
+		if(et == TCHAR || et == TUCHAR)
+			a = AXORB;
+		if(et == TSHORT || et == TUSHORT)
+			a = AXORW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = AXORQ;
+		break;
+
+	case OASLSHR:
+	case OLSHR:
+		a = ASHRL;
+		if(et == TCHAR || et == TUCHAR)
+			a = ASHRB;
+		if(et == TSHORT || et == TUSHORT)
+			a = ASHRW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ASHRQ;
+		break;
+
+	case OASASHR:
+	case OASHR:
+		a = ASARL;
+		if(et == TCHAR || et == TUCHAR)
+			a = ASARB;
+		if(et == TSHORT || et == TUSHORT)
+			a = ASARW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ASARQ;
+		break;
+
+	case OASASHL:
+	case OASHL:
+		a = ASALL;
+		if(et == TCHAR || et == TUCHAR)
+			a = ASALB;
+		if(et == TSHORT || et == TUSHORT)
+			a = ASALW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ASALQ;
+		break;
+
+	case OFUNC:
+		a = ACALL;
+		break;
+
+	case OASMUL:
+	case OMUL:
+		if(f->op == OREGISTER && t != Z && isreg(t, D_AX) && reg[D_DX] == 0)
+			t = Z;
+		a = AIMULL;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = AIMULQ;
+		if(et == TFLOAT)
+			a = AMULSS;
+		if(et == TDOUBLE)
+			a = AMULSD;
+		break;
+
+	case OASMOD:
+	case OMOD:
+	case OASDIV:
+	case ODIV:
+		a = AIDIVL;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = AIDIVQ;
+		if(et == TFLOAT)
+			a = ADIVSS;
+		if(et == TDOUBLE)
+			a = ADIVSD;
+		break;
+
+	case OASLMUL:
+	case OLMUL:
+		a = AMULL;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = AMULQ;
+		break;
+
+	case OASLMOD:
+	case OLMOD:
+	case OASLDIV:
+	case OLDIV:
+		a = ADIVL;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ADIVQ;
+		break;
+
+	case OEQ:
+	case ONE:
+	case OLT:
+	case OLE:
+	case OGE:
+	case OGT:
+	case OLO:
+	case OLS:
+	case OHS:
+	case OHI:
+		a = ACMPL;
+		if(et == TCHAR || et == TUCHAR)
+			a = ACMPB;
+		if(et == TSHORT || et == TUSHORT)
+			a = ACMPW;
+		if(et == TVLONG || et == TUVLONG || et == TIND)
+			a = ACMPQ;
+		if(et == TFLOAT)
+			a = AUCOMISS;
+		if(et == TDOUBLE)
+			a = AUCOMISD;
+		gins(a, f, t);
+		switch(o) {
+		case OEQ:	a = AJEQ; break;
+		case ONE:	a = AJNE; break;
+		case OLT:	a = AJLT; break;
+		case OLE:	a = AJLE; break;
+		case OGE:	a = AJGE; break;
+		case OGT:	a = AJGT; break;
+		case OLO:	a = AJCS; break;
+		case OLS:	a = AJLS; break;
+		case OHS:	a = AJCC; break;
+		case OHI:	a = AJHI; break;
+		}
+		gins(a, Z, Z);
+		return;
+	}
+	if(a == AGOK)
+		diag(Z, "bad in gopcode %O", o);
+	gins(a, f, t);
+}
+
+int
+samaddr(Node *f, Node *t)
+{
+	return f->op == OREGISTER && t->op == OREGISTER && f->reg == t->reg;
+}
+
+void
+gbranch(int o)
+{
+	int a;
+
+	a = AGOK;
+	switch(o) {
+	case ORETURN:
+		a = ARET;
+		break;
+	case OGOTO:
+		a = AJMP;
+		break;
+	}
+	nextpc();
+	if(a == AGOK) {
+		diag(Z, "bad in gbranch %O",  o);
+		nextpc();
+	}
+	p->as = a;
+}
+
+void
+patch(Prog *op, long pc)
+{
+
+	op->to.offset = pc;
+	op->to.type = D_BRANCH;
+}
+
+void
+gpseudo(int a, Sym *s, Node *n)
+{
+
+	nextpc();
+	p->as = a;
+	p->from.type = D_EXTERN;
+	p->from.sym = s;
+	p->from.scale = (profileflg ? 0 : NOPROF);
+	if(s->class == CSTATIC)
+		p->from.type = D_STATIC;
+	naddr(n, &p->to);
+	if(a == ADATA || a == AGLOBL)
+		pc--;
+}
+
+int
+sconst(Node *n)
+{
+	long v;
+
+	if(n->op == OCONST && !typefd[n->type->etype]) {
+		v = n->vconst;
+		if(v >= -32766L && v < 32766L)
+			return 1;
+	}
+	return 0;
+}
+
+long
+exreg(Type *t)
+{
+	long o;
+
+	if(typechlpv[t->etype]) {
+		if(exregoffset <= REGEXT-4)
+			return 0;
+		o = exregoffset;
+		exregoffset--;
+		return o;
+	}
+	return 0;
+}
+
+schar	ewidth[NTYPE] =
+{
+	-1,		/*[TXXX]*/	
+	SZ_CHAR,	/*[TCHAR]*/	
+	SZ_CHAR,	/*[TUCHAR]*/
+	SZ_SHORT,	/*[TSHORT]*/
+	SZ_SHORT,	/*[TUSHORT]*/
+	SZ_INT,		/*[TINT]*/
+	SZ_INT,		/*[TUINT]*/
+	SZ_LONG,	/*[TLONG]*/
+	SZ_LONG,	/*[TULONG]*/
+	SZ_VLONG,	/*[TVLONG]*/
+	SZ_VLONG,	/*[TUVLONG]*/
+	SZ_FLOAT,	/*[TFLOAT]*/
+	SZ_DOUBLE,	/*[TDOUBLE]*/
+	SZ_IND,		/*[TIND]*/
+	0,		/*[TFUNC]*/
+	-1,		/*[TARRAY]*/
+	0,		/*[TVOID]*/
+	-1,		/*[TSTRUCT]*/
+	-1,		/*[TUNION]*/
+	SZ_INT,		/*[TENUM]*/
+};
+long	ncast[NTYPE] =
+{
+	0,				/*[TXXX]*/
+	BCHAR|BUCHAR,			/*[TCHAR]*/
+	BCHAR|BUCHAR,			/*[TUCHAR]*/	
+	BSHORT|BUSHORT,			/*[TSHORT]*/
+	BSHORT|BUSHORT,			/*[TUSHORT]*/
+	BINT|BUINT|BLONG|BULONG,	/*[TINT]*/		
+	BINT|BUINT|BLONG|BULONG,	/*[TUINT]*/
+	BINT|BUINT|BLONG|BULONG,	/*[TLONG]*/
+	BINT|BUINT|BLONG|BULONG,	/*[TULONG]*/
+	BVLONG|BUVLONG|BIND,			/*[TVLONG]*/
+	BVLONG|BUVLONG|BIND,			/*[TUVLONG]*/
+	BFLOAT,				/*[TFLOAT]*/
+	BDOUBLE,			/*[TDOUBLE]*/
+	BVLONG|BUVLONG|BIND,		/*[TIND]*/
+	0,				/*[TFUNC]*/
+	0,				/*[TARRAY]*/
+	0,				/*[TVOID]*/
+	BSTRUCT,			/*[TSTRUCT]*/
+	BUNION,				/*[TUNION]*/
+	0,				/*[TENUM]*/
+};

+ 746 - 0
sys/src/cmd/6c/vlrt.c

@@ -0,0 +1,746 @@
+typedef	unsigned long	ulong;
+typedef	unsigned int	uint;
+typedef	unsigned short	ushort;
+typedef	unsigned char	uchar;
+typedef	signed char	schar;
+
+#define	SIGN(n)	(1UL<<(n-1))
+
+typedef	struct	Vlong	Vlong;
+struct	Vlong
+{
+	union
+	{
+		struct
+		{
+			uint	lo;
+			uint	hi;
+		};
+		struct
+		{
+			ushort	lols;
+			ushort	loms;
+			ushort	hils;
+			ushort	hims;
+		};
+	};
+};
+
+void	abort(void);
+
+void _subv(Vlong*, Vlong, Vlong);
+
+void
+_d2v(Vlong *y, double d)
+{
+	union { double d; struct Vlong; } x;
+	uint xhi, xlo, ylo, yhi;
+	int sh;
+
+	x.d = d;
+
+	xhi = (x.hi & 0xfffff) | 0x100000;
+	xlo = x.lo;
+	sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+	ylo = 0;
+	yhi = 0;
+	if(sh >= 0) {
+		/* v = (hi||lo) >> sh */
+		if(sh < 32) {
+			if(sh == 0) {
+				ylo = xlo;
+				yhi = xhi;
+			} else {
+				ylo = (xlo >> sh) | (xhi << (32-sh));
+				yhi = xhi >> sh;
+			}
+		} else {
+			if(sh == 32) {
+				ylo = xhi;
+			} else
+			if(sh < 64) {
+				ylo = xhi >> (sh-32);
+			}
+		}
+	} else {
+		/* v = (hi||lo) << -sh */
+		sh = -sh;
+		if(sh <= 10) {
+			ylo = xlo << sh;
+			yhi = (xhi << sh) | (xlo >> (32-sh));
+		} else {
+			/* overflow */
+			yhi = d;	/* causes something awful */
+		}
+	}
+	if(x.hi & SIGN(32)) {
+		if(ylo != 0) {
+			ylo = -ylo;
+			yhi = ~yhi;
+		} else
+			yhi = -yhi;
+	}
+
+	y->hi = yhi;
+	y->lo = ylo;
+}
+
+void
+_f2v(Vlong *y, float f)
+{
+
+	_d2v(y, f);
+}
+
+double
+_v2d(Vlong x)
+{
+	if(x.hi & SIGN(32)) {
+		if(x.lo) {
+			x.lo = -x.lo;
+			x.hi = ~x.hi;
+		} else
+			x.hi = -x.hi;
+		return -((long)x.hi*4294967296. + x.lo);
+	}
+	return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+	return _v2d(x);
+}
+
+uint	_div64by32(Vlong, uint, uint*);
+void	_mul64by32(Vlong*, Vlong, uint);
+
+static void
+slowdodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
+{
+	uint numlo, numhi, denhi, denlo, quohi, quolo, t;
+	int i;
+
+	numhi = num.hi;
+	numlo = num.lo;
+	denhi = den.hi;
+	denlo = den.lo;
+
+	/*
+	 * get a divide by zero
+	 */
+	if(denlo==0 && denhi==0) {
+		numlo = numlo / denlo;
+	}
+
+	/*
+	 * set up the divisor and find the number of iterations needed
+	 */
+	if(numhi >= SIGN(32)) {
+		quohi = SIGN(32);
+		quolo = 0;
+	} else {
+		quohi = numhi;
+		quolo = numlo;
+	}
+	i = 0;
+	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
+		denhi = (denhi<<1) | (denlo>>31);
+		denlo <<= 1;
+		i++;
+	}
+
+	quohi = 0;
+	quolo = 0;
+	for(; i >= 0; i--) {
+		quohi = (quohi<<1) | (quolo>>31);
+		quolo <<= 1;
+		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
+			t = numlo;
+			numlo -= denlo;
+			if(numlo > t)
+				numhi--;
+			numhi -= denhi;
+			quolo |= 1;
+		}
+		denlo = (denlo>>1) | (denhi<<31);
+		denhi >>= 1;
+	}
+
+	if(q) {
+		q->lo = quolo;
+		q->hi = quohi;
+	}
+	if(r) {
+		r->lo = numlo;
+		r->hi = numhi;
+	}
+}
+
+static void
+dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
+{
+	uint n;
+	Vlong x, q, r;
+
+	if(den.hi > num.hi || (den.hi == num.hi && den.lo > num.lo)){
+		if(qp) {
+			qp->hi = 0;
+			qp->lo = 0;
+		}
+		if(rp) {
+			rp->hi = num.hi;
+			rp->lo = num.lo;
+		}
+		return;
+	}
+
+	if(den.hi != 0){
+		q.hi = 0;
+		n = num.hi/den.hi;
+		_mul64by32(&x, den, n);
+		if(x.hi > num.hi || (x.hi == num.hi && x.lo > num.lo))
+			slowdodiv(num, den, &q, &r);
+		else {
+			q.lo = n;
+			_subv(&r, num, x);
+		}
+	} else {
+		if(num.hi >= den.lo){
+			q.hi = n = num.hi/den.lo;
+			num.hi -= den.lo*n;
+		} else {
+			q.hi = 0;
+		}
+		q.lo = _div64by32(num, den.lo, &r.lo);
+		r.hi = 0;
+	}
+	if(qp) {
+		qp->lo = q.lo;
+		qp->hi = q.hi;
+	}
+	if(rp) {
+		rp->lo = r.lo;
+		rp->hi = r.hi;
+	}
+}
+
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		q->hi = 0;
+		q->lo = n.lo / d.lo;
+		return;
+	}
+	dodiv(n, d, q, 0);
+}
+
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		r->hi = 0;
+		r->lo = n.lo % d.lo;
+		return;
+	}
+	dodiv(n, d, 0, r);
+}
+
+static void
+vneg(Vlong *v)
+{
+
+	if(v->lo == 0) {
+		v->hi = -v->hi;
+		return;
+	}
+	v->lo = -v->lo;
+	v->hi = ~v->hi;
+}
+
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		q->lo = (long)n.lo / (long)d.lo;
+		q->hi = ((long)q->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, q, 0);
+	if(nneg != dneg)
+		vneg(q);
+}
+
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		r->lo = (long)n.lo % (long)d.lo;
+		r->hi = ((long)r->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, 0, r);
+	if(nneg)
+		vneg(r);
+}
+
+void
+_rshav(Vlong *r, Vlong a, int b)
+{
+	long t;
+
+	t = a.hi;
+	if(b >= 32) {
+		r->hi = t>>31;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->lo = t>>31;
+			return;
+		}
+		r->lo = t >> (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->hi = t;
+		r->lo = a.lo;
+		return;
+	}
+	r->hi = t >> b;
+	r->lo = (t << (32-b)) | (a.lo >> b);
+}
+
+void
+_rshlv(Vlong *r, Vlong a, int b)
+{
+	uint t;
+
+	t = a.hi;
+	if(b >= 32) {
+		r->hi = 0;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->lo = 0;
+			return;
+		}
+		r->lo = t >> (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->hi = t;
+		r->lo = a.lo;
+		return;
+	}
+	r->hi = t >> b;
+	r->lo = (t << (32-b)) | (a.lo >> b);
+}
+
+void
+_lshv(Vlong *r, Vlong a, int b)
+{
+	uint t;
+
+	t = a.lo;
+	if(b >= 32) {
+		r->lo = 0;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->hi = 0;
+			return;
+		}
+		r->hi = t << (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->lo = t;
+		r->hi = a.hi;
+		return;
+	}
+	r->lo = t << b;
+	r->hi = (t >> (32-b)) | (a.hi << b);
+}
+
+void
+_andv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi & b.hi;
+	r->lo = a.lo & b.lo;
+}
+
+void
+_orv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi | b.hi;
+	r->lo = a.lo | b.lo;
+}
+
+void
+_xorv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi ^ b.hi;
+	r->lo = a.lo ^ b.lo;
+}
+
+void
+_vpp(Vlong *l, Vlong *r)
+{
+
+	l->hi = r->hi;
+	l->lo = r->lo;
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+}
+
+void
+_vmm(Vlong *l, Vlong *r)
+{
+
+	l->hi = r->hi;
+	l->lo = r->lo;
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+}
+
+void
+_ppv(Vlong *l, Vlong *r)
+{
+
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+	l->hi = r->hi;
+	l->lo = r->lo;
+}
+
+void
+_mmv(Vlong *l, Vlong *r)
+{
+
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+	l->hi = r->hi;
+	l->lo = r->lo;
+}
+
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+	Vlong t, u;
+
+	u.lo = 0;
+	u.hi = 0;
+	switch(type) {
+	default:
+		abort();
+		break;
+
+	case 1:	/* schar */
+		t.lo = *(schar*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(schar*)lv = u.lo;
+		break;
+
+	case 2:	/* uchar */
+		t.lo = *(uchar*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uchar*)lv = u.lo;
+		break;
+
+	case 3:	/* short */
+		t.lo = *(short*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(short*)lv = u.lo;
+		break;
+
+	case 4:	/* ushort */
+		t.lo = *(ushort*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ushort*)lv = u.lo;
+		break;
+
+	case 9:	/* int */
+		t.lo = *(int*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(int*)lv = u.lo;
+		break;
+
+	case 10:	/* uint */
+		t.lo = *(uint*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uint*)lv = u.lo;
+		break;
+
+	case 5:	/* long */
+		t.lo = *(long*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(long*)lv = u.lo;
+		break;
+
+	case 6:	/* uint */
+		t.lo = *(uint*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uint*)lv = u.lo;
+		break;
+
+	case 7:	/* vlong */
+	case 8:	/* uvlong */
+		fn(&u, *(Vlong*)lv, rv);
+		*(Vlong*)lv = u;
+		break;
+	}
+	*ret = u;
+}
+
+void
+_p2v(Vlong *ret, void *p)
+{
+	long t;
+
+	t = (uint)p;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_sl2v(Vlong *ret, long sl)
+{
+	long t;
+
+	t = sl;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_ul2v(Vlong *ret, uint ul)
+{
+	long t;
+
+	t = ul;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_si2v(Vlong *ret, int si)
+{
+	long t;
+
+	t = si;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_ui2v(Vlong *ret, uint ui)
+{
+	long t;
+
+	t = ui;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_sh2v(Vlong *ret, long sh)
+{
+	long t;
+
+	t = (sh << 16) >> 16;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_uh2v(Vlong *ret, uint ul)
+{
+	long t;
+
+	t = ul & 0xffff;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_sc2v(Vlong *ret, long uc)
+{
+	long t;
+
+	t = (uc << 24) >> 24;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_uc2v(Vlong *ret, uint ul)
+{
+	long t;
+
+	t = ul & 0xff;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+long
+_v2sc(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xff;
+	return (t << 24) >> 24;
+}
+
+long
+_v2uc(Vlong rv)
+{
+
+	return rv.lo & 0xff;
+}
+
+long
+_v2sh(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xffff;
+	return (t << 16) >> 16;
+}
+
+long
+_v2uh(Vlong rv)
+{
+
+	return rv.lo & 0xffff;
+}
+
+long
+_v2sl(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+long
+_v2ul(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+long
+_v2si(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+long
+_v2ui(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+int
+_testv(Vlong rv)
+{
+	return rv.lo || rv.hi;
+}
+
+int
+_eqv(Vlong lv, Vlong rv)
+{
+	return lv.lo == rv.lo && lv.hi == rv.hi;
+}
+
+int
+_nev(Vlong lv, Vlong rv)
+{
+	return lv.lo != rv.lo || lv.hi != rv.hi;
+}
+
+int
+_ltv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_gtv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+int
+_gev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}
+
+int
+_lov(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi || 
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lsv(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi || 
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_hiv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi || 
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+int
+_hsv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi || 
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}

+ 461 - 0
sys/src/cmd/6l/asm.c

@@ -0,0 +1,461 @@
+#include	"l.h"
+
+#define	Dbufslop	100
+
+#define PADDR(a)	((a) & ~0xfffffffff0000000ull)
+
+vlong
+entryvalue(void)
+{
+	char *a;
+	Sym *s;
+
+	a = INITENTRY;
+	if(*a >= '0' && *a <= '9')
+		return atolwhex(a);
+	s = lookup(a, 0);
+	if(s->type == 0)
+		return INITTEXT;
+	switch(s->type) {
+	case STEXT:
+		break;
+	case SDATA:
+		if(dlm)
+			return s->value+INITDAT;
+	default:
+		diag("entry not text: %s", s->name);
+	}
+	return s->value;
+}
+
+void
+wputl(ushort w)
+{
+	cput(w);
+	cput(w>>8);
+}
+
+void
+wput(ushort w)
+{
+	cput(w>>8);
+	cput(w);
+}
+
+void
+lput(long l)
+{
+	cput(l>>24);
+	cput(l>>16);
+	cput(l>>8);
+	cput(l);
+}
+
+void
+llput(vlong v)
+{
+	lput(v>>32);
+	lput(v);
+}
+
+void
+lputl(long l)
+{
+	cput(l);
+	cput(l>>8);
+	cput(l>>16);
+	cput(l>>24);
+}
+
+void
+strnput(char *s, int n)
+{
+	for(; *s && n > 0; s++){
+		cput(*s);
+		n--;
+	}
+	while(n > 0){
+		cput(0);
+		n--;
+	}
+}
+
+void
+asmb(void)
+{
+	Prog *p;
+	long v, magic;
+	int a;
+	uchar *op1;
+	vlong vl;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f asmb\n", cputime());
+	Bflush(&bso);
+
+	seek(cout, HEADR, 0);
+	pc = INITTEXT;
+	curp = firstp;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		if(p->pc != pc) {
+			if(!debug['a'])
+				print("%P\n", curp);
+			diag("phase error %llux sb %llux in %s", p->pc, pc, TNAME);
+			pc = p->pc;
+		}
+		curp = p;
+		asmins(p);
+		a = (andptr - and);
+		if(cbc < a)
+			cflush();
+		if(debug['a']) {
+			Bprint(&bso, pcstr, pc);
+			for(op1 = and; op1 < andptr; op1++)
+				Bprint(&bso, "%.2ux", *op1 & 0xff);
+			Bprint(&bso, "\t%P\n", curp);
+		}
+		if(dlm) {
+			if(p->as == ATEXT)
+				reloca = nil;
+			else if(reloca != nil)
+				diag("reloc failure: %P", curp);
+		}
+		memmove(cbp, and, a);
+		cbp += a;
+		pc += a;
+		cbc -= a;
+	}
+	cflush();
+	switch(HEADTYPE) {
+	default:
+		diag("unknown header type %ld", HEADTYPE);
+	case 2:
+	case 5:
+		seek(cout, HEADR+textsize, 0);
+		break;
+	}
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f datblk\n", cputime());
+	Bflush(&bso);
+
+	if(dlm){
+		char buf[8];
+
+		write(cout, buf, INITDAT-textsize);
+		textsize = INITDAT;
+	}
+
+	for(v = 0; v < datsize; v += sizeof(buf)-Dbufslop) {
+		if(datsize-v > sizeof(buf)-Dbufslop)
+			datblk(v, sizeof(buf)-Dbufslop);
+		else
+			datblk(v, datsize-v);
+	}
+
+	symsize = 0;
+	spsize = 0;
+	lcsize = 0;
+	if(!debug['s']) {
+		if(debug['v'])
+			Bprint(&bso, "%5.2f sym\n", cputime());
+		Bflush(&bso);
+		switch(HEADTYPE) {
+		default:
+		case 2:
+		case 5:
+			seek(cout, HEADR+textsize+datsize, 0);
+			break;
+		}
+		if(!debug['s'])
+			asmsym();
+		if(debug['v'])
+			Bprint(&bso, "%5.2f sp\n", cputime());
+		Bflush(&bso);
+		if(debug['v'])
+			Bprint(&bso, "%5.2f pc\n", cputime());
+		Bflush(&bso);
+		if(!debug['s'])
+			asmlc();
+		if(dlm)
+			asmdyn();
+		cflush();
+	}
+	else if(dlm){
+		seek(cout, HEADR+textsize+datsize, 0);
+		asmdyn();
+		cflush();
+	}
+	if(debug['v'])
+		Bprint(&bso, "%5.2f headr\n", cputime());
+	Bflush(&bso);
+	seek(cout, 0L, 0);
+	switch(HEADTYPE) {
+	default:
+	case 2:	/* plan9 */
+		magic = 4*26*26+7;
+		magic |= 0x00008000;		/* fat header */
+		if(dlm)
+			magic |= 0x80000000;	/* dlm */
+		lput(magic);			/* magic */
+		lput(textsize);			/* sizes */
+		lput(datsize);
+		lput(bsssize);
+		lput(symsize);			/* nsyms */
+		vl = entryvalue();
+		lput(PADDR(vl));		/* va of entry */
+		lput(spsize);			/* sp offsets */
+		lput(lcsize);			/* line offsets */
+		llput(vl);			/* va of entry */
+		break;
+	case 3:	/* plan9 */
+		magic = 4*26*26+7;
+		if(dlm)
+			magic |= 0x80000000;
+		lput(magic);			/* magic */
+		lput(textsize);			/* sizes */
+		lput(datsize);
+		lput(bsssize);
+		lput(symsize);			/* nsyms */
+		lput(entryvalue());		/* va of entry */
+		lput(spsize);			/* sp offsets */
+		lput(lcsize);			/* line offsets */
+		break;
+	case 5:
+		strnput("\177ELF", 4);		/* e_ident */
+		cput(1);			/* class = 32 bit */
+		cput(1);			/* data = LSB */
+		cput(1);			/* version = CURRENT */
+		strnput("", 9);
+		wputl(2);			/* type = EXEC */
+		if(debug['8'])
+			wputl(3);		/* machine = 386 */
+		else
+			wputl(62);		/* machine = AMD64 */
+		lputl(1L);			/* version = CURRENT */
+		lputl(PADDR(entryvalue()));	/* entry vaddr */
+		lputl(52L);			/* offset to first phdr */
+		lputl(0L);			/* offset to first shdr */
+		lputl(0L);			/* processor specific flags */
+		wputl(52);			/* Ehdr size */
+		wputl(32);			/* Phdr size */
+		wputl(3);			/* # of Phdrs */
+		wputl(0);			/* Shdr size */
+		wputl(0);			/* # of Shdrs */
+		wputl(0);			/* Shdr string size */
+
+		lputl(1L);			/* text - type = PT_LOAD */
+		lputl(HEADR);			/* file offset */
+		lputl(INITTEXT);		/* vaddr */
+		lputl(PADDR(INITTEXT));		/* paddr */
+		lputl(textsize);		/* file size */
+		lputl(textsize);		/* memory size */
+		lputl(0x05L);			/* protections = RX */
+		lputl(INITRND);			/* alignment */
+
+		lputl(1L);			/* data - type = PT_LOAD */
+		lputl(HEADR+textsize);		/* file offset */
+		lputl(INITDAT);			/* vaddr */
+		lputl(PADDR(INITDAT));		/* paddr */
+		lputl(datsize);			/* file size */
+		lputl(datsize+bsssize);		/* memory size */
+		lputl(0x06L);			/* protections = RW */
+		lputl(INITRND);			/* alignment */
+
+		lputl(0L);			/* data - type = PT_NULL */
+		lputl(HEADR+textsize+datsize);	/* file offset */
+		lputl(0L);
+		lputl(0L);
+		lputl(symsize);			/* symbol table size */
+		lputl(lcsize);			/* line number size */
+		lputl(0x04L);			/* protections = R */
+		lputl(0x04L);			/* alignment */
+		break;
+	}
+	cflush();
+}
+
+void
+cflush(void)
+{
+	int n;
+
+	n = sizeof(buf.cbuf) - cbc;
+	if(n)
+		write(cout, buf.cbuf, n);
+	cbp = buf.cbuf;
+	cbc = sizeof(buf.cbuf);
+}
+
+void
+datblk(long s, long n)
+{
+	Prog *p;
+	uchar *cast;
+	long l, fl, j;
+	vlong o;
+	int i, c;
+
+	memset(buf.dbuf, 0, n+Dbufslop);
+	for(p = datap; p != P; p = p->link) {
+		curp = p;
+		l = p->from.sym->value + p->from.offset - s;
+		c = p->from.scale;
+		i = 0;
+		if(l < 0) {
+			if(l+c <= 0)
+				continue;
+			while(l < 0) {
+				l++;
+				i++;
+			}
+		}
+		if(l >= n)
+			continue;
+		if(p->as != AINIT && p->as != ADYNT) {
+			for(j=l+(c-i)-1; j>=l; j--)
+				if(buf.dbuf[j]) {
+					print("%P\n", p);
+					diag("multiple initialization");
+					break;
+				}
+		}
+		switch(p->to.type) {
+		case D_FCONST:
+			switch(c) {
+			default:
+			case 4:
+				fl = ieeedtof(&p->to.ieee);
+				cast = (uchar*)&fl;
+				if(debug['a'] && i == 0) {
+					Bprint(&bso, pcstr, l+s+INITDAT);
+					for(j=0; j<c; j++)
+						Bprint(&bso, "%.2ux", cast[fnuxi4[j]]);
+					Bprint(&bso, "\t%P\n", curp);
+				}
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[fnuxi4[i]];
+					l++;
+				}
+				break;
+			case 8:
+				cast = (uchar*)&p->to.ieee;
+				if(debug['a'] && i == 0) {
+					Bprint(&bso, pcstr, l+s+INITDAT);
+					for(j=0; j<c; j++)
+						Bprint(&bso, "%.2ux", cast[fnuxi8[j]]);
+					Bprint(&bso, "\t%P\n", curp);
+				}
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[fnuxi8[i]];
+					l++;
+				}
+				break;
+			}
+			break;
+
+		case D_SCONST:
+			if(debug['a'] && i == 0) {
+				Bprint(&bso, pcstr, l+s+INITDAT);
+				for(j=0; j<c; j++)
+					Bprint(&bso, "%.2ux", p->to.scon[j] & 0xff);
+				Bprint(&bso, "\t%P\n", curp);
+			}
+			for(; i<c; i++) {
+				buf.dbuf[l] = p->to.scon[i];
+				l++;
+			}
+			break;
+		default:
+			o = p->to.offset;
+			if(p->to.type == D_ADDR) {
+				if(p->to.index != D_STATIC && p->to.index != D_EXTERN)
+					diag("DADDR type%P", p);
+				if(p->to.sym) {
+					if(p->to.sym->type == SUNDEF)
+						ckoff(p->to.sym, o);
+					o += p->to.sym->value;
+					if(p->to.sym->type != STEXT && p->to.sym->type != SUNDEF)
+						o += INITDAT;
+					if(dlm)
+						dynreloc(p->to.sym, l+s+INITDAT, 1);
+				}
+			}
+			fl = o;
+			cast = (uchar*)&fl;
+			switch(c) {
+			default:
+				diag("bad nuxi %d %d\n%P", c, i, curp);
+				break;
+			case 1:
+				if(debug['a'] && i == 0) {
+					Bprint(&bso, pcstr, l+s+INITDAT);
+					for(j=0; j<c; j++)
+						Bprint(&bso, "%.2ux", cast[inuxi1[j]]);
+					Bprint(&bso, "\t%P\n", curp);
+				}
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi1[i]];
+					l++;
+				}
+				break;
+			case 2:
+				if(debug['a'] && i == 0) {
+					Bprint(&bso, pcstr, l+s+INITDAT);
+					for(j=0; j<c; j++)
+						Bprint(&bso, "%.2ux", cast[inuxi2[j]]);
+					Bprint(&bso, "\t%P\n", curp);
+				}
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi2[i]];
+					l++;
+				}
+				break;
+			case 4:
+				if(debug['a'] && i == 0) {
+					Bprint(&bso, pcstr, l+s+INITDAT);
+					for(j=0; j<c; j++)
+						Bprint(&bso, "%.2ux", cast[inuxi4[j]]);
+					Bprint(&bso, "\t%P\n", curp);
+				}
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi4[i]];
+					l++;
+				}
+				break;
+			case 8:
+				cast = (uchar*)&o;
+				if(debug['a'] && i == 0) {
+					Bprint(&bso, pcstr, l+s+INITDAT);
+					for(j=0; j<c; j++)
+						Bprint(&bso, "%.2ux", cast[inuxi8[j]]);
+					Bprint(&bso, "\t%P\n", curp);
+				}
+				for(; i<c; i++) {
+					buf.dbuf[l] = cast[inuxi8[i]];
+					l++;
+				}
+				break;
+			}
+			break;
+		}
+	}
+	write(cout, buf.dbuf, n);
+}
+
+vlong
+rnd(vlong v, vlong r)
+{
+	vlong c;
+
+	if(r <= 0)
+		return v;
+	v += r - 1;
+	c = v % r;
+	if(c < 0)
+		c += r;
+	v -= c;
+	return v;
+}

+ 55 - 0
sys/src/cmd/6l/compat.c

@@ -0,0 +1,55 @@
+#include	"l.h"
+
+/*
+ * fake malloc
+ */
+void*
+malloc(ulong n)
+{
+	void *p;
+
+	while(n & 7)
+		n++;
+	while(nhunk < n)
+		gethunk();
+	p = hunk;
+	nhunk -= n;
+	hunk += n;
+	return p;
+}
+
+void
+free(void *p)
+{
+	USED(p);
+}
+
+void*
+calloc(ulong m, ulong n)
+{
+	void *p;
+
+	n *= m;
+	p = malloc(n);
+	memset(p, 0, n);
+	return p;
+}
+
+void*
+realloc(void*, ulong)
+{
+	fprint(2, "realloc called\n");
+	abort();
+	return 0;
+}
+
+void*
+mysbrk(ulong size)
+{
+	return sbrk(size);
+}
+
+void
+setmalloctag(void*, ulong)
+{
+}

+ 390 - 0
sys/src/cmd/6l/l.h

@@ -0,0 +1,390 @@
+#include	<u.h>
+#include	<libc.h>
+#include	<bio.h>
+#include	"../6c/6.out.h"
+
+#ifndef	EXTERN
+#define	EXTERN	extern
+#endif
+
+#define	P		((Prog*)0)
+#define	S		((Sym*)0)
+#define	TNAME		(curtext?curtext->from.sym->name:noname)
+#define	cput(c)\
+	{ *cbp++ = c;\
+	if(--cbc <= 0)\
+		cflush(); }
+
+typedef	struct	Adr	Adr;
+typedef	struct	Prog	Prog;
+typedef	struct	Sym	Sym;
+typedef	struct	Auto	Auto;
+typedef	struct	Optab	Optab;
+typedef	struct	Movtab	Movtab;
+
+struct	Adr
+{
+	union
+	{
+		vlong	u0offset;
+		char	u0scon[8];
+		Prog	*u0cond;	/* not used, but should be D_BRANCH */
+		Ieee	u0ieee;
+	} u0;
+	union
+	{
+		Auto*	u1autom;
+		Sym*	u1sym;
+	} u1;
+	short	type;
+	char	index;
+	char	scale;
+};
+
+#define	offset	u0.u0offset
+#define	scon	u0.u0scon
+#define	cond	u0.u0cond
+#define	ieee	u0.u0ieee
+
+#define	autom	u1.u1autom
+#define	sym	u1.u1sym
+
+struct	Prog
+{
+	Adr	from;
+	Adr	to;
+	Prog	*forwd;
+	Prog*	link;
+	Prog*	pcond;	/* work on this */
+	vlong	pc;
+	long	line;
+	uchar	mark;	/* work on these */
+	uchar	back;
+
+	short	as;
+	char	width;		/* fake for DATA */
+	char	mode;	/* 16, 32, or 64 */
+};
+struct	Auto
+{
+	Sym*	asym;
+	Auto*	link;
+	long	aoffset;
+	short	type;
+};
+struct	Sym
+{
+	char	*name;
+	short	type;
+	short	version;
+	short	become;
+	short	frame;
+	uchar	subtype;
+	ushort	file;
+	vlong	value;
+	long	sig;
+	Sym*	link;
+};
+struct	Optab
+{
+	short	as;
+	uchar*	ytab;
+	uchar	prefix;
+	uchar	op[20];
+};
+struct	Movtab
+{
+	short	as;
+	uchar	ft;
+	uchar	tt;
+	uchar	code;
+	uchar	op[4];
+};
+
+enum
+{
+	STEXT		= 1,
+	SDATA,
+	SBSS,
+	SDATA1,
+	SXREF,
+	SFILE,
+	SCONST,
+	SUNDEF,
+
+	SIMPORT,
+	SEXPORT,
+
+	NHASH		= 10007,
+	NHUNK		= 100000,
+	MINSIZ		= 8,
+	STRINGSZ	= 200,
+	MINLC		= 1,
+	MAXIO		= 8192,
+	MAXHIST		= 20,				/* limit of path elements for history symbols */
+
+	Yxxx		= 0,
+	Ynone,
+	Yi0,
+	Yi1,
+	Yi8,
+	Ys32,
+	Yi32,
+	Yi64,
+	Yiauto,
+	Yal,
+	Ycl,
+	Yax,
+	Ycx,
+	Yrb,
+	Yrl,
+	Yrf,
+	Yf0,
+	Yrx,
+	Ymb,
+	Yml,
+	Ym,
+	Ybr,
+	Ycol,
+
+	Ycs,	Yss,	Yds,	Yes,	Yfs,	Ygs,
+	Ygdtr,	Yidtr,	Yldtr,	Ymsw,	Ytask,
+	Ycr0,	Ycr1,	Ycr2,	Ycr3,	Ycr4,	Ycr5,	Ycr6,	Ycr7,	Ycr8,
+	Ydr0,	Ydr1,	Ydr2,	Ydr3,	Ydr4,	Ydr5,	Ydr6,	Ydr7,
+	Ytr0,	Ytr1,	Ytr2,	Ytr3,	Ytr4,	Ytr5,	Ytr6,	Ytr7,	Yrl32,	Yrl64,
+	Ymr, Ymm,
+	Yxr, Yxm,
+	Ymax,
+
+	Zxxx		= 0,
+
+	Zlit,
+	Z_rp,
+	Zbr,
+	Zcall,
+	Zib_,
+	Zib_rp,
+	Zibo_m,
+	Zibo_m_xm,
+	Zil_,
+	Zil_rp,
+	Ziq_rp,
+	Zilo_m,
+	Ziqo_m,
+	Zjmp,
+	Zloop,
+	Zo_iw,
+	Zm_o,
+	Zm_r,
+	Zm_r_xm,
+	Zm_r_i_xm,
+	Zm_r_3d,
+	Zm_r_xm_nr,
+	Zr_m_xm_nr,
+	Zibm_r,	/* mmx1,mmx2/mem64,imm8 */
+	Zmb_r,
+	Zaut_r,
+	Zo_m,
+	Zo_m64,
+	Zpseudo,
+	Zr_m,
+	Zr_m_xm,
+	Zr_m_i_xm,
+	Zrp_,
+	Z_ib,
+	Z_il,
+	Zm_ibo,
+	Zm_ilo,
+	Zib_rr,
+	Zil_rr,
+	Zclr,
+	Zbyte,
+	Zmax,
+
+	Px		= 0,
+	P32		= 0x32,	/* 32-bit only */
+	Pe		= 0x66,	/* operand escape */
+	Pm		= 0x0f,	/* 2byte opcode escape */
+	Pq		= 0xff,	/* both escape */
+	Pb		= 0xfe,	/* byte operands */
+	Pf2		= 0xf2,	/* xmm escape 1 */
+	Pf3		= 0xf3,	/* xmm escape 2 */
+	Pw		= 0x48,	/* Rex.w */
+	Py		= 0x80,	/* defaults to 64-bit mode */
+
+	Rxf		= 1<<9,	/* internal flag for Rxr on from */
+	Rxt		= 1<<8,	/* internal flag for Rxr on to */
+	Rxw		= 1<<3,	/* =1, 64-bit operand size */
+	Rxr		= 1<<2,	/* extend modrm reg */
+	Rxx		= 1<<1,	/* extend sib index */
+	Rxb		= 1<<0,	/* extend modrm r/m, sib base, or opcode reg */
+
+	Roffset	= 22,		/* no. bits for offset in relocation address */
+	Rindex	= 10,		/* no. bits for index in relocation address */
+};
+
+EXTERN union
+{
+	struct
+	{
+		char	obuf[MAXIO];			/* output buffer */
+		uchar	ibuf[MAXIO];			/* input buffer */
+	} u;
+	char	dbuf[1];
+} buf;
+
+#define	cbuf	u.obuf
+#define	xbuf	u.ibuf
+
+#pragma	varargck	type	"A"	int
+#pragma	varargck	type	"A"	uint
+#pragma	varargck	type	"D"	Adr*
+#pragma	varargck	type	"P"	Prog*
+#pragma	varargck	type	"R"	int
+#pragma	varargck	type	"S"	char*
+
+#pragma	varargck	argpos	diag 1
+
+EXTERN	long	HEADR;
+EXTERN	long	HEADTYPE;
+EXTERN	vlong	INITDAT;
+EXTERN	long	INITRND;
+EXTERN	vlong	INITTEXT;
+EXTERN	char*	INITENTRY;		/* entry point */
+EXTERN	Biobuf	bso;
+EXTERN	long	bsssize;
+EXTERN	int	cbc;
+EXTERN	char*	cbp;
+EXTERN	char*	pcstr;
+EXTERN	int	cout;
+EXTERN	Auto*	curauto;
+EXTERN	Auto*	curhist;
+EXTERN	Prog*	curp;
+EXTERN	Prog*	curtext;
+EXTERN	Prog*	datap;
+EXTERN	Prog*	edatap;
+EXTERN	vlong	datsize;
+EXTERN	char	debug[128];
+EXTERN	char	literal[32];
+EXTERN	Prog*	etextp;
+EXTERN	Prog*	firstp;
+EXTERN	uchar	fnuxi8[8];
+EXTERN	uchar	fnuxi4[4];
+EXTERN	Sym*	hash[NHASH];
+EXTERN	Sym*	histfrog[MAXHIST];
+EXTERN	int	histfrogp;
+EXTERN	int	histgen;
+EXTERN	char*	library[50];
+EXTERN	char*	libraryobj[50];
+EXTERN	int	libraryp;
+EXTERN	int	xrefresolv;
+EXTERN	char*	hunk;
+EXTERN	uchar	inuxi1[1];
+EXTERN	uchar	inuxi2[2];
+EXTERN	uchar	inuxi4[4];
+EXTERN	uchar	inuxi8[8];
+EXTERN	char	ycover[Ymax*Ymax];
+EXTERN	uchar*	andptr;
+EXTERN	uchar*	rexptr;
+EXTERN	uchar	and[30];
+EXTERN	int	reg[D_NONE];
+EXTERN	int	regrex[D_NONE+1];
+EXTERN	Prog*	lastp;
+EXTERN	long	lcsize;
+EXTERN	int	nerrors;
+EXTERN	long	nhunk;
+EXTERN	long	nsymbol;
+EXTERN	char*	noname;
+EXTERN	char*	outfile;
+EXTERN	vlong	pc;
+EXTERN	long	spsize;
+EXTERN	Sym*	symlist;
+EXTERN	long	symsize;
+EXTERN	Prog*	textp;
+EXTERN	vlong	textsize;
+EXTERN	long	thunk;
+EXTERN	int	version;
+EXTERN	Prog	zprg;
+EXTERN	int	dtype;
+EXTERN	char*	paramspace;
+
+EXTERN	Adr*	reloca;
+EXTERN	int	doexp, dlm;
+EXTERN	int	imports, nimports;
+EXTERN	int	exports, nexports;
+EXTERN	char*	EXPTAB;
+EXTERN	Prog	undefp;
+
+#define	UP	(&undefp)
+
+extern	Optab	optab[];
+extern	Optab*	opindex[];
+extern	char*	anames[];
+
+int	Aconv(Fmt*);
+int	Dconv(Fmt*);
+int	Pconv(Fmt*);
+int	Rconv(Fmt*);
+int	Sconv(Fmt*);
+void	addhist(long, int);
+Prog*	appendp(Prog*);
+void	asmb(void);
+void	asmdyn(void);
+void	asmins(Prog*);
+void	asmlc(void);
+void	asmsp(void);
+void	asmsym(void);
+vlong	atolwhex(char*);
+Prog*	brchain(Prog*);
+Prog*	brloop(Prog*);
+void	buildop(void);
+void	cflush(void);
+void	ckoff(Sym*, long);
+Prog*	copyp(Prog*);
+double	cputime(void);
+void	datblk(long, long);
+void	diag(char*, ...);
+void	dodata(void);
+void	doinit(void);
+void	doprof1(void);
+void	doprof2(void);
+void	dostkoff(void);
+void	dynreloc(Sym*, ulong, int);
+vlong	entryvalue(void);
+void	errorexit(void);
+void	export(void);
+int	find1(long, int);
+int	find2(long, int);
+void	follow(void);
+void	gethunk(void);
+void	histtoauto(void);
+double	ieeedtod(Ieee*);
+long	ieeedtof(Ieee*);
+void	import(void);
+void	ldobj(int, long, char*);
+void	loadlib(void);
+void	listinit(void);
+Sym*	lookup(char*, int);
+void	lput(long);
+void	lputl(long);
+void	main(int, char*[]);
+void	mkfwd(void);
+void*	mysbrk(ulong);
+void	nuxiinit(void);
+void	objfile(char*);
+int	opsize(Prog*);
+void	patch(void);
+Prog*	prg(void);
+void	readundefs(char*, int);
+int	relinv(int);
+long	reuse(Prog*, Sym*);
+vlong	rnd(vlong, vlong);
+void	span(void);
+void	undef(void);
+void	undefsym(Sym*);
+vlong	vaddr(Adr*);
+void	wput(ushort);
+void	xdefine(char*, int, vlong);
+void	xfol(Prog*);
+int	zaddr(uchar*, Adr*, Sym*[]);
+void	zerosig(char*);

+ 350 - 0
sys/src/cmd/6l/list.c

@@ -0,0 +1,350 @@
+#include	"l.h"
+
+void
+listinit(void)
+{
+
+	fmtinstall('R', Rconv);
+	fmtinstall('A', Aconv);
+	fmtinstall('D', Dconv);
+	fmtinstall('S', Sconv);
+	fmtinstall('P', Pconv);
+}
+
+static	Prog	*bigP;
+
+int
+Pconv(Fmt *fp)
+{
+	char str[STRINGSZ];
+	Prog *p;
+
+	p = va_arg(fp->args, Prog*);
+	bigP = p;
+	switch(p->as) {
+	case ATEXT:
+		if(p->from.scale) {
+			sprint(str, "(%ld)	%A	%D,%d,%D",
+				p->line, p->as, &p->from, p->from.scale, &p->to);
+			break;
+		}
+	default:
+		sprint(str, "(%ld)	%A	%D,%D",
+			p->line, p->as, &p->from, &p->to);
+		break;
+	case ADATA:
+	case AINIT:
+	case ADYNT:
+		sprint(str, "(%ld)	%A	%D/%d,%D",
+			p->line, p->as, &p->from, p->from.scale, &p->to);
+		break;
+	}
+	bigP = P;
+	return fmtstrcpy(fp, str);
+}
+
+int
+Aconv(Fmt *fp)
+{
+	int i;
+
+	i = va_arg(fp->args, int);
+	return fmtstrcpy(fp, anames[i]);
+}
+
+int
+Dconv(Fmt *fp)
+{
+	char str[40], s[20];
+	Adr *a;
+	int i;
+
+	a = va_arg(fp->args, Adr*);
+	i = a->type;
+	if(i >= D_INDIR) {
+		if(a->offset)
+			sprint(str, "%lld(%R)", a->offset, i-D_INDIR);
+		else
+			sprint(str, "(%R)", i-D_INDIR);
+		goto brk;
+	}
+	switch(i) {
+
+	default:
+		if(a->offset)
+			sprint(str, "$%lld,%R", a->offset, i);
+		else
+			sprint(str, "%R", i);
+		break;
+
+	case D_NONE:
+		str[0] = 0;
+		break;
+
+	case D_BRANCH:
+		if(bigP != P && bigP->pcond != P)
+			if(a->sym != S)
+				sprint(str, "%llux+%s", bigP->pcond->pc,
+					a->sym->name);
+			else
+				sprint(str, "%llux", bigP->pcond->pc);
+		else
+			sprint(str, "%lld(PC)", a->offset);
+		break;
+
+	case D_EXTERN:
+		sprint(str, "%s+%lld(SB)", a->sym->name, a->offset);
+		break;
+
+	case D_STATIC:
+		sprint(str, "%s<%d>+%lld(SB)", a->sym->name,
+			a->sym->version, a->offset);
+		break;
+
+	case D_AUTO:
+		sprint(str, "%s+%lld(SP)", a->sym->name, a->offset);
+		break;
+
+	case D_PARAM:
+		if(a->sym)
+			sprint(str, "%s+%lld(%s)", a->sym->name, a->offset, paramspace);
+		else
+			sprint(str, "%lld(%s)", a->offset, paramspace);
+		break;
+
+	case D_CONST:
+		sprint(str, "$%lld", a->offset);
+		break;
+
+	case D_FCONST:
+		sprint(str, "$(%.8lux,%.8lux)", a->ieee.h, a->ieee.l);
+		break;
+
+	case D_SCONST:
+		sprint(str, "$\"%S\"", a->scon);
+		break;
+
+	case D_ADDR:
+		a->type = a->index;
+		a->index = D_NONE;
+		sprint(str, "$%D", a);
+		a->index = a->type;
+		a->type = D_ADDR;
+		goto conv;
+	}
+brk:
+	if(a->index != D_NONE) {
+		sprint(s, "(%R*%d)", a->index, a->scale);
+		strcat(str, s);
+	}
+conv:
+	return fmtstrcpy(fp, str);
+}
+
+char*	regstr[] =
+{
+	"AL",		/* [D_AL] */
+	"CL",
+	"DL",
+	"BL",
+	"SPB",
+	"BPB",
+	"SIB",
+	"DIB",
+	"R8B",
+	"R9B",
+	"R10B",
+	"R11B",
+	"R12B",
+	"R13B",
+	"R14B",
+	"R15B",
+
+	"AX",		/* [D_AX] */
+	"CX",
+	"DX",
+	"BX",
+	"SP",
+	"BP",
+	"SI",
+	"DI",
+	"R8",
+	"R9",
+	"R10",
+	"R11",
+	"R12",
+	"R13",
+	"R14",
+	"R15",
+
+	"AH",
+	"CH",
+	"DH",
+	"BH",
+
+	"F0",		/* [D_F0] */
+	"F1",
+	"F2",
+	"F3",
+	"F4",
+	"F5",
+	"F6",
+	"F7",
+
+	"M0",
+	"M1",
+	"M2",
+	"M3",
+	"M4",
+	"M5",
+	"M6",
+	"M7",
+
+	"X0",
+	"X1",
+	"X2",
+	"X3",
+	"X4",
+	"X5",
+	"X6",
+	"X7",
+	"X8",
+	"X9",
+	"X10",
+	"X11",
+	"X12",
+	"X13",
+	"X14",
+	"X15",
+
+	"CS",		/* [D_CS] */
+	"SS",
+	"DS",
+	"ES",
+	"FS",
+	"GS",
+
+	"GDTR",		/* [D_GDTR] */
+	"IDTR",		/* [D_IDTR] */
+	"LDTR",		/* [D_LDTR] */
+	"MSW",		/* [D_MSW] */
+	"TASK",		/* [D_TASK] */
+
+	"CR0",		/* [D_CR] */
+	"CR1",
+	"CR2",
+	"CR3",
+	"CR4",
+	"CR5",
+	"CR6",
+	"CR7",
+	"CR8",
+	"CR9",
+	"CR10",
+	"CR11",
+	"CR12",
+	"CR13",
+	"CR14",
+	"CR15",
+
+	"DR0",		/* [D_DR] */
+	"DR1",
+	"DR2",
+	"DR3",
+	"DR4",
+	"DR5",
+	"DR6",
+	"DR7",
+
+	"TR0",		/* [D_TR] */
+	"TR1",
+	"TR2",
+	"TR3",
+	"TR4",
+	"TR5",
+	"TR6",
+	"TR7",
+
+	"NONE",		/* [D_NONE] */
+};
+
+int
+Rconv(Fmt *fp)
+{
+	char str[20];
+	int r;
+
+	r = va_arg(fp->args, int);
+	if(r >= D_AL && r <= D_NONE)
+		sprint(str, "%s", regstr[r-D_AL]);
+	else
+		sprint(str, "gok(%d)", r);
+
+	return fmtstrcpy(fp, str);
+}
+
+int
+Sconv(Fmt *fp)
+{
+	int i, c;
+	char str[30], *p, *a;
+
+	a = va_arg(fp->args, char*);
+	p = str;
+	for(i=0; i<sizeof(double); i++) {
+		c = a[i] & 0xff;
+		if(c >= 'a' && c <= 'z' ||
+		   c >= 'A' && c <= 'Z' ||
+		   c >= '0' && c <= '9') {
+			*p++ = c;
+			continue;
+		}
+		*p++ = '\\';
+		switch(c) {
+		default:
+			if(c < 040 || c >= 0177)
+				break;	/* not portable */
+			p[-1] = c;
+			continue;
+		case 0:
+			*p++ = 'z';
+			continue;
+		case '\\':
+		case '"':
+			*p++ = c;
+			continue;
+		case '\n':
+			*p++ = 'n';
+			continue;
+		case '\t':
+			*p++ = 't';
+			continue;
+		}
+		*p++ = (c>>6) + '0';
+		*p++ = ((c>>3) & 7) + '0';
+		*p++ = (c & 7) + '0';
+	}
+	*p = 0;
+	return fmtstrcpy(fp, str);
+}
+
+void
+diag(char *fmt, ...)
+{
+	char buf[STRINGSZ], *tn;
+	va_list arg;
+
+	tn = "??none??";
+	if(curtext != P && curtext->from.sym != S)
+		tn = curtext->from.sym->name;
+	va_start(arg, fmt);
+	vseprint(buf, buf+sizeof(buf), fmt, arg);
+	va_end(arg);
+	print("%s: %s\n", tn, buf);
+
+	nerrors++;
+	if(nerrors > 20) {
+		print("too many errors\n");
+		errorexit();
+	}
+}

+ 31 - 0
sys/src/cmd/6l/mkfile

@@ -0,0 +1,31 @@
+</$objtype/mkfile
+
+TARG=6l
+OFILES=\
+	asm.$O\
+	obj.$O\
+	optab.$O\
+	pass.$O\
+	span.$O\
+	list.$O\
+	enam.$O\
+	compat.$O\
+
+HFILES=\
+	l.h\
+	../6c/6.out.h\
+
+BIN=/$objtype/bin
+CFILES=${OFILES:%.$O=%.c}
+CFILES=${CFILES:enam.c=../6c/enam.c}
+UPDATE=\
+	mkfile\
+	$HFILES\
+	$CFILES\
+	${TARG:%=/386/bin/%}\
+
+</sys/src/cmd/mkone
+
+enam.$O:	../6c/enam.c
+	$CC $CFLAGS ../6c/enam.c
+

+ 1595 - 0
sys/src/cmd/6l/obj.c

@@ -0,0 +1,1595 @@
+#define	EXTERN
+#include	"l.h"
+#include	<ar.h>
+
+#ifndef	DEFAULT
+#define	DEFAULT	'9'
+#endif
+
+char	*noname		= "<none>";
+char	symname[]	= SYMDEF;
+char	thechar		= '6';
+char	*thestring 	= "amd64";
+char	*paramspace	= "FP";
+
+/*
+ *	-H2 -T0x200028 -R0x200000	is plan9 format (was -T4136 -R4096)
+ *	-H3 -T4128 -R4096		is plan9 32-bit format
+ *	-H5 -T0x80110000 -R4096		is ELF32
+ *
+ *	options used: 189BLQSWabcjlnpsvz
+ */
+
+static int
+isobjfile(char *f)
+{
+	int n, v;
+	Biobuf *b;
+	char buf1[5], buf2[SARMAG];
+
+	b = Bopen(f, OREAD);
+	if(b == nil)
+		return 0;
+	n = Bread(b, buf1, 5);
+	if(n == 5 && (buf1[2] == 1 && buf1[3] == '<' || buf1[3] == 1 && buf1[4] == '<'))
+		v = 1;	/* good enough for our purposes */
+	else{
+		Bseek(b, 0, 0);
+		n = Bread(b, buf2, SARMAG);
+		v = n == SARMAG && strncmp(buf2, ARMAG, SARMAG) == 0;
+	}
+	Bterm(b);
+	return v;
+}
+
+void
+main(int argc, char *argv[])
+{
+	int i, c;
+	char *a;
+
+	Binit(&bso, 1, OWRITE);
+	cout = -1;
+	listinit();
+	memset(debug, 0, sizeof(debug));
+	nerrors = 0;
+	outfile = "6.out";
+	HEADTYPE = -1;
+	INITTEXT = -1;
+	INITDAT = -1;
+	INITRND = -1;
+	INITENTRY = 0;
+	ARGBEGIN {
+	default:
+		c = ARGC();
+		if(c >= 0 && c < sizeof(debug))
+			debug[c]++;
+		break;
+	case 'o': /* output to (next arg) */
+		outfile = ARGF();
+		break;
+	case 'E':
+		a = ARGF();
+		if(a)
+			INITENTRY = a;
+		break;
+	case 'H':
+		a = ARGF();
+		if(a)
+			HEADTYPE = atolwhex(a);
+		break;
+	case 'T':
+		a = ARGF();
+		if(a)
+			INITTEXT = atolwhex(a);
+		break;
+	case 'D':
+		a = ARGF();
+		if(a)
+			INITDAT = atolwhex(a);
+		break;
+	case 'R':
+		a = ARGF();
+		if(a)
+			INITRND = atolwhex(a);
+		break;
+	case 'x':	/* produce export table */
+		doexp = 1;
+		if(argv[1] != nil && argv[1][0] != '-' && !isobjfile(argv[1]))
+			readundefs(ARGF(), SEXPORT);
+		break;
+	case 'u':	/* produce dynamically loadable module */
+		dlm = 1;
+		debug['l']++;
+		if(argv[1] != nil && argv[1][0] != '-' && !isobjfile(argv[1]))
+			readundefs(ARGF(), SIMPORT);
+		break;
+	} ARGEND
+	USED(argc);
+	if(*argv == 0) {
+		diag("usage: 6l [-options] objects");
+		errorexit();
+	}
+	if(!debug['9'] && !debug['U'] && !debug['B'])
+		debug[DEFAULT] = 1;
+	if(HEADTYPE == -1) {
+		if(debug['B'])
+			HEADTYPE = 2;
+		if(debug['9'])
+			HEADTYPE = 2;
+	}
+	switch(HEADTYPE) {
+	default:
+		diag("unknown -H option");
+		errorexit();
+	case 2:	/* plan 9 */
+		HEADR = 32L+8L;
+		if(INITTEXT == -1)
+			INITTEXT = 0x200000+HEADR;
+		if(INITDAT == -1)
+			INITDAT = 0;
+		if(INITRND == -1)
+			INITRND = 0x200000;
+		break;
+	case 3:	/* plan 9 */
+		HEADR = 32L;
+		if(INITTEXT == -1)
+			INITTEXT = 4096+HEADR;
+		if(INITDAT == -1)
+			INITDAT = 0;
+		if(INITRND == -1)
+			INITRND = 4096;
+		break;
+	case 5:	/* elf32 executable */
+		HEADR = rnd(52L+3*32L, 16);
+		if(INITTEXT == -1)
+			INITTEXT = 0xf0110000L;
+		if(INITDAT == -1)
+			INITDAT = 0;
+		if(INITRND == -1)
+			INITRND = 4096;
+		break;
+	}
+	if(INITDAT != 0 && INITRND != 0)
+		print("warning: -D0x%llux is ignored because of -R0x%lux\n",
+			INITDAT, INITRND);
+	if(debug['v'])
+		Bprint(&bso, "HEADER = -H%ld -T0x%llux -D0x%llux -R0x%lux\n",
+			HEADTYPE, INITTEXT, INITDAT, INITRND);
+	Bflush(&bso);
+	for(i=1; optab[i].as; i++) {
+		c = optab[i].as;
+		if(opindex[c] != nil) {
+			diag("phase error in optab: %d (%A)", i, c);
+			errorexit();
+		}
+		opindex[c] = &optab[i];
+	}
+
+	for(i=0; i<Ymax; i++)
+		ycover[i*Ymax + i] = 1;
+
+	ycover[Yi0*Ymax + Yi8] = 1;
+	ycover[Yi1*Ymax + Yi8] = 1;
+
+	ycover[Yi0*Ymax + Ys32] = 1;
+	ycover[Yi1*Ymax + Ys32] = 1;
+	ycover[Yi8*Ymax + Ys32] = 1;
+
+	ycover[Yi0*Ymax + Yi32] = 1;
+	ycover[Yi1*Ymax + Yi32] = 1;
+	ycover[Yi8*Ymax + Yi32] = 1;
+	ycover[Ys32*Ymax + Yi32] = 1;
+
+	ycover[Yi0*Ymax + Yi64] = 1;
+	ycover[Yi1*Ymax + Yi64] = 1;
+	ycover[Yi8*Ymax + Yi64] = 1;
+	ycover[Ys32*Ymax + Yi64] = 1;
+	ycover[Yi32*Ymax + Yi64] = 1;
+
+	ycover[Yal*Ymax + Yrb] = 1;
+	ycover[Ycl*Ymax + Yrb] = 1;
+	ycover[Yax*Ymax + Yrb] = 1;
+	ycover[Ycx*Ymax + Yrb] = 1;
+	ycover[Yrx*Ymax + Yrb] = 1;
+	ycover[Yrl*Ymax + Yrb] = 1;
+
+	ycover[Ycl*Ymax + Ycx] = 1;
+
+	ycover[Yax*Ymax + Yrx] = 1;
+	ycover[Ycx*Ymax + Yrx] = 1;
+
+	ycover[Yax*Ymax + Yrl] = 1;
+	ycover[Ycx*Ymax + Yrl] = 1;
+	ycover[Yrx*Ymax + Yrl] = 1;
+
+	ycover[Yf0*Ymax + Yrf] = 1;
+
+	ycover[Yal*Ymax + Ymb] = 1;
+	ycover[Ycl*Ymax + Ymb] = 1;
+	ycover[Yax*Ymax + Ymb] = 1;
+	ycover[Ycx*Ymax + Ymb] = 1;
+	ycover[Yrx*Ymax + Ymb] = 1;
+	ycover[Yrb*Ymax + Ymb] = 1;
+	ycover[Yrl*Ymax + Ymb] = 1;
+	ycover[Ym*Ymax + Ymb] = 1;
+
+	ycover[Yax*Ymax + Yml] = 1;
+	ycover[Ycx*Ymax + Yml] = 1;
+	ycover[Yrx*Ymax + Yml] = 1;
+	ycover[Yrl*Ymax + Yml] = 1;
+	ycover[Ym*Ymax + Yml] = 1;
+
+	ycover[Yax*Ymax + Ymm] = 1;
+	ycover[Ycx*Ymax + Ymm] = 1;
+	ycover[Yrx*Ymax + Ymm] = 1;
+	ycover[Yrl*Ymax + Ymm] = 1;
+	ycover[Ym*Ymax + Ymm] = 1;
+	ycover[Ymr*Ymax + Ymm] = 1;
+
+	ycover[Yax*Ymax + Yxm] = 1;
+	ycover[Ycx*Ymax + Yxm] = 1;
+	ycover[Yrx*Ymax + Yxm] = 1;
+	ycover[Yrl*Ymax + Yxm] = 1;
+	ycover[Ym*Ymax + Yxm] = 1;
+	ycover[Yxr*Ymax + Yxm] = 1;
+
+	for(i=0; i<D_NONE; i++) {
+		reg[i] = -1;
+		if(i >= D_AL && i <= D_R15B) {
+			reg[i] = (i-D_AL) & 7;
+			if(i >= D_SPB && i <= D_DIB)
+				regrex[i] = 0x40;
+			if(i >= D_R8B && i <= D_R15B)
+				regrex[i] = Rxr | Rxx | Rxb;
+		}
+		if(i >= D_AH && i<= D_BH)
+			reg[i] = 4 + ((i-D_AH) & 7);
+		if(i >= D_AX && i <= D_R15) {
+			reg[i] = (i-D_AX) & 7;
+			if(i >= D_R8)
+				regrex[i] = Rxr | Rxx | Rxb;
+		}
+		if(i >= D_F0 && i <= D_F0+7)
+			reg[i] = (i-D_F0) & 7;
+		if(i >= D_M0 && i <= D_M0+7)
+			reg[i] = (i-D_M0) & 7;
+		if(i >= D_X0 && i <= D_X0+15) {
+			reg[i] = (i-D_X0) & 7;
+			if(i >= D_X0+8)
+				regrex[i] = Rxr | Rxx | Rxb;
+		}
+		if(i >= D_CR+8 && i <= D_CR+15) 
+			regrex[i] = Rxr;
+	}
+
+	zprg.link = P;
+	zprg.pcond = P;
+	zprg.back = 2;
+	zprg.as = AGOK;
+	zprg.from.type = D_NONE;
+	zprg.from.index = D_NONE;
+	zprg.from.scale = 1;
+	zprg.to = zprg.from;
+	zprg.mode = 64;
+
+	pcstr = "%.6llux ";
+	nuxiinit();
+	histgen = 0;
+	textp = P;
+	datap = P;
+	edatap = P;
+	pc = 0;
+	dtype = 4;
+	cout = create(outfile, 1, 0775);
+	if(cout < 0) {
+		diag("cannot create %s", outfile);
+		errorexit();
+	}
+	version = 0;
+	cbp = buf.cbuf;
+	cbc = sizeof(buf.cbuf);
+	firstp = prg();
+	lastp = firstp;
+
+	if(INITENTRY == 0) {
+		INITENTRY = "_main";
+		if(debug['p'])
+			INITENTRY = "_mainp";
+		if(!debug['l'])
+			lookup(INITENTRY, 0)->type = SXREF;
+	} else if(!(*INITENTRY >= '0' && *INITENTRY <= '9'))
+		lookup(INITENTRY, 0)->type = SXREF;
+
+	while(*argv)
+		objfile(*argv++);
+	if(!debug['l'])
+		loadlib();
+	firstp = firstp->link;
+	if(firstp == P)
+		errorexit();
+	if(doexp || dlm){
+		EXPTAB = "_exporttab";
+		zerosig(EXPTAB);
+		zerosig("etext");
+		zerosig("edata");
+		zerosig("end");
+		if(dlm){
+			import();
+			HEADTYPE = 2;
+			INITTEXT = 0;
+			INITDAT = 0;
+			INITRND = 8;
+			INITENTRY = EXPTAB;
+		}
+		export();
+	}
+	patch();
+	follow();
+	dodata();
+	dostkoff();
+	paramspace = "SP";	/* (FP) now (SP) on output */
+	if(debug['p'])
+		if(debug['1'])
+			doprof1();
+		else
+			doprof2();
+	span();
+	doinit();
+	asmb();
+	undef();
+	if(debug['v']) {
+		Bprint(&bso, "%5.2f cpu time\n", cputime());
+		Bprint(&bso, "%ld symbols\n", nsymbol);
+		Bprint(&bso, "%ld memory used\n", thunk);
+		Bprint(&bso, "%d sizeof adr\n", sizeof(Adr));
+		Bprint(&bso, "%d sizeof prog\n", sizeof(Prog));
+	}
+	Bflush(&bso);
+
+	errorexit();
+}
+
+void
+loadlib(void)
+{
+	int i;
+	long h;
+	Sym *s;
+
+loop:
+	xrefresolv = 0;
+	for(i=0; i<libraryp; i++) {
+		if(debug['v'])
+			Bprint(&bso, "%5.2f autolib: %s (from %s)\n", cputime(), library[i], libraryobj[i]);
+		objfile(library[i]);
+	}
+	if(xrefresolv)
+	for(h=0; h<nelem(hash); h++)
+	for(s = hash[h]; s != S; s = s->link)
+		if(s->type == SXREF)
+			goto loop;
+}
+
+void
+errorexit(void)
+{
+
+	if(nerrors) {
+		if(cout >= 0)
+			remove(outfile);
+		exits("error");
+	}
+	exits(0);
+}
+
+void
+objfile(char *file)
+{
+	long off, esym, cnt, l;
+	int f, work;
+	Sym *s;
+	char magbuf[SARMAG];
+	char name[100], pname[150];
+	struct ar_hdr arhdr;
+	char *e, *start, *stop;
+
+	if(file[0] == '-' && file[1] == 'l') {
+		if(debug['9'])
+			sprint(name, "/%s/lib/lib", thestring);
+		else
+			sprint(name, "/usr/%clib/lib", thechar);
+		strcat(name, file+2);
+		strcat(name, ".a");
+		file = name;
+	}
+	if(debug['v'])
+		Bprint(&bso, "%5.2f ldobj: %s\n", cputime(), file);
+	Bflush(&bso);
+	f = open(file, 0);
+	if(f < 0) {
+		diag("cannot open file: %s", file);
+		errorexit();
+	}
+	l = read(f, magbuf, SARMAG);
+	if(l != SARMAG || strncmp(magbuf, ARMAG, SARMAG)){
+		/* load it as a regular file */
+		l = seek(f, 0L, 2);
+		seek(f, 0L, 0);
+		ldobj(f, l, file);
+		close(f);
+		return;
+	}
+
+	l = read(f, &arhdr, SAR_HDR);
+	if(l != SAR_HDR) {
+		diag("%s: short read on archive file symbol header", file);
+		goto out;
+	}
+	if(strncmp(arhdr.name, symname, strlen(symname))) {
+		diag("%s: first entry not symbol header", file);
+		goto out;
+	}
+
+	esym = SARMAG + SAR_HDR + atolwhex(arhdr.size);
+	off = SARMAG + SAR_HDR;
+
+	/*
+	 * just bang the whole symbol file into memory
+	 */
+	seek(f, off, 0);
+	cnt = esym - off;
+	start = malloc(cnt + 10);
+	cnt = read(f, start, cnt);
+	if(cnt <= 0){
+		close(f);
+		return;
+	}
+	stop = &start[cnt];
+	memset(stop, 0, 10);
+
+	work = 1;
+	while(work) {
+		if(debug['v'])
+			Bprint(&bso, "%5.2f library pass: %s\n", cputime(), file);
+		Bflush(&bso);
+		work = 0;
+		for(e = start; e < stop; e = strchr(e+5, 0) + 1) {
+			s = lookup(e+5, 0);
+			if(s->type != SXREF)
+				continue;
+			sprint(pname, "%s(%s)", file, s->name);
+			if(debug['v'])
+				Bprint(&bso, "%5.2f library: %s\n", cputime(), pname);
+			Bflush(&bso);
+			l = e[1] & 0xff;
+			l |= (e[2] & 0xff) << 8;
+			l |= (e[3] & 0xff) << 16;
+			l |= (e[4] & 0xff) << 24;
+			seek(f, l, 0);
+			l = read(f, &arhdr, SAR_HDR);
+			if(l != SAR_HDR)
+				goto bad;
+			if(strncmp(arhdr.fmag, ARFMAG, sizeof(arhdr.fmag)))
+				goto bad;
+			l = atolwhex(arhdr.size);
+			ldobj(f, l, pname);
+			if(s->type == SXREF) {
+				diag("%s: failed to load: %s", file, s->name);
+				errorexit();
+			}
+			work = 1;
+			xrefresolv = 1;
+		}
+	}
+	return;
+
+bad:
+	diag("%s: bad or out of date archive", file);
+out:
+	close(f);
+}
+
+int
+zaddr(uchar *p, Adr *a, Sym *h[])
+{
+	int c, t, i;
+	long l;
+	Sym *s;
+	Auto *u;
+
+	t = p[0];
+	c = 1;
+	if(t & T_INDEX) {
+		a->index = p[c];
+		a->scale = p[c+1];
+		c += 2;
+	} else {
+		a->index = D_NONE;
+		a->scale = 0;
+	}
+	a->offset = 0;
+	if(t & T_OFFSET) {
+		/*
+		 * Hack until Charles fixes the compiler.
+		a->offset = (long)(p[c] | (p[c+1]<<8) | (p[c+2]<<16) | (p[c+3]<<24));
+		 */
+		l = p[c] | (p[c+1]<<8) | (p[c+2]<<16) | (p[c+3]<<24);
+		a->offset = l;
+		c += 4;
+		if(t & T_64) {
+			l = p[c] | (p[c+1]<<8) | (p[c+2]<<16) | (p[c+3]<<24);
+			a->offset = ((vlong)l<<32) | (a->offset & 0xFFFFFFFFUL);
+			c += 4;
+		}
+	}
+	a->sym = S;
+	if(t & T_SYM) {
+		a->sym = h[p[c]];
+		c++;
+	}
+	a->type = D_NONE;
+	if(t & T_FCONST) {
+		a->ieee.l = p[c] | (p[c+1]<<8) | (p[c+2]<<16) | (p[c+3]<<24);
+		a->ieee.h = p[c+4] | (p[c+5]<<8) | (p[c+6]<<16) | (p[c+7]<<24);
+		c += 8;
+		a->type = D_FCONST;
+	} else
+	if(t & T_SCONST) {
+		for(i=0; i<NSNAME; i++)
+			a->scon[i] = p[c+i];
+		c += NSNAME;
+		a->type = D_SCONST;
+	}
+	if(t & T_TYPE) {
+		a->type = p[c];
+		c++;
+	}
+	s = a->sym;
+	if(s == S)
+		return c;
+
+	t = a->type;
+	if(t != D_AUTO && t != D_PARAM)
+		return c;
+	l = a->offset;
+	for(u=curauto; u; u=u->link) {
+		if(u->asym == s)
+		if(u->type == t) {
+			if(u->aoffset > l)
+				u->aoffset = l;
+			return c;
+		}
+	}
+
+	while(nhunk < sizeof(Auto))
+		gethunk();
+	u = (Auto*)hunk;
+	nhunk -= sizeof(Auto);
+	hunk += sizeof(Auto);
+
+	u->link = curauto;
+	curauto = u;
+	u->asym = s;
+	u->aoffset = l;
+	u->type = t;
+	return c;
+}
+
+void
+addlib(char *obj)
+{
+	char name[1024], comp[256], *p;
+	int i;
+
+	if(histfrogp <= 0)
+		return;
+
+	if(histfrog[0]->name[1] == '/') {
+		sprint(name, "");
+		i = 1;
+	} else
+	if(histfrog[0]->name[1] == '.') {
+		sprint(name, ".");
+		i = 0;
+	} else {
+		if(debug['9'])
+			sprint(name, "/%s/lib", thestring);
+		else
+			sprint(name, "/usr/%clib", thechar);
+		i = 0;
+	}
+
+	for(; i<histfrogp; i++) {
+		snprint(comp, sizeof comp, histfrog[i]->name+1);
+		for(;;) {
+			p = strstr(comp, "$O");
+			if(p == 0)
+				break;
+			memmove(p+1, p+2, strlen(p+2)+1);
+			p[0] = thechar;
+		}
+		for(;;) {
+			p = strstr(comp, "$M");
+			if(p == 0)
+				break;
+			if(strlen(comp)+strlen(thestring)-2+1 >= sizeof comp) {
+				diag("library component too long");
+				return;
+			}
+			memmove(p+strlen(thestring), p+2, strlen(p+2)+1);
+			memmove(p, thestring, strlen(thestring));
+		}
+		if(strlen(name) + strlen(comp) + 3 >= sizeof(name)) {
+			diag("library component too long");
+			return;
+		}
+		strcat(name, "/");
+		strcat(name, comp);
+	}
+	for(i=0; i<libraryp; i++)
+		if(strcmp(name, library[i]) == 0)
+			return;
+	if(libraryp == nelem(library)){
+		diag("too many autolibs; skipping %s", name);
+		return;
+	}
+
+	p = malloc(strlen(name) + 1);
+	strcpy(p, name);
+	library[libraryp] = p;
+	p = malloc(strlen(obj) + 1);
+	strcpy(p, obj);
+	libraryobj[libraryp] = p;
+	libraryp++;
+}
+
+void
+addhist(long line, int type)
+{
+	Auto *u;
+	Sym *s;
+	int i, j, k;
+
+	u = malloc(sizeof(Auto));
+	s = malloc(sizeof(Sym));
+	s->name = malloc(2*(histfrogp+1) + 1);
+
+	u->asym = s;
+	u->type = type;
+	u->aoffset = line;
+	u->link = curhist;
+	curhist = u;
+
+	j = 1;
+	for(i=0; i<histfrogp; i++) {
+		k = histfrog[i]->value;
+		s->name[j+0] = k>>8;
+		s->name[j+1] = k;
+		j += 2;
+	}
+}
+
+void
+histtoauto(void)
+{
+	Auto *l;
+
+	while(l = curhist) {
+		curhist = l->link;
+		l->link = curauto;
+		curauto = l;
+	}
+}
+
+void
+collapsefrog(Sym *s)
+{
+	int i;
+
+	/*
+	 * bad encoding of path components only allows
+	 * MAXHIST components. if there is an overflow,
+	 * first try to collapse xxx/..
+	 */
+	for(i=1; i<histfrogp; i++)
+		if(strcmp(histfrog[i]->name+1, "..") == 0) {
+			memmove(histfrog+i-1, histfrog+i+1,
+				(histfrogp-i-1)*sizeof(histfrog[0]));
+			histfrogp--;
+			goto out;
+		}
+
+	/*
+	 * next try to collapse .
+	 */
+	for(i=0; i<histfrogp; i++)
+		if(strcmp(histfrog[i]->name+1, ".") == 0) {
+			memmove(histfrog+i, histfrog+i+1,
+				(histfrogp-i-1)*sizeof(histfrog[0]));
+			goto out;
+		}
+
+	/*
+	 * last chance, just truncate from front
+	 */
+	memmove(histfrog+0, histfrog+1,
+		(histfrogp-1)*sizeof(histfrog[0]));
+
+out:
+	histfrog[histfrogp-1] = s;
+}
+
+void
+nopout(Prog *p)
+{
+	p->as = ANOP;
+	p->from.type = D_NONE;
+	p->to.type = D_NONE;
+}
+
+uchar*
+readsome(int f, uchar *buf, uchar *good, uchar *stop, int max)
+{
+	int n;
+
+	n = stop - good;
+	memmove(buf, good, stop - good);
+	stop = buf + n;
+	n = MAXIO - n;
+	if(n > max)
+		n = max;
+	n = read(f, stop, n);
+	if(n <= 0)
+		return 0;
+	return stop + n;
+}
+
+void
+ldobj(int f, long c, char *pn)
+{
+	vlong ipc;
+	Prog *p, *t;
+	uchar *bloc, *bsize, *stop;
+	int v, o, r, skip, mode;
+	Sym *h[NSYM], *s, *di;
+	ulong sig;
+	static int files;
+	static char **filen;
+	char **nfilen;
+
+	if((files&15) == 0){
+		nfilen = malloc((files+16)*sizeof(char*));
+		memmove(nfilen, filen, files*sizeof(char*));
+		free(filen);
+		filen = nfilen;
+	}
+	filen[files++] = strdup(pn);
+
+	bsize = buf.xbuf;
+	bloc = buf.xbuf;
+	di = S;
+
+newloop:
+	memset(h, 0, sizeof(h));
+	version++;
+	histfrogp = 0;
+	ipc = pc;
+	skip = 0;
+	mode = 64;
+
+loop:
+	if(c <= 0)
+		goto eof;
+	r = bsize - bloc;
+	if(r < 100 && r < c) {		/* enough for largest prog */
+		bsize = readsome(f, buf.xbuf, bloc, bsize, c);
+		if(bsize == 0)
+			goto eof;
+		bloc = buf.xbuf;
+		goto loop;
+	}
+	o = bloc[0] | (bloc[1] << 8);
+	if(o <= AXXX || o >= ALAST) {
+		if(o < 0)
+			goto eof;
+		diag("%s: opcode out of range %d", pn, o);
+		print("	probably not a .6 file\n");
+		errorexit();
+	}
+
+	if(o == ANAME || o == ASIGNAME) {
+		sig = 0;
+		if(o == ASIGNAME) {
+			sig = bloc[2] | (bloc[3]<<8) | (bloc[4]<<16) | (bloc[5]<<24);
+			bloc += 4;
+			c -= 4;
+		}
+		stop = memchr(&bloc[4], 0, bsize-&bloc[4]);
+		if(stop == 0){
+			bsize = readsome(f, buf.xbuf, bloc, bsize, c);
+			if(bsize == 0)
+				goto eof;
+			bloc = buf.xbuf;
+			stop = memchr(&bloc[4], 0, bsize-&bloc[4]);
+			if(stop == 0){
+				fprint(2, "%s: name too long\n", pn);
+				errorexit();
+			}
+		}
+		v = bloc[2];	/* type */
+		o = bloc[3];	/* sym */
+		bloc += 4;
+		c -= 4;
+
+		r = 0;
+		if(v == D_STATIC)
+			r = version;
+		s = lookup((char*)bloc, r);
+		c -= &stop[1] - bloc;
+		bloc = stop + 1;
+
+		if(debug['S'] && r == 0)
+			sig = 1729;
+		if(sig != 0){
+			if(s->sig != 0 && s->sig != sig)
+				diag("incompatible type signatures %lux(%s) and %lux(%s) for %s", s->sig, filen[s->file], sig, pn, s->name);
+			s->sig = sig;
+			s->file = files-1;
+		}
+
+		if(debug['W'])
+			print("	ANAME	%s\n", s->name);
+		h[o] = s;
+		if((v == D_EXTERN || v == D_STATIC) && s->type == 0)
+			s->type = SXREF;
+		if(v == D_FILE) {
+			if(s->type != SFILE) {
+				histgen++;
+				s->type = SFILE;
+				s->value = histgen;
+			}
+			if(histfrogp < MAXHIST) {
+				histfrog[histfrogp] = s;
+				histfrogp++;
+			} else
+				collapsefrog(s);
+		}
+		goto loop;
+	}
+
+	while(nhunk < sizeof(Prog))
+		gethunk();
+	p = (Prog*)hunk;
+	nhunk -= sizeof(Prog);
+	hunk += sizeof(Prog);
+
+	p->as = o;
+	p->line = bloc[2] | (bloc[3] << 8) | (bloc[4] << 16) | (bloc[5] << 24);
+	p->back = 2;
+	p->mode = mode;
+	r = zaddr(bloc+6, &p->from, h) + 6;
+	r += zaddr(bloc+r, &p->to, h);
+	bloc += r;
+	c -= r;
+
+	if(debug['W'])
+		print("%P\n", p);
+
+	switch(p->as) {
+	case AHISTORY:
+		if(p->to.offset == -1) {
+			addlib(pn);
+			histfrogp = 0;
+			goto loop;
+		}
+		addhist(p->line, D_FILE);		/* 'z' */
+		if(p->to.offset)
+			addhist(p->to.offset, D_FILE1);	/* 'Z' */
+		histfrogp = 0;
+		goto loop;
+
+	case AEND:
+		histtoauto();
+		if(curtext != P)
+			curtext->to.autom = curauto;
+		curauto = 0;
+		curtext = P;
+		if(c)
+			goto newloop;
+		return;
+
+	case AGLOBL:
+		s = p->from.sym;
+		if(s->type == 0 || s->type == SXREF) {
+			s->type = SBSS;
+			s->value = 0;
+		}
+		if(s->type != SBSS) {
+			diag("%s: redefinition: %s in %s",
+				pn, s->name, TNAME);
+			s->type = SBSS;
+			s->value = 0;
+		}
+		if(p->to.offset > s->value)
+			s->value = p->to.offset;
+		goto loop;
+
+	case ADYNT:
+		if(p->to.sym == S) {
+			diag("DYNT without a sym\n%P", p);
+			break;
+		}
+		di = p->to.sym;
+		p->from.scale = 4;
+		if(di->type == SXREF) {
+			if(debug['z'])
+				Bprint(&bso, "%P set to %d\n", p, dtype);
+			di->type = SCONST;
+			di->value = dtype;
+			dtype += 4;
+		}
+		if(p->from.sym == S)
+			break;
+
+		p->from.offset = di->value;
+		p->from.sym->type = SDATA;
+		if(curtext == P) {
+			diag("DYNT not in text: %P", p);
+			break;
+		}
+		p->to.sym = curtext->from.sym;
+		p->to.type = D_ADDR;
+		p->to.index = D_EXTERN;
+		goto data;
+
+	case AINIT:
+		if(p->from.sym == S) {
+			diag("INIT without a sym\n%P", p);
+			break;
+		}
+		if(di == S) {
+			diag("INIT without previous DYNT\n%P", p);
+			break;
+		}
+		p->from.offset = di->value;
+		p->from.sym->type = SDATA;
+		goto data;
+
+	case ADATA:
+	data:
+		if(edatap == P)
+			datap = p;
+		else
+			edatap->link = p;
+		edatap = p;
+		p->link = P;
+		goto loop;
+
+	case AGOK:
+		diag("%s: GOK opcode in %s", pn, TNAME);
+		pc++;
+		goto loop;
+
+	case ATEXT:
+		if(curtext != P) {
+			histtoauto();
+			curtext->to.autom = curauto;
+			curauto = 0;
+		}
+		skip = 0;
+		curtext = p;
+		s = p->from.sym;
+		if(s == S) {
+			diag("%s: no TEXT symbol: %P", pn, p);
+			errorexit();
+		}
+		if(s->type != 0 && s->type != SXREF) {
+			if(p->from.scale & DUPOK) {
+				skip = 1;
+				goto casdef;
+			}
+			diag("%s: redefinition: %s\n%P", pn, s->name, p);
+		}
+		s->type = STEXT;
+		s->value = pc;
+		lastp->link = p;
+		lastp = p;
+		p->pc = pc;
+		pc++;
+		if(textp == P) {
+			textp = p;
+			etextp = p;
+			goto loop;
+		}
+		etextp->pcond = p;
+		etextp = p;
+		goto loop;
+
+	case AMODE:
+		if(p->from.type == D_CONST || p->from.type == D_INDIR+D_NONE){
+			switch((int)p->from.offset){
+			case 16: case 32: case 64:
+				mode = p->from.offset;
+				break;
+			}
+		}
+		goto loop;
+
+	case AFMOVF:
+	case AFADDF:
+	case AFSUBF:
+	case AFSUBRF:
+	case AFMULF:
+	case AFDIVF:
+	case AFDIVRF:
+	case AFCOMF:
+	case AFCOMFP:
+	case AMOVSS:
+	case AADDSS:
+	case ASUBSS:
+	case AMULSS:
+	case ADIVSS:
+	case ACOMISS:
+	case AUCOMISS:
+		if(skip)
+			goto casdef;
+		if(p->from.type == D_FCONST) {
+			/* size sb 9 max */
+			sprint(literal, "$%lux", ieeedtof(&p->from.ieee));
+			s = lookup(literal, 0);
+			if(s->type == 0) {
+				s->type = SBSS;
+				s->value = 4;
+				t = prg();
+				t->as = ADATA;
+				t->line = p->line;
+				t->from.type = D_EXTERN;
+				t->from.sym = s;
+				t->from.scale = 4;
+				t->to = p->from;
+				if(edatap == P)
+					datap = t;
+				else
+					edatap->link = t;
+				edatap = t;
+				t->link = P;
+			}
+			p->from.type = D_EXTERN;
+			p->from.sym = s;
+			p->from.offset = 0;
+		}
+		goto casdef;
+
+	case AFMOVD:
+	case AFADDD:
+	case AFSUBD:
+	case AFSUBRD:
+	case AFMULD:
+	case AFDIVD:
+	case AFDIVRD:
+	case AFCOMD:
+	case AFCOMDP:
+	case AMOVSD:
+	case AADDSD:
+	case ASUBSD:
+	case AMULSD:
+	case ADIVSD:
+	case ACOMISD:
+	case AUCOMISD:
+		if(skip)
+			goto casdef;
+		if(p->from.type == D_FCONST) {
+			/* size sb 18 max */
+			sprint(literal, "$%lux.%lux",
+				p->from.ieee.l, p->from.ieee.h);
+			s = lookup(literal, 0);
+			if(s->type == 0) {
+				s->type = SBSS;
+				s->value = 8;
+				t = prg();
+				t->as = ADATA;
+				t->line = p->line;
+				t->from.type = D_EXTERN;
+				t->from.sym = s;
+				t->from.scale = 8;
+				t->to = p->from;
+				if(edatap == P)
+					datap = t;
+				else
+					edatap->link = t;
+				edatap = t;
+				t->link = P;
+			}
+			p->from.type = D_EXTERN;
+			p->from.sym = s;
+			p->from.offset = 0;
+		}
+		goto casdef;
+
+	casdef:
+	default:
+		if(skip)
+			nopout(p);
+
+		if(p->to.type == D_BRANCH)
+			p->to.offset += ipc;
+		lastp->link = p;
+		lastp = p;
+		p->pc = pc;
+		pc++;
+		goto loop;
+	}
+	goto loop;
+
+eof:
+	diag("truncated object file: %s", pn);
+}
+
+Sym*
+lookup(char *symb, int v)
+{
+	Sym *s;
+	char *p;
+	long h;
+	int l, c;
+
+	h = v;
+	for(p=symb; c = *p; p++)
+		h = h+h+h + c;
+	l = (p - symb) + 1;
+	if(h < 0)
+		h = ~h;
+	h %= NHASH;
+	for(s = hash[h]; s != S; s = s->link)
+		if(s->version == v)
+		if(memcmp(s->name, symb, l) == 0)
+			return s;
+
+	while(nhunk < sizeof(Sym))
+		gethunk();
+	s = (Sym*)hunk;
+	nhunk -= sizeof(Sym);
+	hunk += sizeof(Sym);
+
+	s->name = malloc(l + 1);
+	memmove(s->name, symb, l);
+
+	s->link = hash[h];
+	s->type = 0;
+	s->version = v;
+	s->value = 0;
+	s->sig = 0;
+	hash[h] = s;
+	nsymbol++;
+	return s;
+}
+
+Prog*
+prg(void)
+{
+	Prog *p;
+
+	while(nhunk < sizeof(Prog))
+		gethunk();
+	p = (Prog*)hunk;
+	nhunk -= sizeof(Prog);
+	hunk += sizeof(Prog);
+
+	*p = zprg;
+	return p;
+}
+
+Prog*
+copyp(Prog *q)
+{
+	Prog *p;
+
+	p = prg();
+	*p = *q;
+	return p;
+}
+
+Prog*
+appendp(Prog *q)
+{
+	Prog *p;
+
+	p = prg();
+	p->link = q->link;
+	q->link = p;
+	p->line = q->line;
+	p->mode = q->mode;
+	return p;
+}
+
+void
+gethunk(void)
+{
+	char *h;
+	long nh;
+
+	nh = NHUNK;
+	if(thunk >= 5L*NHUNK) {
+		nh = 5L*NHUNK;
+		if(thunk >= 25L*NHUNK)
+			nh = 25L*NHUNK;
+	}
+	h = mysbrk(nh);
+	if(h == (char*)-1) {
+		diag("out of memory");
+		errorexit();
+	}
+	hunk = h;
+	nhunk = nh;
+	thunk += nh;
+}
+
+void
+doprof1(void)
+{
+	Sym *s;
+	long n;
+	Prog *p, *q;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f profile 1\n", cputime());
+	Bflush(&bso);
+	s = lookup("__mcount", 0);
+	n = 1;
+	for(p = firstp->link; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			q = prg();
+			q->line = p->line;
+			q->link = datap;
+			datap = q;
+			q->as = ADATA;
+			q->from.type = D_EXTERN;
+			q->from.offset = n*4;
+			q->from.sym = s;
+			q->from.scale = 4;
+			q->to = p->from;
+			q->to.type = D_CONST;
+
+			q = prg();
+			q->line = p->line;
+			q->pc = p->pc;
+			q->link = p->link;
+			p->link = q;
+			p = q;
+			p->as = AADDL;
+			p->from.type = D_CONST;
+			p->from.offset = 1;
+			p->to.type = D_EXTERN;
+			p->to.sym = s;
+			p->to.offset = n*4 + 4;
+
+			n += 2;
+			continue;
+		}
+	}
+	q = prg();
+	q->line = 0;
+	q->link = datap;
+	datap = q;
+
+	q->as = ADATA;
+	q->from.type = D_EXTERN;
+	q->from.sym = s;
+	q->from.scale = 4;
+	q->to.type = D_CONST;
+	q->to.offset = n;
+
+	s->type = SBSS;
+	s->value = n*4;
+}
+
+void
+doprof2(void)
+{
+	Sym *s2, *s4;
+	Prog *p, *q, *q2, *ps2, *ps4;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f profile 2\n", cputime());
+	Bflush(&bso);
+
+	if(debug['e']){
+		s2 = lookup("_tracein", 0);
+		s4 = lookup("_traceout", 0);
+	}else{
+		s2 = lookup("_profin", 0);
+		s4 = lookup("_profout", 0);
+	}
+	if(s2->type != STEXT || s4->type != STEXT) {
+		if(debug['e'])
+			diag("_tracein/_traceout not defined %d %d", s2->type, s4->type);
+		else
+			diag("_profin/_profout not defined");
+		return;
+	}
+
+	ps2 = P;
+	ps4 = P;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			if(p->from.sym == s2) {
+				p->from.scale = 1;
+				ps2 = p;
+			}
+			if(p->from.sym == s4) {
+				p->from.scale = 1;
+				ps4 = p;
+			}
+		}
+	}
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			curtext = p;
+
+			if(p->from.scale & NOPROF) {	/* dont profile */
+				for(;;) {
+					q = p->link;
+					if(q == P)
+						break;
+					if(q->as == ATEXT)
+						break;
+					p = q;
+				}
+				continue;
+			}
+
+			/*
+			 * JMPL	profin
+			 */
+			q = prg();
+			q->line = p->line;
+			q->pc = p->pc;
+			q->link = p->link;
+			if(debug['e']){		/* embedded tracing */
+				q2 = prg();
+				p->link = q2;
+				q2->link = q;
+
+				q2->line = p->line;
+				q2->pc = p->pc;
+
+				q2->as = AJMP;
+				q2->to.type = D_BRANCH;
+				q2->to.sym = p->to.sym;
+				q2->pcond = q->link;
+			}else
+				p->link = q;
+			p = q;
+			p->as = ACALL;
+			p->to.type = D_BRANCH;
+			p->pcond = ps2;
+			p->to.sym = s2;
+
+			continue;
+		}
+		if(p->as == ARET) {
+			/*
+			 * RET (default)
+			 */
+			if(debug['e']){		/* embedded tracing */
+				q = prg();
+				q->line = p->line;
+				q->pc = p->pc;
+				q->link = p->link;
+				p->link = q;
+				p = q;
+			}
+			/*
+			 * RET
+			 */
+			q = prg();
+			q->as = ARET;
+			q->from = p->from;
+			q->to = p->to;
+			q->link = p->link;
+			p->link = q;
+
+			/*
+			 * JAL	profout
+			 */
+			p->as = ACALL;
+			p->from = zprg.from;
+			p->to = zprg.to;
+			p->to.type = D_BRANCH;
+			p->pcond = ps4;
+			p->to.sym = s4;
+
+			p = q;
+
+			continue;
+		}
+	}
+}
+
+void
+nuxiinit(void)
+{
+	int i, c;
+
+	for(i=0; i<4; i++) {
+		c = find1(0x04030201L, i+1);
+		if(i < 2)
+			inuxi2[i] = c;
+		if(i < 1)
+			inuxi1[i] = c;
+		inuxi4[i] = c;
+		inuxi8[i] = c;
+		inuxi8[i+4] = c+4;
+		fnuxi4[i] = c;
+		fnuxi8[i] = c;
+		fnuxi8[i+4] = c+4;
+	}
+	if(debug['v']) {
+		Bprint(&bso, "inuxi = ");
+		for(i=0; i<1; i++)
+			Bprint(&bso, "%d", inuxi1[i]);
+		Bprint(&bso, " ");
+		for(i=0; i<2; i++)
+			Bprint(&bso, "%d", inuxi2[i]);
+		Bprint(&bso, " ");
+		for(i=0; i<4; i++)
+			Bprint(&bso, "%d", inuxi4[i]);
+		Bprint(&bso, " ");
+		for(i=0; i<8; i++)
+			Bprint(&bso, "%d", inuxi8[i]);
+		Bprint(&bso, "\nfnuxi = ");
+		for(i=0; i<4; i++)
+			Bprint(&bso, "%d", fnuxi4[i]);
+		Bprint(&bso, " ");
+		for(i=0; i<8; i++)
+			Bprint(&bso, "%d", fnuxi8[i]);
+		Bprint(&bso, "\n");
+	}
+	Bflush(&bso);
+}
+
+int
+find1(long l, int c)
+{
+	char *p;
+	int i;
+
+	p = (char*)&l;
+	for(i=0; i<4; i++)
+		if(*p++ == c)
+			return i;
+	return 0;
+}
+
+int
+find2(long l, int c)
+{
+	short *p;
+	int i;
+
+	p = (short*)&l;
+	for(i=0; i<4; i+=2) {
+		if(((*p >> 8) & 0xff) == c)
+			return i;
+		if((*p++ & 0xff) == c)
+			return i+1;
+	}
+	return 0;
+}
+
+long
+ieeedtof(Ieee *e)
+{
+	int exp;
+	long v;
+
+	if(e->h == 0)
+		return 0;
+	exp = (e->h>>20) & ((1L<<11)-1L);
+	exp -= (1L<<10) - 2L;
+	v = (e->h & 0xfffffL) << 3;
+	v |= (e->l >> 29) & 0x7L;
+	if((e->l >> 28) & 1) {
+		v++;
+		if(v & 0x800000L) {
+			v = (v & 0x7fffffL) >> 1;
+			exp++;
+		}
+	}
+	if(exp <= -126 || exp >= 130)
+		diag("double fp to single fp overflow");
+	v |= ((exp + 126) & 0xffL) << 23;
+	v |= e->h & 0x80000000L;
+	return v;
+}
+
+double
+ieeedtod(Ieee *ieeep)
+{
+	Ieee e;
+	double fr;
+	int exp;
+
+	if(ieeep->h & (1L<<31)) {
+		e.h = ieeep->h & ~(1L<<31);
+		e.l = ieeep->l;
+		return -ieeedtod(&e);
+	}
+	if(ieeep->l == 0 && ieeep->h == 0)
+		return 0;
+	fr = ieeep->l & ((1L<<16)-1L);
+	fr /= 1L<<16;
+	fr += (ieeep->l>>16) & ((1L<<16)-1L);
+	fr /= 1L<<16;
+	fr += (ieeep->h & (1L<<20)-1L) | (1L<<20);
+	fr /= 1L<<21;
+	exp = (ieeep->h>>20) & ((1L<<11)-1L);
+	exp -= (1L<<10) - 2L;
+	return ldexp(fr, exp);
+}
+
+void
+undefsym(Sym *s)
+{
+	int n;
+
+	n = imports;
+	if(s->value != 0)
+		diag("value != 0 on SXREF");
+	if(n >= 1<<Rindex)
+		diag("import index %d out of range", n);
+	s->value = n<<Roffset;
+	s->type = SUNDEF;
+	imports++;
+}
+
+void
+zerosig(char *sp)
+{
+	Sym *s;
+
+	s = lookup(sp, 0);
+	s->sig = 0;
+}
+
+void
+readundefs(char *f, int t)
+{
+	int i, n;
+	Sym *s;
+	Biobuf *b;
+	char *l, buf[256], *fields[64];
+
+	if(f == nil)
+		return;
+	b = Bopen(f, OREAD);
+	if(b == nil){
+		diag("could not open %s: %r", f);
+		errorexit();
+	}
+	while((l = Brdline(b, '\n')) != nil){
+		n = Blinelen(b);
+		if(n >= sizeof(buf)){
+			diag("%s: line too long", f);
+			errorexit();
+		}
+		memmove(buf, l, n);
+		buf[n-1] = '\0';
+		n = getfields(buf, fields, nelem(fields), 1, " \t\r\n");
+		if(n == nelem(fields)){
+			diag("%s: bad format", f);
+			errorexit();
+		}
+		for(i = 0; i < n; i++){
+			s = lookup(fields[i], 0);
+			s->type = SXREF;
+			s->subtype = t;
+			if(t == SIMPORT)
+				nimports++;
+			else
+				nexports++;
+		}
+	}
+	Bterm(b);
+}

+ 1183 - 0
sys/src/cmd/6l/optab.c

@@ -0,0 +1,1183 @@
+#include	"l.h"
+
+uchar	ynone[] =
+{
+	Ynone,	Ynone,	Zlit,	1,
+	0
+};
+uchar	ytext[] =
+{
+	Ymb,	Yi32,	Zpseudo,1,
+	0
+};
+uchar	ynop[] =
+{
+	Ynone,	Ynone,	Zpseudo,1,
+	Ynone,	Yml,	Zpseudo,1,
+	Ynone,	Yrf,	Zpseudo,1,
+	Yml,	Ynone,	Zpseudo,1,
+	Yrf,	Ynone,	Zpseudo,1,
+	0
+};
+uchar	yxorb[] =
+{
+	Yi32,	Yal,	Zib_,	1,
+	Yi32,	Ymb,	Zibo_m,	2,
+	Yrb,	Ymb,	Zr_m,	1,
+	Ymb,	Yrb,	Zm_r,	1,
+	0
+};
+uchar	yxorl[] =
+{
+	Yi8,	Yml,	Zibo_m,	2,
+	Yi32,	Yax,	Zil_,	1,
+	Yi32,	Yml,	Zilo_m,	2,
+	Yrl,	Yml,	Zr_m,	1,
+	Yml,	Yrl,	Zm_r,	1,
+	0
+};
+uchar	yaddl[] =
+{
+	Yi8,	Yml,	Zibo_m,	2,
+	Yi32,	Yax,	Zil_,	1,
+	Yi32,	Yml,	Zilo_m,	2,
+	Yrl,	Yml,	Zr_m,	1,
+	Yml,	Yrl,	Zm_r,	1,
+	0
+};
+uchar	yincb[] =
+{
+	Ynone,	Ymb,	Zo_m,	2,
+	0
+};
+uchar	yincw[] =
+{
+	Ynone,	Yml,	Zo_m,	2,
+	0
+};
+uchar	yincl[] =
+{
+	Ynone,	Yml,	Zo_m,	2,
+	0
+};
+uchar	ycmpb[] =
+{
+	Yal,	Yi32,	Z_ib,	1,
+	Ymb,	Yi32,	Zm_ibo,	2,
+	Ymb,	Yrb,	Zm_r,	1,
+	Yrb,	Ymb,	Zr_m,	1,
+	0
+};
+uchar	ycmpl[] =
+{
+	Yml,	Yi8,	Zm_ibo,	2,
+	Yax,	Yi32,	Z_il,	1,
+	Yml,	Yi32,	Zm_ilo,	2,
+	Yml,	Yrl,	Zm_r,	1,
+	Yrl,	Yml,	Zr_m,	1,
+	0
+};
+uchar	yshb[] =
+{
+	Yi1,	Ymb,	Zo_m,	2,
+	Yi32,	Ymb,	Zibo_m,	2,
+	Ycx,	Ymb,	Zo_m,	2,
+	0
+};
+uchar	yshl[] =
+{
+	Yi1,	Yml,	Zo_m,	2,
+	Yi32,	Yml,	Zibo_m,	2,
+	Ycl,	Yml,	Zo_m,	2,
+	Ycx,	Yml,	Zo_m,	2,
+	0
+};
+uchar	ytestb[] =
+{
+	Yi32,	Yal,	Zib_,	1,
+	Yi32,	Ymb,	Zibo_m,	2,
+	Yrb,	Ymb,	Zr_m,	1,
+	Ymb,	Yrb,	Zm_r,	1,
+	0
+};
+uchar	ytestl[] =
+{
+	Yi32,	Yax,	Zil_,	1,
+	Yi32,	Yml,	Zilo_m,	2,
+	Yrl,	Yml,	Zr_m,	1,
+	Yml,	Yrl,	Zm_r,	1,
+	0
+};
+uchar	ymovb[] =
+{
+	Yrb,	Ymb,	Zr_m,	1,
+	Ymb,	Yrb,	Zm_r,	1,
+	Yi32,	Yrb,	Zib_rp,	1,
+	Yi32,	Ymb,	Zibo_m,	2,
+	0
+};
+uchar	ymbs[] =
+{
+	Ymb,	Ynone,	Zm_o,	2,
+	0
+};
+uchar	ybtl[] =
+{
+	Yi8,	Yml,	Zibo_m,	2,
+	Yrl,	Yml,	Zr_m,	1,
+	0
+};
+uchar	ymovw[] =
+{
+	Yrl,	Yml,	Zr_m,	1,
+	Yml,	Yrl,	Zm_r,	1,
+	Yi0,	Yrl,	Zclr,	1,
+	Yi32,	Yrl,	Zil_rp,	1,
+	Yi32,	Yml,	Zilo_m,	2,
+	Yiauto,	Yrl,	Zaut_r,	2,
+	0
+};
+uchar	ymovl[] =
+{
+	Yrl,	Yml,	Zr_m,	1,
+	Yml,	Yrl,	Zm_r,	1,
+	Yi0,	Yrl,	Zclr,	1,
+	Yi32,	Yrl,	Zil_rp,	1,
+	Yi32,	Yml,	Zilo_m,	2,
+	Yml,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
+	Ymr,	Yml,	Zr_m_xm,	1,	// MMX MOVD
+	Yml,	Yxr,	Zm_r_xm,	2,	// XMM MOVD (32 bit)
+	Yxr,	Yml,	Zr_m_xm,	2,	// XMM MOVD (32 bit)
+	Yiauto,	Yrl,	Zaut_r,	2,
+	0
+};
+uchar	yret[] =
+{
+	Ynone,	Ynone,	Zo_iw,	1,
+	Yi32,	Ynone,	Zo_iw,	1,
+	0
+};
+uchar	ymovq[] =
+{
+	Yrl,	Yml,	Zr_m,	1,	// 0x89
+	Yml,	Yrl,	Zm_r,	1,	// 0x8b
+	Yi0,	Yrl,	Zclr,	1,	// 0x31
+	Ys32,	Yrl,	Zilo_m,	2,	// 32 bit signed 0xc7,(0)
+	Yi64,	Yrl,	Ziq_rp,	1,	// 0xb8 -- 32/64 bit immediate
+	Yi32,	Yml,	Zilo_m,	2,	// 0xc7,(0)
+	Ym,	Ymr,	Zm_r_xm_nr,	1,	// MMX MOVQ (shorter encoding)
+	Ymr,	Ym,	Zr_m_xm_nr,	1,	// MMX MOVQ
+	Ymm,	Ymr,	Zm_r_xm,	1,	// MMX MOVD
+	Ymr,	Ymm,	Zr_m_xm,	1,	// MMX MOVD
+	Yxr,	Ymr,	Zm_r_xm_nr,	2,	// MOVDQ2Q
+	Yxr,	Ym,	Zr_m_xm_nr,	2,	// MOVQ xmm store
+	Yml,	Yxr,	Zm_r_xm,	2,	// MOVD xmm load
+	Yxr,	Yml,	Zr_m_xm,	2,	// MOVD xmm store
+	Yiauto,	Yrl,	Zaut_r,	2,	// built-in LEAQ
+	0
+};
+uchar	ym_rl[] =
+{
+	Ym,	Yrl,	Zm_r,	1,
+	0
+};
+uchar	yrl_m[] =
+{
+	Yrl,	Ym,	Zr_m,	1,
+	0
+};
+uchar	ymb_rl[] =
+{
+	Ymb,	Yrl,	Zmb_r,	1,
+	0
+};
+uchar	yml_rl[] =
+{
+	Yml,	Yrl,	Zm_r,	1,
+	0
+};
+uchar	yrl_ml[] =
+{
+	Yrl,	Yml,	Zr_m,	1,
+	0
+};
+uchar	yml_mb[] =
+{
+	Yrb,	Ymb,	Zr_m,	1,
+	Ymb,	Yrb,	Zm_r,	1,
+	0
+};
+uchar	yrb_mb[] =
+{
+	Yrb,	Ymb,	Zr_m,	1,
+	0
+};
+uchar	yml_ml[] =
+{
+	Yrl,	Yml,	Zr_m,	1,
+	Yml,	Yrl,	Zm_r,	1,
+	0
+};
+uchar	ydivl[] =
+{
+	Yml,	Ynone,	Zm_o,	2,
+	0
+};
+uchar	ydivb[] =
+{
+	Ymb,	Ynone,	Zm_o,	2,
+	0
+};
+uchar	yimul[] =
+{
+	Yml,	Ynone,	Zm_o,	2,
+	Yi8,	Yrl,	Zib_rr,	1,
+	Yi32,	Yrl,	Zil_rr,	1,
+	Yml,	Yrl,	Zm_r,	2,
+	0
+};
+uchar	ybyte[] =
+{
+	Yi64,	Ynone,	Zbyte,	1,
+	0
+};
+uchar	yin[] =
+{
+	Yi32,	Ynone,	Zib_,	1,
+	Ynone,	Ynone,	Zlit,	1,
+	0
+};
+uchar	yint[] =
+{
+	Yi32,	Ynone,	Zib_,	1,
+	0
+};
+uchar	ypushl[] =
+{
+	Yrl,	Ynone,	Zrp_,	1,
+	Ym,	Ynone,	Zm_o,	2,
+	Yi8,	Ynone,	Zib_,	1,
+	Yi32,	Ynone,	Zil_,	1,
+	0
+};
+uchar	ypopl[] =
+{
+	Ynone,	Yrl,	Z_rp,	1,
+	Ynone,	Ym,	Zo_m,	2,
+	0
+};
+uchar	yscond[] =
+{
+	Ynone,	Ymb,	Zo_m,	2,
+	0
+};
+uchar	yjcond[] =
+{
+	Ynone,	Ybr,	Zbr,	1,
+	0
+};
+uchar	yloop[] =
+{
+	Ynone,	Ybr,	Zloop,	1,
+	0
+};
+uchar	ycall[] =
+{
+	Ynone,	Yml,	Zo_m64,	2,
+	Ynone,	Ybr,	Zcall,	1,
+	0
+};
+uchar	yjmp[] =
+{
+	Ynone,	Yml,	Zo_m64,	2,
+	Ynone,	Ybr,	Zjmp,	1,
+	0
+};
+
+uchar	yfmvd[] =
+{
+	Ym,	Yf0,	Zm_o,	2,
+	Yf0,	Ym,	Zo_m,	2,
+	Yrf,	Yf0,	Zm_o,	2,
+	Yf0,	Yrf,	Zo_m,	2,
+	0
+};
+uchar	yfmvdp[] =
+{
+	Yf0,	Ym,	Zo_m,	2,
+	Yf0,	Yrf,	Zo_m,	2,
+	0
+};
+uchar	yfmvf[] =
+{
+	Ym,	Yf0,	Zm_o,	2,
+	Yf0,	Ym,	Zo_m,	2,
+	0
+};
+uchar	yfmvx[] =
+{
+	Ym,	Yf0,	Zm_o,	2,
+	0
+};
+uchar	yfmvp[] =
+{
+	Yf0,	Ym,	Zo_m,	2,
+	0
+};
+uchar	yfadd[] =
+{
+	Ym,	Yf0,	Zm_o,	2,
+	Yrf,	Yf0,	Zm_o,	2,
+	Yf0,	Yrf,	Zo_m,	2,
+	0
+};
+uchar	yfaddp[] =
+{
+	Yf0,	Yrf,	Zo_m,	2,
+	0
+};
+uchar	yfxch[] =
+{
+	Yf0,	Yrf,	Zo_m,	2,
+	Yrf,	Yf0,	Zm_o,	2,
+	0
+};
+uchar	ycompp[] =
+{
+	Yf0,	Yrf,	Zo_m,	2,	/* botch is really f0,f1 */
+	0
+};
+uchar	ystsw[] =
+{
+	Ynone,	Ym,	Zo_m,	2,
+	Ynone,	Yax,	Zlit,	1,
+	0
+};
+uchar	ystcw[] =
+{
+	Ynone,	Ym,	Zo_m,	2,
+	Ym,	Ynone,	Zm_o,	2,
+	0
+};
+uchar	ysvrs[] =
+{
+	Ynone,	Ym,	Zo_m,	2,
+	Ym,	Ynone,	Zm_o,	2,
+	0
+};
+uchar	ymm[] = 
+{
+	Ymm,	Ymr,	Zm_r_xm,	1,
+	Yxm,	Yxr,	Zm_r_xm,	2,
+	0
+};
+uchar	yxm[] = 
+{
+	Yxm,	Yxr,	Zm_r_xm,	1,
+	0
+};
+uchar	yxcvm1[] = 
+{
+	Yxm,	Yxr,	Zm_r_xm,	2,
+	Yxm,	Ymr,	Zm_r_xm,	2,
+	0
+};
+uchar	yxcvm2[] =
+{
+	Yxm,	Yxr,	Zm_r_xm,	2,
+	Ymm,	Yxr,	Zm_r_xm,	2,
+	0
+};
+uchar	yxmq[] = 
+{
+	Yxm,	Yxr,	Zm_r_xm,	2,
+	0
+};
+uchar	yxr[] = 
+{
+	Yxr,	Yxr,	Zm_r_xm,	1,
+	0
+};
+uchar	yxr_ml[] =
+{
+	Yxr,	Yml,	Zr_m_xm,	1,
+	0
+};
+uchar	ymr[] =
+{
+	Ymr,	Ymr,	Zm_r,	1,
+	0
+};
+uchar	ymr_ml[] =
+{
+	Ymr,	Yml,	Zr_m_xm,	1,
+	0
+};
+uchar	yxcmp[] =
+{
+	Yxm,	Yxr, Zm_r_xm,	1,
+	0
+};
+uchar	yxcmpi[] =
+{
+	Yxm,	Yxr, Zm_r_i_xm,	2,
+	0
+};
+uchar	yxmov[] =
+{
+	Yxm,	Yxr,	Zm_r_xm,	1,
+	Yxr,	Yxm,	Zr_m_xm,	1,
+	0
+};
+uchar	yxcvfl[] = 
+{
+	Yxm,	Yrl,	Zm_r_xm,	1,
+	0
+};
+uchar	yxcvlf[] =
+{
+	Yml,	Yxr,	Zm_r_xm,	1,
+	0
+};
+uchar	yxcvfq[] = 
+{
+	Yxm,	Yrl,	Zm_r_xm,	2,
+	0
+};
+uchar	yxcvqf[] =
+{
+	Yml,	Yxr,	Zm_r_xm,	2,
+	0
+};
+uchar	yps[] = 
+{
+	Ymm,	Ymr,	Zm_r_xm,	1,
+	Yi8,	Ymr,	Zibo_m_xm,	2,
+	Yxm,	Yxr,	Zm_r_xm,	2,
+	Yi8,	Yxr,	Zibo_m_xm,	3,
+	0
+};
+uchar	yxrrl[] =
+{
+	Yxr,	Yrl,	Zm_r,	1,
+	0
+};
+uchar	ymfp[] =
+{
+	Ymm,	Ymr,	Zm_r_3d,	1,
+	0,
+};
+uchar	ymrxr[] =
+{
+	Ymr,	Yxr,	Zm_r,	1,
+	Yxm,	Yxr,	Zm_r_xm,	1,
+	0
+};
+uchar	ymshuf[] =
+{
+	Ymm,	Ymr,	Zibm_r,	1,
+	0
+};
+uchar	yxshuf[] =
+{
+	Yxm,	Yxr,	Zibm_r,	1,
+	0
+};
+uchar	yextrw[] =
+{
+	Yxr,	Yrl,	Zibm_r,	1,
+	0
+};
+uchar	ypsdq[] =
+{
+	Yi8,	Yxr,	Zibo_m,	2,
+	0
+};
+uchar	ymskb[] =
+{
+	Yxr,	Yrl,	Zm_r_xm,	2,
+	Ymr,	Yrl,	Zm_r_xm,	1,
+	0
+};
+
+Optab optab[] =
+/*	as, ytab, andproto, opcode */
+{
+	{ AXXX },
+	{ AAAA,		ynone,	P32, 0x37 },
+	{ AAAD,		ynone,	P32, 0xd5,0x0a },
+	{ AAAM,		ynone,	P32, 0xd4,0x0a },
+	{ AAAS,		ynone,	P32, 0x3f },
+	{ AADCB,	yxorb,	Pb, 0x14,0x80,(02),0x10,0x10 },
+	{ AADCL,	yxorl,	Px, 0x83,(02),0x15,0x81,(02),0x11,0x13 },
+	{ AADCQ,	yxorl,	Pw, 0x83,(02),0x15,0x81,(02),0x11,0x13 },
+	{ AADCW,	yxorl,	Pe, 0x83,(02),0x15,0x81,(02),0x11,0x13 },
+	{ AADDB,	yxorb,	Pb, 0x04,0x80,(00),0x00,0x02 },
+	{ AADDL,	yaddl,	Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
+	{ AADDPD,	yxm,	Pq, 0x58 },
+	{ AADDPS,	yxm,	Pm, 0x58 },
+	{ AADDQ,	yaddl,	Pw, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
+	{ AADDSD,	yxm,	Pf2, 0x58 },
+	{ AADDSS,	yxm,	Pf3, 0x58 },
+	{ AADDW,	yaddl,	Pe, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
+	{ AADJSP },
+	{ AANDB,	yxorb,	Pb, 0x24,0x80,(04),0x20,0x22 },
+	{ AANDL,	yxorl,	Px, 0x83,(04),0x25,0x81,(04),0x21,0x23 },
+	{ AANDNPD,	yxm,	Pq, 0x55 },
+	{ AANDNPS,	yxm,	Pm, 0x55 },
+	{ AANDPD,	yxm,	Pq, 0x54 },
+	{ AANDPS,	yxm,	Pq, 0x54 },
+	{ AANDQ,	yxorl,	Pw, 0x83,(04),0x25,0x81,(04),0x21,0x23 },
+	{ AANDW,	yxorl,	Pe, 0x83,(04),0x25,0x81,(04),0x21,0x23 },
+	{ AARPL,	yrl_ml,	P32, 0x63 },
+	{ ABOUNDL,	yrl_m,	P32, 0x62 },
+	{ ABOUNDW,	yrl_m,	Pe, 0x62 },
+	{ ABSFL,	yml_rl,	Pm, 0xbc },
+	{ ABSFQ,	yml_rl,	Pw, 0x0f,0xbc },
+	{ ABSFW,	yml_rl,	Pq, 0xbc },
+	{ ABSRL,	yml_rl,	Pm, 0xbd },
+	{ ABSRQ,	yml_rl,	Pw, 0x0f,0xbd },
+	{ ABSRW,	yml_rl,	Pq, 0xbd },
+	{ ABTCL,	ybtl,	Pm, 0xba,(07),0xbb },
+	{ ABTCQ,	ybtl,	Pw, 0x0f,0xba,(07),0x0f,0xbb },
+	{ ABTCW,	ybtl,	Pq, 0xba,(07),0xbb },
+	{ ABTL,		ybtl,	Pm, 0xba,(04),0xa3 },
+	{ ABTQ,		ybtl,	Pw, 0x0f,0xba,(04),0x0f,0xa3},
+	{ ABTRL,	ybtl,	Pm, 0xba,(06),0xb3 },
+	{ ABTRQ,	ybtl,	Pw, 0x0f,0xba,(06),0x0f,0xb3 },
+	{ ABTRW,	ybtl,	Pq, 0xba,(06),0xb3 },
+	{ ABTSL,	ybtl,	Pm, 0xba,(05),0xab  },
+	{ ABTSQ,	ybtl,	Pw, 0x0f,0xba,(05),0x0f,0xab },
+	{ ABTSW,	ybtl,	Pq, 0xba,(05),0xab  },
+	{ ABTW,		ybtl,	Pq, 0xba,(04),0xa3 },
+	{ ABYTE,	ybyte,	Px, 1 },
+	{ ACALL,	ycall,	Px, 0xff,(02),0xe8 },
+	{ ACDQ,		ynone,	Px, 0x99 },
+	{ ACLC,		ynone,	Px, 0xf8 },
+	{ ACLD,		ynone,	Px, 0xfc },
+	{ ACLI,		ynone,	Px, 0xfa },
+	{ ACLTS,	ynone,	Pm, 0x06 },
+	{ ACMC,		ynone,	Px, 0xf5 },
+	{ ACMOVLCC,	yml_rl,	Pm, 0x43 },
+	{ ACMOVLCS,	yml_rl,	Pm, 0x42 },
+	{ ACMOVLEQ,	yml_rl,	Pm, 0x44 },
+	{ ACMOVLGE,	yml_rl,	Pm, 0x4d },
+	{ ACMOVLGT,	yml_rl,	Pm, 0x4f },
+	{ ACMOVLHI,	yml_rl,	Pm, 0x47 },
+	{ ACMOVLLE,	yml_rl,	Pm, 0x4e },
+	{ ACMOVLLS,	yml_rl,	Pm, 0x46 },
+	{ ACMOVLLT,	yml_rl,	Pm, 0x4c },
+	{ ACMOVLMI,	yml_rl,	Pm, 0x48 },
+	{ ACMOVLNE,	yml_rl,	Pm, 0x45 },
+	{ ACMOVLOC,	yml_rl,	Pm, 0x41 },
+	{ ACMOVLOS,	yml_rl,	Pm, 0x40 },
+	{ ACMOVLPC,	yml_rl,	Pm, 0x4b },
+	{ ACMOVLPL,	yml_rl,	Pm, 0x49 },
+	{ ACMOVLPS,	yml_rl,	Pm, 0x4a },
+	{ ACMOVQCC,	yml_rl,	Pw, 0x0f,0x43 },
+	{ ACMOVQCS,	yml_rl,	Pw, 0x0f,0x42 },
+	{ ACMOVQEQ,	yml_rl,	Pw, 0x0f,0x44 },
+	{ ACMOVQGE,	yml_rl,	Pw, 0x0f,0x4d },
+	{ ACMOVQGT,	yml_rl,	Pw, 0x0f,0x4f },
+	{ ACMOVQHI,	yml_rl,	Pw, 0x0f,0x47 },
+	{ ACMOVQLE,	yml_rl,	Pw, 0x0f,0x4e },
+	{ ACMOVQLS,	yml_rl,	Pw, 0x0f,0x46 },
+	{ ACMOVQLT,	yml_rl,	Pw, 0x0f,0x4c },
+	{ ACMOVQMI,	yml_rl,	Pw, 0x0f,0x48 },
+	{ ACMOVQNE,	yml_rl,	Pw, 0x0f,0x45 },
+	{ ACMOVQOC,	yml_rl,	Pw, 0x0f,0x41 },
+	{ ACMOVQOS,	yml_rl,	Pw, 0x0f,0x40 },
+	{ ACMOVQPC,	yml_rl,	Pw, 0x0f,0x4b },
+	{ ACMOVQPL,	yml_rl,	Pw, 0x0f,0x49 },
+	{ ACMOVQPS,	yml_rl,	Pw, 0x0f,0x4a },
+	{ ACMOVWCC,	yml_rl,	Pq, 0x43 },
+	{ ACMOVWCS,	yml_rl,	Pq, 0x42 },
+	{ ACMOVWEQ,	yml_rl,	Pq, 0x44 },
+	{ ACMOVWGE,	yml_rl,	Pq, 0x4d },
+	{ ACMOVWGT,	yml_rl,	Pq, 0x4f },
+	{ ACMOVWHI,	yml_rl,	Pq, 0x47 },
+	{ ACMOVWLE,	yml_rl,	Pq, 0x4e },
+	{ ACMOVWLS,	yml_rl,	Pq, 0x46 },
+	{ ACMOVWLT,	yml_rl,	Pq, 0x4c },
+	{ ACMOVWMI,	yml_rl,	Pq, 0x48 },
+	{ ACMOVWNE,	yml_rl,	Pq, 0x45 },
+	{ ACMOVWOC,	yml_rl,	Pq, 0x41 },
+	{ ACMOVWOS,	yml_rl,	Pq, 0x40 },
+	{ ACMOVWPC,	yml_rl,	Pq, 0x4b },
+	{ ACMOVWPL,	yml_rl,	Pq, 0x49 },
+	{ ACMOVWPS,	yml_rl,	Pq, 0x4a },
+	{ ACMPB,	ycmpb,	Pb, 0x3c,0x80,(07),0x38,0x3a },
+	{ ACMPL,	ycmpl,	Px, 0x83,(07),0x3d,0x81,(07),0x39,0x3b },
+	{ ACMPPD,	yxcmpi,	Px, Pe,0xc2 },
+	{ ACMPPS,	yxcmpi,	Pm, 0xc2,0 },
+	{ ACMPQ,	ycmpl,	Pw, 0x83,(07),0x3d,0x81,(07),0x39,0x3b },
+	{ ACMPSB,	ynone,	Pb, 0xa6 },
+	{ ACMPSD,	yxcmpi,	Px, Pf2,0xc2 },
+	{ ACMPSL,	ynone,	Px, 0xa7 },
+	{ ACMPSQ,	ynone,	Pw, 0xa7 },
+	{ ACMPSS,	yxcmpi,	Px, Pf3,0xc2 },
+	{ ACMPSW,	ynone,	Pe, 0xa7 },
+	{ ACMPW,	ycmpl,	Pe, 0x83,(07),0x3d,0x81,(07),0x39,0x3b },
+	{ ACOMISD,	yxcmp,	Pe, 0x2f },
+	{ ACOMISS,	yxcmp,	Pm, 0x2f },
+	{ ACPUID,	ynone,	Pm, 0xa2 },
+	{ ACVTPL2PD,	yxcvm2,	Px, Pf3,0xe6,Pe,0x2a },
+	{ ACVTPL2PS,	yxcvm2,	Pm, 0x5b,0,0x2a,0, },
+	{ ACVTPD2PL,	yxcvm1,	Px, Pf2,0xe6,Pe,0x2d },
+	{ ACVTPD2PS,	yxm,	Pe, 0x5a },
+	{ ACVTPS2PL,	yxcvm1, Px, Pe,0x5b,Pm,0x2d },
+	{ ACVTPS2PD,	yxm,	Pm, 0x5a },
+	{ API2FW,	ymfp,	Px, 0x0c },
+	{ ACVTSD2SL,	yxcvfl, Pf2, 0x2d },
+	{ ACVTSD2SQ,	yxcvfq, Pw, Pf2,0x2d },
+	{ ACVTSD2SS,	yxm,	Pf2, 0x5a },
+	{ ACVTSL2SD,	yxcvlf, Pf2, 0x2a },
+	{ ACVTSQ2SD,	yxcvqf, Pw, Pf2,0x2a },
+	{ ACVTSL2SS,	yxcvlf, Pf3, 0x2a },
+	{ ACVTSQ2SS,	yxcvqf, Pw, Pf3,0x2a },
+	{ ACVTSS2SD,	yxm,	Pf3, 0x5a },
+	{ ACVTSS2SL,	yxcvfl, Pf3, 0x2d },
+	{ ACVTSS2SQ,	yxcvfq, Pw, Pf3,0x2d },
+	{ ACVTTPD2PL,	yxcvm1,	Px, Pe,0xe6,Pe,0x2c },
+	{ ACVTTPS2PL,	yxcvm1,	Px, Pf3,0x5b,Pm,0x2c },
+	{ ACVTTSD2SL,	yxcvfl, Pf2, 0x2c },
+	{ ACVTTSD2SQ,	yxcvfq, Pw, Pf2,0x2c },
+	{ ACVTTSS2SL,	yxcvfl,	Pf3, 0x2c },
+	{ ACVTTSS2SQ,	yxcvfq, Pw, Pf3,0x2c },
+	{ ACWD,		ynone,	Pe, 0x99 },
+	{ ACQO,		ynone,	Pw, 0x99 },
+	{ ADAA,		ynone,	P32, 0x27 },
+	{ ADAS,		ynone,	P32, 0x2f },
+	{ ADATA },
+	{ ADECB,	yincb,	Pb, 0xfe,(01) },
+	{ ADECL,	yincl,	Px, 0xff,(01) },
+	{ ADECQ,	yincl,	Pw, 0xff,(01) },
+	{ ADECW,	yincw,	Pe, 0xff,(01) },
+	{ ADIVB,	ydivb,	Pb, 0xf6,(06) },
+	{ ADIVL,	ydivl,	Px, 0xf7,(06) },
+	{ ADIVPD,	yxm,	Pe, 0x5e },
+	{ ADIVPS,	yxm,	Pm, 0x5e },
+	{ ADIVQ,	ydivl,	Pw, 0xf7,(06) },
+	{ ADIVSD,	yxm,	Pf2, 0x5e },
+	{ ADIVSS,	yxm,	Pf3, 0x5e },
+	{ ADIVW,	ydivl,	Pe, 0xf7,(06) },
+	{ AEMMS,	ynone,	Pm, 0x77 },
+	{ AENTER },				/* botch */
+	{ AFXRSTOR,	ysvrs,	Pm, 0xae,(01),0xae,(01) },
+	{ AFXSAVE,	ysvrs,	Pm, 0xae,(00),0xae,(00) },
+	{ AFXRSTOR64,	ysvrs,	Pw, 0x0f,0xae,(01),0x0f,0xae,(01) },
+	{ AFXSAVE64,	ysvrs,	Pw, 0x0f,0xae,(00),0x0f,0xae,(00) },
+	{ AGLOBL },
+	{ AGOK },
+	{ AHISTORY },
+	{ AHLT,		ynone,	Px, 0xf4 },
+	{ AIDIVB,	ydivb,	Pb, 0xf6,(07) },
+	{ AIDIVL,	ydivl,	Px, 0xf7,(07) },
+	{ AIDIVQ,	ydivl,	Pw, 0xf7,(07) },
+	{ AIDIVW,	ydivl,	Pe, 0xf7,(07) },
+	{ AIMULB,	ydivb,	Pb, 0xf6,(05) },
+	{ AIMULL,	yimul,	Px, 0xf7,(05),0x6b,0x69,Pm,0xaf },
+	{ AIMULQ,	yimul,	Pw, 0xf7,(05),0x6b,0x69,Pm,0xaf },
+	{ AIMULW,	yimul,	Pe, 0xf7,(05),0x6b,0x69,Pm,0xaf },
+	{ AINB,		yin,	Pb, 0xe4,0xec },
+	{ AINCB,	yincb,	Pb, 0xfe,(00) },
+	{ AINCL,	yincl,	Px, 0xff,(00) },
+	{ AINCQ,	yincl,	Pw, 0xff,(00) },
+	{ AINCW,	yincw,	Pe, 0xff,(00) },
+	{ AINL,		yin,	Px, 0xe5,0xed },
+	{ AINSB,	ynone,	Pb, 0x6c },
+	{ AINSL,	ynone,	Px, 0x6d },
+	{ AINSW,	ynone,	Pe, 0x6d },
+	{ AINT,		yint,	Px, 0xcd },
+	{ AINTO,	ynone,	P32, 0xce },
+	{ AINW,		yin,	Pe, 0xe5,0xed },
+	{ AIRETL,	ynone,	Px, 0xcf },
+	{ AIRETQ,	ynone,	Pw, 0xcf },
+	{ AIRETW,	ynone,	Pe, 0xcf },
+	{ AJCC,		yjcond,	Px, 0x73,0x83,(00) },
+	{ AJCS,		yjcond,	Px, 0x72,0x82 },
+	{ AJCXZ,	yloop,	Px, 0xe3 },
+	{ AJEQ,		yjcond,	Px, 0x74,0x84 },
+	{ AJGE,		yjcond,	Px, 0x7d,0x8d },
+	{ AJGT,		yjcond,	Px, 0x7f,0x8f },
+	{ AJHI,		yjcond,	Px, 0x77,0x87 },
+	{ AJLE,		yjcond,	Px, 0x7e,0x8e },
+	{ AJLS,		yjcond,	Px, 0x76,0x86 },
+	{ AJLT,		yjcond,	Px, 0x7c,0x8c },
+	{ AJMI,		yjcond,	Px, 0x78,0x88 },
+	{ AJMP,		yjmp,	Px, 0xff,(04),0xeb,0xe9 },
+	{ AJNE,		yjcond,	Px, 0x75,0x85 },
+	{ AJOC,		yjcond,	Px, 0x71,0x81,(00) },
+	{ AJOS,		yjcond,	Px, 0x70,0x80,(00) },
+	{ AJPC,		yjcond,	Px, 0x7b,0x8b },
+	{ AJPL,		yjcond,	Px, 0x79,0x89 },
+	{ AJPS,		yjcond,	Px, 0x7a,0x8a },
+	{ ALAHF,	ynone,	Px, 0x9f },
+	{ ALARL,	yml_rl,	Pm, 0x02 },
+	{ ALARW,	yml_rl,	Pq, 0x02 },
+	{ ALDMXCSR,	ysvrs,	Pm, 0xae,(02),0xae,(02) },
+	{ ALEAL,	ym_rl,	Px, 0x8d },
+	{ ALEAQ,	ym_rl,	Pw, 0x8d },
+	{ ALEAVEL,	ynone,	P32, 0xc9 },
+	{ ALEAVEQ,	ynone,	Py, 0xc9 },
+	{ ALEAVEW,	ynone,	Pe, 0xc9 },
+	{ ALEAW,	ym_rl,	Pe, 0x8d },
+	{ ALOCK,	ynone,	Px, 0xf0 },
+	{ ALODSB,	ynone,	Pb, 0xac },
+	{ ALODSL,	ynone,	Px, 0xad },
+	{ ALODSQ,	ynone,	Pw, 0xad },
+	{ ALODSW,	ynone,	Pe, 0xad },
+	{ ALONG,	ybyte,	Px, 4 },
+	{ ALOOP,	yloop,	Px, 0xe2 },
+	{ ALOOPEQ,	yloop,	Px, 0xe1 },
+	{ ALOOPNE,	yloop,	Px, 0xe0 },
+	{ ALSLL,	yml_rl,	Pm, 0x03  },
+	{ ALSLW,	yml_rl,	Pq, 0x03  },
+	{ AMASKMOVOU,	yxr,	Pe, 0xf7 },
+	{ AMASKMOVQ,	ymr,	Pm, 0xf7 },
+	{ AMAXPD,	yxm,	Pe, 0x5f },
+	{ AMAXPS,	yxm,	Pm, 0x5f },
+	{ AMAXSD,	yxm,	Pf2, 0x5f },
+	{ AMAXSS,	yxm,	Pf3, 0x5f },
+	{ AMINPD,	yxm,	Pe, 0x5d },
+	{ AMINPS,	yxm,	Pm, 0x5d },
+	{ AMINSD,	yxm,	Pf2, 0x5d },
+	{ AMINSS,	yxm,	Pf3, 0x5d },
+	{ AMOVAPD,	yxmov,	Pe, 0x28,0x29 },
+	{ AMOVAPS,	yxmov,	Pm, 0x28,0x29 },
+	{ AMOVB,	ymovb,	Pb, 0x88,0x8a,0xb0,0xc6,(00) },
+	{ AMOVBLSX,	ymb_rl,	Pm, 0xbe },
+	{ AMOVBLZX,	ymb_rl,	Pm, 0xb6 },
+	{ AMOVBQSX,	ymb_rl,	Pw, 0x0f,0xbe },
+	{ AMOVBQZX,	ymb_rl,	Pw, 0x0f,0xb6 },
+	{ AMOVBWSX,	ymb_rl,	Pq, 0xbe },
+	{ AMOVBWZX,	ymb_rl,	Pq, 0xb6 },
+	{ AMOVO,	yxmov,	Pe, 0x6f,0x7f },
+	{ AMOVOU,	yxmov,	Pf2, 0x6f,0x7f },
+	{ AMOVHLPS,	yxr,	Pm, 0x12 },
+	{ AMOVHPD,	yxmov,	Pe, 0x16,0x17 },
+	{ AMOVHPS,	yxmov,	Pm, 0x16,0x17 },
+	{ AMOVL,	ymovl,	Px, 0x89,0x8b,0x31,0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e },
+	{ AMOVLHPS,	yxr,	Pm, 0x16 },
+	{ AMOVLPD,	yxmov,	Pe, 0x12,0x13 },
+	{ AMOVLPS,	yxmov,	Pm, 0x12,0x13 },
+	{ AMOVLQSX,	yml_rl,	Pw, 0x63 },
+	{ AMOVLQZX,	yml_rl,	Px, 0x63 },
+	{ AMOVMSKPD,	yxrrl,	Pq, 0x50 },
+	{ AMOVMSKPS,	yxrrl,	Pm, 0x50 },
+	{ AMOVNTO,	yxr_ml,	Pe, 0xe7 },
+	{ AMOVNTPD,	yxr_ml,	Pe, 0x2b },
+	{ AMOVNTPS,	yxr_ml,	Pm, 0x2b },
+	{ AMOVNTQ,	ymr_ml,	Pm, 0xe7 },
+	{ AMOVQ,	ymovq,	Pw, 0x89,0x8b,0x31,0xc7,(00),0xb8,0xc7,(00),0x6f,0x7f,0x6e,0x7e,Pf2,0xd6,Pe,0xd6,Pe,0x6e,Pe,0x7e },
+	{ AMOVQOZX,	ymrxr,	Pf3, 0xd6,0x7e },
+	{ AMOVSB,	ynone,	Pb, 0xa4 },
+	{ AMOVSD,	yxmov,	Pf2, 0x10,0x11 },
+	{ AMOVSL,	ynone,	Px, 0xa5 },
+	{ AMOVSQ,	ynone,	Pw, 0xa5 },
+	{ AMOVSS,	yxmov,	Pf3, 0x10,0x11 },
+	{ AMOVSW,	ynone,	Pe, 0xa5 },
+	{ AMOVUPD,	yxmov,	Pe, 0x10,0x11 },
+	{ AMOVUPS,	yxmov,	Pm, 0x10,0x11 },
+	{ AMOVW,	ymovw,	Pe, 0x89,0x8b,0x31,0xb8,0xc7,(00) },
+	{ AMOVWLSX,	yml_rl,	Pm, 0xbf },
+	{ AMOVWLZX,	yml_rl,	Pm, 0xb7 },
+	{ AMOVWQSX,	yml_rl,	Pw, 0x0f,0xbf },
+	{ AMOVWQZX,	yml_rl,	Pw, 0x0f,0xb7 },
+	{ AMULB,	ydivb,	Pb, 0xf6,(04) },
+	{ AMULL,	ydivl,	Px, 0xf7,(04) },
+	{ AMULPD,	yxm,	Pe, 0x59 },
+	{ AMULPS,	yxm,	Ym, 0x59 },
+	{ AMULQ,	ydivl,	Pw, 0xf7,(04) },
+	{ AMULSD,	yxm,	Pf2, 0x59 },
+	{ AMULSS,	yxm,	Pf3, 0x59 },
+	{ AMULW,	ydivl,	Pe, 0xf7,(04) },
+	{ ANAME },
+	{ ANEGB,	yscond,	Pb, 0xf6,(03) },
+	{ ANEGL,	yscond,	Px, 0xf7,(03) },
+	{ ANEGQ,	yscond,	Pw, 0xf7,(03) },
+	{ ANEGW,	yscond,	Pe, 0xf7,(03) },
+	{ ANOP,		ynop,	Px, 0,0 },
+	{ ANOTB,	yscond,	Pb, 0xf6,(02) },
+	{ ANOTL,	yscond,	Px, 0xf7,(02) },
+	{ ANOTQ,	yscond,	Pw, 0xf7,(02) },
+	{ ANOTW,	yscond,	Pe, 0xf7,(02) },
+	{ AORB,		yxorb,	Pb, 0x0c,0x80,(01),0x08,0x0a },
+	{ AORL,		yxorl,	Px, 0x83,(01),0x0d,0x81,(01),0x09,0x0b },
+	{ AORPD,	yxm,	Pq, 0x56 },
+	{ AORPS,	yxm,	Pm, 0x56 },
+	{ AORQ,		yxorl,	Pw, 0x83,(01),0x0d,0x81,(01),0x09,0x0b },
+	{ AORW,		yxorl,	Pe, 0x83,(01),0x0d,0x81,(01),0x09,0x0b },
+	{ AOUTB,	yin,	Pb, 0xe6,0xee },
+	{ AOUTL,	yin,	Px, 0xe7,0xef },
+	{ AOUTSB,	ynone,	Pb, 0x6e },
+	{ AOUTSL,	ynone,	Px, 0x6f },
+	{ AOUTSW,	ynone,	Pe, 0x6f },
+	{ AOUTW,	yin,	Pe, 0xe7,0xef },
+	{ APACKSSLW,	ymm,	Py, 0x6b,Pe,0x6b },
+	{ APACKSSWB,	ymm,	Py, 0x63,Pe,0x63 },
+	{ APACKUSWB,	ymm,	Py, 0x67,Pe,0x67 },
+	{ APADDB,	ymm,	Py, 0xfc,Pe,0xfc },
+	{ APADDL,	ymm,	Py, 0xfe,Pe,0xfe },
+	{ APADDQ,	yxm,	Pe, 0xd4 },
+	{ APADDSB,	ymm,	Py, 0xec,Pe,0xec },
+	{ APADDSW,	ymm,	Py, 0xed,Pe,0xed },
+	{ APADDUSB,	ymm,	Py, 0xdc,Pe,0xdc },
+	{ APADDUSW,	ymm,	Py, 0xdd,Pe,0xdd },
+	{ APADDW,	ymm,	Py, 0xfd,Pe,0xfd },
+	{ APAND,	ymm,	Py, 0xdb,Pe,0xdb },
+	{ APANDN,	ymm,	Py, 0xdf,Pe,0xdf },
+	{ APAVGB,	ymm,	Py, 0xe0,Pe,0xe0 },
+	{ APAVGW,	ymm,	Py, 0xe3,Pe,0xe3 },
+	{ APCMPEQB,	ymm,	Py, 0x74,Pe,0x74 },
+	{ APCMPEQL,	ymm,	Py, 0x76,Pe,0x76 },
+	{ APCMPEQW,	ymm,	Py, 0x75,Pe,0x75 },
+	{ APCMPGTB,	ymm,	Py, 0x64,Pe,0x64 },
+	{ APCMPGTL,	ymm,	Py, 0x66,Pe,0x66 },
+	{ APCMPGTW,	ymm,	Py, 0x65,Pe,0x65 },
+	{ APEXTRW,	yextrw,	Pq, 0xc5 },
+	{ APF2IL,	ymfp,	Px, 0x1d },
+	{ APF2IW,	ymfp,	Px, 0x1c },
+	{ API2FL,	ymfp,	Px, 0x0d },
+	{ APFACC,	ymfp,	Px, 0xae },
+	{ APFADD,	ymfp,	Px, 0x9e },
+	{ APFCMPEQ,	ymfp,	Px, 0xb0 },
+	{ APFCMPGE,	ymfp,	Px, 0x90 },
+	{ APFCMPGT,	ymfp,	Px, 0xa0 },
+	{ APFMAX,	ymfp,	Px, 0xa4 },
+	{ APFMIN,	ymfp,	Px, 0x94 },
+	{ APFMUL,	ymfp,	Px, 0xb4 },
+	{ APFNACC,	ymfp,	Px, 0x8a },
+	{ APFPNACC,	ymfp,	Px, 0x8e },
+	{ APFRCP,	ymfp,	Px, 0x96 },
+	{ APFRCPIT1,	ymfp,	Px, 0xa6 },
+	{ APFRCPI2T,	ymfp,	Px, 0xb6 },
+	{ APFRSQIT1,	ymfp,	Px, 0xa7 },
+	{ APFRSQRT,	ymfp,	Px, 0x97 },
+	{ APFSUB,	ymfp,	Px, 0x9a },
+	{ APFSUBR,	ymfp,	Px, 0xaa },
+	{ APINSRW,	yextrw,	Pq, 0xc4 },
+	{ APMADDWL,	ymm,	Py, 0xf5,Pe,0xf5 },
+	{ APMAXSW,	yxm,	Pe, 0xee },
+	{ APMAXUB,	yxm,	Pe, 0xde },
+	{ APMINSW,	yxm,	Pe, 0xea },
+	{ APMINUB,	yxm,	Pe, 0xda },
+	{ APMOVMSKB,	ymskb,	Px, Pe,0xd7,0xd7 },
+	{ APMULHRW,	ymfp,	Px, 0xb7 },
+	{ APMULHUW,	ymm,	Py, 0xe4,Pe,0xe4 },
+	{ APMULHW,	ymm,	Py, 0xe5,Pe,0xe5 },
+	{ APMULLW,	ymm,	Py, 0xd5,Pe,0xd5 },
+	{ APMULULQ,	ymm,	Py, 0xf4,Pe,0xf4 },
+	{ APOPAL,	ynone,	P32, 0x61 },
+	{ APOPAW,	ynone,	Pe, 0x61 },
+	{ APOPFL,	ynone,	P32, 0x9d },
+	{ APOPFQ,	ynone,	Py, 0x9d },
+	{ APOPFW,	ynone,	Pe, 0x9d },
+	{ APOPL,	ypopl,	P32, 0x58,0x8f,(00) },
+	{ APOPQ,	ypopl,	Py, 0x58,0x8f,(00) },
+	{ APOPW,	ypopl,	Pe, 0x58,0x8f,(00) },
+	{ APOR,		ymm,	Py, 0xeb,Pe,0xeb },
+	{ APSADBW,	yxm,	Pw, Pe,0xf6 },
+	{ APSHUFHW,	yxshuf,	Pf3, 0x70 },
+	{ APSHUFL,	yxm,	Pw, Pe,0x70 },
+	{ APSHUFLW,	yxshuf,	Pf2, 0x70 },
+	{ APSHUFW,	ymshuf,	Pm, 0x70 },
+	{ APSLLO,	ypsdq,	Pq, 0x73,(07) },
+	{ APSLLL,	yps,	Py, 0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06) },
+	{ APSLLQ,	yps,	Py, 0xf3, 0x73,(06), Pe,0xf3, Pe,0x7e,(06) },
+	{ APSLLW,	yps,	Py, 0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06) },
+	{ APSRAL,	yps,	Py, 0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04) },
+	{ APSRAW,	yps,	Py, 0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04) },
+	{ APSRLO,	ypsdq,	Pq, 0x73,(03) },
+	{ APSRLL,	yps,	Py, 0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02) },
+	{ APSRLQ,	yps,	Py, 0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02) },
+	{ APSRLW,	yps,	Py, 0xd1, 0x71,(02), Pe,0xe1, Pe,0x71,(02) },
+	{ APSUBB,	yxm,	Pe, 0xf8 },
+	{ APSUBL,	yxm,	Pe, 0xfa },
+	{ APSUBQ,	yxm,	Pe, 0xfb },
+	{ APSUBSB,	yxm,	Pe, 0xe8 },
+	{ APSUBSW,	yxm,	Pe, 0xe9 },
+	{ APSUBUSB,	yxm,	Pe, 0xd8 },
+	{ APSUBUSW,	yxm,	Pe, 0xd9 },
+	{ APSUBW,	yxm,	Pe, 0xf9 },
+	{ APSWAPL,	ymfp,	Px, 0xbb },
+	{ APUNPCKHBW,	ymm,	Py, 0x68,Pe,0x68 },
+	{ APUNPCKHLQ,	ymm,	Py, 0x6a,Pe,0x6a },
+	{ APUNPCKHQDQ,	yxm,	Pe, 0x6d },
+	{ APUNPCKHWL,	ymm,	Py, 0x69,Pe,0x69 },
+	{ APUNPCKLBW,	ymm,	Py, 0x60,Pe,0x60 },
+	{ APUNPCKLLQ,	ymm,	Py, 0x62,Pe,0x62 },
+	{ APUNPCKLQDQ,	yxm,	Pe, 0x6c },
+	{ APUNPCKLWL,	ymm,	Py, 0x61,Pe,0x61 },
+	{ APUSHAL,	ynone,	P32, 0x60 },
+	{ APUSHAW,	ynone,	Pe, 0x60 },
+	{ APUSHFL,	ynone,	P32, 0x9c },
+	{ APUSHFQ,	ynone,	Py, 0x9c },
+	{ APUSHFW,	ynone,	Pe, 0x9c },
+	{ APUSHL,	ypushl,	P32, 0x50,0xff,(06),0x6a,0x68 },
+	{ APUSHQ,	ypushl,	Py, 0x50,0xff,(06),0x6a,0x68 },
+	{ APUSHW,	ypushl,	Pe, 0x50,0xff,(06),0x6a,0x68 },
+	{ APXOR,	ymm,	Py, 0xef,Pe,0xef },
+	{ AQUAD,	ybyte,	Px, 8 },
+	{ ARCLB,	yshb,	Pb, 0xd0,(02),0xc0,(02),0xd2,(02) },
+	{ ARCLL,	yshl,	Px, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
+	{ ARCLQ,	yshl,	Pw, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
+	{ ARCLW,	yshl,	Pe, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
+	{ ARCPPS,	yxm,	Pm, 0x53 },
+	{ ARCPSS,	yxm,	Pf3, 0x53 },
+	{ ARCRB,	yshb,	Pb, 0xd0,(03),0xc0,(03),0xd2,(03) },
+	{ ARCRL,	yshl,	Px, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) },
+	{ ARCRQ,	yshl,	Pw, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) },
+	{ ARCRW,	yshl,	Pe, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) },
+	{ AREP,		ynone,	Px, 0xf3 },
+	{ AREPN,	ynone,	Px, 0xf2 },
+	{ ARET,		ynone,	Px, 0xc3 },
+	{ ARETFW,	yret,	Pe, 0xcb,0xca },
+	{ ARETFL,	yret,	Px, 0xcb,0xca },
+	{ ARETFQ,	yret,	Pw, 0xcb,0xca },
+	{ AROLB,	yshb,	Pb, 0xd0,(00),0xc0,(00),0xd2,(00) },
+	{ AROLL,	yshl,	Px, 0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00) },
+	{ AROLQ,	yshl,	Pw, 0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00) },
+	{ AROLW,	yshl,	Pe, 0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00) },
+	{ ARORB,	yshb,	Pb, 0xd0,(01),0xc0,(01),0xd2,(01) },
+	{ ARORL,	yshl,	Px, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) },
+	{ ARORQ,	yshl,	Pw, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) },
+	{ ARORW,	yshl,	Pe, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) },
+	{ ARSQRTPS,	yxm,	Pm, 0x52 },
+	{ ARSQRTSS,	yxm,	Pf3, 0x52 },
+	{ ASAHF,	ynone,	Px, 0x86,0xe0,0x50,0x9d },	/* XCHGB AH,AL; PUSH AX; POPFL */
+	{ ASALB,	yshb,	Pb, 0xd0,(04),0xc0,(04),0xd2,(04) },
+	{ ASALL,	yshl,	Px, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },
+	{ ASALQ,	yshl,	Pw, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },
+	{ ASALW,	yshl,	Pe, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },
+	{ ASARB,	yshb,	Pb, 0xd0,(07),0xc0,(07),0xd2,(07) },
+	{ ASARL,	yshl,	Px, 0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07) },
+	{ ASARQ,	yshl,	Pw, 0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07) },
+	{ ASARW,	yshl,	Pe, 0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07) },
+	{ ASBBB,	yxorb,	Pb, 0x1c,0x80,(03),0x18,0x1a },
+	{ ASBBL,	yxorl,	Px, 0x83,(03),0x1d,0x81,(03),0x19,0x1b },
+	{ ASBBQ,	yxorl,	Pw, 0x83,(03),0x1d,0x81,(03),0x19,0x1b },
+	{ ASBBW,	yxorl,	Pe, 0x83,(03),0x1d,0x81,(03),0x19,0x1b },
+	{ ASCASB,	ynone,	Pb, 0xae },
+	{ ASCASL,	ynone,	Px, 0xaf },
+	{ ASCASQ,	ynone,	Pw, 0xaf },
+	{ ASCASW,	ynone,	Pe, 0xaf },
+	{ ASETCC,	yscond,	Pm, 0x93,(00) },
+	{ ASETCS,	yscond,	Pm, 0x92,(00) },
+	{ ASETEQ,	yscond,	Pm, 0x94,(00) },
+	{ ASETGE,	yscond,	Pm, 0x9d,(00) },
+	{ ASETGT,	yscond,	Pm, 0x9f,(00) },
+	{ ASETHI,	yscond,	Pm, 0x97,(00) },
+	{ ASETLE,	yscond,	Pm, 0x9e,(00) },
+	{ ASETLS,	yscond,	Pm, 0x96,(00) },
+	{ ASETLT,	yscond,	Pm, 0x9c,(00) },
+	{ ASETMI,	yscond,	Pm, 0x98,(00) },
+	{ ASETNE,	yscond,	Pm, 0x95,(00) },
+	{ ASETOC,	yscond,	Pm, 0x91,(00) },
+	{ ASETOS,	yscond,	Pm, 0x90,(00) },
+	{ ASETPC,	yscond,	Pm, 0x96,(00) },
+	{ ASETPL,	yscond,	Pm, 0x99,(00) },
+	{ ASETPS,	yscond,	Pm, 0x9a,(00) },
+	{ ASHLB,	yshb,	Pb, 0xd0,(04),0xc0,(04),0xd2,(04) },
+	{ ASHLL,	yshl,	Px, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },
+	{ ASHLQ,	yshl,	Pw, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },
+	{ ASHLW,	yshl,	Pe, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },
+	{ ASHRB,	yshb,	Pb, 0xd0,(05),0xc0,(05),0xd2,(05) },
+	{ ASHRL,	yshl,	Px, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) },
+	{ ASHRQ,	yshl,	Pw, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) },
+	{ ASHRW,	yshl,	Pe, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) },
+	{ ASHUFPD,	yxshuf,	Pq, 0xc6 },
+	{ ASHUFPS,	yxshuf,	Pm, 0xc6 },
+	{ ASQRTPD,	yxm,	Pe, 0x51 },
+	{ ASQRTPS,	yxm,	Pm, 0x51 },
+	{ ASQRTSD,	yxm,	Pf2, 0x51 },
+	{ ASQRTSS,	yxm,	Pf3, 0x51 },
+	{ ASTC,		ynone,	Px, 0xf9 },
+	{ ASTD,		ynone,	Px, 0xfd },
+	{ ASTI,		ynone,	Px, 0xfb },
+	{ ASTMXCSR,	ysvrs,	Pm, 0xae,(03),0xae,(03) },
+	{ ASTOSB,	ynone,	Pb, 0xaa },
+	{ ASTOSL,	ynone,	Px, 0xab },
+	{ ASTOSQ,	ynone,	Pw, 0xab },
+	{ ASTOSW,	ynone,	Pe, 0xab },
+	{ ASUBB,	yxorb,	Pb, 0x2c,0x80,(05),0x28,0x2a },
+	{ ASUBL,	yaddl,	Px, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },
+	{ ASUBPD,	yxm,	Pe, 0x5c },
+	{ ASUBPS,	yxm,	Pm, 0x5c },
+	{ ASUBQ,	yaddl,	Pw, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },
+	{ ASUBSD,	yxm,	Pf2, 0x5c },
+	{ ASUBSS,	yxm,	Pf3, 0x5c },
+	{ ASUBW,	yaddl,	Pe, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },
+	{ ASWAPGS,	ynone,	Pm, 0x01,0xf8 },
+	{ ASYSCALL,	ynone,	Px, 0x0f,0x05 },	/* fast syscall */
+	{ ATESTB,	ytestb,	Pb, 0xa8,0xf6,(00),0x84,0x84 },
+	{ ATESTL,	ytestl,	Px, 0xa9,0xf7,(00),0x85,0x85 },
+	{ ATESTQ,	ytestl,	Pw, 0xa9,0xf7,(00),0x85,0x85 },
+	{ ATESTW,	ytestl,	Pe, 0xa9,0xf7,(00),0x85,0x85 },
+	{ ATEXT,	ytext,	Px },
+	{ AUCOMISD,	yxcmp,	Pe, 0x2e },
+	{ AUCOMISS,	yxcmp,	Pm, 0x2e },
+	{ AUNPCKHPD,	yxm,	Pe, 0x15 },
+	{ AUNPCKHPS,	yxm,	Pm, 0x15 },
+	{ AUNPCKLPD,	yxm,	Pe, 0x14 },
+	{ AUNPCKLPS,	yxm,	Pm, 0x14 },
+	{ AVERR,	ydivl,	Pm, 0x00,(04) },
+	{ AVERW,	ydivl,	Pm, 0x00,(05) },
+	{ AWAIT,	ynone,	Px, 0x9b },
+	{ AWORD,	ybyte,	Px, 2 },
+	{ AXCHGB,	yml_mb,	Pb, 0x86,0x86 },
+	{ AXCHGL,	yml_ml,	Px, 0x87,0x87 },
+	{ AXCHGQ,	yml_ml,	Pw, 0x87,0x87 },
+	{ AXCHGW,	yml_ml,	Pe, 0x87,0x87 },
+	{ AXLAT,	ynone,	Px, 0xd7 },
+	{ AXORB,	yxorb,	Pb, 0x34,0x80,(06),0x30,0x32 },
+	{ AXORL,	yxorl,	Px, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
+	{ AXORPD,	yxm,	Pe, 0x57 },
+	{ AXORPS,	yxm,	Pm, 0x57 },
+	{ AXORQ,	yxorl,	Pw, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
+	{ AXORW,	yxorl,	Pe, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
+
+	{ AFMOVB,	yfmvx,	Px, 0xdf,(04) },
+	{ AFMOVBP,	yfmvp,	Px, 0xdf,(06) },
+	{ AFMOVD,	yfmvd,	Px, 0xdd,(00),0xdd,(02),0xd9,(00),0xdd,(02) },
+	{ AFMOVDP,	yfmvdp,	Px, 0xdd,(03),0xdd,(03) },
+	{ AFMOVF,	yfmvf,	Px, 0xd9,(00),0xd9,(02) },
+	{ AFMOVFP,	yfmvp,	Px, 0xd9,(03) },
+	{ AFMOVL,	yfmvf,	Px, 0xdb,(00),0xdb,(02) },
+	{ AFMOVLP,	yfmvp,	Px, 0xdb,(03) },
+	{ AFMOVV,	yfmvx,	Px, 0xdf,(05) },
+	{ AFMOVVP,	yfmvp,	Px, 0xdf,(07) },
+	{ AFMOVW,	yfmvf,	Px, 0xdf,(00),0xdf,(02) },
+	{ AFMOVWP,	yfmvp,	Px, 0xdf,(03) },
+	{ AFMOVX,	yfmvx,	Px, 0xdb,(05) },
+	{ AFMOVXP,	yfmvp,	Px, 0xdb,(07) },
+
+	{ AFCOMB },
+	{ AFCOMBP },
+	{ AFCOMD,	yfadd,	Px, 0xdc,(02),0xd8,(02),0xdc,(02) },	/* botch */
+	{ AFCOMDP,	yfadd,	Px, 0xdc,(03),0xd8,(03),0xdc,(03) },	/* botch */
+	{ AFCOMDPP,	ycompp,	Px, 0xde,(03) },
+	{ AFCOMF,	yfmvx,	Px, 0xd8,(02) },
+	{ AFCOMFP,	yfmvx,	Px, 0xd8,(03) },
+	{ AFCOML,	yfmvx,	Px, 0xda,(02) },
+	{ AFCOMLP,	yfmvx,	Px, 0xda,(03) },
+	{ AFCOMW,	yfmvx,	Px, 0xde,(02) },
+	{ AFCOMWP,	yfmvx,	Px, 0xde,(03) },
+
+	{ AFUCOM,	ycompp,	Px, 0xdd,(04) },
+	{ AFUCOMP,	ycompp, Px, 0xdd,(05) },
+	{ AFUCOMPP,	ycompp,	Px, 0xda,(13) },
+
+	{ AFADDDP,	yfaddp,	Px, 0xde,(00) },
+	{ AFADDW,	yfmvx,	Px, 0xde,(00) },
+	{ AFADDL,	yfmvx,	Px, 0xda,(00) },
+	{ AFADDF,	yfmvx,	Px, 0xd8,(00) },
+	{ AFADDD,	yfadd,	Px, 0xdc,(00),0xd8,(00),0xdc,(00) },
+
+	{ AFMULDP,	yfaddp,	Px, 0xde,(01) },
+	{ AFMULW,	yfmvx,	Px, 0xde,(01) },
+	{ AFMULL,	yfmvx,	Px, 0xda,(01) },
+	{ AFMULF,	yfmvx,	Px, 0xd8,(01) },
+	{ AFMULD,	yfadd,	Px, 0xdc,(01),0xd8,(01),0xdc,(01) },
+
+	{ AFSUBDP,	yfaddp,	Px, 0xde,(05) },
+	{ AFSUBW,	yfmvx,	Px, 0xde,(04) },
+	{ AFSUBL,	yfmvx,	Px, 0xda,(04) },
+	{ AFSUBF,	yfmvx,	Px, 0xd8,(04) },
+	{ AFSUBD,	yfadd,	Px, 0xdc,(04),0xd8,(04),0xdc,(05) },
+
+	{ AFSUBRDP,	yfaddp,	Px, 0xde,(04) },
+	{ AFSUBRW,	yfmvx,	Px, 0xde,(05) },
+	{ AFSUBRL,	yfmvx,	Px, 0xda,(05) },
+	{ AFSUBRF,	yfmvx,	Px, 0xd8,(05) },
+	{ AFSUBRD,	yfadd,	Px, 0xdc,(05),0xd8,(05),0xdc,(04) },
+
+	{ AFDIVDP,	yfaddp,	Px, 0xde,(07) },
+	{ AFDIVW,	yfmvx,	Px, 0xde,(06) },
+	{ AFDIVL,	yfmvx,	Px, 0xda,(06) },
+	{ AFDIVF,	yfmvx,	Px, 0xd8,(06) },
+	{ AFDIVD,	yfadd,	Px, 0xdc,(06),0xd8,(06),0xdc,(07) },
+
+	{ AFDIVRDP,	yfaddp,	Px, 0xde,(06) },
+	{ AFDIVRW,	yfmvx,	Px, 0xde,(07) },
+	{ AFDIVRL,	yfmvx,	Px, 0xda,(07) },
+	{ AFDIVRF,	yfmvx,	Px, 0xd8,(07) },
+	{ AFDIVRD,	yfadd,	Px, 0xdc,(07),0xd8,(07),0xdc,(06) },
+
+	{ AFXCHD,	yfxch,	Px, 0xd9,(01),0xd9,(01) },
+	{ AFFREE },
+	{ AFLDCW,	ystcw,	Px, 0xd9,(05),0xd9,(05) },
+	{ AFLDENV,	ystcw,	Px, 0xd9,(04),0xd9,(04) },
+	{ AFRSTOR,	ysvrs,	Px, 0xdd,(04),0xdd,(04) },
+	{ AFSAVE,	ysvrs,	Px, 0xdd,(06),0xdd,(06) },
+	{ AFSTCW,	ystcw,	Px, 0xd9,(07),0xd9,(07) },
+	{ AFSTENV,	ystcw,	Px, 0xd9,(06),0xd9,(06) },
+	{ AFSTSW,	ystsw,	Px, 0xdd,(07),0xdf,0xe0 },
+	{ AF2XM1,	ynone,	Px, 0xd9, 0xf0 },
+	{ AFABS,	ynone,	Px, 0xd9, 0xe1 },
+	{ AFCHS,	ynone,	Px, 0xd9, 0xe0 },
+	{ AFCLEX,	ynone,	Px, 0xdb, 0xe2 },
+	{ AFCOS,	ynone,	Px, 0xd9, 0xff },
+	{ AFDECSTP,	ynone,	Px, 0xd9, 0xf6 },
+	{ AFINCSTP,	ynone,	Px, 0xd9, 0xf7 },
+	{ AFINIT,	ynone,	Px, 0xdb, 0xe3 },
+	{ AFLD1,	ynone,	Px, 0xd9, 0xe8 },
+	{ AFLDL2E,	ynone,	Px, 0xd9, 0xea },
+	{ AFLDL2T,	ynone,	Px, 0xd9, 0xe9 },
+	{ AFLDLG2,	ynone,	Px, 0xd9, 0xec },
+	{ AFLDLN2,	ynone,	Px, 0xd9, 0xed },
+	{ AFLDPI,	ynone,	Px, 0xd9, 0xeb },
+	{ AFLDZ,	ynone,	Px, 0xd9, 0xee },
+	{ AFNOP,	ynone,	Px, 0xd9, 0xd0 },
+	{ AFPATAN,	ynone,	Px, 0xd9, 0xf3 },
+	{ AFPREM,	ynone,	Px, 0xd9, 0xf8 },
+	{ AFPREM1,	ynone,	Px, 0xd9, 0xf5 },
+	{ AFPTAN,	ynone,	Px, 0xd9, 0xf2 },
+	{ AFRNDINT,	ynone,	Px, 0xd9, 0xfc },
+	{ AFSCALE,	ynone,	Px, 0xd9, 0xfd },
+	{ AFSIN,	ynone,	Px, 0xd9, 0xfe },
+	{ AFSINCOS,	ynone,	Px, 0xd9, 0xfb },
+	{ AFSQRT,	ynone,	Px, 0xd9, 0xfa },
+	{ AFTST,	ynone,	Px, 0xd9, 0xe4 },
+	{ AFXAM,	ynone,	Px, 0xd9, 0xe5 },
+	{ AFXTRACT,	ynone,	Px, 0xd9, 0xf4 },
+	{ AFYL2X,	ynone,	Px, 0xd9, 0xf1 },
+	{ AFYL2XP1,	ynone,	Px, 0xd9, 0xf9 },
+
+	{ ACMPXCHGB,	yrb_mb,	Pb, 0x0f,0xb0 },
+	{ ACMPXCHGL,	yrl_ml,	Px, 0x0f,0xb1 },
+	{ ACMPXCHGW,	yrl_ml,	Pe, 0x0f,0xb1 },
+	{ ACMPXCHGQ,	yrl_ml,	Pw, 0x0f,0xb1 },
+	{ ACMPXCHG8B,	yscond,	Pm, 0xc7,(01) },
+	{ AINVD,	ynone,	Pm, 0x08 },
+	{ AINVLPG,	ymbs,	Pm, 0x01,(07) },
+	{ ALFENCE,	ynone,	Pm, 0xae,0xe8 },
+	{ AMFENCE,	ynone,	Pm, 0xae,0xf0 },
+	{ AMOVNTIL,	yrl_ml,	Pm, 0xc3 },
+	{ AMOVNTIQ,	yrl_ml, Pw, 0x0f,0xc3 },
+	{ ARDMSR,	ynone,	Pm, 0x32 },
+	{ ARDPMC,	ynone,	Pm, 0x33 },
+	{ ARDTSC,	ynone,	Pm, 0x31 },
+	{ ARSM,		ynone,	Pm, 0xaa },
+	{ ASFENCE,	ynone,	Pm, 0xae,0xf8 },
+	{ ASYSRET,	ynone,	Pm, 0x07 },
+	{ AWBINVD,	ynone,	Pm, 0x09 },
+	{ AWRMSR,	ynone,	Pm, 0x30 },
+
+	{ AXADDB,	yrb_mb,	Pb, 0x0f,0xc0 },
+	{ AXADDL,	yrl_ml,	Px, 0x0f,0xc1 },
+	{ AXADDQ,	yrl_ml,	Pw, 0x0f,0xc1 },
+	{ AXADDW,	yrl_ml,	Pe, 0x0f,0xc1 },
+
+	{ AEND },
+	0
+};
+
+Optab*	opindex[ALAST+1];
+
+/*
+AMOVD	0f 6e/r	mmx,reg/mem32[mem64-rex?]
+AMOVD	0f 7e/r	reg/mem32[64],mmx	STORE
+AMOVQ	0f 6f/r	mmx1,mmx2/mem64
+AMOVQ	0f 7f/r	mmx1/mem64,mmx2
+*/

+ 790 - 0
sys/src/cmd/6l/pass.c

@@ -0,0 +1,790 @@
+#include	"l.h"
+
+void
+dodata(void)
+{
+	int i;
+	Sym *s;
+	Prog *p;
+	long t, u;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f dodata\n", cputime());
+	Bflush(&bso);
+	for(p = datap; p != P; p = p->link) {
+		s = p->from.sym;
+		if(p->as == ADYNT || p->as == AINIT)
+			s->value = dtype;
+		if(s->type == SBSS)
+			s->type = SDATA;
+		if(s->type != SDATA)
+			diag("initialize non-data (%d): %s\n%P",
+				s->type, s->name, p);
+		t = p->from.offset + p->width;
+		if(t > s->value)
+			diag("initialize bounds (%lld): %s\n%P",
+				s->value, s->name, p);
+	}
+	/* allocate small guys */
+	datsize = 0;
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		if(s->type != SDATA)
+		if(s->type != SBSS)
+			continue;
+		t = s->value;
+		if(t == 0) {
+			diag("%s: no size", s->name);
+			t = 1;
+		}
+		t = rnd(t, 4);
+		s->value = t;
+		if(t > MINSIZ)
+			continue;
+		if(t >= 8)
+			datsize = rnd(datsize, 8);
+		s->value = datsize;
+		datsize += t;
+		s->type = SDATA1;
+	}
+
+	/* allocate the rest of the data */
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		if(s->type != SDATA) {
+			if(s->type == SDATA1)
+				s->type = SDATA;
+			continue;
+		}
+		t = s->value;
+		if(t >= 8)
+			datsize = rnd(datsize, 8);
+		s->value = datsize;
+		datsize += t;
+	}
+	if(datsize)
+		datsize = rnd(datsize, 8);
+
+	if(debug['j']) {
+		/*
+		 * pad data with bss that fits up to next
+		 * 8k boundary, then push data to 8k
+		 */
+		u = rnd(datsize, 8192);
+		u -= datsize;
+		for(i=0; i<NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link) {
+			if(s->type != SBSS)
+				continue;
+			t = s->value;
+			if(t > u)
+				continue;
+			u -= t;
+			s->value = datsize;
+			s->type = SDATA;
+			datsize += t;
+		}
+		datsize += u;
+	}
+
+	/* now the bss */
+	bsssize = 0;
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link) {
+		if(s->type != SBSS)
+			continue;
+		t = s->value;
+		if(t >= 8)
+			bsssize = rnd(bsssize, 8);
+		s->value = bsssize + datsize;
+		bsssize += t;
+	}
+	xdefine("edata", SBSS, datsize);
+	xdefine("end", SBSS, bsssize + datsize);
+}
+
+Prog*
+brchain(Prog *p)
+{
+	int i;
+
+	for(i=0; i<20; i++) {
+		if(p == P || p->as != AJMP)
+			return p;
+		p = p->pcond;
+	}
+	return P;
+}
+
+void
+follow(void)
+{
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f follow\n", cputime());
+	Bflush(&bso);
+	firstp = prg();
+	lastp = firstp;
+	xfol(textp);
+	lastp->link = P;
+	firstp = firstp->link;
+}
+
+void
+xfol(Prog *p)
+{
+	Prog *q;
+	int i;
+	enum as a;
+
+loop:
+	if(p == P)
+		return;
+	if(p->as == ATEXT)
+		curtext = p;
+	if(p->as == AJMP)
+	if((q = p->pcond) != P) {
+		p->mark = 1;
+		p = q;
+		if(p->mark == 0)
+			goto loop;
+	}
+	if(p->mark) {
+		/* copy up to 4 instructions to avoid branch */
+		for(i=0,q=p; i<4; i++,q=q->link) {
+			if(q == P)
+				break;
+			if(q == lastp)
+				break;
+			a = q->as;
+			if(a == ANOP) {
+				i--;
+				continue;
+			}
+			switch(a) {
+			case AJMP:
+			case ARET:
+			case AIRETL:
+			case AIRETQ:
+			case AIRETW:
+			case ARETFL:
+			case ARETFQ:
+			case ARETFW:
+
+			case APUSHL:
+			case APUSHFL:
+			case APUSHQ:
+			case APUSHFQ:
+			case APUSHW:
+			case APUSHFW:
+			case APOPL:
+			case APOPFL:
+			case APOPQ:
+			case APOPFQ:
+			case APOPW:
+			case APOPFW:
+				goto brk;
+			}
+			if(q->pcond == P || q->pcond->mark)
+				continue;
+			if(a == ACALL || a == ALOOP)
+				continue;
+			for(;;) {
+				if(p->as == ANOP) {
+					p = p->link;
+					continue;
+				}
+				q = copyp(p);
+				p = p->link;
+				q->mark = 1;
+				lastp->link = q;
+				lastp = q;
+				if(q->as != a || q->pcond == P || q->pcond->mark)
+					continue;
+
+				q->as = relinv(q->as);
+				p = q->pcond;
+				q->pcond = q->link;
+				q->link = p;
+				xfol(q->link);
+				p = q->link;
+				if(p->mark)
+					return;
+				goto loop;
+			}
+		} /* */
+	brk:;
+		q = prg();
+		q->as = AJMP;
+		q->line = p->line;
+		q->to.type = D_BRANCH;
+		q->to.offset = p->pc;
+		q->pcond = p;
+		p = q;
+	}
+	p->mark = 1;
+	lastp->link = p;
+	lastp = p;
+	a = p->as;
+	if(a == AJMP || a == ARET || a == AIRETL || a == AIRETQ || a == AIRETW ||
+	   a == ARETFL || a == ARETFQ || a == ARETFW)
+		return;
+	if(p->pcond != P)
+	if(a != ACALL) {
+		q = brchain(p->link);
+		if(q != P && q->mark)
+		if(a != ALOOP) {
+			p->as = relinv(a);
+			p->link = p->pcond;
+			p->pcond = q;
+		}
+		xfol(p->link);
+		q = brchain(p->pcond);
+		if(q->mark) {
+			p->pcond = q;
+			return;
+		}
+		p = q;
+		goto loop;
+	}
+	p = p->link;
+	goto loop;
+}
+
+int
+relinv(int a)
+{
+
+	switch(a) {
+	case AJEQ:	return AJNE;
+	case AJNE:	return AJEQ;
+	case AJLE:	return AJGT;
+	case AJLS:	return AJHI;
+	case AJLT:	return AJGE;
+	case AJMI:	return AJPL;
+	case AJGE:	return AJLT;
+	case AJPL:	return AJMI;
+	case AJGT:	return AJLE;
+	case AJHI:	return AJLS;
+	case AJCS:	return AJCC;
+	case AJCC:	return AJCS;
+	case AJPS:	return AJPC;
+	case AJPC:	return AJPS;
+	case AJOS:	return AJOC;
+	case AJOC:	return AJOS;
+	}
+	diag("unknown relation: %s in %s", anames[a], TNAME);
+	return a;
+}
+
+void
+doinit(void)
+{
+	Sym *s;
+	Prog *p;
+	int x;
+
+	for(p = datap; p != P; p = p->link) {
+		x = p->to.type;
+		if(x != D_EXTERN && x != D_STATIC)
+			continue;
+		s = p->to.sym;
+		if(s->type == 0 || s->type == SXREF)
+			diag("undefined %s initializer of %s",
+				s->name, p->from.sym->name);
+		p->to.offset += s->value;
+		p->to.type = D_CONST;
+		if(s->type == SDATA || s->type == SBSS)
+			p->to.offset += INITDAT;
+	}
+}
+
+void
+patch(void)
+{
+	long c;
+	Prog *p, *q;
+	Sym *s;
+	long vexit;
+
+	if(debug['v'])
+		Bprint(&bso, "%5.2f mkfwd\n", cputime());
+	Bflush(&bso);
+	mkfwd();
+	if(debug['v'])
+		Bprint(&bso, "%5.2f patch\n", cputime());
+	Bflush(&bso);
+	s = lookup("exit", 0);
+	vexit = s->value;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		if(p->as == ACALL || p->as == ARET) {
+			s = p->to.sym;
+			if(s) {
+				if(debug['c'])
+					Bprint(&bso, "%s calls %s\n", TNAME, s->name);
+				switch(s->type) {
+				default:
+					diag("undefined: %s in %s", s->name, TNAME);
+					s->type = STEXT;
+					s->value = vexit;
+					break;	/* or fall through to set offset? */
+				case STEXT:
+					p->to.offset = s->value;
+					break;
+				case SUNDEF:
+					p->pcond = UP;
+					p->to.offset = 0;
+					break;
+				}
+				p->to.type = D_BRANCH;
+			}
+		}
+		if(p->to.type != D_BRANCH || p->pcond == UP)
+			continue;
+		c = p->to.offset;
+		for(q = firstp; q != P;) {
+			if(q->forwd != P)
+			if(c >= q->forwd->pc) {
+				q = q->forwd;
+				continue;
+			}
+			if(c == q->pc)
+				break;
+			q = q->link;
+		}
+		if(q == P) {
+			diag("branch out of range in %s\n%P", TNAME, p);
+			p->to.type = D_NONE;
+		}
+		p->pcond = q;
+	}
+
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		p->mark = 0;	/* initialization for follow */
+		if(p->pcond != P && p->pcond != UP) {
+			p->pcond = brloop(p->pcond);
+			if(p->pcond != P)
+			if(p->to.type == D_BRANCH)
+				p->to.offset = p->pcond->pc;
+		}
+	}
+}
+
+#define	LOG	5
+void
+mkfwd(void)
+{
+	Prog *p;
+	int i;
+	long dwn[LOG], cnt[LOG];
+	Prog *lst[LOG];
+
+	for(i=0; i<LOG; i++) {
+		if(i == 0)
+			cnt[i] = 1; else
+			cnt[i] = LOG * cnt[i-1];
+		dwn[i] = 1;
+		lst[i] = P;
+	}
+	i = 0;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		i--;
+		if(i < 0)
+			i = LOG-1;
+		p->forwd = P;
+		dwn[i]--;
+		if(dwn[i] <= 0) {
+			dwn[i] = cnt[i];
+			if(lst[i] != P)
+				lst[i]->forwd = p;
+			lst[i] = p;
+		}
+	}
+}
+
+Prog*
+brloop(Prog *p)
+{
+	int c;
+	Prog *q;
+
+	c = 0;
+	for(q = p; q != P; q = q->pcond) {
+		if(q->as != AJMP)
+			break;
+		c++;
+		if(c >= 5000)
+			return P;
+	}
+	return q;
+}
+
+void
+dostkoff(void)
+{
+	Prog *p, *q;
+	long autoffset, deltasp;
+	int a, f, curframe, curbecome, maxbecome, pcsize;
+
+	curframe = 0;
+	curbecome = 0;
+	maxbecome = 0;
+	curtext = 0;
+	for(p = firstp; p != P; p = p->link) {
+
+		/* find out how much arg space is used in this TEXT */
+		if(p->to.type == (D_INDIR+D_SP))
+			if(p->to.offset > curframe)
+				curframe = p->to.offset;
+
+		switch(p->as) {
+		case ATEXT:
+			if(curtext && curtext->from.sym) {
+				curtext->from.sym->frame = curframe;
+				curtext->from.sym->become = curbecome;
+				if(curbecome > maxbecome)
+					maxbecome = curbecome;
+			}
+			curframe = 0;
+			curbecome = 0;
+
+			curtext = p;
+			break;
+
+		case ARET:
+			/* special form of RET is BECOME */
+			if(p->from.type == D_CONST)
+				if(p->from.offset > curbecome)
+					curbecome = p->from.offset;
+			break;
+		}
+	}
+	if(curtext && curtext->from.sym) {
+		curtext->from.sym->frame = curframe;
+		curtext->from.sym->become = curbecome;
+		if(curbecome > maxbecome)
+			maxbecome = curbecome;
+	}
+
+	if(debug['b'])
+		print("max become = %d\n", maxbecome);
+	xdefine("ALEFbecome", STEXT, maxbecome);
+
+	curtext = 0;
+	for(p = firstp; p != P; p = p->link) {
+		switch(p->as) {
+		case ATEXT:
+			curtext = p;
+			break;
+		case ACALL:
+			if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
+				f = maxbecome - curtext->from.sym->frame;
+				if(f <= 0)
+					break;
+				/* calling a become or calling a variable */
+				if(p->to.sym == S || p->to.sym->become) {
+					curtext->to.offset += f;
+					if(debug['b']) {
+						curp = p;
+						print("%D calling %D increase %d\n",
+							&curtext->from, &p->to, f);
+					}
+				}
+			}
+			break;
+		}
+	}
+
+	autoffset = 0;
+	deltasp = 0;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT) {
+			curtext = p;
+			autoffset = p->to.offset;
+			if(autoffset < 0)
+				autoffset = 0;
+			if(autoffset) {
+				p = appendp(p);
+				p->as = AADJSP;
+				p->from.type = D_CONST;
+				p->from.offset = autoffset;
+			}
+			deltasp = autoffset;
+		}
+		pcsize = p->mode/8;
+		a = p->from.type;
+		if(a == D_AUTO)
+			p->from.offset += deltasp;
+		if(a == D_PARAM)
+			p->from.offset += deltasp + pcsize;
+		a = p->to.type;
+		if(a == D_AUTO)
+			p->to.offset += deltasp;
+		if(a == D_PARAM)
+			p->to.offset += deltasp + pcsize;
+
+		switch(p->as) {
+		default:
+			continue;
+		case APUSHL:
+		case APUSHFL:
+			deltasp += 4;
+			continue;
+		case APUSHQ:
+		case APUSHFQ:
+			deltasp += 8;
+			continue;
+		case APUSHW:
+		case APUSHFW:
+			deltasp += 2;
+			continue;
+		case APOPL:
+		case APOPFL:
+			deltasp -= 4;
+			continue;
+		case APOPQ:
+		case APOPFQ:
+			deltasp -= 8;
+			continue;
+		case APOPW:
+		case APOPFW:
+			deltasp -= 2;
+			continue;
+		case ARET:
+			break;
+		}
+
+		if(autoffset != deltasp)
+			diag("unbalanced PUSH/POP");
+		if(p->from.type == D_CONST)
+			goto become;
+
+		if(autoffset) {
+			q = p;
+			p = appendp(p);
+			p->as = ARET;
+
+			q->as = AADJSP;
+			q->from.type = D_CONST;
+			q->from.offset = -autoffset;
+		}
+		continue;
+
+	become:
+		q = p;
+		p = appendp(p);
+		p->as = AJMP;
+		p->to = q->to;
+		p->pcond = q->pcond;
+
+		q->as = AADJSP;
+		q->from = zprg.from;
+		q->from.type = D_CONST;
+		q->from.offset = -autoffset;
+		q->to = zprg.to;
+		continue;
+	}
+}
+
+vlong
+atolwhex(char *s)
+{
+	vlong n;
+	int f;
+
+	n = 0;
+	f = 0;
+	while(*s == ' ' || *s == '\t')
+		s++;
+	if(*s == '-' || *s == '+') {
+		if(*s++ == '-')
+			f = 1;
+		while(*s == ' ' || *s == '\t')
+			s++;
+	}
+	if(s[0]=='0' && s[1]){
+		if(s[1]=='x' || s[1]=='X'){
+			s += 2;
+			for(;;){
+				if(*s >= '0' && *s <= '9')
+					n = n*16 + *s++ - '0';
+				else if(*s >= 'a' && *s <= 'f')
+					n = n*16 + *s++ - 'a' + 10;
+				else if(*s >= 'A' && *s <= 'F')
+					n = n*16 + *s++ - 'A' + 10;
+				else
+					break;
+			}
+		} else
+			while(*s >= '0' && *s <= '7')
+				n = n*8 + *s++ - '0';
+	} else
+		while(*s >= '0' && *s <= '9')
+			n = n*10 + *s++ - '0';
+	if(f)
+		n = -n;
+	return n;
+}
+
+void
+undef(void)
+{
+	int i;
+	Sym *s;
+
+	for(i=0; i<NHASH; i++)
+	for(s = hash[i]; s != S; s = s->link)
+		if(s->type == SXREF)
+			diag("%s: not defined", s->name);
+}
+
+void
+import(void)
+{
+	int i;
+	Sym *s;
+
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->sig != 0 && s->type == SXREF && (nimports == 0 || s->subtype == SIMPORT)){
+				if(s->value != 0)
+					diag("value != 0 on SXREF");
+				undefsym(s);
+				Bprint(&bso, "IMPORT: %s sig=%lux v=%lld\n", s->name, s->sig, s->value);
+				if(debug['S'])
+					s->sig = 0;
+			}
+}
+
+void
+ckoff(Sym *s, long v)
+{
+	if(v < 0 || v >= 1<<Roffset)
+		diag("relocation offset %ld for %s out of range", v, s->name);
+}
+
+static Prog*
+newdata(Sym *s, int o, int w, int t)
+{
+	Prog *p;
+
+	p = prg();
+	if(edatap == P)
+		datap = p;
+	else
+		edatap->link = p;
+	edatap = p;
+	p->as = ADATA;
+	p->width = w;
+	p->from.scale = w;
+	p->from.type = t;
+	p->from.sym = s;
+	p->from.offset = o;
+	p->to.type = D_CONST;
+	return p;
+}
+
+void
+export(void)
+{
+	int i, j, n, off, nb, sv, ne;
+	Sym *s, *et, *str, **esyms;
+	Prog *p;
+	char buf[NSNAME], *t;
+
+	n = 0;
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->sig != 0 && s->type != SXREF && s->type != SUNDEF && (nexports == 0 || s->subtype == SEXPORT))
+				n++;
+	esyms = malloc(n*sizeof(Sym*));
+	ne = n;
+	n = 0;
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->sig != 0 && s->type != SXREF && s->type != SUNDEF && (nexports == 0 || s->subtype == SEXPORT))
+				esyms[n++] = s;
+	for(i = 0; i < ne-1; i++)
+		for(j = i+1; j < ne; j++)
+			if(strcmp(esyms[i]->name, esyms[j]->name) > 0){
+				s = esyms[i];
+				esyms[i] = esyms[j];
+				esyms[j] = s;
+			}
+
+	nb = 0;
+	off = 0;
+	et = lookup(EXPTAB, 0);
+	if(et->type != 0 && et->type != SXREF)
+		diag("%s already defined", EXPTAB);
+	et->type = SDATA;
+	str = lookup(".string", 0);
+	if(str->type == 0)
+		str->type = SDATA;
+	sv = str->value;
+	for(i = 0; i < ne; i++){
+		s = esyms[i];
+		if(debug['S'])
+			s->sig = 0;
+		/* Bprint(&bso, "EXPORT: %s sig=%lux t=%d\n", s->name, s->sig, s->type); */
+
+		/* signature */
+		p = newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);
+		p->to.offset = s->sig;
+
+		/* address */
+		p = newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);
+		p->to.type = D_ADDR;
+		p->to.index = D_EXTERN;
+		p->to.sym = s;
+
+		/* string */
+		t = s->name;
+		n = strlen(t)+1;
+		for(;;){
+			buf[nb++] = *t;
+			sv++;
+			if(nb >= NSNAME){
+				p = newdata(str, sv-NSNAME, NSNAME, D_STATIC);
+				p->to.type = D_SCONST;
+				memmove(p->to.scon, buf, NSNAME);
+				nb = 0;
+			}
+			if(*t++ == 0)
+				break;
+		}
+
+		/* name */
+		p = newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);
+		p->to.type = D_ADDR;
+		p->to.index = D_STATIC;
+		p->to.sym = str;
+		p->to.offset = sv-n;
+	}
+
+	if(nb > 0){
+		p = newdata(str, sv-nb, nb, D_STATIC);
+		p->to.type = D_SCONST;
+		memmove(p->to.scon, buf, nb);
+	}
+
+	for(i = 0; i < 3; i++){
+		newdata(et, off, sizeof(long), D_EXTERN);
+		off += sizeof(long);
+	}
+	et->value = off;
+	if(sv == 0)
+		sv = 1;
+	str->value = sv;
+	exports = ne;
+	free(esyms);
+}

+ 1746 - 0
sys/src/cmd/6l/span.c

@@ -0,0 +1,1746 @@
+#include	"l.h"
+
+static int	rexflag;
+static int	asmode;
+
+void
+span(void)
+{
+	Prog *p, *q;
+	long v;
+	vlong c, idat;
+	int m, n, again;
+
+	xdefine("etext", STEXT, 0L);
+	idat = INITDAT;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		n = 0;
+		if(p->to.type == D_BRANCH)
+			if(p->pcond == P)
+				p->pcond = p;
+		if((q = p->pcond) != P)
+			if(q->back != 2)
+				n = 1;
+		p->back = n;
+		if(p->as == AADJSP) {
+			p->to.type = D_SP;
+			v = -p->from.offset;
+			p->from.offset = v;
+			p->as = p->mode != 64? AADDL: AADDQ;
+			if(v < 0) {
+				p->as = p->mode != 64? ASUBL: ASUBQ;
+				v = -v;
+				p->from.offset = v;
+			}
+			if(v == 0)
+				p->as = ANOP;
+		}
+	}
+	n = 0;
+
+start:
+	if(debug['v'])
+		Bprint(&bso, "%5.2f span\n", cputime());
+	Bflush(&bso);
+	c = INITTEXT;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		if(p->to.type == D_BRANCH)
+			if(p->back)
+				p->pc = c;
+		asmins(p);
+		p->pc = c;
+		m = andptr-and;
+		p->mark = m;
+		c += m;
+	}
+
+loop:
+	n++;
+	if(debug['v'])
+		Bprint(&bso, "%5.2f span %d\n", cputime(), n);
+	Bflush(&bso);
+	if(n > 50) {
+		print("span must be looping\n");
+		errorexit();
+	}
+	again = 0;
+	c = INITTEXT;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->as == ATEXT)
+			curtext = p;
+		if(p->to.type == D_BRANCH || p->back & 0100) {
+			if(p->back)
+				p->pc = c;
+			asmins(p);
+			m = andptr-and;
+			if(m != p->mark) {
+				p->mark = m;
+				again++;
+			}
+		}
+		p->pc = c;
+		c += p->mark;
+	}
+	if(again) {
+		textsize = c;
+		goto loop;
+	}
+	if(INITRND) {
+		INITDAT = rnd(c, INITRND);
+		if(INITDAT != idat) {
+			idat = INITDAT;
+			goto start;
+		}
+	}
+	xdefine("etext", STEXT, c);
+	if(debug['v'])
+		Bprint(&bso, "etext = %llux\n", c);
+	Bflush(&bso);
+	for(p = textp; p != P; p = p->pcond)
+		p->from.sym->value = p->pc;
+	textsize = c - INITTEXT;
+}
+
+void
+xdefine(char *p, int t, vlong v)
+{
+	Sym *s;
+
+	s = lookup(p, 0);
+	if(s->type == 0 || s->type == SXREF) {
+		s->type = t;
+		s->value = v;
+	}
+	if(s->type == STEXT && s->value == 0)
+		s->value = v;
+}
+
+void
+putsymb(char *s, int t, vlong v, int ver)
+{
+	int i, f, l;
+
+	if(t == 'f')
+		s++;
+	l = 4;
+	if(!debug['8']){
+		lput(v>>32);
+		l = 8;
+	}
+	lput(v);
+	if(ver)
+		t += 'a' - 'A';
+	cput(t+0x80);			/* 0x80 is variable length */
+
+	if(t == 'Z' || t == 'z') {
+		cput(s[0]);
+		for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
+			cput(s[i]);
+			cput(s[i+1]);
+		}
+		cput(0);
+		cput(0);
+		i++;
+	}
+	else {
+		for(i=0; s[i]; i++)
+			cput(s[i]);
+		cput(0);
+	}
+	symsize += l + 1 + i + 1;
+
+	if(debug['n']) {
+		if(t == 'z' || t == 'Z') {
+			Bprint(&bso, "%c %.8llux ", t, v);
+			for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
+				f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
+				Bprint(&bso, "/%x", f);
+			}
+			Bprint(&bso, "\n");
+			return;
+		}
+		if(ver)
+			Bprint(&bso, "%c %.8llux %s<%d>\n", t, v, s, ver);
+		else
+			Bprint(&bso, "%c %.8llux %s\n", t, v, s);
+	}
+}
+
+void
+asmsym(void)
+{
+	Prog *p;
+	Auto *a;
+	Sym *s;
+	int h;
+
+	s = lookup("etext", 0);
+	if(s->type == STEXT)
+		putsymb(s->name, 'T', s->value, s->version);
+
+	for(h=0; h<NHASH; h++)
+		for(s=hash[h]; s!=S; s=s->link)
+			switch(s->type) {
+			case SCONST:
+				putsymb(s->name, 'D', s->value, s->version);
+				continue;
+
+			case SDATA:
+				putsymb(s->name, 'D', s->value+INITDAT, s->version);
+				continue;
+
+			case SBSS:
+				putsymb(s->name, 'B', s->value+INITDAT, s->version);
+				continue;
+
+			case SFILE:
+				putsymb(s->name, 'f', s->value, s->version);
+				continue;
+			}
+
+	for(p=textp; p!=P; p=p->pcond) {
+		s = p->from.sym;
+		if(s->type != STEXT)
+			continue;
+
+		/* filenames first */
+		for(a=p->to.autom; a; a=a->link)
+			if(a->type == D_FILE)
+				putsymb(a->asym->name, 'z', a->aoffset, 0);
+			else
+			if(a->type == D_FILE1)
+				putsymb(a->asym->name, 'Z', a->aoffset, 0);
+
+		putsymb(s->name, 'T', s->value, s->version);
+
+		/* frame, auto and param after */
+		putsymb(".frame", 'm', p->to.offset+8, 0);
+
+		for(a=p->to.autom; a; a=a->link)
+			if(a->type == D_AUTO)
+				putsymb(a->asym->name, 'a', -a->aoffset, 0);
+			else
+			if(a->type == D_PARAM)
+				putsymb(a->asym->name, 'p', a->aoffset, 0);
+	}
+	if(debug['v'] || debug['n'])
+		Bprint(&bso, "symsize = %lud\n", symsize);
+	Bflush(&bso);
+}
+
+void
+asmlc(void)
+{
+	vlong oldpc;
+	Prog *p;
+	long oldlc, v, s;
+
+	oldpc = INITTEXT;
+	oldlc = 0;
+	for(p = firstp; p != P; p = p->link) {
+		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
+			if(p->as == ATEXT)
+				curtext = p;
+			if(debug['L'])
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			continue;
+		}
+		if(debug['L'])
+			Bprint(&bso, "\t\t%6ld", lcsize);
+		v = (p->pc - oldpc) / MINLC;
+		while(v) {
+			s = 127;
+			if(v < 127)
+				s = v;
+			cput(s+128);	/* 129-255 +pc */
+			if(debug['L'])
+				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
+			v -= s;
+			lcsize++;
+		}
+		s = p->line - oldlc;
+		oldlc = p->line;
+		oldpc = p->pc + MINLC;
+		if(s > 64 || s < -64) {
+			cput(0);	/* 0 vv +lc */
+			cput(s>>24);
+			cput(s>>16);
+			cput(s>>8);
+			cput(s);
+			if(debug['L']) {
+				if(s > 0)
+					Bprint(&bso, " lc+%ld(%d,%ld)\n",
+						s, 0, s);
+				else
+					Bprint(&bso, " lc%ld(%d,%ld)\n",
+						s, 0, s);
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			}
+			lcsize += 5;
+			continue;
+		}
+		if(s > 0) {
+			cput(0+s);	/* 1-64 +lc */
+			if(debug['L']) {
+				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			}
+		} else {
+			cput(64-s);	/* 65-128 -lc */
+			if(debug['L']) {
+				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
+				Bprint(&bso, "%6llux %P\n",
+					p->pc, p);
+			}
+		}
+		lcsize++;
+	}
+	while(lcsize & 1) {
+		s = 129;
+		cput(s);
+		lcsize++;
+	}
+	if(debug['v'] || debug['L'])
+		Bprint(&bso, "lcsize = %ld\n", lcsize);
+	Bflush(&bso);
+}
+
+int
+oclass(Adr *a)
+{
+	vlong v;
+	long l;
+
+	if(a->type >= D_INDIR || a->index != D_NONE) {
+		if(a->index != D_NONE && a->scale == 0) {
+			if(a->type == D_ADDR) {
+				switch(a->index) {
+				case D_EXTERN:
+				case D_STATIC:
+					return Yi32;	/* TO DO: Yi64 */
+				case D_AUTO:
+				case D_PARAM:
+					return Yiauto;
+				}
+				return Yxxx;
+			}
+			return Ycol;
+		}
+		return Ym;
+	}
+	switch(a->type)
+	{
+	case D_AL:
+		return Yal;
+
+	case D_AX:
+		return Yax;
+
+/*
+	case D_SPB:
+*/
+	case D_BPB:
+	case D_SIB:
+	case D_DIB:
+	case D_R8B:
+	case D_R9B:
+	case D_R10B:
+	case D_R11B:
+	case D_R12B:
+	case D_R13B:
+	case D_R14B:
+	case D_R15B:
+		if(asmode != 64)
+			return Yxxx;
+	case D_DL:
+	case D_BL:
+	case D_AH:
+	case D_CH:
+	case D_DH:
+	case D_BH:
+		return Yrb;
+
+	case D_CL:
+		return Ycl;
+
+	case D_CX:
+		return Ycx;
+
+	case D_DX:
+	case D_BX:
+		return Yrx;
+
+	case D_R8:	/* not really Yrl */
+	case D_R9:
+	case D_R10:
+	case D_R11:
+	case D_R12:
+	case D_R13:
+	case D_R14:
+	case D_R15:
+		if(asmode != 64)
+			return Yxxx;
+	case D_SP:
+	case D_BP:
+	case D_SI:
+	case D_DI:
+		return Yrl;
+
+	case D_F0+0:
+		return	Yf0;
+
+	case D_F0+1:
+	case D_F0+2:
+	case D_F0+3:
+	case D_F0+4:
+	case D_F0+5:
+	case D_F0+6:
+	case D_F0+7:
+		return	Yrf;
+
+	case D_M0+0:
+	case D_M0+1:
+	case D_M0+2:
+	case D_M0+3:
+	case D_M0+4:
+	case D_M0+5:
+	case D_M0+6:
+	case D_M0+7:
+		return	Ymr;
+
+	case D_X0+0:
+	case D_X0+1:
+	case D_X0+2:
+	case D_X0+3:
+	case D_X0+4:
+	case D_X0+5:
+	case D_X0+6:
+	case D_X0+7:
+	case D_X0+8:
+	case D_X0+9:
+	case D_X0+10:
+	case D_X0+11:
+	case D_X0+12:
+	case D_X0+13:
+	case D_X0+14:
+	case D_X0+15:
+		return	Yxr;
+
+	case D_NONE:
+		return Ynone;
+
+	case D_CS:	return	Ycs;
+	case D_SS:	return	Yss;
+	case D_DS:	return	Yds;
+	case D_ES:	return	Yes;
+	case D_FS:	return	Yfs;
+	case D_GS:	return	Ygs;
+
+	case D_GDTR:	return	Ygdtr;
+	case D_IDTR:	return	Yidtr;
+	case D_LDTR:	return	Yldtr;
+	case D_MSW:	return	Ymsw;
+	case D_TASK:	return	Ytask;
+
+	case D_CR+0:	return	Ycr0;
+	case D_CR+1:	return	Ycr1;
+	case D_CR+2:	return	Ycr2;
+	case D_CR+3:	return	Ycr3;
+	case D_CR+4:	return	Ycr4;
+	case D_CR+5:	return	Ycr5;
+	case D_CR+6:	return	Ycr6;
+	case D_CR+7:	return	Ycr7;
+	case D_CR+8:	return	Ycr8;
+
+	case D_DR+0:	return	Ydr0;
+	case D_DR+1:	return	Ydr1;
+	case D_DR+2:	return	Ydr2;
+	case D_DR+3:	return	Ydr3;
+	case D_DR+4:	return	Ydr4;
+	case D_DR+5:	return	Ydr5;
+	case D_DR+6:	return	Ydr6;
+	case D_DR+7:	return	Ydr7;
+
+	case D_TR+0:	return	Ytr0;
+	case D_TR+1:	return	Ytr1;
+	case D_TR+2:	return	Ytr2;
+	case D_TR+3:	return	Ytr3;
+	case D_TR+4:	return	Ytr4;
+	case D_TR+5:	return	Ytr5;
+	case D_TR+6:	return	Ytr6;
+	case D_TR+7:	return	Ytr7;
+
+	case D_EXTERN:
+	case D_STATIC:
+	case D_AUTO:
+	case D_PARAM:
+		return Ym;
+
+	case D_CONST:
+	case D_ADDR:
+		if(a->sym == S) {
+			v = a->offset;
+			if(v == 0)
+				return Yi0;
+			if(v == 1)
+				return Yi1;
+			if(v >= -128 && v <= 127)
+				return Yi8;
+			l = v;
+			if((vlong)l == v)
+				return Ys32;	/* can sign extend */
+			if((v>>32) == 0)
+				return Yi32;	/* unsigned */
+			return Yi64;
+		}
+		return Yi32;	/* TO DO: D_ADDR as Yi64 */
+
+	case D_BRANCH:
+		return Ybr;
+	}
+	return Yxxx;
+}
+
+void
+asmidx(Adr *a, int base)
+{
+	int i;
+
+	switch(a->index) {
+	default:
+		goto bad;
+
+	case D_NONE:
+		i = 4 << 3;
+		goto bas;
+
+	case D_R8:
+	case D_R9:
+	case D_R10:
+	case D_R11:
+	case D_R12:
+	case D_R13:
+	case D_R14:
+	case D_R15:
+		if(asmode != 64)
+			goto bad;
+	case D_AX:
+	case D_CX:
+	case D_DX:
+	case D_BX:
+	case D_BP:
+	case D_SI:
+	case D_DI:
+		i = reg[a->index] << 3;
+		break;
+	}
+	switch(a->scale) {
+	default:
+		goto bad;
+	case 1:
+		break;
+	case 2:
+		i |= (1<<6);
+		break;
+	case 4:
+		i |= (2<<6);
+		break;
+	case 8:
+		i |= (3<<6);
+		break;
+	}
+bas:
+	switch(base) {
+	default:
+		goto bad;
+	case D_NONE:	/* must be mod=00 */
+		i |= 5;
+		break;
+	case D_R8:
+	case D_R9:
+	case D_R10:
+	case D_R11:
+	case D_R12:
+	case D_R13:
+	case D_R14:
+	case D_R15:
+		if(asmode != 64)
+			goto bad;
+	case D_AX:
+	case D_CX:
+	case D_DX:
+	case D_BX:
+	case D_SP:
+	case D_BP:
+	case D_SI:
+	case D_DI:
+		i |= reg[base];
+		break;
+	}
+	*andptr++ = i;
+	return;
+bad:
+	diag("asmidx: bad address %D", a);
+	*andptr++ = 0;
+	return;
+}
+
+static void
+put4(long v)
+{
+	if(dlm && curp != P && reloca != nil){
+		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);
+		reloca = nil;
+	}
+	andptr[0] = v;
+	andptr[1] = v>>8;
+	andptr[2] = v>>16;
+	andptr[3] = v>>24;
+	andptr += 4;
+}
+
+static void
+put8(vlong v)
+{
+	if(dlm && curp != P && reloca != nil){
+		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);	/* TO DO */
+		reloca = nil;
+	}
+	andptr[0] = v;
+	andptr[1] = v>>8;
+	andptr[2] = v>>16;
+	andptr[3] = v>>24;
+	andptr[4] = v>>32;
+	andptr[5] = v>>40;
+	andptr[6] = v>>48;
+	andptr[7] = v>>56;
+	andptr += 8;
+}
+
+vlong
+vaddr(Adr *a)
+{
+	int t;
+	vlong v;
+	Sym *s;
+
+	t = a->type;
+	v = a->offset;
+	if(t == D_ADDR)
+		t = a->index;
+	switch(t) {
+	case D_STATIC:
+	case D_EXTERN:
+		s = a->sym;
+		if(s != nil) {
+			if(dlm && curp != P)
+				reloca = a;
+			switch(s->type) {
+			case SUNDEF:
+				ckoff(s, v);
+			case STEXT:
+			case SCONST:
+				if((uvlong)s->value < (uvlong)INITTEXT)
+					v += INITTEXT;	/* TO DO */
+				v += s->value;
+				break;
+			default:
+				v += INITDAT + s->value;
+			}
+		}
+	}
+	return v;
+}
+
+static void
+asmandsz(Adr *a, int r, int rex, int m64)
+{
+	long v;
+	int t;
+	Adr aa;
+
+	rex &= (0x40 | Rxr);
+	v = a->offset;
+	t = a->type;
+	if(a->index != D_NONE) {
+		if(t >= D_INDIR) {
+			t -= D_INDIR;
+			rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex;
+			if(t == D_NONE) {
+				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
+				asmidx(a, t);
+				put4(v);
+				return;
+			}
+			if(v == 0 && t != D_BP && t != D_R13) {
+				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
+				asmidx(a, t);
+				return;
+			}
+			if(v >= -128 && v < 128) {
+				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
+				asmidx(a, t);
+				*andptr++ = v;
+				return;
+			}
+			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
+			asmidx(a, t);
+			put4(v);
+			return;
+		}
+		switch(t) {
+		default:
+			goto bad;
+		case D_STATIC:
+		case D_EXTERN:
+			aa.type = D_NONE+D_INDIR;
+			break;
+		case D_AUTO:
+		case D_PARAM:
+			aa.type = D_SP+D_INDIR;
+			break;
+		}
+		aa.offset = vaddr(a);
+		aa.index = a->index;
+		aa.scale = a->scale;
+		asmandsz(&aa, r, rex, m64);
+		return;
+	}
+	if(t >= D_AL && t <= D_X0+15) {
+		if(v)
+			goto bad;
+		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
+		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
+		return;
+	}
+	if(t >= D_INDIR) {
+		t -= D_INDIR;
+		rexflag |= (regrex[t] & Rxb) | rex;
+		if(t == D_NONE) {
+			if(asmode != 64){
+				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
+				put4(v);
+				return;
+			}
+			/* temporary */
+			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
+			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
+			put4(v);
+			return;
+		}
+		if(t == D_SP || t == D_R12) {
+			if(v == 0) {
+				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
+				asmidx(a, t);
+				return;
+			}
+			if(v >= -128 && v < 128) {
+				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
+				asmidx(a, t);
+				*andptr++ = v;
+				return;
+			}
+			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
+			asmidx(a, t);
+			put4(v);
+			return;
+		}
+		if(t >= D_AX && t <= D_R15) {
+			if(v == 0 && t != D_BP && t != D_R13) {
+				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
+				return;
+			}
+			if(v >= -128 && v < 128) {
+				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
+				andptr[1] = v;
+				andptr += 2;
+				return;
+			}
+			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
+			put4(v);
+			return;
+		}
+		goto bad;
+	}
+	switch(a->type) {
+	default:
+		goto bad;
+	case D_STATIC:
+	case D_EXTERN:
+		aa.type = D_NONE+D_INDIR;
+		break;
+	case D_AUTO:
+	case D_PARAM:
+		aa.type = D_SP+D_INDIR;
+		break;
+	}
+	aa.index = D_NONE;
+	aa.scale = 1;
+	aa.offset = vaddr(a);
+	asmandsz(&aa, r, rex, m64);
+	return;
+bad:
+	diag("asmand: bad address %D", a);
+	return;
+}
+
+void
+asmand(Adr *a, Adr *ra)
+{
+	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
+}
+
+void
+asmando(Adr *a, int o)
+{
+	asmandsz(a, o, 0, 0);
+}
+
+static void
+bytereg(Adr *a)
+{
+	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15))
+		a->type = D_AL + (a->type-D_AX);
+}
+
+#define	E	0xff
+Movtab	ymovtab[] =
+{
+/* push */
+	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
+	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
+	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
+	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
+	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
+	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
+	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
+	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
+
+	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
+	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
+	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
+	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
+	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
+	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},
+
+/* pop */
+	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
+	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
+	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
+	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
+	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
+	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
+	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
+
+	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
+	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
+	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
+	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
+	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},
+
+/* mov seg */
+	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
+	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
+	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
+	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
+	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
+	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},
+
+	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
+	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
+	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
+	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
+	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
+	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},
+
+/* mov cr */
+	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
+	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
+	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
+	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
+	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
+	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
+	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
+	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
+	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
+	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
+
+	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
+	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
+	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
+	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
+	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
+	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
+	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
+	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
+	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
+	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
+
+/* mov dr */
+	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
+	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
+	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
+	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
+	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
+	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
+
+	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
+	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
+	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
+	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
+	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
+	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
+
+/* mov tr */
+	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
+	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},
+
+	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
+	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},
+
+/* lgdt, sgdt, lidt, sidt */
+	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
+	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
+	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
+	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
+	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
+	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
+	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
+	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
+
+/* lldt, sldt */
+	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
+	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},
+
+/* lmsw, smsw */
+	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
+	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},
+
+/* ltr, str */
+	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
+	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},
+
+/* load full pointer */
+	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
+	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},
+
+/* double shift */
+	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
+	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
+	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
+	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
+	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
+	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
+	0
+};
+
+int
+isax(Adr *a)
+{
+
+	switch(a->type) {
+	case D_AX:
+	case D_AL:
+	case D_AH:
+	case D_INDIR+D_AX:
+		return 1;
+	}
+	if(a->index == D_AX)
+		return 1;
+	return 0;
+}
+
+void
+subreg(Prog *p, int from, int to)
+{
+
+	if(debug['Q'])
+		print("\n%P	s/%R/%R/\n", p, from, to);
+
+	if(p->from.type == from)
+		p->from.type = to;
+	if(p->to.type == from)
+		p->to.type = to;
+
+	if(p->from.index == from)
+		p->from.index = to;
+	if(p->to.index == from)
+		p->to.index = to;
+
+	from += D_INDIR;
+	if(p->from.type == from)
+		p->from.type = to+D_INDIR;
+	if(p->to.type == from)
+		p->to.type = to+D_INDIR;
+
+	if(debug['Q'])
+		print("%P\n", p);
+}
+
+static int
+mediaop(Optab *o, int op, int osize, int z)
+{
+	switch(op){
+	case Pm:
+	case Pe:
+	case Pf2:
+	case Pf3:
+		if(osize != 1){
+			if(op != Pm)
+				*andptr++ = op;
+			*andptr++ = Pm;
+			op = o->op[++z];
+			break;
+		}
+	default:
+		if(andptr == and || andptr[-1] != Pm)
+			*andptr++ = Pm;
+		break;
+	}
+	*andptr++ = op;
+	return z;
+}
+
+void
+doasm(Prog *p)
+{
+	Optab *o;
+	Prog *q, pp;
+	uchar *t;
+	Movtab *mo;
+	int z, op, ft, tt, xo, l;
+	vlong v;
+
+	o = opindex[p->as];
+	if(o == nil) {
+		diag("asmins: missing op %P", p);
+		return;
+	}
+	ft = oclass(&p->from) * Ymax;
+	tt = oclass(&p->to) * Ymax;
+	t = o->ytab;
+	if(t == 0) {
+		diag("asmins: noproto %P", p);
+		return;
+	}
+	xo = o->op[0] == 0x0f;
+	for(z=0; *t; z+=t[3]+xo,t+=4)
+		if(ycover[ft+t[0]])
+		if(ycover[tt+t[1]])
+			goto found;
+	goto domov;
+
+found:
+	switch(o->prefix) {
+	case Pq:	/* 16 bit escape and opcode escape */
+		*andptr++ = Pe;
+		*andptr++ = Pm;
+		break;
+
+	case Pf2:	/* xmm opcode escape */
+	case Pf3:
+		*andptr++ = o->prefix;
+		*andptr++ = Pm;
+		break;
+
+	case Pm:	/* opcode escape */
+		*andptr++ = Pm;
+		break;
+
+	case Pe:	/* 16 bit escape */
+		*andptr++ = Pe;
+		break;
+
+	case Pw:	/* 64-bit escape */
+		if(p->mode != 64)
+			diag("asmins: illegal 64: %P", p);
+		rexflag |= Pw;
+		break;
+
+	case Pb:	/* botch */
+		bytereg(&p->from);
+		bytereg(&p->to);
+		break;
+
+	case P32:	/* 32 bit but illegal if 64-bit mode */
+		if(p->mode == 64)
+			diag("asmins: illegal in 64-bit mode: %P", p);
+		break;
+
+	case Py:	/* 64-bit only, no prefix */
+		if(p->mode != 64)
+			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
+		break;
+	}
+	v = vaddr(&p->from);
+	op = o->op[z];
+	if(op == 0x0f) {
+		*andptr++ = op;
+		op = o->op[++z];
+	}
+	switch(t[2]) {
+	default:
+		diag("asmins: unknown z %d %P", t[2], p);
+		return;
+
+	case Zpseudo:
+		break;
+
+	case Zlit:
+		for(; op = o->op[z]; z++)
+			*andptr++ = op;
+		break;
+
+	case Zmb_r:
+		bytereg(&p->from);
+		/* fall through */
+	case Zm_r:
+		*andptr++ = op;
+		asmand(&p->from, &p->to);
+		break;
+
+	case Zm_r_xm:
+		mediaop(o, op, t[3], z);
+		asmand(&p->from, &p->to);
+		break;
+
+	case Zm_r_xm_nr:
+		rexflag = 0;
+		mediaop(o, op, t[3], z);
+		asmand(&p->from, &p->to);
+		break;
+
+	case Zm_r_i_xm:
+		mediaop(o, op, t[3], z);
+		asmand(&p->from, &p->to);
+		*andptr++ = p->to.offset;
+		break;
+
+	case Zm_r_3d:
+		*andptr++ = 0x0f;
+		*andptr++ = 0x0f;
+		asmand(&p->from, &p->to);
+		*andptr++ = op;
+		break;
+
+	case Zibm_r:
+		*andptr++ = op;
+		asmand(&p->from, &p->to);
+		*andptr++ = p->to.offset;
+		break;
+
+	case Zaut_r:
+		*andptr++ = 0x8d;	/* leal */
+		if(p->from.type != D_ADDR)
+			diag("asmins: Zaut sb type ADDR");
+		p->from.type = p->from.index;
+		p->from.index = D_NONE;
+		asmand(&p->from, &p->to);
+		p->from.index = p->from.type;
+		p->from.type = D_ADDR;
+		break;
+
+	case Zm_o:
+		*andptr++ = op;
+		asmando(&p->from, o->op[z+1]);
+		break;
+
+	case Zr_m:
+		*andptr++ = op;
+		asmand(&p->to, &p->from);
+		break;
+
+	case Zr_m_xm:
+		mediaop(o, op, t[3], z);
+		asmand(&p->to, &p->from);
+		break;
+
+	case Zr_m_xm_nr:
+		rexflag = 0;
+		mediaop(o, op, t[3], z);
+		asmand(&p->to, &p->from);
+		break;
+
+	case Zr_m_i_xm:
+		mediaop(o, op, t[3], z);
+		asmand(&p->to, &p->from);
+		*andptr++ = p->from.offset;
+		break;
+
+	case Zo_m:
+		*andptr++ = op;
+		asmando(&p->to, o->op[z+1]);
+		break;
+
+	case Zo_m64:
+		*andptr++ = op;
+		asmandsz(&p->to, o->op[z+1], 0, 1);
+		break;
+
+	case Zm_ibo:
+		v = vaddr(&p->to);
+		*andptr++ = op;
+		asmando(&p->from, o->op[z+1]);
+		*andptr++ = v;
+		break;
+
+	case Zibo_m:
+		*andptr++ = op;
+		asmando(&p->to, o->op[z+1]);
+		*andptr++ = v;
+		break;
+
+	case Zibo_m_xm:
+		z = mediaop(o, op, t[3], z);
+		asmando(&p->to, o->op[z+1]);
+		*andptr++ = v;
+		break;
+
+	case Z_ib:
+		v = vaddr(&p->to);
+	case Zib_:
+		*andptr++ = op;
+		*andptr++ = v;
+		break;
+
+	case Zib_rp:
+		rexflag |= regrex[p->to.type] & (Rxb|0x40);
+		*andptr++ = op + reg[p->to.type];
+		*andptr++ = v;
+		break;
+
+	case Zil_rp:
+		rexflag |= regrex[p->to.type] & Rxb;
+		*andptr++ = op + reg[p->to.type];
+		if(o->prefix == Pe) {
+			*andptr++ = v;
+			*andptr++ = v>>8;
+		}
+		else
+			put4(v);
+		break;
+
+	case Zo_iw:
+		*andptr++ = op;
+		if(p->from.type != D_NONE){
+			*andptr++ = v;
+			*andptr++ = v>>8;
+		}
+		break;
+
+	case Ziq_rp:
+		l = v>>32;
+		if(l == 0){
+			//p->mark |= 0100;
+			//print("zero: %llux %P\n", v, p);
+			rexflag &= ~(0x40|Rxw);
+			rexflag |= regrex[p->to.type] & Rxb;
+			*andptr++ = 0xb8 + reg[p->to.type];
+			put4(v);
+		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
+			//p->mark |= 0100;
+			//print("sign: %llux %P\n", v, p);
+			*andptr ++ = 0xc7;
+			asmando(&p->to, 0);
+			put4(v);
+		}else{	/* need all 8 */
+			//print("all: %llux %P\n", v, p);
+			rexflag |= regrex[p->to.type] & Rxb;
+			*andptr++ = op + reg[p->to.type];
+			put8(v);
+		}
+		break;
+
+	case Zib_rr:
+		*andptr++ = op;
+		asmand(&p->to, &p->to);
+		*andptr++ = v;
+		break;
+
+	case Z_il:
+		v = vaddr(&p->to);
+	case Zil_:
+		*andptr++ = op;
+		if(o->prefix == Pe) {
+			*andptr++ = v;
+			*andptr++ = v>>8;
+		}
+		else
+			put4(v);
+		break;
+
+	case Zm_ilo:
+		v = vaddr(&p->to);
+		*andptr++ = op;
+		asmando(&p->from, o->op[z+1]);
+		if(o->prefix == Pe) {
+			*andptr++ = v;
+			*andptr++ = v>>8;
+		}
+		else
+			put4(v);
+		break;
+
+	case Zilo_m:
+		*andptr++ = op;
+		asmando(&p->to, o->op[z+1]);
+		if(o->prefix == Pe) {
+			*andptr++ = v;
+			*andptr++ = v>>8;
+		}
+		else
+			put4(v);
+		break;
+
+	case Zil_rr:
+		*andptr++ = op;
+		asmand(&p->to, &p->to);
+		if(o->prefix == Pe) {
+			*andptr++ = v;
+			*andptr++ = v>>8;
+		}
+		else
+			put4(v);
+		break;
+
+	case Z_rp:
+		rexflag |= regrex[p->to.type] & (Rxb|0x40);
+		*andptr++ = op + reg[p->to.type];
+		break;
+
+	case Zrp_:
+		rexflag |= regrex[p->from.type] & (Rxb|0x40);
+		*andptr++ = op + reg[p->from.type];
+		break;
+
+	case Zclr:
+		*andptr++ = op;
+		asmand(&p->to, &p->to);
+		break;
+
+	case Zbr:
+		q = p->pcond;
+		if(q) {
+			v = q->pc - p->pc - 2;
+			if(v >= -128 && v <= 127) {
+				*andptr++ = op;
+				*andptr++ = v;
+			} else {
+				v -= 6-2;
+				*andptr++ = 0x0f;
+				*andptr++ = o->op[z+1];
+				*andptr++ = v;
+				*andptr++ = v>>8;
+				*andptr++ = v>>16;
+				*andptr++ = v>>24;
+			}
+		}
+		break;
+
+	case Zcall:
+		q = p->pcond;
+		if(q) {
+			v = q->pc - p->pc - 5;
+			if(dlm && curp != P && p->to.sym->type == SUNDEF){
+				/* v = 0 - p->pc - 5; */
+				v = 0;
+				ckoff(p->to.sym, v);
+				v += p->to.sym->value;
+				dynreloc(p->to.sym, p->pc+1, 0);
+			}
+			*andptr++ = op;
+			*andptr++ = v;
+			*andptr++ = v>>8;
+			*andptr++ = v>>16;
+			*andptr++ = v>>24;
+		}
+		break;
+
+	case Zjmp:
+		q = p->pcond;
+		if(q) {
+			v = q->pc - p->pc - 2;
+			if(v >= -128 && v <= 127) {
+				*andptr++ = op;
+				*andptr++ = v;
+			} else {
+				v -= 5-2;
+				*andptr++ = o->op[z+1];
+				*andptr++ = v;
+				*andptr++ = v>>8;
+				*andptr++ = v>>16;
+				*andptr++ = v>>24;
+			}
+		}
+		break;
+
+	case Zloop:
+		q = p->pcond;
+		if(q) {
+			v = q->pc - p->pc - 2;
+			if(v < -128 && v > 127)
+				diag("loop too far: %P", p);
+			*andptr++ = op;
+			*andptr++ = v;
+		}
+		break;
+
+	case Zbyte:
+		*andptr++ = v;
+		if(op > 1) {
+			*andptr++ = v>>8;
+			if(op > 2) {
+				*andptr++ = v>>16;
+				*andptr++ = v>>24;
+				if(op > 4) {
+					*andptr++ = v>>32;
+					*andptr++ = v>>40;
+					*andptr++ = v>>48;
+					*andptr++ = v>>56;
+				}
+			}
+		}
+		break;
+	}
+	return;
+
+domov:
+	for(mo=ymovtab; mo->as; mo++)
+		if(p->as == mo->as)
+		if(ycover[ft+mo->ft])
+		if(ycover[tt+mo->tt]){
+			t = mo->op;
+			goto mfound;
+		}
+bad:
+	if(p->mode != 64){
+		/*
+		 * here, the assembly has failed.
+		 * if its a byte instruction that has
+		 * unaddressable registers, try to
+		 * exchange registers and reissue the
+		 * instruction with the operands renamed.
+		 */
+		pp = *p;
+		z = p->from.type;
+		if(z >= D_BP && z <= D_DI) {
+			if(isax(&p->to)) {
+				*andptr++ = 0x87;			/* xchg lhs,bx */
+				asmando(&p->from, reg[D_BX]);
+				subreg(&pp, z, D_BX);
+				doasm(&pp);
+				*andptr++ = 0x87;			/* xchg lhs,bx */
+				asmando(&p->from, reg[D_BX]);
+			} else {
+				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
+				subreg(&pp, z, D_AX);
+				doasm(&pp);
+				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
+			}
+			return;
+		}
+		z = p->to.type;
+		if(z >= D_BP && z <= D_DI) {
+			if(isax(&p->from)) {
+				*andptr++ = 0x87;			/* xchg rhs,bx */
+				asmando(&p->to, reg[D_BX]);
+				subreg(&pp, z, D_BX);
+				doasm(&pp);
+				*andptr++ = 0x87;			/* xchg rhs,bx */
+				asmando(&p->to, reg[D_BX]);
+			} else {
+				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
+				subreg(&pp, z, D_AX);
+				doasm(&pp);
+				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
+			}
+			return;
+		}
+	}
+	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
+	return;
+
+mfound:
+	switch(mo->code) {
+	default:
+		diag("asmins: unknown mov %d %P", mo->code, p);
+		break;
+
+	case 0:	/* lit */
+		for(z=0; t[z]!=E; z++)
+			*andptr++ = t[z];
+		break;
+
+	case 1:	/* r,m */
+		*andptr++ = t[0];
+		asmando(&p->to, t[1]);
+		break;
+
+	case 2:	/* m,r */
+		*andptr++ = t[0];
+		asmando(&p->from, t[1]);
+		break;
+
+	case 3:	/* r,m - 2op */
+		*andptr++ = t[0];
+		*andptr++ = t[1];
+		asmando(&p->to, t[2]);
+		rexflag |= regrex[p->from.type] & (Rxr|0x40);
+		break;
+
+	case 4:	/* m,r - 2op */
+		*andptr++ = t[0];
+		*andptr++ = t[1];
+		asmando(&p->from, t[2]);
+		rexflag |= regrex[p->to.type] & (Rxr|0x40);
+		break;
+
+	case 5:	/* load full pointer, trash heap */
+		if(t[0])
+			*andptr++ = t[0];
+		switch(p->to.index) {
+		default:
+			goto bad;
+		case D_DS:
+			*andptr++ = 0xc5;
+			break;
+		case D_SS:
+			*andptr++ = 0x0f;
+			*andptr++ = 0xb2;
+			break;
+		case D_ES:
+			*andptr++ = 0xc4;
+			break;
+		case D_FS:
+			*andptr++ = 0x0f;
+			*andptr++ = 0xb4;
+			break;
+		case D_GS:
+			*andptr++ = 0x0f;
+			*andptr++ = 0xb5;
+			break;
+		}
+		asmand(&p->from, &p->to);
+		break;
+
+	case 6:	/* double shift */
+		if(t[0] == Pw){
+			if(p->mode != 64)
+				diag("asmins: illegal 64: %P", p);
+			rexflag |= Pw;
+			t++;
+		}else if(t[0] == Pe){
+			*andptr++ = Pe;
+			t++;
+		}
+		z = p->from.type;
+		switch(z) {
+		default:
+			goto bad;
+		case D_CONST:
+			*andptr++ = 0x0f;
+			*andptr++ = t[0];
+			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
+			*andptr++ = p->from.offset;
+			break;
+		case D_CL:
+		case D_CX:
+			*andptr++ = 0x0f;
+			*andptr++ = t[1];
+			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
+			break;
+		}
+		break;
+	}
+}
+
+void
+asmins(Prog *p)
+{
+	int n, np, c;
+
+	rexflag = 0;
+	andptr = and;
+	asmode = p->mode;
+	doasm(p);
+	if(rexflag){
+		/*
+		 * as befits the whole approach of the architecture,
+		 * the rex prefix must appear before the first opcode byte
+		 * (and thus after any 66/67/f2/f3 prefix bytes, but
+		 * before the 0f opcode escape!), or it might be ignored.
+		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
+		 */
+		if(p->mode != 64)
+			diag("asmins: illegal in mode %d: %P", p->mode, p);
+		n = andptr - and;
+		for(np = 0; np < n; np++) {
+			c = and[np];
+			if(c != 0x66 && c != 0xf2 && c != 0xf3 && c != 0x67)
+				break;
+		}
+		memmove(and+np+1, and+np, n-np);
+		and[np] = 0x40 | rexflag;
+		andptr++;
+	}
+}
+
+enum{
+	ABSD = 0,
+	ABSU = 1,
+	RELD = 2,
+	RELU = 3,
+};
+
+int modemap[4] = { 0, 1, -1, 2, };
+
+typedef struct Reloc Reloc;
+
+struct Reloc
+{
+	int n;
+	int t;
+	uchar *m;
+	ulong *a;
+};
+
+Reloc rels;
+
+static void
+grow(Reloc *r)
+{
+	int t;
+	uchar *m, *nm;
+	ulong *a, *na;
+
+	t = r->t;
+	r->t += 64;
+	m = r->m;
+	a = r->a;
+	r->m = nm = malloc(r->t*sizeof(uchar));
+	r->a = na = malloc(r->t*sizeof(ulong));
+	memmove(nm, m, t*sizeof(uchar));
+	memmove(na, a, t*sizeof(ulong));
+	free(m);
+	free(a);
+}
+
+void
+dynreloc(Sym *s, ulong v, int abs)
+{
+	int i, k, n;
+	uchar *m;
+	ulong *a;
+	Reloc *r;
+
+	if(s->type == SUNDEF)
+		k = abs ? ABSU : RELU;
+	else
+		k = abs ? ABSD : RELD;
+	/* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, v, v, k); */
+	k = modemap[k];
+	r = &rels;
+	n = r->n;
+	if(n >= r->t)
+		grow(r);
+	m = r->m;
+	a = r->a;
+	for(i = n; i > 0; i--){
+		if(v < a[i-1]){	/* happens occasionally for data */
+			m[i] = m[i-1];
+			a[i] = a[i-1];
+		}
+		else
+			break;
+	}
+	m[i] = k;
+	a[i] = v;
+	r->n++;
+}
+
+static int
+sput(char *s)
+{
+	char *p;
+
+	p = s;
+	while(*s)
+		cput(*s++);
+	cput(0);
+	return s-p+1;
+}
+
+void
+asmdyn()
+{
+	int i, n, t, c;
+	Sym *s;
+	ulong la, ra, *a;
+	vlong off;
+	uchar *m;
+	Reloc *r;
+
+	cflush();
+	off = seek(cout, 0, 1);
+	lput(0);
+	t = 0;
+	lput(imports);
+	t += 4;
+	for(i = 0; i < NHASH; i++)
+		for(s = hash[i]; s != S; s = s->link)
+			if(s->type == SUNDEF){
+				lput(s->sig);
+				t += 4;
+				t += sput(s->name);
+			}
+
+	la = 0;
+	r = &rels;
+	n = r->n;
+	m = r->m;
+	a = r->a;
+	lput(n);
+	t += 4;
+	for(i = 0; i < n; i++){
+		ra = *a-la;
+		if(*a < la)
+			diag("bad relocation order");
+		if(ra < 256)
+			c = 0;
+		else if(ra < 65536)
+			c = 1;
+		else
+			c = 2;
+		cput((c<<6)|*m++);
+		t++;
+		if(c == 0){
+			cput(ra);
+			t++;
+		}
+		else if(c == 1){
+			wput(ra);
+			t += 2;
+		}
+		else{
+			lput(ra);
+			t += 4;
+		}
+		la = *a++;
+	}
+
+	cflush();
+	seek(cout, off, 0);
+	lput(t);
+
+	if(debug['v']){
+		Bprint(&bso, "import table entries = %d\n", imports);
+		Bprint(&bso, "export table entries = %d\n", exports);
+	}
+}

+ 4 - 4
sys/src/libmach/6.c

@@ -105,10 +105,10 @@ Mach mamd64=
 	0,			/* link register */
 	"setSB",		/* static base register name (bogus anyways) */
 	0,			/* static base register value */
-	0x1000,			/* page size */
-	0xFFFFFFFF80110000ULL,	/* kernel base */
-	0xFFFF800000000000ULL,	/* kernel text mask */
-	0x00007FFFFFFFF000ULL,	/* user stack top */
+	0x200000,		/* page size */
+	0xfffffffff0110000ull,	/* kernel base */
+	0xffff800000000000ull,	/* kernel text mask */
+	0x00007ffffffff000ull,	/* user stack top */
 	1,			/* quantization of pc */
 	8,			/* szaddr */
 	4,			/* szreg */

+ 4 - 3
sys/src/libmach/executable.c

@@ -52,7 +52,7 @@ typedef struct Exectable{
 	uchar	_magic;			/* _MAGIC() magic */
 	Mach	*mach;			/* Per-machine data */
 	long	hsize;			/* header size */
-	ulong	(*swal)(ulong);		/* header swap, beswal or leswal */
+	ulong	(*swal)(ulong);		/* beswal or leswal */
 	int	(*hparse)(int, Fhdr*, ExecHdr*);
 } ExecTable;
 
@@ -186,7 +186,7 @@ ExecTable exectab[] =
 		1,
 		&mamd64,
 		sizeof(Exec)+8,
-		beswal,
+		nil,
 		commonllp64 },
 	{ Q_MAGIC,			/* PowerPC q.out & boot image */
 		"power plan 9 executable",
@@ -204,7 +204,7 @@ ExecTable exectab[] =
 		1,
 		&mpower64,
 		sizeof(Exec)+8,
-		beswal,
+		nil,
 		commonllp64 },
 	{ ELF_MAG,			/* any elf32 */
 		"elf executable",
@@ -441,6 +441,7 @@ commonllp64(int, Fhdr *fp, ExecHdr *hp)
 	long pgsize;
 	uvlong entry;
 
+	hswal(&hp->e, sizeof(Exec)/sizeof(long), beswal);
 	if(!(hp->e.magic & HDR_MAGIC))
 		return 0;