Browse Source

Plan 9 from Bell Labs 2009-07-08

David du Colombier 10 years ago
parent
commit
d3428530fa
57 changed files with 2689 additions and 1608 deletions
  1. 94 40
      lib/vgadb
  2. 6 2
      power/include/ape/stdarg.h
  3. 4 4
      power/include/u.h
  4. 3 3
      rc/bin/psu
  5. 113 0
      sys/man/2/atom
  6. 32 1
      sys/src/9/pc/mp.c
  7. 2 0
      sys/src/9/pc/pcf
  8. 131 13
      sys/src/ape/lib/ap/power/vlop.s
  9. 8 472
      sys/src/ape/lib/ap/power/vlrt.c
  10. 2 2
      sys/src/cmd/aux/vga/vesa.c
  11. 11 9
      sys/src/cmd/aux/vga/vesadb.c
  12. 3 2
      sys/src/cmd/cc/cc.h
  13. 2 0
      sys/src/cmd/cc/com.c
  14. 5 1
      sys/src/cmd/cc/com64.c
  15. 6 0
      sys/src/cmd/cc/dcl.c
  16. 31 3
      sys/src/cmd/cc/pgen.c
  17. 0 94
      sys/src/cmd/cc/y.tab.h
  18. 1 1
      sys/src/cmd/postscript/text2post/mkfile
  19. 3 3
      sys/src/cmd/postscript/text2post/text2post.c
  20. 1 1
      sys/src/cmd/postscript/tr2post/tr2post.c
  21. 27 3
      sys/src/cmd/qa/a.y
  22. 85 2
      sys/src/cmd/qa/lex.c
  23. 0 126
      sys/src/cmd/qc/bits.c
  24. 442 76
      sys/src/cmd/qc/cgen.c
  25. 68 12
      sys/src/cmd/qc/enam.c
  26. 8 0
      sys/src/cmd/qc/gc.h
  27. 26 12
      sys/src/cmd/qc/list.c
  28. 98 0
      sys/src/cmd/qc/machcap.c
  29. 1 0
      sys/src/cmd/qc/mkfile
  30. 78 4
      sys/src/cmd/qc/peep.c
  31. 74 12
      sys/src/cmd/qc/q.out.h
  32. 0 1
      sys/src/cmd/qc/reg.c
  33. 12 3
      sys/src/cmd/qc/sgen.c
  34. 88 28
      sys/src/cmd/qc/swt.c
  35. 511 43
      sys/src/cmd/qc/txt.c
  36. 46 7
      sys/src/cmd/qi/float.c
  37. 18 16
      sys/src/cmd/qi/iu.c
  38. 2 19
      sys/src/cmd/qi/power.h
  39. 9 13
      sys/src/cmd/qi/qi.c
  40. 4 1
      sys/src/cmd/qi/stats.c
  41. 101 3
      sys/src/cmd/ql/asm.c
  42. 100 8
      sys/src/cmd/ql/asmout.c
  43. 1 0
      sys/src/cmd/ql/l.h
  44. 4 1
      sys/src/cmd/ql/list.c
  45. 25 14
      sys/src/cmd/ql/obj.c
  46. 13 0
      sys/src/cmd/ql/optab.c
  47. 134 9
      sys/src/cmd/ql/span.c
  48. 1 1
      sys/src/games/mp3enc/mkfile
  49. 6 14
      sys/src/games/music/mkfile
  50. 65 0
      sys/src/libc/power/atom.s
  51. 10 10
      sys/src/libc/power/memmove.s
  52. 2 2
      sys/src/libc/power/tas.s
  53. 131 13
      sys/src/libc/power/vlop.s
  54. 8 476
      sys/src/libc/power/vlrt.c
  55. 21 19
      sys/src/libmach/qdb.c
  56. 12 6
      sys/src/libmach/qobj.c
  57. 0 3
      sys/src/libstdio/mkfile

+ 94 - 40
lib/vgadb

@@ -139,7 +139,7 @@ ctlr
 	# vid=0x5333 did=0x8A21		# Savage 3DMV, not supported
 	vid=0x5333 did=0x8A22		# Savage 4
 	vid=0x5333 did=0x8A25		# ProSavage PN133
-	vid=0x5333 did=0x8A26		# ProSavage KN133 
+	vid=0x5333 did=0x8A26		# ProSavage KN133
 	vid=0x5333 did=0x883D		# ViRGE VX
 	vid=0x5333 did=0x8C01		# ViRGE MX
 	vid=0x5333 did=0x8C03		# ViRGE MXP
@@ -202,7 +202,7 @@ ctlr							# CL-GD542x
 	hwgc=clgd542xhwgc
 ctlr
 	0xC0039="CL-GD5436/46 PCI VGA BIOS Version"
-	0xC0039="CL-GD5446 PCI VGA BIOS Version"	
+	0xC0039="CL-GD5446 PCI VGA BIOS Version"
 	vid=0x1013 did=0x00b8		# CL-GD5446, at least in QEMU
 	link=vga
 	ctlr=clgd542x linear=1
@@ -291,7 +291,7 @@ ctlr
 	0xC0093="Trident TGUI96xx"
 	0xC0044="GL A6.00E"
 	0xC68A5="TVGA BIOS LS  6.0 (08)"		# Sharp Actius A250, Cyber 9525/DVD
-	0xC7E5F="TVGA BIOS 1.14"				# iTuner 
+	0xC7E5F="TVGA BIOS 1.14"				# iTuner
 	link=vga
 	ctlr=cyber938x linear=1
 	hwgc=cyber938xhwgc
@@ -427,7 +427,7 @@ include=640x480					# 60Hz, 31.5KHz
 include=640x480@72Hz					# 72Hz, 38.5KHz
 	clock=32
 	shb=664 ehb=704 ht=832
-	vrs=489 vre=492 vt=520 
+	vrs=489 vre=492 vt=520
 
 include=800x600					# 60Hz, 37.9KHz
 	defaultclock=40
@@ -602,7 +602,7 @@ vs17x=1024x768					# ??Hz, ??.?KHz
 
 #
 # Dell UltraScan 21TE (MODEL No. D2130T-HS)
-# Horizontal timing: 
+# Horizontal timing:
 #	Allowable frequency range: 30-93KHz
 # Vertical timing:
 #	Allowable frequency range: 50-152Hz
@@ -635,7 +635,7 @@ dell2001fp=1600x1200
 # Dell 2007FP at 1600x1200
 #
 # Horz=30-81kHz
-# Vert=56-76Hz, 1600x1200 at 60Hz only 
+# Vert=56-76Hz, 1600x1200 at 60Hz only
 #
 dell2007fp
 	videobw=200					# actually 162
@@ -681,7 +681,7 @@ cm751u=1600x1200					# 75Hz, 93.75 kHz
 
 #
 # Hitachi CM801U
-# Horizontal timing: 
+# Horizontal timing:
 #	Allowable frequency range: 31-96KHz
 # Vertical timing:
 #	Allowable frequency range: 50-160Hz
@@ -707,7 +707,7 @@ cm801u=1376x1024						# ??Hz, ??.?KHz
 #
 # We can't seem to set the clock higher than 100MHz here.
 # Maybe it's because we don't have clock doubling code in aux/vga,
-# maybe it's because the chip won't go that high.  
+# maybe it's because the chip won't go that high.
 # Using a clock of 75 produces noticeable refresh pulsing on the LCD,
 # a clock of 100 seems okay.  I'd like to go higher.
 # If only we had documentation.  -rsc
@@ -748,7 +748,7 @@ ms8617
 
 #
 # IDEK Vision Master 21 (model no. MF-8221E)
-# Horizontal timing: 
+# Horizontal timing:
 #	Allowable frequency range: 24.8-94.0KHz
 # Vertical timing:
 #	Allowable frequency range: 50-160Hz
@@ -804,14 +804,14 @@ pro400=1600x1200					# 76Hz, 90.0KHz
 	vrs=1202 vre=1208 vt=1240
 pro400=1280x1024
 	defaultclock=135				# 90hz, 103Khz
-	shb=1352 ehb=1544 ht=1712			# 
+	shb=1352 ehb=1544 ht=1712			#
 	shs=1328
 	vrs=1028 vre=1034 vt=1075
 pro400=1024x768
 
 #
 # Micron 17FGx
-# Horizontal timing: 
+# Horizontal timing:
 #	Allowable frequency range: 30-64KHz
 # Vertical timing:
 #	Allowable frequency range: 50-100Hz
@@ -941,7 +941,7 @@ versalx=1024x768
 	shb=664 ehb=760 ht=800
 	vrs=491 vre=493 vt=525
 
-# 
+#
 # Panasonic E70i 17" monitor
 # from aam396@mail.usask.ca
 #
@@ -956,7 +956,7 @@ e70i=1280x1024
 #
 #
 # Sampo KDM-1788
-# Horizontal timing: 
+# Horizontal timing:
 #	Allowable frequency range: 30-82KHz
 # Vertical timing:
 #	Allowable frequency range: 50-120Hz
@@ -969,7 +969,7 @@ kdm-1788
 
 #
 # Samsung SyncMaster 17GLsi
-# Horizontal timing: 
+# Horizontal timing:
 #	Allowable frequency range: 30-85KHz
 # Vertical timing:
 #	Allowable frequency range: 50-120Hz
@@ -1132,7 +1132,7 @@ multilink=1600x1024
 multilinx=1600x1024
 	clock=103.125
 	shb=1592 ehb=1624 ht=1672
-	shs=1592 
+	shs=1592
 	vrs=1024 vre=1029 vt=1029
 	hsync=+ vsync=+
 #
@@ -1255,49 +1255,49 @@ planar=1280x1024
 #
 # Dell 2405FPW LCD
 #
-2405fpw=640x480				# 60Hz 
+2405fpw=640x480				# 60Hz
 	clock=25.175
 	shb=648 ehb=792 ht=800
 	vrs=490 vre=492 vt=525
-	hsync=- vsync=- 
-2405fpw=640x480				# 75Hz 
-	clock=31.5
-	shb=640 ehb=840 ht=840
-	vrs=481 vre=484 vt=500
-	hsync=- vsync=- 
-2405fpw=800x600				# 60Hz 
+	hsync=- vsync=-
+#2405fpw=640x480				# 75Hz
+#	clock=31.5
+#	shb=640 ehb=840 ht=840
+#	vrs=481 vre=484 vt=500
+#	hsync=- vsync=-
+2405fpw=800x600				# 60Hz
 	clock=40
 	shb=800 ehb=1056 ht=1056
 	vrs=601 vre=605 vt=628
-	hsync=+ vsync=+ 
-2405fpw=800x600				# 75Hz 
-	clock=49.5
-	shb=800 ehb=1056 ht=1056
-	vrs=601 vre=604 vt=625
-	hsync=+ vsync=+ 
+	hsync=+ vsync=+
+#2405fpw=800x600				# 75Hz
+#	clock=49.5
+#	shb=800 ehb=1056 ht=1056
+#	vrs=601 vre=604 vt=625
+#	hsync=+ vsync=+
 2405fpw=1024x768			# 60Hz
 	clock=65
 	shb=1024 ehb=1344 ht=1344
 	vrs=771 vre=777 vt=806
-	hsync=- vsync=- 
-2405fpw=1024x768			# 75Hz
-	clock=78.75
-	shb=1024 ehb=1312 ht=1312
-	vrs=769 vre=772 vt=800
-	hsync=+ vsync=+ 
+	hsync=- vsync=-
+#2405fpw=1024x768			# 75Hz
+#	clock=78.75
+#	shb=1024 ehb=1312 ht=1312
+#	vrs=769 vre=772 vt=800
+#	hsync=+ vsync=+
 2405fpw=1280x1024			# 75Hz
 	clock=135
 	shb=1280 ehb=1688 ht=1688
 	vrs=1025 vre=1028 vt=1066
-	hsync=+ vsync=+ 
+	hsync=+ vsync=+
 2405fpw=1920x1200			# 60Hz
 	clock=154
 	shb=1968 ehb=2000 ht=2080
 	vrs=1203 vre=1209 vt=1235
-	hsync=+ vsync=- 
+	hsync=+ vsync=-
 
 #
-# Viewsonic VP201(b) at 1600x1200 
+# Viewsonic VP201(b) at 1600x1200
 #
 vp201
 	videobw=200
@@ -1355,6 +1355,60 @@ e198wfp=1440x900	# 60 Hz
 	vrs=903 vre=909 vt=934
 	hsync=- vsync=+
 
+#
+# Dell E228WFP LCD monitor (1680x1050 native)
+#
+e228wfp
+	videobw=150
+
+e228wfp=640x480		# 60Hz
+	clock=25.175
+	shb=648 ehb=792 ht=800
+	vrs=490 vre=492 vt=525
+	hsync=- vsync=-
+
+#e228wfp=640x480		# 75Hz
+#	clock=31.5
+#	shb=640 ehb=840 ht=840
+#	vrs=481 vre=484 vt=500
+#	hsync=- vsync=-
+
+e228wfp=800x600		# 60Hz
+	clock=40
+	shb=800 ehb=1056 ht=1056
+	vrs=601 vre=605 vt=628
+	hsync=+ vsync=+
+
+#e228wfp=800x600		# 75Hz
+#	clock=49.5
+#	shb=800 ehb=1056 ht=1056
+#	vrs=601 vre=604 vt=625
+#	hsync=+ vsync=+
+
+e228wfp=1024x768	# 60Hz
+	clock=65
+	shb=1024 ehb=1344 ht=1344
+	vrs=771 vre=777 vt=806
+	hsync=- vsync=-
+
+#e228wfp=1024x768	# 75Hz
+#	clock=78.75
+#	shb=1024 ehb=1312 ht=1312
+#	vrs=769 vre=772 vt=800
+#	hsync=+ vsync=+
+
+e228wfp=1280x1024	# 75Hz
+	clock=135
+	shb=1280 ehb=1688 ht=1688
+	vrs=1025 vre=1028 vt=1066
+	hsync=+ vsync=+
+
+e228wfp=1680x1050	# 60Hz
+	clock=146.25
+	shb=1784 ehb=1960 ht=2240
+	vrs=1053 vre=1059 vt=1089
+	hsync=- vsync=+
+
 #
 # HannsG JC199D LCD monitor (1280x1024 native)
 #
@@ -1389,8 +1443,8 @@ jc199d=1280x1024		#	60 Hz
 # UXGA		1600x1200	Ultra eXtended Graphics Array
 # WSXGA+	1680x1050	Wide SXGA+
 # WUXGA		1920x1200	Wide UXGA
-# QXGA		2048x1536	Quad XGA 
-# QSXGA		2560x2048	Quad SXGA 
+# QXGA		2048x1536	Quad XGA
+# QSXGA		2560x2048	Quad SXGA
 # QUXGA		3200x2400	Quad UXGA
 # QUXGA-W	3840x2400	Wide-QUXGA
 #

+ 6 - 2
power/include/ape/stdarg.h

@@ -5,7 +5,11 @@ typedef char *va_list;
 
 #define va_start(list, start) list = (char *)(&(start)+1)
 #define va_end(list)
-#define va_arg(list, mode) (sizeof(mode)==1 ? ((mode *) (list += 4))[-4] : \
-sizeof(mode)==2 ? ((mode *) (list += 4))[-2] : ((mode *) (list += sizeof(mode)))[-1])
+#define va_arg(list, mode)\
+	((sizeof(mode) <= 4)?\
+		((list += 4), (mode*)list)[-1]:\
+	(signof(mode) != signof(double))?\
+		((list += sizeof(mode)), (mode*)list)[-1]:\
+		((list = (char*)((unsigned long)(list+7) & ~7) + sizeof(mode)), (mode*)list)[-1])
 
 #endif /* __STDARG */

+ 4 - 4
power/include/u.h

@@ -78,8 +78,8 @@ typedef	char*	va_list;
 #define va_end(list)\
 	USED(list)
 #define va_arg(list, mode)\
-	((sizeof(mode) == 1)?\
+	((sizeof(mode) <= 4)?\
 		((list += 4), (mode*)list)[-1]:\
-	(sizeof(mode) == 2)?\
-		((list += 4), (mode*)list)[-1]:\
-		((list += sizeof(mode)), (mode*)list)[-1])
+	(signof(mode) != signof(double))?\
+		((list += sizeof(mode)), (mode*)list)[-1]:\
+		((list = (char*)((uintptr)(list+7) & ~7) + sizeof(mode)), (mode*)list)[-1])

+ 3 - 3
rc/bin/psu

@@ -1,8 +1,8 @@
 #!/bin/rc
-
+# psu - ps for just one user
+rfork e
 flags=()
-switch($1){
-case -*
+while (! ~ $#* 0 && ~ $1 -*) {
 	flags = ($flags $1)
 	shift
 }

+ 113 - 0
sys/man/2/atom

@@ -0,0 +1,113 @@
+.TH ATOM 2
+.SH NAME
+ainc, adec, cas, cas32, cas64, casp, casl, loadlink, storecond, tas \- atomic RMW operations
+.SH SYNOPSIS
+.B #include <u.h>
+.br
+.B #include <libc.h>
+.PP
+.B
+long ainc(long *addr);
+.PP
+.B
+long adec(long *addr);
+.PP
+.B
+int cas32(u32int *addr, u32int ov, u32int nv);
+.PP
+.B
+int cas64(u64int *addr, u64int ov, u64int nv);
+.PP
+.B
+int cas(int *addr, int ov, int nv);
+.PP
+.B
+int casp(void **addr, void *ov, void *nv);
+.PP
+.B
+int casl(ulong *addr, ulong ov, ulong nv);
+.PP
+.B
+int tas(ulong *addr);
+.PP
+.B
+ulong loadlink(ulong*);
+.PP
+.B
+int storecond(ulong*, ulong);
+.SH DESCRIPTION
+.I Ainc
+atomically increments the value pointed to by
+.I addr
+and returns the new value.
+.PP
+.I Adec
+atomically decrements the value pointed to by
+.I addr
+and returns the new value.
+.PP
+.IR Cas ,
+.IR cas32 ,
+.IR cas64 ,
+.IR casp ,
+and
+.I casl
+implement
+.I Compare-and-Swap
+on, respectively,
+.IR int ,
+.IR u32int ,
+.IR u64int ,
+.IR void* ,
+and
+.IR ulong
+values.  The availability of these functions depends on the
+\s-2CPU\s0 architecture:  Pentium III and later, as well as AMD64
+have 64-bit CAS instructions.  Other architectures don't.
+ARM-5 processors and earlier do not have CAS (nor do they have
+.I Load-Linked
+or
+.IR Store-Conditional ).
+These instructions are, however, emulated by the Plan 9 kernel.
+All other architectures have 32-bit CAS.
+.PP
+.I Tas
+implements
+.IR Test-and-Set ,
+which is available on all architectures and used for the implementation
+of kernel locks
+(see
+.IR lock (2)
+and
+.IR thread (2)).
+.PP
+.I Loadlink
+and
+.I Storecond
+access the
+.I load-linked
+and
+.I store-conditional
+instructions present on MIPS (LL/SC), ARM (LDREX/STREX), PowerPC (LWAR/STWCCC), and Alpha (MOVLL/MOVLC).
+These are not present on Pentium or AMD64.
+.PP
+On the architectures that have
+.I load-linked
+and
+.IR store-conditional ,
+these are used to implement
+.IR compare-and-swap .
+.SH SOURCE
+.B /sys/src/libc/*/atom.s
+.br
+.B /sys/src/libc/*/tas.s
+.SH SEE ALSO
+.IR semacquire (2),
+.IR lock (2),
+.IR thread (2)
+.SH DIAGNOSTICS
+The CAS functions,
+.IR tas ,
+and
+.I storecond
+return 0 for failure and 1 for success.

+ 32 - 1
sys/src/9/pc/mp.c

@@ -9,6 +9,7 @@
 #include "mp.h"
 #include "apbootstrap.h"
 
+static PCMP* mppcmp;
 static Bus* mpbus;
 static Bus* mpbuslast;
 static int mpisabus = -1;
@@ -163,6 +164,7 @@ mkiointr(PCMPintr* p)
 {
 	Bus *bus;
 	Aintr *aintr;
+	PCMPintr* pcmpintr;
 
 	/*
 	 * According to the MultiProcessor Specification, a destination
@@ -177,6 +179,28 @@ mkiointr(PCMPintr* p)
 
 	aintr = xalloc(sizeof(Aintr));
 	aintr->intr = p;
+
+	if(0)
+		print("iointr: type %d intr type %d flags %#o "
+			"bus %d irq %d apicno %d intin %d\n",
+			p->type, p->intr, p->flags,
+			p->busno, p->irq, p->apicno, p->intin);
+	/*
+	 * Hack for Intel SR1520ML motherboard, whose BIOS describes
+	 * the i82575 dual ethernet controllers incorrectly.
+	 */
+	if(memcmp(mppcmp->product, "INTEL   X38MLST     ", 20) == 0){
+		if(p->busno == 1 && p->intin == 16 && p->irq == 1){
+			pcmpintr = malloc(sizeof(PCMPintr));
+			memmove(pcmpintr, p, sizeof(PCMPintr));
+			print("mkiointr: %20.20s bus %d intin %d irq %d\n",
+				(char*)mppcmp->product,
+				pcmpintr->busno, pcmpintr->intin,
+				pcmpintr->irq);
+			pcmpintr->intin = 17;
+			aintr->intr = pcmpintr;
+		}
+	}
 	aintr->apic = &mpapic[p->apicno];
 	aintr->next = bus->aintr;
 	bus->aintr = aintr;
@@ -484,6 +508,7 @@ mpinit(void)
 	 */
 	if((va = vmap(pcmp->lapicbase, 1024)) == nil)
 		return;
+	mppcmp = pcmp;
 	print("LAPIC: %.8lux %.8lux\n", pcmp->lapicbase, (ulong)va);
 
 	bpapic = nil;
@@ -653,7 +678,12 @@ mpintrenablex(Vctl* v, int tbdf)
 	for(aintr = bus->aintr; aintr; aintr = aintr->next){
 		if(aintr->intr->irq != irq)
 			continue;
+		if (0) {
+			PCMPintr* p = aintr->intr;
 
+	   	 	print("mpintrenablex: bus %d intin %d irq %d\n",
+				p->busno, p->intin, p->irq);
+		}
 		/*
 		 * Check if already enabled. Multifunction devices may share
 		 * INT[A-D]# so, if already enabled, check the polarity matches
@@ -666,9 +696,9 @@ mpintrenablex(Vctl* v, int tbdf)
 		 */
 		apic = aintr->apic;
 		ioapicrdtr(apic, aintr->intr->intin, 0, &lo);
-
 		if(!(lo & ApicIMASK)){
 			vno = lo & 0xFF;
+//print("%s vector %d (!imask)\n", v->name, vno);
 			n = mpintrinit(bus, aintr->intr, vno, v->irq);
 			n |= ApicLOGICAL;
 			lo &= ~(ApicRemoteIRR|ApicDELIVS);
@@ -698,6 +728,7 @@ mpintrenablex(Vctl* v, int tbdf)
 		 *    the same IRQ as devices on another pin.
 		 */
 		vno = VectorAPIC + (incref(&mpvnoref)-1)*8;
+//print("%s vector %d (imask)\n", v->name, vno);
 		if(vno > MaxVectorAPIC){
 			print("mpintrenable: vno %d, irq %d, tbdf %uX\n",
 				vno, v->irq, tbdf);

+ 2 - 0
sys/src/9/pc/pcf

@@ -28,6 +28,7 @@ dev
 
 	sd
 	floppy		dma
+	aoe
 	lpt
 
 	audio		dma
@@ -79,6 +80,7 @@ misc
 	sd53c8xx	pci sdscsi
 	sdmylex		pci sdscsi
 	sdiahci		pci sdscsi
+	sdaoe
 
 	uarti8250
 	uartpci		pci

+ 131 - 13
sys/src/ape/lib/ap/power/vlop.s

@@ -1,14 +1,132 @@
-TEXT	_mulv(SB), $0
-	MOVW	8(FP), R9
-	MOVW	4(FP), R10
-	MOVW	16(FP), R4
-	MOVW	12(FP), R5
-	MULLW	R4, R9, R6
-	MULHWU	R4, R9, R7
-	MULLW	R10, R4, R8
-	ADD	R8, R7
-	MULLW	R9, R5, R8
-	ADD	R8, R7
-	MOVW	R6, 4(R3)
-	MOVW	R7, 0(R3)
+#define	BDNZ	BC	16,0,
+
+/*
+ * 64/64 division adapted from powerpc compiler writer's handbook
+ *
+ * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
+ * quo dvd dvs
+ *
+ * Remainder is left in R7:R8
+ *
+ * Code comment notation:
+ * msw = most-significant (high-order) word, i.e. bits 0..31
+ * lsw = least-significant (low-order) word, i.e. bits 32..63
+ * LZ = Leading Zeroes
+ * SD = Significant Digits
+ *
+ * R3:R4 = dvd (input dividend); quo (output quotient)
+ * R5:R6 = dvs (input divisor)
+ *
+ * R7:R8 = tmp; rem (output remainder)
+ */
+
+TEXT	_divu64(SB), $0
+	MOVW	a+0(FP), R3
+	MOVW	a+4(FP), R4
+	MOVW	b+8(FP), R5
+	MOVW	b+12(FP), R6
+
+	/*  count the number of leading 0s in the dividend */
+	CMP	R3, $0 	/*  dvd.msw == 0? */
+	CNTLZW 	R3, R11 	/*  R11 = dvd.msw.LZ */
+	CNTLZW 	R4, R9 	/*  R9 = dvd.lsw.LZ */
+	BNE 	lab1 	/*  if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
+	ADD 	$32, R9, R11 	/*  dvd.LZ = dvd.lsw.LZ + 32 */
+
+lab1:
+	/*  count the number of leading 0s in the divisor */
+	CMP 	R5, $0 	/*  dvs.msw == 0? */
+	CNTLZW 	R5, R9 	/*  R9 = dvs.msw.LZ */
+	CNTLZW 	R6, R10 	/*  R10 = dvs.lsw.LZ */
+	BNE 	lab2 	/*  if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
+	ADD 	$32, R10, R9 	/*  dvs.LZ = dvs.lsw.LZ + 32 */
+
+lab2:
+	/*  determine shift amounts to minimize the number of iterations */
+	CMP 	R11, R9 	/*  compare dvd.LZ to dvs.LZ */
+	SUBC	R11, $64, R10	/*  R10 = dvd.SD */
+	BGT 	lab9 	/*  if(dvs > dvd) quotient = 0 */
+	ADD 	$1, R9 	/*  ++dvs.LZ (or --dvs.SD) */
+	SUBC 	R9, $64, R9 	/*  R9 = dvs.SD */
+	ADD 	R9, R11 	/*  (dvd.LZ + dvs.SD) = left shift of dvd for */
+			/*  initial dvd */
+	SUB		R9, R10, R9 	/*  (dvd.SD - dvs.SD) = right shift of dvd for */
+			/*  initial tmp */
+	MOVW 	R9, CTR 	/*  number of iterations = dvd.SD - dvs.SD */
+
+	/*  R7:R8 = R3:R4 >> R9 */
+	CMP 	 R9, $32
+	ADD	$-32, R9, R7
+	BLT	lab3 	/*  if(R9 < 32) jump to lab3 */
+	SRW	R7, R3, R8 	/*  tmp.lsw = dvd.msw >> (R9 - 32) */
+	MOVW 	$0, R7 	/*  tmp.msw = 0 */
+	BR 	lab4
+lab3:
+	SRW	R9, R4, R8 	/*  R8 = dvd.lsw >> R9 */
+	SUBC	R9, $32, R7
+	SLW	R7, R3, R7		/*  R7 = dvd.msw << 32 - R9 */
+	OR	R7, R8 		/*  tmp.lsw = R8 | R7 */
+	SRW	R9, R3, R7		/*  tmp.msw = dvd.msw >> R9 */
+
+lab4:
+	/*  R3:R4 = R3:R4 << R11 */
+	CMP	R11,$32
+	ADDC	$-32, R11, R9
+	BLT 	lab5 	/*  (R11 < 32)? */
+	SLW	R9, R4, R3	/*  dvd.msw = dvd.lsw << R9 */
+	MOVW 	$0, R4 	/*  dvd.lsw = 0 */
+	BR 	lab6
+
+lab5:
+	SLW	R11, R3	/*  R3 = dvd.msw << R11 */
+	SUBC	R11, $32, R9
+	SRW	R9, R4, R9	/*  R9 = dvd.lsw >> 32 - R11 */
+	OR	R9, R3	/*  dvd.msw = R3 | R9 */
+	SLW	R11, R4	/*  dvd.lsw = dvd.lsw << R11 */
+
+lab6:
+	/*  restoring division shift and subtract loop */
+	MOVW	$-1, R10
+	ADDC	$0, R7	/*  clear carry bit before loop starts */
+lab7:
+	/*  tmp:dvd is considered one large register */
+	/*  each portion is shifted left 1 bit by adding it to itself */
+	/*  adde sums the carry from the previous and creates a new carry */
+	ADDE 	R4,R4 	/*  shift dvd.lsw left 1 bit */
+	ADDE 	R3,R3 	/*  shift dvd.msw to left 1 bit */
+	ADDE 	R8,R8 	/*  shift tmp.lsw to left 1 bit */
+	ADDE 	R7,R7 	/*  shift tmp.msw to left 1 bit */
+	SUBC	R6, R8, R11	/*  tmp.lsw - dvs.lsw */
+	SUBECC	R5, R7, R9	/*  tmp.msw - dvs.msw */
+	BLT 	lab8 	/*  if(result < 0) clear carry bit */
+	MOVW	R11, R8 	/*  move lsw */
+	MOVW	R9, R7	/*  move msw */
+	ADDC 	$1, R10, R11 	/*  set carry bit */
+lab8:
+	BDNZ 	lab7
+
+	ADDE 	R4,R4 	/*  quo.lsw (lsb = CA) */
+	ADDE 	R3,R3 	/*  quo.msw (lsb from lsw) */
+
+lab10:
+	MOVW	qp+16(FP), R9
+	MOVW	rp+20(FP), R10
+	CMP	R9, $0
+	BEQ	lab11
+	MOVW	R3, 0(R9)
+	MOVW	R4, 4(R9)
+lab11:
+	CMP	R10, $0
+	BEQ	lab12
+	MOVW	R7, 0(R10)
+	MOVW	R8, 4(R10)
+lab12:
 	RETURN
+
+lab9:
+	/*  Quotient is 0 (dvs > dvd) */
+	MOVW	R4, R8	/*  rmd.lsw = dvd.lsw */
+	MOVW	R3, R7	/*  rmd.msw = dvd.msw */
+	MOVW	$0, R4	/*  dvd.lsw = 0 */
+	MOVW	$0, R3	/*  dvd.msw = 0 */
+	BR	lab10

+ 8 - 472
sys/src/ape/lib/ap/power/vlrt.c

@@ -9,55 +9,17 @@ typedef	signed char	schar;
 typedef	struct	Vlong	Vlong;
 struct	Vlong
 {
-	union
-	{
-		struct
-		{
-			ulong	hi;
-			ulong	lo;
-		};
-		struct
-		{
-			ushort	hims;
-			ushort	hils;
-			ushort	loms;
-			ushort	lols;
-		};
-	};
+	ulong	hi;
+	ulong	lo;
 };
 
 void	abort(void);
-
-void
-_addv(Vlong *r, Vlong a, Vlong b)
-{
-	ulong lo, hi;
-
-	lo = a.lo + b.lo;
-	hi = a.hi + b.hi;
-	if(lo < a.lo)
-		hi++;
-	r->lo = lo;
-	r->hi = hi;
-}
-
-void
-_subv(Vlong *r, Vlong a, Vlong b)
-{
-	ulong lo, hi;
-
-	lo = a.lo - b.lo;
-	hi = a.hi - b.hi;
-	if(lo > a.lo)
-		hi--;
-	r->lo = lo;
-	r->hi = hi;
-}
+void	_divu64(Vlong, Vlong, Vlong*, Vlong*);
 
 void
 _d2v(Vlong *y, double d)
 {
-	union { double d; struct Vlong; } x;
+	union { double d; Vlong; } x;
 	ulong xhi, xlo, ylo, yhi;
 	int sh;
 
@@ -137,68 +99,6 @@ _v2f(Vlong x)
 	return _v2d(x);
 }
 
-static void
-dodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
-{
-	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
-	int i;
-
-	numhi = num.hi;
-	numlo = num.lo;
-	denhi = den.hi;
-	denlo = den.lo;
-
-	/*
-	 * get a divide by zero
-	 */
-	if(denlo==0 && denhi==0) {
-		numlo = numlo / denlo;
-	}
-
-	/*
-	 * set up the divisor and find the number of iterations needed
-	 */
-	if(numhi >= SIGN(32)) {
-		quohi = SIGN(32);
-		quolo = 0;
-	} else {
-		quohi = numhi;
-		quolo = numlo;
-	}
-	i = 0;
-	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
-		denhi = (denhi<<1) | (denlo>>31);
-		denlo <<= 1;
-		i++;
-	}
-
-	quohi = 0;
-	quolo = 0;
-	for(; i >= 0; i--) {
-		quohi = (quohi<<1) | (quolo>>31);
-		quolo <<= 1;
-		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
-			t = numlo;
-			numlo -= denlo;
-			if(numlo > t)
-				numhi--;
-			numhi -= denhi;
-			quolo |= 1;
-		}
-		denlo = (denlo>>1) | (denhi<<31);
-		denhi >>= 1;
-	}
-
-	if(q) {
-		q->lo = quolo;
-		q->hi = quohi;
-	}
-	if(r) {
-		r->lo = numlo;
-		r->hi = numhi;
-	}
-}
-
 void
 _divvu(Vlong *q, Vlong n, Vlong d)
 {
@@ -208,7 +108,7 @@ _divvu(Vlong *q, Vlong n, Vlong d)
 		q->lo = n.lo / d.lo;
 		return;
 	}
-	dodiv(n, d, q, 0);
+	_divu64(n, d, q, 0);
 }
 
 void
@@ -220,7 +120,7 @@ _modvu(Vlong *r, Vlong n, Vlong d)
 		r->lo = n.lo % d.lo;
 		return;
 	}
-	dodiv(n, d, 0, r);
+	_divu64(n, d, 0, r);
 }
 
 static void
@@ -251,7 +151,7 @@ _divv(Vlong *q, Vlong n, Vlong d)
 	dneg = d.hi >> 31;
 	if(dneg)
 		vneg(&d);
-	dodiv(n, d, q, 0);
+	_divu64(n, d, q, 0);
 	if(nneg != dneg)
 		vneg(q);
 }
@@ -272,151 +172,11 @@ _modv(Vlong *r, Vlong n, Vlong d)
 	dneg = d.hi >> 31;
 	if(dneg)
 		vneg(&d);
-	dodiv(n, d, 0, r);
+	_divu64(n, d, 0, r);
 	if(nneg)
 		vneg(r);
 }
 
-void
-_rshav(Vlong *r, Vlong a, int b)
-{
-	long t;
-
-	t = a.hi;
-	if(b >= 32) {
-		r->hi = t>>31;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->lo = t>>31;
-			return;
-		}
-		r->lo = t >> (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->hi = t;
-		r->lo = a.lo;
-		return;
-	}
-	r->hi = t >> b;
-	r->lo = (t << (32-b)) | (a.lo >> b);
-}
-
-void
-_rshlv(Vlong *r, Vlong a, int b)
-{
-	ulong t;
-
-	t = a.hi;
-	if(b >= 32) {
-		r->hi = 0;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->lo = 0;
-			return;
-		}
-		r->lo = t >> (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->hi = t;
-		r->lo = a.lo;
-		return;
-	}
-	r->hi = t >> b;
-	r->lo = (t << (32-b)) | (a.lo >> b);
-}
-
-void
-_lshv(Vlong *r, Vlong a, int b)
-{
-	ulong t;
-
-	t = a.lo;
-	if(b >= 32) {
-		r->lo = 0;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->hi = 0;
-			return;
-		}
-		r->hi = t << (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->lo = t;
-		r->hi = a.hi;
-		return;
-	}
-	r->lo = t << b;
-	r->hi = (t >> (32-b)) | (a.hi << b);
-}
-
-void
-_andv(Vlong *r, Vlong a, Vlong b)
-{
-	r->hi = a.hi & b.hi;
-	r->lo = a.lo & b.lo;
-}
-
-void
-_orv(Vlong *r, Vlong a, Vlong b)
-{
-	r->hi = a.hi | b.hi;
-	r->lo = a.lo | b.lo;
-}
-
-void
-_xorv(Vlong *r, Vlong a, Vlong b)
-{
-	r->hi = a.hi ^ b.hi;
-	r->lo = a.lo ^ b.lo;
-}
-
-void
-_vpp(Vlong *l, Vlong *r)
-{
-
-	l->hi = r->hi;
-	l->lo = r->lo;
-	r->lo++;
-	if(r->lo == 0)
-		r->hi++;
-}
-
-void
-_vmm(Vlong *l, Vlong *r)
-{
-
-	l->hi = r->hi;
-	l->lo = r->lo;
-	if(r->lo == 0)
-		r->hi--;
-	r->lo--;
-}
-
-void
-_ppv(Vlong *l, Vlong *r)
-{
-
-	r->lo++;
-	if(r->lo == 0)
-		r->hi++;
-	l->hi = r->hi;
-	l->lo = r->lo;
-}
-
-void
-_mmv(Vlong *l, Vlong *r)
-{
-
-	if(r->lo == 0)
-		r->hi--;
-	r->lo--;
-	l->hi = r->hi;
-	l->lo = r->lo;
-}
-
 void
 _vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
 {
@@ -492,227 +252,3 @@ _vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
 	}
 	*ret = u;
 }
-
-void
-_p2v(Vlong *ret, void *p)
-{
-	long t;
-
-	t = (ulong)p;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-void
-_sl2v(Vlong *ret, long sl)
-{
-	long t;
-
-	t = sl;
-	ret->lo = t;
-	ret->hi = t >> 31;
-}
-
-void
-_ul2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-void
-_si2v(Vlong *ret, int si)
-{
-	long t;
-
-	t = si;
-	ret->lo = t;
-	ret->hi = t >> 31;
-}
-
-void
-_ui2v(Vlong *ret, uint ui)
-{
-	long t;
-
-	t = ui;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-void
-_sh2v(Vlong *ret, long sh)
-{
-	long t;
-
-	t = (sh << 16) >> 16;
-	ret->lo = t;
-	ret->hi = t >> 31;
-}
-
-void
-_uh2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul & 0xffff;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-void
-_sc2v(Vlong *ret, long uc)
-{
-	long t;
-
-	t = (uc << 24) >> 24;
-	ret->lo = t;
-	ret->hi = t >> 31;
-}
-
-void
-_uc2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul & 0xff;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-long
-_v2sc(Vlong rv)
-{
-	long t;
-
-	t = rv.lo & 0xff;
-	return (t << 24) >> 24;
-}
-
-long
-_v2uc(Vlong rv)
-{
-
-	return rv.lo & 0xff;
-}
-
-long
-_v2sh(Vlong rv)
-{
-	long t;
-
-	t = rv.lo & 0xffff;
-	return (t << 16) >> 16;
-}
-
-long
-_v2uh(Vlong rv)
-{
-
-	return rv.lo & 0xffff;
-}
-
-long
-_v2sl(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-long
-_v2ul(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-long
-_v2si(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-long
-_v2ui(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-int
-_testv(Vlong rv)
-{
-	return rv.lo || rv.hi;
-}
-
-int
-_eqv(Vlong lv, Vlong rv)
-{
-	return lv.lo == rv.lo && lv.hi == rv.hi;
-}
-
-int
-_nev(Vlong lv, Vlong rv)
-{
-	return lv.lo != rv.lo || lv.hi != rv.hi;
-}
-
-int
-_ltv(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi < (long)rv.hi || 
-		(lv.hi == rv.hi && lv.lo < rv.lo);
-}
-
-int
-_lev(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi < (long)rv.hi || 
-		(lv.hi == rv.hi && lv.lo <= rv.lo);
-}
-
-int
-_gtv(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi > (long)rv.hi || 
-		(lv.hi == rv.hi && lv.lo > rv.lo);
-}
-
-int
-_gev(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi > (long)rv.hi || 
-		(lv.hi == rv.hi && lv.lo >= rv.lo);
-}
-
-int
-_lov(Vlong lv, Vlong rv)
-{
-	return lv.hi < rv.hi || 
-		(lv.hi == rv.hi && lv.lo < rv.lo);
-}
-
-int
-_lsv(Vlong lv, Vlong rv)
-{
-	return lv.hi < rv.hi || 
-		(lv.hi == rv.hi && lv.lo <= rv.lo);
-}
-
-int
-_hiv(Vlong lv, Vlong rv)
-{
-	return lv.hi > rv.hi || 
-		(lv.hi == rv.hi && lv.lo > rv.lo);
-}
-
-int
-_hsv(Vlong lv, Vlong rv)
-{
-	return lv.hi > rv.hi || 
-		(lv.hi == rv.hi && lv.lo >= rv.lo);
-}

+ 2 - 2
sys/src/cmd/aux/vga/vesa.c

@@ -80,7 +80,7 @@ enum {
 	Fdpmssuspend = 1<<2,	/* supports DPMS suspend mode */
 	Fdpmsactiveoff = 1<<3,	/* supports DPMS active off mode */
 	Fmonochrome = 1<<4,	/* is a monochrome display */
-	Fgtf = 1<<5,			/* supports VESA GTF: see /lib/vesa/gtf10.pdf */
+	Fgtf = 1<<5,		/* supports VESA GTF: see /public/doc/vesa/gtf10.pdf */
 };
 
 #define WORD(p) ((p)[0] | ((p)[1]<<8))
@@ -684,7 +684,7 @@ addmode(Modelist *l, Mode m)
 /*
  * Parse VESA EDID information.  Based on the VESA
  * Extended Display Identification Data standard, Version 3,
- * November 13, 1997.  See /lib/vesa/edidv3.pdf.
+ * November 13, 1997.  See /public/doc/vesa/edidv3.pdf.
  *
  * This only handles 128-byte EDID blocks.  Until I find
  * a monitor that produces 256-byte blocks, I'm not going

+ 11 - 9
sys/src/cmd/aux/vga/vesadb.c

@@ -1,4 +1,4 @@
-// DO NOT EDIT; this file is automatically generated from vesa.txt
+/* this file was automatically generated from vesa.txt */
 
 #include <u.h>
 #include <libc.h>
@@ -6,14 +6,16 @@
 #include "pci.h"
 #include "vga.h"
 
-// VESA Monitor Timing Standard mode definitions as per
-// VESA and Industry Standards and Guidelines for Computer
-// Display Monitor Timing, Version 1.0, Revision 0.8, 17 September 1998.
-//
-// See /lib/vesa/dmtv1r08.pdf.
-//
-// This might go back into vgadb at some point. It's here mainly
-// so that people don't change it, and so that we can run without vgadb.
+/*
+ * VESA Monitor Timing Standard mode definitions as per
+ * VESA and Industry Standards and Guidelines for Computer
+ * Display Monitor Timing, Version 1.0, Revision 0.8, 17 September 1998.
+ *
+ * See /public/doc/vesa/dmtv1r08.pdf.
+ *
+ * This might go back into vgadb at some point. It's here mainly
+ * so that people don't change it, and so that we can run without vgadb.
+ */
 
 static Mode vesa640x480x60 = {
 	.name = "640x480@60Hz",

+ 3 - 2
sys/src/cmd/cc/cc.h

@@ -123,7 +123,7 @@ struct	Type
 	long	width;
 	long	offset;
 	long	lineno;
-	char	shift;
+	schar	shift;
 	char	nbits;
 	char	etype;
 	char	garb;
@@ -319,6 +319,7 @@ enum
 	TSTRUCT,
 	TUNION,
 	TENUM,
+	TDOT,
 	NTYPE,
 
 	TAUTO	= NTYPE,
@@ -331,7 +332,6 @@ enum
 	TVOLATILE,
 	TUNSIGNED,
 	TSIGNED,
-	TDOT,
 	TFILE,
 	TOLD,
 	NALLTYPES,
@@ -432,6 +432,7 @@ EXTERN	Type*	firstargtype;
 EXTERN	Decl*	firstdcl;
 EXTERN	int	fperror;
 EXTERN	Sym*	hash[NHASH];
+EXTERN	int	hasdoubled;
 EXTERN	char*	hunk;
 EXTERN	char*	include[20];
 EXTERN	Io*	iofree;

+ 2 - 0
sys/src/cmd/cc/com.c

@@ -221,6 +221,8 @@ tcomo(Node *n, int f)
 		if(tcompat(n, l->type, r->type, tand))
 			goto bad;
 		n->type = l->type;
+		n->right = new1(OCAST, r, Z);
+		n->right->type = types[TINT];
 		if(typeu[n->type->etype]) {
 			if(n->op == OASASHR)
 				n->op = OASLSHR;

+ 5 - 1
sys/src/cmd/cc/com64.c

@@ -1,7 +1,7 @@
 #include "cc.h"
 
 /*
- * this is machine depend, but it is totally
+ * this is machine dependent, but it is totally
  * common on all of the 64-bit symulating machines.
  */
 
@@ -258,6 +258,8 @@ com64(Node *n)
 			r->op = OFUNC;
 			r->type = types[TLONG];
 			return 1;
+		case OCOND:
+			return 1;
 		}
 	}
 
@@ -503,6 +505,7 @@ setvinc:
 	n->left = a;
 	l = new(OADDR, l, Z);
 	l->type = typ(TIND, l->left->type);
+	l->complex = l->left->complex;
 	n->right = new(OLIST, l, r);
 	n->complex = FNX;
 	n->op = OFUNC;
@@ -536,6 +539,7 @@ setasop:
 
 	t = new(OADDR, l, 0);
 	t->type = typ(TIND, l->type);
+	t->complex = l->complex;
 	r = new(OLIST, t, r);
 
 	n->left = nodvasop;

+ 6 - 0
sys/src/cmd/cc/dcl.c

@@ -975,6 +975,12 @@ rsametype(Type *t1, Type *t2, int n, int f)
 				snap(t1);
 			if(t2->link == T)
 				snap(t2);
+			if(t1 != t2 && t1->link == T && t2->link == T){
+				/* structs with missing or different tag names aren't considered equal */
+				if(t1->tag == nil || t2->tag == nil ||
+				   strcmp(t1->tag->name, t2->tag->name) != 0)
+					return 0;
+			}
 			t1 = t1->link;
 			t2 = t2->link;
 			for(;;) {

+ 31 - 3
sys/src/cmd/cc/pgen.c

@@ -9,6 +9,7 @@ codgen(Node *n, Node *nn)
 	cursafe = 0;
 	curarg = 0;
 	maxargsafe = 0;
+	hasdoubled = 0;
 
 	/*
 	 * isolate name
@@ -25,6 +26,24 @@ codgen(Node *n, Node *nn)
 	gpseudo(ATEXT, n1->sym, nodconst(stkoff));
 	sp = p;
 
+	if(typecmplx[thisfn->link->etype]) {
+		if(nodret == nil) {
+			nodret = new(ONAME, Z, Z);
+			nodret->sym = slookup(".ret");
+			nodret->class = CPARAM;
+			nodret->type = types[TIND];
+			nodret->etype = TIND;
+			nodret = new(OIND, nodret, Z);
+		}
+		n1 = nodret->left;
+		if(n1->type == T || n1->type->link != thisfn->link) {
+			n1->type = typ(TIND, thisfn->link);
+			n1->etype = n1->type->etype;
+			nodret = new(OIND, n1, Z);
+			complex(nodret);
+		}
+	}
+
 	/*
 	 * isolate first argument
 	 */
@@ -35,11 +54,14 @@ codgen(Node *n, Node *nn)
 			gmove(&nod, &nod1);
 		} else
 		if(firstarg && typeword[firstargtype->etype]) {
-			nod1 = *nodret->left;
+			nod1 = znode;
+			nod1.op = ONAME;
 			nod1.sym = firstarg;
 			nod1.type = firstargtype;
+			nod1.class = CPARAM;
 			nod1.xoffset = align(0, firstargtype, Aarg1);
 			nod1.etype = firstargtype->etype;
+			xcom(&nod1);
 			nodreg(&nod, &nod1, REGARG);
 			gmove(&nod, &nod1);
 		}
@@ -56,7 +78,7 @@ codgen(Node *n, Node *nn)
 	if(!debug['N'] || debug['R'] || debug['P'])
 		regopt(sp);
 	
-	if(thechar=='6' || thechar=='7')	/* [sic] */
+	if(thechar=='6' || thechar=='7' || thechar=='9' || hasdoubled)	/* [sic] */
 		maxargsafe = round(maxargsafe, 8);
 	sp->to.offset += maxargsafe;
 }
@@ -148,7 +170,13 @@ loop:
 			break;
 		}
 		if(typecmplx[n->type->etype]) {
-			sugen(l, nodret, n->type->width);
+			nod = znode;
+			nod.op = OAS;
+			nod.left = nodret;
+			nod.right = l;
+			nod.type = n->type;
+			nod.complex = l->complex;
+			cgen(&nod, Z);
 			noretval(3);
 			gbranch(ORETURN);
 			break;

+ 0 - 94
sys/src/cmd/cc/y.tab.h

@@ -1,94 +0,0 @@
-
-typedef union 	{
-	Node*	node;
-	Sym*	sym;
-	Type*	type;
-	struct
-	{
-		Type*	t;
-		char	c;
-	} tycl;
-	struct
-	{
-		Type*	t1;
-		Type*	t2;
-	} tyty;
-	struct
-	{
-		char*	s;
-		long	l;
-	} sval;
-	long	lval;
-	double	dval;
-	vlong	vval;
-}	YYSTYPE;
-extern	YYSTYPE	yylval;
-#define	LPE	57346
-#define	LME	57347
-#define	LMLE	57348
-#define	LDVE	57349
-#define	LMDE	57350
-#define	LRSHE	57351
-#define	LLSHE	57352
-#define	LANDE	57353
-#define	LXORE	57354
-#define	LORE	57355
-#define	LOROR	57356
-#define	LANDAND	57357
-#define	LEQ	57358
-#define	LNE	57359
-#define	LLE	57360
-#define	LGE	57361
-#define	LLSH	57362
-#define	LRSH	57363
-#define	LMM	57364
-#define	LPP	57365
-#define	LMG	57366
-#define	LNAME	57367
-#define	LTYPE	57368
-#define	LFCONST	57369
-#define	LDCONST	57370
-#define	LCONST	57371
-#define	LLCONST	57372
-#define	LUCONST	57373
-#define	LULCONST	57374
-#define	LVLCONST	57375
-#define	LUVLCONST	57376
-#define	LSTRING	57377
-#define	LLSTRING	57378
-#define	LAUTO	57379
-#define	LBREAK	57380
-#define	LCASE	57381
-#define	LCHAR	57382
-#define	LCONTINUE	57383
-#define	LDEFAULT	57384
-#define	LDO	57385
-#define	LDOUBLE	57386
-#define	LELSE	57387
-#define	LEXTERN	57388
-#define	LFLOAT	57389
-#define	LFOR	57390
-#define	LGOTO	57391
-#define	LIF	57392
-#define	LINT	57393
-#define	LLONG	57394
-#define	LREGISTER	57395
-#define	LRETURN	57396
-#define	LSHORT	57397
-#define	LSIZEOF	57398
-#define	LUSED	57399
-#define	LSTATIC	57400
-#define	LSTRUCT	57401
-#define	LSWITCH	57402
-#define	LTYPEDEF	57403
-#define	LTYPESTR	57404
-#define	LUNION	57405
-#define	LUNSIGNED	57406
-#define	LWHILE	57407
-#define	LVOID	57408
-#define	LENUM	57409
-#define	LSIGNED	57410
-#define	LCONSTNT	57411
-#define	LVOLATILE	57412
-#define	LSET	57413
-#define	LSIGNOF	57414

+ 1 - 1
sys/src/cmd/postscript/text2post/mkfile

@@ -14,7 +14,7 @@ HFILES=$COMMONDIR/comments.h\
 BIN=$POSTBIN
 </sys/src/cmd/mkone
 
-CFLAGS=-c -D'PROGRAMVERSION="0.1"' -D'DOROUND=1' -I$COMMONDIR
+CFLAGS=-D'PROGRAMVERSION="0.1"' -D'DOROUND=1' -I$COMMONDIR
 
 install:V:	$POSTLIB/pjw.char.ps
 

+ 3 - 3
sys/src/cmd/postscript/text2post/text2post.c

@@ -474,13 +474,13 @@ main(int argc, char *argv[]) {
 	char *t;
 	Biobuf *input;
 
-	if ((bstderr = (Biobuf *)malloc(sizeof(Biobuf))) < (Biobuf *)0)
+	if ((bstderr = (Biobuf *)malloc(sizeof(Biobuf))) == nil)
 		exits("malloc");
 	if (Binit(bstderr, 2, OWRITE) == Beof)
 		exits("Binit");
 	Bstderr = &(bstderr->Biobufhdr);
 
-	if ((bstdout = (Biobuf *)malloc(sizeof(Biobuf))) < (Biobuf *)0)
+	if ((bstdout = (Biobuf *)malloc(sizeof(Biobuf))) == nil)
 		exits("malloc");
 	if (Binit(bstdout, 1, OWRITE) == Beof)
 		exits("Binit");
@@ -542,7 +542,7 @@ main(int argc, char *argv[]) {
 	}ARGEND;
 	prologues();
 	if (argc <= 0) {
-		if ((bstdin = (Biobuf *)malloc(sizeof(Biobuf))) < (Biobuf *)0)
+		if ((bstdin = (Biobuf *)malloc(sizeof(Biobuf))) == nil)
 			exits("malloc");
 		if (Binit(bstdin, 0, OREAD) == Beof) {
 			fprint(2, "cannot Binit stdin\n");

+ 1 - 1
sys/src/cmd/postscript/tr2post/tr2post.c

@@ -162,7 +162,7 @@ main(int argc, char *argv[]) {
 	}ARGEND;
 	readDESC();
 	if (argc == 0) {
-		if ((binp = (Biobuf *)malloc(sizeof(Biobuf))) < (Biobuf *)0) {
+		if ((binp = (Biobuf *)malloc(sizeof(Biobuf))) == nil) {
 			Bprint(Bstderr, "malloc failed.\n");
 			exits("malloc");
 		}

+ 27 - 3
sys/src/cmd/qa/a.y

@@ -20,9 +20,9 @@
 %token	<lval>	LNOP LEND LRETT LWORD LTEXT LDATA LRETRN
 %token	<lval>	LCONST LSP LSB LFP LPC LCREG LFLUSH
 %token	<lval>	LREG LFREG LR LCR LF LFPSCR
-%token	<lval>	LLR LCTR LSPR LSPREG LSEG LMSR
+%token	<lval>	LLR LCTR LSPR LSPREG LSEG LMSR LDCR
 %token	<lval>	LSCHED LXLD LXST LXOP LXMV
-%token	<lval>	LRLWM LMOVMW LMOVEM LMOVFL LMTFSB LMA
+%token	<lval>	LRLWM LMOVMW LMOVEM LMOVFL LMTFSB LMA LFMOVX
 %token	<dval>	LFCONST
 %token	<sval>	LSCONST
 %token	<sym>	LNAME LLAB LVAR
@@ -95,7 +95,7 @@ inst:
 		outcode($1, &$2, NREG, &$4);
 	}
 /*
- * load floats
+ * load and store floats
  */
 |	LFMOV addr ',' freg
 	{
@@ -121,6 +121,17 @@ inst:
 	{
 		outcode($1, &$2, NREG, &$4);
 	}
+/*
+ * load and store floats, indexed only
+ */
+|	LFMOVX regaddr ',' freg
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
+|	LFMOVX freg ',' regaddr
+	{
+		outcode($1, &$2, NREG, &$4);
+	}
 /*
  * store ints and bytes
  */
@@ -697,6 +708,19 @@ psr:
 		$$.type = $1;
 		$$.offset = $3;
 	}
+|	LDCR '(' con ')'
+	{
+		$$ = nullgen;
+		$$.type = $1;
+		$$.offset = $3;
+	}
+|	LDCR '(' sreg ')'
+	{
+		$$ = nullgen;
+		$$.type = $1;
+		$$.reg = $3;
+		$$.offset = 0;
+	}
 |	msr
 
 seg:

+ 85 - 2
sys/src/cmd/qa/lex.c

@@ -169,7 +169,7 @@ struct
 	"MSR",		LMSR,	D_MSR,
 	"FPSCR",	LFPSCR,	D_FPSCR,
 	"SPR",		LSPR,	D_SPR,
-	"DCR",		LSPR,	D_DCR,
+	"DCR",		LDCR,	D_DCR,
 
 	"SEG",		LSEG,	D_SREG,
 
@@ -539,7 +539,85 @@ struct
 	"NMACLHWV", LMA, ANMACLHWV,
 	"NMACLHWVCC", LMA, ANMACLHWVCC,
 
-/* special instructions */
+	/* optional on 32-bit */
+	"FRES", LFCONV, AFRES,
+	"FRESCC", LFCONV, AFRESCC,
+	"FRSQRTE", LFCONV, AFRSQRTE,
+	"FRSQRTECC", LFCONV, AFRSQRTECC,
+	"FSEL", LFMA, AFSEL,
+	"FSELCC", LFMA, AFSELCC,
+	"FSQRT", LFCONV, AFSQRT,
+	"FSQRTCC", LFCONV, AFSQRTCC,
+	"FSQRTS", LFCONV, AFSQRTS,
+	"FSQRTSCC", LFCONV, AFSQRTSCC,
+
+	/* parallel, cross, and secondary (fp2) */
+	"FPSEL", LFMA, AFPSEL,
+	"FPMUL", LFADD, AFPMUL,
+	"FXMUL", LFADD, AFXMUL,
+	"FXPMUL", LFADD, AFXPMUL,
+	"FXSMUL", LFADD, AFXSMUL,
+	"FPADD", LFADD, AFPADD,
+	"FPSUB", LFADD, AFPSUB,
+	"FPRE", LFCONV, AFPRE,
+	"FPRSQRTE", LFCONV, AFPRSQRTE,
+	"FPMADD", LFMA, AFPMADD,
+	"FXMADD", LFMA, AFXMADD,
+	"FXCPMADD", LFMA, AFXCPMADD,
+	"FXCSMADD", LFMA, AFXCSMADD,
+	"FPNMADD", LFMA, AFPNMADD,
+	"FXNMADD", LFMA, AFXNMADD,
+	"FXCPNMADD", LFMA, AFXCPNMADD,
+	"FXCSNMADD", LFMA, AFXCSNMADD,
+	"FPMSUB", LFMA, AFPMSUB,
+	"FXMSUB", LFMA, AFXMSUB,
+	"FXCPMSUB", LFMA, AFXCPMSUB,
+	"FXCSMSUB", LFMA, AFXCSMSUB,
+	"FPNMSUB", LFMA, AFPNMSUB,
+	"FXNMSUB", LFMA, AFXNMSUB,
+	"FXCPNMSUB", LFMA, AFXCPNMSUB,
+	"FXCSNMSUB", LFMA, AFXCSNMSUB,
+	"FPABS", LFCONV, AFPABS,
+	"FPNEG", LFCONV, AFPNEG,
+	"FPRSP", LFCONV, AFPRSP,
+	"FPNABS", LFCONV, AFPNABS,
+	"FSMOVD", LFMOV, AFSMOVD,
+	"FSCMP", LFCMP, AFSCMP,
+	"FSABS", LFCONV, AFSABS,
+	"FSNEG", LFCONV, AFSNEG,
+	"FSNABS", LFCONV, AFSNABS,
+	"FPCTIW", LFCONV, AFPCTIW,
+	"FPCTIWZ", LFCONV, AFPCTIWZ,
+	"FMOVSPD", LFCONV, AFMOVSPD,
+	"FMOVPSD", LFCONV, AFMOVPSD,
+	"FXCPNPMA", LFMA, AFXCPNPMA,
+	"FXCSNPMA", LFMA, AFXCSNPMA,
+	"FXCPNSMA", LFMA, AFXCPNSMA,
+	"FXCSNSMA", LFMA, AFXCSNSMA,
+	"FXCXNPMA", LFMA, AFXCXNPMA,
+	"FXCXNSMA", LFMA, AFXCXNSMA,
+	"FXCXMA", LFMA, AFXCXMA,
+	"FXCXNMS", LFMA, AFXCXNMS,
+
+	/* parallel, cross, and secondary load and store (fp2) */
+	"FSMOVS", LFMOVX, AFSMOVS,
+	"FSMOVSU", LFMOVX, AFSMOVSU,
+	"FSMOVD", LFMOVX, AFSMOVD,
+	"FSMOVDU", LFMOVX, AFSMOVDU,
+	"FXMOVS", LFMOVX, AFXMOVS,
+	"FXMOVSU", LFMOVX, AFXMOVSU,
+	"FXMOVD", LFMOVX, AFXMOVD,
+	"FXMOVDU", LFMOVX, AFXMOVDU,
+	"FPMOVS", LFMOVX, AFPMOVS,
+	"FPMOVSU", LFMOVX, AFPMOVSU,
+	"FPMOVD", LFMOVX, AFPMOVD,
+	"FPMOVDU", LFMOVX, AFPMOVDU,
+	"FPMOVIW", LFMOVX, AFPMOVIW,
+
+	"AFMOVSPD",	LFMOV,	AFMOVSPD,
+	"AFMOVPSD",	LFMOV,	AFMOVPSD,
+
+	/* special instructions */
 	"DCBF",		LXOP,	ADCBF,
 	"DCBI",		LXOP,	ADCBI,
 	"DCBST",	LXOP,	ADCBST,
@@ -626,6 +704,7 @@ zname(char *n, int t, int s)
 {
 
 	Bputc(&obuf, ANAME);
+	Bputc(&obuf, ANAME>>8);
 	Bputc(&obuf, t);	/* type */
 	Bputc(&obuf, s);	/* sym */
 	while(*n) {
@@ -746,6 +825,7 @@ outcode(int a, Gen *g1, int reg, Gen *g2)
 		st = outsim(g2);
 	} while(sf != 0 && st == sf);
 	Bputc(&obuf, a);
+	Bputc(&obuf, a>>8);
 	Bputc(&obuf, reg|nosched);
 	Bputc(&obuf, lineno);
 	Bputc(&obuf, lineno>>8);
@@ -773,6 +853,7 @@ outgcode(int a, Gen *g1, int reg, Gen *g2, Gen *g3)
 	if(g2->type != D_NONE)
 		flag = 0x40;	/* flags extra operand */
 	Bputc(&obuf, a);
+	Bputc(&obuf, a>>8);
 	Bputc(&obuf, reg | nosched | flag);
 	Bputc(&obuf, lineno);
 	Bputc(&obuf, lineno>>8);
@@ -828,6 +909,7 @@ outhist(void)
 			}
 			if(n) {
 				Bputc(&obuf, ANAME);
+				Bputc(&obuf, ANAME>>8);
 				Bputc(&obuf, D_FILE);	/* type */
 				Bputc(&obuf, 1);	/* sym */
 				Bputc(&obuf, '<');
@@ -843,6 +925,7 @@ outhist(void)
 		g.offset = h->offset;
 
 		Bputc(&obuf, AHISTORY);
+		Bputc(&obuf, AHISTORY>>8);
 		Bputc(&obuf, 0);
 		Bputc(&obuf, h->line);
 		Bputc(&obuf, h->line>>8);

+ 0 - 126
sys/src/cmd/qc/bits.c

@@ -1,126 +0,0 @@
-#include "gc.h"
-
-/*
-Bits
-bor(Bits a, Bits b)
-{
-	Bits c;
-	int i;
-
-	for(i=0; i<BITS; i++)
-		c.b[i] = a.b[i] | b.b[i];
-	return c;
-}
-*/
-
-/*
-Bits
-band(Bits a, Bits b)
-{
-	Bits c;
-	int i;
-
-	for(i=0; i<BITS; i++)
-		c.b[i] = a.b[i] & b.b[i];
-	return c;
-}
-*/
-
-/*
-Bits
-bnot(Bits a)
-{
-	Bits c;
-	int i;
-
-	for(i=0; i<BITS; i++)
-		c.b[i] = ~a.b[i];
-	return c;
-}
-*/
-
-int
-bany(Bits *a)
-{
-	int i;
-
-	for(i=0; i<BITS; i++)
-		if(a->b[i])
-			return 1;
-	return 0;
-}
-
-/*
-int
-beq(Bits a, Bits b)
-{
-	int i;
-
-	for(i=0; i<BITS; i++)
-		if(a.b[i] != b.b[i])
-			return 0;
-	return 1;
-}
-*/
-
-int
-bnum(Bits a)
-{
-	int i;
-	long b;
-
-	for(i=0; i<BITS; i++)
-		if(b = a.b[i])
-			return 32*i + bitno(b);
-	diag(Z, "bad in bnum");
-	return 0;
-}
-
-Bits
-blsh(unsigned n)
-{
-	Bits c;
-
-	c = zbits;
-	c.b[n/32] = 1L << (n%32);
-	return c;
-}
-
-/*
-int
-bset(Bits a, unsigned n)
-{
-	int i;
-
-	if(a.b[n/32] & (1L << (n%32)))
-		return 1;
-	return 0;
-}
-*/
-
-int
-Bconv(va_list *arg, Fconv *fp)
-{
-	char str[STRINGSZ], ss[STRINGSZ], *s;
-	Bits bits;
-	int i;
-
-	str[0] = 0;
-	bits = va_arg(*arg, Bits);
-	while(bany(&bits)) {
-		i = bnum(bits);
-		if(str[0])
-			strcat(str, " ");
-		if(var[i].sym == S) {
-			sprint(ss, "$%ld", var[i].offset);
-			s = ss;
-		} else
-			s = var[i].sym->name;
-		if(strlen(str) + strlen(s) + 1 >= STRINGSZ)
-			break;
-		strcat(str, s);
-		bits.b[i/32] &= ~(1L << (i%32));
-	}
-	strconv(str, fp);
-	return 0;
-}

+ 442 - 76
sys/src/cmd/qc/cgen.c

@@ -1,5 +1,10 @@
 #include "gc.h"
 
+static void cmpv(Node*, int, Node*);
+static void testv(Node*, int);
+static void cgen64(Node*, Node*);
+static int isvconstable(int, vlong);
+
 void
 cgen(Node *n, Node *nn)
 {
@@ -15,10 +20,18 @@ cgen(Node *n, Node *nn)
 	}
 	if(n == Z || n->type == T)
 		return;
-	if(typesuv[n->type->etype]) {
+	if(typesu[n->type->etype]) {
 		sugen(n, nn, n->type->width);
 		return;
 	}
+	if(typev[n->type->etype]) {
+		switch(n->op) {
+		case OCONST:
+		case OFUNC:
+			cgen64(n, nn);
+			return;
+		}
+	}
 	l = n->left;
 	r = n->right;
 	o = n->op;
@@ -44,13 +57,17 @@ cgen(Node *n, Node *nn)
 	if(r != Z && r->complex >= FNX)
 	switch(o) {
 	default:
-		regret(&nod, r);
-		cgen(r, &nod);
-
-		regsalloc(&nod1, r);
-		gopcode(OAS, &nod, Z, &nod1);
+		if(!typev[r->type->etype]) {
+			regret(&nod, r);
+			cgen(r, &nod);
+			regsalloc(&nod1, r);
+			gmove(&nod, &nod1);
+			regfree(&nod);
+		} else {
+			regsalloc(&nod1, r);
+			cgen(r, &nod1);
+		}
 
-		regfree(&nod);
 		nod = *n;
 		nod.right = &nod1;
 		cgen(&nod, nn);
@@ -70,6 +87,19 @@ cgen(Node *n, Node *nn)
 		diag(n, "unknown op in cgen: %O", o);
 		break;
 
+	case ONEG:
+	case OCOM:
+		if(nn == Z) {
+			nullwarn(l, Z);
+			break;
+		}
+		regalloc(&nod, l, nn);
+		cgen(l, &nod);
+		gopcode(o, &nod, Z, &nod);
+		gmove(&nod, nn);
+		regfree(&nod);
+		break;
+
 	case OAS:
 		if(l->op == OBIT)
 			goto bitas;
@@ -132,8 +162,11 @@ cgen(Node *n, Node *nn)
 	case OXOR:
 		if(nn != Z)
 		if(r->op == OCONST && r->vconst == -1){
-			cgen(l, nn);
-			gopcode(OCOM, nn, Z, nn);
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			gopcode(OCOM, &nod, Z, &nod);
+			gmove(&nod, nn);
+			regfree(&nod);
 			break;
 		}
 
@@ -147,15 +180,14 @@ cgen(Node *n, Node *nn)
 		/*
 		 * immediate operands
 		 */
-		if(nn != Z)
-		if(r->op == OCONST)
-		if(!typefd[n->type->etype]) {
-			cgen(l, nn);
-			if(r->vconst == 0)
-			if(o != OAND)
-				break;
-			if(nn != Z)
-				gopcode(o, r, Z, nn);
+		if(nn != Z && r->op == OCONST && !typefd[n->type->etype] &&
+		    (!typev[n->type->etype] || isvconstable(o, r->vconst))) {
+			regalloc(&nod, l, nn);
+			cgen(l, &nod);
+			if(o == OAND || r->vconst != 0)
+				gopcode(o, r, Z, &nod);
+			gmove(&nod, nn);
+			regfree(&nod);
 			break;
 		}
 
@@ -169,7 +201,7 @@ cgen(Node *n, Node *nn)
 			nullwarn(l, r);
 			break;
 		}
-		if(o == OMUL || o == OLMUL) {
+		if((o == OMUL || o == OLMUL) && !typev[n->type->etype]) {
 			if(mulcon(n, nn))
 				break;
 			if(debug['M'])
@@ -178,19 +210,23 @@ cgen(Node *n, Node *nn)
 		if(l->complex >= r->complex) {
 			regalloc(&nod, l, nn);
 			cgen(l, &nod);
-			regalloc(&nod1, r, Z);
+			if(o != OMUL || typev[n->type->etype] || !sconst(r)) {
+				regalloc(&nod1, r, Z);
+				cgen(r, &nod1);
+				gopcode(o, &nod1, Z, &nod);
+				regfree(&nod1);
+			} else
+				gopcode(o, r, Z, &nod);
+		} else {
+			regalloc(&nod1, r, nn);
 			cgen(r, &nod1);
+			regalloc(&nod, l, Z);
+			cgen(l, &nod);
 			gopcode(o, &nod1, Z, &nod);
-		} else {
-			regalloc(&nod, r, nn);
-			cgen(r, &nod);
-			regalloc(&nod1, l, Z);
-			cgen(l, &nod1);
-			gopcode(o, &nod, &nod1, &nod);
+			regfree(&nod1);
 		}
 		gopcode(OAS, &nod, Z, nn);
 		regfree(&nod);
-		regfree(&nod1);
 		break;
 
 	case OASLSHR:
@@ -203,14 +239,13 @@ cgen(Node *n, Node *nn)
 	case OASOR:
 		if(l->op == OBIT)
 			goto asbitop;
-		if(r->op == OCONST)
-		if(!typefd[r->type->etype])
-		if(!typefd[n->type->etype]) {
+		if(r->op == OCONST && !typefd[r->type->etype] && !typefd[n->type->etype] &&
+		   (!typev[n->type->etype] || isvconstable(o, r->vconst))) {
 			if(l->addable < INDEXED)
 				reglcgen(&nod2, l, Z);
 			else
 				nod2 = *l;
-			regalloc(&nod, r, nn);
+			regalloc(&nod, l, nn);
 			gopcode(OAS, &nod2, Z, &nod);
 			gopcode(o, r, Z, &nod);
 			gopcode(OAS, &nod, Z, &nod2);
@@ -234,20 +269,22 @@ cgen(Node *n, Node *nn)
 				reglcgen(&nod2, l, Z);
 			else
 				nod2 = *l;
-			regalloc(&nod, n, nn);
+			regalloc(&nod, r, Z);
 			cgen(r, &nod);
 		} else {
-			regalloc(&nod, n, nn);
+			regalloc(&nod, r, Z);
 			cgen(r, &nod);
 			if(l->addable < INDEXED)
 				reglcgen(&nod2, l, Z);
 			else
 				nod2 = *l;
 		}
-		regalloc(&nod1, n, Z);
+		regalloc(&nod1, n, nn);
 		gopcode(OAS, &nod2, Z, &nod1);
-		gopcode(o, &nod, &nod1, &nod);
-		gopcode(OAS, &nod, Z, &nod2);
+		gopcode(o, &nod, Z, &nod1);
+		gopcode(OAS, &nod1, Z, &nod2);
+		if(nn != Z)
+			gopcode(OAS, &nod1, Z, nn);
 		regfree(&nod);
 		regfree(&nod1);
 		if(l->addable < INDEXED)
@@ -336,7 +373,7 @@ cgen(Node *n, Node *nn)
 		} else
 			cgen(l, &nod);
 		regind(&nod, n);
-		gopcode(OAS, &nod, Z, nn);
+		gmove(&nod, nn);
 		regfree(&nod);
 		break;
 
@@ -390,11 +427,15 @@ cgen(Node *n, Node *nn)
 			cgen(l, nn);
 			break;
 		}
+		if(typev[l->type->etype] || typev[n->type->etype]) {
+			cgen64(n, nn);
+			break;
+		}
 		regalloc(&nod, l, nn);
 		cgen(l, &nod);
 		regalloc(&nod1, n, &nod);
-		gopcode(OAS, &nod, Z, &nod1);
-		gopcode(OAS, &nod1, Z, nn);
+		gmove(&nod, &nod1);
+		gmove(&nod1, nn);
 		regfree(&nod1);
 		regfree(&nod);
 		break;
@@ -610,7 +651,7 @@ bcgen(Node *n, int true)
 void
 boolgen(Node *n, int true, Node *nn)
 {
-	int o;
+	int o, uns;
 	Prog *p1, *p2;
 	Node *l, *r, nod, nod1;
 	long curs;
@@ -619,6 +660,7 @@ boolgen(Node *n, int true, Node *nn)
 		prtree(nn, "boolgen lhs");
 		prtree(n, "boolgen");
 	}
+	uns = 0;
 	curs = cursafe;
 	l = n->left;
 	r = n->right;
@@ -637,6 +679,10 @@ boolgen(Node *n, int true, Node *nn)
 			}
 			goto com;
 		}
+		if(typev[n->type->etype]) {
+			testv(n, true);
+			goto com;
+		}
 		regalloc(&nod, n, nn);
 		cgen(n, &nod);
 		o = ONE;
@@ -703,16 +749,22 @@ boolgen(Node *n, int true, Node *nn)
 		patch(p2, pc);
 		goto com;
 
+	case OHI:
+	case OHS:
+	case OLO:
+	case OLS:
+		uns = 1;
+		/* fall through */
 	case OEQ:
 	case ONE:
 	case OLE:
 	case OLT:
 	case OGE:
 	case OGT:
-	case OHI:
-	case OHS:
-	case OLO:
-	case OLS:
+		if(typev[l->type->etype]){
+			cmpv(n, true, Z);
+			goto com;
+		}
 		o = n->op;
 		if(true)
 			o = comrel[relindex(o)];
@@ -727,7 +779,7 @@ boolgen(Node *n, int true, Node *nn)
 			boolgen(&nod, true, nn);
 			break;
 		}
-		if(sconst(r)) {
+		if(!uns && sconst(r) || (uns || o == OEQ || o == ONE) && uconst(r)) {
 			regalloc(&nod, l, nn);
 			cgen(l, &nod);
 			gopcode(o, &nod, Z, r);
@@ -775,13 +827,17 @@ sugen(Node *n, Node *nn, long w)
 
 	if(n == Z || n->type == T)
 		return;
+	if(nn == nodrat)
+		if(w > nrathole)
+			nrathole = w;
 	if(debug['g']) {
 		prtree(nn, "sugen lhs");
 		prtree(n, "sugen");
 	}
-	if(nn == nodrat)
-		if(w > nrathole)
-			nrathole = w;
+	if(typev[n->type->etype]) {
+		diag(n, "old vlong sugen: %O", n->op);
+		return;
+	}
 	switch(n->op) {
 	case OIND:
 		if(nn == Z) {
@@ -792,33 +848,6 @@ sugen(Node *n, Node *nn, long w)
 	default:
 		goto copy;
 
-	case OCONST:
-		if(n->type && typev[n->type->etype]) {
-			if(nn == Z) {
-				nullwarn(n->left, Z);
-				break;
-			}
-
-			t = nn->type;
-			nn->type = types[TLONG];
-			reglcgen(&nod1, nn, Z);
-			nn->type = t;
-
-			if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
-				gopcode(OAS, nod32const(n->vconst>>32), Z, &nod1);
-			else
-				gopcode(OAS, nod32const(n->vconst), Z, &nod1);
-			nod1.xoffset += SZ_LONG;
-			if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
-				gopcode(OAS, nod32const(n->vconst), Z, &nod1);
-			else
-				gopcode(OAS, nod32const(n->vconst>>32), Z, &nod1);
-
-			regfree(&nod1);
-			break;
-		}
-		goto copy;
-
 	case ODOT:
 		l = n->left;
 		sugen(l, nodrat, l->type->width);
@@ -924,6 +953,7 @@ sugen(Node *n, Node *nn, long w)
 		break;
 
 	case OFUNC:
+		/* this transformation should probably be done earlier */
 		if(nn == Z) {
 			sugen(n, nodrat, w);
 			break;
@@ -935,6 +965,7 @@ sugen(Node *n, Node *nn, long w)
 		} else
 			nn = nn->left;
 		n = new(OFUNC, n->left, new(OLIST, nn, n->right));
+		n->complex = FNX;
 		n->type = types[TVOID];
 		n->left->type = types[TVOID];
 		cgen(n, Z);
@@ -969,7 +1000,7 @@ copy:
 		regsalloc(&nod2, nn);
 		nn->type = t;
 
-		gopcode(OAS, &nod1, Z, &nod2);
+		gmove(&nod1, &nod2);
 		regfree(&nod1);
 
 		nod2.type = typ(TIND, t);
@@ -1091,3 +1122,338 @@ layout(Node *f, Node *t, int c, int cv, Node *cn)
 	regfree(&t1);
 	regfree(&t2);
 }
+
+/*
+ * is the vlong's value directly addressable?  true only when n is an ONAME, OCONST or OINDREG node; vcgen uses such a node as an operand as it stands.
+ */
+int
+isvdirect(Node *n)
+{
+	return n->op == ONAME || n->op == OCONST || n->op == OINDREG;
+}
+
+/*
+ * can the constant be used with given vlong op?  returns 1 if constant v is acceptable as the constant operand of vlong operation o, 0 if not; cgen consults it before taking its constant-operand paths.
+ */
+static int
+isvconstable(int o, vlong v)
+{
+	switch(o) {
+	case OADD:
+	case OASADD:
+		/* there isn't an immediate form for ADDE/SUBE, but there are special ADDME/ADDZE etc */
+		return v == 0 || v == -1;	/* only the two constants covered by those special forms */
+	case OAND:
+	case OOR:
+	case OXOR:
+	case OLSHR:
+	case OASHL:
+	case OASHR:
+	case OASLSHR:
+	case OASASHL:
+	case OASASHR:
+		return 1;	/* any constant is accepted for these ops */
+	}
+	return 0;	/* every op not listed above (NOTE(review): that includes assignment forms such as OASOR -- confirm this is intended) */
+}
+
+/*
+ * most 64-bit operations: cgen into a register pair, then operate.
+ * 64-bit comparisons are handled a little differently because the two underlying
+ * comparisons can be compiled separately, since the calculations don't interact.
+ */
+
+static void
+vcgen(Node *n, Node *o, int *f)	/* make *o an operand holding vlong expression n; *f is set to 1 when the caller must regfree(o) afterwards, else 0 */
+{
+	*f = 0;
+	if(!isvdirect(n)) {
+		if(n->complex >= FNX) {	/* FNX or above (the value this code assigns to function calls): evaluate into a regsalloc temporary */
+			regsalloc(o, n);
+			cgen(n, o);
+			return;	/* *f stays 0: cmpv releases such temporaries by restoring cursafe, not by regfree */
+		}
+		*f = 1;	/* both remaining paths allocate through regalloc/reglcgen */
+		if(n->addable < INDEXED && n->op != OIND && n->op != OINDEX) {
+			regalloc(o, n, Z);	/* compute the value itself into o */
+			cgen(n, o);
+		} else
+			reglcgen(o, n, Z);	/* OIND/OINDEX or addable >= INDEXED: let reglcgen form the operand (presumably an addressable reference to n -- confirm in reglcgen) */
+	} else
+		*o = *n;	/* directly addressable: copy the node, no code generated, nothing to free */
+}
+
+static int
+isuns(int op)	/* 1 if op is one of the comparisons OLO, OLS, OHI, OHS (the ones boolgen flags as unsigned), otherwise 0 */
+{
+	switch(op){
+	case OLO:
+	case OLS:
+	case OHI:
+	case OHS:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static void
+gcmpv(Node *l, Node *r, void (*mov)(Node*, Node*, int), int op)	/* compare one half of vlong operands l and r: mov (callers pass gloadhi or gloadlo) fetches that half of each, then gopcode(op) is generated on the two */
+{
+	Node vl, vr;
+
+	regalloc(&vl, &regnode, Z);
+	mov(l, &vl, 0);	/* left operand: mode argument 0 */
+	regalloc(&vr, &regnode, Z);
+	mov(r, &vr, 1+isuns(op));	/* right operand: mode 1, or 2 when op is an unsigned comparison; the modes are interpreted by gloadhi/gloadlo (not shown here) */
+	gopcode(op, &vl, Z, &vr);
+	if(vl.op == OREGISTER)	/* free only if still a register: presumably mov can replace the node with a non-register operand -- confirm in gloadhi/gloadlo */
+		regfree(&vl);
+	if(vr.op == OREGISTER)
+		regfree(&vr);
+}
+
+static void
+brcondv(Node *l, Node *r, int chi, int clo)	/* ordered vlong comparison: relation chi is applied to the high halves and clo to the low halves; the final OGOTO generated here is left unpatched in p */
+{
+	Prog *p1, *p2, *p3, *p4;
+
+	gcmpv(l, r, gloadhi, chi);
+	p1 = p;	/* last instruction generated by gcmpv -- presumably the conditional branch taken when chi holds */
+	gins(ABNE, Z, Z);
+	p2 = p;	/* BNE: high halves differ but chi did not hold */
+	gcmpv(l, r, gloadlo, clo);	/* reached only when the high halves are equal */
+	p3 = p;	/* presumably the branch taken when clo holds */
+	gbranch(OGOTO);
+	p4 = p;	/* neither relation held */
+	patch(p1, pc);	/* both "holds" branches land on the OGOTO generated next */
+	patch(p3, pc);
+	gbranch(OGOTO);	/* left in p unpatched, presumably for the caller to patch */
+	patch(p2, pc);	/* both "does not hold" paths continue after that OGOTO */
+	patch(p4, pc);
+}
+
+static void
+testv(Node *n, int true)	/* test vlong expression n against zero: builds the comparison n != 0 and hands it to cmpv with the same `true' sense */
+{
+	Node nod;
+
+	nod = znode;
+	nod.op = ONE;
+	nod.left = n;
+	nod.right = new1(0, Z, Z);	/* fresh node to receive a private copy of the constant */
+	*nod.right = *nodconst(0);
+	nod.right->type = n->type;	/* give the zero the same type as n */
+	nod.type = types[TLONG];
+	cmpv(&nod, true, Z);
+}
+
+/*
+ * comparison for vlong does high and low order parts separately,
+ * which saves loading the latter if the high order comparison suffices.  n is the comparison node; nn is only printed under debug['g'] and passed on to the recursive call.
+ */
+static void
+cmpv(Node *n, int true, Node *nn)
+{
+	Node *l, *r, nod, nod1;
+	int o, f1, f2;
+	Prog *p1, *p2;
+	long curs;
+
+	if(debug['g']) {
+		if(nn != nil)
+			prtree(nn, "cmpv lhs");
+		prtree(n, "cmpv");
+	}
+	curs = cursafe;	/* saved so regsalloc temporaries made below are released on exit */
+	l = n->left;
+	r = n->right;
+	if(l->complex >= FNX && r->complex >= FNX) {	/* both operands at FNX or above: evaluate r into a temporary first, then redo the comparison against that temporary */
+		regsalloc(&nod1, r);
+		cgen(r, &nod1);
+		nod = *n;
+		nod.right = &nod1;
+		cmpv(&nod, true, nn);
+		cursafe = curs;
+		return;
+	}
+	if(l->complex >= r->complex) {	/* generate the more complex operand first; nod1 is always the left operand, nod the right */
+		vcgen(l, &nod1, &f1);
+		vcgen(r, &nod, &f2);
+	} else {
+		vcgen(r, &nod, &f2);
+		vcgen(l, &nod1, &f1);
+	}
+	nod.type = types[TLONG];	/* from here on the operands are handled as TLONG, one half at a time */
+	nod1.type = types[TLONG];
+	o = n->op;
+	if(true)
+		o = comrel[relindex(o)];	/* same substitution boolgen makes when `true' is set */
+	switch(o){
+	case OEQ:
+		gcmpv(&nod1, &nod, gloadhi, ONE);
+		p1 = p;	/* taken when the high halves differ */
+		gcmpv(&nod1, &nod, gloadlo, ONE);
+		p2 = p;	/* taken when the low halves differ */
+		gbranch(OGOTO);	/* reached only when both halves are equal; left in p unpatched */
+		patch(p1, pc);	/* either inequality continues after that OGOTO */
+		patch(p2, pc);
+		break;
+	case ONE:
+		gcmpv(&nod1, &nod, gloadhi, ONE);
+		p1 = p;	/* taken when the high halves differ */
+		gcmpv(&nod1, &nod, gloadlo, OEQ);
+		p2 = p;	/* taken when the low halves are equal as well */
+		patch(p1, pc);	/* differing high halves land on the OGOTO generated next */
+		gbranch(OGOTO);	/* reached when either half differs; left in p unpatched */
+		patch(p2, pc);	/* fully equal values continue after that OGOTO */
+		break;
+	case OLE:
+		brcondv(&nod1, &nod, OLT, OLS);	/* signed relation on the high halves, unsigned relation on the low halves */
+		break;
+	case OGT:
+		brcondv(&nod1, &nod, OGT, OHI);
+		break;
+	case OLS:
+		brcondv(&nod1, &nod, OLO, OLS);	/* unsigned relation on both halves */
+		break;
+	case OHI:
+		brcondv(&nod1, &nod, OHI, OHI);
+		break;
+	case OLT:
+		brcondv(&nod1, &nod, OLT, OLO);
+		break;
+	case OGE:
+		brcondv(&nod1, &nod, OGT, OHS);
+		break;
+	case OLO:
+		brcondv(&nod1, &nod, OLO, OLO);
+		break;
+	case OHS:
+		brcondv(&nod1, &nod, OHI, OHS);
+		break;
+	default:
+		diag(n, "bad cmpv");
+		return;	/* NOTE(review): returns before the regfree calls and the cursafe restore below -- confirm this is acceptable after a diagnostic */
+	}
+	if(f1)	/* free only what vcgen flagged as needing regfree */
+		regfree(&nod1);
+	if(f2)
+		regfree(&nod);
+	cursafe = curs;
+}
+
+static void
+cgen64(Node *n, Node *nn)	/* generate code for the vlong cases cgen hands over: OCONST, OCAST and OFUNC, with the result going to nn; any other op is diagnosed */
+{
+	Node *l, *r, *d;
+	Node nod, nod1;
+	long curs;
+	Type *t;
+	int o, m;
+
+	curs = cursafe;	/* restored on exit, releasing any regsalloc temporaries made here */
+	l = n->left;
+	r = n->right;
+	o = n->op;
+	switch(o) {
+
+	case OCONST:
+		if(nn == Z) {
+			nullwarn(n->left, Z);
+			break;
+		}
+		if(nn->op != OREGPAIR) {	/* destination is not a register pair: write the constant as two separate longs */
+//prtree(n, "cgen64 const");
+			t = nn->type;
+			nn->type = types[TLONG];	/* retype nn as TLONG just for the reglcgen call, then put its type back */
+			reglcgen(&nod1, nn, Z);
+			nn->type = t;
+
+			if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+				gmove(nod32const(n->vconst>>32), &nod1);	/* big-endian: high word goes at the lower offset */
+			else
+				gmove(nod32const(n->vconst), &nod1);
+			nod1.xoffset += SZ_LONG;	/* step to the second long */
+			if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
+				gmove(nod32const(n->vconst), &nod1);
+			else
+				gmove(nod32const(n->vconst>>32), &nod1);
+
+			regfree(&nod1);
+		} else
+			gmove(n, nn);	/* register-pair destination: a single gmove of the whole constant */
+		break;
+
+	case OCAST:
+		/*
+		 * convert from types l->n->nn
+		 */
+		if(typev[l->type->etype]){
+			/* vlong to non-vlong */
+			if(!isvdirect(l)) {
+				if(l->addable < INDEXED && l->op != OIND && l->op != OINDEX) {
+					regalloc(&nod, l, l);
+					cgen(l, &nod);	/* evaluate the vlong, then keep only the half held in nod.right */
+					regalloc(&nod1, n, nn);
+					gmove(nod.right, &nod1);
+				} else {
+					reglcgen(&nod, l, Z);
+					regalloc(&nod1, n, nn);
+					gloadlo(&nod, &nod1, 0);	/* TO DO: not correct for typefd */
+				}
+				regfree(&nod);
+			} else {
+				regalloc(&nod1, n, nn);
+				gloadlo(l, &nod1, 0);	/* TO DO: not correct for typefd */
+			}
+		}else{
+			/* non-vlong to vlong */
+			regalloc(&nod, l, Z);
+			cgen(l, &nod);
+			regalloc(&nod1, n, nn);
+			gmove(&nod, nod1.right);	/* the source value becomes the half in nod1.right */
+			if(typeu[l->type->etype])
+				gmove(nodconst(0), nod1.left);	/* unsigned source: the other half (nod1.left) is zero */
+			else
+				gopcode(OASHR, nodconst(31), nod1.right, nod1.left);	/* signed source: nod1.left gets nod1.right shifted right arithmetically by 31, i.e. copies of the sign bit */
+			regfree(&nod);
+		}
+		gmove(&nod1, nn);
+		regfree(&nod1);
+		break;
+
+	case OFUNC:
+		/* this transformation should probably be done earlier */
+		if(nn == Z) {	/* result unused: still give the call somewhere to store it */
+			regsalloc(&nod1, n);
+			nn = &nod1;
+		}
+		m = 0;	/* set to 1 when the result must be moved from a temporary into nn after the call */
+		if(nn->op != OIND) {
+			if(nn->op == OREGPAIR) {	/* register-pair destination: the call stores into temporary nod1 instead */
+				m = 1;
+				regsalloc(&nod1, nn);
+				d = &nod1;
+			}else
+				d = nn;
+			d = new1(OADDR, d, Z);	/* pass the address of the destination */
+			d->type = types[TIND];
+			d->addable = 0;
+		} else
+			d = nn->left;	/* nn is *ptr: pass ptr itself */
+		n = new(OFUNC, l, new(OLIST, d, r));	/* rebuild the call with d placed ahead of the original arguments */
+		n->complex = FNX;
+		n->type = types[TVOID];	/* the rewritten call is generated as returning void */
+		n->left->type = types[TVOID];
+		cgen(n, Z);
+		if(m)
+			gmove(&nod1, nn);	/* copy the temporary into the register pair */
+		break;
+
+	default:
+		diag(n, "bad cgen64 %O", o);
+		break;
+	}
+	cursafe = curs;
+}

+ 68 - 12
sys/src/cmd/qc/enam.c

@@ -130,21 +130,11 @@ char	*anames[] =
 	"MOVW",
 	"MOVWU",
 	"MOVFL",
-	"MOVCRFXXX",
 	"MOVCRFS",
-	"MOVCRXRXXX",
-	"MOVFCRXXX",
-	"MFFSXXX",
-	"MFFSCCXXX",
-	"MTCRFXXX",
 	"MTFSB0",
 	"MTFSB0CC",
 	"MTFSB1",
 	"MTFSB1CC",
-	"MTFSFXXX",
-	"MTFSFCCXXX",
-	"MTFSFIXXX",
-	"MTFSFIXXXCC",
 	"MULHW",
 	"MULHWCC",
 	"MULHWU",
@@ -184,9 +174,7 @@ char	*anames[] =
 	"SRAW",
 	"SRAWCC",
 	"SRWCC",
-	"ILLXXX1",
 	"STSW",
-	"STWBRXXX",
 	"STWCCC",
 	"SUB",
 	"SUBCC",
@@ -323,5 +311,73 @@ char	*anames[] =
 	"NMACLHWV",
 	"NMACLHWVCC",
 	"RFCI",
+	"FRES",
+	"FRESCC",
+	"FRSQRTE",
+	"FRSQRTECC",
+	"FSEL",
+	"FSELCC",
+	"FSQRT",
+	"FSQRTCC",
+	"FSQRTS",
+	"FSQRTSCC",
+	"FPSEL",
+	"FPMUL",
+	"FXMUL",
+	"FXPMUL",
+	"FXSMUL",
+	"FPADD",
+	"FPSUB",
+	"FPRE",
+	"FPRSQRTE",
+	"FPMADD",
+	"FXMADD",
+	"FXCPMADD",
+	"FXCSMADD",
+	"FPNMADD",
+	"FXNMADD",
+	"FXCPNMADD",
+	"FXCSNMADD",
+	"FPMSUB",
+	"FXMSUB",
+	"FXCPMSUB",
+	"FXCSMSUB",
+	"FPNMSUB",
+	"FXNMSUB",
+	"FXCPNMSUB",
+	"FXCSNMSUB",
+	"FPABS",
+	"FPNEG",
+	"FPRSP",
+	"FPNABS",
+	"FSCMP",
+	"FSABS",
+	"FSNEG",
+	"FSNABS",
+	"FPCTIW",
+	"FPCTIWZ",
+	"FMOVSPD",
+	"FMOVPSD",
+	"FXCPNPMA",
+	"FXCSNPMA",
+	"FXCPNSMA",
+	"FXCSNSMA",
+	"FXCXNPMA",
+	"FXCXNSMA",
+	"FXCXMA",
+	"FXCXNMS",
+	"FSMOVS",
+	"FSMOVSU",
+	"FSMOVD",
+	"FSMOVDU",
+	"FXMOVS",
+	"FXMOVSU",
+	"FXMOVD",
+	"FXMOVDU",
+	"FPMOVS",
+	"FPMOVSU",
+	"FPMOVD",
+	"FPMOVDU",
+	"FPMOVIW",
 	"LAST",
 };

+ 8 - 0
sys/src/cmd/qc/gc.h

@@ -150,6 +150,7 @@ EXTERN	int	mnstring;
 EXTERN	Node*	nodrat;
 EXTERN	Node*	nodret;
 EXTERN	Node*	nodsafe;
+EXTERN	Node*	nodretv;
 EXTERN	long	nrathole;
 EXTERN	long	nstring;
 EXTERN	Prog*	p;
@@ -249,8 +250,11 @@ void	regind(Node*, Node*);
 void	gprep(Node*, Node*);
 void	raddr(Node*, Prog*);
 void	naddr(Node*, Adr*);
+void	gloadhi(Node*, Node*, int);
+void	gloadlo(Node*, Node*, int);
 void	gmove(Node*, Node*);
 void	gins(int a, Node*, Node*);
+void	gins3(int a, Node*, Node*, Node*);
 void	gopcode(int, Node*, Node*, Node*);
 int	samaddr(Node*, Node*);
 void	gbranch(int);
@@ -258,6 +262,10 @@ void	patch(Prog*, long);
 int	sconst(Node*);
 int	sval(long);
 int	uconst(Node*);
+long	hi64v(Node*);
+long	lo64v(Node*);
+Node*	hi64(Node*);
+Node*	lo64(Node*);
 void	gpseudo(int, Sym*, Node*);
 
 /*

+ 26 - 12
sys/src/cmd/qc/list.c

@@ -42,7 +42,7 @@ Bconv(Fmt *fp)
 int
 Pconv(Fmt *fp)
 {
-	char str[STRINGSZ];
+	char str[STRINGSZ], *s;
 	Prog *p;
 	int a;
 
@@ -50,17 +50,16 @@ Pconv(Fmt *fp)
 	a = p->as;
 	if(a == ADATA)
 		sprint(str, "	%A	%D/%d,%D", a, &p->from, p->reg, &p->to);
-	else
-	if(p->as == ATEXT)
+	else if(p->as == ATEXT)
 		sprint(str, "	%A	%D,%d,%D", a, &p->from, p->reg, &p->to);
-	else
-	if(p->reg == NREG)
-		sprint(str, "	%A	%D,%D", a, &p->from, &p->to);
-	else
-	if(p->from.type != D_FREG)
-		sprint(str, "	%A	%D,R%d,%D", a, &p->from, p->reg, &p->to);
-	else
-		sprint(str, "	%A	%D,F%d,%D", a, &p->from, p->reg, &p->to);
+	else {	/* general case: build the text piece by piece, since the reg and from3 operands are optional */
+		s = seprint(str, str+sizeof(str), "	%A	%D", a, &p->from);	/* s tracks the current end of the text in str */
+		if(p->reg != NREG)
+			s = seprint(s, str+sizeof(str), ",%c%d", p->from.type==D_FREG? 'F': 'R', p->reg);	/* register printed as F<n> when the source operand is D_FREG, else R<n> */
+		if(p->from3.type != D_NONE)
+			s = seprint(s, str+sizeof(str), ",%D", &p->from3);	/* optional third source operand */
+		seprint(s, str+sizeof(str), ",%D", &p->to);	/* the limit must be the end of str; s+sizeof(str) lies past the buffer once s has advanced */
+	}
 	return fmtstrcpy(fp, str);
 }
 
@@ -72,7 +71,7 @@ Aconv(Fmt *fp)
 
 	a = va_arg(fp->args, int);
 	s = "???";
-	if(a >= AXXX && a <= AEND)
+	if(a >= AXXX && a <= ALAST)
 		s = anames[a];
 	return fmtstrcpy(fp, s);
 }
@@ -196,10 +195,25 @@ Nconv(Fmt *fp)
 	char str[STRINGSZ];
 	Adr *a;
 	Sym *s;
+	int i, l, b, n;
 
 	a = va_arg(fp->args, Adr*);
 	s = a->sym;
 	if(s == S) {
+		if(a->offset > 64 || -a->offset > 64) {
+			n = 0;
+			l = a->offset & 1;
+			for(i=0; i<32; i++){
+				b = (a->offset >> i) & 1;
+				if(b != l)
+					n++;
+				l = b;
+			}
+			if(n < 2) {
+				sprint(str, "%#lux", a->offset);
+				goto out;
+			}
+		}
 		sprint(str, "%ld", a->offset);
 		goto out;
 	}

+ 98 - 0
sys/src/cmd/qc/machcap.c

@@ -0,0 +1,98 @@
+#include	"gc.h"
+
+int
+machcap(Node *n)
+{
+	if(n == Z)
+		return 1;	/* test */
+	switch(n->op){
+
+	case OADD:
+	case OAND:
+	case OOR:
+	case OSUB:
+	case OXOR:
+		if(typev[n->left->type->etype])
+			return 1;